git.ozlabs.org Git - ccan/commitdiff
tdb2: remove: it's now in SAMBA where it belongs.
author Rusty Russell <rusty@rustcorp.com.au>
Fri, 9 Mar 2012 02:55:03 +0000 (13:25 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
Fri, 9 Mar 2012 02:55:03 +0000 (13:25 +1030)
133 files changed:
ccan/tdb2/LICENSE [deleted symlink]
ccan/tdb2/_info [deleted file]
ccan/tdb2/check.c [deleted file]
ccan/tdb2/doc/TDB1_porting.txt [deleted file]
ccan/tdb2/doc/design-1.3.txt [deleted file]
ccan/tdb2/doc/design.lyx [deleted file]
ccan/tdb2/doc/design.lyx,v [deleted file]
ccan/tdb2/doc/design.pdf [deleted file]
ccan/tdb2/doc/design.txt [deleted file]
ccan/tdb2/free.c [deleted file]
ccan/tdb2/hash.c [deleted file]
ccan/tdb2/io.c [deleted file]
ccan/tdb2/lock.c [deleted file]
ccan/tdb2/open.c [deleted file]
ccan/tdb2/private.h [deleted file]
ccan/tdb2/summary.c [deleted file]
ccan/tdb2/tdb.c [deleted file]
ccan/tdb2/tdb1_check.c [deleted file]
ccan/tdb2/tdb1_freelist.c [deleted file]
ccan/tdb2/tdb1_hash.c [deleted file]
ccan/tdb2/tdb1_io.c [deleted file]
ccan/tdb2/tdb1_lock.c [deleted file]
ccan/tdb2/tdb1_open.c [deleted file]
ccan/tdb2/tdb1_private.h [deleted file]
ccan/tdb2/tdb1_summary.c [deleted file]
ccan/tdb2/tdb1_tdb.c [deleted file]
ccan/tdb2/tdb1_transaction.c [deleted file]
ccan/tdb2/tdb1_traverse.c [deleted file]
ccan/tdb2/tdb2.h [deleted file]
ccan/tdb2/test/api-12-store.c [deleted file]
ccan/tdb2/test/api-13-delete.c [deleted file]
ccan/tdb2/test/api-14-exists.c [deleted file]
ccan/tdb2/test/api-16-wipe_all.c [deleted file]
ccan/tdb2/test/api-21-parse_record.c [deleted file]
ccan/tdb2/test/api-55-transaction.c [deleted file]
ccan/tdb2/test/api-80-tdb_fd.c [deleted file]
ccan/tdb2/test/api-81-seqnum.c [deleted file]
ccan/tdb2/test/api-82-lockattr.c [deleted file]
ccan/tdb2/test/api-83-openhook.c [deleted file]
ccan/tdb2/test/api-91-get-stats.c [deleted file]
ccan/tdb2/test/api-92-get-set-readonly.c [deleted file]
ccan/tdb2/test/api-93-repack.c [deleted file]
ccan/tdb2/test/api-add-remove-flags.c [deleted file]
ccan/tdb2/test/api-check-callback.c [deleted file]
ccan/tdb2/test/api-firstkey-nextkey.c [deleted file]
ccan/tdb2/test/api-fork-test.c [deleted file]
ccan/tdb2/test/api-locktimeout.c [deleted file]
ccan/tdb2/test/api-missing-entries.c [deleted file]
ccan/tdb2/test/api-open-multiple-times.c [deleted file]
ccan/tdb2/test/api-record-expand.c [deleted file]
ccan/tdb2/test/api-simple-delete.c [deleted file]
ccan/tdb2/test/api-summary.c [deleted file]
ccan/tdb2/test/api-tdb1-flag-removal.c [deleted file]
ccan/tdb2/test/external-agent.c [deleted file]
ccan/tdb2/test/external-agent.h [deleted file]
ccan/tdb2/test/failtest_helper.c [deleted file]
ccan/tdb2/test/failtest_helper.h [deleted file]
ccan/tdb2/test/jenkins-be-hash.tdb1 [deleted file]
ccan/tdb2/test/jenkins-le-hash.tdb1 [deleted file]
ccan/tdb2/test/layout.c [deleted file]
ccan/tdb2/test/layout.h [deleted file]
ccan/tdb2/test/lock-tracking.c [deleted file]
ccan/tdb2/test/lock-tracking.h [deleted file]
ccan/tdb2/test/logging.c [deleted file]
ccan/tdb2/test/logging.h [deleted file]
ccan/tdb2/test/old-nohash-be.tdb1 [deleted file]
ccan/tdb2/test/old-nohash-le.tdb1 [deleted file]
ccan/tdb2/test/run-001-encode.c [deleted file]
ccan/tdb2/test/run-001-fls.c [deleted file]
ccan/tdb2/test/run-01-new_database.c [deleted file]
ccan/tdb2/test/run-02-expand.c [deleted file]
ccan/tdb2/test/run-03-coalesce.c [deleted file]
ccan/tdb2/test/run-04-basichash.c [deleted file]
ccan/tdb2/test/run-05-readonly-open.c [deleted file]
ccan/tdb2/test/run-10-simple-store.c [deleted file]
ccan/tdb2/test/run-11-simple-fetch.c [deleted file]
ccan/tdb2/test/run-12-check.c [deleted file]
ccan/tdb2/test/run-15-append.c [deleted file]
ccan/tdb2/test/run-20-growhash.c [deleted file]
ccan/tdb2/test/run-25-hashoverload.c [deleted file]
ccan/tdb2/test/run-30-exhaust-before-expand.c [deleted file]
ccan/tdb2/test/run-35-convert.c [deleted file]
ccan/tdb2/test/run-50-multiple-freelists.c [deleted file]
ccan/tdb2/test/run-56-open-during-transaction.c [deleted file]
ccan/tdb2/test/run-57-die-during-transaction.c [deleted file]
ccan/tdb2/test/run-64-bit-tdb.c [deleted file]
ccan/tdb2/test/run-90-get-set-attributes.c [deleted file]
ccan/tdb2/test/run-capabilities.c [deleted file]
ccan/tdb2/test/run-expand-in-transaction.c [deleted file]
ccan/tdb2/test/run-features.c [deleted file]
ccan/tdb2/test/run-lockall.c [deleted file]
ccan/tdb2/test/run-remap-in-read_traverse.c [deleted file]
ccan/tdb2/test/run-seed.c [deleted file]
ccan/tdb2/test/run-tdb1-3G-file.c [deleted file]
ccan/tdb2/test/run-tdb1-bad-tdb-header.c [deleted file]
ccan/tdb2/test/run-tdb1-check.c [deleted file]
ccan/tdb2/test/run-tdb1-corrupt.c [deleted file]
ccan/tdb2/test/run-tdb1-endian.c [deleted file]
ccan/tdb2/test/run-tdb1-hashsize.c [deleted file]
ccan/tdb2/test/run-tdb1-incompatible.c [deleted file]
ccan/tdb2/test/run-tdb1-nested-transactions.c [deleted file]
ccan/tdb2/test/run-tdb1-nested-traverse.c [deleted file]
ccan/tdb2/test/run-tdb1-no-lock-during-traverse.c [deleted file]
ccan/tdb2/test/run-tdb1-oldhash.c [deleted file]
ccan/tdb2/test/run-tdb1-readonly-check.c [deleted file]
ccan/tdb2/test/run-tdb1-rwlock-check.c [deleted file]
ccan/tdb2/test/run-tdb1-seqnum-wrap.c [deleted file]
ccan/tdb2/test/run-tdb1-summary.c [deleted file]
ccan/tdb2/test/run-tdb1-traverse-in-transaction.c [deleted file]
ccan/tdb2/test/run-tdb1-wronghash-fail.c [deleted file]
ccan/tdb2/test/run-tdb1-zero-append.c [deleted file]
ccan/tdb2/test/run-tdb1.c [deleted file]
ccan/tdb2/test/run-tdb_errorstr.c [deleted file]
ccan/tdb2/test/run-tdb_foreach.c [deleted file]
ccan/tdb2/test/run-traverse.c [deleted file]
ccan/tdb2/test/rwlock-be.tdb1 [deleted file]
ccan/tdb2/test/rwlock-le.tdb1 [deleted file]
ccan/tdb2/test/tdb1-external-agent.c [deleted file]
ccan/tdb2/test/tdb1-external-agent.h [deleted file]
ccan/tdb2/test/tdb1-lock-tracking.c [deleted file]
ccan/tdb2/test/tdb1-lock-tracking.h [deleted file]
ccan/tdb2/test/tdb1.corrupt [deleted file]
ccan/tdb2/test/tdb2-source.h [deleted file]
ccan/tdb2/tools/Makefile [deleted file]
ccan/tdb2/tools/growtdb-bench.c [deleted file]
ccan/tdb2/tools/mktdb2.c [deleted file]
ccan/tdb2/tools/speed.c [deleted file]
ccan/tdb2/tools/tdb2dump.c [deleted file]
ccan/tdb2/tools/tdb2restore.c [deleted file]
ccan/tdb2/tools/tdb2tool.c [deleted file]
ccan/tdb2/tools/tdb2torture.c [deleted file]
ccan/tdb2/transaction.c [deleted file]
ccan/tdb2/traverse.c [deleted file]

diff --git a/ccan/tdb2/LICENSE b/ccan/tdb2/LICENSE
deleted file mode 120000 (symlink)
index 7455044..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../licenses/LGPL-3
\ No newline at end of file
diff --git a/ccan/tdb2/_info b/ccan/tdb2/_info
deleted file mode 100644 (file)
index d26e06b..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <string.h>
-#include <stdio.h>
-
-/**
- * tdb2 - [[WORK IN PROGRESS!]] The trivial (64bit transactional) database
- *
- * The tdb2 module provides an efficient keyword data mapping (usually
- * within a file).  It supports transactions, so the contents of the
- * database is reliable even across crashes.
- *
- * Example:
- *     #include <ccan/tdb2/tdb2.h>
- *     #include <ccan/str/str.h>
- *     #include <err.h>
- *     #include <stdio.h>
- *     
- *     static void usage(const char *argv0)
- *     {
- *             errx(1, "Usage: %s fetch <dbfile> <key>\n"
- *                  "OR %s store <dbfile> <key> <data>", argv0, argv0);
- *     }
- *     
- *     int main(int argc, char *argv[])
- *     {
- *             struct tdb_context *tdb;
- *             TDB_DATA key, value;
- *             enum TDB_ERROR error;
- *
- *             if (argc < 4)
- *                     usage(argv[0]);
- *     
- *             tdb = tdb_open(argv[2], TDB_DEFAULT, O_CREAT|O_RDWR,0600, NULL);
- *             if (!tdb)
- *                     err(1, "Opening %s", argv[2]);
- *     
- *             key.dptr = (void *)argv[3];
- *             key.dsize = strlen(argv[3]);
- *     
- *             if (streq(argv[1], "fetch")) {
- *                     if (argc != 4)
- *                             usage(argv[0]);
- *                     error = tdb_fetch(tdb, key, &value);
- *                     if (error)
- *                             errx(1, "fetch %s: %s",
- *                                  argv[3], tdb_errorstr(error));
- *                     printf("%.*s\n", value.dsize, (char *)value.dptr);
- *                     free(value.dptr);
- *             } else if (streq(argv[1], "store")) {
- *                     if (argc != 5)
- *                             usage(argv[0]);
- *                     value.dptr = (void *)argv[4];
- *                     value.dsize = strlen(argv[4]);
- *                     error = tdb_store(tdb, key, value, 0);
- *                     if (error)
- *                             errx(1, "store %s: %s",
- *                                  argv[3], tdb_errorstr(error));
- *             } else
- *                     usage(argv[0]);
- *     
- *             return 0;
- *     }
- *
- * Maintainer: Rusty Russell <rusty@rustcorp.com.au>
- *
- * Author: Rusty Russell
- *
- * License: LGPL (v3 or any later version)
- *
- * Ccanlint:
- *     // valgrind breaks fcntl locks.
- *     tests_pass_valgrind test/api-83-openhook.c:FAIL
- */
-int main(int argc, char *argv[])
-{
-       if (argc != 2)
-               return 1;
-
-       if (strcmp(argv[1], "depends") == 0) {
-               printf("ccan/asprintf\n");
-               printf("ccan/hash\n");
-               printf("ccan/likely\n");
-               printf("ccan/asearch\n");
-               printf("ccan/compiler\n");
-               printf("ccan/build_assert\n");
-               printf("ccan/ilog\n");
-               printf("ccan/failtest\n");
-               printf("ccan/tally\n");
-               printf("ccan/typesafe_cb\n");
-               printf("ccan/cast\n");
-               printf("ccan/endian\n");
-               return 0;
-       }
-
-       return 1;
-}
diff --git a/ccan/tdb2/check.c b/ccan/tdb2/check.c
deleted file mode 100644 (file)
index ecd6c13..0000000
+++ /dev/null
@@ -1,870 +0,0 @@
- /*
-   Trivial Database 2: free list/block handling
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-#include <ccan/asearch/asearch.h>
-
-/* We keep an ordered array of offsets. */
-static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
-{
-       tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
-       if (!new)
-               return false;
-       new[(*num)++] = off;
-       *arr = new;
-       return true;
-}
-
-static enum TDB_ERROR check_header(struct tdb_context *tdb, tdb_off_t *recovery,
-                                  uint64_t *features, size_t *num_capabilities)
-{
-       uint64_t hash_test;
-       struct tdb_header hdr;
-       enum TDB_ERROR ecode;
-       tdb_off_t off, next;
-
-       ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr));
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-       /* magic food should not be converted, so convert back. */
-       tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
-
-       hash_test = TDB_HASH_MAGIC;
-       hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
-       if (hdr.hash_test != hash_test) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "check: hash test %llu should be %llu",
-                                 (long long)hdr.hash_test,
-                                 (long long)hash_test);
-       }
-
-       if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "check: bad magic '%.*s'",
-                                 (unsigned)sizeof(hdr.magic_food),
-                                 hdr.magic_food);
-       }
-
-       /* Features which are used must be a subset of features offered. */
-       if (hdr.features_used & ~hdr.features_offered) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "check: features used (0x%llx) which"
-                                 " are not offered (0x%llx)",
-                                 (long long)hdr.features_used,
-                                 (long long)hdr.features_offered);
-       }
-
-       *features = hdr.features_offered;
-       *recovery = hdr.recovery;
-       if (*recovery) {
-               if (*recovery < sizeof(hdr)
-                   || *recovery > tdb->file->map_size) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "tdb_check:"
-                                         " invalid recovery offset %zu",
-                                         (size_t)*recovery);
-               }
-       }
-
-       for (off = hdr.capabilities; off && ecode == TDB_SUCCESS; off = next) {
-               const struct tdb_capability *cap;
-               enum TDB_ERROR err;
-
-               cap = tdb_access_read(tdb, off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap)) {
-                       return TDB_PTR_ERR(cap);
-               }
-
-               /* All capabilities are unknown. */
-               err = unknown_capability(tdb, "tdb_check", cap->type);
-               next = cap->next;
-               tdb_access_release(tdb, cap);
-               if (err)
-                       return err;
-               (*num_capabilities)++;
-       }
-
-       /* Don't check reserved: they *can* be used later. */
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
-                                     tdb_off_t off, unsigned int group_bits,
-                                     uint64_t hprefix,
-                                     unsigned hprefix_bits,
-                                     tdb_off_t used[],
-                                     size_t num_used,
-                                     size_t *num_found,
-                                     enum TDB_ERROR (*check)(TDB_DATA,
-                                                             TDB_DATA, void *),
-                                     void *data);
-
-static enum TDB_ERROR check_hash_chain(struct tdb_context *tdb,
-                                      tdb_off_t off,
-                                      uint64_t hash,
-                                      tdb_off_t used[],
-                                      size_t num_used,
-                                      size_t *num_found,
-                                      enum TDB_ERROR (*check)(TDB_DATA,
-                                                              TDB_DATA,
-                                                              void *),
-                                      void *data)
-{
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-
-       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (rec_magic(&rec) != TDB_CHAIN_MAGIC) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash chain magic %llu",
-                                 (long long)rec_magic(&rec));
-       }
-
-       if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check:"
-                                 " Bad hash chain length %llu vs %zu",
-                                 (long long)rec_data_length(&rec),
-                                 sizeof(struct tdb_chain));
-       }
-       if (rec_key_length(&rec) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash chain key length %llu",
-                                 (long long)rec_key_length(&rec));
-       }
-       if (rec_hash(&rec) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash chain hash value %llu",
-                                 (long long)rec_hash(&rec));
-       }
-
-       off += sizeof(rec);
-       ecode = check_hash_tree(tdb, off, 0, hash, 64,
-                               used, num_used, num_found, check, data);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
-       if (TDB_OFF_IS_ERR(off)) {
-               return TDB_OFF_TO_ERR(off);
-       }
-       if (off == 0)
-               return TDB_SUCCESS;
-       (*num_found)++;
-       return check_hash_chain(tdb, off, hash, used, num_used, num_found,
-                               check, data);
-}
-
-static enum TDB_ERROR check_hash_record(struct tdb_context *tdb,
-                                       tdb_off_t off,
-                                       uint64_t hprefix,
-                                       unsigned hprefix_bits,
-                                       tdb_off_t used[],
-                                       size_t num_used,
-                                       size_t *num_found,
-                                       enum TDB_ERROR (*check)(TDB_DATA,
-                                                               TDB_DATA,
-                                                               void *),
-                                       void *data)
-{
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-
-       if (hprefix_bits >= 64)
-               return check_hash_chain(tdb, off, hprefix, used, num_used,
-                                       num_found, check, data);
-
-       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (rec_magic(&rec) != TDB_HTABLE_MAGIC) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash table magic %llu",
-                                 (long long)rec_magic(&rec));
-       }
-       if (rec_data_length(&rec)
-           != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check:"
-                                 " Bad hash table length %llu vs %llu",
-                                 (long long)rec_data_length(&rec),
-                                 (long long)sizeof(tdb_off_t)
-                                 << TDB_SUBLEVEL_HASH_BITS);
-       }
-       if (rec_key_length(&rec) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash table key length %llu",
-                                 (long long)rec_key_length(&rec));
-       }
-       if (rec_hash(&rec) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Bad hash table hash value %llu",
-                                 (long long)rec_hash(&rec));
-       }
-
-       off += sizeof(rec);
-       return check_hash_tree(tdb, off,
-                              TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
-                              hprefix, hprefix_bits,
-                              used, num_used, num_found, check, data);
-}
-
-static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
-{
-       /* Can overflow an int. */
-       return *a > *b ? 1
-               : *a < *b ? -1
-               : 0;
-}
-
-static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
-{
-       *used += num;
-
-       return (h >> (64 - *used)) & ((1U << num) - 1);
-}
-
-static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
-                                     tdb_off_t off, unsigned int group_bits,
-                                     uint64_t hprefix,
-                                     unsigned hprefix_bits,
-                                     tdb_off_t used[],
-                                     size_t num_used,
-                                     size_t *num_found,
-                                     enum TDB_ERROR (*check)(TDB_DATA,
-                                                             TDB_DATA, void *),
-                                     void *data)
-{
-       unsigned int g, b;
-       const tdb_off_t *hash;
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-
-       hash = tdb_access_read(tdb, off,
-                              sizeof(tdb_off_t)
-                              << (group_bits + TDB_HASH_GROUP_BITS),
-                              true);
-       if (TDB_PTR_IS_ERR(hash)) {
-               return TDB_PTR_ERR(hash);
-       }
-
-       for (g = 0; g < (1 << group_bits); g++) {
-               const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
-               for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
-                       unsigned int bucket, i, used_bits;
-                       uint64_t h;
-                       tdb_off_t *p;
-                       if (group[b] == 0)
-                               continue;
-
-                       off = group[b] & TDB_OFF_MASK;
-                       p = asearch(&off, used, num_used, off_cmp);
-                       if (!p) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "tdb_check: Invalid offset"
-                                                  " %llu in hash",
-                                                  (long long)off);
-                               goto fail;
-                       }
-                       /* Mark it invalid. */
-                       *p ^= 1;
-                       (*num_found)++;
-
-                       if (hprefix_bits == 64) {
-                               /* Chained entries are unordered. */
-                               if (is_subhash(group[b])) {
-                                       ecode = TDB_ERR_CORRUPT;
-                                       tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "tdb_check: Invalid chain"
-                                                  " entry subhash");
-                                       goto fail;
-                               }
-                               h = hash_record(tdb, off);
-                               if (h != hprefix) {
-                                       ecode = TDB_ERR_CORRUPT;
-                                       tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad hash chain"
-                                                  " placement"
-                                                  " 0x%llx vs 0x%llx",
-                                                  (long long)h,
-                                                  (long long)hprefix);
-                                       goto fail;
-                               }
-                               ecode = tdb_read_convert(tdb, off, &rec,
-                                                        sizeof(rec));
-                               if (ecode != TDB_SUCCESS) {
-                                       goto fail;
-                               }
-                               goto check;
-                       }
-
-                       if (is_subhash(group[b])) {
-                               uint64_t subprefix;
-                               subprefix = (hprefix
-                                    << (group_bits + TDB_HASH_GROUP_BITS))
-                                       + g * (1 << TDB_HASH_GROUP_BITS) + b;
-
-                               ecode = check_hash_record(tdb,
-                                              group[b] & TDB_OFF_MASK,
-                                              subprefix,
-                                              hprefix_bits
-                                                      + group_bits
-                                                      + TDB_HASH_GROUP_BITS,
-                                              used, num_used, num_found,
-                                              check, data);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto fail;
-                               }
-                               continue;
-                       }
-                       /* A normal entry */
-
-                       /* Does it belong here at all? */
-                       h = hash_record(tdb, off);
-                       used_bits = 0;
-                       if (get_bits(h, hprefix_bits, &used_bits) != hprefix
-                           && hprefix_bits) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad hash placement"
-                                                  " 0x%llx vs 0x%llx",
-                                                  (long long)h,
-                                                  (long long)hprefix);
-                               goto fail;
-                       }
-
-                       /* Does it belong in this group? */
-                       if (get_bits(h, group_bits, &used_bits) != g) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad group %llu"
-                                                  " vs %u",
-                                                  (long long)h, g);
-                               goto fail;
-                       }
-
-                       /* Are bucket bits correct? */
-                       bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
-                       if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
-                           != bucket) {
-                               used_bits -= TDB_HASH_GROUP_BITS;
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad bucket %u vs %u",
-                                                  (unsigned)get_bits(h,
-                                                       TDB_HASH_GROUP_BITS,
-                                                       &used_bits),
-                                                  bucket);
-                               goto fail;
-                       }
-
-                       /* There must not be any zero entries between
-                        * the bucket it belongs in and this one! */
-                       for (i = bucket;
-                            i != b;
-                            i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
-                               if (group[i] == 0) {
-                                       ecode = TDB_ERR_CORRUPT;
-                                       tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "check: bad group placement"
-                                                  " %u vs %u",
-                                                  b, bucket);
-                                       goto fail;
-                               }
-                       }
-
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-                       if (ecode != TDB_SUCCESS) {
-                               goto fail;
-                       }
-
-                       /* Bottom bits must match header. */
-                       if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "tdb_check: Bad hash magic"
-                                                  " at offset %llu"
-                                                  " (0x%llx vs 0x%llx)",
-                                                  (long long)off,
-                                                  (long long)h,
-                                                  (long long)rec_hash(&rec));
-                               goto fail;
-                       }
-
-               check:
-                       if (check) {
-                               TDB_DATA k, d;
-                               const unsigned char *kptr;
-
-                               kptr = tdb_access_read(tdb,
-                                                      off + sizeof(rec),
-                                                      rec_key_length(&rec)
-                                                      + rec_data_length(&rec),
-                                                      false);
-                               if (TDB_PTR_IS_ERR(kptr)) {
-                                       ecode = TDB_PTR_ERR(kptr);
-                                       goto fail;
-                               }
-
-                               k = tdb_mkdata(kptr, rec_key_length(&rec));
-                               d = tdb_mkdata(kptr + k.dsize,
-                                              rec_data_length(&rec));
-                               ecode = check(k, d, data);
-                               tdb_access_release(tdb, kptr);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto fail;
-                               }
-                       }
-               }
-       }
-       tdb_access_release(tdb, hash);
-       return TDB_SUCCESS;
-
-fail:
-       tdb_access_release(tdb, hash);
-       return ecode;
-}
-
-/* Verify the hash tree starting at the top-level table in the header.
- *
- * @used/@num_used: offsets of the used records collected by the caller.
- * @num_other_used: used records that are not expected in the hash
- *                  (free tables and capabilities).
- * @check/@data:    passed through to check_hash_tree().
- *
- * Returns TDB_SUCCESS, the error from check_hash_tree(), or the result of
- * logging TDB_ERR_CORRUPT if the number of records accounted for does not
- * equal @num_used. */
-static enum TDB_ERROR check_hash(struct tdb_context *tdb,
-                                tdb_off_t used[],
-                                size_t num_used, size_t num_other_used,
-                                enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *),
-                                void *data)
-{
-       /* Start the tally with the used records that never appear in the
-        * hash: free tables and capabilities. */
-       size_t seen = num_other_used;
-       enum TDB_ERROR err;
-
-       err = check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
-                             TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
-                             0, 0, used, num_used, &seen,
-                             check, data);
-       if (err != TDB_SUCCESS) {
-               return err;
-       }
-
-       if (seen == num_used) {
-               return TDB_SUCCESS;
-       }
-
-       /* Some used record was never reached from the hash. */
-       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                         "tdb_check: Not all entries"
-                         " are in hash");
-}
-
-/* Validate a single free record that was found on a free list.
- *
- * @off:    file offset of the record.
- * @frec:   the record, already read and converted by the caller.
- * @prev:   offset the record's back pointer is expected to hold; 0 skips
- *          the back pointer check.
- * @ftable: index of the free table the record was found in.
- * @bucket: index of the bucket the record was found in.
- *
- * Returns TDB_SUCCESS, the error from the out-of-bounds check, or the
- * result of logging TDB_ERR_CORRUPT for the first inconsistency found. */
-static enum TDB_ERROR check_free(struct tdb_context *tdb,
-                                tdb_off_t off,
-                                const struct tdb_free_record *frec,
-                                tdb_off_t prev, unsigned int ftable,
-                                unsigned int bucket)
-{
-       enum TDB_ERROR ecode;
-
-       if (frec_magic(frec) != TDB_FREE_MAGIC) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu bad magic 0x%llx",
-                                 (long long)off,
-                                 (long long)frec->magic_and_prev);
-       }
-       if (frec_ftable(frec) != ftable) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu bad freetable %u",
-                                 (long long)off, frec_ftable(frec));
-       }
-
-       /* The whole record (header plus free length) must lie in the file. */
-       ecode = tdb->tdb2.io->oob(tdb, off,
-                                 frec_len(frec)
-                                 + sizeof(struct tdb_used_record),
-                                 false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-       if (size_to_bucket(frec_len(frec)) != bucket) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu in wrong bucket"
-                                 " (%u vs %u)",
-                                 (long long)off,
-                                 bucket, size_to_bucket(frec_len(frec)));
-       }
-       if (prev && prev != frec_prev(frec)) {
-               /* Report the back pointer that was actually compared, not
-                * the record length. */
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: offset %llu bad prev"
-                                 " (%llu vs %llu)",
-                                 (long long)off,
-                                 (long long)prev, (long long)frec_prev(frec));
-       }
-       return TDB_SUCCESS;
-}
-
-/* Check one free table and every free list hanging off its buckets.
- *
- * @ftable_off: file offset of the free table.
- * @ftable_num: index of this table, passed to check_free() for each entry.
- * @fr/@num_free: offsets of the free records collected by the caller;
- *              entries found on a list are modified in place (see below).
- * @num_found:  incremented once for every list entry matched in @fr.
- *
- * Returns TDB_SUCCESS or the first error encountered. */
-static enum TDB_ERROR check_free_table(struct tdb_context *tdb,
-                                      tdb_off_t ftable_off,
-                                      unsigned ftable_num,
-                                      tdb_off_t fr[],
-                                      size_t num_free,
-                                      size_t *num_found)
-{
-       struct tdb_freetable ft;
-       tdb_off_t h;
-       unsigned int i;
-       enum TDB_ERROR ecode;
-
-       ecode = tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft));
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* The table's own record header: right magic, no key, data exactly
-        * the size of the table body, zero hash. */
-       if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC
-           || rec_key_length(&ft.hdr) != 0
-           || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
-           || rec_hash(&ft.hdr) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: Invalid header on free table");
-       }
-
-       for (i = 0; i < TDB_FREE_BUCKETS; i++) {
-               tdb_off_t off, prev = 0, *p, first = 0;
-               struct tdb_free_record f;
-
-               h = bucket_off(ftable_off, i);
-               for (off = tdb_read_off(tdb, h); off; off = f.next) {
-                       if (TDB_OFF_IS_ERR(off)) {
-                               return TDB_OFF_TO_ERR(off);
-                       }
-                       /* Only the value read from the bucket head is
-                        * masked; presumably it carries extra bits outside
-                        * TDB_OFF_MASK -- TODO confirm. */
-                       if (!first) {
-                               off &= TDB_OFF_MASK;
-                               first = off;
-                       }
-                       ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       /* For the first entry prev is still 0, so its back
-                        * pointer is not checked here; see below. */
-                       ecode = check_free(tdb, off, &f, prev, ftable_num, i);
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-
-                       /* FIXME: Check hash bits */
-                       p = asearch(&off, fr, num_free, off_cmp);
-                       if (!p) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: Invalid offset"
-                                                 " %llu in free table",
-                                                 (long long)off);
-                       }
-                       /* Mark it invalid. */
-                       *p ^= 1;
-                       (*num_found)++;
-                       prev = off;
-               }
-
-               if (first) {
-                       /* Now we can check first back pointer. */
-                       /* prev now holds the last entry walked, which is
-                        * what the first entry's back pointer is compared
-                        * against. */
-                       ecode = tdb_read_convert(tdb, first, &f, sizeof(f));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       ecode = check_free(tdb, first, &f, prev, ftable_num, i);
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-               }
-       }
-       return TDB_SUCCESS;
-}
-
-/* Measure the run of dead bytes (zero or the 0x43 fill byte) starting at
- * @off.  Stops at the first other byte or at the end of the mapped file.
- *
- * Returns the length of the run, or an error encoded with TDB_ERR_TO_OFF()
- * if a read fails.
- *
- * Slow (one read per byte), but should be very rare. */
-tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off)
-{
-       size_t len;
-       enum TDB_ERROR ecode;
-
-       for (len = 0; off + len < tdb->file->map_size; len++) {
-               char c;
-               /* Read the byte at the current scan position, not always
-                * the first byte of the region. */
-               ecode = tdb->tdb2.io->tread(tdb, off + len, &c, 1);
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-               if (c != 0 && c != 0x43)
-                       break;
-       }
-       return len;
-}
-
-/* Walk every record from the end of the header to the end of the file,
- * classifying each one by its magic.
- *
- * @used/@num_used: offsets of used-type records are appended here.
- * @fr/@num_free:   offsets of free records that belong to a free table are
- *                  appended here.
- * @features:       when non-zero, the zero-padding check is skipped.
- * @recovery:       offset where a recovery area is expected, 0 for none.
- *
- * Returns TDB_SUCCESS or the first error encountered. */
-static enum TDB_ERROR check_linear(struct tdb_context *tdb,
-                                  tdb_off_t **used, size_t *num_used,
-                                  tdb_off_t **fr, size_t *num_free,
-                                  uint64_t features, tdb_off_t recovery)
-{
-       tdb_off_t off;
-       tdb_len_t len;
-       enum TDB_ERROR ecode;
-       bool found_recovery = false;
-
-       /* Every branch below either returns or sets len, which advances
-        * the scan to the next record. */
-       for (off = sizeof(struct tdb_header);
-            off < tdb->file->map_size;
-            off += len) {
-               union {
-                       struct tdb_used_record u;
-                       struct tdb_free_record f;
-                       struct tdb_recovery_record r;
-               } rec;
-               /* r is larger: only get that if we need to. */
-               ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f));
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-               /* If we crash after ftruncate, we can get zeroes or fill. */
-               if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
-                   || rec.r.magic ==  0x4343434343434343ULL) {
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       if (recovery == off) {
-                               /* This is the recovery area the header
-                                * points at: step over it. */
-                               found_recovery = true;
-                               len = sizeof(rec.r) + rec.r.max_len;
-                       } else {
-                               /* Not the recovery area: treat it as dead
-                                * space, measure it and warn. */
-                               len = dead_space(tdb, off);
-                               if (TDB_OFF_IS_ERR(len)) {
-                                       return TDB_OFF_TO_ERR(len);
-                               }
-                               if (len < sizeof(rec.r)) {
-                                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                         TDB_LOG_ERROR,
-                                                         "tdb_check: invalid"
-                                                         " dead space at %zu",
-                                                         (size_t)off);
-                               }
-
-                               tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                                          "Dead space at %zu-%zu (of %zu)",
-                                          (size_t)off, (size_t)(off + len),
-                                          (size_t)tdb->file->map_size);
-                       }
-               } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
-                       /* A live recovery record: it must be at the expected
-                        * offset and its lengths must be sane. */
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       if (recovery != off) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: unexpected"
-                                                 " recovery record at offset"
-                                                 " %zu",
-                                                 (size_t)off);
-                       }
-                       if (rec.r.len > rec.r.max_len) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: invalid recovery"
-                                                 " length %zu",
-                                                 (size_t)rec.r.len);
-                       }
-                       if (rec.r.eof > tdb->file->map_size) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: invalid old EOF"
-                                                 " %zu", (size_t)rec.r.eof);
-                       }
-                       found_recovery = true;
-                       len = sizeof(rec.r) + rec.r.max_len;
-               } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
-                       len = sizeof(rec.u) + frec_len(&rec.f);
-                       if (off + len > tdb->file->map_size) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: free overlength"
-                                                 " %llu at offset %llu",
-                                                 (long long)len,
-                                                 (long long)off);
-                       }
-                       /* This record should be in free lists. */
-                       /* Records marked TDB_FTABLE_NONE are not tracked. */
-                       if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
-                           && !append(fr, num_free, off)) {
-                               return tdb_logerr(tdb, TDB_ERR_OOM,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: tracking %zu'th"
-                                                 " free record.", *num_free);
-                       }
-               } else if (rec_magic(&rec.u) == TDB_USED_MAGIC
-                          || rec_magic(&rec.u) == TDB_CHAIN_MAGIC
-                          || rec_magic(&rec.u) == TDB_HTABLE_MAGIC
-                          || rec_magic(&rec.u) == TDB_FTABLE_MAGIC
-                          || rec_magic(&rec.u) == TDB_CAP_MAGIC) {
-                       uint64_t klen, dlen, extra;
-
-                       /* This record is used! */
-                       if (!append(used, num_used, off)) {
-                               return tdb_logerr(tdb, TDB_ERR_OOM,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: tracking %zu'th"
-                                                 " used record.", *num_used);
-                       }
-
-                       klen = rec_key_length(&rec.u);
-                       dlen = rec_data_length(&rec.u);
-                       extra = rec_extra_padding(&rec.u);
-
-                       /* Total size: header, key, data, then padding. */
-                       len = sizeof(rec.u) + klen + dlen + extra;
-                       if (off + len > tdb->file->map_size) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: used overlength"
-                                                 " %llu at offset %llu",
-                                                 (long long)len,
-                                                 (long long)off);
-                       }
-
-                       /* A used record may not be smaller than a free
-                        * record header. */
-                       if (len < sizeof(rec.f)) {
-                               return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                 TDB_LOG_ERROR,
-                                                 "tdb_check: too short record"
-                                                 " %llu at %llu",
-                                                 (long long)len,
-                                                 (long long)off);
-                       }
-
-                       /* Check that records have correct 0 at end (but may
-                        * not in future). */
-                       /* Only the first padding byte is examined, and only
-                        * when no features are set and the record is not a
-                        * capability. */
-                       if (extra && !features
-                           && rec_magic(&rec.u) != TDB_CAP_MAGIC) {
-                               const char *p;
-                               char c;
-                               p = tdb_access_read(tdb, off + sizeof(rec.u)
-                                                   + klen + dlen, 1, false);
-                               if (TDB_PTR_IS_ERR(p))
-                                       return TDB_PTR_ERR(p);
-                               c = *p;
-                               tdb_access_release(tdb, p);
-
-                               if (c != '\0') {
-                                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                         TDB_LOG_ERROR,
-                                                         "tdb_check:"
-                                                         " non-zero extra"
-                                                         " at %llu",
-                                                         (long long)off);
-                               }
-                       }
-               } else {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                         TDB_LOG_ERROR,
-                                         "tdb_check: Bad magic 0x%llx"
-                                         " at offset %zu",
-                                         (long long)rec_magic(&rec.u),
-                                         (size_t)off);
-               }
-       }
-
-       /* We must have found recovery area if there was one. */
-       if (recovery != 0 && !found_recovery) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_check: expected a recovery area at %zu",
-                                 (size_t)recovery);
-       }
-
-       return TDB_SUCCESS;
-}
-
-/* Check the consistency of a database.
- *
- * @check/@data: passed through to check_hash(), or to tdb1_check() for
- *               TDB_VERSION1 databases.
- *
- * Databases flagged TDB_CANT_CHECK are not checked: a warning is logged
- * with TDB_SUCCESS and the result of tdb_logerr() is returned.
- *
- * Otherwise a read lock on all records and on expansion is held for the
- * duration of the check.  The result is also stored in tdb->last_error on
- * every path that reaches the locking code. */
-enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
-                         enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *),
-                         void *data)
-{
-       tdb_off_t *fr = NULL, *used = NULL, ft, recovery;
-       size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0,
-               num_capabilities = 0;
-       uint64_t features;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_CANT_CHECK) {
-               return tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                                 "tdb_check: database has unknown capability,"
-                                 " cannot check.");
-       }
-
-       /* Version 1 databases are handled entirely by tdb1_check(), which
-        * reports failure as -1 with the error left in tdb->last_error. */
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_check(tdb, check, data) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
-       if (ecode != TDB_SUCCESS) {
-               return tdb->last_error = ecode;
-       }
-
-       ecode = tdb_lock_expand(tdb, F_RDLCK);
-       if (ecode != TDB_SUCCESS) {
-               /* Drop the lock already taken before bailing out. */
-               tdb_allrecord_unlock(tdb, F_RDLCK);
-               return tdb->last_error = ecode;
-       }
-
-       ecode = check_header(tdb, &recovery, &features, &num_capabilities);
-       if (ecode != TDB_SUCCESS)
-               goto out;
-
-       /* First we do a linear scan, checking all records. */
-       ecode = check_linear(tdb, &used, &num_used, &fr, &num_free, features,
-                            recovery);
-       if (ecode != TDB_SUCCESS)
-               goto out;
-
-       /* Check each free table in turn; num_ftables doubles as the index
-        * of the table currently being checked. */
-       for (ft = first_ftable(tdb); ft; ft = next_ftable(tdb, ft)) {
-               if (TDB_OFF_IS_ERR(ft)) {
-                       ecode = TDB_OFF_TO_ERR(ft);
-                       goto out;
-               }
-               ecode = check_free_table(tdb, ft, num_ftables, fr, num_free,
-                                        &num_found);
-               if (ecode != TDB_SUCCESS)
-                       goto out;
-               num_ftables++;
-       }
-
-       /* FIXME: Check key uniqueness? */
-       ecode = check_hash(tdb, used, num_used, num_ftables + num_capabilities,
-                          check, data);
-       if (ecode != TDB_SUCCESS)
-               goto out;
-
-       /* Every free record seen by the linear scan must have been found
-        * on some free list. */
-       if (num_found != num_free) {
-               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                  "tdb_check: Not all entries are in"
-                                  " free table");
-       }
-
-out:
-       /* Common exit: release both locks and the tracking arrays. */
-       tdb_allrecord_unlock(tdb, F_RDLCK);
-       tdb_unlock_expand(tdb, F_RDLCK);
-       free(fr);
-       free(used);
-       return tdb->last_error = ecode;
-}
diff --git a/ccan/tdb2/doc/TDB1_porting.txt b/ccan/tdb2/doc/TDB1_porting.txt
deleted file mode 100644 (file)
index ef305ca..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-Interface differences between TDB1 and TDB2.
-
-- tdb2 uses 'struct tdb_data', tdb1 uses 'struct TDB_DATA'.  Use the
-  TDB_DATA typedef if you want portability between the two.
-
-- tdb2 functions return 0 on success, and a negative error on failure,
-  whereas tdb1 functions returned 0 on success, and -1 on failure.
-  tdb1 then used tdb_error() to determine the error; this is also
-  supported in tdb2 to ease backwards compatibility, though the other
-  form is preferred.
-
-- tdb2's tdb_fetch() returns an error, tdb1's returned the data directly
-  (or tdb_null, and you were supposed to check tdb_error() to find out why).
-
-- tdb2's tdb_nextkey() frees the old key's dptr; in tdb1 you needed to do
-  this manually.
-
-- tdb1's tdb_open/tdb_open_ex took an explicit hash size.  tdb2's hash table
-  resizes as required.
-
-- tdb2 uses a linked list of attribute structures to implement logging and
-  alternate hashes.  tdb1 used tdb_open_ex, which was not extensible.
-
-- tdb2 does locking on read-only databases (ie. O_RDONLY passed to tdb_open).
-  tdb1 did not: use the TDB_NOLOCK flag if you want to suppress locking.
-
-- tdb2's log function is simpler than tdb1's log function.  The string is
-  already formatted, and it takes an enum tdb_log_level not a tdb_debug_level,
-  and which has only three values: TDB_LOG_ERROR, TDB_LOG_USE_ERROR and
-  TDB_LOG_WARNING.
-
-- tdb2 provides tdb_deq() for comparing two struct tdb_data.
-
-- tdb2's tdb_name() returns a copy of the name even for TDB_INTERNAL dbs.
-
-- tdb2 does not need tdb_reopen() or tdb_reopen_all().  If you call
-  fork() during certain operations, the child should close the
-  tdb, or complete the operations before continuing to use the tdb:
-
-       tdb_transaction_start(): child must tdb_transaction_cancel()
-       tdb_lockall(): child must call tdb_unlockall()
-       tdb_lockall_read(): child must call tdb_unlockall_read()
-       tdb_chainlock(): child must call tdb_chainunlock()
-       tdb_parse() callback: child must return from tdb_parse()
-
-- tdb2 will not open a non-tdb file, even if O_CREAT is specified.
-
-- There is no tdb_traverse_read.  For operating on TDB1 files, you can
-  simulate it by tdb_add_flag(tdb, TDB_RDONLY); tdb_traverse();
-  tdb_remove_flag(tdb, TDB_RDONLY).  This may be desirable because
-  traverse on TDB1 files uses a write lock on the entire database
-  unless it's read-only.
-
-- Failure inside a transaction (such as a lock function failing) does
-  not implicitly cancel the transaction; you still need to call
-  tdb_transaction_cancel().
-
-TDB1 Compatibility:
-
-- tdb2 offers a tdb1_incompatible_hash function, which is the same
-  as the default hash with the TDB_INCOMPATIBLE_HASH flag.  There is
-  no way of marking an old TDB incompatible with versions < 1.2.6
-  while using any other hash.
-
-- The TDB_ATTRIBUTE_TDB1_HASHSIZE attribute can be used to control the
-  hash size, but only when creating (ie. O_CREAT) a TDB1
-  (ie. TDB_VERSION1).
-
-- There is no TDB_CLEAR_IF_FIRST flag; it has severe scalability and
-  API problems.  If necessary, you can emulate this by using the open
-  hook and placing a 1-byte lock at offset 4.  If your program forks,
-  you will need to place this lock again in the child.
diff --git a/ccan/tdb2/doc/design-1.3.txt b/ccan/tdb2/doc/design-1.3.txt
deleted file mode 100644 (file)
index 651ada0..0000000
+++ /dev/null
@@ -1,1050 +0,0 @@
-TDB2: Redesigning The Trivial DataBase
-
-Rusty Russell, IBM Corporation
-
-27-April-2010
-
-Abstract
-
-The Trivial DataBase on-disk format is 32 bits; with usage cases 
-heading towards the 4G limit, that must change. This required 
-breakage provides an opportunity to revisit TDB's other design 
-decisions and reassess them.
-
-1 Introduction
-
-The Trivial DataBase was originally written by Andrew Tridgell as 
-a simple key/data pair storage system with the same API as dbm, 
-but allowing multiple readers and writers while being small 
-enough (< 1000 lines of C) to include in SAMBA. The simple design 
-created in 1999 has proven surprisingly robust and performant, 
-used in Samba versions 3 and 4 as well as numerous other 
-projects. Its useful life was greatly increased by the 
-(backwards-compatible!) addition of transaction support in 2005.
-
-The wider variety and greater demands of TDB-using code have led 
-to some organic growth of the API, as well as some compromises on 
-the implementation. None of these, by themselves, are seen as 
-show-stoppers, but the cumulative effect is a loss of elegance 
-over the initial, simple TDB implementation. Here is a table of 
-the approximate number of lines of implementation code and number 
-of API functions at the end of each year:
-
-
-+-----------+----------------+--------------------------------+
-| Year End  | API Functions  | Lines of C Code Implementation |
-+-----------+----------------+--------------------------------+
-+-----------+----------------+--------------------------------+
-|   1999    |      13        |              1195              |
-+-----------+----------------+--------------------------------+
-|   2000    |      24        |              1725              |
-+-----------+----------------+--------------------------------+
-|   2001    |      32        |              2228              |
-+-----------+----------------+--------------------------------+
-|   2002    |      35        |              2481              |
-+-----------+----------------+--------------------------------+
-|   2003    |      35        |              2552              |
-+-----------+----------------+--------------------------------+
-|   2004    |      40        |              2584              |
-+-----------+----------------+--------------------------------+
-|   2005    |      38        |              2647              |
-+-----------+----------------+--------------------------------+
-|   2006    |      52        |              3754              |
-+-----------+----------------+--------------------------------+
-|   2007    |      66        |              4398              |
-+-----------+----------------+--------------------------------+
-|   2008    |      71        |              4768              |
-+-----------+----------------+--------------------------------+
-|   2009    |      73        |              5715              |
-+-----------+----------------+--------------------------------+
-
-
-This review is an attempt to catalog and address all the known 
-issues with TDB and create solutions which address the problems 
-without significantly increasing complexity; all involved are far 
-too aware of the dangers of second system syndrome in rewriting a 
-successful project like this.
-
-2 API Issues
-
-2.1 tdb_open_ex Is Not Expandable
-
-The tdb_open() call was expanded to tdb_open_ex(), which added an 
-optional hashing function and an optional logging function 
-argument. Additional arguments to open would require the 
-introduction of a tdb_open_ex2 call etc.
-
-2.1.1 Proposed Solution
-
-tdb_open() will take a linked-list of attributes:
-
-enum tdb_attribute {
-
-    TDB_ATTRIBUTE_LOG = 0,
-
-    TDB_ATTRIBUTE_HASH = 1
-
-};
-
-struct tdb_attribute_base {
-
-    enum tdb_attribute attr;
-
-    union tdb_attribute *next;
-
-};
-
-struct tdb_attribute_log {
-
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG 
-*/
-
-    tdb_log_func log_fn;
-
-    void *log_private;
-
-};
-
-struct tdb_attribute_hash {
-
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH 
-*/
-
-    tdb_hash_func hash_fn;
-
-    void *hash_private;
-
-};
-
-union tdb_attribute {
-
-    struct tdb_attribute_base base;
-
-    struct tdb_attribute_log log;
-
-    struct tdb_attribute_hash hash;
-
-};
-
-This allows future attributes to be added, even if this expands 
-the size of the union.
-
-2.2 tdb_traverse Makes Impossible Guarantees
-
-tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, 
-and it was thought that it was important to guarantee that all 
-records which exist at the start and end of the traversal would 
-be included, and no record would be included twice.
-
-This adds complexity (see[Reliable-Traversal-Adds]) and does not 
-work anyway for records which are altered (in particular, those 
-which are expanded may be effectively deleted and re-added behind 
-the traversal).
-
-2.2.1 <traverse-Proposed-Solution>Proposed Solution
-
-Abandon the guarantee. You will see every record if no changes 
-occur during your traversal, otherwise you will see some subset. 
-You can prevent changes by using a transaction or the locking 
-API.
-
-2.3 Nesting of Transactions Is Fraught
-
-TDB has alternated between allowing nested transactions and not 
-allowing them. Various paths in the Samba codebase assume that 
-transactions will nest, and in a sense they can: the operation is 
-only committed to disk when the outer transaction is committed. 
-There are two problems, however:
-
-1. Canceling the inner transaction will cause the outer 
-  transaction commit to fail, and will not undo any operations 
-  since the inner transaction began. This problem is soluble with 
-  some additional internal code.
-
-2. An inner transaction commit can be cancelled by the outer 
-  transaction. This is desirable in the way which Samba's 
-  database initialization code uses transactions, but could be a 
-  surprise to any users expecting a successful transaction commit 
-  to expose changes to others.
-
-The current solution is to specify the behavior at tdb_open(), 
-with the default currently that nested transactions are allowed. 
-This flag can also be changed at runtime.
-
-2.3.1 Proposed Solution
-
-Given the usage patterns, it seems that the “least-surprise” 
-behavior of disallowing nested transactions should become the 
-default. Additionally, it seems the outer transaction is the only 
-code which knows whether inner transactions should be allowed, so 
-a flag to indicate this could be added to tdb_transaction_start. 
-However, this behavior can be simulated with a wrapper which uses 
-tdb_add_flags() and tdb_remove_flags(), so the API should not be 
-expanded for this relatively-obscure case.
-
-2.4 Incorrect Hash Function is Not Detected
-
-tdb_open_ex() allows the calling code to specify a different hash 
-function to use, but does not check that all other processes 
-accessing this tdb are using the same hash function. The result 
-is that records are missing from tdb_fetch().
-
-2.4.1 Proposed Solution
-
-The header should contain an example hash result (eg. the hash of 
-0xdeadbeef), and tdb_open_ex() should check that the given hash 
-function produces the same answer, or fail the tdb_open call.
-
-2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation
-
-In response to scalability issues with the free list ([TDB-Freelist-Is]
-) two API workarounds have been incorporated in TDB: 
-tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The 
-latter actually calls the former with an argument of “5”.
-
-This code allows deleted records to accumulate without putting 
-them in the free list. On delete we iterate through each chain 
-and free them in a batch if there are more than max_dead entries. 
-These are never otherwise recycled except as a side-effect of a 
-tdb_repack.
-
-2.5.1 Proposed Solution
-
-With the scalability problems of the freelist solved, this API 
-can be removed. The TDB_VOLATILE flag may still be useful as a 
-hint that store and delete of records will be at least as common 
-as fetch in order to allow some internal tuning, but initially 
-will become a no-op.
-
-2.6 <TDB-Files-Cannot>TDB Files Cannot Be Opened Multiple Times 
-  In The Same Process
-
-No process can open the same TDB twice; we check and disallow it. 
-This is an unfortunate side-effect of fcntl locks, which operate 
-on a per-file rather than per-file-descriptor basis, and do not 
-nest. Thus, closing any file descriptor on a file clears all the 
-locks obtained by this process, even if they were placed using a 
-different file descriptor!
-
-Note that even if this were solved, deadlock could occur if 
-operations were nested: this is a more manageable programming 
-error in most cases.
-
-2.6.1 Proposed Solution
-
-We could lobby POSIX to fix the perverse rules, or at least lobby 
-Linux to violate them so that the most common implementation does 
-not have this restriction. This would be a generally good idea 
-for other fcntl lock users.
-
-Samba uses a wrapper which hands out the same tdb_context to 
-multiple callers if this happens, and does simple reference 
-counting. We should do this inside the tdb library, which already 
-emulates lock nesting internally; it would need to recognize when 
-deadlock occurs within a single process. This would create a new 
-failure mode for tdb operations (while we currently handle 
-locking failures, they are impossible in normal use and a process 
-encountering them can do little but give up).
-
-I do not see benefit in an additional tdb_open flag to indicate 
-whether re-opening is allowed, although there may be some 
-benefit to adding a call to detect when a tdb_context is shared, 
-to allow others to create such an API.
-
-2.7 TDB API Is Not POSIX Thread-safe
-
-The TDB API uses an error code which can be queried after an 
-operation to determine what went wrong. This programming model 
-does not work with threads, unless specific additional guarantees 
-are given by the implementation. In addition, even 
-otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot]
-).
-
-2.7.1 Proposed Solution
-
-Rearchitecting the API to include a tdb_errcode pointer would be a 
-great deal of churn; we would do better to guarantee that the 
-tdb_errcode is per-thread so the current programming model can be 
-maintained.
-
-This requires dynamic per-thread allocations, which is awkward 
-with POSIX threads (pthread_key_create space is limited and we 
-cannot simply allocate a key for every TDB).
-
-Internal locking is required to make sure that fcntl locks do not 
-overlap between threads, and also that the global list of tdbs is 
-maintained.
-
-The aim is that building tdb with -DTDB_PTHREAD will result in a 
-pthread-safe version of the library, and otherwise no overhead 
-will exist.
-
-2.8 *_nonblock Functions And *_mark Functions Expose 
-  Implementation
-
-CTDB[footnote:
-Clustered TDB, see http://ctdb.samba.org
-] wishes to operate on TDB in a non-blocking manner. This is 
-currently done as follows:
-
-1. Call the _nonblock variant of an API function (eg. 
-  tdb_lockall_nonblock). If this fails:
-
-2. Fork a child process, and wait for it to call the normal 
-  variant (eg. tdb_lockall).
-
-3. If the child succeeds, call the _mark variant to indicate we 
-  already have the locks (eg. tdb_lockall_mark).
-
-4. Upon completion, tell the child to release the locks (eg. 
-  tdb_unlockall).
-
-5. Indicate to tdb that it should consider the locks removed (eg. 
-  tdb_unlockall_mark).
-
-There are several issues with this approach. Firstly, adding two 
-new variants of each function clutters the API for an obscure 
-use, and so not all functions have three variants. Secondly, it 
-assumes that all paths of the functions ask for the same locks, 
-otherwise the parent process will have to get a lock which the 
-child doesn't have under some circumstances. I don't believe this 
-is currently the case, but it constrains the implementation. 
-
-2.8.1 <Proposed-Solution-locking-hook>Proposed Solution
-
-Implement a hook for locking methods, so that the caller can 
-control the calls to create and remove fcntl locks. In this 
-scenario, ctdbd would operate as follows:
-
-1. Call the normal API function, eg tdb_lockall().
-
-2. When the lock callback comes in, check if the child has the 
-  lock. Initially, this is always false. If so, return 0. 
-  Otherwise, try to obtain it in non-blocking mode. If that 
-  fails, return EWOULDBLOCK.
-
-3. Release locks in the unlock callback as normal.
-
-4. If tdb_lockall() fails, see if we recorded a lock failure; if 
-  so, call the child to repeat the operation.
-
-5. The child records what locks it obtains, and returns that 
-  information to the parent.
-
-6. When the child has succeeded, goto 1.
-
-This is flexible enough to handle any potential locking scenario, 
-even when lock requirements change. It can be optimized so that 
-the parent does not release locks, just tells the child which 
-locks it doesn't need to obtain.
-
-It also keeps the complexity out of the API, and in ctdbd where 
-it is needed.
-
-2.9 tdb_chainlock Functions Expose Implementation
-
-tdb_chainlock locks some number of records, including the record 
-indicated by the given key. This gave atomicity guarantees; 
-no-one can start a transaction, alter, read or delete that key 
-while the lock is held.
-
-It also makes the same guarantee for any other key in the chain, 
-which is an internal implementation detail and potentially a 
-cause for deadlock.
-
-2.9.1 Proposed Solution
-
-None. It would be nice to have an explicit single entry lock 
-which affected no other keys. Unfortunately, this won't work for 
-an entry which doesn't exist. Thus while chainlock may be 
-implemented more efficiently for the existing case, it will still 
-have overlap issues with the non-existing case. So it is best to 
-keep the current (lack of) guarantee about which records will be 
-affected to avoid constraining our implementation.
-
-2.10 Signal Handling is Not Race-Free
-
-The tdb_setalarm_sigptr() call allows the caller's signal handler 
-to indicate that the tdb locking code should return with a 
-failure, rather than trying again when a signal is received (and 
-errno == EAGAIN). This is usually used to implement timeouts.
-
-Unfortunately, this does not work in the case where the signal is 
-received before the tdb code enters the fcntl() call to place the 
-lock: the code will sleep within the fcntl() code, unaware that 
-the signal wants it to exit. In the case of long timeouts, this 
-does not happen in practice.
-
-2.10.1 Proposed Solution
-
-The locking hooks proposed in [Proposed-Solution-locking-hook] 
-would allow the user to decide on whether to fail the lock 
-acquisition on a signal. This allows the caller to choose their 
-own compromise: they could narrow the race by checking 
-immediately before the fcntl call.[footnote:
-It may be possible to make this race-free in some implementations 
-by having the signal handler alter the struct flock to make it 
-invalid. This will cause the fcntl() lock call to fail with 
-EINVAL if the signal occurs before the kernel is entered, 
-otherwise EAGAIN.
-]
-
-2.11 The API Uses Gratuitous Typedefs, Capitals
-
-typedefs are useful for providing source compatibility when types 
-can differ across implementations, or arguably in the case of 
-function pointer definitions which are hard for humans to parse. 
-Otherwise it is simply obfuscation and pollutes the namespace.
-
-Capitalization is usually reserved for compile-time constants and 
-macros.
-
-  TDB_CONTEXT There is no reason to use this over 'struct 
-  tdb_context'; the definition isn't visible to the API user 
-  anyway.
-
-  TDB_DATA There is no reason to use this over struct TDB_DATA; 
-  the struct needs to be understood by the API user.
-
-  struct TDB_DATA This would normally be called 'struct 
-  tdb_data'.
-
-  enum TDB_ERROR Similarly, this would normally be enum 
-  tdb_error.
-
-2.11.1 Proposed Solution
-
-None. Introducing lower case variants would please pedants like 
-myself, but if it were done the existing ones should be kept. 
-There is little point forcing a purely cosmetic change upon tdb 
-users.
-
-2.12 <tdb_log_func-Doesnt-Take>tdb_log_func Doesn't Take The 
-  Private Pointer
-
-For API compatibility reasons, the logging function needs to call 
-tdb_get_logging_private() to retrieve the pointer registered by 
-the tdb_open_ex for logging.
-
-2.12.1 Proposed Solution
-
-It should simply take an extra argument, since we are prepared to 
-break the API/ABI.
-
-2.13 Various Callback Functions Are Not Typesafe
-
-The callback functions in tdb_set_logging_function (after [tdb_log_func-Doesnt-Take]
- is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read 
-and tdb_check all take void * and must internally convert it to 
-the argument type they were expecting.
-
-If this type changes, the compiler will not produce warnings on 
-the callers, since it only sees void *.
-
-2.13.1 Proposed Solution
-
-With careful use of macros, we can create callback functions 
-which give a warning when used on gcc and the types of the 
-callback and its private argument differ. Unsupported compilers 
-will not give a warning, which is no worse than now. In addition, 
-the callbacks become clearer, as they need not use void * for 
-their parameter.
-
-See CCAN's typesafe_cb module at 
-http://ccan.ozlabs.org/info/typesafe_cb.html
-
-2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, 
-  tdb_reopen_all Problematic
-
-The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB 
-file should be cleared if the caller discovers it is the only 
-process with the TDB open. However, if any caller does not 
-specify TDB_CLEAR_IF_FIRST it will not be detected, so will have 
-the TDB erased underneath them (usually resulting in a crash).
-
-There is a similar issue on fork(); if the parent exits (or 
-otherwise closes the tdb) before the child calls tdb_reopen_all() 
-to establish the lock used to indicate the TDB is opened by 
-someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe 
-it alone has opened the TDB and will erase it.
-
-2.14.1 Proposed Solution
-
-Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but 
-see [TDB_CLEAR_IF_FIRST-Imposes-Performance].
-
-3 Performance And Scalability Issues
-
-3.1 <TDB_CLEAR_IF_FIRST-Imposes-Performance>TDB_CLEAR_IF_FIRST 
-  Imposes Performance Penalty
-
-When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is 
-placed at offset 4 (aka. the ACTIVE_LOCK). While these locks 
-never conflict in normal tdb usage, they do add substantial 
-overhead for most fcntl lock implementations when the kernel 
-scans to detect if a lock conflict exists. This is often a single 
-linked list, making the time to acquire and release a fcntl lock 
-O(N) where N is the number of processes with the TDB open, not 
-the number actually doing work.
-
-In a Samba server it is common to have huge numbers of clients 
-sitting idle, and thus they have weaned themselves off the 
-TDB_CLEAR_IF_FIRST flag.[footnote:
-There is a flag to tdb_reopen_all() which is used for this 
-optimization: if the parent process will outlive the child, the 
-child does not need the ACTIVE_LOCK. This is a workaround for 
-this very performance issue.
-]
-
-3.1.1 Proposed Solution
-
-Remove the flag. It was a neat idea, but even trivial servers 
-tend to know when they are initializing for the first time and 
-can simply unlink the old tdb at that point.
-
-3.2 TDB Files Have a 4G Limit
-
-This seems to be becoming an issue (so much for “trivial”!), 
-particularly for ldb.
-
-3.2.1 Proposed Solution
-
-A new, incompatible TDB format which uses 64 bit offsets 
-internally rather than 32 bit as now. For simplicity of endian 
-conversion (which TDB does on the fly if required), all values 
-will be 64 bit on disk. In practice, some upper bits may be used 
-for other purposes, but at least 56 bits will be available for 
-file offsets.
-
-tdb_open() will automatically detect the old version, and even 
-create them if TDB_VERSION6 is specified to tdb_open.
-
-32 bit processes will still be able to access TDBs larger than 4G 
-(assuming that their off_t allows them to seek to 64 bits), they 
-will gracefully fall back as they fail to mmap. This can happen 
-already with large TDBs.
-
-Old versions of tdb will fail to open the new TDB files (since 28 
-August 2009, commit 398d0c29290: prior to that any unrecognized 
-file format would be erased and initialized as a fresh tdb!)
-
-3.3 TDB Records Have a 4G Limit
-
-This has not been a reported problem, and the API uses size_t 
-which can be 64 bit on 64 bit platforms. However, other limits 
-may have made such an issue moot.
-
-3.3.1 Proposed Solution
-
-Record sizes will be 64 bit, with an error returned on 32 bit 
-platforms which try to access such records (the current 
-implementation would return TDB_ERR_OOM in a similar case). It 
-seems unlikely that 32 bit keys will be a limitation, so the 
-implementation may not support this (see [sub:Records-Incur-A]).
-
-3.4 Hash Size Is Determined At TDB Creation Time
-
-TDB contains a number of hash chains in the header; the number is 
-specified at creation time, and defaults to 131. This is such a 
-bottleneck on large databases (as each hash chain gets quite 
-long), that LDB uses 10,000 for this hash. In general it is 
-impossible to know what the 'right' answer is at database 
-creation time.
-
-3.4.1 Proposed Solution
-
-After comprehensive performance testing on various scalable hash 
-variants[footnote:
-http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 
-This was annoying because I was previously convinced that an 
-expanding tree of hashes would be very close to optimal.
-], it became clear that it is hard to beat a straight linear hash 
-table which doubles in size when it reaches saturation. There are 
-three details which become important:
-
-1. On encountering a full bucket, we use the next bucket.
-
-2. Extra hash bits are stored with the offset, to reduce 
-  comparisons.
-
-3. A marker entry is used on deleting an entry.
-
-The doubling of the table must be done under a transaction; we 
-will not reduce it on deletion, so it will be an unusual case. It 
-will either be placed at the head (other entries will be moved 
-out the way so we can expand). We could have a pointer in the 
-header to the current hashtable location, but that pointer would 
-have to be read frequently to check for hashtable moves.
-
-The locking for this is slightly more complex than the chained 
-case; we currently have one lock per bucket, and that means we 
-would need to expand the lock if we overflow to the next bucket. 
-The frequency of such collisions will affect our locking 
-heuristics: we can always lock more buckets than we need.
-
-One possible optimization is to only re-check the hash size on an 
-insert or a lookup miss.
-
-3.5 <TDB-Freelist-Is>TDB Freelist Is Highly Contended
-
-TDB uses a single linked list for the free list. Allocation 
-occurs as follows, using heuristics which have evolved over time:
-
-1. Get the free list lock for this whole operation.
-
-2. Multiply length by 1.25, so we always over-allocate by 25%.
-
-3. Set the slack multiplier to 1.
-
-4. Examine the current freelist entry: if it is > length but < 
-  the current best case, remember it as the best case.
-
-5. Multiply the slack multiplier by 1.05.
-
-6. If our best fit so far is less than length * slack multiplier, 
-  return it. The slack will be turned into a new free record if 
-  it's large enough.
-
-7. Otherwise, go onto the next freelist entry.
-
-Deleting a record occurs as follows:
-
-1. Lock the hash chain for this whole operation.
-
-2. Walk the chain to find the record, keeping the prev pointer 
-  offset.
-
-3. If max_dead is non-zero:
-
-  (a) Walk the hash chain again and count the dead records.
-
-  (b) If it's more than max_dead, bulk free all the dead ones 
-    (similar to steps 4 and below, but the lock is only obtained 
-    once).
-
-  (c) Simply mark this record as dead and return. 
-
-4. Get the free list lock for the remainder of this operation.
-
-5. <right-merging>Examine the following block to see if it is 
-  free; if so, enlarge the current block and remove that block 
-  from the free list. This was disabled, as removal from the free 
-  list was O(entries-in-free-list).
-
-6. Examine the preceding block to see if it is free: for this 
-  reason, each block has a 32-bit tailer which indicates its 
-  length. If it is free, expand it to cover our new block and 
-  return.
-
-7. Otherwise, prepend ourselves to the free list.
-
-Disabling right-merging (step [right-merging]) causes 
-fragmentation; the other heuristics proved insufficient to 
-address this, so the final answer to this was that when we expand 
-the TDB file inside a transaction commit, we repack the entire 
-tdb.
-
-The single list lock limits our allocation rate; due to the other 
-issues this is not currently seen as a bottleneck.
-
-3.5.1 Proposed Solution
-
-The first step is to remove all the current heuristics, as they 
-obviously interact, then examine them once the lock contention is 
-addressed.
-
-The free list must be split to reduce contention. Assuming 
-perfect free merging, we can at most have 1 free list entry for 
-each entry. This implies that the number of free lists is related 
-to the size of the hash table, but as it is rare to walk a large 
-number of free list entries we can use far fewer, say 1/32 of the 
-number of hash buckets.
-
-There are various benefits in using per-size free lists (see [sub:TDB-Becomes-Fragmented]
-) but it's not clear this would reduce contention in the common 
-case where all processes are allocating/freeing the same size. 
-Thus we almost certainly need to divide in other ways: the most 
-obvious is to divide the file into zones, and using a free list 
-(or set of free lists) for each. This approximates address 
-ordering.
-
-Note that this means we need to split the free lists when we 
-expand the file; this is probably acceptable when we double the 
-hash table size, since that is such an expensive operation 
-already. In the case of increasing the file size, there is an 
-optimization we can use: if we use M in the formula above as the 
-file size rounded up to the next power of 2, we only need 
-reshuffle free lists when the file size crosses a power of 2 
-boundary, and reshuffling the free lists is trivial: we simply 
-merge every consecutive pair of free lists.
-
-The basic algorithm is as follows. Freeing is simple:
-
-1. Identify the correct zone.
-
-2. Lock the corresponding list.
-
-3. Re-check the zone (we didn't have a lock, sizes could have 
-  changed): relock if necessary.
-
-4. Place the freed entry in the list for that zone.
-
-Allocation is a little more complicated, as we perform delayed 
-coalescing at this point:
-
-1. Pick a zone: either the zone we last freed into, or based on a “
-  random” number.
-
-2. Lock the corresponding list.
-
-3. Re-check the zone: relock if necessary.
-
-4. If the top entry is large enough, remove it from the list and 
-  return it.
-
-5. Otherwise, coalesce entries in the list.
-
-  (a) 
-
-  (b) 
-
-  (c) 
-
-  (d) 
-
-6. If there was no entry large enough, unlock the list and try 
-  the next zone.
-
-7. 
-
-8. 
-
-9. If no zone satisfies, expand the file.
-
-This optimizes rapid insert/delete of free list entries by not 
-coalescing them all the time. First-fit address ordering 
-seems to be fairly good for keeping fragmentation low 
-(see [sub:TDB-Becomes-Fragmented]). Note that address ordering 
-does not need a tailer to coalesce, though if we needed one we 
-could have one cheaply: see [sub:Records-Incur-A]. 
-
-
-
-I anticipate that the number of entries in each free zone would 
-be small, but it might be worth using one free entry to hold 
-pointers to the others for cache efficiency.
-
-3.6 <sub:TDB-Becomes-Fragmented>TDB Becomes Fragmented
-
-Much of this is a result of allocation strategy[footnote:
-The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 
-ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps
-] and deliberate hobbling of coalescing; internal fragmentation 
-(aka overallocation) is deliberately set at 25%, and external 
-fragmentation is only cured by the decision to repack the entire 
-db when a transaction commit needs to enlarge the file.
-
-3.6.1 Proposed Solution
-
-The 25% overhead on allocation works in practice for ldb because 
-indexes tend to expand by one record at a time. This internal 
-fragmentation can be resolved by having an “expanded” bit in the 
-header to note entries that have previously expanded, and 
-allocating more space for them.
-
-There is a spectrum of possible solutions for external 
-fragmentation: one is to use a fragmentation-avoiding allocation 
-strategy such as best-fit address-order allocator. The other end 
-of the spectrum would be to use a bump allocator (very fast and 
-simple) and simply repack the file when we reach the end.
-
-There are three problems with efficient fragmentation-avoiding 
-allocators: they are non-trivial, they tend to use a single free 
-list for each size, and there's no evidence that tdb allocation 
-patterns will match those recorded for general allocators (though 
-it seems likely).
-
-Thus we don't spend too much effort on external fragmentation; we 
-will be no worse than the current code if we need to repack on 
-occasion. More effort is spent on reducing freelist contention, 
-and reducing overhead.
-
-3.7 <sub:Records-Incur-A>Records Incur A 28-Byte Overhead
-
-Each TDB record has a header as follows:
-
-struct tdb_record {
-
-        tdb_off_t next; /* offset of the next record in the list 
-*/
-
-        tdb_len_t rec_len; /* total byte length of record */
-
-        tdb_len_t key_len; /* byte length of key */
-
-        tdb_len_t data_len; /* byte length of data */
-
-        uint32_t full_hash; /* the full 32 bit hash of the key */
-
-        uint32_t magic;   /* try to catch errors */
-
-        /* the following union is implied:
-
-                union {
-
-                        char record[rec_len];
-
-                        struct {
-
-                                char key[key_len];
-
-                                char data[data_len];
-
-                        }
-
-                        uint32_t totalsize; (tailer)
-
-                }
-
-        */
-
-};
-
-Naively, this would double to a 56-byte overhead on a 64 bit 
-implementation.
-
-3.7.1 Proposed Solution
-
-We can use various techniques to reduce this for an allocated 
-block:
-
-1. The 'next' pointer is not required, as we are using a flat 
-  hash table.
-
-2. 'rec_len' can instead be expressed as an addition to key_len 
-  and data_len (it accounts for wasted or overallocated length in 
-  the record). Since the record length is always a multiple of 8, 
-  we can conveniently fit it in 32 bits (representing up to 35 
-  bits).
-
-3. 'key_len' and 'data_len' can be reduced. I'm unwilling to 
-  restrict 'data_len' to 32 bits, but instead we can combine the 
-  two into one 64-bit field and using a 5 bit value which 
-  indicates at what bit to divide the two. Keys are unlikely to 
-  scale as fast as data, so I'm assuming a maximum key size of 32 
-  bits.
-
-4. 'full_hash' is used to avoid a memcmp on the “miss” case, but 
-  this is diminishing returns after a handful of bits (at 10 
-  bits, it reduces 99.9% of false memcmp). As an aside, as the 
-  lower bits are already incorporated in the hash table 
-  resolution, the upper bits should be used here.
-
-5. 'magic' does not need to be enlarged: it currently reflects 
-  one of 5 values (used, free, dead, recovery, and 
-  unused_recovery). It is useful for quick sanity checking 
-  however, and should not be eliminated.
-
-6. 'tailer' is only used to coalesce free blocks (so a block to 
-  the right can find the header to check if this block is free). 
-  This can be replaced by a single 'free' bit in the header of 
-  the following block (and the tailer only exists in free 
-  blocks).[footnote:
-This technique from Thomas Standish. Data Structure Techniques. 
-Addison-Wesley, Reading, Massachusetts, 1980.
-] The current proposed coalescing algorithm doesn't need this, 
-  however.
-
-This produces a 16 byte used header like this:
-
-struct tdb_used_record {
-
-        uint32_t magic : 16,
-
-                 prev_is_free: 1,
-
-                 key_data_divide: 5,
-
-                 top_hash: 10;
-
-        uint32_t extra_octets;
-
-        uint64_t key_and_data_len;
-
-};
-
-And a free record like this:
-
-struct tdb_free_record {
-
-        uint32_t free_magic;
-
-        uint64_t total_length;
-
-        ...
-
-        uint64_t tailer;
-
-};
-
-
-
-3.8 Transaction Commit Requires 4 fdatasync
-
-The current transaction algorithm is:
-
-1. write_recovery_data();
-
-2. sync();
-
-3. write_recovery_header();
-
-4. sync();
-
-5. overwrite_with_new_data();
-
-6. sync();
-
-7. remove_recovery_header();
-
-8. sync(); 
-
-On current ext3, each sync flushes all data to disk, so the next 
-3 syncs are relatively inexpensive. But this could become a 
-performance bottleneck on other filesystems such as ext4.
-
-3.8.1 Proposed Solution
-
-
-
-
-
-
-
-
-
-Neil Brown points out that this is overzealous, and only one sync 
-is needed:
-
-1. Bundle the recovery data, a transaction counter and a strong 
-  checksum of the new data.
-
-2. Strong checksum that whole bundle.
-
-3. Store the bundle in the database.
-
-4. Overwrite the oldest of the two recovery pointers in the 
-  header (identified using the transaction counter) with the 
-  offset of this bundle.
-
-5. sync.
-
-6. Write the new data to the file.
-
-Checking for recovery means identifying the latest bundle with a 
-valid checksum and using the new data checksum to ensure that it 
-has been applied. This is more expensive than the current check, 
-but need only be done at open. For running databases, a separate 
-header field can be used to indicate a transaction in progress; 
-we need only check for recovery if this is set.
-
-3.9 TDB Does Not Have Snapshot Support
-
-3.9.1 Proposed Solution
-
-None. At some point you say “use a real database”.
-
-But as a thought experiment, if we implemented transactions to 
-only overwrite free entries (this is tricky: there must not be a 
-header in each entry which indicates whether it is free, but use 
-of presence in metadata elsewhere), and a pointer to the hash 
-table, we could create an entirely new commit without destroying 
-existing data. Then it would be easy to implement snapshots in a 
-similar way.
-
-This would not allow arbitrary changes to the database, such as 
-tdb_repack does, and would require more space (since we have to 
-preserve the current and future entries at once). If we used hash 
-trees rather than one big hash table, we might only have to 
-rewrite some sections of the hash, too.
-
-We could then implement snapshots using a similar method, using 
-multiple different hash tables/free tables.
-
-3.10 Transactions Cannot Operate in Parallel
-
-This would be useless for ldb, as it hits the index records with 
-just about every update. It would add significant complexity in 
-resolving clashes, and cause all the transaction callers to write 
-their code to loop in the case where the transactions spuriously 
-failed.
-
-3.10.1 Proposed Solution
-
-We could solve a small part of the problem by providing read-only 
-transactions. These would allow one write transaction to begin, 
-but it could not commit until all r/o transactions are done. This 
-would require a new RO_TRANSACTION_LOCK, which would be upgraded 
-on commit.
-
-3.11 Default Hash Function Is Suboptimal
-
-The Knuth-inspired multiplicative hash used by tdb is fairly slow 
-(especially if we expand it to 64 bits), and works best when the 
-hash bucket size is a prime number (which also means a slow 
-modulus). In addition, it is highly predictable which could 
-potentially lead to a Denial of Service attack in some TDB uses.
-
-3.11.1 Proposed Solution
-
-The Jenkins lookup3 hash[footnote:
-http://burtleburtle.net/bob/c/lookup3.c
-] is a fast and superbly-mixing hash. It's used by the Linux 
-kernel and almost everything else. This has the particular 
-properties that it takes an initial seed, and produces two 32 bit 
-hash numbers, which we can combine into a 64-bit hash.
-
-The seed should be created at tdb-creation time from some random 
-source, and placed in the header. This is far from foolproof, but 
-adds a little bit of protection against hash bombing.
-
-3.12 <Reliable-Traversal-Adds>Reliable Traversal Adds Complexity
-
-We lock a record during traversal iteration, and try to grab that 
-lock in the delete code. If that grab on delete fails, we simply 
-mark it deleted and continue onwards; traversal checks for this 
-condition and does the delete when it moves off the record.
-
-If traversal terminates, the dead record may be left 
-indefinitely.
-
-3.12.1 Proposed Solution
-
-Remove reliability guarantees; see [traverse-Proposed-Solution].
-
-3.13 Fcntl Locking Adds Overhead
-
-Placing a fcntl lock means a system call, as does removing one. 
-This is actually one reason why transactions can be faster 
-(everything is locked once at transaction start). In the 
-uncontended case, this overhead can theoretically be eliminated.
-
-3.13.1 Proposed Solution
-
-None.
-
-We tried this before with spinlock support, in the early days of 
-TDB, and it didn't make much difference except in manufactured 
-benchmarks.
-
-We could use spinlocks (with futex kernel support under Linux), 
-but it means that we lose automatic cleanup when a process dies 
-with a lock. There is a method of auto-cleanup under Linux, but 
-it's not supported by other operating systems. We could 
-reintroduce a clear-if-first-style lock and sweep for dead 
-futexes on open, but that wouldn't help the normal case of one 
-concurrent opener dying. Increasingly elaborate repair schemes 
-could be considered, but they require an ABI change (everyone 
-must use them) anyway, so there's no need to do this at the same 
-time as everything else.
-
diff --git a/ccan/tdb2/doc/design.lyx b/ccan/tdb2/doc/design.lyx
deleted file mode 100644 (file)
index ba3f9cc..0000000
+++ /dev/null
@@ -1,2689 +0,0 @@
-#LyX 1.6.7 created this file. For more info see http://www.lyx.org/
-\lyxformat 345
-\begin_document
-\begin_header
-\textclass article
-\use_default_options true
-\language english
-\inputencoding auto
-\font_roman default
-\font_sans default
-\font_typewriter default
-\font_default_family default
-\font_sc false
-\font_osf false
-\font_sf_scale 100
-\font_tt_scale 100
-
-\graphics default
-\paperfontsize default
-\use_hyperref false
-\papersize default
-\use_geometry false
-\use_amsmath 1
-\use_esint 1
-\cite_engine basic
-\use_bibtopic false
-\paperorientation portrait
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation indent
-\defskip medskip
-\quotes_language english
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tracking_changes true
-\output_changes true
-\author "" 
-\author "" 
-\end_header
-
-\begin_body
-
-\begin_layout Title
-TDB2: Redesigning The Trivial DataBase
-\end_layout
-
-\begin_layout Author
-Rusty Russell, IBM Corporation
-\end_layout
-
-\begin_layout Date
-17-March-2011
-\end_layout
-
-\begin_layout Abstract
-The Trivial DataBase on-disk format is 32 bits; with usage cases heading
- towards the 4G limit, that must change.
- This required breakage provides an opportunity to revisit TDB's other design
- decisions and reassess them.
-\end_layout
-
-\begin_layout Section
-Introduction
-\end_layout
-
-\begin_layout Standard
-The Trivial DataBase was originally written by Andrew Tridgell as a simple
- key/data pair storage system with the same API as dbm, but allowing multiple
- readers and writers while being small enough (< 1000 lines of C) to include
- in SAMBA.
- The simple design created in 1999 has proven surprisingly robust and performant
-, used in Samba versions 3 and 4 as well as numerous other projects.
- Its useful life was greatly increased by the (backwards-compatible!) addition
- of transaction support in 2005.
-\end_layout
-
-\begin_layout Standard
-The wider variety and greater demands of TDB-using code have led to some
- organic growth of the API, as well as some compromises on the implementation.
- None of these, by themselves, are seen as show-stoppers, but the cumulative
- effect is a loss of elegance over the initial, simple TDB implementation.
- Here is a table of the approximate number of lines of implementation code
- and number of API functions at the end of each year:
-\end_layout
-
-\begin_layout Standard
-\begin_inset Tabular
-<lyxtabular version="3" rows="12" columns="3">
-<features>
-<column alignment="center" valignment="top" width="0">
-<column alignment="center" valignment="top" width="0">
-<column alignment="center" valignment="top" width="0">
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-Year End
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-API Functions
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-Lines of C Code Implementation
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1999
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-13
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1195
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2000
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-24
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1725
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2001
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-32
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2228
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2002
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-35
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2481
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2003
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-35
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2552
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2004
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-40
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2584
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2005
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-38
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2647
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2006
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-52
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-3754
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2007
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-66
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-4398
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2008
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-71
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-4768
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2009
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-73
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-5715
-\end_layout
-
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-This review is an attempt to catalog and address all the known issues with
- TDB and create solutions which address the problems without significantly
- increasing complexity; all involved are far too aware of the dangers of
- second system syndrome in rewriting a successful project like this.
-\end_layout
-
-\begin_layout Section
-API Issues
-\end_layout
-
-\begin_layout Subsection
-tdb_open_ex Is Not Expandable
-\end_layout
-
-\begin_layout Standard
-The tdb_open() call was expanded to tdb_open_ex(), which added an optional
- hashing function and an optional logging function argument.
- Additional arguments to open would require the introduction of a tdb_open_ex2
- call etc.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\begin_inset CommandInset label
-LatexCommand label
-name "attributes"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-tdb_open() will take a linked-list of attributes:
-\end_layout
-
-\begin_layout LyX-Code
-enum tdb_attribute {
-\end_layout
-
-\begin_layout LyX-Code
-    TDB_ATTRIBUTE_LOG = 0,
-\end_layout
-
-\begin_layout LyX-Code
-    TDB_ATTRIBUTE_HASH = 1
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_base {
-\end_layout
-
-\begin_layout LyX-Code
-    enum tdb_attribute attr;
-\end_layout
-
-\begin_layout LyX-Code
-    union tdb_attribute *next;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_log {
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
-\end_layout
-
-\begin_layout LyX-Code
-    tdb_log_func log_fn;
-\end_layout
-
-\begin_layout LyX-Code
-    void *log_private;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_hash {
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
-\end_layout
-
-\begin_layout LyX-Code
-    tdb_hash_func hash_fn;
-\end_layout
-
-\begin_layout LyX-Code
-    void *hash_private;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-union tdb_attribute {
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_base base;
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_log log;
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_hash hash;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-This allows future attributes to be added, even if this expands the size
- of the union.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_traverse Makes Impossible Guarantees
-\end_layout
-
-\begin_layout Standard
-tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it
- was thought that it was important to guarantee that all records which exist
- at the start and end of the traversal would be included, and no record
- would be included twice.
-\end_layout
-
-\begin_layout Standard
-This adds complexity (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Reliable-Traversal-Adds"
-
-\end_inset
-
-) and does not work anyway for records which are altered (in particular,
- those which are expanded may be effectively deleted and re-added behind
- the traversal).
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "traverse-Proposed-Solution"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Abandon the guarantee.
- You will see every record if no changes occur during your traversal, otherwise
- you will see some subset.
- You can prevent changes by using a transaction or the locking API.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
- Delete-during-traverse will still delete every record, too (assuming no
- other changes).
-\end_layout
-
-\begin_layout Subsection
-Nesting of Transactions Is Fraught
-\end_layout
-
-\begin_layout Standard
-TDB has alternated between allowing nested transactions and not allowing
- them.
- Various paths in the Samba codebase assume that transactions will nest,
- and in a sense they can: the operation is only committed to disk when the
- outer transaction is committed.
- There are two problems, however:
-\end_layout
-
-\begin_layout Enumerate
-Canceling the inner transaction will cause the outer transaction commit
- to fail, and will not undo any operations since the inner transaction began.
- This problem is soluble with some additional internal code.
-\end_layout
-
-\begin_layout Enumerate
-An inner transaction commit can be cancelled by the outer transaction.
- This is desirable in the way which Samba's database initialization code
- uses transactions, but could be a surprise to any users expecting a successful
- transaction commit to expose changes to others.
-\end_layout
-
-\begin_layout Standard
-The current solution is to specify the behavior at tdb_open(), with the
- default currently that nested transactions are allowed.
- This flag can also be changed at runtime.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Given the usage patterns, it seems that the 
-\begin_inset Quotes eld
-\end_inset
-
-least-surprise
-\begin_inset Quotes erd
-\end_inset
-
- behavior of disallowing nested transactions should become the default.
- Additionally, it seems the outer transaction is the only code which knows
- whether inner transactions should be allowed, so a flag to indicate this
- could be added to tdb_transaction_start.
- However, this behavior can be simulated with a wrapper which uses tdb_add_flags
-() and tdb_remove_flags(), so the API should not be expanded for this relatively
--obscure case.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete; the nesting flag has been removed.
-\end_layout
-
-\begin_layout Subsection
-Incorrect Hash Function is Not Detected
-\end_layout
-
-\begin_layout Standard
-tdb_open_ex() allows the calling code to specify a different hash function
- to use, but does not check that all other processes accessing this tdb
- are using the same hash function.
- The result is that records are missing from tdb_fetch().
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The header should contain an example hash result (eg.
- the hash of 0xdeadbeef), and tdb_open_ex() should check that the given
- hash function produces the same answer, or fail the tdb_open call.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_set_max_dead/TDB_VOLATILE Expose Implementation
-\end_layout
-
-\begin_layout Standard
-In response to scalability issues with the free list (
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB-Freelist-Is"
-
-\end_inset
-
-) two API workarounds have been incorporated in TDB: tdb_set_max_dead()
- and the TDB_VOLATILE flag to tdb_open.
- The latter actually calls the former with an argument of 
-\begin_inset Quotes eld
-\end_inset
-
-5
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-This code allows deleted records to accumulate without putting them in the
- free list.
- On delete we iterate through each chain and free them in a batch if there
- are more than max_dead entries.
- These are never otherwise recycled except as a side-effect of a tdb_repack.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-With the scalability problems of the freelist solved, this API can be removed.
- The TDB_VOLATILE flag may still be useful as a hint that store and delete
- of records will be at least as common as fetch in order to allow some internal
- tuning, but initially will become a no-op.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
- Unknown flags cause tdb_open() to fail as well, so they can be detected
- at runtime.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB-Files-Cannot"
-
-\end_inset
-
-TDB Files Cannot Be Opened Multiple Times In The Same Process
-\end_layout
-
-\begin_layout Standard
-No process can open the same TDB twice; we check and disallow it.
- This is an unfortunate side-effect of fcntl locks, which operate on a per-file
- rather than per-file-descriptor basis, and do not nest.
- Thus, closing any file descriptor on a file clears all the locks obtained
- by this process, even if they were placed using a different file descriptor!
-\end_layout
-
-\begin_layout Standard
-Note that even if this were solved, deadlock could occur if operations were
- nested: this is a more manageable programming error in most cases.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We could lobby POSIX to fix the perverse rules, or at least lobby Linux
- to violate them so that the most common implementation does not have this
- restriction.
- This would be a generally good idea for other fcntl lock users.
-\end_layout
-
-\begin_layout Standard
-Samba uses a wrapper which hands out the same tdb_context to multiple callers
- if this happens, and does simple reference counting.
- We should do this inside the tdb library, which already emulates lock nesting
- internally; it would need to recognize when deadlock occurs within a single
- process.
- This would create a new failure mode for tdb operations (while we currently
- handle locking failures, they are impossible in normal use and a process
- encountering them can do little but give up).
-\end_layout
-
-\begin_layout Standard
-I do not see benefit in an additional tdb_open flag to indicate whether
- re-opening is allowed, although there may be some benefit to adding a
- call to detect when a tdb_context is shared, to allow others to create such
- an API.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB API Is Not POSIX Thread-safe
-\end_layout
-
-\begin_layout Standard
-The TDB API uses an error code which can be queried after an operation to
- determine what went wrong.
- This programming model does not work with threads, unless specific additional
- guarantees are given by the implementation.
- In addition, even otherwise-independent threads cannot open the same TDB
- (as in 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB-Files-Cannot"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Rearchitecting the API to include a tdb_errcode pointer would be a great
- deal of churn, but fortunately most functions return 0 on success and -1
- on error: we can change these to return 0 on success and a negative error
- code on error, and the API remains similar to previous.
- The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA
- pointer and return an error code.
- It is also simpler to have tdb_nextkey replace its key argument in place,
- freeing up any old .dptr.
-\end_layout
-
-\begin_layout Standard
-Internal locking is required to make sure that fcntl locks do not overlap
- between threads, and also that the global list of tdbs is maintained.
-\end_layout
-
-\begin_layout Standard
-The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe
- version of the library, and otherwise no overhead will exist.
- Alternatively, a hooking mechanism similar to that proposed for 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Proposed-Solution-locking-hook"
-
-\end_inset
-
- could be used to enable pthread locking at runtime.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete; API has been changed but thread safety has not been implemented.
-\end_layout
-
-\begin_layout Subsection
-*_nonblock Functions And *_mark Functions Expose Implementation
-\end_layout
-
-\begin_layout Standard
-CTDB
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-Clustered TDB, see http://ctdb.samba.org
-\end_layout
-
-\end_inset
-
- wishes to operate on TDB in a non-blocking manner.
- This is currently done as follows:
-\end_layout
-
-\begin_layout Enumerate
-Call the _nonblock variant of an API function (eg.
- tdb_lockall_nonblock).
- If this fails:
-\end_layout
-
-\begin_layout Enumerate
-Fork a child process, and wait for it to call the normal variant (eg.
- tdb_lockall).
-\end_layout
-
-\begin_layout Enumerate
-If the child succeeds, call the _mark variant to indicate we already have
- the locks (eg.
- tdb_lockall_mark).
-\end_layout
-
-\begin_layout Enumerate
-Upon completion, tell the child to release the locks (eg.
- tdb_unlockall).
-\end_layout
-
-\begin_layout Enumerate
-Indicate to tdb that it should consider the locks removed (eg.
- tdb_unlockall_mark).
-\end_layout
-
-\begin_layout Standard
-There are several issues with this approach.
- Firstly, adding two new variants of each function clutters the API for
- an obscure use, and so not all functions have three variants.
- Secondly, it assumes that all paths of the functions ask for the same locks,
- otherwise the parent process will have to get a lock which the child doesn't
- have under some circumstances.
- I don't believe this is currently the case, but it constrains the implementatio
-n.
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "Proposed-Solution-locking-hook"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Implement a hook for locking methods, so that the caller can control the
- calls to create and remove fcntl locks.
- In this scenario, ctdbd would operate as follows:
-\end_layout
-
-\begin_layout Enumerate
-Call the normal API function, eg tdb_lockall().
-\end_layout
-
-\begin_layout Enumerate
-When the lock callback comes in, check if the child has the lock.
- Initially, this is always false.
- If the child does have it, return 0.
- Otherwise, try to obtain it in non-blocking mode.
- If that fails, return EWOULDBLOCK.
-\end_layout
-
-\begin_layout Enumerate
-Release locks in the unlock callback as normal.
-\end_layout
-
-\begin_layout Enumerate
-If tdb_lockall() fails, see if we recorded a lock failure; if so, call the
- child to repeat the operation.
-\end_layout
-
-\begin_layout Enumerate
-The child records what locks it obtains, and returns that information to
- the parent.
-\end_layout
-
-\begin_layout Enumerate
-When the child has succeeded, goto 1.
-\end_layout
-
-\begin_layout Standard
-This is flexible enough to handle any potential locking scenario, even when
- lock requirements change.
- It can be optimized so that the parent does not release locks, just tells
- the child which locks it doesn't need to obtain.
-\end_layout
-
-\begin_layout Standard
-It also keeps the complexity out of the API, and in ctdbd where it is needed.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-tdb_chainlock Functions Expose Implementation
-\end_layout
-
-\begin_layout Standard
-tdb_chainlock locks some number of records, including the record indicated
- by the given key.
- This gave atomicity guarantees; no-one can start a transaction, alter,
- read or delete that key while the lock is held.
-\end_layout
-
-\begin_layout Standard
-It also makes the same guarantee for any other key in the chain, which is
- an internal implementation detail and potentially a cause for deadlock.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- It would be nice to have an explicit single entry lock which affected no
- other keys.
- Unfortunately, this won't work for an entry which doesn't exist.
- Thus while chainlock may be implemented more efficiently for the existing
- case, it will still have overlap issues with the non-existing case.
- So it is best to keep the current (lack of) guarantee about which records
- will be affected to avoid constraining our implementation.
-\end_layout
-
-\begin_layout Subsection
-Signal Handling is Not Race-Free
-\end_layout
-
-\begin_layout Standard
-The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate
- that the tdb locking code should return with a failure, rather than trying
- again when a signal is received (and errno == EAGAIN).
- This is usually used to implement timeouts.
-\end_layout
-
-\begin_layout Standard
-Unfortunately, this does not work in the case where the signal is received
- before the tdb code enters the fcntl() call to place the lock: the code
- will sleep within the fcntl() code, unaware that the signal wants it to
- exit.
- In the case of long timeouts, this does not happen in practice.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The locking hooks proposed in
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Proposed-Solution-locking-hook"
-
-\end_inset
-
- would allow the user to decide on whether to fail the lock acquisition
- on a signal.
- This allows the caller to choose their own compromise: they could narrow
- the race by checking immediately before the fcntl call.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-It may be possible to make this race-free in some implementations by having
- the signal handler alter the struct flock to make it invalid.
- This will cause the fcntl() lock call to fail with EINVAL if the signal
- occurs before the kernel is entered, otherwise EAGAIN.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-The API Uses Gratuitous Typedefs, Capitals
-\end_layout
-
-\begin_layout Standard
-typedefs are useful for providing source compatibility when types can differ
- across implementations, or arguably in the case of function pointer definitions
- which are hard for humans to parse.
- Otherwise it is simply obfuscation and pollutes the namespace.
-\end_layout
-
-\begin_layout Standard
-Capitalization is usually reserved for compile-time constants and macros.
-\end_layout
-
-\begin_layout Description
-TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the
- definition isn't visible to the API user anyway.
-\end_layout
-
-\begin_layout Description
-TDB_DATA There is no reason to use this over struct TDB_DATA; the struct
- needs to be understood by the API user.
-\end_layout
-
-\begin_layout Description
-struct
-\begin_inset space ~
-\end_inset
-
-TDB_DATA This would normally be called 'struct tdb_data'.
-\end_layout
-
-\begin_layout Description
-enum
-\begin_inset space ~
-\end_inset
-
-TDB_ERROR Similarly, this would normally be enum tdb_error.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- Introducing lower case variants would please pedants like myself, but if
- it were done the existing ones should be kept.
- There is little point forcing a purely cosmetic change upon tdb users.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "tdb_log_func-Doesnt-Take"
-
-\end_inset
-
-tdb_log_func Doesn't Take The Private Pointer
-\end_layout
-
-\begin_layout Standard
-For API compatibility reasons, the logging function needs to call tdb_get_loggin
-g_private() to retrieve the pointer registered by the tdb_open_ex for logging.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-It should simply take an extra argument, since we are prepared to break
- the API/ABI.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Various Callback Functions Are Not Typesafe
-\end_layout
-
-\begin_layout Standard
-The callback functions in tdb_set_logging_function (after 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "tdb_log_func-Doesnt-Take"
-
-\end_inset
-
- is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check
- all take void * and must internally convert it to the argument type they
- were expecting.
-\end_layout
-
-\begin_layout Standard
-If this type changes, the compiler will not produce warnings on the callers,
- since it only sees void *.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-With careful use of macros, we can create callback functions which give
- a warning when used on gcc and the types of the callback and its private
- argument differ.
- Unsupported compilers will not give a warning, which is no worse than now.
- In addition, the callbacks become clearer, as they need not use void *
- for their parameter.
-\end_layout
-
-\begin_layout Standard
-See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic
-\end_layout
-
-\begin_layout Standard
-The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should
- be cleared if the caller discovers it is the only process with the TDB
- open.
- However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not
- be detected, so will have the TDB erased underneath them (usually resulting
- in a crash).
-\end_layout
-
-\begin_layout Standard
-There is a similar issue on fork(); if the parent exits (or otherwise closes
- the tdb) before the child calls tdb_reopen_all() to establish the lock
- used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener
- at that moment will believe it alone has opened the TDB and will erase
- it.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove TDB_CLEAR_IF_FIRST.
- Other workarounds are possible, but see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB_CLEAR_IF_FIRST-Imposes-Performance"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Extending The Header Is Difficult
-\end_layout
-
-\begin_layout Standard
-We have reserved (zeroed) words in the TDB header, which can be used for
- future features.
- If the future features are compulsory, the version number must be updated
- to prevent old code from accessing the database.
- But if the future feature is optional, we have no way of telling if older
- code is accessing the database or not.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The header should contain a 
-\begin_inset Quotes eld
-\end_inset
-
-format variant
-\begin_inset Quotes erd
-\end_inset
-
- value (64-bit).
- This is divided into two 32-bit parts:
-\end_layout
-
-\begin_layout Enumerate
-The lower part reflects the format variant understood by code accessing
- the database.
-\end_layout
-
-\begin_layout Enumerate
-The upper part reflects the format variant you must understand to write
- to the database (otherwise you can only open for reading).
-\end_layout
-
-\begin_layout Standard
-The latter field can only be written at creation time, the former should
- be written under the OPEN_LOCK when opening the database for writing, if
- the variant of the code is lower than the current lowest variant.
-\end_layout
-
-\begin_layout Standard
-This should allow backwards-compatible features to be added, and detection
- if older code (which doesn't understand the feature) writes to the database.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Record Headers Are Not Expandable
-\end_layout
-
-\begin_layout Standard
-If we later want to add (say) checksums on keys and data, it would require
- another format change, which we'd like to avoid.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We often have extra padding at the tail of a record.
- If we ensure that the first byte (if any) of this padding is zero, we will
- have a way for future changes to detect code which doesn't understand a
- new format: the new code would write (say) a 1 at the tail, and thus if
- there is no tail or the first byte is 0, we would know the extension is
- not present on that record.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Does Not Use Talloc
-\end_layout
-
-\begin_layout Standard
-Many users of TDB (particularly Samba) use the talloc allocator, and thus
- have to wrap TDB in a talloc context to use it conveniently.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The allocation within TDB is not complicated enough to justify the use of
- talloc, and I am reluctant to force another (excellent) library on TDB
- users.
- Nonetheless a compromise is possible.
- An attribute (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "attributes"
-
-\end_inset
-
-) can be added later to tdb_open() to provide an alternate allocation mechanism,
- specifically for talloc but usable by any other allocator (which would
- ignore the 
-\begin_inset Quotes eld
-\end_inset
-
-context
-\begin_inset Quotes erd
-\end_inset
-
- argument).
-\end_layout
-
-\begin_layout Standard
-This would form a talloc hierarchy as expected, but the caller would still
- have to attach a destructor to the tdb context returned from tdb_open to
- close it.
- All TDB_DATA fields would be children of the tdb_context, and the caller
- would still have to manage them (using talloc_free() or talloc_steal()).
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Section
-Performance And Scalability Issues
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB_CLEAR_IF_FIRST-Imposes-Performance"
-
-\end_inset
-
-TDB_CLEAR_IF_FIRST Imposes Performance Penalty
-\end_layout
-
-\begin_layout Standard
-When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset
- 4 (aka.
- the ACTIVE_LOCK).
- While these locks never conflict in normal tdb usage, they do add substantial
- overhead for most fcntl lock implementations when the kernel scans to detect
- if a lock conflict exists.
- This is often a single linked list, making the time to acquire and release
- a fcntl lock O(N) where N is the number of processes with the TDB open,
- not the number actually doing work.
-\end_layout
-
-\begin_layout Standard
-In a Samba server it is common to have huge numbers of clients sitting idle,
- and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-There is a flag to tdb_reopen_all() which is used for this optimization:
- if the parent process will outlive the child, the child does not need the
- ACTIVE_LOCK.
- This is a workaround for this very performance issue.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove the flag.
- It was a neat idea, but even trivial servers tend to know when they are
- initializing for the first time and can simply unlink the old tdb at that
- point.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Files Have a 4G Limit
-\end_layout
-
-\begin_layout Standard
-This seems to be becoming an issue (so much for 
-\begin_inset Quotes eld
-\end_inset
-
-trivial
-\begin_inset Quotes erd
-\end_inset
-
-!), particularly for ldb.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-A new, incompatible TDB format which uses 64 bit offsets internally rather
- than 32 bit as now.
- For simplicity of endian conversion (which TDB does on the fly if required),
- all values will be 64 bit on disk.
- In practice, some upper bits may be used for other purposes, but at least
- 56 bits will be available for file offsets.
-\end_layout
-
-\begin_layout Standard
-tdb_open() will automatically detect old-version files, and even create them
- if TDB_VERSION6 is specified to tdb_open.
-\end_layout
-
-\begin_layout Standard
-32 bit processes will still be able to access TDBs larger than 4G (assuming
- that their off_t allows them to seek to 64 bits), they will gracefully
- fall back as they fail to mmap.
- This can happen already with large TDBs.
-\end_layout
-
-\begin_layout Standard
-Old versions of tdb will fail to open the new TDB files (since 28 August
- 2009, commit 398d0c29290: prior to that any unrecognized file format would
- be erased and initialized as a fresh tdb!)
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Records Have a 4G Limit
-\end_layout
-
-\begin_layout Standard
-This has not been a reported problem, and the API uses size_t which can
- be 64 bit on 64 bit platforms.
- However, other limits may have made such an issue moot.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Record sizes will be 64 bit, with an error returned on 32 bit platforms
- which try to access such records (the current implementation would return
- TDB_ERR_OOM in a similar case).
- It seems unlikely that 32 bit keys will be a limitation, so the implementation
- may not support this (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Records-Incur-A"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Hash Size Is Determined At TDB Creation Time
-\end_layout
-
-\begin_layout Standard
-TDB contains a number of hash chains in the header; the number is specified
- at creation time, and defaults to 131.
- This is such a bottleneck on large databases (as each hash chain gets quite
- long), that LDB uses 10,000 for this hash.
- In general it is impossible to know what the 'right' answer is at database
- creation time.
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:Hash-Size-Solution"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-After comprehensive performance testing on various scalable hash variants
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying
- because I was previously convinced that an expanding tree of hashes would
- be very close to optimal.
-\end_layout
-
-\end_inset
-
-, it became clear that it is hard to beat a straight linear hash table which
- doubles in size when it reaches saturation.
- Unfortunately, altering the hash table introduces serious locking complications
-: the entire hash table needs to be locked to enlarge the hash table, and
- others might be holding locks.
- Particularly insidious are insertions done under tdb_chainlock.
-\end_layout
-
-\begin_layout Standard
-Thus an expanding layered hash will be used: an array of hash groups, with
- each hash group exploding into pointers to lower hash groups once it fills,
- turning into a hash tree.
- This has implications for locking: we must lock the entire group in case
- we need to expand it, yet we don't know how deep the tree is at that point.
-\end_layout
-
-\begin_layout Standard
-Note that bits from the hash table entries should be stolen to hold more
- hash bits to reduce the penalty of collisions.
- We can use the otherwise-unused lower 3 bits.
- If we limit the size of the database to 64 exabytes, we can use the top
- 8 bits of the hash entry as well.
- These 11 bits would reduce false positives down to 1 in 2000 which is more
- than we need: we can use one of the bits to indicate that the extra hash
- bits are valid.
- This means we can choose not to re-hash all entries when we expand a hash
- group; simply use the next bits we need and mark them invalid.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB-Freelist-Is"
-
-\end_inset
-
-TDB Freelist Is Highly Contended
-\end_layout
-
-\begin_layout Standard
-TDB uses a single linked list for the free list.
- Allocation occurs as follows, using heuristics which have evolved over
- time:
-\end_layout
-
-\begin_layout Enumerate
-Get the free list lock for this whole operation.
-\end_layout
-
-\begin_layout Enumerate
-Multiply length by 1.25, so we always over-allocate by 25%.
-\end_layout
-
-\begin_layout Enumerate
-Set the slack multiplier to 1.
-\end_layout
-
-\begin_layout Enumerate
-Examine the current freelist entry: if it is > length but < the current
- best case, remember it as the best case.
-\end_layout
-
-\begin_layout Enumerate
-Multiply the slack multiplier by 1.05.
-\end_layout
-
-\begin_layout Enumerate
-If our best fit so far is less than length * slack multiplier, return it.
- The slack will be turned into a new free record if it's large enough.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, go onto the next freelist entry.
-\end_layout
-
-\begin_layout Standard
-Deleting a record occurs as follows:
-\end_layout
-
-\begin_layout Enumerate
-Lock the hash chain for this whole operation.
-\end_layout
-
-\begin_layout Enumerate
-Walk the chain to find the record, keeping the prev pointer offset.
-\end_layout
-
-\begin_layout Enumerate
-If max_dead is non-zero:
-\end_layout
-
-\begin_deeper
-\begin_layout Enumerate
-Walk the hash chain again and count the dead records.
-\end_layout
-
-\begin_layout Enumerate
-If it's more than max_dead, bulk free all the dead ones (similar to steps
- 4 and below, but the lock is only obtained once).
-\end_layout
-
-\begin_layout Enumerate
-Simply mark this record as dead and return.
-\end_layout
-
-\end_deeper
-\begin_layout Enumerate
-Get the free list lock for the remainder of this operation.
-\end_layout
-
-\begin_layout Enumerate
-\begin_inset CommandInset label
-LatexCommand label
-name "right-merging"
-
-\end_inset
-
-Examine the following block to see if it is free; if so, enlarge the current
- block and remove that block from the free list.
- This was disabled, as removal from the free list was O(entries-in-free-list).
-\end_layout
-
-\begin_layout Enumerate
-Examine the preceding block to see if it is free: for this reason, each
- block has a 32-bit tailer which indicates its length.
- If it is free, expand it to cover our new block and return.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, prepend ourselves to the free list.
-\end_layout
-
-\begin_layout Standard
-Disabling right-merging (step 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "right-merging"
-
-\end_inset
-
-) causes fragmentation; the other heuristics proved insufficient to address
- this, so the final answer to this was that when we expand the TDB file
- inside a transaction commit, we repack the entire tdb.
-\end_layout
-
-\begin_layout Standard
-The single list lock limits our allocation rate; due to the other issues
- this is not currently seen as a bottleneck.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The first step is to remove all the current heuristics, as they obviously
- interact, then examine them once the lock contention is addressed.
-\end_layout
-
-\begin_layout Standard
-The free list must be split to reduce contention.
- Assuming perfect free merging, we can at most have 1 free list entry for
- each entry.
- This implies that the number of free lists is related to the size of the
- hash table, but as it is rare to walk a large number of free list entries
- we can use far fewer, say 1/32 of the number of hash buckets.
-\end_layout
-
-\begin_layout Standard
-It seems tempting to try to reuse the hash implementation which we use for
- records here, but we have two ways of searching for free entries: for allocatio
-n we search by size (and possibly zone) which produces too many clashes
- for our hash table to handle well, and for coalescing we search by address.
- Thus an array of doubly-linked free lists seems preferable.
-\end_layout
-
-\begin_layout Standard
-There are various benefits in using per-size free lists (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-) but it's not clear this would reduce contention in the common case where
- all processes are allocating/freeing the same size.
- Thus we almost certainly need to divide in other ways: the most obvious
- is to divide the file into zones, and use a free list (or table of free
- lists) for each.
- This approximates address ordering.
-\end_layout
-
-\begin_layout Standard
-Unfortunately it is difficult to know what heuristics should be used to
- determine zone sizes, and our transaction code relies on being able to
- create a 
-\begin_inset Quotes eld
-\end_inset
-
-recovery area
-\begin_inset Quotes erd
-\end_inset
-
- by simply appending to the file (difficult if it would need to create a
- new zone header).
- Thus we use a linked-list of free tables; currently we only ever create
- one, but if there is more than one we choose one at random to use.
- In future we may use heuristics to add new free tables on contention.
- We only expand the file when all free tables are exhausted.
-\end_layout
-
-\begin_layout Standard
-The basic algorithm is as follows.
- Freeing is simple:
-\end_layout
-
-\begin_layout Enumerate
-Identify the correct free list.
-\end_layout
-
-\begin_layout Enumerate
-Lock the corresponding list.
-\end_layout
-
-\begin_layout Enumerate
-Re-check the list (we didn't have a lock, sizes could have changed): relock
- if necessary.
-\end_layout
-
-\begin_layout Enumerate
-Place the freed entry in the list.
-\end_layout
-
-\begin_layout Standard
-Allocation is a little more complicated, as we perform delayed coalescing
- at this point:
-\end_layout
-
-\begin_layout Enumerate
-Pick a free table; usually the previous one.
-\end_layout
-
-\begin_layout Enumerate
-Lock the corresponding list.
-\end_layout
-
-\begin_layout Enumerate
-If the top entry is large enough, remove it from the list and return it.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, coalesce entries in the list. If there was no entry large enough,
- unlock the list and try the next largest list.
-\end_layout
-
-\begin_layout Enumerate
-If no list has an entry which meets our needs, try the next free table.
-\end_layout
-
-\begin_layout Enumerate
-If no zone satisfies, expand the file.
-\end_layout
-
-\begin_layout Standard
-This optimizes rapid insert/delete of free list entries by not coalescing
- them all the time.
- First-fit address ordering seems to be fairly good for keeping
- fragmentation low (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-).
- Note that address ordering does not need a tailer to coalesce, though if
- we needed one we could have one cheaply: see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Records-Incur-A"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-Each free entry has the free table number in the header: less than 255.
- It also contains a doubly-linked list for easy deletion.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-TDB Becomes Fragmented
-\end_layout
-
-\begin_layout Standard
-Much of this is a result of allocation strategy
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute
-xas.edu/pub/garbage/malloc/ismm98.ps
-\end_layout
-
-\end_inset
-
- and deliberate hobbling of coalescing; internal fragmentation (aka overallocati
-on) is deliberately set at 25%, and external fragmentation is only cured
- by the decision to repack the entire db when a transaction commit needs
- to enlarge the file.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The 25% overhead on allocation works in practice for ldb because indexes
- tend to expand by one record at a time.
- This internal fragmentation can be resolved by having an 
-\begin_inset Quotes eld
-\end_inset
-
-expanded
-\begin_inset Quotes erd
-\end_inset
-
- bit in the header to note entries that have previously expanded, and allocating
- more space for them.
-\end_layout
-
-\begin_layout Standard
-There is a spectrum of possible solutions for external fragmentation:
- one is to use a fragmentation-avoiding allocation strategy such as best-fit
- address-order allocator.
- The other end of the spectrum would be to use a bump allocator (very fast
- and simple) and simply repack the file when we reach the end.
-\end_layout
-
-\begin_layout Standard
-There are three problems with efficient fragmentation-avoiding allocators:
- they are non-trivial, they tend to use a single free list for each size,
- and there's no evidence that tdb allocation patterns will match those recorded
- for general allocators (though it seems likely).
-\end_layout
-
-\begin_layout Standard
-Thus we don't spend too much effort on external fragmentation; we will be
- no worse than the current code if we need to repack on occasion.
- More effort is spent on reducing freelist contention, and reducing overhead.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:Records-Incur-A"
-
-\end_inset
-
-Records Incur A 28-Byte Overhead
-\end_layout
-
-\begin_layout Standard
-Each TDB record has a header as follows:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_record {
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_off_t next; /* offset of the next record in the list */
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_len_t rec_len; /* total byte length of record */
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_len_t key_len; /* byte length of key */
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_len_t data_len; /* byte length of data */
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t full_hash; /* the full 32 bit hash of the key */
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t magic;   /* try to catch errors */
-\end_layout
-
-\begin_layout LyX-Code
-        /* the following union is implied:
-\end_layout
-
-\begin_layout LyX-Code
-                union {
-\end_layout
-
-\begin_layout LyX-Code
-                        char record[rec_len];
-\end_layout
-
-\begin_layout LyX-Code
-                        struct {
-\end_layout
-
-\begin_layout LyX-Code
-                                char key[key_len];
-\end_layout
-
-\begin_layout LyX-Code
-                                char data[data_len];
-\end_layout
-
-\begin_layout LyX-Code
-                        }
-\end_layout
-
-\begin_layout LyX-Code
-                        uint32_t totalsize; (tailer)
-\end_layout
-
-\begin_layout LyX-Code
-                }
-\end_layout
-
-\begin_layout LyX-Code
-        */
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-Naively, this would double to a 56-byte overhead on a 64 bit implementation.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We can use various techniques to reduce this for an allocated block:
-\end_layout
-
-\begin_layout Enumerate
-The 'next' pointer is not required, as we are using a flat hash table.
-\end_layout
-
-\begin_layout Enumerate
-'rec_len' can instead be expressed as an addition to key_len and data_len
- (it accounts for wasted or overallocated length in the record).
- Since the record length is always a multiple of 8, we can conveniently
- fit it in 32 bits (representing up to 35 bits).
-\end_layout
-
-\begin_layout Enumerate
-'key_len' and 'data_len' can be reduced.
- I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine
- the two into one 64-bit field and use a 5 bit value which indicates at
- what bit to divide the two.
- Keys are unlikely to scale as fast as data, so I'm assuming a maximum key
- size of 32 bits.
-\end_layout
-
-\begin_layout Enumerate
-'full_hash' is used to avoid a memcmp on the 
-\begin_inset Quotes eld
-\end_inset
-
-miss
-\begin_inset Quotes erd
-\end_inset
-
- case, but this is diminishing returns after a handful of bits (at 10 bits,
- it reduces 99.9% of false memcmp).
- As an aside, as the lower bits are already incorporated in the hash table
- resolution, the upper bits should be used here.
- Note that it's not clear that these bits will be a win, given the extra
- bits in the hash table itself (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Hash-Size-Solution"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Enumerate
-'magic' does not need to be enlarged: it currently reflects one of 5 values
- (used, free, dead, recovery, and unused_recovery).
- It is useful for quick sanity checking however, and should not be eliminated.
-\end_layout
-
-\begin_layout Enumerate
-'tailer' is only used to coalesce free blocks (so a block to the right can
- find the header to check if this block is free).
- This can be replaced by a single 'free' bit in the header of the following
- block (and the tailer only exists in free blocks).
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-This technique from Thomas Standish.
- Data Structure Techniques.
- Addison-Wesley, Reading, Massachusetts, 1980.
-\end_layout
-
-\end_inset
-
- The current proposed coalescing algorithm doesn't need this, however.
-\end_layout
-
-\begin_layout Standard
-This produces a 16 byte used header like this:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_used_record {
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t used_magic : 16,
-\end_layout
-
-\begin_layout LyX-Code
-
-\end_layout
-
-\begin_layout LyX-Code
-                 key_data_divide: 5,
-\end_layout
-
-\begin_layout LyX-Code
-                 top_hash: 11;
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t extra_octets;
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t key_and_data_len;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-And a free record like this:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_free_record {
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t free_magic: 8,
-\end_layout
-
-\begin_layout LyX-Code
-                   prev : 56;
-\end_layout
-
-\begin_layout LyX-Code
-
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t free_table: 8,
-\end_layout
-
-\begin_layout LyX-Code
-                 total_length : 56;
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t next;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-Note that by limiting valid offsets to 56 bits, we can pack everything we
- need into 3 64-bit words, meaning our minimum record size is 8 bytes.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Transaction Commit Requires 4 fdatasync
-\end_layout
-
-\begin_layout Standard
-The current transaction algorithm is:
-\end_layout
-
-\begin_layout Enumerate
-write_recovery_data();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-write_recovery_header();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-overwrite_with_new_data();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-remove_recovery_header();
-\end_layout
-
-\begin_layout Enumerate
-sync(); 
-\end_layout
-
-\begin_layout Standard
-On current ext3, each sync flushes all data to disk, so the next 3 syncs
- are relatively inexpensive.
- But this could become a performance bottleneck on other filesystems such
- as ext4.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Neil Brown points out that this is overzealous, and only one sync is needed:
-\end_layout
-
-\begin_layout Enumerate
-Bundle the recovery data, a transaction counter and a strong checksum of
- the new data.
-\end_layout
-
-\begin_layout Enumerate
-Strong checksum that whole bundle.
-\end_layout
-
-\begin_layout Enumerate
-Store the bundle in the database.
-\end_layout
-
-\begin_layout Enumerate
-Overwrite the oldest of the two recovery pointers in the header (identified
- using the transaction counter) with the offset of this bundle.
-\end_layout
-
-\begin_layout Enumerate
-sync.
-\end_layout
-
-\begin_layout Enumerate
-Write the new data to the file.
-\end_layout
-
-\begin_layout Standard
-Checking for recovery means identifying the latest bundle with a valid checksum
- and using the new data checksum to ensure that it has been applied.
- This is more expensive than the current check, but need only be done at
- open.
- For running databases, a separate header field can be used to indicate
- a transaction in progress; we need only check for recovery if this is set.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:TDB-Does-Not"
-
-\end_inset
-
-TDB Does Not Have Snapshot Support
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- At some point you say 
-\begin_inset Quotes eld
-\end_inset
-
-use a real database
-\begin_inset Quotes erd
-\end_inset
-
- (but see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "replay-attribute"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Standard
-But as a thought experiment, if we implemented transactions to only overwrite
- free entries (this is tricky: there must not be a header in each entry
- which indicates whether it is free, but use of presence in metadata elsewhere),
- and a pointer to the hash table, we could create an entirely new commit
- without destroying existing data.
- Then it would be easy to implement snapshots in a similar way.
-\end_layout
-
-\begin_layout Standard
-This would not allow arbitrary changes to the database, such as tdb_repack
- does, and would require more space (since we have to preserve the current
- and future entries at once).
- If we used hash trees rather than one big hash table, we might only have
- to rewrite some sections of the hash, too.
-\end_layout
-
-\begin_layout Standard
-We could then implement snapshots using a similar method, using multiple
- different hash tables/free tables.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-Transactions Cannot Operate in Parallel
-\end_layout
-
-\begin_layout Standard
-This would be useless for ldb, as it hits the index records with just about
- every update.
- It would add significant complexity in resolving clashes, and cause
- all transaction callers to write their code to loop in the case where the
- transactions spuriously failed.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None (but see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "replay-attribute"
-
-\end_inset
-
-).
- We could solve a small part of the problem by providing read-only transactions.
- These would allow one write transaction to begin, but it could not commit
- until all r/o transactions are done.
- This would require a new RO_TRANSACTION_LOCK, which would be upgraded on
- commit.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-Default Hash Function Is Suboptimal
-\end_layout
-
-\begin_layout Standard
-The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially
- if we expand it to 64 bits), and works best when the hash bucket size is
- a prime number (which also means a slow modulus).
- In addition, it is highly predictable which could potentially lead to a
- Denial of Service attack in some TDB uses.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The Jenkins lookup3 hash
-\begin_inset Foot
-status open
-
-\begin_layout Plain Layout
-http://burtleburtle.net/bob/c/lookup3.c
-\end_layout
-
-\end_inset
-
- is a fast and superbly-mixing hash.
- It's used by the Linux kernel and almost everything else.
- This has the particular properties that it takes an initial seed, and produces
- two 32 bit hash numbers, which we can combine into a 64-bit hash.
-\end_layout
-
-\begin_layout Standard
-The seed should be created at tdb-creation time from some random source,
- and placed in the header.
- This is far from foolproof, but adds a little bit of protection against
- hash bombing.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "Reliable-Traversal-Adds"
-
-\end_inset
-
-Reliable Traversal Adds Complexity
-\end_layout
-
-\begin_layout Standard
-We lock a record during traversal iteration, and try to grab that lock in
- the delete code.
- If that grab on delete fails, we simply mark it deleted and continue onwards;
- traversal checks for this condition and does the delete when it moves off
- the record.
-\end_layout
-
-\begin_layout Standard
-If traversal terminates, the dead record may be left indefinitely.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove reliability guarantees; see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "traverse-Proposed-Solution"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Fcntl Locking Adds Overhead
-\end_layout
-
-\begin_layout Standard
-Placing a fcntl lock means a system call, as does removing one.
- This is actually one reason why transactions can be faster (everything
- is locked once at transaction start).
- In the uncontended case, this overhead can theoretically be eliminated.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
-\end_layout
-
-\begin_layout Standard
-We tried this before with spinlock support, in the early days of TDB, and
- it didn't make much difference except in manufactured benchmarks.
-\end_layout
-
-\begin_layout Standard
-We could use spinlocks (with futex kernel support under Linux), but it means
- that we lose automatic cleanup when a process dies with a lock.
- There is a method of auto-cleanup under Linux, but it's not supported by
- other operating systems.
- We could reintroduce a clear-if-first-style lock and sweep for dead futexes
- on open, but that wouldn't help the normal case of one concurrent opener
- dying.
- Increasingly elaborate repair schemes could be considered, but they require
- an ABI change (everyone must use them) anyway, so there's no need to do
- this at the same time as everything else.
-\end_layout
-
-\begin_layout Subsection
-Some Transactions Don't Require Durability
-\end_layout
-
-\begin_layout Standard
-Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast)
- usage, and occasionally empties the results into a transactional TDB.
- This kind of usage prioritizes performance over durability: as long as
- we are consistent, data can be lost.
-\end_layout
-
-\begin_layout Standard
-This would be more neatly implemented inside tdb: a 
-\begin_inset Quotes eld
-\end_inset
-
-soft
-\begin_inset Quotes erd
-\end_inset
-
- transaction commit (ie.
- syncless) which meant that data may be reverted on a crash.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
-\end_layout
-
-\begin_layout Standard
-Unfortunately any transaction scheme which overwrites old data requires
- a sync before that overwrite to avoid the possibility of corruption.
-\end_layout
-
-\begin_layout Standard
-It seems possible to use a scheme similar to that described in 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Does-Not"
-
-\end_inset
-
-, where transactions are committed without overwriting existing data, and
- an array of top-level pointers was available in the header.
- If the transaction is 
-\begin_inset Quotes eld
-\end_inset
-
-soft
-\begin_inset Quotes erd
-\end_inset
-
- then we would not need a sync at all: existing processes would pick up
- the new hash table and free list and work with that.
-\end_layout
-
-\begin_layout Standard
-At some later point, a sync would allow recovery of the old data into the
- free lists (perhaps when the array of top-level pointers filled).
- On crash, tdb_open() would examine the array of top levels, and apply the
- transactions until it encountered an invalid checksum.
-\end_layout
-
-\begin_layout Subsection
-Tracing Is Fragile, Replay Is External
-\end_layout
-
-\begin_layout Standard
-The current TDB has compile-time-enabled tracing code, but it often breaks
- as it is not enabled by default.
- In a similar way, the ctdb code has an external wrapper which does replay
- tracing so it can coordinate cluster-wide transactions.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\begin_inset CommandInset label
-LatexCommand label
-name "replay-attribute"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Tridge points out that an attribute can be later added to tdb_open (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "attributes"
-
-\end_inset
-
-) to provide replay/trace hooks, which could become the basis for this and
- future parallel transactions and snapshot support.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\end_body
-\end_document
diff --git a/ccan/tdb2/doc/design.lyx,v b/ccan/tdb2/doc/design.lyx,v
deleted file mode 100644 (file)
index 68e5ed2..0000000
+++ /dev/null
@@ -1,4679 +0,0 @@
-head   1.13;
-access;
-symbols;
-locks; strict;
-comment        @# @;
-
-
-1.13
-date   2011.03.01.11.46.54;    author rusty;   state Exp;
-branches;
-next   1.12;
-
-1.12
-date   2010.12.01.12.20.49;    author rusty;   state Exp;
-branches;
-next   1.11;
-
-1.11
-date   2010.12.01.11.55.20;    author rusty;   state Exp;
-branches;
-next   1.10;
-
-1.10
-date   2010.09.14.00.33.57;    author rusty;   state Exp;
-branches;
-next   1.9;
-
-1.9
-date   2010.09.09.07.25.12;    author rusty;   state Exp;
-branches;
-next   1.8;
-
-1.8
-date   2010.09.02.02.29.05;    author rusty;   state Exp;
-branches;
-next   1.7;
-
-1.7
-date   2010.09.01.10.58.12;    author rusty;   state Exp;
-branches;
-next   1.6;
-
-1.6
-date   2010.08.02.00.21.43;    author rusty;   state Exp;
-branches;
-next   1.5;
-
-1.5
-date   2010.08.02.00.21.16;    author rusty;   state Exp;
-branches;
-next   1.4;
-
-1.4
-date   2010.05.10.13.09.11;    author rusty;   state Exp;
-branches;
-next   1.3;
-
-1.3
-date   2010.05.10.11.58.37;    author rusty;   state Exp;
-branches;
-next   1.2;
-
-1.2
-date   2010.05.10.05.35.13;    author rusty;   state Exp;
-branches;
-next   1.1;
-
-1.1
-date   2010.05.04.02.29.16;    author rusty;   state Exp;
-branches;
-next   ;
-
-
-desc
-@First draft
-@
-
-
-1.13
-log
-@Thread-safe API
-@
-text
-@#LyX 1.6.7 created this file. For more info see http://www.lyx.org/
-\lyxformat 345
-\begin_document
-\begin_header
-\textclass article
-\use_default_options true
-\language english
-\inputencoding auto
-\font_roman default
-\font_sans default
-\font_typewriter default
-\font_default_family default
-\font_sc false
-\font_osf false
-\font_sf_scale 100
-\font_tt_scale 100
-
-\graphics default
-\paperfontsize default
-\use_hyperref false
-\papersize default
-\use_geometry false
-\use_amsmath 1
-\use_esint 1
-\cite_engine basic
-\use_bibtopic false
-\paperorientation portrait
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation indent
-\defskip medskip
-\quotes_language english
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tracking_changes true
-\output_changes true
-\author "Rusty Russell,,," 
-\author "" 
-\end_header
-
-\begin_body
-
-\begin_layout Title
-TDB2: Redesigning The Trivial DataBase
-\end_layout
-
-\begin_layout Author
-Rusty Russell, IBM Corporation
-\end_layout
-
-\begin_layout Date
-1-December-2010
-\end_layout
-
-\begin_layout Abstract
-The Trivial DataBase on-disk format is 32 bits; with usage cases heading
- towards the 4G limit, that must change.
- This required breakage provides an opportunity to revisit TDB's other design
- decisions and reassess them.
-\end_layout
-
-\begin_layout Section
-Introduction
-\end_layout
-
-\begin_layout Standard
-The Trivial DataBase was originally written by Andrew Tridgell as a simple
- key/data pair storage system with the same API as dbm, but allowing multiple
- readers and writers while being small enough (< 1000 lines of C) to include
- in SAMBA.
- The simple design created in 1999 has proven surprisingly robust and performant
-, used in Samba versions 3 and 4 as well as numerous other projects.
- Its useful life was greatly increased by the (backwards-compatible!) addition
- of transaction support in 2005.
-\end_layout
-
-\begin_layout Standard
-The wider variety and greater demands of TDB-using code have led to some
- organic growth of the API, as well as some compromises on the implementation.
- None of these, by themselves, are seen as show-stoppers, but the cumulative
- effect is a loss of elegance over the initial, simple TDB implementation.
- Here is a table of the approximate number of lines of implementation code
- and number of API functions at the end of each year:
-\end_layout
-
-\begin_layout Standard
-\begin_inset Tabular
-<lyxtabular version="3" rows="12" columns="3">
-<features>
-<column alignment="center" valignment="top" width="0">
-<column alignment="center" valignment="top" width="0">
-<column alignment="center" valignment="top" width="0">
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-Year End
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-API Functions
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-Lines of C Code Implementation
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1999
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-13
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1195
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2000
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-24
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1725
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2001
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-32
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2228
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2002
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-35
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2481
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2003
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-35
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2552
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2004
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-40
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2584
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2005
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-38
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2647
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2006
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-52
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-3754
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2007
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-66
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-4398
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2008
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-71
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-4768
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2009
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-73
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-5715
-\end_layout
-
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-This review is an attempt to catalog and address all the known issues with
- TDB and create solutions which address the problems without significantly
- increasing complexity; all involved are far too aware of the dangers of
- second system syndrome in rewriting a successful project like this.
-\end_layout
-
-\begin_layout Section
-API Issues
-\end_layout
-
-\begin_layout Subsection
-tdb_open_ex Is Not Expandable
-\end_layout
-
-\begin_layout Standard
-The tdb_open() call was expanded to tdb_open_ex(), which added an optional
- hashing function and an optional logging function argument.
- Additional arguments to open would require the introduction of a tdb_open_ex2
- call etc.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\begin_inset CommandInset label
-LatexCommand label
-name "attributes"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-tdb_open() will take a linked-list of attributes:
-\end_layout
-
-\begin_layout LyX-Code
-enum tdb_attribute {
-\end_layout
-
-\begin_layout LyX-Code
-    TDB_ATTRIBUTE_LOG = 0,
-\end_layout
-
-\begin_layout LyX-Code
-    TDB_ATTRIBUTE_HASH = 1
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_base {
-\end_layout
-
-\begin_layout LyX-Code
-    enum tdb_attribute attr;
-\end_layout
-
-\begin_layout LyX-Code
-    union tdb_attribute *next;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_log {
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
-\end_layout
-
-\begin_layout LyX-Code
-    tdb_log_func log_fn;
-\end_layout
-
-\begin_layout LyX-Code
-    void *log_private;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_hash {
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
-\end_layout
-
-\begin_layout LyX-Code
-    tdb_hash_func hash_fn;
-\end_layout
-
-\begin_layout LyX-Code
-    void *hash_private;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-union tdb_attribute {
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_base base;
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_log log;
-\end_layout
-
-\begin_layout LyX-Code
-    struct tdb_attribute_hash hash;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-This allows future attributes to be added, even if this expands the size
- of the union.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_traverse Makes Impossible Guarantees
-\end_layout
-
-\begin_layout Standard
-tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it
- was thought that it was important to guarantee that all records which exist
- at the start and end of the traversal would be included, and no record
- would be included twice.
-\end_layout
-
-\begin_layout Standard
-This adds complexity (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Reliable-Traversal-Adds"
-
-\end_inset
-
-) and does not work anyway for records which are altered (in particular,
- those which are expanded may be effectively deleted and re-added behind
- the traversal).
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "traverse-Proposed-Solution"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Abandon the guarantee.
- You will see every record if no changes occur during your traversal, otherwise
- you will see some subset.
- You can prevent changes by using a transaction or the locking API.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
- Delete-during-traverse will still delete every record, too (assuming no
- other changes).
-\end_layout
-
-\begin_layout Subsection
-Nesting of Transactions Is Fraught
-\end_layout
-
-\begin_layout Standard
-TDB has alternated between allowing nested transactions and not allowing
- them.
- Various paths in the Samba codebase assume that transactions will nest,
- and in a sense they can: the operation is only committed to disk when the
- outer transaction is committed.
- There are two problems, however:
-\end_layout
-
-\begin_layout Enumerate
-Canceling the inner transaction will cause the outer transaction commit
- to fail, and will not undo any operations since the inner transaction began.
- This problem is soluble with some additional internal code.
-\end_layout
-
-\begin_layout Enumerate
-An inner transaction commit can be cancelled by the outer transaction.
- This is desirable in the way which Samba's database initialization code
- uses transactions, but could be a surprise to any users expecting a successful
- transaction commit to expose changes to others.
-\end_layout
-
-\begin_layout Standard
-The current solution is to specify the behavior at tdb_open(), with the
- default currently that nested transactions are allowed.
- This flag can also be changed at runtime.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Given the usage patterns, it seems that the 
-\begin_inset Quotes eld
-\end_inset
-
-least-surprise
-\begin_inset Quotes erd
-\end_inset
-
- behavior of disallowing nested transactions should become the default.
- Additionally, it seems the outer transaction is the only code which knows
- whether inner transactions should be allowed, so a flag to indicate this
- could be added to tdb_transaction_start.
- However, this behavior can be simulated with a wrapper which uses tdb_add_flags
-() and tdb_remove_flags(), so the API should not be expanded for this relatively
--obscure case.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1298979572
-Incomplete; nesting flag is still defined as per tdb1.
-\change_inserted 0 1298979584
-Complete; the nesting flag has been removed.
-\change_unchanged
-
-\end_layout
-
-\begin_layout Subsection
-Incorrect Hash Function is Not Detected
-\end_layout
-
-\begin_layout Standard
-tdb_open_ex() allows the calling code to specify a different hash function
- to use, but does not check that all other processes accessing this tdb
- are using the same hash function.
- The result is that records are missing from tdb_fetch().
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The header should contain an example hash result (eg.
- the hash of 0xdeadbeef), and tdb_open_ex() should check that the given
- hash function produces the same answer, or fail the tdb_open call.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_set_max_dead/TDB_VOLATILE Expose Implementation
-\end_layout
-
-\begin_layout Standard
-In response to scalability issues with the free list (
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB-Freelist-Is"
-
-\end_inset
-
-) two API workarounds have been incorporated in TDB: tdb_set_max_dead()
- and the TDB_VOLATILE flag to tdb_open.
- The latter actually calls the former with an argument of 
-\begin_inset Quotes eld
-\end_inset
-
-5
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-This code allows deleted records to accumulate without putting them in the
- free list.
- On delete we iterate through each chain and free them in a batch if there
- are more than max_dead entries.
- These are never otherwise recycled except as a side-effect of a tdb_repack.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-With the scalability problems of the freelist solved, this API can be removed.
- The TDB_VOLATILE flag may still be useful as a hint that store and delete
- of records will be at least as common as fetch in order to allow some internal
- tuning, but initially will become a no-op.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
- TDB_VOLATILE still defined, but implementation should fail on unknown flags
- to be future-proof.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB-Files-Cannot"
-
-\end_inset
-
-TDB Files Cannot Be Opened Multiple Times In The Same Process
-\end_layout
-
-\begin_layout Standard
-No process can open the same TDB twice; we check and disallow it.
- This is an unfortunate side-effect of fcntl locks, which operate on a per-file
- rather than per-file-descriptor basis, and do not nest.
- Thus, closing any file descriptor on a file clears all the locks obtained
- by this process, even if they were placed using a different file descriptor!
-\end_layout
-
-\begin_layout Standard
-Note that even if this were solved, deadlock could occur if operations were
- nested: this is a more manageable programming error in most cases.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We could lobby POSIX to fix the perverse rules, or at least lobby Linux
- to violate them so that the most common implementation does not have this
- restriction.
- This would be a generally good idea for other fcntl lock users.
-\end_layout
-
-\begin_layout Standard
-Samba uses a wrapper which hands out the same tdb_context to multiple callers
- if this happens, and does simple reference counting.
- We should do this inside the tdb library, which already emulates lock nesting
- internally; it would need to recognize when deadlock occurs within a single
- process.
- This would create a new failure mode for tdb operations (while we currently
- handle locking failures, they are impossible in normal use and a process
- encountering them can do little but give up).
-\end_layout
-
-\begin_layout Standard
-I do not see benefit in an additional tdb_open flag to indicate whether
- re-opening is allowed, although there may be some benefit to adding a
- call to detect when a tdb_context is shared, to allow others to create such
- an API.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-TDB API Is Not POSIX Thread-safe
-\end_layout
-
-\begin_layout Standard
-The TDB API uses an error code which can be queried after an operation to
- determine what went wrong.
- This programming model does not work with threads, unless specific additional
- guarantees are given by the implementation.
- In addition, even otherwise-independent threads cannot open the same TDB
- (as in 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB-Files-Cannot"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Rearchitecting the API to include a tdb_errcode pointer would be a great
- deal of churn
-\change_inserted 0 1298979557
-, but fortunately most functions return 0 on success and -1 on error: we
- can change these to return 0 on success and a negative error code on error,
- and the API remains similar to previous.
- The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA
- pointer and return an error code.
- It is also simpler to have tdb_nextkey replace its key argument in place,
- freeing up any old .dptr.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1298979438
-; we are better to guarantee that the tdb_errcode is per-thread so the current
- programming model can be maintained.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1298979438
-This requires dynamic per-thread allocations, which is awkward with POSIX
- threads (pthread_key_create space is limited and we cannot simply allocate
- a key for every TDB).
-\change_unchanged
-
-\end_layout
-
-\begin_layout Standard
-Internal locking is required to make sure that fcntl locks do not overlap
- between threads, and also that the global list of tdbs is maintained.
-\end_layout
-
-\begin_layout Standard
-The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe
- version of the library, and otherwise no overhead will exist.
- Alternatively, a hooking mechanism similar to that proposed for 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Proposed-Solution-locking-hook"
-
-\end_inset
-
- could be used to enable pthread locking at runtime.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete
-\change_inserted 0 1298979681
-; API has been changed but thread safety has not been implemented.
-\change_deleted 0 1298979669
-.
-\change_unchanged
-
-\end_layout
-
-\begin_layout Subsection
-*_nonblock Functions And *_mark Functions Expose Implementation
-\end_layout
-
-\begin_layout Standard
-CTDB
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-Clustered TDB, see http://ctdb.samba.org
-\end_layout
-
-\end_inset
-
- wishes to operate on TDB in a non-blocking manner.
- This is currently done as follows:
-\end_layout
-
-\begin_layout Enumerate
-Call the _nonblock variant of an API function (eg.
- tdb_lockall_nonblock).
- If this fails:
-\end_layout
-
-\begin_layout Enumerate
-Fork a child process, and wait for it to call the normal variant (eg.
- tdb_lockall).
-\end_layout
-
-\begin_layout Enumerate
-If the child succeeds, call the _mark variant to indicate we already have
- the locks (eg.
- tdb_lockall_mark).
-\end_layout
-
-\begin_layout Enumerate
-Upon completion, tell the child to release the locks (eg.
- tdb_unlockall).
-\end_layout
-
-\begin_layout Enumerate
-Indicate to tdb that it should consider the locks removed (eg.
- tdb_unlockall_mark).
-\end_layout
-
-\begin_layout Standard
-There are several issues with this approach.
- Firstly, adding two new variants of each function clutters the API for
- an obscure use, and so not all functions have three variants.
- Secondly, it assumes that all paths of the functions ask for the same locks,
- otherwise the parent process will have to get a lock which the child doesn't
- have under some circumstances.
- I don't believe this is currently the case, but it constrains the implementatio
-n.
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "Proposed-Solution-locking-hook"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Implement a hook for locking methods, so that the caller can control the
- calls to create and remove fcntl locks.
- In this scenario, ctdbd would operate as follows:
-\end_layout
-
-\begin_layout Enumerate
-Call the normal API function, eg tdb_lockall().
-\end_layout
-
-\begin_layout Enumerate
-When the lock callback comes in, check if the child has the lock.
- Initially, this is always false.
- If so, return 0.
- Otherwise, try to obtain it in non-blocking mode.
- If that fails, return EWOULDBLOCK.
-\end_layout
-
-\begin_layout Enumerate
-Release locks in the unlock callback as normal.
-\end_layout
-
-\begin_layout Enumerate
-If tdb_lockall() fails, see if we recorded a lock failure; if so, call the
- child to repeat the operation.
-\end_layout
-
-\begin_layout Enumerate
-The child records what locks it obtains, and returns that information to
- the parent.
-\end_layout
-
-\begin_layout Enumerate
-When the child has succeeded, goto 1.
-\end_layout
-
-\begin_layout Standard
-This is flexible enough to handle any potential locking scenario, even when
- lock requirements change.
- It can be optimized so that the parent does not release locks, just tells
- the child which locks it doesn't need to obtain.
-\end_layout
-
-\begin_layout Standard
-It also keeps the complexity out of the API, and in ctdbd where it is needed.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-tdb_chainlock Functions Expose Implementation
-\end_layout
-
-\begin_layout Standard
-tdb_chainlock locks some number of records, including the record indicated
- by the given key.
- This gave atomicity guarantees; no-one can start a transaction, alter,
- read or delete that key while the lock is held.
-\end_layout
-
-\begin_layout Standard
-It also makes the same guarantee for any other key in the chain, which is
- an internal implementation detail and potentially a cause for deadlock.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- It would be nice to have an explicit single entry lock which affected no
- other keys.
- Unfortunately, this won't work for an entry which doesn't exist.
- Thus while chainlock may be implemented more efficiently for the existing
- case, it will still have overlap issues with the non-existing case.
- So it is best to keep the current (lack of) guarantee about which records
- will be affected to avoid constraining our implementation.
-\end_layout
-
-\begin_layout Subsection
-Signal Handling is Not Race-Free
-\end_layout
-
-\begin_layout Standard
-The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate
- that the tdb locking code should return with a failure, rather than trying
- again when a signal is received (and errno == EAGAIN).
- This is usually used to implement timeouts.
-\end_layout
-
-\begin_layout Standard
-Unfortunately, this does not work in the case where the signal is received
- before the tdb code enters the fcntl() call to place the lock: the code
- will sleep within the fcntl() code, unaware that the signal wants it to
- exit.
- In the case of long timeouts, this does not happen in practice.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The locking hooks proposed in
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Proposed-Solution-locking-hook"
-
-\end_inset
-
- would allow the user to decide on whether to fail the lock acquisition
- on a signal.
- This allows the caller to choose their own compromise: they could narrow
- the race by checking immediately before the fcntl call.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-It may be possible to make this race-free in some implementations by having
- the signal handler alter the struct flock to make it invalid.
- This will cause the fcntl() lock call to fail with EINVAL if the signal
- occurs before the kernel is entered, otherwise EAGAIN.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-The API Uses Gratuitous Typedefs, Capitals
-\end_layout
-
-\begin_layout Standard
-typedefs are useful for providing source compatibility when types can differ
- across implementations, or arguably in the case of function pointer definitions
- which are hard for humans to parse.
- Otherwise it is simply obfuscation and pollutes the namespace.
-\end_layout
-
-\begin_layout Standard
-Capitalization is usually reserved for compile-time constants and macros.
-\end_layout
-
-\begin_layout Description
-TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the
- definition isn't visible to the API user anyway.
-\end_layout
-
-\begin_layout Description
-TDB_DATA There is no reason to use this over struct TDB_DATA; the struct
- needs to be understood by the API user.
-\end_layout
-
-\begin_layout Description
-struct
-\begin_inset space ~
-\end_inset
-
-TDB_DATA This would normally be called 'struct tdb_data'.
-\end_layout
-
-\begin_layout Description
-enum
-\begin_inset space ~
-\end_inset
-
-TDB_ERROR Similarly, this would normally be enum tdb_error.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- Introducing lower case variants would please pedants like myself, but if
- it were done the existing ones should be kept.
- There is little point forcing a purely cosmetic change upon tdb users.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "tdb_log_func-Doesnt-Take"
-
-\end_inset
-
-tdb_log_func Doesn't Take The Private Pointer
-\end_layout
-
-\begin_layout Standard
-For API compatibility reasons, the logging function needs to call tdb_get_loggin
-g_private() to retrieve the pointer registered by the tdb_open_ex for logging.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-It should simply take an extra argument, since we are prepared to break
- the API/ABI.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Various Callback Functions Are Not Typesafe
-\end_layout
-
-\begin_layout Standard
-The callback functions in tdb_set_logging_function (after 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "tdb_log_func-Doesnt-Take"
-
-\end_inset
-
- is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check
- all take void * and must internally convert it to the argument type they
- were expecting.
-\end_layout
-
-\begin_layout Standard
-If this type changes, the compiler will not produce warnings on the callers,
- since it only sees void *.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-With careful use of macros, we can create callback functions which give
- a warning when used on gcc and the types of the callback and its private
- argument differ.
- Unsupported compilers will not give a warning, which is no worse than now.
- In addition, the callbacks become clearer, as they need not use void *
- for their parameter.
-\end_layout
-
-\begin_layout Standard
-See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic
-\end_layout
-
-\begin_layout Standard
-The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should
- be cleared if the caller discovers it is the only process with the TDB
- open.
- However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not
- be detected, so will have the TDB erased underneath them (usually resulting
- in a crash).
-\end_layout
-
-\begin_layout Standard
-There is a similar issue on fork(); if the parent exits (or otherwise closes
- the tdb) before the child calls tdb_reopen_all() to establish the lock
- used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener
- at that moment will believe it alone has opened the TDB and will erase
- it.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove TDB_CLEAR_IF_FIRST.
- Other workarounds are possible, but see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB_CLEAR_IF_FIRST-Imposes-Performance"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1298979699
-Incomplete, TDB_CLEAR_IF_FIRST still defined, but not implemented.
-\change_inserted 0 1298979700
-Complete.
-\change_unchanged
-
-\end_layout
-
-\begin_layout Subsection
-Extending The Header Is Difficult
-\end_layout
-
-\begin_layout Standard
-We have reserved (zeroed) words in the TDB header, which can be used for
- future features.
- If the future features are compulsory, the version number must be updated
- to prevent old code from accessing the database.
- But if the future feature is optional, we have no way of telling if older
- code is accessing the database or not.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The header should contain a 
-\begin_inset Quotes eld
-\end_inset
-
-format variant
-\begin_inset Quotes erd
-\end_inset
-
- value (64-bit).
- This is divided into two 32-bit parts:
-\end_layout
-
-\begin_layout Enumerate
-The lower part reflects the format variant understood by code accessing
- the database.
-\end_layout
-
-\begin_layout Enumerate
-The upper part reflects the format variant you must understand to write
- to the database (otherwise you can only open for reading).
-\end_layout
-
-\begin_layout Standard
-The latter field can only be written at creation time, the former should
- be written under the OPEN_LOCK when opening the database for writing, if
- the variant of the code is lower than the current lowest variant.
-\end_layout
-
-\begin_layout Standard
-This should allow backwards-compatible features to be added, and detection
- if older code (which doesn't understand the feature) writes to the database.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-Record Headers Are Not Expandable
-\end_layout
-
-\begin_layout Standard
-If we later want to add (say) checksums on keys and data, it would require
- another format change, which we'd like to avoid.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We often have extra padding at the tail of a record.
- If we ensure that the first byte (if any) of this padding is zero, we will
- have a way for future changes to detect code which doesn't understand a
- new format: the new code would write (say) a 1 at the tail, and thus if
- there is no tail or the first byte is 0, we would know the extension is
- not present on that record.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete.
-\end_layout
-
-\begin_layout Subsection
-TDB Does Not Use Talloc
-\end_layout
-
-\begin_layout Standard
-Many users of TDB (particularly Samba) use the talloc allocator, and thus
- have to wrap TDB in a talloc context to use it conveniently.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The allocation within TDB is not complicated enough to justify the use of
- talloc, and I am reluctant to force another (excellent) library on TDB
- users.
- Nonetheless a compromise is possible.
- An attribute (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "attributes"
-
-\end_inset
-
-) can be added later to tdb_open() to provide an alternate allocation mechanism,
- specifically for talloc but usable by any other allocator (which would
- ignore the 
-\begin_inset Quotes eld
-\end_inset
-
-context
-\begin_inset Quotes erd
-\end_inset
-
- argument).
-\end_layout
-
-\begin_layout Standard
-This would form a talloc hierarchy as expected, but the caller would still
- have to attach a destructor to the tdb context returned from tdb_open to
- close it.
- All TDB_DATA fields would be children of the tdb_context, and the caller
- would still have to manage them (using talloc_free() or talloc_steal()).
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Section
-Performance And Scalability Issues
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB_CLEAR_IF_FIRST-Imposes-Performance"
-
-\end_inset
-
-TDB_CLEAR_IF_FIRST Imposes Performance Penalty
-\end_layout
-
-\begin_layout Standard
-When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset
- 4 (aka.
- the ACTIVE_LOCK).
- While these locks never conflict in normal tdb usage, they do add substantial
- overhead for most fcntl lock implementations when the kernel scans to detect
- if a lock conflict exists.
- This is often a single linked list, making the time to acquire and release
- a fcntl lock O(N) where N is the number of processes with the TDB open,
- not the number actually doing work.
-\end_layout
-
-\begin_layout Standard
-In a Samba server it is common to have huge numbers of clients sitting idle,
- and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-There is a flag to tdb_reopen_all() which is used for this optimization:
- if the parent process will outlive the child, the child does not need the
- ACTIVE_LOCK.
- This is a workaround for this very performance issue.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove the flag.
- It was a neat idea, but even trivial servers tend to know when they are
- initializing for the first time and can simply unlink the old tdb at that
- point.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1298979837
-Incomplete; TDB_CLEAR_IF_FIRST still defined, but does nothing.
-\change_inserted 0 1298979837
-Complete.
-\change_unchanged
-
-\end_layout
-
-\begin_layout Subsection
-TDB Files Have a 4G Limit
-\end_layout
-
-\begin_layout Standard
-This seems to be becoming an issue (so much for 
-\begin_inset Quotes eld
-\end_inset
-
-trivial
-\begin_inset Quotes erd
-\end_inset
-
-!), particularly for ldb.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-A new, incompatible TDB format which uses 64 bit offsets internally rather
- than 32 bit as now.
- For simplicity of endian conversion (which TDB does on the fly if required),
- all values will be 64 bit on disk.
- In practice, some upper bits may be used for other purposes, but at least
- 56 bits will be available for file offsets.
-\end_layout
-
-\begin_layout Standard
-tdb_open() will automatically detect the old version, and even create them
- if TDB_VERSION6 is specified to tdb_open.
-\end_layout
-
-\begin_layout Standard
-32 bit processes will still be able to access TDBs larger than 4G (assuming
- that their off_t allows them to seek to 64 bits), they will gracefully
- fall back as they fail to mmap.
- This can happen already with large TDBs.
-\end_layout
-
-\begin_layout Standard
-Old versions of tdb will fail to open the new TDB files (since 28 August
- 2009, commit 398d0c29290: prior to that any unrecognized file format would
- be erased and initialized as a fresh tdb!)
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Records Have a 4G Limit
-\end_layout
-
-\begin_layout Standard
-This has not been a reported problem, and the API uses size_t which can
- be 64 bit on 64 bit platforms.
- However, other limits may have made such an issue moot.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Record sizes will be 64 bit, with an error returned on 32 bit platforms
- which try to access such records (the current implementation would return
- TDB_ERR_OOM in a similar case).
- It seems unlikely that 32 bit keys will be a limitation, so the implementation
- may not support this (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Records-Incur-A"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Hash Size Is Determined At TDB Creation Time
-\end_layout
-
-\begin_layout Standard
-TDB contains a number of hash chains in the header; the number is specified
- at creation time, and defaults to 131.
- This is such a bottleneck on large databases (as each hash chain gets quite
- long), that LDB uses 10,000 for this hash.
- In general it is impossible to know what the 'right' answer is at database
- creation time.
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:Hash-Size-Solution"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-After comprehensive performance testing on various scalable hash variants
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying
- because I was previously convinced that an expanding tree of hashes would
- be very close to optimal.
-\end_layout
-
-\end_inset
-
-, it became clear that it is hard to beat a straight linear hash table which
- doubles in size when it reaches saturation.
- Unfortunately, altering the hash table introduces serious locking complications
-: the entire hash table needs to be locked to enlarge the hash table, and
- others might be holding locks.
- Particularly insidious are insertions done under tdb_chainlock.
-\end_layout
-
-\begin_layout Standard
-Thus an expanding layered hash will be used: an array of hash groups, with
- each hash group exploding into pointers to lower hash groups once it fills,
- turning into a hash tree.
- This has implications for locking: we must lock the entire group in case
- we need to expand it, yet we don't know how deep the tree is at that point.
-\end_layout
-
-\begin_layout Standard
-Note that bits from the hash table entries should be stolen to hold more
- hash bits to reduce the penalty of collisions.
- We can use the otherwise-unused lower 3 bits.
- If we limit the size of the database to 64 exabytes, we can use the top
- 8 bits of the hash entry as well.
- These 11 bits would reduce false positives down to 1 in 2000 which is more
- than we need: we can use one of the bits to indicate that the extra hash
- bits are valid.
- This means we can choose not to re-hash all entries when we expand a hash
- group; simply use the next bits we need and mark them invalid.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB-Freelist-Is"
-
-\end_inset
-
-TDB Freelist Is Highly Contended
-\end_layout
-
-\begin_layout Standard
-TDB uses a single linked list for the free list.
- Allocation occurs as follows, using heuristics which have evolved over
- time:
-\end_layout
-
-\begin_layout Enumerate
-Get the free list lock for this whole operation.
-\end_layout
-
-\begin_layout Enumerate
-Multiply length by 1.25, so we always over-allocate by 25%.
-\end_layout
-
-\begin_layout Enumerate
-Set the slack multiplier to 1.
-\end_layout
-
-\begin_layout Enumerate
-Examine the current freelist entry: if it is > length but < the current
- best case, remember it as the best case.
-\end_layout
-
-\begin_layout Enumerate
-Multiply the slack multiplier by 1.05.
-\end_layout
-
-\begin_layout Enumerate
-If our best fit so far is less than length * slack multiplier, return it.
- The slack will be turned into a new free record if it's large enough.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, go onto the next freelist entry.
-\end_layout
-
-\begin_layout Standard
-Deleting a record occurs as follows:
-\end_layout
-
-\begin_layout Enumerate
-Lock the hash chain for this whole operation.
-\end_layout
-
-\begin_layout Enumerate
-Walk the chain to find the record, keeping the prev pointer offset.
-\end_layout
-
-\begin_layout Enumerate
-If max_dead is non-zero:
-\end_layout
-
-\begin_deeper
-\begin_layout Enumerate
-Walk the hash chain again and count the dead records.
-\end_layout
-
-\begin_layout Enumerate
-If it's more than max_dead, bulk free all the dead ones (similar to steps
- 4 and below, but the lock is only obtained once).
-\end_layout
-
-\begin_layout Enumerate
-Simply mark this record as dead and return.
-\end_layout
-
-\end_deeper
-\begin_layout Enumerate
-Get the free list lock for the remainder of this operation.
-\end_layout
-
-\begin_layout Enumerate
-\begin_inset CommandInset label
-LatexCommand label
-name "right-merging"
-
-\end_inset
-
-Examine the following block to see if it is free; if so, enlarge the current
- block and remove that block from the free list.
- This was disabled, as removal from the free list was O(entries-in-free-list).
-\end_layout
-
-\begin_layout Enumerate
-Examine the preceding block to see if it is free: for this reason, each
- block has a 32-bit tailer which indicates its length.
- If it is free, expand it to cover our new block and return.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, prepend ourselves to the free list.
-\end_layout
-
-\begin_layout Standard
-Disabling right-merging (step 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "right-merging"
-
-\end_inset
-
-) causes fragmentation; the other heuristics proved insufficient to address
- this, so the final answer to this was that when we expand the TDB file
- inside a transaction commit, we repack the entire tdb.
-\end_layout
-
-\begin_layout Standard
-The single list lock limits our allocation rate; due to the other issues
- this is not currently seen as a bottleneck.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The first step is to remove all the current heuristics, as they obviously
- interact, then examine them once the lock contention is addressed.
-\end_layout
-
-\begin_layout Standard
-The free list must be split to reduce contention.
- Assuming perfect free merging, we can at most have 1 free list entry for
- each entry.
- This implies that the number of free lists is related to the size of the
- hash table, but as it is rare to walk a large number of free list entries
- we can use far fewer, say 1/32 of the number of hash buckets.
-\end_layout
-
-\begin_layout Standard
-It seems tempting to try to reuse the hash implementation which we use for
- records here, but we have two ways of searching for free entries: for allocatio
-n we search by size (and possibly zone) which produces too many clashes
- for our hash table to handle well, and for coalescing we search by address.
- Thus an array of doubly-linked free lists seems preferable.
-\end_layout
-
-\begin_layout Standard
-There are various benefits in using per-size free lists (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-) but it's not clear this would reduce contention in the common case where
- all processes are allocating/freeing the same size.
- Thus we almost certainly need to divide in other ways: the most obvious
- is to divide the file into zones, and use a free list (or table of free
- lists) for each.
- This approximates address ordering.
-\end_layout
-
-\begin_layout Standard
-Unfortunately it is difficult to know what heuristics should be used to
- determine zone sizes, and our transaction code relies on being able to
- create a 
-\begin_inset Quotes eld
-\end_inset
-
-recovery area
-\begin_inset Quotes erd
-\end_inset
-
- by simply appending to the file (difficult if it would need to create a
- new zone header).
- Thus we use a linked-list of free tables; currently we only ever create
- one, but if there is more than one we choose one at random to use.
- In future we may use heuristics to add new free tables on contention.
- We only expand the file when all free tables are exhausted.
-\end_layout
-
-\begin_layout Standard
-The basic algorithm is as follows.
- Freeing is simple:
-\end_layout
-
-\begin_layout Enumerate
-Identify the correct free list.
-\end_layout
-
-\begin_layout Enumerate
-Lock the corresponding list.
-\end_layout
-
-\begin_layout Enumerate
-Re-check the list (we didn't have a lock, sizes could have changed): relock
- if necessary.
-\end_layout
-
-\begin_layout Enumerate
-Place the freed entry in the list.
-\end_layout
-
-\begin_layout Standard
-Allocation is a little more complicated, as we perform delayed coalescing
- at this point:
-\end_layout
-
-\begin_layout Enumerate
-Pick a free table; usually the previous one.
-\end_layout
-
-\begin_layout Enumerate
-Lock the corresponding list.
-\end_layout
-
-\begin_layout Enumerate
-If the top entry is large enough, remove it from the list and return it.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, coalesce entries in the list. If there was no entry large enough,
- unlock the list and try the next largest list.
-\end_layout
-
-\begin_layout Enumerate
-If no list has an entry which meets our needs, try the next free table.
-\end_layout
-
-\begin_layout Enumerate
-If no zone satisfies, expand the file.
-\end_layout
-
-\begin_layout Standard
-This optimizes rapid insert/delete of free list entries by not coalescing
- them all the time.
- First-fit address ordering seems to be fairly good for keeping
- fragmentation low (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-).
- Note that address ordering does not need a tailer to coalesce, though if
- we needed one we could have one cheaply: see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Records-Incur-A"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-Each free entry has the free table number in the header: less than 255.
- It also contains a doubly-linked list for easy deletion.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-TDB Becomes Fragmented
-\end_layout
-
-\begin_layout Standard
-Much of this is a result of allocation strategy
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute
-xas.edu/pub/garbage/malloc/ismm98.ps
-\end_layout
-
-\end_inset
-
- and deliberate hobbling of coalescing; internal fragmentation (aka overallocati
-on) is deliberately set at 25%, and external fragmentation is only cured
- by the decision to repack the entire db when a transaction commit needs
- to enlarge the file.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The 25% overhead on allocation works in practice for ldb because indexes
- tend to expand by one record at a time.
- This internal fragmentation can be resolved by having an 
-\begin_inset Quotes eld
-\end_inset
-
-expanded
-\begin_inset Quotes erd
-\end_inset
-
- bit in the header to note entries that have previously expanded, and allocating
- more space for them.
-\end_layout
-
-\begin_layout Standard
-There is a spectrum of possible solutions for external fragmentation:
- one is to use a fragmentation-avoiding allocation strategy such as best-fit
- address-order allocator.
- The other end of the spectrum would be to use a bump allocator (very fast
- and simple) and simply repack the file when we reach the end.
-\end_layout
-
-\begin_layout Standard
-There are three problems with efficient fragmentation-avoiding allocators:
- they are non-trivial, they tend to use a single free list for each size,
- and there's no evidence that tdb allocation patterns will match those recorded
- for general allocators (though it seems likely).
-\end_layout
-
-\begin_layout Standard
-Thus we don't spend too much effort on external fragmentation; we will be
- no worse than the current code if we need to repack on occasion.
- More effort is spent on reducing freelist contention, and reducing overhead.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:Records-Incur-A"
-
-\end_inset
-
-Records Incur A 28-Byte Overhead
-\end_layout
-
-\begin_layout Standard
-Each TDB record has a header as follows:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_record {
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_off_t next; /* offset of the next record in the list */
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_len_t rec_len; /* total byte length of record */
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_len_t key_len; /* byte length of key */
-\end_layout
-
-\begin_layout LyX-Code
-        tdb_len_t data_len; /* byte length of data */
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t full_hash; /* the full 32 bit hash of the key */
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t magic;   /* try to catch errors */
-\end_layout
-
-\begin_layout LyX-Code
-        /* the following union is implied:
-\end_layout
-
-\begin_layout LyX-Code
-                union {
-\end_layout
-
-\begin_layout LyX-Code
-                        char record[rec_len];
-\end_layout
-
-\begin_layout LyX-Code
-                        struct {
-\end_layout
-
-\begin_layout LyX-Code
-                                char key[key_len];
-\end_layout
-
-\begin_layout LyX-Code
-                                char data[data_len];
-\end_layout
-
-\begin_layout LyX-Code
-                        }
-\end_layout
-
-\begin_layout LyX-Code
-                        uint32_t totalsize; (tailer)
-\end_layout
-
-\begin_layout LyX-Code
-                }
-\end_layout
-
-\begin_layout LyX-Code
-        */
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-Naively, this would double to a 56-byte overhead on a 64 bit implementation.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We can use various techniques to reduce this for an allocated block:
-\end_layout
-
-\begin_layout Enumerate
-The 'next' pointer is not required, as we are using a flat hash table.
-\end_layout
-
-\begin_layout Enumerate
-'rec_len' can instead be expressed as an addition to key_len and data_len
- (it accounts for wasted or overallocated length in the record).
- Since the record length is always a multiple of 8, we can conveniently
- fit it in 32 bits (representing up to 35 bits).
-\end_layout
-
-\begin_layout Enumerate
-'key_len' and 'data_len' can be reduced.
- I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine
- the two into one 64-bit field and use a 5 bit value which indicates at
- what bit to divide the two.
- Keys are unlikely to scale as fast as data, so I'm assuming a maximum key
- size of 32 bits.
-\end_layout
-
-\begin_layout Enumerate
-'full_hash' is used to avoid a memcmp on the 
-\begin_inset Quotes eld
-\end_inset
-
-miss
-\begin_inset Quotes erd
-\end_inset
-
- case, but this is diminishing returns after a handful of bits (at 10 bits,
- it reduces 99.9% of false memcmp).
- As an aside, as the lower bits are already incorporated in the hash table
- resolution, the upper bits should be used here.
- Note that it's not clear that these bits will be a win, given the extra
- bits in the hash table itself (see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Hash-Size-Solution"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Enumerate
-'magic' does not need to be enlarged: it currently reflects one of 5 values
- (used, free, dead, recovery, and unused_recovery).
- It is useful for quick sanity checking however, and should not be eliminated.
-\end_layout
-
-\begin_layout Enumerate
-'tailer' is only used to coalesce free blocks (so a block to the right can
- find the header to check if this block is free).
- This can be replaced by a single 'free' bit in the header of the following
- block (and the tailer only exists in free blocks).
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-This technique from Thomas Standish.
- Data Structure Techniques.
- Addison-Wesley, Reading, Massachusetts, 1980.
-\end_layout
-
-\end_inset
-
- The current proposed coalescing algorithm doesn't need this, however.
-\end_layout
-
-\begin_layout Standard
-This produces a 16 byte used header like this:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_used_record {
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t used_magic : 16,
-\end_layout
-
-\begin_layout LyX-Code
-
-\end_layout
-
-\begin_layout LyX-Code
-                 key_data_divide: 5,
-\end_layout
-
-\begin_layout LyX-Code
-                 top_hash: 11;
-\end_layout
-
-\begin_layout LyX-Code
-        uint32_t extra_octets;
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t key_and_data_len;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-And a free record like this:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_free_record {
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t free_magic: 8,
-\end_layout
-
-\begin_layout LyX-Code
-                   prev : 56;
-\end_layout
-
-\begin_layout LyX-Code
-
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t free_table: 8,
-\end_layout
-
-\begin_layout LyX-Code
-                 total_length : 56;
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t next;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1291206079
-\change_unchanged
-Note that by limiting valid offsets to 56 bits, we can pack everything we
- need into 3 64-bit words, meaning our minimum record size is 8 bytes.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Transaction Commit Requires 4 fdatasync
-\end_layout
-
-\begin_layout Standard
-The current transaction algorithm is:
-\end_layout
-
-\begin_layout Enumerate
-write_recovery_data();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-write_recovery_header();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-overwrite_with_new_data();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-remove_recovery_header();
-\end_layout
-
-\begin_layout Enumerate
-sync(); 
-\end_layout
-
-\begin_layout Standard
-On current ext3, each sync flushes all data to disk, so the next 3 syncs
- are relatively inexpensive.
- But this could become a performance bottleneck on other filesystems such
- as ext4.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Neil Brown points out that this is overzealous, and only one sync is needed:
-\end_layout
-
-\begin_layout Enumerate
-Bundle the recovery data, a transaction counter and a strong checksum of
- the new data.
-\end_layout
-
-\begin_layout Enumerate
-Strong checksum that whole bundle.
-\end_layout
-
-\begin_layout Enumerate
-Store the bundle in the database.
-\end_layout
-
-\begin_layout Enumerate
-Overwrite the oldest of the two recovery pointers in the header (identified
- using the transaction counter) with the offset of this bundle.
-\end_layout
-
-\begin_layout Enumerate
-sync.
-\end_layout
-
-\begin_layout Enumerate
-Write the new data to the file.
-\end_layout
-
-\begin_layout Standard
-Checking for recovery means identifying the latest bundle with a valid checksum
- and using the new data checksum to ensure that it has been applied.
- This is more expensive than the current check, but need only be done at
- open.
- For running databases, a separate header field can be used to indicate
- a transaction in progress; we need only check for recovery if this is set.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:TDB-Does-Not"
-
-\end_inset
-
-TDB Does Not Have Snapshot Support
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- At some point you say 
-\begin_inset Quotes eld
-\end_inset
-
-use a real database
-\begin_inset Quotes erd
-\end_inset
-
- (but see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "replay-attribute"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Standard
-But as a thought experiment, if we implemented transactions to only overwrite
- free entries (this is tricky: there must not be a header in each entry
- which indicates whether it is free; instead we must rely on presence in metadata elsewhere),
- and a pointer to the hash table, we could create an entirely new commit
- without destroying existing data.
- Then it would be easy to implement snapshots in a similar way.
-\end_layout
-
-\begin_layout Standard
-This would not allow arbitrary changes to the database, such as tdb_repack
- does, and would require more space (since we have to preserve the current
- and future entries at once).
- If we used hash trees rather than one big hash table, we might only have
- to rewrite some sections of the hash, too.
-\end_layout
-
-\begin_layout Standard
-We could then implement snapshots using a similar method, using multiple
- different hash tables/free tables.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-Transactions Cannot Operate in Parallel
-\end_layout
-
-\begin_layout Standard
-This would be useless for ldb, as it hits the index records with just about
- every update.
- It would add significant complexity in resolving clashes, and cause
- all transaction callers to write their code to loop in the case where the
- transactions spuriously failed.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None (but see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "replay-attribute"
-
-\end_inset
-
-).
- We could solve a small part of the problem by providing read-only transactions.
- These would allow one write transaction to begin, but it could not commit
- until all r/o transactions are done.
- This would require a new RO_TRANSACTION_LOCK, which would be upgraded on
- commit.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-Default Hash Function Is Suboptimal
-\end_layout
-
-\begin_layout Standard
-The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially
- if we expand it to 64 bits), and works best when the hash bucket size is
- a prime number (which also means a slow modulus).
- In addition, it is highly predictable which could potentially lead to a
- Denial of Service attack in some TDB uses.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The Jenkins lookup3 hash
-\begin_inset Foot
-status open
-
-\begin_layout Plain Layout
-http://burtleburtle.net/bob/c/lookup3.c
-\end_layout
-
-\end_inset
-
- is a fast and superbly-mixing hash.
- It's used by the Linux kernel and almost everything else.
- This has the particular properties that it takes an initial seed, and produces
- two 32 bit hash numbers, which we can combine into a 64-bit hash.
-\end_layout
-
-\begin_layout Standard
-The seed should be created at tdb-creation time from some random source,
- and placed in the header.
- This is far from foolproof, but adds a little bit of protection against
- hash bombing.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "Reliable-Traversal-Adds"
-
-\end_inset
-
-Reliable Traversal Adds Complexity
-\end_layout
-
-\begin_layout Standard
-We lock a record during traversal iteration, and try to grab that lock in
- the delete code.
- If that grab on delete fails, we simply mark it deleted and continue onwards;
- traversal checks for this condition and does the delete when it moves off
- the record.
-\end_layout
-
-\begin_layout Standard
-If traversal terminates, the dead record may be left indefinitely.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove reliability guarantees; see 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "traverse-Proposed-Solution"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Fcntl Locking Adds Overhead
-\end_layout
-
-\begin_layout Standard
-Placing a fcntl lock means a system call, as does removing one.
- This is actually one reason why transactions can be faster (everything
- is locked once at transaction start).
- In the uncontended case, this overhead can theoretically be eliminated.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
-\end_layout
-
-\begin_layout Standard
-We tried this before with spinlock support, in the early days of TDB, and
- it didn't make much difference except in manufactured benchmarks.
-\end_layout
-
-\begin_layout Standard
-We could use spinlocks (with futex kernel support under Linux), but it means
- that we lose automatic cleanup when a process dies with a lock.
- There is a method of auto-cleanup under Linux, but it's not supported by
- other operating systems.
- We could reintroduce a clear-if-first-style lock and sweep for dead futexes
- on open, but that wouldn't help the normal case of one concurrent opener
- dying.
- Increasingly elaborate repair schemes could be considered, but they require
- an ABI change (everyone must use them) anyway, so there's no need to do
- this at the same time as everything else.
-\end_layout
-
-\begin_layout Subsection
-Some Transactions Don't Require Durability
-\end_layout
-
-\begin_layout Standard
-Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast)
- usage, and occasionally empties the results into a transactional TDB.
- This kind of usage prioritizes performance over durability: as long as
- we are consistent, data can be lost.
-\end_layout
-
-\begin_layout Standard
-This would be more neatly implemented inside tdb: a 
-\begin_inset Quotes eld
-\end_inset
-
-soft
-\begin_inset Quotes erd
-\end_inset
-
- transaction commit (ie.
- syncless) which meant that data may be reverted on a crash.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
-\end_layout
-
-\begin_layout Standard
-Unfortunately any transaction scheme which overwrites old data requires
- a sync before that overwrite to avoid the possibility of corruption.
-\end_layout
-
-\begin_layout Standard
-It seems possible to use a scheme similar to that described in 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Does-Not"
-
-\end_inset
-
-, where transactions are committed without overwriting existing data, and
- an array of top-level pointers were available in the header.
- If the transaction is 
-\begin_inset Quotes eld
-\end_inset
-
-soft
-\begin_inset Quotes erd
-\end_inset
-
- then we would not need a sync at all: existing processes would pick up
- the new hash table and free list and work with that.
-\end_layout
-
-\begin_layout Standard
-At some later point, a sync would allow recovery of the old data into the
- free lists (perhaps when the array of top-level pointers filled).
- On crash, tdb_open() would examine the array of top levels, and apply the
- transactions until it encountered an invalid checksum.
-\end_layout
-
-\begin_layout Subsection
-Tracing Is Fragile, Replay Is External
-\end_layout
-
-\begin_layout Standard
-The current TDB has compile-time-enabled tracing code, but it often breaks
- as it is not enabled by default.
- In a similar way, the ctdb code has an external wrapper which does replay
- tracing so it can coordinate cluster-wide transactions.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\begin_inset CommandInset label
-LatexCommand label
-name "replay-attribute"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Tridge points out that an attribute can be later added to tdb_open (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "attributes"
-
-\end_inset
-
-) to provide replay/trace hooks, which could become the basis for this and
- future parallel transactions and snapshot support.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\end_body
-\end_document
-@
-
-
-1.12
-log
-@Add status, some fixes, linked freelists.
-@
-text
-@d53 1
-a53 7
-
-\change_deleted 0 1291204535
-14-September
-\change_inserted 0 1291204533
-1-December
-\change_unchanged
--2010
-a580 2
-\change_inserted 0 1291204563
-
-a583 2
-
-\change_inserted 0 1291204572
-a587 2
-
-\change_inserted 0 1291204573
-a588 2
-\change_unchanged
-
-a629 2
-\change_inserted 0 1291204588
-
-a632 2
-
-\change_inserted 0 1291204588
-a636 2
-
-\change_inserted 0 1291204631
-a639 2
-\change_unchanged
-
-a693 2
-\change_inserted 0 1291204639
-
-a696 2
-
-\change_inserted 0 1291204640
-d702 1
-a702 1
-\change_inserted 0 1291204665
-d704 2
-a728 2
-\change_inserted 0 1291204671
-
-a731 2
-
-\change_inserted 0 1291204671
-a735 2
-
-\change_inserted 0 1291204673
-a736 2
-\change_unchanged
-
-a780 2
-\change_inserted 0 1291204731
-
-a783 2
-
-\change_inserted 0 1291204732
-a787 2
-
-\change_inserted 0 1291204779
-a790 2
-\change_unchanged
-
-a842 2
-\change_inserted 0 1291204830
-
-a845 2
-
-\change_inserted 0 1291204831
-a849 2
-
-\change_inserted 0 1291204834
-a850 2
-\change_unchanged
-
-d879 9
-a887 2
- deal of churn; we are better to guarantee that the tdb_errcode is per-thread
- so the current programming model can be maintained.
-d891 9
-d903 2
-a922 2
-\change_inserted 0 1291204847
-
-a925 2
-
-\change_inserted 0 1291204847
-d930 5
-a934 3
-
-\change_inserted 0 1291204852
-Incomplete.
-a1051 2
-\change_inserted 0 1291204881
-
-a1054 2
-
-\change_inserted 0 1291204881
-a1058 2
-
-\change_inserted 0 1291204885
-a1059 2
-\change_unchanged
-
-a1140 2
-\change_inserted 0 1291204898
-
-a1143 2
-
-\change_inserted 0 1291204898
-a1147 2
-
-\change_inserted 0 1291204901
-a1148 2
-\change_unchanged
-
-a1224 2
-\change_inserted 0 1291204908
-
-a1227 2
-
-\change_inserted 0 1291204908
-a1231 2
-
-\change_inserted 0 1291204908
-a1232 2
-\change_unchanged
-
-a1271 2
-\change_inserted 0 1291204917
-
-a1274 2
-
-\change_inserted 0 1291204917
-a1278 2
-
-\change_inserted 0 1291204920
-a1279 2
-\change_unchanged
-
-a1316 2
-\change_inserted 0 1291204927
-
-a1319 2
-
-\change_inserted 0 1291204928
-d1325 1
-a1325 1
-\change_inserted 0 1291204942
-d1327 2
-a1381 2
-\change_inserted 0 1291205003
-
-a1384 2
-
-\change_inserted 0 1291205004
-a1388 2
-
-\change_inserted 0 1291205007
-a1411 2
-\change_inserted 0 1291205019
-
-a1414 2
-
-\change_inserted 0 1291205019
-a1418 2
-
-\change_inserted 0 1291205023
-a1419 2
-\change_unchanged
-
-a1465 2
-\change_inserted 0 1291205029
-
-a1468 2
-
-\change_inserted 0 1291205029
-a1472 2
-
-\change_inserted 0 1291206020
-a1473 2
-\change_unchanged
-
-a1528 2
-\change_inserted 0 1291205043
-
-a1531 2
-
-\change_inserted 0 1291205043
-d1537 1
-a1537 1
-\change_inserted 0 1291205057
-d1539 2
-a1589 2
-\change_inserted 0 1291205062
-
-a1592 2
-
-\change_inserted 0 1291205062
-a1596 2
-
-\change_inserted 0 1291205062
-a1597 2
-\change_unchanged
-
-a1626 2
-\change_inserted 0 1291205072
-
-a1629 2
-
-\change_inserted 0 1291205073
-a1633 2
-
-\change_inserted 0 1291205073
-a1634 2
-\change_unchanged
-
-a1674 4
-
-\change_deleted 0 1291204504
-\change_unchanged
-a1699 2
-\change_inserted 0 1291205079
-
-a1702 2
-
-\change_inserted 0 1291205080
-a1706 2
-
-\change_inserted 0 1291205080
-a1707 2
-\change_unchanged
-
-a1833 2
-\change_inserted 0 1291205090
-
-d1869 2
-a1870 7
- is to divide the file into zones, and using a free list (or 
-\change_inserted 0 1291205498
-table
-\change_deleted 0 1291205497
-set
-\change_unchanged
- of free lists) for each.
-a1871 2
-\change_inserted 0 1291205203
-
-a1874 2
-
-\change_inserted 0 1291205358
-a1890 21
-\change_unchanged
-
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1291205198
-Note that this means we need to split the free lists when we expand the
- file; this is probably acceptable when we double the hash table size, since
- that is such an expensive operation already.
- In the case of increasing the file size, there is an optimization we can
- use: if we use M in the formula above as the file size rounded up to the
- next power of 2, we only need reshuffle free lists when the file size crosses
- a power of 2 boundary, 
-\emph on
-and 
-\emph default
-reshuffling the free lists is trivial: we simply merge every consecutive
- pair of free lists.
-\change_unchanged
-
-d1899 1
-a1899 7
-Identify the correct 
-\change_inserted 0 1291205366
-free list
-\change_deleted 0 1291205364
-zone
-\change_unchanged
-.
-d1907 2
-a1908 7
-Re-check the 
-\change_inserted 0 1291205372
-list
-\change_deleted 0 1291205371
-zone
-\change_unchanged
- (we didn't have a lock, sizes could have changed): relock if necessary.
-d1912 1
-a1912 5
-Place the freed entry in the list
-\change_deleted 0 1291205382
- for that zone
-\change_unchanged
-.
-d1921 1
-a1921 15
-Pick a 
-\change_deleted 0 1291205403
-zone either the zone we last freed into, or based on a 
-\begin_inset Quotes eld
-\end_inset
-
-random
-\begin_inset Quotes erd
-\end_inset
-
- number.
-\change_inserted 0 1291205411
-free table; usually the previous one.
-\change_unchanged
-
-a1925 10
-\change_deleted 0 1291205432
-
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1291205428
-Re-check the zone: relock if necessary.
-\change_unchanged
-
-d1934 1
-a1934 7
- unlock the list and try the next 
-\change_inserted 0 1291205455
-largest list
-\change_deleted 0 1291205452
-zone.
-\change_inserted 0 1291205457
-
-a1937 2
-
-\change_inserted 0 1291205476
-a1938 2
-\change_unchanged
-
-a1966 2
-\change_inserted 0 1291205542
-
-a1969 2
-
-\change_inserted 0 1291205591
-a1971 70
-\change_unchanged
-
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1291205539
-I anticipate that the number of entries in each free zone would be small,
- but it might be worth using one free entry to hold pointers to the others
- for cache efficiency.
-\change_unchanged
-
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1291205534
-\begin_inset CommandInset label
-LatexCommand label
-name "freelist-in-zone"
-
-\end_inset
-
-If we want to avoid locking complexity (enlarging the free lists when we
- enlarge the file) we could place the array of free lists at the beginning
- of each zone.
- This means existing array lists never move, but means that a record cannot
- be larger than a zone.
- That in turn implies that zones should be variable sized (say, power of
- 2), which makes the question 
-\begin_inset Quotes eld
-\end_inset
-
-what zone is this record in?
-\begin_inset Quotes erd
-\end_inset
-
- much harder (and 
-\begin_inset Quotes eld
-\end_inset
-
-pick a random zone
-\begin_inset Quotes erd
-\end_inset
-
-, but that's less common).
- It could be done with as few as 4 bits from the record header.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-Using 
-\begin_inset Formula $2^{16+N*3}$
-\end_inset
-
-means 0 gives a minimal 65536-byte zone, 15 gives the maximal 
-\begin_inset Formula $2^{61}$
-\end_inset
-
- byte zone.
- Zones range in factor of 8 steps.
- Given the zone size for the zone the current record is in, we can determine
- the start of the zone.
-\end_layout
-
-\end_inset
-
-
-\change_inserted 0 1291205139
-
-d2218 1
-a2218 5
-        uint32_t 
-\change_inserted 0 1291205758
-used_
-\change_unchanged
-magic : 16,
-a2222 4
-\change_deleted 0 1291205693
-                 prev_is_free: 1,
-\change_unchanged
-
-d2230 1
-a2230 7
-                 top_hash: 1
-\change_inserted 0 1291205704
-1
-\change_deleted 0 1291205704
-0
-\change_unchanged
-;
-d2254 1
-a2254 9
-        uint
-\change_inserted 0 1291205725
-64
-\change_deleted 0 1291205723
-32
-\change_unchanged
-_t 
-\change_inserted 0 1291205753
-free_magic: 8,
-a2257 2
-
-\change_inserted 0 1291205746
-a2262 24
-\change_deleted 0 1291205749
-free_magic;
-\change_unchanged
-
-\end_layout
-
-\begin_layout LyX-Code
-        uint64_t 
-\change_inserted 0 1291205786
-free_table: 8,
-\end_layout
-
-\begin_layout LyX-Code
-
-\change_inserted 0 1291205788
-                 
-\change_unchanged
-total_length
-\change_inserted 0 1291205792
- : 56
-\change_deleted 0 1291205790
-;
-\change_unchanged
-
-d2266 1
-a2266 7
-        uint64_t 
-\change_deleted 0 1291205801
-prev, 
-\change_unchanged
-next;
-\change_deleted 0 1291205811
-
-d2270 1
-a2270 3
-
-\change_deleted 0 1291205811
-        ...
-d2274 1
-a2274 5
-
-\change_deleted 0 1291205808
-        uint64_t tailer
-\change_unchanged
-;
-d2283 5
-a2287 16
-\change_deleted 0 1291205827
-We might want to take some bits from the used record's top_hash (and the
- free record which has 32 bits of padding to spare anyway) if we use variable
- sized zones.
- See 
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "freelist-in-zone"
-
-\end_inset
-
-.
-
-\change_inserted 0 1291205885
- Note that by limiting valid offsets to 56 bits, we can pack everything
- we need into 3 64-byte words, meaning our minimum record size is 8 bytes.
-a2290 2
-
-\change_inserted 0 1291205886
-a2294 2
-
-\change_inserted 0 1291205886
-a2295 2
-\change_unchanged
-
-a2385 2
-\change_inserted 0 1291205894
-
-a2388 2
-
-\change_inserted 0 1291205894
-a2392 2
-
-\change_inserted 0 1291205902
-a2393 2
-\change_unchanged
-
-a2415 4
-
-\change_deleted 0 1291204504
-\change_unchanged
-a2445 2
-\change_inserted 0 1291205910
-
-a2448 2
-
-\change_inserted 0 1291205910
-a2452 2
-
-\change_inserted 0 1291205914
-a2453 2
-\change_unchanged
-
-a2485 2
-\change_inserted 0 1291205919
-
-a2488 2
-
-\change_inserted 0 1291205919
-a2492 2
-
-\change_inserted 0 1291205922
-a2493 2
-\change_unchanged
-
-a2533 2
-\change_inserted 0 1291205929
-
-a2536 2
-
-\change_inserted 0 1291205929
-a2540 2
-
-\change_inserted 0 1291205929
-a2541 2
-\change_unchanged
-
-a2578 2
-\change_inserted 0 1291205932
-
-a2581 2
-
-\change_inserted 0 1291205933
-a2585 2
-
-\change_inserted 0 1291205933
-a2586 2
-\change_unchanged
-
-a2724 2
-\change_inserted 0 1291205944
-
-a2727 2
-
-\change_inserted 0 1291205945
-a2731 2
-
-\change_inserted 0 1291205948
-a2732 2
-\change_unchanged
-
-@
-
-
-1.11
-log
-@Merge changes
-@
-text
-@d53 7
-a59 1
-14-September-2010
-d587 16
-d644 18
-d716 16
-d753 16
-d813 18
-d883 16
-d953 16
-d1084 16
-d1181 16
-d1273 16
-d1328 16
-d1381 16
-d1447 19
-a1465 2
- if older code (which doesn't understand the feature) writes to the database.Reco
-rd Headers Are Not Expandible
-d1484 16
-d1546 16
-d1617 16
-d1680 16
-d1725 16
-d1810 16
-d1951 8
-a1958 3
-Proposed SolutionThe first step is to remove all the current heuristics,
- as they obviously interact, then examine them once the lock contention
- is addressed.
-d1989 7
-a1995 2
- is to divide the file into zones, and using a free list (or set of free
- lists) for each.
-d1997 2
-d2002 25
-d2039 2
-d2049 7
-a2055 1
-Identify the correct zone.
-d2063 7
-a2069 2
-Re-check the zone (we didn't have a lock, sizes could have changed): relock
- if necessary.
-d2073 5
-a2077 1
-Place the freed entry in the list for that zone.
-d2086 3
-a2088 1
-Pick a zone either the zone we last freed into, or based on a 
-d2097 4
-d2105 2
-d2110 2
-d2113 2
-d2123 15
-a2137 1
- unlock the list and try the next zone.
-d2166 11
-d2180 2
-d2185 2
-d2190 2
-d2223 1
-a2223 1
-status open
-d2243 2
-d2491 5
-a2495 1
-        uint32_t magic : 16,
-d2499 2
-d2502 2
-d2511 7
-a2517 1
-                 top_hash: 10;
-d2541 29
-a2569 1
-        uint32_t free_magic;
-d2573 11
-a2583 1
-        uint64_t total_length;
-d2587 7
-a2593 1
-        uint64_t prev, next;
-d2597 2
-d2603 5
-a2607 1
-        uint64_t tailer;
-d2615 2
-d2628 18
-d2736 16
-d2808 16
-d2856 16
-d2912 16
-d2965 16
-d3119 16
-@
-
-
-1.10
-log
-@Tracing attribute, talloc support.
-@
-text
-@d1 1
-a1 1
-#LyX 1.6.5 created this file. For more info see http://www.lyx.org/
-d53 1
-a53 7
-
-\change_deleted 0 1283307542
-26-July
-\change_inserted 0 1284423485
-14-September
-\change_unchanged
--2010
-a472 2
-\change_inserted 0 1284422789
-
-a479 2
-\change_unchanged
-
-a838 2
-
-\change_inserted 0 1284016998
-a846 2
-\change_unchanged
-
-a1194 2
-\change_inserted 0 1284015637
-
-a1197 2
-
-\change_inserted 0 1284015716
-a1201 2
-
-\change_inserted 0 1284015906
-a1210 2
-
-\change_inserted 0 1284015637
-a1214 2
-
-\change_inserted 0 1284016114
-a1227 2
-
-\change_inserted 0 1284016149
-a1232 2
-
-\change_inserted 0 1284016639
-a1237 2
-
-\change_inserted 0 1284016821
-a1243 2
-
-\change_inserted 0 1284016803
-d1245 2
-a1246 9
- if older code (which doesn't understand the feature) writes to the database.
-\change_deleted 0 1284016101
-
-\end_layout
-
-\begin_layout Subsection
-
-\change_inserted 0 1284015634
-Record Headers Are Not Expandible
-a1249 2
-
-\change_inserted 0 1284015634
-a1254 2
-
-\change_inserted 0 1284015634
-a1258 2
-
-\change_inserted 0 1284422552
-a1267 2
-
-\change_inserted 0 1284422568
-a1271 2
-
-\change_inserted 0 1284422646
-a1276 2
-
-\change_inserted 0 1284422656
-a1280 2
-
-\change_inserted 0 1284423065
-a1305 2
-
-\change_inserted 0 1284423042
-a1310 2
-\change_unchanged
-
-a1457 2
-
-\change_inserted 0 1283336713
-a1463 2
-
-\change_unchanged
-d1482 2
-d1485 1
-a1485 51
-\change_deleted 0 1283307675
-There are three details which become important:
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1283307675
-On encountering a full bucket, we use the next bucket.
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1283307675
-Extra hash bits are stored with the offset, to reduce comparisons.
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1283307675
-A marker entry is used on deleting an entry.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1283307675
-The doubling of the table must be done under a transaction; we will not
- reduce it on deletion, so it will be an unusual case.
- It will either be placed at the head (other entries will be moved out the
- way so we can expand).
- We could have a pointer in the header to the current hashtable location,
- but that pointer would have to be read frequently to check for hashtable
- moves.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1283307675
-The locking for this is slightly more complex than the chained case; we
- currently have one lock per bucket, and that means we would need to expand
- the lock if we overflow to the next bucket.
- The frequency of such collisions will effect our locking heuristics: we
- can always lock more buckets than we need.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1283307675
-One possible optimization is to only re-check the hash size on an insert
- or a lookup miss.
-
-\change_inserted 0 1283307770
-a1492 2
-
-\change_inserted 0 1283336187
-a1500 2
-
-\change_inserted 0 1283336586
-a1510 2
-\change_unchanged
-
-d1636 3
-a1638 8
-Proposed Solution
-\change_deleted 0 1283336858
-
-\end_layout
-
-\begin_layout Standard
-The first step is to remove all the current heuristics, as they obviously
- interact, then examine them once the lock contention is addressed.
-a1647 2
-\change_inserted 0 1283336910
-
-a1650 2
-
-\change_inserted 0 1283337052
-a1655 2
-\change_unchanged
-
-a1776 2
-\change_inserted 0 1283309850
-
-a1779 2
-
-\change_inserted 0 1283337216
-a1813 2
-
-\change_inserted 0 1284424151
-a1825 2
-\change_unchanged
-
-a1830 2
-\change_unchanged
-
-a2031 2
-
-\change_inserted 0 1283336739
-a2040 2
-\change_unchanged
-
-a2117 2
-\change_inserted 0 1283337133
-
-a2120 2
-
-\change_inserted 0 1283337139
-a2121 2
-\change_unchanged
-
-a2136 2
-
-\change_inserted 0 1283337235
-a2147 2
-\change_unchanged
-
-d2251 1
-a2251 7
-Proposed Solution
-\change_deleted 0 1284423472
-
-\end_layout
-
-\begin_layout Standard
-None.
-d2261 1
-a2261 1
-\change_inserted 0 1284423891
-d2263 1
-a2263 4
-\change_deleted 0 1284423891
-.
-
-\change_inserted 0 1284423901
-a2271 2
-\change_unchanged
-
-a2293 2
-\change_inserted 0 1284423495
-
-a2312 2
-
-\change_inserted 0 1284424201
-d2321 1
-a2321 3
-\change_unchanged
-We could solve a small part of the problem by providing read-only transactions.
-a2505 2
-\change_inserted 0 1284423555
-
-a2508 2
-
-\change_inserted 0 1284423617
-a2512 2
-
-\change_inserted 0 1284423719
-a2519 2
-
-\change_inserted 0 1284423864
-a2530 2
-
-\change_inserted 0 1284423850
-a2540 2
-\change_unchanged
-
-@
-
-
-1.9
-log
-@Extension mechanism.
-@
-text
-@d56 2
-a57 2
-\change_inserted 0 1284016854
-9-September
-d479 11
-d1303 1
-a1303 1
-\change_inserted 0 1284016847
-d1310 56
-d1945 1
-a1945 1
-\change_inserted 0 1283310945
-d1956 2
-d2402 2
-d2416 4
-d2421 12
-d2455 2
-d2476 12
-d2673 47
-@
-
-
-1.8
-log
-@Remove bogus footnote
-@
-text
-@d56 2
-a57 2
-\change_inserted 0 1283307544
-1-September
-d838 12
-d1198 103
-@
-
-
-1.7
-log
-@Moving hash table does not work.
-@
-text
-@a1436 12
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-
-\change_inserted 0 1283336450
-If we make the hash offsets zone-relative, then this only restricts the
- zone size, not the overall database size.
-\end_layout
-
-\end_inset
-
-@
-
-
-1.6
-log
-@Commit changes
-@
-text
-@d38 1
-a38 1
-\author "" 
-d53 7
-a59 1
-26-July-2010
-d1333 10
-d1361 3
-a1363 1
- There are three details which become important:
-d1367 2
-d1373 2
-d1379 2
-d1385 2
-d1397 2
-d1407 2
-d1411 45
-d1582 2
-d1598 14
-d1733 62
-d1996 13
-d2086 10
-d2110 15
-a2124 1
-\begin_layout LyX-Code
-@
-
-
-1.5
-log
-@Soft transaction commit
-@
-text
-@d38 1
-a38 1
-\author "Rusty Russell,,," 
-a52 4
-
-\change_deleted 0 1280141199
-10-May-2010
-\change_inserted 0 1280141202
-a53 2
-\change_unchanged
-
-a2028 2
-
-\change_inserted 0 1280140902
-a2034 2
-
-\change_unchanged
-a2212 2
-\change_inserted 0 1280140661
-
-a2215 2
-
-\change_inserted 0 1280140703
-a2219 2
-
-\change_inserted 0 1280708312
-a2226 2
-
-\change_inserted 0 1280708400
-a2239 2
-
-\change_inserted 0 1280140836
-a2243 2
-
-\change_inserted 0 1280708255
-a2247 2
-
-\change_inserted 0 1280708374
-a2252 2
-
-\change_inserted 0 1280141181
-a2274 2
-
-\change_inserted 0 1280141345
-@
-
-
-1.4
-log
-@Merge changes
-@
-text
-@d38 1
-a38 1
-\author "" 
-d53 2
-d56 4
-d2035 10
-d2223 84
-@
-
-
-1.3
-log
-@Transaction and freelist rethink.
-@
-text
-@d38 1
-a38 1
-\author "Rusty Russell,,," 
-d53 1
-a53 1
-27-April-2010
-d662 1
-a662 5
- behavior of disallowing 
-\change_inserted 0 1272940179
-nested 
-\change_unchanged
-transactions should become the default.
-a1210 2
-\change_inserted 0 1272944650
-
-a1214 2
-
-\change_inserted 0 1272944763
-a1218 2
-\change_unchanged
-
-a1223 2
-\change_unchanged
-
-a1301 2
-
-\change_inserted 0 1273478114
-a1310 2
-\change_unchanged
-
-d1515 1
-a1515 11
-The free list 
-\change_deleted 0 1273469807
-should
-\change_inserted 0 1273469810
-must
-\change_unchanged
- be split 
-\change_deleted 0 1273469815
-into multiple lists 
-\change_unchanged
-to reduce contention.
-a1520 2
-\change_inserted 0 1273470006
-
-a1523 2
-
-\change_inserted 0 1273492055
-a1539 2
-
-\change_inserted 0 1273483888
-a1551 2
-\change_unchanged
-
-a1554 8
-
-\change_deleted 0 1272942055
-There are various ways to organize these lisys, but because we want to be
- able to quickly identify which free list an entry is in, and reduce the
- number of locks required for merging, we will use zoning (eg.
- each free list covers some fixed fraction of the file).
-\change_inserted 0 1273484187
-d1556 1
-a1556 7
-\change_deleted 0 1273484194
-The algorithm for f
-\change_inserted 0 1273484194
-F
-\change_unchanged
-reeing is simple:
-d1560 1
-a1560 7
-Identify the correct 
-\change_deleted 0 1273482856
-free list
-\change_inserted 0 1273482857
-zone
-\change_unchanged
-.
-d1564 1
-a1564 7
-Lock the 
-\change_inserted 0 1273482895
-corresponding 
-\change_unchanged
-list
-\change_inserted 0 1273482863
-.
-a1567 2
-
-\change_inserted 0 1273482909
-d1573 1
-a1573 13
-
-\change_deleted 0 1273482885
-, and p
-\change_inserted 0 1273482888
-P
-\change_unchanged
-lace the freed entry 
-\change_deleted 0 1273492415
-at the head
-\change_inserted 0 1273492415
-in the list for that zone
-\change_unchanged
-.
-d1577 2
-a1578 7
-Allocation is a little more complicated, as we 
-\change_deleted 0 1273483240
-merge entries as we walk the list:
-\change_inserted 0 1273484250
-perform delayed coalescing at this point:
-\change_unchanged
-
-d1582 1
-a1582 19
-Pick a 
-\change_deleted 0 1273482955
-free list;
-\change_inserted 0 1273482957
-zone
-\change_unchanged
- either the 
-\change_deleted 0 1273482962
-list
-\change_inserted 0 1273482962
-zone
-\change_unchanged
- we last freed 
-\change_deleted 0 1273482966
-o
-\change_inserted 0 1273482966
-i
-\change_unchanged
-nto, or based on a 
-d1594 1
-a1594 9
-Lock th
-\change_inserted 0 1273482980
-e corresponding
-\change_deleted 0 1273482973
-at
-\change_unchanged
- list.
-\change_inserted 0 1273482982
-
-a1597 2
-
-\change_inserted 0 1273483084
-a1598 53
-\change_unchanged
-
-\end_layout
-
-\begin_layout Enumerate
-If the top entry is 
-\change_deleted 0 1273492155
-well-sized, 
-\change_inserted 0 1273492159
--large enough, 
-\change_unchanged
-remove it from the list and return it.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, 
-\change_inserted 0 1273492206
-coalesce entries in the list.
-\change_deleted 0 1273492200
-examine the entry to the right of it in the file.
- If it is free:
-\end_layout
-
-\begin_deeper
-\begin_layout Enumerate
-
-\change_deleted 0 1273492200
-If that entry is in a different list, lock that list too.
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1273492200
-If we had to place a new lock, re-check that the entry is free.
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1273492200
-Remove that entry from its free list and expand this entry to cover it.
-\end_layout
-
-\begin_layout Enumerate
-
-\change_deleted 0 1273485554
-Goto step 3.
-\end_layout
-
-\end_deeper
-\begin_layout Enumerate
-
-\change_inserted 0 1273485311
-If there was no entry large enough, unlock the list and try the next zone.
-d1602 1
-a1602 5
-
-\change_deleted 0 1273483646
-Repeat step 3 with each entry in the list.
-\change_unchanged
-
-d1606 2
-a1607 5
-
-\change_deleted 0 1273483668
-Unlock the list and repeat step 2 with the next list.
-\change_unchanged
-
-d1611 1
-a1611 7
-If no 
-\change_deleted 0 1273483671
-list
-\change_inserted 0 1273483671
-zone
-\change_unchanged
- satisfies, expand the file.
-d1615 2
-a1616 9
-This optimizes rapid insert/delete of free list entries
-\change_inserted 0 1273485794
- by not coalescing them all the time.
-\change_deleted 0 1273483685
-, and allows us to get rid of the tailer altogether
-\change_unchanged
-.
-
-\change_inserted 0 1273492299
-a1638 39
-
-\change_deleted 0 1273476840
-The question of 
-\begin_inset Quotes eld
-\end_inset
-
-well-sized
-\begin_inset Quotes erd
-\end_inset
-
- free entries is more difficult: the 25% overhead works in practice for
- ldb because indexes tend to expand by one record at a time.
- This can be resolved by having an 
-\begin_inset Quotes eld
-\end_inset
-
-expanded
-\begin_inset Quotes erd
-\end_inset
-
- bit in the header to note entries that have previously expanded, and allocating
- more space for them.
- Whether the 
-\begin_inset Quotes eld
-\end_inset
-
-increasing slack
-\begin_inset Quotes erd
-\end_inset
-
- algorithm should be implemented or first-fit used is still unknown: we
- will determine this once these other ideas are implemented.
-\change_inserted 0 1273483750
-
-\end_layout
-
-\begin_layout Standard
-
-\change_inserted 0 1273492450
-a1644 2
-
-\change_inserted 0 1273470441
-a1654 2
-
-\change_inserted 0 1273476556
-a1659 2
-
-\change_inserted 0 1273470423
-a1661 2
-\change_unchanged
-
-a1672 2
-
-\change_inserted 0 1273476847
-a1676 2
-
-\change_inserted 0 1273476886
-a1691 2
-
-\change_inserted 0 1273477233
-a1699 2
-
-\change_inserted 0 1273477534
-a1706 2
-
-\change_inserted 0 1273482700
-a1712 2
-
-\change_inserted 0 1273478079
-a1722 2
-
-\change_inserted 0 1273477839
-a1726 2
-
-\change_inserted 0 1273477925
-a1730 2
-
-\change_inserted 0 1273477925
-a1734 2
-
-\change_inserted 0 1273477925
-a1738 2
-
-\change_inserted 0 1273477925
-a1742 2
-
-\change_inserted 0 1273477925
-a1746 2
-
-\change_inserted 0 1273477925
-a1750 2
-
-\change_inserted 0 1273477925
-a1754 2
-
-\change_inserted 0 1273477925
-a1758 2
-
-\change_inserted 0 1273477925
-a1762 2
-
-\change_inserted 0 1273477925
-a1766 2
-
-\change_inserted 0 1273477925
-a1770 2
-
-\change_inserted 0 1273477925
-a1774 2
-
-\change_inserted 0 1273477925
-a1778 2
-
-\change_inserted 0 1273477925
-a1782 2
-
-\change_inserted 0 1273477925
-a1786 2
-
-\change_inserted 0 1273477925
-a1790 2
-
-\change_inserted 0 1273477925
-a1794 2
-
-\change_inserted 0 1273477925
-a1798 2
-
-\change_inserted 0 1273492522
-a1802 2
-
-\change_inserted 0 1273492530
-a1806 2
-
-\change_inserted 0 1273492546
-a1810 2
-
-\change_inserted 0 1273478239
-a1814 2
-
-\change_inserted 0 1273479960
-a1821 2
-
-\change_inserted 0 1273480265
-a1830 2
-
-\change_inserted 0 1273480354
-a1845 2
-
-\change_inserted 0 1273478968
-a1851 2
-
-\change_inserted 0 1273492604
-a1859 2
-
-\change_inserted 0 1273479572
-a1862 2
-\change_unchanged
-
-a1870 2
-
-\change_inserted 0 1273480282
-a1874 2
-
-\change_inserted 0 1273478931
-a1878 2
-
-\change_inserted 0 1273481549
-a1882 2
-
-\change_inserted 0 1273481557
-a1886 2
-
-\change_inserted 0 1273480307
-a1890 2
-
-\change_inserted 0 1273480335
-a1894 2
-
-\change_inserted 0 1273479897
-a1898 2
-
-\change_inserted 0 1273479653
-a1902 2
-
-\change_inserted 0 1273480371
-a1906 2
-
-\change_inserted 0 1273480464
-a1910 2
-
-\change_inserted 0 1273480399
-a1914 2
-
-\change_inserted 0 1273480425
-a1918 2
-
-\change_inserted 0 1273480453
-a1922 2
-
-\change_inserted 0 1273480455
-a1926 2
-
-\change_inserted 0 1273480450
-a1930 2
-
-\change_inserted 0 1273480452
-a1935 2
-\change_inserted 0 1273478830
-
-a1942 5
-
-\change_deleted 0 1273481604
-In theory, we could get away with 2: one after we write the new data, and
- one to somehow atomically change over to it.
-\change_inserted 0 1273481632
-a1946 2
-
-\change_inserted 0 1273481724
-a1950 2
-
-\change_inserted 0 1273481713
-a1954 2
-
-\change_inserted 0 1273481717
-a1958 2
-
-\change_inserted 0 1273481730
-a1962 2
-
-\change_inserted 0 1273481736
-a1966 2
-
-\change_inserted 0 1273481744
-a1970 2
-
-\change_inserted 0 1273481748
-a1974 2
-
-\change_inserted 0 1273482185
-a1978 2
-
-\change_inserted 0 1273482259
-a1989 50
-
-\change_deleted 0 1273481848
-None.
- Trying to rewrite the transaction code is a separate experiment, which
- I encourage someone else to do.
- At some point you say 
-\begin_inset Quotes eld
-\end_inset
-
-use a real database
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1273481848
-But as a thought experiment:
-\change_unchanged
-
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1273481788
-Say there was a pointer in the header which said where the hash table and
- free list tables were, and that no blocks were labeled with whether they
- were free or not (it had to be derived from what list they were in).
- We could create new hash table and free list in some free space, and populate
- it as we want the post-committed state to look.
- Then we sync, then we switch the offset in the header, then we sync again.
-\end_layout
-
-\begin_layout Standard
-
-\change_deleted 0 1273481788
-This would not allow arbitrary changes to the database, such as tdb_repack
- does, and would require more space (since we have to preserve the current
- and future entries at once).
- If we used hash trees rather than one big hash table, we might only have
- to rewrite some sections of the hash, too.
-\change_inserted 0 1273481854
-
-\end_layout
-
-\begin_layout Standard
-
-\change_inserted 0 1273482102
-a1993 2
-
-\change_inserted 0 1273482061
-a1998 2
-
-\change_inserted 0 1273482063
-a2002 2
-
-\change_inserted 0 1273482072
-a2006 2
-
-\change_inserted 0 1273482139
-a2011 2
-
-\change_inserted 0 1273482364
-a2015 2
-
-\change_inserted 0 1273482163
-a2019 2
-
-\change_inserted 0 1273482493
-a2037 2
-
-\change_inserted 0 1273482536
-a2046 2
-\change_unchanged
-
-a2049 2
-
-\change_inserted 0 1273482641
-a2058 2
-
-\change_inserted 0 1273481827
-d2067 2
-a2068 11
-We could 
-\change_inserted 0 1273481829
-then 
-\change_unchanged
-implement snapshots using a similar method
-\change_deleted 0 1273481838
- to the above, only
-\change_inserted 0 1273481840
-,
-\change_unchanged
- using multiple different hash tables/free tables.
-@
-
-
-1.2
-log
-@After first feedback (Ronnie & Volker)
-@
-text
-@d1314 13
-d1531 11
-a1541 1
-The free list should be split into multiple lists to reduce contention.
-d1547 39
-d1596 7
-d1604 1
-a1604 1
-The algorithm for freeing is simple:
-d1608 7
-a1614 1
-Identify the correct free list.
-d1618 30
-a1647 1
-Lock the list, and place the freed entry at the head.
-d1651 7
-a1657 2
-Allocation is a little more complicated, as we merge entries as we walk
- the list:
-d1661 19
-a1679 1
-Pick a free list; either the list we last freed onto, or based on a 
-d1691 17
-a1707 1
-Lock that list.
-d1711 7
-a1717 1
-If the top entry is well-sized, remove it from the list and return it.
-d1721 5
-a1725 1
-Otherwise, examine the entry to the right of it in the file.
-d1731 2
-d1737 2
-d1743 2
-d1749 2
-d1756 8
-d1765 2
-d1770 2
-d1773 2
-d1778 7
-a1784 1
-If no list satisfies, expand the file.
-d1788 28
-a1815 2
-This optimizes rapid insert/delete of free list entries, and allows us to
- get rid of the tailer altogether.
-d1819 2
-d1851 1
-a1851 1
-\change_inserted 0 1272941474
-d1857 303
-a2159 18
-\change_inserted 0 1272942759
-There are various ways to organize these lists, but because we want to be
- able to quickly identify which free list an entry is in, and reduce the
- number of locks required for merging, we will use zoning (eg.
- each of the N free lists in a tdb file of size M covers a fixed fraction
- M/N).
- Note that this means we need to reshuffle the free lists when we expand
- the file; this is probably acceptable when we double the hash table size,
- since that is such an expensive operation already.
- In the case of increasing the file size, there is an optimization we can
- use: if we use M in the formula above as the file size rounded up to the
- next power of 2, we only need reshuffle free lists when the file size crosses
- a power of 2 boundary, 
-\emph on
-and 
-\emph default
-reshuffling the free lists is trivial: we simply merge every consecutive
- pair of free lists.
-d2164 107
-d2276 2
-d2280 59
-d2346 2
-d2363 2
-d2366 2
-d2371 2
-d2382 2
-d2389 57
-d2458 13
-d2474 32
-a2505 2
-We could implement snapshots using a similar method to the above, only using
- multiple different hash tables/free tables.
-@
-
-
-1.1
-log
-@Initial revision
-@
-text
-@d1 1
-a1 1
-#LyX 1.6.4 created this file. For more info see http://www.lyx.org/
-d36 3
-a38 3
-\tracking_changes false
-\output_changes false
-\author "" 
-d662 5
-a666 1
- behavior of disallowing transactions should become the default.
-d1215 21
-d1527 2
-d1533 3
-a1535 1
- The algorithm for freeing is simple:
-d1642 26
-@
diff --git a/ccan/tdb2/doc/design.pdf b/ccan/tdb2/doc/design.pdf
deleted file mode 100644 (file)
index 558dc1f..0000000
Binary files a/ccan/tdb2/doc/design.pdf and /dev/null differ
diff --git a/ccan/tdb2/doc/design.txt b/ccan/tdb2/doc/design.txt
deleted file mode 100644 (file)
index c2994a4..0000000
+++ /dev/null
@@ -1,1259 +0,0 @@
-TDB2: Redesigning The Trivial DataBase
-
-Rusty Russell, IBM Corporation
-
-1-December-2010
-
-Abstract
-
-The Trivial DataBase on-disk format is 32 bits; with usage cases 
-heading towards the 4G limit, that must change. This required 
-breakage provides an opportunity to revisit TDB's other design 
-decisions and reassess them.
-
-1 Introduction
-
-The Trivial DataBase was originally written by Andrew Tridgell as 
-a simple key/data pair storage system with the same API as dbm, 
-but allowing multiple readers and writers while being small 
-enough (< 1000 lines of C) to include in SAMBA. The simple design 
-created in 1999 has proven surprisingly robust and performant, 
-used in Samba versions 3 and 4 as well as numerous other 
-projects. Its useful life was greatly increased by the 
-(backwards-compatible!) addition of transaction support in 2005.
-
-The wider variety and greater demands of TDB-using code has led 
-to some organic growth of the API, as well as some compromises on 
-the implementation. None of these, by themselves, are seen as 
-show-stoppers, but the cumulative effect is a loss of elegance 
-over the initial, simple TDB implementation. Here is a table of 
-the approximate number of lines of implementation code and number 
-of API functions at the end of each year:
-
-
-+-----------+----------------+--------------------------------+
-| Year End  | API Functions  | Lines of C Code Implementation |
-+-----------+----------------+--------------------------------+
-+-----------+----------------+--------------------------------+
-|   1999    |      13        |              1195              |
-+-----------+----------------+--------------------------------+
-|   2000    |      24        |              1725              |
-+-----------+----------------+--------------------------------+
-|   2001    |      32        |              2228              |
-+-----------+----------------+--------------------------------+
-|   2002    |      35        |              2481              |
-+-----------+----------------+--------------------------------+
-|   2003    |      35        |              2552              |
-+-----------+----------------+--------------------------------+
-|   2004    |      40        |              2584              |
-+-----------+----------------+--------------------------------+
-|   2005    |      38        |              2647              |
-+-----------+----------------+--------------------------------+
-|   2006    |      52        |              3754              |
-+-----------+----------------+--------------------------------+
-|   2007    |      66        |              4398              |
-+-----------+----------------+--------------------------------+
-|   2008    |      71        |              4768              |
-+-----------+----------------+--------------------------------+
-|   2009    |      73        |              5715              |
-+-----------+----------------+--------------------------------+
-
-
-This review is an attempt to catalog and address all the known 
-issues with TDB and create solutions which address the problems 
-without significantly increasing complexity; all involved are far 
-too aware of the dangers of second system syndrome in rewriting a 
-successful project like this.
-
-2 API Issues
-
-2.1 tdb_open_ex Is Not Expandable
-
-The tdb_open() call was expanded to tdb_open_ex(), which added an 
-optional hashing function and an optional logging function 
-argument. Additional arguments to open would require the 
-introduction of a tdb_open_ex2 call etc.
-
-2.1.1 Proposed Solution<attributes>
-
-tdb_open() will take a linked-list of attributes:
-
-enum tdb_attribute {
-
-    TDB_ATTRIBUTE_LOG = 0,
-
-    TDB_ATTRIBUTE_HASH = 1
-
-};
-
-struct tdb_attribute_base {
-
-    enum tdb_attribute attr;
-
-    union tdb_attribute *next;
-
-};
-
-struct tdb_attribute_log {
-
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG 
-*/
-
-    tdb_log_func log_fn;
-
-    void *log_private;
-
-};
-
-struct tdb_attribute_hash {
-
-    struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH 
-*/
-
-    tdb_hash_func hash_fn;
-
-    void *hash_private;
-
-};
-
-union tdb_attribute {
-
-    struct tdb_attribute_base base;
-
-    struct tdb_attribute_log log;
-
-    struct tdb_attribute_hash hash;
-
-};
-
-This allows future attributes to be added, even if this expands 
-the size of the union.
-
-2.1.2 Status
-
-Complete.
-
-2.2 tdb_traverse Makes Impossible Guarantees
-
-tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, 
-and it was thought that it was important to guarantee that all 
-records which exist at the start and end of the traversal would 
-be included, and no record would be included twice.
-
-This adds complexity (see [Reliable-Traversal-Adds]) and does not 
-work anyway for records which are altered (in particular, those 
-which are expanded may be effectively deleted and re-added behind 
-the traversal).
-
-2.2.1 <traverse-Proposed-Solution>Proposed Solution
-
-Abandon the guarantee. You will see every record if no changes 
-occur during your traversal, otherwise you will see some subset. 
-You can prevent changes by using a transaction or the locking 
-API.
-
-2.2.2 Status
-
-Complete. Delete-during-traverse will still delete every record, 
-too (assuming no other changes).
-
-2.3 Nesting of Transactions Is Fraught
-
-TDB has alternated between allowing nested transactions and not 
-allowing them. Various paths in the Samba codebase assume that 
-transactions will nest, and in a sense they can: the operation is 
-only committed to disk when the outer transaction is committed. 
-There are two problems, however:
-
-1. Canceling the inner transaction will cause the outer 
-  transaction commit to fail, and will not undo any operations 
-  since the inner transaction began. This problem is soluble with 
-  some additional internal code.
-
-2. An inner transaction commit can be cancelled by the outer 
-  transaction. This is desirable in the way which Samba's 
-  database initialization code uses transactions, but could be a 
-  surprise to any users expecting a successful transaction commit 
-  to expose changes to others.
-
-The current solution is to specify the behavior at tdb_open(), 
-with the default currently that nested transactions are allowed. 
-This flag can also be changed at runtime.
-
-2.3.1 Proposed Solution
-
-Given the usage patterns, it seems that the “least-surprise” 
-behavior of disallowing nested transactions should become the 
-default. Additionally, it seems the outer transaction is the only 
-code which knows whether inner transactions should be allowed, so 
-a flag to indicate this could be added to tdb_transaction_start. 
-However, this behavior can be simulated with a wrapper which uses 
-tdb_add_flags() and tdb_remove_flags(), so the API should not be 
-expanded for this relatively-obscure case.
-
-2.3.2 Status
-
-Incomplete; nesting flag is still defined as per tdb1.
-
-2.4 Incorrect Hash Function is Not Detected
-
-tdb_open_ex() allows the calling code to specify a different hash 
-function to use, but does not check that all other processes 
-accessing this tdb are using the same hash function. The result 
-is that records are missing from tdb_fetch().
-
-2.4.1 Proposed Solution
-
-The header should contain an example hash result (eg. the hash of 
-0xdeadbeef), and tdb_open_ex() should check that the given hash 
-function produces the same answer, or fail the tdb_open call.
-
-2.4.2 Status
-
-Complete.
-
-2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation
-
-In response to scalability issues with the free list ([TDB-Freelist-Is]
-) two API workarounds have been incorporated in TDB: 
-tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The 
-latter actually calls the former with an argument of “5”.
-
-This code allows deleted records to accumulate without putting 
-them in the free list. On delete we iterate through each chain 
-and free them in a batch if there are more than max_dead entries. 
-These are never otherwise recycled except as a side-effect of a 
-tdb_repack.
-
-2.5.1 Proposed Solution
-
-With the scalability problems of the freelist solved, this API 
-can be removed. The TDB_VOLATILE flag may still be useful as a 
-hint that store and delete of records will be at least as common 
-as fetch in order to allow some internal tuning, but initially 
-will become a no-op.
-
-2.5.2 Status
-
-Incomplete. TDB_VOLATILE still defined, but implementation should 
-fail on unknown flags to be future-proof.
-
-2.6 <TDB-Files-Cannot>TDB Files Cannot Be Opened Multiple Times 
-  In The Same Process
-
-No process can open the same TDB twice; we check and disallow it. 
-This is an unfortunate side-effect of fcntl locks, which operate 
-on a per-file rather than per-file-descriptor basis, and do not 
-nest. Thus, closing any file descriptor on a file clears all the 
-locks obtained by this process, even if they were placed using a 
-different file descriptor!
-
-Note that even if this were solved, deadlock could occur if 
-operations were nested: this is a more manageable programming 
-error in most cases.
-
-2.6.1 Proposed Solution
-
-We could lobby POSIX to fix the perverse rules, or at least lobby 
-Linux to violate them so that the most common implementation does 
-not have this restriction. This would be a generally good idea 
-for other fcntl lock users.
-
-Samba uses a wrapper which hands out the same tdb_context to 
-multiple callers if this happens, and does simple reference 
-counting. We should do this inside the tdb library, which already 
-emulates lock nesting internally; it would need to recognize when 
-deadlock occurs within a single process. This would create a new 
-failure mode for tdb operations (while we currently handle 
-locking failures, they are impossible in normal use and a process 
-encountering them can do little but give up).
-
-I do not see benefit in an additional tdb_open flag to indicate 
-whether re-opening is allowed, although there may be some 
-benefit to adding a call to detect when a tdb_context is shared, 
-to allow others to create such an API.
-
-2.6.2 Status
-
-Incomplete.
-
-2.7 TDB API Is Not POSIX Thread-safe
-
-The TDB API uses an error code which can be queried after an 
-operation to determine what went wrong. This programming model 
-does not work with threads, unless specific additional guarantees 
-are given by the implementation. In addition, even 
-otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot]
-).
-
-2.7.1 Proposed Solution
-
-Rearchitecting the API to include a tdb_errcode pointer would be a 
-great deal of churn; we are better to guarantee that the 
-tdb_errcode is per-thread so the current programming model can be 
-maintained.
-
-This requires dynamic per-thread allocations, which is awkward 
-with POSIX threads (pthread_key_create space is limited and we 
-cannot simply allocate a key for every TDB).
-
-Internal locking is required to make sure that fcntl locks do not 
-overlap between threads, and also that the global list of tdbs is 
-maintained.
-
-The aim is that building tdb with -DTDB_PTHREAD will result in a 
-pthread-safe version of the library, and otherwise no overhead 
-will exist. Alternatively, a hooking mechanism similar to that 
-proposed for [Proposed-Solution-locking-hook] could be used to 
-enable pthread locking at runtime.
-
-2.7.2 Status
-
-Incomplete.
-
-2.8 *_nonblock Functions And *_mark Functions Expose 
-  Implementation
-
-CTDB[footnote:
-Clustered TDB, see http://ctdb.samba.org
-] wishes to operate on TDB in a non-blocking manner. This is 
-currently done as follows:
-
-1. Call the _nonblock variant of an API function (eg. 
-  tdb_lockall_nonblock). If this fails:
-
-2. Fork a child process, and wait for it to call the normal 
-  variant (eg. tdb_lockall).
-
-3. If the child succeeds, call the _mark variant to indicate we 
-  already have the locks (eg. tdb_lockall_mark).
-
-4. Upon completion, tell the child to release the locks (eg. 
-  tdb_unlockall).
-
-5. Indicate to tdb that it should consider the locks removed (eg. 
-  tdb_unlockall_mark).
-
-There are several issues with this approach. Firstly, adding two 
-new variants of each function clutters the API for an obscure 
-use, and so not all functions have three variants. Secondly, it 
-assumes that all paths of the functions ask for the same locks, 
-otherwise the parent process will have to get a lock which the 
-child doesn't have under some circumstances. I don't believe this 
-is currently the case, but it constrains the implementation. 
-
-2.8.1 <Proposed-Solution-locking-hook>Proposed Solution
-
-Implement a hook for locking methods, so that the caller can 
-control the calls to create and remove fcntl locks. In this 
-scenario, ctdbd would operate as follows:
-
-1. Call the normal API function, eg tdb_lockall().
-
-2. When the lock callback comes in, check if the child has the 
-  lock. Initially, this is always false. If so, return 0. 
-  Otherwise, try to obtain it in non-blocking mode. If that 
-  fails, return EWOULDBLOCK.
-
-3. Release locks in the unlock callback as normal.
-
-4. If tdb_lockall() fails, see if we recorded a lock failure; if 
-  so, call the child to repeat the operation.
-
-5. The child records what locks it obtains, and returns that 
-  information to the parent.
-
-6. When the child has succeeded, goto 1.
-
-This is flexible enough to handle any potential locking scenario, 
-even when lock requirements change. It can be optimized so that 
-the parent does not release locks, just tells the child which 
-locks it doesn't need to obtain.
-
-It also keeps the complexity out of the API, and in ctdbd where 
-it is needed.
-
-2.8.2 Status
-
-Incomplete.
-
-2.9 tdb_chainlock Functions Expose Implementation
-
-tdb_chainlock locks some number of records, including the record 
-indicated by the given key. This gave atomicity guarantees; 
-no-one can start a transaction, alter, read or delete that key 
-while the lock is held.
-
-It also makes the same guarantee for any other key in the chain, 
-which is an internal implementation detail and potentially a 
-cause for deadlock.
-
-2.9.1 Proposed Solution
-
-None. It would be nice to have an explicit single entry lock 
-which affected no other keys. Unfortunately, this won't work for 
-an entry which doesn't exist. Thus while chainlock may be 
-implemented more efficiently for the existing case, it will still 
-have overlap issues with the non-existing case. So it is best to 
-keep the current (lack of) guarantee about which records will be 
-affected to avoid constraining our implementation.
-
-2.10 Signal Handling is Not Race-Free
-
-The tdb_setalarm_sigptr() call allows the caller's signal handler 
-to indicate that the tdb locking code should return with a 
-failure, rather than trying again when a signal is received (and 
-errno == EAGAIN). This is usually used to implement timeouts.
-
-Unfortunately, this does not work in the case where the signal is 
-received before the tdb code enters the fcntl() call to place the 
-lock: the code will sleep within the fcntl() code, unaware that 
-the signal wants it to exit. In the case of long timeouts, this 
-does not happen in practice.
-
-2.10.1 Proposed Solution
-
-The locking hooks proposed in [Proposed-Solution-locking-hook] 
-would allow the user to decide on whether to fail the lock 
-acquisition on a signal. This allows the caller to choose their 
-own compromise: they could narrow the race by checking 
-immediately before the fcntl call.[footnote:
-It may be possible to make this race-free in some implementations 
-by having the signal handler alter the struct flock to make it 
-invalid. This will cause the fcntl() lock call to fail with 
-EINVAL if the signal occurs before the kernel is entered, 
-otherwise EAGAIN.
-]
-
-2.10.2 Status
-
-Incomplete.
-
-2.11 The API Uses Gratuitous Typedefs, Capitals
-
-typedefs are useful for providing source compatibility when types 
-can differ across implementations, or arguably in the case of 
-function pointer definitions which are hard for humans to parse. 
-Otherwise it is simply obfuscation and pollutes the namespace.
-
-Capitalization is usually reserved for compile-time constants and 
-macros.
-
-  TDB_CONTEXT There is no reason to use this over 'struct 
-  tdb_context'; the definition isn't visible to the API user 
-  anyway.
-
-  TDB_DATA There is no reason to use this over struct TDB_DATA; 
-  the struct needs to be understood by the API user.
-
-  struct TDB_DATA This would normally be called 'struct 
-  tdb_data'.
-
-  enum TDB_ERROR Similarly, this would normally be enum 
-  tdb_error.
-
-2.11.1 Proposed Solution
-
-None. Introducing lower case variants would please pedants like 
-myself, but if it were done the existing ones should be kept. 
-There is little point forcing a purely cosmetic change upon tdb 
-users.
-
-2.12 <tdb_log_func-Doesnt-Take>tdb_log_func Doesn't Take The 
-  Private Pointer
-
-For API compatibility reasons, the logging function needs to call 
-tdb_get_logging_private() to retrieve the pointer registered by 
-the tdb_open_ex for logging.
-
-2.12.1 Proposed Solution
-
-It should simply take an extra argument, since we are prepared to 
-break the API/ABI.
-
-2.12.2 Status
-
-Complete.
-
-2.13 Various Callback Functions Are Not Typesafe
-
-The callback functions in tdb_set_logging_function (after [tdb_log_func-Doesnt-Take]
- is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read 
-and tdb_check all take void * and must internally convert it to 
-the argument type they were expecting.
-
-If this type changes, the compiler will not produce warnings on 
-the callers, since it only sees void *.
-
-2.13.1 Proposed Solution
-
-With careful use of macros, we can create callback functions 
-which give a warning when used on gcc and the types of the 
-callback and its private argument differ. Unsupported compilers 
-will not give a warning, which is no worse than now. In addition, 
-the callbacks become clearer, as they need not use void * for 
-their parameter.
-
-See CCAN's typesafe_cb module at 
-http://ccan.ozlabs.org/info/typesafe_cb.html
-
-2.13.2 Status
-
-Incomplete.
-
-2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, 
-  tdb_reopen_all Problematic
-
-The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB 
-file should be cleared if the caller discovers it is the only 
-process with the TDB open. However, if any caller does not 
-specify TDB_CLEAR_IF_FIRST it will not be detected, so will have 
-the TDB erased underneath them (usually resulting in a crash).
-
-There is a similar issue on fork(); if the parent exits (or 
-otherwise closes the tdb) before the child calls tdb_reopen_all() 
-to establish the lock used to indicate the TDB is opened by 
-someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe 
-it alone has opened the TDB and will erase it.
-
-2.14.1 Proposed Solution
-
-Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but 
-see [TDB_CLEAR_IF_FIRST-Imposes-Performance].
-
-2.14.2 Status
-
-Incomplete, TDB_CLEAR_IF_FIRST still defined, but not 
-implemented.
-
-2.15 Extending The Header Is Difficult
-
-We have reserved (zeroed) words in the TDB header, which can be 
-used for future features. If the future features are compulsory, 
-the version number must be updated to prevent old code from 
-accessing the database. But if the future feature is optional, we 
-have no way of telling if older code is accessing the database or 
-not.
-
-2.15.1 Proposed Solution
-
-The header should contain a “format variant” value (64-bit). This 
-is divided into two 32-bit parts:
-
-1. The lower part reflects the format variant understood by code 
-  accessing the database.
-
-2. The upper part reflects the format variant you must understand 
-  to write to the database (otherwise you can only open for 
-  reading).
-
-The latter field can only be written at creation time, the former 
-should be written under the OPEN_LOCK when opening the database 
-for writing, if the variant of the code is lower than the current 
-lowest variant.
-
-This should allow backwards-compatible features to be added, and 
-detection if older code (which doesn't understand the feature) 
-writes to the database.
-
-2.15.2 Status
-
-Incomplete.
-
-2.16 Record Headers Are Not Expandable
-
-If we later want to add (say) checksums on keys and data, it 
-would require another format change, which we'd like to avoid.
-
-2.16.1 Proposed Solution
-
-We often have extra padding at the tail of a record. If we ensure 
-that the first byte (if any) of this padding is zero, we will 
-have a way for future changes to detect code which doesn't 
-understand a new format: the new code would write (say) a 1 at 
-the tail, and thus if there is no tail or the first byte is 0, we 
-would know the extension is not present on that record.
-
-2.16.2 Status
-
-Incomplete.
-
-2.17 TDB Does Not Use Talloc
-
-Many users of TDB (particularly Samba) use the talloc allocator, 
-and thus have to wrap TDB in a talloc context to use it 
-conveniently.
-
-2.17.1 Proposed Solution
-
-The allocation within TDB is not complicated enough to justify 
-the use of talloc, and I am reluctant to force another 
-(excellent) library on TDB users. Nonetheless a compromise is 
-possible. An attribute (see [attributes]) can be added later to 
-tdb_open() to provide an alternate allocation mechanism, 
-specifically for talloc but usable by any other allocator (which 
-would ignore the “context” argument).
-
-This would form a talloc hierarchy as expected, but the caller 
-would still have to attach a destructor to the tdb context 
-returned from tdb_open to close it. All TDB_DATA fields would be 
-children of the tdb_context, and the caller would still have to 
-manage them (using talloc_free() or talloc_steal()).
-
-2.17.2 Status
-
-Deferred.
-
-3 Performance And Scalability Issues
-
-3.1 <TDB_CLEAR_IF_FIRST-Imposes-Performance>TDB_CLEAR_IF_FIRST 
-  Imposes Performance Penalty
-
-When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is 
-placed at offset 4 (aka. the ACTIVE_LOCK). While these locks 
-never conflict in normal tdb usage, they do add substantial 
-overhead for most fcntl lock implementations when the kernel 
-scans to detect if a lock conflict exists. This is often a single 
-linked list, making the time to acquire and release a fcntl lock 
-O(N) where N is the number of processes with the TDB open, not 
-the number actually doing work.
-
-In a Samba server it is common to have huge numbers of clients 
-sitting idle, and thus they have weaned themselves off the 
-TDB_CLEAR_IF_FIRST flag.[footnote:
-There is a flag to tdb_reopen_all() which is used for this 
-optimization: if the parent process will outlive the child, the 
-child does not need the ACTIVE_LOCK. This is a workaround for 
-this very performance issue.
-]
-
-3.1.1 Proposed Solution
-
-Remove the flag. It was a neat idea, but even trivial servers 
-tend to know when they are initializing for the first time and 
-can simply unlink the old tdb at that point.
-
-3.1.2 Status
-
-Incomplete; TDB_CLEAR_IF_FIRST still defined, but does nothing.
-
-3.2 TDB Files Have a 4G Limit
-
-This seems to be becoming an issue (so much for “trivial”!), 
-particularly for ldb.
-
-3.2.1 Proposed Solution
-
-A new, incompatible TDB format which uses 64 bit offsets 
-internally rather than 32 bit as now. For simplicity of endian 
-conversion (which TDB does on the fly if required), all values 
-will be 64 bit on disk. In practice, some upper bits may be used 
-for other purposes, but at least 56 bits will be available for 
-file offsets.
-
-tdb_open() will automatically detect the old version, and even 
-create them if TDB_VERSION6 is specified to tdb_open.
-
-32 bit processes will still be able to access TDBs larger than 4G 
-(assuming that their off_t allows them to seek to 64 bits), they 
-will gracefully fall back as they fail to mmap. This can happen 
-already with large TDBs.
-
-Old versions of tdb will fail to open the new TDB files (since 28 
-August 2009, commit 398d0c29290: prior to that any unrecognized 
-file format would be erased and initialized as a fresh tdb!)
-
-3.2.2 Status
-
-Complete.
-
-3.3 TDB Records Have a 4G Limit
-
-This has not been a reported problem, and the API uses size_t 
-which can be 64 bit on 64 bit platforms. However, other limits 
-may have made such an issue moot.
-
-3.3.1 Proposed Solution
-
-Record sizes will be 64 bit, with an error returned on 32 bit 
-platforms which try to access such records (the current 
-implementation would return TDB_ERR_OOM in a similar case). It 
-seems unlikely that 32 bit keys will be a limitation, so the 
-implementation may not support this (see [sub:Records-Incur-A]).
-
-3.3.2 Status
-
-Complete.
-
-3.4 Hash Size Is Determined At TDB Creation Time
-
-TDB contains a number of hash chains in the header; the number is 
-specified at creation time, and defaults to 131. This is such a 
-bottleneck on large databases (as each hash chain gets quite 
-long), that LDB uses 10,000 for this hash. In general it is 
-impossible to know what the 'right' answer is at database 
-creation time.
-
-3.4.1 <sub:Hash-Size-Solution>Proposed Solution
-
-After comprehensive performance testing on various scalable hash 
-variants[footnote:
-http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 
-This was annoying because I was previously convinced that an 
-expanding tree of hashes would be very close to optimal.
-], it became clear that it is hard to beat a straight linear hash 
-table which doubles in size when it reaches saturation. 
-Unfortunately, altering the hash table introduces serious locking 
-complications: the entire hash table needs to be locked to 
-enlarge the hash table, and others might be holding locks. 
-Particularly insidious are insertions done under tdb_chainlock.
-
-Thus an expanding layered hash will be used: an array of hash 
-groups, with each hash group exploding into pointers to lower 
-hash groups once it fills, turning into a hash tree. This has 
-implications for locking: we must lock the entire group in case 
-we need to expand it, yet we don't know how deep the tree is at 
-that point.
-
-Note that bits from the hash table entries should be stolen to 
-hold more hash bits to reduce the penalty of collisions. We can 
-use the otherwise-unused lower 3 bits. If we limit the size of 
-the database to 64 petabytes, we can use the top 8 bits of the 
-hash entry as well. These 11 bits would reduce false positives 
-down to 1 in 2000 which is more than we need: we can use one of 
-the bits to indicate that the extra hash bits are valid. This 
-means we can choose not to re-hash all entries when we expand a 
-hash group; simply use the next bits we need and mark them 
-invalid.
-
-3.4.2 Status
-
-Complete.
-
-3.5 <TDB-Freelist-Is>TDB Freelist Is Highly Contended
-
-TDB uses a single linked list for the free list. Allocation 
-occurs as follows, using heuristics which have evolved over time:
-
-1. Get the free list lock for this whole operation.
-
-2. Multiply length by 1.25, so we always over-allocate by 25%.
-
-3. Set the slack multiplier to 1.
-
-4. Examine the current freelist entry: if it is > length but < 
-  the current best case, remember it as the best case.
-
-5. Multiply the slack multiplier by 1.05.
-
-6. If our best fit so far is less than length * slack multiplier, 
-  return it. The slack will be turned into a new free record if 
-  it's large enough.
-
-7. Otherwise, go onto the next freelist entry.
-
-Deleting a record occurs as follows:
-
-1. Lock the hash chain for this whole operation.
-
-2. Walk the chain to find the record, keeping the prev pointer 
-  offset.
-
-3. If max_dead is non-zero:
-
-  (a) Walk the hash chain again and count the dead records.
-
-  (b) If it's more than max_dead, bulk free all the dead ones 
-    (similar to steps 4 and below, but the lock is only obtained 
-    once).
-
-  (c) Simply mark this record as dead and return. 
-
-4. Get the free list lock for the remainder of this operation.
-
-5. <right-merging>Examine the following block to see if it is 
-  free; if so, enlarge the current block and remove that block 
-  from the free list. This was disabled, as removal from the free 
-  list was O(entries-in-free-list).
-
-6. Examine the preceding block to see if it is free: for this 
-  reason, each block has a 32-bit tailer which indicates its 
-  length. If it is free, expand it to cover our new block and 
-  return.
-
-7. Otherwise, prepend ourselves to the free list.
-
-Disabling right-merging (step [right-merging]) causes 
-fragmentation; the other heuristics proved insufficient to 
-address this, so the final answer to this was that when we expand 
-the TDB file inside a transaction commit, we repack the entire 
-tdb.
-
-The single list lock limits our allocation rate; due to the other 
-issues this is not currently seen as a bottleneck.
-
-3.5.1 Proposed Solution
-
-The first step is to remove all the current heuristics, as they 
-obviously interact, then examine them once the lock contention is 
-addressed.
-
-The free list must be split to reduce contention. Assuming 
-perfect free merging, we can at most have 1 free list entry for 
-each entry. This implies that the number of free lists is related 
-to the size of the hash table, but as it is rare to walk a large 
-number of free list entries we can use far fewer, say 1/32 of the 
-number of hash buckets.
-
-It seems tempting to try to reuse the hash implementation which 
-we use for records here, but we have two ways of searching for 
-free entries: for allocation we search by size (and possibly 
-zone) which produces too many clashes for our hash table to 
-handle well, and for coalescing we search by address. Thus an 
-array of doubly-linked free lists seems preferable.
-
-There are various benefits in using per-size free lists (see 
-[sub:TDB-Becomes-Fragmented]) but it's not clear this would reduce contention in the common 
-case where all processes are allocating/freeing the same size. 
-Thus we almost certainly need to divide in other ways: the most 
-obvious is to divide the file into zones, and using a free list 
-(or table of free lists) for each. This approximates address 
-ordering.
-
-Unfortunately it is difficult to know what heuristics should be 
-used to determine zone sizes, and our transaction code relies on 
-being able to create a “recovery area” by simply appending to the 
-file (difficult if it would need to create a new zone header). 
-Thus we use a linked-list of free tables; currently we only ever 
-create one, but if there is more than one we choose one at random 
-to use. In future we may use heuristics to add new free tables on 
-contention. We only expand the file when all free tables are 
-exhausted.
-
-The basic algorithm is as follows. Freeing is simple:
-
-1. Identify the correct free list.
-
-2. Lock the corresponding list.
-
-3. Re-check the list (we didn't have a lock, sizes could have 
-  changed): relock if necessary.
-
-4. Place the freed entry in the list.
-
-Allocation is a little more complicated, as we perform delayed 
-coalescing at this point:
-
-1. Pick a free table; usually the previous one.
-
-2. Lock the corresponding list.
-
-3. If the top entry is large enough, remove it from the list and 
-  return it.
-
-4. Otherwise, coalesce entries in the list. If there was no entry 
-  large enough, unlock the list and try the next largest list.
-
-5. If no list has an entry which meets our needs, try the next 
-  free table.
-
-6. If no free table satisfies, expand the file.
-
-This optimizes rapid insert/delete of free list entries by not 
-coalescing them all the time. First-fit address ordering 
-seems to be fairly good for keeping fragmentation low 
-(see [sub:TDB-Becomes-Fragmented]). Note that address ordering 
-does not need a tailer to coalesce, though if we needed one we 
-could have one cheaply: see [sub:Records-Incur-A]. 
-
-Each free entry has the free table number in the header: less 
-than 255. It also contains a doubly-linked list for easy 
-deletion.
-
-3.6 <sub:TDB-Becomes-Fragmented>TDB Becomes Fragmented
-
-Much of this is a result of allocation strategy[footnote:
-The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 
-ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps
-] and deliberate hobbling of coalescing; internal fragmentation 
-(aka overallocation) is deliberately set at 25%, and external 
-fragmentation is only cured by the decision to repack the entire 
-db when a transaction commit needs to enlarge the file.
-
-3.6.1 Proposed Solution
-
-The 25% overhead on allocation works in practice for ldb because 
-indexes tend to expand by one record at a time. This internal 
-fragmentation can be resolved by having an “expanded” bit in the 
-header to note entries that have previously expanded, and 
-allocating more space for them.
-
-There is a spectrum of possible solutions for external 
-fragmentation: one is to use a fragmentation-avoiding allocation 
-strategy such as best-fit address-order allocator. The other end 
-of the spectrum would be to use a bump allocator (very fast and 
-simple) and simply repack the file when we reach the end.
-
-There are three problems with efficient fragmentation-avoiding 
-allocators: they are non-trivial, they tend to use a single free 
-list for each size, and there's no evidence that tdb allocation 
-patterns will match those recorded for general allocators (though 
-it seems likely).
-
-Thus we don't spend too much effort on external fragmentation; we 
-will be no worse than the current code if we need to repack on 
-occasion. More effort is spent on reducing freelist contention, 
-and reducing overhead.
-
-3.7 <sub:Records-Incur-A>Records Incur A 28-Byte Overhead
-
-Each TDB record has a header as follows:
-
-struct tdb_record {
-
-        tdb_off_t next; /* offset of the next record in the list 
-*/
-
-        tdb_len_t rec_len; /* total byte length of record */
-
-        tdb_len_t key_len; /* byte length of key */
-
-        tdb_len_t data_len; /* byte length of data */
-
-        uint32_t full_hash; /* the full 32 bit hash of the key */
-
-        uint32_t magic;   /* try to catch errors */
-
-        /* the following union is implied:
-
-                union {
-
-                        char record[rec_len];
-
-                        struct {
-
-                                char key[key_len];
-
-                                char data[data_len];
-
-                        }
-
-                        uint32_t totalsize; (tailer)
-
-                }
-
-        */
-
-};
-
-Naively, this would double to a 56-byte overhead on a 64 bit 
-implementation.
-
-3.7.1 Proposed Solution
-
-We can use various techniques to reduce this for an allocated 
-block:
-
-1. The 'next' pointer is not required, as we are using a flat 
-  hash table.
-
-2. 'rec_len' can instead be expressed as an addition to key_len 
-  and data_len (it accounts for wasted or overallocated length in 
-  the record). Since the record length is always a multiple of 8, 
-  we can conveniently fit it in 32 bits (representing up to 35 
-  bits).
-
-3. 'key_len' and 'data_len' can be reduced. I'm unwilling to 
-  restrict 'data_len' to 32 bits, but instead we can combine the 
-  two into one 64-bit field and using a 5 bit value which 
-  indicates at what bit to divide the two. Keys are unlikely to 
-  scale as fast as data, so I'm assuming a maximum key size of 32 
-  bits.
-
-4. 'full_hash' is used to avoid a memcmp on the “miss” case, but 
-  this is diminishing returns after a handful of bits (at 10 
-  bits, it reduces 99.9% of false memcmp). As an aside, as the 
-  lower bits are already incorporated in the hash table 
-  resolution, the upper bits should be used here. Note that it's 
-  not clear that these bits will be a win, given the extra bits 
-  in the hash table itself (see [sub:Hash-Size-Solution]).
-
-5. 'magic' does not need to be enlarged: it currently reflects 
-  one of 5 values (used, free, dead, recovery, and 
-  unused_recovery). It is useful for quick sanity checking 
-  however, and should not be eliminated.
-
-6. 'tailer' is only used to coalesce free blocks (so a block to 
-  the right can find the header to check if this block is free). 
-  This can be replaced by a single 'free' bit in the header of 
-  the following block (and the tailer only exists in free 
-  blocks).[footnote:
-This technique from Thomas Standish. Data Structure Techniques. 
-Addison-Wesley, Reading, Massachusetts, 1980.
-] The current proposed coalescing algorithm doesn't need this, 
-  however.
-
-This produces a 16 byte used header like this:
-
-struct tdb_used_record {
-
-        uint32_t used_magic : 16,
-
-
-
-                 key_data_divide: 5,
-
-                 top_hash: 11;
-
-        uint32_t extra_octets;
-
-        uint64_t key_and_data_len;
-
-};
-
-And a free record like this:
-
-struct tdb_free_record {
-
-        uint64_t free_magic: 8,
-
-                   prev : 56;
-
-
-
-        uint64_t free_table: 8,
-
-                 total_length : 56;
-
-        uint64_t next;
-
-};
-
-Note that by limiting valid offsets to 56 bits, we can pack 
-everything we need into 3 64-bit words, meaning our minimum 
-record size is 8 bytes.
-
-3.7.2 Status
-
-Complete.
-
-3.8 Transaction Commit Requires 4 fdatasync
-
-The current transaction algorithm is:
-
-1. write_recovery_data();
-
-2. sync();
-
-3. write_recovery_header();
-
-4. sync();
-
-5. overwrite_with_new_data();
-
-6. sync();
-
-7. remove_recovery_header();
-
-8. sync(); 
-
-On current ext3, each sync flushes all data to disk, so the next 
-3 syncs are relatively inexpensive. But this could become a 
-performance bottleneck on other filesystems such as ext4.
-
-3.8.1 Proposed Solution
-
-Neil Brown points out that this is overzealous, and only one sync 
-is needed:
-
-1. Bundle the recovery data, a transaction counter and a strong 
-  checksum of the new data.
-
-2. Strong checksum that whole bundle.
-
-3. Store the bundle in the database.
-
-4. Overwrite the oldest of the two recovery pointers in the 
-  header (identified using the transaction counter) with the 
-  offset of this bundle.
-
-5. sync.
-
-6. Write the new data to the file.
-
-Checking for recovery means identifying the latest bundle with a 
-valid checksum and using the new data checksum to ensure that it 
-has been applied. This is more expensive than the current check, 
-but need only be done at open. For running databases, a separate 
-header field can be used to indicate a transaction in progress; 
-we need only check for recovery if this is set.
-
-3.8.2 Status
-
-Deferred.
-
-3.9 <sub:TDB-Does-Not>TDB Does Not Have Snapshot Support
-
-3.9.1 Proposed Solution
-
-None. At some point you say “use a real database” (but see 
-[replay-attribute]).
-
-But as a thought experiment, if we implemented transactions to 
-only overwrite free entries (this is tricky: there must not be a 
-header in each entry which indicates whether it is free, but use 
-of presence in metadata elsewhere), and a pointer to the hash 
-table, we could create an entirely new commit without destroying 
-existing data. Then it would be easy to implement snapshots in a 
-similar way.
-
-This would not allow arbitrary changes to the database, such as 
-tdb_repack does, and would require more space (since we have to 
-preserve the current and future entries at once). If we used hash 
-trees rather than one big hash table, we might only have to 
-rewrite some sections of the hash, too.
-
-We could then implement snapshots using a similar method, using 
-multiple different hash tables/free tables.
-
-3.9.2 Status
-
-Deferred.
-
-3.10 Transactions Cannot Operate in Parallel
-
-This would be useless for ldb, as it hits the index records with 
-just about every update. It would add significant complexity in 
-resolving clashes, and cause all the transaction callers to write 
-their code to loop in the case where the transactions spuriously 
-failed.
-
-3.10.1 Proposed Solution
-
-None (but see [replay-attribute]). We could solve a small part of 
-the problem by providing read-only transactions. These would 
-allow one write transaction to begin, but it could not commit 
-until all r/o transactions are done. This would require a new 
-RO_TRANSACTION_LOCK, which would be upgraded on commit.
-
-3.10.2 Status
-
-Deferred.
-
-3.11 Default Hash Function Is Suboptimal
-
-The Knuth-inspired multiplicative hash used by tdb is fairly slow 
-(especially if we expand it to 64 bits), and works best when the 
-hash bucket size is a prime number (which also means a slow 
-modulus). In addition, it is highly predictable which could 
-potentially lead to a Denial of Service attack in some TDB uses.
-
-3.11.1 Proposed Solution
-
-The Jenkins lookup3 hash[footnote:
-http://burtleburtle.net/bob/c/lookup3.c
-] is a fast and superbly-mixing hash. It's used by the Linux 
-kernel and almost everything else. This has the particular 
-properties that it takes an initial seed, and produces two 32 bit 
-hash numbers, which we can combine into a 64-bit hash.
-
-The seed should be created at tdb-creation time from some random 
-source, and placed in the header. This is far from foolproof, but 
-adds a little bit of protection against hash bombing.
-
-3.11.2 Status
-
-Complete.
-
-3.12 <Reliable-Traversal-Adds>Reliable Traversal Adds Complexity
-
-We lock a record during traversal iteration, and try to grab that 
-lock in the delete code. If that grab on delete fails, we simply 
-mark it deleted and continue onwards; traversal checks for this 
-condition and does the delete when it moves off the record.
-
-If traversal terminates, the dead record may be left 
-indefinitely.
-
-3.12.1 Proposed Solution
-
-Remove reliability guarantees; see [traverse-Proposed-Solution].
-
-3.12.2 Status
-
-Complete.
-
-3.13 Fcntl Locking Adds Overhead
-
-Placing a fcntl lock means a system call, as does removing one. 
-This is actually one reason why transactions can be faster 
-(everything is locked once at transaction start). In the 
-uncontended case, this overhead can theoretically be eliminated.
-
-3.13.1 Proposed Solution
-
-None.
-
-We tried this before with spinlock support, in the early days of 
-TDB, and it didn't make much difference except in manufactured 
-benchmarks.
-
-We could use spinlocks (with futex kernel support under Linux), 
-but it means that we lose automatic cleanup when a process dies 
-with a lock. There is a method of auto-cleanup under Linux, but 
-it's not supported by other operating systems. We could 
-reintroduce a clear-if-first-style lock and sweep for dead 
-futexes on open, but that wouldn't help the normal case of one 
-concurrent opener dying. Increasingly elaborate repair schemes 
-could be considered, but they require an ABI change (everyone 
-must use them) anyway, so there's no need to do this at the same 
-time as everything else.
-
-3.14 Some Transactions Don't Require Durability
-
-Volker points out that gencache uses a CLEAR_IF_FIRST tdb for 
-normal (fast) usage, and occasionally empties the results into a 
-transactional TDB. This kind of usage prioritizes performance 
-over durability: as long as we are consistent, data can be lost.
-
-This would be more neatly implemented inside tdb: a “soft” 
-transaction commit (ie. syncless) which meant that data may be 
-reverted on a crash.
-
-3.14.1 Proposed Solution
-
-None.
-
-Unfortunately any transaction scheme which overwrites old data 
-requires a sync before that overwrite to avoid the possibility of 
-corruption.
-
-It seems possible to use a scheme similar to that described in 
-[sub:TDB-Does-Not], where transactions are committed without overwriting existing 
-data, and an array of top-level pointers were available in the 
-header. If the transaction is “soft” then we would not need a 
-sync at all: existing processes would pick up the new hash table 
-and free list and work with that.
-
-At some later point, a sync would allow recovery of the old data 
-into the free lists (perhaps when the array of top-level pointers 
-filled). On crash, tdb_open() would examine the array of top 
-levels, and apply the transactions until it encountered an 
-invalid checksum.
-
-3.15 Tracing Is Fragile, Replay Is External
-
-The current TDB has compile-time-enabled tracing code, but it 
-often breaks as it is not enabled by default. In a similar way, 
-the ctdb code has an external wrapper which does replay tracing 
-so it can coordinate cluster-wide transactions.
-
-3.15.1 Proposed Solution<replay-attribute>
-
-Tridge points out that an attribute can be later added to 
-tdb_open (see [attributes]) to provide replay/trace hooks, which 
-could become the basis for this and future parallel transactions 
-and snapshot support.
-
-3.15.2 Status
-
-Deferred.
-
diff --git a/ccan/tdb2/free.c b/ccan/tdb2/free.c
deleted file mode 100644 (file)
index e693fe8..0000000
+++ /dev/null
@@ -1,975 +0,0 @@
- /*
-   Trivial Database 2: free list/block handling
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-#include <ccan/ilog/ilog.h>
-#include <time.h>
-#include <assert.h>
-#include <limits.h>
-
-static unsigned fls64(uint64_t val)
-{ /* Local alias: forwards val straight to ilog64() (presumably provided by
-   * <ccan/ilog/ilog.h>, included above) and returns its result unchanged. */
-       return ilog64(val);
-}
-
-/* In which bucket would we find a particular record size? (ignoring header)
- *
- * data_len must be at least TDB_MIN_DATA_LEN (asserted).  The bucket is
- * chosen from the excess over that minimum: an excess of up to 64 bytes
- * maps linearly (excess / 8); anything larger maps to fls64(excess) + 2.
- * The result is clamped to TDB_FREE_BUCKETS - 1, so the last bucket
- * collects every larger size.
- */
-unsigned int size_to_bucket(tdb_len_t data_len)
-{
-       unsigned int bucket;
-
-       /* We can't have records smaller than this. */
-       assert(data_len >= TDB_MIN_DATA_LEN);
-
-       /* Ignoring the header... */
-       if (data_len - TDB_MIN_DATA_LEN <= 64) {
-               /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */
-               bucket = (data_len - TDB_MIN_DATA_LEN) / 8;
-       } else {
-               /* After that we go power of 2. */
-               bucket = fls64(data_len - TDB_MIN_DATA_LEN) + 2;
-       }
-
-       /* Clamp: oversized records all share the final bucket. */
-       if (unlikely(bucket >= TDB_FREE_BUCKETS))
-               bucket = TDB_FREE_BUCKETS - 1;
-       return bucket;
-}
-
-tdb_off_t first_ftable(struct tdb_context *tdb)
-{ /* Returns whatever tdb_read_off() yields for the free_table field of
-   * struct tdb_header; the value is not checked here, so callers must
-   * test it (tdb_ftable_init() uses TDB_OFF_IS_ERR() for that). */
-       return tdb_read_off(tdb, offsetof(struct tdb_header, free_table));
-}
-
-tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable)
-{ /* Returns whatever tdb_read_off() yields for the 'next' field of the
-   * struct tdb_freetable located at offset ftable; the value is not
-   * checked here. */
-       return tdb_read_off(tdb, ftable + offsetof(struct tdb_freetable,next));
-}
-
-enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb)
-{
-       /* Use reservoir sampling algorithm to select a free list at random.
-        *
-        * Every free table reachable from first_ftable() via next_ftable()
-        * gets a fresh random() draw; a table is selected when its draw is
-        * >= the largest draw so far, so ties go to the later table.  The
-        * selected table's offset and its position in the chain are stored
-        * in tdb->tdb2.ftable_off and tdb->tdb2.ftable.
-        *
-        * An error offset from either lookup is caught at the top of the
-        * loop and returned as an error code (if first_ftable() itself
-        * failed, ftable_off has already been set to that error value by
-        * the initial assignment).  Otherwise TDB_SUCCESS is returned.
-        */
-       unsigned int rnd, max = 0, count = 0;
-       tdb_off_t off;
-
-       tdb->tdb2.ftable_off = off = first_ftable(tdb);
-       tdb->tdb2.ftable = 0;
-
-       while (off) {
-               if (TDB_OFF_IS_ERR(off)) {
-                       return TDB_OFF_TO_ERR(off);
-               }
-
-               rnd = random();
-               if (rnd >= max) {
-                       tdb->tdb2.ftable_off = off;
-                       tdb->tdb2.ftable = count;
-                       max = rnd;
-               }
-
-               off = next_ftable(tdb, off);
-               count++;
-       }
-       return TDB_SUCCESS;
-}
-
-/* Offset of a given bucket.
- *
- * Computed as ftable_off, plus the offset of the 'buckets' member within
- * struct tdb_freetable, plus 'bucket' entries of sizeof(tdb_off_t) each.
- * No bounds check is made on 'bucket'.
- */
-tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket)
-{
-       return ftable_off + offsetof(struct tdb_freetable, buckets)
-               + bucket * sizeof(tdb_off_t);
-}
-
-/* Returns free_buckets + 1, or list number to search, or -ve error.
- *
- * The work is delegated to tdb_find_nonzero_off(), called with the offset
- * of bucket 0 in this free table, the starting bucket, and
- * TDB_FREE_BUCKETS.
- * NOTE(review): the exact return convention (including the "not found"
- * value stated above) is defined by tdb_find_nonzero_off(), which is not
- * visible here -- confirm against its definition.
- */
-static tdb_off_t find_free_head(struct tdb_context *tdb,
-                               tdb_off_t ftable_off,
-                               tdb_off_t bucket)
-{
-       /* Speculatively search for a non-zero bucket. */
-       return tdb_find_nonzero_off(tdb, bucket_off(ftable_off, 0),
-                                   bucket, TDB_FREE_BUCKETS);
-}
-
-static void check_list(struct tdb_context *tdb, tdb_off_t b_off)
-{ /* Debug-only consistency check of the free list whose bucket word lives
-   * at b_off.  The whole body is compiled out unless CCAN_TDB2_DEBUG is
-   * defined; any inconsistency found calls abort(). */
-#ifdef CCAN_TDB2_DEBUG
-       tdb_off_t off, prev = 0, first;
-       struct tdb_free_record r;
-
-       first = off = (tdb_read_off(tdb, b_off) & TDB_OFF_MASK); /* keep only the TDB_OFF_MASK bits of the bucket word */
-       while (off != 0) {
-               tdb_read_convert(tdb, off, &r, sizeof(r)); /* return value is not checked */
-               if (frec_magic(&r) != TDB_FREE_MAGIC)
-                       abort();
-               if (prev && frec_prev(&r) != prev) /* every entry after the first must point back at its predecessor */
-                       abort();
-               prev = off;
-               off = r.next;
-       }
-
-       if (first) { /* non-empty list: the first entry's prev must be the last entry visited */
-               tdb_read_convert(tdb, first, &r, sizeof(r));
-               if (frec_prev(&r) != prev)
-                       abort();
-       }
-#endif
-}
-
-/* Remove from free bucket.
- *
- * b_off is the offset of the bucket head word, r_off the offset of the
- * record being removed, and r an already-read copy of that record.
- *
- * The checks below rely on this list shape: a lone element's prev is its
- * own offset; the record whose prev->next is 0 is the head; and a record
- * with next == 0 is the tail, whose "successor" for prev-link purposes is
- * the head.
- *
- * Returns the result of the final tdb_write_off(), or earlier the error
- * from a failing read/write.  If the bucket head does not match r_off
- * when r appears to be the head, the value of tdb_logerr() called with
- * TDB_ERR_CORRUPT is returned.
- */
-static enum TDB_ERROR remove_from_list(struct tdb_context *tdb,
-                                      tdb_off_t b_off, tdb_off_t r_off,
-                                      const struct tdb_free_record *r)
-{
-       tdb_off_t off, prev_next, head;
-       enum TDB_ERROR ecode;
-
-       /* Is this only element in list?  Zero out bucket, and we're done.
-        * (The whole bucket word is written as 0 here, unlike the head
-        * update below which keeps the bits outside TDB_OFF_MASK.) */
-       if (frec_prev(r) == r_off)
-               return tdb_write_off(tdb, b_off, 0);
-
-       /* off = &r->prev->next */
-       off = frec_prev(r) + offsetof(struct tdb_free_record, next);
-
-       /* Get prev->next */
-       prev_next = tdb_read_off(tdb, off);
-       if (TDB_OFF_IS_ERR(prev_next))
-               return TDB_OFF_TO_ERR(prev_next);
-
-       /* If prev->next == 0, we were head: update bucket to point to next. */
-       if (prev_next == 0) {
-               /* We must preserve upper bits. */
-               head = tdb_read_off(tdb, b_off);
-               if (TDB_OFF_IS_ERR(head))
-                       return TDB_OFF_TO_ERR(head);
-
-               /* Sanity check: the bucket must really point at us. */
-               if ((head & TDB_OFF_MASK) != r_off) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "remove_from_list:"
-                                         " %llu head %llu on list %llu",
-                                         (long long)r_off,
-                                         (long long)head,
-                                         (long long)b_off);
-               }
-               head = ((head & ~TDB_OFF_MASK) | r->next);
-               ecode = tdb_write_off(tdb, b_off, head);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-       } else {
-               /* r->prev->next = r->next */
-               ecode = tdb_write_off(tdb, off, r->next);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-       }
-
-       /* If we were the tail, off = &head->prev. */
-       if (r->next == 0) {
-               /* Re-read the bucket: it may have just been rewritten above. */
-               head = tdb_read_off(tdb, b_off);
-               if (TDB_OFF_IS_ERR(head))
-                       return TDB_OFF_TO_ERR(head);
-               head &= TDB_OFF_MASK;
-               off = head + offsetof(struct tdb_free_record, magic_and_prev);
-       } else {
-               /* off = &r->next->prev */
-               off = r->next + offsetof(struct tdb_free_record,
-                                        magic_and_prev);
-       }
-
-#ifdef CCAN_TDB2_DEBUG
-       /* *off == r */
-       if ((tdb_read_off(tdb, off) & TDB_OFF_MASK) != r_off) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "remove_from_list:"
-                                 " %llu bad prev in list %llu",
-                                 (long long)r_off, (long long)b_off);
-       }
-#endif
-       /* r->next->prev = r->prev
-        * (r's whole magic_and_prev word is copied over, magic bits
-        * included.) */
-       return tdb_write_off(tdb, off, r->magic_and_prev);
-}
-
-/* Enqueue in this free bucket: sets coalesce if we've added 128
- * entries to it. */
-static enum TDB_ERROR enqueue_in_free(struct tdb_context *tdb,
-                                     tdb_off_t b_off,
-                                     tdb_off_t off,
-                                     tdb_len_t len,
-                                     bool *coalesce)
-{
-       struct tdb_free_record new;
-       enum TDB_ERROR ecode;
-       tdb_off_t prev, head;
-       uint64_t magic = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL));
-
-       head = tdb_read_off(tdb, b_off);
-       if (TDB_OFF_IS_ERR(head))
-               return TDB_OFF_TO_ERR(head);
-
-       /* We only need to set ftable_and_len; rest is set in enqueue_in_free */
-       new.ftable_and_len = ((uint64_t)tdb->tdb2.ftable << (64 - TDB_OFF_UPPER_STEAL))
-               | len;
-
-       /* new->next = head. */
-       new.next = (head & TDB_OFF_MASK);
-
-       /* First element?  Prev points to ourselves. */
-       if (!new.next) {
-               new.magic_and_prev = (magic | off);
-       } else {
-               /* new->prev = next->prev */
-               prev = tdb_read_off(tdb,
-                                   new.next + offsetof(struct tdb_free_record,
-                                                       magic_and_prev));
-               new.magic_and_prev = prev;
-               if (frec_magic(&new) != TDB_FREE_MAGIC) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "enqueue_in_free: %llu bad head"
-                                         " prev %llu",
-                                         (long long)new.next,
-                                         (long long)prev);
-               }
-               /* next->prev = new. */
-               ecode = tdb_write_off(tdb, new.next
-                                     + offsetof(struct tdb_free_record,
-                                                magic_and_prev),
-                                     off | magic);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-#ifdef CCAN_TDB2_DEBUG
-               prev = tdb_read_off(tdb, frec_prev(&new)
-                                   + offsetof(struct tdb_free_record, next));
-               if (prev != 0) {
-                       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                         "enqueue_in_free:"
-                                         " %llu bad tail next ptr %llu",
-                                         (long long)frec_prev(&new)
-                                         + offsetof(struct tdb_free_record,
-                                                    next),
-                                         (long long)prev);
-               }
-#endif
-       }
-
-       /* Update enqueue count, but don't set high bit: see TDB_OFF_IS_ERR */
-       if (*coalesce)
-               head += (1ULL << (64 - TDB_OFF_UPPER_STEAL));
-       head &= ~(TDB_OFF_MASK | (1ULL << 63));
-       head |= off;
-
-       ecode = tdb_write_off(tdb, b_off, head);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* It's time to coalesce if counter wrapped. */
-       if (*coalesce)
-               *coalesce = ((head & ~TDB_OFF_MASK) == 0);
-
-       return tdb_write_convert(tdb, off, &new, sizeof(new));
-}
-
-static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable)
-{
-       tdb_off_t off;
-       unsigned int i;
-
-       if (likely(tdb->tdb2.ftable == ftable))
-               return tdb->tdb2.ftable_off;
-
-       off = first_ftable(tdb);
-       for (i = 0; i < ftable; i++) {
-               if (TDB_OFF_IS_ERR(off)) {
-                       break;
-               }
-               off = next_ftable(tdb, off);
-       }
-       return off;
-}
-
-/* Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and
- * need to blatt the *protect record (which is set to an error). */
-static tdb_len_t coalesce(struct tdb_context *tdb,
-                         tdb_off_t off, tdb_off_t b_off,
-                         tdb_len_t data_len,
-                         tdb_off_t *protect)
-{
-       tdb_off_t end;
-       struct tdb_free_record rec;
-       enum TDB_ERROR ecode;
-
-       tdb->stats.alloc_coalesce_tried++;
-       end = off + sizeof(struct tdb_used_record) + data_len;
-
-       while (end < tdb->file->map_size) {
-               const struct tdb_free_record *r;
-               tdb_off_t nb_off;
-               unsigned ftable, bucket;
-
-               r = tdb_access_read(tdb, end, sizeof(*r), true);
-               if (TDB_PTR_IS_ERR(r)) {
-                       ecode = TDB_PTR_ERR(r);
-                       goto err;
-               }
-
-               if (frec_magic(r) != TDB_FREE_MAGIC
-                   || frec_ftable(r) == TDB_FTABLE_NONE) {
-                       tdb_access_release(tdb, r);
-                       break;
-               }
-
-               ftable = frec_ftable(r);
-               bucket = size_to_bucket(frec_len(r));
-               nb_off = ftable_offset(tdb, ftable);
-               if (TDB_OFF_IS_ERR(nb_off)) {
-                       tdb_access_release(tdb, r);
-                       ecode = TDB_OFF_TO_ERR(nb_off);
-                       goto err;
-               }
-               nb_off = bucket_off(nb_off, bucket);
-               tdb_access_release(tdb, r);
-
-               /* We may be violating lock order here, so best effort. */
-               if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT)
-                   != TDB_SUCCESS) {
-                       tdb->stats.alloc_coalesce_lockfail++;
-                       break;
-               }
-
-               /* Now we have lock, re-check. */
-               ecode = tdb_read_convert(tdb, end, &rec, sizeof(rec));
-               if (ecode != TDB_SUCCESS) {
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       goto err;
-               }
-
-               if (unlikely(frec_magic(&rec) != TDB_FREE_MAGIC)) {
-                       tdb->stats.alloc_coalesce_race++;
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       break;
-               }
-
-               if (unlikely(frec_ftable(&rec) != ftable)
-                   || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
-                       tdb->stats.alloc_coalesce_race++;
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       break;
-               }
-
-               /* Did we just mess up a record you were hoping to use? */
-               if (end == *protect) {
-                       tdb->stats.alloc_coalesce_iterate_clash++;
-                       *protect = TDB_ERR_TO_OFF(TDB_ERR_NOEXIST);
-               }
-
-               ecode = remove_from_list(tdb, nb_off, end, &rec);
-               check_list(tdb, nb_off);
-               if (ecode != TDB_SUCCESS) {
-                       tdb_unlock_free_bucket(tdb, nb_off);
-                       goto err;
-               }
-
-               end += sizeof(struct tdb_used_record) + frec_len(&rec);
-               tdb_unlock_free_bucket(tdb, nb_off);
-               tdb->stats.alloc_coalesce_num_merged++;
-       }
-
-       /* Didn't find any adjacent free? */
-       if (end == off + sizeof(struct tdb_used_record) + data_len)
-               return 0;
-
-       /* Before we expand, check this isn't one you wanted protected? */
-       if (off == *protect) {
-               *protect = TDB_ERR_TO_OFF(TDB_ERR_EXISTS);
-               tdb->stats.alloc_coalesce_iterate_clash++;
-       }
-
-       /* OK, expand initial record */
-       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               goto err;
-       }
-
-       if (frec_len(&rec) != data_len) {
-               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                  "coalesce: expected data len %zu not %zu",
-                                  (size_t)data_len, (size_t)frec_len(&rec));
-               goto err;
-       }
-
-       ecode = remove_from_list(tdb, b_off, off, &rec);
-       check_list(tdb, b_off);
-       if (ecode != TDB_SUCCESS) {
-               goto err;
-       }
-
-       /* Try locking violation first.  We don't allow coalesce recursion! */
-       ecode = add_free_record(tdb, off, end - off, TDB_LOCK_NOWAIT, false);
-       if (ecode != TDB_SUCCESS) {
-               /* Need to drop lock.  Can't rely on anything stable. */
-               tdb->stats.alloc_coalesce_lockfail++;
-               *protect = TDB_ERR_TO_OFF(TDB_ERR_CORRUPT);
-
-               /* We have to drop this to avoid deadlocks, so make sure record
-                * doesn't get coalesced by someone else! */
-               rec.ftable_and_len = (TDB_FTABLE_NONE
-                                     << (64 - TDB_OFF_UPPER_STEAL))
-                       | (end - off - sizeof(struct tdb_used_record));
-               ecode = tdb_write_off(tdb,
-                                     off + offsetof(struct tdb_free_record,
-                                                    ftable_and_len),
-                                     rec.ftable_and_len);
-               if (ecode != TDB_SUCCESS) {
-                       goto err;
-               }
-
-               tdb_unlock_free_bucket(tdb, b_off);
-
-               ecode = add_free_record(tdb, off, end - off, TDB_LOCK_WAIT,
-                                       false);
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-       } else if (TDB_OFF_IS_ERR(*protect)) {
-               /* For simplicity, we always drop lock if they can't continue */
-               tdb_unlock_free_bucket(tdb, b_off);
-       }
-       tdb->stats.alloc_coalesce_succeeded++;
-
-       /* Return usable length. */
-       return end - off - sizeof(struct tdb_used_record);
-
-err:
-       /* To unify error paths, we *always* unlock bucket on error. */
-       tdb_unlock_free_bucket(tdb, b_off);
-       return TDB_ERR_TO_OFF(ecode);
-}
-
-/* List is locked: we unlock it. */
-static enum TDB_ERROR coalesce_list(struct tdb_context *tdb,
-                                   tdb_off_t ftable_off,
-                                   tdb_off_t b_off,
-                                   unsigned int limit)
-{
-       enum TDB_ERROR ecode;
-       tdb_off_t off;
-
-       off = tdb_read_off(tdb, b_off);
-       if (TDB_OFF_IS_ERR(off)) {
-               ecode = TDB_OFF_TO_ERR(off);
-               goto unlock_err;
-       }
-       /* A little bit of paranoia: counter should be 0. */
-       off &= TDB_OFF_MASK;
-
-       while (off && limit--) {
-               struct tdb_free_record rec;
-               tdb_len_t coal;
-               tdb_off_t next;
-
-               ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               next = rec.next;
-               coal = coalesce(tdb, off, b_off, frec_len(&rec), &next);
-               if (TDB_OFF_IS_ERR(coal)) {
-                       /* This has already unlocked on error. */
-                       return TDB_OFF_TO_ERR(coal);
-               }
-               if (TDB_OFF_IS_ERR(next)) {
-                       /* Coalescing had to unlock, so stop. */
-                       return TDB_SUCCESS;
-               }
-               /* Keep going if we're doing well... */
-               limit += size_to_bucket(coal / 16 + TDB_MIN_DATA_LEN);
-               off = next;
-       }
-
-       /* Now, move those elements to the tail of the list so we get something
-        * else next time. */
-       if (off) {
-               struct tdb_free_record oldhrec, newhrec, oldtrec, newtrec;
-               tdb_off_t oldhoff, oldtoff, newtoff;
-
-               /* The record we were up to is the new head. */
-               ecode = tdb_read_convert(tdb, off, &newhrec, sizeof(newhrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Get the new tail. */
-               newtoff = frec_prev(&newhrec);
-               ecode = tdb_read_convert(tdb, newtoff, &newtrec,
-                                        sizeof(newtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Get the old head. */
-               oldhoff = tdb_read_off(tdb, b_off);
-               if (TDB_OFF_IS_ERR(oldhoff)) {
-                       ecode = TDB_OFF_TO_ERR(oldhoff);
-                       goto unlock_err;
-               }
-
-               /* This could happen if they all coalesced away. */
-               if (oldhoff == off)
-                       goto out;
-
-               ecode = tdb_read_convert(tdb, oldhoff, &oldhrec,
-                                        sizeof(oldhrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Get the old tail. */
-               oldtoff = frec_prev(&oldhrec);
-               ecode = tdb_read_convert(tdb, oldtoff, &oldtrec,
-                                        sizeof(oldtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               /* Old tail's next points to old head. */
-               oldtrec.next = oldhoff;
-
-               /* Old head's prev points to old tail. */
-               oldhrec.magic_and_prev
-                       = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL))
-                       | oldtoff;
-
-               /* New tail's next is 0. */
-               newtrec.next = 0;
-
-               /* Write out the modified versions. */
-               ecode = tdb_write_convert(tdb, oldtoff, &oldtrec,
-                                         sizeof(oldtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               ecode = tdb_write_convert(tdb, oldhoff, &oldhrec,
-                                         sizeof(oldhrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-
-               ecode = tdb_write_convert(tdb, newtoff, &newtrec,
-                                         sizeof(newtrec));
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-               
-               /* And finally link in new head. */
-               ecode = tdb_write_off(tdb, b_off, off);
-               if (ecode != TDB_SUCCESS)
-                       goto unlock_err;
-       }
-out:
-       tdb_unlock_free_bucket(tdb, b_off);
-       return TDB_SUCCESS;
-
-unlock_err:
-       tdb_unlock_free_bucket(tdb, b_off);
-       return ecode;
-}
-
-/* List must not be locked if coalesce_ok is set. */
-enum TDB_ERROR add_free_record(struct tdb_context *tdb,
-                              tdb_off_t off, tdb_len_t len_with_header,
-                              enum tdb_lock_flags waitflag,
-                              bool coalesce)
-{
-       tdb_off_t b_off;
-       tdb_len_t len;
-       enum TDB_ERROR ecode;
-
-       assert(len_with_header >= sizeof(struct tdb_free_record));
-
-       len = len_with_header - sizeof(struct tdb_used_record);
-
-       b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(len));
-       ecode = tdb_lock_free_bucket(tdb, b_off, waitflag);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       ecode = enqueue_in_free(tdb, b_off, off, len, &coalesce);
-       check_list(tdb, b_off);
-
-       /* Coalescing unlocks free list. */
-       if (!ecode && coalesce)
-               ecode = coalesce_list(tdb, tdb->tdb2.ftable_off, b_off, 2);
-       else
-               tdb_unlock_free_bucket(tdb, b_off);
-       return ecode;
-}
-
-static size_t adjust_size(size_t keylen, size_t datalen)
-{
-       size_t size = keylen + datalen;
-
-       if (size < TDB_MIN_DATA_LEN)
-               size = TDB_MIN_DATA_LEN;
-
-       /* Round to next uint64_t boundary. */
-       return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL);
-}
-
-/* If we have enough left over to be useful, split that off. */
-static size_t record_leftover(size_t keylen, size_t datalen,
-                             bool want_extra, size_t total_len)
-{
-       ssize_t leftover;
-
-       if (want_extra)
-               datalen += datalen / 2;
-       leftover = total_len - adjust_size(keylen, datalen);
-
-       if (leftover < (ssize_t)sizeof(struct tdb_free_record))
-               return 0;
-
-       return leftover;
-}
-
-/* We need size bytes to put our key and data in. */
-static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
-                               tdb_off_t ftable_off,
-                               tdb_off_t bucket,
-                               size_t keylen, size_t datalen,
-                               bool want_extra,
-                               unsigned magic,
-                               unsigned hashlow)
-{
-       tdb_off_t off, b_off,best_off;
-       struct tdb_free_record best = { 0 };
-       double multiplier;
-       size_t size = adjust_size(keylen, datalen);
-       enum TDB_ERROR ecode;
-
-       tdb->stats.allocs++;
-       b_off = bucket_off(ftable_off, bucket);
-
-       /* FIXME: Try non-blocking wait first, to measure contention. */
-       /* Lock this bucket. */
-       ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT);
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       best.ftable_and_len = -1ULL;
-       best_off = 0;
-
-       /* Get slack if we're after extra. */
-       if (want_extra)
-               multiplier = 1.5;
-       else
-               multiplier = 1.0;
-
-       /* Walk the list to see if any are large enough, getting less fussy
-        * as we go. */
-       off = tdb_read_off(tdb, b_off);
-       if (TDB_OFF_IS_ERR(off)) {
-               ecode = TDB_OFF_TO_ERR(off);
-               goto unlock_err;
-       }
-       off &= TDB_OFF_MASK;
-
-       while (off) {
-               const struct tdb_free_record *r;
-               tdb_len_t len;
-               tdb_off_t next;
-
-               r = tdb_access_read(tdb, off, sizeof(*r), true);
-               if (TDB_PTR_IS_ERR(r)) {
-                       ecode = TDB_PTR_ERR(r);
-                       goto unlock_err;
-               }
-
-               if (frec_magic(r) != TDB_FREE_MAGIC) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                          "lock_and_alloc:"
-                                          " %llu non-free 0x%llx",
-                                          (long long)off,
-                                          (long long)r->magic_and_prev);
-                       tdb_access_release(tdb, r);
-                       goto unlock_err;
-               }
-
-               if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) {
-                       best_off = off;
-                       best = *r;
-               }
-
-               if (frec_len(&best) <= size * multiplier && best_off) {
-                       tdb_access_release(tdb, r);
-                       break;
-               }
-
-               multiplier *= 1.01;
-
-               next = r->next;
-               len = frec_len(r);
-               tdb_access_release(tdb, r);
-               off = next;
-       }
-
-       /* If we found anything at all, use it. */
-       if (best_off) {
-               struct tdb_used_record rec;
-               size_t leftover;
-
-               /* We're happy with this size: take it. */
-               ecode = remove_from_list(tdb, b_off, best_off, &best);
-               check_list(tdb, b_off);
-               if (ecode != TDB_SUCCESS) {
-                       goto unlock_err;
-               }
-
-               leftover = record_leftover(keylen, datalen, want_extra,
-                                          frec_len(&best));
-
-               assert(keylen + datalen + leftover <= frec_len(&best));
-               /* We need to mark non-free before we drop lock, otherwise
-                * coalesce() could try to merge it! */
-               ecode = set_header(tdb, &rec, magic, keylen, datalen,
-                                  frec_len(&best) - leftover, hashlow);
-               if (ecode != TDB_SUCCESS) {
-                       goto unlock_err;
-               }
-
-               ecode = tdb_write_convert(tdb, best_off, &rec, sizeof(rec));
-               if (ecode != TDB_SUCCESS) {
-                       goto unlock_err;
-               }
-
-               /* For futureproofing, we put a 0 in any unused space. */
-               if (rec_extra_padding(&rec)) {
-                       ecode = tdb->tdb2.io->twrite(tdb, best_off + sizeof(rec)
-                                                    + keylen + datalen, "", 1);
-                       if (ecode != TDB_SUCCESS) {
-                               goto unlock_err;
-                       }
-               }
-
-               /* Bucket of leftover will be <= current bucket, so nested
-                * locking is allowed. */
-               if (leftover) {
-                       tdb->stats.alloc_leftover++;
-                       ecode = add_free_record(tdb,
-                                               best_off + sizeof(rec)
-                                               + frec_len(&best) - leftover,
-                                               leftover, TDB_LOCK_WAIT, false);
-                       if (ecode != TDB_SUCCESS) {
-                               best_off = TDB_ERR_TO_OFF(ecode);
-                       }
-               }
-               tdb_unlock_free_bucket(tdb, b_off);
-
-               return best_off;
-       }
-
-       tdb_unlock_free_bucket(tdb, b_off);
-       return 0;
-
-unlock_err:
-       tdb_unlock_free_bucket(tdb, b_off);
-       return TDB_ERR_TO_OFF(ecode);
-}
-
-/* Get a free block from current free list, or 0 if none, -ve on error. */
-static tdb_off_t get_free(struct tdb_context *tdb,
-                         size_t keylen, size_t datalen, bool want_extra,
-                         unsigned magic, unsigned hashlow)
-{
-       tdb_off_t off, ftable_off;
-       tdb_off_t start_b, b, ftable;
-       bool wrapped = false;
-
-       /* If they are growing, add 50% to get to higher bucket. */
-       if (want_extra)
-               start_b = size_to_bucket(adjust_size(keylen,
-                                                    datalen + datalen / 2));
-       else
-               start_b = size_to_bucket(adjust_size(keylen, datalen));
-
-       ftable_off = tdb->tdb2.ftable_off;
-       ftable = tdb->tdb2.ftable;
-       while (!wrapped || ftable_off != tdb->tdb2.ftable_off) {
-               /* Start at exact size bucket, and search up... */
-               for (b = find_free_head(tdb, ftable_off, start_b);
-                    b < TDB_FREE_BUCKETS;
-                    b = find_free_head(tdb, ftable_off, b + 1)) {
-                       /* Try getting one from list. */
-                       off = lock_and_alloc(tdb, ftable_off,
-                                            b, keylen, datalen, want_extra,
-                                            magic, hashlow);
-                       if (TDB_OFF_IS_ERR(off))
-                               return off;
-                       if (off != 0) {
-                               if (b == start_b)
-                                       tdb->stats.alloc_bucket_exact++;
-                               if (b == TDB_FREE_BUCKETS - 1)
-                                       tdb->stats.alloc_bucket_max++;
-                               /* Worked?  Stay using this list. */
-                               tdb->tdb2.ftable_off = ftable_off;
-                               tdb->tdb2.ftable = ftable;
-                               return off;
-                       }
-                       /* Didn't work.  Try next bucket. */
-               }
-
-               if (TDB_OFF_IS_ERR(b)) {
-                       return b;
-               }
-
-               /* Hmm, try next table. */
-               ftable_off = next_ftable(tdb, ftable_off);
-               if (TDB_OFF_IS_ERR(ftable_off)) {
-                       return ftable_off;
-               }
-               ftable++;
-
-               if (ftable_off == 0) {
-                       wrapped = true;
-                       ftable_off = first_ftable(tdb);
-                       if (TDB_OFF_IS_ERR(ftable_off)) {
-                               return ftable_off;
-                       }
-                       ftable = 0;
-               }
-       }
-
-       return 0;
-}
-
-enum TDB_ERROR set_header(struct tdb_context *tdb,
-                         struct tdb_used_record *rec,
-                         unsigned magic, uint64_t keylen, uint64_t datalen,
-                         uint64_t actuallen, unsigned hashlow)
-{
-       uint64_t keybits = (fls64(keylen) + 1) / 2;
-
-       /* Use bottom bits of hash, so it's independent of hash table size. */
-       rec->magic_and_meta = (hashlow & ((1 << 11)-1))
-               | ((actuallen - (keylen + datalen)) << 11)
-               | (keybits << 43)
-               | ((uint64_t)magic << 48);
-       rec->key_and_data_len = (keylen | (datalen << (keybits*2)));
-
-       /* Encoding can fail on big values. */
-       if (rec_key_length(rec) != keylen
-           || rec_data_length(rec) != datalen
-           || rec_extra_padding(rec) != actuallen - (keylen + datalen)) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "Could not encode k=%llu,d=%llu,a=%llu",
-                                 (long long)keylen, (long long)datalen,
-                                 (long long)actuallen);
-       }
-       return TDB_SUCCESS;
-}
-
-/* You need 'size', this tells you how much you should expand by. */
-tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size)
-{
-       tdb_off_t new_size, top_size;
-
-       /* limit size in order to avoid using up huge amounts of memory for
-        * in memory tdbs if an oddball huge record creeps in */
-       if (size > 100 * 1024) {
-               top_size = map_size + size * 2;
-       } else {
-               top_size = map_size + size * 100;
-       }
-
-       /* always make room for at least top_size more records, and at
-          least 25% more space. if the DB is smaller than 100MiB,
-          otherwise grow it by 10% only. */
-       if (map_size > 100 * 1024 * 1024) {
-               new_size = map_size * 1.10;
-       } else {
-               new_size = map_size * 1.25;
-       }
-
-       /* Round the database up to a multiple of the page size */
-       if (new_size < top_size)
-               new_size = top_size;
-       return new_size - map_size;
-}
-
-/* Expand the database. */
-static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
-{
-       uint64_t old_size;
-       tdb_len_t wanted;
-       enum TDB_ERROR ecode;
-
-       /* Need to hold a hash lock to expand DB: transactions rely on it. */
-       if (!(tdb->flags & TDB_NOLOCK)
-           && !tdb->file->allrecord_lock.count && !tdb_has_hash_locks(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_expand: must hold lock during expand");
-       }
-
-       /* Only one person can expand file at a time. */
-       ecode = tdb_lock_expand(tdb, F_WRLCK);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Someone else may have expanded the file, so retry. */
-       old_size = tdb->file->map_size;
-       tdb->tdb2.io->oob(tdb, tdb->file->map_size, 1, true);
-       if (tdb->file->map_size != old_size) {
-               tdb_unlock_expand(tdb, F_WRLCK);
-               return TDB_SUCCESS;
-       }
-
-       /* Overallocate. */
-       wanted = tdb_expand_adjust(old_size, size);
-       /* We need room for the record header too. */
-       wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted);
-
-       ecode = tdb->tdb2.io->expand_file(tdb, wanted);
-       if (ecode != TDB_SUCCESS) {
-               tdb_unlock_expand(tdb, F_WRLCK);
-               return ecode;
-       }
-
-       /* We need to drop this lock before adding free record. */
-       tdb_unlock_expand(tdb, F_WRLCK);
-
-       tdb->stats.expands++;
-       return add_free_record(tdb, old_size, wanted, TDB_LOCK_WAIT, true);
-}
-
-/* This won't fail: it will expand the database if it has to. */
-tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
-               uint64_t hash, unsigned magic, bool growing)
-{
-       tdb_off_t off;
-
-       /* We can't hold pointers during this: we could unmap! */
-       assert(!tdb->tdb2.direct_access);
-
-       for (;;) {
-               enum TDB_ERROR ecode;
-               off = get_free(tdb, keylen, datalen, growing, magic, hash);
-               if (likely(off != 0))
-                       break;
-
-               ecode = tdb_expand(tdb, adjust_size(keylen, datalen));
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-       }
-
-       return off;
-}
diff --git a/ccan/tdb2/hash.c b/ccan/tdb2/hash.c
deleted file mode 100644 (file)
index 619d56f..0000000
+++ /dev/null
@@ -1,913 +0,0 @@
- /*
-   Trivial Database 2: hash handling
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/hash/hash.h>
-#include <assert.h>
-
-/* Default hash function: hash64_stable() with its 32-bit halves exchanged.
- * The final parameter is not used. */
-uint64_t tdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
-                         void *unused)
-{
-       /* hash64_stable's lower bits are the slightly better ones, but we
-        * consume hash bits from the top down: move them up there. */
-       const uint64_t stable = hash64_stable((const unsigned char *)key,
-                                             length, seed);
-       const uint64_t upper = stable << 32;
-       const uint64_t lower = stable >> 32;
-
-       return lower | upper;
-}
-
-/* Hash len bytes at ptr using the context's hash function, seed and
- * private hash data. */
-uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len)
-{
-       uint64_t hashval;
-
-       hashval = tdb->hash_fn(ptr, len, tdb->hash_seed, tdb->hash_data);
-       return hashval;
-}
-
-/* Hash the key of the record whose header is at off.
- * Returns 0 if either the header or the key cannot be read. */
-uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
-{
-       const struct tdb_used_record *hdr;
-       const void *keyptr;
-       uint64_t keylen, result = 0;
-
-       /* Peek at the header just long enough to learn the key length. */
-       hdr = tdb_access_read(tdb, off, sizeof(*hdr), true);
-       if (TDB_PTR_IS_ERR(hdr)) {
-               /* FIXME */
-               return result;
-       }
-       keylen = rec_key_length(hdr);
-       tdb_access_release(tdb, hdr);
-
-       /* The key follows the header directly. */
-       keyptr = tdb_access_read(tdb, off + sizeof(*hdr), keylen, false);
-       if (!TDB_PTR_IS_ERR(keyptr)) {
-               result = tdb_hash(tdb, keyptr, keylen);
-               tdb_access_release(tdb, keyptr);
-       }
-       return result;
-}
-
-/* Get bits from a value: returns the num bits of val whose least
- * significant bit is bit number start.  num may be at most 32. */
-static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
-{
-       assert(num <= 32);
-       /* Build the mask in 64 bits: num == 32 is allowed, and shifting a
-        * 32-bit 1U by 32 would be undefined. */
-       return (val >> start) & ((1ULL << num) - 1);
-}
-
-/* Consume the next num bits of the hash, working from the top down: that
- * way whole sections of the hash can be locked by using lock ranges. */
-static uint32_t use_bits(struct hash_info *h, unsigned num)
-{
-       unsigned shift;
-
-       h->hash_used += num;
-       /* The bits just claimed end where the unused ones begin. */
-       shift = 64 - h->hash_used;
-       return bits_from(h->h, shift, num);
-}
-
-/* Compare key with the key stored in the record at off (header *rec).
- * Returns true or false, or a negative error if the stored key cannot be
- * read.  Mismatches are counted in tdb->stats. */
-static tdb_bool_err key_matches(struct tdb_context *tdb,
-                               const struct tdb_used_record *rec,
-                               tdb_off_t off,
-                               const struct tdb_data *key)
-{
-       const char *stored;
-       tdb_bool_err same;
-
-       /* Different lengths can never match: don't bother reading. */
-       if (rec_key_length(rec) != key->dsize) {
-               tdb->stats.compare_wrong_keylen++;
-               return false;
-       }
-
-       stored = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false);
-       if (TDB_PTR_IS_ERR(stored)) {
-               return (tdb_bool_err)TDB_PTR_ERR(stored);
-       }
-
-       same = (memcmp(stored, key->dptr, key->dsize) == 0);
-       if (!same)
-               tdb->stats.compare_wrong_keycmp++;
-       tdb_access_release(tdb, stored);
-       return same;
-}
-
-/* Does the hash table entry val refer to a record holding key?
- *
- * The checks run from cheapest to dearest: the bits packed into the entry
- * itself, then the record header (read into *rec), and only then the stored
- * key.  Returns true or false, or a negative error if a read fails; every
- * rejection is counted in tdb->stats. */
-static tdb_bool_err match(struct tdb_context *tdb,
-                         struct hash_info *h,
-                         const struct tdb_data *key,
-                         tdb_off_t val,
-                         struct tdb_used_record *rec)
-{
-       const unsigned extra_shift
-               = 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA;
-       tdb_off_t recoff;
-       enum TDB_ERROR err;
-
-       tdb->stats.compares++;
-
-       /* The home bucket stored in the entry must be the one we want. */
-       if ((val & TDB_OFF_HASH_GROUP_MASK) != h->home_bucket) {
-               tdb->stats.compare_wrong_bucket++;
-               return false;
-       }
-
-       /* The extra bits stored in the entry must equal the next hash bits. */
-       if (bits_from(h->h, extra_shift, TDB_OFF_UPPER_STEAL_EXTRA)
-           != bits_from(val, TDB_OFF_HASH_EXTRA_BIT,
-                        TDB_OFF_UPPER_STEAL_EXTRA)) {
-               tdb->stats.compare_wrong_offsetbits++;
-               return false;
-       }
-
-       recoff = val & TDB_OFF_MASK;
-       err = tdb_read_convert(tdb, recoff, rec, sizeof(*rec));
-       if (err != TDB_SUCCESS) {
-               return (tdb_bool_err)err;
-       }
-
-       /* rec_hash() must agree with the low 11 bits of the full hash. */
-       if (rec_hash(rec) != (h->h & ((1 << 11)-1))) {
-               tdb->stats.compare_wrong_rechash++;
-               return false;
-       }
-
-       /* Everything cheap agrees: compare the keys themselves. */
-       return key_matches(tdb, rec, recoff, key);
-}
-
-/* File offset of a bucket's slot within the group starting at group_start;
- * bucket is taken modulo the number of buckets in a group. */
-static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket)
-{
-       const unsigned slot = bucket % (1 << TDB_HASH_GROUP_BITS);
-
-       return group_start + slot * sizeof(tdb_off_t);
-}
-
-/* True if the hash entry val has its subhash flag bit set. */
-bool is_subhash(tdb_off_t val)
-{
-       const tdb_off_t flag = val >> TDB_OFF_UPPER_STEAL_SUBHASH_BIT;
-
-       return (flag & 1) != 0;
-}
-
-/* Hash lock range for a top-level group: returns where the range starts
- * and stores its length in *size.
- * FIXME: Guess the depth, don't over-lock! */
-static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size)
-{
-       /* Hash bits left over once the group has been selected. */
-       const unsigned shift
-               = 64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
-
-       *size = 1ULL << shift;
-       return group << shift;
-}
-
-/* Search a chain of hash groups for key, starting with the group at offset
- * chain (find_and_lock calls this once its hash-bit loop has ended).
- *
- * Returns the offset of the matching record (its header is then in *rec),
- * 0 if no entry matches, or an error-encoded offset if a read fails.
- * h->group_start and h->group describe the last group read;
- * h->home_bucket and h->found_bucket are the matching bucket's index, or
- * else the index of the most recent empty bucket seen (0 if none was).
- * If tinfo is non-NULL and the key is found, a traverse level describing
- * the group that holds it is appended. */
-static tdb_off_t COLD find_in_chain(struct tdb_context *tdb,
-                                   struct tdb_data key,
-                                   tdb_off_t chain,
-                                   struct hash_info *h,
-                                   struct tdb_used_record *rec,
-                                   struct traverse_info *tinfo)
-{
-       tdb_off_t off, next;
-       enum TDB_ERROR ecode;
-
-       /* In case nothing is free, we set these to zero. */
-       h->home_bucket = h->found_bucket = 0;
-
-       for (off = chain; off; off = next) {
-               unsigned int i;
-
-               /* Load this link's group of buckets. */
-               h->group_start = off;
-               ecode = tdb_read_convert(tdb, off, h->group, sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       return TDB_ERR_TO_OFF(ecode);
-               }
-
-               for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-                       tdb_off_t recoff;
-                       if (!h->group[i]) {
-                               /* Remember this empty bucket. */
-                               h->home_bucket = h->found_bucket = i;
-                               continue;
-                       }
-
-                       /* We can insert extra bits via add_to_hash
-                        * empty bucket logic. */
-                       recoff = h->group[i] & TDB_OFF_MASK;
-                       ecode = tdb_read_convert(tdb, recoff, rec,
-                                                sizeof(*rec));
-                       if (ecode != TDB_SUCCESS) {
-                               return TDB_ERR_TO_OFF(ecode);
-                       }
-
-                       /* key_matches() gives true/false or a negative
-                        * error; all three travel through ecode here. */
-                       ecode = TDB_OFF_TO_ERR(key_matches(tdb, rec, recoff,
-                                                          &key));
-                       if (ecode < 0) {
-                               return TDB_ERR_TO_OFF(ecode);
-                       }
-                       /* 1 means key_matches() said true. */
-                       if (ecode == (enum TDB_ERROR)1) {
-                               h->home_bucket = h->found_bucket = i;
-
-                               if (tinfo) {
-                                       tinfo->levels[tinfo->num_levels]
-                                               .hashtable = off;
-                                       tinfo->levels[tinfo->num_levels]
-                                               .total_buckets
-                                               = 1 << TDB_HASH_GROUP_BITS;
-                                       tinfo->levels[tinfo->num_levels].entry
-                                               = i;
-                                       tinfo->num_levels++;
-                               }
-                               return recoff;
-                       }
-               }
-               /* No match in this link: follow the chain's next pointer. */
-               next = tdb_read_off(tdb, off
-                                   + offsetof(struct tdb_chain, next));
-               if (TDB_OFF_IS_ERR(next)) {
-                       return next;
-               }
-               /* Step over the record header to reach the next link's
-                * buckets. */
-               if (next)
-                       next += sizeof(struct tdb_used_record);
-       }
-       return 0;
-}
-
-/* This is the core routine which searches the hashtable for an entry.
- *
- * The hash lock range covering the key's top-level group is taken with
- * ltype before searching.
- * On error, no locks are held and -ve is returned.
- * Otherwise the lock stays held and hinfo is filled in (and the optional
- * tinfo).
- * If not found, the return value is 0.
- * If found, the return value is the offset, and *rec is the record. */
-tdb_off_t find_and_lock(struct tdb_context *tdb,
-                       struct tdb_data key,
-                       int ltype,
-                       struct hash_info *h,
-                       struct tdb_used_record *rec,
-                       struct traverse_info *tinfo)
-{
-       uint32_t i, group;
-       tdb_off_t hashtable, off;
-       enum TDB_ERROR ecode;
-
-       h->h = tdb_hash(tdb, key.dptr, key.dsize);
-       h->hash_used = 0;
-       group = use_bits(h, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
-       h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
-
-       h->hlock_start = hlock_range(group, &h->hlock_range);
-       ecode = tdb_lock_hashes(tdb, h->hlock_start, h->hlock_range, ltype,
-                               TDB_LOCK_WAIT);
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       hashtable = offsetof(struct tdb_header, hashtable);
-       if (tinfo) {
-               tinfo->toplevel_group = group;
-               tinfo->num_levels = 1;
-               tinfo->levels[0].entry = 0;
-               tinfo->levels[0].hashtable = hashtable
-                       + (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
-               tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
-       }
-
-       while (h->hash_used <= 64) {
-               /* Read in the hash group. */
-               h->group_start = hashtable
-                       + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-
-               ecode = tdb_read_convert(tdb, h->group_start, &h->group,
-                                        sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-
-               /* Pointer to another hash table?  Go down... */
-               if (is_subhash(h->group[h->home_bucket])) {
-                       hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
-                               + sizeof(struct tdb_used_record);
-                       if (tinfo) {
-                               /* When we come back, use *next* bucket */
-                               tinfo->levels[tinfo->num_levels-1].entry
-                                       += h->home_bucket + 1;
-                       }
-                       group = use_bits(h, TDB_SUBLEVEL_HASH_BITS
-                                        - TDB_HASH_GROUP_BITS);
-                       h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
-                       if (tinfo) {
-                               tinfo->levels[tinfo->num_levels].hashtable
-                                       = hashtable;
-                               tinfo->levels[tinfo->num_levels].total_buckets
-                                       = 1 << TDB_SUBLEVEL_HASH_BITS;
-                               tinfo->levels[tinfo->num_levels].entry
-                                       = group << TDB_HASH_GROUP_BITS;
-                               tinfo->num_levels++;
-                       }
-                       continue;
-               }
-
-               /* It's in this group: search (until 0 or all searched) */
-               for (i = 0, h->found_bucket = h->home_bucket;
-                    i < (1 << TDB_HASH_GROUP_BITS);
-                    i++, h->found_bucket = ((h->found_bucket+1)
-                                            % (1 << TDB_HASH_GROUP_BITS))) {
-                       tdb_bool_err berr;
-                       if (is_subhash(h->group[h->found_bucket]))
-                               continue;
-
-                       if (!h->group[h->found_bucket])
-                               break;
-
-                       berr = match(tdb, h, &key, h->group[h->found_bucket],
-                                    rec);
-                       if (berr < 0) {
-                               ecode = TDB_OFF_TO_ERR(berr);
-                               goto fail;
-                       }
-                       if (berr) {
-                               if (tinfo) {
-                                       tinfo->levels[tinfo->num_levels-1].entry
-                                               += h->found_bucket;
-                               }
-                               return h->group[h->found_bucket] & TDB_OFF_MASK;
-                       }
-               }
-               /* Didn't find it: h indicates where it would go. */
-               return 0;
-       }
-
-       /* The loop ran out of hash bits: what hangs here is a chain.  A
-        * failure inside it must drop the lock like any other error, so
-        * that the "no locks are held on error" promise holds. */
-       off = find_in_chain(tdb, key, hashtable, h, rec, tinfo);
-       if (TDB_OFF_IS_ERR(off)) {
-               ecode = TDB_OFF_TO_ERR(off);
-               goto fail;
-       }
-       return off;
-
-fail:
-       tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
-       return TDB_ERR_TO_OFF(ecode);
-}
-
-/* Pick the bucket of a group which the most entries call home.
- *
- * A simple test expanding a hash to 2GB compared three policies, measuring
- * worst/average/best density along the way:
- * 1) Expanding all the buckets at once:               3%/16%/30%
- * 2) Expanding the bucket the new entry was meant for: 4%/20%/38%
- * 3) Expanding the most-populated bucket:             6%/22%/41%
- *
- * So we go for the busiest bucket.  The entry about to be added (homed in
- * new_bucket) is counted too, and ties keep the earlier candidate.  Note
- * that an empty entry has zero home bits, so it is tallied under bucket 0.
- */
-static unsigned fullest_bucket(struct tdb_context *tdb,
-                              const tdb_off_t *group,
-                              unsigned new_bucket)
-{
-       unsigned tally[1 << TDB_HASH_GROUP_BITS] = { 0 };
-       unsigned int slot, busiest = new_bucket;
-
-       /* The newcomer counts towards its own home bucket. */
-       tally[new_bucket] = 1;
-
-       for (slot = 0; slot < (1 << TDB_HASH_GROUP_BITS); slot++) {
-               unsigned home;
-
-               /* Subhash pointers aren't entries of this group. */
-               if (is_subhash(group[slot]))
-                       continue;
-
-               home = group[slot] & TDB_OFF_HASH_GROUP_MASK;
-               tally[home]++;
-               if (tally[home] > tally[busiest])
-                       busiest = home;
-       }
-
-       return busiest;
-}
-
-/* Store encoded in the first empty slot of group, probing from bucket and
- * wrapping around.  Returns false if every slot is taken. */
-static bool put_into_group(tdb_off_t *group,
-                          unsigned bucket, tdb_off_t encoded)
-{
-       unsigned int step = 0;
-
-       while (step < (1 << TDB_HASH_GROUP_BITS)) {
-               tdb_off_t *slot
-                       = &group[(bucket + step) % (1 << TDB_HASH_GROUP_BITS)];
-
-               if (*slot == 0) {
-                       *slot = encoded;
-                       return true;
-               }
-               step++;
-       }
-       return false;
-}
-
-/* Like put_into_group(), but a full group is fatal: we abort(). */
-static void force_into_group(tdb_off_t *group,
-                            unsigned bucket, tdb_off_t encoded)
-{
-       const bool placed = put_into_group(group, bucket, encoded);
-
-       if (!placed)
-               abort();
-}
-
-/* Build a hash table entry for the record at new_off: the offset itself,
- * the home bucket, and the next TDB_OFF_UPPER_STEAL_EXTRA unused hash bits
- * placed at TDB_OFF_HASH_EXTRA_BIT (the bits match() compares). */
-static tdb_off_t encode_offset(tdb_off_t new_off, struct hash_info *h)
-{
-       const uint64_t extra
-               = bits_from(h->h,
-                           64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
-                           TDB_OFF_UPPER_STEAL_EXTRA);
-
-       return h->home_bucket
-               | new_off
-               | (extra << TDB_OFF_HASH_EXTRA_BIT);
-}
-
-/* Point the hash entry found earlier (h->found_bucket in the group at
- * h->group_start) at new_off by overwriting it on disk. */
-enum TDB_ERROR replace_in_hash(struct tdb_context *tdb,
-                              struct hash_info *h,
-                              tdb_off_t new_off)
-{
-       const tdb_off_t slot = hbucket_off(h->group_start, h->found_bucket);
-
-       return tdb_write_off(tdb, slot, encode_offset(new_off, h));
-}
-
-/* We slot in anywhere that's empty in the chain.
- *
- * subhash is the offset of a chain link's buckets, i.e. just past the
- * link's record header.  If the link is full we move on to the next link,
- * allocating, zeroing and linking in a fresh one when there is none.
- * Returns TDB_SUCCESS or the error of the step that failed. */
-static enum TDB_ERROR COLD add_to_chain(struct tdb_context *tdb,
-                                       tdb_off_t subhash,
-                                       tdb_off_t new_off)
-{
-       tdb_off_t entry;
-       enum TDB_ERROR ecode;
-
-       entry = tdb_find_zero_off(tdb, subhash, 1<<TDB_HASH_GROUP_BITS);
-       if (TDB_OFF_IS_ERR(entry)) {
-               return TDB_OFF_TO_ERR(entry);
-       }
-
-       /* No zero entry among the buckets: this link is full. */
-       if (entry == 1 << TDB_HASH_GROUP_BITS) {
-               tdb_off_t next;
-
-               next = tdb_read_off(tdb, subhash
-                                   + offsetof(struct tdb_chain, next));
-               if (TDB_OFF_IS_ERR(next)) {
-                       return TDB_OFF_TO_ERR(next);
-               }
-
-               if (!next) {
-                       next = alloc(tdb, 0, sizeof(struct tdb_chain), 0,
-                                    TDB_CHAIN_MAGIC, false);
-                       if (TDB_OFF_IS_ERR(next))
-                               return TDB_OFF_TO_ERR(next);
-                       ecode = zero_out(tdb,
-                                        next+sizeof(struct tdb_used_record),
-                                        sizeof(struct tdb_chain));
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-                       ecode = tdb_write_off(tdb, subhash
-                                             + offsetof(struct tdb_chain,
-                                                        next),
-                                             next);
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-               }
-               /* The next pointer holds the offset of the link's record
-                * header; its buckets start just after that header (see
-                * the zero_out() above and how find_in_chain() follows
-                * the same pointer). */
-               return add_to_chain(tdb,
-                                   next + sizeof(struct tdb_used_record),
-                                   new_off);
-       }
-
-       return tdb_write_off(tdb, subhash + entry * sizeof(tdb_off_t),
-                            new_off);
-}
-
-/* Add the entry val into a newly created subhash whose contents start at
- * subhash; hash_used says how many hash bits the levels above consumed.
- * If there aren't enough bits left for a subhash level, the entry goes
- * into a chain instead. */
-static enum TDB_ERROR add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
-                                    unsigned hash_used, tdb_off_t val)
-{
-       const tdb_off_t recoff = val & TDB_OFF_MASK;
-       struct hash_info info;
-       tdb_off_t *slots;
-       unsigned int gnum;
-
-       info.hash_used = hash_used;
-
-       if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64)
-               return add_to_chain(tdb, subhash, recoff);
-
-       /* The entry doesn't carry the full hash: recompute it. */
-       info.h = hash_record(tdb, recoff);
-       gnum = use_bits(&info, TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS);
-       info.group_start = subhash
-               + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-       info.home_bucket = use_bits(&info, TDB_HASH_GROUP_BITS);
-
-       slots = tdb_access_write(tdb, info.group_start,
-                                sizeof(*slots) << TDB_HASH_GROUP_BITS, true);
-       if (TDB_PTR_IS_ERR(slots)) {
-               return TDB_PTR_ERR(slots);
-       }
-
-       force_into_group(slots, info.home_bucket,
-                        encode_offset(recoff, &info));
-       return tdb_access_commit(tdb, slots);
-}
-
-/* Make room in the group held in h->group by hanging a new, zeroed subhash
- * (or a chain, once all 64 hash bits are used) off its fullest bucket.
- *
- * Entries whose home is that bucket move down into the new table; entries
- * which were sitting outside their home bucket are taken out and put back.
- * h->group is only changed in memory here: this function does not write it
- * back.  Returns TDB_SUCCESS, or the error from allocating, zeroing or
- * filling the new table. */
-static enum TDB_ERROR expand_group(struct tdb_context *tdb, struct hash_info *h)
-{
-       unsigned bucket, num_vals, i, magic;
-       size_t subsize;
-       tdb_off_t subhash;
-       tdb_off_t vals[1 << TDB_HASH_GROUP_BITS];
-       enum TDB_ERROR ecode;
-
-       /* Attach new empty subhash under fullest bucket. */
-       bucket = fullest_bucket(tdb, h->group, h->home_bucket);
-
-       /* No hash bits left to index another level: use a chain. */
-       if (h->hash_used == 64) {
-               tdb->stats.alloc_chain++;
-               subsize = sizeof(struct tdb_chain);
-               magic = TDB_CHAIN_MAGIC;
-       } else {
-               tdb->stats.alloc_subhash++;
-               subsize = (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS);
-               magic = TDB_HTABLE_MAGIC;
-       }
-
-       subhash = alloc(tdb, 0, subsize, 0, magic, false);
-       if (TDB_OFF_IS_ERR(subhash)) {
-               return TDB_OFF_TO_ERR(subhash);
-       }
-
-       /* Zero the new table's contents, which follow its record header. */
-       ecode = zero_out(tdb, subhash + sizeof(struct tdb_used_record),
-                        subsize);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Remove any which are destined for bucket or are in wrong place. */
-       num_vals = 0;
-       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned home_bucket = h->group[i] & TDB_OFF_HASH_GROUP_MASK;
-               if (!h->group[i] || is_subhash(h->group[i]))
-                       continue;
-               if (home_bucket == bucket || home_bucket != i) {
-                       vals[num_vals++] = h->group[i];
-                       h->group[i] = 0;
-               }
-       }
-       /* FIXME: This assert is valid, but we do this during unit test :( */
-       /* assert(num_vals); */
-
-       /* Overwrite expanded bucket with subhash pointer. */
-       h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT);
-
-       /* Point to actual contents of record. */
-       subhash += sizeof(struct tdb_used_record);
-
-       /* Put values back. */
-       for (i = 0; i < num_vals; i++) {
-               unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK;
-
-               if (this_bucket == bucket) {
-                       /* Homed in the expanded bucket: goes down a level. */
-                       ecode = add_to_subhash(tdb, subhash, h->hash_used,
-                                              vals[i]);
-                       if (ecode != TDB_SUCCESS)
-                               return ecode;
-               } else {
-                       /* There should be room to put this back. */
-                       force_into_group(h->group, this_bucket, vals[i]);
-               }
-       }
-       return TDB_SUCCESS;
-}
-
-/* Remove the entry at h->found_bucket from the group held in h->group and
- * write the group back to h->group_start.
- *
- * Entries following the hole (up to the next empty bucket) which are not in
- * their home bucket are lifted out and re-inserted starting from their home
- * bucket, so they may move closer to it now that a slot is free.
- * Returns the result of writing the group. */
-enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h)
-{
-       unsigned int i, num_movers = 0;
-       tdb_off_t movers[1 << TDB_HASH_GROUP_BITS];
-
-       h->group[h->found_bucket] = 0;
-       /* Walk the other buckets in order, starting just after the hole. */
-       for (i = 1; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned this_bucket;
-
-               this_bucket = (h->found_bucket+i) % (1 << TDB_HASH_GROUP_BITS);
-               /* Empty bucket?  We're done. */
-               if (!h->group[this_bucket])
-                       break;
-
-               /* Ignore subhashes. */
-               if (is_subhash(h->group[this_bucket]))
-                       continue;
-
-               /* If this one is not happy where it is, we'll move it. */
-               if ((h->group[this_bucket] & TDB_OFF_HASH_GROUP_MASK)
-                   != this_bucket) {
-                       movers[num_movers++] = h->group[this_bucket];
-                       h->group[this_bucket] = 0;
-               }
-       }
-
-       /* Put back the ones we erased. */
-       for (i = 0; i < num_movers; i++) {
-               force_into_group(h->group, movers[i] & TDB_OFF_HASH_GROUP_MASK,
-                                movers[i]);
-       }
-
-       /* Now we write back the hash group */
-       return tdb_write_convert(tdb, h->group_start,
-                                h->group, sizeof(h->group));
-}
-
-/* Insert an entry for the record at new_off, using the position
- * information left in h by an earlier search.
- *
- * If the search stopped on an empty bucket the entry goes there.  If the
- * search ended in a chain (h->hash_used > 64) it is added to the chain.
- * Otherwise the group is full: it is expanded, h is moved down a level if
- * our own home bucket became a subhash, and insertion is retried.
- * Returns TDB_SUCCESS or the first error met. */
-enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h,
-                          tdb_off_t new_off)
-{
-       enum TDB_ERROR ecode;
-
-       /* We hit an empty bucket during search?  That's where it goes. */
-       if (!h->group[h->found_bucket]) {
-               h->group[h->found_bucket] = encode_offset(new_off, h);
-               /* Write back the modified group. */
-               return tdb_write_convert(tdb, h->group_start,
-                                        h->group, sizeof(h->group));
-       }
-
-       /* Past the last hash level: h->group_start is a chain link. */
-       if (h->hash_used > 64)
-               return add_to_chain(tdb, h->group_start, new_off);
-
-       /* We're full.  Expand. */
-       ecode = expand_group(tdb, h);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (is_subhash(h->group[h->home_bucket])) {
-               /* We were expanded! */
-               tdb_off_t hashtable;
-               unsigned int gnum;
-
-               /* Write back the modified group. */
-               ecode = tdb_write_convert(tdb, h->group_start, h->group,
-                                         sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-               /* Move hashinfo down a level. */
-               hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
-                       + sizeof(struct tdb_used_record);
-               gnum = use_bits(h,TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
-               h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
-               h->group_start = hashtable
-                       + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-               /* h->group now has to hold the group one level down. */
-               ecode = tdb_read_convert(tdb, h->group_start, &h->group,
-                                        sizeof(h->group));
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-       }
-
-       /* Expanding the group must have made room if it didn't choose this
-        * bucket. */
-       if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){
-               return tdb_write_convert(tdb, h->group_start,
-                                        h->group, sizeof(h->group));
-       }
-
-       /* This can happen if all hashes in group (and us) dropped into same
-        * group in subhash. */
-       return add_to_hash(tdb, h, new_off);
-}
-
-/* Traverse support: returns offset of record, or 0 or -ve error.
- *
- * Resumes from the position saved in tinfo (the deepest entry of
- * tinfo->levels), scanning for the next non-zero hash entry.  Subhash
- * entries push a new level, and an exhausted level is popped again; 0 is
- * returned once the first level itself is exhausted.  On success
- * tinfo->prev remembers the offset returned, and an entry equal to
- * tinfo->prev is skipped. */
-static tdb_off_t iterate_hash(struct tdb_context *tdb,
-                             struct traverse_info *tinfo)
-{
-       tdb_off_t off, val, i;
-       struct traverse_level *tlevel;
-
-       tlevel = &tinfo->levels[tinfo->num_levels-1];
-
-again:
-       /* tdb_find_nonzero_off() yields total_buckets when nothing is left
-        * at this level. */
-       for (i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
-                                     tlevel->entry, tlevel->total_buckets);
-            i != tlevel->total_buckets;
-            i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
-                                     i+1, tlevel->total_buckets)) {
-               if (TDB_OFF_IS_ERR(i)) {
-                       return i;
-               }
-
-               val = tdb_read_off(tdb, tlevel->hashtable+sizeof(tdb_off_t)*i);
-               if (TDB_OFF_IS_ERR(val)) {
-                       return val;
-               }
-
-               off = val & TDB_OFF_MASK;
-
-               /* This makes the delete-all-in-traverse case work
-                * (and simplifies our logic a little). */
-               if (off == tinfo->prev)
-                       continue;
-
-               tlevel->entry = i;
-
-               if (!is_subhash(val)) {
-                       /* Found one. */
-                       tinfo->prev = off;
-                       return off;
-               }
-
-               /* When we come back, we want the next one */
-               tlevel->entry++;
-               /* Descend: the new level's table starts after the record
-                * header at off. */
-               tinfo->num_levels++;
-               tlevel++;
-               tlevel->hashtable = off + sizeof(struct tdb_used_record);
-               tlevel->entry = 0;
-               /* Next level is a chain? */
-               if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1))
-                       tlevel->total_buckets = (1 << TDB_HASH_GROUP_BITS);
-               else
-                       tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
-               goto again;
-       }
-
-       /* Nothing there? */
-       if (tinfo->num_levels == 1)
-               return 0;
-
-       /* Handle chained entries. */
-       if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) {
-               /* Follow the chain's next pointer before giving up on this
-                * level. */
-               tlevel->hashtable = tdb_read_off(tdb, tlevel->hashtable
-                                                + offsetof(struct tdb_chain,
-                                                           next));
-               if (TDB_OFF_IS_ERR(tlevel->hashtable)) {
-                       return tlevel->hashtable;
-               }
-               if (tlevel->hashtable) {
-                       tlevel->hashtable += sizeof(struct tdb_used_record);
-                       tlevel->entry = 0;
-                       goto again;
-               }
-       }
-
-       /* Go back up and keep searching. */
-       tinfo->num_levels--;
-       tlevel--;
-       goto again;
-}
-
-/* Return success if we find something, TDB_ERR_NOEXIST if none.
- *
- * Continues the traversal described by tinfo, one top-level group at a
- * time, holding that group's hash lock range (F_RDLCK) while it is
- * examined; no lock is held on return.
- *
- * On success kbuf->dsize is the key length and kbuf->dptr is the buffer
- * returned by tdb_alloc_read(): the key alone if dlen is NULL, otherwise
- * the key followed by the data, with *dlen set to the data length.
- * Other errors are returned as they occur. */
-enum TDB_ERROR next_in_hash(struct tdb_context *tdb,
-                           struct traverse_info *tinfo,
-                           TDB_DATA *kbuf, size_t *dlen)
-{
-       const unsigned group_bits = TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS;
-       tdb_off_t hl_start, hl_range, off;
-       enum TDB_ERROR ecode;
-
-       while (tinfo->toplevel_group < (1 << group_bits)) {
-               /* Lock the same range find_and_lock() and the chainlock
-                * functions use for this top-level group. */
-               hl_start = hlock_range(tinfo->toplevel_group, &hl_range);
-               ecode = tdb_lock_hashes(tdb, hl_start, hl_range, F_RDLCK,
-                                       TDB_LOCK_WAIT);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-
-               off = iterate_hash(tdb, tinfo);
-               if (off) {
-                       struct tdb_used_record rec;
-
-                       if (TDB_OFF_IS_ERR(off)) {
-                               ecode = TDB_OFF_TO_ERR(off);
-                               goto fail;
-                       }
-
-                       ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
-                       if (ecode != TDB_SUCCESS) {
-                               goto fail;
-                       }
-                       if (rec_magic(&rec) != TDB_USED_MAGIC) {
-                               /* %llu wants an unsigned long long. */
-                               ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                  TDB_LOG_ERROR,
-                                                  "next_in_hash:"
-                                                  " corrupt record at %llu",
-                                                  (unsigned long long)off);
-                               goto fail;
-                       }
-
-                       kbuf->dsize = rec_key_length(&rec);
-
-                       /* They want data as well? */
-                       if (dlen) {
-                               *dlen = rec_data_length(&rec);
-                               kbuf->dptr = tdb_alloc_read(tdb,
-                                                           off + sizeof(rec),
-                                                           kbuf->dsize
-                                                           + *dlen);
-                       } else {
-                               kbuf->dptr = tdb_alloc_read(tdb,
-                                                           off + sizeof(rec),
-                                                           kbuf->dsize);
-                       }
-                       tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
-                       if (TDB_PTR_IS_ERR(kbuf->dptr)) {
-                               return TDB_PTR_ERR(kbuf->dptr);
-                       }
-                       return TDB_SUCCESS;
-               }
-
-               tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
-
-               /* This group is exhausted: step to the next one. */
-               tinfo->toplevel_group++;
-               tinfo->levels[0].hashtable
-                       += (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-               tinfo->levels[0].entry = 0;
-       }
-       return TDB_ERR_NOEXIST;
-
-fail:
-       tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
-       return ecode;
-
-}
-
-/* Start a traversal: reset tinfo to the first group of the top-level hash
- * table, then fetch the first entry exactly as next_in_hash() would. */
-enum TDB_ERROR first_in_hash(struct tdb_context *tdb,
-                            struct traverse_info *tinfo,
-                            TDB_DATA *kbuf, size_t *dlen)
-{
-       struct traverse_level *top = &tinfo->levels[0];
-
-       top->hashtable = offsetof(struct tdb_header, hashtable);
-       top->total_buckets = (1 << TDB_HASH_GROUP_BITS);
-       top->entry = 0;
-
-       tinfo->num_levels = 1;
-       tinfo->toplevel_group = 0;
-       tinfo->prev = 0;
-
-       return next_in_hash(tdb, tinfo, kbuf, dlen);
-}
-
-/* Lock the hash range of the top-level group that key hashes to, and
- * trace the call under the name func.  Even if the entry isn't in this
- * hash bucket, you'd have to lock this bucket to find it. */
-static enum TDB_ERROR chainlock(struct tdb_context *tdb, const TDB_DATA *key,
-                               int ltype, enum tdb_lock_flags waitflag,
-                               const char *func)
-{
-       const unsigned int gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-       const uint64_t hashval = tdb_hash(tdb, key->dptr, key->dsize);
-       tdb_off_t start, len;
-       enum TDB_ERROR result;
-
-       /* The group is selected by the top gbits bits of the hash. */
-       start = hlock_range(bits_from(hashval, 64 - gbits, gbits), &len);
-
-       result = tdb_lock_hashes(tdb, start, len, ltype, waitflag);
-       tdb_trace_1rec(tdb, func, *key);
-       return result;
-}
-
-/* Write-lock the hash chain that key belongs to.  This is meant to be used
-   to reduce contention - it cannot guarantee how many records will be
-   locked.  The result is also left in tdb->last_error. */
-enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
-{
-       if (!(tdb->flags & TDB_VERSION1)) {
-               tdb->last_error = chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
-                                           "tdb_chainlock");
-               return tdb->last_error;
-       }
-
-       /* Version 1 databases have their own implementation. */
-       if (tdb1_chainlock(tdb, key) == -1)
-               return tdb->last_error;
-       return TDB_SUCCESS;
-}
-
-/* Drop the write lock on the hash chain that key belongs to. */
-void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
-{
-       const uint64_t hashval = tdb_hash(tdb, key.dptr, key.dsize);
-       const unsigned int gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-       tdb_off_t start, len;
-
-       /* Version 1 databases have their own implementation. */
-       if (tdb->flags & TDB_VERSION1) {
-               tdb1_chainunlock(tdb, key);
-               return;
-       }
-
-       /* Same range chainlock() computes for this key. */
-       start = hlock_range(bits_from(hashval, 64 - gbits, gbits), &len);
-
-       tdb_trace_1rec(tdb, "tdb_chainunlock", key);
-       tdb_unlock_hashes(tdb, start, len, F_WRLCK);
-}
-
-/* Read-lock the hash chain that key belongs to.  The result is also left
- * in tdb->last_error. */
-enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
-{
-       if (!(tdb->flags & TDB_VERSION1)) {
-               tdb->last_error = chainlock(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
-                                           "tdb_chainlock_read");
-               return tdb->last_error;
-       }
-
-       /* Version 1 databases have their own implementation. */
-       if (tdb1_chainlock_read(tdb, key) == -1)
-               return tdb->last_error;
-       return TDB_SUCCESS;
-}
-
-/* Drop the read lock on the hash chain that key belongs to. */
-void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
-{
-       const uint64_t hashval = tdb_hash(tdb, key.dptr, key.dsize);
-       const unsigned int gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-       tdb_off_t start, len;
-
-       /* Version 1 databases have their own implementation. */
-       if (tdb->flags & TDB_VERSION1) {
-               tdb1_chainunlock_read(tdb, key);
-               return;
-       }
-
-       /* Same range chainlock() computes for this key. */
-       start = hlock_range(bits_from(hashval, 64 - gbits, gbits), &len);
-
-       tdb_trace_1rec(tdb, "tdb_chainunlock_read", key);
-       tdb_unlock_hashes(tdb, start, len, F_RDLCK);
-}
diff --git a/ccan/tdb2/io.c b/ccan/tdb2/io.c
deleted file mode 100644 (file)
index b4a6f0b..0000000
+++ /dev/null
@@ -1,640 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-   Copyright (C) Rusty Russell                    2010
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <assert.h>
-#include <ccan/likely/likely.h>
-
-void tdb_munmap(struct tdb_file *file)
-{
-       if (file->fd == -1)
-               return;
-
-       if (file->map_ptr) {
-               munmap(file->map_ptr, file->map_size);
-               file->map_ptr = NULL;
-       }
-}
-
-void tdb_mmap(struct tdb_context *tdb)
-{
-       int mmap_flags;
-
-       if (tdb->flags & TDB_INTERNAL)
-               return;
-
-       if (tdb->flags & TDB_NOMMAP)
-               return;
-
-       if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
-               mmap_flags = PROT_READ;
-       else
-               mmap_flags = PROT_READ | PROT_WRITE;
-
-       /* size_t can be smaller than off_t. */
-       if ((size_t)tdb->file->map_size == tdb->file->map_size) {
-               tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
-                                         mmap_flags,
-                                         MAP_SHARED, tdb->file->fd, 0);
-       } else
-               tdb->file->map_ptr = MAP_FAILED;
-
-       /*
-        * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
-        */
-       if (tdb->file->map_ptr == MAP_FAILED) {
-               tdb->file->map_ptr = NULL;
-               tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                          "tdb_mmap failed for size %lld (%s)",
-                          (long long)tdb->file->map_size, strerror(errno));
-       }
-}
-
-/* check for an out of bounds access - if it is out of bounds then
-   see if the database has been expanded by someone else and expand
-   if necessary
-   note that "len" is the minimum length needed for the db.
-
-   If probe is true, len being too large isn't a failure.
-*/
-static enum TDB_ERROR tdb_oob(struct tdb_context *tdb,
-                             tdb_off_t off, tdb_len_t len, bool probe)
-{
-       struct stat st;
-       enum TDB_ERROR ecode;
-
-       /* We can't hold pointers during this: we could unmap! */
-       assert(!tdb->tdb2.direct_access
-              || (tdb->flags & TDB_NOLOCK)
-              || tdb_has_expansion_lock(tdb));
-
-       if (len + off < len) {
-               if (probe)
-                       return TDB_SUCCESS;
-
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_oob off %llu len %llu wrap\n",
-                                 (long long)off, (long long)len);
-       }
-
-       if (len + off <= tdb->file->map_size)
-               return TDB_SUCCESS;
-       if (tdb->flags & TDB_INTERNAL) {
-               if (probe)
-                       return TDB_SUCCESS;
-
-               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                          "tdb_oob len %lld beyond internal"
-                          " malloc size %lld",
-                          (long long)(off + len),
-                          (long long)tdb->file->map_size);
-               return TDB_ERR_IO;
-       }
-
-       ecode = tdb_lock_expand(tdb, F_RDLCK);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (fstat(tdb->file->fd, &st) != 0) {
-               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                          "Failed to fstat file: %s", strerror(errno));
-               tdb_unlock_expand(tdb, F_RDLCK);
-               return TDB_ERR_IO;
-       }
-
-       tdb_unlock_expand(tdb, F_RDLCK);
-
-       if (st.st_size < off + len) {
-               if (probe)
-                       return TDB_SUCCESS;
-
-               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                          "tdb_oob len %llu beyond eof at %zu",
-                          (long long)(off + len), st.st_size);
-               return TDB_ERR_IO;
-       }
-
-       /* Unmap, update size, remap */
-       tdb_munmap(tdb->file);
-
-       tdb->file->map_size = st.st_size;
-       tdb_mmap(tdb);
-       return TDB_SUCCESS;
-}
-
-/* Endian conversion: we only ever deal with 8 byte quantities */
-void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
-{
-       assert(size % 8 == 0);
-       if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
-               uint64_t i, *p = (uint64_t *)buf;
-               for (i = 0; i < size / 8; i++)
-                       p[i] = bswap_64(p[i]);
-       }
-       return buf;
-}
-
-/* Return first non-zero offset in offset array, or end, or -ve error. */
-/* FIXME: Return the off? */
-uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
-                             tdb_off_t base, uint64_t start, uint64_t end)
-{
-       uint64_t i;
-       const uint64_t *val;
-
-       /* Zero vs non-zero is the same unconverted: minor optimization. */
-       val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
-                             (end - start) * sizeof(tdb_off_t), false);
-       if (TDB_PTR_IS_ERR(val)) {
-               return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
-       }
-
-       for (i = 0; i < (end - start); i++) {
-               if (val[i])
-                       break;
-       }
-       tdb_access_release(tdb, val);
-       return start + i;
-}
-
-/* Return first zero offset in num offset array, or num, or -ve error. */
-uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
-                          uint64_t num)
-{
-       uint64_t i;
-       const uint64_t *val;
-
-       /* Zero vs non-zero is the same unconverted: minor optimization. */
-       val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
-       if (TDB_PTR_IS_ERR(val)) {
-               return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
-       }
-
-       for (i = 0; i < num; i++) {
-               if (!val[i])
-                       break;
-       }
-       tdb_access_release(tdb, val);
-       return i;
-}
-
-enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
-{
-       char buf[8192] = { 0 };
-       void *p = tdb->tdb2.io->direct(tdb, off, len, true);
-       enum TDB_ERROR ecode = TDB_SUCCESS;
-
-       assert(!(tdb->flags & TDB_RDONLY));
-       if (TDB_PTR_IS_ERR(p)) {
-               return TDB_PTR_ERR(p);
-       }
-       if (p) {
-               memset(p, 0, len);
-               return ecode;
-       }
-       while (len) {
-               unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
-               ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo);
-               if (ecode != TDB_SUCCESS) {
-                       break;
-               }
-               len -= todo;
-               off += todo;
-       }
-       return ecode;
-}
-
-tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
-{
-       tdb_off_t ret;
-       enum TDB_ERROR ecode;
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
-                                                   false);
-               if (TDB_PTR_IS_ERR(p)) {
-                       return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
-               }
-               if (p)
-                       return *p;
-       }
-
-       ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-       return ret;
-}
-
-/* write a lump of data at a specified offset */
-static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
-                               const void *buf, tdb_len_t len)
-{
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Write to read-only database");
-       }
-
-       ecode = tdb->tdb2.io->oob(tdb, off, len, false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (tdb->file->map_ptr) {
-               memcpy(off + (char *)tdb->file->map_ptr, buf, len);
-       } else {
-               ssize_t ret;
-               ret = pwrite(tdb->file->fd, buf, len, off);
-               if (ret != len) {
-                       /* This shouldn't happen: we avoid sparse files. */
-                       if (ret >= 0)
-                               errno = ENOSPC;
-
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "tdb_write: %zi at %zu len=%zu (%s)",
-                                         ret, (size_t)off, (size_t)len,
-                                         strerror(errno));
-               }
-       }
-       return TDB_SUCCESS;
-}
-
-/* read a lump of data at a specified offset */
-static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
-                              void *buf, tdb_len_t len)
-{
-       enum TDB_ERROR ecode;
-
-       ecode = tdb->tdb2.io->oob(tdb, off, len, false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (tdb->file->map_ptr) {
-               memcpy(buf, off + (char *)tdb->file->map_ptr, len);
-       } else {
-               ssize_t r = pread(tdb->file->fd, buf, len, off);
-               if (r != len) {
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "tdb_read failed with %zi at %zu "
-                                         "len=%zu (%s) map_size=%zu",
-                                         r, (size_t)off, (size_t)len,
-                                         strerror(errno),
-                                         (size_t)tdb->file->map_size);
-               }
-       }
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
-                                const void *rec, size_t len)
-{
-       enum TDB_ERROR ecode;
-
-       if (unlikely((tdb->flags & TDB_CONVERT))) {
-               void *conv = malloc(len);
-               if (!conv) {
-                       return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                         "tdb_write: no memory converting"
-                                         " %zu bytes", len);
-               }
-               memcpy(conv, rec, len);
-               ecode = tdb->tdb2.io->twrite(tdb, off,
-                                          tdb_convert(tdb, conv, len), len);
-               free(conv);
-       } else {
-               ecode = tdb->tdb2.io->twrite(tdb, off, rec, len);
-       }
-       return ecode;
-}
-
-enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
-                               void *rec, size_t len)
-{
-       enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len);
-       tdb_convert(tdb, rec, len);
-       return ecode;
-}
-
-enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
-                            tdb_off_t off, tdb_off_t val)
-{
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Write to read-only database");
-       }
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
-                                                   true);
-               if (TDB_PTR_IS_ERR(p)) {
-                       return TDB_PTR_ERR(p);
-               }
-               if (p) {
-                       *p = val;
-                       return TDB_SUCCESS;
-               }
-       }
-       return tdb_write_convert(tdb, off, &val, sizeof(val));
-}
-
-static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
-                            tdb_len_t len, unsigned int prefix)
-{
-       unsigned char *buf;
-       enum TDB_ERROR ecode;
-
-       /* some systems don't like zero length malloc */
-       buf = malloc(prefix + len ? prefix + len : 1);
-       if (!buf) {
-               tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
-                          "tdb_alloc_read malloc failed len=%zu",
-                          (size_t)(prefix + len));
-               return TDB_ERR_PTR(TDB_ERR_OOM);
-       } else {
-               ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len);
-               if (unlikely(ecode != TDB_SUCCESS)) {
-                       free(buf);
-                       return TDB_ERR_PTR(ecode);
-               }
-       }
-       return buf;
-}
-
-/* read a lump of data, allocating the space for it */
-void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
-{
-       return _tdb_alloc_read(tdb, offset, len, 0);
-}
-
-static enum TDB_ERROR fill(struct tdb_context *tdb,
-                          const void *buf, size_t size,
-                          tdb_off_t off, tdb_len_t len)
-{
-       while (len) {
-               size_t n = len > size ? size : len;
-               ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
-               if (ret != n) {
-                       if (ret >= 0)
-                               errno = ENOSPC;
-
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "fill failed:"
-                                         " %zi at %zu len=%zu (%s)",
-                                         ret, (size_t)off, (size_t)len,
-                                         strerror(errno));
-               }
-               len -= n;
-               off += n;
-       }
-       return TDB_SUCCESS;
-}
-
-/* expand a file.  we prefer to use ftruncate, as that is what posix
-  says to use for mmap expansion */
-static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
-                                     tdb_len_t addition)
-{
-       char buf[8192];
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Expand on read-only database");
-       }
-
-       if (tdb->flags & TDB_INTERNAL) {
-               char *new = realloc(tdb->file->map_ptr,
-                                   tdb->file->map_size + addition);
-               if (!new) {
-                       return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                         "No memory to expand database");
-               }
-               tdb->file->map_ptr = new;
-               tdb->file->map_size += addition;
-       } else {
-               /* Unmap before trying to write; old TDB claimed OpenBSD had
-                * problem with this otherwise. */
-               tdb_munmap(tdb->file);
-
-               /* If this fails, we try to fill anyway. */
-               if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
-                       ;
-
-               /* now fill the file with something. This ensures that the
-                  file isn't sparse, which would be very bad if we ran out of
-                  disk. This must be done with write, not via mmap */
-               memset(buf, 0x43, sizeof(buf));
-               ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
-                            addition);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-               tdb->file->map_size += addition;
-               tdb_mmap(tdb);
-       }
-       return TDB_SUCCESS;
-}
-
-const void *tdb_access_read(struct tdb_context *tdb,
-                           tdb_off_t off, tdb_len_t len, bool convert)
-{
-       void *ret = NULL;
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               ret = tdb->tdb2.io->direct(tdb, off, len, false);
-
-               if (TDB_PTR_IS_ERR(ret)) {
-                       return ret;
-               }
-       }
-       if (!ret) {
-               struct tdb_access_hdr *hdr;
-               hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
-               if (TDB_PTR_IS_ERR(hdr)) {
-                       return hdr;
-               }
-               hdr->next = tdb->tdb2.access;
-               tdb->tdb2.access = hdr;
-               ret = hdr + 1;
-               if (convert) {
-                       tdb_convert(tdb, (void *)ret, len);
-               }
-       } else
-               tdb->tdb2.direct_access++;
-
-       return ret;
-}
-
-void *tdb_access_write(struct tdb_context *tdb,
-                      tdb_off_t off, tdb_len_t len, bool convert)
-{
-       void *ret = NULL;
-
-       if (tdb->flags & TDB_RDONLY) {
-               tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                          "Write to read-only database");
-               return TDB_ERR_PTR(TDB_ERR_RDONLY);
-       }
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               ret = tdb->tdb2.io->direct(tdb, off, len, true);
-
-               if (TDB_PTR_IS_ERR(ret)) {
-                       return ret;
-               }
-       }
-
-       if (!ret) {
-               struct tdb_access_hdr *hdr;
-               hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
-               if (TDB_PTR_IS_ERR(hdr)) {
-                       return hdr;
-               }
-               hdr->next = tdb->tdb2.access;
-               tdb->tdb2.access = hdr;
-               hdr->off = off;
-               hdr->len = len;
-               hdr->convert = convert;
-               ret = hdr + 1;
-               if (convert)
-                       tdb_convert(tdb, (void *)ret, len);
-       } else
-               tdb->tdb2.direct_access++;
-
-       return ret;
-}
-
-static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
-{
-       struct tdb_access_hdr **hp;
-
-       for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) {
-               if (*hp + 1 == p)
-                       return hp;
-       }
-       return NULL;
-}
-
-void tdb_access_release(struct tdb_context *tdb, const void *p)
-{
-       struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
-
-       if (hp) {
-               hdr = *hp;
-               *hp = hdr->next;
-               free(hdr);
-       } else
-               tdb->tdb2.direct_access--;
-}
-
-enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
-{
-       struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
-       enum TDB_ERROR ecode;
-
-       if (hp) {
-               hdr = *hp;
-               if (hdr->convert)
-                       ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
-               else
-                       ecode = tdb_write(tdb, hdr->off, p, hdr->len);
-               *hp = hdr->next;
-               free(hdr);
-       } else {
-               tdb->tdb2.direct_access--;
-               ecode = TDB_SUCCESS;
-       }
-
-       return ecode;
-}
-
-static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
-                       bool write_mode)
-{
-       enum TDB_ERROR ecode;
-
-       if (unlikely(!tdb->file->map_ptr))
-               return NULL;
-
-       ecode = tdb_oob(tdb, off, len, false);
-       if (unlikely(ecode != TDB_SUCCESS))
-               return TDB_ERR_PTR(ecode);
-       return (char *)tdb->file->map_ptr + off;
-}
-
-void tdb_inc_seqnum(struct tdb_context *tdb)
-{
-       tdb_off_t seq;
-
-       if (tdb->flags & TDB_VERSION1) {
-               tdb1_increment_seqnum_nonblock(tdb);
-               return;
-       }
-
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               int64_t *direct;
-
-               direct = tdb->tdb2.io->direct(tdb,
-                                             offsetof(struct tdb_header,
-                                                      seqnum),
-                                             sizeof(*direct), true);
-               if (likely(direct)) {
-                       /* Don't let it go negative, even briefly */
-                       if (unlikely((*direct) + 1) < 0)
-                               *direct = 0;
-                       (*direct)++;
-                       return;
-               }
-       }
-
-       seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
-       if (!TDB_OFF_IS_ERR(seq)) {
-               seq++;
-               if (unlikely((int64_t)seq < 0))
-                       seq = 0;
-               tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
-       }
-}
-
-static const struct tdb_methods io_methods = {
-       tdb_read,
-       tdb_write,
-       tdb_oob,
-       tdb_expand_file,
-       tdb_direct,
-};
-
-/*
-  initialise the default methods table
-*/
-void tdb_io_init(struct tdb_context *tdb)
-{
-       tdb->tdb2.io = &io_methods;
-}
diff --git a/ccan/tdb2/lock.c b/ccan/tdb2/lock.c
deleted file mode 100644 (file)
index a71c95f..0000000
+++ /dev/null
@@ -1,895 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-#include <assert.h>
-#include <ccan/build_assert/build_assert.h>
-
-/* If we were threaded, we could wait for unlock, but we're not, so fail. */
-enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
-{
-       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                         "%s: lock owned by another tdb in this process.",
-                         call);
-}
-
-/* If we fork, we no longer really own locks. */
-bool check_lock_pid(struct tdb_context *tdb, const char *call, bool log)
-{
-       /* No locks?  No problem! */
-       if (tdb->file->allrecord_lock.count == 0
-           && tdb->file->num_lockrecs == 0) {
-               return true;
-       }
-
-       /* No fork?  No problem! */
-       if (tdb->file->locker == getpid()) {
-               return true;
-       }
-
-       if (log) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                          "%s: fork() detected after lock acquisition!"
-                          " (%u vs %u)", call, tdb->file->locker, getpid());
-       }
-       return false;
-}
-
-int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
-                  void *unused)
-{
-       struct flock fl;
-       int ret;
-
-       do {
-               fl.l_type = rw;
-               fl.l_whence = SEEK_SET;
-               fl.l_start = off;
-               fl.l_len = len;
-
-               if (waitflag)
-                       ret = fcntl(fd, F_SETLKW, &fl);
-               else
-                       ret = fcntl(fd, F_SETLK, &fl);
-       } while (ret != 0 && errno == EINTR);
-       return ret;
-}
-
-int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
-{
-       struct flock fl;
-       int ret;
-
-       do {
-               fl.l_type = F_UNLCK;
-               fl.l_whence = SEEK_SET;
-               fl.l_start = off;
-               fl.l_len = len;
-
-               ret = fcntl(fd, F_SETLKW, &fl);
-       } while (ret != 0 && errno == EINTR);
-       return ret;
-}
-
-static int lock(struct tdb_context *tdb,
-                     int rw, off_t off, off_t len, bool waitflag)
-{
-       int ret;
-       if (tdb->file->allrecord_lock.count == 0
-           && tdb->file->num_lockrecs == 0) {
-               tdb->file->locker = getpid();
-       }
-
-       tdb->stats.lock_lowlevel++;
-       ret = tdb->lock_fn(tdb->file->fd, rw, off, len, waitflag,
-                          tdb->lock_data);
-       if (!waitflag) {
-               tdb->stats.lock_nonblock++;
-               if (ret != 0)
-                       tdb->stats.lock_nonblock_fail++;
-       }
-       return ret;
-}
-
-static int unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
-{
-#if 0 /* Check they matched up locks and unlocks correctly. */
-       char line[80];
-       FILE *locks;
-       bool found = false;
-
-       locks = fopen("/proc/locks", "r");
-
-       while (fgets(line, 80, locks)) {
-               char *p;
-               int type, start, l;
-
-               /* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
-               p = strchr(line, ':') + 1;
-               if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
-                       continue;
-               p += strlen(" FLOCK  ADVISORY  ");
-               if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
-                       type = F_RDLCK;
-               else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
-                       type = F_WRLCK;
-               else
-                       abort();
-               p += 6;
-               if (atoi(p) != getpid())
-                       continue;
-               p = strchr(strchr(p, ' ') + 1, ' ') + 1;
-               start = atoi(p);
-               p = strchr(p, ' ') + 1;
-               if (strncmp(p, "EOF", 3) == 0)
-                       l = 0;
-               else
-                       l = atoi(p) - start + 1;
-
-               if (off == start) {
-                       if (len != l) {
-                               fprintf(stderr, "Len %u should be %u: %s",
-                                       (int)len, l, line);
-                               abort();
-                       }
-                       if (type != rw) {
-                               fprintf(stderr, "Type %s wrong: %s",
-                                       rw == F_RDLCK ? "READ" : "WRITE", line);
-                               abort();
-                       }
-                       found = true;
-                       break;
-               }
-       }
-
-       if (!found) {
-               fprintf(stderr, "Unlock on %u@%u not found!",
-                       (int)off, (int)len);
-               abort();
-       }
-
-       fclose(locks);
-#endif
-
-       return tdb->unlock_fn(tdb->file->fd, rw, off, len, tdb->lock_data);
-}
-
-/* a byte range locking function - return 0 on success
-   this functions locks len bytes at the specified offset.
-
-   note that a len of zero means lock to end of file
-*/
-enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
-                         int rw_type, tdb_off_t offset, tdb_off_t len,
-                         enum tdb_lock_flags flags)
-{
-       int ret;
-
-       if (tdb->flags & TDB_NOLOCK) {
-               return TDB_SUCCESS;
-       }
-
-       if (rw_type == F_WRLCK && (tdb->flags & TDB_RDONLY)) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
-                                 "Write lock attempted on read-only database");
-       }
-
-       /* A 32 bit system cannot open a 64-bit file, but it could have
-        * expanded since then: check here. */
-       if ((size_t)(offset + len) != offset + len) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_brlock: lock on giant offset %llu",
-                                 (long long)(offset + len));
-       }
-
-       ret = lock(tdb, rw_type, offset, len, flags & TDB_LOCK_WAIT);
-       if (ret != 0) {
-               /* Generic lock error. errno set by fcntl.
-                * EAGAIN is an expected return from non-blocking
-                * locks. */
-               if (!(flags & TDB_LOCK_PROBE)
-                   && (errno != EAGAIN && errno != EINTR)) {
-                       tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                  "tdb_brlock failed (fd=%d) at"
-                                  " offset %zu rw_type=%d flags=%d len=%zu:"
-                                  " %s",
-                                  tdb->file->fd, (size_t)offset, rw_type,
-                                  flags, (size_t)len, strerror(errno));
-               }
-               return TDB_ERR_LOCK;
-       }
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
-                           int rw_type, tdb_off_t offset, size_t len)
-{
-       if (tdb->flags & TDB_NOLOCK) {
-               return TDB_SUCCESS;
-       }
-
-       if (!check_lock_pid(tdb, "tdb_brunlock", true))
-               return TDB_ERR_LOCK;
-
-       if (unlock(tdb, rw_type, offset, len) == -1) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_brunlock failed (fd=%d) at offset %zu"
-                                 " rw_type=%d len=%zu: %s",
-                                 tdb->file->fd, (size_t)offset, rw_type,
-                                 (size_t)len, strerror(errno));
-       }
-       return TDB_SUCCESS;
-}
-
-/*
-  upgrade a read lock to a write lock. This needs to be handled in a
-  special way as some OSes (such as solaris) have too conservative
-  deadlock detection and claim a deadlock when progress can be
-  made. For those OSes we may loop for a while.
-*/
-enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb, off_t start)
-{
-       int count = 1000;
-
-       if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
-               return TDB_ERR_LOCK;
-
-       if (tdb->file->allrecord_lock.count != 1) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_allrecord_upgrade failed:"
-                                 " count %u too high",
-                                 tdb->file->allrecord_lock.count);
-       }
-
-       if (tdb->file->allrecord_lock.off != 1) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_allrecord_upgrade failed:"
-                                 " already upgraded?");
-       }
-
-       if (tdb->file->allrecord_lock.owner != tdb) {
-               return owner_conflict(tdb, "tdb_allrecord_upgrade");
-       }
-
-       while (count--) {
-               struct timeval tv;
-               if (tdb_brlock(tdb, F_WRLCK, start, 0,
-                              TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
-                       tdb->file->allrecord_lock.ltype = F_WRLCK;
-                       tdb->file->allrecord_lock.off = 0;
-                       return TDB_SUCCESS;
-               }
-               if (errno != EDEADLK) {
-                       break;
-               }
-               /* sleep for as short a time as we can - more portable than usleep() */
-               tv.tv_sec = 0;
-               tv.tv_usec = 1;
-               select(0, NULL, NULL, NULL, &tv);
-       }
-
-       if (errno != EAGAIN && errno != EINTR)
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                          "tdb_allrecord_upgrade failed");
-       return TDB_ERR_LOCK;
-}
-
-static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
-                                     const struct tdb_context *owner)
-{
-       unsigned int i;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off == offset) {
-                       if (owner && tdb->file->lockrecs[i].owner != owner)
-                               return NULL;
-                       return &tdb->file->lockrecs[i];
-               }
-       }
-       return NULL;
-}
-
-enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode;
-
-       if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
-               return TDB_ERR_LOCK;
-
-       ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
-                                  false);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       ecode = tdb_lock_open(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
-       if (ecode != TDB_SUCCESS) {
-               tdb_allrecord_unlock(tdb, F_WRLCK);
-               return ecode;
-       }
-       ecode = tdb_transaction_recover(tdb);
-       tdb_unlock_open(tdb, F_WRLCK);
-       tdb_allrecord_unlock(tdb, F_WRLCK);
-
-       return ecode;
-}
-
-/* lock an offset in the database. */
-enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
-                            tdb_off_t offset, int ltype,
-                            enum tdb_lock_flags flags)
-{
-       struct tdb_lock *new_lck;
-       enum TDB_ERROR ecode;
-
-       if (!(tdb->flags & TDB_VERSION1)
-           && offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
-                        + tdb->file->map_size / 8)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_nest_lock: invalid offset %zu ltype=%d",
-                                 (size_t)offset, ltype);
-       }
-
-       if (tdb->flags & TDB_NOLOCK)
-               return TDB_SUCCESS;
-
-       if (!check_lock_pid(tdb, "tdb_nest_lock", true)) {
-               return TDB_ERR_LOCK;
-       }
-
-       tdb->stats.locks++;
-
-       new_lck = find_nestlock(tdb, offset, NULL);
-       if (new_lck) {
-               if (new_lck->owner != tdb) {
-                       return owner_conflict(tdb, "tdb_nest_lock");
-               }
-
-               if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
-                       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                         "tdb_nest_lock:"
-                                         " offset %zu has read lock",
-                                         (size_t)offset);
-               }
-               /* Just increment the struct, posix locks don't stack. */
-               new_lck->count++;
-               return TDB_SUCCESS;
-       }
-
-#if 0
-       if (tdb->file->num_lockrecs
-           && offset >= TDB_HASH_LOCK_START
-           && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_nest_lock: already have a hash lock?");
-       }
-#endif
-
-       new_lck = (struct tdb_lock *)realloc(
-               tdb->file->lockrecs,
-               sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
-       if (new_lck == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "tdb_nest_lock:"
-                                 " unable to allocate %zu lock struct",
-                                 tdb->file->num_lockrecs + 1);
-       }
-       tdb->file->lockrecs = new_lck;
-
-       /* Since fcntl locks don't nest, we do a lock for the first one,
-          and simply bump the count for future ones */
-       ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* First time we grab a lock, perhaps someone died in commit? */
-       if (!(flags & TDB_LOCK_NOCHECK)
-           && tdb->file->num_lockrecs == 0) {
-               tdb_bool_err berr = tdb_needs_recovery(tdb);
-               if (berr != false) {
-                       tdb_brunlock(tdb, ltype, offset, 1);
-
-                       if (berr < 0)
-                               return TDB_OFF_TO_ERR(berr);
-                       ecode = tdb_lock_and_recover(tdb);
-                       if (ecode == TDB_SUCCESS) {
-                               ecode = tdb_brlock(tdb, ltype, offset, 1,
-                                                  flags);
-                       }
-                       if (ecode != TDB_SUCCESS) {
-                               return ecode;
-                       }
-               }
-       }
-
-       tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
-       tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
-       tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
-       tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
-       tdb->file->num_lockrecs++;
-
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
-                              tdb_off_t off, int ltype)
-{
-       struct tdb_lock *lck;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_NOLOCK)
-               return TDB_SUCCESS;
-
-       lck = find_nestlock(tdb, off, tdb);
-       if ((lck == NULL) || (lck->count == 0)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_nest_unlock: no lock for %zu",
-                                 (size_t)off);
-       }
-
-       if (lck->count > 1) {
-               lck->count--;
-               return TDB_SUCCESS;
-       }
-
-       /*
-        * This lock has count==1 left, so we need to unlock it in the
-        * kernel. We don't bother with decrementing the in-memory array
-        * element, we're about to overwrite it with the last array element
-        * anyway.
-        */
-       ecode = tdb_brunlock(tdb, ltype, off, 1);
-
-       /*
-        * Shrink the array by overwriting the element just unlocked with the
-        * last array element.
-        */
-       *lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];
-
-       return ecode;
-}
-
-/*
-  get the transaction lock
- */
-enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
-{
-       return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
-}
-
-/*
-  release the transaction lock
- */
-void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
-{
-       tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
-}
-
-/* We only need to lock individual bytes, but Linux merges consecutive locks
- * so we lock in contiguous ranges. */
-enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
-                               int ltype, enum tdb_lock_flags flags,
-                               tdb_off_t off, tdb_off_t len)
-{
-       enum TDB_ERROR ecode;
-       enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
-
-       if (len <= 1) {
-               /* 0 would mean to end-of-file... */
-               assert(len != 0);
-               /* Single hash.  Just do blocking lock. */
-               return tdb_brlock(tdb, ltype, off, len, flags);
-       }
-
-       /* First we try non-blocking. */
-       ecode = tdb_brlock(tdb, ltype, off, len, nb_flags);
-       if (ecode != TDB_ERR_LOCK) {
-               return ecode;
-       }
-
-       /* Try locking first half, then second. */
-       ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       ecode = tdb_lock_gradual(tdb, ltype, flags,
-                                off + len / 2, len - len / 2);
-       if (ecode != TDB_SUCCESS) {
-               tdb_brunlock(tdb, ltype, off, len / 2);
-       }
-       return ecode;
-}
-
-/* lock/unlock entire database.  It can only be upgradable if you have some
- * other way of guaranteeing exclusivity (ie. transaction write lock). */
-enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
-                                 enum tdb_lock_flags flags, bool upgradable)
-{
-       enum TDB_ERROR ecode;
-       tdb_bool_err berr;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_allrecord_lock(tdb, ltype, flags, upgradable) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       if (tdb->flags & TDB_NOLOCK)
-               return TDB_SUCCESS;
-
-       if (!check_lock_pid(tdb, "tdb_allrecord_lock", true)) {
-               return TDB_ERR_LOCK;
-       }
-
-       if (tdb->file->allrecord_lock.count) {
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       return owner_conflict(tdb, "tdb_allrecord_lock");
-               }
-
-               if (ltype == F_RDLCK
-                   || tdb->file->allrecord_lock.ltype == F_WRLCK) {
-                       tdb->file->allrecord_lock.count++;
-                       return TDB_SUCCESS;
-               }
-
-               /* a global lock of a different type exists */
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                 "tdb_allrecord_lock: already have %s lock",
-                                 tdb->file->allrecord_lock.ltype == F_RDLCK
-                                 ? "read" : "write");
-       }
-
-       if (tdb_has_hash_locks(tdb)) {
-               /* can't combine global and chain locks */
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                 "tdb_allrecord_lock:"
-                                 " already have chain lock");
-       }
-
-       if (upgradable && ltype != F_RDLCK) {
-               /* tdb error: you can't upgrade a write lock! */
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_allrecord_lock:"
-                                 " can't upgrade a write lock");
-       }
-
-       tdb->stats.locks++;
-again:
-       /* Lock hashes, gradually. */
-       ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
-                                TDB_HASH_LOCK_RANGE);
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       /* Lock free tables: there to end of file. */
-       ecode = tdb_brlock(tdb, ltype,
-                          TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
-                          0, flags);
-       if (ecode != TDB_SUCCESS) {
-               tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
-                            TDB_HASH_LOCK_RANGE);
-               return ecode;
-       }
-
-       tdb->file->allrecord_lock.owner = tdb;
-       tdb->file->allrecord_lock.count = 1;
-       /* If it's upgradable, it's actually exclusive so we can treat
-        * it as a write lock. */
-       tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
-       tdb->file->allrecord_lock.off = upgradable;
-
-       /* Now check for needing recovery. */
-       if (flags & TDB_LOCK_NOCHECK)
-               return TDB_SUCCESS;
-
-       berr = tdb_needs_recovery(tdb);
-       if (likely(berr == false))
-               return TDB_SUCCESS;
-
-       tdb_allrecord_unlock(tdb, ltype);
-       if (berr < 0)
-               return TDB_OFF_TO_ERR(berr);
-       ecode = tdb_lock_and_recover(tdb);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-       goto again;
-}
-
-enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb,
-                            int ltype, enum tdb_lock_flags flags)
-{
-       return tdb_nest_lock(tdb, TDB_OPEN_LOCK, ltype, flags);
-}
-
-void tdb_unlock_open(struct tdb_context *tdb, int ltype)
-{
-       tdb_nest_unlock(tdb, TDB_OPEN_LOCK, ltype);
-}
-
-bool tdb_has_open_lock(struct tdb_context *tdb)
-{
-       return !(tdb->flags & TDB_NOLOCK)
-               && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
-}
-
-enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
-{
-       /* Lock doesn't protect data, so don't check (we recurse if we do!) */
-       return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
-                            TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
-}
-
-void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
-{
-       tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
-}
-
-/* unlock entire db */
-void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
-{
-       if (tdb->flags & TDB_VERSION1) {
-               tdb1_allrecord_unlock(tdb, ltype);
-               return;
-       }
-
-       if (tdb->flags & TDB_NOLOCK)
-               return;
-
-       if (tdb->file->allrecord_lock.count == 0) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                          "tdb_allrecord_unlock: not locked!");
-               return;
-       }
-
-       if (tdb->file->allrecord_lock.owner != tdb) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                          "tdb_allrecord_unlock: not locked by us!");
-               return;
-       }
-
-       /* Upgradable locks are marked as write locks. */
-       if (tdb->file->allrecord_lock.ltype != ltype
-           && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
-               tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                          "tdb_allrecord_unlock: have %s lock",
-                          tdb->file->allrecord_lock.ltype == F_RDLCK
-                          ? "read" : "write");
-               return;
-       }
-
-       if (tdb->file->allrecord_lock.count > 1) {
-               tdb->file->allrecord_lock.count--;
-               return;
-       }
-
-       tdb->file->allrecord_lock.count = 0;
-       tdb->file->allrecord_lock.ltype = 0;
-
-       tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
-}
-
-bool tdb_has_expansion_lock(struct tdb_context *tdb)
-{
-       return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
-}
-
-bool tdb_has_hash_locks(struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
-                   && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
-                                                    + TDB_HASH_LOCK_RANGE))
-                       return true;
-       }
-       return false;
-}
-
-static bool tdb_has_free_lock(struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       if (tdb->flags & TDB_NOLOCK)
-               return false;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off
-                   > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
-                       return true;
-       }
-       return false;
-}
-
-enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
-                              tdb_off_t hash_lock,
-                              tdb_len_t hash_range,
-                              int ltype, enum tdb_lock_flags waitflag)
-{
-       /* FIXME: Do this properly, using hlock_range */
-       unsigned l = TDB_HASH_LOCK_START
-               + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (!check_lock_pid(tdb, "tdb_lock_hashes", true))
-                       return TDB_ERR_LOCK;
-
-               if (tdb->file->allrecord_lock.owner != tdb)
-                       return owner_conflict(tdb, "tdb_lock_hashes");
-               if (ltype == tdb->file->allrecord_lock.ltype
-                   || ltype == F_RDLCK) {
-                       return TDB_SUCCESS;
-               }
-
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                 "tdb_lock_hashes:"
-                                 " already have %s allrecordlock",
-                                 tdb->file->allrecord_lock.ltype == F_RDLCK
-                                 ? "read" : "write");
-       }
-
-       if (tdb_has_free_lock(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_hashes: already have free lock");
-       }
-
-       if (tdb_has_expansion_lock(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_hashes:"
-                                 " already have expansion lock");
-       }
-
-       return tdb_nest_lock(tdb, l, ltype, waitflag);
-}
-
-enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
-                                tdb_off_t hash_lock,
-                                tdb_len_t hash_range, int ltype)
-{
-       unsigned l = TDB_HASH_LOCK_START
-               + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
-
-       if (tdb->flags & TDB_NOLOCK)
-               return 0;
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (tdb->file->allrecord_lock.ltype == F_RDLCK
-                   && ltype == F_WRLCK) {
-                       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                         "tdb_unlock_hashes RO allrecord!");
-               }
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                         "tdb_unlock_hashes:"
-                                         " not locked by us!");
-               }
-               return TDB_SUCCESS;
-       }
-
-       return tdb_nest_unlock(tdb, l, ltype);
-}
-
-/* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
- * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide.
- * The result is that on 32 bit systems we don't use lock values > 2^31 on
- * files that are less than 4GB.
- */
-static tdb_off_t free_lock_off(tdb_off_t b_off)
-{
-       return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
-               + b_off / sizeof(tdb_off_t);
-}
-
-enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
-                                   enum tdb_lock_flags waitflag)
-{
-       assert(b_off >= sizeof(struct tdb_header));
-
-       if (tdb->flags & TDB_NOLOCK)
-               return 0;
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (!check_lock_pid(tdb, "tdb_lock_free_bucket", true))
-                       return TDB_ERR_LOCK;
-
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       return owner_conflict(tdb, "tdb_lock_free_bucket");
-               }
-
-               if (tdb->file->allrecord_lock.ltype == F_WRLCK)
-                       return 0;
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_free_bucket with"
-                                 " read-only allrecordlock!");
-       }
-
-#if 0 /* FIXME */
-       if (tdb_has_expansion_lock(tdb)) {
-               return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                 "tdb_lock_free_bucket:"
-                                 " already have expansion lock");
-       }
-#endif
-
-       return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
-}
-
-void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
-{
-       if (tdb->file->allrecord_lock.count)
-               return;
-
-       tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
-}
-
-enum TDB_ERROR tdb_lockall(struct tdb_context *tdb)
-{
-       return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
-}
-
-void tdb_unlockall(struct tdb_context *tdb)
-{
-       tdb_allrecord_unlock(tdb, F_WRLCK);
-}
-
-enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb)
-{
-       return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
-}
-
-void tdb_unlockall_read(struct tdb_context *tdb)
-{
-       tdb_allrecord_unlock(tdb, F_RDLCK);
-}
-
-void tdb_lock_cleanup(struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       /* We don't want to warn: they're allowed to close tdb after fork. */
-       if (!check_lock_pid(tdb, "tdb_close", false))
-               return;
-
-       while (tdb->file->allrecord_lock.count
-              && tdb->file->allrecord_lock.owner == tdb) {
-               tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
-       }
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].owner == tdb) {
-                       tdb_nest_unlock(tdb,
-                                       tdb->file->lockrecs[i].off,
-                                       tdb->file->lockrecs[i].ltype);
-                       i--;
-               }
-       }
-}
diff --git a/ccan/tdb2/open.c b/ccan/tdb2/open.c
deleted file mode 100644 (file)
index e238d99..0000000
+++ /dev/null
@@ -1,884 +0,0 @@
- /*
-   Trivial Database 2: opening and closing TDBs
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/build_assert/build_assert.h>
-#include <assert.h>
-
-/* all tdbs, to detect double-opens (fcntl file don't nest!) */
-static struct tdb_context *tdbs = NULL;
-
-static struct tdb_file *find_file(dev_t device, ino_t ino)
-{
-       struct tdb_context *i;
-
-       for (i = tdbs; i; i = i->next) {
-               if (i->file->device == device && i->file->inode == ino) {
-                       i->file->refcnt++;
-                       return i->file;
-               }
-       }
-       return NULL;
-}
-
-static bool read_all(int fd, void *buf, size_t len)
-{
-       while (len) {
-               ssize_t ret;
-               ret = read(fd, buf, len);
-               if (ret < 0)
-                       return false;
-               if (ret == 0) {
-                       /* ETOOSHORT? */
-                       errno = EWOULDBLOCK;
-                       return false;
-               }
-               buf = (char *)buf + ret;
-               len -= ret;
-       }
-       return true;
-}
-
-static uint64_t random_number(struct tdb_context *tdb)
-{
-       int fd;
-       uint64_t ret = 0;
-       struct timeval now;
-
-       fd = open("/dev/urandom", O_RDONLY);
-       if (fd >= 0) {
-               if (read_all(fd, &ret, sizeof(ret))) {
-                       close(fd);
-                       return ret;
-               }
-               close(fd);
-       }
-       /* FIXME: Untested!  Based on Wikipedia protocol description! */
-       fd = open("/dev/egd-pool", O_RDWR);
-       if (fd >= 0) {
-               /* Command is 1, next byte is size we want to read. */
-               char cmd[2] = { 1, sizeof(uint64_t) };
-               if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
-                       char reply[1 + sizeof(uint64_t)];
-                       int r = read(fd, reply, sizeof(reply));
-                       if (r > 1) {
-                               /* Copy at least some bytes. */
-                               memcpy(&ret, reply+1, r - 1);
-                               if (reply[0] == sizeof(uint64_t)
-                                   && r == sizeof(reply)) {
-                                       close(fd);
-                                       return ret;
-                               }
-                       }
-               }
-               close(fd);
-       }
-
-       /* Fallback: pid and time. */
-       gettimeofday(&now, NULL);
-       ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
-       tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                  "tdb_open: random from getpid and time");
-       return ret;
-}
-
-static void tdb2_context_init(struct tdb_context *tdb)
-{
-       /* Initialize the TDB2 fields here */
-       tdb_io_init(tdb);
-       tdb->tdb2.direct_access = 0;
-       tdb->tdb2.transaction = NULL;
-       tdb->tdb2.access = NULL;
-}
-
-struct new_database {
-       struct tdb_header hdr;
-       struct tdb_freetable ftable;
-};
-
-/* initialise a new database */
-static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
-                                      struct tdb_attribute_seed *seed,
-                                      struct tdb_header *hdr)
-{
-       /* We make it up in memory, then write it out if not internal */
-       struct new_database newdb;
-       unsigned int magic_len;
-       ssize_t rlen;
-       enum TDB_ERROR ecode;
-
-       /* Fill in the header */
-       newdb.hdr.version = TDB_VERSION;
-       if (seed)
-               newdb.hdr.hash_seed = seed->seed;
-       else
-               newdb.hdr.hash_seed = random_number(tdb);
-       newdb.hdr.hash_test = TDB_HASH_MAGIC;
-       newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test,
-                                          sizeof(newdb.hdr.hash_test),
-                                          newdb.hdr.hash_seed,
-                                          tdb->hash_data);
-       newdb.hdr.recovery = 0;
-       newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK;
-       newdb.hdr.seqnum = 0;
-       newdb.hdr.capabilities = 0;
-       memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
-       /* Initial hashes are empty. */
-       memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
-
-       /* Free is empty. */
-       newdb.hdr.free_table = offsetof(struct new_database, ftable);
-       memset(&newdb.ftable, 0, sizeof(newdb.ftable));
-       ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
-                          sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
-                          sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
-                          0);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Magic food */
-       memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
-       strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
-
-       /* This creates an endian-converted database, as if read from disk */
-       magic_len = sizeof(newdb.hdr.magic_food);
-       tdb_convert(tdb,
-                   (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
-
-       *hdr = newdb.hdr;
-
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->file->map_size = sizeof(newdb);
-               tdb->file->map_ptr = malloc(tdb->file->map_size);
-               if (!tdb->file->map_ptr) {
-                       return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                         "tdb_new_database:"
-                                         " failed to allocate");
-               }
-               memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size);
-               return TDB_SUCCESS;
-       }
-       if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_new_database:"
-                                 " failed to seek: %s", strerror(errno));
-       }
-
-       if (ftruncate(tdb->file->fd, 0) == -1) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_new_database:"
-                                 " failed to truncate: %s", strerror(errno));
-       }
-
-       rlen = write(tdb->file->fd, &newdb, sizeof(newdb));
-       if (rlen != sizeof(newdb)) {
-               if (rlen >= 0)
-                       errno = ENOSPC;
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_new_database: %zi writing header: %s",
-                                 rlen, strerror(errno));
-       }
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb)
-{
-       tdb->file = malloc(sizeof(*tdb->file));
-       if (!tdb->file)
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "tdb_open: cannot alloc tdb_file structure");
-       tdb->file->num_lockrecs = 0;
-       tdb->file->lockrecs = NULL;
-       tdb->file->allrecord_lock.count = 0;
-       tdb->file->refcnt = 1;
-       tdb->file->map_ptr = NULL;
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
-                                const union tdb_attribute *attr)
-{
-       switch (attr->base.attr) {
-       case TDB_ATTRIBUTE_LOG:
-               tdb->log_fn = attr->log.fn;
-               tdb->log_data = attr->log.data;
-               break;
-       case TDB_ATTRIBUTE_HASH:
-       case TDB_ATTRIBUTE_SEED:
-       case TDB_ATTRIBUTE_OPENHOOK:
-       case TDB_ATTRIBUTE_TDB1_HASHSIZE:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_set_attribute:"
-                                    " cannot set %s after opening",
-                                    attr->base.attr == TDB_ATTRIBUTE_HASH
-                                    ? "TDB_ATTRIBUTE_HASH"
-                                    : attr->base.attr == TDB_ATTRIBUTE_SEED
-                                    ? "TDB_ATTRIBUTE_SEED"
-                                    : attr->base.attr == TDB_ATTRIBUTE_OPENHOOK
-                                    ? "TDB_ATTRIBUTE_OPENHOOK"
-                                    : "TDB_ATTRIBUTE_TDB1_HASHSIZE");
-       case TDB_ATTRIBUTE_STATS:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_set_attribute:"
-                                    " cannot set TDB_ATTRIBUTE_STATS");
-       case TDB_ATTRIBUTE_FLOCK:
-               tdb->lock_fn = attr->flock.lock;
-               tdb->unlock_fn = attr->flock.unlock;
-               tdb->lock_data = attr->flock.data;
-               break;
-       default:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_set_attribute:"
-                                    " unknown attribute type %u",
-                                    attr->base.attr);
-       }
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
-                                union tdb_attribute *attr)
-{
-       switch (attr->base.attr) {
-       case TDB_ATTRIBUTE_LOG:
-               if (!tdb->log_fn)
-                       return tdb->last_error = TDB_ERR_NOEXIST;
-               attr->log.fn = tdb->log_fn;
-               attr->log.data = tdb->log_data;
-               break;
-       case TDB_ATTRIBUTE_HASH:
-               attr->hash.fn = tdb->hash_fn;
-               attr->hash.data = tdb->hash_data;
-               break;
-       case TDB_ATTRIBUTE_SEED:
-               if (tdb->flags & TDB_VERSION1)
-                       return tdb->last_error
-                               = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                    "tdb_get_attribute:"
-                                    " cannot get TDB_ATTRIBUTE_SEED"
-                                    " on TDB1 tdb.");
-               attr->seed.seed = tdb->hash_seed;
-               break;
-       case TDB_ATTRIBUTE_OPENHOOK:
-               if (!tdb->openhook)
-                       return tdb->last_error = TDB_ERR_NOEXIST;
-               attr->openhook.fn = tdb->openhook;
-               attr->openhook.data = tdb->openhook_data;
-               break;
-       case TDB_ATTRIBUTE_STATS: {
-               size_t size = attr->stats.size;
-               if (size > tdb->stats.size)
-                       size = tdb->stats.size;
-               memcpy(&attr->stats, &tdb->stats, size);
-               break;
-       }
-       case TDB_ATTRIBUTE_FLOCK:
-               attr->flock.lock = tdb->lock_fn;
-               attr->flock.unlock = tdb->unlock_fn;
-               attr->flock.data = tdb->lock_data;
-               break;
-       case TDB_ATTRIBUTE_TDB1_HASHSIZE:
-               if (!(tdb->flags & TDB_VERSION1))
-                       return tdb->last_error
-                               = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                    "tdb_get_attribute:"
-                                    " cannot get TDB_ATTRIBUTE_TDB1_HASHSIZE"
-                                    " on TDB2 tdb.");
-               attr->tdb1_hashsize.hsize = tdb->tdb1.header.hash_size;
-               break;
-       default:
-               return tdb->last_error
-                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                    TDB_LOG_USE_ERROR,
-                                    "tdb_get_attribute:"
-                                    " unknown attribute type %u",
-                                    attr->base.attr);
-       }
-       attr->base.next = NULL;
-       return TDB_SUCCESS;
-}
-
-void tdb_unset_attribute(struct tdb_context *tdb,
-                        enum tdb_attribute_type type)
-{
-       switch (type) {
-       case TDB_ATTRIBUTE_LOG:
-               tdb->log_fn = NULL;
-               break;
-       case TDB_ATTRIBUTE_OPENHOOK:
-               tdb->openhook = NULL;
-               break;
-       case TDB_ATTRIBUTE_HASH:
-       case TDB_ATTRIBUTE_SEED:
-       case TDB_ATTRIBUTE_TDB1_HASHSIZE:
-               tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                          "tdb_unset_attribute: cannot unset %s after opening",
-                          type == TDB_ATTRIBUTE_HASH
-                          ? "TDB_ATTRIBUTE_HASH"
-                          : type == TDB_ATTRIBUTE_SEED
-                          ? "TDB_ATTRIBUTE_SEED"
-                          : "TDB_ATTRIBUTE_TDB1_HASHSIZE");
-               break;
-       case TDB_ATTRIBUTE_STATS:
-               tdb_logerr(tdb, TDB_ERR_EINVAL,
-                          TDB_LOG_USE_ERROR,
-                          "tdb_unset_attribute:"
-                          "cannot unset TDB_ATTRIBUTE_STATS");
-               break;
-       case TDB_ATTRIBUTE_FLOCK:
-               tdb->lock_fn = tdb_fcntl_lock;
-               tdb->unlock_fn = tdb_fcntl_unlock;
-               break;
-       default:
-               tdb_logerr(tdb, TDB_ERR_EINVAL,
-                          TDB_LOG_USE_ERROR,
-                          "tdb_unset_attribute: unknown attribute type %u",
-                          type);
-       }
-}
-
-static bool is_tdb1(struct tdb1_header *hdr, const void *buf, ssize_t rlen)
-{
-       /* This code assumes we've tried to read entire tdb1 header. */
-       BUILD_ASSERT(sizeof(*hdr) <= sizeof(struct tdb_header));
-
-       if (rlen < (ssize_t)sizeof(*hdr)) {
-               return false;
-       }
-
-       memcpy(hdr, buf, sizeof(*hdr));
-       if (strcmp(hdr->magic_food, TDB_MAGIC_FOOD) != 0)
-               return false;
-
-       return hdr->version == TDB1_VERSION
-               || hdr->version == TDB1_BYTEREV(TDB1_VERSION);
-}
-
-/* The top three bits of the capability tell us whether it matters. */
-enum TDB_ERROR unknown_capability(struct tdb_context *tdb, const char *caller,
-                                 tdb_off_t type)
-{
-       if (type & TDB_CAP_NOOPEN) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "%s: file has unknown capability %llu",
-                                 caller, type & TDB_CAP_NOOPEN);
-       }
-
-       if ((type & TDB_CAP_NOWRITE) && !(tdb->flags & TDB_RDONLY)) {
-               return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_ERROR,
-                                 "%s: file has unknown capability %llu"
-                                 " (cannot write to it)",
-                                 caller, type & TDB_CAP_NOOPEN);
-       }
-
-       if (type & TDB_CAP_NOCHECK) {
-               tdb->flags |= TDB_CANT_CHECK;
-       }
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR capabilities_ok(struct tdb_context *tdb,
-                                     tdb_off_t capabilities)
-{
-       tdb_off_t off, next;
-       enum TDB_ERROR ecode = TDB_SUCCESS;
-       const struct tdb_capability *cap;
-
-       /* Check capability list. */
-       for (off = capabilities; off && ecode == TDB_SUCCESS; off = next) {
-               cap = tdb_access_read(tdb, off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap)) {
-                       return TDB_PTR_ERR(cap);
-               }
-
-               switch (cap->type & TDB_CAP_TYPE_MASK) {
-               /* We don't understand any capabilities (yet). */
-               default:
-                       ecode = unknown_capability(tdb, "tdb_open", cap->type);
-               }
-               next = cap->next;
-               tdb_access_release(tdb, cap);
-       }
-       return ecode;
-}
-
-struct tdb_context *tdb_open(const char *name, int tdb_flags,
-                            int open_flags, mode_t mode,
-                            union tdb_attribute *attr)
-{
-       struct tdb_context *tdb;
-       struct stat st;
-       int saved_errno = 0;
-       uint64_t hash_test;
-       unsigned v;
-       ssize_t rlen;
-       struct tdb_header hdr;
-       struct tdb_attribute_seed *seed = NULL;
-       struct tdb_attribute_tdb1_hashsize *hsize_attr = NULL;
-       struct tdb_attribute_tdb1_max_dead *maxsize_attr = NULL;
-       tdb_bool_err berr;
-       enum TDB_ERROR ecode;
-       int openlock;
-
-       tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0));
-       if (!tdb) {
-               /* Can't log this */
-               errno = ENOMEM;
-               return NULL;
-       }
-       /* Set name immediately for logging functions. */
-       if (name) {
-               tdb->name = strcpy((char *)(tdb + 1), name);
-       } else {
-               tdb->name = NULL;
-       }
-       tdb->flags = tdb_flags;
-       tdb->log_fn = NULL;
-       tdb->open_flags = open_flags;
-       tdb->last_error = TDB_SUCCESS;
-       tdb->file = NULL;
-       tdb->openhook = NULL;
-       tdb->lock_fn = tdb_fcntl_lock;
-       tdb->unlock_fn = tdb_fcntl_unlock;
-       tdb->hash_fn = tdb_jenkins_hash;
-       memset(&tdb->stats, 0, sizeof(tdb->stats));
-       tdb->stats.base.attr = TDB_ATTRIBUTE_STATS;
-       tdb->stats.size = sizeof(tdb->stats);
-
-       while (attr) {
-               switch (attr->base.attr) {
-               case TDB_ATTRIBUTE_HASH:
-                       tdb->hash_fn = attr->hash.fn;
-                       tdb->hash_data = attr->hash.data;
-                       break;
-               case TDB_ATTRIBUTE_SEED:
-                       seed = &attr->seed;
-                       break;
-               case TDB_ATTRIBUTE_OPENHOOK:
-                       tdb->openhook = attr->openhook.fn;
-                       tdb->openhook_data = attr->openhook.data;
-                       break;
-               case TDB_ATTRIBUTE_TDB1_HASHSIZE:
-                       hsize_attr = &attr->tdb1_hashsize;
-                       break;
-               case TDB_ATTRIBUTE_TDB1_MAX_DEAD:
-                       maxsize_attr = &attr->tdb1_max_dead;
-                       break;
-               default:
-                       /* These are set as normal. */
-                       ecode = tdb_set_attribute(tdb, attr);
-                       if (ecode != TDB_SUCCESS)
-                               goto fail;
-               }
-               attr = attr->base.next;
-       }
-
-       if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT
-                         | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING
-                         | TDB_RDONLY | TDB_VERSION1)) {
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                  "tdb_open: unknown flags %u", tdb_flags);
-               goto fail;
-       }
-
-       if (hsize_attr) {
-               if (!(tdb_flags & TDB_VERSION1) ||
-                   (!(tdb_flags & TDB_INTERNAL) && !(open_flags & O_CREAT))) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open: can only use"
-                                          " TDB_ATTRIBUTE_TDB1_HASHSIZE when"
-                                          " creating a TDB_VERSION1 tdb");
-                       goto fail;
-               }
-       }
-
-       if (seed) {
-               if (tdb_flags & TDB_VERSION1) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open:"
-                                          " cannot set TDB_ATTRIBUTE_SEED"
-                                          " on TDB1 tdb.");
-                       goto fail;
-               } else if (!(tdb_flags & TDB_INTERNAL)
-                          && !(open_flags & O_CREAT)) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open:"
-                                          " cannot set TDB_ATTRIBUTE_SEED"
-                                          " without O_CREAT.");
-                       goto fail;
-               }
-       }
-
-       if ((open_flags & O_ACCMODE) == O_WRONLY) {
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                  "tdb_open: can't open tdb %s write-only",
-                                  name);
-               goto fail;
-       }
-
-       if ((open_flags & O_ACCMODE) == O_RDONLY) {
-               openlock = F_RDLCK;
-               tdb->flags |= TDB_RDONLY;
-       } else {
-               if (tdb_flags & TDB_RDONLY) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open: can't use TDB_RDONLY"
-                                          " without O_RDONLY");
-                       goto fail;
-               }
-               openlock = F_WRLCK;
-       }
-
-       /* internal databases don't need any of the rest. */
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
-               ecode = tdb_new_file(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-               tdb->file->fd = -1;
-               if (tdb->flags & TDB_VERSION1)
-                       ecode = tdb1_new_database(tdb, hsize_attr, maxsize_attr);
-               else {
-                       ecode = tdb_new_database(tdb, seed, &hdr);
-                       if (ecode == TDB_SUCCESS) {
-                               tdb_convert(tdb, &hdr.hash_seed,
-                                           sizeof(hdr.hash_seed));
-                               tdb->hash_seed = hdr.hash_seed;
-                               tdb2_context_init(tdb);
-                               tdb_ftable_init(tdb);
-                       }
-               }
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-               return tdb;
-       }
-
-       if (stat(name, &st) != -1)
-               tdb->file = find_file(st.st_dev, st.st_ino);
-
-       if (!tdb->file) {
-               int fd;
-
-               if ((fd = open(name, open_flags, mode)) == -1) {
-                       /* errno set by open(2) */
-                       saved_errno = errno;
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: could not open file %s: %s",
-                                  name, strerror(errno));
-                       goto fail_errno;
-               }
-
-               /* on exec, don't inherit the fd */
-               v = fcntl(fd, F_GETFD, 0);
-               fcntl(fd, F_SETFD, v | FD_CLOEXEC);
-
-               if (fstat(fd, &st) == -1) {
-                       saved_errno = errno;
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: could not stat open %s: %s",
-                                  name, strerror(errno));
-                       close(fd);
-                       goto fail_errno;
-               }
-
-               ecode = tdb_new_file(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       close(fd);
-                       goto fail;
-               }
-
-               tdb->file->fd = fd;
-               tdb->file->device = st.st_dev;
-               tdb->file->inode = st.st_ino;
-               tdb->file->map_ptr = NULL;
-               tdb->file->map_size = 0;
-       }
-
-       /* ensure there is only one process initialising at once */
-       ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
-       if (ecode != TDB_SUCCESS) {
-               saved_errno = errno;
-               goto fail_errno;
-       }
-
-       /* call their open hook if they gave us one. */
-       if (tdb->openhook) {
-               ecode = tdb->openhook(tdb->file->fd, tdb->openhook_data);
-               if (ecode != TDB_SUCCESS) {
-                       tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                  "tdb_open: open hook failed");
-                       goto fail;
-               }
-               open_flags |= O_CREAT;
-       }
-
-       /* If they used O_TRUNC, read will return 0. */
-       rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0);
-       if (rlen == 0 && (open_flags & O_CREAT)) {
-               if (tdb->flags & TDB_VERSION1) {
-                       ecode = tdb1_new_database(tdb, hsize_attr, maxsize_attr);
-                       if (ecode != TDB_SUCCESS)
-                               goto fail;
-                       goto finished;
-               }
-               ecode = tdb_new_database(tdb, seed, &hdr);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-       } else if (rlen < 0) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: error %s reading %s",
-                                  strerror(errno), name);
-               goto fail;
-       } else if (rlen < sizeof(hdr)
-                  || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
-               if (is_tdb1(&tdb->tdb1.header, &hdr, rlen)) {
-                       ecode = tdb1_open(tdb, maxsize_attr);
-                       if (!ecode)
-                               goto finished;
-                       goto fail;
-               }
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: %s is not a tdb file", name);
-               goto fail;
-       }
-
-       if (hdr.version != TDB_VERSION) {
-               if (hdr.version == bswap_64(TDB_VERSION))
-                       tdb->flags |= TDB_CONVERT;
-               else {
-                       if (is_tdb1(&tdb->tdb1.header, &hdr, rlen)) {
-                               ecode = tdb1_open(tdb, maxsize_attr);
-                               if (!ecode)
-                                       goto finished;
-                               goto fail;
-                       }
-                       /* wrong version */
-                       ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                          "tdb_open:"
-                                          " %s is unknown version 0x%llx",
-                                          name, (long long)hdr.version);
-                       goto fail;
-               }
-       } else if (tdb->flags & TDB_CONVERT) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open:"
-                                  " %s does not need TDB_CONVERT",
-                                  name);
-               goto fail;
-       }
-
-       /* This is a version2 tdb. */
-       if (tdb->flags & TDB_VERSION1) {
-               tdb->flags &= ~TDB_VERSION1;
-       }
-
-       tdb2_context_init(tdb);
-
-       tdb_convert(tdb, &hdr, sizeof(hdr));
-       tdb->hash_seed = hdr.hash_seed;
-       hash_test = TDB_HASH_MAGIC;
-       hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
-       if (hdr.hash_test != hash_test) {
-               /* wrong hash variant */
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open:"
-                                  " %s uses a different hash function",
-                                  name);
-               goto fail;
-       }
-
-       ecode = capabilities_ok(tdb, hdr.capabilities);
-       if (ecode != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       /* Clear any features we don't understand. */
-       if ((open_flags & O_ACCMODE) != O_RDONLY) {
-               hdr.features_used &= TDB_FEATURE_MASK;
-               ecode = tdb_write_convert(tdb, offsetof(struct tdb_header,
-                                                       features_used),
-                                         &hdr.features_used,
-                                         sizeof(hdr.features_used));
-               if (ecode != TDB_SUCCESS)
-                       goto fail;
-       }
-
-finished:
-       if (tdb->flags & TDB_VERSION1) {
-               /* if needed, run recovery */
-               if (tdb1_transaction_recover(tdb) == -1) {
-                       ecode = tdb->last_error;
-                       goto fail;
-               }
-       }
-
-       tdb_unlock_open(tdb, openlock);
-
-       /* This makes sure we have current map_size and mmap. */
-       if (tdb->flags & TDB_VERSION1) {
-               ecode = tdb1_probe_length(tdb);
-       } else {
-               ecode = tdb->tdb2.io->oob(tdb, tdb->file->map_size, 1, true);
-       }
-       if (unlikely(ecode != TDB_SUCCESS))
-               goto fail;
-
-       if (!(tdb->flags & TDB_VERSION1)) {
-               /* Now it's fully formed, recover if necessary. */
-               berr = tdb_needs_recovery(tdb);
-               if (unlikely(berr != false)) {
-                       if (berr < 0) {
-                               ecode = TDB_OFF_TO_ERR(berr);
-                               goto fail;
-                       }
-                       ecode = tdb_lock_and_recover(tdb);
-                       if (ecode != TDB_SUCCESS) {
-                               goto fail;
-                       }
-               }
-
-               ecode = tdb_ftable_init(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-       }
-
-       tdb->next = tdbs;
-       tdbs = tdb;
-       return tdb;
-
- fail:
-       /* Map ecode to some logical errno. */
-       switch (TDB_ERR_TO_OFF(ecode)) {
-       case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT):
-       case TDB_ERR_TO_OFF(TDB_ERR_IO):
-               saved_errno = EIO;
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_LOCK):
-               saved_errno = EWOULDBLOCK;
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_OOM):
-               saved_errno = ENOMEM;
-               break;
-       case TDB_ERR_TO_OFF(TDB_ERR_EINVAL):
-               saved_errno = EINVAL;
-               break;
-       default:
-               saved_errno = EINVAL;
-               break;
-       }
-
-fail_errno:
-#ifdef TDB_TRACE
-       close(tdb->tracefd);
-#endif
-       if (tdb->file) {
-               tdb_lock_cleanup(tdb);
-               if (--tdb->file->refcnt == 0) {
-                       assert(tdb->file->num_lockrecs == 0);
-                       if (tdb->file->map_ptr) {
-                               if (tdb->flags & TDB_INTERNAL) {
-                                       free(tdb->file->map_ptr);
-                               } else
-                                       tdb_munmap(tdb->file);
-                       }
-                       if (close(tdb->file->fd) != 0)
-                               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                          "tdb_open: failed to close tdb fd"
-                                          " on error: %s", strerror(errno));
-                       free(tdb->file->lockrecs);
-                       free(tdb->file);
-               }
-       }
-
-       free(tdb);
-       errno = saved_errno;
-       return NULL;
-}
-
-int tdb_close(struct tdb_context *tdb)
-{
-       int ret = 0;
-       struct tdb_context **i;
-
-       tdb_trace(tdb, "tdb_close");
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb->tdb1.transaction) {
-                       tdb1_transaction_cancel(tdb);
-               }
-       } else {
-               if (tdb->tdb2.transaction) {
-                       tdb_transaction_cancel(tdb);
-               }
-       }
-
-       if (tdb->file->map_ptr) {
-               if (tdb->flags & TDB_INTERNAL)
-                       free(tdb->file->map_ptr);
-               else
-                       tdb_munmap(tdb->file);
-       }
-       if (tdb->file) {
-               tdb_lock_cleanup(tdb);
-               if (--tdb->file->refcnt == 0) {
-                       ret = close(tdb->file->fd);
-                       free(tdb->file->lockrecs);
-                       free(tdb->file);
-               }
-       }
-
-       /* Remove from tdbs list */
-       for (i = &tdbs; *i; i = &(*i)->next) {
-               if (*i == tdb) {
-                       *i = tdb->next;
-                       break;
-               }
-       }
-
-#ifdef TDB_TRACE
-       close(tdb->tracefd);
-#endif
-       free(tdb);
-
-       return ret;
-}
-
-void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p)
-{
-       struct tdb_context *i;
-
-       for (i = tdbs; i; i = i->next) {
-               if (fn(i, p) != 0)
-                       break;
-       }
-}
diff --git a/ccan/tdb2/private.h b/ccan/tdb2/private.h
deleted file mode 100644 (file)
index ba7de3b..0000000
+++ /dev/null
@@ -1,762 +0,0 @@
-#ifndef TDB_PRIVATE_H
-#define TDB_PRIVATE_H
- /*
-   Trivial Database 2: private types and prototypes
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <ccan/tdb2/tdb2.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <sys/time.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdio.h>
-#include <utime.h>
-#include <unistd.h>
-#include <ccan/likely/likely.h>
-#include <ccan/endian/endian.h>
-
-#ifndef TEST_IT
-#define TEST_IT(cond)
-#endif
-
-/* #define TDB_TRACE 1 */
-
-#ifndef __STRING
-#define __STRING(x)    #x
-#endif
-
-#ifndef __STRINGSTRING
-#define __STRINGSTRING(x) __STRING(x)
-#endif
-
-#ifndef __location__
-#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
-#endif
-
-typedef uint64_t tdb_len_t;
-typedef uint64_t tdb_off_t;
-
-#define TDB_MAGIC_FOOD "TDB file\n"
-#define TDB_VERSION ((uint64_t)(0x26011967 + 7))
-#define TDB1_VERSION (0x26011967 + 6)
-#define TDB_USED_MAGIC ((uint64_t)0x1999)
-#define TDB_HTABLE_MAGIC ((uint64_t)0x1888)
-#define TDB_CHAIN_MAGIC ((uint64_t)0x1777)
-#define TDB_FTABLE_MAGIC ((uint64_t)0x1666)
-#define TDB_CAP_MAGIC ((uint64_t)0x1555)
-#define TDB_FREE_MAGIC ((uint64_t)0xFE)
-#define TDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
-#define TDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
-#define TDB_RECOVERY_INVALID_MAGIC (0x0ULL)
-
-/* Capability bits. */
-#define TDB_CAP_TYPE_MASK      0x1FFFFFFFFFFFFFFFULL
-#define TDB_CAP_NOCHECK                0x8000000000000000ULL
-#define TDB_CAP_NOWRITE                0x4000000000000000ULL
-#define TDB_CAP_NOOPEN         0x2000000000000000ULL
-
-#define TDB_OFF_IS_ERR(off) unlikely(off >= (tdb_off_t)(long)TDB_ERR_LAST)
-#define TDB_OFF_TO_ERR(off) ((enum TDB_ERROR)(long)(off))
-#define TDB_ERR_TO_OFF(ecode) ((tdb_off_t)(long)(ecode))
-
-/* Packing errors into pointers and v.v. */
-#define TDB_PTR_IS_ERR(ptr) \
-       unlikely((unsigned long)(ptr) >= (unsigned long)TDB_ERR_LAST)
-#define TDB_PTR_ERR(p) ((enum TDB_ERROR)(long)(p))
-#define TDB_ERR_PTR(err) ((void *)(long)(err))
-
-/* Common case of returning true, false or -ve error. */
-typedef int tdb_bool_err;
-
-/* Prevent others from opening the file. */
-#define TDB_OPEN_LOCK 0
-/* Expanding file. */
-#define TDB_EXPANSION_LOCK 2
-/* Doing a transaction. */
-#define TDB_TRANSACTION_LOCK 8
-/* Hash chain locks. */
-#define TDB_HASH_LOCK_START 64
-
-/* Range for hash locks. */
-#define TDB_HASH_LOCK_RANGE_BITS 30
-#define TDB_HASH_LOCK_RANGE (1 << TDB_HASH_LOCK_RANGE_BITS)
-
-/* We have 1024 entries in the top level. */
-#define TDB_TOPLEVEL_HASH_BITS 10
-/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */
-#define TDB_SUBLEVEL_HASH_BITS 6
-/* And 8 entries in each group, ie 8 groups per sublevel. */
-#define TDB_HASH_GROUP_BITS 3
-/* This is currently 10: beyond this we chain. */
-#define TDB_MAX_LEVELS (1+(64-TDB_TOPLEVEL_HASH_BITS) / TDB_SUBLEVEL_HASH_BITS)
-
-/* Extend file by least 100 times larger than needed. */
-#define TDB_EXTENSION_FACTOR 100
-
-/* We steal bits from the offsets to store hash info. */
-#define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1)
-/* We steal this many upper bits, giving a maximum offset of 64 exabytes. */
-#define TDB_OFF_UPPER_STEAL 8
-#define   TDB_OFF_UPPER_STEAL_EXTRA 7
-/* The bit number where we store extra hash bits. */
-#define TDB_OFF_HASH_EXTRA_BIT 57
-#define TDB_OFF_UPPER_STEAL_SUBHASH_BIT 56
-
-/* Additional features we understand.  Currently: none. */
-#define TDB_FEATURE_MASK ((uint64_t)0)
-
-/* The bit number where we store the extra hash bits. */
-/* Convenience mask to get actual offset. */
-#define TDB_OFF_MASK \
-       (((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK)
-
-/* How many buckets in a free list: see size_to_bucket(). */
-#define TDB_FREE_BUCKETS (64 - TDB_OFF_UPPER_STEAL)
-
-/* We have to be able to fit a free record here. */
-#define TDB_MIN_DATA_LEN       \
-       (sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record))
-
-/* Indicates this entry is not on an flist (can happen during coalescing) */
-#define TDB_FTABLE_NONE ((1ULL << TDB_OFF_UPPER_STEAL) - 1)
-
-struct tdb_used_record {
-       /* For on-disk compatibility, we avoid bitfields:
-          magic: 16,        (highest)
-          key_len_bits: 5,
-          extra_padding: 32
-          hash_bits: 11
-       */
-        uint64_t magic_and_meta;
-       /* The bottom key_len_bits*2 are key length, rest is data length. */
-        uint64_t key_and_data_len;
-};
-
-static inline unsigned rec_key_bits(const struct tdb_used_record *r)
-{
-       return ((r->magic_and_meta >> 43) & ((1 << 5)-1)) * 2;
-}
-
-static inline uint64_t rec_key_length(const struct tdb_used_record *r)
-{
-       return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1);
-}
-
-static inline uint64_t rec_data_length(const struct tdb_used_record *r)
-{
-       return r->key_and_data_len >> rec_key_bits(r);
-}
-
-static inline uint64_t rec_extra_padding(const struct tdb_used_record *r)
-{
-       return (r->magic_and_meta >> 11) & 0xFFFFFFFF;
-}
-
-static inline uint32_t rec_hash(const struct tdb_used_record *r)
-{
-       return r->magic_and_meta & ((1 << 11) - 1);
-}
-
-static inline uint16_t rec_magic(const struct tdb_used_record *r)
-{
-       return (r->magic_and_meta >> 48);
-}
-
-struct tdb_free_record {
-        uint64_t magic_and_prev; /* TDB_OFF_UPPER_STEAL bits magic, then prev */
-        uint64_t ftable_and_len; /* Len not counting these two fields. */
-       /* This is why the minimum record size is 8 bytes.  */
-       uint64_t next;
-};
-
-static inline uint64_t frec_prev(const struct tdb_free_record *f)
-{
-       return f->magic_and_prev & ((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1);
-}
-
-static inline uint64_t frec_magic(const struct tdb_free_record *f)
-{
-       return f->magic_and_prev >> (64 - TDB_OFF_UPPER_STEAL);
-}
-
-static inline uint64_t frec_len(const struct tdb_free_record *f)
-{
-       return f->ftable_and_len & ((1ULL << (64 - TDB_OFF_UPPER_STEAL))-1);
-}
-
-static inline unsigned frec_ftable(const struct tdb_free_record *f)
-{
-       return f->ftable_and_len >> (64 - TDB_OFF_UPPER_STEAL);
-}
-
-struct tdb_recovery_record {
-       uint64_t magic;
-       /* Length of record (add this header to get total length). */
-       uint64_t max_len;
-       /* Length used. */
-       uint64_t len;
-       /* Old length of file before transaction. */
-       uint64_t eof;
-};
-
-/* If we bottom out of the subhashes, we chain. */
-struct tdb_chain {
-       tdb_off_t rec[1 << TDB_HASH_GROUP_BITS];
-       tdb_off_t next;
-};
-
-/* this is stored at the front of every database */
-struct tdb_header {
-       char magic_food[64]; /* for /etc/magic */
-       /* FIXME: Make me 32 bit? */
-       uint64_t version; /* version of the code */
-       uint64_t hash_test; /* result of hashing HASH_MAGIC. */
-       uint64_t hash_seed; /* "random" seed written at creation time. */
-       tdb_off_t free_table; /* (First) free table. */
-       tdb_off_t recovery; /* Transaction recovery area. */
-
-       uint64_t features_used; /* Features all writers understand */
-       uint64_t features_offered; /* Features offered */
-
-       uint64_t seqnum; /* Sequence number for TDB_SEQNUM */
-
-       tdb_off_t capabilities; /* Optional linked list of capabilities. */
-       tdb_off_t reserved[22];
-
-       /* Top level hash table. */
-       tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS];
-};
-
-struct tdb_freetable {
-       struct tdb_used_record hdr;
-       tdb_off_t next;
-       tdb_off_t buckets[TDB_FREE_BUCKETS];
-};
-
-struct tdb_capability {
-       struct tdb_used_record hdr;
-       tdb_off_t type;
-       tdb_off_t next;
-       /* ... */
-};
-
-/* Information about a particular (locked) hash entry. */
-struct hash_info {
-       /* Full hash value of entry. */
-       uint64_t h;
-       /* Start and length of lock acquired. */
-       tdb_off_t hlock_start;
-       tdb_len_t hlock_range;
-       /* Start of hash group. */
-       tdb_off_t group_start;
-       /* Bucket we belong in. */
-       unsigned int home_bucket;
-       /* Bucket we (or an empty space) were found in. */
-       unsigned int found_bucket;
-       /* How many bits of the hash are already used. */
-       unsigned int hash_used;
-       /* Current working group. */
-       tdb_off_t group[1 << TDB_HASH_GROUP_BITS];
-};
-
-struct traverse_info {
-       struct traverse_level {
-               tdb_off_t hashtable;
-               /* We ignore groups here, and treat it as a big array. */
-               unsigned entry;
-               unsigned int total_buckets;
-       } levels[TDB_MAX_LEVELS + 1];
-       unsigned int num_levels;
-       unsigned int toplevel_group;
-       /* This makes delete-everything-inside-traverse work as expected. */
-       tdb_off_t prev;
-};
-
-typedef uint32_t tdb1_len_t;
-typedef uint32_t tdb1_off_t;
-
-enum tdb_lock_flags {
-       /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
-       TDB_LOCK_NOWAIT = 0,
-       TDB_LOCK_WAIT = 1,
-       /* If set, don't log an error on failure. */
-       TDB_LOCK_PROBE = 2,
-       /* If set, don't check for recovery (used by recovery code). */
-       TDB_LOCK_NOCHECK = 4,
-};
-
-struct tdb_lock {
-       struct tdb_context *owner;
-       off_t off;
-       uint32_t count;
-       uint32_t ltype;
-};
-
-/* This is only needed for tdb_access_commit, but used everywhere to
- * simplify. */
-struct tdb_access_hdr {
-       struct tdb_access_hdr *next;
-       tdb_off_t off;
-       tdb_len_t len;
-       bool convert;
-};
-
-struct tdb_file {
-       /* How many are sharing us? */
-       unsigned int refcnt;
-
-       /* Mmap (if any), or malloc (for TDB_INTERNAL). */
-       void *map_ptr;
-
-       /* How much space has been mapped (<= current file size) */
-       tdb_len_t map_size;
-
-       /* The file descriptor (-1 for TDB_INTERNAL). */
-       int fd;
-
-       /* Lock information */
-       pid_t locker;
-       struct tdb_lock allrecord_lock;
-       size_t num_lockrecs;
-       struct tdb_lock *lockrecs;
-
-       /* Identity of this file. */
-       dev_t device;
-       ino_t inode;
-};
-
-struct tdb_methods {
-       enum TDB_ERROR (*tread)(struct tdb_context *, tdb_off_t, void *,
-                               tdb_len_t);
-       enum TDB_ERROR (*twrite)(struct tdb_context *, tdb_off_t, const void *,
-                                tdb_len_t);
-       enum TDB_ERROR (*oob)(struct tdb_context *, tdb_off_t, tdb_len_t, bool);
-       enum TDB_ERROR (*expand_file)(struct tdb_context *, tdb_len_t);
-       void *(*direct)(struct tdb_context *, tdb_off_t, size_t, bool);
-};
-
-/*
-  internal prototypes
-*/
-/* hash.c: */
-uint64_t tdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
-                         void *unused);
-
-enum TDB_ERROR first_in_hash(struct tdb_context *tdb,
-                            struct traverse_info *tinfo,
-                            TDB_DATA *kbuf, size_t *dlen);
-
-enum TDB_ERROR next_in_hash(struct tdb_context *tdb,
-                           struct traverse_info *tinfo,
-                           TDB_DATA *kbuf, size_t *dlen);
-
-/* Hash random memory. */
-uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len);
-
-/* Hash on disk. */
-uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off);
-
-/* Find and lock a hash entry (or where it would be). */
-tdb_off_t find_and_lock(struct tdb_context *tdb,
-                       struct tdb_data key,
-                       int ltype,
-                       struct hash_info *h,
-                       struct tdb_used_record *rec,
-                       struct traverse_info *tinfo);
-
-enum TDB_ERROR replace_in_hash(struct tdb_context *tdb,
-                              struct hash_info *h,
-                              tdb_off_t new_off);
-
-enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h,
-                          tdb_off_t new_off);
-
-enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h);
-
-/* For tdb_check */
-bool is_subhash(tdb_off_t val);
-enum TDB_ERROR unknown_capability(struct tdb_context *tdb, const char *caller,
-                                 tdb_off_t type);
-
-/* free.c: */
-enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb);
-
-/* check.c needs these to iterate through free lists. */
-tdb_off_t first_ftable(struct tdb_context *tdb);
-tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable);
-
-/* This returns space or -ve error number. */
-tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
-               uint64_t hash, unsigned magic, bool growing);
-
-/* Put this record in a free list. */
-enum TDB_ERROR add_free_record(struct tdb_context *tdb,
-                              tdb_off_t off, tdb_len_t len_with_header,
-                              enum tdb_lock_flags waitflag,
-                              bool coalesce_ok);
-
-/* Set up header for a used/ftable/htable/chain/capability record. */
-enum TDB_ERROR set_header(struct tdb_context *tdb,
-                         struct tdb_used_record *rec,
-                         unsigned magic, uint64_t keylen, uint64_t datalen,
-                         uint64_t actuallen, unsigned hashlow);
-
-/* Used by tdb_check to verify. */
-unsigned int size_to_bucket(tdb_len_t data_len);
-tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket);
-
-/* Used by tdb_summary */
-tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off);
-
-/* Adjust expansion, used by create_recovery_area */
-tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size);
-
-/* io.c: */
-/* Initialize tdb->methods. */
-void tdb_io_init(struct tdb_context *tdb);
-
-/* Convert endian of the buffer if required. */
-void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size);
-
-/* Unmap and try to map the tdb. */
-void tdb_munmap(struct tdb_file *file);
-void tdb_mmap(struct tdb_context *tdb);
-
-/* Either alloc a copy, or give direct access.  Release frees or noop. */
-const void *tdb_access_read(struct tdb_context *tdb,
-                           tdb_off_t off, tdb_len_t len, bool convert);
-void *tdb_access_write(struct tdb_context *tdb,
-                      tdb_off_t off, tdb_len_t len, bool convert);
-
-/* Release result of tdb_access_read/write. */
-void tdb_access_release(struct tdb_context *tdb, const void *p);
-/* Commit result of tdb_acces_write. */
-enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p);
-
-/* Convenience routine to get an offset. */
-tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off);
-
-/* Write an offset at an offset. */
-enum TDB_ERROR tdb_write_off(struct tdb_context *tdb, tdb_off_t off,
-                            tdb_off_t val);
-
-/* Clear an ondisk area. */
-enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len);
-
-/* Return a non-zero offset between >= start < end in this array (or end). */
-tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb,
-                              tdb_off_t base,
-                              uint64_t start,
-                              uint64_t end);
-
-/* Return a zero offset in this array, or num. */
-tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
-                           uint64_t num);
-
-/* Allocate and make a copy of some offset. */
-void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
-
-/* Writes a converted copy of a record. */
-enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
-                                const void *rec, size_t len);
-
-/* Reads record and converts it */
-enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
-                               void *rec, size_t len);
-
-/* Bump the seqnum (caller checks for tdb->flags & TDB_SEQNUM) */
-void tdb_inc_seqnum(struct tdb_context *tdb);
-
-/* lock.c: */
-/* Print message because another tdb owns a lock we want. */
-enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call);
-
-/* If we fork, we no longer really own locks. */
-bool check_lock_pid(struct tdb_context *tdb, const char *call, bool log);
-
-/* Lock/unlock a range of hashes. */
-enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
-                              tdb_off_t hash_lock, tdb_len_t hash_range,
-                              int ltype, enum tdb_lock_flags waitflag);
-enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
-                                tdb_off_t hash_lock,
-                                tdb_len_t hash_range, int ltype);
-
-/* For closing the file. */
-void tdb_lock_cleanup(struct tdb_context *tdb);
-
-/* Lock/unlock a particular free bucket. */
-enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
-                                   enum tdb_lock_flags waitflag);
-void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off);
-
-/* Serialize transaction start. */
-enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype);
-void tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
-
-/* Do we have any hash locks (ie. via tdb_chainlock) ? */
-bool tdb_has_hash_locks(struct tdb_context *tdb);
-
-/* Lock entire database. */
-enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
-                                 enum tdb_lock_flags flags, bool upgradable);
-void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype);
-enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb, off_t start);
-
-/* Serialize db open. */
-enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb,
-                            int ltype, enum tdb_lock_flags flags);
-void tdb_unlock_open(struct tdb_context *tdb, int ltype);
-bool tdb_has_open_lock(struct tdb_context *tdb);
-
-/* Serialize db expand. */
-enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype);
-void tdb_unlock_expand(struct tdb_context *tdb, int ltype);
-bool tdb_has_expansion_lock(struct tdb_context *tdb);
-
-/* If it needs recovery, grab all the locks and do it. */
-enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb);
-
-/* Byte-range lock wrappers for TDB1 to access. */
-enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
-                         int rw_type, tdb_off_t offset, tdb_off_t len,
-                         enum tdb_lock_flags flags);
-
-enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
-                           int rw_type, tdb_off_t offset, size_t len);
-
-enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
-                            tdb_off_t offset, int ltype,
-                            enum tdb_lock_flags flags);
-
-enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
-                              tdb_off_t off, int ltype);
-
-enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
-                               int ltype, enum tdb_lock_flags flags,
-                               tdb_off_t off, tdb_off_t len);
-
-/* Default lock and unlock functions. */
-int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *);
-int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *);
-
-/* transaction.c: */
-enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb);
-tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb);
-
-/* this is stored at the front of every database */
-struct tdb1_header {
-       char magic_food[32]; /* for /etc/magic */
-       uint32_t version; /* version of the code */
-       uint32_t hash_size; /* number of hash entries */
-       tdb1_off_t rwlocks; /* obsolete - kept to detect old formats */
-       tdb1_off_t recovery_start; /* offset of transaction recovery region */
-       tdb1_off_t sequence_number; /* used when TDB1_SEQNUM is set */
-       uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */
-       uint32_t magic2_hash; /* hash of TDB1_MAGIC. */
-       tdb1_off_t reserved[27];
-};
-
-struct tdb1_traverse_lock {
-       struct tdb1_traverse_lock *next;
-       uint32_t off;
-       uint32_t hash;
-       int lock_rw;
-};
-
-struct tdb_context {
-       /* Single list of all TDBs, to detect multiple opens. */
-       struct tdb_context *next;
-
-       /* Filename of the database. */
-       const char *name;
-
-       /* Logging function */
-       void (*log_fn)(struct tdb_context *tdb,
-                      enum tdb_log_level level,
-                      enum TDB_ERROR ecode,
-                      const char *message,
-                      void *data);
-       void *log_data;
-
-       /* Open flags passed to tdb_open. */
-       int open_flags;
-
-       /* low level (fnctl) lock functions. */
-       int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *);
-       int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *);
-       void *lock_data;
-
-       /* the tdb flags passed to tdb_open. */
-       uint32_t flags;
-
-       /* Our statistics. */
-       struct tdb_attribute_stats stats;
-
-       /* The actual file information */
-       struct tdb_file *file;
-
-       /* Hash function. */
-       uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *);
-       void *hash_data;
-       uint64_t hash_seed;
-
-       /* Our open hook, if any. */
-       enum TDB_ERROR (*openhook)(int fd, void *data);
-       void *openhook_data;
-
-       /* Last error we returned. */
-       enum TDB_ERROR last_error;
-
-       struct {
-
-               /* Are we accessing directly? (debugging check). */
-               int direct_access;
-
-               /* Set if we are in a transaction. */
-               struct tdb_transaction *transaction;
-
-               /* What free table are we using? */
-               tdb_off_t ftable_off;
-               unsigned int ftable;
-
-               /* IO methods: changes for transactions. */
-               const struct tdb_methods *io;
-
-               /* Direct access information */
-               struct tdb_access_hdr *access;
-       } tdb2;
-
-       struct {
-               int traverse_read; /* read-only traversal */
-               int traverse_write; /* read-write traversal */
-
-               struct tdb1_header header; /* a cached copy of the header */
-               struct tdb1_traverse_lock travlocks; /* current traversal locks */
-               const struct tdb1_methods *io;
-               struct tdb1_transaction *transaction;
-               int page_size;
-               int max_dead_records;
-       } tdb1;
-};
-
-#define TDB1_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24))
-
-/* tdb1_check.c: */
-int tdb1_check(struct tdb_context *tdb,
-              enum TDB_ERROR (*check)(TDB_DATA key, TDB_DATA data, void *),
-              void *private_data);
-
-
-/* tdb1_open.c: */
-enum TDB_ERROR tdb1_new_database(struct tdb_context *tdb,
-                                struct tdb_attribute_tdb1_hashsize *hashsize,
-                                struct tdb_attribute_tdb1_max_dead *max_dead);
-enum TDB_ERROR tdb1_open(struct tdb_context *tdb,
-                        struct tdb_attribute_tdb1_max_dead *max_dead);
-
-/* tdb1_io.c: */
-enum TDB_ERROR tdb1_probe_length(struct tdb_context *tdb);
-
-/* tdb1_lock.c: */
-int tdb1_allrecord_lock(struct tdb_context *tdb, int ltype,
-                       enum tdb_lock_flags flags, bool upgradable);
-int tdb1_allrecord_unlock(struct tdb_context *tdb, int ltype);
-
-int tdb1_chainlock(struct tdb_context *tdb, TDB_DATA key);
-int tdb1_chainunlock(struct tdb_context *tdb, TDB_DATA key);
-int tdb1_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
-int tdb1_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
-
-/* tdb1_transaction.c: */
-int tdb1_transaction_recover(struct tdb_context *tdb);
-int tdb1_transaction_cancel(struct tdb_context *tdb);
-
-/* tdb1_traverse.c: */
-int tdb1_traverse(struct tdb_context *tdb,
-                 int (*)(struct tdb_context *, TDB_DATA, TDB_DATA, void *),
-                 void *private_data);
-
-/* tdb1_summary.c: */
-char *tdb1_summary(struct tdb_context *tdb);
-
-/* tdb1_tdb.c: */
-int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag);
-enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key,
-                         TDB_DATA *data);
-int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf);
-int tdb1_delete(struct tdb_context *tdb, TDB_DATA key);
-int tdb1_exists(struct tdb_context *tdb, TDB_DATA key);
-enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
-                                enum TDB_ERROR (*parser)(TDB_DATA key,
-                                                         TDB_DATA data,
-                                                         void *private_data),
-                                void *private_data);
-void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb);
-int tdb1_get_seqnum(struct tdb_context *tdb);
-int tdb1_wipe_all(struct tdb_context *tdb);
-
-/* tdb1_transaction.c: */
-int tdb1_transaction_start(struct tdb_context *tdb);
-int tdb1_transaction_prepare_commit(struct tdb_context *tdb);
-int tdb1_transaction_commit(struct tdb_context *tdb);
-
-/* tdb1_traverse.c: */
-TDB_DATA tdb1_firstkey(struct tdb_context *tdb);
-TDB_DATA tdb1_nextkey(struct tdb_context *tdb, TDB_DATA key);
-
-/* tdb.c: */
-enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
-                              enum TDB_ERROR ecode,
-                              enum tdb_log_level level,
-                              const char *fmt, ...);
-
-#ifdef TDB_TRACE
-void tdb_trace(struct tdb_context *tdb, const char *op);
-void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op);
-void tdb_trace_open(struct tdb_context *tdb, const char *op,
-                   unsigned hash_size, unsigned tdb_flags, unsigned open_flags);
-void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret);
-void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret);
-void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
-                   TDB_DATA rec);
-void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
-                       TDB_DATA rec, int ret);
-void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
-                          TDB_DATA rec, TDB_DATA ret);
-void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
-                            TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
-                            int ret);
-void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
-                          TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret);
-#else
-#define tdb_trace(tdb, op)
-#define tdb_trace_seqnum(tdb, seqnum, op)
-#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags)
-#define tdb_trace_ret(tdb, op, ret)
-#define tdb_trace_retrec(tdb, op, ret)
-#define tdb_trace_1rec(tdb, op, rec)
-#define tdb_trace_1rec_ret(tdb, op, rec, ret)
-#define tdb_trace_1rec_retrec(tdb, op, rec, ret)
-#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret)
-#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret)
-#endif /* !TDB_TRACE */
-
-#endif
diff --git a/ccan/tdb2/summary.c b/ccan/tdb2/summary.c
deleted file mode 100644 (file)
index f3a3a08..0000000
+++ /dev/null
@@ -1,356 +0,0 @@
- /*
-   Trivial Database 2: human-readable summary code
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <assert.h>
-#include <ccan/tally/tally.h>
-
-#define SUMMARY_FORMAT \
-       "Size of file/data: %zu/%zu\n" \
-       "Number of records: %zu\n" \
-       "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
-       "Smallest/average/largest data: %zu/%zu/%zu\n%s" \
-       "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
-       "Number of free records: %zu\n" \
-       "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
-       "Number of uncoalesced records: %zu\n" \
-       "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
-       "Toplevel hash used: %u of %u\n" \
-       "Number of chains: %zu\n" \
-       "Number of subhashes: %zu\n" \
-       "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
-       "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
-
-#define BUCKET_SUMMARY_FORMAT_A                                        \
-       "Free bucket %zu: total entries %zu.\n"                 \
-       "Smallest/average/largest length: %zu/%zu/%zu\n%s"
-#define BUCKET_SUMMARY_FORMAT_B                                        \
-       "Free bucket %zu-%zu: total entries %zu.\n"             \
-       "Smallest/average/largest length: %zu/%zu/%zu\n%s"
-#define CAPABILITY_FORMAT                                      \
-       "Capability %llu%s\n"
-
-#define HISTO_WIDTH 70
-#define HISTO_HEIGHT 20
-
-static tdb_off_t count_hash(struct tdb_context *tdb,
-                           tdb_off_t hash_off, unsigned bits)
-{
-       const tdb_off_t *h;
-       tdb_off_t count = 0;
-       unsigned int i;
-
-       h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true);
-       if (TDB_PTR_IS_ERR(h)) {
-               return TDB_ERR_TO_OFF(TDB_PTR_ERR(h));
-       }
-       for (i = 0; i < (1 << bits); i++)
-               count += (h[i] != 0);
-
-       tdb_access_release(tdb, h);
-       return count;
-}
-
-static enum TDB_ERROR summarize(struct tdb_context *tdb,
-                               struct tally *hashes,
-                               struct tally *ftables,
-                               struct tally *fr,
-                               struct tally *keys,
-                               struct tally *data,
-                               struct tally *extra,
-                               struct tally *uncoal,
-                               struct tally *chains)
-{
-       tdb_off_t off;
-       tdb_len_t len;
-       tdb_len_t unc = 0;
-
-       for (off = sizeof(struct tdb_header);
-            off < tdb->file->map_size;
-            off += len) {
-               const union {
-                       struct tdb_used_record u;
-                       struct tdb_free_record f;
-                       struct tdb_recovery_record r;
-               } *p;
-               /* We might not be able to get the whole thing. */
-               p = tdb_access_read(tdb, off, sizeof(p->f), true);
-               if (TDB_PTR_IS_ERR(p)) {
-                       return TDB_PTR_ERR(p);
-               }
-               if (frec_magic(&p->f) != TDB_FREE_MAGIC) {
-                       if (unc > 1) {
-                               tally_add(uncoal, unc);
-                               unc = 0;
-                       }
-               }
-
-               if (p->r.magic == TDB_RECOVERY_INVALID_MAGIC
-                   || p->r.magic == TDB_RECOVERY_MAGIC) {
-                       len = sizeof(p->r) + p->r.max_len;
-               } else if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
-                       len = frec_len(&p->f);
-                       tally_add(fr, len);
-                       len += sizeof(p->u);
-                       unc++;
-               } else if (rec_magic(&p->u) == TDB_USED_MAGIC) {
-                       len = sizeof(p->u)
-                               + rec_key_length(&p->u)
-                               + rec_data_length(&p->u)
-                               + rec_extra_padding(&p->u);
-
-                       tally_add(keys, rec_key_length(&p->u));
-                       tally_add(data, rec_data_length(&p->u));
-                       tally_add(extra, rec_extra_padding(&p->u));
-               } else if (rec_magic(&p->u) == TDB_HTABLE_MAGIC) {
-                       tdb_off_t count = count_hash(tdb,
-                                                    off + sizeof(p->u),
-                                                    TDB_SUBLEVEL_HASH_BITS);
-                       if (TDB_OFF_IS_ERR(count)) {
-                               return TDB_OFF_TO_ERR(count);
-                       }
-                       tally_add(hashes, count);
-                       tally_add(extra, rec_extra_padding(&p->u));
-                       len = sizeof(p->u)
-                               + rec_data_length(&p->u)
-                               + rec_extra_padding(&p->u);
-               } else if (rec_magic(&p->u) == TDB_FTABLE_MAGIC) {
-                       len = sizeof(p->u)
-                               + rec_data_length(&p->u)
-                               + rec_extra_padding(&p->u);
-                       tally_add(ftables, rec_data_length(&p->u));
-                       tally_add(extra, rec_extra_padding(&p->u));
-               } else if (rec_magic(&p->u) == TDB_CHAIN_MAGIC) {
-                       len = sizeof(p->u)
-                               + rec_data_length(&p->u)
-                               + rec_extra_padding(&p->u);
-                       tally_add(chains, 1);
-                       tally_add(extra, rec_extra_padding(&p->u));
-               } else {
-                       len = dead_space(tdb, off);
-                       if (TDB_OFF_IS_ERR(len)) {
-                               return TDB_OFF_TO_ERR(len);
-                       }
-               }
-               tdb_access_release(tdb, p);
-       }
-       if (unc)
-               tally_add(uncoal, unc);
-       return TDB_SUCCESS;
-}
-
-static size_t num_capabilities(struct tdb_context *tdb)
-{
-       tdb_off_t off, next;
-       const struct tdb_capability *cap;
-       size_t count = 0;
-
-       off = tdb_read_off(tdb, offsetof(struct tdb_header, capabilities));
-       if (TDB_OFF_IS_ERR(off))
-               return count;
-
-       /* Count capability list. */
-       for (; off; off = next) {
-               cap = tdb_access_read(tdb, off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap)) {
-                       break;
-               }
-               count++;
-               next = cap->next;
-               tdb_access_release(tdb, cap);
-       }
-       return count;
-}
-
-static void add_capabilities(struct tdb_context *tdb, size_t num, char *summary)
-{
-       tdb_off_t off, next;
-       const struct tdb_capability *cap;
-       size_t count = 0;
-
-       /* Append to summary. */
-       summary += strlen(summary);
-
-       off = tdb_read_off(tdb, offsetof(struct tdb_header, capabilities));
-       if (TDB_OFF_IS_ERR(off))
-               return;
-
-       /* Walk capability list. */
-       for (; off; off = next) {
-               cap = tdb_access_read(tdb, off, sizeof(*cap), true);
-               if (TDB_PTR_IS_ERR(cap)) {
-                       break;
-               }
-               count++;
-               sprintf(summary, CAPABILITY_FORMAT,
-                       cap->type & TDB_CAP_TYPE_MASK,
-                       /* Noopen?  How did we get here? */
-                       (cap->type & TDB_CAP_NOOPEN) ? " (unopenable)"
-                       : ((cap->type & TDB_CAP_NOWRITE)
-                          && (cap->type & TDB_CAP_NOCHECK)) ? " (uncheckable,read-only)"
-                       : (cap->type & TDB_CAP_NOWRITE) ? " (read-only)"
-                       : (cap->type & TDB_CAP_NOCHECK) ? " (uncheckable)"
-                       : "");
-               summary += strlen(summary);
-               next = cap->next;
-               tdb_access_release(tdb, cap);
-       }
-}
-
-enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
-                          enum tdb_summary_flags flags,
-                          char **summary)
-{
-       tdb_len_t len;
-       size_t num_caps;
-       struct tally *ftables, *hashes, *freet, *keys, *data, *extra, *uncoal,
-               *chains;
-       char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               /* tdb1 doesn't do graphs. */
-               *summary = tdb1_summary(tdb);
-               if (!*summary)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
-
-       ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
-       if (ecode != TDB_SUCCESS) {
-               return tdb->last_error = ecode;
-       }
-
-       ecode = tdb_lock_expand(tdb, F_RDLCK);
-       if (ecode != TDB_SUCCESS) {
-               tdb_allrecord_unlock(tdb, F_RDLCK);
-               return tdb->last_error = ecode;
-       }
-
-       /* Start stats off empty. */
-       ftables = tally_new(HISTO_HEIGHT);
-       hashes = tally_new(HISTO_HEIGHT);
-       freet = tally_new(HISTO_HEIGHT);
-       keys = tally_new(HISTO_HEIGHT);
-       data = tally_new(HISTO_HEIGHT);
-       extra = tally_new(HISTO_HEIGHT);
-       uncoal = tally_new(HISTO_HEIGHT);
-       chains = tally_new(HISTO_HEIGHT);
-       if (!ftables || !hashes || !freet || !keys || !data || !extra
-           || !uncoal || !chains) {
-               ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                  "tdb_summary: failed to allocate"
-                                  " tally structures");
-               goto unlock;
-       }
-
-       ecode = summarize(tdb, hashes, ftables, freet, keys, data, extra,
-                         uncoal, chains);
-       if (ecode != TDB_SUCCESS) {
-               goto unlock;
-       }
-
-       if (flags & TDB_SUMMARY_HISTOGRAMS) {
-               hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
-               freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
-               keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
-               datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
-               extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
-               uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
-       }
-
-       num_caps = num_capabilities(tdb);
-
-       /* 20 is max length of a %llu. */
-       len = strlen(SUMMARY_FORMAT) + 33*20 + 1
-               + (hashesg ? strlen(hashesg) : 0)
-               + (freeg ? strlen(freeg) : 0)
-               + (keysg ? strlen(keysg) : 0)
-               + (datag ? strlen(datag) : 0)
-               + (extrag ? strlen(extrag) : 0)
-               + (uncoalg ? strlen(uncoalg) : 0)
-               + num_caps * (strlen(CAPABILITY_FORMAT) + 20*4);
-
-       *summary = malloc(len);
-       if (!*summary) {
-               ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                  "tdb_summary: failed to allocate string");
-               goto unlock;
-       }
-
-       sprintf(*summary, SUMMARY_FORMAT,
-               (size_t)tdb->file->map_size,
-               tally_total(keys, NULL) + tally_total(data, NULL),
-               tally_num(keys),
-               tally_min(keys), tally_mean(keys), tally_max(keys),
-               keysg ? keysg : "",
-               tally_min(data), tally_mean(data), tally_max(data),
-               datag ? datag : "",
-               tally_min(extra), tally_mean(extra), tally_max(extra),
-               extrag ? extrag : "",
-               tally_num(freet),
-               tally_min(freet), tally_mean(freet), tally_max(freet),
-               freeg ? freeg : "",
-               tally_total(uncoal, NULL),
-               tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
-               uncoalg ? uncoalg : "",
-               (unsigned)count_hash(tdb, offsetof(struct tdb_header,
-                                                  hashtable),
-                                    TDB_TOPLEVEL_HASH_BITS),
-               1 << TDB_TOPLEVEL_HASH_BITS,
-               tally_num(chains),
-               tally_num(hashes),
-               tally_min(hashes), tally_mean(hashes), tally_max(hashes),
-               hashesg ? hashesg : "",
-               tally_total(keys, NULL) * 100.0 / tdb->file->map_size,
-               tally_total(data, NULL) * 100.0 / tdb->file->map_size,
-               tally_total(extra, NULL) * 100.0 / tdb->file->map_size,
-               tally_total(freet, NULL) * 100.0 / tdb->file->map_size,
-               (tally_num(keys) + tally_num(freet) + tally_num(hashes))
-               * sizeof(struct tdb_used_record) * 100.0 / tdb->file->map_size,
-               tally_num(ftables) * sizeof(struct tdb_freetable)
-               * 100.0 / tdb->file->map_size,
-               (tally_num(hashes)
-                * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
-                + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)
-                + sizeof(struct tdb_chain) * tally_num(chains))
-               * 100.0 / tdb->file->map_size);
-
-       add_capabilities(tdb, num_caps, *summary);
-
-unlock:
-       free(hashesg);
-       free(freeg);
-       free(keysg);
-       free(datag);
-       free(extrag);
-       free(uncoalg);
-       free(hashes);
-       free(freet);
-       free(keys);
-       free(data);
-       free(extra);
-       free(uncoal);
-       free(ftables);
-       free(chains);
-
-       tdb_allrecord_unlock(tdb, F_RDLCK);
-       tdb_unlock_expand(tdb, F_RDLCK);
-       return tdb->last_error = ecode;
-}
diff --git a/ccan/tdb2/tdb.c b/ccan/tdb2/tdb.c
deleted file mode 100644 (file)
index 62607bf..0000000
+++ /dev/null
@@ -1,642 +0,0 @@
- /*
-   Trivial Database 2: fetch, store and misc routines.
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/asprintf/asprintf.h>
-#include <stdarg.h>
-
-static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
-                                    tdb_off_t off,
-                                    tdb_len_t keylen,
-                                    tdb_len_t datalen,
-                                    struct tdb_used_record *rec,
-                                    uint64_t h)
-{
-       uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
-       enum TDB_ERROR ecode;
-
-       ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
-                          keylen + dataroom, h);
-       if (ecode == TDB_SUCCESS) {
-               ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
-       }
-       return ecode;
-}
-
-static enum TDB_ERROR replace_data(struct tdb_context *tdb,
-                                  struct hash_info *h,
-                                  struct tdb_data key, struct tdb_data dbuf,
-                                  tdb_off_t old_off, tdb_len_t old_room,
-                                  bool growing)
-{
-       tdb_off_t new_off;
-       enum TDB_ERROR ecode;
-
-       /* Allocate a new record. */
-       new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
-                       growing);
-       if (TDB_OFF_IS_ERR(new_off)) {
-               return TDB_OFF_TO_ERR(new_off);
-       }
-
-       /* We didn't like the existing one: remove it. */
-       if (old_off) {
-               tdb->stats.frees++;
-               ecode = add_free_record(tdb, old_off,
-                                       sizeof(struct tdb_used_record)
-                                       + key.dsize + old_room,
-                                       TDB_LOCK_WAIT, true);
-               if (ecode == TDB_SUCCESS)
-                       ecode = replace_in_hash(tdb, h, new_off);
-       } else {
-               ecode = add_to_hash(tdb, h, new_off);
-       }
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       new_off += sizeof(struct tdb_used_record);
-       ecode = tdb->tdb2.io->twrite(tdb, new_off, key.dptr, key.dsize);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       new_off += key.dsize;
-       ecode = tdb->tdb2.io->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       if (tdb->flags & TDB_SEQNUM)
-               tdb_inc_seqnum(tdb);
-
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR update_data(struct tdb_context *tdb,
-                                 tdb_off_t off,
-                                 struct tdb_data dbuf,
-                                 tdb_len_t extra)
-{
-       enum TDB_ERROR ecode;
-
-       ecode = tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize);
-       if (ecode == TDB_SUCCESS && extra) {
-               /* Put a zero in; future versions may append other data. */
-               ecode = tdb->tdb2.io->twrite(tdb, off + dbuf.dsize, "", 1);
-       }
-       if (tdb->flags & TDB_SEQNUM)
-               tdb_inc_seqnum(tdb);
-
-       return ecode;
-}
-
-enum TDB_ERROR tdb_store(struct tdb_context *tdb,
-                        struct tdb_data key, struct tdb_data dbuf, int flag)
-{
-       struct hash_info h;
-       tdb_off_t off;
-       tdb_len_t old_room = 0;
-       struct tdb_used_record rec;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_store(tdb, key, dbuf, flag) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       /* Now we have lock on this hash bucket. */
-       if (flag == TDB_INSERT) {
-               if (off) {
-                       ecode = TDB_ERR_EXISTS;
-                       goto out;
-               }
-       } else {
-               if (off) {
-                       old_room = rec_data_length(&rec)
-                               + rec_extra_padding(&rec);
-                       if (old_room >= dbuf.dsize) {
-                               /* Can modify in-place.  Easy! */
-                               ecode = update_rec_hdr(tdb, off,
-                                                      key.dsize, dbuf.dsize,
-                                                      &rec, h.h);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto out;
-                               }
-                               ecode = update_data(tdb,
-                                                   off + sizeof(rec)
-                                                   + key.dsize, dbuf,
-                                                   old_room - dbuf.dsize);
-                               if (ecode != TDB_SUCCESS) {
-                                       goto out;
-                               }
-                               tdb_unlock_hashes(tdb, h.hlock_start,
-                                                 h.hlock_range, F_WRLCK);
-                               return tdb->last_error = TDB_SUCCESS;
-                       }
-               } else {
-                       if (flag == TDB_MODIFY) {
-                               /* if the record doesn't exist and we
-                                  are in TDB_MODIFY mode then we should fail
-                                  the store */
-                               ecode = TDB_ERR_NOEXIST;
-                               goto out;
-                       }
-               }
-       }
-
-       /* If we didn't use the old record, this implies we're growing. */
-       ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);
-out:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
-       return tdb->last_error = ecode;
-}
-
-enum TDB_ERROR tdb_append(struct tdb_context *tdb,
-                         struct tdb_data key, struct tdb_data dbuf)
-{
-       struct hash_info h;
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       tdb_len_t old_room = 0, old_dlen;
-       unsigned char *newdata;
-       struct tdb_data new_dbuf;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_append(tdb, key, dbuf) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (off) {
-               old_dlen = rec_data_length(&rec);
-               old_room = old_dlen + rec_extra_padding(&rec);
-
-               /* Fast path: can append in place. */
-               if (rec_extra_padding(&rec) >= dbuf.dsize) {
-                       ecode = update_rec_hdr(tdb, off, key.dsize,
-                                              old_dlen + dbuf.dsize, &rec,
-                                              h.h);
-                       if (ecode != TDB_SUCCESS) {
-                               goto out;
-                       }
-
-                       off += sizeof(rec) + key.dsize + old_dlen;
-                       ecode = update_data(tdb, off, dbuf,
-                                           rec_extra_padding(&rec));
-                       goto out;
-               }
-
-               /* Slow path. */
-               newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
-               if (!newdata) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                          "tdb_append:"
-                                          " failed to allocate %zu bytes",
-                                          (size_t)(key.dsize + old_dlen
-                                                   + dbuf.dsize));
-                       goto out;
-               }
-               ecode = tdb->tdb2.io->tread(tdb, off + sizeof(rec) + key.dsize,
-                                           newdata, old_dlen);
-               if (ecode != TDB_SUCCESS) {
-                       goto out_free_newdata;
-               }
-               memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
-               new_dbuf.dptr = newdata;
-               new_dbuf.dsize = old_dlen + dbuf.dsize;
-       } else {
-               newdata = NULL;
-               new_dbuf = dbuf;
-       }
-
-       /* If they're using tdb_append(), it implies they're growing record. */
-       ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
-
-out_free_newdata:
-       free(newdata);
-out:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
-       return tdb->last_error = ecode;
-}
-
-enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
-                        struct tdb_data *data)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1)
-               return tdb1_fetch(tdb, key, data);
-
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (!off) {
-               ecode = TDB_ERR_NOEXIST;
-       } else {
-               data->dsize = rec_data_length(&rec);
-               data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
-                                           data->dsize);
-               if (TDB_PTR_IS_ERR(data->dptr)) {
-                       ecode = TDB_PTR_ERR(data->dptr);
-               } else
-                       ecode = TDB_SUCCESS;
-       }
-
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-       return tdb->last_error = ecode;
-}
-
-bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-
-       if (tdb->flags & TDB_VERSION1) {
-               return tdb1_exists(tdb, key);
-       }
-
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               tdb->last_error = TDB_OFF_TO_ERR(off);
-               return false;
-       }
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-
-       tdb->last_error = TDB_SUCCESS;
-       return off ? true : false;
-}
-
-enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_delete(tdb, key) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (!off) {
-               ecode = TDB_ERR_NOEXIST;
-               goto unlock;
-       }
-
-       ecode = delete_from_hash(tdb, &h);
-       if (ecode != TDB_SUCCESS) {
-               goto unlock;
-       }
-
-       /* Free the deleted entry. */
-       tdb->stats.frees++;
-       ecode = add_free_record(tdb, off,
-                               sizeof(struct tdb_used_record)
-                               + rec_key_length(&rec)
-                               + rec_data_length(&rec)
-                               + rec_extra_padding(&rec),
-                               TDB_LOCK_WAIT, true);
-
-       if (tdb->flags & TDB_SEQNUM)
-               tdb_inc_seqnum(tdb);
-
-unlock:
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
-       return tdb->last_error = ecode;
-}
-
-unsigned int tdb_get_flags(struct tdb_context *tdb)
-{
-       return tdb->flags;
-}
-
-static bool inside_transaction(const struct tdb_context *tdb)
-{
-       if (tdb->flags & TDB_VERSION1)
-               return tdb->tdb1.transaction != NULL;
-       else
-               return tdb->tdb2.transaction != NULL;
-}
-
-static bool readonly_changable(struct tdb_context *tdb, const char *caller)
-{
-       if (inside_transaction(tdb)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "%s: can't change"
-                                            " TDB_RDONLY inside transaction",
-                                            caller);
-               return false;
-       }
-       return true;
-}
-
-void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
-{
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_add_flag: internal db");
-               return;
-       }
-       switch (flag) {
-       case TDB_NOLOCK:
-               tdb->flags |= TDB_NOLOCK;
-               break;
-       case TDB_NOMMAP:
-               tdb->flags |= TDB_NOMMAP;
-               tdb_munmap(tdb->file);
-               break;
-       case TDB_NOSYNC:
-               tdb->flags |= TDB_NOSYNC;
-               break;
-       case TDB_SEQNUM:
-               tdb->flags |= TDB_SEQNUM;
-               break;
-       case TDB_ALLOW_NESTING:
-               tdb->flags |= TDB_ALLOW_NESTING;
-               break;
-       case TDB_RDONLY:
-               if (readonly_changable(tdb, "tdb_add_flag"))
-                       tdb->flags |= TDB_RDONLY;
-               break;
-       default:
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_add_flag: Unknown flag %u",
-                                            flag);
-       }
-}
-
-void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
-{
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_remove_flag: internal db");
-               return;
-       }
-       switch (flag) {
-       case TDB_NOLOCK:
-               tdb->flags &= ~TDB_NOLOCK;
-               break;
-       case TDB_NOMMAP:
-               tdb->flags &= ~TDB_NOMMAP;
-               tdb_mmap(tdb);
-               break;
-       case TDB_NOSYNC:
-               tdb->flags &= ~TDB_NOSYNC;
-               break;
-       case TDB_SEQNUM:
-               tdb->flags &= ~TDB_SEQNUM;
-               break;
-       case TDB_ALLOW_NESTING:
-               tdb->flags &= ~TDB_ALLOW_NESTING;
-               break;
-       case TDB_RDONLY:
-               if ((tdb->open_flags & O_ACCMODE) == O_RDONLY) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                                    TDB_LOG_USE_ERROR,
-                                                    "tdb_remove_flag: can't"
-                                                    " remove TDB_RDONLY on tdb"
-                                                    " opened with O_RDONLY");
-                       break;
-               }
-               if (readonly_changable(tdb, "tdb_remove_flag"))
-                       tdb->flags &= ~TDB_RDONLY;
-               break;
-       default:
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_remove_flag: Unknown flag %u",
-                                            flag);
-       }
-}
-
-const char *tdb_errorstr(enum TDB_ERROR ecode)
-{
-       /* Gcc warns if you miss a case in the switch, so use that. */
-       switch (TDB_ERR_TO_OFF(ecode)) {
-       case TDB_ERR_TO_OFF(TDB_SUCCESS): return "Success";
-       case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT): return "Corrupt database";
-       case TDB_ERR_TO_OFF(TDB_ERR_IO): return "IO Error";
-       case TDB_ERR_TO_OFF(TDB_ERR_LOCK): return "Locking error";
-       case TDB_ERR_TO_OFF(TDB_ERR_OOM): return "Out of memory";
-       case TDB_ERR_TO_OFF(TDB_ERR_EXISTS): return "Record exists";
-       case TDB_ERR_TO_OFF(TDB_ERR_EINVAL): return "Invalid parameter";
-       case TDB_ERR_TO_OFF(TDB_ERR_NOEXIST): return "Record does not exist";
-       case TDB_ERR_TO_OFF(TDB_ERR_RDONLY): return "write not permitted";
-       }
-       return "Invalid error code";
-}
-
-enum TDB_ERROR tdb_error(struct tdb_context *tdb)
-{
-       return tdb->last_error;
-}
-
-enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
-                              enum TDB_ERROR ecode,
-                              enum tdb_log_level level,
-                              const char *fmt, ...)
-{
-       char *message;
-       va_list ap;
-       size_t len;
-       /* tdb_open paths care about errno, so save it. */
-       int saved_errno = errno;
-
-       if (!tdb->log_fn)
-               return ecode;
-
-       va_start(ap, fmt);
-       len = vasprintf(&message, fmt, ap);
-       va_end(ap);
-
-       if (len < 0) {
-               tdb->log_fn(tdb, TDB_LOG_ERROR, TDB_ERR_OOM,
-                           "out of memory formatting message:", tdb->log_data);
-               tdb->log_fn(tdb, level, ecode, fmt, tdb->log_data);
-       } else {
-               tdb->log_fn(tdb, level, ecode, message, tdb->log_data);
-               free(message);
-       }
-       errno = saved_errno;
-       return ecode;
-}
-
-enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
-                                TDB_DATA key,
-                                enum TDB_ERROR (*parse)(TDB_DATA k,
-                                                        TDB_DATA d,
-                                                        void *data),
-                                void *data)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               return tdb->last_error = tdb1_parse_record(tdb, key, parse,
-                                                          data);
-       }
-
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(off);
-       }
-
-       if (!off) {
-               ecode = TDB_ERR_NOEXIST;
-       } else {
-               const void *dptr;
-               dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize,
-                                      rec_data_length(&rec), false);
-               if (TDB_PTR_IS_ERR(dptr)) {
-                       ecode = TDB_PTR_ERR(dptr);
-               } else {
-                       TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec));
-
-                       ecode = parse(key, d, data);
-                       tdb_access_release(tdb, dptr);
-               }
-       }
-
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-       return tdb->last_error = ecode;
-}
-
-const char *tdb_name(const struct tdb_context *tdb)
-{
-       return tdb->name;
-}
-
-int64_t tdb_get_seqnum(struct tdb_context *tdb)
-{
-       tdb_off_t off;
-
-       if (tdb->flags & TDB_VERSION1) {
-               tdb1_off_t val;
-               tdb->last_error = TDB_SUCCESS;
-               val = tdb1_get_seqnum(tdb);
-
-               if (tdb->last_error != TDB_SUCCESS)
-                       return TDB_ERR_TO_OFF(tdb->last_error);
-               else
-                       return val;
-       }
-
-       off = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
-       if (TDB_OFF_IS_ERR(off))
-               tdb->last_error = TDB_OFF_TO_ERR(off);
-       else
-               tdb->last_error = TDB_SUCCESS;
-       return off;
-}
-       
-
-int tdb_fd(const struct tdb_context *tdb)
-{
-       return tdb->file->fd;
-}
-
-struct traverse_state {
-       enum TDB_ERROR error;
-       struct tdb_context *dest_db;
-};
-
-/*
-  traverse function for repacking
- */
-static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                          struct traverse_state *state)
-{
-       state->error = tdb_store(state->dest_db, key, data, TDB_INSERT);
-       if (state->error != TDB_SUCCESS) {
-               return -1;
-       }
-       return 0;
-}
-
-enum TDB_ERROR tdb_repack(struct tdb_context *tdb)
-{
-       struct tdb_context *tmp_db;
-       struct traverse_state state;
-
-       state.error = tdb_transaction_start(tdb);
-       if (state.error != TDB_SUCCESS) {
-               return state.error;
-       }
-
-       tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
-       if (tmp_db == NULL) {
-               state.error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                        __location__
-                                        " Failed to create tmp_db");
-               tdb_transaction_cancel(tdb);
-               return tdb->last_error = state.error;
-       }
-
-       state.dest_db = tmp_db;
-       if (tdb_traverse(tdb, repack_traverse, &state) < 0) {
-               goto fail;
-       }
-
-       state.error = tdb_wipe_all(tdb);
-       if (state.error != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       state.dest_db = tdb;
-       if (tdb_traverse(tmp_db, repack_traverse, &state) < 0) {
-               goto fail;
-       }
-
-       tdb_close(tmp_db);
-       return tdb_transaction_commit(tdb);
-
-fail:
-       tdb_transaction_cancel(tdb);
-       tdb_close(tmp_db);
-       return state.error;
-}
diff --git a/ccan/tdb2/tdb1_check.c b/ccan/tdb2/tdb1_check.c
deleted file mode 100644 (file)
index 07ee075..0000000
+++ /dev/null
@@ -1,478 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Rusty Russell            2009
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "tdb1_private.h"
-
-/* Since we opened it, these shouldn't fail unless it's recent corruption. */
-static bool tdb1_check_header(struct tdb_context *tdb, tdb1_off_t *recovery)
-{
-       struct tdb1_header hdr;
-       uint32_t h1, h2;
-
-       if (tdb->tdb1.io->tdb1_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
-               return false;
-       if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
-               goto corrupt;
-
-       TDB1_CONV(hdr);
-       if (hdr.version != TDB1_VERSION)
-               goto corrupt;
-
-       if (hdr.rwlocks != 0 && hdr.rwlocks != TDB1_HASH_RWLOCK_MAGIC)
-               goto corrupt;
-
-       tdb1_header_hash(tdb, &h1, &h2);
-       if (hdr.magic1_hash && hdr.magic2_hash &&
-           (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
-               goto corrupt;
-
-       if (hdr.hash_size == 0)
-               goto corrupt;
-
-       if (hdr.hash_size != tdb->tdb1.header.hash_size)
-               goto corrupt;
-
-       if (hdr.recovery_start != 0 &&
-           hdr.recovery_start < TDB1_DATA_START(tdb->tdb1.header.hash_size))
-               goto corrupt;
-
-       *recovery = hdr.recovery_start;
-       return true;
-
-corrupt:
-       tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                               "Header is corrupt\n");
-       return false;
-}
-
-/* Generic record header check. */
-static bool tdb1_check_record(struct tdb_context *tdb,
-                            tdb1_off_t off,
-                            const struct tdb1_record *rec)
-{
-       tdb1_off_t tailer;
-
-       /* Check rec->next: 0 or points to record offset, aligned. */
-       if (rec->next > 0 && rec->next < TDB1_DATA_START(tdb->tdb1.header.hash_size)){
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                          "Record offset %d too small next %d\n",
-                          off, rec->next);
-               goto corrupt;
-       }
-       if (rec->next + sizeof(*rec) < rec->next) {
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                          "Record offset %d too large next %d\n",
-                          off, rec->next);
-               goto corrupt;
-       }
-       if ((rec->next % TDB1_ALIGNMENT) != 0) {
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                          "Record offset %d misaligned next %d\n",
-                          off, rec->next);
-               goto corrupt;
-       }
-       if (tdb->tdb1.io->tdb1_oob(tdb, rec->next, sizeof(*rec), 0))
-               goto corrupt;
-
-       /* Check rec_len: similar to rec->next, implies next record. */
-       if ((rec->rec_len % TDB1_ALIGNMENT) != 0) {
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                          "Record offset %d misaligned length %d\n",
-                          off, rec->rec_len);
-               goto corrupt;
-       }
-       /* Must fit tailer. */
-       if (rec->rec_len < sizeof(tailer)) {
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                          "Record offset %d too short length %d\n",
-                          off, rec->rec_len);
-               goto corrupt;
-       }
-       /* OOB allows "right at the end" access, so this works for last rec. */
-       if (tdb->tdb1.io->tdb1_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
-               goto corrupt;
-
-       /* Check tailer. */
-       if (tdb1_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
-                        &tailer) == -1)
-               goto corrupt;
-       if (tailer != sizeof(*rec) + rec->rec_len) {
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                          "Record offset %d invalid tailer\n", off);
-               goto corrupt;
-       }
-
-       return true;
-
-corrupt:
-       tdb->last_error = TDB_ERR_CORRUPT;
-       return false;
-}
-
-/* Grab some bytes: may copy if can't use mmap.
-   Caller has already done bounds check. */
-static TDB_DATA get_bytes(struct tdb_context *tdb,
-                         tdb1_off_t off, tdb1_len_t len)
-{
-       TDB_DATA d;
-
-       d.dsize = len;
-
-       if (tdb->tdb1.transaction == NULL && tdb->file->map_ptr != NULL)
-               d.dptr = (unsigned char *)tdb->file->map_ptr + off;
-       else
-               d.dptr = tdb1_alloc_read(tdb, off, d.dsize);
-       return d;
-}
-
-/* Frees data if we're not able to simply use mmap. */
-static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
-{
-       if (tdb->tdb1.transaction == NULL && tdb->file->map_ptr != NULL)
-               return;
-       free(d.dptr);
-}
-
-/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
- * See: http://burtleburtle.net/bob/c/lookup3.c
- */
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-static void jhash(uint32_t key, uint32_t *pc, uint32_t *pb)
-{
-       uint32_t a,b,c;
-
-       /* Set up the internal state */
-       a = b = c = 0xdeadbeef + *pc;
-       c += *pb;
-       a += key;
-       c ^= b; c -= rot(b,14);
-       a ^= c; a -= rot(c,11);
-       b ^= a; b -= rot(a,25);
-       c ^= b; c -= rot(b,16);
-       a ^= c; a -= rot(c,4);
-       b ^= a; b -= rot(a,14);
-       c ^= b; c -= rot(b,24);
-       *pc=c; *pb=b;
-}
-
-/*
-  We want to check that all free records are in the free list
-  (only once), and all free list entries are free records.  Similarly
-  for each hash chain of used records.
-
-  Doing that naively (without walking hash chains, since we want to be
-  linear) means keeping a list of records which have been seen in each
-  hash chain, and another of records pointed to (ie. next pointers
-  from records and the initial hash chain heads).  These two lists
-  should be equal.  This will take 8 bytes per record, and require
-  sorting at the end.
-
-  So instead, we record each offset in a bitmap such a way that
-  recording it twice will cancel out.  Since each offset should appear
-  exactly twice, the bitmap should be zero at the end.
-
-  The approach was inspired by Bloom Filters (see Wikipedia).  For
-  each value, we flip K bits in a bitmap of size N.  The number of
-  distinct arrangements is:
-
-       N! / (K! * (N-K)!)
-
-  Of course, not all arrangements are actually distinct, but testing
-  shows this formula to be close enough.
-
-  So, if K == 8 and N == 256, the probability of two things flipping the same
-  bits is 1 in 409,663,695,276,000.
-
-  Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
-  (320k) seems reasonable.
-*/
-#define NUM_HASHES 8
-#define BITMAP_BITS 256
-
-static void bit_flip(unsigned char bits[], unsigned int idx)
-{
-       bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
-}
-
-/* We record offsets in a bitmap for the particular chain it should be in.  */
-static void record_offset(unsigned char bits[], tdb1_off_t off)
-{
-       uint32_t h1 = off, h2 = 0;
-       unsigned int i;
-
-       /* We get two good hash values out of jhash2, so we use both.  Then
-        * we keep going to produce further hash values. */
-       for (i = 0; i < NUM_HASHES / 2; i++) {
-               jhash(off, &h1, &h2);
-               bit_flip(bits, h1 % BITMAP_BITS);
-               bit_flip(bits, h2 % BITMAP_BITS);
-               h2++;
-       }
-}
-
-/* Check that an in-use record is valid. */
-static bool tdb1_check_used_record(struct tdb_context *tdb,
-                                 tdb1_off_t off,
-                                 const struct tdb1_record *rec,
-                                 unsigned char **hashes,
-                                 enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA,
-                                                         void *),
-                                 void *private_data)
-{
-       TDB_DATA key, data;
-
-       if (!tdb1_check_record(tdb, off, rec))
-               return false;
-
-       /* key + data + tailer must fit in record */
-       if (rec->key_len + rec->data_len + sizeof(tdb1_off_t) > rec->rec_len) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "Record offset %d too short for contents\n", off);
-               return false;
-       }
-
-       key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
-       if (!key.dptr)
-               return false;
-
-       if ((uint32_t)tdb_hash(tdb, key.dptr, key.dsize) != rec->full_hash) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "Record offset %d has incorrect hash\n", off);
-               goto fail_put_key;
-       }
-
-       /* Mark this offset as a known value for this hash bucket. */
-       record_offset(hashes[TDB1_BUCKET(rec->full_hash)+1], off);
-       /* And similarly if the next pointer is valid. */
-       if (rec->next)
-               record_offset(hashes[TDB1_BUCKET(rec->full_hash)+1], rec->next);
-
-       /* If they supply a check function and this record isn't dead,
-          get data and feed it. */
-       if (check && rec->magic != TDB1_DEAD_MAGIC) {
-               enum TDB_ERROR ecode;
-
-               data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
-                                rec->data_len);
-               if (!data.dptr)
-                       goto fail_put_key;
-
-               ecode = check(key, data, private_data);
-               if (ecode != TDB_SUCCESS) {
-                       tdb->last_error = ecode;
-                       goto fail_put_data;
-               }
-               put_bytes(tdb, data);
-       }
-
-       put_bytes(tdb, key);
-       return true;
-
-fail_put_data:
-       put_bytes(tdb, data);
-fail_put_key:
-       put_bytes(tdb, key);
-       return false;
-}
-
-/* Check that an unused record is valid. */
-static bool tdb1_check_free_record(struct tdb_context *tdb,
-                                 tdb1_off_t off,
-                                 const struct tdb1_record *rec,
-                                 unsigned char **hashes)
-{
-       if (!tdb1_check_record(tdb, off, rec))
-               return false;
-
-       /* Mark this offset as a known value for the free list. */
-       record_offset(hashes[0], off);
-       /* And similarly if the next pointer is valid. */
-       if (rec->next)
-               record_offset(hashes[0], rec->next);
-       return true;
-}
-
-/* Slow, but should be very rare. */
-size_t tdb1_dead_space(struct tdb_context *tdb, tdb1_off_t off)
-{
-       size_t len;
-
-       for (len = 0; off + len < tdb->file->map_size; len++) {
-               char c;
-               if (tdb->tdb1.io->tdb1_read(tdb, off, &c, 1, 0))
-                       return 0;
-               if (c != 0 && c != 0x42)
-                       break;
-       }
-       return len;
-}
-
-int tdb1_check(struct tdb_context *tdb,
-              enum TDB_ERROR (*check)(TDB_DATA key, TDB_DATA data, void *),
-              void *private_data)
-{
-       unsigned int h;
-       unsigned char **hashes;
-       tdb1_off_t off, recovery_start;
-       struct tdb1_record rec;
-       bool found_recovery = false;
-       tdb1_len_t dead;
-       bool locked;
-       size_t alloc_len;
-
-       /* We may have a write lock already, so don't re-lock. */
-       if (tdb->file->allrecord_lock.count != 0) {
-               locked = false;
-       } else {
-               if (tdb_lockall_read(tdb) != TDB_SUCCESS)
-                       return -1;
-               locked = true;
-       }
-
-       /* Make sure we know true size of the underlying file. */
-       tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
-
-       /* Header must be OK: also gets us the recovery ptr, if any. */
-       if (!tdb1_check_header(tdb, &recovery_start))
-               goto unlock;
-
-       /* We should have the whole header, too. */
-       if (tdb->file->map_size < TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "File too short for hashes\n");
-               goto unlock;
-       }
-
-       /* One big malloc: pointers then bit arrays. */
-       alloc_len = sizeof(hashes[0]) * (1+tdb->tdb1.header.hash_size)
-               + BITMAP_BITS / CHAR_BIT * (1+tdb->tdb1.header.hash_size);
-       hashes = (unsigned char **)calloc(1, alloc_len);
-       if (!hashes) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                            "tdb_check: could not allocate %zu",
-                                            alloc_len);
-               goto unlock;
-       }
-
-       /* Initialize pointers */
-       hashes[0] = (unsigned char *)(&hashes[1+tdb->tdb1.header.hash_size]);
-       for (h = 1; h < 1+tdb->tdb1.header.hash_size; h++)
-               hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;
-
-       /* Freelist and hash headers are all in a row: read them. */
-       for (h = 0; h < 1+tdb->tdb1.header.hash_size; h++) {
-               if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP + h*sizeof(tdb1_off_t),
-                                &off) == -1)
-                       goto free;
-               if (off)
-                       record_offset(hashes[h], off);
-       }
-
-       /* For each record, read it in and check it's ok. */
-       for (off = TDB1_DATA_START(tdb->tdb1.header.hash_size);
-            off < tdb->file->map_size;
-            off += sizeof(rec) + rec.rec_len) {
-               if (tdb->tdb1.io->tdb1_read(tdb, off, &rec, sizeof(rec),
-                                          TDB1_DOCONV()) == -1)
-                       goto free;
-               switch (rec.magic) {
-               case TDB1_MAGIC:
-               case TDB1_DEAD_MAGIC:
-                       if (!tdb1_check_used_record(tdb, off, &rec, hashes,
-                                                  check, private_data))
-                               goto free;
-                       break;
-               case TDB1_FREE_MAGIC:
-                       if (!tdb1_check_free_record(tdb, off, &rec, hashes))
-                               goto free;
-                       break;
-               /* If we crash after ftruncate, we can get zeroes or fill. */
-               case TDB1_RECOVERY_INVALID_MAGIC:
-               case 0x42424242:
-                       if (recovery_start == off) {
-                               found_recovery = true;
-                               break;
-                       }
-                       dead = tdb1_dead_space(tdb, off);
-                       if (dead < sizeof(rec))
-                               goto corrupt;
-
-                       tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                                  "Dead space at %d-%d (of %u)\n",
-                                  off, off + dead, tdb->file->map_size);
-                       rec.rec_len = dead - sizeof(rec);
-                       break;
-               case TDB1_RECOVERY_MAGIC:
-                       if (recovery_start != off) {
-                               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                                       "Unexpected recovery record at offset %d\n",
-                                                       off);
-                               goto free;
-                       }
-                       found_recovery = true;
-                       break;
-               default: ;
-               corrupt:
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                               "Bad magic 0x%x at offset %d\n",
-                                               rec.magic, off);
-                       goto free;
-               }
-       }
-
-       /* Now, hashes should all be empty: each record exists and is referred
-        * to by one other. */
-       for (h = 0; h < 1+tdb->tdb1.header.hash_size; h++) {
-               unsigned int i;
-               for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
-                       if (hashes[h][i] != 0) {
-                               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                                       "Hashes do not match records\n");
-                               goto free;
-                       }
-               }
-       }
-
-       /* We must have found recovery area if there was one. */
-       if (recovery_start != 0 && !found_recovery) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "Expected a recovery area at %u\n",
-                                       recovery_start);
-               goto free;
-       }
-
-       free(hashes);
-       if (locked) {
-               tdb_unlockall_read(tdb);
-       }
-       return 0;
-
-free:
-       free(hashes);
-unlock:
-       if (locked) {
-               tdb_unlockall_read(tdb);
-       }
-       return -1;
-}
diff --git a/ccan/tdb2/tdb1_freelist.c b/ccan/tdb2/tdb1_freelist.c
deleted file mode 100644 (file)
index ea368ec..0000000
+++ /dev/null
@@ -1,322 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb1_private.h"
-
-/* read a freelist record and check for simple errors */
-int tdb1_rec_free_read(struct tdb_context *tdb, tdb1_off_t off, struct tdb1_record *rec)
-{
-       if (tdb->tdb1.io->tdb1_read(tdb, off, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
-               return -1;
-
-       if (rec->magic == TDB1_MAGIC) {
-               /* this happens when a app is showdown while deleting a record - we should
-                  not completely fail when this happens */
-               tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_WARNING,
-                          "tdb1_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
-                          rec->magic, off);
-               rec->magic = TDB1_FREE_MAGIC;
-               if (tdb->tdb1.io->tdb1_write(tdb, off, rec, sizeof(*rec)) == -1)
-                       return -1;
-       }
-
-       if (rec->magic != TDB1_FREE_MAGIC) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "tdb1_rec_free_read bad magic 0x%x at offset=%d\n",
-                                       rec->magic, off);
-               return -1;
-       }
-       if (tdb->tdb1.io->tdb1_oob(tdb, rec->next, sizeof(*rec), 0) != 0)
-               return -1;
-       return 0;
-}
-
-
-/* update a record tailer (must hold allocation lock) */
-static int update_tailer(struct tdb_context *tdb, tdb1_off_t offset,
-                        const struct tdb1_record *rec)
-{
-       tdb1_off_t totalsize;
-
-       /* Offset of tailer from record header */
-       totalsize = sizeof(*rec) + rec->rec_len;
-       return tdb1_ofs_write(tdb, offset + totalsize - sizeof(tdb1_off_t),
-                        &totalsize);
-}
-
-/* Add an element into the freelist. Merge adjacent records if
-   necessary. */
-int tdb1_free(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
-{
-       /* Allocation and tailer lock */
-       if (tdb1_lock(tdb, -1, F_WRLCK) != 0)
-               return -1;
-
-       /* set an initial tailer, so if we fail we don't leave a bogus record */
-       if (update_tailer(tdb, offset, rec) != 0) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb_free: update_tailer failed!\n");
-               goto fail;
-       }
-
-       tdb->stats.alloc_coalesce_tried++;
-       /* Look left */
-       if (offset - sizeof(tdb1_off_t) > TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
-               tdb1_off_t left = offset - sizeof(tdb1_off_t);
-               struct tdb1_record l;
-               tdb1_off_t leftsize;
-
-               /* Read in tailer and jump back to header */
-               if (tdb1_ofs_read(tdb, left, &leftsize) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_free: left offset read failed at %u", left);
-                       goto update;
-               }
-
-               /* it could be uninitialised data */
-               if (leftsize == 0 || leftsize == TDB1_PAD_U32) {
-                       goto update;
-               }
-
-               left = offset - leftsize;
-
-               if (leftsize > offset ||
-                   left < TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
-                       goto update;
-               }
-
-               /* Now read in the left record */
-               if (tdb->tdb1.io->tdb1_read(tdb, left, &l, sizeof(l), TDB1_DOCONV()) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_free: left read failed at %u (%u)", left, leftsize);
-                       goto update;
-               }
-
-               /* If it's free, expand to include it. */
-               if (l.magic == TDB1_FREE_MAGIC) {
-                       /* we now merge the new record into the left record, rather than the other
-                          way around. This makes the operation O(1) instead of O(n). This change
-                          prevents traverse from being O(n^2) after a lot of deletes */
-                       l.rec_len += sizeof(*rec) + rec->rec_len;
-                       if (tdb1_rec_write(tdb, left, &l) == -1) {
-                               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                          "tdb1_free: update_left failed at %u", left);
-                               goto fail;
-                       }
-                       if (update_tailer(tdb, left, &l) == -1) {
-                               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                          "tdb1_free: update_tailer failed at %u", offset);
-                               goto fail;
-                       }
-                       tdb->stats.alloc_coalesce_succeeded++;
-                       tdb->stats.alloc_coalesce_num_merged++;
-                       tdb->stats.frees++;
-                       tdb1_unlock(tdb, -1, F_WRLCK);
-                       return 0;
-               }
-       }
-
-update:
-
-       /* Now, prepend to free list */
-       rec->magic = TDB1_FREE_MAGIC;
-
-       if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec->next) == -1 ||
-           tdb1_rec_write(tdb, offset, rec) == -1 ||
-           tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_free record write failed at offset=%d",
-                          offset);
-               goto fail;
-       }
-
-       /* And we're done. */
-       tdb->stats.frees++;
-       tdb1_unlock(tdb, -1, F_WRLCK);
-       return 0;
-
- fail:
-       tdb1_unlock(tdb, -1, F_WRLCK);
-       return -1;
-}
-
-
-
-/*
-   the core of tdb1_allocate - called when we have decided which
-   free list entry to use
-
-   Note that we try to allocate by grabbing data from the end of an existing record,
-   not the beginning. This is so the left merge in a free is more likely to be
-   able to free up the record without fragmentation
- */
-static tdb1_off_t tdb1_allocate_ofs(struct tdb_context *tdb,
-                                 tdb1_len_t length, tdb1_off_t rec_ptr,
-                                 struct tdb1_record *rec, tdb1_off_t last_ptr)
-{
-#define MIN_REC_SIZE (sizeof(struct tdb1_record) + sizeof(tdb1_off_t) + 8)
-
-       if (rec->rec_len < length + MIN_REC_SIZE) {
-               /* we have to grab the whole record */
-
-               /* unlink it from the previous record */
-               if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1) {
-                       return 0;
-               }
-
-               /* mark it not free */
-               rec->magic = TDB1_MAGIC;
-               if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
-                       return 0;
-               }
-               tdb->stats.allocs++;
-               return rec_ptr;
-       }
-
-       /* we're going to just shorten the existing record */
-       rec->rec_len -= (length + sizeof(*rec));
-       if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
-               return 0;
-       }
-       if (update_tailer(tdb, rec_ptr, rec) == -1) {
-               return 0;
-       }
-
-       /* and setup the new record */
-       rec_ptr += sizeof(*rec) + rec->rec_len;
-
-       memset(rec, '\0', sizeof(*rec));
-       rec->rec_len = length;
-       rec->magic = TDB1_MAGIC;
-
-       if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
-               return 0;
-       }
-
-       if (update_tailer(tdb, rec_ptr, rec) == -1) {
-               return 0;
-       }
-
-       tdb->stats.allocs++;
-       tdb->stats.alloc_leftover++;
-       return rec_ptr;
-}
-
-/* allocate some space from the free list. The offset returned points
-   to a unconnected tdb1_record within the database with room for at
-   least length bytes of total data
-
-   0 is returned if the space could not be allocated
- */
-tdb1_off_t tdb1_allocate(struct tdb_context *tdb, tdb1_len_t length, struct tdb1_record *rec)
-{
-       tdb1_off_t rec_ptr, last_ptr, newrec_ptr;
-       struct {
-               tdb1_off_t rec_ptr, last_ptr;
-               tdb1_len_t rec_len;
-       } bestfit;
-       float multiplier = 1.0;
-
-       if (tdb1_lock(tdb, -1, F_WRLCK) == -1)
-               return 0;
-
-       /* over-allocate to reduce fragmentation */
-       length *= 1.25;
-
-       /* Extra bytes required for tailer */
-       length += sizeof(tdb1_off_t);
-       length = TDB1_ALIGN(length, TDB1_ALIGNMENT);
-
- again:
-       last_ptr = TDB1_FREELIST_TOP;
-
-       /* read in the freelist top */
-       if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec_ptr) == -1)
-               goto fail;
-
-       bestfit.rec_ptr = 0;
-       bestfit.last_ptr = 0;
-       bestfit.rec_len = 0;
-
-       /*
-          this is a best fit allocation strategy. Originally we used
-          a first fit strategy, but it suffered from massive fragmentation
-          issues when faced with a slowly increasing record size.
-        */
-       while (rec_ptr) {
-               if (tdb1_rec_free_read(tdb, rec_ptr, rec) == -1) {
-                       goto fail;
-               }
-
-               if (rec->rec_len >= length) {
-                       if (bestfit.rec_ptr == 0 ||
-                           rec->rec_len < bestfit.rec_len) {
-                               bestfit.rec_len = rec->rec_len;
-                               bestfit.rec_ptr = rec_ptr;
-                               bestfit.last_ptr = last_ptr;
-                       }
-               }
-
-               /* move to the next record */
-               last_ptr = rec_ptr;
-               rec_ptr = rec->next;
-
-               /* if we've found a record that is big enough, then
-                  stop searching if its also not too big. The
-                  definition of 'too big' changes as we scan
-                  through */
-               if (bestfit.rec_len > 0 &&
-                   bestfit.rec_len < length * multiplier) {
-                       break;
-               }
-
-               /* this multiplier means we only extremely rarely
-                  search more than 50 or so records. At 50 records we
-                  accept records up to 11 times larger than what we
-                  want */
-               multiplier *= 1.05;
-       }
-
-       if (bestfit.rec_ptr != 0) {
-               if (tdb1_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
-                       goto fail;
-               }
-
-               newrec_ptr = tdb1_allocate_ofs(tdb, length, bestfit.rec_ptr,
-                                             rec, bestfit.last_ptr);
-               tdb1_unlock(tdb, -1, F_WRLCK);
-               return newrec_ptr;
-       }
-
-       /* we didn't find enough space. See if we can expand the
-          database and if we can then try again */
-       if (tdb1_expand(tdb, length + sizeof(*rec)) == 0)
-               goto again;
- fail:
-       tdb1_unlock(tdb, -1, F_WRLCK);
-       return 0;
-}
diff --git a/ccan/tdb2/tdb1_hash.c b/ccan/tdb2/tdb1_hash.c
deleted file mode 100644 (file)
index 2d5e496..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Rusty Russell            2010
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "tdb1_private.h"
-
-/* This is based on the hash algorithm from gdbm */
-uint64_t tdb1_old_hash(const void *key, size_t len, uint64_t seed, void *unused)
-{
-       uint32_t value; /* Used to compute the hash value.  */
-       uint32_t   i;   /* Used to cycle through random values. */
-       const unsigned char *dptr = key;
-
-       /* Set the initial value from the key size. */
-       for (value = 0x238F13AF * len, i=0; i < len; i++)
-               value = (value + (dptr[i] << (i*5 % 24)));
-
-       return (1103515243 * value + 12345);
-}
-
-#ifndef WORDS_BIGENDIAN
-# define HASH_LITTLE_ENDIAN 1
-# define HASH_BIG_ENDIAN 0
-#else
-# define HASH_LITTLE_ENDIAN 0
-# define HASH_BIG_ENDIAN 1
-#endif
-
-/*
--------------------------------------------------------------------------------
-lookup3.c, by Bob Jenkins, May 2006, Public Domain.
-
-These are functions for producing 32-bit hashes for hash table lookup.
-hash_word(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
-are externally useful functions.  Routines to test the hash are included
-if SELF_TEST is defined.  You can use this free for any purpose.  It's in
-the public domain.  It has no warranty.
-
-You probably want to use hashlittle().  hashlittle() and hashbig()
-hash byte arrays.  hashlittle() is is faster than hashbig() on
-little-endian machines.  Intel and AMD are little-endian machines.
-On second thought, you probably want hashlittle2(), which is identical to
-hashlittle() except it returns two 32-bit hashes for the price of one.
-You could implement hashbig2() if you wanted but I haven't bothered here.
-
-If you want to find a hash of, say, exactly 7 integers, do
-  a = i1;  b = i2;  c = i3;
-  mix(a,b,c);
-  a += i4; b += i5; c += i6;
-  mix(a,b,c);
-  a += i7;
-  final(a,b,c);
-then use c as the hash value.  If you have a variable length array of
-4-byte integers to hash, use hash_word().  If you have a byte array (like
-a character string), use hashlittle().  If you have several byte arrays, or
-a mix of things, see the comments above hashlittle().
-
-Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
-then mix those integers.  This is fast (you can do a lot more thorough
-mixing with 12*3 instructions on 3 integers than you can with 3 instructions
-on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-*/
-
-#define hashsize(n) ((uint32_t)1<<(n))
-#define hashmask(n) (hashsize(n)-1)
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-/*
--------------------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-
-This is reversible, so any information in (a,b,c) before mix() is
-still in (a,b,c) after mix().
-
-If four pairs of (a,b,c) inputs are run through mix(), or through
-mix() in reverse, there are at least 32 bits of the output that
-are sometimes the same for one pair and different for another pair.
-This was tested for:
-* pairs that differed by one bit, by two bits, in any combination
-  of top bits of (a,b,c), or in any combination of bottom bits of
-  (a,b,c).
-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  is commonly produced by subtraction) look like a single 1-bit
-  difference.
-* the base values were pseudorandom, all zero but one bit set, or
-  all zero plus a counter that starts at zero.
-
-Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
-satisfy this are
-    4  6  8 16 19  4
-    9 15  3 18 27 15
-   14  9  3  7 17  3
-Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
-for "differ" defined as + with a one-bit base and a two-bit delta.  I
-used http://burtleburtle.net/bob/hash/avalanche.html to choose
-the operations, constants, and arrangements of the variables.
-
-This does not achieve avalanche.  There are input bits of (a,b,c)
-that fail to affect some output bits of (a,b,c), especially of a.  The
-most thoroughly mixed value is c, but it doesn't really even achieve
-avalanche in c.
-
-This allows some parallelism.  Read-after-writes are good at doubling
-the number of bits affected, so the goal of mixing pulls in the opposite
-direction as the goal of parallelism.  I did what I could.  Rotates
-seem to cost as much as shifts on every machine I could lay my hands
-on, and rotates are much kinder to the top and bottom bits, so I used
-rotates.
--------------------------------------------------------------------------------
-*/
-#define mix(a,b,c) \
-{ \
-  a -= c;  a ^= rot(c, 4);  c += b; \
-  b -= a;  b ^= rot(a, 6);  a += c; \
-  c -= b;  c ^= rot(b, 8);  b += a; \
-  a -= c;  a ^= rot(c,16);  c += b; \
-  b -= a;  b ^= rot(a,19);  a += c; \
-  c -= b;  c ^= rot(b, 4);  b += a; \
-}
-
-/*
--------------------------------------------------------------------------------
-final -- final mixing of 3 32-bit values (a,b,c) into c
-
-Pairs of (a,b,c) values differing in only a few bits will usually
-produce values of c that look totally different.  This was tested for
-* pairs that differed by one bit, by two bits, in any combination
-  of top bits of (a,b,c), or in any combination of bottom bits of
-  (a,b,c).
-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  is commonly produced by subtraction) look like a single 1-bit
-  difference.
-* the base values were pseudorandom, all zero but one bit set, or
-  all zero plus a counter that starts at zero.
-
-These constants passed:
- 14 11 25 16 4 14 24
- 12 14 25 16 4 14 24
-and these came close:
-  4  8 15 26 3 22 24
- 10  8 15 26 3 22 24
- 11  8 15 26 3 22 24
--------------------------------------------------------------------------------
-*/
-#define final(a,b,c) \
-{ \
-  c ^= b; c -= rot(b,14); \
-  a ^= c; a -= rot(c,11); \
-  b ^= a; b -= rot(a,25); \
-  c ^= b; c -= rot(b,16); \
-  a ^= c; a -= rot(c,4);  \
-  b ^= a; b -= rot(a,14); \
-  c ^= b; c -= rot(b,24); \
-}
-
-
-/*
--------------------------------------------------------------------------------
-hashlittle() -- hash a variable-length key into a 32-bit value
-  k       : the key (the unaligned variable-length array of bytes)
-  length  : the length of the key, counting by bytes
-  val2    : IN: can be any 4-byte value OUT: second 32 bit hash.
-Returns a 32-bit value.  Every bit of the key affects every bit of
-the return value.  Two keys differing by one or two bits will have
-totally different hash values.  Note that the return value is better
-mixed than val2, so use that first.
-
-The best hash table sizes are powers of 2.  There is no need to do
-mod a prime (mod is sooo slow!).  If you need less than 32 bits,
-use a bitmask.  For example, if you need only 10 bits, do
-  h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (uint8_t **)k, do it like this:
-  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
-
-By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
-code any way you wish, private, educational, or commercial.  It's free.
-
-Use for hash table lookup, or anything where one collision in 2^^32 is
-acceptable.  Do NOT use for cryptographic purposes.
--------------------------------------------------------------------------------
-*/
-
-static uint32_t hashlittle( const void *key, size_t length )
-{
-  uint32_t a,b,c;                                          /* internal state */
-  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length);
-
-  u.ptr = key;
-  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
-    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
-    const uint8_t  *k8;
-
-    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      b += k[1];
-      c += k[2];
-      mix(a,b,c);
-      length -= 12;
-      k += 3;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
-    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
-    case 9 : c+=k8[8];                   /* fall through */
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
-    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
-    case 5 : b+=k8[4];                   /* fall through */
-    case 4 : a+=k[0]; break;
-    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
-    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
-    case 1 : a+=k8[0]; break;
-    case 0 : return c;
-    }
-  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
-    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
-    const uint8_t  *k8;
-
-    /*--------------- all but last block: aligned reads and different mixing */
-    while (length > 12)
-    {
-      a += k[0] + (((uint32_t)k[1])<<16);
-      b += k[2] + (((uint32_t)k[3])<<16);
-      c += k[4] + (((uint32_t)k[5])<<16);
-      mix(a,b,c);
-      length -= 12;
-      k += 6;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
-    case 10: c+=k[4];
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 9 : c+=k8[8];                      /* fall through */
-    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
-    case 6 : b+=k[2];
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 5 : b+=k8[4];                      /* fall through */
-    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
-    case 2 : a+=k[0];
-             break;
-    case 1 : a+=k8[0];
-             break;
-    case 0 : return c;                     /* zero length requires no mixing */
-    }
-
-  } else {                        /* need to read the key one byte at a time */
-    const uint8_t *k = (const uint8_t *)key;
-
-    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      a += ((uint32_t)k[1])<<8;
-      a += ((uint32_t)k[2])<<16;
-      a += ((uint32_t)k[3])<<24;
-      b += k[4];
-      b += ((uint32_t)k[5])<<8;
-      b += ((uint32_t)k[6])<<16;
-      b += ((uint32_t)k[7])<<24;
-      c += k[8];
-      c += ((uint32_t)k[9])<<8;
-      c += ((uint32_t)k[10])<<16;
-      c += ((uint32_t)k[11])<<24;
-      mix(a,b,c);
-      length -= 12;
-      k += 12;
-    }
-
-    /*-------------------------------- last block: affect all 32 bits of (c) */
-    switch(length)                   /* all the case statements fall through */
-    {
-    case 12: c+=((uint32_t)k[11])<<24;
-    case 11: c+=((uint32_t)k[10])<<16;
-    case 10: c+=((uint32_t)k[9])<<8;
-    case 9 : c+=k[8];
-    case 8 : b+=((uint32_t)k[7])<<24;
-    case 7 : b+=((uint32_t)k[6])<<16;
-    case 6 : b+=((uint32_t)k[5])<<8;
-    case 5 : b+=k[4];
-    case 4 : a+=((uint32_t)k[3])<<24;
-    case 3 : a+=((uint32_t)k[2])<<16;
-    case 2 : a+=((uint32_t)k[1])<<8;
-    case 1 : a+=k[0];
-             break;
-    case 0 : return c;
-    }
-  }
-
-  final(a,b,c);
-  return c;
-}
-
-uint64_t tdb1_incompatible_hash(const void *key, size_t len, uint64_t seed,
-                                void *unused)
-{
-       return hashlittle(key, len);
-}
diff --git a/ccan/tdb2/tdb1_io.c b/ccan/tdb2/tdb1_io.c
deleted file mode 100644 (file)
index 488f3d8..0000000
+++ /dev/null
@@ -1,543 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "tdb1_private.h"
-#ifndef MAX
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-#endif
-
-/* check for an out of bounds access - if it is out of bounds then
-   see if the database has been expanded by someone else and expand
-   if necessary
-   note that "len" is the minimum length needed for the db
-*/
-static int tdb1_oob(struct tdb_context *tdb, tdb1_off_t off, tdb1_len_t len,
-                   int probe)
-{
-       struct stat st;
-       if (len + off < len) {
-               if (!probe) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                                    "tdb1_oob off %d len %d wrap\n",
-                                                    (int)off, (int)len);
-               }
-               return -1;
-       }
-
-       if (off + len <= tdb->file->map_size)
-               return 0;
-       if (tdb->flags & TDB_INTERNAL) {
-               if (!probe) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                                    "tdb1_oob len %d beyond internal malloc size %u",
-                                                    (int)(off + len), (int)tdb->file->map_size);
-               }
-               return -1;
-       }
-
-       if (fstat(tdb->file->fd, &st) == -1) {
-               tdb->last_error = TDB_ERR_IO;
-               return -1;
-       }
-
-       if (st.st_size < (size_t)off + len) {
-               if (!probe) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                                    "tdb1_oob len %u beyond eof at %u",
-                                                    (int)(off + len), (int)st.st_size);
-               }
-               return -1;
-       }
-
-       /* Beware >4G files! */
-       if ((tdb1_off_t)st.st_size != st.st_size) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                            "tdb1_oob len %llu too large!\n",
-                                            (long long)st.st_size);
-               return -1;
-       }
-
-       /* Unmap, update size, remap */
-       if (tdb1_munmap(tdb) == -1) {
-               tdb->last_error = TDB_ERR_IO;
-               return -1;
-       }
-       tdb->file->map_size = st.st_size;
-       tdb1_mmap(tdb);
-       return 0;
-}
-
-/* write a lump of data at a specified offset */
-static int tdb1_write(struct tdb_context *tdb, tdb1_off_t off,
-                    const void *buf, tdb1_len_t len)
-{
-       if (len == 0) {
-               return 0;
-       }
-
-       if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
-               tdb->last_error = TDB_ERR_RDONLY;
-               return -1;
-       }
-
-       if (tdb->tdb1.io->tdb1_oob(tdb, off, len, 0) != 0)
-               return -1;
-
-       if (tdb->file->map_ptr) {
-               memcpy(off + (char *)tdb->file->map_ptr, buf, len);
-       } else {
-               ssize_t written = pwrite(tdb->file->fd, buf, len, off);
-               if ((written != (ssize_t)len) && (written != -1)) {
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
-                                  "tdb1_write: wrote only "
-                                  "%d of %d bytes at %d, trying once more",
-                                  (int)written, len, off);
-                       written = pwrite(tdb->file->fd,
-                                        (const char *)buf+written,
-                                        len-written,
-                                        off+written);
-               }
-               if (written == -1) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "tdb1_write failed at %d "
-                                               "len=%d (%s)",
-                                               off, len, strerror(errno));
-                       return -1;
-               } else if (written != (ssize_t)len) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "tdb1_write: failed to "
-                                               "write %d bytes at %d in two attempts",
-                                               len, off);
-                       return -1;
-               }
-       }
-       return 0;
-}
-
-/* Endian conversion: we only ever deal with 4 byte quantities */
-void *tdb1_convert(void *buf, uint32_t size)
-{
-       uint32_t i, *p = (uint32_t *)buf;
-       for (i = 0; i < size / 4; i++)
-               p[i] = TDB1_BYTEREV(p[i]);
-       return buf;
-}
-
-
-/* read a lump of data at a specified offset, maybe convert */
-static int tdb1_read(struct tdb_context *tdb, tdb1_off_t off, void *buf,
-                   tdb1_len_t len, int cv)
-{
-       if (tdb->tdb1.io->tdb1_oob(tdb, off, len, 0) != 0) {
-               return -1;
-       }
-
-       if (tdb->file->map_ptr) {
-               memcpy(buf, off + (char *)tdb->file->map_ptr, len);
-       } else {
-               ssize_t ret = pread(tdb->file->fd, buf, len, off);
-               if (ret != (ssize_t)len) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "tdb1_read failed at %d "
-                                               "len=%d ret=%d (%s) map_size=%d",
-                                               (int)off, (int)len, (int)ret,
-                                               strerror(errno),
-                                               (int)tdb->file->map_size);
-                       return -1;
-               }
-       }
-       if (cv) {
-               tdb1_convert(buf, len);
-       }
-       return 0;
-}
-
-
-
-/*
-  do an unlocked scan of the hash table heads to find the next non-zero head. The value
-  will then be confirmed with the lock held
-*/
-static void tdb1_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
-{
-       uint32_t h = *chain;
-       if (tdb->file->map_ptr) {
-               for (;h < tdb->tdb1.header.hash_size;h++) {
-                       if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->file->map_ptr)) {
-                               break;
-                       }
-               }
-       } else {
-               uint32_t off=0;
-               for (;h < tdb->tdb1.header.hash_size;h++) {
-                       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
-                               break;
-                       }
-               }
-       }
-       (*chain) = h;
-}
-
-
-int tdb1_munmap(struct tdb_context *tdb)
-{
-       if (tdb->flags & TDB_INTERNAL)
-               return 0;
-
-#if HAVE_MMAP
-       if (tdb->file->map_ptr) {
-               int ret;
-
-               ret = munmap(tdb->file->map_ptr, tdb->file->map_size);
-               if (ret != 0)
-                       return ret;
-       }
-#endif
-       tdb->file->map_ptr = NULL;
-       return 0;
-}
-
-void tdb1_mmap(struct tdb_context *tdb)
-{
-       if (tdb->flags & TDB_INTERNAL)
-               return;
-
-#if HAVE_MMAP
-       if (!(tdb->flags & TDB_NOMMAP)) {
-               int mmap_flags;
-               if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
-                       mmap_flags = PROT_READ;
-               else
-                       mmap_flags = PROT_READ | PROT_WRITE;
-
-               tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
-                                   mmap_flags,
-                                   MAP_SHARED|MAP_FILE, tdb->file->fd, 0);
-
-               /*
-                * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
-                */
-
-               if (tdb->file->map_ptr == MAP_FAILED) {
-                       tdb->file->map_ptr = NULL;
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
-                                  "tdb1_mmap failed for size %d (%s)",
-                                  tdb->file->map_size, strerror(errno));
-               }
-       } else {
-               tdb->file->map_ptr = NULL;
-       }
-#else
-       tdb->file->map_ptr = NULL;
-#endif
-}
-
-/* expand a file.  we prefer to use ftruncate, as that is what posix
-  says to use for mmap expansion */
-static int tdb1_expand_file(struct tdb_context *tdb, tdb1_off_t size, tdb1_off_t addition)
-{
-       char buf[8192];
-
-       if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
-               tdb->last_error = TDB_ERR_RDONLY;
-               return -1;
-       }
-
-       if (ftruncate(tdb->file->fd, size+addition) == -1) {
-               char b = 0;
-               ssize_t written = pwrite(tdb->file->fd, &b, 1,
-                                        (size+addition) - 1);
-               if (written == 0) {
-                       /* try once more, potentially revealing errno */
-                       written = pwrite(tdb->file->fd, &b, 1,
-                                        (size+addition) - 1);
-               }
-               if (written == 0) {
-                       /* again - give up, guessing errno */
-                       errno = ENOSPC;
-               }
-               if (written != 1) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "expand_file to %d failed (%s)",
-                                               size+addition,
-                                               strerror(errno));
-                       return -1;
-               }
-       }
-
-       /* now fill the file with something. This ensures that the
-          file isn't sparse, which would be very bad if we ran out of
-          disk. This must be done with write, not via mmap */
-       memset(buf, TDB1_PAD_BYTE, sizeof(buf));
-       while (addition) {
-               size_t n = addition>sizeof(buf)?sizeof(buf):addition;
-               ssize_t written = pwrite(tdb->file->fd, buf, n, size);
-               if (written == 0) {
-                       /* prevent infinite loops: try _once_ more */
-                       written = pwrite(tdb->file->fd, buf, n, size);
-               }
-               if (written == 0) {
-                       /* give up, trying to provide a useful errno */
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "expand_file write "
-                                               "returned 0 twice: giving up!");
-                       errno = ENOSPC;
-                       return -1;
-               } else if (written == -1) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "expand_file write of "
-                                               "%d bytes failed (%s)", (int)n,
-                                               strerror(errno));
-                       return -1;
-               } else if (written != n) {
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
-                                  "expand_file: wrote "
-                                  "only %d of %d bytes - retrying",
-                                  (int)written, (int)n);
-               }
-               addition -= written;
-               size += written;
-       }
-       tdb->stats.expands++;
-       return 0;
-}
-
-
-/* You need 'size', this tells you how much you should expand by. */
-tdb1_off_t tdb1_expand_adjust(tdb1_off_t map_size, tdb1_off_t size, int page_size)
-{
-       tdb1_off_t new_size, top_size;
-
-       /* limit size in order to avoid using up huge amounts of memory for
-        * in memory tdbs if an oddball huge record creeps in */
-       if (size > 100 * 1024) {
-               top_size = map_size + size * 2;
-       } else {
-               top_size = map_size + size * 100;
-       }
-
-       /* always make room for at least top_size more records, and at
-          least 25% more space. if the DB is smaller than 100MiB,
-          otherwise grow it by 10% only. */
-       if (map_size > 100 * 1024 * 1024) {
-               new_size = map_size * 1.10;
-       } else {
-               new_size = map_size * 1.25;
-       }
-
-       /* Round the database up to a multiple of the page size */
-       new_size = MAX(top_size, new_size);
-       return TDB1_ALIGN(new_size, page_size) - map_size;
-}
-
-/* expand the database at least size bytes by expanding the underlying
-   file and doing the mmap again if necessary */
-int tdb1_expand(struct tdb_context *tdb, tdb1_off_t size)
-{
-       struct tdb1_record rec;
-       tdb1_off_t offset;
-
-       if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "lock failed in tdb1_expand");
-               return -1;
-       }
-
-       /* must know about any previous expansions by another process */
-       tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
-
-       size = tdb1_expand_adjust(tdb->file->map_size, size,
-                                 tdb->tdb1.page_size);
-
-       if (!(tdb->flags & TDB_INTERNAL))
-               tdb1_munmap(tdb);
-
-       /*
-        * We must ensure the file is unmapped before doing this
-        * to ensure consistency with systems like OpenBSD where
-        * writes and mmaps are not consistent.
-        */
-
-       /* expand the file itself */
-       if (!(tdb->flags & TDB_INTERNAL)) {
-               if (tdb->tdb1.io->tdb1_expand_file(tdb, tdb->file->map_size, size) != 0)
-                       goto fail;
-       }
-
-       tdb->file->map_size += size;
-
-       if (tdb->flags & TDB_INTERNAL) {
-               char *new_map_ptr = (char *)realloc(tdb->file->map_ptr,
-                                                   tdb->file->map_size);
-               if (!new_map_ptr) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM,
-                                                    TDB_LOG_ERROR,
-                                                    "tdb1_expand: no memory");
-                       tdb->file->map_size -= size;
-                       goto fail;
-               }
-               tdb->file->map_ptr = new_map_ptr;
-       } else {
-               /*
-                * We must ensure the file is remapped before adding the space
-                * to ensure consistency with systems like OpenBSD where
-                * writes and mmaps are not consistent.
-                */
-
-               /* We're ok if the mmap fails as we'll fallback to read/write */
-               tdb1_mmap(tdb);
-       }
-
-       /* form a new freelist record */
-       memset(&rec,'\0',sizeof(rec));
-       rec.rec_len = size - sizeof(rec);
-
-       /* link it into the free list */
-       offset = tdb->file->map_size - size;
-       if (tdb1_free(tdb, offset, &rec) == -1)
-               goto fail;
-
-       tdb1_unlock(tdb, -1, F_WRLCK);
-       return 0;
- fail:
-       tdb1_unlock(tdb, -1, F_WRLCK);
-       return -1;
-}
-
-/* read/write a tdb1_off_t */
-int tdb1_ofs_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
-{
-       return tdb->tdb1.io->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
-}
-
-int tdb1_ofs_write(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
-{
-       tdb1_off_t off = *d;
-       return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
-}
-
-
-/* read a lump of data, allocating the space for it */
-unsigned char *tdb1_alloc_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t len)
-{
-       unsigned char *buf;
-
-       /* some systems don't like zero length malloc */
-
-       if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                            "tdb1_alloc_read malloc failed"
-                                            " len=%d (%s)",
-                                            len, strerror(errno));
-               return NULL;
-       }
-       if (tdb->tdb1.io->tdb1_read(tdb, offset, buf, len, 0) == -1) {
-               SAFE_FREE(buf);
-               return NULL;
-       }
-       return buf;
-}
-
-/* Give a piece of tdb data to a parser */
-enum TDB_ERROR tdb1_parse_data(struct tdb_context *tdb, TDB_DATA key,
-                              tdb1_off_t offset, tdb1_len_t len,
-                              enum TDB_ERROR (*parser)(TDB_DATA key,
-                                                       TDB_DATA data,
-                                                       void *private_data),
-                              void *private_data)
-{
-       TDB_DATA data;
-       enum TDB_ERROR result;
-
-       data.dsize = len;
-
-       if ((tdb->tdb1.transaction == NULL) && (tdb->file->map_ptr != NULL)) {
-               /*
-                * Optimize by avoiding the malloc/memcpy/free, point the
-                * parser directly at the mmap area.
-                */
-               if (tdb->tdb1.io->tdb1_oob(tdb, offset, len, 0) != 0) {
-                       return tdb->last_error;
-               }
-               data.dptr = offset + (unsigned char *)tdb->file->map_ptr;
-               return parser(key, data, private_data);
-       }
-
-       if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
-               return tdb->last_error;
-       }
-
-       result = parser(key, data, private_data);
-       free(data.dptr);
-       return result;
-}
-
-/* read/write a record */
-int tdb1_rec_read(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
-{
-       if (tdb->tdb1.io->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
-               return -1;
-       if (TDB1_BAD_MAGIC(rec)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "tdb1_rec_read bad magic 0x%x at offset=%d",
-                                       rec->magic, offset);
-               return -1;
-       }
-       return tdb->tdb1.io->tdb1_oob(tdb, rec->next, sizeof(*rec), 0);
-}
-
-int tdb1_rec_write(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
-{
-       struct tdb1_record r = *rec;
-       return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
-}
-
-static const struct tdb1_methods io1_methods = {
-       tdb1_read,
-       tdb1_write,
-       tdb1_next_hash_chain,
-       tdb1_oob,
-       tdb1_expand_file,
-};
-
-/*
-  initialise the default methods table
-*/
-void tdb1_io_init(struct tdb_context *tdb)
-{
-       tdb->tdb1.io = &io1_methods;
-}
-
-enum TDB_ERROR tdb1_probe_length(struct tdb_context *tdb)
-{
-       tdb->last_error = TDB_SUCCESS;
-       tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, true);
-       return tdb->last_error;
-}
diff --git a/ccan/tdb2/tdb1_lock.c b/ccan/tdb2/tdb1_lock.c
deleted file mode 100644 (file)
index 5cc0ad6..0000000
+++ /dev/null
@@ -1,560 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb1_private.h"
-
-/* list -1 is the alloc list, otherwise a hash chain. */
-static tdb1_off_t lock_offset(int list)
-{
-       return TDB1_FREELIST_TOP + 4*list;
-}
-
-/* a byte range locking function - return 0 on success
-   this functions locks/unlocks 1 byte at the specified offset.
-
-   On error, errno is also set so that errors are passed back properly
-   through tdb1_open().
-
-   note that a len of zero means lock to end of file
-*/
-int tdb1_brlock(struct tdb_context *tdb,
-              int rw_type, tdb1_off_t offset, size_t len,
-              enum tdb_lock_flags flags)
-{
-       enum TDB_ERROR ecode = tdb_brlock(tdb, rw_type, offset, len, flags
-                                         | TDB_LOCK_NOCHECK);
-       if (ecode == TDB_SUCCESS)
-               return 0;
-       tdb->last_error = ecode;
-       return -1;
-}
-
-int tdb1_brunlock(struct tdb_context *tdb,
-                int rw_type, tdb1_off_t offset, size_t len)
-{
-       enum TDB_ERROR ecode = tdb_brunlock(tdb, rw_type, offset, len);
-       if (ecode == TDB_SUCCESS)
-               return 0;
-       tdb->last_error = ecode;
-       return -1;
-}
-
-int tdb1_allrecord_upgrade(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode = tdb_allrecord_upgrade(tdb, TDB1_FREELIST_TOP);
-       if (ecode == TDB_SUCCESS)
-               return 0;
-       tdb->last_error = ecode;
-       return -1;
-}
-
-static struct tdb_lock *tdb1_find_nestlock(struct tdb_context *tdb,
-                                          tdb1_off_t offset)
-{
-       unsigned int i;
-
-       for (i=0; i<tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off == offset) {
-                       return &tdb->file->lockrecs[i];
-               }
-       }
-       return NULL;
-}
-
-/* lock an offset in the database. */
-int tdb1_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
-                 enum tdb_lock_flags flags)
-{
-       enum TDB_ERROR ecode;
-
-       if (offset >= lock_offset(tdb->tdb1.header.hash_size)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                       "tdb1_lock: invalid offset %u for"
-                                       " ltype=%d",
-                                       offset, ltype);
-               return -1;
-       }
-
-       ecode = tdb_nest_lock(tdb, offset, ltype, flags | TDB_LOCK_NOCHECK);
-       if (unlikely(ecode != TDB_SUCCESS)) {
-               tdb->last_error = ecode;
-               return -1;
-       }
-       return 0;
-}
-
-static int tdb1_lock_and_recover(struct tdb_context *tdb)
-{
-       int ret;
-
-       /* We need to match locking order in transaction commit. */
-       if (tdb1_brlock(tdb, F_WRLCK, TDB1_FREELIST_TOP, 0,
-                       TDB_LOCK_WAIT|TDB_LOCK_NOCHECK)) {
-               return -1;
-       }
-
-       if (tdb1_brlock(tdb, F_WRLCK, TDB1_OPEN_LOCK, 1,
-                       TDB_LOCK_WAIT|TDB_LOCK_NOCHECK)) {
-               tdb1_brunlock(tdb, F_WRLCK, TDB1_FREELIST_TOP, 0);
-               return -1;
-       }
-
-       ret = tdb1_transaction_recover(tdb);
-
-       tdb1_brunlock(tdb, F_WRLCK, TDB1_OPEN_LOCK, 1);
-       tdb1_brunlock(tdb, F_WRLCK, TDB1_FREELIST_TOP, 0);
-
-       return ret;
-}
-
-static bool have_data_locks(const struct tdb_context *tdb)
-{
-       unsigned int i;
-
-       for (i = 0; i < tdb->file->num_lockrecs; i++) {
-               if (tdb->file->lockrecs[i].off >= lock_offset(-1))
-                       return true;
-       }
-       return false;
-}
-
-static int tdb1_lock_list(struct tdb_context *tdb, int list, int ltype,
-                        enum tdb_lock_flags waitflag)
-{
-       int ret;
-       bool check = false;
-
-       /* a allrecord lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count) {
-               if (!check_lock_pid(tdb, "tdb1_lock_list", true)) {
-                       tdb->last_error = TDB_ERR_LOCK;
-                       return -1;
-               }
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       tdb->last_error = owner_conflict(tdb, "tdb1_lock_list");
-                       return -1;
-               }
-               if (ltype == tdb->file->allrecord_lock.ltype
-                   || ltype == F_RDLCK) {
-                       return 0;
-               }
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb1_lock_list:"
-                                            " already have read lock");
-               return -1;
-       }
-
-       /* Only check when we grab first data lock. */
-       check = !have_data_locks(tdb);
-       ret = tdb1_nest_lock(tdb, lock_offset(list), ltype, waitflag);
-
-       if (ret == 0 && check) {
-               tdb_bool_err berr = tdb1_needs_recovery(tdb);
-
-               if (berr < 0) {
-                       return -1;
-               }
-               if (berr == true) {
-                       tdb1_nest_unlock(tdb, lock_offset(list), ltype);
-
-                       if (tdb1_lock_and_recover(tdb) == -1) {
-                               return -1;
-                       }
-                       return tdb1_lock_list(tdb, list, ltype, waitflag);
-               }
-       }
-       return ret;
-}
-
-/* lock a list in the database. list -1 is the alloc list */
-int tdb1_lock(struct tdb_context *tdb, int list, int ltype)
-{
-       int ret;
-
-       ret = tdb1_lock_list(tdb, list, ltype, TDB_LOCK_WAIT);
-       /* Don't log for EAGAIN and EINTR: they could have overridden lock fns */
-       if (ret && errno != EAGAIN && errno != EINTR) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_lock failed on list %d "
-                          "ltype=%d (%s)",  list, ltype, strerror(errno));
-       }
-       return ret;
-}
-
-int tdb1_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype)
-{
-       enum TDB_ERROR ecode;
-
-       /* Sanity checks */
-       if (offset >= lock_offset(tdb->tdb1.header.hash_size)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
-                                       "tdb1_unlock: offset %u invalid (%d)",
-                                       offset, tdb->tdb1.header.hash_size);
-               return -1;
-       }
-
-       ecode = tdb_nest_unlock(tdb, offset, ltype);
-       if (unlikely(ecode != TDB_SUCCESS)) {
-               tdb->last_error = ecode;
-               return -1;
-       }
-       return 0;
-}
-
-int tdb1_unlock(struct tdb_context *tdb, int list, int ltype)
-{
-       /* a global lock allows us to avoid per chain locks */
-       if (tdb->file->allrecord_lock.count &&
-           (ltype == tdb->file->allrecord_lock.ltype || ltype == F_RDLCK)) {
-               if (tdb->file->allrecord_lock.owner != tdb) {
-                       tdb->last_error = owner_conflict(tdb, "tdb1_unlock");
-                       return -1;
-               }
-               return 0;
-       }
-
-       if (tdb->file->allrecord_lock.count) {
-               tdb->last_error = TDB_ERR_LOCK;
-               return -1;
-       }
-
-       return tdb1_nest_unlock(tdb, lock_offset(list), ltype);
-}
-
-/*
-  get the transaction lock
- */
-int tdb1_transaction_lock(struct tdb_context *tdb, int ltype,
-                        enum tdb_lock_flags lockflags)
-{
-       return tdb1_nest_lock(tdb, TDB1_TRANSACTION_LOCK, ltype, lockflags);
-}
-
-/*
-  release the transaction lock
- */
-int tdb1_transaction_unlock(struct tdb_context *tdb, int ltype)
-{
-       return tdb1_nest_unlock(tdb, TDB1_TRANSACTION_LOCK, ltype);
-}
-
-/* lock/unlock entire database.  It can only be upgradable if you have some
- * other way of guaranteeing exclusivity (ie. transaction write lock).
- * We do the locking gradually to avoid being starved by smaller locks. */
-int tdb1_allrecord_lock(struct tdb_context *tdb, int ltype,
-                      enum tdb_lock_flags flags, bool upgradable)
-{
-       enum TDB_ERROR ecode;
-       tdb_bool_err berr;
-
-       /* tdb_lock_gradual() doesn't know about tdb->tdb1.traverse_read. */
-       if (tdb->tdb1.traverse_read && !(tdb->flags & TDB_NOLOCK)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb1_allrecord_lock during"
-                                            " tdb1_read_traverse");
-               return -1;
-       }
-
-       if (tdb->file->allrecord_lock.count
-           && tdb->file->allrecord_lock.ltype == ltype) {
-               tdb->file->allrecord_lock.count++;
-               return 0;
-       }
-
-       if (tdb1_have_extra_locks(tdb)) {
-               /* can't combine global and chain locks */
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb1_allrecord_lock holding"
-                                            " other locks");
-               return -1;
-       }
-
-       if (upgradable && ltype != F_RDLCK) {
-               /* tdb error: you can't upgrade a write lock! */
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
-                                            TDB_LOG_ERROR,
-                                            "tdb1_allrecord_lock cannot"
-                                            " have upgradable write lock");
-               return -1;
-       }
-
-       /* We cover two kinds of locks:
-        * 1) Normal chain locks.  Taken for almost all operations.
-        * 3) Individual records locks.  Taken after normal or free
-        *    chain locks.
-        *
-        * It is (1) which cause the starvation problem, so we're only
-        * gradual for that. */
-       ecode = tdb_lock_gradual(tdb, ltype, flags | TDB_LOCK_NOCHECK,
-                                TDB1_FREELIST_TOP, tdb->tdb1.header.hash_size * 4);
-       if (ecode != TDB_SUCCESS) {
-               tdb->last_error = ecode;
-               return -1;
-       }
-
-       /* Grab individual record locks. */
-       if (tdb1_brlock(tdb, ltype, lock_offset(tdb->tdb1.header.hash_size), 0,
-                      flags) == -1) {
-               tdb1_brunlock(tdb, ltype, TDB1_FREELIST_TOP,
-                            tdb->tdb1.header.hash_size * 4);
-               return -1;
-       }
-
-       tdb->file->allrecord_lock.owner = tdb;
-       tdb->file->allrecord_lock.count = 1;
-       tdb->file->locker = getpid();
-       /* If it's upgradable, it's actually exclusive so we can treat
-        * it as a write lock. */
-       tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
-       tdb->file->allrecord_lock.off = upgradable;
-
-       berr = tdb1_needs_recovery(tdb);
-       if (berr < 0) {
-               return -1;
-       }
-
-       if (berr == true) {
-               tdb1_allrecord_unlock(tdb, ltype);
-               if (tdb1_lock_and_recover(tdb) == -1) {
-                       return -1;
-               }
-               return tdb1_allrecord_lock(tdb, ltype, flags, upgradable);
-       }
-
-       return 0;
-}
-
-
-
-/* Drop one reference on the whole-database (allrecord) lock.
- *
- * ltype must match the type the lock is recorded with, except that an
- * upgradable lock (recorded as F_WRLCK) may also be released as F_RDLCK.
- * Returns 0 on success and -1 on failure. */
-int tdb1_allrecord_unlock(struct tdb_context *tdb, int ltype)
-{
-       /* Refuse during a r/o traversal, or when no allrecord lock is held. */
-       if (tdb->tdb1.traverse_read
-           || tdb->file->allrecord_lock.count == 0) {
-               tdb->last_error = TDB_ERR_LOCK;
-               return -1;
-       }
-
-       /* Upgradable locks are marked as write locks, so F_RDLCK is also
-        * accepted when the upgradable flag (off) is set. */
-       if (!(tdb->file->allrecord_lock.ltype == ltype
-             || (tdb->file->allrecord_lock.off && ltype == F_RDLCK))) {
-               tdb->last_error = TDB_ERR_LOCK;
-               return -1;
-       }
-
-       /* Nested hold: only the owning context may decrement it. */
-       if (tdb->file->allrecord_lock.count > 1) {
-               if (tdb->file->allrecord_lock.owner == tdb) {
-                       tdb->file->allrecord_lock.count--;
-                       return 0;
-               }
-               tdb->last_error
-                       = owner_conflict(tdb, "tdb1_allrecord_unlock");
-               return -1;
-       }
-
-       /* Last reference: clear the bookkeeping, then release the
-        * byte-range lock that starts at TDB1_FREELIST_TOP. */
-       tdb->file->allrecord_lock.ltype = 0;
-       tdb->file->allrecord_lock.count = 0;
-
-       if (tdb1_brunlock(tdb, ltype, TDB1_FREELIST_TOP, 0) != 0) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_unlockall failed (%s)", strerror(errno));
-               return -1;
-       }
-
-       return 0;
-}
-
-/* Write-lock the hash chain that key hashes to.  This is meant to be
-   used to reduce contention - it cannot guarantee how many records
-   will be locked.  The result of tdb1_lock() is returned unchanged. */
-int tdb1_chainlock(struct tdb_context *tdb, TDB_DATA key)
-{
-       return tdb1_lock(tdb,
-                        TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
-                        F_WRLCK);
-}
-
-/* Release an F_WRLCK hold on the hash chain that key hashes to.
-   The result of tdb1_unlock() is returned unchanged. */
-int tdb1_chainunlock(struct tdb_context *tdb, TDB_DATA key)
-{
-       int ret;
-
-       ret = tdb1_unlock(tdb,
-                         TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
-                         F_WRLCK);
-       return ret;
-}
-
-/* Read-lock (F_RDLCK) the hash chain that key hashes to.
-   The result of tdb1_lock() is returned unchanged. */
-int tdb1_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
-{
-       return tdb1_lock(tdb,
-                        TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
-                        F_RDLCK);
-}
-
-/* Release an F_RDLCK hold on the hash chain that key hashes to.
-   The result of tdb1_unlock() is returned unchanged. */
-int tdb1_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
-{
-       int ret;
-
-       ret = tdb1_unlock(tdb,
-                         TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
-                         F_RDLCK);
-       return ret;
-}
-
-/* Take a read lock on the single byte at off; the record lock stops
- * the record being deleted underneath us.
- *
- * When an allrecord lock is held no byte-range lock is taken: the call
- * only checks the locking pid and the owning context.  Returns 0 on
- * success (including off == 0), -1 on a failed check, otherwise the
- * result of tdb1_brlock(). */
-int tdb1_lock_record(struct tdb_context *tdb, tdb1_off_t off)
-{
-       if (!tdb->file->allrecord_lock.count) {
-               /* Offset 0: nothing to lock. */
-               if (!off)
-                       return 0;
-               return tdb1_brlock(tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT);
-       }
-
-       if (!check_lock_pid(tdb, "tdb1_lock_record", true)) {
-               tdb->last_error = TDB_ERR_LOCK;
-               return -1;
-       }
-
-       if (tdb->file->allrecord_lock.owner != tdb) {
-               tdb->last_error = owner_conflict(tdb,
-                                                "tdb1_lock_record");
-               return -1;
-       }
-
-       return 0;
-}
-
-/*
-  Probe for a write lock on the record at off.
-
-  Write locks override our own fcntl readlocks, so our own traverse
-  locks are checked here first.  Note this is meant to be F_SETLK,
-  *not* F_SETLKW, as it's not an error to fail to get the lock here.
-
-  Returns 0 if the write lock is (or already was) available, -1 if not.
-*/
-int tdb1_write_lock_record(struct tdb_context *tdb, tdb1_off_t off)
-{
-       struct tdb1_traverse_lock *trav;
-
-       /* One of our own traversals is sitting on this record. */
-       for (trav = &tdb->tdb1.travlocks; trav; trav = trav->next) {
-               if (trav->off == off)
-                       return -1;
-       }
-
-       if (!tdb->file->allrecord_lock.count) {
-               return tdb1_brlock(tdb, F_WRLCK, off, 1,
-                                  TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
-       }
-
-       if (!check_lock_pid(tdb, "tdb1_write_lock_record", true)) {
-               tdb->last_error = TDB_ERR_LOCK;
-               return -1;
-       }
-
-       if (tdb->file->allrecord_lock.owner != tdb) {
-               tdb->last_error
-                       = owner_conflict(tdb, "tdb1_write_lock_record");
-               return -1;
-       }
-
-       /* An allrecord write lock already covers the record; an
-        * allrecord read lock does not. */
-       return tdb->file->allrecord_lock.ltype == F_WRLCK ? 0 : -1;
-}
-
-/* Undo tdb1_write_lock_record(): release the one-byte F_WRLCK at off,
- * unless an allrecord lock is held, in which case only the owning
- * context is checked and no byte-range lock is touched. */
-int tdb1_write_unlock_record(struct tdb_context *tdb, tdb1_off_t off)
-{
-       if (!tdb->file->allrecord_lock.count)
-               return tdb1_brunlock(tdb, F_WRLCK, off, 1);
-
-       if (tdb->file->allrecord_lock.owner == tdb)
-               return 0;
-
-       tdb->last_error = owner_conflict(tdb, "tdb1_write_unlock_record");
-       return -1;
-}
-
-/* Release the read lock on the record at off.
- *
- * fcntl locks don't stack: avoid unlocking someone else's.  The byte is
- * only unlocked when exactly one entry in our traverse-lock list refers
- * to off; otherwise nothing is released and 0 is returned. */
-int tdb1_unlock_record(struct tdb_context *tdb, tdb1_off_t off)
-{
-       struct tdb1_traverse_lock *trav;
-       uint32_t holders = 0;
-
-       /* Under an allrecord lock there is no per-record lock to drop. */
-       if (tdb->file->allrecord_lock.count) {
-               if (tdb->file->allrecord_lock.owner == tdb)
-                       return 0;
-               tdb->last_error = owner_conflict(tdb,
-                                                "tdb1_unlock_record");
-               return -1;
-       }
-
-       if (off == 0)
-               return 0;
-
-       for (trav = &tdb->tdb1.travlocks; trav; trav = trav->next) {
-               if (trav->off == off)
-                       holders++;
-       }
-
-       if (holders != 1)
-               return 0;
-       return tdb1_brunlock(tdb, F_RDLCK, off, 1);
-}
-
-/* Report whether any lock is held beyond the ones we expect to hold:
- * the active lock, and (inside a transaction) the transaction lock and
- * the allrecord lock. */
-bool tdb1_have_extra_locks(struct tdb_context *tdb)
-{
-       unsigned int unexpected;
-
-       /* A transaction holds the lock for all records; outside one, an
-        * allrecord lock counts as extra. */
-       if (tdb->file->allrecord_lock.count && !tdb->tdb1.transaction) {
-               return true;
-       }
-
-       unexpected = tdb->file->num_lockrecs;
-
-       /* We always hold the active lock if CLEAR_IF_FIRST. */
-       if (tdb1_find_nestlock(tdb, TDB1_ACTIVE_LOCK)) {
-               unexpected--;
-       }
-
-       /* In a transaction, we expect to hold the transaction lock */
-       if (tdb->tdb1.transaction
-           && tdb1_find_nestlock(tdb, TDB1_TRANSACTION_LOCK)) {
-               unexpected--;
-       }
-
-       return unexpected != 0;
-}
-
-/* The transaction code uses this to remove all locks.
- *
- * The allrecord lock (if any) and every nested lock record are
- * released, except the one at TDB1_ACTIVE_LOCK, which is kept and
- * compacted to the front of the lock-record array. */
-void tdb1_release_transaction_locks(struct tdb_context *tdb)
-{
-       unsigned int idx, kept = 0;
-
-       if (tdb->file->allrecord_lock.count != 0) {
-               tdb1_brunlock(tdb, tdb->file->allrecord_lock.ltype,
-                             TDB1_FREELIST_TOP, 0);
-               tdb->file->allrecord_lock.count = 0;
-       }
-
-       for (idx = 0; idx < tdb->file->num_lockrecs; idx++) {
-               struct tdb_lock *lck = &tdb->file->lockrecs[idx];
-
-               if (lck->off != TDB1_ACTIVE_LOCK) {
-                       tdb1_brunlock(tdb, lck->ltype, lck->off, 1);
-                       continue;
-               }
-
-               /* Don't release the active lock!  Copy it to first entry. */
-               tdb->file->lockrecs[kept++] = *lck;
-       }
-
-       tdb->file->num_lockrecs = kept;
-       if (tdb->file->num_lockrecs == 0) {
-               SAFE_FREE(tdb->file->lockrecs);
-       }
-}
diff --git a/ccan/tdb2/tdb1_open.c b/ccan/tdb2/tdb1_open.c
deleted file mode 100644 (file)
index e668616..0000000
+++ /dev/null
@@ -1,234 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include <assert.h>
-#include "tdb1_private.h"
-#include <assert.h>
-
-/* Compute the two header check values with the context's hash function.
- * We use two hashes to double-check they're using the right hash function:
- * one over TDB_MAGIC_FOOD (including its NUL), one over TDB1_MAGIC
- * passed through TDB1_CONV.  Results are written to the two out params. */
-void tdb1_header_hash(struct tdb_context *tdb,
-                    uint32_t *magic1_hash, uint32_t *magic2_hash)
-{
-       uint32_t magic = TDB1_MAGIC;
-
-       *magic1_hash = tdb_hash(tdb, TDB_MAGIC_FOOD, sizeof(TDB_MAGIC_FOOD));
-       *magic2_hash = tdb_hash(tdb, TDB1_CONV(magic), sizeof(magic));
-
-       /* Make sure at least one hash is non-zero!  (hash_correct()
-        * treats a pair of zeroes as "no magic hashes stored".) */
-       if (!*magic1_hash && !*magic2_hash) {
-               *magic1_hash = 1;
-       }
-}
-
-/* Reset the TDB1-specific part of the context: I/O methods, traverse
- * and transaction state, cached page size and the dead-record limit.
- * max_dead may be NULL, in which case the limit is 0.
- * The context must already be flagged TDB_VERSION1. */
-static void tdb_context_init(struct tdb_context *tdb,
-                            struct tdb_attribute_tdb1_max_dead *max_dead)
-{
-       assert(tdb->flags & TDB_VERSION1);
-
-       tdb1_io_init(tdb);
-
-       /* No traverse or transaction in progress. */
-       tdb->tdb1.traverse_write = 0;
-       tdb->tdb1.traverse_read = 0;
-       memset(&tdb->tdb1.travlocks, 0, sizeof(tdb->tdb1.travlocks));
-       tdb->tdb1.transaction = NULL;
-
-       /* cache the page size; fall back to 0x2000 if none is reported */
-       tdb->tdb1.page_size = getpagesize();
-       if (tdb->tdb1.page_size <= 0) {
-               tdb->tdb1.page_size = 0x2000;
-       }
-
-       tdb->tdb1.max_dead_records = max_dead ? max_dead->max_dead : 0;
-}
-
-/* initialise a new database
- *
- * Builds a header plus an all-zero offset table (hash_size + 1 entries)
- * in memory.  For TDB_INTERNAL databases that buffer becomes the map
- * itself and is kept; otherwise the file is truncated, the buffer is
- * written at offset 0 and then freed.
- *
- * hashsize and max_dead may each be NULL to use the defaults.
- * Returns TDB_SUCCESS or the error code produced by tdb_logerr(). */
-enum TDB_ERROR tdb1_new_database(struct tdb_context *tdb,
-                                struct tdb_attribute_tdb1_hashsize *hashsize,
-                                struct tdb_attribute_tdb1_max_dead *max_dead)
-{
-       struct tdb1_header *newdb;
-       size_t size;
-       int hash_size;
-       enum TDB_ERROR ecode;
-
-       tdb_context_init(tdb, max_dead);
-
-       /* Default TDB2 hash becomes default TDB1 hash. */
-       if (tdb->hash_fn == tdb_jenkins_hash) {
-               tdb->hash_fn = tdb1_old_hash;
-       }
-
-       hash_size = hashsize ? hashsize->hsize : TDB1_DEFAULT_HASH_SIZE;
-
-       /* We make it up in memory, then write it out if not internal */
-       size = sizeof(struct tdb1_header) + (hash_size+1)*sizeof(tdb1_off_t);
-       newdb = (struct tdb1_header *)calloc(size, 1);
-       if (newdb == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "Could not allocate new database header");
-       }
-
-       /* Fill in the header */
-       newdb->version = TDB1_VERSION;
-       newdb->hash_size = hash_size;
-
-       tdb1_header_hash(tdb, &newdb->magic1_hash, &newdb->magic2_hash);
-
-       /* Make sure older tdbs (which don't check the magic hash fields)
-        * will refuse to open this TDB. */
-       if (tdb->hash_fn == tdb1_incompatible_hash) {
-               newdb->rwlocks = TDB1_HASH_RWLOCK_MAGIC;
-       }
-
-       /* Keep a native copy in the context before any conversion. */
-       memcpy(&tdb->tdb1.header, newdb, sizeof(tdb->tdb1.header));
-       /* This creates an endian-converted db. */
-       TDB1_CONV(*newdb);
-       /* Don't endian-convert the magic food! */
-       memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
-
-       if (tdb->flags & TDB_INTERNAL) {
-               /* The buffer is handed over as the map; do not free it. */
-               tdb->file->map_ptr = (char *)newdb;
-               tdb->file->map_size = size;
-               return TDB_SUCCESS;
-       }
-
-       /* Stop at the first failing step; the buffer is freed either way. */
-       if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb1_new_database: lseek failed");
-       } else if (ftruncate(tdb->file->fd, 0) == -1) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb1_new_database: ftruncate failed");
-       } else if (!tdb1_write_all(tdb->file->fd, newdb, size)) {
-               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb1_new_database: write failed");
-       } else {
-               ecode = TDB_SUCCESS;
-       }
-
-       SAFE_FREE(newdb);
-       return ecode;
-}
-
-/* Callback signatures and a logging holder declared for this file.
- * NOTE(review): nothing in the rest of tdb1_open.c shown here refers to
- * these three declarations - confirm whether they are still needed. */
-typedef void (*tdb1_log_func)(struct tdb_context *, enum tdb_log_level, enum TDB_ERROR,
-                             const char *, void *);
-typedef uint64_t (*tdb1_hash_func)(const void *key, size_t len, uint64_t seed,
-                                  void *data);
-
-struct tdb1_logging_context {
-        tdb1_log_func log_fn;   /* logging callback */
-        void *log_private;      /* presumably the opaque pointer handed to log_fn - verify */
-};
-
-/* Check the header's stored magic hashes against the ones computed with
- * the current hash function.
- *
- * *m1 and *m2 receive the computed values, except when the header
- * stores no hashes at all (both zero): then true is returned without
- * touching them. */
-static bool hash_correct(struct tdb_context *tdb,
-                        uint32_t *m1, uint32_t *m2)
-{
-       /* older TDB without magic hash references */
-       if (!tdb->tdb1.header.magic1_hash
-           && !tdb->tdb1.header.magic2_hash) {
-               return true;
-       }
-
-       tdb1_header_hash(tdb, m1, m2);
-
-       if (tdb->tdb1.header.magic1_hash != *m1) {
-               return false;
-       }
-       return tdb->tdb1.header.magic2_hash == *m2;
-}
-
-/* Verify the header hashes, retrying once with the other inbuilt hash.
- *
- * If the first check fails and the context uses one of the two inbuilt
- * TDB1 hashes, tdb->hash_fn is switched to the other one and the check
- * is repeated; the switch is left in place whatever the second result.
- * A user-supplied hash is never replaced. */
-static bool check_header_hash(struct tdb_context *tdb,
-                             uint32_t *m1, uint32_t *m2)
-{
-       if (hash_correct(tdb, m1, m2)) {
-               return true;
-       }
-
-       /* Only the inbuilt hashes have an alternative to try. */
-       if (tdb->hash_fn != tdb1_old_hash
-           && tdb->hash_fn != tdb1_incompatible_hash) {
-               return false;
-       }
-
-       /* If they use one inbuilt, try the other inbuilt hash. */
-       if (tdb->hash_fn == tdb1_old_hash) {
-               tdb->hash_fn = tdb1_incompatible_hash;
-       } else {
-               tdb->hash_fn = tdb1_old_hash;
-       }
-
-       return hash_correct(tdb, m1, m2);
-}
-
-/* Validate an existing TDB1-format database and set up the context.
- *
- * Per the original note, the caller holds the TDB open lock on the
- * file descriptor; that is not checked here.  tdb->tdb1.header is
- * inspected but never read from the file in this function, so it must
- * already be filled in.
- *
- * Returns TDB_SUCCESS, or the error code produced by tdb_logerr(). */
-enum TDB_ERROR tdb1_open(struct tdb_context *tdb,
-                        struct tdb_attribute_tdb1_max_dead *max_dead)
-{
-       const char *hash_alg;
-       uint32_t magic1, magic2;
-
-       tdb->flags |= TDB_VERSION1;
-       tdb_context_init(tdb, max_dead);
-
-       /* Default TDB2 hash becomes default TDB1 hash.  hash_alg is only
-        * used to word the error message below. */
-       if (tdb->hash_fn == tdb_jenkins_hash) {
-               tdb->hash_fn = tdb1_old_hash;
-               hash_alg = "default";
-       } else if (tdb->hash_fn == tdb1_incompatible_hash) {
-               hash_alg = "tdb1_incompatible_hash";
-       } else {
-               hash_alg = "the user defined";
-       }
-
-       if (tdb->tdb1.header.version == TDB1_BYTEREV(TDB1_VERSION)) {
-               /* Byte-reversed version: convert the header and remember
-                * that this database needs conversion. */
-               tdb->flags |= TDB_CONVERT;
-               tdb1_convert(&tdb->tdb1.header, sizeof(tdb->tdb1.header));
-       } else if (tdb->flags & TDB_CONVERT) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb1_open:"
-                                 " %s does not need TDB_CONVERT",
-                                 tdb->name);
-       }
-
-       /* rwlocks must be either unused or the incompatible-hash marker. */
-       if (tdb->tdb1.header.rwlocks != 0 &&
-           tdb->tdb1.header.rwlocks != TDB1_HASH_RWLOCK_MAGIC) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb1_open: spinlocks no longer supported");
-       }
-
-       if (check_header_hash(tdb, &magic1, &magic2)) {
-               return TDB_SUCCESS;
-       }
-
-       /* magic1/magic2 were filled in by the failed check. */
-       return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_USE_ERROR,
-                  "tdb1_open: "
-                  "%s was not created with %s hash function we are using\n"
-                  "magic1_hash[0x%08X %s 0x%08X] "
-                  "magic2_hash[0x%08X %s 0x%08X]",
-                  tdb->name, hash_alg,
-                  tdb->tdb1.header.magic1_hash,
-                  (tdb->tdb1.header.magic1_hash == magic1) ? "==" : "!=",
-                  magic1,
-                  tdb->tdb1.header.magic2_hash,
-                  (tdb->tdb1.header.magic2_hash == magic2) ? "==" : "!=",
-                  magic2);
-}
diff --git a/ccan/tdb2/tdb1_private.h b/ccan/tdb2/tdb1_private.h
deleted file mode 100644 (file)
index cb22b9f..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-#ifndef CCAN_TDB2_TDB1_PRIVATE_H
-#define CCAN_TDB2_TDB1_PRIVATE_H
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library - private includes
-
-   Copyright (C) Andrew Tridgell              2005
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-
-#include <limits.h>
-
-/* #define TDB_TRACE 1 */
-/* Fallback when the build does not define HAVE_GETPAGESIZE: a fixed
-   0x2000, the same value tdb_context_init() falls back to. */
-#ifndef HAVE_GETPAGESIZE
-#define getpagesize() 0x2000
-#endif
-
-/* Stringification helpers: the two-level form expands its argument
-   (e.g. __LINE__) before turning it into a string literal. */
-#ifndef __STRING
-#define __STRING(x)    #x
-#endif
-
-#ifndef __STRINGSTRING
-#define __STRINGSTRING(x) __STRING(x)
-#endif
-
-/* "file:line" string literal for the point of use. */
-#ifndef __location__
-#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
-#endif
-
-/* Fallback offsetof, used only if no header has defined one.
-   NOTE(review): casts a pointer to unsigned int, which is narrower
-   than a pointer on LP64 targets - prefer <stddef.h>. */
-#ifndef offsetof
-#define offsetof(t,f) ((unsigned int)&((t *)0)->f)
-#endif
-
-/* Magic numbers and layout arithmetic for the TDB1 format. */
-#define TDB1_VERSION (0x26011967 + 6)
-#define TDB1_MAGIC (0x26011999U)
-#define TDB1_FREE_MAGIC (~TDB1_MAGIC)
-#define TDB1_DEAD_MAGIC (0xFEE1DEAD)
-#define TDB1_RECOVERY_MAGIC (0xf53bc0e7U)
-#define TDB1_RECOVERY_INVALID_MAGIC (0x0)
-#define TDB1_HASH_RWLOCK_MAGIC (0xbad1a51U)
-#define TDB1_ALIGNMENT 4
-#define TDB1_DEFAULT_HASH_SIZE 131
-/* The offset table starts right after the header: slot 0 is at
-   TDB1_FREELIST_TOP and hash bucket b uses slot b+1 (TDB1_HASH_TOP). */
-#define TDB1_FREELIST_TOP (sizeof(struct tdb1_header))
-#define TDB1_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))
-#define TDB1_DEAD(r) ((r)->magic == TDB1_DEAD_MAGIC)
-#define TDB1_BAD_MAGIC(r) ((r)->magic != TDB1_MAGIC && !TDB1_DEAD(r))
-#define TDB1_HASH_TOP(hash) (TDB1_FREELIST_TOP + (TDB1_BUCKET(hash)+1)*sizeof(tdb1_off_t))
-#define TDB1_HASHTABLE_SIZE(tdb) ((tdb->tdb1.header.hash_size+1)*sizeof(tdb1_off_t))
-/* One slot past the slot of the last bucket (hash_size-1). */
-#define TDB1_DATA_START(hash_size) (TDB1_HASH_TOP(hash_size-1) + sizeof(tdb1_off_t))
-#define TDB1_RECOVERY_HEAD offsetof(struct tdb1_header, recovery_start)
-#define TDB1_SEQNUM_OFS    offsetof(struct tdb1_header, sequence_number)
-/* TDB1_PAD_U32 is TDB1_PAD_BYTE repeated in all four bytes. */
-#define TDB1_PAD_BYTE 0x42
-#define TDB1_PAD_U32  0x42424242
-
-/* lock offsets */
-#define TDB1_OPEN_LOCK        0
-#define TDB1_ACTIVE_LOCK      4
-#define TDB1_TRANSACTION_LOCK 8
-
-/* free memory if the pointer is valid and zero the pointer */
-#ifndef SAFE_FREE
-#define SAFE_FREE(x) do { if ((x) != NULL) {free((void *)x); (x)=NULL;} } while(0)
-#endif
-
-/* The macros below refer to a variable named `tdb' that must be in
-   scope at the point of use. */
-#define TDB1_BUCKET(hash) ((hash) % tdb->tdb1.header.hash_size)
-
-/* TDB1_CONV needs an lvalue (it takes &x): it yields &x, passed
-   through tdb1_convert() when the context has TDB_CONVERT set. */
-#define TDB1_DOCONV() (tdb->flags & TDB_CONVERT)
-#define TDB1_CONV(x) (TDB1_DOCONV() ? tdb1_convert(&x, sizeof(x)) : &x)
-
-/* the body of the database is made of one tdb1_record for the free space
-   plus a separate data list for each hash value.
-   This struct is the fixed-size record header; the variable-length
-   payload sketched at the bottom is not declared as a C member. */
-struct tdb1_record {
-       tdb1_off_t next; /* offset of the next record in the list */
-       tdb1_len_t rec_len; /* total byte length of record */
-       tdb1_len_t key_len; /* byte length of key */
-       tdb1_len_t data_len; /* byte length of data */
-       uint32_t full_hash; /* the full 32 bit hash of the key */
-       uint32_t magic;   /* try to catch errors */
-       /* the following union is implied:
-               union {
-                       char record[rec_len];
-                       struct {
-                               char key[key_len];
-                               char data[data_len];
-                       }
-                       uint32_t totalsize; (tailer)
-               }
-       */
-};
-
-
-/* Table of low-level I/O operations, reached through tdb->tdb1.io
-   (e.g. tdb->tdb1.io->tdb1_read(tdb, off, &rec, sizeof(rec), TDB1_DOCONV())). */
-struct tdb1_methods {
-       /* (tdb, offset, buffer, length, convert flag); callers test for -1 */
-       int (*tdb1_read)(struct tdb_context *, tdb1_off_t , void *, tdb1_len_t , int );
-       /* presumably (tdb, offset, data, length) - verify against tdb1_io.c */
-       int (*tdb1_write)(struct tdb_context *, tdb1_off_t, const void *, tdb1_len_t);
-       void (*next_hash_chain)(struct tdb_context *, uint32_t *);
-       /* NOTE(review): argument meaning not visible here - see tdb1_io.c */
-       int (*tdb1_oob)(struct tdb_context *, tdb1_off_t, tdb1_len_t, int );
-       int (*tdb1_expand_file)(struct tdb_context *, tdb1_off_t , tdb1_off_t );
-};
-
-
-/*
-  internal prototypes
-
-  Functions shared between the tdb1_*.c files that include this header.
-  Each function is declared exactly once.
-*/
-int tdb1_munmap(struct tdb_context *tdb);
-void tdb1_mmap(struct tdb_context *tdb);
-int tdb1_lock(struct tdb_context *tdb, int list, int ltype);
-int tdb1_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
-                 enum tdb_lock_flags flags);
-int tdb1_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype);
-int tdb1_unlock(struct tdb_context *tdb, int list, int ltype);
-int tdb1_brlock(struct tdb_context *tdb,
-              int rw_type, tdb1_off_t offset, size_t len,
-              enum tdb_lock_flags flags);
-int tdb1_brunlock(struct tdb_context *tdb,
-                int rw_type, tdb1_off_t offset, size_t len);
-bool tdb1_have_extra_locks(struct tdb_context *tdb);
-void tdb1_release_transaction_locks(struct tdb_context *tdb);
-int tdb1_transaction_lock(struct tdb_context *tdb, int ltype,
-                        enum tdb_lock_flags lockflags);
-int tdb1_transaction_unlock(struct tdb_context *tdb, int ltype);
-int tdb1_recovery_area(struct tdb_context *tdb,
-                     const struct tdb1_methods *methods,
-                     tdb1_off_t *recovery_offset,
-                     struct tdb1_record *rec);
-int tdb1_allrecord_upgrade(struct tdb_context *tdb);
-int tdb1_write_lock_record(struct tdb_context *tdb, tdb1_off_t off);
-int tdb1_write_unlock_record(struct tdb_context *tdb, tdb1_off_t off);
-int tdb1_ofs_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d);
-int tdb1_ofs_write(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d);
-void *tdb1_convert(void *buf, uint32_t size);
-int tdb1_free(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec);
-tdb1_off_t tdb1_allocate(struct tdb_context *tdb, tdb1_len_t length, struct tdb1_record *rec);
-int tdb1_lock_record(struct tdb_context *tdb, tdb1_off_t off);
-int tdb1_unlock_record(struct tdb_context *tdb, tdb1_off_t off);
-tdb_bool_err tdb1_needs_recovery(struct tdb_context *tdb);
-int tdb1_rec_read(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec);
-int tdb1_rec_write(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec);
-int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec);
-unsigned char *tdb1_alloc_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t len);
-enum TDB_ERROR tdb1_parse_data(struct tdb_context *tdb, TDB_DATA key,
-                              tdb1_off_t offset, tdb1_len_t len,
-                              enum TDB_ERROR (*parser)(TDB_DATA key,
-                                                       TDB_DATA data,
-                                                       void *private_data),
-                              void *private_data);
-tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
-                          struct tdb1_record *rec);
-void tdb1_io_init(struct tdb_context *tdb);
-int tdb1_expand(struct tdb_context *tdb, tdb1_off_t size);
-tdb1_off_t tdb1_expand_adjust(tdb1_off_t map_size, tdb1_off_t size, int page_size);
-int tdb1_rec_free_read(struct tdb_context *tdb, tdb1_off_t off,
-                     struct tdb1_record *rec);
-bool tdb1_write_all(int fd, const void *buf, size_t count);
-void tdb1_header_hash(struct tdb_context *tdb,
-                    uint32_t *magic1_hash, uint32_t *magic2_hash);
-uint64_t tdb1_old_hash(const void *key, size_t len, uint64_t seed, void *);
-size_t tdb1_dead_space(struct tdb_context *tdb, tdb1_off_t off);
-#endif /* CCAN_TDB2_TDB1_PRIVATE_H */
diff --git a/ccan/tdb2/tdb1_summary.c b/ccan/tdb2/tdb1_summary.c
deleted file mode 100644 (file)
index b74b8f4..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
- /*
-   Trivial Database: human-readable summary code
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "tdb1_private.h"
-
-/* printf format for tdb1_summary(): 35 conversions in total - one %u,
-   twenty-seven %zu and seven %.0f - which is where the 35*20 in the
-   buffer-size computation there comes from. */
-#define SUMMARY_FORMAT1 \
-       "Size of file/data: %u/%zu\n" \
-       "Number of records: %zu\n" \
-       "Smallest/average/largest keys: %zu/%zu/%zu\n" \
-       "Smallest/average/largest data: %zu/%zu/%zu\n" \
-       "Smallest/average/largest padding: %zu/%zu/%zu\n" \
-       "Number of dead records: %zu\n" \
-       "Smallest/average/largest dead records: %zu/%zu/%zu\n" \
-       "Number of free records: %zu\n" \
-       "Smallest/average/largest free records: %zu/%zu/%zu\n" \
-       "Number of hash chains: %zu\n" \
-       "Smallest/average/largest hash chains: %zu/%zu/%zu\n" \
-       "Number of uncoalesced records: %zu\n" \
-       "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n" \
-       "Percentage keys/data/padding/free/dead/rechdrs&tailers/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
-
-/* We don't use tally module, to keep upstream happy.
-   Running statistics over a series of sizes: smallest and largest
-   sample, their sum, and how many samples were added. */
-struct tally {
-       size_t min, max, total;
-       size_t num;
-};
-
-/* Reset a tally to the empty state: no samples, all fields zero. */
-static void tally1_init(struct tally *tally)
-{
-       tally->min = 0;
-       tally->max = 0;
-       tally->num = 0;
-       tally->total = 0;
-}
-
-/* Add one sample to the tally, updating min/max, the count and the sum. */
-static void tally1_add(struct tally *tally, size_t len)
-{
-       if (tally->num == 0) {
-               /* First sample defines both extremes. */
-               tally->min = len;
-               tally->max = len;
-       } else {
-               if (len > tally->max)
-                       tally->max = len;
-               else if (len < tally->min)
-                       tally->min = len;
-       }
-       tally->total += len;
-       tally->num++;
-}
-
-/* Integer mean (rounded down) of the samples; 0 for an empty tally. */
-static size_t tally1_mean(const struct tally *tally)
-{
-       return tally->num ? tally->total / tally->num : 0;
-}
-
-/* Count the records on the chain for bucket i by following the next
- * offsets from the bucket's slot until a zero offset is reached.
- * Returns 0 if any read fails, which is indistinguishable from an
- * empty chain. */
-static size_t get_hash_length(struct tdb_context *tdb, unsigned int i)
-{
-       struct tdb1_record r;
-       tdb1_off_t rec_ptr;
-       size_t count;
-
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(i), &rec_ptr) == -1) {
-               return 0;
-       }
-
-       for (count = 0; rec_ptr; rec_ptr = r.next) {
-               count++;
-               if (tdb1_rec_read(tdb, rec_ptr, &r) == -1) {
-                       return 0;
-               }
-       }
-       return count;
-}
-
-/* Build a human-readable summary of the database.
- *
- * Walks every record from the start of the data area to the end of the
- * map, tallying keys, data, padding, dead and free records and runs of
- * uncoalesced records, then measures every hash chain.
- *
- * If an allrecord lock is already held no lock is taken; otherwise a
- * read lock on the whole database is taken for the duration.
- *
- * Returns a malloc()ed string which the caller must free(), or NULL on
- * failure. */
-char *tdb1_summary(struct tdb_context *tdb)
-{
-       tdb1_off_t off, rec_off;
-       struct tally freet, keys, data, dead, extra, hash, uncoal;
-       struct tdb1_record rec;
-       char *ret = NULL;
-       bool locked;
-       size_t len, unc = 0;
-       struct tdb1_record recovery;
-
-       /* We may have a write lock already, so don't lock. */
-       if (tdb->file->allrecord_lock.count != 0) {
-               locked = false;
-       } else {
-               if (tdb_lockall_read(tdb) != TDB_SUCCESS)
-                       return NULL;
-               locked = true;
-       }
-
-       if (tdb1_recovery_area(tdb, tdb->tdb1.io, &rec_off, &recovery) != 0) {
-               goto unlock;
-       }
-
-       tally1_init(&freet);
-       tally1_init(&keys);
-       tally1_init(&data);
-       tally1_init(&dead);
-       tally1_init(&extra);
-       tally1_init(&hash);
-       tally1_init(&uncoal);
-
-       /* unc counts the current run of adjacent non-live records; a run
-        * longer than one is recorded (minus one) when it ends. */
-       for (off = TDB1_DATA_START(tdb->tdb1.header.hash_size);
-            off < tdb->file->map_size - 1;
-            off += sizeof(rec) + rec.rec_len) {
-               if (tdb->tdb1.io->tdb1_read(tdb, off, &rec, sizeof(rec),
-                                          TDB1_DOCONV()) == -1)
-                       goto unlock;
-               switch (rec.magic) {
-               case TDB1_MAGIC:
-                       tally1_add(&keys, rec.key_len);
-                       tally1_add(&data, rec.data_len);
-                       tally1_add(&extra, rec.rec_len - (rec.key_len
-                                                        + rec.data_len));
-                       if (unc > 1)
-                               tally1_add(&uncoal, unc - 1);
-                       unc = 0;
-                       break;
-               case TDB1_FREE_MAGIC:
-                       tally1_add(&freet, rec.rec_len);
-                       unc++;
-                       break;
-               /* If we crash after ftruncate, we can get zeroes or fill. */
-               case TDB1_RECOVERY_INVALID_MAGIC:
-               case TDB1_PAD_U32:
-                       unc++;
-                       /* If it's a valid recovery, we can trust rec_len. */
-                       if (off != rec_off) {
-                               rec.rec_len = tdb1_dead_space(tdb, off)
-                                       - sizeof(rec);
-                       }
-                       /* Fall through */
-               case TDB1_DEAD_MAGIC:
-                       tally1_add(&dead, rec.rec_len);
-                       break;
-               default:
-                       /* Casts keep the arguments matched to %x / %u
-                        * whatever the underlying typedefs are. */
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                               TDB_LOG_ERROR,
-                                               "Unexpected record magic 0x%x"
-                                               " at offset %u",
-                                               (unsigned int)rec.magic,
-                                               (unsigned int)off);
-                       goto unlock;
-               }
-       }
-       if (unc > 1)
-               tally1_add(&uncoal, unc - 1);
-
-       for (off = 0; off < tdb->tdb1.header.hash_size; off++)
-               tally1_add(&hash, get_hash_length(tdb, off));
-
-       /* 20 is max length of a %zu. */
-       len = strlen(SUMMARY_FORMAT1) + 35*20 + 1;
-       ret = (char *)malloc(len);
-       if (!ret) {
-               /* Record the failure so NULL is not returned with a
-                * stale last_error. */
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                            "tdb1_summary: could not"
-                                            " allocate summary buffer");
-               goto unlock;
-       }
-
-       snprintf(ret, len, SUMMARY_FORMAT1,
-                (tdb1_len_t)tdb->file->map_size, keys.total+data.total,
-                keys.num,
-                keys.min, tally1_mean(&keys), keys.max,
-                data.min, tally1_mean(&data), data.max,
-                extra.min, tally1_mean(&extra), extra.max,
-                dead.num,
-                dead.min, tally1_mean(&dead), dead.max,
-                freet.num,
-                freet.min, tally1_mean(&freet), freet.max,
-                hash.num,
-                hash.min, tally1_mean(&hash), hash.max,
-                uncoal.total,
-                uncoal.min, tally1_mean(&uncoal), uncoal.max,
-                keys.total * 100.0 / tdb->file->map_size,
-                data.total * 100.0 / tdb->file->map_size,
-                extra.total * 100.0 / tdb->file->map_size,
-                freet.total * 100.0 / tdb->file->map_size,
-                dead.total * 100.0 / tdb->file->map_size,
-                (keys.num + freet.num + dead.num)
-                * (sizeof(struct tdb1_record) + sizeof(uint32_t))
-                * 100.0 / tdb->file->map_size,
-                tdb->tdb1.header.hash_size * sizeof(tdb1_off_t)
-                * 100.0 / (tdb1_len_t)tdb->file->map_size);
-
-unlock:
-       if (locked) {
-               tdb_unlockall_read(tdb);
-       }
-       return ret;
-}
diff --git a/ccan/tdb2/tdb1_tdb.c b/ccan/tdb2/tdb1_tdb.c
deleted file mode 100644 (file)
index a220f47..0000000
+++ /dev/null
@@ -1,829 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb1_private.h"
-#include <assert.h>
-
-/*
-  non-blocking increment of the tdb sequence number if the tdb has been opened using
-  the TDB_SEQNUM flag
-*/
-void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
-{
-       tdb1_off_t seqnum=0;
-
-       if (!(tdb->flags & TDB_SEQNUM)) {
-               return;
-       }
-
-       /* we ignore errors from this, as we have no sane way of
-          dealing with them.
-       */
-       tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
-       seqnum++;
-       tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
-}
-
-/*
-  increment the tdb sequence number if the tdb has been opened using
-  the TDB_SEQNUM flag
-*/
-static void tdb1_increment_seqnum(struct tdb_context *tdb)
-{
-       if (!(tdb->flags & TDB_SEQNUM)) {
-               return;
-       }
-
-       if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
-                          TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
-               return;
-       }
-
-       tdb1_increment_seqnum_nonblock(tdb);
-
-       tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
-}
-
-static enum TDB_ERROR tdb1_key_compare(TDB_DATA key, TDB_DATA data,
-                                      void *matches_)
-{
-       bool *matches = matches_;
-       *matches = (memcmp(data.dptr, key.dptr, data.dsize) == 0);
-       return TDB_SUCCESS;
-}
-
-/* Returns 0 on fail; last_error will be TDB_ERR_NOEXIST if it simply
- * wasn't there, otherwise a real error.
- * On success, return offset of record, and fills in rec */
-static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
-                       struct tdb1_record *r)
-{
-       tdb1_off_t rec_ptr;
-
-       /* read in the hash top */
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
-               return 0;
-
-       /* keep looking until we find the right record */
-       while (rec_ptr) {
-               if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
-                       return 0;
-
-               tdb->stats.compares++;
-               if (TDB1_DEAD(r)) {
-                       tdb->stats.compare_wrong_bucket++;
-               } else if (key.dsize != r->key_len) {
-                       tdb->stats.compare_wrong_keylen++;
-               } else if (hash != r->full_hash) {
-                       tdb->stats.compare_wrong_rechash++;
-               } else {
-                       enum TDB_ERROR ecode;
-                       bool matches;
-                       ecode = tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
-                                               r->key_len, tdb1_key_compare,
-                                               &matches);
-
-                       if (ecode != TDB_SUCCESS) {
-                               tdb->last_error = ecode;
-                               return 0;
-                       }
-
-                       if (!matches) {
-                               tdb->stats.compare_wrong_keycmp++;
-                       } else {
-                               return rec_ptr;
-                       }
-               }
-               /* detect tight infinite loop */
-               if (rec_ptr == r->next) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                               TDB_LOG_ERROR,
-                                               "tdb1_find: loop detected.");
-                       return 0;
-               }
-               rec_ptr = r->next;
-       }
-       tdb->last_error = TDB_ERR_NOEXIST;
-       return 0;
-}
-
-/* As tdb1_find, but if you succeed, keep the lock */
-tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
-                          struct tdb1_record *rec)
-{
-       uint32_t rec_ptr;
-
-       if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
-               return 0;
-       if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
-               tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
-       return rec_ptr;
-}
-
-static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
-
-/* update an entry in place - this only works if the new data size
-   is <= the old data size and the key exists.
-   on failure return -1.
-*/
-static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
-{
-       struct tdb1_record rec;
-       tdb1_off_t rec_ptr;
-
-       /* find entry */
-       if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
-               return -1;
-
-       /* it could be an exact duplicate of what is there - this is
-        * surprisingly common (eg. with a ldb re-index). */
-       if (rec.key_len == key.dsize &&
-           rec.data_len == dbuf.dsize &&
-           rec.full_hash == hash) {
-               TDB_DATA data = _tdb1_fetch(tdb, key);
-               if (data.dsize == dbuf.dsize &&
-                   memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
-                       if (data.dptr) {
-                               free(data.dptr);
-                       }
-                       return 0;
-               }
-               if (data.dptr) {
-                       free(data.dptr);
-               }
-       }
-
-       /* must be long enough key, data and tailer */
-       if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
-               tdb->last_error = TDB_SUCCESS; /* Not really an error */
-               return -1;
-       }
-
-       if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
-                     dbuf.dptr, dbuf.dsize) == -1)
-               return -1;
-
-       if (dbuf.dsize != rec.data_len) {
-               /* update size */
-               rec.data_len = dbuf.dsize;
-               return tdb1_rec_write(tdb, rec_ptr, &rec);
-       }
-
-       return 0;
-}
-
-/* find an entry in the database given a key */
-/* If an entry doesn't exist tdb1_err will be set to
- * TDB_ERR_NOEXIST. If a key has no data attached
- * then the TDB_DATA will have zero length but
- * a non-zero pointer
- */
-static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
-{
-       tdb1_off_t rec_ptr;
-       struct tdb1_record rec;
-       TDB_DATA ret;
-       uint32_t hash;
-
-       /* find which hash bucket it is in */
-       hash = tdb_hash(tdb, key.dptr, key.dsize);
-       if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
-               ret.dptr = NULL;
-               ret.dsize = 0;
-               return ret;
-       }
-
-       ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
-                                 rec.data_len);
-       ret.dsize = rec.data_len;
-       tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
-       return ret;
-}
-
-enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
-{
-       *data = _tdb1_fetch(tdb, key);
-       if (data->dptr == NULL)
-               return tdb->last_error;
-       return TDB_SUCCESS;
-}
-
-enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
-                                enum TDB_ERROR (*parser)(TDB_DATA key,
-                                                         TDB_DATA data,
-                                                         void *private_data),
-                                void *private_data)
-{
-       tdb1_off_t rec_ptr;
-       struct tdb1_record rec;
-       enum TDB_ERROR ret;
-       uint32_t hash;
-
-       /* find which hash bucket it is in */
-       hash = tdb_hash(tdb, key.dptr, key.dsize);
-
-       if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
-               return tdb->last_error;
-       }
-
-       ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
-                            rec.data_len, parser, private_data);
-
-       tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
-
-       return ret;
-}
-
-/* check if an entry in the database exists
-
-   note that 1 is returned if the key is found and 0 is returned if not found
-   this doesn't match the conventions in the rest of this module, but is
-   compatible with gdbm
-*/
-static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
-{
-       struct tdb1_record rec;
-
-       if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
-               return 0;
-       tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
-       return 1;
-}
-
-int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
-{
-       uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
-       int ret;
-
-       assert(tdb->flags & TDB_VERSION1);
-       ret = tdb1_exists_hash(tdb, key, hash);
-       return ret;
-}
-
-/* actually delete an entry in the database given the offset */
-int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
-{
-       tdb1_off_t last_ptr, i;
-       struct tdb1_record lastrec;
-
-       if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
-
-       if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
-           tdb1_write_lock_record(tdb, rec_ptr) == -1) {
-               /* Someone traversing here: mark it as dead */
-               rec->magic = TDB1_DEAD_MAGIC;
-               return tdb1_rec_write(tdb, rec_ptr, rec);
-       }
-       if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
-               return -1;
-
-       /* find previous record in hash chain */
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
-               return -1;
-       for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
-               if (tdb1_rec_read(tdb, i, &lastrec) == -1)
-                       return -1;
-
-       /* unlink it: next ptr is at start of record. */
-       if (last_ptr == 0)
-               last_ptr = TDB1_HASH_TOP(rec->full_hash);
-       if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
-               return -1;
-
-       /* recover the space */
-       if (tdb1_free(tdb, rec_ptr, rec) == -1)
-               return -1;
-       return 0;
-}
-
-static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
-{
-       int res = 0;
-       tdb1_off_t rec_ptr;
-       struct tdb1_record rec;
-
-       /* read in the hash top */
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
-               return 0;
-
-       while (rec_ptr) {
-               if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
-                       return 0;
-
-               if (rec.magic == TDB1_DEAD_MAGIC) {
-                       res += 1;
-               }
-               rec_ptr = rec.next;
-       }
-       return res;
-}
-
-/*
- * Purge all DEAD records from a hash chain
- */
-static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
-{
-       int res = -1;
-       struct tdb1_record rec;
-       tdb1_off_t rec_ptr;
-
-       if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
-               return -1;
-       }
-
-       /* read in the hash top */
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
-               goto fail;
-
-       while (rec_ptr) {
-               tdb1_off_t next;
-
-               if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
-                       goto fail;
-               }
-
-               next = rec.next;
-
-               if (rec.magic == TDB1_DEAD_MAGIC
-                   && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
-                       goto fail;
-               }
-               rec_ptr = next;
-       }
-       res = 0;
- fail:
-       tdb1_unlock(tdb, -1, F_WRLCK);
-       return res;
-}
-
-/* delete an entry in the database given a key */
-static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
-{
-       tdb1_off_t rec_ptr;
-       struct tdb1_record rec;
-       int ret;
-
-       if (tdb->tdb1.max_dead_records != 0) {
-
-               /*
-                * Allow for some dead records per hash chain, mainly for
-                * tdb's with a very high create/delete rate like locking.tdb.
-                */
-
-               if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
-                       return -1;
-
-               if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
-                       /*
-                        * Don't let the per-chain freelist grow too large,
-                        * delete all existing dead records
-                        */
-                       tdb1_purge_dead(tdb, hash);
-               }
-
-               if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
-                       tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
-                       return -1;
-               }
-
-               /*
-                * Just mark the record as dead.
-                */
-               rec.magic = TDB1_DEAD_MAGIC;
-               ret = tdb1_rec_write(tdb, rec_ptr, &rec);
-       }
-       else {
-               if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
-                                                  &rec)))
-                       return -1;
-
-               ret = tdb1_do_delete(tdb, rec_ptr, &rec);
-       }
-
-       if (ret == 0) {
-               tdb1_increment_seqnum(tdb);
-       }
-
-       if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_delete: WARNING tdb1_unlock failed!");
-       return ret;
-}
-
-int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
-{
-       uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
-       int ret;
-
-       assert(tdb->flags & TDB_VERSION1);
-       ret = tdb1_delete_hash(tdb, key, hash);
-       return ret;
-}
-
-/*
- * See if we have a dead record around with enough space
- */
-static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
-                              struct tdb1_record *r, tdb1_len_t length)
-{
-       tdb1_off_t rec_ptr;
-
-       /* read in the hash top */
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
-               return 0;
-
-       /* keep looking until we find the right record */
-       while (rec_ptr) {
-               if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
-                       return 0;
-
-               if (TDB1_DEAD(r) && r->rec_len >= length) {
-                       /*
-                        * First fit for simple coding, TODO: change to best
-                        * fit
-                        */
-                       return rec_ptr;
-               }
-               rec_ptr = r->next;
-       }
-       return 0;
-}
-
-static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
-                      TDB_DATA dbuf, int flag, uint32_t hash)
-{
-       struct tdb1_record rec;
-       tdb1_off_t rec_ptr;
-       int ret = -1;
-
-       /* check for it existing, on insert. */
-       if (flag == TDB_INSERT) {
-               if (tdb1_exists_hash(tdb, key, hash)) {
-                       tdb->last_error = TDB_ERR_EXISTS;
-                       goto fail;
-               }
-               if (tdb->last_error != TDB_ERR_NOEXIST) {
-                       goto fail;
-               }
-       } else {
-               /* first try in-place update, on modify or replace. */
-               if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
-                       goto done;
-               }
-               if (tdb->last_error != TDB_SUCCESS) {
-                       if (tdb->last_error != TDB_ERR_NOEXIST) {
-                               goto fail;
-                       }
-                       if (flag == TDB_MODIFY) {
-                               /* if the record doesn't exist and we are in TDB1_MODIFY mode then
-                                  we should fail the store */
-                               goto fail;
-                       }
-               }
-       }
-       /* reset the error code potentially set by the tdb1_update() */
-       tdb->last_error = TDB_SUCCESS;
-
-       /* delete any existing record - if it doesn't exist we don't
-           care.  Doing this first reduces fragmentation, and avoids
-           coalescing with `allocated' block before it's updated. */
-       if (flag != TDB_INSERT)
-               tdb1_delete_hash(tdb, key, hash);
-
-       if (tdb->tdb1.max_dead_records != 0) {
-               /*
-                * Allow for some dead records per hash chain, look if we can
-                * find one that can hold the new record. We need enough space
-                * for key, data and tailer. If we find one, we don't have to
-                * consult the central freelist.
-                */
-               rec_ptr = tdb1_find_dead(
-                       tdb, hash, &rec,
-                       key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
-
-               if (rec_ptr != 0) {
-                       rec.key_len = key.dsize;
-                       rec.data_len = dbuf.dsize;
-                       rec.full_hash = hash;
-                       rec.magic = TDB1_MAGIC;
-                       if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
-                           || tdb->tdb1.io->tdb1_write(
-                                   tdb, rec_ptr + sizeof(rec),
-                                   key.dptr, key.dsize) == -1
-                           || tdb->tdb1.io->tdb1_write(
-                                   tdb, rec_ptr + sizeof(rec) + key.dsize,
-                                   dbuf.dptr, dbuf.dsize) == -1) {
-                               goto fail;
-                       }
-                       goto done;
-               }
-       }
-
-       /*
-        * We have to allocate some space from the freelist, so this means we
-        * have to lock it. Use the chance to purge all the DEAD records from
-        * the hash chain under the freelist lock.
-        */
-
-       if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
-               goto fail;
-       }
-
-       if ((tdb->tdb1.max_dead_records != 0)
-           && (tdb1_purge_dead(tdb, hash) == -1)) {
-               tdb1_unlock(tdb, -1, F_WRLCK);
-               goto fail;
-       }
-
-       /* we have to allocate some space */
-       rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
-
-       tdb1_unlock(tdb, -1, F_WRLCK);
-
-       if (rec_ptr == 0) {
-               goto fail;
-       }
-
-       /* Read hash top into next ptr */
-       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
-               goto fail;
-
-       rec.key_len = key.dsize;
-       rec.data_len = dbuf.dsize;
-       rec.full_hash = hash;
-       rec.magic = TDB1_MAGIC;
-
-       /* write out and point the top of the hash chain at it */
-       if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
-           || tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec),
-                                       key.dptr, key.dsize) == -1
-           || tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + key.dsize,
-                                       dbuf.dptr, dbuf.dsize) == -1
-           || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
-               /* Need to tdb1_unallocate() here */
-               goto fail;
-       }
-
- done:
-       ret = 0;
- fail:
-       if (ret == 0) {
-               tdb1_increment_seqnum(tdb);
-       }
-       return ret;
-}
-
-/* store an element in the database, replacing any existing element
-   with the same key
-
-   return 0 on success, -1 on failure
-*/
-int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
-{
-       uint32_t hash;
-       int ret;
-
-       assert(tdb->flags & TDB_VERSION1);
-
-       if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_store: read-only tdb");
-               return -1;
-       }
-
-       /* find which hash bucket it is in */
-       hash = tdb_hash(tdb, key.dptr, key.dsize);
-       if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
-               return -1;
-
-       ret = _tdb1_store(tdb, key, dbuf, flag, hash);
-       tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
-       return ret;
-}
-
-/* Append to an entry. Create if not exist. */
-int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
-{
-       uint32_t hash;
-       TDB_DATA dbuf;
-       int ret = -1;
-
-       assert(tdb->flags & TDB_VERSION1);
-
-       /* find which hash bucket it is in */
-       hash = tdb_hash(tdb, key.dptr, key.dsize);
-       if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
-               return -1;
-
-       dbuf = _tdb1_fetch(tdb, key);
-
-       if (dbuf.dptr == NULL) {
-               dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
-       } else {
-               unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
-               unsigned char *new_dptr;
-
-               /* realloc '0' is special: don't do that. */
-               if (new_len == 0)
-                       new_len = 1;
-               new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
-               if (new_dptr == NULL) {
-                       free(dbuf.dptr);
-               }
-               dbuf.dptr = new_dptr;
-       }
-
-       if (dbuf.dptr == NULL) {
-               tdb->last_error = TDB_ERR_OOM;
-               goto failed;
-       }
-
-       memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
-       dbuf.dsize += new_dbuf.dsize;
-
-       ret = _tdb1_store(tdb, key, dbuf, 0, hash);
-
-failed:
-       tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
-       SAFE_FREE(dbuf.dptr);
-       return ret;
-}
-
-
-/*
-  get the tdb sequence number. Only makes sense if the writers opened
-  with TDB1_SEQNUM set. Note that this sequence number will wrap quite
-  quickly, so it should only be used for a 'has something changed'
-  test, not for code that relies on the count of the number of changes
-  made. If you want a counter then use a tdb record.
-
-  The aim of this sequence number is to allow for a very lightweight
-  test of a possible tdb change.
-*/
-int tdb1_get_seqnum(struct tdb_context *tdb)
-{
-       tdb1_off_t seqnum=0;
-
-       tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
-       return seqnum;
-}
-
-
-/*
-  add a region of the file to the freelist. Length is the size of the region in bytes,
-  which includes the free list header that needs to be added
- */
-static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
-{
-       struct tdb1_record rec;
-       if (length <= sizeof(rec)) {
-               /* the region is not worth adding */
-               return 0;
-       }
-       if (length + offset > tdb->file->map_size) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "tdb1_free_region: adding region beyond"
-                                       " end of file");
-               return -1;
-       }
-       memset(&rec,'\0',sizeof(rec));
-       rec.rec_len = length - sizeof(rec);
-       if (tdb1_free(tdb, offset, &rec) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_free_region: failed to add free record");
-               return -1;
-       }
-       return 0;
-}
-
-/*
-  wipe the entire database, deleting all records. This can be done
-  very fast by using a allrecord lock. The entire data portion of the
-  file becomes a single entry in the freelist.
-
-  This code carefully steps around the recovery area, leaving it alone
- */
-int tdb1_wipe_all(struct tdb_context *tdb)
-{
-       int i;
-       tdb1_off_t offset = 0;
-       ssize_t data_len;
-       tdb1_off_t recovery_head;
-       tdb1_len_t recovery_size = 0;
-
-       if (tdb_lockall(tdb) != TDB_SUCCESS) {
-               return -1;
-       }
-
-
-       /* see if the tdb has a recovery area, and remember its size
-          if so. We don't want to lose this as otherwise each
-          tdb1_wipe_all() in a transaction will increase the size of
-          the tdb by the size of the recovery area */
-       if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_wipe_all: failed to read recovery head");
-               goto failed;
-       }
-
-       if (recovery_head != 0) {
-               struct tdb1_record rec;
-               if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_wipe_all: failed to read recovery record");
-                       return -1;
-               }
-               recovery_size = rec.rec_len + sizeof(rec);
-       }
-
-       /* wipe the hashes */
-       for (i=0;i<tdb->tdb1.header.hash_size;i++) {
-               if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_wipe_all: failed to write hash %d", i);
-                       goto failed;
-               }
-       }
-
-       /* wipe the freelist */
-       if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_wipe_all: failed to write freelist");
-               goto failed;
-       }
-
-       /* add all the rest of the file to the freelist, possibly leaving a gap
-          for the recovery area */
-       if (recovery_size == 0) {
-               /* the simple case - the whole file can be used as a freelist */
-               data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
-               if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
-                       goto failed;
-               }
-       } else {
-               /* we need to add two freelist entries - one on either
-                  side of the recovery area
-
-                  Note that we cannot shift the recovery area during
-                  this operation. Only the transaction.c code may
-                  move the recovery area or we risk subtle data
-                  corruption
-               */
-               data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
-               if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
-                       goto failed;
-               }
-               /* and the 2nd free list entry after the recovery area - if any */
-               data_len = tdb->file->map_size - (recovery_head+recovery_size);
-               if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
-                       goto failed;
-               }
-       }
-
-       tdb1_increment_seqnum_nonblock(tdb);
-       tdb_unlockall(tdb);
-       return 0;
-
-failed:
-       tdb_unlockall(tdb);
-       return -1;
-}
-
-/* Even on files, we can get partial writes due to signals. */
-bool tdb1_write_all(int fd, const void *buf, size_t count)
-{
-       while (count) {
-               ssize_t ret;
-               ret = write(fd, buf, count);
-               if (ret < 0)
-                       return false;
-               buf = (const char *)buf + ret;
-               count -= ret;
-       }
-       return true;
-}
diff --git a/ccan/tdb2/tdb1_transaction.c b/ccan/tdb2/tdb1_transaction.c
deleted file mode 100644 (file)
index 9cb9523..0000000
+++ /dev/null
@@ -1,1339 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              2005
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb1_private.h"
-
-/*
-  transaction design:
-
-  - only allow a single transaction at a time per database. This makes
-    using the transaction API simpler, as otherwise the caller would
-    have to cope with temporary failures in transactions that conflict
-    with other current transactions
-
-  - keep the transaction recovery information in the same file as the
-    database, using a special 'transaction recovery' record pointed at
-    by the header. This removes the need for extra journal files as
-    used by some other databases
-
-  - dynamically allocate the transaction recovery record, re-using it
-    for subsequent transactions. If a larger record is needed then
-    tdb1_free() the old record to place it on the normal tdb freelist
-    before allocating the new record
-
-  - during transactions, keep a linked list of all writes that have
-    been performed by intercepting all tdb1_write() calls. The hooked
-    transaction versions of tdb1_read() and tdb1_write() check this
-    linked list and try to use the elements of the list in preference
-    to the real database.
-
-  - don't allow any locks to be held when a transaction starts,
-    otherwise we can end up with deadlock (plus lack of lock nesting
-    in posix locks would mean the lock is lost)
-
-  - if the caller gains a lock during the transaction but doesn't
-    release it then fail the commit
-
-  - allow for nested calls to tdb1_transaction_start(), re-using the
-    existing transaction record. If the inner transaction is cancelled
-    then a subsequent commit will fail
-
-  - keep a mirrored copy of the tdb hash chain heads to allow for the
-    fast hash heads scan on traverse, updating the mirrored copy in
-    the transaction version of tdb1_write
-
-  - allow callers to mix transaction and non-transaction use of tdb,
-    although once a transaction is started then an exclusive lock is
-    gained until the transaction is committed or cancelled
-
-  - the commit strategy involves first saving away all modified data
-    into a linearised buffer in the transaction recovery area, then
-    marking the transaction recovery area with a magic value to
-    indicate a valid recovery record. In total 4 fsync/msync calls are
-    needed per commit to prevent race conditions. It might be possible
-    to reduce this to 3 or even 2 with some more work.
-
-  - check for a valid recovery record on open of the tdb, while the
-    open lock is held. Automatically recover from the transaction
-    recovery area if needed, then continue with the open as
-    usual. This allows for smooth crash recovery with no administrator
-    intervention.
-
-  - if TDB_NOSYNC is passed to flags in tdb1_open then transactions are
-    still available, but no transaction recovery area is used and no
-    fsync/msync calls are made.
-
-  - if TDB_ALLOW_NESTING is passed to flags in tdb open, or added using
-    tdb1_add_flags() transaction nesting is enabled.
-    The default is that transaction nesting is NOT allowed.
-
-    Beware: when transactions are nested, a transaction successfully
-    completed with tdb1_transaction_commit() can be silently unrolled later.
-*/
-
-
-/*
-  hold the context of any current transaction
-
-  One of these hangs off tdb->tdb1.transaction while a transaction is
-  open: _tdb1_transaction_start() callocs it and
-  _tdb1_transaction_cancel() frees it again.
-*/
-struct tdb1_transaction {
-       /* we keep a mirrored copy of the tdb hash heads here so
-          tdb1_next_hash_chain() can operate efficiently.
-          Allocated with hash_size+1 entries: entry 0 is the freelist
-          head, entry h+1 is the head of hash chain h */
-       uint32_t *hash_heads;
-
-       /* the original io methods - used to do IOs to the real db */
-       const struct tdb1_methods *io_methods;
-
-       /* the list of transaction blocks. When a block is first
-          written to, it gets created in this list.
-          The array is indexed by (file offset / block_size); a NULL
-          entry means that block has not been written in this
-          transaction */
-       uint8_t **blocks;
-       uint32_t num_blocks;
-       uint32_t block_size;      /* bytes in each block */
-       uint32_t last_block_size; /* number of valid bytes in the last block */
-
-       /* non-zero when an internal transaction error has
-          occurred. All write operations will then fail until the
-          transaction is ended */
-       int transaction_error;
-
-       /* when inside a transaction we need to keep track of any
-          nested tdb1_transaction_start() calls, as these are allowed,
-          but don't create a new transaction */
-       int nesting;
-
-       /* set when a prepare has already occurred */
-       bool prepared;
-       /* when non-zero, the file offset at which
-          _tdb1_transaction_cancel() overwrites the recovery magic with
-          TDB1_RECOVERY_INVALID_MAGIC; zero means there is no recovery
-          marker to remove */
-       tdb1_off_t magic_offset;
-
-       /* old file size before transaction */
-       tdb1_len_t old_map_size;
-
-       /* did we expand in this transaction */
-       bool expanded;
-};
-
-
-/*
-  read while in a transaction. We need to check first if the data is in our list
-  of transaction elements, then if not do a real read
-
-  off/len describe the file range to read into buf. cv is passed through to
-  the underlying tdb1_read method; for data served from a transaction block it
-  selects whether tdb1_convert() is applied to the copied bytes.
-
-  Returns 0 on success and -1 on failure. A failure detected here is logged
-  via tdb_logerr(TDB_ERR_IO), recorded in tdb->last_error, and marks the
-  transaction as failed (transaction_error = 1).
-*/
-static int transaction1_read(struct tdb_context *tdb, tdb1_off_t off, void *buf,
-                            tdb1_len_t len, int cv)
-{
-       uint32_t blk;
-
-       /* break it down into block sized ops */
-       while (len + (off % tdb->tdb1.transaction->block_size) > tdb->tdb1.transaction->block_size) {
-               /* len2 = bytes from off up to the end of the block containing off;
-                  read that piece recursively, then step past it */
-               tdb1_len_t len2 = tdb->tdb1.transaction->block_size - (off % tdb->tdb1.transaction->block_size);
-               if (transaction1_read(tdb, off, buf, len2, cv) != 0) {
-                       return -1;
-               }
-               len -= len2;
-               off += len2;
-               buf = (void *)(len2 + (char *)buf);
-       }
-
-       /* from here on the remaining range lies within a single block */
-       if (len == 0) {
-               return 0;
-       }
-
-       blk = off / tdb->tdb1.transaction->block_size;
-
-       /* see if we have it in the block list */
-       if (tdb->tdb1.transaction->num_blocks <= blk ||
-           tdb->tdb1.transaction->blocks[blk] == NULL) {
-               /* nope, do a real read */
-               if (tdb->tdb1.transaction->io_methods->tdb1_read(tdb, off, buf, len, cv) != 0) {
-                       goto fail;
-               }
-               return 0;
-       }
-
-       /* it is in the block list. Now check for the last block */
-       /* NOTE(review): only len is compared against last_block_size, not
-          (off % block_size) + len - confirm whether that is intended */
-       if (blk == tdb->tdb1.transaction->num_blocks-1) {
-               if (len > tdb->tdb1.transaction->last_block_size) {
-                       goto fail;
-               }
-       }
-
-       /* now copy it out of this block */
-       memcpy(buf, tdb->tdb1.transaction->blocks[blk] + (off % tdb->tdb1.transaction->block_size), len);
-       if (cv) {
-               tdb1_convert(buf, len);
-       }
-       return 0;
-
-fail:
-       /* log, remember the error code and poison the transaction */
-       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                               "transaction_read: failed at off=%d len=%d",
-                               off, len);
-       tdb->tdb1.transaction->transaction_error = 1;
-       return -1;
-}
-
-
-/*
-  write while in a transaction
-
-  Nothing is written to the file here: the data is stored in the
-  in-memory transaction blocks, which are created on demand.
-
-  off/len give the file range being written. buf supplies the bytes; a
-  NULL buf means the range is filled with zeros instead.
-
-  Returns 0 on success, -1 on failure. Every failure path sets
-  tdb->last_error and marks the transaction as failed.
-*/
-static int transaction1_write(struct tdb_context *tdb, tdb1_off_t off,
-                            const void *buf, tdb1_len_t len)
-{
-       uint32_t blk;
-
-       /* Only a commit is allowed on a prepared transaction */
-       if (tdb->tdb1.transaction->prepared) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                       "transaction_write: transaction already"
-                                       " prepared, write not allowed");
-               tdb->tdb1.transaction->transaction_error = 1;
-               return -1;
-       }
-
-       /* if the write is to a hash head, then update the transaction
-          hash heads */
-       if (len == sizeof(tdb1_off_t) && off >= TDB1_FREELIST_TOP &&
-           off < TDB1_FREELIST_TOP+TDB1_HASHTABLE_SIZE(tdb)) {
-               uint32_t chain = (off-TDB1_FREELIST_TOP) / sizeof(tdb1_off_t);
-               memcpy(&tdb->tdb1.transaction->hash_heads[chain], buf, len);
-       }
-
-       /* break it up into block sized chunks */
-       while (len + (off % tdb->tdb1.transaction->block_size) > tdb->tdb1.transaction->block_size) {
-               /* len2 = bytes from off up to the end of the block containing off;
-                  write that piece recursively, then step past it */
-               tdb1_len_t len2 = tdb->tdb1.transaction->block_size - (off % tdb->tdb1.transaction->block_size);
-               if (transaction1_write(tdb, off, buf, len2) != 0) {
-                       return -1;
-               }
-               len -= len2;
-               off += len2;
-               /* a NULL buf (zero fill) stays NULL for every chunk */
-               if (buf != NULL) {
-                       buf = (const void *)(len2 + (const char *)buf);
-               }
-       }
-
-       if (len == 0) {
-               return 0;
-       }
-
-       /* from here on off is the offset inside block blk */
-       blk = off / tdb->tdb1.transaction->block_size;
-       off = off % tdb->tdb1.transaction->block_size;
-
-       if (tdb->tdb1.transaction->num_blocks <= blk) {
-               uint8_t **new_blocks;
-               /* expand the blocks array */
-               if (tdb->tdb1.transaction->blocks == NULL) {
-                       new_blocks = (uint8_t **)malloc(
-                               (blk+1)*sizeof(uint8_t *));
-               } else {
-                       new_blocks = (uint8_t **)realloc(
-                               tdb->tdb1.transaction->blocks,
-                               (blk+1)*sizeof(uint8_t *));
-               }
-               if (new_blocks == NULL) {
-                       tdb->last_error = TDB_ERR_OOM;
-                       goto fail;
-               }
-               /* NULL out the newly added slots (old num_blocks .. blk) */
-               memset(&new_blocks[tdb->tdb1.transaction->num_blocks], 0,
-                      (1+(blk - tdb->tdb1.transaction->num_blocks))*sizeof(uint8_t *));
-               tdb->tdb1.transaction->blocks = new_blocks;
-               tdb->tdb1.transaction->num_blocks = blk+1;
-               /* there is a new last block, and nothing in it is valid yet */
-               tdb->tdb1.transaction->last_block_size = 0;
-       }
-
-       /* allocate and fill a block? */
-       if (tdb->tdb1.transaction->blocks[blk] == NULL) {
-               tdb->tdb1.transaction->blocks[blk] = (uint8_t *)calloc(tdb->tdb1.transaction->block_size, 1);
-               if (tdb->tdb1.transaction->blocks[blk] == NULL) {
-                       /* unlike the fail: path below, this failure is not logged */
-                       tdb->last_error = TDB_ERR_OOM;
-                       tdb->tdb1.transaction->transaction_error = 1;
-                       return -1;
-               }
-               /* if the block starts inside the pre-transaction file, preload
-                  it with the current file contents (clipped to old_map_size)
-                  using the original, non-transaction read method */
-               if (tdb->tdb1.transaction->old_map_size > blk * tdb->tdb1.transaction->block_size) {
-                       tdb1_len_t len2 = tdb->tdb1.transaction->block_size;
-                       if (len2 + (blk * tdb->tdb1.transaction->block_size) > tdb->tdb1.transaction->old_map_size) {
-                               len2 = tdb->tdb1.transaction->old_map_size - (blk * tdb->tdb1.transaction->block_size);
-                       }
-                       if (tdb->tdb1.transaction->io_methods->tdb1_read(tdb, blk * tdb->tdb1.transaction->block_size,
-                                                                  tdb->tdb1.transaction->blocks[blk],
-                                                                  len2, 0) != 0) {
-                               SAFE_FREE(tdb->tdb1.transaction->blocks[blk]);
-                               tdb->last_error = TDB_ERR_IO;
-                               goto fail;
-                       }
-                       if (blk == tdb->tdb1.transaction->num_blocks-1) {
-                               tdb->tdb1.transaction->last_block_size = len2;
-                       }
-               }
-       }
-
-       /* overwrite part of an existing block */
-       if (buf == NULL) {
-               memset(tdb->tdb1.transaction->blocks[blk] + off, 0, len);
-       } else {
-               memcpy(tdb->tdb1.transaction->blocks[blk] + off, buf, len);
-       }
-       /* grow the valid length of the last block if this write extends it */
-       if (blk == tdb->tdb1.transaction->num_blocks-1) {
-               if (len + off > tdb->tdb1.transaction->last_block_size) {
-                       tdb->tdb1.transaction->last_block_size = len + off;
-               }
-       }
-
-       return 0;
-
-fail:
-       /* off was reduced to an in-block offset above, so rebuild the
-          absolute file offset for the log message */
-       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                  "transaction_write: failed at off=%d len=%d",
-                  (blk*tdb->tdb1.transaction->block_size) + off, len);
-       tdb->tdb1.transaction->transaction_error = 1;
-       return -1;
-}
-
-
-/*
-  write while in a transaction - this variant never expands the transaction blocks, it only
-  updates existing blocks. This means it cannot change the recovery size
-
-  off/len give the file range to update. buf supplies the bytes; as in
-  transaction1_write(), a NULL buf means the range is filled with zeros.
-
-  Parts of the range that fall in blocks which do not exist in the
-  transaction, or beyond the valid bytes of the last block, are silently
-  skipped. Always returns 0.
-*/
-static int transaction1_write_existing(struct tdb_context *tdb, tdb1_off_t off,
-                                     const void *buf, tdb1_len_t len)
-{
-       uint32_t blk;
-
-       /* break it up into block sized chunks */
-       while (len + (off % tdb->tdb1.transaction->block_size) > tdb->tdb1.transaction->block_size) {
-               tdb1_len_t len2 = tdb->tdb1.transaction->block_size - (off % tdb->tdb1.transaction->block_size);
-               if (transaction1_write_existing(tdb, off, buf, len2) != 0) {
-                       return -1;
-               }
-               len -= len2;
-               off += len2;
-               if (buf != NULL) {
-                       buf = (const void *)(len2 + (const char *)buf);
-               }
-       }
-
-       if (len == 0) {
-               return 0;
-       }
-
-       /* from here on off is the offset inside block blk */
-       blk = off / tdb->tdb1.transaction->block_size;
-       off = off % tdb->tdb1.transaction->block_size;
-
-       /* the block is not part of the transaction: nothing to update */
-       if (tdb->tdb1.transaction->num_blocks <= blk ||
-           tdb->tdb1.transaction->blocks[blk] == NULL) {
-               return 0;
-       }
-
-       /* never extend the valid part of the last block: clip the write to
-          last_block_size, or drop it if it starts beyond that */
-       if (blk == tdb->tdb1.transaction->num_blocks-1 &&
-           off + len > tdb->tdb1.transaction->last_block_size) {
-               if (off >= tdb->tdb1.transaction->last_block_size) {
-                       return 0;
-               }
-               len = tdb->tdb1.transaction->last_block_size - off;
-       }
-
-       /* overwrite part of an existing block. The chunking loop above
-          accepts a NULL buf, so honour it here as a zero fill instead of
-          handing a null source pointer to memcpy() */
-       if (buf == NULL) {
-               memset(tdb->tdb1.transaction->blocks[blk] + off, 0, len);
-       } else {
-               memcpy(tdb->tdb1.transaction->blocks[blk] + off, buf, len);
-       }
-
-       return 0;
-}
-
-
-/*
-  accelerated hash chain head search, using the cached hash heads:
-  advance *chain to the first chain at or after its current value whose
-  mirrored head is non-zero, or to hash_size when there is none
-*/
-static void transaction1_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
-{
-       const uint32_t *heads = tdb->tdb1.transaction->hash_heads;
-       uint32_t idx = *chain;
-
-       /* heads[0] belongs to the freelist, so chain idx lives in heads[idx+1] */
-       while (idx < tdb->tdb1.header.hash_size && heads[idx+1] == 0) {
-               idx++;
-       }
-       *chain = idx;
-}
-
-/*
-  out of bounds check during a transaction: the range [off, off+len)
-  is accepted when it ends at or before the current map_size and the
-  addition did not wrap. Returns 0 when in bounds; otherwise sets
-  tdb->last_error to TDB_ERR_IO and returns -1. probe is not used here.
-*/
-static int transaction1_oob(struct tdb_context *tdb, tdb1_off_t off, tdb1_off_t len, int probe)
-{
-       /* off + len < off means the sum wrapped around */
-       if (off + len < off || off + len > tdb->file->map_size) {
-               tdb->last_error = TDB_ERR_IO;
-               return -1;
-       }
-       return 0;
-}
-
-/*
-  transaction version of tdb1_expand(): nothing touches the file here,
-  the extension is only recorded in the transaction. Returns 0 on
-  success, -1 if the transaction write fails.
-*/
-static int transaction1_expand_file(struct tdb_context *tdb, tdb1_off_t size,
-                                   tdb1_off_t addition)
-{
-       /* a NULL-buffer transaction write stores zeros for the new
-          region, so later reads inside the transaction see zero data */
-       int rc = transaction1_write(tdb, size, NULL, addition);
-
-       if (rc == 0) {
-               tdb->tdb1.transaction->expanded = true;
-               return 0;
-       }
-       return -1;
-}
-
-/*
-  io method table that _tdb1_transaction_start() installs as
-  tdb->tdb1.io for the duration of a transaction. The initializers are
-  positional, so their order has to match the member order of
-  struct tdb1_methods.
-*/
-static const struct tdb1_methods transaction1_methods = {
-       transaction1_read,
-       transaction1_write,
-       transaction1_next_hash_chain,
-       transaction1_oob,
-       transaction1_expand_file,
-};
-
-
-/*
-  start a tdb transaction. No token is returned, as only a single
-  transaction is allowed to be pending per tdb_context
-
-  Returns 0 on success and -1 on failure. With TDB_ALLOW_NESTING set, a
-  call made while a transaction is already open only bumps the nesting
-  counter and returns 0.
-
-  On a fresh start this allocates the transaction context, takes the
-  transaction write lock and an all-record read lock, snapshots the hash
-  heads, records the current map size and finally swaps tdb->tdb1.io for
-  the transaction io methods.
-*/
-static int _tdb1_transaction_start(struct tdb_context *tdb)
-{
-       /* some sanity checks */
-       if (tdb->flags & TDB_INTERNAL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb1_transaction_start:"
-                                            " cannot start a"
-                                            " transaction on an"
-                                            " internal tdb");
-               return -1;
-       }
-
-       if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_transaction_start:"
-                                            " cannot start a"
-                                            " transaction on a "
-                                            " read-only tdb");
-               return -1;
-       }
-
-       /* cope with nested tdb1_transaction_start() calls */
-       if (tdb->tdb1.transaction != NULL) {
-               if (!(tdb->flags & TDB_ALLOW_NESTING)) {
-                       tdb->last_error
-                               = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_transaction_start:"
-                                            " already inside transaction");
-                       return -1;
-               }
-               tdb->stats.transaction_nest++;
-               tdb->tdb1.transaction->nesting++;
-               return 0;
-       }
-
-       if (tdb1_have_extra_locks(tdb)) {
-               /* the caller must not have any locks when starting a
-                  transaction as otherwise we'll be screwed by lack
-                  of nested locks in posix */
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_start: cannot start a"
-                                       " transaction with locks held");
-               return -1;
-       }
-
-       if (tdb->tdb1.travlocks.next != NULL) {
-               /* you cannot use transactions inside a traverse (although you can use
-                  traverse inside a transaction) as otherwise you can end up with
-                  deadlock */
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_start: cannot start a"
-                                       " transaction within a traverse");
-               return -1;
-       }
-
-       /* calloc leaves every field zero/NULL, which the cleanup paths
-          below rely on when they SAFE_FREE members not yet allocated */
-       tdb->tdb1.transaction = (struct tdb1_transaction *)
-               calloc(sizeof(struct tdb1_transaction), 1);
-       if (tdb->tdb1.transaction == NULL) {
-               tdb->last_error = TDB_ERR_OOM;
-               return -1;
-       }
-
-       /* a page at a time seems like a reasonable compromise between compactness and efficiency */
-       tdb->tdb1.transaction->block_size = tdb->tdb1.page_size;
-
-       /* get the transaction write lock. This is a blocking lock. As
-          discussed with Volker, there are a number of ways we could
-          make this async, which we will probably do in the future */
-       if (tdb1_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT) == -1) {
-               SAFE_FREE(tdb->tdb1.transaction->blocks);
-               SAFE_FREE(tdb->tdb1.transaction);
-               return -1;
-       }
-
-       /* get a read lock from the freelist to the end of file. This
-          is upgraded to a write lock during the commit */
-       if (tdb1_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true) == -1) {
-               /* EAGAIN and EINTR failures are not logged */
-               if (errno != EAGAIN && errno != EINTR) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_start:"
-                                  " failed to get hash locks");
-               }
-               goto fail_allrecord_lock;
-       }
-
-       /* setup a copy of the hash table heads so the hash scan in
-          traverse can be fast */
-       tdb->tdb1.transaction->hash_heads = (uint32_t *)
-               calloc(tdb->tdb1.header.hash_size+1, sizeof(uint32_t));
-       if (tdb->tdb1.transaction->hash_heads == NULL) {
-               tdb->last_error = TDB_ERR_OOM;
-               goto fail;
-       }
-       /* tdb1.io still points at the regular methods here, so this
-          reads the heads from the real database */
-       if (tdb->tdb1.io->tdb1_read(tdb, TDB1_FREELIST_TOP, tdb->tdb1.transaction->hash_heads,
-                                  TDB1_HASHTABLE_SIZE(tdb), 0) != 0) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_start: failed to read hash heads");
-               goto fail;
-       }
-
-       /* make sure we know about any file expansions already done by
-          anyone else */
-       tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
-       tdb->tdb1.transaction->old_map_size = tdb->file->map_size;
-
-       /* finally hook the io methods, replacing them with
-          transaction specific methods */
-       tdb->tdb1.transaction->io_methods = tdb->tdb1.io;
-       tdb->tdb1.io = &transaction1_methods;
-
-       tdb->stats.transactions++;
-       return 0;
-
-       /* cleanup: "fail" also drops the all-record read lock, then both
-          labels release the transaction lock and free the context */
-fail:
-       tdb1_allrecord_unlock(tdb, F_RDLCK);
-fail_allrecord_lock:
-       tdb1_transaction_unlock(tdb, F_WRLCK);
-       SAFE_FREE(tdb->tdb1.transaction->blocks);
-       SAFE_FREE(tdb->tdb1.transaction->hash_heads);
-       SAFE_FREE(tdb->tdb1.transaction);
-       return -1;
-}
-
-/*
-  public entry point for starting a transaction on tdb: forwards to
-  _tdb1_transaction_start() and returns its result unchanged
-*/
-int tdb1_transaction_start(struct tdb_context *tdb)
-{
-       return _tdb1_transaction_start(tdb);
-}
-
-/*
-  sync to disk
-
-  Does nothing (and returns 0) when TDB_NOSYNC is set. Otherwise the
-  whole file descriptor is synced with fdatasync() or fsync(), and, if
-  the file is currently mapped, the pages covering [offset,
-  offset+length) are additionally flushed with msync(MS_SYNC).
-
-  Returns 0 on success; on failure logs, sets tdb->last_error and
-  returns -1.
-*/
-static int transaction1_sync(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t length)
-{
-       if (tdb->flags & TDB_NOSYNC) {
-               return 0;
-       }
-
-       /* the preprocessor selects only the "if (" header; both variants
-          share the error-handling body that follows the #endif */
-#if HAVE_FDATASYNC
-       if (fdatasync(tdb->file->fd) != 0) {
-#else
-       if (fsync(tdb->file->fd) != 0) {
-#endif
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                       "tdb1_transaction: fsync failed");
-               return -1;
-       }
-#if HAVE_MMAP
-       if (tdb->file->map_ptr) {
-               /* round the start down to a page boundary (the mask assumes
-                  page_size is a power of two) and lengthen the range by the
-                  same amount */
-               tdb1_off_t moffset = offset & ~(tdb->tdb1.page_size-1);
-               if (msync(moffset + (char *)tdb->file->map_ptr,
-                         length + (offset - moffset), MS_SYNC) != 0) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "tdb1_transaction:"
-                                               " msync failed - %s",
-                                               strerror(errno));
-                       return -1;
-               }
-       }
-#endif
-       return 0;
-}
-
-
-/*
-  abandon the current transaction and release everything it holds.
-
-  Cancelling a nested transaction only marks the outer transaction as
-  failed and unwinds one nesting level. Cancelling the outermost one
-  restores the old map size, frees the transaction blocks, invalidates
-  a recovery marker if one was recorded, drops the transaction locks
-  and puts the original io methods back.
-
-  Returns 0 on success, -1 if there is no transaction or the recovery
-  marker could not be removed (the teardown is still completed then).
-*/
-static int _tdb1_transaction_cancel(struct tdb_context *tdb)
-{
-       struct tdb1_transaction *txn = tdb->tdb1.transaction;
-       int status = 0;
-       int n;
-
-       if (txn == NULL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_cancel:"
-                                       " no transaction");
-               return -1;
-       }
-
-       if (txn->nesting != 0) {
-               /* inner cancel: poison the outer transaction, pop one level */
-               txn->transaction_error = 1;
-               txn->nesting--;
-               return 0;
-       }
-
-       tdb->file->map_size = txn->old_map_size;
-
-       /* release every transaction block (free(NULL) is a no-op), then
-          the array that held them */
-       for (n = 0; n < txn->num_blocks; n++) {
-               free(txn->blocks[n]);
-       }
-       SAFE_FREE(txn->blocks);
-
-       if (txn->magic_offset) {
-               const struct tdb1_methods *real_io = txn->io_methods;
-               const uint32_t invalid = TDB1_RECOVERY_INVALID_MAGIC;
-
-               /* take the recovery marker off disk: write via the
-                  original methods, then sync */
-               if (real_io->tdb1_write(tdb, txn->magic_offset, &invalid, 4) == -1 ||
-                   transaction1_sync(tdb, txn->magic_offset, 4) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_cancel: failed to"
-                                  " remove recovery magic");
-                       status = -1;
-               }
-       }
-
-       /* This also removes the OPEN_LOCK, if we have it. */
-       tdb1_release_transaction_locks(tdb);
-
-       /* put the normal io methods back */
-       tdb->tdb1.io = txn->io_methods;
-
-       SAFE_FREE(txn->hash_heads);
-       /* free through the context member so the tdb itself no longer
-          refers to a transaction */
-       SAFE_FREE(tdb->tdb1.transaction);
-
-       return status;
-}
-
-/*
-  cancel the current transaction
-
-  Public entry point: counts the call in tdb->stats.transaction_cancel
-  (before any check that a transaction is actually open) and returns
-  the result of _tdb1_transaction_cancel().
-*/
-int tdb1_transaction_cancel(struct tdb_context *tdb)
-{
-       tdb->stats.transaction_cancel++;
-       return _tdb1_transaction_cancel(tdb);
-}
-
-/*
-  work out how much space the linearised recovery data will consume:
-  one uint32_t, plus for every transaction block that starts inside the
-  pre-transaction file two tdb1_off_t values and the block's bytes
-*/
-static tdb1_len_t tdb1_recovery_size(struct tdb_context *tdb)
-{
-       const struct tdb1_transaction *txn = tdb->tdb1.transaction;
-       tdb1_len_t total = sizeof(uint32_t);
-       int blk;
-
-       for (blk = 0; blk < txn->num_blocks; blk++) {
-               /* blocks at or beyond the old end of file hold no old data */
-               if (blk * txn->block_size >= txn->old_map_size) {
-                       break;
-               }
-               if (txn->blocks[blk] != NULL) {
-                       total += 2*sizeof(tdb1_off_t);
-                       /* only the last block may be partially filled */
-                       total += (blk == txn->num_blocks-1)
-                               ? txn->last_block_size
-                               : txn->block_size;
-               }
-       }
-
-       return total;
-}
-
-/*
-  locate the recovery area: read its offset from TDB1_RECOVERY_HEAD
-  into *recovery_offset and, if there is one, its record header into
-  *rec using the supplied io methods.
-
-  When there is no recovery area, or its header carries neither
-  recovery magic value, rec->rec_len is set to 0 (and in the latter
-  case *recovery_offset is reset to 0). Returns 0 on success, -1 if
-  either read fails.
-*/
-int tdb1_recovery_area(struct tdb_context *tdb,
-                     const struct tdb1_methods *methods,
-                     tdb1_off_t *recovery_offset,
-                     struct tdb1_record *rec)
-{
-       bool magic_ok;
-
-       if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, recovery_offset) == -1) {
-               return -1;
-       }
-
-       /* no recovery area has been set up */
-       if (*recovery_offset == 0) {
-               rec->rec_len = 0;
-               return 0;
-       }
-
-       if (methods->tdb1_read(tdb, *recovery_offset, rec, sizeof(*rec),
-                             TDB1_DOCONV()) == -1) {
-               return -1;
-       }
-
-       /* ignore invalid recovery regions: can happen in crash */
-       magic_ok = (rec->magic == TDB1_RECOVERY_MAGIC ||
-                   rec->magic == TDB1_RECOVERY_INVALID_MAGIC);
-       if (!magic_ok) {
-               *recovery_offset = 0;
-               rec->rec_len = 0;
-       }
-       return 0;
-}
-
-/*
-  allocate the recovery area, or use an existing recovery area if it is
-  large enough
-
-  On success returns 0 and fills in:
-    *recovery_size     - bytes of recovery data needed, as computed by
-                         tdb1_recovery_size()
-    *recovery_offset   - file offset of the recovery record
-    *recovery_max_size - capacity of the area: rec_len of the existing
-                         record when it is reused, otherwise the adjusted
-                         size of the newly created area less the record
-                         header
-  Returns -1 (after logging) if any step fails.
-*/
-static int tdb1_recovery_allocate(struct tdb_context *tdb,
-                                tdb1_len_t *recovery_size,
-                                tdb1_off_t *recovery_offset,
-                                tdb1_len_t *recovery_max_size)
-{
-       struct tdb1_record rec;
-       /* the original io methods: these operate on the real database,
-          not on the in-memory transaction blocks */
-       const struct tdb1_methods *methods = tdb->tdb1.transaction->io_methods;
-       tdb1_off_t recovery_head;
-
-       if (tdb1_recovery_area(tdb, methods, &recovery_head, &rec) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_recovery_allocate:"
-                          " failed to read recovery head");
-               return -1;
-       }
-
-       *recovery_size = tdb1_recovery_size(tdb);
-
-       if (recovery_head != 0 && *recovery_size <= rec.rec_len) {
-               /* it fits in the existing area */
-               *recovery_max_size = rec.rec_len;
-               *recovery_offset = recovery_head;
-               return 0;
-       }
-
-       /* we need to free up the old recovery area, then allocate a
-          new one at the end of the file. Note that we cannot use
-          tdb1_allocate() to allocate the new one as that might return
-          us an area that is being currently used (as of the start of
-          the transaction) */
-       if (recovery_head != 0) {
-               if (tdb1_free(tdb, recovery_head, &rec) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_recovery_allocate: failed to free"
-                                  " previous recovery area");
-                       return -1;
-               }
-       }
-
-       /* the tdb1_free() call might have increased the recovery size */
-       *recovery_size = tdb1_recovery_size(tdb);
-
-       /* round up to a multiple of page size */
-       *recovery_max_size = tdb1_expand_adjust(tdb->file->map_size,
-                                              *recovery_size,
-                                              tdb->tdb1.page_size)
-               - sizeof(rec);
-
-       /* the new area starts at the current end of the map */
-       *recovery_offset = tdb->file->map_size;
-       recovery_head = *recovery_offset;
-
-       /* grow the real file from its pre-transaction size: the addition
-          covers whatever map_size gained since the transaction started
-          plus the recovery record header and its data area */
-       if (methods->tdb1_expand_file(tdb, tdb->tdb1.transaction->old_map_size,
-                                    (tdb->file->map_size - tdb->tdb1.transaction->old_map_size) +
-                                    sizeof(rec) + *recovery_max_size) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_recovery_allocate:"
-                          " failed to create recovery area");
-               return -1;
-       }
-       tdb->stats.transaction_expand_file++;
-
-       /* remap the file (if using mmap) */
-       methods->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
-
-       /* we have to reset the old map size so that we don't try to expand the file
-          again in the transaction commit, which would destroy the recovery area */
-       tdb->tdb1.transaction->old_map_size = tdb->file->map_size;
-
-       /* write the recovery header offset and sync - we can sync without a race here
-          as the magic ptr in the recovery record has not been set */
-       /* NOTE(review): TDB1_CONV presumably converts recovery_head in place
-          to the on-disk byte order - confirm against tdb1_private.h */
-       TDB1_CONV(recovery_head);
-       if (methods->tdb1_write(tdb, TDB1_RECOVERY_HEAD,
-                              &recovery_head, sizeof(tdb1_off_t)) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_recovery_allocate:"
-                          " failed to write recovery head");
-               return -1;
-       }
-       /* also patch any transaction block that already covers the
-          recovery head, so reads inside the transaction see the new value */
-       if (transaction1_write_existing(tdb, TDB1_RECOVERY_HEAD, &recovery_head, sizeof(tdb1_off_t)) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_recovery_allocate:"
-                          " failed to write recovery head");
-               return -1;
-       }
-
-       return 0;
-}
-
-
-/*
-  Set up the recovery data that will be used on a crash during commit.
-
-  Builds one in-memory blob made of a struct tdb1_record header, one
-  (4-byte offset, 4-byte length, old data) entry for each transaction
-  block that lies inside the pre-transaction file, and a 4-byte tailer,
-  then writes that blob to the recovery area.  The header initially
-  carries TDB1_RECOVERY_INVALID_MAGIC; only after the blob has been
-  synced is the magic field overwritten with TDB1_RECOVERY_MAGIC and
-  synced again.
-
-  tdb:          database context with an active tdb1 transaction.
-  magic_offset: out parameter; receives the file offset of the recovery
-                record's magic field.
-
-  Returns 0 on success, -1 on failure.
-*/
-static int transaction1_setup_recovery(struct tdb_context *tdb,
-                                      tdb1_off_t *magic_offset)
-{
-       tdb1_len_t recovery_size;
-       unsigned char *data, *p;
-       const struct tdb1_methods *methods = tdb->tdb1.transaction->io_methods;
-       struct tdb1_record *rec;
-       tdb1_off_t recovery_offset, recovery_max_size;
-       tdb1_off_t old_map_size = tdb->tdb1.transaction->old_map_size;
-       uint32_t magic, tailer;
-       int i;
-
-       /*
-         check that the recovery area has enough space
-       */
-       if (tdb1_recovery_allocate(tdb, &recovery_size,
-                                 &recovery_offset, &recovery_max_size) == -1) {
-               return -1;
-       }
-
-       /* one buffer holds the record header followed by recovery_size
-          bytes of recovery payload */
-       data = (unsigned char *)malloc(recovery_size + sizeof(*rec));
-       if (data == NULL) {
-               tdb->last_error = TDB_ERR_OOM;
-               return -1;
-       }
-
-       rec = (struct tdb1_record *)data;
-       memset(rec, 0, sizeof(*rec));
-
-       /* the header starts out with the "invalid" magic; key_len is used
-          to carry the pre-transaction file size */
-       rec->magic    = TDB1_RECOVERY_INVALID_MAGIC;
-       rec->data_len = recovery_size;
-       rec->rec_len  = recovery_max_size;
-       rec->key_len  = old_map_size;
-       TDB1_CONV(*rec);
-
-       /* build the recovery data into a single blob to allow us to do a single
-          large write, which should be more efficient */
-       p = data + sizeof(*rec);
-       for (i=0;i<tdb->tdb1.transaction->num_blocks;i++) {
-               tdb1_off_t offset;
-               tdb1_len_t length;
-
-               /* blocks the transaction never allocated are skipped */
-               if (tdb->tdb1.transaction->blocks[i] == NULL) {
-                       continue;
-               }
-
-               /* every block is block_size long except the last one */
-               offset = i * tdb->tdb1.transaction->block_size;
-               length = tdb->tdb1.transaction->block_size;
-               if (i == tdb->tdb1.transaction->num_blocks-1) {
-                       length = tdb->tdb1.transaction->last_block_size;
-               }
-
-               /* blocks starting at or beyond the old end of file are not
-                  saved (presumably because there is no old data there) */
-               if (offset >= old_map_size) {
-                       continue;
-               }
-               if (offset + length > tdb->tdb1.transaction->old_map_size) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                               TDB_LOG_ERROR,
-                                               "tdb1_transaction_setup_recovery: transaction data over new region boundary");
-                       free(data);
-                       return -1;
-               }
-               /* entry header: 4-byte offset followed by 4-byte length */
-               memcpy(p, &offset, 4);
-               memcpy(p+4, &length, 4);
-               if (TDB1_DOCONV()) {
-                       tdb1_convert(p, 8);
-               }
-               /* the recovery area contains the old data, not the
-                  new data, so we have to call the original tdb1_read
-                  method to get it */
-               if (methods->tdb1_read(tdb, offset, p + 8, length, 0) != 0) {
-                       free(data);
-                       tdb->last_error = TDB_ERR_IO;
-                       return -1;
-               }
-               p += 8 + length;
-       }
-
-       /* and the tailer */
-       tailer = sizeof(*rec) + recovery_max_size;
-       memcpy(p, &tailer, 4);
-       if (TDB1_DOCONV()) {
-               tdb1_convert(p, 4);
-       }
-
-       /* write the recovery data to the recovery area, first through the
-          original I/O methods and then through
-          transaction1_write_existing() */
-       if (methods->tdb1_write(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_setup_recovery:"
-                          " failed to write recovery data");
-               free(data);
-               return -1;
-       }
-       if (transaction1_write_existing(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_setup_recovery: failed to write"
-                          " secondary recovery data");
-               free(data);
-               return -1;
-       }
-
-       /* as we don't have ordered writes, we have to sync the recovery
-          data before we update the magic to indicate that the recovery
-          data is present */
-       if (transaction1_sync(tdb, recovery_offset, sizeof(*rec) + recovery_size) == -1) {
-               free(data);
-               return -1;
-       }
-
-       /* the blob is no longer needed; only the magic is written below */
-       free(data);
-
-       magic = TDB1_RECOVERY_MAGIC;
-       TDB1_CONV(magic);
-
-       *magic_offset = recovery_offset + offsetof(struct tdb1_record, magic);
-
-       if (methods->tdb1_write(tdb, *magic_offset, &magic, sizeof(magic)) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_setup_recovery:"
-                          " failed to write recovery magic");
-               return -1;
-       }
-       if (transaction1_write_existing(tdb, *magic_offset, &magic, sizeof(magic)) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_setup_recovery:"
-                          " failed to write secondary recovery magic");
-               return -1;
-       }
-
-       /* ensure the recovery magic marker is on disk */
-       if (transaction1_sync(tdb, *magic_offset, sizeof(magic)) == -1) {
-               return -1;
-       }
-
-       return 0;
-}
-
-/*
-  Worker shared by tdb1_transaction_prepare_commit() and
-  tdb1_transaction_commit().
-
-  Validates the transaction state, upgrades the transaction lock to a
-  write lock, takes the open lock, writes the recovery data (unless
-  TDB_NOSYNC is set), marks the transaction as prepared and expands the
-  file if the transaction grew it.
-
-  Returns 0 on success, and also for a nested transaction or one with no
-  blocks, in which case nothing is done.  Returns -1 on failure.  The
-  "already prepared", "error pending" and "locks pending" failures cancel
-  the transaction here; the "no transaction" failure and the failures to
-  upgrade the lock, take the open lock, set up recovery or expand the
-  file return without cancelling.
-*/
-static int _tdb1_transaction_prepare_commit(struct tdb_context *tdb)
-{
-       const struct tdb1_methods *methods;
-
-       if (tdb->tdb1.transaction == NULL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_prepare_commit:"
-                                       " no transaction");
-               return -1;
-       }
-
-       /* preparing twice is treated as a usage error */
-       if (tdb->tdb1.transaction->prepared) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_prepare_commit:"
-                                       " transaction already prepared");
-               _tdb1_transaction_cancel(tdb);
-               return -1;
-       }
-
-       /* an error recorded earlier in the transaction makes it uncommittable */
-       if (tdb->tdb1.transaction->transaction_error) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                       "tdb1_transaction_prepare_commit:"
-                                       " transaction error pending");
-               _tdb1_transaction_cancel(tdb);
-               return -1;
-       }
-
-
-       /* nothing is prepared while the transaction is still nested */
-       if (tdb->tdb1.transaction->nesting != 0) {
-               return 0;
-       }
-
-       /* check for a null transaction */
-       if (tdb->tdb1.transaction->blocks == NULL) {
-               return 0;
-       }
-
-       methods = tdb->tdb1.transaction->io_methods;
-
-       /* if there are any locks pending then the caller has not
-          nested their locks properly, so fail the transaction */
-       if (tdb1_have_extra_locks(tdb)) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_prepare_commit:"
-                                       " locks pending on commit");
-               _tdb1_transaction_cancel(tdb);
-               return -1;
-       }
-
-       /* upgrade the main transaction lock region to a write lock;
-          EAGAIN and EINTR failures are not logged */
-       if (tdb1_allrecord_upgrade(tdb) == -1) {
-               if (errno != EAGAIN && errno != EINTR) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_prepare_commit:"
-                                  " failed to upgrade hash locks");
-               }
-               return -1;
-       }
-
-       /* get the open lock - this prevents new users attaching to the database
-          during the commit */
-       if (tdb1_nest_lock(tdb, TDB1_OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT) == -1) {
-               if (errno != EAGAIN && errno != EINTR) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_prepare_commit:"
-                                  " failed to get open lock");
-               }
-               return -1;
-       }
-
-       /* recovery data is only written when syncing is enabled */
-       if (!(tdb->flags & TDB_NOSYNC)) {
-               /* write the recovery data to the end of the file */
-               if (transaction1_setup_recovery(tdb, &tdb->tdb1.transaction->magic_offset) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_prepare_commit:"
-                                  " failed to setup recovery data");
-                       return -1;
-               }
-       }
-
-       /* note: the flag is set before the file expansion below, so an
-          expansion failure leaves the transaction marked as prepared */
-       tdb->tdb1.transaction->prepared = true;
-
-       /* expand the file to the new size if needed */
-       if (tdb->file->map_size != tdb->tdb1.transaction->old_map_size) {
-               if (methods->tdb1_expand_file(tdb, tdb->tdb1.transaction->old_map_size,
-                                            tdb->file->map_size -
-                                            tdb->tdb1.transaction->old_map_size) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_prepare_commit:"
-                                  " expansion failed");
-                       return -1;
-               }
-               tdb->stats.transaction_expand_file++;
-               /* reset map_size to the old size, then call the oob method
-                  (presumably to pick up the expanded file - confirm
-                  against tdb1_oob) */
-               tdb->file->map_size = tdb->tdb1.transaction->old_map_size;
-               methods->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
-       }
-
-       /* Keep the open lock until the actual commit */
-
-       return 0;
-}
-
-/*
-   Public entry point: prepare the current transaction for commit.
-   All of the work is delegated to _tdb1_transaction_prepare_commit(),
-   whose result is handed back unchanged.
-*/
-int tdb1_transaction_prepare_commit(struct tdb_context *tdb)
-{
-       int status = _tdb1_transaction_prepare_commit(tdb);
-
-       return status;
-}
-
-/*
-  Decide whether repacking the database is worthwhile.
-
-  Walks the free list starting at TDB1_FREELIST_TOP, summing rec_len of
-  every free record and remembering the largest one.  A repack is
-  worthwhile if the largest free record is less than half of the total
-  free space.
-
-  Returns false if the free list head cannot be read.  The walk stops at
-  the end of the list or at the first free record that fails to read,
-  and the decision is made from what was seen up to that point.
-*/
-static bool repack_worthwhile(struct tdb_context *tdb)
-{
-       tdb1_off_t ptr;
-       struct tdb1_record rec;
-       tdb1_len_t total = 0, largest = 0;
-
-       if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &ptr) == -1) {
-               return false;
-       }
-
-       while (ptr != 0 && tdb1_rec_free_read(tdb, ptr, &rec) == 0) {
-               total += rec.rec_len;
-               if (rec.rec_len > largest) {
-                       largest = rec.rec_len;
-               }
-               ptr = rec.next;
-       }
-
-       /* Same test as "total > largest * 2", but doubling a very large
-          record length could overflow.  largest is one of the summands
-          of total, so total - largest is the space held by all the other
-          free records and the subtraction cannot go below zero. */
-       return total - largest > largest;
-}
-
-/*
-  commit the current transaction
-
-  Returns 0 on success and -1 on failure.  Special cases:
-   - no transaction active: usage error, -1;
-   - an error is pending on the transaction: it is cancelled, -1;
-   - nested transaction: only the nesting count is decremented, 0;
-   - null transaction (no blocks): it is cancelled, 0.
-  If the transaction has not been prepared yet it is prepared here, and
-  cancelled if that fails.  Every non-NULL block is then written through
-  the original I/O methods, the file is synced, the transaction is
-  cancelled to free memory and drop locks, and finally a repack is run
-  if the transaction expanded the file and repack_worthwhile() said so.
-*/
-int tdb1_transaction_commit(struct tdb_context *tdb)
-{
-       const struct tdb1_methods *methods;
-       int i;
-       bool need_repack = false;
-
-       if (tdb->tdb1.transaction == NULL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                       "tdb1_transaction_commit:"
-                                       " no transaction");
-               return -1;
-       }
-
-       if (tdb->tdb1.transaction->transaction_error) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                       "tdb1_transaction_commit:"
-                                       " transaction error pending");
-               _tdb1_transaction_cancel(tdb);
-               return -1;
-       }
-
-
-       /* committing an inner transaction just unwinds one nesting level */
-       if (tdb->tdb1.transaction->nesting != 0) {
-               tdb->tdb1.transaction->nesting--;
-               return 0;
-       }
-
-       /* check for a null transaction */
-       if (tdb->tdb1.transaction->blocks == NULL) {
-               _tdb1_transaction_cancel(tdb);
-               return 0;
-       }
-
-       /* run the prepare step now if the caller did not do it explicitly */
-       if (!tdb->tdb1.transaction->prepared) {
-               int ret = _tdb1_transaction_prepare_commit(tdb);
-               if (ret) {
-                       _tdb1_transaction_cancel(tdb);
-                       return ret;
-               }
-       }
-
-       methods = tdb->tdb1.transaction->io_methods;
-
-       /* perform all the writes */
-       for (i=0;i<tdb->tdb1.transaction->num_blocks;i++) {
-               tdb1_off_t offset;
-               tdb1_len_t length;
-
-               if (tdb->tdb1.transaction->blocks[i] == NULL) {
-                       continue;
-               }
-
-               /* every block is block_size long except the last one */
-               offset = i * tdb->tdb1.transaction->block_size;
-               length = tdb->tdb1.transaction->block_size;
-               if (i == tdb->tdb1.transaction->num_blocks-1) {
-                       length = tdb->tdb1.transaction->last_block_size;
-               }
-
-               if (methods->tdb1_write(tdb, offset, tdb->tdb1.transaction->blocks[i], length) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_commit:"
-                                  " write failed during commit");
-
-                       /* we've overwritten part of the data and
-                          possibly expanded the file, so we need to
-                          run the crash recovery code */
-                       tdb->tdb1.io = methods;
-                       /* NOTE(review): the result of the recovery attempt
-                          is not checked here */
-                       tdb1_transaction_recover(tdb);
-
-                       _tdb1_transaction_cancel(tdb);
-
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_commit: write failed");
-                       return -1;
-               }
-               /* the block is on its way to disk; release our copy */
-               SAFE_FREE(tdb->tdb1.transaction->blocks[i]);
-       }
-
-       /* Do this before we drop lock or blocks. */
-       if (tdb->tdb1.transaction->expanded) {
-               need_repack = repack_worthwhile(tdb);
-       }
-
-       SAFE_FREE(tdb->tdb1.transaction->blocks);
-       tdb->tdb1.transaction->num_blocks = 0;
-
-       /* ensure the new data is on disk */
-       /* NOTE(review): on failure this returns without calling
-          _tdb1_transaction_cancel(); confirm that callers are expected
-          to cancel the transaction themselves in that case */
-       if (transaction1_sync(tdb, 0, tdb->file->map_size) == -1) {
-               return -1;
-       }
-
-       /*
-         TODO: maybe write to some dummy hdr field, or write to magic
-         offset without mmap, before the last sync, instead of the
-         utime() call
-       */
-
-       /* on some systems (like Linux 2.6.x) changes via mmap/msync
-          don't change the mtime of the file, this means the file may
-          not be backed up (as tdb rounding to block sizes means that
-          file size changes are quite rare too). The following forces
-          mtime changes when a transaction completes */
-#if HAVE_UTIME
-       utime(tdb->name, NULL);
-#endif
-
-       /* use a transaction cancel to free memory and remove the
-          transaction locks */
-       _tdb1_transaction_cancel(tdb);
-
-       /* the repack runs only after the transaction has been torn down */
-       if (need_repack) {
-               if (tdb_repack(tdb) != 0)
-                       return -1;
-       }
-
-       return 0;
-}
-
-
-/*
-  recover from an aborted transaction. Must be called with exclusive
-  database write access already established (including the open
-  lock to prevent new processes attaching)
-
-  Reads the recovery head pointer and the recovery record it points to.
-  If no recovery area was ever allocated, or the record does not carry
-  TDB1_RECOVERY_MAGIC, there is nothing to do and 0 is returned.
-  Otherwise every (4-byte offset, 4-byte length, data) entry stored in
-  the recovery area is written back to the file, the file is synced, the
-  recovery head is zeroed when the recovery area lies at or beyond the
-  recovered end of file, and the recovery magic is zeroed.
-
-  Returns 0 on success or when there is nothing to recover, -1 on error
-  (including an attempt to recover a read-only database).  The temporary
-  buffer is released on every exit path, and an entry whose length would
-  run past the end of the recovery data is reported as corruption
-  instead of being copied from beyond the buffer.
-*/
-int tdb1_transaction_recover(struct tdb_context *tdb)
-{
-       tdb1_off_t recovery_head, recovery_eof;
-       unsigned char *data, *p, *end;
-       uint32_t zero = 0;
-       struct tdb1_record rec;
-
-       /* find the recovery area */
-       if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_recover:"
-                          " failed to read recovery head");
-               return -1;
-       }
-
-       if (recovery_head == 0) {
-               /* we have never allocated a recovery record */
-               return 0;
-       }
-
-       /* read the recovery record */
-       if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec,
-                                  sizeof(rec), TDB1_DOCONV()) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_recover:"
-                          " failed to read recovery record");
-               return -1;
-       }
-
-       if (rec.magic != TDB1_RECOVERY_MAGIC) {
-               /* there is no valid recovery data */
-               return 0;
-       }
-
-       if (tdb->flags & TDB_RDONLY) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                       "tdb1_transaction_recover:"
-                                       " attempt to recover read only"
-                                       " database");
-               return -1;
-       }
-
-       /* key_len of the recovery record holds the file size to recover to */
-       recovery_eof = rec.key_len;
-
-       data = (unsigned char *)malloc(rec.data_len);
-       if (data == NULL) {
-               tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                       "tdb1_transaction_recover:"
-                                       " failed to allocate recovery data");
-               return -1;
-       }
-
-       /* read the full recovery data */
-       if (tdb->tdb1.io->tdb1_read(tdb, recovery_head + sizeof(rec), data,
-                                  rec.data_len, 0) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_recover:"
-                          " failed to read recovery data");
-               /* do not leak the buffer on this error path */
-               free(data);
-               return -1;
-       }
-
-       /* recover the file data */
-       p = data;
-       end = data + rec.data_len;
-       while (p+8 < end) {
-               uint32_t ofs, len;
-               if (TDB1_DOCONV()) {
-                       tdb1_convert(p, 8);
-               }
-               memcpy(&ofs, p, 4);
-               memcpy(&len, p+4, 4);
-
-               /* the entry's data must lie inside the buffer we read;
-                  anything else means the recovery area is damaged */
-               if (len > (size_t)(end - (p + 8))) {
-                       free(data);
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                               TDB_LOG_ERROR,
-                                               "tdb1_transaction_recover:"
-                                               " recovery entry overruns"
-                                               " recovery data");
-                       return -1;
-               }
-
-               if (tdb->tdb1.io->tdb1_write(tdb, ofs, p+8, len) == -1) {
-                       free(data);
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_recover: failed to recover"
-                                  " %d bytes at offset %d", len, ofs);
-                       return -1;
-               }
-               p += 8 + len;
-       }
-
-       free(data);
-
-       if (transaction1_sync(tdb, 0, tdb->file->map_size) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_recover: failed to sync recovery");
-               return -1;
-       }
-
-       /* if the recovery area is after the recovered eof then remove it */
-       if (recovery_eof <= recovery_head) {
-               if (tdb1_ofs_write(tdb, TDB1_RECOVERY_HEAD, &zero) == -1) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_transaction_recover: failed to remove"
-                                  " recovery head");
-                       return -1;
-               }
-       }
-
-       /* remove the recovery magic */
-       if (tdb1_ofs_write(tdb, recovery_head + offsetof(struct tdb1_record, magic),
-                         &zero) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_recover: failed to remove"
-                          " recovery magic");
-               return -1;
-       }
-
-       if (transaction1_sync(tdb, 0, recovery_eof) == -1) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_transaction_recover:"
-                          " failed to sync2 recovery");
-               return -1;
-       }
-
-       tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                  "tdb1_transaction_recover: recovered %d byte database",
-                  recovery_eof);
-
-       /* all done */
-       return 0;
-}
-
-/*
-  Report whether the database carries valid recovery data.
-
-  Returns false when no recovery area has ever been allocated, otherwise
-  whether the recovery record's magic equals TDB1_RECOVERY_MAGIC.  If
-  either read fails, TDB_ERR_TO_OFF(tdb->last_error) is returned instead.
-*/
-tdb_bool_err tdb1_needs_recovery(struct tdb_context *tdb)
-{
-       struct tdb1_record hdr;
-       tdb1_off_t head;
-
-       /* locate the recovery area */
-       if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &head) == -1)
-               goto io_error;
-
-       /* a zero head means no recovery record was ever allocated */
-       if (!head)
-               return false;
-
-       /* fetch the recovery record header */
-       if (tdb->tdb1.io->tdb1_read(tdb, head, &hdr,
-                                  sizeof(hdr), TDB1_DOCONV()) == -1)
-               goto io_error;
-
-       return hdr.magic == TDB1_RECOVERY_MAGIC;
-
-io_error:
-       return TDB_ERR_TO_OFF(tdb->last_error);
-}
diff --git a/ccan/tdb2/tdb1_traverse.c b/ccan/tdb2/tdb1_traverse.c
deleted file mode 100644 (file)
index d9d3649..0000000
+++ /dev/null
@@ -1,373 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              1999-2005
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000-2003
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb1_private.h"
-
-/* Value returned by tdb1_next_lock() to signal an error; 0 means the
-   walk finished and any other value is a record offset. */
-#define TDB1_NEXT_LOCK_ERR ((tdb1_off_t)-1)
-
-/* Zero-initialised (static storage) TDB_DATA, returned by
-   tdb1_firstkey()/tdb1_nextkey() when there is no key to hand back. */
-static TDB_DATA tdb1_null;
-
-/* Uses traverse lock: 0 = finish, TDB1_NEXT_LOCK_ERR = error,
-   other = record offset
-
-   Advances the walk described by tlock to the next live record.
-   tlock->hash is the chain being walked and tlock->off the record the
-   walk currently sits on (0 when it has not started on a chain).  When
-   a record is found it is record-locked, its chain lock is still held,
-   its header has been read into *rec and its offset is returned.  Dead
-   records met on the way are deleted unless the database is read-only
-   or a read traverse is in progress.  On error tlock->off is reset to 0
-   and the chain is unlocked. */
-static tdb1_off_t tdb1_next_lock(struct tdb_context *tdb, struct tdb1_traverse_lock *tlock,
-                        struct tdb1_record *rec)
-{
-       /* non-zero when the walk already sits on a record, in which case
-          the first step is to move to the record after it */
-       int want_next = (tlock->off != 0);
-
-       /* Lock each chain from the start one. */
-       for (; tlock->hash < tdb->tdb1.header.hash_size; tlock->hash++) {
-               if (!tlock->off && tlock->hash != 0) {
-                       /* this is an optimisation for the common case where
-                          the hash chain is empty, which is particularly
-                          common for the use of tdb with ldb, where large
-                          hashes are used. In that case we spend most of our
-                          time in tdb1_brlock(), locking empty hash chains.
-
-                          To avoid this, we do an unlocked pre-check to see
-                          if the hash chain is empty before starting to look
-                          inside it. If it is empty then we can avoid that
-                          hash chain. If it isn't empty then we can't believe
-                          the value we get back, as we read it without a
-                          lock, so instead we get the lock and re-fetch the
-                          value below.
-
-                          Notice that not doing this optimisation on the
-                          first hash chain is critical. We must guarantee
-                          that we have done at least one fcntl lock at the
-                          start of a search to guarantee that memory is
-                          coherent on SMP systems. If records are added by
-                          others during the search then that's OK, and we
-                          could possibly miss those with this trick, but we
-                          could miss them anyway without this trick, so the
-                          semantics don't change.
-
-                          With a non-indexed ldb search this trick gains us a
-                          factor of around 80 in speed on a linux 2.6.x
-                          system (testing using ldbtest).
-                       */
-                       tdb->tdb1.io->next_hash_chain(tdb, &tlock->hash);
-                       /* the pre-check ran off the end of the hash table;
-                          the loop condition then ends the walk */
-                       if (tlock->hash == tdb->tdb1.header.hash_size) {
-                               continue;
-                       }
-               }
-
-               if (tdb1_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
-                       return TDB1_NEXT_LOCK_ERR;
-
-               /* No previous record?  Start at top of chain. */
-               if (!tlock->off) {
-                       if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(tlock->hash),
-                                    &tlock->off) == -1)
-                               goto fail;
-               } else {
-                       /* Otherwise unlock the previous record. */
-                       if (tdb1_unlock_record(tdb, tlock->off) != 0)
-                               goto fail;
-               }
-
-               if (want_next) {
-                       /* We have offset of old record: grab next */
-                       if (tdb1_rec_read(tdb, tlock->off, rec) == -1)
-                               goto fail;
-                       tlock->off = rec->next;
-               }
-
-               /* Iterate through chain */
-               while( tlock->off) {
-                       tdb1_off_t current;
-                       if (tdb1_rec_read(tdb, tlock->off, rec) == -1)
-                               goto fail;
-
-                       /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
-                       /* (only a record whose next pointer is itself is caught) */
-                       if (tlock->off == rec->next) {
-                               tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
-                                                       TDB_LOG_ERROR,
-                                                       "tdb1_next_lock:"
-                                                       " loop detected.");
-                               goto fail;
-                       }
-
-                       if (!TDB1_DEAD(rec)) {
-                               /* Woohoo: we found one! */
-                               if (tdb1_lock_record(tdb, tlock->off) != 0)
-                                       goto fail;
-                               return tlock->off;
-                       }
-
-                       /* Try to clean dead ones from old traverses */
-                       /* step past the dead record first, then delete it */
-                       current = tlock->off;
-                       tlock->off = rec->next;
-                       if (!((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) &&
-                           tdb1_do_delete(tdb, current, rec) != 0)
-                               goto fail;
-               }
-               /* chain exhausted: release it and start the next chain
-                  from its top */
-               tdb1_unlock(tdb, tlock->hash, tlock->lock_rw);
-               want_next = 0;
-       }
-       /* We finished iteration without finding anything */
-       tdb->last_error = TDB_SUCCESS;
-       return 0;
-
- fail:
-       /* forget the position and drop the chain lock taken above */
-       tlock->off = 0;
-       if (tdb1_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_next_lock: On error unlock failed!");
-       return TDB1_NEXT_LOCK_ERR;
-}
-
-/* traverse the entire database - calling fn(tdb, key, data) on each element.
-   return -1 on error or the record count traversed
-   if fn is NULL then it is not called
-   a non-zero return value from fn() indicates that the traversal should stop
-
-   tl is the caller-supplied traverse lock; it is linked onto
-   tdb->tdb1.travlocks for the duration of the walk and unlinked again
-   before returning.  key and data passed to fn() share one allocation
-   (data starts right after the key) which is freed after fn() returns.
-   The record that makes fn() stop the traversal is included in the
-   returned count.
-  */
-static int tdb1_traverse_internal(struct tdb_context *tdb,
-                                 int (*fn)(struct tdb_context *,
-                                           TDB_DATA, TDB_DATA, void *),
-                                 void *private_data,
-                                 struct tdb1_traverse_lock *tl)
-{
-       TDB_DATA key, dbuf;
-       struct tdb1_record rec;
-       int ret = 0, count = 0;
-       tdb1_off_t off;
-
-       /* This was in the initialization, above, but the IRIX compiler
-        * did not like it.  crh
-        */
-       tl->next = tdb->tdb1.travlocks.next;
-
-       /* fcntl locks don't stack: beware traverse inside traverse */
-       tdb->tdb1.travlocks.next = tl;
-
-       /* tdb1_next_lock places locks on the record returned, and its chain */
-       while ((off = tdb1_next_lock(tdb, tl, &rec)) != 0) {
-               if (off == TDB1_NEXT_LOCK_ERR) {
-                       ret = -1;
-                       goto out;
-               }
-               /* the record is counted before its contents are read */
-               count++;
-               /* now read the full record */
-               key.dptr = tdb1_alloc_read(tdb, tl->off + sizeof(rec),
-                                         rec.key_len + rec.data_len);
-               if (!key.dptr) {
-                       ret = -1;
-                       /* release the chain lock, then the record lock; if
-                          the chain unlock fails the record lock is left
-                          alone */
-                       if (tdb1_unlock(tdb, tl->hash, tl->lock_rw) != 0)
-                               goto out;
-                       if (tdb1_unlock_record(tdb, tl->off) != 0)
-                               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                          "tdb1_traverse: key.dptr == NULL and"
-                                          " unlock_record failed!");
-                       goto out;
-               }
-               /* key and data sit back to back in the one buffer */
-               key.dsize = rec.key_len;
-               dbuf.dptr = key.dptr + rec.key_len;
-               dbuf.dsize = rec.data_len;
-
-               /* Drop chain lock, call out */
-               if (tdb1_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
-                       ret = -1;
-                       SAFE_FREE(key.dptr);
-                       goto out;
-               }
-               if (fn && fn(tdb, key, dbuf, private_data)) {
-                       /* They want us to terminate traversal */
-                       if (tdb1_unlock_record(tdb, tl->off) != 0) {
-                               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                          "tdb1_traverse:"
-                                          " unlock_record failed!");
-                               ret = -1;
-                       }
-                       SAFE_FREE(key.dptr);
-                       goto out;
-               }
-               /* the record lock stays held; the next tdb1_next_lock()
-                  call releases it */
-               SAFE_FREE(key.dptr);
-       }
-out:
-       /* unlink our traverse lock from the context's list */
-       tdb->tdb1.travlocks.next = tl->next;
-       if (ret < 0)
-               return -1;
-       else
-               return count;
-}
-
-
-/*
-  Read-style traverse, used when the database is read only.
-
-  Takes the transaction lock for reading, runs the shared traversal with
-  read chain locks while tdb->tdb1.traverse_read is raised, then drops
-  the transaction lock.  Returns what tdb1_traverse_internal() returned,
-  or -1 if the transaction lock could not be taken.
-*/
-static int tdb1_traverse_read(struct tdb_context *tdb,
-                             int (*fn)(struct tdb_context *,
-                                       TDB_DATA, TDB_DATA, void *),
-                             void *private_data)
-{
-       struct tdb1_traverse_lock walk = { NULL, 0, 0, F_RDLCK };
-       int visited;
-
-       /* a read lock on the transaction lock is needed here to cope
-          with the lock ordering semantics of solaris10 */
-       if (tdb1_transaction_lock(tdb, F_RDLCK, TDB_LOCK_WAIT) != 0) {
-               return -1;
-       }
-
-       /* mark a read traverse as active for the length of the walk */
-       tdb->tdb1.traverse_read += 1;
-       visited = tdb1_traverse_internal(tdb, fn, private_data, &walk);
-       tdb->tdb1.traverse_read -= 1;
-
-       tdb1_transaction_unlock(tdb, F_RDLCK);
-
-       return visited;
-}
-
-/*
-  Write-style traverse.  The transaction lock is taken for writing
-  around the walk to prevent deadlocks; a read-only database is handed
-  to tdb1_traverse_read() instead.
-
-  Returns the result of the traversal, or -1 if the transaction lock
-  could not be taken.
-
-  WARNING: The data buffer given to the callback fn does NOT meet the
-  alignment restrictions malloc gives you.
-*/
-int tdb1_traverse(struct tdb_context *tdb,
-                 int (*fn)(struct tdb_context *, TDB_DATA, TDB_DATA, void *),
-                 void *private_data)
-{
-       struct tdb1_traverse_lock walk = { NULL, 0, 0, F_WRLCK };
-       int visited;
-
-       /* A read-only database does not need the whole db write-locked. */
-       if ((tdb->flags & TDB_RDONLY) != 0) {
-               return tdb1_traverse_read(tdb, fn, private_data);
-       }
-
-       if (tdb1_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT) != 0) {
-               return -1;
-       }
-
-       /* mark a write traverse as active for the length of the walk */
-       tdb->tdb1.traverse_write += 1;
-       visited = tdb1_traverse_internal(tdb, fn, private_data, &walk);
-       tdb->tdb1.traverse_write -= 1;
-
-       tdb1_transaction_unlock(tdb, F_WRLCK);
-
-       return visited;
-}
-
-
-/* Find the first entry in the database and return its key.
-
-   Restarts the context's own traverse lock at chain 0 with read locks
-   and asks tdb1_next_lock() for the first record.  tdb1_null is returned
-   if the old record lock cannot be released, if the database holds no
-   record, or on a locking error.  Otherwise the key is read into a
-   buffer obtained from tdb1_alloc_read() and the record's hash chain is
-   unlocked again before returning. */
-TDB_DATA tdb1_firstkey(struct tdb_context *tdb)
-{
-       struct tdb1_traverse_lock *cursor = &tdb->tdb1.travlocks;
-       struct tdb1_record hdr;
-       tdb1_off_t found;
-       TDB_DATA first;
-
-       /* drop whatever record lock an earlier walk left behind */
-       if (tdb1_unlock_record(tdb, cursor->off) != 0) {
-               return tdb1_null;
-       }
-
-       /* rewind the walk to the start of the hash table */
-       cursor->hash = 0;
-       cursor->off = 0;
-       cursor->lock_rw = F_RDLCK;
-
-       /* Fetch the first record: its chain and the record get locked. */
-       found = tdb1_next_lock(tdb, cursor, &hdr);
-       if (found == TDB1_NEXT_LOCK_ERR || found == 0) {
-               return tdb1_null;
-       }
-
-       /* the key is stored directly after the record header */
-       first.dsize = hdr.key_len;
-       first.dptr = tdb1_alloc_read(tdb, cursor->off + sizeof(hdr),
-                                   first.dsize);
-
-       /* Release the hash chain of the record that was just read. */
-       if (tdb1_unlock(tdb, cursor->hash, cursor->lock_rw) != 0) {
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_firstkey:"
-                          " error occurred while tdb1_unlocking!");
-       }
-
-       return first;
-}
-
-/*
-  Find the next entry in the database after oldkey, returning its key.
-
-  If the record currently locked by the traversal (travlocks.off) is
-  oldkey, the walk simply continues from it; otherwise oldkey is looked
-  up afresh and the walk continues from wherever it is found.
-
-  Returns tdb1_null when there is no further key, when oldkey cannot
-  be found, or when one of the locking steps before the walk fails.
-  Failures to unlock a chain after the walk are only logged.
-
-  On success key.dptr comes from tdb1_alloc_read().
-  NOTE(review): presumably the caller must free() it - confirm.
-*/
-TDB_DATA tdb1_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
-{
-       uint32_t oldhash;
-       TDB_DATA key = tdb1_null;
-       struct tdb1_record rec;
-       unsigned char *k = NULL;
-       tdb1_off_t off;
-
-       /* Is locked key the old key?  If so, traverse will be reliable. */
-       if (tdb->tdb1.travlocks.off) {
-               if (tdb1_lock(tdb,tdb->tdb1.travlocks.hash,tdb->tdb1.travlocks.lock_rw))
-                       return tdb1_null;
-               /* The lengths must match before memcmp(): only rec.key_len
-                  bytes are requested for k, so comparing oldkey.dsize bytes
-                  against a shorter key would read past them, and a longer
-                  key that merely starts with oldkey is a different key. */
-               if (tdb1_rec_read(tdb, tdb->tdb1.travlocks.off, &rec) == -1
-                   || rec.key_len != oldkey.dsize
-                   || !(k = tdb1_alloc_read(tdb,tdb->tdb1.travlocks.off+sizeof(rec),
-                                           rec.key_len))
-                   || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
-                       /* No, it wasn't: unlock it and start from scratch */
-                       if (tdb1_unlock_record(tdb, tdb->tdb1.travlocks.off) != 0) {
-                               SAFE_FREE(k);
-                               return tdb1_null;
-                       }
-                       if (tdb1_unlock(tdb, tdb->tdb1.travlocks.hash, tdb->tdb1.travlocks.lock_rw) != 0) {
-                               SAFE_FREE(k);
-                               return tdb1_null;
-                       }
-                       tdb->tdb1.travlocks.off = 0;
-               }
-
-               SAFE_FREE(k);
-       }
-
-       if (!tdb->tdb1.travlocks.off) {
-               /* No previous element: do normal find, and lock record */
-               tdb->tdb1.travlocks.off = tdb1_find_lock_hash(tdb, oldkey, tdb_hash(tdb, oldkey.dptr, oldkey.dsize), tdb->tdb1.travlocks.lock_rw, &rec);
-               if (!tdb->tdb1.travlocks.off) {
-                       return tdb1_null;
-               }
-               tdb->tdb1.travlocks.hash = TDB1_BUCKET(rec.full_hash);
-               /* NOTE(review): on failure below, the chain lock taken by
-                  tdb1_find_lock_hash() appears to still be held and
-                  travlocks.off stays set - confirm whether that is
-                  intended. */
-               if (tdb1_lock_record(tdb, tdb->tdb1.travlocks.off) != 0) {
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_nextkey: lock_record failed (%s)!",
-                                  strerror(errno));
-                       return tdb1_null;
-               }
-       }
-       /* Remember the old record's chain: tdb1_next_lock() updates
-          travlocks.hash when it moves on. */
-       oldhash = tdb->tdb1.travlocks.hash;
-
-       /* Grab next record: locks chain and returned record,
-          unlocks old record */
-       off = tdb1_next_lock(tdb, &tdb->tdb1.travlocks, &rec);
-       if (off != TDB1_NEXT_LOCK_ERR && off != 0) {
-               key.dsize = rec.key_len;
-               key.dptr = tdb1_alloc_read(tdb, tdb->tdb1.travlocks.off+sizeof(rec),
-                                         key.dsize);
-               /* Unlock the chain of this new record */
-               if (tdb1_unlock(tdb, tdb->tdb1.travlocks.hash, tdb->tdb1.travlocks.lock_rw) != 0)
-                       tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                                  "tdb1_nextkey: WARNING tdb1_unlock failed!");
-       }
-       /* Unlock the chain of old record */
-       if (tdb1_unlock(tdb, TDB1_BUCKET(oldhash), tdb->tdb1.travlocks.lock_rw) != 0)
-               tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
-                          "tdb1_nextkey: WARNING tdb1_unlock failed!");
-       return key;
-}
diff --git a/ccan/tdb2/tdb2.h b/ccan/tdb2/tdb2.h
deleted file mode 100644 (file)
index 3fa99b1..0000000
+++ /dev/null
@@ -1,924 +0,0 @@
-#ifndef CCAN_TDB2_H
-#define CCAN_TDB2_H
-
-/*
-   TDB version 2: trivial database library
-
-   Copyright (C) Andrew Tridgell 1999-2004
-   Copyright (C) Rusty Russell 2010-2011
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
-#ifndef _SAMBA_BUILD_
-#include "config.h"
-#if HAVE_FILE_OFFSET_BITS
-#define _FILE_OFFSET_BITS 64
-#endif
-/* For mode_t */
-#include <sys/types.h>
-/* For O_* flags. */
-#include <sys/stat.h>
-/* For sig_atomic_t. */
-#include <signal.h>
-/* For uint64_t */
-#include <stdint.h>
-/* For bool */
-#include <stdbool.h>
-/* For memcmp */
-#include <string.h>
-#endif
-#include <ccan/compiler/compiler.h>
-#include <ccan/typesafe_cb/typesafe_cb.h>
-#include <ccan/cast/cast.h>
-
-union tdb_attribute;
-struct tdb_context;
-
-/**
- * tdb_open - open a database file
- * @name: the file name (can be NULL if flags contains TDB_INTERNAL)
- * @tdb_flags: options for this database
- * @open_flags: flags argument for tdb's open() call.
- * @mode: mode argument for tdb's open() call.
- * @attributes: linked list of extra attributes for this tdb.
- *
- * This call opens (and potentially creates) a database file.
- * Multiple processes can have the TDB file open at once.
- *
- * On failure it will return NULL, and set errno: it may also call
- * any log attribute found in @attributes.
- *
- * See also:
- *     union tdb_attribute
- */
-struct tdb_context *tdb_open(const char *name, int tdb_flags,
-                            int open_flags, mode_t mode,
-                            union tdb_attribute *attributes);
-
-
-/* flags for tdb_open() */
-#define TDB_DEFAULT 0 /* just a readability place holder */
-#define TDB_INTERNAL 2 /* don't store on disk */
-#define TDB_NOLOCK   4 /* don't do any locking */
-#define TDB_NOMMAP   8 /* don't use mmap */
-#define TDB_CONVERT 16 /* convert endian */
-#define TDB_NOSYNC   64 /* don't use synchronous transactions */
-#define TDB_SEQNUM   128 /* maintain a sequence number */
-#define TDB_ALLOW_NESTING   256 /* fake nested transactions */
-#define TDB_RDONLY   512 /* implied by O_RDONLY */
-#define TDB_VERSION1  1024 /* create/open an old style TDB */
-#define TDB_CANT_CHECK  2048 /* has a feature which we don't understand */
-
-/**
- * tdb1_incompatible_hash - better (Jenkins) hash for tdb1
- *
- * This is better than the default hash for tdb1; but older versions of the
- * tdb library (prior to version 1.2.6) won't be able to open them.
- *
- * It only makes sense to specify this (using tdb_attribute_hash) when
- * creating (with O_CREAT) an old tdb version using TDB_VERSION1.  It's
- * equivalent to the TDB_INCOMPATIBLE_HASH flag for tdb1.
- */
-uint64_t tdb1_incompatible_hash(const void *, size_t, uint64_t, void *);
-
-/**
- * tdb_close - close and free a tdb.
- * @tdb: the tdb context returned from tdb_open()
- *
- * This always succeeds, in that @tdb is unusable after this call.  But if
- * some unexpected error occurred while closing, it will return non-zero
- * (the only clue as to cause will be via the log attribute).
- */
-int tdb_close(struct tdb_context *tdb);
-
-/**
- * struct tdb_data - representation of keys or values.
- * @dptr: the data pointer
- * @dsize: the size of the data pointed to by dptr.
- *
- * This is the "blob" representation of keys and data used by TDB.
- */
-typedef struct tdb_data {
-       unsigned char *dptr;
-       size_t dsize;
-} TDB_DATA;
-
-/**
- * enum TDB_ERROR - error returns for TDB
- *
- * See Also:
- *     tdb_errorstr()
- */
-enum TDB_ERROR {
-       TDB_SUCCESS     = 0,    /* No error. */
-       TDB_ERR_CORRUPT = -1,   /* We read the db, and it was bogus. */
-       TDB_ERR_IO      = -2,   /* We couldn't read/write the db. */
-       TDB_ERR_LOCK    = -3,   /* Locking failed. */
-       TDB_ERR_OOM     = -4,   /* Out of Memory. */
-       TDB_ERR_EXISTS  = -5,   /* The key already exists. */
-       TDB_ERR_NOEXIST = -6,   /* The key does not exist. */
-       TDB_ERR_EINVAL  = -7,   /* You're using it wrong. */
-       TDB_ERR_RDONLY  = -8,   /* The database is read-only. */
-       TDB_ERR_LAST = TDB_ERR_RDONLY
-};
-
-/**
- * tdb_store - store a key/value pair in a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key
- * @dbuf: the data to associate with the key.
- * @flag: TDB_REPLACE, TDB_INSERT or TDB_MODIFY.
- *
- * This inserts (or overwrites) a key/value pair in the TDB.  If flag
- * is TDB_REPLACE, it doesn't matter whether the key exists or not;
- * TDB_INSERT means it must not exist (returns TDB_ERR_EXISTS otherwise),
- * and TDB_MODIFY means it must exist (returns TDB_ERR_NOEXIST otherwise).
- *
- * On success, this returns TDB_SUCCESS.
- *
- * See also:
- *     tdb_fetch, tdb_transaction_start, tdb_append, tdb_delete.
- */
-enum TDB_ERROR tdb_store(struct tdb_context *tdb,
-                        struct tdb_data key,
-                        struct tdb_data dbuf,
-                        int flag);
-
-/* flags to tdb_store() */
-#define TDB_REPLACE 1          /* A readability place holder */
-#define TDB_INSERT 2           /* Don't overwrite an existing entry */
-#define TDB_MODIFY 3           /* Don't create a new entry          */
-
-/**
- * tdb_fetch - fetch a value from a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key
- * @data: pointer to data.
- *
- * This looks up a key in the database and sets it in @data.
- *
- * If it returns TDB_SUCCESS, the key was found: it is your
- * responsibility to call free() on @data->dptr.
- *
- * Otherwise, it returns an error (usually, TDB_ERR_NOEXIST) and @data is
- * undefined.
- */
-enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
-                        struct tdb_data *data);
-
-/**
- * tdb_errorstr - map the tdb error onto a constant readable string
- * @ecode: the enum TDB_ERROR to map.
- *
- * This is useful for displaying errors to users.
- */
-const char *tdb_errorstr(enum TDB_ERROR ecode);
-
-/**
- * tdb_append - append a value to a key/value pair in a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key
- * @dbuf: the data to append.
- *
- * This is equivalent to fetching a record, reallocating .dptr to add the
- * data, and writing it back, only it's much more efficient.  If the key
- * doesn't exist, it's equivalent to tdb_store (with an additional hint that
- * you expect to expand the record in future).
- *
- * See Also:
- *     tdb_fetch(), tdb_store()
- */
-enum TDB_ERROR tdb_append(struct tdb_context *tdb,
-                         struct tdb_data key, struct tdb_data dbuf);
-
-/**
- * tdb_delete - delete a key from a tdb.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to delete.
- *
- * Returns TDB_SUCCESS on success, or an error (usually TDB_ERR_NOEXIST).
- *
- * See Also:
- *     tdb_fetch(), tdb_store()
- */
-enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key);
-
-/**
- * tdb_exists - does a key exist in the database?
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to search for.
- *
- * Returns true if it exists, or false if it doesn't or any other error.
- */
-bool tdb_exists(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_deq - are struct tdb_data equal?
- * @a: one struct tdb_data
- * @b: another struct tdb_data
- *
- * Returns true if both have the same dsize and identical contents.
- * Two zero-length blobs compare equal without their dptr being
- * dereferenced, so a NULL dptr with dsize 0 is acceptable.
- */
-static inline bool tdb_deq(struct tdb_data a, struct tdb_data b)
-{
-       if (a.dsize != b.dsize)
-               return false;
-       /* memcmp() needs valid pointers even when the length is zero. */
-       return a.dsize == 0 || memcmp(a.dptr, b.dptr, a.dsize) == 0;
-}
-
-/**
- * tdb_mkdata - make a struct tdb_data from const data
- * @p: the constant pointer
- * @len: the length
- *
- * The dptr member of struct tdb_data is not const-qualified, so
- * wrapping constant data needs a cast.  Doing it here keeps those
- * casts in one place, and suppresses the warning some compilers give
- * when casting away a qualifier (eg. gcc with -Wcast-qual).
- */
-static inline struct tdb_data tdb_mkdata(const void *p, size_t len)
-{
-       struct tdb_data blob;
-
-       blob.dsize = len;
-       blob.dptr = cast_const(void *, p);
-       return blob;
-}
-
-/**
- * tdb_transaction_start - start a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This begins a series of atomic operations.  Other processes will be able
- * to read the tdb, but not alter it (they will block), nor will they see
- * any changes until tdb_transaction_commit() is called.
- *
- * Note that if the TDB_ALLOW_NESTING flag is set, a tdb_transaction_start()
- * within a transaction will succeed, but it's not a real transaction:
- * (1) An inner transaction which is committed is not actually committed until
- *     the outer transaction is; if the outer transaction is cancelled, the
- *     inner ones are discarded.
- * (2) tdb_transaction_cancel() marks the outer transaction as having an error,
- *     so the final tdb_transaction_commit() will fail.
- * (3) the outer transaction will see the results of the inner transaction.
- *
- * See Also:
- *     tdb_transaction_cancel, tdb_transaction_commit.
- */
-enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb);
-
-/**
- * tdb_transaction_cancel - abandon a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This aborts a transaction, discarding any changes which were made.
- * tdb_close() does this implicitly.
- */
-void tdb_transaction_cancel(struct tdb_context *tdb);
-
-/**
- * tdb_transaction_commit - commit a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This completes a transaction, writing any changes which were made.
- *
- * fsync() is used to commit the transaction (unless TDB_NOSYNC is set),
- * making it robust against machine crashes, but very slow compared to
- * other TDB operations.
- *
- * A failure can only be caused by unexpected errors (eg. I/O or
- * memory); there is no point looping on transaction failure.
- *
- * See Also:
- *     tdb_transaction_prepare_commit()
- */
-enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb);
-
-/**
- * tdb_transaction_prepare_commit - prepare to commit a transaction
- * @tdb: the tdb context returned from tdb_open()
- *
- * This ensures we have the resources to commit a transaction (using
- * tdb_transaction_commit): if this succeeds then a transaction will only
- * fail if the write() or fsync() calls fail.
- *
- * If this fails you must still call tdb_transaction_cancel() to cancel
- * the transaction.
- *
- * See Also:
- *     tdb_transaction_commit()
- */
-enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb);
-
-/**
- * tdb_traverse - traverse a TDB
- * @tdb: the tdb context returned from tdb_open()
- * @fn: the function to call for every key/value pair (or NULL)
- * @p: the pointer to hand to @fn
- *
- * This walks the TDB until all the keys have been traversed, or @fn
- * returns non-zero.  If the traverse function or other processes are
- * changing data or adding or deleting keys, the traverse may be
- * unreliable: keys may be skipped or (rarely) visited twice.
- *
- * There is one specific exception: the special case of deleting the
- * current key does not undermine the reliability of the traversal.
- *
- * On success, returns the number of keys iterated.  On error returns
- * a negative enum TDB_ERROR value.
- */
-#define tdb_traverse(tdb, fn, p)                                       \
-       tdb_traverse_(tdb, typesafe_cb_preargs(int, void *, (fn), (p),  \
-                                              struct tdb_context *,    \
-                                              TDB_DATA, TDB_DATA), (p))
-
-int64_t tdb_traverse_(struct tdb_context *tdb,
-                     int (*fn)(struct tdb_context *,
-                               TDB_DATA, TDB_DATA, void *), void *p);
-
-/**
- * tdb_parse_record - operate directly on data in the database.
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key whose record we should hand to @parse
- * @parse: the function to call for the data
- * @data: the private pointer to hand to @parse (types must match).
- *
- * This avoids a copy for many cases, by handing you a pointer into
- * the memory-mapped database.  It also locks the record to prevent
- * other accesses at the same time.
- *
- * Do not alter the data handed to parse()!
- */
-#define tdb_parse_record(tdb, key, parse, data)                                \
-       tdb_parse_record_((tdb), (key),                                 \
-                         typesafe_cb_preargs(enum TDB_ERROR, void *,   \
-                                             (parse), (data),          \
-                                             TDB_DATA, TDB_DATA), (data))
-
-enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
-                                TDB_DATA key,
-                                enum TDB_ERROR (*parse)(TDB_DATA k,
-                                                        TDB_DATA d,
-                                                        void *data),
-                                void *data);
-
-/**
- * tdb_get_seqnum - get a database sequence number
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns a sequence number: any change to the database from a
- * tdb context opened with the TDB_SEQNUM flag will cause that number
- * to increment.  Note that the incrementing is unreliable (it is done
- * without locking), so this is only useful as an optimization.
- *
- * For example, you may have a regular database backup routine which
- * does not operate if the sequence number is unchanged.  In the
- * unlikely event of a failed increment, it will be backed up next
- * time anyway.
- *
- * Returns an enum TDB_ERROR (ie. negative) on error.
- */
-int64_t tdb_get_seqnum(struct tdb_context *tdb);
-
-/**
- * tdb_firstkey - get the "first" key in a TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: pointer to key.
- *
- * This returns an arbitrary key in the database; with tdb_nextkey() it allows
- * open-coded traversal of the database, though it is slightly less efficient
- * than tdb_traverse.
- *
- * It is your responsibility to free @key->dptr on success.
- *
- * Returns TDB_ERR_NOEXIST if the database is empty.
- */
-enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key);
-
-/**
- * tdb_nextkey - get the "next" key in a TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: a key returned by tdb_firstkey() or tdb_nextkey().
- *
- * This returns another key in the database; it will free @key.dptr for
- * your convenience.
- *
- * Returns TDB_ERR_NOEXIST if there are no more keys.
- */
-enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key);
-
-/**
- * tdb_chainlock - lock a record in the TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to lock.
- *
- * This prevents any access occurring to a group of keys including @key,
- * even if @key does not exist.  This allows primitive atomic updates of
- * records without using transactions.
- *
- * You cannot begin a transaction while holding a tdb_chainlock(), nor can
- * you do any operations on any other keys in the database.  This also means
- * that you cannot hold more than one tdb_chainlock() at a time.
- *
- * See Also:
- *     tdb_chainunlock()
- */
-enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_chainunlock - unlock a record in the TDB
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to unlock.
- *
- * The key must have previously been locked by tdb_chainlock().
- */
-void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_chainlock_read - lock a record in the TDB, for reading
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to lock.
- *
- * This prevents any changes from occurring to a group of keys including @key,
- * even if @key does not exist.  This allows primitive atomic updates of
- * records without using transactions.
- *
- * You cannot begin a transaction while holding a tdb_chainlock_read(), nor can
- * you do any operations on any other keys in the database.  This also means
- * that you cannot hold more than one tdb_chainlock()/read() at a time.
- *
- * See Also:
- *     tdb_chainlock()
- */
-enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_chainunlock_read - unlock a record in the TDB for reading
- * @tdb: the tdb context returned from tdb_open()
- * @key: the key to unlock.
- *
- * The key must have previously been locked by tdb_chainlock_read().
- */
-void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
-
-/**
- * tdb_lockall - lock the entire TDB
- * @tdb: the tdb context returned from tdb_open()
- *
- * You cannot hold a tdb_chainlock while calling this.  It nests, so you
- * must call tdb_unlockall as many times as you call tdb_lockall.
- */
-enum TDB_ERROR tdb_lockall(struct tdb_context *tdb);
-
-/**
- * tdb_unlockall - unlock the entire TDB
- * @tdb: the tdb context returned from tdb_open()
- */
-void tdb_unlockall(struct tdb_context *tdb);
-
-/**
- * tdb_lockall_read - lock the entire TDB for reading
- * @tdb: the tdb context returned from tdb_open()
- *
- * This prevents others writing to the database, eg. tdb_delete, tdb_store,
- * tdb_append, but not tdb_fetch.
- *
- * You cannot hold a tdb_chainlock while calling this.  It nests, so you
- * must call tdb_unlockall_read as many times as you call tdb_lockall_read.
- */
-enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb);
-
-/**
- * tdb_unlockall_read - unlock the entire TDB for reading
- * @tdb: the tdb context returned from tdb_open()
- */
-void tdb_unlockall_read(struct tdb_context *tdb);
-
-/**
- * tdb_wipe_all - wipe the database clean
- * @tdb: the tdb context returned from tdb_open()
- *
- * Completely erase the database.  This is faster than iterating through
- * each key and doing tdb_delete.
- */
-enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb);
-
-/**
- * tdb_repack - repack the database
- * @tdb: the tdb context returned from tdb_open()
- *
- * This repacks the database; if it is suffering from a great deal of
- * fragmentation this might help.  However, it can take twice the
- * memory of the existing TDB.
- */
-enum TDB_ERROR tdb_repack(struct tdb_context *tdb);
-
-/**
- * tdb_check - check a TDB for consistency
- * @tdb: the tdb context returned from tdb_open()
- * @check: function to check each key/data pair (or NULL)
- * @data: argument for @check, must match type.
- *
- * This performs a consistency check of the open database, optionally calling
- * a check() function on each record so you can do your own data consistency
- * checks as well.  If check() returns an error, that is returned from
- * tdb_check().
- *
- * Note that if the TDB uses a feature which we don't understand (which
- * means we can't run tdb_check()), this will log a warning to that
- * effect and return TDB_SUCCESS.  You can detect this condition by
- * looking for TDB_CANT_CHECK in tdb_get_flags().
- *
- * Returns TDB_SUCCESS or an error.
- */
-#define tdb_check(tdb, check, data)                                    \
-       tdb_check_((tdb), typesafe_cb_preargs(enum TDB_ERROR, void *,   \
-                                             (check), (data),          \
-                                             struct tdb_data,          \
-                                             struct tdb_data),         \
-                  (data))
-
-enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
-                         enum TDB_ERROR (*check)(struct tdb_data k,
-                                                 struct tdb_data d,
-                                                 void *data),
-                         void *data);
-
-/**
- * tdb_error - get the last error (not threadsafe)
- * @tdb: the tdb context returned from tdb_open()
- *
- * Returns the last error returned by a TDB function.
- *
- * This makes porting from TDB1 easier, but note that the last error is not
- * reliable in threaded programs.
- */
-enum TDB_ERROR tdb_error(struct tdb_context *tdb);
-
-/**
- * enum tdb_summary_flags - flags for tdb_summary.
- */
-enum tdb_summary_flags {
-       TDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */
-};
-
-/**
- * tdb_summary - return a string describing the TDB state
- * @tdb: the tdb context returned from tdb_open()
- * @flags: flags to control the summary output.
- * @summary: pointer to string to allocate.
- *
- * This returns a developer-readable string describing the overall
- * state of the tdb, such as the percentage used and sizes of records.
- * It is designed to provide information about the tdb at a glance
- * without displaying any keys or data in the database.
- *
- * On success, sets @summary to point to a malloc()'ed nul-terminated
- * multi-line string.  It is your responsibility to free() it.
- */
-enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
-                          enum tdb_summary_flags flags,
-                          char **summary);
-
-/**
- * tdb_get_flags - return the flags for a tdb
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns the flags on the current tdb.  Some of these are caused by
- * the flags argument to tdb_open(), others (such as TDB_CONVERT) are
- * intuited.
- */
-unsigned int tdb_get_flags(struct tdb_context *tdb);
-
-/**
- * tdb_add_flag - set a flag for a tdb
- * @tdb: the tdb context returned from tdb_open()
- * @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC or TDB_ALLOW_NESTING.
- *
- * You can use this to set a flag on the TDB.  You cannot set these flags
- * on a TDB_INTERNAL tdb.
- */
-void tdb_add_flag(struct tdb_context *tdb, unsigned flag);
-
-/**
- * tdb_remove_flag - unset a flag for a tdb
- * @tdb: the tdb context returned from tdb_open()
- * @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC or TDB_ALLOW_NESTING.
- *
- * You can use this to clear a flag on the TDB.  You cannot clear flags
- * on a TDB_INTERNAL tdb.
- */
-void tdb_remove_flag(struct tdb_context *tdb, unsigned flag);
-
-/**
- * enum tdb_attribute_type - discriminator for union tdb_attribute.
- */
-enum tdb_attribute_type {
-       TDB_ATTRIBUTE_LOG = 0,
-       TDB_ATTRIBUTE_HASH = 1,
-       TDB_ATTRIBUTE_SEED = 2,
-       TDB_ATTRIBUTE_STATS = 3,
-       TDB_ATTRIBUTE_OPENHOOK = 4,
-       TDB_ATTRIBUTE_FLOCK = 5,
-       TDB_ATTRIBUTE_TDB1_HASHSIZE = 128,
-       TDB_ATTRIBUTE_TDB1_MAX_DEAD = 129,
-};
-
-/**
- * tdb_get_attribute - get an attribute for an existing tdb
- * @tdb: the tdb context returned from tdb_open()
- * @attr: the union tdb_attribute to fill in.
- *
- * This gets an attribute from a TDB which has previously been set (or
- * may return the default values).  Set @attr.base.attr to the
- * attribute type you want to get.
- */
-enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
-                                union tdb_attribute *attr);
-
-/**
- * tdb_set_attribute - set an attribute for an existing tdb
- * @tdb: the tdb context returned from tdb_open()
- * @attr: the union tdb_attribute to set.
- *
- * This sets an attribute on a TDB, overriding any previous attribute
- * of the same type.  It returns TDB_ERR_EINVAL if the attribute is
- * unknown or invalid.
- *
- * Note that TDB_ATTRIBUTE_HASH, TDB_ATTRIBUTE_SEED,
- * TDB_ATTRIBUTE_OPENHOOK and TDB_ATTRIBUTE_TDB1_HASHSIZE cannot
- * currently be set after tdb_open.
- */
-enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
-                                const union tdb_attribute *attr);
-
-/**
- * tdb_unset_attribute - reset an attribute for an existing tdb
- * @tdb: the tdb context returned from tdb_open()
- * @type: the attribute type to unset.
- *
- * This unsets an attribute on a TDB, returning it to the defaults
- * (where applicable).
- *
- * Note that it only makes sense for TDB_ATTRIBUTE_LOG and TDB_ATTRIBUTE_FLOCK
- * to be unset.
- */
-void tdb_unset_attribute(struct tdb_context *tdb,
-                        enum tdb_attribute_type type);
-
-/**
- * tdb_name - get the name of a tdb
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns a copy of the name string, made at tdb_open() time.  If that
- * argument was NULL (possible for a TDB_INTERNAL db) this will return NULL.
- *
- * This is mostly useful for logging.
- */
-const char *tdb_name(const struct tdb_context *tdb);
-
-/**
- * tdb_fd - get the file descriptor of a tdb
- * @tdb: the tdb context returned from tdb_open()
- *
- * This returns the file descriptor for the underlying database file, or -1
- * for TDB_INTERNAL.
- */
-int tdb_fd(const struct tdb_context *tdb);
-
-/**
- * tdb_foreach - iterate through every open TDB.
- * @fn: the function to call for every TDB
- * @p: the pointer to hand to @fn
- *
- * TDB internally keeps track of all open TDBs; this function allows you to
- * iterate through them.  If @fn returns non-zero, traversal stops.
- */
-#define tdb_foreach(fn, p)                                             \
-       tdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p),        \
-                                        struct tdb_context *), (p))
-
-void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p);
-
-/**
- * struct tdb_attribute_base - common fields for all tdb attributes.
- */
-struct tdb_attribute_base {
-       enum tdb_attribute_type attr;
-       union tdb_attribute *next;
-};
-
-/**
- * enum tdb_log_level - log levels for tdb_attribute_log
- * @TDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors
- *                or internal consistency failures.
- * @TDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters
- *                or writing to a read-only database.
- * @TDB_LOG_WARNING: used for informational messages on issues which
- *                  are unusual but handled by TDB internally, such
- *                  as a failure to mmap or failure to open /dev/urandom.
- */
-enum tdb_log_level {
-       TDB_LOG_ERROR,
-       TDB_LOG_USE_ERROR,
-       TDB_LOG_WARNING
-};
-
-/**
- * struct tdb_attribute_log - log function attribute
- *
- * This attribute provides a hook for you to log errors.
- */
-struct tdb_attribute_log {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
-       void (*fn)(struct tdb_context *tdb,
-                  enum tdb_log_level level,
-                  enum TDB_ERROR ecode,
-                  const char *message,
-                  void *data);
-       void *data;
-};
-
-/**
- * struct tdb_attribute_hash - hash function attribute
- *
- * This attribute allows you to provide an alternative hash function.
- * This hash function will be handed keys from the database; it will also
- * be handed the 8-byte TDB_HASH_MAGIC value for checking the header (the
- * tdb_open() will fail if the hash value doesn't match the header).
- *
- * Note that if your hash function gives different results on
- * different machine endians, your tdb will no longer work across
- * different architectures!
- */
-struct tdb_attribute_hash {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
-       uint64_t (*fn)(const void *key, size_t len, uint64_t seed,
-                      void *data);
-       void *data;
-};
-
-/**
- * struct tdb_attribute_seed - hash function seed attribute
- *
- * The hash function seed is normally taken from /dev/urandom (or equivalent)
- * but can be set manually here.  This is mainly for testing purposes.
- */
-struct tdb_attribute_seed {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_SEED */
-       uint64_t seed;
-};
-
-/**
- * struct tdb_attribute_stats - tdb operational statistics
- *
- * This attribute records statistics of various low-level TDB operations.
- * This can be used to assist performance evaluation.  This is only
- * useful for tdb_get_attribute().
- *
- * New fields will be added at the end, hence the "size" argument which
- * indicates how large your structure is: it must be filled in before
- * calling tdb_get_attribute(), which will overwrite it with the size
- * tdb knows about.
- */
-struct tdb_attribute_stats {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_STATS */
-       size_t size; /* = sizeof(struct tdb_attribute_stats) */
-       uint64_t allocs;
-       uint64_t   alloc_subhash;
-       uint64_t   alloc_chain;
-       uint64_t   alloc_bucket_exact;
-       uint64_t   alloc_bucket_max;
-       uint64_t   alloc_leftover;
-       uint64_t   alloc_coalesce_tried;
-       uint64_t     alloc_coalesce_iterate_clash;
-       uint64_t     alloc_coalesce_lockfail;
-       uint64_t     alloc_coalesce_race;
-       uint64_t     alloc_coalesce_succeeded;
-       uint64_t       alloc_coalesce_num_merged;
-       uint64_t compares;
-       uint64_t   compare_wrong_bucket;
-       uint64_t   compare_wrong_offsetbits;
-       uint64_t   compare_wrong_keylen;
-       uint64_t   compare_wrong_rechash;
-       uint64_t   compare_wrong_keycmp;
-       uint64_t transactions;
-       uint64_t   transaction_cancel;
-       uint64_t   transaction_nest;
-       uint64_t   transaction_expand_file;
-       uint64_t   transaction_read_direct;
-       uint64_t      transaction_read_direct_fail;
-       uint64_t   transaction_write_direct;
-       uint64_t      transaction_write_direct_fail;
-       uint64_t expands;
-       uint64_t frees;
-       uint64_t locks;
-       uint64_t   lock_lowlevel;
-       uint64_t   lock_nonblock;
-       uint64_t     lock_nonblock_fail;
-};
-
-/**
- * struct tdb_attribute_openhook - tdb special effects hook for open
- *
- * This attribute contains a function to call once we have the OPEN_LOCK
- * for the tdb, but before we've examined its contents.  If this succeeds,
- * the tdb will be populated if it's then zero-length.
- *
- * This is a hack to allow support for TDB1-style TDB_CLEAR_IF_FIRST
- * behaviour.
- */
-struct tdb_attribute_openhook {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_OPENHOOK */
-       enum TDB_ERROR (*fn)(int fd, void *data);
-       void *data;
-};
-
-/**
- * struct tdb_attribute_flock - tdb special effects hook for file locking
- *
- * This attribute contains function to call to place locks on a file; it can
- * be used to support non-blocking operations or lock proxying.
- *
- * They should return 0 on success, -1 on failure and set errno.
- *
- * An error will be logged on error if errno is neither EAGAIN nor EINTR
- * (normally it would only return EAGAIN if waitflag is false, and
- * loop internally on EINTR).
- */
-struct tdb_attribute_flock {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_FLOCK */
-       int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *);
-       int (*unlock)(int fd, int rw, off_t off, off_t len, void *);
-       void *data;
-};
-
-/**
- * struct tdb_attribute_tdb1_hashsize - tdb1 hashsize
- *
- * This attribute allows setting the TDB1 hashsize; it only makes sense with
- * O_CREAT and TDB_VERSION1.
- *
- * Hashsize should generally be a prime, such as 10007.
- */
-struct tdb_attribute_tdb1_hashsize {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_TDB1_HASHSIZE */
-       unsigned int hsize;
-};
-
-/**
- * struct tdb_attribute_tdb1_max_dead - tdb1 number of maximum dead records.
- *
- * TDB1 has a method to speed up its slow free list: it lets a certain
- * number of "dead" records build up before freeing them.  This is
- * particularly useful for volatile TDBs; setting it to 5 is
- * equivalent to tdb1's TDB_VOLATILE flag.
- */
-struct tdb_attribute_tdb1_max_dead {
-       struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_TDB1_MAX_DEAD */
-       unsigned int max_dead;
-};
-
-/**
- * union tdb_attribute - tdb attributes.
- *
- * This represents all the known attributes.
- *
- * See also:
- *     struct tdb_attribute_log, struct tdb_attribute_hash,
- *     struct tdb_attribute_seed, struct tdb_attribute_stats,
- *     struct tdb_attribute_openhook, struct tdb_attribute_flock.
- */
-union tdb_attribute {
-       struct tdb_attribute_base base;
-       struct tdb_attribute_log log;
-       struct tdb_attribute_hash hash;
-       struct tdb_attribute_seed seed;
-       struct tdb_attribute_stats stats;
-       struct tdb_attribute_openhook openhook;
-       struct tdb_attribute_flock flock;
-       struct tdb_attribute_tdb1_hashsize tdb1_hashsize;
-       struct tdb_attribute_tdb1_max_dead tdb1_max_dead;
-};
-
-#ifdef  __cplusplus
-}
-#endif
-
-#endif /* tdb2.h */
diff --git a/ccan/tdb2/test/api-12-store.c b/ccan/tdb2/test/api-12-store.c
deleted file mode 100644 (file)
index ccec53e..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <ccan/hash/hash.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-#include "logging.h"
-
-/* We use the same seed which we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
-{
-       return hash64_stable((const unsigned char *)key, len,
-                            *(uint64_t *)p);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       uint64_t seed = 16014841315512641303ULL;
-       union tdb_attribute fixed_hattr
-               = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                             .fn = fixedhash,
-                             .data = &seed } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
-
-       fixed_hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-12-store.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* We seemed to lose some keys.
-                * Insert and check they're in there! */
-               for (j = 0; j < 500; j++) {
-                       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-                       ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(tdb_deq(d, data));
-                       free(d.dptr);
-               }
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-13-delete.c b/ccan/tdb2/test/api-13-delete.c
deleted file mode 100644 (file)
index 0287a6a..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-#include <ccan/tdb2/private.h> // For TDB_TOPLEVEL_HASH_BITS
-#include <ccan/hash/hash.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-/* We rig the hash so adjacent-numbered records always clash. */
-static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return ((uint64_t)*(const unsigned int *)key)
-               << (64 - TDB_TOPLEVEL_HASH_BITS - 1);
-}
-
-/* We use the same seed which we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
-{
-       return hash64_stable((const unsigned char *)key, len,
-                            *(uint64_t *)p);
-}
-
-static bool store_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data d, data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < 1000; i++) {
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-               tdb_fetch(tdb, key, &d);
-               if (!tdb_deq(d, data))
-                       return false;
-               free(d.dptr);
-       }
-       return true;
-}
-
-static void test_val(struct tdb_context *tdb, uint64_t val)
-{
-       uint64_t v;
-       struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
-       struct tdb_data d, data = { (unsigned char *)&v, sizeof(v) };
-
-       /* Insert an entry, then delete it. */
-       v = val;
-       /* Delete should fail. */
-       ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Insert should succeed. */
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Delete should succeed. */
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Re-add it, then add collision. */
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       v = val + 1;
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Can find both? */
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* Delete second one. */
-       v = val + 1;
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Re-add */
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Now, try deleting first one. */
-       v = val;
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Can still find second? */
-       v = val + 1;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* Now, this will be ideally placed. */
-       v = val + 2;
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* This will collide with both. */
-       v = val;
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-
-       /* We can still find them all, right? */
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val + 1;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val + 2;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* And if we delete val + 1, that val + 2 should not move! */
-       v = val + 1;
-       ok1(tdb_delete(tdb, key) == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       v = val;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-       v = val + 2;
-       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-       ok1(d.dsize == data.dsize);
-       free(d.dptr);
-
-       /* Delete those two, so we are empty. */
-       ok1(tdb_delete(tdb, key) == 0);
-       v = val;
-       ok1(tdb_delete(tdb, key) == 0);
-
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       uint64_t seed = 16014841315512641303ULL;
-       union tdb_attribute clash_hattr
-               = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                             .fn = clash } };
-       union tdb_attribute fixed_hattr
-               = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                             .fn = fixedhash,
-                             .data = &seed } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       /* These two values gave trouble before. */
-       int vals[] = { 755, 837 };
-
-       clash_hattr.base.next = &tap_log_attr;
-       fixed_hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-13-delete.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Check start of hash table. */
-               test_val(tdb, 0);
-
-               /* Check end of hash table. */
-               test_val(tdb, -1ULL);
-
-               /* Check mixed bitpattern. */
-               test_val(tdb, 0x123456789ABCDEF0ULL);
-
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
-               tdb_close(tdb);
-
-               /* Deleting these entries in the db gave problems. */
-               tdb = tdb_open("run-13-delete.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(store_records(tdb));
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
-                       struct tdb_data key;
-
-                       key.dptr = (unsigned char *)&vals[j];
-                       key.dsize = sizeof(vals[j]);
-                       ok1(tdb_delete(tdb, key) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-               }
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-14-exists.c b/ccan/tdb2/test/api-14-exists.c
deleted file mode 100644 (file)
index 698006f..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-static bool test_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < 1000; i++) {
-               if (tdb_exists(tdb, key))
-                       return false;
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-               if (!tdb_exists(tdb, key))
-                       return false;
-       }
-
-       for (i = 0; i < 1000; i++) {
-               if (!tdb_exists(tdb, key))
-                       return false;
-               if (tdb_delete(tdb, key) != 0)
-                       return false;
-               if (tdb_exists(tdb, key))
-                       return false;
-       }
-       return true;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-14-exists.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (ok1(tdb))
-                       ok1(test_records(tdb));
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-16-wipe_all.c b/ccan/tdb2/test/api-16-wipe_all.c
deleted file mode 100644 (file)
index d17eff8..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-static bool add_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < 1000; i++) {
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-       }
-       return true;
-}
-
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-16-wipe_all.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (ok1(tdb)) {
-                       struct tdb_data key;
-                       ok1(add_records(tdb));
-                       ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
-                       ok1(tdb_firstkey(tdb, &key) == TDB_ERR_NOEXIST);
-                       tdb_close(tdb);
-               }
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-21-parse_record.c b/ccan/tdb2/test/api-21-parse_record.c
deleted file mode 100644 (file)
index def4f45..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-static enum TDB_ERROR parse(TDB_DATA key, TDB_DATA data, TDB_DATA *expected)
-{
-       if (!tdb_deq(data, *expected))
-               return TDB_ERR_EINVAL;
-       return TDB_SUCCESS;
-}
-
-static enum TDB_ERROR parse_err(TDB_DATA key, TDB_DATA data, void *unused)
-{
-       return 100;
-}
-
-static bool test_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < 1000; i++) {
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-       }
-
-       for (i = 0; i < 1000; i++) {
-               if (tdb_parse_record(tdb, key, parse, &data) != TDB_SUCCESS)
-                       return false;
-       }
-
-       if (tdb_parse_record(tdb, key, parse, &data) != TDB_ERR_NOEXIST)
-               return false;
-
-       /* Test error return from parse function. */
-       i = 0;
-       if (tdb_parse_record(tdb, key, parse_err, NULL) != 100)
-               return false;
-
-       return true;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-21-parse_record.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (ok1(tdb))
-                       ok1(test_records(tdb));
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-55-transaction.c b/ccan/tdb2/test/api-55-transaction.c
deleted file mode 100644 (file)
index 9c1044b..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <ccan/tdb2/private.h> // struct tdb_context
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       unsigned char *buffer;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data;
-
-       buffer = malloc(1000);
-       for (i = 0; i < 1000; i++)
-               buffer[i] = i;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-55-transaction.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(tdb_transaction_start(tdb) == 0);
-               data.dptr = buffer;
-               data.dsize = 1000;
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == 1000);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-
-               /* Cancelling a transaction means no store */
-               tdb_transaction_cancel(tdb);
-               ok1(tdb->file->allrecord_lock.count == 0
-                   && tdb->file->num_lockrecs == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_ERR_NOEXIST);
-
-               /* Commit the transaction. */
-               ok1(tdb_transaction_start(tdb) == 0);
-               data.dptr = buffer;
-               data.dsize = 1000;
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == 1000);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-               ok1(tdb_transaction_commit(tdb) == 0);
-               ok1(tdb->file->allrecord_lock.count == 0
-                   && tdb->file->num_lockrecs == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == 1000);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       free(buffer);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-80-tdb_fd.c b/ccan/tdb2/test/api-80-tdb_fd.c
deleted file mode 100644 (file)
index 0088f9b..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-80-tdb_fd.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tdb_fd(tdb) == -1);
-               else
-                       ok1(tdb_fd(tdb) > 2);
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-81-seqnum.c b/ccan/tdb2/test/api-81-seqnum.c
deleted file mode 100644 (file)
index c1eb751..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, seq;
-       struct tdb_context *tdb;
-       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 8 * 13);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-81-seqnum.tdb", flags[i]|TDB_SEQNUM,
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-
-               seq = 0;
-               ok1(tdb_get_seqnum(tdb) == seq);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-               /* Fetch doesn't change seqnum */
-               if (ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS))
-                       free(d.dptr);
-               ok1(tdb_get_seqnum(tdb) == seq);
-               ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-               /* Append in tdb1 (or store over value) bumps twice! */
-               if (flags[i] & TDB_VERSION1)
-                       seq++;
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-               /* Empty append works */
-               ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-
-               ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == ++seq);
-
-               if (!(flags[i] & TDB_INTERNAL)) {
-                       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_get_seqnum(tdb) == ++seq);
-                       /* Append in tdb1 (or store over value) bumps twice! */
-                       if (flags[i] & TDB_VERSION1)
-                               seq++;
-                       ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-                       ok1(tdb_get_seqnum(tdb) == ++seq);
-                       ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-                       ok1(tdb_get_seqnum(tdb) == ++seq);
-                       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-                       ok1(tdb_get_seqnum(tdb) == seq);
-
-                       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_get_seqnum(tdb) == seq + 1);
-                       tdb_transaction_cancel(tdb);
-                       ok1(tdb_get_seqnum(tdb) == seq);
-               }
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-82-lockattr.c b/ccan/tdb2/test/api-82-lockattr.c
deleted file mode 100644 (file)
index 048feac..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-#include <ccan/tdb2/private.h> // for tdb_fcntl_unlock
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include "logging.h"
-
-static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
-                 void *_err)
-{
-       int *lock_err = _err;
-       struct flock fl;
-       int ret;
-
-       if (*lock_err) {
-               errno = *lock_err;
-               return -1;
-       }
-
-       do {
-               fl.l_type = rw;
-               fl.l_whence = SEEK_SET;
-               fl.l_start = off;
-               fl.l_len = len;
-
-               if (waitflag)
-                       ret = fcntl(fd, F_SETLKW, &fl);
-               else
-                       ret = fcntl(fd, F_SETLK, &fl);
-       } while (ret != 0 && errno == EINTR);
-
-       return ret;
-}
-
-static int trav_err;
-static int trav(struct tdb_context *tdb, TDB_DATA k, TDB_DATA d, int *err)
-{
-       *err = trav_err;
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       union tdb_attribute lock_attr;
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-       int lock_err;
-
-       lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-       lock_attr.base.next = &tap_log_attr;
-       lock_attr.flock.lock = mylock;
-       lock_attr.flock.unlock = tdb_fcntl_unlock;
-       lock_attr.flock.data = &lock_err;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 80);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct tdb_data d;
-               unsigned int num_oom_messages;
-
-               /* TDB1 double logs here. */
-               if (flags[i] & TDB_VERSION1) {
-                       num_oom_messages = 2;
-               } else {
-                       num_oom_messages = 1;
-               }
-
-               /* Nonblocking open; expect no error message. */
-               lock_err = EAGAIN;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
-               ok(errno == lock_err, "Errno is %u", errno);
-               ok1(!tdb);
-               ok1(tap_log_messages == 0);
-
-               lock_err = EINTR;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
-               ok(errno == lock_err, "Errno is %u", errno);
-               ok1(!tdb);
-               ok1(tap_log_messages == 0);
-
-               /* Forced fail open. */
-               lock_err = ENOMEM;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
-               ok1(errno == lock_err);
-               ok1(!tdb);
-               ok1(tap_log_messages == 1);
-               tap_log_messages = 0;
-
-               lock_err = 0;
-               tdb = tdb_open("run-82-lockattr.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
-               if (!ok1(tdb))
-                       continue;
-               ok1(tap_log_messages == 0);
-
-               /* Nonblocking store. */
-               lock_err = EAGAIN;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               /* Nonblocking fetch. */
-               lock_err = EAGAIN;
-               ok1(!tdb_exists(tdb, key));
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(!tdb_exists(tdb, key));
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(!tdb_exists(tdb, key));
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               lock_err = EAGAIN;
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               /* Nonblocking delete. */
-               lock_err = EAGAIN;
-               ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               /* Nonblocking locks. */
-               lock_err = EAGAIN;
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               lock_err = EAGAIN;
-               ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               lock_err = EAGAIN;
-               ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
-               /* This actually does divide and conquer. */
-               ok1(tap_log_messages > 0);
-               tap_log_messages = 0;
-
-               lock_err = EAGAIN;
-               ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages > 0);
-               tap_log_messages = 0;
-
-               /* Nonblocking traverse; go nonblock partway through. */
-               lock_err = 0;
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-               trav_err = EAGAIN;
-               ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               trav_err = EINTR;
-               lock_err = 0;
-               ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               trav_err = ENOMEM;
-               lock_err = 0;
-               ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == num_oom_messages);
-               tap_log_messages = 0;
-
-               /* Nonblocking transactions. */
-               lock_err = EAGAIN;
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = EINTR;
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-               lock_err = ENOMEM;
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 1);
-               tap_log_messages = 0;
-
-               /* Nonblocking transaction prepare. */
-               lock_err = 0;
-               ok1(tdb_transaction_start(tdb) == 0);
-               ok1(tdb_delete(tdb, key) == 0);
-
-               lock_err = EAGAIN;
-               ok1(tdb_transaction_prepare_commit(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-
-               lock_err = 0;
-               ok1(tdb_transaction_prepare_commit(tdb) == 0);
-               ok1(tdb_transaction_commit(tdb) == 0);
-
-               /* And the transaction was committed, right? */
-               ok1(!tdb_exists(tdb, key));
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-83-openhook.c b/ccan/tdb2/test/api-83-openhook.c
deleted file mode 100644 (file)
index e7e9473..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdarg.h>
-#include <err.h>
-#include <unistd.h>
-#include "external-agent.h"
-#include "logging.h"
-
-static enum TDB_ERROR clear_if_first(int fd, void *arg)
-{
-/* We hold a lock offset 4 always, so we can tell if anyone is holding it.
- * (This is compatible with tdb1's TDB_CLEAR_IF_FIRST flag).  */
-       struct flock fl;
-
-       if (arg != clear_if_first)
-               return TDB_ERR_CORRUPT;
-
-       fl.l_type = F_WRLCK;
-       fl.l_whence = SEEK_SET;
-       fl.l_start = 4;
-       fl.l_len = 1;
-
-       if (fcntl(fd, F_SETLK, &fl) == 0) {
-               /* We must be first ones to open it! */
-               diag("truncating file!");
-               if (ftruncate(fd, 0) != 0) {
-                       return TDB_ERR_IO;
-               }
-       }
-       fl.l_type = F_RDLCK;
-       if (fcntl(fd, F_SETLKW, &fl) != 0) {
-               return TDB_ERR_IO;
-       }
-       return TDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       struct agent *agent;
-       union tdb_attribute cif;
-       struct tdb_data key = tdb_mkdata("key", 3);
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK;
-       cif.openhook.base.next = &tap_log_attr;
-       cif.openhook.fn = clear_if_first;
-       cif.openhook.data = clear_if_first;
-
-       agent = prepare_external_agent();
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 13);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               /* Create it */
-               tdb = tdb_open("run-83-openhook.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
-               ok1(tdb);
-               ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
-               tdb_close(tdb);
-
-               /* Now, open with CIF, should clear it. */
-               tdb = tdb_open("run-83-openhook.tdb", flags[i],
-                              O_RDWR, 0, &cif);
-               ok1(tdb);
-               ok1(!tdb_exists(tdb, key));
-               ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
-
-               /* Agent should not clear it, since it's still open. */
-               ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
-                                            "run-83-openhook.tdb") == SUCCESS);
-               ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS);
-               ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
-
-               /* Still exists for us too. */
-               ok1(tdb_exists(tdb, key));
-
-               /* Close it, now agent should clear it. */
-               tdb_close(tdb);
-
-               ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
-                                            "run-83-openhook.tdb") == SUCCESS);
-               ok1(external_agent_operation(agent, FETCH, "key") == FAILED);
-               ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
-
-               ok1(tap_log_messages == 0);
-       }
-
-       free_external_agent(agent);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-91-get-stats.c b/ccan/tdb2/test/api-91-get-stats.c
deleted file mode 100644 (file)
index d9a22ca..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               union tdb_attribute *attr;
-               struct tdb_data key = tdb_mkdata("key", 3);
-
-               tdb = tdb_open("run-91-get-stats.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
-
-               /* Use malloc so valgrind will catch overruns. */
-               attr = malloc(sizeof *attr);
-               attr->stats.base.attr = TDB_ATTRIBUTE_STATS;
-               attr->stats.size = sizeof(*attr);
-
-               ok1(tdb_get_attribute(tdb, attr) == 0);
-               ok1(attr->stats.size == sizeof(*attr));
-               ok1(attr->stats.allocs > 0);
-               ok1(attr->stats.expands > 0);
-               ok1(attr->stats.locks > 0);
-               free(attr);
-
-               /* Try short one. */
-               attr = malloc(offsetof(struct tdb_attribute_stats, allocs)
-                             + sizeof(attr->stats.allocs));
-               attr->stats.base.attr = TDB_ATTRIBUTE_STATS;
-               attr->stats.size = offsetof(struct tdb_attribute_stats, allocs)
-                       + sizeof(attr->stats.allocs);
-               ok1(tdb_get_attribute(tdb, attr) == 0);
-               ok1(attr->stats.size == sizeof(*attr));
-               ok1(attr->stats.allocs > 0);
-               free(attr);
-               ok1(tap_log_messages == 0);
-
-               tdb_close(tdb);
-
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-92-get-set-readonly.c b/ccan/tdb2/test/api-92-get-set-readonly.c
deleted file mode 100644 (file)
index 483b50d..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, extra_msgs;
-       struct tdb_context *tdb;
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 48);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               /* RW -> R0 */
-               tdb = tdb_open("run-92-get-set-readonly.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(!(tdb_get_flags(tdb) & TDB_RDONLY));
-
-               /* TDB1 complains multiple times. */
-               if (flags[i] & TDB_VERSION1) {
-                       extra_msgs = 1;
-               } else {
-                       extra_msgs = 0;
-               }
-
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-
-               tdb_add_flag(tdb, TDB_RDONLY);
-               ok1(tdb_get_flags(tdb) & TDB_RDONLY);
-
-               /* Can't store, append, delete. */
-               ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 1);
-               ok1(tdb_append(tdb, key, data) == TDB_ERR_RDONLY);
-               tap_log_messages -= extra_msgs;
-               ok1(tap_log_messages == 2);
-               ok1(tdb_delete(tdb, key) == TDB_ERR_RDONLY);
-               tap_log_messages -= extra_msgs;
-               ok1(tap_log_messages == 3);
-
-               /* Can't start a transaction, or any write lock. */
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 4);
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_RDONLY);
-               tap_log_messages -= extra_msgs;
-               ok1(tap_log_messages == 5);
-               ok1(tdb_lockall(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 6);
-               ok1(tdb_wipe_all(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 7);
-
-               /* Back to RW. */
-               tdb_remove_flag(tdb, TDB_RDONLY);
-               ok1(!(tdb_get_flags(tdb) & TDB_RDONLY));
-
-               ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_SUCCESS);
-               ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-
-               ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-               ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-
-               ok1(tdb_chainlock(tdb, key) == TDB_SUCCESS);
-               tdb_chainunlock(tdb, key);
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
-               tdb_unlockall(tdb);
-               ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
-               ok1(tap_log_messages == 7);
-
-               tdb_close(tdb);
-
-               /* R0 -> RW */
-               tdb = tdb_open("run-92-get-set-readonly.tdb", flags[i],
-                              O_RDONLY, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(tdb_get_flags(tdb) & TDB_RDONLY);
-
-               /* Can't store, append, delete. */
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 8);
-               ok1(tdb_append(tdb, key, data) == TDB_ERR_RDONLY);
-               tap_log_messages -= extra_msgs;
-               ok1(tap_log_messages == 9);
-               ok1(tdb_delete(tdb, key) == TDB_ERR_RDONLY);
-               tap_log_messages -= extra_msgs;
-               ok1(tap_log_messages == 10);
-
-               /* Can't start a transaction, or any write lock. */
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 11);
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_RDONLY);
-               tap_log_messages -= extra_msgs;
-               ok1(tap_log_messages == 12);
-               ok1(tdb_lockall(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 13);
-               ok1(tdb_wipe_all(tdb) == TDB_ERR_RDONLY);
-               ok1(tap_log_messages == 14);
-
-               /* Can't remove TDB_RDONLY since we opened with O_RDONLY */
-               tdb_remove_flag(tdb, TDB_RDONLY);
-               ok1(tap_log_messages == 15);
-               ok1(tdb_get_flags(tdb) & TDB_RDONLY);
-               tdb_close(tdb);
-
-               ok1(tap_log_messages == 15);
-               tap_log_messages = 0;
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-93-repack.c b/ccan/tdb2/test/api-93-repack.c
deleted file mode 100644 (file)
index 74a8b5e..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-#define NUM_TESTS 1000
-
-static bool store_all(struct tdb_context *tdb)
-{
-       unsigned int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data dbuf = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < NUM_TESTS; i++) {
-               if (tdb_store(tdb, key, dbuf, TDB_INSERT) != TDB_SUCCESS)
-                       return false;
-       }
-       return true;
-}
-
-static int mark_entry(struct tdb_context *tdb,
-                     TDB_DATA key, TDB_DATA data, bool found[])
-{
-       unsigned int num;
-
-       if (key.dsize != sizeof(num))
-               return -1;
-       memcpy(&num, key.dptr, key.dsize);
-       if (num >= NUM_TESTS)
-               return -1;
-       if (found[num])
-               return -1;
-       found[num] = true;
-       return 0;
-}
-
-static bool is_all_set(bool found[], unsigned int num)
-{
-       unsigned int i;
-
-       for (i = 0; i < num; i++)
-               if (!found[i])
-                       return false;
-       return true;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       bool found[NUM_TESTS];
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_VERSION1|TDB_NOMMAP,
-                       TDB_VERSION1|TDB_CONVERT,
-                       TDB_VERSION1|TDB_NOMMAP|TDB_CONVERT
-       };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 6 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-93-repack.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       break;
-
-               ok1(store_all(tdb));
-
-               ok1(tdb_repack(tdb) == TDB_SUCCESS);
-               memset(found, 0, sizeof(found));
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               ok1(tdb_traverse(tdb, mark_entry, found) == NUM_TESTS);
-               ok1(is_all_set(found, NUM_TESTS));
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-add-remove-flags.c b/ccan/tdb2/test/api-add-remove-flags.c
deleted file mode 100644 (file)
index 231b9f6..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-#include <ccan/tdb2/private.h> // for tdb_context
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(173);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-add-remove-flags.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(tdb_get_flags(tdb) == tdb->flags);
-               tap_log_messages = 0;
-               tdb_add_flag(tdb, TDB_NOLOCK);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(tdb_get_flags(tdb) & TDB_NOLOCK);
-               }
-
-               tap_log_messages = 0;
-               tdb_add_flag(tdb, TDB_NOMMAP);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(tdb_get_flags(tdb) & TDB_NOMMAP);
-                       ok1(tdb->file->map_ptr == NULL);
-               }
-
-               tap_log_messages = 0;
-               tdb_add_flag(tdb, TDB_NOSYNC);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(tdb_get_flags(tdb) & TDB_NOSYNC);
-               }
-
-               ok1(tdb_get_flags(tdb) == tdb->flags);
-
-               tap_log_messages = 0;
-               tdb_remove_flag(tdb, TDB_NOLOCK);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(!(tdb_get_flags(tdb) & TDB_NOLOCK));
-               }
-
-               tap_log_messages = 0;
-               tdb_remove_flag(tdb, TDB_NOMMAP);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(!(tdb_get_flags(tdb) & TDB_NOMMAP));
-                       ok1(tdb->file->map_ptr != NULL);
-               }
-
-               tap_log_messages = 0;
-               tdb_remove_flag(tdb, TDB_NOSYNC);
-               if (flags[i] & TDB_INTERNAL)
-                       ok1(tap_log_messages == 1);
-               else {
-                       ok1(tap_log_messages == 0);
-                       ok1(!(tdb_get_flags(tdb) & TDB_NOSYNC));
-               }
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-check-callback.c b/ccan/tdb2/test/api-check-callback.c
deleted file mode 100644 (file)
index 1ea263d..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-#define NUM_RECORDS 1000
-
-static bool store_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < NUM_RECORDS; i++)
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-       return true;
-}
-
-static enum TDB_ERROR check(struct tdb_data key,
-                           struct tdb_data data,
-                           bool *array)
-{
-       int val;
-
-       if (key.dsize != sizeof(val)) {
-               diag("Wrong key size: %zu\n", key.dsize);
-               return TDB_ERR_CORRUPT;
-       }
-
-       if (key.dsize != data.dsize
-           || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) {
-               diag("Key and data differ\n");
-               return TDB_ERR_CORRUPT;
-       }
-
-       memcpy(&val, key.dptr, sizeof(val));
-       if (val >= NUM_RECORDS || val < 0) {
-               diag("check value %i\n", val);
-               return TDB_ERR_CORRUPT;
-       }
-
-       if (array[val]) {
-               diag("Value %i already seen\n", val);
-               return TDB_ERR_CORRUPT;
-       }
-
-       array[val] = true;
-       return TDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               bool array[NUM_RECORDS];
-
-               tdb = tdb_open("run-check-callback.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(store_records(tdb));
-               for (j = 0; j < NUM_RECORDS; j++)
-                       array[j] = false;
-               ok1(tdb_check(tdb, check, array) == TDB_SUCCESS);
-               for (j = 0; j < NUM_RECORDS; j++)
-                       if (!array[j])
-                               break;
-               ok1(j == NUM_RECORDS);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-firstkey-nextkey.c b/ccan/tdb2/test/api-firstkey-nextkey.c
deleted file mode 100644 (file)
index e0374d8..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-#define NUM_RECORDS 1000
-
-static bool store_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < NUM_RECORDS; i++)
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-       return true;
-}
-
-struct trav_data {
-       unsigned int records[NUM_RECORDS];
-       unsigned int calls;
-};
-
-static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
-{
-       struct trav_data *td = p;
-       int val;
-
-       memcpy(&val, dbuf.dptr, dbuf.dsize);
-       td->records[td->calls++] = val;
-       return 0;
-}
-
-/* Since tdb_nextkey frees dptr, we need to clone it. */
-static TDB_DATA dup_key(TDB_DATA key)
-{
-       void *p = malloc(key.dsize);
-       memcpy(p, key.dptr, key.dsize);
-       key.dptr = p;
-       return key;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       int num;
-       struct trav_data td;
-       TDB_DATA k;
-       struct tdb_context *tdb;
-       union tdb_attribute seed_attr;
-       enum TDB_ERROR ecode;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
-       seed_attr.base.next = &tap_log_attr;
-       seed_attr.seed.seed = 6334326220117065685ULL;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("api-firstkey-nextkey.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600,
-                              flags[i] & TDB_VERSION1 ? NULL : &seed_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(tdb_firstkey(tdb, &k) == TDB_ERR_NOEXIST);
-
-               /* One entry... */
-               k.dptr = (unsigned char *)&num;
-               k.dsize = sizeof(num);
-               num = 0;
-               ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0);
-               ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS);
-               ok1(k.dsize == sizeof(num));
-               ok1(memcmp(k.dptr, &num, sizeof(num)) == 0);
-               ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST);
-
-               /* Two entries. */
-               k.dptr = (unsigned char *)&num;
-               k.dsize = sizeof(num);
-               num = 1;
-               ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0);
-               ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS);
-               ok1(k.dsize == sizeof(num));
-               memcpy(&num, k.dptr, sizeof(num));
-               ok1(num == 0 || num == 1);
-               ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS);
-               ok1(k.dsize == sizeof(j));
-               memcpy(&j, k.dptr, sizeof(j));
-               ok1(j == 0 || j == 1);
-               ok1(j != num);
-               ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST);
-
-               /* Clean up. */
-               k.dptr = (unsigned char *)&num;
-               k.dsize = sizeof(num);
-               num = 0;
-               ok1(tdb_delete(tdb, k) == 0);
-               num = 1;
-               ok1(tdb_delete(tdb, k) == 0);
-
-               /* Now lots of records. */
-               ok1(store_records(tdb));
-               td.calls = 0;
-
-               num = tdb_traverse(tdb, trav, &td);
-               ok1(num == NUM_RECORDS);
-               ok1(td.calls == NUM_RECORDS);
-
-               /* Simple loop should match tdb_traverse */
-               for (j = 0, ecode = tdb_firstkey(tdb, &k); j < td.calls; j++) {
-                       int val;
-
-                       ok1(ecode == TDB_SUCCESS);
-                       ok1(k.dsize == sizeof(val));
-                       memcpy(&val, k.dptr, k.dsize);
-                       ok1(td.records[j] == val);
-                       ecode = tdb_nextkey(tdb, &k);
-               }
-
-               /* But arbitrary orderings should work too. */
-               for (j = td.calls-1; j > 0; j--) {
-                       k.dptr = (unsigned char *)&td.records[j-1];
-                       k.dsize = sizeof(td.records[j-1]);
-                       k = dup_key(k);
-                       ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS);
-                       ok1(k.dsize == sizeof(td.records[j]));
-                       ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0);
-                       free(k.dptr);
-               }
-
-               /* Even delete should work. */
-               for (j = 0, ecode = tdb_firstkey(tdb, &k);
-                    ecode != TDB_ERR_NOEXIST;
-                    j++) {
-                       ok1(ecode == TDB_SUCCESS);
-                       ok1(k.dsize == 4);
-                       ok1(tdb_delete(tdb, k) == 0);
-                       ecode = tdb_nextkey(tdb, &k);
-               }
-
-               diag("delete using first/nextkey gave %u of %u records",
-                    j, NUM_RECORDS);
-               ok1(j == NUM_RECORDS);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-fork-test.c b/ccan/tdb2/test/api-fork-test.c
deleted file mode 100644 (file)
index 6feb618..0000000
+++ /dev/null
@@ -1,204 +0,0 @@
-/* Test forking while holding lock.
- *
- * There are only five ways to do this currently:
- * (1) grab a tdb_chainlock, then fork.
- * (2) grab a tdb_lockall, then fork.
- * (3) grab a tdb_lockall_read, then fork.
- * (4) start a transaction, then fork.
- * (5) fork from inside a tdb_parse() callback.
- *
- * Note that we don't hold a lock across tdb_traverse callbacks, so
- * that doesn't matter.
- */
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include "logging.h"
-
-static enum TDB_ERROR fork_in_parse(TDB_DATA key, TDB_DATA data,
-                                   struct tdb_context *tdb)
-{
-       int status, extra_messages;
-
-       if (tdb_get_flags(tdb) & TDB_VERSION1) {
-               extra_messages = 1;
-       } else {
-               extra_messages = 0;
-       }
-
-       if (fork() == 0) {
-               /* We expect this to fail. */
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
-                       exit(1);
-               tap_log_messages -= extra_messages;
-
-               if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
-                       exit(1);
-
-               tap_log_messages -= extra_messages;
-               if (tap_log_messages != 2)
-                       exit(2);
-
-               tdb_close(tdb);
-               if (tap_log_messages != 2)
-                       exit(3);
-               exit(0);
-       }
-       wait(&status);
-       ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-       return TDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               int status, extra_messages;
-
-               if (flags[i] & TDB_VERSION1) {
-                       extra_messages = 1;
-               } else {
-                       extra_messages = 0;
-               }
-
-               tap_log_messages = 0;
-
-               tdb = tdb_open("run-fork-test.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-
-               /* Put a record in here. */
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_SUCCESS);
-
-               ok1(tdb_chainlock(tdb, key) == TDB_SUCCESS);
-               if (fork() == 0) {
-                       /* We expect this to fail. */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tap_log_messages != 2)
-                               return 2;
-
-                       tdb_chainunlock(tdb, key);
-                       if (tap_log_messages != 3)
-                               return 3;
-                       tdb_close(tdb);
-                       if (tap_log_messages != 3)
-                               return 4;
-                       return 0;
-               }
-               wait(&status);
-               ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_chainunlock(tdb, key);
-
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
-               if (fork() == 0) {
-                       /* We expect this to fail. */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tap_log_messages != 2)
-                               return 2;
-
-                       tdb_unlockall(tdb);
-                       if (tap_log_messages != 2)
-                               return 3;
-                       tdb_close(tdb);
-                       if (tap_log_messages != 2)
-                               return 4;
-                       return 0;
-               }
-               wait(&status);
-               ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_unlockall(tdb);
-
-               ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
-               if (fork() == 0) {
-                       /* We expect this to fail. */
-                       /* This would always fail anyway... */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tap_log_messages != 2)
-                               return 2;
-
-                       tdb_unlockall_read(tdb);
-                       if (tap_log_messages != 2)
-                               return 3;
-                       tdb_close(tdb);
-                       if (tap_log_messages != 2)
-                               return 4;
-                       return 0;
-               }
-               wait(&status);
-               ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_unlockall_read(tdb);
-
-               ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-               /* If transactions is empty, noop "commit" succeeds. */
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-               if (fork() == 0) {
-                       /* We expect this to fail. */
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
-                               return 1;
-                       tap_log_messages -= extra_messages;
-
-                       if (tap_log_messages != 2)
-                               return 2;
-
-                       if (tdb_transaction_commit(tdb) != TDB_ERR_LOCK)
-                               return 3;
-                       tap_log_messages -= extra_messages;
-
-                       tdb_close(tdb);
-                       if (tap_log_messages < 3)
-                               return 4;
-                       return 0;
-               }
-               wait(&status);
-               ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
-               tdb_transaction_cancel(tdb);
-
-               ok1(tdb_parse_record(tdb, key, fork_in_parse, tdb)
-                   == TDB_SUCCESS);
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-locktimeout.c b/ccan/tdb2/test/api-locktimeout.c
deleted file mode 100644 (file)
index 21a26c4..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <errno.h>
-#include "logging.h"
-#include "external-agent.h"
-
-#undef alarm
-#define alarm fast_alarm
-
-/* Speed things up by doing things in milliseconds. */
-static unsigned int fast_alarm(unsigned int milli_seconds)
-{
-       struct itimerval it;
-
-       it.it_interval.tv_sec = it.it_interval.tv_usec = 0;
-       it.it_value.tv_sec = milli_seconds / 1000;
-       it.it_value.tv_usec = milli_seconds * 1000;
-       setitimer(ITIMER_REAL, &it, NULL);
-       return 0;
-}
-
-#define CatchSignal(sig, handler) signal((sig), (handler))
-
-static void do_nothing(int signum)
-{
-}
-
-/* This example code is taken from SAMBA, so try not to change it. */
-static struct flock flock_struct;
-
-/* Return a value which is none of v1, v2 or v3. */
-static inline short int invalid_value(short int v1, short int v2, short int v3)
-{
-       short int try = (v1+v2+v3)^((v1+v2+v3) << 16);
-       while (try == v1 || try == v2 || try == v3)
-               try++;
-       return try;
-}
-
-/* We invalidate in as many ways as we can, so the OS rejects it */
-static void invalidate_flock_struct(int signum)
-{
-       flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK);
-       flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END);
-       flock_struct.l_start = -1;
-       /* A large negative. */
-       flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1);
-}
-
-static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
-                       void *_timeout)
-{
-       int ret, saved_errno = errno;
-       unsigned int timeout = *(unsigned int *)_timeout;
-
-       flock_struct.l_type = rw;
-       flock_struct.l_whence = SEEK_SET;
-       flock_struct.l_start = off;
-       flock_struct.l_len = len;
-
-       CatchSignal(SIGALRM, invalidate_flock_struct);
-       alarm(timeout);
-
-       for (;;) {
-               if (waitflag)
-                       ret = fcntl(fd, F_SETLKW, &flock_struct);
-               else
-                       ret = fcntl(fd, F_SETLK, &flock_struct);
-
-               if (ret == 0)
-                       break;
-
-               /* Not signalled?  Something else went wrong. */
-               if (flock_struct.l_len == len) {
-                       if (errno == EAGAIN || errno == EINTR)
-                               continue;
-                       saved_errno = errno;
-                       break;
-               } else {
-                       saved_errno = EINTR;
-                       break;
-               }
-       }
-
-       alarm(0);
-       errno = saved_errno;
-       return ret;
-}
-
-static int tdb_chainlock_with_timeout_internal(struct tdb_context *tdb,
-                                              TDB_DATA key,
-                                              unsigned int timeout,
-                                              int rw_type)
-{
-       union tdb_attribute locking;
-       enum TDB_ERROR ecode;
-
-       if (timeout) {
-               locking.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ecode = tdb_get_attribute(tdb, &locking);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-
-               /* Replace locking function with our own. */
-               locking.flock.data = &timeout;
-               locking.flock.lock = timeout_lock;
-
-               ecode = tdb_set_attribute(tdb, &locking);
-               if (ecode != TDB_SUCCESS)
-                       return ecode;
-       }
-       if (rw_type == F_RDLCK)
-               ecode = tdb_chainlock_read(tdb, key);
-       else
-               ecode = tdb_chainlock(tdb, key);
-
-       if (timeout) {
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-       }
-       return ecode;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       TDB_DATA key = tdb_mkdata("hello", 5);
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct agent *agent;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 15);
-
-       agent = prepare_external_agent();
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               enum TDB_ERROR ecode;
-               tdb = tdb_open("run-locktimeout.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       break;
-
-               /* Simple cases: should succeed. */
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
-                                                           F_RDLCK);
-               ok1(ecode == TDB_SUCCESS);
-               ok1(tap_log_messages == 0);
-
-               tdb_chainunlock_read(tdb, key);
-               ok1(tap_log_messages == 0);
-
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
-                                                           F_WRLCK);
-               ok1(ecode == TDB_SUCCESS);
-               ok1(tap_log_messages == 0);
-
-               tdb_chainunlock(tdb, key);
-               ok1(tap_log_messages == 0);
-
-               /* OK, get agent to start transaction, then we should time out. */
-               ok1(external_agent_operation(agent, OPEN, "run-locktimeout.tdb")
-                   == SUCCESS);
-               ok1(external_agent_operation(agent, TRANSACTION_START, "")
-                   == SUCCESS);
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
-                                                           F_WRLCK);
-               ok1(ecode == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-
-               /* Even if we get a different signal, should be fine. */
-               CatchSignal(SIGUSR1, do_nothing);
-               external_agent_operation(agent, SEND_SIGNAL, "");
-               ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
-                                                           F_WRLCK);
-               ok1(ecode == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 0);
-
-               ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "")
-                   == SUCCESS);
-               ok1(external_agent_operation(agent, CLOSE, "")
-                   == SUCCESS);
-               tdb_close(tdb);
-       }
-       free_external_agent(agent);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-missing-entries.c b/ccan/tdb2/test/api-missing-entries.c
deleted file mode 100644 (file)
index 0b21e1e..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Another test revealed that we lost an entry.  This reproduces it. */
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/hash/hash.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-#define NUM_RECORDS 1189
-
-/* We use the same seed which we saw this failure on. */
-static uint64_t failhash(const void *key, size_t len, uint64_t seed, void *p)
-{
-       seed = 699537674708983027ULL;
-       return hash64_stable((const unsigned char *)key, len, seed);
-}
-
-int main(int argc, char *argv[])
-{
-       int i;
-       struct tdb_context *tdb;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = failhash } };
-
-       hattr.base.next = &tap_log_attr;
-       plan_tests(1 + NUM_RECORDS + 2);
-
-       tdb = tdb_open("run-missing-entries.tdb", TDB_INTERNAL,
-                      O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-       if (ok1(tdb)) {
-               for (i = 0; i < NUM_RECORDS; i++) {
-                       ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-open-multiple-times.c b/ccan/tdb2/test/api-open-multiple-times.c
deleted file mode 100644 (file)
index 1656206..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, extra_messages;
-       struct tdb_context *tdb, *tdb2;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 28);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-open-multiple-times.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               if (flags[i] & TDB_VERSION1) {
-                       extra_messages = 1;
-               } else {
-                       extra_messages = 0;
-               }
-               tdb2 = tdb_open("run-open-multiple-times.tdb", flags[i],
-                               O_RDWR|O_CREAT, 0600, &tap_log_attr);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_check(tdb2, NULL, NULL) == 0);
-
-               /* Store in one, fetch in the other. */
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-               ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS);
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-
-               /* Vice versa, with delete. */
-               ok1(tdb_delete(tdb2, key) == 0);
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST);
-
-               /* OK, now close first one, check second still good. */
-               ok1(tdb_close(tdb) == 0);
-
-               ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == 0);
-               ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS);
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-
-               /* Reopen */
-               tdb = tdb_open("run-open-multiple-times.tdb", flags[i],
-                              O_RDWR|O_CREAT, 0600, &tap_log_attr);
-               ok1(tdb);
-
-               ok1(tdb_transaction_start(tdb2) == 0);
-
-               /* Anything in the other one should fail. */
-               ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
-               tap_log_messages -= extra_messages;
-               ok1(tap_log_messages == 1);
-               ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
-               tap_log_messages -= extra_messages;
-               ok1(tap_log_messages == 2);
-               ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
-               ok1(tap_log_messages == 3);
-               ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
-               tap_log_messages -= extra_messages;
-               ok1(tap_log_messages == 4);
-
-               /* Transaciton should work as normal. */
-               ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == TDB_SUCCESS);
-
-               /* Now... try closing with locks held. */
-               ok1(tdb_close(tdb2) == 0);
-
-               ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-               ok1(tdb_close(tdb) == 0);
-               ok1(tap_log_messages == 4);
-               tap_log_messages = 0;
-       }
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-record-expand.c b/ccan/tdb2/test/api-record-expand.c
deleted file mode 100644 (file)
index 48ad1cd..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-#define MAX_SIZE 10000
-#define SIZE_STEP 131
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data;
-
-       data.dptr = malloc(MAX_SIZE);
-       memset(data.dptr, 0x24, MAX_SIZE);
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-record-expand.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               data.dsize = 0;
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               for (data.dsize = 0;
-                    data.dsize < MAX_SIZE;
-                    data.dsize += SIZE_STEP) {
-                       memset(data.dptr, data.dsize, data.dsize);
-                       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-               }
-               tdb_close(tdb);
-       }
-       ok1(tap_log_messages == 0);
-       free(data.dptr);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-simple-delete.c b/ccan/tdb2/test/api-simple-delete.c
deleted file mode 100644 (file)
index a5b65d6..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-simple-delete.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (tdb) {
-                       /* Delete should fail. */
-                       ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       /* Insert should succeed. */
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       /* Delete should now work. */
-                       ok1(tdb_delete(tdb, key) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       tdb_close(tdb);
-               }
-       }
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-summary.c b/ccan/tdb2/test/api-summary.c
deleted file mode 100644 (file)
index e0e292e..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
-       char *summary;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-summary.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Put some stuff in there. */
-               for (j = 0; j < 500; j++) {
-                       /* Make sure padding varies to we get some graphs! */
-                       data.dsize = j % (sizeof(j) + 1);
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                               fail("Storing in tdb");
-               }
-
-               for (j = 0;
-                    j <= TDB_SUMMARY_HISTOGRAMS;
-                    j += TDB_SUMMARY_HISTOGRAMS) {
-                       ok1(tdb_summary(tdb, j, &summary) == TDB_SUCCESS);
-                       ok1(strstr(summary, "Number of records: 500\n"));
-                       ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
-                       ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
-                       if (!(flags[i] & TDB_VERSION1)
-                           && j == TDB_SUMMARY_HISTOGRAMS) {
-                               ok1(strstr(summary, "|")
-                                   && strstr(summary, "*"));
-                       } else {
-                               ok1(!strstr(summary, "|")
-                                   && !strstr(summary, "*"));
-                       }
-                       free(summary);
-               }
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/api-tdb1-flag-removal.c b/ccan/tdb2/test/api-tdb1-flag-removal.c
deleted file mode 100644 (file)
index 28f24e6..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#include <ccan/tdb2/tdb2.h>
-#include <ccan/tap/tap.h>
-#include <ccan/hash/hash.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-12-store.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-
-               tdb_close(tdb);
-
-               tdb = tdb_open("run-12-store.tdb", flags[i] | TDB_VERSION1,
-                              O_RDWR, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       continue;
-               /* It's not a version1 */
-               ok1(!(tdb_get_flags(tdb) & TDB_VERSION1));
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/external-agent.c b/ccan/tdb2/test/external-agent.c
deleted file mode 100644 (file)
index 01c7106..0000000
+++ /dev/null
@@ -1,256 +0,0 @@
-#include "external-agent.h"
-#include "logging.h"
-#include "lock-tracking.h"
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <err.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-#include <errno.h>
-#include <ccan/tdb2/tdb1_private.h>
-#include <ccan/tap/tap.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-static struct tdb_context *tdb;
-
-void (*external_agent_free)(void *) = free;
-
-static enum TDB_ERROR clear_if_first(int fd, void *arg)
-{
-/* We hold a lock offset 4 always, so we can tell if anyone is holding it.
- * (This is compatible with tdb1's TDB_CLEAR_IF_FIRST flag).  */
-       struct flock fl;
-
-       fl.l_type = F_WRLCK;
-       fl.l_whence = SEEK_SET;
-       fl.l_start = 4;
-       fl.l_len = 1;
-
-       if (fcntl(fd, F_SETLK, &fl) == 0) {
-               /* We must be first ones to open it! */
-               diag("agent truncating file!");
-               if (ftruncate(fd, 0) != 0) {
-                       return TDB_ERR_IO;
-               }
-       }
-       fl.l_type = F_RDLCK;
-       if (fcntl(fd, F_SETLKW, &fl) != 0) {
-               return TDB_ERR_IO;
-       }
-       return TDB_SUCCESS;
-}
-
-static enum agent_return do_operation(enum operation op, const char *name)
-{
-       TDB_DATA k;
-       enum agent_return ret;
-       TDB_DATA data;
-       enum TDB_ERROR ecode;
-       union tdb_attribute cif;
-
-       if (op != OPEN && op != OPEN_WITH_HOOK && !tdb) {
-               diag("external: No tdb open!");
-               return OTHER_FAILURE;
-       }
-
-       diag("external: %s", operation_name(op));
-
-       k = tdb_mkdata(name, strlen(name));
-
-       locking_would_block = 0;
-       switch (op) {
-       case OPEN:
-               if (tdb) {
-                       diag("Already have tdb %s open", tdb->name);
-                       return OTHER_FAILURE;
-               }
-               tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &tap_log_attr);
-               if (!tdb) {
-                       if (!locking_would_block)
-                               diag("Opening tdb gave %s", strerror(errno));
-                       forget_locking();
-                       ret = OTHER_FAILURE;
-               } else
-                       ret = SUCCESS;
-               break;
-       case OPEN_WITH_HOOK:
-               if (tdb) {
-                       diag("Already have tdb %s open", tdb->name);
-                       return OTHER_FAILURE;
-               }
-               cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK;
-               cif.openhook.base.next = &tap_log_attr;
-               cif.openhook.fn = clear_if_first;
-               tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &cif);
-               if (!tdb) {
-                       if (!locking_would_block)
-                               diag("Opening tdb gave %s", strerror(errno));
-                       forget_locking();
-                       ret = OTHER_FAILURE;
-               } else
-                       ret = SUCCESS;
-               break;
-       case FETCH:
-               ecode = tdb_fetch(tdb, k, &data);
-               if (ecode == TDB_ERR_NOEXIST) {
-                       ret = FAILED;
-               } else if (ecode < 0) {
-                       ret = OTHER_FAILURE;
-               } else if (!tdb_deq(data, k)) {
-                       ret = OTHER_FAILURE;
-                       external_agent_free(data.dptr);
-               } else {
-                       ret = SUCCESS;
-                       external_agent_free(data.dptr);
-               }
-               break;
-       case STORE:
-               ret = tdb_store(tdb, k, k, 0) == 0 ? SUCCESS : OTHER_FAILURE;
-               break;
-       case TRANSACTION_START:
-               ret = tdb_transaction_start(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
-               break;
-       case TRANSACTION_COMMIT:
-               ret = tdb_transaction_commit(tdb)==0 ? SUCCESS : OTHER_FAILURE;
-               break;
-       case NEEDS_RECOVERY:
-               if (tdb->flags & TDB_VERSION1)
-                       ret = tdb1_needs_recovery(tdb) ? SUCCESS : FAILED;
-               else
-                       ret = tdb_needs_recovery(tdb) ? SUCCESS : FAILED;
-               break;
-       case CHECK:
-               ret = tdb_check(tdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE;
-               break;
-       case CLOSE:
-               ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
-               tdb = NULL;
-               break;
-       case SEND_SIGNAL:
-               /* We do this async */
-               ret = SUCCESS;
-               break;
-       default:
-               ret = OTHER_FAILURE;
-       }
-
-       if (locking_would_block)
-               ret = WOULD_HAVE_BLOCKED;
-
-       return ret;
-}
-
-struct agent {
-       int cmdfd, responsefd;
-};
-
-/* Do this before doing any tdb stuff.  Return handle, or NULL. */
-struct agent *prepare_external_agent(void)
-{
-       int pid, ret;
-       int command[2], response[2];
-       char name[1+PATH_MAX];
-
-       if (pipe(command) != 0 || pipe(response) != 0)
-               return NULL;
-
-       pid = fork();
-       if (pid < 0)
-               return NULL;
-
-       if (pid != 0) {
-               struct agent *agent = malloc(sizeof(*agent));
-
-               close(command[0]);
-               close(response[1]);
-               agent->cmdfd = command[1];
-               agent->responsefd = response[0];
-               return agent;
-       }
-
-       close(command[1]);
-       close(response[0]);
-
-       /* We want to fail, not block. */
-       nonblocking_locks = true;
-       log_prefix = "external: ";
-       while ((ret = read(command[0], name, sizeof(name))) > 0) {
-               enum agent_return result;
-
-               result = do_operation(name[0], name+1);
-               if (write(response[1], &result, sizeof(result))
-                   != sizeof(result))
-                       err(1, "Writing response");
-               if (name[0] == SEND_SIGNAL) {
-                       struct timeval ten_ms;
-                       ten_ms.tv_sec = 0;
-                       ten_ms.tv_usec = 10000;
-                       select(0, NULL, NULL, NULL, &ten_ms);
-                       kill(getppid(), SIGUSR1);
-               }
-       }
-       exit(0);
-}
-
-/* Ask the external agent to try to do an operation. */
-enum agent_return external_agent_operation(struct agent *agent,
-                                          enum operation op,
-                                          const char *name)
-{
-       enum agent_return res;
-       unsigned int len;
-       char *string;
-
-       if (!name)
-               name = "";
-       len = 1 + strlen(name) + 1;
-       string = malloc(len);
-
-       string[0] = op;
-       strcpy(string+1, name);
-
-       if (write(agent->cmdfd, string, len) != len
-           || read(agent->responsefd, &res, sizeof(res)) != sizeof(res))
-               res = AGENT_DIED;
-
-       free(string);
-       return res;
-}
-
-const char *agent_return_name(enum agent_return ret)
-{
-       return ret == SUCCESS ? "SUCCESS"
-               : ret == WOULD_HAVE_BLOCKED ? "WOULD_HAVE_BLOCKED"
-               : ret == AGENT_DIED ? "AGENT_DIED"
-               : ret == FAILED ? "FAILED"
-               : ret == OTHER_FAILURE ? "OTHER_FAILURE"
-               : "**INVALID**";
-}
-
-const char *operation_name(enum operation op)
-{
-       switch (op) {
-       case OPEN: return "OPEN";
-       case OPEN_WITH_HOOK: return "OPEN_WITH_HOOK";
-       case FETCH: return "FETCH";
-       case STORE: return "STORE";
-       case CHECK: return "CHECK";
-       case TRANSACTION_START: return "TRANSACTION_START";
-       case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT";
-       case NEEDS_RECOVERY: return "NEEDS_RECOVERY";
-       case SEND_SIGNAL: return "SEND_SIGNAL";
-       case CLOSE: return "CLOSE";
-       }
-       return "**INVALID**";
-}
-
-void free_external_agent(struct agent *agent)
-{
-       close(agent->cmdfd);
-       close(agent->responsefd);
-       free(agent);
-}
diff --git a/ccan/tdb2/test/external-agent.h b/ccan/tdb2/test/external-agent.h
deleted file mode 100644 (file)
index 9d25c58..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef TDB2_TEST_EXTERNAL_AGENT_H
-#define TDB2_TEST_EXTERNAL_AGENT_H
-
-/* For locking tests, we need a different process to try things at
- * various times. */
-enum operation {
-       OPEN,
-       OPEN_WITH_HOOK,
-       FETCH,
-       STORE,
-       TRANSACTION_START,
-       TRANSACTION_COMMIT,
-       NEEDS_RECOVERY,
-       CHECK,
-       SEND_SIGNAL,
-       CLOSE,
-};
-
-/* Do this before doing any tdb stuff.  Return handle, or -1. */
-struct agent *prepare_external_agent(void);
-
-enum agent_return {
-       SUCCESS,
-       WOULD_HAVE_BLOCKED,
-       AGENT_DIED,
-       FAILED, /* For fetch, or NEEDS_RECOVERY */
-       OTHER_FAILURE,
-};
-
-/* Ask the external agent to try to do an operation. 
- * name == tdb name for OPEN/OPEN_WITH_CLEAR_IF_FIRST,
- * record name for FETCH/STORE (store stores name as data too)
- */
-enum agent_return external_agent_operation(struct agent *handle,
-                                          enum operation op,
-                                          const char *name);
-
-/* Hook into free() on tdb_data in external agent. */
-void (*external_agent_free)(void *);
-
-/* Mapping enum -> string. */
-const char *agent_return_name(enum agent_return ret);
-const char *operation_name(enum operation op);
-
-void free_external_agent(struct agent *agent);
-#endif /* TDB2_TEST_EXTERNAL_AGENT_H */
diff --git a/ccan/tdb2/test/failtest_helper.c b/ccan/tdb2/test/failtest_helper.c
deleted file mode 100644 (file)
index ab79de1..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "failtest_helper.h"
-#include "logging.h"
-#include <string.h>
-#include <ccan/tap/tap.h>
-
-bool failtest_suppress = false;
-
-/* FIXME: From ccan/str */
-static inline bool strends(const char *str, const char *postfix)
-{
-       if (strlen(str) < strlen(postfix))
-               return false;
-
-       return !strcmp(str + strlen(str) - strlen(postfix), postfix);
-}
-
-bool failmatch(const struct failtest_call *call,
-              const char *file, int line, enum failtest_call_type type)
-{
-       return call->type == type
-               && call->line == line
-               && ((strcmp(call->file, file) == 0)
-                   || (strends(call->file, file)
-                       && (call->file[strlen(call->file) - strlen(file) - 1]
-                           == '/')));
-}
-
-static bool is_nonblocking_lock(const struct failtest_call *call)
-{
-       return call->type == FAILTEST_FCNTL && call->u.fcntl.cmd == F_SETLK;
-}
-
-static bool is_unlock(const struct failtest_call *call)
-{
-       return call->type == FAILTEST_FCNTL
-               && call->u.fcntl.arg.fl.l_type == F_UNLCK;
-}
-
-bool exit_check_log(struct tlist_calls *history)
-{
-       const struct failtest_call *i;
-
-       tlist_for_each(history, i, list) {
-               if (!i->fail)
-                       continue;
-               /* Failing the /dev/urandom open doesn't count: we fall back. */
-               if (failmatch(i, URANDOM_OPEN))
-                       continue;
-
-               /* Similarly with read fail. */
-               if (failmatch(i, URANDOM_READ))
-                       continue;
-
-               /* Initial allocation of tdb doesn't log. */
-               if (failmatch(i, INITIAL_TDB_MALLOC))
-                       continue;
-
-               /* We don't block "failures" on non-blocking locks. */
-               if (is_nonblocking_lock(i))
-                       continue;
-
-               if (!tap_log_messages)
-                       diag("We didn't log for %s:%u", i->file, i->line);
-               return tap_log_messages != 0;
-       }
-       return true;
-}
-
-/* Some places we soldier on despite errors: only fail them once. */
-enum failtest_result
-block_repeat_failures(struct tlist_calls *history)
-{
-       const struct failtest_call *last;
-
-       last = tlist_tail(history, list);
-
-       if (failtest_suppress)
-               return FAIL_DONT_FAIL;
-
-       if (failmatch(last, INITIAL_TDB_MALLOC)
-           || failmatch(last, URANDOM_OPEN)
-           || failmatch(last, URANDOM_READ)) {
-               return FAIL_PROBE;
-       }
-
-       /* We handle mmap failing, by falling back to read/write, so
-        * don't try all possible paths. */
-       if (last->type == FAILTEST_MMAP)
-               return FAIL_PROBE;
-
-       /* Unlock or non-blocking lock is fail-once. */
-       if (is_unlock(last) || is_nonblocking_lock(last))
-               return FAIL_PROBE;
-
-       return FAIL_OK;
-}
diff --git a/ccan/tdb2/test/failtest_helper.h b/ccan/tdb2/test/failtest_helper.h
deleted file mode 100644 (file)
index 4130aff..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef TDB2_TEST_FAILTEST_HELPER_H
-#define TDB2_TEST_FAILTEST_HELPER_H
-#include <ccan/failtest/failtest.h>
-#include <stdbool.h>
-
-/* FIXME: Check these! */
-#define INITIAL_TDB_MALLOC     "open.c", 445, FAILTEST_MALLOC
-#define URANDOM_OPEN           "open.c", 62, FAILTEST_OPEN
-#define URANDOM_READ           "open.c", 42, FAILTEST_READ
-
-bool exit_check_log(struct tlist_calls *history);
-bool failmatch(const struct failtest_call *call,
-              const char *file, int line, enum failtest_call_type type);
-enum failtest_result block_repeat_failures(struct tlist_calls *history);
-
-/* Set this to suppress failure. */
-extern bool failtest_suppress;
-
-#endif /* TDB2_TEST_LOGGING_H */
diff --git a/ccan/tdb2/test/jenkins-be-hash.tdb1 b/ccan/tdb2/test/jenkins-be-hash.tdb1
deleted file mode 100644 (file)
index b652840..0000000
Binary files a/ccan/tdb2/test/jenkins-be-hash.tdb1 and /dev/null differ
diff --git a/ccan/tdb2/test/jenkins-le-hash.tdb1 b/ccan/tdb2/test/jenkins-le-hash.tdb1
deleted file mode 100644 (file)
index 007e0a3..0000000
Binary files a/ccan/tdb2/test/jenkins-le-hash.tdb1 and /dev/null differ
diff --git a/ccan/tdb2/test/layout.c b/ccan/tdb2/test/layout.c
deleted file mode 100644 (file)
index ae37f56..0000000
+++ /dev/null
@@ -1,402 +0,0 @@
-/* TDB tools to create various canned database layouts. */
-#include "layout.h"
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <err.h>
-#include "logging.h"
-
-struct tdb_layout *new_tdb_layout(void)
-{
-       struct tdb_layout *layout = malloc(sizeof(*layout));
-       layout->num_elems = 0;
-       layout->elem = NULL;
-       return layout;
-}
-
-static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
-{
-       layout->elem = realloc(layout->elem,
-                              sizeof(layout->elem[0])
-                              * (layout->num_elems+1));
-       layout->elem[layout->num_elems++] = elem;
-}
-
-void tdb_layout_add_freetable(struct tdb_layout *layout)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = FREETABLE;
-       add(layout, elem);
-}
-
-void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
-                        unsigned ftable)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = FREE;
-       elem.free.len = len;
-       elem.free.ftable_num = ftable;
-       add(layout, elem);
-}
-
-void tdb_layout_add_capability(struct tdb_layout *layout,
-                              uint64_t type,
-                              bool write_breaks,
-                              bool check_breaks,
-                              bool open_breaks,
-                              tdb_len_t extra)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = CAPABILITY;
-       elem.capability.type = type;
-       if (write_breaks)
-               elem.capability.type |= TDB_CAP_NOWRITE;
-       if (open_breaks)
-               elem.capability.type |= TDB_CAP_NOOPEN;
-       if (check_breaks)
-               elem.capability.type |= TDB_CAP_NOCHECK;
-       elem.capability.extra = extra;
-       add(layout, elem);
-}
-
-static struct tdb_data dup_key(struct tdb_data key)
-{
-       struct tdb_data ret;
-       ret.dsize = key.dsize;
-       ret.dptr = malloc(ret.dsize);
-       memcpy(ret.dptr, key.dptr, ret.dsize);
-       return ret;
-}
-
-void tdb_layout_add_used(struct tdb_layout *layout,
-                        TDB_DATA key, TDB_DATA data,
-                        tdb_len_t extra)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = DATA;
-       elem.used.key = dup_key(key);
-       elem.used.data = dup_key(data);
-       elem.used.extra = extra;
-       add(layout, elem);
-}
-
-static tdb_len_t free_record_len(tdb_len_t len)
-{
-       return sizeof(struct tdb_used_record) + len;
-}
-
-static tdb_len_t data_record_len(struct tle_used *used)
-{
-       tdb_len_t len;
-       len = sizeof(struct tdb_used_record)
-               + used->key.dsize + used->data.dsize + used->extra;
-       assert(len >= sizeof(struct tdb_free_record));
-       return len;
-}
-
-static tdb_len_t hashtable_len(struct tle_hashtable *htable)
-{
-       return sizeof(struct tdb_used_record)
-               + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
-               + htable->extra;
-}
-
-static tdb_len_t capability_len(struct tle_capability *cap)
-{
-       return sizeof(struct tdb_capability) + cap->extra;
-}
-
-static tdb_len_t freetable_len(struct tle_freetable *ftable)
-{
-       return sizeof(struct tdb_freetable);
-}
-
-static void set_free_record(void *mem, tdb_len_t len)
-{
-       /* We do all the work in add_to_freetable */
-}
-
-static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
-{
-       if (extra)
-               ((char *)(u + 1))[len] = '\0';
-}
-
-static void set_data_record(void *mem, struct tdb_context *tdb,
-                           struct tle_used *used)
-{
-       struct tdb_used_record *u = mem;
-
-       set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
-                  used->key.dsize + used->data.dsize + used->extra,
-                  tdb_hash(tdb, used->key.dptr, used->key.dsize));
-       memcpy(u + 1, used->key.dptr, used->key.dsize);
-       memcpy((char *)(u + 1) + used->key.dsize,
-              used->data.dptr, used->data.dsize);
-       add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
-}
-
-static void set_hashtable(void *mem, struct tdb_context *tdb,
-                         struct tle_hashtable *htable)
-{
-       struct tdb_used_record *u = mem;
-       tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
-
-       set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
-       memset(u + 1, 0, len);
-       add_zero_pad(u, len, htable->extra);
-}
-
-static void set_capability(void *mem, struct tdb_context *tdb,
-                          struct tle_capability *cap, struct tdb_header *hdr,
-                          tdb_off_t last_cap)
-{
-       struct tdb_capability *c = mem;
-       tdb_len_t len = sizeof(*c) - sizeof(struct tdb_used_record) + cap->extra;
-
-       c->type = cap->type;
-       c->next = 0;
-       set_header(tdb, &c->hdr, TDB_CAP_MAGIC, 0, len, len, 0);
-
-       /* Append to capability list. */
-       if (!last_cap) {
-               hdr->capabilities = cap->base.off;
-       } else {
-               c = (struct tdb_capability *)((char *)hdr + last_cap);
-               c->next = cap->base.off;
-       }
-}
-
-static void set_freetable(void *mem, struct tdb_context *tdb,
-                        struct tle_freetable *freetable, struct tdb_header *hdr,
-                        tdb_off_t last_ftable)
-{
-       struct tdb_freetable *ftable = mem;
-       memset(ftable, 0, sizeof(*ftable));
-       set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0,
-                       sizeof(*ftable) - sizeof(ftable->hdr),
-                       sizeof(*ftable) - sizeof(ftable->hdr), 0);
-
-       if (last_ftable) {
-               ftable = (struct tdb_freetable *)((char *)hdr + last_ftable);
-               ftable->next = freetable->base.off;
-       } else {
-               hdr->free_table = freetable->base.off;
-       }
-}
-
-static void add_to_freetable(struct tdb_context *tdb,
-                            tdb_off_t eoff,
-                            tdb_off_t elen,
-                            unsigned ftable,
-                            struct tle_freetable *freetable)
-{
-       tdb->tdb2.ftable_off = freetable->base.off;
-       tdb->tdb2.ftable = ftable;
-       add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen,
-                       TDB_LOCK_WAIT, false);
-}
-
-static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
-{
-       return group_start
-               + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
-}
-
-/* Get bits from a value. */
-static uint32_t bits(uint64_t val, unsigned start, unsigned num)
-{
-       assert(num <= 32);
-       return (val >> start) & ((1U << num) - 1);
-}
-
-/* We take bits from the top: that way we can lock whole sections of the hash
- * by using lock ranges. */
-static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
-{
-       *used += num;
-       return bits(h, 64 - *used, num);
-}
-
-static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
-                              uint64_t h)
-{
-       return bucket
-               | new_off
-               | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
-                                 TDB_OFF_UPPER_STEAL_EXTRA)
-                  << TDB_OFF_HASH_EXTRA_BIT);
-}
-
-/* FIXME: Our hash table handling here is primitive: we don't expand! */
-static void add_to_hashtable(struct tdb_context *tdb,
-                            tdb_off_t eoff,
-                            struct tdb_data key)
-{
-       uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
-       tdb_off_t b_off, group_start;
-       unsigned i, group, in_group;
-       unsigned used = 0;
-
-       group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
-       in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
-
-       group_start = offsetof(struct tdb_header, hashtable)
-               + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
-
-       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
-               unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
-
-               b_off = hbucket_off(group_start, bucket);               
-               if (tdb_read_off(tdb, b_off) == 0) {
-                       tdb_write_off(tdb, b_off,
-                                     encode_offset(eoff, in_group, h));
-                       return;
-               }
-       }
-       abort();
-}
-
-static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
-{
-       unsigned i;
-
-       for (i = 0; i < layout->num_elems; i++) {
-               if (layout->elem[i].base.type != FREETABLE)
-                       continue;
-               if (num == 0)
-                       return &layout->elem[i].ftable;
-               num--;
-       }
-       abort();
-}
-
-/* FIXME: Support TDB_CONVERT */
-struct tdb_context *tdb_layout_get(struct tdb_layout *layout,
-                                  void (*freefn)(void *),
-                                  union tdb_attribute *attr)
-{
-       unsigned int i;
-       tdb_off_t off, len, last_ftable, last_cap;
-       char *mem;
-       struct tdb_context *tdb;
-
-       off = sizeof(struct tdb_header);
-
-       /* First pass of layout: calc lengths */
-       for (i = 0; i < layout->num_elems; i++) {
-               union tdb_layout_elem *e = &layout->elem[i];
-               e->base.off = off;
-               switch (e->base.type) {
-               case FREETABLE:
-                       len = freetable_len(&e->ftable);
-                       break;
-               case FREE:
-                       len = free_record_len(e->free.len);
-                       break;
-               case DATA:
-                       len = data_record_len(&e->used);
-                       break;
-               case HASHTABLE:
-                       len = hashtable_len(&e->hashtable);
-                       break;
-               case CAPABILITY:
-                       len = capability_len(&e->capability);
-                       break;
-               default:
-                       abort();
-               }
-               off += len;
-       }
-
-       mem = malloc(off);
-       /* Fill with some weird pattern. */
-       memset(mem, 0x99, off);
-       /* Now populate our header, cribbing from a real TDB header. */
-       tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, attr);
-       memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
-
-       /* Mug the tdb we have to make it use this. */
-       freefn(tdb->file->map_ptr);
-       tdb->file->map_ptr = mem;
-       tdb->file->map_size = off;
-
-       last_ftable = 0;
-       last_cap = 0;
-       for (i = 0; i < layout->num_elems; i++) {
-               union tdb_layout_elem *e = &layout->elem[i];
-               switch (e->base.type) {
-               case FREETABLE:
-                       set_freetable(mem + e->base.off, tdb, &e->ftable,
-                                    (struct tdb_header *)mem, last_ftable);
-                       last_ftable = e->base.off;
-                       break;
-               case FREE:
-                       set_free_record(mem + e->base.off, e->free.len);
-                       break;
-               case DATA:
-                       set_data_record(mem + e->base.off, tdb, &e->used);
-                       break;
-               case HASHTABLE:
-                       set_hashtable(mem + e->base.off, tdb, &e->hashtable);
-                       break;
-               case CAPABILITY:
-                       set_capability(mem + e->base.off, tdb, &e->capability,
-                                      (struct tdb_header *)mem, last_cap);
-                       last_cap = e->base.off;
-                       break;
-               }
-       }
-       /* Must have a free table! */
-       assert(last_ftable);
-
-       /* Now fill the free and hash tables. */
-       for (i = 0; i < layout->num_elems; i++) {
-               union tdb_layout_elem *e = &layout->elem[i];
-               switch (e->base.type) {
-               case FREE:
-                       add_to_freetable(tdb, e->base.off, e->free.len,
-                                        e->free.ftable_num,
-                                        find_ftable(layout, e->free.ftable_num));
-                       break;
-               case DATA:
-                       add_to_hashtable(tdb, e->base.off, e->used.key);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       tdb->tdb2.ftable_off = find_ftable(layout, 0)->base.off;
-       return tdb;
-}
-
-void tdb_layout_write(struct tdb_layout *layout, void (*freefn)(void *),
-                      union tdb_attribute *attr, const char *filename)
-{
-       struct tdb_context *tdb = tdb_layout_get(layout, freefn, attr);
-       int fd;
-
-       fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT,  0600);
-       if (fd < 0)
-               err(1, "opening %s for writing", filename);
-       if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
-           != tdb->file->map_size)
-               err(1, "writing %s", filename);
-       close(fd);
-       tdb_close(tdb);
-}
-
-void tdb_layout_free(struct tdb_layout *layout)
-{
-       unsigned int i;
-
-       for (i = 0; i < layout->num_elems; i++) {
-               if (layout->elem[i].base.type == DATA) {
-                       free(layout->elem[i].used.key.dptr);
-                       free(layout->elem[i].used.data.dptr);
-               }
-       }
-       free(layout->elem);
-       free(layout);
-}
diff --git a/ccan/tdb2/test/layout.h b/ccan/tdb2/test/layout.h
deleted file mode 100644 (file)
index 9a71484..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef TDB2_TEST_LAYOUT_H
-#define TDB2_TEST_LAYOUT_H
-#include <ccan/tdb2/private.h>
-
-struct tdb_layout *new_tdb_layout(void);
-void tdb_layout_add_freetable(struct tdb_layout *layout);
-void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
-                        unsigned ftable);
-void tdb_layout_add_used(struct tdb_layout *layout,
-                        TDB_DATA key, TDB_DATA data,
-                        tdb_len_t extra);
-void tdb_layout_add_capability(struct tdb_layout *layout,
-                              uint64_t type,
-                              bool write_breaks,
-                              bool check_breaks,
-                              bool open_breaks,
-                              tdb_len_t extra);
-
-#if 0 /* FIXME: Allow allocation of subtables */
-void tdb_layout_add_hashtable(struct tdb_layout *layout,
-                             int htable_parent, /* -1 == toplevel */
-                             unsigned int bucket,
-                             tdb_len_t extra);
-#endif
-/* freefn is needed if we're using failtest_free. */
-struct tdb_context *tdb_layout_get(struct tdb_layout *layout,
-                                  void (*freefn)(void *),
-                                  union tdb_attribute *attr);
-void tdb_layout_write(struct tdb_layout *layout, void (*freefn)(void *),
-                      union tdb_attribute *attr, const char *filename);
-
-void tdb_layout_free(struct tdb_layout *layout);
-
-enum layout_type {
-       FREETABLE, FREE, DATA, HASHTABLE, CAPABILITY
-};
-
-/* Shared by all union members. */
-struct tle_base {
-       enum layout_type type;
-       tdb_off_t off;
-};
-
-struct tle_freetable {
-       struct tle_base base;
-};
-
-struct tle_free {
-       struct tle_base base;
-       tdb_len_t len;
-       unsigned ftable_num;
-};
-
-struct tle_used {
-       struct tle_base base;
-       TDB_DATA key;
-       TDB_DATA data;
-       tdb_len_t extra;
-};
-
-struct tle_hashtable {
-       struct tle_base base;
-       int parent;
-       unsigned int bucket;
-       tdb_len_t extra;
-};
-
-struct tle_capability {
-       struct tle_base base;
-       uint64_t type;
-       tdb_len_t extra;
-};
-
-union tdb_layout_elem {
-       struct tle_base base;
-       struct tle_freetable ftable;
-       struct tle_free free;
-       struct tle_used used;
-       struct tle_hashtable hashtable;
-       struct tle_capability capability;
-};
-
-struct tdb_layout {
-       unsigned int num_elems;
-       union tdb_layout_elem *elem;
-};
-#endif /* TDB2_TEST_LAYOUT_H */
diff --git a/ccan/tdb2/test/lock-tracking.c b/ccan/tdb2/test/lock-tracking.c
deleted file mode 100644 (file)
index e253db9..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/* We save the locks so we can reaquire them. */
-#include <ccan/tdb2/tdb1_private.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <ccan/tap/tap.h>
-#include "lock-tracking.h"
-
-struct lock {
-       struct lock *next;
-       unsigned int off;
-       unsigned int len;
-       int type;
-};
-static struct lock *locks;
-int locking_errors = 0;
-bool suppress_lockcheck = false;
-bool nonblocking_locks;
-int locking_would_block = 0;
-void (*unlock_callback)(int fd);
-
-int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
-{
-       va_list ap;
-       int ret, arg3;
-       struct flock *fl;
-       bool may_block = false;
-
-       if (cmd != F_SETLK && cmd != F_SETLKW) {
-               /* This may be totally bogus, but we don't know in general. */
-               va_start(ap, cmd);
-               arg3 = va_arg(ap, int);
-               va_end(ap);
-
-               return fcntl(fd, cmd, arg3);
-       }
-
-       va_start(ap, cmd);
-       fl = va_arg(ap, struct flock *);
-       va_end(ap);
-
-       if (cmd == F_SETLKW && nonblocking_locks) {
-               cmd = F_SETLK;
-               may_block = true;
-       }
-       ret = fcntl(fd, cmd, fl);
-
-       /* Detect when we failed, but might have been OK if we waited. */
-       if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) {
-               locking_would_block++;
-       }
-
-       if (fl->l_type == F_UNLCK) {
-               struct lock **l;
-               struct lock *old = NULL;
-
-               for (l = &locks; *l; l = &(*l)->next) {
-                       if ((*l)->off == fl->l_start
-                           && (*l)->len == fl->l_len) {
-                               if (ret == 0) {
-                                       old = *l;
-                                       *l = (*l)->next;
-                                       free(old);
-                               }
-                               break;
-                       }
-               }
-               if (!old && !suppress_lockcheck) {
-                       diag("Unknown unlock %u@%u - %i",
-                            (int)fl->l_len, (int)fl->l_start, ret);
-                       locking_errors++;
-               }
-       } else {
-               struct lock *new, *i;
-               unsigned int fl_end = fl->l_start + fl->l_len;
-               if (fl->l_len == 0)
-                       fl_end = (unsigned int)-1;
-
-               /* Check for overlaps: we shouldn't do this. */
-               for (i = locks; i; i = i->next) {
-                       unsigned int i_end = i->off + i->len;
-                       if (i->len == 0)
-                               i_end = (unsigned int)-1;
-
-                       if (fl->l_start >= i->off && fl->l_start < i_end)
-                               break;
-                       if (fl_end > i->off && fl_end < i_end)
-                               break;
-
-                       /* tdb_allrecord_lock does this, handle adjacent: */
-                       if (fl->l_start > TDB_HASH_LOCK_START
-                           && fl->l_start == i_end && fl->l_type == i->type) {
-                               if (ret == 0) {
-                                       i->len = fl->l_len
-                                               ? i->len + fl->l_len
-                                               : 0;
-                               }
-                               goto done;
-                       }
-               }
-               if (i) {
-                       /* Special case: upgrade of allrecord lock. */
-                       if (i->type == F_RDLCK && fl->l_type == F_WRLCK
-                           && i->off == TDB_HASH_LOCK_START
-                           && fl->l_start == TDB_HASH_LOCK_START
-                           && i->len == 0
-                           && fl->l_len == 0) {
-                               if (ret == 0)
-                                       i->type = F_WRLCK;
-                               goto done;
-                       }
-                       /* allrecord upgrade for tdb1. */
-                       if (i->type == F_RDLCK && fl->l_type == F_WRLCK
-                           && i->off == TDB1_FREELIST_TOP
-                           && fl->l_start == TDB1_FREELIST_TOP
-                           && i->len == 0
-                           && fl->l_len == 0) {
-                               if (ret == 0)
-                                       i->type = F_WRLCK;
-                               goto done;
-                       }
-
-                       if (!suppress_lockcheck) {
-                               diag("%s lock %u@%u overlaps %u@%u",
-                                    fl->l_type == F_WRLCK ? "write" : "read",
-                                    (int)fl->l_len, (int)fl->l_start,
-                                    i->len, (int)i->off);
-                               locking_errors++;
-                       }
-               }
-
-               if (ret == 0) {
-                       new = malloc(sizeof *new);
-                       new->off = fl->l_start;
-                       new->len = fl->l_len;
-                       new->type = fl->l_type;
-                       new->next = locks;
-                       locks = new;
-               }
-       }
-done:
-       if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback)
-               unlock_callback(fd);
-       return ret;
-}
-
-unsigned int forget_locking(void)
-{
-       unsigned int num = 0;
-       while (locks) {
-               struct lock *next = locks->next;
-               free(locks);
-               locks = next;
-               num++;
-       }
-       return num;
-}
diff --git a/ccan/tdb2/test/lock-tracking.h b/ccan/tdb2/test/lock-tracking.h
deleted file mode 100644 (file)
index f2c9c44..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef LOCK_TRACKING_H
-#define LOCK_TRACKING_H
-#include <stdbool.h>
-
-/* Set this if you want a callback after fnctl unlock. */
-extern void (*unlock_callback)(int fd);
-
-/* Replacement fcntl. */
-int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ );
-
-/* Discard locking info: returns number of locks outstanding. */
-unsigned int forget_locking(void);
-
-/* Number of errors in locking. */
-extern int locking_errors;
-
-/* Suppress lock checking. */
-extern bool suppress_lockcheck;
-
-/* Make all locks non-blocking. */
-extern bool nonblocking_locks;
-
-/* Number of times we failed a lock because we made it non-blocking. */
-extern int locking_would_block;
-#endif /* LOCK_TRACKING_H */
diff --git a/ccan/tdb2/test/logging.c b/ccan/tdb2/test/logging.c
deleted file mode 100644 (file)
index 0712cc0..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-unsigned tap_log_messages;
-const char *log_prefix = "";
-char *log_last = NULL;
-bool suppress_logging;
-
-union tdb_attribute tap_log_attr = {
-       .log = { .base = { .attr = TDB_ATTRIBUTE_LOG },
-                .fn = tap_log_fn }
-};
-
-void tap_log_fn(struct tdb_context *tdb,
-               enum tdb_log_level level,
-               enum TDB_ERROR ecode,
-               const char *message, void *priv)
-{
-       if (suppress_logging)
-               return;
-
-       diag("tdb log level %u: %s: %s%s",
-            level, tdb_errorstr(ecode), log_prefix, message);
-       if (log_last)
-               free(log_last);
-       log_last = strdup(message);
-       tap_log_messages++;
-}
-
diff --git a/ccan/tdb2/test/logging.h b/ccan/tdb2/test/logging.h
deleted file mode 100644 (file)
index 2dfea14..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TDB2_TEST_LOGGING_H
-#define TDB2_TEST_LOGGING_H
-#include <ccan/tdb2/tdb2.h>
-#include <stdbool.h>
-#include <string.h>
-
-extern bool suppress_logging;
-extern const char *log_prefix;
-extern unsigned tap_log_messages;
-extern union tdb_attribute tap_log_attr;
-extern char *log_last;
-
-void tap_log_fn(struct tdb_context *tdb,
-               enum tdb_log_level level,
-               enum TDB_ERROR ecode,
-               const char *message, void *priv);
-#endif /* TDB2_TEST_LOGGING_H */
diff --git a/ccan/tdb2/test/old-nohash-be.tdb1 b/ccan/tdb2/test/old-nohash-be.tdb1
deleted file mode 100644 (file)
index 1c49116..0000000
Binary files a/ccan/tdb2/test/old-nohash-be.tdb1 and /dev/null differ
diff --git a/ccan/tdb2/test/old-nohash-le.tdb1 b/ccan/tdb2/test/old-nohash-le.tdb1
deleted file mode 100644 (file)
index 0655072..0000000
Binary files a/ccan/tdb2/test/old-nohash-le.tdb1 and /dev/null differ
diff --git a/ccan/tdb2/test/run-001-encode.c b/ccan/tdb2/test/run-001-encode.c
deleted file mode 100644 (file)
index 67616fc..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_used_record rec;
-       struct tdb_context tdb = { .log_fn = tap_log_fn };
-
-       plan_tests(64 + 32 + 48*6 + 1);
-
-       /* We should be able to encode any data value. */
-       for (i = 0; i < 64; i++)
-               ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, 0, 1ULL << i,
-                              1ULL << i, 0) == 0);
-
-       /* And any key and data with < 64 bits between them. */
-       for (i = 0; i < 32; i++) {
-               tdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
-               ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, klen, dlen,
-                              klen + dlen, 0)  == 0);
-       }
-
-       /* We should neatly encode all values. */
-       for (i = 0; i < 48; i++) {
-               uint64_t h = 1ULL << (i < 5 ? i : 4);
-               uint64_t klen = 1ULL << (i < 16 ? i : 15);
-               uint64_t dlen = 1ULL << i;
-               uint64_t xlen = 1ULL << (i < 32 ? i : 31);
-               ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, klen, dlen,
-                              klen+dlen+xlen, h) == 0);
-               ok1(rec_key_length(&rec) == klen);
-               ok1(rec_data_length(&rec) == dlen);
-               ok1(rec_extra_padding(&rec) == xlen);
-               ok1((uint64_t)rec_hash(&rec) == h);
-               ok1(rec_magic(&rec) == TDB_USED_MAGIC);
-       }
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-001-fls.c b/ccan/tdb2/test/run-001-fls.c
deleted file mode 100644 (file)
index 4449f69..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-
-static unsigned int dumb_fls(uint64_t num)
-{
-       int i;
-
-       for (i = 63; i >= 0; i--) {
-               if (num & (1ULL << i))
-                       break;
-       }
-       return i + 1;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-
-       plan_tests(64 * 64 + 2);
-
-       ok1(fls64(0) == 0);
-       ok1(dumb_fls(0) == 0);
-
-       for (i = 0; i < 64; i++) {
-               for (j = 0; j < 64; j++) {
-                       uint64_t val = (1ULL << i) | (1ULL << j);
-                       ok(fls64(val) == dumb_fls(val),
-                          "%llu -> %u should be %u", (long long)val,
-                          fls64(val), dumb_fls(val));
-               }
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-01-new_database.c b/ccan/tdb2/test/run-01-new_database.c
deleted file mode 100644 (file)
index a5f0dd3..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-new_database.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       failtest_exit(exit_status());
-
-               failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               failtest_suppress = false;
-               tdb_close(tdb);
-               if (!ok1(tap_log_messages == 0))
-                       break;
-       }
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-02-expand.c b/ccan/tdb2/test/run-02-expand.c
deleted file mode 100644 (file)
index e3f5905..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       uint64_t val;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               failtest_suppress = true;
-               tdb = tdb_open("run-expand.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       break;
-
-               val = tdb->file->map_size;
-               /* Need some hash lock for expand. */
-               ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
-               failtest_suppress = false;
-               if (!ok1(tdb_expand(tdb, 1) == 0)) {
-                       failtest_suppress = true;
-                       tdb_close(tdb);
-                       break;
-               }
-               failtest_suppress = true;
-                       
-               ok1(tdb->file->map_size >= val + 1 * TDB_EXTENSION_FACTOR);
-               ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               val = tdb->file->map_size;
-               ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
-               failtest_suppress = false;
-               if (!ok1(tdb_expand(tdb, 1024) == 0)) {
-                       failtest_suppress = true;
-                       tdb_close(tdb);
-                       break;
-               }
-               failtest_suppress = true;
-               ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
-               ok1(tdb->file->map_size >= val + 1024 * TDB_EXTENSION_FACTOR);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-03-coalesce.c b/ccan/tdb2/test/run-03-coalesce.c
deleted file mode 100644 (file)
index 99f94fe..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-#include "layout.h"
-
-static tdb_len_t free_record_length(struct tdb_context *tdb, tdb_off_t off)
-{
-       struct tdb_free_record f;
-       enum TDB_ERROR ecode;
-
-       ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-       if (frec_magic(&f) != TDB_FREE_MAGIC)
-               return TDB_ERR_CORRUPT;
-       return frec_len(&f);
-}
-
-int main(int argc, char *argv[])
-{
-       tdb_off_t b_off, test;
-       struct tdb_context *tdb;
-       struct tdb_layout *layout;
-       struct tdb_data data, key;
-       tdb_len_t len;
-
-       /* FIXME: Test TDB_CONVERT */
-       /* FIXME: Test lock order fail. */
-
-       plan_tests(42);
-       data = tdb_mkdata("world", 5);
-       key = tdb_mkdata("hello", 5);
-
-       /* No coalescing can be done due to EOF */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       len = 1024;
-       tdb_layout_add_free(layout, len, 0);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(len));
-       /* Lock and fail to coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[1].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len, &test)
-           == 0);
-       tdb_unlock_free_bucket(tdb, b_off);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
-       ok1(test == layout->elem[1].base.off);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* No coalescing can be done due to used record */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(1024));
-       /* Lock and fail to coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[1].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 0);
-       tdb_unlock_free_bucket(tdb, b_off);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(test == layout->elem[1].base.off);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* Coalescing can be done due to two free records, then EOF */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_free(layout, 2048, 0);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(free_record_length(tdb, layout->elem[2].base.off) == 2048);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket (first) free entry is. */
-       b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(1024));
-       /* Lock and coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[2].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 1024 + sizeof(struct tdb_used_record) + 2048);
-       /* Should tell us it's erased this one... */
-       ok1(test == TDB_ERR_NOEXIST);
-       ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off)
-           == 1024 + sizeof(struct tdb_used_record) + 2048);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* Coalescing can be done due to two free records, then data */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_free(layout, 512, 0);
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(1024));
-       /* Lock and coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[2].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 1024 + sizeof(struct tdb_used_record) + 512);
-       ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off)
-           == 1024 + sizeof(struct tdb_used_record) + 512);
-       ok1(test == TDB_ERR_NOEXIST);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       /* Coalescing can be done due to three free records, then EOF */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 1024, 0);
-       tdb_layout_add_free(layout, 512, 0);
-       tdb_layout_add_free(layout, 256, 0);
-       tdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.tdb");
-       /* NOMMAP is for lockcheck. */
-       tdb = tdb_open("run-03-coalesce.tdb", TDB_NOMMAP, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
-       ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
-       ok1(free_record_length(tdb, layout->elem[3].base.off) == 256);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       /* Figure out which bucket free entry is. */
-       b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(1024));
-       /* Lock and coalesce. */
-       ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
-       test = layout->elem[2].base.off;
-       ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
-           == 1024 + sizeof(struct tdb_used_record) + 512
-           + sizeof(struct tdb_used_record) + 256);
-       ok1(tdb->file->allrecord_lock.count == 0
-           && tdb->file->num_lockrecs == 0);
-       ok1(free_record_length(tdb, layout->elem[1].base.off)
-           == 1024 + sizeof(struct tdb_used_record) + 512
-           + sizeof(struct tdb_used_record) + 256);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-04-basichash.c b/ccan/tdb2/test/run-04-basichash.c
deleted file mode 100644 (file)
index 4852744..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-/* We rig the hash so adjacent-numbered records always clash. */
-static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return ((uint64_t)*(const unsigned int *)key)
-               << (64 - TDB_TOPLEVEL_HASH_BITS - 1);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       unsigned int v;
-       struct tdb_used_record rec;
-       struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
-       struct tdb_data dbuf = { (unsigned char *)&v, sizeof(v) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = clash } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-       };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (91 + (2 * ((1 << TDB_HASH_GROUP_BITS) - 1))) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct hash_info h;
-               tdb_off_t new_off, off, subhash;
-
-               tdb = tdb_open("run-04-basichash.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               v = 0;
-               /* Should not find it. */
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range == 
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               /* Allocate a new record. */
-               new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
-                               TDB_USED_MAGIC, false);
-               ok1(!TDB_OFF_IS_ERR(new_off));
-
-               /* We should be able to add it now. */
-               ok1(add_to_hash(tdb, &h, new_off) == 0);
-
-               /* Make sure we fill it in for later finding. */
-               off = new_off + sizeof(struct tdb_used_record);
-               ok1(!tdb->tdb2.io->twrite(tdb, off, key.dptr, key.dsize));
-               off += key.dsize;
-               ok1(!tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize));
-
-               /* We should be able to unlock that OK. */
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Database should be consistent. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now, this should give a successful lookup. */
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
-                   == new_off);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range == 
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-               
-               /* Database should be consistent. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Test expansion. */
-               v = 1;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 1. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 1);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range == 
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               /* Make it expand 0'th bucket. */
-               ok1(expand_group(tdb, &h) == 0);
-               /* First one should be subhash, next should be empty. */
-               ok1(is_subhash(h.group[0]));
-               subhash = (h.group[0] & TDB_OFF_MASK);
-               for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(h.group[j] == 0);
-
-               ok1(tdb_write_convert(tdb, h.group_start,
-                                     h.group, sizeof(h.group)) == 0);
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Should be happy with expansion. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Should be able to find it. */
-               v = 0;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
-                   == new_off);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in expanded group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                   + TDB_SUBLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range == 
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               /* Simple delete should work. */
-               ok1(delete_from_hash(tdb, &h) == 0);
-               ok1(add_free_record(tdb, new_off,
-                                   sizeof(struct tdb_used_record)
-                                   + rec_key_length(&rec)
-                                   + rec_data_length(&rec)
-                                   + rec_extra_padding(&rec),
-                                   TDB_LOCK_NOWAIT, false) == 0);
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Test second-level expansion: should expand 0th bucket. */
-               v = 0;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS+TDB_SUBLEVEL_HASH_BITS);
-
-               /* Should have lock on bucket 0 */
-               ok1(h.hlock_start == 0);
-               ok1(h.hlock_range == 
-                   1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
-               ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
-               ok1((tdb->flags & TDB_NOLOCK)
-                   || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
-               /* FIXME: Check lock length */
-
-               ok1(expand_group(tdb, &h) == 0);
-               /* First one should be subhash, next should be empty. */
-               ok1(is_subhash(h.group[0]));
-               subhash = (h.group[0] & TDB_OFF_MASK);
-               for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(h.group[j] == 0);
-               ok1(tdb_write_convert(tdb, h.group_start,
-                                     h.group, sizeof(h.group)) == 0);
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Should be happy with expansion. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                   + TDB_SUBLEVEL_HASH_BITS * 2);
-
-               /* We should be able to add it now. */
-               /* Allocate a new record. */
-               new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
-                               TDB_USED_MAGIC, false);
-               ok1(!TDB_OFF_IS_ERR(new_off));
-               ok1(add_to_hash(tdb, &h, new_off) == 0);
-
-               /* Make sure we fill it in for later finding. */
-               off = new_off + sizeof(struct tdb_used_record);
-               ok1(!tdb->tdb2.io->twrite(tdb, off, key.dptr, key.dsize));
-               off += key.dsize;
-               ok1(!tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize));
-
-               /* We should be able to unlock that OK. */
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_WRLCK) == 0);
-
-               /* Database should be consistent. */
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Should be able to find it. */
-               v = 0;
-               ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
-                   == new_off);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in expanded group 0, bucket 0. */
-               ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                   + TDB_SUBLEVEL_HASH_BITS * 2);
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-05-readonly-open.c b/ccan/tdb2/test/run-05-readonly-open.c
deleted file mode 100644 (file)
index 80eb567..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4), d;
-       union tdb_attribute seed_attr;
-       unsigned int msgs = 0;
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
-       seed_attr.base.next = &tap_log_attr;
-       seed_attr.seed.seed = 0;
-
-       failtest_suppress = true;
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-05-readonly-open.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600,
-                              flags[i] & TDB_VERSION1
-                              ? &tap_log_attr : &seed_attr);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               tdb_close(tdb);
-
-               failtest_suppress = false;
-               tdb = tdb_open("run-05-readonly-open.tdb", flags[i],
-                              O_RDONLY, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       break;
-               ok1(tap_log_messages == msgs);
-               /* Fetch should succeed, stores should fail. */
-               if (!ok1(tdb_fetch(tdb, key, &d) == 0))
-                       goto fail;
-               ok1(tdb_deq(d, data));
-               free(d.dptr);
-               if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY)
-                        == TDB_ERR_RDONLY))
-                       goto fail;
-               ok1(tap_log_messages == ++msgs);
-               if (!ok1(tdb_store(tdb, key, data, TDB_INSERT)
-                        == TDB_ERR_RDONLY))
-                       goto fail;
-               ok1(tap_log_messages == ++msgs);
-               failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-               ok1(tap_log_messages == msgs);
-               /* SIGH: failtest bug, it doesn't save the tdb file because
-                * we have it read-only.  If we go around again, it gets
-                * changed underneath us and things get screwy. */
-               if (failtest_has_failed())
-                       break;
-       }
-       failtest_exit(exit_status());
-
-fail:
-       failtest_suppress = true;
-       tdb_close(tdb);
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-10-simple-store.c b/ccan/tdb2/test/run-10-simple-store.c
deleted file mode 100644 (file)
index 10bbb49..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-
-       failtest_suppress = true;
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-10-simple-store.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       break;
-               /* Modify should fail. */
-               failtest_suppress = false;
-               if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY)
-                        == TDB_ERR_NOEXIST))
-                       goto fail;
-               failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               /* Insert should succeed. */
-               failtest_suppress = false;
-               if (!ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0))
-                       goto fail;
-               failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               /* Second insert should fail. */
-               failtest_suppress = false;
-               if (!ok1(tdb_store(tdb, key, data, TDB_INSERT)
-                        == TDB_ERR_EXISTS))
-                       goto fail;
-               failtest_suppress = true;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-       }
-       ok1(tap_log_messages == 0);
-       failtest_exit(exit_status());
-
-fail:
-       failtest_suppress = true;
-       tdb_close(tdb);
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-11-simple-fetch.c b/ccan/tdb2/test/run-11-simple-fetch.c
deleted file mode 100644 (file)
index ad97be3..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-
-       failtest_suppress = true;
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-11-simple-fetch.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (tdb) {
-                       struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-
-                       /* fetch should fail. */
-                       failtest_suppress = false;
-                       if (!ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST))
-                               goto fail;
-                       failtest_suppress = true;
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       /* Insert should succeed. */
-                       ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       /* Fetch should now work. */
-                       failtest_suppress = false;
-                       if (!ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS))
-                               goto fail;
-                       failtest_suppress = true;
-                       ok1(tdb_deq(d, data));
-                       free(d.dptr);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       tdb_close(tdb);
-               }
-       }
-       ok1(tap_log_messages == 0);
-       failtest_exit(exit_status());
-
-fail:
-       failtest_suppress = true;
-       tdb_close(tdb);
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-12-check.c b/ccan/tdb2/test/run-12-check.c
deleted file mode 100644 (file)
index b55bfe7..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <ccan/tdb2/private.h>
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL,
-                       TDB_INTERNAL|TDB_CONVERT,
-                       TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1,
-                       TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-
-       failtest_suppress = true;
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-12-check.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-
-               /* This is what we really want to test: tdb_check(). */
-               failtest_suppress = false;
-               if (!ok1(tdb_check(tdb, NULL, NULL) == 0))
-                       goto fail;
-               failtest_suppress = true;
-
-               tdb_close(tdb);
-       }
-       ok1(tap_log_messages == 0);
-       failtest_exit(exit_status());
-
-fail:
-       failtest_suppress = true;
-       tdb_close(tdb);
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-15-append.c b/ccan/tdb2/test/run-15-append.c
deleted file mode 100644 (file)
index 39afaf7..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/ilog/ilog.h>
-#include "logging.h"
-
-#define MAX_SIZE 13100
-#define SIZE_STEP 131
-
-static tdb_off_t tdb_offset(struct tdb_context *tdb, struct tdb_data key)
-{
-       tdb_off_t off;
-       struct tdb_used_record rec;
-       struct hash_info h;
-
-       if (tdb_get_flags(tdb) & TDB_VERSION1) {
-               struct tdb1_record rec;
-               return tdb1_find(tdb, key, tdb_hash(tdb, key.dptr, key.dsize),
-                                &rec);
-       }
-
-       off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
-       if (TDB_OFF_IS_ERR(off))
-               return 0;
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-       return off;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j, moves;
-       struct tdb_context *tdb;
-       unsigned char *buffer;
-       tdb_off_t oldoff = 0, newoff;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT,
-                       TDB_INTERNAL|TDB_VERSION1, TDB_VERSION1,
-                       TDB_NOMMAP|TDB_VERSION1,
-                       TDB_INTERNAL|TDB_CONVERT|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data;
-
-       buffer = malloc(MAX_SIZE);
-       for (i = 0; i < MAX_SIZE; i++)
-               buffer[i] = i;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7)
-                  + 1);
-
-       /* Using tdb_store. */
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-append.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               moves = 0;
-               for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
-                       data.dptr = buffer;
-                       data.dsize = j;
-                       ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-                       ok1(data.dsize == j);
-                       ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-                       free(data.dptr);
-                       newoff = tdb_offset(tdb, key);
-                       if (newoff != oldoff)
-                               moves++;
-                       oldoff = newoff;
-               }
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
-               if (flags[i] & TDB_VERSION1) {
-                       /* TDB1 simply over-size by 25%. */
-                       ok(moves <= ilog64(j / SIZE_STEP)*4,
-                          "Moved %u times", moves);
-               } else {
-                       /* We should increase by 50% each time... */
-                       ok(moves <= ilog64(j / SIZE_STEP)*2,
-                          "Moved %u times", moves);
-               }
-               tdb_close(tdb);
-       }
-
-       /* Using tdb_append. */
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               size_t prev_len = 0;
-               tdb = tdb_open("run-append.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               moves = 0;
-               for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
-                       data.dptr = buffer + prev_len;
-                       data.dsize = j - prev_len;
-                       ok1(tdb_append(tdb, key, data) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-                       ok1(data.dsize == j);
-                       ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-                       free(data.dptr);
-                       prev_len = data.dsize;
-                       newoff = tdb_offset(tdb, key);
-                       if (newoff != oldoff)
-                               moves++;
-                       oldoff = newoff;
-               }
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
-               if (flags[i] & TDB_VERSION1) {
-                       /* TDB1 simply over-size by 25%. */
-                       ok(moves <= ilog64(j / SIZE_STEP)*4,
-                          "Moved %u times", moves);
-               } else {
-                       /* We should increase by 50% each time... */
-                       ok(moves <= ilog64(j / SIZE_STEP)*2,
-                          "Moved %u times", moves);
-               }
-               tdb_close(tdb);
-       }
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-append.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Huge initial store. */
-               data.dptr = buffer;
-               data.dsize = MAX_SIZE;
-               ok1(tdb_append(tdb, key, data) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-               ok1(data.dsize == MAX_SIZE);
-               ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
-               free(data.dptr);
-               ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
-                                  && tdb->file->num_lockrecs == 0));
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       free(buffer);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-20-growhash.c b/ccan/tdb2/test/run-20-growhash.c
deleted file mode 100644 (file)
index 65cead0..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return *(const uint64_t *)key;
-}
-
-static void add_bits(uint64_t *val, unsigned new, unsigned new_bits,
-                    unsigned *done)
-{
-       *done += new_bits;
-       *val |= ((uint64_t)new << (64 - *done));
-}
-
-static uint64_t make_key(unsigned topgroup, unsigned topbucket,
-                        unsigned subgroup1, unsigned subbucket1,
-                        unsigned subgroup2, unsigned subbucket2)
-{
-       uint64_t key = 0;
-       unsigned done = 0;
-
-       add_bits(&key, topgroup, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
-                &done);
-       add_bits(&key, topbucket, TDB_HASH_GROUP_BITS, &done);
-       add_bits(&key, subgroup1, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
-                &done);
-       add_bits(&key, subbucket1, TDB_HASH_GROUP_BITS, &done);
-       add_bits(&key, subgroup2, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS,
-                &done);
-       add_bits(&key, subbucket2, TDB_HASH_GROUP_BITS, &done);
-       return key;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       uint64_t kdata;
-       struct tdb_used_record rec;
-       struct tdb_data key = { (unsigned char *)&kdata, sizeof(kdata) };
-       struct tdb_data dbuf = { (unsigned char *)&kdata, sizeof(kdata) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = myhash } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-       };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0])
-                  * (9 + (20 + 2 * ((1 << TDB_HASH_GROUP_BITS) - 2))
-                     * (1 << TDB_HASH_GROUP_BITS)) + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct hash_info h;
-
-               tdb = tdb_open("run-20-growhash.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Fill a group. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       kdata = make_key(0, j, 0, 0, 0, 0);
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Check first still exists. */
-               kdata = make_key(0, 0, 0, 0, 0, 0);
-               ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL) != 0);
-               /* Should have created correct hash. */
-               ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-               /* Should have located space in group 0, bucket 0. */
-               ok1(h.group_start == offsetof(struct tdb_header, hashtable));
-               ok1(h.home_bucket == 0);
-               ok1(h.found_bucket == 0);
-               ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
-               /* Entire group should be full! */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(h.group[j] != 0);
-
-               ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                     F_RDLCK) == 0);
-
-               /* Now, add one more to each should expand (that) bucket. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       unsigned int k;
-                       kdata = make_key(0, j, 0, 1, 0, 0);
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-                       ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL));
-                       /* Should have created correct hash. */
-                       ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-                       /* Should have moved to subhash */
-                       ok1(h.group_start >= sizeof(struct tdb_header));
-                       ok1(h.home_bucket == 1);
-                       ok1(h.found_bucket == 1);
-                       ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                           + TDB_SUBLEVEL_HASH_BITS);
-                       ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                             F_RDLCK) == 0);
-
-                       /* Keep adding, make it expand again. */
-                       for (k = 2; k < (1 << TDB_HASH_GROUP_BITS); k++) {
-                               kdata = make_key(0, j, 0, k, 0, 0);
-                               ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                               ok1(tdb_check(tdb, NULL, NULL) == 0);
-                       }
-
-                       /* This should tip it over to sub-sub-hash. */
-                       kdata = make_key(0, j, 0, 0, 0, 1);
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-                       ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL));
-                       /* Should have created correct hash. */
-                       ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
-                       /* Should have moved to subhash */
-                       ok1(h.group_start >= sizeof(struct tdb_header));
-                       ok1(h.home_bucket == 1);
-                       ok1(h.found_bucket == 1);
-                       ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
-                           + TDB_SUBLEVEL_HASH_BITS + TDB_SUBLEVEL_HASH_BITS);
-                       ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
-                                             F_RDLCK) == 0);
-               }
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-25-hashoverload.c b/ccan/tdb2/test/run-25-hashoverload.c
deleted file mode 100644 (file)
index 0e14302..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv)
-{
-       return 0;
-}
-
-static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
-{
-       if (p)
-               return tdb_delete(tdb, key);
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data dbuf = { (unsigned char *)&j, sizeof(j) };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = badhash } };
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT,
-       };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(6883);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
-
-               tdb = tdb_open("run-25-hashoverload.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Fill a group. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now store one last value: should form chain. */
-               ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Check we can find them all. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS) + 1; j++) {
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(d.dsize == sizeof(j));
-                       ok1(d.dptr != NULL);
-                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
-                       free(d.dptr);
-               }
-
-               /* Now add a *lot* more. */
-               for (j = (1 << TDB_HASH_GROUP_BITS) + 1;
-                    j < (16 << TDB_HASH_GROUP_BITS);
-                    j++) {
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(d.dsize == sizeof(j));
-                       ok1(d.dptr != NULL);
-                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
-                       free(d.dptr);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Traverse through them. */
-               ok1(tdb_traverse(tdb, trav, NULL) == j);
-
-               /* Empty the first chain-worth. */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
-                       ok1(tdb_delete(tdb, key) == 0);
-
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               for (j = (1 << TDB_HASH_GROUP_BITS);
-                    j < (16 << TDB_HASH_GROUP_BITS);
-                    j++) {
-                       ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
-                       ok1(d.dsize == sizeof(j));
-                       ok1(d.dptr != NULL);
-                       ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
-                       free(d.dptr);
-               }
-
-               /* Traverse through them. */
-               ok1(tdb_traverse(tdb, trav, NULL)
-                   == (15 << TDB_HASH_GROUP_BITS));
-
-               /* Re-add */
-               for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
-                       ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
-               }
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now try deleting as we go. */
-               ok1(tdb_traverse(tdb, trav, trav)
-                   == (16 << TDB_HASH_GROUP_BITS));
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb_traverse(tdb, trav, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-30-exhaust-before-expand.c b/ccan/tdb2/test/run-30-exhaust-before-expand.c
deleted file mode 100644 (file)
index db391e7..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <err.h>
-#include "logging.h"
-
-static bool empty_freetable(struct tdb_context *tdb)
-{
-       struct tdb_freetable ftab;
-       unsigned int i;
-
-       /* Now, free table should be completely exhausted in zone 0 */
-       if (tdb_read_convert(tdb, tdb->tdb2.ftable_off, &ftab, sizeof(ftab)) != 0)
-               abort();
-
-       for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) {
-               if (ftab.buckets[i])
-                       return false;
-       }
-       return true;
-}
-
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               TDB_DATA k;
-               uint64_t size;
-               bool was_empty = false;
-
-               k.dptr = (void *)&j;
-               k.dsize = sizeof(j);
-
-               tdb = tdb_open("run-30-exhaust-before-expand.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(empty_freetable(tdb));
-               /* Need some hash lock for expand. */
-               ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
-               /* Create some free space. */
-               ok1(tdb_expand(tdb, 1) == 0);
-               ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(!empty_freetable(tdb));
-
-               size = tdb->file->map_size;
-               /* Insert minimal-length records until we expand. */
-               for (j = 0; tdb->file->map_size == size; j++) {
-                       was_empty = empty_freetable(tdb);
-                       if (tdb_store(tdb, k, k, TDB_INSERT) != 0)
-                               err(1, "Failed to store record %i", j);
-               }
-
-               /* Would have been empty before expansion, but no longer. */
-               ok1(was_empty);
-               ok1(!empty_freetable(tdb));
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-35-convert.c b/ccan/tdb2/test/run-35-convert.c
deleted file mode 100644 (file)
index b7b1e6e..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <ccan/tdb2/private.h>
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, messages = 0;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-35-convert.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               if (!ok1(tdb))
-                       failtest_exit(exit_status());
-
-               tdb_close(tdb);
-               /* If we say TDB_CONVERT, it must be converted */
-               tdb = tdb_open("run-35-convert.tdb",
-                              flags[i]|TDB_CONVERT,
-                              O_RDWR, 0600, &tap_log_attr);
-               if (flags[i] & TDB_CONVERT) {
-                       if (!tdb)
-                               failtest_exit(exit_status());
-                       ok1(tdb_get_flags(tdb) & TDB_CONVERT);
-                       tdb_close(tdb);
-               } else {
-                       if (!ok1(!tdb && errno == EIO))
-                               failtest_exit(exit_status());
-                       ok1(tap_log_messages == ++messages);
-                       if (!ok1(log_last && strstr(log_last, "TDB_CONVERT")))
-                               failtest_exit(exit_status());
-               }
-
-               /* If don't say TDB_CONVERT, it *may* be converted */
-               tdb = tdb_open("run-35-convert.tdb",
-                              flags[i] & ~TDB_CONVERT,
-                              O_RDWR, 0600, &tap_log_attr);
-               if (!tdb)
-                       failtest_exit(exit_status());
-               ok1(tdb_get_flags(tdb) == flags[i]);
-               tdb_close(tdb);
-       }
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-50-multiple-freelists.c b/ccan/tdb2/test/run-50-multiple-freelists.c
deleted file mode 100644 (file)
index 44fee94..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-#include "layout.h"
-
-int main(int argc, char *argv[])
-{
-       tdb_off_t off;
-       struct tdb_context *tdb;
-       struct tdb_layout *layout;
-       TDB_DATA key, data;
-       union tdb_attribute seed;
-
-       /* This seed value previously tickled a layout.c bug. */
-       seed.base.attr = TDB_ATTRIBUTE_SEED;
-       seed.seed.seed = 0xb1142bc054d035b4ULL;
-       seed.base.next = &tap_log_attr;
-
-       plan_tests(11);
-       key = tdb_mkdata("Hello", 5);
-       data = tdb_mkdata("world", 5);
-
-       /* Create a TDB with three free tables. */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_free(layout, 80, 0);
-       /* Used record prevent coalescing. */
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_add_free(layout, 160, 1);
-       key.dsize--;
-       tdb_layout_add_used(layout, key, data, 7);
-       tdb_layout_add_free(layout, 320, 2);
-       key.dsize--;
-       tdb_layout_add_used(layout, key, data, 8);
-       tdb_layout_add_free(layout, 40, 0);
-       tdb = tdb_layout_get(layout, free, &seed);
-       ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-       off = get_free(tdb, 0, 80 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[3].base.off);
-       ok1(tdb->tdb2.ftable_off == layout->elem[0].base.off);
-
-       off = get_free(tdb, 0, 160 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[5].base.off);
-       ok1(tdb->tdb2.ftable_off == layout->elem[1].base.off);
-
-       off = get_free(tdb, 0, 320 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[7].base.off);
-       ok1(tdb->tdb2.ftable_off == layout->elem[2].base.off);
-
-       off = get_free(tdb, 0, 40 - sizeof(struct tdb_used_record), 0,
-                      TDB_USED_MAGIC, 0);
-       ok1(off == layout->elem[9].base.off);
-       ok1(tdb->tdb2.ftable_off == layout->elem[0].base.off);
-
-       /* Now we fail. */
-       off = get_free(tdb, 0, 0, 1, TDB_USED_MAGIC, 0);
-       ok1(off == 0);
-
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-56-open-during-transaction.c b/ccan/tdb2/test/run-56-open-during-transaction.c
deleted file mode 100644 (file)
index 9262c05..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-#include <ccan/tdb2/private.h>
-#include <unistd.h>
-#include "lock-tracking.h"
-
-static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
-static ssize_t write_check(int fd, const void *buf, size_t count);
-static int ftruncate_check(int fd, off_t length);
-
-#define pwrite pwrite_check
-#define write write_check
-#define fcntl fcntl_with_lockcheck
-#define ftruncate ftruncate_check
-
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdarg.h>
-#include <err.h>
-#include "external-agent.h"
-#include "logging.h"
-
-static struct agent *agent;
-static bool opened;
-static int errors = 0;
-#define TEST_DBNAME "run-56-open-during-transaction.tdb"
-
-#undef write
-#undef pwrite
-#undef fcntl
-#undef ftruncate
-
-static bool is_same(const char *snapshot, const char *latest, off_t len)
-{
-       unsigned i;
-
-       for (i = 0; i < len; i++) {
-               if (snapshot[i] != latest[i])
-                       return false;
-       }
-       return true;
-}
-
-static bool compare_file(int fd, const char *snapshot, off_t snapshot_len)
-{
-       char *contents;
-       bool same;
-
-       /* over-length read serves as length check. */
-       contents = malloc(snapshot_len+1);
-       same = pread(fd, contents, snapshot_len+1, 0) == snapshot_len
-               && is_same(snapshot, contents, snapshot_len);
-       free(contents);
-       return same;
-}
-
-static void check_file_intact(int fd)
-{
-       enum agent_return ret;
-       struct stat st;
-       char *contents;
-
-       fstat(fd, &st);
-       contents = malloc(st.st_size);
-       if (pread(fd, contents, st.st_size, 0) != st.st_size) {
-               diag("Read fail");
-               errors++;
-               return;
-       }
-
-       /* Ask agent to open file. */
-       ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
-
-       /* It's OK to open it, but it must not have changed! */
-       if (!compare_file(fd, contents, st.st_size)) {
-               diag("Agent changed file after opening %s",
-                    agent_return_name(ret));
-               errors++;
-       }
-
-       if (ret == SUCCESS) {
-               ret = external_agent_operation(agent, CLOSE, NULL);
-               if (ret != SUCCESS) {
-                       diag("Agent failed to close tdb: %s",
-                            agent_return_name(ret));
-                       errors++;
-               }
-       } else if (ret != WOULD_HAVE_BLOCKED) {
-               diag("Agent opening file gave %s",
-                    agent_return_name(ret));
-               errors++;
-       }
-
-       free(contents);
-}
-
-static void after_unlock(int fd)
-{
-       if (opened)
-               check_file_intact(fd);
-}
-
-static ssize_t pwrite_check(int fd,
-                           const void *buf, size_t count, off_t offset)
-{
-       if (opened)
-               check_file_intact(fd);
-
-       return pwrite(fd, buf, count, offset);
-}
-
-static ssize_t write_check(int fd, const void *buf, size_t count)
-{
-       if (opened)
-               check_file_intact(fd);
-
-       return write(fd, buf, count);
-}
-
-static int ftruncate_check(int fd, off_t length)
-{
-       if (opened)
-               check_file_intact(fd);
-
-       return ftruncate(fd, length);
-
-}
-
-int main(int argc, char *argv[])
-{
-       const int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       int i;
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-
-       plan_tests(sizeof(flags)/sizeof(flags[0]) * 5);
-       agent = prepare_external_agent();
-       if (!agent)
-               err(1, "preparing agent");
-
-       unlock_callback = after_unlock;
-       for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
-               diag("Test with %s and %s\n",
-                    (flags[i] & TDB_CONVERT) ? "CONVERT" : "DEFAULT",
-                    (flags[i] & TDB_NOMMAP) ? "no mmap" : "mmap");
-               unlink(TEST_DBNAME);
-               tdb = tdb_open(TEST_DBNAME, flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-
-               opened = true;
-               ok1(tdb_transaction_start(tdb) == 0);
-               key = tdb_mkdata("hi", strlen("hi"));
-               data = tdb_mkdata("world", strlen("world"));
-
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb_transaction_commit(tdb) == 0);
-               ok(!errors, "We had %u open errors", errors);
-
-               opened = false;
-               tdb_close(tdb);
-       }
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-57-die-during-transaction.c b/ccan/tdb2/test/run-57-die-during-transaction.c
deleted file mode 100644 (file)
index 42102ae..0000000
+++ /dev/null
@@ -1,296 +0,0 @@
-#include <ccan/tdb2/private.h>
-#include <unistd.h>
-#include "lock-tracking.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <assert.h>
-static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
-static ssize_t write_check(int fd, const void *buf, size_t count);
-static int ftruncate_check(int fd, off_t length);
-
-#define pwrite pwrite_check
-#define write write_check
-#define fcntl fcntl_with_lockcheck
-#define ftruncate ftruncate_check
-
-/* There's a malloc inside transaction_setup_recovery, and valgrind complains
- * when we longjmp and leak it. */
-#define MAX_ALLOCATIONS 10
-static void *allocated[MAX_ALLOCATIONS];
-static unsigned max_alloc = 0;
-
-static void *malloc_noleak(size_t len)
-{
-       unsigned int i;
-
-       for (i = 0; i < MAX_ALLOCATIONS; i++)
-               if (!allocated[i]) {
-                       allocated[i] = malloc(len);
-                       if (i > max_alloc) {
-                               max_alloc = i;
-                               diag("max_alloc: %i", max_alloc);
-                       }
-                       return allocated[i];
-               }
-       diag("Too many allocations!");
-       abort();
-}
-
-static void *realloc_noleak(void *p, size_t size)
-{
-       unsigned int i;
-
-       for (i = 0; i < MAX_ALLOCATIONS; i++) {
-               if (allocated[i] == p) {
-                       if (i > max_alloc) {
-                               max_alloc = i;
-                               diag("max_alloc: %i", max_alloc);
-                       }
-                       return allocated[i] = realloc(p, size);
-               }
-       }
-       diag("Untracked realloc!");
-       abort();
-}
-
-static void free_noleak(void *p)
-{
-       unsigned int i;
-
-       /* We don't catch asprintf, so don't complain if we miss one. */
-       for (i = 0; i < MAX_ALLOCATIONS; i++) {
-               if (allocated[i] == p) {
-                       allocated[i] = NULL;
-                       break;
-               }
-       }
-       free(p);
-}
-
-static void free_all(void)
-{
-       unsigned int i;
-
-       for (i = 0; i < MAX_ALLOCATIONS; i++) {
-               free(allocated[i]);
-               allocated[i] = NULL;
-       }
-}
-
-#define malloc malloc_noleak
-#define free free_noleak
-#define realloc realloc_noleak
-
-#include "tdb2-source.h"
-
-#undef malloc
-#undef free
-#undef realloc
-#undef write
-#undef pwrite
-#undef fcntl
-#undef ftruncate
-
-#include <stdbool.h>
-#include <stdarg.h>
-#include <err.h>
-#include <setjmp.h>
-#include "external-agent.h"
-#include "logging.h"
-
-static bool in_transaction;
-static int target, current;
-static jmp_buf jmpbuf;
-#define TEST_DBNAME "run-57-die-during-transaction.tdb"
-#define KEY_STRING "helloworld"
-
-static void maybe_die(int fd)
-{
-       if (in_transaction && current++ == target) {
-               longjmp(jmpbuf, 1);
-       }
-}
-
-static ssize_t pwrite_check(int fd,
-                           const void *buf, size_t count, off_t offset)
-{
-       ssize_t ret;
-
-       maybe_die(fd);
-
-       ret = pwrite(fd, buf, count, offset);
-       if (ret != count)
-               return ret;
-
-       maybe_die(fd);
-       return ret;
-}
-
-static ssize_t write_check(int fd, const void *buf, size_t count)
-{
-       ssize_t ret;
-
-       maybe_die(fd);
-
-       ret = write(fd, buf, count);
-       if (ret != count)
-               return ret;
-
-       maybe_die(fd);
-       return ret;
-}
-
-static int ftruncate_check(int fd, off_t length)
-{
-       int ret;
-
-       maybe_die(fd);
-
-       ret = ftruncate(fd, length);
-
-       maybe_die(fd);
-       return ret;
-}
-
-static bool test_death(enum operation op, struct agent *agent, int flags)
-{
-       struct tdb_context *tdb = NULL;
-       TDB_DATA key;
-       enum agent_return ret;
-       int needed_recovery = 0;
-
-       current = target = 0;
-reset:
-       unlink(TEST_DBNAME);
-       tdb = tdb_open(TEST_DBNAME, flags|TDB_NOMMAP,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr);
-       if (!tdb) {
-               diag("Failed opening TDB: %s", strerror(errno));
-               return false;
-       }
-
-       if (setjmp(jmpbuf) != 0) {
-               /* We're partway through.  Simulate our death. */
-               close(tdb->file->fd);
-               forget_locking();
-               in_transaction = false;
-
-               ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
-               if (ret == SUCCESS)
-                       needed_recovery++;
-               else if (ret != FAILED) {
-                       diag("Step %u agent NEEDS_RECOVERY = %s", current,
-                            agent_return_name(ret));
-                       return false;
-               }
-
-               ret = external_agent_operation(agent, op, KEY_STRING);
-               if (ret != SUCCESS) {
-                       diag("Step %u op %s failed = %s", current,
-                            operation_name(op),
-                            agent_return_name(ret));
-                       return false;
-               }
-
-               ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
-               if (ret != FAILED) {
-                       diag("Still needs recovery after step %u = %s",
-                            current, agent_return_name(ret));
-                       return false;
-               }
-
-               ret = external_agent_operation(agent, CHECK, "");
-               if (ret != SUCCESS) {
-                       diag("Step %u check failed = %s", current,
-                            agent_return_name(ret));
-                       return false;
-               }
-
-               ret = external_agent_operation(agent, CLOSE, "");
-               if (ret != SUCCESS) {
-                       diag("Step %u close failed = %s", current,
-                            agent_return_name(ret));
-                       return false;
-               }
-
-               /* Suppress logging as this tries to use closed fd. */
-               suppress_logging = true;
-               suppress_lockcheck = true;
-               tdb_close(tdb);
-               suppress_logging = false;
-               suppress_lockcheck = false;
-               target++;
-               current = 0;
-               free_all();
-               goto reset;
-       }
-
-       /* Put key for agent to fetch. */
-       key = tdb_mkdata(KEY_STRING, strlen(KEY_STRING));
-       if (tdb_store(tdb, key, key, TDB_INSERT) != 0)
-               return false;
-
-       /* This is the key we insert in transaction. */
-       key.dsize--;
-
-       ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
-       if (ret != SUCCESS)
-               errx(1, "Agent failed to open: %s", agent_return_name(ret));
-
-       ret = external_agent_operation(agent, FETCH, KEY_STRING);
-       if (ret != SUCCESS)
-               errx(1, "Agent failed find key: %s", agent_return_name(ret));
-
-       in_transaction = true;
-       if (tdb_transaction_start(tdb) != 0)
-               return false;
-
-       if (tdb_store(tdb, key, key, TDB_INSERT) != 0)
-               return false;
-
-       if (tdb_transaction_commit(tdb) != 0)
-               return false;
-
-       in_transaction = false;
-
-       /* We made it! */
-       diag("Completed %u runs", current);
-       tdb_close(tdb);
-       ret = external_agent_operation(agent, CLOSE, "");
-       if (ret != SUCCESS) {
-               diag("Step %u close failed = %s", current,
-                    agent_return_name(ret));
-               return false;
-       }
-
-       ok1(needed_recovery);
-       ok1(locking_errors == 0);
-       ok1(forget_locking() == 0);
-       locking_errors = 0;
-       return true;
-}
-
-int main(int argc, char *argv[])
-{
-       enum operation ops[] = { FETCH, STORE, TRANSACTION_START };
-       struct agent *agent;
-       int i, flags;
-
-       plan_tests(24);
-       unlock_callback = maybe_die;
-
-       external_agent_free = free_noleak;
-       agent = prepare_external_agent();
-       if (!agent)
-               err(1, "preparing agent");
-
-       for (flags = TDB_DEFAULT; flags <= TDB_VERSION1; flags += TDB_VERSION1) {
-               for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) {
-                       diag("Testing %s after death", operation_name(ops[i]));
-                       ok1(test_death(ops[i], agent, flags));
-               }
-       }
-
-       free_external_agent(agent);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-64-bit-tdb.c b/ccan/tdb2/test/run-64-bit-tdb.c
deleted file mode 100644 (file)
index 566f546..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       if (sizeof(off_t) <= 4) {
-               plan_tests(1);
-               pass("No 64 bit off_t");
-               return exit_status();
-       }
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               off_t old_size;
-               TDB_DATA k, d;
-               struct hash_info h;
-               struct tdb_used_record rec;
-               tdb_off_t off;
-
-               tdb = tdb_open("run-64-bit-tdb.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               old_size = tdb->file->map_size;
-
-               /* This makes a sparse file */
-               ok1(ftruncate(tdb->file->fd, 0xFFFFFFF0) == 0);
-               ok1(add_free_record(tdb, old_size, 0xFFFFFFF0 - old_size,
-                                   TDB_LOCK_WAIT, false) == TDB_SUCCESS);
-
-               /* Now add a little record past the 4G barrier. */
-               ok1(tdb_expand_file(tdb, 100) == TDB_SUCCESS);
-               ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT, false)
-                   == TDB_SUCCESS);
-
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-               /* Test allocation path. */
-               k = tdb_mkdata("key", 4);
-               d = tdb_mkdata("data", 5);
-               ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-               /* Make sure it put it at end as we expected. */
-               off = find_and_lock(tdb, k, F_RDLCK, &h, &rec, NULL);
-               ok1(off >= 0xFFFFFFF0);
-               tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-
-               ok1(tdb_fetch(tdb, k, &d) == 0);
-               ok1(d.dsize == 5);
-               ok1(strcmp((char *)d.dptr, "data") == 0);
-               free(d.dptr);
-
-               ok1(tdb_delete(tdb, k) == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-               tdb_close(tdb);
-       }
-
-       /* We might get messages about mmap failing, so don't test
-        * tap_log_messages */
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-90-get-set-attributes.c b/ccan/tdb2/test/run-90-get-set-attributes.c
deleted file mode 100644 (file)
index 4cbbda0..0000000
+++ /dev/null
@@ -1,186 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
-                 void *unused)
-{
-       return 0;
-}
-
-static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
-{
-       return 0;
-}
-
-static uint64_t hash_fn(const void *key, size_t len, uint64_t seed,
-                       void *priv)
-{
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       union tdb_attribute seed_attr;
-       union tdb_attribute hash_attr;
-       union tdb_attribute lock_attr;
-
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
-       seed_attr.base.next = &hash_attr;
-       seed_attr.seed.seed = 100;
-
-       hash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       hash_attr.base.next = &lock_attr;
-       hash_attr.hash.fn = hash_fn;
-       hash_attr.hash.data = &hash_attr;
-
-       lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-       lock_attr.base.next = &tap_log_attr;
-       lock_attr.flock.lock = mylock;
-       lock_attr.flock.unlock = myunlock;
-       lock_attr.flock.data = &lock_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 49
-                  + sizeof(flags) / sizeof(flags[0]) / 2);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               union tdb_attribute attr;
-
-               /* First open with no attributes. */
-               tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
-               ok1(tdb);
-
-               /* Get log on no attributes will fail */
-               attr.base.attr = TDB_ATTRIBUTE_LOG;
-               ok1(tdb_get_attribute(tdb, &attr) == TDB_ERR_NOEXIST);
-               /* These always work. */
-               attr.base.attr = TDB_ATTRIBUTE_HASH;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_HASH);
-               if (flags[i] & TDB_VERSION1) {
-                       ok1(attr.hash.fn == tdb1_old_hash);
-               } else {
-                       ok1(attr.hash.fn == tdb_jenkins_hash);
-               }
-               attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
-               ok1(attr.flock.lock == tdb_fcntl_lock);
-               ok1(attr.flock.unlock == tdb_fcntl_unlock);
-               attr.base.attr = TDB_ATTRIBUTE_SEED;
-               if (flags[i] & TDB_VERSION1) {
-                       ok1(tdb_get_attribute(tdb, &attr) == TDB_ERR_EINVAL);
-                       tap_log_messages = 0;
-               } else {
-                       ok1(tdb_get_attribute(tdb, &attr) == 0);
-                       ok1(attr.base.attr == TDB_ATTRIBUTE_SEED);
-                       /* This is possible, just astronomically unlikely. */
-                       ok1(attr.seed.seed != 0);
-               }
-
-               /* Unset attributes. */
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-
-               /* Set them. */
-               ok1(tdb_set_attribute(tdb, &tap_log_attr) == 0);
-               ok1(tdb_set_attribute(tdb, &lock_attr) == 0);
-               /* These should fail. */
-               ok1(tdb_set_attribute(tdb, &seed_attr) == TDB_ERR_EINVAL);
-               ok1(tap_log_messages == 1);
-               ok1(tdb_set_attribute(tdb, &hash_attr) == TDB_ERR_EINVAL);
-               ok1(tap_log_messages == 2);
-               tap_log_messages = 0;
-
-               /* Getting them should work as expected. */
-               attr.base.attr = TDB_ATTRIBUTE_LOG;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_LOG);
-               ok1(attr.log.fn == tap_log_attr.log.fn);
-               ok1(attr.log.data == tap_log_attr.log.data);
-
-               attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
-               ok1(attr.flock.lock == mylock);
-               ok1(attr.flock.unlock == myunlock);
-               ok1(attr.flock.data == &lock_attr);
-
-               /* Unset them again. */
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-               ok1(tap_log_messages == 0);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
-               ok1(tap_log_messages == 0);
-
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-
-               /* Now open with all attributes. */
-               tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600,
-                              &seed_attr);
-
-               if (flags[i] & TDB_VERSION1) {
-                       ok1(!tdb);
-                       ok1(tap_log_messages == 1);
-                       tap_log_messages = 0;
-                       tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
-                                      O_RDWR|O_CREAT|O_TRUNC, 0600,
-                                      &hash_attr);
-               }
-               ok1(tdb);
-
-               /* Get will succeed */
-               attr.base.attr = TDB_ATTRIBUTE_LOG;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_LOG);
-               ok1(attr.log.fn == tap_log_attr.log.fn);
-               ok1(attr.log.data == tap_log_attr.log.data);
-
-               attr.base.attr = TDB_ATTRIBUTE_HASH;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_HASH);
-               ok1(attr.hash.fn == hash_fn);
-               ok1(attr.hash.data == &hash_attr);
-
-               attr.base.attr = TDB_ATTRIBUTE_FLOCK;
-               ok1(tdb_get_attribute(tdb, &attr) == 0);
-               ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
-               ok1(attr.flock.lock == mylock);
-               ok1(attr.flock.unlock == myunlock);
-               ok1(attr.flock.data == &lock_attr);
-
-               attr.base.attr = TDB_ATTRIBUTE_SEED;
-               if (flags[i] & TDB_VERSION1) {
-                       ok1(tdb_get_attribute(tdb, &attr) == TDB_ERR_EINVAL);
-                       ok1(tap_log_messages == 1);
-                       tap_log_messages = 0;
-               } else {
-                       ok1(tdb_get_attribute(tdb, &attr) == 0);
-                       ok1(attr.base.attr == TDB_ATTRIBUTE_SEED);
-                       ok1(attr.seed.seed == seed_attr.seed.seed);
-               }
-
-               /* Unset attributes. */
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_HASH);
-               ok1(tap_log_messages == 1);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_SEED);
-               ok1(tap_log_messages == 2);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
-               tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
-               ok1(tap_log_messages == 2);
-               tap_log_messages = 0;
-
-               tdb_close(tdb);
-
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-capabilities.c b/ccan/tdb2/test/run-capabilities.c
deleted file mode 100644 (file)
index 4b25f9c..0000000
+++ /dev/null
@@ -1,272 +0,0 @@
-#include <ccan/failtest/failtest_override.h>
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-#include "layout.h"
-#include "failtest_helper.h"
-#include <stdarg.h>
-#include <err.h>
-
-static size_t len_of(bool breaks_check, bool breaks_write, bool breaks_open)
-{
-       size_t len = 0;
-       if (breaks_check)
-               len += 8;
-       if (breaks_write)
-               len += 16;
-       if (breaks_open)
-               len += 32;
-       return len;
-}
-
-/* Creates a TDB with various capabilities. */
-static void create_tdb(const char *name,
-                      unsigned int cap,
-                      bool breaks_check,
-                      bool breaks_write,
-                      bool breaks_open, ...)
-{
-       TDB_DATA key, data;
-       va_list ap;
-       struct tdb_layout *layout;
-       struct tdb_context *tdb;
-       int fd;
-
-       key = tdb_mkdata("Hello", 5);
-       data = tdb_mkdata("world", 5);
-
-       /* Create a TDB with some data, and some capabilities */
-       layout = new_tdb_layout();
-       tdb_layout_add_freetable(layout);
-       tdb_layout_add_used(layout, key, data, 6);
-       tdb_layout_add_free(layout, 80, 0);
-       tdb_layout_add_capability(layout, cap,
-                                 breaks_write, breaks_check, breaks_open,
-                                 len_of(breaks_check, breaks_write, breaks_open));
-
-       va_start(ap, breaks_open);
-       while ((cap = va_arg(ap, int)) != 0) {
-               breaks_check = va_arg(ap, int);
-               breaks_write = va_arg(ap, int);
-               breaks_open = va_arg(ap, int);
-
-               key.dsize--;
-               tdb_layout_add_used(layout, key, data, 11 - key.dsize);
-               tdb_layout_add_free(layout, 80, 0);
-               tdb_layout_add_capability(layout, cap,
-                                         breaks_write, breaks_check,
-                                         breaks_open,
-                                         len_of(breaks_check, breaks_write,
-                                                breaks_open));
-       }
-       va_end(ap);
-
-       /* We open-code this, because we need to use the failtest write. */
-       tdb = tdb_layout_get(layout, failtest_free, &tap_log_attr);
-
-       fd = open(name, O_RDWR|O_TRUNC|O_CREAT, 0600);
-       if (fd < 0)
-               err(1, "opening %s for writing", name);
-       if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
-           != tdb->file->map_size)
-               err(1, "writing %s", name);
-       close(fd);
-       tdb_close(tdb);
-       tdb_layout_free(layout);
-}
-
-/* Note all the "goto out" early exits: they're to shorten failtest time. */
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       char *summary;
-
-       failtest_init(argc, argv);
-       failtest_hook = block_repeat_failures;
-       failtest_exit_check = exit_check_log;
-       plan_tests(60);
-
-       failtest_suppress = true;
-       /* Capability says you can ignore it? */
-       create_tdb("run-capabilities.tdb", 1, false, false, false, 0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       if (!ok1(tdb))
-               goto out;
-       ok1(tap_log_messages == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       ok1(tap_log_messages == 0);
-       tdb_close(tdb);
-
-       /* Two capabilitues say you can ignore them? */
-       create_tdb("run-capabilities.tdb",
-                  1, false, false, false,
-                  2, false, false, false, 0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       if (!ok1(tdb))
-               goto out;
-       ok1(tap_log_messages == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       ok1(tap_log_messages == 0);
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
-       ok1(strstr(summary, "Capability 1\n"));
-       free(summary);
-       tdb_close(tdb);
-
-       /* Capability says you can't check. */
-       create_tdb("run-capabilities.tdb",
-                  1, false, false, false,
-                  2, true, false, false, 0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       if (!ok1(tdb))
-               goto out;
-       ok1(tap_log_messages == 0);
-       ok1(tdb_get_flags(tdb) & TDB_CANT_CHECK);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       /* We expect a warning! */
-       ok1(tap_log_messages == 1);
-       ok1(strstr(log_last, "capabilit"));
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
-       ok1(strstr(summary, "Capability 1\n"));
-       ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
-       free(summary);
-       tdb_close(tdb);
-
-       /* Capability says you can't write. */
-       create_tdb("run-capabilities.tdb",
-                  1, false, false, false,
-                  2, false, true, false, 0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       /* We expect a message. */
-       ok1(!tdb);
-       if (!ok1(tap_log_messages == 2))
-               goto out;
-       if (!ok1(strstr(log_last, "unknown")))
-               goto out;
-       ok1(strstr(log_last, "write"));
-
-       /* We can open it read-only though! */
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDONLY, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       if (!ok1(tdb))
-               goto out;
-       ok1(tap_log_messages == 2);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       ok1(tap_log_messages == 2);
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
-       ok1(strstr(summary, "Capability 1\n"));
-       ok1(strstr(summary, "Capability 2 (read-only)\n"));
-       free(summary);
-       tdb_close(tdb);
-
-       /* Capability says you can't open. */
-       create_tdb("run-capabilities.tdb",
-                  1, false, false, false,
-                  2, false, false, true, 0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       /* We expect a message. */
-       ok1(!tdb);
-       if (!ok1(tap_log_messages == 3))
-               goto out;
-       if (!ok1(strstr(log_last, "unknown")))
-               goto out;
-
-       /* Combine capabilities correctly. */
-       create_tdb("run-capabilities.tdb",
-                  1, false, false, false,
-                  2, true, false, false,
-                  3, false, true, false, 0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       /* We expect a message. */
-       ok1(!tdb);
-       if (!ok1(tap_log_messages == 4))
-               goto out;
-       if (!ok1(strstr(log_last, "unknown")))
-               goto out;
-       ok1(strstr(log_last, "write"));
-
-       /* We can open it read-only though! */
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDONLY, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       if (!ok1(tdb))
-               goto out;
-       ok1(tap_log_messages == 4);
-       ok1(tdb_get_flags(tdb) & TDB_CANT_CHECK);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       /* We expect a warning! */
-       ok1(tap_log_messages == 5);
-       ok1(strstr(log_last, "unknown"));
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
-       ok1(strstr(summary, "Capability 1\n"));
-       ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
-       ok1(strstr(summary, "Capability 3 (read-only)\n"));
-       free(summary);
-       tdb_close(tdb);
-
-       /* Two capability flags in one. */
-       create_tdb("run-capabilities.tdb",
-                  1, false, false, false,
-                  2, true, true, false,
-                  0);
-
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDWR, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       /* We expect a message. */
-       ok1(!tdb);
-       if (!ok1(tap_log_messages == 6))
-               goto out;
-       if (!ok1(strstr(log_last, "unknown")))
-               goto out;
-       ok1(strstr(log_last, "write"));
-
-       /* We can open it read-only though! */
-       failtest_suppress = false;
-       tdb = tdb_open("run-capabilities.tdb", TDB_DEFAULT, O_RDONLY, 0,
-                      &tap_log_attr);
-       failtest_suppress = true;
-       if (!ok1(tdb))
-               goto out;
-       ok1(tap_log_messages == 6);
-       ok1(tdb_get_flags(tdb) & TDB_CANT_CHECK);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       /* We expect a warning! */
-       ok1(tap_log_messages == 7);
-       ok1(strstr(log_last, "unknown"));
-       ok1(tdb_summary(tdb, 0, &summary) == TDB_SUCCESS);
-       ok1(strstr(summary, "Capability 1\n"));
-       ok1(strstr(summary, "Capability 2 (uncheckable,read-only)\n"));
-       free(summary);
-       tdb_close(tdb);
-
-out:
-       failtest_exit(exit_status());
-}
diff --git a/ccan/tdb2/test/run-expand-in-transaction.c b/ccan/tdb2/test/run-expand-in-transaction.c
deleted file mode 100644 (file)
index 0fa2a57..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       struct tdb_data key = tdb_mkdata("key", 3);
-       struct tdb_data data = tdb_mkdata("data", 4);
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
-
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               size_t size;
-               tdb = tdb_open("run-expand-in-transaction.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               size = tdb->file->map_size;
-               ok1(tdb_transaction_start(tdb) == 0);
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
-               ok1(tdb->file->map_size > size);
-               ok1(tdb_transaction_commit(tdb) == 0);
-               ok1(tdb->file->map_size > size);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-features.c b/ccan/tdb2/test/run-features.c
deleted file mode 100644 (file)
index b086869..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
-       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               uint64_t features;
-               tdb = tdb_open("run-features.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Put some stuff in there. */
-               for (j = 0; j < 100; j++) {
-                       if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                               fail("Storing in tdb");
-               }
-
-               /* Mess with features fields in hdr. */
-               features = (~TDB_FEATURE_MASK ^ 1);
-               ok1(tdb_write_convert(tdb, offsetof(struct tdb_header,
-                                                   features_used), 
-                                     &features, sizeof(features)) == 0);
-               ok1(tdb_write_convert(tdb, offsetof(struct tdb_header,
-                                                   features_offered), 
-                                     &features, sizeof(features)) == 0);
-               tdb_close(tdb);
-
-               tdb = tdb_open("run-features.tdb", flags[i], O_RDWR, 0,
-                              &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Should not have changed features offered. */
-               ok1(tdb_read_convert(tdb, offsetof(struct tdb_header,
-                                                  features_offered), 
-                                    &features, sizeof(features)) == 0);
-               ok1(features == (~TDB_FEATURE_MASK ^ 1));
-
-               /* Should have cleared unknown bits in features_used. */
-               ok1(tdb_read_convert(tdb, offsetof(struct tdb_header,
-                                                  features_used), 
-                                    &features, sizeof(features)) == 0);
-               ok1(features == (1 & TDB_FEATURE_MASK));
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
-
-
diff --git a/ccan/tdb2/test/run-lockall.c b/ccan/tdb2/test/run-lockall.c
deleted file mode 100644 (file)
index 7cd9b84..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <ccan/tdb2/private.h>
-#include <unistd.h>
-#include "lock-tracking.h"
-
-#define fcntl fcntl_with_lockcheck
-#include "tdb2-source.h"
-
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdarg.h>
-#include <err.h>
-#include "external-agent.h"
-#include "logging.h"
-
-#define TEST_DBNAME "run-lockall.tdb"
-
-#undef fcntl
-
-int main(int argc, char *argv[])
-{
-       struct agent *agent;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
-                       TDB_VERSION1, TDB_NOMMAP|TDB_VERSION1,
-                       TDB_CONVERT|TDB_VERSION1,
-                       TDB_NOMMAP|TDB_CONVERT|TDB_VERSION1 };
-       int i;
-
-       plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1);
-       agent = prepare_external_agent();
-       if (!agent)
-               err(1, "preparing agent");
-
-       for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
-               enum agent_return ret;
-               struct tdb_context *tdb;
-
-               tdb = tdb_open(TEST_DBNAME, flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-
-               ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
-               ok1(ret == SUCCESS);
-
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
-               ok1(external_agent_operation(agent, STORE, "key")
-                   == WOULD_HAVE_BLOCKED);
-               ok1(external_agent_operation(agent, FETCH, "key")
-                   == WOULD_HAVE_BLOCKED);
-               /* Test nesting. */
-               ok1(tdb_lockall(tdb) == TDB_SUCCESS);
-               tdb_unlockall(tdb);
-               tdb_unlockall(tdb);
-
-               ok1(external_agent_operation(agent, STORE, "key") == SUCCESS);
-
-               ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
-               ok1(external_agent_operation(agent, STORE, "key")
-                   == WOULD_HAVE_BLOCKED);
-               ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS);
-               ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
-               tdb_unlockall_read(tdb);
-               tdb_unlockall_read(tdb);
-
-               ok1(external_agent_operation(agent, STORE, "key") == SUCCESS);
-               ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS);
-               tdb_close(tdb);
-       }
-
-       free_external_agent(agent);
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-remap-in-read_traverse.c b/ccan/tdb2/test/run-remap-in-read_traverse.c
deleted file mode 100644 (file)
index b70a841..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "tdb2-source.h"
-/* We had a bug where we marked the tdb read-only for a tdb_traverse_read.
- * If we then expanded the tdb, we would remap read-only, and later SEGV. */
-#include <ccan/tap/tap.h>
-#include "external-agent.h"
-#include "logging.h"
-
-static bool file_larger(int fd, tdb_len_t size)
-{
-       struct stat st;
-
-       fstat(fd, &st);
-       return st.st_size != size;
-}
-
-static unsigned add_records_to_grow(struct agent *agent, int fd, tdb_len_t size)
-{
-       unsigned int i;
-
-       for (i = 0; !file_larger(fd, size); i++) {
-               char data[20];
-               sprintf(data, "%i", i);
-               if (external_agent_operation(agent, STORE, data) != SUCCESS)
-                       return 0;
-       }
-       diag("Added %u records to grow file", i);
-       return i;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct agent *agent;
-       struct tdb_context *tdb;
-       struct tdb_data d = tdb_mkdata("hello", 5);
-       const char filename[] = "run-remap-in-read_traverse.tdb";
-
-       plan_tests(4);
-
-       agent = prepare_external_agent();
-
-       tdb = tdb_open(filename, TDB_DEFAULT,
-                      O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-
-       ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS);
-       i = add_records_to_grow(agent, tdb->file->fd, tdb->file->map_size);
-
-       /* Do a traverse. */
-       ok1(tdb_traverse(tdb, NULL, NULL) == i);
-
-       /* Now store something! */
-       ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0);
-       ok1(tap_log_messages == 0);
-       tdb_close(tdb);
-       free_external_agent(agent);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-seed.c b/ccan/tdb2/test/run-seed.c
deleted file mode 100644 (file)
index 0919396..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-static int log_count = 0;
-
-/* Normally we get a log when setting random seed. */
-static void my_log_fn(struct tdb_context *tdb,
-                     enum tdb_log_level level,
-                     enum TDB_ERROR ecode,
-                     const char *message, void *priv)
-{
-       log_count++;
-}
-
-static union tdb_attribute log_attr = {
-       .log = { .base = { .attr = TDB_ATTRIBUTE_LOG },
-                .fn = my_log_fn }
-};
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       struct tdb_context *tdb;
-       union tdb_attribute attr;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT };
-
-       attr.seed.base.attr = TDB_ATTRIBUTE_SEED;
-       attr.seed.base.next = &log_attr;
-       attr.seed.seed = 42;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               struct tdb_header hdr;
-               int fd;
-               tdb = tdb_open("run-seed.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &attr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tdb->hash_seed == 42);
-               ok1(log_count == 0);
-               tdb_close(tdb);
-
-               if (flags[i] & TDB_INTERNAL)
-                       continue;
-
-               fd = open("run-seed.tdb", O_RDONLY);
-               ok1(fd >= 0);
-               ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
-               if (flags[i] & TDB_CONVERT)
-                       ok1(bswap_64(hdr.hash_seed) == 42);
-               else
-                       ok1(hdr.hash_seed == 42);
-               close(fd);
-       }
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-3G-file.c b/ccan/tdb2/test/run-tdb1-3G-file.c
deleted file mode 100644 (file)
index f3403dd..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-/* We need this otherwise fcntl locking fails. */
-#define _FILE_OFFSET_BITS 64
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-static int tdb1_expand_file_sparse(struct tdb_context *tdb,
-                                 tdb1_off_t size,
-                                 tdb1_off_t addition)
-{
-       if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
-               tdb->last_error = TDB_ERR_RDONLY;
-               return -1;
-       }
-
-       if (ftruncate(tdb->file->fd, size+addition) == -1) {
-               char b = 0;
-               ssize_t written = pwrite(tdb->file->fd,  &b, 1, (size+addition) - 1);
-               if (written == 0) {
-                       /* try once more, potentially revealing errno */
-                       written = pwrite(tdb->file->fd,  &b, 1, (size+addition) - 1);
-               }
-               if (written == 0) {
-                       /* again - give up, guessing errno */
-                       errno = ENOSPC;
-               }
-               if (written != 1) {
-                       tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                               "expand_file to %d failed (%s)",
-                                               size+addition,
-                                               strerror(errno));
-                       return -1;
-               }
-       }
-
-       return 0;
-}
-
-static const struct tdb1_methods large_io_methods = {
-       tdb1_read,
-       tdb1_write,
-       tdb1_next_hash_chain,
-       tdb1_oob,
-       tdb1_expand_file_sparse
-};
-
-static int test_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                        void *_data)
-{
-       TDB_DATA *expect = _data;
-       ok1(key.dsize == strlen("hi"));
-       ok1(memcmp(key.dptr, "hi", strlen("hi")) == 0);
-       ok1(data.dsize == expect->dsize);
-       ok1(memcmp(data.dptr, expect->dptr, data.dsize) == 0);
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, orig_data, data;
-       uint32_t hash;
-       tdb1_off_t rec_ptr;
-       struct tdb1_record rec;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(26);
-       tdb = tdb_open("run-36-file.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       ok1(tdb);
-       tdb->tdb1.io = &large_io_methods;
-
-       /* Enlarge the file (internally multiplies by 2). */
-       ok1(tdb1_expand(tdb, 1500000000) == 0);
-
-       /* Put an entry in, and check it. */
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       orig_data.dsize = strlen("world");
-       orig_data.dptr = (void *)"world";
-
-       ok1(tdb_store(tdb, key, orig_data, TDB_INSERT) == TDB_SUCCESS);
-
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-
-       /* That currently fills at the end, make sure that's true. */
-       hash = tdb_hash(tdb, key.dptr, key.dsize);
-       rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec);
-       ok1(rec_ptr);
-       ok1(rec_ptr > 2U*1024*1024*1024);
-       tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
-
-       /* Traverse must work. */
-       ok1(tdb_traverse(tdb, test_traverse, &orig_data) == 1);
-
-       /* Delete should work. */
-       ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-
-       ok1(tdb_traverse(tdb, test_traverse, NULL) == 0);
-
-       /* Transactions should work. */
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(tdb_store(tdb, key, orig_data, TDB_INSERT) == TDB_SUCCESS);
-
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-
-       ok1(tdb_traverse(tdb, test_traverse, &orig_data) == 1);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-bad-tdb-header.c b/ccan/tdb2/test/run-tdb1-bad-tdb-header.c
deleted file mode 100644 (file)
index c1d7674..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       struct tdb1_header hdr;
-       int fd;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(11);
-       /* Cannot open fine if complete crap, even with O_CREAT. */
-       fd = open("run-bad-tdb-header.tdb1", O_RDWR|O_CREAT|O_TRUNC, 0600);
-       ok1(fd >= 0);
-       ok1(write(fd, "hello world", 11) == 11);
-       close(fd);
-       tdb = tdb_open("run-bad-tdb-header.tdb1", 0, O_RDWR, 0, &tap_log_attr);
-       ok1(!tdb);
-       tdb = tdb_open("run-bad-tdb-header.tdb1", 0, O_CREAT|O_RDWR,
-                       0600, &hsize);
-       ok1(!tdb);
-
-       /* With truncate, will be fine. */
-       tdb = tdb_open("run-bad-tdb-header.tdb1", TDB_VERSION1,
-                      O_RDWR|O_CREAT|O_TRUNC, 0600, &hsize);
-       ok1(tdb);
-       tdb_close(tdb);
-
-       /* Now, with wrong version it should *not* overwrite. */
-       fd = open("run-bad-tdb-header.tdb1", O_RDWR);
-       ok1(fd >= 0);
-       ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
-       ok1(hdr.version == TDB1_VERSION);
-       hdr.version++;
-       lseek(fd, 0, SEEK_SET);
-       ok1(write(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
-       close(fd);
-
-       tdb = tdb_open("run-bad-tdb-header.tdb1", TDB_VERSION1, O_RDWR|O_CREAT,
-                      0600, &hsize);
-       ok1(errno == EIO);
-       ok1(!tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-check.c b/ccan/tdb2/test/run-tdb1-check.c
deleted file mode 100644 (file)
index e939d04..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1;
-
-       plan_tests(13);
-       tdb = tdb_open("run-check.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dsize = strlen("world");
-       data.dptr = (void *)"world";
-
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       tdb = tdb_open("run-check.tdb1", TDB_VERSION1, O_RDWR, 0, &tap_log_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       tdb = tdb_open("test/tdb1.corrupt", TDB_VERSION1, O_RDWR, 0,
-                       &tap_log_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_ERR_CORRUPT);
-       ok1(tdb_error(tdb) == TDB_ERR_CORRUPT);
-       tdb_close(tdb);
-
-       /* Big and little endian should work! */
-       tdb = tdb_open("test/old-nohash-le.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       tdb = tdb_open("test/old-nohash-be.tdb1", TDB_VERSION1, O_RDWR, 0,
-                       &tap_log_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-corrupt.c b/ccan/tdb2/test/run-tdb1-corrupt.c
deleted file mode 100644 (file)
index 35bc4c3..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-static int check(TDB_DATA key, TDB_DATA data, void *private)
-{
-       unsigned int *sizes = private;
-
-       if (key.dsize > strlen("hello"))
-               return -1;
-       if (memcmp(key.dptr, "hello", key.dsize) != 0)
-               return -1;
-
-       if (data.dsize != strlen("world"))
-               return -1;
-       if (memcmp(data.dptr, "world", data.dsize) != 0)
-               return -1;
-
-       sizes[0] += key.dsize;
-       sizes[1] += data.dsize;
-       return 0;
-}
-
-static void tdb1_flip_bit(struct tdb_context *tdb, unsigned int bit)
-{
-       unsigned int off = bit / CHAR_BIT;
-       unsigned char mask = (1 << (bit % CHAR_BIT));
-
-       if (tdb->file->map_ptr)
-               ((unsigned char *)tdb->file->map_ptr)[off] ^= mask;
-       else {
-               unsigned char c;
-               if (pread(tdb->file->fd, &c, 1, off) != 1)
-                       err(1, "pread");
-               c ^= mask;
-               if (pwrite(tdb->file->fd, &c, 1, off) != 1)
-                       err(1, "pwrite");
-       }
-}
-
-static void check_test(struct tdb_context *tdb)
-{
-       TDB_DATA key, data;
-       unsigned int i, verifiable, corrupt, sizes[2], dsize, ksize;
-
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-       key.dptr = (void *)"hello";
-       data.dsize = strlen("world");
-       data.dptr = (void *)"world";
-
-       /* Key and data size respectively. */
-       dsize = ksize = 0;
-
-       /* 5 keys in hash size 2 means we'll have multichains. */
-       for (key.dsize = 1; key.dsize <= 5; key.dsize++) {
-               ksize += key.dsize;
-               dsize += data.dsize;
-               if (tdb_store(tdb, key, data, TDB_INSERT) != TDB_SUCCESS)
-                       abort();
-       }
-
-       /* This is how many bytes we expect to be verifiable. */
-       /* From the file header. */
-       verifiable = strlen(TDB_MAGIC_FOOD) + 1
-               + 2 * sizeof(uint32_t) + 2 * sizeof(tdb1_off_t)
-               + 2 * sizeof(uint32_t);
-       /* From the free list chain and hash chains. */
-       verifiable += 3 * sizeof(tdb1_off_t);
-       /* From the record headers & tailer */
-       verifiable += 5 * (sizeof(struct tdb1_record) + sizeof(uint32_t));
-       /* The free block: we ignore datalen, keylen, full_hash. */
-       verifiable += sizeof(struct tdb1_record) - 3*sizeof(uint32_t) +
-               sizeof(uint32_t);
-       /* Our check function verifies the key and data. */
-       verifiable += ksize + dsize;
-
-       /* Flip one bit at a time, make sure it detects verifiable bytes. */
-       for (i = 0, corrupt = 0; i < tdb->file->map_size * CHAR_BIT; i++) {
-               tdb1_flip_bit(tdb, i);
-               memset(sizes, 0, sizeof(sizes));
-               if (tdb_check(tdb, check, sizes) == TDB_ERR_CORRUPT)
-                       corrupt++;
-               else if (sizes[0] != ksize || sizes[1] != dsize)
-                       corrupt++;
-               tdb1_flip_bit(tdb, i);
-       }
-       ok(corrupt == verifiable * CHAR_BIT, "corrupt %u should be %u",
-          corrupt, verifiable * CHAR_BIT);
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 2;
-
-       plan_tests(4);
-       /* This should use mmap. */
-       tdb = tdb_open("run-corrupt.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       if (!tdb)
-               abort();
-       check_test(tdb);
-       tdb_close(tdb);
-
-       /* This should not. */
-       tdb = tdb_open("run-corrupt.tdb1", TDB_VERSION1|TDB_NOMMAP,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       if (!tdb)
-               abort();
-       check_test(tdb);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-endian.c b/ccan/tdb2/test/run-tdb1-endian.c
deleted file mode 100644 (file)
index 3b91d45..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(14);
-       tdb = tdb_open("run-endian.tdb1",
-                      TDB_VERSION1|TDB_CONVERT,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       ok1(tdb);
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dsize = strlen("world");
-       data.dptr = (void *)"world";
-
-       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_ERR_NOEXIST);
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_ERR_EXISTS);
-       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_SUCCESS);
-
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-
-       key.dsize++;
-       ok1(tdb_fetch(tdb, key, &data) == TDB_ERR_NOEXIST);
-       ok1(data.dptr == NULL);
-       tdb_close(tdb);
-
-       /* Reopen: should read it */
-       tdb = tdb_open("run-endian.tdb1", 0, O_RDWR, 0, NULL);
-       ok1(tdb);
-
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-hashsize.c b/ccan/tdb2/test/run-tdb1-hashsize.c
deleted file mode 100644 (file)
index 8a78196..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       union tdb_attribute hsize, h2;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(14);
-       tdb = tdb_open("run-tdb1-hashsize.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-       ok1(tdb);
-       h2.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       ok1(tdb_get_attribute(tdb, &h2) == TDB_SUCCESS);
-       ok1(h2.tdb1_hashsize.hsize == hsize.tdb1_hashsize.hsize);
-       tdb_close(tdb);
-
-       /* Can't specify TDB_ATTRIBUTE_TDB1_HASHSIZE without O_CREAT */
-       tdb = tdb_open("run-tdb1-hashsize.tdb1", TDB_VERSION1,
-                      O_RDWR, 0600, &hsize);
-       ok1(!tdb);
-       ok1(tap_log_messages == 1);
-
-       /* Can't specify TDB_ATTRIBUTE_TDB1_HASHSIZE for version2. */
-       tdb = tdb_open("run-tdb1-hashsize.tdb", TDB_DEFAULT,
-                       O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-       ok1(!tdb);
-       ok1(tap_log_messages == 2);
-
-       /* We can get attribute even if we didn't set it though. */
-       tdb = tdb_open("run-tdb1-hashsize.tdb1", TDB_DEFAULT,
-                      O_RDWR, 0600, &tap_log_attr);
-
-       ok1(tdb);
-       memset(&h2, 0, sizeof(h2));
-       h2.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       ok1(tdb_get_attribute(tdb, &h2) == TDB_SUCCESS);
-       ok1(h2.tdb1_hashsize.hsize == hsize.tdb1_hashsize.hsize);
-       tdb_close(tdb);
-
-       /* Check for default hash size. */
-       tdb = tdb_open("run-tdb1-hashsize.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr);
-
-       ok1(tdb);
-       memset(&h2, 0, sizeof(h2));
-       h2.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       ok1(tdb_get_attribute(tdb, &h2) == TDB_SUCCESS);
-       ok1(h2.tdb1_hashsize.hsize == TDB1_DEFAULT_HASH_SIZE);
-       tdb_close(tdb);
-       ok1(tap_log_messages == 2);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-incompatible.c b/ccan/tdb2/test/run-tdb1-incompatible.c
deleted file mode 100644 (file)
index 46ab566..0000000
+++ /dev/null
@@ -1,213 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-
-static uint64_t tdb1_dumb_hash(const void *key, size_t len, uint64_t seed,
-                              void *unused)
-{
-       return len;
-}
-
-static void log_fn(struct tdb_context *tdb, enum tdb_log_level level,
-                  enum TDB_ERROR ecode, const char *message, void *priv)
-{
-       unsigned int *count = priv;
-       if (strstr(message, "hash"))
-               (*count)++;
-}
-
-static unsigned int hdr_rwlocks(const char *fname)
-{
-       struct tdb1_header hdr;
-
-       int fd = open(fname, O_RDONLY);
-       if (fd == -1)
-               return -1;
-
-       if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
-               return -1;
-
-       close(fd);
-       return hdr.rwlocks;
-}
-
-static uint64_t jenkins_hashfn(const void *key, size_t len, uint64_t seed,
-                              void *unused)
-{
-       return hashlittle(key, len);
-}
-
-static uint64_t old_hash(const void *key, size_t len, uint64_t seed,
-                        void *unused)
-{
-       return tdb1_old_hash(key, len, seed, unused);
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       unsigned int log_count, flags;
-       TDB_DATA d;
-       union tdb_attribute log_attr, jhash_attr, ohash_attr,
-               incompat_hash_attr, dumbhash_attr;
-
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
-       log_attr.base.next = NULL;
-       log_attr.log.fn = log_fn;
-       log_attr.log.data = &log_count;
-
-       jhash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       jhash_attr.base.next = &log_attr;
-       jhash_attr.hash.fn = jenkins_hashfn;
-
-       ohash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       ohash_attr.base.next = &log_attr;
-       ohash_attr.hash.fn = old_hash;
-
-       incompat_hash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       incompat_hash_attr.base.next = &log_attr;
-       incompat_hash_attr.hash.fn = tdb1_incompatible_hash;
-
-       dumbhash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       dumbhash_attr.base.next = &log_attr;
-       dumbhash_attr.hash.fn = tdb1_dumb_hash;
-
-       plan_tests(42 * 2);
-
-       for (flags = 0; flags <= TDB_CONVERT; flags += TDB_CONVERT) {
-               unsigned int rwmagic = TDB1_HASH_RWLOCK_MAGIC;
-
-               if (flags & TDB_CONVERT)
-                       tdb1_convert(&rwmagic, sizeof(rwmagic));
-
-               /* Create an old-style hash. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1", flags|TDB_VERSION1,
-                              O_CREAT|O_RDWR|O_TRUNC, 0600, &log_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               d.dptr = (void *)"Hello";
-               d.dsize = 5;
-               ok1(tdb_store(tdb, d, d, TDB_INSERT) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               /* Should not have marked rwlocks field. */
-               ok1(hdr_rwlocks("run-incompatible.tdb1") == 0);
-
-               /* We can still open any old-style with incompat hash. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1",
-                              TDB_VERSION1,
-                              O_RDWR, 0600, &incompat_hash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               ok1(tdb_fetch(tdb, d, &d) == TDB_SUCCESS);
-               ok1(d.dsize == 5);
-               free(d.dptr);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               log_count = 0;
-               tdb = tdb_open("test/jenkins-le-hash.tdb1",
-                              TDB_VERSION1, O_RDONLY, 0, &jhash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               log_count = 0;
-               tdb = tdb_open("test/jenkins-be-hash.tdb1",
-                              TDB_VERSION1, O_RDONLY, 0, &jhash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               /* OK, now create with incompatible hash. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1",
-                              flags|TDB_VERSION1,
-                              O_CREAT|O_RDWR|O_TRUNC, 0600,
-                              &incompat_hash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               d.dptr = (void *)"Hello";
-               d.dsize = 5;
-               ok1(tdb_store(tdb, d, d, TDB_INSERT) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               /* Should have marked rwlocks field. */
-               ok1(hdr_rwlocks("run-incompatible.tdb1") == rwmagic);
-
-               /* Cannot open with old hash. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1", TDB_VERSION1,
-                              O_RDWR, 0600, &ohash_attr);
-               ok1(!tdb);
-               ok1(log_count == 1);
-
-               /* Can open with jenkins hash. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1", TDB_VERSION1,
-                              O_RDWR, 0600, &jhash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               ok1(tdb_fetch(tdb, d, &d) == TDB_SUCCESS);
-               ok1(d.dsize == 5);
-               free(d.dptr);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               /* Can open by letting it figure it out itself. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1", TDB_VERSION1,
-                              O_RDWR, 0600, &log_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               d.dptr = (void *)"Hello";
-               d.dsize = 5;
-               ok1(tdb_fetch(tdb, d, &d) == TDB_SUCCESS);
-               ok1(d.dsize == 5);
-               free(d.dptr);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               /* FIXME: Not possible with TDB2 :( */
-               /* We can also use incompatible hash with other hashes. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1",
-                              flags|TDB_VERSION1,
-                              O_CREAT|O_RDWR|O_TRUNC, 0600, &dumbhash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               d.dptr = (void *)"Hello";
-               d.dsize = 5;
-               ok1(tdb_store(tdb, d, d, TDB_INSERT) == TDB_SUCCESS);
-               tdb_close(tdb);
-
-               /* FIXME: Should have marked rwlocks field. */
-               ok1(hdr_rwlocks("run-incompatible.tdb1") != rwmagic);
-
-               /* It should not open if we don't specify. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1", TDB_VERSION1, O_RDWR, 0,
-                              &log_attr);
-               ok1(!tdb);
-               ok1(log_count == 1);
-
-               /* Should reopen with correct hash. */
-               log_count = 0;
-               tdb = tdb_open("run-incompatible.tdb1", TDB_VERSION1, O_RDWR, 0,
-                              &dumbhash_attr);
-               ok1(tdb);
-               ok1(log_count == 0);
-               ok1(tdb_fetch(tdb, d, &d) == TDB_SUCCESS);
-               ok1(d.dsize == 5);
-               free(d.dptr);
-               ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-               tdb_close(tdb);
-       }
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-nested-transactions.c b/ccan/tdb2/test/run-tdb1-nested-transactions.c
deleted file mode 100644 (file)
index 149e531..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(30);
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-
-       tdb = tdb_open("run-nested-transactions.tdb1",
-                      TDB_VERSION1, O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-       ok1(tdb);
-
-       /* No nesting by default. */
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       data.dptr = (void *)"world";
-       data.dsize = strlen("world");
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-       ok1(tdb_transaction_start(tdb) == TDB_ERR_EINVAL);
-       ok1(tap_log_messages == 1);
-
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-       tdb_close(tdb);
-
-       tdb = tdb_open("run-nested-transactions.tdb1",
-                      TDB_ALLOW_NESTING, O_RDWR, 0, &tap_log_attr);
-       ok1(tdb);
-
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-       ok1(!tdb_exists(tdb, key));
-       tdb_transaction_cancel(tdb);
-       ok1(tap_log_messages == 1);
-       /* Surprise! Kills inner "committed" transaction. */
-       ok1(tdb_exists(tdb, key));
-
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-       ok1(!tdb_exists(tdb, key));
-       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-       ok1(!tdb_exists(tdb, key));
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-nested-traverse.c b/ccan/tdb2/test/run-tdb1-nested-traverse.c
deleted file mode 100644 (file)
index cf5aa4a..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-#include "tdb1-lock-tracking.h"
-#define fcntl fcntl_with_lockcheck1
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#undef fcntl
-#include <stdlib.h>
-#include <stdbool.h>
-#include <err.h>
-#include "tdb1-external-agent.h"
-#include "logging.h"
-
-static struct agent *agent;
-
-static bool correct_key(TDB_DATA key)
-{
-       return key.dsize == strlen("hi")
-               && memcmp(key.dptr, "hi", key.dsize) == 0;
-}
-
-static bool correct_data(TDB_DATA data)
-{
-       return data.dsize == strlen("world")
-               && memcmp(data.dptr, "world", data.dsize) == 0;
-}
-
-static int traverse2(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                    void *p)
-{
-       ok1(correct_key(key));
-       ok1(correct_data(data));
-       return 0;
-}
-
-static int traverse1(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                    void *p)
-{
-       ok1(correct_key(key));
-       ok1(correct_data(data));
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == WOULD_HAVE_BLOCKED);
-       tdb_traverse(tdb, traverse2, NULL);
-
-       /* That should *not* release the transaction lock! */
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == WOULD_HAVE_BLOCKED);
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(17);
-       agent = prepare_external_agent1();
-       if (!agent)
-               err(1, "preparing agent");
-
-       tdb = tdb_open("run-nested-traverse.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-       ok1(tdb);
-
-       ok1(external_agent_operation1(agent, OPEN, tdb->name) == SUCCESS);
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == SUCCESS);
-       ok1(external_agent_operation1(agent, TRANSACTION_COMMIT, tdb->name)
-           == SUCCESS);
-
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dptr = (void *)"world";
-       data.dsize = strlen("world");
-
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-       tdb_traverse(tdb, traverse1, NULL);
-       tdb_add_flag(tdb, TDB_RDONLY);
-       tdb_traverse(tdb, traverse1, NULL);
-       tdb_remove_flag(tdb, TDB_RDONLY);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-no-lock-during-traverse.c b/ccan/tdb2/test/run-tdb1-no-lock-during-traverse.c
deleted file mode 100644 (file)
index b2b7a78..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-#include <ccan/tdb2/private.h>
-#include <unistd.h>
-#include "tdb1-lock-tracking.h"
-
-#define fcntl fcntl_with_lockcheck1
-
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-#undef fcntl
-
-#define NUM_ENTRIES 10
-
-static bool prepare_entries(struct tdb_context *tdb)
-{
-       unsigned int i;
-       TDB_DATA key, data;
-
-       for (i = 0; i < NUM_ENTRIES; i++) {
-               key.dsize = sizeof(i);
-               key.dptr = (void *)&i;
-               data.dsize = strlen("world");
-               data.dptr = (void *)"world";
-
-               if (tdb_store(tdb, key, data, 0) != TDB_SUCCESS)
-                       return false;
-       }
-       return true;
-}
-
-static void delete_entries(struct tdb_context *tdb)
-{
-       unsigned int i;
-       TDB_DATA key;
-
-       for (i = 0; i < NUM_ENTRIES; i++) {
-               key.dsize = sizeof(i);
-               key.dptr = (void *)&i;
-
-               ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-       }
-}
-
-/* We don't know how many times this will run. */
-static int delete_other(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                       void *private_data)
-{
-       unsigned int i;
-       memcpy(&i, key.dptr, 4);
-       i = (i + 1) % NUM_ENTRIES;
-       key.dptr = (void *)&i;
-       if (tdb_delete(tdb, key) != TDB_SUCCESS)
-               (*(int *)private_data)++;
-       return 0;
-}
-
-static int delete_self(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                       void *private_data)
-{
-       ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       int errors = 0;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(40);
-       tdb = tdb_open("run-no-lock-during-traverse.tdb1",
-                      TDB_VERSION1, O_CREAT|O_TRUNC|O_RDWR,
-                      0600, &hsize);
-
-       ok1(tdb);
-       ok1(prepare_entries(tdb));
-       ok1(locking_errors1 == 0);
-       ok1(tdb_lockall(tdb) == 0);
-       ok1(locking_errors1 == 0);
-       ok1(tdb_traverse(tdb, delete_other, &errors) >= 0);
-       ok1(errors == 0);
-       ok1(locking_errors1 == 0);
-       tdb_unlockall(tdb);
-
-       ok1(prepare_entries(tdb));
-       ok1(locking_errors1 == 0);
-       ok1(tdb_lockall(tdb) == 0);
-       ok1(locking_errors1 == 0);
-       ok1(tdb_traverse(tdb, delete_self, NULL) == NUM_ENTRIES);
-       ok1(locking_errors1 == 0);
-       tdb_unlockall(tdb);
-
-       ok1(prepare_entries(tdb));
-       ok1(locking_errors1 == 0);
-       ok1(tdb_lockall(tdb) == 0);
-       ok1(locking_errors1 == 0);
-       delete_entries(tdb);
-       ok1(locking_errors1 == 0);
-       tdb_unlockall(tdb);
-
-       ok1(tdb_close(tdb) == 0);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-oldhash.c b/ccan/tdb2/test/run-tdb1-oldhash.c
deleted file mode 100644 (file)
index f9cffa2..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       union tdb_attribute incompat_hash_attr;
-
-       incompat_hash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       incompat_hash_attr.base.next = &tap_log_attr;
-       incompat_hash_attr.hash.fn = tdb1_incompatible_hash;
-
-       plan_tests(8);
-
-       /* Old format (with zeroes in the hash magic fields) should
-        * open with any hash (since we don't know what hash they used). */
-       tdb = tdb_open("test/old-nohash-le.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       tdb = tdb_open("test/old-nohash-be.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &tap_log_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       tdb = tdb_open("test/old-nohash-le.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &incompat_hash_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       tdb = tdb_open("test/old-nohash-be.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &incompat_hash_attr);
-       ok1(tdb);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-readonly-check.c b/ccan/tdb2/test/run-tdb1-readonly-check.c
deleted file mode 100644 (file)
index f42a8f5..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/* We should be able to tdb_check a O_RDONLY tdb, and we were previously allowed
- * to tdb_check() inside a transaction (though that's paranoia!). */
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(10);
-       tdb = tdb_open("run-readonly-check.tdb1",
-                      TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       ok1(tdb);
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dsize = strlen("world");
-       data.dptr = (void *)"world";
-
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-
-       /* We are also allowed to do a check inside a transaction. */
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       ok1(tdb_close(tdb) == 0);
-
-       tdb = tdb_open("run-readonly-check.tdb1",
-                      TDB_DEFAULT, O_RDONLY, 0, &tap_log_attr);
-
-       ok1(tdb);
-       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_ERR_RDONLY);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       ok1(tdb_close(tdb) == 0);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-rwlock-check.c b/ccan/tdb2/test/run-tdb1-rwlock-check.c
deleted file mode 100644 (file)
index 44a2eeb..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-
-static void log_fn(struct tdb_context *tdb, enum tdb_log_level level,
-                  enum TDB_ERROR ecode, const char *message, void *priv)
-{
-       unsigned int *count = priv;
-       if (strstr(message, "spinlocks"))
-               (*count)++;
-}
-
-/* The code should barf on TDBs created with rwlocks. */
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       unsigned int log_count;
-       union tdb_attribute log_attr;
-
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
-       log_attr.base.next = NULL;
-       log_attr.log.fn = log_fn;
-       log_attr.log.data = &log_count;
-
-       plan_tests(4);
-
-       /* We should fail to open rwlock-using tdbs of either endian. */
-       log_count = 0;
-       tdb = tdb_open("test/rwlock-le.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &log_attr);
-       ok1(!tdb);
-       ok1(log_count == 1);
-
-       log_count = 0;
-       tdb = tdb_open("test/rwlock-be.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &log_attr);
-       ok1(!tdb);
-       ok1(log_count == 1);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-seqnum-wrap.c b/ccan/tdb2/test/run-tdb1-seqnum-wrap.c
deleted file mode 100644 (file)
index c3eb278..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       unsigned int i;
-       struct tdb1_header hdr;
-       struct tdb_data key = { (unsigned char *)&hdr, sizeof(hdr) };
-       struct tdb_data data = { (unsigned char *)&hdr, sizeof(hdr) };
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 7);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-tdb1-seqnum-wrap.tdb1",
-                              flags[i]|TDB_VERSION1|TDB_SEQNUM,
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
-               if (!tdb)
-                       break;
-               ok1(pread(tdb->file->fd, &hdr, sizeof(hdr), 0) == sizeof(hdr));
-               hdr.sequence_number = 0xFFFFFFFF;
-               ok1(pwrite(tdb->file->fd, &hdr, sizeof(hdr), 0) == sizeof(hdr));
-
-               /* Must not be negative: that would mean an error! */
-               ok1(tdb_get_seqnum(tdb) == 0xFFFFFFFF);
-
-               ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-               ok1(tdb_get_seqnum(tdb) == 0);
-               tdb_close(tdb);
-               ok1(tap_log_messages == 0);
-       }
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-summary.c b/ccan/tdb2/test/run-tdb1-summary.c
deleted file mode 100644 (file)
index 5107b8e..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j;
-       struct tdb_context *tdb;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
-                       TDB_NOMMAP|TDB_CONVERT };
-       TDB_DATA key = { (unsigned char *)&j, sizeof(j) };
-       TDB_DATA data = { (unsigned char *)&j, sizeof(j) };
-       char *summary;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-summary.tdb1", flags[i]|TDB_VERSION1,
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               /* Put some stuff in there. */
-               for (j = 0; j < 500; j++) {
-                       /* Make sure padding varies to we get some graphs! */
-                       data.dsize = j % (sizeof(j) + 1);
-                       if (tdb_store(tdb, key, data, TDB_REPLACE)
-                           != TDB_SUCCESS) {
-                               fail("Storing in tdb");
-                       }
-               }
-
-               summary = tdb1_summary(tdb);
-               diag("%s", summary);
-               ok1(strstr(summary, "Size of file/data: "));
-               ok1(strstr(summary, "Number of records: 500\n"));
-               ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
-               ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
-               ok1(strstr(summary, "Smallest/average/largest padding: "));
-               ok1(strstr(summary, "Number of dead records: 0\n"));
-               ok1(strstr(summary, "Number of free records: 1\n"));
-               ok1(strstr(summary, "Smallest/average/largest free records: "));
-               ok1(strstr(summary, "Number of hash chains: 131\n"));
-               ok1(strstr(summary, "Smallest/average/largest hash chains: "));
-               ok1(strstr(summary, "Number of uncoalesced records: 0\n"));
-               ok1(strstr(summary, "Smallest/average/largest uncoalesced runs: 0/0/0\n"));
-               ok1(strstr(summary, "Percentage keys/data/padding/free/dead/rechdrs&tailers/hashes: "));
-
-               free(summary);
-               tdb_close(tdb);
-       }
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-traverse-in-transaction.c b/ccan/tdb2/test/run-tdb1-traverse-in-transaction.c
deleted file mode 100644 (file)
index 691aaf9..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-#include "config.h"
-#include "tdb1-lock-tracking.h"
-#define fcntl fcntl_with_lockcheck1
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#undef fcntl_with_lockcheck
-#include <stdlib.h>
-#include <stdbool.h>
-#include <err.h>
-#include "tdb1-external-agent.h"
-#include "logging.h"
-
-static struct agent *agent;
-
-static bool correct_key(TDB_DATA key)
-{
-       return key.dsize == strlen("hi")
-               && memcmp(key.dptr, "hi", key.dsize) == 0;
-}
-
-static bool correct_data(TDB_DATA data)
-{
-       return data.dsize == strlen("world")
-               && memcmp(data.dptr, "world", data.dsize) == 0;
-}
-
-static int traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
-                    void *p)
-{
-       ok1(correct_key(key));
-       ok1(correct_data(data));
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(13);
-       agent = prepare_external_agent1();
-       if (!agent)
-               err(1, "preparing agent");
-
-       tdb = tdb_open("run-traverse-in-transaction.tdb1",
-                      TDB_VERSION1, O_CREAT|O_TRUNC|O_RDWR,
-                      0600, &hsize);
-       ok1(tdb);
-
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dptr = (void *)"world";
-       data.dsize = strlen("world");
-
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-
-       ok1(external_agent_operation1(agent, OPEN, tdb->name) == SUCCESS);
-
-       ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == WOULD_HAVE_BLOCKED);
-       tdb_traverse(tdb, traverse, NULL);
-
-       /* That should *not* release the transaction lock! */
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == WOULD_HAVE_BLOCKED);
-       tdb_traverse(tdb, traverse, NULL);
-
-       /* That should *not* release the transaction lock! */
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == WOULD_HAVE_BLOCKED);
-       ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
-       /* Now we should be fine. */
-       ok1(external_agent_operation1(agent, TRANSACTION_START, tdb->name)
-           == SUCCESS);
-
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-wronghash-fail.c b/ccan/tdb2/test/run-tdb1-wronghash-fail.c
deleted file mode 100644 (file)
index 63c1bdf..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-
-static void log_fn(struct tdb_context *tdb, enum tdb_log_level level,
-                  enum TDB_ERROR ecode, const char *message, void *priv)
-{
-       unsigned int *count = priv;
-       if (strstr(message, "hash"))
-               (*count)++;
-}
-
-static uint64_t jenkins_hashfn(const void *key, size_t len, uint64_t seed,
-                              void *unused)
-{
-       return hashlittle(key, len);
-}
-
-/* the tdb1_old_hash function is "magic" as it automatically makes us test the
- * tdb1_incompatible_hash as well, so use this wrapper. */
-static uint64_t old_hash(const void *key, size_t len, uint64_t seed,
-                        void *unused)
-{
-       return tdb1_old_hash(key, len, seed, unused);
-}
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       unsigned int log_count;
-       TDB_DATA d;
-       union tdb_attribute log_attr, jhash_attr, ohash_attr,
-               incompat_hash_attr;
-
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
-       log_attr.base.next = NULL;
-       log_attr.log.fn = log_fn;
-       log_attr.log.data = &log_count;
-
-       jhash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       jhash_attr.base.next = &log_attr;
-       jhash_attr.hash.fn = jenkins_hashfn;
-
-       ohash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       ohash_attr.base.next = &log_attr;
-       ohash_attr.hash.fn = old_hash;
-
-       incompat_hash_attr.base.attr = TDB_ATTRIBUTE_HASH;
-       incompat_hash_attr.base.next = &log_attr;
-       incompat_hash_attr.hash.fn = tdb1_incompatible_hash;
-
-       plan_tests(28);
-
-       /* Create with default hash. */
-       log_count = 0;
-       tdb = tdb_open("run-wronghash-fail.tdb1", TDB_VERSION1,
-                      O_CREAT|O_RDWR|O_TRUNC, 0600, &log_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       d.dptr = (void *)"Hello";
-       d.dsize = 5;
-       ok1(tdb_store(tdb, d, d, TDB_INSERT) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       /* Fail to open with different hash. */
-       tdb = tdb_open("run-wronghash-fail.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &jhash_attr);
-       ok1(!tdb);
-       ok1(log_count == 1);
-
-       /* Create with different hash. */
-       log_count = 0;
-       tdb = tdb_open("run-wronghash-fail.tdb1", TDB_VERSION1,
-                      O_CREAT|O_RDWR|O_TRUNC, 0600, &jhash_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       tdb_close(tdb);
-
-       /* Endian should be no problem. */
-       log_count = 0;
-       tdb = tdb_open("test/jenkins-le-hash.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &ohash_attr);
-       ok1(!tdb);
-       ok1(log_count == 1);
-
-       log_count = 0;
-       tdb = tdb_open("test/jenkins-be-hash.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &ohash_attr);
-       ok1(!tdb);
-       ok1(log_count == 1);
-
-       log_count = 0;
-       /* Fail to open with old default hash. */
-       tdb = tdb_open("run-wronghash-fail.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &ohash_attr);
-       ok1(!tdb);
-       ok1(log_count == 1);
-
-       log_count = 0;
-       tdb = tdb_open("test/jenkins-le-hash.tdb1", TDB_VERSION1, O_RDONLY,
-                      0, &incompat_hash_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       log_count = 0;
-       tdb = tdb_open("test/jenkins-be-hash.tdb1", TDB_VERSION1, O_RDONLY,
-                      0, &incompat_hash_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       /* It should open with jenkins hash if we don't specify. */
-       log_count = 0;
-       tdb = tdb_open("test/jenkins-le-hash.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &log_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       log_count = 0;
-       tdb = tdb_open("test/jenkins-be-hash.tdb1", TDB_VERSION1, O_RDWR, 0,
-                      &log_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-       log_count = 0;
-       tdb = tdb_open("run-wronghash-fail.tdb1", TDB_VERSION1, O_RDONLY,
-                      0, &log_attr);
-       ok1(tdb);
-       ok1(log_count == 0);
-       ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
-       tdb_close(tdb);
-
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1-zero-append.c b/ccan/tdb2/test/run-tdb1-zero-append.c
deleted file mode 100644 (file)
index fdc9cdc..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(5);
-       tdb = tdb_open(NULL, TDB_INTERNAL|TDB_VERSION1, O_CREAT|O_TRUNC|O_RDWR,
-                      0600, &hsize);
-       ok1(tdb);
-
-       /* Tickle bug on appending zero length buffer to zero length buffer. */
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dptr = (void *)"world";
-       data.dsize = 0;
-
-       ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-       ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == 0);
-       free(data.dptr);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb1.c b/ccan/tdb2/test/run-tdb1.c
deleted file mode 100644 (file)
index dca6473..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include <stdlib.h>
-#include <err.h>
-#include "logging.h"
-
-int main(int argc, char *argv[])
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, data;
-       union tdb_attribute hsize;
-
-       hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
-       hsize.base.next = &tap_log_attr;
-       hsize.tdb1_hashsize.hsize = 1024;
-
-       plan_tests(9);
-       tdb = tdb_open("run.tdb1", TDB_VERSION1,
-                      O_CREAT|O_TRUNC|O_RDWR, 0600, &hsize);
-
-       ok1(tdb);
-       key.dsize = strlen("hi");
-       key.dptr = (void *)"hi";
-       data.dsize = strlen("world");
-       data.dptr = (void *)"world";
-
-       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_ERR_NOEXIST);
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_SUCCESS);
-       ok1(tdb_store(tdb, key, data, TDB_INSERT) == TDB_ERR_EXISTS);
-       ok1(tdb_store(tdb, key, data, TDB_MODIFY) == TDB_SUCCESS);
-
-       ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
-       ok1(data.dsize == strlen("world"));
-       ok1(memcmp(data.dptr, "world", strlen("world")) == 0);
-       free(data.dptr);
-
-       key.dsize++;
-       ok1(tdb_fetch(tdb, key, &data) == TDB_ERR_NOEXIST);
-       tdb_close(tdb);
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb_errorstr.c b/ccan/tdb2/test/run-tdb_errorstr.c
deleted file mode 100644 (file)
index 742fb89..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-
-int main(int argc, char *argv[])
-{
-       enum TDB_ERROR err;
-       plan_tests(TDB_ERR_RDONLY*-1 + 2);
-
-       for (err = TDB_SUCCESS; err >= TDB_ERR_RDONLY; err--) {
-               switch (err) {
-               case TDB_SUCCESS:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Success"));
-                       break;
-               case TDB_ERR_IO:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "IO Error"));
-                       break;
-               case TDB_ERR_LOCK:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Locking error"));
-                       break;
-               case TDB_ERR_OOM:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Out of memory"));
-                       break;
-               case TDB_ERR_EXISTS:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Record exists"));
-                       break;
-               case TDB_ERR_EINVAL:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Invalid parameter"));
-                       break;
-               case TDB_ERR_NOEXIST:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Record does not exist"));
-                       break;
-               case TDB_ERR_RDONLY:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "write not permitted"));
-                       break;
-               case TDB_ERR_CORRUPT:
-                       ok1(!strcmp(tdb_errorstr(err),
-                                   "Corrupt database"));
-                       break;
-               }
-       }
-       ok1(!strcmp(tdb_errorstr(err), "Invalid error code"));
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-tdb_foreach.c b/ccan/tdb2/test/run-tdb_foreach.c
deleted file mode 100644 (file)
index b17f078..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-static int drop_count(struct tdb_context *tdb, unsigned int *count)
-{
-       if (--(*count) == 0)
-               return 1;
-       return 0;
-}
-
-static int set_found(struct tdb_context *tdb, bool found[3])
-{
-       unsigned int idx;
-
-       if (strcmp(tdb_name(tdb), "run-tdb_foreach0.tdb") == 0)
-               idx = 0;
-       else if (strcmp(tdb_name(tdb), "run-tdb_foreach1.tdb") == 0)
-               idx = 1;
-       else if (strcmp(tdb_name(tdb), "run-tdb_foreach2.tdb") == 0)
-               idx = 2;
-       else
-               abort();
-
-       if (found[idx])
-               abort();
-       found[idx] = true;
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, count;
-       bool found[3];
-       struct tdb_context *tdb0, *tdb1, *tdb2;
-       int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 8);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb0 = tdb_open("run-tdb_foreach0.tdb", flags[i],
-                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               tdb1 = tdb_open("run-tdb_foreach1.tdb", flags[i],
-                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               tdb2 = tdb_open("run-tdb_foreach2.tdb", flags[i],
-                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-
-               memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
-               ok1(found[0] && found[1] && found[2]);
-
-               /* Test premature iteration termination */
-               count = 1;
-               tdb_foreach(drop_count, &count);
-               ok1(count == 0);
-
-               tdb_close(tdb1);
-               memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
-               ok1(found[0] && !found[1] && found[2]);
-
-               tdb_close(tdb2);
-               memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
-               ok1(found[0] && !found[1] && !found[2]);
-
-               tdb1 = tdb_open("run-tdb_foreach1.tdb", flags[i],
-                               O_RDWR, 0600, &tap_log_attr);
-               memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
-               ok1(found[0] && found[1] && !found[2]);
-
-               tdb_close(tdb0);
-               memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
-               ok1(!found[0] && found[1] && !found[2]);
-
-               tdb_close(tdb1);
-               memset(found, 0, sizeof(found));
-               tdb_foreach(set_found, found);
-               ok1(!found[0] && !found[1] && !found[2]);
-               ok1(tap_log_messages == 0);
-       }
-
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/run-traverse.c b/ccan/tdb2/test/run-traverse.c
deleted file mode 100644 (file)
index 4de0ebd..0000000
+++ /dev/null
@@ -1,203 +0,0 @@
-#include "tdb2-source.h"
-#include <ccan/tap/tap.h>
-#include "logging.h"
-
-#define NUM_RECORDS 1000
-
-/* We use the same seed which we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
-{
-       return hash64_stable((const unsigned char *)key, len,
-                            *(uint64_t *)p);
-}
-
-static bool store_records(struct tdb_context *tdb)
-{
-       int i;
-       struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
-       struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
-
-       for (i = 0; i < NUM_RECORDS; i++)
-               if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
-                       return false;
-       return true;
-}
-
-struct trav_data {
-       unsigned int calls, call_limit;
-       int low, high;
-       bool mismatch;
-       bool delete;
-       enum TDB_ERROR delete_error;
-};
-
-static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
-               struct trav_data *td)
-{
-       int val;
-
-       td->calls++;
-       if (key.dsize != sizeof(val) || dbuf.dsize != sizeof(val)
-           || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
-               td->mismatch = true;
-               return -1;
-       }
-       memcpy(&val, dbuf.dptr, dbuf.dsize);
-       if (val < td->low)
-               td->low = val;
-       if (val > td->high)
-               td->high = val;
-
-       if (td->delete) {
-               td->delete_error = tdb_delete(tdb, key);
-               if (td->delete_error != TDB_SUCCESS) {
-                       return -1;
-               }
-       }
-
-       if (td->calls == td->call_limit)
-               return 1;
-       return 0;
-}
-
-struct trav_grow_data {
-       unsigned int calls;
-       unsigned int num_large;
-       bool mismatch;
-       enum TDB_ERROR error;
-};
-
-static int trav_grow(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
-                    struct trav_grow_data *tgd)             
-{
-       int val;
-       unsigned char buffer[128] = { 0 };
-
-       tgd->calls++;
-       if (key.dsize != sizeof(val) || dbuf.dsize < sizeof(val)
-           || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
-               tgd->mismatch = true;
-               return -1;
-       }
-
-       if (dbuf.dsize > sizeof(val))
-               /* We must have seen this before! */
-               tgd->num_large++;
-
-       /* Make a big difference to the database. */
-       dbuf.dptr = buffer;
-       dbuf.dsize = sizeof(buffer);
-       tgd->error = tdb_append(tdb, key, dbuf);
-       if (tgd->error != TDB_SUCCESS) {
-               return -1;
-       }
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i;
-       int num;
-       struct trav_data td;
-       struct trav_grow_data tgd;
-       struct tdb_context *tdb;
-       uint64_t seed = 16014841315512641303ULL;
-       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
-                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
-                       TDB_NOMMAP|TDB_CONVERT };
-       union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
-                                               .fn = fixedhash,
-                                               .data = &seed } };
-
-       hattr.base.next = &tap_log_attr;
-
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1);
-       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
-               tdb = tdb_open("run-traverse.tdb", flags[i],
-                              O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
-               ok1(tdb);
-               if (!tdb)
-                       continue;
-
-               ok1(tdb_traverse(tdb, NULL, NULL) == 0);
-
-               ok1(store_records(tdb));
-               num = tdb_traverse(tdb, NULL, NULL);
-               ok1(num == NUM_RECORDS);
-
-               /* Full traverse. */
-               td.calls = 0;
-               td.call_limit = UINT_MAX;
-               td.low = INT_MAX;
-               td.high = INT_MIN;
-               td.mismatch = false;
-               td.delete = false;
-
-               num = tdb_traverse(tdb, trav, &td);
-               ok1(num == NUM_RECORDS);
-               ok1(!td.mismatch);
-               ok1(td.calls == NUM_RECORDS);
-               ok1(td.low == 0);
-               ok1(td.high == NUM_RECORDS-1);
-
-               /* Short traverse. */
-               td.calls = 0;
-               td.call_limit = NUM_RECORDS / 2;
-               td.low = INT_MAX;
-               td.high = INT_MIN;
-               td.mismatch = false;
-               td.delete = false;
-
-               num = tdb_traverse(tdb, trav, &td);
-               ok1(num == NUM_RECORDS / 2);
-               ok1(!td.mismatch);
-               ok1(td.calls == NUM_RECORDS / 2);
-               ok1(td.low <= NUM_RECORDS / 2);
-               ok1(td.high > NUM_RECORDS / 2);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tap_log_messages == 0);
-
-               /* Deleting traverse (delete everything). */
-               td.calls = 0;
-               td.call_limit = UINT_MAX;
-               td.low = INT_MAX;
-               td.high = INT_MIN;
-               td.mismatch = false;
-               td.delete = true;
-               td.delete_error = TDB_SUCCESS;
-               num = tdb_traverse(tdb, trav, &td);
-               ok1(num == NUM_RECORDS);
-               ok1(td.delete_error == TDB_SUCCESS);
-               ok1(!td.mismatch);
-               ok1(td.calls == NUM_RECORDS);
-               ok1(td.low == 0);
-               ok1(td.high == NUM_RECORDS - 1);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Now it's empty! */
-               ok1(tdb_traverse(tdb, NULL, NULL) == 0);
-
-               /* Re-add. */
-               ok1(store_records(tdb));
-               ok1(tdb_traverse(tdb, NULL, NULL) == NUM_RECORDS);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-
-               /* Grow.  This will cause us to be reshuffled. */
-               tgd.calls = 0;
-               tgd.num_large = 0;
-               tgd.mismatch = false;
-               tgd.error = TDB_SUCCESS;
-               ok1(tdb_traverse(tdb, trav_grow, &tgd) > 1);
-               ok1(tgd.error == 0);
-               ok1(!tgd.mismatch);
-               ok1(tdb_check(tdb, NULL, NULL) == 0);
-               ok1(tgd.num_large < tgd.calls);
-               diag("growing db: %u calls, %u repeats",
-                    tgd.calls, tgd.num_large);
-
-               tdb_close(tdb);
-       }
-
-       ok1(tap_log_messages == 0);
-       return exit_status();
-}
diff --git a/ccan/tdb2/test/rwlock-be.tdb1 b/ccan/tdb2/test/rwlock-be.tdb1
deleted file mode 100644 (file)
index 45b5f09..0000000
Binary files a/ccan/tdb2/test/rwlock-be.tdb1 and /dev/null differ
diff --git a/ccan/tdb2/test/rwlock-le.tdb1 b/ccan/tdb2/test/rwlock-le.tdb1
deleted file mode 100644 (file)
index 45b5f09..0000000
Binary files a/ccan/tdb2/test/rwlock-le.tdb1 and /dev/null differ
diff --git a/ccan/tdb2/test/tdb1-external-agent.c b/ccan/tdb2/test/tdb1-external-agent.c
deleted file mode 100644 (file)
index ffde077..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-#include "tdb1-external-agent.h"
-#include "tdb1-lock-tracking.h"
-#include "logging.h"
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <err.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-#include <errno.h>
-#include <ccan/tdb2/tdb1_private.h>
-#include <ccan/tap/tap.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-static struct tdb_context *tdb;
-
-static enum agent_return do_operation(enum operation op, const char *name)
-{
-       TDB_DATA k;
-       enum agent_return ret;
-       TDB_DATA data;
-
-       if (op != OPEN && !tdb) {
-               diag("external: No tdb open!");
-               return OTHER_FAILURE;
-       }
-
-       k.dptr = (void *)name;
-       k.dsize = strlen(name);
-
-       locking_would_block1 = 0;
-       switch (op) {
-       case OPEN:
-               if (tdb) {
-                       diag("Already have tdb %s open", tdb->name);
-                       return OTHER_FAILURE;
-               }
-               tdb = tdb_open(name, TDB_VERSION1, O_RDWR, 0, &tap_log_attr);
-               if (!tdb) {
-                       if (!locking_would_block1)
-                               diag("Opening tdb gave %s", strerror(errno));
-                       ret = OTHER_FAILURE;
-               } else
-                       ret = SUCCESS;
-               break;
-       case TRANSACTION_START:
-               ret = tdb_transaction_start(tdb) == TDB_SUCCESS ? SUCCESS : OTHER_FAILURE;
-               break;
-       case FETCH:
-               if (tdb_fetch(tdb, k, &data) != TDB_SUCCESS) {
-                       if (tdb->last_error == TDB_ERR_NOEXIST)
-                               ret = FAILED;
-                       else
-                               ret = OTHER_FAILURE;
-               } else if (data.dsize != k.dsize
-                          || memcmp(data.dptr, k.dptr, k.dsize) != 0) {
-                       ret = OTHER_FAILURE;
-               } else {
-                       ret = SUCCESS;
-               }
-               free(data.dptr);
-               break;
-       case STORE:
-               if (tdb_store(tdb, k, k, 0) == TDB_SUCCESS)
-                       ret = SUCCESS;
-               else
-                       ret = OTHER_FAILURE;
-               break;
-       case TRANSACTION_COMMIT:
-               ret = tdb_transaction_commit(tdb) == TDB_SUCCESS ? SUCCESS : OTHER_FAILURE;
-               break;
-       case CHECK:
-               ret = tdb_check(tdb, NULL, NULL) == TDB_SUCCESS ? SUCCESS : OTHER_FAILURE;
-               break;
-       case NEEDS_RECOVERY:
-               ret = tdb1_needs_recovery(tdb) ? SUCCESS : FAILED;
-               break;
-       case CLOSE:
-               ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
-               tdb = NULL;
-               break;
-       default:
-               ret = OTHER_FAILURE;
-       }
-
-       if (locking_would_block1)
-               ret = WOULD_HAVE_BLOCKED;
-
-       return ret;
-}
-
-struct agent {
-       int cmdfd, responsefd;
-};
-
-/* Do this before doing any tdb stuff.  Return handle, or NULL. */
-struct agent *prepare_external_agent1(void)
-{
-       int pid, ret;
-       int command[2], response[2];
-       char name[1+PATH_MAX];
-
-       if (pipe(command) != 0 || pipe(response) != 0)
-               return NULL;
-
-       pid = fork();
-       if (pid < 0)
-               return NULL;
-
-       if (pid != 0) {
-               struct agent *agent = malloc(sizeof(*agent));
-
-               close(command[0]);
-               close(response[1]);
-               agent->cmdfd = command[1];
-               agent->responsefd = response[0];
-               return agent;
-       }
-
-       close(command[1]);
-       close(response[0]);
-
-       /* We want to fail, not block. */
-       nonblocking_locks1 = true;
-       log_prefix = "external: ";
-       while ((ret = read(command[0], name, sizeof(name))) > 0) {
-               enum agent_return result;
-
-               result = do_operation(name[0], name+1);
-               if (write(response[1], &result, sizeof(result))
-                   != sizeof(result))
-                       err(1, "Writing response");
-       }
-       exit(0);
-}
-
-/* Ask the external agent to try to do an operation. */
-enum agent_return external_agent_operation1(struct agent *agent,
-                                          enum operation op,
-                                          const char *name)
-{
-       enum agent_return res;
-       unsigned int len;
-       char *string;
-
-       if (!name)
-               name = "";
-       len = 1 + strlen(name) + 1;
-       string = malloc(len);
-
-       string[0] = op;
-       strcpy(string+1, name);
-
-       if (write(agent->cmdfd, string, len) != len
-           || read(agent->responsefd, &res, sizeof(res)) != sizeof(res))
-               res = AGENT_DIED;
-
-       free(string);
-       return res;
-}
-
-const char *agent_return_name1(enum agent_return ret)
-{
-       return ret == SUCCESS ? "SUCCESS"
-               : ret == WOULD_HAVE_BLOCKED ? "WOULD_HAVE_BLOCKED"
-               : ret == AGENT_DIED ? "AGENT_DIED"
-               : ret == FAILED ? "FAILED"
-               : ret == OTHER_FAILURE ? "OTHER_FAILURE"
-               : "**INVALID**";
-}
-
-const char *operation_name1(enum operation op)
-{
-       switch (op) {
-       case OPEN: return "OPEN";
-       case TRANSACTION_START: return "TRANSACTION_START";
-       case FETCH: return "FETCH";
-       case STORE: return "STORE";
-       case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT";
-       case CHECK: return "CHECK";
-       case NEEDS_RECOVERY: return "NEEDS_RECOVERY";
-       case CLOSE: return "CLOSE";
-       }
-       return "**INVALID**";
-}
diff --git a/ccan/tdb2/test/tdb1-external-agent.h b/ccan/tdb2/test/tdb1-external-agent.h
deleted file mode 100644 (file)
index ee903b6..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef TDB_TEST_EXTERNAL_AGENT_H
-#define TDB_TEST_EXTERNAL_AGENT_H
-
-/* For locking tests, we need a different process to try things at
- * various times. */
-enum operation {
-       OPEN,
-       TRANSACTION_START,
-       FETCH,
-       STORE,
-       TRANSACTION_COMMIT,
-       CHECK,
-       NEEDS_RECOVERY,
-       CLOSE,
-};
-
-/* Do this before doing any tdb stuff.  Return handle, or -1. */
-struct agent *prepare_external_agent1(void);
-
-enum agent_return {
-       SUCCESS,
-       WOULD_HAVE_BLOCKED,
-       AGENT_DIED,
-       FAILED, /* For fetch, or NEEDS_RECOVERY */
-       OTHER_FAILURE,
-};
-
-/* Ask the external agent to try to do an operation.
- * name == tdb name for OPEN/OPEN_WITH_CLEAR_IF_FIRST,
- * record name for FETCH/STORE (store stores name as data too)
- */
-enum agent_return external_agent_operation1(struct agent *handle,
-                                          enum operation op,
-                                          const char *name);
-
-/* Mapping enum -> string. */
-const char *agent_return_name1(enum agent_return ret);
-const char *operation_name1(enum operation op);
-
-#endif /* TDB_TEST_EXTERNAL_AGENT_H */
diff --git a/ccan/tdb2/test/tdb1-lock-tracking.c b/ccan/tdb2/test/tdb1-lock-tracking.c
deleted file mode 100644 (file)
index 197b1f0..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-/* We save the locks so we can reaquire them. */
-#include <ccan/tdb2/tdb1_private.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <ccan/tap/tap.h>
-#include "tdb1-lock-tracking.h"
-
-struct lock {
-       struct lock *next;
-       unsigned int off;
-       unsigned int len;
-       int type;
-};
-static struct lock *locks;
-int locking_errors1 = 0;
-bool suppress_lockcheck1 = false;
-bool nonblocking_locks1;
-int locking_would_block1 = 0;
-void (*unlock_callback1)(int fd);
-
-int fcntl_with_lockcheck1(int fd, int cmd, ... /* arg */ )
-{
-       va_list ap;
-       int ret, arg3;
-       struct flock *fl;
-       bool may_block = false;
-
-       if (cmd != F_SETLK && cmd != F_SETLKW) {
-               /* This may be totally bogus, but we don't know in general. */
-               va_start(ap, cmd);
-               arg3 = va_arg(ap, int);
-               va_end(ap);
-
-               return fcntl(fd, cmd, arg3);
-       }
-
-       va_start(ap, cmd);
-       fl = va_arg(ap, struct flock *);
-       va_end(ap);
-
-       if (cmd == F_SETLKW && nonblocking_locks1) {
-               cmd = F_SETLK;
-               may_block = true;
-       }
-       ret = fcntl(fd, cmd, fl);
-
-       /* Detect when we failed, but might have been OK if we waited. */
-       if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) {
-               locking_would_block1++;
-       }
-
-       if (fl->l_type == F_UNLCK) {
-               struct lock **l;
-               struct lock *old = NULL;
-
-               for (l = &locks; *l; l = &(*l)->next) {
-                       if ((*l)->off == fl->l_start
-                           && (*l)->len == fl->l_len) {
-                               if (ret == 0) {
-                                       old = *l;
-                                       *l = (*l)->next;
-                                       free(old);
-                               }
-                               break;
-                       }
-               }
-               if (!old && !suppress_lockcheck1) {
-                       diag("Unknown unlock %u@%u - %i",
-                            (int)fl->l_len, (int)fl->l_start, ret);
-                       locking_errors1++;
-               }
-       } else {
-               struct lock *new, *i;
-               unsigned int fl_end = fl->l_start + fl->l_len;
-               if (fl->l_len == 0)
-                       fl_end = (unsigned int)-1;
-
-               /* Check for overlaps: we shouldn't do this. */
-               for (i = locks; i; i = i->next) {
-                       unsigned int i_end = i->off + i->len;
-                       if (i->len == 0)
-                               i_end = (unsigned int)-1;
-
-                       if (fl->l_start >= i->off && fl->l_start < i_end)
-                               break;
-                       if (fl_end >= i->off && fl_end < i_end)
-                               break;
-
-                       /* tdb_allrecord_lock does this, handle adjacent: */
-                       if (fl->l_start == i_end && fl->l_type == i->type) {
-                               if (ret == 0) {
-                                       i->len = fl->l_len
-                                               ? i->len + fl->l_len
-                                               : 0;
-                               }
-                               goto done;
-                       }
-               }
-               if (i) {
-                       /* Special case: upgrade of allrecord lock. */
-                       if (i->type == F_RDLCK && fl->l_type == F_WRLCK
-                           && i->off == TDB1_FREELIST_TOP
-                           && fl->l_start == TDB1_FREELIST_TOP
-                           && i->len == 0
-                           && fl->l_len == 0) {
-                               if (ret == 0)
-                                       i->type = F_WRLCK;
-                               goto done;
-                       }
-                       if (!suppress_lockcheck1) {
-                               diag("%s lock %u@%u overlaps %u@%u",
-                                    fl->l_type == F_WRLCK ? "write" : "read",
-                                    (int)fl->l_len, (int)fl->l_start,
-                                    i->len, (int)i->off);
-                               locking_errors1++;
-                       }
-               }
-
-               if (ret == 0) {
-                       new = malloc(sizeof *new);
-                       new->off = fl->l_start;
-                       new->len = fl->l_len;
-                       new->type = fl->l_type;
-                       new->next = locks;
-                       locks = new;
-               }
-       }
-done:
-       if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback1)
-               unlock_callback1(fd);
-       return ret;
-}
-
-unsigned int forget_locking1(void)
-{
-       unsigned int num = 0;
-       while (locks) {
-               struct lock *next = locks->next;
-               free(locks);
-               locks = next;
-               num++;
-       }
-       return num;
-}
diff --git a/ccan/tdb2/test/tdb1-lock-tracking.h b/ccan/tdb2/test/tdb1-lock-tracking.h
deleted file mode 100644 (file)
index cb8c2f1..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef TDB1_LOCK_TRACKING_H
-#define TDB1_LOCK_TRACKING_H
-#include <ccan/tdb2/private.h>
-#include <stdbool.h>
-
-/* Set this if you want a callback after fnctl unlock. */
-extern void (*unlock_callback1)(int fd);
-
-/* Replacement fcntl. */
-int fcntl_with_lockcheck1(int fd, int cmd, ... /* arg */ );
-
-/* Discard locking info: returns number of locks outstanding. */
-unsigned int forget_locking1(void);
-
-/* Number of errors in locking. */
-extern int locking_errors1;
-
-/* Suppress lock checking. */
-extern bool suppress_lockcheck1;
-
-/* Make all locks non-blocking. */
-extern bool nonblocking_locks1;
-
-/* Number of times we failed a lock because we made it non-blocking. */
-extern int locking_would_block1;
-#endif /* LOCK_TRACKING_H */
diff --git a/ccan/tdb2/test/tdb1.corrupt b/ccan/tdb2/test/tdb1.corrupt
deleted file mode 100644 (file)
index 83d6677..0000000
Binary files a/ccan/tdb2/test/tdb1.corrupt and /dev/null differ
diff --git a/ccan/tdb2/test/tdb2-source.h b/ccan/tdb2/test/tdb2-source.h
deleted file mode 100644 (file)
index 28ab351..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#include "config.h"
-#include <ccan/tdb2/check.c>
-#include <ccan/tdb2/free.c>
-#include <ccan/tdb2/hash.c>
-#include <ccan/tdb2/io.c>
-#include <ccan/tdb2/lock.c>
-#include <ccan/tdb2/open.c>
-#include <ccan/tdb2/summary.c>
-#include <ccan/tdb2/tdb.c>
-#include <ccan/tdb2/transaction.c>
-#include <ccan/tdb2/traverse.c>
-#include <ccan/tdb2/tdb1_check.c>
-#include <ccan/tdb2/tdb1_freelist.c>
-#include <ccan/tdb2/tdb1_hash.c>
-#include <ccan/tdb2/tdb1_io.c>
-#include <ccan/tdb2/tdb1_lock.c>
-#include <ccan/tdb2/tdb1_open.c>
-#include <ccan/tdb2/tdb1_summary.c>
-#include <ccan/tdb2/tdb1_tdb.c>
-#include <ccan/tdb2/tdb1_transaction.c>
-#include <ccan/tdb2/tdb1_traverse.c>
diff --git a/ccan/tdb2/tools/Makefile b/ccan/tdb2/tools/Makefile
deleted file mode 100644 (file)
index 11188c3..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-OBJS:=../../tdb2.o ../../hash.o ../../tally.o
-CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
-LDFLAGS:=-L../../..
-
-default: tdb2torture tdb2tool tdb2dump tdb2restore mktdb2 speed growtdb-bench
-
-tdb2dump: tdb2dump.c $(OBJS)
-tdb2restore: tdb2restore.c $(OBJS)
-tdb2torture: tdb2torture.c $(OBJS)
-tdb2tool: tdb2tool.c $(OBJS)
-mktdb2: mktdb2.c $(OBJS)
-speed: speed.c $(OBJS)
-growtdb-bench: growtdb-bench.c $(OBJS)
-
-clean:
-       rm -f tdb2torture tdb2dump tdb2restore tdb2tool mktdb2 speed growtdb-bench
diff --git a/ccan/tdb2/tools/growtdb-bench.c b/ccan/tdb2/tools/growtdb-bench.c
deleted file mode 100644 (file)
index 205ff86..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-#include "tdb2.h"
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <err.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-static void logfn(struct tdb_context *tdb,
-                 enum tdb_log_level level,
-                 enum TDB_ERROR ecode,
-                 const char *message,
-                 void *data)
-{
-       fprintf(stderr, "tdb:%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j, users, groups;
-       TDB_DATA idxkey, idxdata;
-       TDB_DATA k, d, gk;
-       char cmd[100];
-       struct tdb_context *tdb;
-       enum TDB_ERROR ecode;
-       union tdb_attribute log;
-
-       if (argc != 3) {
-               printf("Usage: growtdb-bench <users> <groups>\n");
-               exit(1);
-       }
-       users = atoi(argv[1]);
-       groups = atoi(argv[2]);
-
-       sprintf(cmd, "cat /proc/%i/statm", getpid());
-
-       log.base.attr = TDB_ATTRIBUTE_LOG;
-       log.base.next = NULL;
-       log.log.fn = logfn;
-       
-       tdb = tdb_open("/tmp/growtdb.tdb", TDB_DEFAULT,
-                      O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
-
-       idxkey.dptr = (unsigned char *)"User index";
-       idxkey.dsize = strlen("User index");
-       idxdata.dsize = 51;
-       idxdata.dptr = calloc(idxdata.dsize, 1);
-
-       /* Create users. */
-       k.dsize = 48;
-       k.dptr = calloc(k.dsize, 1);
-       d.dsize = 64;
-       d.dptr = calloc(d.dsize, 1);
-
-       tdb_transaction_start(tdb);
-       for (i = 0; i < users; i++) {
-               memcpy(k.dptr, &i, sizeof(i));
-               ecode = tdb_store(tdb, k, d, TDB_INSERT);
-               if (ecode != TDB_SUCCESS)
-                       errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));
-
-               /* This simulates a growing index record. */
-               ecode = tdb_append(tdb, idxkey, idxdata);
-               if (ecode != TDB_SUCCESS)
-                       errx(1, "tdb append failed: %s", tdb_errorstr(ecode));
-       }
-       if ((ecode = tdb_transaction_commit(tdb)) != 0)
-               errx(1, "tdb commit1 failed: %s", tdb_errorstr(ecode));
-
-       if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
-               errx(1, "tdb_check failed after initial insert!");
-
-       system(cmd);
-
-       /* Now put them all in groups: add 32 bytes to each record for
-        * a group. */
-       gk.dsize = 48;
-       gk.dptr = calloc(k.dsize, 1);
-       gk.dptr[gk.dsize-1] = 1;
-
-       d.dsize = 32;
-       for (i = 0; i < groups; i++) {
-               tdb_transaction_start(tdb);
-               /* Create the "group". */
-               memcpy(gk.dptr, &i, sizeof(i));
-               ecode = tdb_store(tdb, gk, d, TDB_INSERT);
-               if (ecode != TDB_SUCCESS)
-                       errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));
-
-               /* Now populate it. */
-               for (j = 0; j < users; j++) {
-                       /* Append to the user. */
-                       memcpy(k.dptr, &j, sizeof(j));
-                       if ((ecode = tdb_append(tdb, k, d)) != 0)
-                               errx(1, "tdb append failed: %s",
-                                    tdb_errorstr(ecode));
-                       
-                       /* Append to the group. */
-                       if ((ecode = tdb_append(tdb, gk, d)) != 0)
-                               errx(1, "tdb append failed: %s",
-                                    tdb_errorstr(ecode));
-               }
-               if ((ecode = tdb_transaction_commit(tdb)) != 0)
-                       errx(1, "tdb commit2 failed: %s", tdb_errorstr(ecode));
-               if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
-                       errx(1, "tdb_check failed after iteration %i!", i);
-               system(cmd);
-       }
-
-       return 0;
-}
diff --git a/ccan/tdb2/tools/mktdb2.c b/ccan/tdb2/tools/mktdb2.c
deleted file mode 100644 (file)
index c8c2803..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-#include "tdb2.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <err.h>
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, num_recs;
-       struct tdb_context *tdb;
-
-       if (argc != 3 || (num_recs = atoi(argv[2])) == 0)
-               errx(1, "Usage: mktdb <tdbfile> <numrecords>");
-
-       tdb = tdb_open(argv[1], TDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
-       if (!tdb)
-               err(1, "Opening %s", argv[1]);
-
-       for (i = 0; i < num_recs; i++) {
-               TDB_DATA d;
-
-               d.dptr = (void *)&i;
-               d.dsize = sizeof(i);
-               if (tdb_store(tdb, d, d, TDB_INSERT) != 0)
-                       err(1, "Failed to store record %i", i);
-       }
-       printf("Done\n");
-       return 0;
-}
diff --git a/ccan/tdb2/tools/speed.c b/ccan/tdb2/tools/speed.c
deleted file mode 100644 (file)
index ccb5ae3..0000000
+++ /dev/null
@@ -1,443 +0,0 @@
-/* Simple speed test for TDB */
-#include <err.h>
-#include <time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-#include "tdb2.h"
-
-/* Nanoseconds per operation */
-static size_t normalize(const struct timeval *start,
-                       const struct timeval *stop,
-                       unsigned int num)
-{
-       struct timeval diff;
-
-       timersub(stop, start, &diff);
-
-       /* Floating point is more accurate here. */
-       return (double)(diff.tv_sec * 1000000 + diff.tv_usec)
-               / num * 1000;
-}
-
-static size_t file_size(void)
-{
-       struct stat st;
-
-       if (stat("/tmp/speed.tdb", &st) != 0)
-               return -1;
-       return st.st_size;
-}
-
-static int count_record(struct tdb_context *tdb,
-                       TDB_DATA key, TDB_DATA data, void *p)
-{
-       int *total = p;
-       *total += *(int *)data.dptr;
-       return 0;
-}
-
-static void dump_and_clear_stats(struct tdb_context **tdb,
-                                int flags,
-                                union tdb_attribute *attr)
-{
-       union tdb_attribute stats;
-       enum TDB_ERROR ecode;
-
-       stats.base.attr = TDB_ATTRIBUTE_STATS;
-       stats.stats.size = sizeof(stats.stats);
-       ecode = tdb_get_attribute(*tdb, &stats);
-       if (ecode != TDB_SUCCESS)
-               errx(1, "Getting stats: %s", tdb_errorstr(ecode));
-
-       printf("allocs = %llu\n",
-              (unsigned long long)stats.stats.allocs);
-       printf("  alloc_subhash = %llu\n",
-              (unsigned long long)stats.stats.alloc_subhash);
-       printf("  alloc_chain = %llu\n",
-              (unsigned long long)stats.stats.alloc_chain);
-       printf("  alloc_bucket_exact = %llu\n",
-              (unsigned long long)stats.stats.alloc_bucket_exact);
-       printf("  alloc_bucket_max = %llu\n",
-              (unsigned long long)stats.stats.alloc_bucket_max);
-       printf("  alloc_leftover = %llu\n",
-              (unsigned long long)stats.stats.alloc_leftover);
-       printf("  alloc_coalesce_tried = %llu\n",
-              (unsigned long long)stats.stats.alloc_coalesce_tried);
-       printf("    alloc_coalesce_iterate_clash = %llu\n",
-              (unsigned long long)stats.stats.alloc_coalesce_iterate_clash);
-       printf("    alloc_coalesce_lockfail = %llu\n",
-              (unsigned long long)stats.stats.alloc_coalesce_lockfail);
-       printf("    alloc_coalesce_race = %llu\n",
-              (unsigned long long)stats.stats.alloc_coalesce_race);
-       printf("    alloc_coalesce_succeeded = %llu\n",
-              (unsigned long long)stats.stats.alloc_coalesce_succeeded);
-       printf("      alloc_coalesce_num_merged = %llu\n",
-              (unsigned long long)stats.stats.alloc_coalesce_num_merged);
-       printf("compares = %llu\n",
-              (unsigned long long)stats.stats.compares);
-       printf("  compare_wrong_bucket = %llu\n",
-              (unsigned long long)stats.stats.compare_wrong_bucket);
-       printf("  compare_wrong_offsetbits = %llu\n",
-              (unsigned long long)stats.stats.compare_wrong_offsetbits);
-       printf("  compare_wrong_keylen = %llu\n",
-              (unsigned long long)stats.stats.compare_wrong_keylen);
-       printf("  compare_wrong_rechash = %llu\n",
-              (unsigned long long)stats.stats.compare_wrong_rechash);
-       printf("  compare_wrong_keycmp = %llu\n",
-              (unsigned long long)stats.stats.compare_wrong_keycmp);
-       printf("transactions = %llu\n",
-              (unsigned long long)stats.stats.transactions);
-       printf("  transaction_cancel = %llu\n",
-              (unsigned long long)stats.stats.transaction_cancel);
-       printf("  transaction_nest = %llu\n",
-              (unsigned long long)stats.stats.transaction_nest);
-       printf("  transaction_expand_file = %llu\n",
-              (unsigned long long)stats.stats.transaction_expand_file);
-       printf("  transaction_read_direct = %llu\n",
-              (unsigned long long)stats.stats.transaction_read_direct);
-       printf("    transaction_read_direct_fail = %llu\n",
-              (unsigned long long)stats.stats.transaction_read_direct_fail);
-       printf("  transaction_write_direct = %llu\n",
-              (unsigned long long)stats.stats.transaction_write_direct);
-       printf("    transaction_write_direct_fail = %llu\n",
-              (unsigned long long)stats.stats.transaction_write_direct_fail);
-       printf("expands = %llu\n",
-              (unsigned long long)stats.stats.expands);
-       printf("frees = %llu\n",
-              (unsigned long long)stats.stats.frees);
-       printf("locks = %llu\n",
-              (unsigned long long)stats.stats.locks);
-       printf("  lock_lowlevel = %llu\n",
-              (unsigned long long)stats.stats.lock_lowlevel);
-       printf("  lock_nonblock = %llu\n",
-              (unsigned long long)stats.stats.lock_nonblock);
-       printf("    lock_nonblock_fail = %llu\n",
-              (unsigned long long)stats.stats.lock_nonblock_fail);
-
-       /* Now clear. */
-       tdb_close(*tdb);
-       *tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR, 0, attr);
-}
-
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
-                   const char *message,
-                   void *data)
-{
-       fprintf(stderr, "tdb:%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int i, j, num = 1000, stage = 0, stopat = -1;
-       int flags = TDB_DEFAULT;
-       bool transaction = false, summary = false;
-       TDB_DATA key, data;
-       struct tdb_context *tdb;
-       struct timeval start, stop;
-       union tdb_attribute seed, log;
-       bool do_stats = false;
-       enum TDB_ERROR ecode;
-
-       /* Try to keep benchmarks even. */
-       seed.base.attr = TDB_ATTRIBUTE_SEED;
-       seed.base.next = NULL;
-       seed.seed.seed = 0;
-
-       log.base.attr = TDB_ATTRIBUTE_LOG;
-       log.base.next = &seed;
-       log.log.fn = tdb_log;
-
-       if (argv[1] && strcmp(argv[1], "--internal") == 0) {
-               flags = TDB_INTERNAL;
-               argc--;
-               argv++;
-       }
-       if (argv[1] && strcmp(argv[1], "--transaction") == 0) {
-               transaction = true;
-               argc--;
-               argv++;
-       }
-       if (argv[1] && strcmp(argv[1], "--no-sync") == 0) {
-               flags |= TDB_NOSYNC;
-               argc--;
-               argv++;
-       }
-       if (argv[1] && strcmp(argv[1], "--summary") == 0) {
-               summary = true;
-               argc--;
-               argv++;
-       }
-       if (argv[1] && strcmp(argv[1], "--stats") == 0) {
-               do_stats = true;
-               argc--;
-               argv++;
-       }
-
-       tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR|O_CREAT|O_TRUNC,
-                      0600, &log);
-       if (!tdb)
-               err(1, "Opening /tmp/speed.tdb");
-
-       key.dptr = (void *)&i;
-       key.dsize = sizeof(i);
-       data = key;
-
-       if (argv[1]) {
-               num = atoi(argv[1]);
-               argv++;
-               argc--;
-       }
-
-       if (argv[1]) {
-               stopat = atoi(argv[1]);
-               argv++;
-               argc--;
-       }
-
-       /* Add 1000 records. */
-       printf("Adding %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       gettimeofday(&start, NULL);
-       for (i = 0; i < num; i++)
-               if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
-                       errx(1, "Inserting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-
-       if (++stage == stopat)
-               exit(0);
-
-       /* Finding 1000 records. */
-       printf("Finding %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       gettimeofday(&start, NULL);
-       for (i = 0; i < num; i++) {
-               struct tdb_data dbuf;
-               if ((ecode = tdb_fetch(tdb, key, &dbuf)) != TDB_SUCCESS
-                   || *(int *)dbuf.dptr != i) {
-                       errx(1, "Fetching key %u in tdb gave %u",
-                            i, ecode ? ecode : *(int *)dbuf.dptr);
-               }
-       }
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-       if (++stage == stopat)
-               exit(0);
-
-       /* Missing 1000 records. */
-       printf("Missing %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       gettimeofday(&start, NULL);
-       for (i = num; i < num*2; i++) {
-               struct tdb_data dbuf;
-               ecode = tdb_fetch(tdb, key, &dbuf);
-               if (ecode != TDB_ERR_NOEXIST)
-                       errx(1, "Fetching key %u in tdb gave %s",
-                            i, tdb_errorstr(ecode));
-       }
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-       if (++stage == stopat)
-               exit(0);
-
-       /* Traverse 1000 records. */
-       printf("Traversing %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       i = 0;
-       gettimeofday(&start, NULL);
-       if (tdb_traverse(tdb, count_record, &i) != num)
-               errx(1, "Traverse returned wrong number of records");
-       if (i != (num - 1) * (num / 2))
-               errx(1, "Traverse tallied to %u", i);
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-       if (++stage == stopat)
-               exit(0);
-
-       /* Delete 1000 records (not in order). */
-       printf("Deleting %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       gettimeofday(&start, NULL);
-       for (j = 0; j < num; j++) {
-               i = (j + 100003) % num;
-               if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
-                       errx(1, "Deleting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
-       }
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-       if (++stage == stopat)
-               exit(0);
-
-       /* Re-add 1000 records (not in order). */
-       printf("Re-adding %u records: ", num); fflush(stdout);
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       gettimeofday(&start, NULL);
-       for (j = 0; j < num; j++) {
-               i = (j + 100003) % num;
-               if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
-                       errx(1, "Inserting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
-       }
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-       if (++stage == stopat)
-               exit(0);
-
-       /* Append 1000 records. */
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       printf("Appending %u records: ", num); fflush(stdout);
-       gettimeofday(&start, NULL);
-       for (i = 0; i < num; i++)
-               if ((ecode = tdb_append(tdb, key, data)) != TDB_SUCCESS)
-                       errx(1, "Appending key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (++stage == stopat)
-               exit(0);
-
-       /* Churn 1000 records: not in order! */
-       if (transaction && (ecode = tdb_transaction_start(tdb)))
-               errx(1, "starting transaction: %s", tdb_errorstr(ecode));
-       printf("Churning %u records: ", num); fflush(stdout);
-       gettimeofday(&start, NULL);
-       for (j = 0; j < num; j++) {
-               i = (j + 1000019) % num;
-               if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
-                       errx(1, "Deleting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
-               i += num;
-               if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
-                       errx(1, "Inserting key %u in tdb: %s",
-                            i, tdb_errorstr(ecode));
-       }
-       gettimeofday(&stop, NULL);
-       if (transaction && (ecode = tdb_transaction_commit(tdb)))
-               errx(1, "committing transaction: %s", tdb_errorstr(ecode));
-       printf(" %zu ns (%zu bytes)\n",
-              normalize(&start, &stop, num), file_size());
-
-       if (tdb_check(tdb, NULL, NULL))
-               errx(1, "tdb_check failed!");
-       if (summary) {
-               char *sumstr = NULL;
-               tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
-               printf("%s\n", sumstr);
-               free(sumstr);
-       }
-       if (do_stats)
-               dump_and_clear_stats(&tdb, flags, &log);
-       if (++stage == stopat)
-               exit(0);
-
-       return 0;
-}
diff --git a/ccan/tdb2/tools/tdb2dump.c b/ccan/tdb2/tools/tdb2dump.c
deleted file mode 100644 (file)
index bf9216f..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-/* 
-   simple tdb2 dump util
-   Copyright (C) Andrew Tridgell              2001
-   Copyright (C) Rusty Russell                2011
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-   
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#include "tdb2.h"
-#include <ctype.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-static void print_data(TDB_DATA d)
-{
-       unsigned char *p = (unsigned char *)d.dptr;
-       int len = d.dsize;
-       while (len--) {
-               if (isprint(*p) && !strchr("\"\\", *p)) {
-                       fputc(*p, stdout);
-               } else {
-                       printf("\\%02X", *p);
-               }
-               p++;
-       }
-}
-
-static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
-{
-       printf("{\n");
-       printf("key(%d) = \"", (int)key.dsize);
-       print_data(key);
-       printf("\"\n");
-       printf("data(%d) = \"", (int)dbuf.dsize);
-       print_data(dbuf);
-       printf("\"\n");
-       printf("}\n");
-       return 0;
-}
-
-static int dump_tdb(const char *fname, const char *keyname)
-{
-       struct tdb_context *tdb;
-       TDB_DATA key, value;
-       
-       tdb = tdb_open(fname, 0, O_RDONLY, 0, NULL);
-       if (!tdb) {
-               printf("Failed to open %s\n", fname);
-               return 1;
-       }
-
-       if (!keyname) {
-               tdb_traverse(tdb, traverse_fn, NULL);
-       } else {
-               key = tdb_mkdata(keyname, strlen(keyname));
-               if (tdb_fetch(tdb, key, &value) != 0) {
-                       return 1;
-               } else {
-                       print_data(value);
-                       free(value.dptr);
-               }
-       }
-
-       return 0;
-}
-
-static void usage( void)
-{
-       printf( "Usage: tdb2dump [options] <filename>\n\n");
-       printf( "   -h          this help message\n");
-       printf( "   -k keyname  dumps value of keyname\n");
-}
-
- int main(int argc, char *argv[])
-{
-       char *fname, *keyname=NULL;
-       int c;
-
-       if (argc < 2) {
-               printf("Usage: tdb2dump <fname>\n");
-               exit(1);
-       }
-
-       while ((c = getopt( argc, argv, "hk:")) != -1) {
-               switch (c) {
-               case 'h':
-                       usage();
-                       exit( 0);
-               case 'k':
-                       keyname = optarg;
-                       break;
-               default:
-                       usage();
-                       exit( 1);
-               }
-       }
-
-       fname = argv[optind];
-
-       return dump_tdb(fname, keyname);
-}
diff --git a/ccan/tdb2/tools/tdb2restore.c b/ccan/tdb2/tools/tdb2restore.c
deleted file mode 100644 (file)
index 658215a..0000000
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
-   tdb2restore -- construct a tdb from tdbdump output.
-   Copyright (C) Volker Lendecke               2010
-   Copyright (C) Simon McVittie                        2005
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb2.h"
-#include <assert.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#define debug_fprintf(file, fmt, ...) do {/*nothing*/} while (0)
-
-static int read_linehead(FILE *f)
-{
-       int i, c;
-       int num_bytes;
-       char prefix[128];
-
-       while (1) {
-               c = getc(f);
-               if (c == EOF) {
-                       return -1;
-               }
-               if (c == '(') {
-                       break;
-               }
-       }
-       for (i=0; i<sizeof(prefix); i++) {
-               c = getc(f);
-               if (c == EOF) {
-                       return -1;
-               }
-               prefix[i] = c;
-               if (c == '"') {
-                       break;
-               }
-       }
-       if (i == sizeof(prefix)) {
-               return -1;
-       }
-       prefix[i] = '\0';
-
-       if (sscanf(prefix, "%d) = ", &num_bytes) != 1) {
-               return -1;
-       }
-       return num_bytes;
-}
-
-static int read_hex(void) {
-       int c;
-       c = getchar();
-       if (c == EOF) {
-               fprintf(stderr, "Unexpected EOF in data\n");
-               return -1;
-       } else if (c == '"') {
-               fprintf(stderr, "Unexpected \\\" sequence\n");
-               return -1;
-       } else if ('0' <= c && c <= '9')  {
-               return c - '0';
-       } else if ('A' <= c && c <= 'F')  {
-               return c - 'A' + 10;
-       } else if ('a' <= c && c <= 'f')  {
-               return c - 'a' + 10;
-       } else {
-               fprintf(stderr, "Invalid hex: %c\n", c);
-               return -1;
-       }
-}
-
-static int read_data(FILE *f, struct tdb_data *d, size_t size) {
-       int c, low, high;
-       int i;
-
-       d->dptr = (unsigned char *)malloc(size);
-       if (d->dptr == NULL) {
-               return -1;
-       }
-       d->dsize = size;
-
-       for (i=0; i<size; i++) {
-               c = getc(f);
-               if (c == EOF) {
-                       fprintf(stderr, "Unexpected EOF in data\n");
-                       return 1;
-               } else if (c == '"') {
-                       return 0;
-               } else if (c == '\\') {
-                       high = read_hex();
-                       if (high < 0) {
-                               return -1;
-                       }
-                       high = high << 4;
-                       assert(high == (high & 0xf0));
-                       low = read_hex();
-                       if (low < 0) {
-                               return -1;
-                       }
-                       assert(low == (low & 0x0f));
-                       d->dptr[i] = (low|high);
-               } else {
-                       d->dptr[i] = c;
-               }
-       }
-       return 0;
-}
-
-static int swallow(FILE *f, const char *s, int *eof)
-{
-       char line[128];
-
-       if (fgets(line, sizeof(line), f) == NULL) {
-               if (eof != NULL) {
-                       *eof = 1;
-               }
-               return -1;
-       }
-       if (strcmp(line, s) != 0) {
-               return -1;
-       }
-       return 0;
-}
-
-static bool read_rec(FILE *f, struct tdb_context *tdb, int *eof)
-{
-       int length;
-       struct tdb_data key, data;
-       bool ret = false;
-       enum TDB_ERROR e;
-
-       key.dptr = NULL;
-       data.dptr = NULL;
-
-       if (swallow(f, "{\n", eof) == -1) {
-               goto fail;
-       }
-       length = read_linehead(f);
-       if (length == -1) {
-               goto fail;
-       }
-       if (read_data(f, &key, length) == -1) {
-               goto fail;
-       }
-       if (swallow(f, "\"\n", NULL) == -1) {
-               goto fail;
-       }
-       length = read_linehead(f);
-       if (length == -1) {
-               goto fail;
-       }
-       if (read_data(f, &data, length) == -1) {
-               goto fail;
-       }
-       if ((swallow(f, "\"\n", NULL) == -1)
-           || (swallow(f, "}\n", NULL) == -1)) {
-               goto fail;
-       }
-       e = tdb_store(tdb, key, data, TDB_INSERT);
-       if (e != TDB_SUCCESS) {
-               fprintf(stderr, "TDB error: %s\n", tdb_errorstr(e));
-               goto fail;
-       }
-
-       ret = true;
-fail:
-       free(key.dptr);
-       free(data.dptr);
-       return ret;
-}
-
-static int restore_tdb(const char *fname)
-{
-       struct tdb_context *tdb;
-
-       tdb = tdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, NULL);
-       if (!tdb) {
-               perror("tdb_open");
-               fprintf(stderr, "Failed to open %s\n", fname);
-               return 1;
-       }
-
-       while (1) {
-               int eof = 0;
-               if (!read_rec(stdin, tdb, &eof)) {
-                       if (eof) {
-                               break;
-                       }
-                       return 1;
-               }
-       }
-       if (tdb_close(tdb)) {
-               fprintf(stderr, "Error closing tdb\n");
-               return 1;
-       }
-       fprintf(stderr, "EOF\n");
-       return 0;
-}
-
-int main(int argc, char *argv[])
-{
-       char *fname;
-
-       if (argc < 2) {
-               printf("Usage: %s dbname < tdbdump_output\n", argv[0]);
-               exit(1);
-       }
-
-       fname = argv[1];
-
-       return restore_tdb(fname);
-}
diff --git a/ccan/tdb2/tools/tdb2tool.c b/ccan/tdb2/tools/tdb2tool.c
deleted file mode 100644 (file)
index 8073561..0000000
+++ /dev/null
@@ -1,802 +0,0 @@
-/* 
-   Unix SMB/CIFS implementation.
-   Samba database functions
-   Copyright (C) Andrew Tridgell              1999-2000
-   Copyright (C) Paul `Rusty' Russell             2000
-   Copyright (C) Jeremy Allison                           2000
-   Copyright (C) Andrew Esh                        2001
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-   
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "tdb2.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <string.h>
-#include <stdarg.h>
-
-static int do_command(void);
-const char *cmdname;
-char *arg1, *arg2;
-size_t arg1len, arg2len;
-int bIterate = 0;
-char *line;
-TDB_DATA iterate_kbuf;
-char cmdline[1024];
-static int disable_mmap;
-
-enum commands {
-       CMD_CREATE_TDB,
-       CMD_OPEN_TDB,
-       CMD_TRANSACTION_START,
-       CMD_TRANSACTION_COMMIT,
-       CMD_TRANSACTION_CANCEL,
-       CMD_ERASE,
-       CMD_DUMP,
-       CMD_INSERT,
-       CMD_MOVE,
-       CMD_STORE,
-       CMD_SHOW,
-       CMD_KEYS,
-       CMD_HEXKEYS,
-       CMD_DELETE,
-#if 0
-       CMD_LIST_HASH_FREE,
-       CMD_LIST_FREE,
-#endif
-       CMD_INFO,
-       CMD_MMAP,
-       CMD_SPEED,
-       CMD_FIRST,
-       CMD_NEXT,
-       CMD_SYSTEM,
-       CMD_CHECK,
-       CMD_QUIT,
-       CMD_HELP
-};
-
-typedef struct {
-       const char *name;
-       enum commands cmd;
-} COMMAND_TABLE;
-
-COMMAND_TABLE cmd_table[] = {
-       {"create",      CMD_CREATE_TDB},
-       {"open",        CMD_OPEN_TDB},
-#if 0
-       {"transaction_start",   CMD_TRANSACTION_START},
-       {"transaction_commit",  CMD_TRANSACTION_COMMIT},
-       {"transaction_cancel",  CMD_TRANSACTION_CANCEL},
-#endif
-       {"erase",       CMD_ERASE},
-       {"dump",        CMD_DUMP},
-       {"insert",      CMD_INSERT},
-       {"move",        CMD_MOVE},
-       {"store",       CMD_STORE},
-       {"show",        CMD_SHOW},
-       {"keys",        CMD_KEYS},
-       {"hexkeys",     CMD_HEXKEYS},
-       {"delete",      CMD_DELETE},
-#if 0
-       {"list",        CMD_LIST_HASH_FREE},
-       {"free",        CMD_LIST_FREE},
-#endif
-       {"info",        CMD_INFO},
-       {"speed",       CMD_SPEED},
-       {"mmap",        CMD_MMAP},
-       {"first",       CMD_FIRST},
-       {"1",           CMD_FIRST},
-       {"next",        CMD_NEXT},
-       {"n",           CMD_NEXT},
-       {"check",       CMD_CHECK},
-       {"quit",        CMD_QUIT},
-       {"q",           CMD_QUIT},
-       {"!",           CMD_SYSTEM},
-       {NULL,          CMD_HELP}
-};
-
-struct timeval tp1,tp2;
-
-static void _start_timer(void)
-{
-       gettimeofday(&tp1,NULL);
-}
-
-static double _end_timer(void)
-{
-       gettimeofday(&tp2,NULL);
-       return((tp2.tv_sec - tp1.tv_sec) + 
-              (tp2.tv_usec - tp1.tv_usec)*1.0e-6);
-}
-
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
-                   const char *message,
-                   void *data)
-{
-       fprintf(stderr, "tdb:%s:%s:%s\n",
-               tdb_name(tdb), tdb_errorstr(ecode), message);
-}
-
-/* a tdb tool for manipulating a tdb database */
-
-static struct tdb_context *tdb;
-
-static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
-static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
-static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
-
-static void print_asc(const char *buf,int len)
-{
-       int i;
-
-       /* We're probably printing ASCII strings so don't try to display
-          the trailing NULL character. */
-
-       if (buf[len - 1] == 0)
-               len--;
-
-       for (i=0;i<len;i++)
-               printf("%c",isprint(buf[i])?buf[i]:'.');
-}
-
-static void print_data(const char *buf,int len)
-{
-       int i=0;
-       if (len<=0) return;
-       printf("[%03X] ",i);
-       for (i=0;i<len;) {
-               printf("%02X ",(int)((unsigned char)buf[i]));
-               i++;
-               if (i%8 == 0) printf(" ");
-               if (i%16 == 0) {      
-                       print_asc(&buf[i-16],8); printf(" ");
-                       print_asc(&buf[i-8],8); printf("\n");
-                       if (i<len) printf("[%03X] ",i);
-               }
-       }
-       if (i%16) {
-               int n;
-               
-               n = 16 - (i%16);
-               printf(" ");
-               if (n>8) printf(" ");
-               while (n--) printf("   ");
-               
-               n = i%16;
-               if (n > 8) n = 8;
-               print_asc(&buf[i-(i%16)],n); printf(" ");
-               n = (i%16) - n;
-               if (n>0) print_asc(&buf[i-n],n); 
-               printf("\n");    
-       }
-}
-
-static void help(void)
-{
-       printf("\n"
-"tdbtool: \n"
-"  create    dbname     : create a database\n"
-"  open      dbname     : open an existing database\n"
-"  openjh    dbname     : open an existing database (jenkins hash)\n"
-"  transaction_start    : start a transaction\n"
-"  transaction_commit   : commit a transaction\n"
-"  transaction_cancel   : cancel a transaction\n"
-"  erase                : erase the database\n"
-"  dump                 : dump the database as strings\n"
-"  keys                 : dump the database keys as strings\n"
-"  hexkeys              : dump the database keys as hex values\n"
-"  info                 : print summary info about the database\n"
-"  insert    key  data  : insert a record\n"
-"  move      key  file  : move a record to a destination tdb\n"
-"  store     key  data  : store a record (replace)\n"
-"  show      key        : show a record by key\n"
-"  delete    key        : delete a record by key\n"
-#if 0
-"  list                 : print the database hash table and freelist\n"
-"  free                 : print the database freelist\n"
-#endif
-"  check                : check the integrity of an opened database\n"
-"  speed                : perform speed tests on the database\n"
-"  ! command            : execute system command\n"
-"  1 | first            : print the first record\n"
-"  n | next             : print the next record\n"
-"  q | quit             : terminate\n"
-"  \\n                   : repeat 'next' command\n"
-"\n");
-}
-
-static void terror(enum TDB_ERROR err, const char *why)
-{
-       if (err != TDB_SUCCESS)
-               printf("%s:%s\n", tdb_errorstr(err), why);
-       else
-               printf("%s\n", why);
-}
-
-static void create_tdb(const char *tdbname)
-{
-       union tdb_attribute log_attr;
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
-       log_attr.base.next = NULL;
-       log_attr.log.fn = tdb_log;
-
-       if (tdb) tdb_close(tdb);
-       tdb = tdb_open(tdbname, (disable_mmap?TDB_NOMMAP:0),
-                      O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr);
-       if (!tdb) {
-               printf("Could not create %s: %s\n", tdbname, strerror(errno));
-       }
-}
-
-static void open_tdb(const char *tdbname)
-{
-       union tdb_attribute log_attr;
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
-       log_attr.base.next = NULL;
-       log_attr.log.fn = tdb_log;
-
-       if (tdb) tdb_close(tdb);
-       tdb = tdb_open(tdbname, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600,
-                      &log_attr);
-       if (!tdb) {
-               printf("Could not open %s: %s\n", tdbname, strerror(errno));
-       }
-}
-
-static void insert_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
-{
-       TDB_DATA key, dbuf;
-       enum TDB_ERROR ecode;
-
-       if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
-               return;
-       }
-
-       key.dptr = (unsigned char *)keyname;
-       key.dsize = keylen;
-       dbuf.dptr = (unsigned char *)data;
-       dbuf.dsize = datalen;
-
-       ecode = tdb_store(tdb, key, dbuf, TDB_INSERT);
-       if (ecode) {
-               terror(ecode, "insert failed");
-       }
-}
-
-static void store_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
-{
-       TDB_DATA key, dbuf;
-       enum TDB_ERROR ecode;
-
-       if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
-               return;
-       }
-
-       if ((data == NULL) || (datalen == 0)) {
-               terror(TDB_SUCCESS, "need data");
-               return;
-       }
-
-       key.dptr = (unsigned char *)keyname;
-       key.dsize = keylen;
-       dbuf.dptr = (unsigned char *)data;
-       dbuf.dsize = datalen;
-
-       printf("Storing key:\n");
-       print_rec(tdb, key, dbuf, NULL);
-
-       ecode = tdb_store(tdb, key, dbuf, TDB_REPLACE);
-       if (ecode) {
-               terror(ecode, "store failed");
-       }
-}
-
-static void show_tdb(char *keyname, size_t keylen)
-{
-       TDB_DATA key, dbuf;
-       enum TDB_ERROR ecode;
-
-       if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
-               return;
-       }
-
-       key.dptr = (unsigned char *)keyname;
-       key.dsize = keylen;
-
-       ecode = tdb_fetch(tdb, key, &dbuf);
-       if (ecode) {
-               terror(ecode, "fetch failed");
-               return;
-       }
-       
-       print_rec(tdb, key, dbuf, NULL);
-       
-       free( dbuf.dptr );
-}
-
-static void delete_tdb(char *keyname, size_t keylen)
-{
-       TDB_DATA key;
-       enum TDB_ERROR ecode;
-
-       if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
-               return;
-       }
-
-       key.dptr = (unsigned char *)keyname;
-       key.dsize = keylen;
-
-       ecode = tdb_delete(tdb, key);
-       if (ecode) {
-               terror(ecode, "delete failed");
-       }
-}
-
-static void move_rec(char *keyname, size_t keylen, char* tdbname)
-{
-       TDB_DATA key, dbuf;
-       struct tdb_context *dst_tdb;
-       enum TDB_ERROR ecode;
-
-       if ((keyname == NULL) || (keylen == 0)) {
-               terror(TDB_SUCCESS, "need key");
-               return;
-       }
-
-       if ( !tdbname ) {
-               terror(TDB_SUCCESS, "need destination tdb name");
-               return;
-       }
-
-       key.dptr = (unsigned char *)keyname;
-       key.dsize = keylen;
-
-       ecode = tdb_fetch(tdb, key, &dbuf);
-       if (ecode) {
-               terror(ecode, "fetch failed");
-               return;
-       }
-       
-       print_rec(tdb, key, dbuf, NULL);
-       
-       dst_tdb = tdb_open(tdbname, 0, O_RDWR, 0600, NULL);
-       if ( !dst_tdb ) {
-               terror(TDB_SUCCESS, "unable to open destination tdb");
-               return;
-       }
-       
-       ecode = tdb_store( dst_tdb, key, dbuf, TDB_REPLACE);
-       if (ecode)
-               terror(ecode, "failed to move record");
-       else
-               printf("record moved\n");
-       
-       tdb_close( dst_tdb );
-}
-
-static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
-{
-       printf("\nkey %d bytes\n", (int)key.dsize);
-       print_asc((const char *)key.dptr, key.dsize);
-       printf("\ndata %d bytes\n", (int)dbuf.dsize);
-       print_data((const char *)dbuf.dptr, dbuf.dsize);
-       return 0;
-}
-
-static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
-{
-       printf("key %d bytes: ", (int)key.dsize);
-       print_asc((const char *)key.dptr, key.dsize);
-       printf("\n");
-       return 0;
-}
-
-static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
-{
-       printf("key %d bytes\n", (int)key.dsize);
-       print_data((const char *)key.dptr, key.dsize);
-       printf("\n");
-       return 0;
-}
-
-static int total_bytes;
-
-static int traverse_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
-{
-       total_bytes += dbuf.dsize;
-       return 0;
-}
-
-static void info_tdb(void)
-{
-       enum TDB_ERROR ecode;
-       char *summary;
-
-       ecode = tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &summary);
-
-       if (ecode) {
-               terror(ecode, "Getting summary");
-       } else {
-               printf("%s", summary);
-               free(summary);
-       }
-}
-
-static void speed_tdb(const char *tlimit)
-{
-       unsigned timelimit = tlimit?atoi(tlimit):0;
-       double t;
-       int ops;
-       if (timelimit == 0) timelimit = 5;
-
-       ops = 0;
-       printf("Testing store speed for %u seconds\n", timelimit);
-       _start_timer();
-       do {
-               long int r = random();
-               TDB_DATA key, dbuf;
-               key = tdb_mkdata("store test", strlen("store test"));
-               dbuf.dptr = (unsigned char *)&r;
-               dbuf.dsize = sizeof(r);
-               tdb_store(tdb, key, dbuf, TDB_REPLACE);
-               t = _end_timer();
-               ops++;
-       } while (t < timelimit);
-       printf("%10.3f ops/sec\n", ops/t);
-
-       ops = 0;
-       printf("Testing fetch speed for %u seconds\n", timelimit);
-       _start_timer();
-       do {
-               long int r = random();
-               TDB_DATA key, dbuf;
-               key = tdb_mkdata("store test", strlen("store test"));
-               dbuf.dptr = (unsigned char *)&r;
-               dbuf.dsize = sizeof(r);
-               tdb_fetch(tdb, key, &dbuf);
-               t = _end_timer();
-               ops++;
-       } while (t < timelimit);
-       printf("%10.3f ops/sec\n", ops/t);
-
-       ops = 0;
-       printf("Testing transaction speed for %u seconds\n", timelimit);
-       _start_timer();
-       do {
-               long int r = random();
-               TDB_DATA key, dbuf;
-               key = tdb_mkdata("transaction test", strlen("transaction test"));
-               dbuf.dptr = (unsigned char *)&r;
-               dbuf.dsize = sizeof(r);
-               tdb_transaction_start(tdb);
-               tdb_store(tdb, key, dbuf, TDB_REPLACE);
-               tdb_transaction_commit(tdb);
-               t = _end_timer();
-               ops++;
-       } while (t < timelimit);
-       printf("%10.3f ops/sec\n", ops/t);
-
-       ops = 0;
-       printf("Testing traverse speed for %u seconds\n", timelimit);
-       _start_timer();
-       do {
-               tdb_traverse(tdb, traverse_fn, NULL);
-               t = _end_timer();
-               ops++;
-       } while (t < timelimit);
-       printf("%10.3f ops/sec\n", ops/t);
-}
-
-static void toggle_mmap(void)
-{
-       disable_mmap = !disable_mmap;
-       if (disable_mmap) {
-               printf("mmap is disabled\n");
-       } else {
-               printf("mmap is enabled\n");
-       }
-}
-
-static char *tdb_getline(const char *prompt)
-{
-       static char thisline[1024];
-       char *p;
-       fputs(prompt, stdout);
-       thisline[0] = 0;
-       p = fgets(thisline, sizeof(thisline)-1, stdin);
-       if (p) p = strchr(p, '\n');
-       if (p) *p = 0;
-       return p?thisline:NULL;
-}
-
-static int do_delete_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf,
-                     void *state)
-{
-    return tdb_delete(the_tdb, key);
-}
-
-static void first_record(struct tdb_context *the_tdb, TDB_DATA *pkey)
-{
-       TDB_DATA dbuf;
-       enum TDB_ERROR ecode;
-       ecode = tdb_firstkey(the_tdb, pkey);
-       if (!ecode)
-               ecode = tdb_fetch(the_tdb, *pkey, &dbuf);
-       if (ecode) terror(ecode, "fetch failed");
-       else {
-               print_rec(the_tdb, *pkey, dbuf, NULL);
-       }
-}
-
-static void next_record(struct tdb_context *the_tdb, TDB_DATA *pkey)
-{
-       TDB_DATA dbuf;
-       enum TDB_ERROR ecode;
-       ecode = tdb_nextkey(the_tdb, pkey);
-
-       if (!ecode)
-               ecode = tdb_fetch(the_tdb, *pkey, &dbuf);
-       if (ecode) 
-               terror(ecode, "fetch failed");
-       else
-               print_rec(the_tdb, *pkey, dbuf, NULL);
-}
-
-static void check_db(struct tdb_context *the_tdb)
-{
-       if (!the_tdb) {
-               printf("Error: No database opened!\n");
-       } else {
-               if (tdb_check(the_tdb, NULL, NULL) != 0)
-                       printf("Integrity check for the opened database failed.\n");
-               else
-                       printf("Database integrity is OK.\n");
-       }
-}
-
-static int do_command(void)
-{
-       COMMAND_TABLE *ctp = cmd_table;
-       enum commands mycmd = CMD_HELP;
-       int cmd_len;
-
-       if (cmdname && strlen(cmdname) == 0) {
-               mycmd = CMD_NEXT;
-       } else {
-               while (ctp->name) {
-                       cmd_len = strlen(ctp->name);
-                       if (strncmp(ctp->name,cmdname,cmd_len) == 0) {
-                               mycmd = ctp->cmd;
-                               break;
-                       }
-                       ctp++;
-               }
-       }
-
-       switch (mycmd) {
-       case CMD_CREATE_TDB:
-               bIterate = 0;
-               create_tdb(arg1);
-               return 0;
-       case CMD_OPEN_TDB:
-               bIterate = 0;
-               open_tdb(arg1);
-               return 0;
-       case CMD_SYSTEM:
-               /* Shell command */
-               if (system(arg1) == -1) {
-                       terror(TDB_SUCCESS, "system() call failed\n");
-               }
-               return 0;
-       case CMD_QUIT:
-               return 1;
-       default:
-               /* all the rest require a open database */
-               if (!tdb) {
-                       bIterate = 0;
-                       terror(TDB_SUCCESS, "database not open");
-                       help();
-                       return 0;
-               }
-               switch (mycmd) {
-               case CMD_TRANSACTION_START:
-                       bIterate = 0;
-                       tdb_transaction_start(tdb);
-                       return 0;
-               case CMD_TRANSACTION_COMMIT:
-                       bIterate = 0;
-                       tdb_transaction_commit(tdb);
-                       return 0;
-               case CMD_TRANSACTION_CANCEL:
-                       bIterate = 0;
-                       tdb_transaction_cancel(tdb);
-                       return 0;
-               case CMD_ERASE:
-                       bIterate = 0;
-                       tdb_traverse(tdb, do_delete_fn, NULL);
-                       return 0;
-               case CMD_DUMP:
-                       bIterate = 0;
-                       tdb_traverse(tdb, print_rec, NULL);
-                       return 0;
-               case CMD_INSERT:
-                       bIterate = 0;
-                       insert_tdb(arg1, arg1len,arg2,arg2len);
-                       return 0;
-               case CMD_MOVE:
-                       bIterate = 0;
-                       move_rec(arg1,arg1len,arg2);
-                       return 0;
-               case CMD_STORE:
-                       bIterate = 0;
-                       store_tdb(arg1,arg1len,arg2,arg2len);
-                       return 0;
-               case CMD_SHOW:
-                       bIterate = 0;
-                       show_tdb(arg1, arg1len);
-                       return 0;
-               case CMD_KEYS:
-                       tdb_traverse(tdb, print_key, NULL);
-                       return 0;
-               case CMD_HEXKEYS:
-                       tdb_traverse(tdb, print_hexkey, NULL);
-                       return 0;
-               case CMD_DELETE:
-                       bIterate = 0;
-                       delete_tdb(arg1,arg1len);
-                       return 0;
-#if 0
-               case CMD_LIST_HASH_FREE:
-                       tdb_dump_all(tdb);
-                       return 0;
-               case CMD_LIST_FREE:
-                       tdb_printfreelist(tdb);
-                       return 0;
-#endif
-               case CMD_INFO:
-                       info_tdb();
-                       return 0;
-               case CMD_SPEED:
-                       speed_tdb(arg1);
-                       return 0;
-               case CMD_MMAP:
-                       toggle_mmap();
-                       return 0;
-               case CMD_FIRST:
-                       bIterate = 1;
-                       first_record(tdb, &iterate_kbuf);
-                       return 0;
-               case CMD_NEXT:
-                       if (bIterate)
-                               next_record(tdb, &iterate_kbuf);
-                       return 0;
-               case CMD_CHECK:
-                       check_db(tdb);
-                       return 0;
-               case CMD_HELP:
-                       help();
-                       return 0;
-               case CMD_CREATE_TDB:
-               case CMD_OPEN_TDB:
-               case CMD_SYSTEM:
-               case CMD_QUIT:
-                       /*
-                        * unhandled commands.  cases included here to avoid compiler
-                        * warnings.
-                        */
-                       return 0;
-               }
-       }
-
-       return 0;
-}
-
-static char *convert_string(char *instring, size_t *sizep)
-{
-       size_t length = 0;
-       char *outp, *inp;
-       char temp[3];
-
-       outp = inp = instring;
-
-       while (*inp) {
-               if (*inp == '\\') {
-                       inp++;
-                       if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) {
-                               temp[0] = *inp++;
-                               temp[1] = '\0';
-                               if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) {
-                                       temp[1] = *inp++;
-                                       temp[2] = '\0';
-                               }
-                               *outp++ = (char)strtol((const char *)temp,NULL,16);
-                       } else {
-                               *outp++ = *inp++;
-                       }
-               } else {
-                       *outp++ = *inp++;
-               }
-               length++;
-       }
-       *sizep = length;
-       return instring;
-}
-
-int main(int argc, char *argv[])
-{
-       cmdname = "";
-       arg1 = NULL;
-       arg1len = 0;
-       arg2 = NULL;
-       arg2len = 0;
-
-       if (argv[1]) {
-               cmdname = "open";
-               arg1 = argv[1];
-               do_command();
-               cmdname =  "";
-               arg1 = NULL;
-       }
-
-       switch (argc) {
-       case 1:
-       case 2:
-               /* Interactive mode */
-               while ((cmdname = tdb_getline("tdb> "))) {
-                       arg2 = arg1 = NULL;
-                       if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) {
-                               arg1++;
-                               arg2 = arg1;
-                               while (*arg2) {
-                                       if (*arg2 == ' ') {
-                                               *arg2++ = '\0';
-                                               break;
-                                       }
-                                       if ((*arg2++ == '\\') && (*arg2 == ' ')) {
-                                               arg2++;
-                                       }
-                               }
-                       }
-                       if (arg1) arg1 = convert_string(arg1,&arg1len);
-                       if (arg2) arg2 = convert_string(arg2,&arg2len);
-                       if (do_command()) break;
-               }
-               break;
-       case 5:
-               arg2 = convert_string(argv[4],&arg2len);
-       case 4:
-               arg1 = convert_string(argv[3],&arg1len);
-       case 3:
-               cmdname = argv[2];
-       default:
-               do_command();
-               break;
-       }
-
-       if (tdb) tdb_close(tdb);
-
-       return 0;
-}
diff --git a/ccan/tdb2/tools/tdb2torture.c b/ccan/tdb2/tools/tdb2torture.c
deleted file mode 100644 (file)
index 29ecb6a..0000000
+++ /dev/null
@@ -1,498 +0,0 @@
-/* this tests tdb by doing lots of ops from several simultaneous
-   writers - that stresses the locking code. 
-*/
-
-#include "tdb2.h"
-#include <stdlib.h>
-#include <err.h>
-#include <getopt.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <time.h>
-#include <sys/wait.h>
-
-//#define REOPEN_PROB 30
-#define DELETE_PROB 8
-#define STORE_PROB 4
-#define APPEND_PROB 6
-#define TRANSACTION_PROB 10
-#define TRANSACTION_PREPARE_PROB 2
-#define LOCKSTORE_PROB 5
-#define TRAVERSE_PROB 20
-#define TRAVERSE_MOD_PROB 100
-#define TRAVERSE_ABORT_PROB 500
-#define CULL_PROB 100
-#define KEYLEN 3
-#define DATALEN 100
-
-static struct tdb_context *db;
-static int in_transaction;
-static int in_traverse;
-static int error_count;
-#if TRANSACTION_PROB
-static int always_transaction = 0;
-#endif
-static int loopnum;
-static int count_pipe;
-static union tdb_attribute log_attr;
-static union tdb_attribute seed_attr;
-
-static void tdb_log(struct tdb_context *tdb,
-                   enum tdb_log_level level,
-                   enum TDB_ERROR ecode,
-                   const char *message,
-                   void *data)
-{
-       printf("tdb:%s:%s:%s\n",
-              tdb_name(tdb), tdb_errorstr(ecode), message);
-       fflush(stdout);
-#if 0
-       {
-               char str[200];
-               signal(SIGUSR1, SIG_IGN);
-               sprintf(str,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
-               system(str);
-       }
-#endif 
-}
-
-#include "../private.h"
-
-static void segv_handler(int sig, siginfo_t *info, void *p)
-{
-       char string[100];
-
-       sprintf(string, "%u: death at %p (map_ptr %p, map_size %zu)\n",
-               getpid(), info->si_addr, db->file->map_ptr,
-               (size_t)db->file->map_size);
-       if (write(2, string, strlen(string)) > 0)
-               sleep(60);
-       _exit(11);
-}      
-
-static void fatal(struct tdb_context *tdb, const char *why)
-{
-       fprintf(stderr, "%u:%s:%s\n", getpid(), why,
-               tdb ? tdb_errorstr(tdb_error(tdb)) : "(no tdb)");
-       error_count++;
-}
-
-static char *randbuf(int len)
-{
-       char *buf;
-       int i;
-       buf = (char *)malloc(len+1);
-
-       for (i=0;i<len;i++) {
-               buf[i] = 'a' + (rand() % 26);
-       }
-       buf[i] = 0;
-       return buf;
-}
-
-static void addrec_db(void);
-static int modify_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
-                          void *state)
-{
-#if CULL_PROB
-       if (random() % CULL_PROB == 0) {
-               tdb_delete(tdb, key);
-       }
-#endif
-
-#if TRAVERSE_MOD_PROB
-       if (random() % TRAVERSE_MOD_PROB == 0) {
-               addrec_db();
-       }
-#endif
-
-#if TRAVERSE_ABORT_PROB
-       if (random() % TRAVERSE_ABORT_PROB == 0)
-               return 1;
-#endif
-
-       return 0;
-}
-
-static void addrec_db(void)
-{
-       int klen, dlen;
-       char *k, *d;
-       TDB_DATA key, data;
-
-       klen = 1 + (rand() % KEYLEN);
-       dlen = 1 + (rand() % DATALEN);
-
-       k = randbuf(klen);
-       d = randbuf(dlen);
-
-       key.dptr = (unsigned char *)k;
-       key.dsize = klen+1;
-
-       data.dptr = (unsigned char *)d;
-       data.dsize = dlen+1;
-
-#if REOPEN_PROB
-       if (in_traverse == 0 && in_transaction == 0 && random() % REOPEN_PROB == 0) {
-               tdb_reopen_all(0);
-               goto next;
-       } 
-#endif
-
-#if TRANSACTION_PROB
-       if (in_traverse == 0 && in_transaction == 0 && (always_transaction || random() % TRANSACTION_PROB == 0)) {
-               if (tdb_transaction_start(db) != 0) {
-                       fatal(db, "tdb_transaction_start failed");
-               }
-               in_transaction++;
-               goto next;
-       }
-       if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
-               if (random() % TRANSACTION_PREPARE_PROB == 0) {
-                       if (tdb_transaction_prepare_commit(db) != 0) {
-                               fatal(db, "tdb_transaction_prepare_commit failed");
-                       }
-               }
-               if (tdb_transaction_commit(db) != 0) {
-                       fatal(db, "tdb_transaction_commit failed");
-               }
-               in_transaction--;
-               goto next;
-       }
-
-       if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
-               tdb_transaction_cancel(db);
-               in_transaction--;
-               goto next;
-       }
-#endif
-
-#if DELETE_PROB
-       if (random() % DELETE_PROB == 0) {
-               tdb_delete(db, key);
-               goto next;
-       }
-#endif
-
-#if STORE_PROB
-       if (random() % STORE_PROB == 0) {
-               if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
-                       fatal(db, "tdb_store failed");
-               }
-               goto next;
-       }
-#endif
-
-#if APPEND_PROB
-       if (random() % APPEND_PROB == 0) {
-               if (tdb_append(db, key, data) != 0) {
-                       fatal(db, "tdb_append failed");
-               }
-               goto next;
-       }
-#endif
-
-#if LOCKSTORE_PROB
-       if (random() % LOCKSTORE_PROB == 0) {
-               tdb_chainlock(db, key);
-               if (tdb_fetch(db, key, &data) != TDB_SUCCESS) {
-                       data.dsize = 0;
-                       data.dptr = NULL;
-               }
-               if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
-                       fatal(db, "tdb_store failed");
-               }
-               if (data.dptr) free(data.dptr);
-               tdb_chainunlock(db, key);
-               goto next;
-       } 
-#endif
-
-#if TRAVERSE_PROB
-       /* FIXME: recursive traverses break transactions? */
-       if (in_traverse == 0 && random() % TRAVERSE_PROB == 0) {
-               in_traverse++;
-               tdb_traverse(db, modify_traverse, NULL);
-               in_traverse--;
-               goto next;
-       }
-#endif
-
-       if (tdb_fetch(db, key, &data) == TDB_SUCCESS)
-               free(data.dptr);
-
-next:
-       free(k);
-       free(d);
-}
-
-static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
-                       void *state)
-{
-       tdb_delete(tdb, key);
-       return 0;
-}
-
-static void usage(void)
-{
-       printf("Usage: tdbtorture"
-#if TRANSACTION_PROB
-              " [-t]"
-#endif
-              " [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-S]\n");
-       exit(0);
-}
-
-static void send_count_and_suicide(int sig)
-{
-       /* This ensures our successor can continue where we left off. */
-       if (write(count_pipe, &loopnum, sizeof(loopnum)) != sizeof(loopnum))
-               exit(2);
-       /* This gives a unique signature. */
-       kill(getpid(), SIGUSR2);
-}
-
-static int run_child(int i, int seed, unsigned num_loops, unsigned start,
-                    int tdb_flags)
-{
-       struct sigaction act = { .sa_sigaction = segv_handler,
-                                .sa_flags = SA_SIGINFO };
-       sigaction(11, &act, NULL);      
-
-       db = tdb_open("torture.tdb", tdb_flags, O_RDWR | O_CREAT, 0600,
-                     &log_attr);
-       if (!db) {
-               fatal(NULL, "db open failed");
-       }
-
-#if 0
-       if (i == 0) {
-               printf("pid %i\n", getpid());
-               sleep(9);
-       } else
-               sleep(10);
-#endif
-
-       srand(seed + i);
-       srandom(seed + i);
-
-       /* Set global, then we're ready to handle being killed. */
-       loopnum = start;
-       signal(SIGUSR1, send_count_and_suicide);
-
-       for (;loopnum<num_loops && error_count == 0;loopnum++) {
-               addrec_db();
-       }
-
-       if (error_count == 0) {
-               tdb_traverse(db, NULL, NULL);
-#if TRANSACTION_PROB
-               if (always_transaction) {
-                       while (in_transaction) {
-                               tdb_transaction_cancel(db);
-                               in_transaction--;
-                       }
-                       if (tdb_transaction_start(db) != 0)
-                               fatal(db, "tdb_transaction_start failed");
-               }
-#endif
-               tdb_traverse(db, traverse_fn, NULL);
-               tdb_traverse(db, traverse_fn, NULL);
-
-#if TRANSACTION_PROB
-               if (always_transaction) {
-                       if (tdb_transaction_commit(db) != 0)
-                               fatal(db, "tdb_transaction_commit failed");
-               }
-#endif
-       }
-
-       tdb_close(db);
-
-       return (error_count < 100 ? error_count : 100);
-}
-
-int main(int argc, char * const *argv)
-{
-       int i, seed = -1;
-       int num_loops = 5000;
-       int num_procs = 3;
-       int c, pfds[2];
-       extern char *optarg;
-       pid_t *pids;
-       int kill_random = 0;
-       int *done;
-       int tdb_flags = TDB_DEFAULT;
-
-       log_attr.base.attr = TDB_ATTRIBUTE_LOG;
-       log_attr.base.next = &seed_attr;
-       log_attr.log.fn = tdb_log;
-       seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
-
-       while ((c = getopt(argc, argv, "n:l:s:thkS")) != -1) {
-               switch (c) {
-               case 'n':
-                       num_procs = strtol(optarg, NULL, 0);
-                       break;
-               case 'l':
-                       num_loops = strtol(optarg, NULL, 0);
-                       break;
-               case 's':
-                       seed = strtol(optarg, NULL, 0);
-                       break;
-               case 'S':
-                       tdb_flags = TDB_NOSYNC;
-                       break;
-               case 't':
-#if TRANSACTION_PROB
-                       always_transaction = 1;
-#else
-                       fprintf(stderr, "Transactions not supported\n");
-                       usage();
-#endif
-                       break;
-               case 'k':
-                       kill_random = 1;
-                       break;
-               default:
-                       usage();
-               }
-       }
-
-       unlink("torture.tdb");
-
-       if (seed == -1) {
-               seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
-       }
-       seed_attr.seed.seed = (((uint64_t)seed) << 32) | seed; 
-
-       if (num_procs == 1 && !kill_random) {
-               /* Don't fork for this case, makes debugging easier. */
-               error_count = run_child(0, seed, num_loops, 0, tdb_flags);
-               goto done;
-       }
-
-       pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
-       done = (int *)calloc(sizeof(int), num_procs);
-
-       if (pipe(pfds) != 0) {
-               perror("Creating pipe");
-               exit(1);
-       }
-       count_pipe = pfds[1];
-
-       for (i=0;i<num_procs;i++) {
-               if ((pids[i]=fork()) == 0) {
-                       close(pfds[0]);
-                       if (i == 0) {
-                               printf("testing with %d processes, %d loops, seed=%d%s\n", 
-                                      num_procs, num_loops, seed, 
-#if TRANSACTION_PROB
-                                      always_transaction ? " (all within transactions)" : ""
-#else
-                                      ""
-#endif
-                                       );
-                       }
-                       exit(run_child(i, seed, num_loops, 0, tdb_flags));
-               }
-       }
-
-       while (num_procs) {
-               int status, j;
-               pid_t pid;
-
-               if (error_count != 0) {
-                       /* try and stop the test on any failure */
-                       for (j=0;j<num_procs;j++) {
-                               if (pids[j] != 0) {
-                                       kill(pids[j], SIGTERM);
-                               }
-                       }
-               }
-
-               pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
-               if (pid == 0) {
-                       struct timespec ts;
-
-                       /* Sleep for 1/10 second. */
-                       ts.tv_sec = 0;
-                       ts.tv_nsec = 100000000;
-                       nanosleep(&ts, NULL);
-
-                       /* Kill someone. */
-                       kill(pids[random() % num_procs], SIGUSR1);
-                       continue;
-               }
-
-               if (pid == -1) {
-                       perror("failed to wait for child\n");
-                       exit(1);
-               }
-
-               for (j=0;j<num_procs;j++) {
-                       if (pids[j] == pid) break;
-               }
-               if (j == num_procs) {
-                       printf("unknown child %d exited!?\n", (int)pid);
-                       exit(1);
-               }
-               if (WIFSIGNALED(status)) {
-                       if (WTERMSIG(status) == SIGUSR2
-                           || WTERMSIG(status) == SIGUSR1) {
-                               /* SIGUSR2 means they wrote to pipe. */
-                               if (WTERMSIG(status) == SIGUSR2) {
-                                       if (read(pfds[0], &done[j],
-                                                sizeof(done[j]))
-                                           != sizeof(done[j]))
-                                               err(1,
-                                                   "Short read from child?");
-                               }
-                               pids[j] = fork();
-                               if (pids[j] == 0)
-                                       exit(run_child(j, seed, num_loops,
-                                                      done[j], tdb_flags));
-                               printf("Restarting child %i for %u-%u\n",
-                                      j, done[j], num_loops);
-                               continue;
-                       }
-                       printf("child %d exited with signal %d\n",
-                              (int)pid, WTERMSIG(status));
-                       error_count++;
-               } else {
-                       if (WEXITSTATUS(status) != 0) {
-                               printf("child %d exited with status %d\n",
-                                      (int)pid, WEXITSTATUS(status));
-                               error_count++;
-                       }
-               }
-               memmove(&pids[j], &pids[j+1],
-                       (num_procs - j - 1)*sizeof(pids[0]));
-               num_procs--;
-       }
-
-       free(pids);
-
-done:
-       if (error_count == 0) {
-               db = tdb_open("torture.tdb", TDB_DEFAULT, O_RDWR | O_CREAT,
-                             0600, &log_attr);
-               if (!db) {
-                       fatal(db, "db open failed");
-                       exit(1);
-               }
-               if (tdb_check(db, NULL, NULL) != 0) {
-                       fatal(db, "db check failed");
-                       exit(1);
-               }
-               tdb_close(db);
-               printf("OK\n");
-       }
-
-       return error_count;
-}
diff --git a/ccan/tdb2/transaction.c b/ccan/tdb2/transaction.c
deleted file mode 100644 (file)
index dd94510..0000000
+++ /dev/null
@@ -1,1343 +0,0 @@
- /*
-   Unix SMB/CIFS implementation.
-
-   trivial database library
-
-   Copyright (C) Andrew Tridgell              2005
-   Copyright (C) Rusty Russell                2010
-
-     ** NOTE! The following LGPL license applies to the tdb
-     ** library. This does NOT imply that all of Samba is released
-     ** under the LGPL
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-#define SAFE_FREE(x) do { if ((x) != NULL) {free((void *)x); (x)=NULL;} } while(0)
-
-/*
-  transaction design:
-
-  - only allow a single transaction at a time per database. This makes
-    using the transaction API simpler, as otherwise the caller would
-    have to cope with temporary failures in transactions that conflict
-    with other current transactions
-
-  - keep the transaction recovery information in the same file as the
-    database, using a special 'transaction recovery' record pointed at
-    by the header. This removes the need for extra journal files as
-    used by some other databases
-
-  - dynamically allocated the transaction recover record, re-using it
-    for subsequent transactions. If a larger record is needed then
-    tdb_free() the old record to place it on the normal tdb freelist
-    before allocating the new record
-
-  - during transactions, keep a linked list of writes all that have
-    been performed by intercepting all tdb_write() calls. The hooked
-    transaction versions of tdb_read() and tdb_write() check this
-    linked list and try to use the elements of the list in preference
-    to the real database.
-
-  - don't allow any locks to be held when a transaction starts,
-    otherwise we can end up with deadlock (plus lack of lock nesting
-    in POSIX locks would mean the lock is lost)
-
-  - if the caller gains a lock during the transaction but doesn't
-    release it then fail the commit
-
-  - allow for nested calls to tdb_transaction_start(), re-using the
-    existing transaction record. If the inner transaction is canceled
-    then a subsequent commit will fail
-
-  - keep a mirrored copy of the tdb hash chain heads to allow for the
-    fast hash heads scan on traverse, updating the mirrored copy in
-    the transaction version of tdb_write
-
-  - allow callers to mix transaction and non-transaction use of tdb,
-    although once a transaction is started then an exclusive lock is
-    gained until the transaction is committed or canceled
-
-  - the commit stategy involves first saving away all modified data
-    into a linearised buffer in the transaction recovery area, then
-    marking the transaction recovery area with a magic value to
-    indicate a valid recovery record. In total 4 fsync/msync calls are
-    needed per commit to prevent race conditions. It might be possible
-    to reduce this to 3 or even 2 with some more work.
-
-  - check for a valid recovery record on open of the tdb, while the
-    open lock is held. Automatically recover from the transaction
-    recovery area if needed, then continue with the open as
-    usual. This allows for smooth crash recovery with no administrator
-    intervention.
-
-  - if TDB_NOSYNC is passed to flags in tdb_open then transactions are
-    still available, but no transaction recovery area is used and no
-    fsync/msync calls are made.
-*/
-
-/*
-  hold the context of any current transaction
-*/
-struct tdb_transaction {
-       /* the original io methods - used to do IOs to the real db */
-       const struct tdb_methods *io_methods;
-
-       /* the list of transaction blocks. When a block is first
-          written to, it gets created in this list */
-       uint8_t **blocks;
-       size_t num_blocks;
-       size_t last_block_size; /* number of valid bytes in the last block */
-
-       /* non-zero when an internal transaction error has
-          occurred. All write operations will then fail until the
-          transaction is ended */
-       int transaction_error;
-
-       /* when inside a transaction we need to keep track of any
-          nested tdb_transaction_start() calls, as these are allowed,
-          but don't create a new transaction */
-       unsigned int nesting;
-
-       /* set when a prepare has already occurred */
-       bool prepared;
-       tdb_off_t magic_offset;
-
-       /* old file size before transaction */
-       tdb_len_t old_map_size;
-};
-
-/* This doesn't really need to be pagesize, but we use it for similar reasons. */
-#define PAGESIZE 65536
-
-/*
-  read while in a transaction. We need to check first if the data is in our list
-  of transaction elements, then if not do a real read
-*/
-static enum TDB_ERROR transaction_read(struct tdb_context *tdb, tdb_off_t off,
-                                      void *buf, tdb_len_t len)
-{
-       size_t blk;
-       enum TDB_ERROR ecode;
-
-       /* break it down into block sized ops */
-       while (len + (off % PAGESIZE) > PAGESIZE) {
-               tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
-               ecode = transaction_read(tdb, off, buf, len2);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-               len -= len2;
-               off += len2;
-               buf = (void *)(len2 + (char *)buf);
-       }
-
-       if (len == 0) {
-               return TDB_SUCCESS;
-       }
-
-       blk = off / PAGESIZE;
-
-       /* see if we have it in the block list */
-       if (tdb->tdb2.transaction->num_blocks <= blk ||
-           tdb->tdb2.transaction->blocks[blk] == NULL) {
-               /* nope, do a real read */
-               ecode = tdb->tdb2.transaction->io_methods->tread(tdb, off, buf, len);
-               if (ecode != TDB_SUCCESS) {
-                       goto fail;
-               }
-               return 0;
-       }
-
-       /* it is in the block list. Now check for the last block */
-       if (blk == tdb->tdb2.transaction->num_blocks-1) {
-               if (len > tdb->tdb2.transaction->last_block_size) {
-                       ecode = TDB_ERR_IO;
-                       goto fail;
-               }
-       }
-
-       /* now copy it out of this block */
-       memcpy(buf, tdb->tdb2.transaction->blocks[blk] + (off % PAGESIZE), len);
-       return TDB_SUCCESS;
-
-fail:
-       tdb->tdb2.transaction->transaction_error = 1;
-       return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                         "transaction_read: failed at off=%zu len=%zu",
-                         (size_t)off, (size_t)len);
-}
-
-
-/*
-  write while in a transaction
-*/
-static enum TDB_ERROR transaction_write(struct tdb_context *tdb, tdb_off_t off,
-                                       const void *buf, tdb_len_t len)
-{
-       size_t blk;
-       enum TDB_ERROR ecode;
-
-       /* Only a commit is allowed on a prepared transaction */
-       if (tdb->tdb2.transaction->prepared) {
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_ERROR,
-                                  "transaction_write: transaction already"
-                                  " prepared, write not allowed");
-               goto fail;
-       }
-
-       /* break it up into block sized chunks */
-       while (len + (off % PAGESIZE) > PAGESIZE) {
-               tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
-               ecode = transaction_write(tdb, off, buf, len2);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-               len -= len2;
-               off += len2;
-               if (buf != NULL) {
-                       buf = (const void *)(len2 + (const char *)buf);
-               }
-       }
-
-       if (len == 0) {
-               return TDB_SUCCESS;
-       }
-
-       blk = off / PAGESIZE;
-       off = off % PAGESIZE;
-
-       if (tdb->tdb2.transaction->num_blocks <= blk) {
-               uint8_t **new_blocks;
-               /* expand the blocks array */
-               if (tdb->tdb2.transaction->blocks == NULL) {
-                       new_blocks = (uint8_t **)malloc(
-                               (blk+1)*sizeof(uint8_t *));
-               } else {
-                       new_blocks = (uint8_t **)realloc(
-                               tdb->tdb2.transaction->blocks,
-                               (blk+1)*sizeof(uint8_t *));
-               }
-               if (new_blocks == NULL) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                          "transaction_write:"
-                                          " failed to allocate");
-                       goto fail;
-               }
-               memset(&new_blocks[tdb->tdb2.transaction->num_blocks], 0,
-                      (1+(blk - tdb->tdb2.transaction->num_blocks))*sizeof(uint8_t *));
-               tdb->tdb2.transaction->blocks = new_blocks;
-               tdb->tdb2.transaction->num_blocks = blk+1;
-               tdb->tdb2.transaction->last_block_size = 0;
-       }
-
-       /* allocate and fill a block? */
-       if (tdb->tdb2.transaction->blocks[blk] == NULL) {
-               tdb->tdb2.transaction->blocks[blk] = (uint8_t *)calloc(PAGESIZE, 1);
-               if (tdb->tdb2.transaction->blocks[blk] == NULL) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                          "transaction_write:"
-                                          " failed to allocate");
-                       goto fail;
-               }
-               if (tdb->tdb2.transaction->old_map_size > blk * PAGESIZE) {
-                       tdb_len_t len2 = PAGESIZE;
-                       if (len2 + (blk * PAGESIZE) > tdb->tdb2.transaction->old_map_size) {
-                               len2 = tdb->tdb2.transaction->old_map_size - (blk * PAGESIZE);
-                       }
-                       ecode = tdb->tdb2.transaction->io_methods->tread(tdb,
-                                       blk * PAGESIZE,
-                                       tdb->tdb2.transaction->blocks[blk],
-                                       len2);
-                       if (ecode != TDB_SUCCESS) {
-                               ecode = tdb_logerr(tdb, ecode,
-                                                  TDB_LOG_ERROR,
-                                                  "transaction_write:"
-                                                  " failed to"
-                                                  " read old block: %s",
-                                                  strerror(errno));
-                               SAFE_FREE(tdb->tdb2.transaction->blocks[blk]);
-                               goto fail;
-                       }
-                       if (blk == tdb->tdb2.transaction->num_blocks-1) {
-                               tdb->tdb2.transaction->last_block_size = len2;
-                       }
-               }
-       }
-
-       /* overwrite part of an existing block */
-       if (buf == NULL) {
-               memset(tdb->tdb2.transaction->blocks[blk] + off, 0, len);
-       } else {
-               memcpy(tdb->tdb2.transaction->blocks[blk] + off, buf, len);
-       }
-       if (blk == tdb->tdb2.transaction->num_blocks-1) {
-               if (len + off > tdb->tdb2.transaction->last_block_size) {
-                       tdb->tdb2.transaction->last_block_size = len + off;
-               }
-       }
-
-       return TDB_SUCCESS;
-
-fail:
-       tdb->tdb2.transaction->transaction_error = 1;
-       return ecode;
-}
-
-
-/*
-  write while in a transaction - this variant never expands the transaction blocks, it only
-  updates existing blocks. This means it cannot change the recovery size
-*/
-static void transaction_write_existing(struct tdb_context *tdb, tdb_off_t off,
-                                      const void *buf, tdb_len_t len)
-{
-       size_t blk;
-
-       /* break it up into block sized chunks */
-       while (len + (off % PAGESIZE) > PAGESIZE) {
-               tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
-               transaction_write_existing(tdb, off, buf, len2);
-               len -= len2;
-               off += len2;
-               if (buf != NULL) {
-                       buf = (const void *)(len2 + (const char *)buf);
-               }
-       }
-
-       if (len == 0) {
-               return;
-       }
-
-       blk = off / PAGESIZE;
-       off = off % PAGESIZE;
-
-       if (tdb->tdb2.transaction->num_blocks <= blk ||
-           tdb->tdb2.transaction->blocks[blk] == NULL) {
-               return;
-       }
-
-       if (blk == tdb->tdb2.transaction->num_blocks-1 &&
-           off + len > tdb->tdb2.transaction->last_block_size) {
-               if (off >= tdb->tdb2.transaction->last_block_size) {
-                       return;
-               }
-               len = tdb->tdb2.transaction->last_block_size - off;
-       }
-
-       /* overwrite part of an existing block */
-       memcpy(tdb->tdb2.transaction->blocks[blk] + off, buf, len);
-}
-
-
-/*
-  out of bounds check during a transaction
-*/
-static enum TDB_ERROR transaction_oob(struct tdb_context *tdb,
-                                     tdb_off_t off, tdb_len_t len, bool probe)
-{
-       if ((off + len >= off && off + len <= tdb->file->map_size) || probe) {
-               return TDB_SUCCESS;
-       }
-
-       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                  "tdb_oob len %lld beyond transaction size %lld",
-                  (long long)(off + len),
-                  (long long)tdb->file->map_size);
-       return TDB_ERR_IO;
-}
-
-/*
-  transaction version of tdb_expand().
-*/
-static enum TDB_ERROR transaction_expand_file(struct tdb_context *tdb,
-                                             tdb_off_t addition)
-{
-       enum TDB_ERROR ecode;
-
-       /* add a write to the transaction elements, so subsequent
-          reads see the zero data */
-       ecode = transaction_write(tdb, tdb->file->map_size, NULL, addition);
-       if (ecode == TDB_SUCCESS) {
-               tdb->file->map_size += addition;
-       }
-       return ecode;
-}
-
-static void *transaction_direct(struct tdb_context *tdb, tdb_off_t off,
-                               size_t len, bool write_mode)
-{
-       size_t blk = off / PAGESIZE, end_blk;
-
-       /* This is wrong for zero-length blocks, but will fail gracefully */
-       end_blk = (off + len - 1) / PAGESIZE;
-
-       /* Can only do direct if in single block and we've already copied. */
-       if (write_mode) {
-               tdb->stats.transaction_write_direct++;
-               if (blk != end_blk
-                   || blk >= tdb->tdb2.transaction->num_blocks
-                   || tdb->tdb2.transaction->blocks[blk] == NULL) {
-                       tdb->stats.transaction_write_direct_fail++;
-                       return NULL;
-               }
-               return tdb->tdb2.transaction->blocks[blk] + off % PAGESIZE;
-       }
-
-       tdb->stats.transaction_read_direct++;
-       /* Single which we have copied? */
-       if (blk == end_blk
-           && blk < tdb->tdb2.transaction->num_blocks
-           && tdb->tdb2.transaction->blocks[blk])
-               return tdb->tdb2.transaction->blocks[blk] + off % PAGESIZE;
-
-       /* Otherwise must be all not copied. */
-       while (blk <= end_blk) {
-               if (blk >= tdb->tdb2.transaction->num_blocks)
-                       break;
-               if (tdb->tdb2.transaction->blocks[blk]) {
-                       tdb->stats.transaction_read_direct_fail++;
-                       return NULL;
-               }
-               blk++;
-       }
-       return tdb->tdb2.transaction->io_methods->direct(tdb, off, len, false);
-}
-
-static const struct tdb_methods transaction_methods = {
-       transaction_read,
-       transaction_write,
-       transaction_oob,
-       transaction_expand_file,
-       transaction_direct,
-};
-
-/*
-  sync to disk
-*/
-static enum TDB_ERROR transaction_sync(struct tdb_context *tdb,
-                                      tdb_off_t offset, tdb_len_t length)
-{
-       if (tdb->flags & TDB_NOSYNC) {
-               return TDB_SUCCESS;
-       }
-
-       if (fsync(tdb->file->fd) != 0) {
-               return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                 "tdb_transaction: fsync failed: %s",
-                                 strerror(errno));
-       }
-#ifdef MS_SYNC
-       if (tdb->file->map_ptr) {
-               tdb_off_t moffset = offset & ~(getpagesize()-1);
-               if (msync(moffset + (char *)tdb->file->map_ptr,
-                         length + (offset - moffset), MS_SYNC) != 0) {
-                       return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                         "tdb_transaction: msync failed: %s",
-                                         strerror(errno));
-               }
-       }
-#endif
-       return TDB_SUCCESS;
-}
-
-
-static void _tdb_transaction_cancel(struct tdb_context *tdb)
-{
-       int i;
-       enum TDB_ERROR ecode;
-
-       if (tdb->tdb2.transaction == NULL) {
-               tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                          "tdb_transaction_cancel: no transaction");
-               return;
-       }
-
-       if (tdb->tdb2.transaction->nesting != 0) {
-               tdb->tdb2.transaction->transaction_error = 1;
-               tdb->tdb2.transaction->nesting--;
-               return;
-       }
-
-       tdb->file->map_size = tdb->tdb2.transaction->old_map_size;
-
-       /* free all the transaction blocks */
-       for (i=0;i<tdb->tdb2.transaction->num_blocks;i++) {
-               if (tdb->tdb2.transaction->blocks[i] != NULL) {
-                       free(tdb->tdb2.transaction->blocks[i]);
-               }
-       }
-       SAFE_FREE(tdb->tdb2.transaction->blocks);
-
-       if (tdb->tdb2.transaction->magic_offset) {
-               const struct tdb_methods *methods = tdb->tdb2.transaction->io_methods;
-               uint64_t invalid = TDB_RECOVERY_INVALID_MAGIC;
-
-               /* remove the recovery marker */
-               ecode = methods->twrite(tdb, tdb->tdb2.transaction->magic_offset,
-                                       &invalid, sizeof(invalid));
-               if (ecode == TDB_SUCCESS)
-                       ecode = transaction_sync(tdb,
-                                                tdb->tdb2.transaction->magic_offset,
-                                                sizeof(invalid));
-               if (ecode != TDB_SUCCESS) {
-                       tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                  "tdb_transaction_cancel: failed to remove"
-                                  " recovery magic");
-               }
-       }
-
-       if (tdb->file->allrecord_lock.count)
-               tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
-
-       /* restore the normal io methods */
-       tdb->tdb2.io = tdb->tdb2.transaction->io_methods;
-
-       tdb_transaction_unlock(tdb, F_WRLCK);
-
-       if (tdb_has_open_lock(tdb))
-               tdb_unlock_open(tdb, F_WRLCK);
-
-       SAFE_FREE(tdb->tdb2.transaction);
-}
-
-/*
-  start a tdb transaction. No token is returned, as only a single
-  transaction is allowed to be pending per tdb_context
-*/
-enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_transaction_start(tdb) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       tdb->stats.transactions++;
-       /* some sanity checks */
-       if (tdb->flags & TDB_INTERNAL) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot start a"
-                                                   " transaction on an"
-                                                   " internal tdb");
-       }
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot start a"
-                                                   " transaction on a "
-                                                   " read-only tdb");
-       }
-
-       /* cope with nested tdb_transaction_start() calls */
-       if (tdb->tdb2.transaction != NULL) {
-               if (!(tdb->flags & TDB_ALLOW_NESTING)) {
-                       return tdb->last_error
-                               = tdb_logerr(tdb, TDB_ERR_IO,
-                                            TDB_LOG_USE_ERROR,
-                                            "tdb_transaction_start:"
-                                            " already inside transaction");
-               }
-               tdb->tdb2.transaction->nesting++;
-               tdb->stats.transaction_nest++;
-               return 0;
-       }
-
-       if (tdb_has_hash_locks(tdb)) {
-               /* the caller must not have any locks when starting a
-                  transaction as otherwise we'll be screwed by lack
-                  of nested locks in POSIX */
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot start a"
-                                                   " transaction with locks"
-                                                   " held");
-       }
-
-       tdb->tdb2.transaction = (struct tdb_transaction *)
-               calloc(sizeof(struct tdb_transaction), 1);
-       if (tdb->tdb2.transaction == NULL) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM,
-                                                   TDB_LOG_ERROR,
-                                                   "tdb_transaction_start:"
-                                                   " cannot allocate");
-       }
-
-       /* get the transaction write lock. This is a blocking lock. As
-          discussed with Volker, there are a number of ways we could
-          make this async, which we will probably do in the future */
-       ecode = tdb_transaction_lock(tdb, F_WRLCK);
-       if (ecode != TDB_SUCCESS) {
-               SAFE_FREE(tdb->tdb2.transaction->blocks);
-               SAFE_FREE(tdb->tdb2.transaction);
-               return tdb->last_error = ecode;
-       }
-
-       /* get a read lock over entire file. This is upgraded to a write
-          lock during the commit */
-       ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true);
-       if (ecode != TDB_SUCCESS) {
-               goto fail_allrecord_lock;
-       }
-
-       /* make sure we know about any file expansions already done by
-          anyone else */
-       tdb->tdb2.io->oob(tdb, tdb->file->map_size, 1, true);
-       tdb->tdb2.transaction->old_map_size = tdb->file->map_size;
-
-       /* finally hook the io methods, replacing them with
-          transaction specific methods */
-       tdb->tdb2.transaction->io_methods = tdb->tdb2.io;
-       tdb->tdb2.io = &transaction_methods;
-       return tdb->last_error = TDB_SUCCESS;
-
-fail_allrecord_lock:
-       tdb_transaction_unlock(tdb, F_WRLCK);
-       SAFE_FREE(tdb->tdb2.transaction->blocks);
-       SAFE_FREE(tdb->tdb2.transaction);
-       return tdb->last_error = ecode;
-}
-
-
-/*
-  cancel the current transaction
-*/
-void tdb_transaction_cancel(struct tdb_context *tdb)
-{
-       if (tdb->flags & TDB_VERSION1) {
-               tdb1_transaction_cancel(tdb);
-               return;
-       }
-       tdb->stats.transaction_cancel++;
-       _tdb_transaction_cancel(tdb);
-}
-
-/*
-  work out how much space the linearised recovery data will consume (worst case)
-*/
-static tdb_len_t tdb_recovery_size(struct tdb_context *tdb)
-{
-       tdb_len_t recovery_size = 0;
-       int i;
-
-       recovery_size = 0;
-       for (i=0;i<tdb->tdb2.transaction->num_blocks;i++) {
-               if (i * PAGESIZE >= tdb->tdb2.transaction->old_map_size) {
-                       break;
-               }
-               if (tdb->tdb2.transaction->blocks[i] == NULL) {
-                       continue;
-               }
-               recovery_size += 2*sizeof(tdb_off_t);
-               if (i == tdb->tdb2.transaction->num_blocks-1) {
-                       recovery_size += tdb->tdb2.transaction->last_block_size;
-               } else {
-                       recovery_size += PAGESIZE;
-               }
-       }
-
-       return recovery_size;
-}
-
-static enum TDB_ERROR tdb_recovery_area(struct tdb_context *tdb,
-                                       const struct tdb_methods *methods,
-                                       tdb_off_t *recovery_offset,
-                                       struct tdb_recovery_record *rec)
-{
-       enum TDB_ERROR ecode;
-
-       *recovery_offset = tdb_read_off(tdb,
-                                       offsetof(struct tdb_header, recovery));
-       if (TDB_OFF_IS_ERR(*recovery_offset)) {
-               return TDB_OFF_TO_ERR(*recovery_offset);
-       }
-
-       if (*recovery_offset == 0) {
-               rec->max_len = 0;
-               return TDB_SUCCESS;
-       }
-
-       ecode = methods->tread(tdb, *recovery_offset, rec, sizeof(*rec));
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       tdb_convert(tdb, rec, sizeof(*rec));
-       /* ignore invalid recovery regions: can happen in crash */
-       if (rec->magic != TDB_RECOVERY_MAGIC &&
-           rec->magic != TDB_RECOVERY_INVALID_MAGIC) {
-               *recovery_offset = 0;
-               rec->max_len = 0;
-       }
-       return TDB_SUCCESS;
-}
-
-static unsigned int same(const unsigned char *new,
-                        const unsigned char *old,
-                        unsigned int length)
-{
-       unsigned int i;
-
-       for (i = 0; i < length; i++) {
-               if (new[i] != old[i])
-                       break;
-       }
-       return i;
-}
-
-static unsigned int different(const unsigned char *new,
-                             const unsigned char *old,
-                             unsigned int length,
-                             unsigned int min_same,
-                             unsigned int *samelen)
-{
-       unsigned int i;
-
-       *samelen = 0;
-       for (i = 0; i < length; i++) {
-               if (new[i] == old[i]) {
-                       (*samelen)++;
-               } else {
-                       if (*samelen >= min_same) {
-                               return i - *samelen;
-                       }
-                       *samelen = 0;
-               }
-       }
-
-       if (*samelen < min_same)
-               *samelen = 0;
-       return length - *samelen;
-}
-
-/* Allocates recovery blob, without tdb_recovery_record at head set up. */
-static struct tdb_recovery_record *alloc_recovery(struct tdb_context *tdb,
-                                                 tdb_len_t *len)
-{
-       struct tdb_recovery_record *rec;
-       size_t i;
-       enum TDB_ERROR ecode;
-       unsigned char *p;
-       const struct tdb_methods *old_methods = tdb->tdb2.io;
-
-       rec = malloc(sizeof(*rec) + tdb_recovery_size(tdb));
-       if (!rec) {
-               tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                          "transaction_setup_recovery:"
-                          " cannot allocate");
-               return TDB_ERR_PTR(TDB_ERR_OOM);
-       }
-
-       /* We temporarily revert to the old I/O methods, so we can use
-        * tdb_access_read */
-       tdb->tdb2.io = tdb->tdb2.transaction->io_methods;
-
-       /* build the recovery data into a single blob to allow us to do a single
-          large write, which should be more efficient */
-       p = (unsigned char *)(rec + 1);
-       for (i=0;i<tdb->tdb2.transaction->num_blocks;i++) {
-               tdb_off_t offset;
-               tdb_len_t length;
-               unsigned int off;
-               const unsigned char *buffer;
-
-               if (tdb->tdb2.transaction->blocks[i] == NULL) {
-                       continue;
-               }
-
-               offset = i * PAGESIZE;
-               length = PAGESIZE;
-               if (i == tdb->tdb2.transaction->num_blocks-1) {
-                       length = tdb->tdb2.transaction->last_block_size;
-               }
-
-               if (offset >= tdb->tdb2.transaction->old_map_size) {
-                       continue;
-               }
-
-               if (offset + length > tdb->file->map_size) {
-                       ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                          "tdb_transaction_setup_recovery:"
-                                          " transaction data over new region"
-                                          " boundary");
-                       goto fail;
-               }
-               if (offset + length > tdb->tdb2.transaction->old_map_size) {
-                       /* Short read at EOF. */
-                       length = tdb->tdb2.transaction->old_map_size - offset;
-               }
-               buffer = tdb_access_read(tdb, offset, length, false);
-               if (TDB_PTR_IS_ERR(buffer)) {
-                       ecode = TDB_PTR_ERR(buffer);
-                       goto fail;
-               }
-
-               /* Skip over anything the same at the start. */
-               off = same(tdb->tdb2.transaction->blocks[i], buffer, length);
-               offset += off;
-
-               while (off < length) {
-                       tdb_len_t len;
-                       unsigned int samelen;
-
-                       len = different(tdb->tdb2.transaction->blocks[i] + off,
-                                       buffer + off, length - off,
-                                       sizeof(offset) + sizeof(len) + 1,
-                                       &samelen);
-
-                       memcpy(p, &offset, sizeof(offset));
-                       memcpy(p + sizeof(offset), &len, sizeof(len));
-                       tdb_convert(tdb, p, sizeof(offset) + sizeof(len));
-                       p += sizeof(offset) + sizeof(len);
-                       memcpy(p, buffer + off, len);
-                       p += len;
-                       off += len + samelen;
-                       offset += len + samelen;
-               }
-               tdb_access_release(tdb, buffer);
-       }
-
-       *len = p - (unsigned char *)(rec + 1);
-       tdb->tdb2.io = old_methods;
-       return rec;
-
-fail:
-       free(rec);
-       tdb->tdb2.io = old_methods;
-       return TDB_ERR_PTR(ecode);
-}
-
-static tdb_off_t create_recovery_area(struct tdb_context *tdb,
-                                     tdb_len_t rec_length,
-                                     struct tdb_recovery_record *rec)
-{
-       tdb_off_t off, recovery_off;
-       tdb_len_t addition;
-       enum TDB_ERROR ecode;
-       const struct tdb_methods *methods = tdb->tdb2.transaction->io_methods;
-
-       /* round up to a multiple of page size. Overallocate, since each
-        * such allocation forces us to expand the file. */
-       rec->max_len = tdb_expand_adjust(tdb->file->map_size, rec_length);
-
-       /* Round up to a page. */
-       rec->max_len = ((sizeof(*rec) + rec->max_len + PAGESIZE-1)
-                       & ~(PAGESIZE-1))
-               - sizeof(*rec);
-
-       off = tdb->file->map_size;
-
-       /* Restore ->map_size before calling underlying expand_file.
-          Also so that we don't try to expand the file again in the
-          transaction commit, which would destroy the recovery
-          area */
-       addition = (tdb->file->map_size - tdb->tdb2.transaction->old_map_size) +
-               sizeof(*rec) + rec->max_len;
-       tdb->file->map_size = tdb->tdb2.transaction->old_map_size;
-       tdb->stats.transaction_expand_file++;
-       ecode = methods->expand_file(tdb, addition);
-       if (ecode != TDB_SUCCESS) {
-               tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                          "tdb_recovery_allocate:"
-                          " failed to create recovery area");
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       /* we have to reset the old map size so that we don't try to
-          expand the file again in the transaction commit, which
-          would destroy the recovery area */
-       tdb->tdb2.transaction->old_map_size = tdb->file->map_size;
-
-       /* write the recovery header offset and sync - we can sync without a race here
-          as the magic ptr in the recovery record has not been set */
-       recovery_off = off;
-       tdb_convert(tdb, &recovery_off, sizeof(recovery_off));
-       ecode = methods->twrite(tdb, offsetof(struct tdb_header, recovery),
-                               &recovery_off, sizeof(tdb_off_t));
-       if (ecode != TDB_SUCCESS) {
-               tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                          "tdb_recovery_allocate:"
-                          " failed to write recovery head");
-               return TDB_ERR_TO_OFF(ecode);
-       }
-       transaction_write_existing(tdb, offsetof(struct tdb_header, recovery),
-                                  &recovery_off,
-                                  sizeof(tdb_off_t));
-       return off;
-}
-
-/*
-  setup the recovery data that will be used on a crash during commit
-*/
-static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb)
-{
-       tdb_len_t recovery_size = 0;
-       tdb_off_t recovery_off = 0;
-       tdb_off_t old_map_size = tdb->tdb2.transaction->old_map_size;
-       struct tdb_recovery_record *recovery;
-       const struct tdb_methods *methods = tdb->tdb2.transaction->io_methods;
-       uint64_t magic;
-       enum TDB_ERROR ecode;
-
-       recovery = alloc_recovery(tdb, &recovery_size);
-       if (TDB_PTR_IS_ERR(recovery))
-               return TDB_PTR_ERR(recovery);
-
-       ecode = tdb_recovery_area(tdb, methods, &recovery_off, recovery);
-       if (ecode) {
-               free(recovery);
-               return ecode;
-       }
-
-       if (recovery->max_len < recovery_size) {
-               /* Not large enough. Free up old recovery area. */
-               if (recovery_off) {
-                       tdb->stats.frees++;
-                       ecode = add_free_record(tdb, recovery_off,
-                                               sizeof(*recovery)
-                                               + recovery->max_len,
-                                               TDB_LOCK_WAIT, true);
-                       free(recovery);
-                       if (ecode != TDB_SUCCESS) {
-                               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                                 "tdb_recovery_allocate:"
-                                                 " failed to free previous"
-                                                 " recovery area");
-                       }
-
-                       /* Refresh recovery after add_free_record above. */
-                       recovery = alloc_recovery(tdb, &recovery_size);
-                       if (TDB_PTR_IS_ERR(recovery))
-                               return TDB_PTR_ERR(recovery);
-               }
-
-               recovery_off = create_recovery_area(tdb, recovery_size,
-                                                   recovery);
-               if (TDB_OFF_IS_ERR(recovery_off)) {
-                       free(recovery);
-                       return TDB_OFF_TO_ERR(recovery_off);
-               }
-       }
-
-       /* Now we know size, convert rec header. */
-       recovery->magic = TDB_RECOVERY_INVALID_MAGIC;
-       recovery->len = recovery_size;
-       recovery->eof = old_map_size;
-       tdb_convert(tdb, recovery, sizeof(*recovery));
-
-       /* write the recovery data to the recovery area */
-       ecode = methods->twrite(tdb, recovery_off, recovery, recovery_size);
-       if (ecode != TDB_SUCCESS) {
-               free(recovery);
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_setup_recovery:"
-                                 " failed to write recovery data");
-       }
-       transaction_write_existing(tdb, recovery_off, recovery, recovery_size);
-
-       free(recovery);
-
-       /* as we don't have ordered writes, we have to sync the recovery
-          data before we update the magic to indicate that the recovery
-          data is present */
-       ecode = transaction_sync(tdb, recovery_off, recovery_size);
-       if (ecode != TDB_SUCCESS)
-               return ecode;
-
-       magic = TDB_RECOVERY_MAGIC;
-       tdb_convert(tdb, &magic, sizeof(magic));
-
-       tdb->tdb2.transaction->magic_offset
-               = recovery_off + offsetof(struct tdb_recovery_record, magic);
-
-       ecode = methods->twrite(tdb, tdb->tdb2.transaction->magic_offset,
-                               &magic, sizeof(magic));
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_setup_recovery:"
-                                 " failed to write recovery magic");
-       }
-       transaction_write_existing(tdb, tdb->tdb2.transaction->magic_offset,
-                                  &magic, sizeof(magic));
-
-       /* ensure the recovery magic marker is on disk */
-       return transaction_sync(tdb, tdb->tdb2.transaction->magic_offset,
-                               sizeof(magic));
-}
-
-static enum TDB_ERROR _tdb_transaction_prepare_commit(struct tdb_context *tdb)
-{
-       const struct tdb_methods *methods;
-       enum TDB_ERROR ecode;
-
-       if (tdb->tdb2.transaction == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                 "tdb_transaction_prepare_commit:"
-                                 " no transaction");
-       }
-
-       if (tdb->tdb2.transaction->prepared) {
-               _tdb_transaction_cancel(tdb);
-               return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                 "tdb_transaction_prepare_commit:"
-                                 " transaction already prepared");
-       }
-
-       if (tdb->tdb2.transaction->transaction_error) {
-               _tdb_transaction_cancel(tdb);
-               return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_ERROR,
-                                 "tdb_transaction_prepare_commit:"
-                                 " transaction error pending");
-       }
-
-
-       if (tdb->tdb2.transaction->nesting != 0) {
-               return TDB_SUCCESS;
-       }
-
-       /* check for a null transaction */
-       if (tdb->tdb2.transaction->blocks == NULL) {
-               return TDB_SUCCESS;
-       }
-
-       methods = tdb->tdb2.transaction->io_methods;
-
-       /* upgrade the main transaction lock region to a write lock */
-       ecode = tdb_allrecord_upgrade(tdb, TDB_HASH_LOCK_START);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* get the open lock - this prevents new users attaching to the database
-          during the commit */
-       ecode = tdb_lock_open(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
-       if (ecode != TDB_SUCCESS) {
-               return ecode;
-       }
-
-       /* Since we have whole db locked, we don't need the expansion lock. */
-       if (!(tdb->flags & TDB_NOSYNC)) {
-               /* Sets up tdb->tdb2.transaction->recovery and
-                * tdb->tdb2.transaction->magic_offset. */
-               ecode = transaction_setup_recovery(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-       }
-
-       tdb->tdb2.transaction->prepared = true;
-
-       /* expand the file to the new size if needed */
-       if (tdb->file->map_size != tdb->tdb2.transaction->old_map_size) {
-               tdb_len_t add;
-
-               add = tdb->file->map_size - tdb->tdb2.transaction->old_map_size;
-               /* Restore original map size for tdb_expand_file */
-               tdb->file->map_size = tdb->tdb2.transaction->old_map_size;
-               ecode = methods->expand_file(tdb, add);
-               if (ecode != TDB_SUCCESS) {
-                       return ecode;
-               }
-       }
-
-       /* Keep the open lock until the actual commit */
-       return TDB_SUCCESS;
-}
-
-/*
-   prepare to commit the current transaction
-*/
-enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb)
-{
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_transaction_prepare_commit(tdb) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-       return tdb->last_error = _tdb_transaction_prepare_commit(tdb);
-}
-
-/*
-  commit the current transaction
-*/
-enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb)
-{
-       const struct tdb_methods *methods;
-       int i;
-       enum TDB_ERROR ecode;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_transaction_commit(tdb) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       if (tdb->tdb2.transaction == NULL) {
-               return tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                                   TDB_LOG_USE_ERROR,
-                                                   "tdb_transaction_commit:"
-                                                   " no transaction");
-       }
-
-       tdb_trace(tdb, "tdb_transaction_commit");
-
-       if (tdb->tdb2.transaction->nesting != 0) {
-               tdb->tdb2.transaction->nesting--;
-               return tdb->last_error = TDB_SUCCESS;
-       }
-
-       /* check for a null transaction */
-       if (tdb->tdb2.transaction->blocks == NULL) {
-               _tdb_transaction_cancel(tdb);
-               return tdb->last_error = TDB_SUCCESS;
-       }
-
-       if (!tdb->tdb2.transaction->prepared) {
-               ecode = _tdb_transaction_prepare_commit(tdb);
-               if (ecode != TDB_SUCCESS) {
-                       _tdb_transaction_cancel(tdb);
-                       return tdb->last_error = ecode;
-               }
-       }
-
-       methods = tdb->tdb2.transaction->io_methods;
-
-       /* perform all the writes */
-       for (i=0;i<tdb->tdb2.transaction->num_blocks;i++) {
-               tdb_off_t offset;
-               tdb_len_t length;
-
-               if (tdb->tdb2.transaction->blocks[i] == NULL) {
-                       continue;
-               }
-
-               offset = i * PAGESIZE;
-               length = PAGESIZE;
-               if (i == tdb->tdb2.transaction->num_blocks-1) {
-                       length = tdb->tdb2.transaction->last_block_size;
-               }
-
-               ecode = methods->twrite(tdb, offset,
-                                       tdb->tdb2.transaction->blocks[i], length);
-               if (ecode != TDB_SUCCESS) {
-                       /* we've overwritten part of the data and
-                          possibly expanded the file, so we need to
-                          run the crash recovery code */
-                       tdb->tdb2.io = methods;
-                       tdb_transaction_recover(tdb);
-
-                       _tdb_transaction_cancel(tdb);
-
-                       return tdb->last_error = ecode;
-               }
-               SAFE_FREE(tdb->tdb2.transaction->blocks[i]);
-       }
-
-       SAFE_FREE(tdb->tdb2.transaction->blocks);
-       tdb->tdb2.transaction->num_blocks = 0;
-
-       /* ensure the new data is on disk */
-       ecode = transaction_sync(tdb, 0, tdb->file->map_size);
-       if (ecode != TDB_SUCCESS) {
-               return tdb->last_error = ecode;
-       }
-
-       /*
-         TODO: maybe write to some dummy hdr field, or write to magic
-         offset without mmap, before the last sync, instead of the
-         utime() call
-       */
-
-       /* on some systems (like Linux 2.6.x) changes via mmap/msync
-          don't change the mtime of the file, this means the file may
-          not be backed up (as tdb rounding to block sizes means that
-          file size changes are quite rare too). The following forces
-          mtime changes when a transaction completes */
-#if HAVE_UTIME
-       utime(tdb->name, NULL);
-#endif
-
-       /* use a transaction cancel to free memory and remove the
-          transaction locks: it "restores" map_size, too. */
-       tdb->tdb2.transaction->old_map_size = tdb->file->map_size;
-       _tdb_transaction_cancel(tdb);
-
-       return tdb->last_error = TDB_SUCCESS;
-}
-
-
-/*
-  recover from an aborted transaction. Must be called with exclusive
-  database write access already established (including the open
-  lock to prevent new processes attaching)
-*/
-enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb)
-{
-       tdb_off_t recovery_head, recovery_eof;
-       unsigned char *data, *p;
-       struct tdb_recovery_record rec;
-       enum TDB_ERROR ecode;
-
-       /* find the recovery area */
-       recovery_head = tdb_read_off(tdb, offsetof(struct tdb_header,recovery));
-       if (TDB_OFF_IS_ERR(recovery_head)) {
-               ecode = TDB_OFF_TO_ERR(recovery_head);
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to read recovery head");
-       }
-
-       if (recovery_head == 0) {
-               /* we have never allocated a recovery record */
-               return TDB_SUCCESS;
-       }
-
-       /* read the recovery record */
-       ecode = tdb_read_convert(tdb, recovery_head, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to read recovery record");
-       }
-
-       if (rec.magic != TDB_RECOVERY_MAGIC) {
-               /* there is no valid recovery data */
-               return TDB_SUCCESS;
-       }
-
-       if (tdb->flags & TDB_RDONLY) {
-               return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " attempt to recover read only database");
-       }
-
-       recovery_eof = rec.eof;
-
-       data = (unsigned char *)malloc(rec.len);
-       if (data == NULL) {
-               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to allocate recovery data");
-       }
-
-       /* read the full recovery data */
-       ecode = tdb->tdb2.io->tread(tdb, recovery_head + sizeof(rec), data,
-                                   rec.len);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to read recovery data");
-       }
-
-       /* recover the file data */
-       p = data;
-       while (p+sizeof(tdb_off_t)+sizeof(tdb_len_t) < data + rec.len) {
-               tdb_off_t ofs;
-               tdb_len_t len;
-               tdb_convert(tdb, p, sizeof(ofs) + sizeof(len));
-               memcpy(&ofs, p, sizeof(ofs));
-               memcpy(&len, p + sizeof(ofs), sizeof(len));
-               p += sizeof(ofs) + sizeof(len);
-
-               ecode = tdb->tdb2.io->twrite(tdb, ofs, p, len);
-               if (ecode != TDB_SUCCESS) {
-                       free(data);
-                       return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                         "tdb_transaction_recover:"
-                                         " failed to recover %zu bytes"
-                                         " at offset %zu",
-                                         (size_t)len, (size_t)ofs);
-               }
-               p += len;
-       }
-
-       free(data);
-
-       ecode = transaction_sync(tdb, 0, tdb->file->map_size);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to sync recovery");
-       }
-
-       /* if the recovery area is after the recovered eof then remove it */
-       if (recovery_eof <= recovery_head) {
-               ecode = tdb_write_off(tdb, offsetof(struct tdb_header,
-                                                   recovery),
-                                     0);
-               if (ecode != TDB_SUCCESS) {
-                       return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                         "tdb_transaction_recover:"
-                                         " failed to remove recovery head");
-               }
-       }
-
-       /* remove the recovery magic */
-       ecode = tdb_write_off(tdb,
-                             recovery_head
-                             + offsetof(struct tdb_recovery_record, magic),
-                             TDB_RECOVERY_INVALID_MAGIC);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to remove recovery magic");
-       }
-
-       ecode = transaction_sync(tdb, 0, recovery_eof);
-       if (ecode != TDB_SUCCESS) {
-               return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
-                                 "tdb_transaction_recover:"
-                                 " failed to sync2 recovery");
-       }
-
-       tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
-                  "tdb_transaction_recover: recovered %zu byte database",
-                  (size_t)recovery_eof);
-
-       /* all done */
-       return TDB_SUCCESS;
-}
-
-tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb)
-{
-       tdb_off_t recovery_head;
-       struct tdb_recovery_record rec;
-       enum TDB_ERROR ecode;
-
-       /* find the recovery area */
-       recovery_head = tdb_read_off(tdb, offsetof(struct tdb_header,recovery));
-       if (TDB_OFF_IS_ERR(recovery_head)) {
-               return recovery_head;
-       }
-
-       if (recovery_head == 0) {
-               /* we have never allocated a recovery record */
-               return false;
-       }
-
-       /* read the recovery record */
-       ecode = tdb_read_convert(tdb, recovery_head, &rec, sizeof(rec));
-       if (ecode != TDB_SUCCESS) {
-               return TDB_ERR_TO_OFF(ecode);
-       }
-
-       return (rec.magic == TDB_RECOVERY_MAGIC);
-}
diff --git a/ccan/tdb2/traverse.c b/ccan/tdb2/traverse.c
deleted file mode 100644 (file)
index 0bf4189..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
- /*
-   Trivial Database 2: traverse function.
-   Copyright (C) Rusty Russell 2010
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 3 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-
-int64_t tdb_traverse_(struct tdb_context *tdb,
-                     int (*fn)(struct tdb_context *,
-                               TDB_DATA, TDB_DATA, void *),
-                     void *p)
-{
-       enum TDB_ERROR ecode;
-       struct traverse_info tinfo;
-       struct tdb_data k, d;
-       int64_t count = 0;
-
-       if (tdb->flags & TDB_VERSION1) {
-               count = tdb1_traverse(tdb, fn, p);
-               if (count == -1)
-                       return TDB_ERR_TO_OFF(tdb->last_error);
-               return count;
-       }
-
-       k.dptr = NULL;
-       for (ecode = first_in_hash(tdb, &tinfo, &k, &d.dsize);
-            ecode == TDB_SUCCESS;
-            ecode = next_in_hash(tdb, &tinfo, &k, &d.dsize)) {
-               d.dptr = k.dptr + k.dsize;
-               
-               count++;
-               if (fn && fn(tdb, k, d, p)) {
-                       free(k.dptr);
-                       tdb->last_error = TDB_SUCCESS;
-                       return count;
-               }
-               free(k.dptr);
-       }
-
-       if (ecode != TDB_ERR_NOEXIST) {
-               return TDB_ERR_TO_OFF(tdb->last_error = ecode);
-       }
-       tdb->last_error = TDB_SUCCESS;
-       return count;
-}
-       
-enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key)
-{
-       struct traverse_info tinfo;
-
-       if (tdb->flags & TDB_VERSION1) {
-               tdb->last_error = TDB_SUCCESS;
-               *key = tdb1_firstkey(tdb);
-               /* TDB1 didn't set error for last key. */
-               if (!key->dptr && tdb->last_error == TDB_SUCCESS) {
-                       tdb->last_error = TDB_ERR_NOEXIST;
-               }
-               return tdb->last_error;
-       }
-
-       return tdb->last_error = first_in_hash(tdb, &tinfo, key, NULL);
-}
-
-/* We lock twice, not very efficient.  We could keep last key & tinfo cached. */
-enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key)
-{
-       struct traverse_info tinfo;
-       struct hash_info h;
-       struct tdb_used_record rec;
-
-       if (tdb->flags & TDB_VERSION1) {
-               struct tdb_data last_key = *key;
-               tdb->last_error = TDB_SUCCESS;
-               *key = tdb1_nextkey(tdb, last_key);
-               free(last_key.dptr);
-               /* TDB1 didn't set error for last key. */
-               if (!key->dptr && tdb->last_error == TDB_SUCCESS) {
-                       tdb->last_error = TDB_ERR_NOEXIST;
-               }
-               return tdb->last_error;
-       }
-
-       tinfo.prev = find_and_lock(tdb, *key, F_RDLCK, &h, &rec, &tinfo);
-       free(key->dptr);
-       if (TDB_OFF_IS_ERR(tinfo.prev)) {
-               return tdb->last_error = TDB_OFF_TO_ERR(tinfo.prev);
-       }
-       tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
-
-       return tdb->last_error = next_in_hash(tdb, &tinfo, key, NULL);
-}
-
-static int wipe_one(struct tdb_context *tdb,
-                   TDB_DATA key, TDB_DATA data, enum TDB_ERROR *ecode)
-{
-       *ecode = tdb_delete(tdb, key);
-       return (*ecode != TDB_SUCCESS);
-}
-
-enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb)
-{
-       enum TDB_ERROR ecode;
-       int64_t count;
-
-       if (tdb->flags & TDB_VERSION1) {
-               if (tdb1_wipe_all(tdb) == -1)
-                       return tdb->last_error;
-               return TDB_SUCCESS;
-       }
-
-       ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
-       if (ecode != TDB_SUCCESS)
-               return tdb->last_error = ecode;
-
-       /* FIXME: Be smarter. */
-       count = tdb_traverse(tdb, wipe_one, &ecode);
-       if (count < 0)
-               ecode = TDB_OFF_TO_ERR(count);
-       tdb_allrecord_unlock(tdb, F_WRLCK);
-       return tdb->last_error = ecode;
-}