From: Rusty Russell Date: Mon, 3 Apr 2017 00:08:26 +0000 (+0930) Subject: ccan/ntdb: demote to junkcode. X-Git-Url: http://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=1aab74723e837a0fd8091e264a325bb2cdcdd0fa ccan/ntdb: demote to junkcode. Signed-off-by: Rusty Russell --- diff --git a/ccan/ntdb/ABI/ntdb-0.9.sigs b/ccan/ntdb/ABI/ntdb-0.9.sigs deleted file mode 100644 index 6b12ddbd..00000000 --- a/ccan/ntdb/ABI/ntdb-0.9.sigs +++ /dev/null @@ -1,38 +0,0 @@ -ntdb_add_flag: void (struct ntdb_context *, unsigned int) -ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA) -ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) -ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) -ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA) -ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA) -ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) -ntdb_close: int (struct ntdb_context *) -ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) -ntdb_errorstr: const char *(enum NTDB_ERROR) -ntdb_exists: bool (struct ntdb_context *, NTDB_DATA) -ntdb_fd: int (const struct ntdb_context *) -ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *) -ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) -ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *) -ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *) -ntdb_get_flags: unsigned int (struct ntdb_context *) -ntdb_get_seqnum: int64_t (struct ntdb_context *) -ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *) -ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *) -ntdb_name: const char *(const struct ntdb_context *) -ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) -ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *) -ntdb_parse_record_: enum NTDB_ERROR (struct 
ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) -ntdb_remove_flag: void (struct ntdb_context *, unsigned int) -ntdb_repack: enum NTDB_ERROR (struct ntdb_context *) -ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *) -ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int) -ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **) -ntdb_transaction_cancel: void (struct ntdb_context *) -ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *) -ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *) -ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *) -ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *) -ntdb_unlockall: void (struct ntdb_context *) -ntdb_unlockall_read: void (struct ntdb_context *) -ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type) -ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *) diff --git a/ccan/ntdb/ABI/ntdb-1.0.sigs b/ccan/ntdb/ABI/ntdb-1.0.sigs deleted file mode 100644 index 6b12ddbd..00000000 --- a/ccan/ntdb/ABI/ntdb-1.0.sigs +++ /dev/null @@ -1,38 +0,0 @@ -ntdb_add_flag: void (struct ntdb_context *, unsigned int) -ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA) -ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) -ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) -ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA) -ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA) -ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) -ntdb_close: int (struct ntdb_context *) -ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) -ntdb_errorstr: const char *(enum NTDB_ERROR) -ntdb_exists: bool (struct ntdb_context *, NTDB_DATA) -ntdb_fd: int (const 
struct ntdb_context *) -ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *) -ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) -ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *) -ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *) -ntdb_get_flags: unsigned int (struct ntdb_context *) -ntdb_get_seqnum: int64_t (struct ntdb_context *) -ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *) -ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *) -ntdb_name: const char *(const struct ntdb_context *) -ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) -ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *) -ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) -ntdb_remove_flag: void (struct ntdb_context *, unsigned int) -ntdb_repack: enum NTDB_ERROR (struct ntdb_context *) -ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *) -ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int) -ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **) -ntdb_transaction_cancel: void (struct ntdb_context *) -ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *) -ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *) -ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *) -ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *) -ntdb_unlockall: void (struct ntdb_context *) -ntdb_unlockall_read: void (struct ntdb_context *) -ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type) -ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *) diff --git a/ccan/ntdb/LICENSE b/ccan/ntdb/LICENSE deleted file mode 120000 index 74550445..00000000 --- a/ccan/ntdb/LICENSE 
+++ /dev/null @@ -1 +0,0 @@ -../../licenses/LGPL-3 \ No newline at end of file diff --git a/ccan/ntdb/Makefile b/ccan/ntdb/Makefile deleted file mode 100644 index 3ce5fd16..00000000 --- a/ccan/ntdb/Makefile +++ /dev/null @@ -1,80 +0,0 @@ -CC=gcc -CFLAGS=-g -O0 -Wall -W -I../../ -I./ -LIBS= - -LIBNTDB_OBJ = ccan_hash.o ccan_tally.o check.o free.o hash.o io.o lock.o open.o summary.o ntdb.o transaction.o traverse.o - -all: ntdbtorture ntdbtool ntdbdump ntdbrestore ntdbbackup - -ntdbtorture: tools/ntdbtorture.c libntdb.a - $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) - -ntdbtool: tools/ntdbtool.c libntdb.a - $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) - -ntdbdump: tools/ntdbdump.c libntdb.a - $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) - -ntdbrestore: tools/ntdbrestore.c libntdb.a - $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) - -ntdbbackup: tools/ntdbbackup.c libntdb.a - $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) - -libntdb.a: $(LIBNTDB_OBJ) - @echo Creating library $@ - ar r libntdb.a $(LIBNTDB_OBJ) - ranlib libntdb.a - -check.o: check.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c check.c -o $@ - -free.o: free.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c free.c -o $@ - -hash.o: hash.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c hash.c -o $@ - -io.o: io.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c io.c -o $@ - -lock.o: lock.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c lock.c -o $@ - -open.o: open.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c open.c -o $@ - -summary.o: summary.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c summary.c -o $@ - -ntdb.o: ntdb.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c ntdb.c -o $@ - -transaction.o: transaction.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c transaction.c -o $@ - -traverse.o: traverse.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c traverse.c -o $@ - -ccan_hash.o: ../hash/hash.c - @echo Compiling $@ - $(CC) $(CFLAGS) -c ../hash/hash.c -o $@ - -ccan_tally.o: ../tally/tally.c - 
@echo Compiling $@ - $(CC) $(CFLAGS) -c ../tally/tally.c -o $@ - -clean: - rm -f *.o - rm -f *.a - rm -f tools/ntdbtorture tools/ntdbtool tools/ntdbdump tools/ntdbrestore tools/ntdbbackup diff --git a/ccan/ntdb/_info b/ccan/ntdb/_info deleted file mode 100644 index 5aedb81a..00000000 --- a/ccan/ntdb/_info +++ /dev/null @@ -1,72 +0,0 @@ -#include "config.h" -#include -#include - -/** - * ntdb - Next Generation Trivial Database - * - * This package provides an experimental persistent keyword/data store. - * Its main advantage over tdb is that it's 64-bit. - * - * Example: - * #include - * #include - * #include - * #include - * - * int main(int argc, char *argv[]) - * { - * NTDB_DATA key = ntdb_mkdata("key", 3); - * NTDB_DATA val = ntdb_mkdata("val", 3); - * struct ntdb_context *ntdb; - * - * ntdb = ntdb_open("example.ntdb", NTDB_DEFAULT, - * O_RDWR | O_CREAT | O_TRUNC, 0600, NULL); - * if (ntdb == NULL) - * errx(1, "failed to open database file"); - * - * ntdb_store(ntdb, key, val, NTDB_INSERT); - * - * ntdb_close(ntdb); - * - * return 0; - * } - * - * License: LGPL (v3 or any later version) - * Authors: Rusty Russell - * Andrew Tridgell - * Jeremy Allison - * Jelmer Vernooij - * Volker Lendecke - * Andrew Esh - * Simon McVittie - * Tim Potter - * Maintainer: Rusty Russell - */ -int main(int argc, char *argv[]) -{ - if (argc != 2) - return 1; - - if (strcmp(argv[1], "depends") == 0) { - printf("ccan/asearch\n"); - printf("ccan/build_assert\n"); - printf("ccan/cast\n"); - printf("ccan/compiler\n"); - printf("ccan/endian\n"); - printf("ccan/hash\n"); - printf("ccan/ilog\n"); - printf("ccan/likely\n"); - printf("ccan/tally\n"); - printf("ccan/typesafe_cb\n"); - return 0; - } - - if (strcmp(argv[1], "testdepends") == 0) { - printf("ccan/failtest\n"); - printf("ccan/err\n"); - return 0; - } - - return 1; -} diff --git a/ccan/ntdb/check.c b/ccan/ntdb/check.c deleted file mode 100644 index f2423945..00000000 --- a/ccan/ntdb/check.c +++ /dev/null @@ -1,726 +0,0 @@ - /* - 
Trivial Database 2: free list/block handling - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include "private.h" -#include -#include - -/* We keep an ordered array of offsets. */ -static bool append(struct ntdb_context *ntdb, - ntdb_off_t **arr, size_t *num, ntdb_off_t off) -{ - ntdb_off_t *new; - - if (*num == 0) { - new = ntdb->alloc_fn(ntdb, sizeof(ntdb_off_t), ntdb->alloc_data); - } else { - new = ntdb->expand_fn(*arr, (*num + 1) * sizeof(ntdb_off_t), - ntdb->alloc_data); - } - if (!new) - return false; - new[(*num)++] = off; - *arr = new; - return true; -} - -static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, - ntdb_off_t *recovery, - uint64_t *features, - size_t *num_capabilities) -{ - uint64_t hash_test; - struct ntdb_header hdr; - enum NTDB_ERROR ecode; - ntdb_off_t off, next; - - ecode = ntdb_read_convert(ntdb, 0, &hdr, sizeof(hdr)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - /* magic food should not be converted, so convert back. 
*/ - ntdb_convert(ntdb, hdr.magic_food, sizeof(hdr.magic_food)); - - hash_test = NTDB_HASH_MAGIC; - hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test)); - if (hdr.hash_test != hash_test) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "check: hash test %llu should be %llu", - (long long)hdr.hash_test, - (long long)hash_test); - } - - if (strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "check: bad magic '%.*s'", - (unsigned)sizeof(hdr.magic_food), - hdr.magic_food); - } - - /* Features which are used must be a subset of features offered. */ - if (hdr.features_used & ~hdr.features_offered) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "check: features used (0x%llx) which" - " are not offered (0x%llx)", - (long long)hdr.features_used, - (long long)hdr.features_offered); - } - - *features = hdr.features_offered; - *recovery = hdr.recovery; - if (*recovery) { - if (*recovery < sizeof(hdr) - || *recovery > ntdb->file->map_size) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check:" - " invalid recovery offset %zu", - (size_t)*recovery); - } - } - - for (off = hdr.capabilities; off && ecode == NTDB_SUCCESS; off = next) { - const struct ntdb_capability *cap; - enum NTDB_ERROR e; - - cap = ntdb_access_read(ntdb, off, sizeof(*cap), true); - if (NTDB_PTR_IS_ERR(cap)) { - return NTDB_PTR_ERR(cap); - } - - /* All capabilities are unknown. */ - e = unknown_capability(ntdb, "ntdb_check", cap->type); - next = cap->next; - ntdb_access_release(ntdb, cap); - if (e) - return e; - (*num_capabilities)++; - } - - /* Don't check reserved: they *can* be used later. */ - return NTDB_SUCCESS; -} - -static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b, void *ctx) -{ - /* Can overflow an int. */ - return *a > *b ? 1 - : *a < *b ? 
-1 - : 0; -} - -static enum NTDB_ERROR check_entry(struct ntdb_context *ntdb, - ntdb_off_t off_and_hash, - ntdb_len_t bucket, - ntdb_off_t used[], - size_t num_used, - size_t *num_found, - enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, - void *), - void *data) -{ - enum NTDB_ERROR ecode; - const struct ntdb_used_record *r; - const unsigned char *kptr; - ntdb_len_t klen, dlen; - uint32_t hash; - ntdb_off_t off = off_and_hash & NTDB_OFF_MASK; - ntdb_off_t *p; - - /* Empty bucket is fine. */ - if (!off_and_hash) { - return NTDB_SUCCESS; - } - - /* This can't point to a chain, we handled those at toplevel. */ - if (off_and_hash & (1ULL << NTDB_OFF_CHAIN_BIT)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Invalid chain bit in offset " - " %llu", (long long)off_and_hash); - } - - p = asearch(&off, used, num_used, off_cmp, NULL); - if (!p) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Invalid offset" - " %llu in hash", (long long)off); - } - /* Mark it invalid. */ - *p ^= 1; - (*num_found)++; - - r = ntdb_access_read(ntdb, off, sizeof(*r), true); - if (NTDB_PTR_IS_ERR(r)) { - return NTDB_PTR_ERR(r); - } - klen = rec_key_length(r); - dlen = rec_data_length(r); - ntdb_access_release(ntdb, r); - - kptr = ntdb_access_read(ntdb, off + sizeof(*r), klen + dlen, false); - if (NTDB_PTR_IS_ERR(kptr)) { - return NTDB_PTR_ERR(kptr); - } - - hash = ntdb_hash(ntdb, kptr, klen); - - /* Are we in the right chain? */ - if (bits_from(hash, 0, ntdb->hash_bits) != bucket) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: Bad bucket %u vs %llu", - bits_from(hash, 0, ntdb->hash_bits), - (long long)bucket); - /* Next 8 bits should be the same as top bits of bucket. 
*/ - } else if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL) - != bits_from(off_and_hash, 64-NTDB_OFF_UPPER_STEAL, - NTDB_OFF_UPPER_STEAL)) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: Bad hash bits %llu vs %llu", - (long long)off_and_hash, - (long long)hash); - } else if (check) { - NTDB_DATA k, d; - - k = ntdb_mkdata(kptr, klen); - d = ntdb_mkdata(kptr + klen, dlen); - ecode = check(k, d, data); - } else { - ecode = NTDB_SUCCESS; - } - ntdb_access_release(ntdb, kptr); - - return ecode; -} - -static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb, - ntdb_off_t off, - ntdb_len_t bucket, - ntdb_off_t used[], - size_t num_used, - size_t *num_found, - enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, - void *), - void *data) -{ - struct ntdb_used_record rec; - enum NTDB_ERROR ecode; - const ntdb_off_t *entries; - ntdb_len_t i, num; - - /* This is a used entry. */ - (*num_found)++; - - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash chain magic %llu", - (long long)rec_magic(&rec)); - } - - if (rec_data_length(&rec) % sizeof(ntdb_off_t)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash chain data length %llu", - (long long)rec_data_length(&rec)); - } - - if (rec_key_length(&rec) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash chain key length %llu", - (long long)rec_key_length(&rec)); - } - - off += sizeof(rec); - num = rec_data_length(&rec) / sizeof(ntdb_off_t); - entries = ntdb_access_read(ntdb, off, rec_data_length(&rec), true); - if (NTDB_PTR_IS_ERR(entries)) { - return NTDB_PTR_ERR(entries); - } - - /* Check each non-deleted entry in chain. 
*/ - for (i = 0; i < num; i++) { - ecode = check_entry(ntdb, entries[i], bucket, - used, num_used, num_found, check, data); - if (ecode) { - break; - } - } - - ntdb_access_release(ntdb, entries); - return ecode; -} - -static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb, - ntdb_off_t used[], - size_t num_used, - size_t num_other_used, - enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, - void *), - void *data) -{ - enum NTDB_ERROR ecode; - struct ntdb_used_record rec; - const ntdb_off_t *entries; - ntdb_len_t i; - /* Free tables and capabilities also show up as used, as do we. */ - size_t num_found = num_other_used + 1; - - ecode = ntdb_read_convert(ntdb, NTDB_HASH_OFFSET, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash table magic %llu", - (long long)rec_magic(&rec)); - } - - if (rec_data_length(&rec) != (sizeof(ntdb_off_t) << ntdb->hash_bits)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash table data length %llu", - (long long)rec_data_length(&rec)); - } - - if (rec_key_length(&rec) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash table key length %llu", - (long long)rec_key_length(&rec)); - } - - entries = ntdb_access_read(ntdb, NTDB_HASH_OFFSET + sizeof(rec), - rec_data_length(&rec), true); - if (NTDB_PTR_IS_ERR(entries)) { - return NTDB_PTR_ERR(entries); - } - - for (i = 0; i < (1 << ntdb->hash_bits); i++) { - ntdb_off_t off = entries[i] & NTDB_OFF_MASK; - if (entries[i] & (1ULL << NTDB_OFF_CHAIN_BIT)) { - ecode = check_hash_chain(ntdb, off, i, - used, num_used, &num_found, - check, data); - } else { - ecode = check_entry(ntdb, entries[i], i, - used, num_used, &num_found, - check, data); - } - if (ecode) { - break; - } - } - ntdb_access_release(ntdb, entries); - - if (ecode == NTDB_SUCCESS && num_found != num_used) { - 
ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Not all entries are in hash"); - } - return ecode; -} - -static enum NTDB_ERROR check_free(struct ntdb_context *ntdb, - ntdb_off_t off, - const struct ntdb_free_record *frec, - ntdb_off_t prev, unsigned int ftable, - unsigned int bucket) -{ - enum NTDB_ERROR ecode; - - if (frec_magic(frec) != NTDB_FREE_MAGIC) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: offset %llu bad magic 0x%llx", - (long long)off, - (long long)frec->magic_and_prev); - } - if (frec_ftable(frec) != ftable) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: offset %llu bad freetable %u", - (long long)off, frec_ftable(frec)); - - } - - ecode = ntdb_oob(ntdb, off, - frec_len(frec) + sizeof(struct ntdb_used_record), - false); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - if (size_to_bucket(frec_len(frec)) != bucket) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: offset %llu in wrong bucket" - " (%u vs %u)", - (long long)off, - bucket, size_to_bucket(frec_len(frec))); - } - if (prev && prev != frec_prev(frec)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: offset %llu bad prev" - " (%llu vs %llu)", - (long long)off, - (long long)prev, (long long)frec_len(frec)); - } - return NTDB_SUCCESS; -} - -static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb, - ntdb_off_t ftable_off, - unsigned ftable_num, - ntdb_off_t fr[], - size_t num_free, - size_t *num_found) -{ - struct ntdb_freetable ft; - ntdb_off_t h; - unsigned int i; - enum NTDB_ERROR ecode; - - ecode = ntdb_read_convert(ntdb, ftable_off, &ft, sizeof(ft)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (rec_magic(&ft.hdr) != NTDB_FTABLE_MAGIC - || rec_key_length(&ft.hdr) != 0 - || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Invalid header 
on free table"); - } - - for (i = 0; i < NTDB_FREE_BUCKETS; i++) { - ntdb_off_t off, prev = 0, *p, first = 0; - struct ntdb_free_record f; - - h = bucket_off(ftable_off, i); - for (off = ntdb_read_off(ntdb, h); off; off = f.next) { - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - if (!first) { - off &= NTDB_OFF_MASK; - first = off; - } - ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - ecode = check_free(ntdb, off, &f, prev, ftable_num, i); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* FIXME: Check hash bits */ - p = asearch(&off, fr, num_free, off_cmp, NULL); - if (!p) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: Invalid offset" - " %llu in free table", - (long long)off); - } - /* Mark it invalid. */ - *p ^= 1; - (*num_found)++; - prev = off; - } - - if (first) { - /* Now we can check first back pointer. */ - ecode = ntdb_read_convert(ntdb, first, &f, sizeof(f)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - ecode = check_free(ntdb, first, &f, prev, ftable_num, i); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - } - } - return NTDB_SUCCESS; -} - -/* Slow, but should be very rare. 
*/ -ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off) -{ - size_t len; - enum NTDB_ERROR ecode; - - for (len = 0; off + len < ntdb->file->map_size; len++) { - char c; - ecode = ntdb->io->tread(ntdb, off, &c, 1); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - if (c != 0 && c != 0x43) - break; - } - return len; -} - -static enum NTDB_ERROR check_linear(struct ntdb_context *ntdb, - ntdb_off_t **used, size_t *num_used, - ntdb_off_t **fr, size_t *num_free, - uint64_t features, ntdb_off_t recovery) -{ - ntdb_off_t off; - ntdb_len_t len; - enum NTDB_ERROR ecode; - bool found_recovery = false; - - for (off = sizeof(struct ntdb_header); - off < ntdb->file->map_size; - off += len) { - union { - struct ntdb_used_record u; - struct ntdb_free_record f; - struct ntdb_recovery_record r; - } rec; - /* r is larger: only get that if we need to. */ - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.f)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* If we crash after ftruncate, we can get zeroes or fill. 
*/ - if (rec.r.magic == NTDB_RECOVERY_INVALID_MAGIC - || rec.r.magic == 0x4343434343434343ULL) { - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - if (recovery == off) { - found_recovery = true; - len = sizeof(rec.r) + rec.r.max_len; - } else { - len = dead_space(ntdb, off); - if (NTDB_OFF_IS_ERR(len)) { - return NTDB_OFF_TO_ERR(len); - } - if (len < sizeof(rec.r)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: invalid" - " dead space at %zu", - (size_t)off); - } - - ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, - "Dead space at %zu-%zu (of %zu)", - (size_t)off, (size_t)(off + len), - (size_t)ntdb->file->map_size); - } - } else if (rec.r.magic == NTDB_RECOVERY_MAGIC) { - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - if (recovery != off) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: unexpected" - " recovery record at offset" - " %zu", - (size_t)off); - } - if (rec.r.len > rec.r.max_len) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: invalid recovery" - " length %zu", - (size_t)rec.r.len); - } - if (rec.r.eof > ntdb->file->map_size) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: invalid old EOF" - " %zu", (size_t)rec.r.eof); - } - found_recovery = true; - len = sizeof(rec.r) + rec.r.max_len; - } else if (frec_magic(&rec.f) == NTDB_FREE_MAGIC) { - len = sizeof(rec.u) + frec_len(&rec.f); - if (off + len > ntdb->file->map_size) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: free overlength" - " %llu at offset %llu", - (long long)len, - (long long)off); - } - /* This record should be in free lists. 
*/ - if (frec_ftable(&rec.f) != NTDB_FTABLE_NONE - && !append(ntdb, fr, num_free, off)) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, - NTDB_LOG_ERROR, - "ntdb_check: tracking %zu'th" - " free record.", *num_free); - } - } else if (rec_magic(&rec.u) == NTDB_USED_MAGIC - || rec_magic(&rec.u) == NTDB_CHAIN_MAGIC - || rec_magic(&rec.u) == NTDB_HTABLE_MAGIC - || rec_magic(&rec.u) == NTDB_FTABLE_MAGIC - || rec_magic(&rec.u) == NTDB_CAP_MAGIC) { - uint64_t klen, dlen, extra; - - /* This record is used! */ - if (!append(ntdb, used, num_used, off)) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, - NTDB_LOG_ERROR, - "ntdb_check: tracking %zu'th" - " used record.", *num_used); - } - - klen = rec_key_length(&rec.u); - dlen = rec_data_length(&rec.u); - extra = rec_extra_padding(&rec.u); - - len = sizeof(rec.u) + klen + dlen + extra; - if (off + len > ntdb->file->map_size) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: used overlength" - " %llu at offset %llu", - (long long)len, - (long long)off); - } - - if (len < sizeof(rec.f)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: too short record" - " %llu at %llu", - (long long)len, - (long long)off); - } - - /* Check that records have correct 0 at end (but may - * not in future). */ - if (extra && !features - && rec_magic(&rec.u) != NTDB_CAP_MAGIC) { - const char *p; - char c; - p = ntdb_access_read(ntdb, off + sizeof(rec.u) - + klen + dlen, 1, false); - if (NTDB_PTR_IS_ERR(p)) - return NTDB_PTR_ERR(p); - c = *p; - ntdb_access_release(ntdb, p); - - if (c != '\0') { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check:" - " non-zero extra" - " at %llu", - (long long)off); - } - } - } else { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: Bad magic 0x%llx" - " at offset %zu", - (long long)rec_magic(&rec.u), - (size_t)off); - } - } - - /* We must have found recovery area if there was one. 
*/ - if (recovery != 0 && !found_recovery) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: expected a recovery area at %zu", - (size_t)recovery); - } - - return NTDB_SUCCESS; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb, - enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *), - void *data) -{ - ntdb_off_t *fr = NULL, *used = NULL; - ntdb_off_t ft = 0, recovery = 0; - size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0, - num_capabilities = 0; - uint64_t features = 0; - enum NTDB_ERROR ecode; - - if (ntdb->flags & NTDB_CANT_CHECK) { - return ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, - "ntdb_check: database has unknown capability," - " cannot check."); - } - - ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - ecode = ntdb_lock_expand(ntdb, F_RDLCK); - if (ecode != NTDB_SUCCESS) { - ntdb_allrecord_unlock(ntdb, F_RDLCK); - return ecode; - } - - ecode = check_header(ntdb, &recovery, &features, &num_capabilities); - if (ecode != NTDB_SUCCESS) - goto out; - - /* First we do a linear scan, checking all records. */ - ecode = check_linear(ntdb, &used, &num_used, &fr, &num_free, features, - recovery); - if (ecode != NTDB_SUCCESS) - goto out; - - for (ft = first_ftable(ntdb); ft; ft = next_ftable(ntdb, ft)) { - if (NTDB_OFF_IS_ERR(ft)) { - ecode = NTDB_OFF_TO_ERR(ft); - goto out; - } - ecode = check_free_table(ntdb, ft, num_ftables, fr, num_free, - &num_found); - if (ecode != NTDB_SUCCESS) - goto out; - num_ftables++; - } - - /* FIXME: Check key uniqueness? 
*/ - ecode = check_hash(ntdb, used, num_used, num_ftables + num_capabilities, - check, data); - if (ecode != NTDB_SUCCESS) - goto out; - - if (num_found != num_free) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Not all entries are in" - " free table"); - } - -out: - ntdb_allrecord_unlock(ntdb, F_RDLCK); - ntdb_unlock_expand(ntdb, F_RDLCK); - ntdb->free_fn(fr, ntdb->alloc_data); - ntdb->free_fn(used, ntdb->alloc_data); - return ecode; -} diff --git a/ccan/ntdb/doc/TDB_porting.txt b/ccan/ntdb/doc/TDB_porting.txt deleted file mode 100644 index 5daf94b7..00000000 --- a/ccan/ntdb/doc/TDB_porting.txt +++ /dev/null @@ -1,483 +0,0 @@ -Interface differences between TDB and NTDB. - -- ntdb shares 'struct TDB_DATA' with tdb, but TDB defines the TDB_DATA - typedef, whereas ntdb defines NTDB_DATA (ie. both are compatible). - If you include both ntdb.h and tdb.h, #include tdb.h first, - otherwise you'll get a compile error when tdb.h re-defined struct - TDB_DATA. - - Example: - #include - #include - -- ntdb functions return NTDB_SUCCESS (ie 0) on success, and a negative - error on failure, whereas tdb functions returned 0 on success, and - -1 on failure. tdb then used tdb_error() to determine the error; - this API is nasty if we ever want to support threads, so is not supported. - - Example: - #include - #include - - void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d) - { - if (tdb_store(tdb, key, d) == -1) { - printf("store failed: %s\n", tdb_errorstr(tdb)); - } - } - - void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d) - { - enum NTDB_ERROR e; - - e = ntdb_store(ntdb, key, d); - if (e) { - printf("store failed: %s\n", ntdb_errorstr(e)); - } - } - -- ntdb's ntdb_fetch() returns an error, tdb's returned the data directly - (or tdb_null, and you were supposed to check tdb_error() to find out why). 
- - Example: - #include - #include - - void tdb_example(struct tdb_context *tdb, TDB_DATA key) - { - TDB_DATA data; - - data = tdb_fetch(tdb, key); - if (!data.dptr) { - printf("fetch failed: %s\n", tdb_errorstr(tdb)); - } - } - - void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key) - { - NTDB_DATA data; - enum NTDB_ERROR e; - - e = ntdb_fetch(ntdb, key, &data); - if (e) { - printf("fetch failed: %s\n", ntdb_errorstr(e)); - } - } - -- ntdb's ntdb_nextkey() frees the old key's dptr, in tdb you needed to do - this manually. - - Example: - #include - #include - - void tdb_example(struct tdb_context *tdb) - { - TDB_DATA key, next, data; - - for (key = tdb_firstkey(tdb); key.dptr; key = next) { - printf("Got key!\n"); - next = tdb_nextkey(tdb, key); - free(key.dptr); - } - } - - - void ntdb_example(struct ntdb_context *ntdb) - { - NTDB_DATA k, data; - enum NTDB_ERROR e; - - for (e = ntdb_firstkey(ntdb,&k); !e; e = ntdb_nextkey(ntdb,&k)) - printf("Got key!\n"); - } - -- Unlike tdb_open/tdb_open_ex, ntdb_open does not allow NULL names, - even for NTDB_INTERNAL dbs, and thus ntdb_name() never returns NULL. - - Example: - #include - #include - - struct tdb_context *tdb_example(void) - { - return tdb_open(NULL, 0, TDB_INTERNAL, O_RDWR, 0); - } - - struct ntdb_context *ntdb_example(void) - { - return ntdb_open("example", NTDB_INTERNAL, O_RDWR, 0); - } - -- ntdb uses a linked list of attribute structures to implement logging and - alternate hashes. tdb used tdb_open_ex, which was not extensible. 
- - Example: - #include <tdb.h> - #include <ntdb.h> - - /* Custom hash function */ - static unsigned int my_tdb_hash_func(TDB_DATA *key) - { - return key->dsize; - } - - struct tdb_context *tdb_example(void) - { - return tdb_open_ex("example.tdb", 0, TDB_DEFAULT, - O_CREAT|O_RDWR, 0600, NULL, my_tdb_hash_func); - } - - /* Custom hash function */ - static unsigned int my_ntdb_hash_func(const void *key, size_t len, - uint32_t seed, void *data) - { - return len; - } - - struct ntdb_context *ntdb_example(void) - { - union ntdb_attribute hash; - - hash.base.attr = NTDB_ATTRIBUTE_HASH; - hash.base.next = NULL; - hash.hash.fn = my_ntdb_hash_func; - return ntdb_open("example.ntdb", NTDB_DEFAULT, - O_CREAT|O_RDWR, 0600, &hash); - } - -- tdb's tdb_open/tdb_open_ex took an explicit hash size, defaulting to - 131. ntdb uses an attribute for this, defaulting to 8192. - - Example: - #include <tdb.h> - #include <ntdb.h> - - struct tdb_context *tdb_example(void) - { - return tdb_open("example.tdb", 10007, TDB_DEFAULT, - O_CREAT|O_RDWR, 0600); - } - - struct ntdb_context *ntdb_example(void) - { - union ntdb_attribute hashsize; - - hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE; - hashsize.base.next = NULL; - hashsize.hashsize.size = 16384; - return ntdb_open("example.ntdb", NTDB_DEFAULT, - O_CREAT|O_RDWR, 0600, &hashsize); - } - -- ntdb's log function is simpler than tdb's log function. The string - is already formatted, is not terminated by a '\n', and it takes an - enum ntdb_log_level not a tdb_debug_level, which has only three - values: NTDB_LOG_ERROR, NTDB_LOG_USE_ERROR and NTDB_LOG_WARNING. - - #include <tdb.h> - #include <ntdb.h> - - static void tdb_log(struct tdb_context *tdb, - enum tdb_debug_level level, const char *fmt, ...)
- { - va_list ap; - const char *name; - - switch (level) { - case TDB_DEBUG_FATAL: - fprintf(stderr, "FATAL: "); - break; - case TDB_DEBUG_ERROR: - fprintf(stderr, "ERROR: "); - break; - case TDB_DEBUG_WARNING: - fprintf(stderr, "WARNING: "); - break; - case TDB_DEBUG_TRACE: - /* Don't print out tracing. */ - return; - } - - name = tdb_name(tdb); - if (!name) { - name = "unnamed"; - } - - fprintf(stderr, "tdb(%s):", name); - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - } - - struct tdb_context *tdb_example(void) - { - struct tdb_logging_context lctx; - - lctx.log_fn = tdb_log; - return tdb_open_ex("example.tdb", 0, TDB_DEFAULT, - O_CREAT|O_RDWR, 0600, &lctx, NULL); - } - - static void ntdb_log(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) - { - switch (level) { - case NTDB_LOG_ERROR: - fprintf(stderr, "ERROR: "); - break; - case NTDB_LOG_USE_ERROR: - /* We made a mistake, so abort. */ - abort(); - break; - case NTDB_LOG_WARNING: - fprintf(stderr, "WARNING: "); - break; - } - - fprintf(stderr, "ntdb(%s):%s:%s\n", - ntdb_name(ntdb), ntdb_errorstr(ecode), message); - } - - struct ntdb_context *ntdb_example(void) - { - union ntdb_attribute log; - - log.base.attr = NTDB_ATTRIBUTE_LOG; - log.base.next = NULL; - log.log.fn = ntdb_log; - return ntdb_open("example.ntdb", NTDB_DEFAULT, - O_CREAT|O_RDWR, 0600, &log); - } - -- ntdb provides ntdb_deq() for comparing two NTDB_DATA, and ntdb_mkdata() for - creating an NTDB_DATA. 
- - #include <tdb.h> - #include <ntdb.h> - - void tdb_example(struct tdb_context *tdb) - { - TDB_DATA data, key; - - key.dsize = strlen("hello"); - key.dptr = "hello"; - data = tdb_fetch(tdb, key); - if (data.dsize == key.dsize - && !memcmp(data.dptr, key.dptr, key.dsize)) { - printf("key is same as data\n"); - } - free(data.dptr); - } - - void ntdb_example(struct ntdb_context *ntdb) - { - NTDB_DATA data, key; - - key = ntdb_mkdata("hello", strlen("hello")); - if (ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS) { - if (ntdb_deq(key, data)) { - printf("key is same as data\n"); - } - free(data.dptr); - } - } - -- ntdb's ntdb_parse_record() takes a type-checked callback data - pointer, not a void * (though a void * pointer still works). The - callback function is allowed to do read operations on the database, - or write operations if you first call ntdb_lockall(). TDB's - tdb_parse_record() did not allow any database access within the - callback, and could crash if you tried. - - Example: - #include <tdb.h> - #include <ntdb.h> - - static int tdb_parser(TDB_DATA key, TDB_DATA data, void *private_data) - { - TDB_DATA *expect = private_data; - - return data.dsize == expect->dsize - && !memcmp(data.dptr, expect->dptr, data.dsize); - } - - void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d) - { - switch (tdb_parse_record(tdb, key, tdb_parser, &d)) { - case -1: - printf("parse failed: %s\n", tdb_errorstr(tdb)); - break; - case 0: - printf("data was different!\n"); - break; - case 1: - printf("data was same!\n"); - break; - } - } - - static int ntdb_parser(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expect) - { - return ntdb_deq(data, *expect); - } - - void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d) - { - enum NTDB_ERROR e; - - e = ntdb_parse_record(ntdb, key, ntdb_parser, &d); - switch (e) { - case 0: - printf("data was different!\n"); - break; - case 1: - printf("data was same!\n"); - break; - default: - printf("parse failed: %s\n", ntdb_errorstr(e)); - break; - } - } - -- ntdb
does locking on read-only databases (ie. O_RDONLY passed to ntdb_open). - tdb did not: use the NTDB_NOLOCK flag if you want to suppress locking. - - Example: - #include <tdb.h> - #include <ntdb.h> - - struct tdb_context *tdb_example(void) - { - return tdb_open("example.tdb", 0, TDB_DEFAULT, O_RDONLY, 0); - } - - struct ntdb_context *ntdb_example(void) - { - return ntdb_open("example.ntdb", NTDB_NOLOCK, O_RDONLY, 0, NULL); - } - -- Failure inside a transaction (such as a lock function failing) does - not implicitly cancel the transaction; you still need to call - ntdb_transaction_cancel(). - - #include <tdb.h> - #include <ntdb.h> - - void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d) - { - if (tdb_transaction_start(tdb) == -1) { - printf("transaction failed: %s\n", tdb_errorstr(tdb)); - return; - } - - if (tdb_store(tdb, key, d) == -1) { - printf("store failed: %s\n", tdb_errorstr(tdb)); - return; - } - if (tdb_transaction_commit(tdb) == -1) { - printf("commit failed: %s\n", tdb_errorstr(tdb)); - } - } - - void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d) - { - enum NTDB_ERROR e; - - e = ntdb_transaction_start(ntdb); - if (e) { - printf("transaction failed: %s\n", ntdb_errorstr(e)); - return; - } - - e = ntdb_store(ntdb, key, d); - if (e) { - printf("store failed: %s\n", ntdb_errorstr(e)); - ntdb_transaction_cancel(ntdb); - return; - } - - e = ntdb_transaction_commit(ntdb); - if (e) { - printf("commit failed: %s\n", ntdb_errorstr(e)); - } - } - -- There is no NTDB_CLEAR_IF_FIRST flag; it has severe scalability and - API problems. If necessary, you can emulate this by using the open - hook and placing a 1-byte lock at offset 4. If your program forks - and exits, you will need to place this lock again in the child before - the parent exits.
- - Example: - - #include - #include - - struct tdb_context *tdb_example(void) - { - return tdb_open("example.tdb", 0, TDB_CLEAR_IF_FIRST, - O_CREAT|O_RDWR, 0600); - } - - static enum NTDB_ERROR clear_if_first(int fd, void *unused) - { - /* We hold a lock offset 4 always, so we can tell if - * anyone else is. */ - struct flock fl; - - fl.l_type = F_WRLCK; - fl.l_whence = SEEK_SET; - fl.l_start = 4; /* ACTIVE_LOCK */ - fl.l_len = 1; - - if (fcntl(fd, F_SETLK, &fl) == 0) { - /* We must be first ones to open it! Clear it. */ - if (ftruncate(fd, 0) != 0) { - return NTDB_ERR_IO; - } - } - fl.l_type = F_RDLCK; - if (fcntl(fd, F_SETLKW, &fl) != 0) { - return NTDB_ERR_IO; - } - return NTDB_SUCCESS; - } - - struct ntdb_context *ntdb_example(void) - { - union ntdb_attribute open_attr; - - open_attr.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK; - open_attr.openhook.base.next = NULL; - open_attr.openhook.fn = clear_if_first; - - return ntdb_open("example.ntdb", NTDB_DEFAULT, - O_CREAT|O_RDWR, 0600, &open_attr); - } - -- ntdb traversals are not reliable if the database is changed during - the traversal, ie your traversal may not cover all elements, or may - cover elements multiple times. As a special exception, deleting the - current record within ntdb_traverse() is reliable. - -- There is no ntdb_traverse_read, since ntdb_traverse does not hold - a lock across the entire traversal anyway. If you want to make sure - that your traversal function does not write to the database, you can - set and clear the NTDB_RDONLY flag around the traversal. - -- ntdb does not need tdb_reopen() or tdb_reopen_all(). 
If you call - fork() during certain operations the child should close the - ntdb, or complete the operations before continuing to use the ntdb: - - ntdb_transaction_start(): child must call ntdb_transaction_cancel() - ntdb_lockall(): child must call ntdb_unlockall() - ntdb_lockall_read(): child must call ntdb_unlockall_read() - ntdb_chainlock(): child must call ntdb_chainunlock() - ntdb_parse_record() callback: child must return from ntdb_parse_record() - -- ntdb will not open a non-ntdb file, even if O_CREAT is specified. tdb - will overwrite an unknown file in that case. diff --git a/ccan/ntdb/doc/design.lyx b/ccan/ntdb/doc/design.lyx deleted file mode 100644 index 5a10ee35..00000000 --- a/ccan/ntdb/doc/design.lyx +++ /dev/null @@ -1,2727 +0,0 @@ -#LyX 2.0 created this file. For more info see http://www.lyx.org/ -\lyxformat 413 -\begin_document -\begin_header -\textclass article -\use_default_options true -\maintain_unincluded_children false -\language english -\language_package default -\inputencoding auto -\fontencoding global -\font_roman default -\font_sans default -\font_typewriter default -\font_default_family default -\use_non_tex_fonts false -\font_sc false -\font_osf false -\font_sf_scale 100 -\font_tt_scale 100 - -\graphics default -\default_output_format default -\output_sync 0 -\bibtex_command default -\index_command default -\paperfontsize default -\use_hyperref false -\papersize default -\use_geometry false -\use_amsmath 1 -\use_esint 1 -\use_mhchem 1 -\use_mathdots 1 -\cite_engine basic -\use_bibtopic false -\use_indices false -\paperorientation portrait -\suppress_date false -\use_refstyle 0 -\index Index -\shortcut idx -\color #008000 -\end_index -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation indent -\paragraph_indentation default -\quotes_language english -\papercolumns 1 -\papersides 1 -\paperpagestyle default -\tracking_changes true -\output_changes true -\html_math_output 0 -\html_css_as_file 0 -\html_be_strict false -\end_header - -\begin_body -
-\begin_layout Title -NTDB: Redesigning The Trivial DataBase -\end_layout - -\begin_layout Author -Rusty Russell, IBM Corporation -\end_layout - -\begin_layout Date -19 June 2012 -\end_layout - -\begin_layout Abstract -The Trivial DataBase on-disk format is 32 bits; with usage cases heading - towards the 4G limit, that must change. - This required breakage provides an opportunity to revisit TDB's other design - decisions and reassess them. -\end_layout - -\begin_layout Section -Introduction -\end_layout - -\begin_layout Standard -The Trivial DataBase was originally written by Andrew Tridgell as a simple - key/data pair storage system with the same API as dbm, but allowing multiple - readers and writers while being small enough (< 1000 lines of C) to include - in SAMBA. - The simple design created in 1999 has proven surprisingly robust and performant -, used in Samba versions 3 and 4 as well as numerous other projects. - Its useful life was greatly increased by the (backwards-compatible!) addition - of transaction support in 2005. -\end_layout - -\begin_layout Standard -The wider variety and greater demands of TDB-using code has led to some - organic growth of the API, as well as some compromises on the implementation. - None of these, by themselves, are seen as show-stoppers, but the cumulative - effect is a loss of elegance over the initial, simple TDB implementation.
- Here is a table of the approximate number of lines of implementation code - and number of API functions at the end of each year: -\end_layout - -\begin_layout Standard -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\begin_layout Plain Layout -Year End -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -API Functions -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -Lines of C Code Implementation -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -1999 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -13 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1195 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2000 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -24 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -1725 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2001 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -32 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2228 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2002 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -35 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2481 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2003 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -35 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2552 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2004 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -40 -\end_layout - -\end_inset - - -\begin_inset Text - 
-\begin_layout Plain Layout -2584 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2005 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -38 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -2647 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2006 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -52 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -3754 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2007 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -66 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4398 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2008 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -71 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -4768 -\end_layout - -\end_inset - - - - -\begin_inset Text - -\begin_layout Plain Layout -2009 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -73 -\end_layout - -\end_inset - - -\begin_inset Text - -\begin_layout Plain Layout -5715 -\end_layout - -\end_inset - - - - -\end_inset - - -\end_layout - -\begin_layout Standard -This review is an attempt to catalog and address all the known issues with - TDB and create solutions which address the problems without significantly - increasing complexity; all involved are far too aware of the dangers of - second system syndrome in rewriting a successful project like this. -\end_layout - -\begin_layout Standard -Note: the final decision was to make ntdb a separate library, with a separate - 'ntdb' namespace so both can potentially be linked together.
- This document still refers to -\begin_inset Quotes eld -\end_inset - -tdb -\begin_inset Quotes erd -\end_inset - - everywhere, for simplicity. -\end_layout - -\begin_layout Section -API Issues -\end_layout - -\begin_layout Subsection -tdb_open_ex Is Not Expandable -\end_layout - -\begin_layout Standard -The tdb_open() call was expanded to tdb_open_ex(), which added an optional - hashing function and an optional logging function argument. - Additional arguments to open would require the introduction of a tdb_open_ex2 - call etc. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\begin_inset CommandInset label -LatexCommand label -name "attributes" - -\end_inset - - -\end_layout - -\begin_layout Standard -tdb_open() will take a linked-list of attributes: -\end_layout - -\begin_layout LyX-Code -enum tdb_attribute { -\end_layout - -\begin_layout LyX-Code - TDB_ATTRIBUTE_LOG = 0, -\end_layout - -\begin_layout LyX-Code - TDB_ATTRIBUTE_HASH = 1 -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout LyX-Code -struct tdb_attribute_base { -\end_layout - -\begin_layout LyX-Code - enum tdb_attribute attr; -\end_layout - -\begin_layout LyX-Code - union tdb_attribute *next; -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout LyX-Code -struct tdb_attribute_log { -\end_layout - -\begin_layout LyX-Code - struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ -\end_layout - -\begin_layout LyX-Code - tdb_log_func log_fn; -\end_layout - -\begin_layout LyX-Code - void *log_private; -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout LyX-Code -struct tdb_attribute_hash { -\end_layout - -\begin_layout LyX-Code - struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ -\end_layout - -\begin_layout LyX-Code - tdb_hash_func hash_fn; -\end_layout - -\begin_layout LyX-Code - void *hash_private; -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout LyX-Code -union tdb_attribute { 
-\end_layout - -\begin_layout LyX-Code - struct tdb_attribute_base base; -\end_layout - -\begin_layout LyX-Code - struct tdb_attribute_log log; -\end_layout - -\begin_layout LyX-Code - struct tdb_attribute_hash hash; -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout Standard -This allows future attributes to be added, even if this expands the size - of the union. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -tdb_traverse Makes Impossible Guarantees -\end_layout - -\begin_layout Standard -tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it - was thought that it was important to guarantee that all records which exist - at the start and end of the traversal would be included, and no record - would be included twice. -\end_layout - -\begin_layout Standard -This adds complexity (see -\begin_inset CommandInset ref -LatexCommand ref -reference "Reliable-Traversal-Adds" - -\end_inset - -) and does not work anyway for records which are altered (in particular, - those which are expanded may be effectively deleted and re-added behind - the traversal). -\end_layout - -\begin_layout Subsubsection -\begin_inset CommandInset label -LatexCommand label -name "traverse-Proposed-Solution" - -\end_inset - -Proposed Solution -\end_layout - -\begin_layout Standard -Abandon the guarantee. - You will see every record if no changes occur during your traversal, otherwise - you will see some subset. - You can prevent changes by using a transaction or the locking API. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. - Delete-during-traverse will still delete every record, too (assuming no - other changes). -\end_layout - -\begin_layout Subsection -Nesting of Transactions Is Fraught -\end_layout - -\begin_layout Standard -TDB has alternated between allowing nested transactions and not allowing - them. 
- Various paths in the Samba codebase assume that transactions will nest, - and in a sense they can: the operation is only committed to disk when the - outer transaction is committed. - There are two problems, however: -\end_layout - -\begin_layout Enumerate -Canceling the inner transaction will cause the outer transaction commit - to fail, and will not undo any operations since the inner transaction began. - This problem is soluble with some additional internal code. -\end_layout - -\begin_layout Enumerate -An inner transaction commit can be cancelled by the outer transaction. - This is desirable in the way which Samba's database initialization code - uses transactions, but could be a surprise to any users expecting a successful - transaction commit to expose changes to others. -\end_layout - -\begin_layout Standard -The current solution is to specify the behavior at tdb_open(), with the - default currently that nested transactions are allowed. - This flag can also be changed at runtime. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Given the usage patterns, it seems that the -\begin_inset Quotes eld -\end_inset - -least-surprise -\begin_inset Quotes erd -\end_inset - - behavior of disallowing nested transactions should become the default. - Additionally, it seems the outer transaction is the only code which knows - whether inner transactions should be allowed, so a flag to indicate this - could be added to tdb_transaction_start. - However, this behavior can be simulated with a wrapper which uses tdb_add_flags -() and tdb_remove_flags(), so the API should not be expanded for this relatively --obscure case. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete; the nesting flag has been removed. 
-\end_layout - -\begin_layout Subsection -Incorrect Hash Function is Not Detected -\end_layout - -\begin_layout Standard -tdb_open_ex() allows the calling code to specify a different hash function - to use, but does not check that all other processes accessing this tdb - are using the same hash function. - The result is that records are missing from tdb_fetch(). -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The header should contain an example hash result (eg. - the hash of 0xdeadbeef), and tdb_open_ex() should check that the given - hash function produces the same answer, or fail the tdb_open call. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -tdb_set_max_dead/TDB_VOLATILE Expose Implementation -\end_layout - -\begin_layout Standard -In response to scalability issues with the free list ( -\begin_inset CommandInset ref -LatexCommand ref -reference "TDB-Freelist-Is" - -\end_inset - -) two API workarounds have been incorporated in TDB: tdb_set_max_dead() - and the TDB_VOLATILE flag to tdb_open. - The latter actually calls the former with an argument of -\begin_inset Quotes eld -\end_inset - -5 -\begin_inset Quotes erd -\end_inset - -. -\end_layout - -\begin_layout Standard -This code allows deleted records to accumulate without putting them in the - free list. - On delete we iterate through each chain and free them in a batch if there - are more than max_dead entries. - These are never otherwise recycled except as a side-effect of a tdb_repack. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -With the scalability problems of the freelist solved, this API can be removed. 
- The TDB_VOLATILE flag may still be useful as a hint that store and delete - of records will be at least as common as fetch in order to allow some internal - tuning, but initially will become a no-op. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. - Unknown flags cause tdb_open() to fail as well, so they can be detected - at runtime. -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "TDB-Files-Cannot" - -\end_inset - -TDB Files Cannot Be Opened Multiple Times In The Same Process -\end_layout - -\begin_layout Standard -No process can open the same TDB twice; we check and disallow it. - This is an unfortunate side-effect of fcntl locks, which operate on a per-file - rather than per-file-descriptor basis, and do not nest. - Thus, closing any file descriptor on a file clears all the locks obtained - by this process, even if they were placed using a different file descriptor! -\end_layout - -\begin_layout Standard -Note that even if this were solved, deadlock could occur if operations were - nested: this is a more manageable programming error in most cases. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -We could lobby POSIX to fix the perverse rules, or at least lobby Linux - to violate them so that the most common implementation does not have this - restriction. - This would be a generally good idea for other fcntl lock users. -\end_layout - -\begin_layout Standard -Samba uses a wrapper which hands out the same tdb_context to multiple callers - if this happens, and does simple reference counting. - We should do this inside the tdb library, which already emulates lock nesting - internally; it would need to recognize when deadlock occurs within a single - process. 
- This would create a new failure mode for tdb operations (while we currently - handle locking failures, they are impossible in normal use and a process - encountering them can do little but give up). -\end_layout - -\begin_layout Standard -I do not see benefit in an additional tdb_open flag to indicate whether - re-opening is allowed, though there may be some benefit to adding a - call to detect when a tdb_context is shared, to allow others to create such - an API. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -TDB API Is Not POSIX Thread-safe -\end_layout - -\begin_layout Standard -The TDB API uses an error code which can be queried after an operation to - determine what went wrong. - This programming model does not work with threads, unless specific additional - guarantees are given by the implementation. - In addition, even otherwise-independent threads cannot open the same TDB - (as in -\begin_inset CommandInset ref -LatexCommand ref -reference "TDB-Files-Cannot" - -\end_inset - -). -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Rearchitecting the API to include a tdb_errcode pointer would be a great - deal of churn, but fortunately most functions return 0 on success and -1 - on error: we can change these to return 0 on success and a negative error - code on error, and the API remains similar to previous. - The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA - pointer and return an error code. - It is also simpler to have tdb_nextkey replace its key argument in place, - freeing up any old .dptr. -\end_layout - -\begin_layout Standard -Internal locking is required to make sure that fcntl locks do not overlap - between threads, and also that the global list of tdbs is maintained.
-\end_layout - -\begin_layout Standard -The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe - version of the library, and otherwise no overhead will exist. - Alternatively, a hooking mechanism similar to that proposed for -\begin_inset CommandInset ref -LatexCommand ref -reference "Proposed-Solution-locking-hook" - -\end_inset - - could be used to enable pthread locking at runtime. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Incomplete; API has been changed but thread safety has not been implemented. -\end_layout - -\begin_layout Subsection -*_nonblock Functions And *_mark Functions Expose Implementation -\end_layout - -\begin_layout Standard -CTDB -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Clustered TDB, see http://ctdb.samba.org -\end_layout - -\end_inset - - wishes to operate on TDB in a non-blocking manner. - This is currently done as follows: -\end_layout - -\begin_layout Enumerate -Call the _nonblock variant of an API function (eg. - tdb_lockall_nonblock). - If this fails: -\end_layout - -\begin_layout Enumerate -Fork a child process, and wait for it to call the normal variant (eg. - tdb_lockall). -\end_layout - -\begin_layout Enumerate -If the child succeeds, call the _mark variant to indicate we already have - the locks (eg. - tdb_lockall_mark). -\end_layout - -\begin_layout Enumerate -Upon completion, tell the child to release the locks (eg. - tdb_unlockall). -\end_layout - -\begin_layout Enumerate -Indicate to tdb that it should consider the locks removed (eg. - tdb_unlockall_mark). -\end_layout - -\begin_layout Standard -There are several issues with this approach. - Firstly, adding two new variants of each function clutters the API for - an obscure use, and so not all functions have three variants. 
- Secondly, it assumes that all paths of the functions ask for the same locks, - otherwise the parent process will have to get a lock which the child doesn't - have under some circumstances. - I don't believe this is currently the case, but it constrains the implementatio -n. -\end_layout - -\begin_layout Subsubsection -\begin_inset CommandInset label -LatexCommand label -name "Proposed-Solution-locking-hook" - -\end_inset - -Proposed Solution -\end_layout - -\begin_layout Standard -Implement a hook for locking methods, so that the caller can control the - calls to create and remove fcntl locks. - In this scenario, ctdbd would operate as follows: -\end_layout - -\begin_layout Enumerate -Call the normal API function, eg tdb_lockall(). -\end_layout - -\begin_layout Enumerate -When the lock callback comes in, check if the child has the lock. - Initially, this is always false. - If so, return 0. - Otherwise, try to obtain it in non-blocking mode. - If that fails, return EWOULDBLOCK. -\end_layout - -\begin_layout Enumerate -Release locks in the unlock callback as normal. -\end_layout - -\begin_layout Enumerate -If tdb_lockall() fails, see if we recorded a lock failure; if so, call the - child to repeat the operation. -\end_layout - -\begin_layout Enumerate -The child records what locks it obtains, and returns that information to - the parent. -\end_layout - -\begin_layout Enumerate -When the child has succeeded, goto 1. -\end_layout - -\begin_layout Standard -This is flexible enough to handle any potential locking scenario, even when - lock requirements change. - It can be optimized so that the parent does not release locks, just tells - the child which locks it doesn't need to obtain. -\end_layout - -\begin_layout Standard -It also keeps the complexity out of the API, and in ctdbd where it is needed. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. 
-\end_layout - -\begin_layout Subsection -tdb_chainlock Functions Expose Implementation -\end_layout - -\begin_layout Standard -tdb_chainlock locks some number of records, including the record indicated - by the given key. - This gives atomicity guarantees; no-one can start a transaction, alter, - read or delete that key while the lock is held. -\end_layout - -\begin_layout Standard -It also makes the same guarantee for any other key in the chain, which is - an internal implementation detail and potentially a cause for deadlock. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -None. - It would be nice to have an explicit single entry lock which affected no - other keys. - Unfortunately, this won't work for an entry which doesn't exist. - Thus while chainlock may be implemented more efficiently for the existing - case, it will still have overlap issues with the non-existing case. - So it is best to keep the current (lack of) guarantee about which records - will be affected to avoid constraining our implementation. -\end_layout - -\begin_layout Subsection -Signal Handling is Not Race-Free -\end_layout - -\begin_layout Standard -The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate - that the tdb locking code should return with a failure, rather than trying - again when a signal is received (and errno == EAGAIN). - This is usually used to implement timeouts. -\end_layout - -\begin_layout Standard -Unfortunately, this does not work in the case where the signal is received - before the tdb code enters the fcntl() call to place the lock: the code - will sleep within the fcntl() code, unaware that the signal wants it to - exit. - In the case of long timeouts, this does not happen in practice.
-\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The locking hooks proposed in -\begin_inset CommandInset ref -LatexCommand ref -reference "Proposed-Solution-locking-hook" - -\end_inset - - would allow the user to decide on whether to fail the lock acquisition - on a signal. - This allows the caller to choose their own compromise: they could narrow - the race by checking immediately before the fcntl call. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -It may be possible to make this race-free in some implementations by having - the signal handler alter the struct flock to make it invalid. - This will cause the fcntl() lock call to fail with EINVAL if the signal - occurs before the kernel is entered, otherwise EAGAIN. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -The API Uses Gratuitous Typedefs, Capitals -\end_layout - -\begin_layout Standard -typedefs are useful for providing source compatibility when types can differ - across implementations, or arguably in the case of function pointer definitions - which are hard for humans to parse. - Otherwise it is simply obfuscation and pollutes the namespace. -\end_layout - -\begin_layout Standard -Capitalization is usually reserved for compile-time constants and macros. -\end_layout - -\begin_layout Description -TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the - definition isn't visible to the API user anyway. -\end_layout - -\begin_layout Description -TDB_DATA There is no reason to use this over struct TDB_DATA; the struct - needs to be understood by the API user. -\end_layout - -\begin_layout Description -struct -\begin_inset space ~ -\end_inset - -TDB_DATA This would normally be called 'struct tdb_data'. 
-\end_layout - -\begin_layout Description -enum -\begin_inset space ~ -\end_inset - -TDB_ERROR Similarly, this would normally be enum tdb_error. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -None. - Introducing lower case variants would please pedants like myself, but if - it were done the existing ones should be kept. - There is little point forcing a purely cosmetic change upon tdb users. -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "tdb_log_func-Doesnt-Take" - -\end_inset - -tdb_log_func Doesn't Take The Private Pointer -\end_layout - -\begin_layout Standard -For API compatibility reasons, the logging function needs to call tdb_get_loggin -g_private() to retrieve the pointer registered by the tdb_open_ex for logging. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -It should simply take an extra argument, since we are prepared to break - the API/ABI. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -Various Callback Functions Are Not Typesafe -\end_layout - -\begin_layout Standard -The callback functions in tdb_set_logging_function (after -\begin_inset CommandInset ref -LatexCommand ref -reference "tdb_log_func-Doesnt-Take" - -\end_inset - - is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check - all take void * and must internally convert it to the argument type they - were expecting. -\end_layout - -\begin_layout Standard -If this type changes, the compiler will not produce warnings on the callers, - since it only sees void *. 
-\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -With careful use of macros, we can create callback functions which give - a warning when used on gcc and the types of the callback and its private - argument differ. - Unsupported compilers will not give a warning, which is no worse than now. - In addition, the callbacks become clearer, as they need not use void * - for their parameter. -\end_layout - -\begin_layout Standard -See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic -\end_layout - -\begin_layout Standard -The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should - be cleared if the caller discovers it is the only process with the TDB - open. - However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not - be detected, so will have the TDB erased underneath them (usually resulting - in a crash). -\end_layout - -\begin_layout Standard -There is a similar issue on fork(); if the parent exits (or otherwise closes - the tdb) before the child calls tdb_reopen_all() to establish the lock - used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener - at that moment will believe it alone has opened the TDB and will erase - it. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Remove TDB_CLEAR_IF_FIRST. - Other workarounds are possible, but see -\begin_inset CommandInset ref -LatexCommand ref -reference "TDB_CLEAR_IF_FIRST-Imposes-Performance" - -\end_inset - -. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. - An open hook is provided to replicate this functionality if required. 
-\end_layout - -\begin_layout Subsection -Extending The Header Is Difficult -\end_layout - -\begin_layout Standard -We have reserved (zeroed) words in the TDB header, which can be used for - future features. - If the future features are compulsory, the version number must be updated - to prevent old code from accessing the database. - But if the future feature is optional, we have no way of telling if older - code is accessing the database or not. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The header should contain a -\begin_inset Quotes eld -\end_inset - -format variant -\begin_inset Quotes erd -\end_inset - - value (64-bit). - This is divided into two 32-bit parts: -\end_layout - -\begin_layout Enumerate -The lower part reflects the format variant understood by code accessing - the database. -\end_layout - -\begin_layout Enumerate -The upper part reflects the format variant you must understand to write - to the database (otherwise you can only open for reading). -\end_layout - -\begin_layout Standard -The latter field can only be written at creation time, the former should - be written under the OPEN_LOCK when opening the database for writing, if - the variant of the code is lower than the current lowest variant. -\end_layout - -\begin_layout Standard -This should allow backwards-compatible features to be added, and detection - if older code (which doesn't understand the feature) writes to the database. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -Record Headers Are Not Expandible -\end_layout - -\begin_layout Standard -If we later want to add (say) checksums on keys and data, it would require - another format change, which we'd like to avoid. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -We often have extra padding at the tail of a record. 
- If we ensure that the first byte (if any) of this padding is zero, we will - have a way for future changes to detect code which doesn't understand a - new format: the new code would write (say) a 1 at the tail, and thus if - there is no tail or the first byte is 0, we would know the extension is - not present on that record. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -TDB Does Not Use Talloc -\end_layout - -\begin_layout Standard -Many users of TDB (particularly Samba) use the talloc allocator, and thus - have to wrap TDB in a talloc context to use it conveniently. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The allocation within TDB is not complicated enough to justify the use of - talloc, and I am reluctant to force another (excellent) library on TDB - users. - Nonetheless a compromise is possible. - An attribute (see -\begin_inset CommandInset ref -LatexCommand ref -reference "attributes" - -\end_inset - -) can be added later to tdb_open() to provide an alternate allocation mechanism, - specifically for talloc but usable by any other allocator (which would - ignore the -\begin_inset Quotes eld -\end_inset - -context -\begin_inset Quotes erd -\end_inset - - argument). -\end_layout - -\begin_layout Standard -This would form a talloc hierarchy as expected, but the caller would still - have to attach a destructor to the tdb context returned from tdb_open to - close it. - All TDB_DATA fields would be children of the tdb_context, and the caller - would still have to manage them (using talloc_free() or talloc_steal()). -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute. 
-\end_layout - -\begin_layout Section -Performance And Scalability Issues -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "TDB_CLEAR_IF_FIRST-Imposes-Performance" - -\end_inset - -TDB_CLEAR_IF_FIRST Imposes Performance Penalty -\end_layout - -\begin_layout Standard -When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset - 4 (aka. - the ACTIVE_LOCK). - While these locks never conflict in normal tdb usage, they do add substantial - overhead for most fcntl lock implementations when the kernel scans to detect - if a lock conflict exists. - This is often a single linked list, making the time to acquire and release - a fcntl lock O(N) where N is the number of processes with the TDB open, - not the number actually doing work. -\end_layout - -\begin_layout Standard -In a Samba server it is common to have huge numbers of clients sitting idle, - and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag. -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -There is a flag to tdb_reopen_all() which is used for this optimization: - if the parent process will outlive the child, the child does not need the - ACTIVE_LOCK. - This is a workaround for this very performance issue. -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Remove the flag. - It was a neat idea, but even trivial servers tend to know when they are - initializing for the first time and can simply unlink the old tdb at that - point. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -TDB Files Have a 4G Limit -\end_layout - -\begin_layout Standard -This seems to be becoming an issue (so much for -\begin_inset Quotes eld -\end_inset - -trivial -\begin_inset Quotes erd -\end_inset - -!), particularly for ldb. 
-\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -A new, incompatible TDB format which uses 64 bit offsets internally rather - than 32 bit as now. - For simplicity of endian conversion (which TDB does on the fly if required), - all values will be 64 bit on disk. - In practice, some upper bits may be used for other purposes, but at least - 56 bits will be available for file offsets. -\end_layout - -\begin_layout Standard -tdb_open() will automatically detect the old version, and even create them - if TDB_VERSION6 is specified to tdb_open. -\end_layout - -\begin_layout Standard -32 bit processes will still be able to access TDBs larger than 4G (assuming - that their off_t allows them to seek to 64 bits), they will gracefully - fall back as they fail to mmap. - This can happen already with large TDBs. -\end_layout - -\begin_layout Standard -Old versions of tdb will fail to open the new TDB files (since 28 August - 2009, commit 398d0c29290: prior to that any unrecognized file format would - be erased and initialized as a fresh tdb!) -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -TDB Records Have a 4G Limit -\end_layout - -\begin_layout Standard -This has not been a reported problem, and the API uses size_t which can - be 64 bit on 64 bit platforms. - However, other limits may have made such an issue moot. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Record sizes will be 64 bit, with an error returned on 32 bit platforms - which try to access such records (the current implementation would return - TDB_ERR_OOM in a similar case). - It seems unlikely that 32 bit keys will be a limitation, so the implementation - may not support this (see -\begin_inset CommandInset ref -LatexCommand ref -reference "sub:Records-Incur-A" - -\end_inset - -). 
-\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -Hash Size Is Determined At TDB Creation Time -\end_layout - -\begin_layout Standard -TDB contains a number of hash chains in the header; the number is specified - at creation time, and defaults to 131. - This is such a bottleneck on large databases (as each hash chain gets quite - long), that LDB uses 10,000 for this hash. - In general it is impossible to know what the 'right' answer is at database - creation time. -\end_layout - -\begin_layout Subsubsection -\begin_inset CommandInset label -LatexCommand label -name "sub:Hash-Size-Solution" - -\end_inset - -Proposed Solution -\end_layout - -\begin_layout Standard -After comprehensive performance testing on various scalable hash variants -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying - because I was previously convinced that an expanding tree of hashes would - be very close to optimal. -\end_layout - -\end_inset - -, it became clear that it is hard to beat a straight linear hash table which - doubles in size when it reaches saturation. - Unfortunately, altering the hash table introduces serious locking complications -: the entire hash table needs to be locked to enlarge the hash table, and - others might be holding locks. - Particularly insidious are insertions done under tdb_chainlock. -\end_layout - -\begin_layout Standard -Thus an expanding layered hash will be used: an array of hash groups, with - each hash group exploding into pointers to lower hash groups once it fills, - turning into a hash tree. - This has implications for locking: we must lock the entire group in case - we need to expand it, yet we don't know how deep the tree is at that point. 
-\end_layout - -\begin_layout Standard -Note that bits from the hash table entries should be stolen to hold more - hash bits to reduce the penalty of collisions. - We can use the otherwise-unused lower 3 bits. - If we limit the size of the database to 64 exabytes, we can use the top - 8 bits of the hash entry as well. - These 11 bits would reduce false positives down to 1 in 2000 which is more - than we need: we can use one of the bits to indicate that the extra hash - bits are valid. - This means we can choose not to re-hash all entries when we expand a hash - group; simply use the next bits we need and mark them invalid. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Ignore. - Scaling the hash automatically proved inefficient at small hash sizes; - we default to a 8192-element hash (changable via NTDB_ATTRIBUTE_HASHSIZE), - and when buckets clash we expand to an array of hash entries. - This scales slightly better than the tdb chain (due to the 8 top bits containin -g extra hash). -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "TDB-Freelist-Is" - -\end_inset - -TDB Freelist Is Highly Contended -\end_layout - -\begin_layout Standard -TDB uses a single linked list for the free list. - Allocation occurs as follows, using heuristics which have evolved over - time: -\end_layout - -\begin_layout Enumerate -Get the free list lock for this whole operation. -\end_layout - -\begin_layout Enumerate -Multiply length by 1.25, so we always over-allocate by 25%. -\end_layout - -\begin_layout Enumerate -Set the slack multiplier to 1. -\end_layout - -\begin_layout Enumerate -Examine the current freelist entry: if it is > length but < the current - best case, remember it as the best case. -\end_layout - -\begin_layout Enumerate -Multiply the slack multiplier by 1.05. -\end_layout - -\begin_layout Enumerate -If our best fit so far is less than length * slack multiplier, return it. 
- The slack will be turned into a new free record if it's large enough. -\end_layout - -\begin_layout Enumerate -Otherwise, go onto the next freelist entry. -\end_layout - -\begin_layout Standard -Deleting a record occurs as follows: -\end_layout - -\begin_layout Enumerate -Lock the hash chain for this whole operation. -\end_layout - -\begin_layout Enumerate -Walk the chain to find the record, keeping the prev pointer offset. -\end_layout - -\begin_layout Enumerate -If max_dead is non-zero: -\end_layout - -\begin_deeper -\begin_layout Enumerate -Walk the hash chain again and count the dead records. -\end_layout - -\begin_layout Enumerate -If it's more than max_dead, bulk free all the dead ones (similar to steps - 4 and below, but the lock is only obtained once). -\end_layout - -\begin_layout Enumerate -Simply mark this record as dead and return. -\end_layout - -\end_deeper -\begin_layout Enumerate -Get the free list lock for the remainder of this operation. -\end_layout - -\begin_layout Enumerate -\begin_inset CommandInset label -LatexCommand label -name "right-merging" - -\end_inset - -Examine the following block to see if it is free; if so, enlarge the current - block and remove that block from the free list. - This was disabled, as removal from the free list was O(entries-in-free-list). -\end_layout - -\begin_layout Enumerate -Examine the preceding block to see if it is free: for this reason, each - block has a 32-bit tailer which indicates its length. - If it is free, expand it to cover our new block and return. -\end_layout - -\begin_layout Enumerate -Otherwise, prepend ourselves to the free list. -\end_layout - -\begin_layout Standard -Disabling right-merging (step -\begin_inset CommandInset ref -LatexCommand ref -reference "right-merging" - -\end_inset - -) causes fragmentation; the other heuristics proved insufficient to address - this, so the final answer to this was that when we expand the TDB file - inside a transaction commit, we repack the entire tdb. 
-\end_layout - -\begin_layout Standard -The single list lock limits our allocation rate; due to the other issues - this is not currently seen as a bottleneck. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The first step is to remove all the current heuristics, as they obviously - interact, then examine them once the lock contention is addressed. -\end_layout - -\begin_layout Standard -The free list must be split to reduce contention. - Assuming perfect free merging, we can at most have 1 free list entry for - each entry. - This implies that the number of free lists is related to the size of the - hash table, but as it is rare to walk a large number of free list entries - we can use far fewer, say 1/32 of the number of hash buckets. -\end_layout - -\begin_layout Standard -It seems tempting to try to reuse the hash implementation which we use for - records here, but we have two ways of searching for free entries: for allocatio -n we search by size (and possibly zone) which produces too many clashes - for our hash table to handle well, and for coalescing we search by address. - Thus an array of doubly-linked free lists seems preferable. -\end_layout - -\begin_layout Standard -There are various benefits in using per-size free lists (see -\begin_inset CommandInset ref -LatexCommand ref -reference "sub:TDB-Becomes-Fragmented" - -\end_inset - -) but it's not clear this would reduce contention in the common case where - all processes are allocating/freeing the same size. - Thus we almost certainly need to divide in other ways: the most obvious - is to divide the file into zones, and using a free list (or table of free - lists) for each. - This approximates address ordering. 
-\end_layout - -\begin_layout Standard -Unfortunately it is difficult to know what heuristics should be used to - determine zone sizes, and our transaction code relies on being able to - create a -\begin_inset Quotes eld -\end_inset - -recovery area -\begin_inset Quotes erd -\end_inset - - by simply appending to the file (difficult if it would need to create a - new zone header). - Thus we use a linked-list of free tables; currently we only ever create - one, but if there is more than one we choose one at random to use. - In future we may use heuristics to add new free tables on contention. - We only expand the file when all free tables are exhausted. -\end_layout - -\begin_layout Standard -The basic algorithm is as follows. - Freeing is simple: -\end_layout - -\begin_layout Enumerate -Identify the correct free list. -\end_layout - -\begin_layout Enumerate -Lock the corresponding list. -\end_layout - -\begin_layout Enumerate -Re-check the list (we didn't have a lock, sizes could have changed): relock - if necessary. -\end_layout - -\begin_layout Enumerate -Place the freed entry in the list. -\end_layout - -\begin_layout Standard -Allocation is a little more complicated, as we perform delayed coalescing - at this point: -\end_layout - -\begin_layout Enumerate -Pick a free table; usually the previous one. -\end_layout - -\begin_layout Enumerate -Lock the corresponding list. -\end_layout - -\begin_layout Enumerate -If the top entry is -large enough, remove it from the list and return it. -\end_layout - -\begin_layout Enumerate -Otherwise, coalesce entries in the list. If there was no entry large enough, - unlock the list and try the next largest list. -\end_layout - -\begin_layout Enumerate -If no list has an entry which meets our needs, try the next free table. -\end_layout - -\begin_layout Enumerate -If no zone satisfies, expand the file. 
-\end_layout - -\begin_layout Standard -This optimizes rapid insert/delete of free list entries by not coalescing - them all the time. - First-fit address ordering seems to be fairly good for keeping - fragmentation low (see -\begin_inset CommandInset ref -LatexCommand ref -reference "sub:TDB-Becomes-Fragmented" - -\end_inset - -). - Note that address ordering does not need a tailer to coalesce, though if - we needed one we could have one cheaply: see -\begin_inset CommandInset ref -LatexCommand ref -reference "sub:Records-Incur-A" - -\end_inset - -. -\end_layout - -\begin_layout Standard -Each free entry has the free table number in the header: less than 255. - It also contains a doubly-linked list for easy deletion. -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "sub:TDB-Becomes-Fragmented" - -\end_inset - -TDB Becomes Fragmented -\end_layout - -\begin_layout Standard -Much of this is a result of allocation strategy -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute -xas.edu/pub/garbage/malloc/ismm98.ps -\end_layout - -\end_inset - - and deliberate hobbling of coalescing; internal fragmentation (aka overallocati -on) is deliberately set at 25%, and external fragmentation is only cured - by the decision to repack the entire db when a transaction commit needs - to enlarge the file. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The 25% overhead on allocation works in practice for ldb because indexes - tend to expand by one record at a time. - This internal fragmentation can be resolved by having an -\begin_inset Quotes eld -\end_inset - -expanded -\begin_inset Quotes erd -\end_inset - - bit in the header to note entries that have previously expanded, and allocating - more space for them. 
-\end_layout - -\begin_layout Standard -There is a spectrum of possible solutions for external fragmentation: - one is to use a fragmentation-avoiding allocation strategy such as a best-fit - address-order allocator. - The other end of the spectrum would be to use a bump allocator (very fast - and simple) and simply repack the file when we reach the end. -\end_layout - -\begin_layout Standard -There are three problems with efficient fragmentation-avoiding allocators: - they are non-trivial, they tend to use a single free list for each size, - and there's no evidence that tdb allocation patterns will match those recorded - for general allocators (though it seems likely). -\end_layout - -\begin_layout Standard -Thus we don't spend too much effort on external fragmentation; we will be - no worse than the current code if we need to repack on occasion. - More effort is spent on reducing freelist contention, and reducing overhead. -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "sub:Records-Incur-A" - -\end_inset - -Records Incur A 28-Byte Overhead -\end_layout - -\begin_layout Standard -Each TDB record has a header as follows: -\end_layout - -\begin_layout LyX-Code -struct tdb_record { -\end_layout - -\begin_layout LyX-Code - tdb_off_t next; /* offset of the next record in the list */ -\end_layout - -\begin_layout LyX-Code - tdb_len_t rec_len; /* total byte length of record */ -\end_layout - -\begin_layout LyX-Code - tdb_len_t key_len; /* byte length of key */ -\end_layout - -\begin_layout LyX-Code - tdb_len_t data_len; /* byte length of data */ -\end_layout - -\begin_layout LyX-Code - uint32_t full_hash; /* the full 32 bit hash of the key */ -\end_layout - -\begin_layout LyX-Code - uint32_t magic; /* try to catch errors */ -\end_layout - -\begin_layout LyX-Code - /* the following union is implied: -\end_layout - -\begin_layout LyX-Code - union { -\end_layout - -\begin_layout LyX-Code - char record[rec_len]; 
-\end_layout - -\begin_layout LyX-Code - struct { -\end_layout - -\begin_layout LyX-Code - char key[key_len]; -\end_layout - -\begin_layout LyX-Code - char data[data_len]; -\end_layout - -\begin_layout LyX-Code - } -\end_layout - -\begin_layout LyX-Code - uint32_t totalsize; (tailer) -\end_layout - -\begin_layout LyX-Code - } -\end_layout - -\begin_layout LyX-Code - */ -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout Standard -Naively, this would double to a 56-byte overhead on a 64 bit implementation. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -We can use various techniques to reduce this for an allocated block: -\end_layout - -\begin_layout Enumerate -The 'next' pointer is not required, as we are using a flat hash table. -\end_layout - -\begin_layout Enumerate -'rec_len' can instead be expressed as an addition to key_len and data_len - (it accounts for wasted or overallocated length in the record). - Since the record length is always a multiple of 8, we can conveniently - fit it in 32 bits (representing up to 35 bits). -\end_layout - -\begin_layout Enumerate -'key_len' and 'data_len' can be reduced. - I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine - the two into one 64-bit field and using a 5 bit value which indicates at - what bit to divide the two. - Keys are unlikely to scale as fast as data, so I'm assuming a maximum key - size of 32 bits. -\end_layout - -\begin_layout Enumerate -'full_hash' is used to avoid a memcmp on the -\begin_inset Quotes eld -\end_inset - -miss -\begin_inset Quotes erd -\end_inset - - case, but this is diminishing returns after a handful of bits (at 10 bits, - it reduces 99.9% of false memcmp). - As an aside, as the lower bits are already incorporated in the hash table - resolution, the upper bits should be used here. 
- Note that it's not clear that these bits will be a win, given the extra - bits in the hash table itself (see -\begin_inset CommandInset ref -LatexCommand ref -reference "sub:Hash-Size-Solution" - -\end_inset - -). -\end_layout - -\begin_layout Enumerate -'magic' does not need to be enlarged: it currently reflects one of 5 values - (used, free, dead, recovery, and unused_recovery). - It is useful for quick sanity checking however, and should not be eliminated. -\end_layout - -\begin_layout Enumerate -'tailer' is only used to coalesce free blocks (so a block to the right can - find the header to check if this block is free). - This can be replaced by a single 'free' bit in the header of the following - block (and the tailer only exists in free blocks). -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -This technique from Thomas Standish. - Data Structure Techniques. - Addison-Wesley, Reading, Massachusetts, 1980. -\end_layout - -\end_inset - - The current proposed coalescing algorithm doesn't need this, however. 
-\end_layout - -\begin_layout Standard -This produces a 16 byte used header like this: -\end_layout - -\begin_layout LyX-Code -struct tdb_used_record { -\end_layout - -\begin_layout LyX-Code - uint32_t used_magic : 16, -\end_layout - -\begin_layout LyX-Code - -\end_layout - -\begin_layout LyX-Code - key_data_divide: 5, -\end_layout - -\begin_layout LyX-Code - top_hash: 11; -\end_layout - -\begin_layout LyX-Code - uint32_t extra_octets; -\end_layout - -\begin_layout LyX-Code - uint64_t key_and_data_len; -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout Standard -And a free record like this: -\end_layout - -\begin_layout LyX-Code -struct tdb_free_record { -\end_layout - -\begin_layout LyX-Code - uint64_t free_magic: 8, -\end_layout - -\begin_layout LyX-Code - prev : 56; -\end_layout - -\begin_layout LyX-Code - -\end_layout - -\begin_layout LyX-Code - uint64_t free_table: 8, -\end_layout - -\begin_layout LyX-Code - total_length : 56; -\end_layout - -\begin_layout LyX-Code - uint64_t next; -\end_layout - -\begin_layout LyX-Code -}; -\end_layout - -\begin_layout Standard -Note that by limiting valid offsets to 56 bits, we can pack everything we - need into 3 64-bit words, meaning our minimum record size is 8 bytes. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. 
-\end_layout - -\begin_layout Subsection -Transaction Commit Requires 4 fdatasync -\end_layout - -\begin_layout Standard -The current transaction algorithm is: -\end_layout - -\begin_layout Enumerate -write_recovery_data(); -\end_layout - -\begin_layout Enumerate -sync(); -\end_layout - -\begin_layout Enumerate -write_recovery_header(); -\end_layout - -\begin_layout Enumerate -sync(); -\end_layout - -\begin_layout Enumerate -overwrite_with_new_data(); -\end_layout - -\begin_layout Enumerate -sync(); -\end_layout - -\begin_layout Enumerate -remove_recovery_header(); -\end_layout - -\begin_layout Enumerate -sync(); -\end_layout - -\begin_layout Standard -On current ext3, each sync flushes all data to disk, so the next 3 syncs - are relatively expensive. - But this could become a performance bottleneck on other filesystems such - as ext4. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Neil Brown points out that this is overzealous, and only one sync is needed: -\end_layout - -\begin_layout Enumerate -Bundle the recovery data, a transaction counter and a strong checksum of - the new data. -\end_layout - -\begin_layout Enumerate -Strong checksum that whole bundle. -\end_layout - -\begin_layout Enumerate -Store the bundle in the database. -\end_layout - -\begin_layout Enumerate -Overwrite the oldest of the two recovery pointers in the header (identified - using the transaction counter) with the offset of this bundle. -\end_layout - -\begin_layout Enumerate -sync. -\end_layout - -\begin_layout Enumerate -Write the new data to the file. -\end_layout - -\begin_layout Standard -Checking for recovery means identifying the latest bundle with a valid checksum - and using the new data checksum to ensure that it has been applied. - This is more expensive than the current check, but need only be done at - open. 
- For running databases, a separate header field can be used to indicate - a transaction in progress; we need only check for recovery if this is set. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Deferred. -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "sub:TDB-Does-Not" - -\end_inset - -TDB Does Not Have Snapshot Support -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -None. - At some point you say -\begin_inset Quotes eld -\end_inset - -use a real database -\begin_inset Quotes erd -\end_inset - - (but see -\begin_inset CommandInset ref -LatexCommand ref -reference "replay-attribute" - -\end_inset - -). -\end_layout - -\begin_layout Standard -But as a thought experiment, if we implemented transactions to only overwrite - free entries (this is tricky: there must not be a header in each entry - which indicates whether it is free, but use of presence in metadata elsewhere), - and a pointer to the hash table, we could create an entirely new commit - without destroying existing data. - Then it would be easy to implement snapshots in a similar way. -\end_layout - -\begin_layout Standard -This would not allow arbitrary changes to the database, such as tdb_repack - does, and would require more space (since we have to preserve the current - and future entries at once). - If we used hash trees rather than one big hash table, we might only have - to rewrite some sections of the hash, too. -\end_layout - -\begin_layout Standard -We could then implement snapshots using a similar method, using multiple - different hash tables/free tables. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Deferred. 
-\end_layout - -\begin_layout Subsection -Transactions Cannot Operate in Parallel -\end_layout - -\begin_layout Standard -This would be useless for ldb, as it hits the index records with just about - every update. - It would add significant complexity in resolving clashes, and cause - all transaction callers to write their code to loop in the case where the - transactions spuriously failed. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -None (but see -\begin_inset CommandInset ref -LatexCommand ref -reference "replay-attribute" - -\end_inset - -). - We could solve a small part of the problem by providing read-only transactions. - These would allow one write transaction to begin, but it could not commit - until all r/o transactions are done. - This would require a new RO_TRANSACTION_LOCK, which would be upgraded on - commit. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Deferred. -\end_layout - -\begin_layout Subsection -Default Hash Function Is Suboptimal -\end_layout - -\begin_layout Standard -The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially - if we expand it to 64 bits), and works best when the hash bucket size is - a prime number (which also means a slow modulus). - In addition, it is highly predictable which could potentially lead to a - Denial of Service attack in some TDB uses. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -The Jenkins lookup3 hash -\begin_inset Foot -status open - -\begin_layout Plain Layout -http://burtleburtle.net/bob/c/lookup3.c -\end_layout - -\end_inset - - is a fast and superbly-mixing hash. - It's used by the Linux kernel and almost everything else. - This has the particular properties that it takes an initial seed, and produces - two 32 bit hash numbers, which we can combine into a 64-bit hash. 
-\end_layout - -\begin_layout Standard -The seed should be created at tdb-creation time from some random source, - and placed in the header. - This is far from foolproof, but adds a little bit of protection against - hash bombing. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -\begin_inset CommandInset label -LatexCommand label -name "Reliable-Traversal-Adds" - -\end_inset - -Reliable Traversal Adds Complexity -\end_layout - -\begin_layout Standard -We lock a record during traversal iteration, and try to grab that lock in - the delete code. - If that grab on delete fails, we simply mark it deleted and continue onwards; - traversal checks for this condition and does the delete when it moves off - the record. -\end_layout - -\begin_layout Standard -If traversal terminates, the dead record may be left indefinitely. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -Remove reliability guarantees; see -\begin_inset CommandInset ref -LatexCommand ref -reference "traverse-Proposed-Solution" - -\end_inset - -. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Complete. -\end_layout - -\begin_layout Subsection -Fcntl Locking Adds Overhead -\end_layout - -\begin_layout Standard -Placing a fcntl lock means a system call, as does removing one. - This is actually one reason why transactions can be faster (everything - is locked once at transaction start). - In the uncontended case, this overhead can theoretically be eliminated. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -None. -\end_layout - -\begin_layout Standard -We tried this before with spinlock support, in the early days of TDB, and - it didn't make much difference except in manufactured benchmarks. 
-\end_layout - -\begin_layout Standard -We could use spinlocks (with futex kernel support under Linux), but it means - that we lose automatic cleanup when a process dies with a lock. - There is a method of auto-cleanup under Linux, but it's not supported by - other operating systems. - We could reintroduce a clear-if-first-style lock and sweep for dead futexes - on open, but that wouldn't help the normal case of one concurrent opener - dying. - Increasingly elaborate repair schemes could be considered, but they require - an ABI change (everyone must use them) anyway, so there's no need to do - this at the same time as everything else. -\end_layout - -\begin_layout Subsection -Some Transactions Don't Require Durability -\end_layout - -\begin_layout Standard -Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast) - usage, and occasionally empties the results into a transactional TDB. - This kind of usage prioritizes performance over durability: as long as - we are consistent, data can be lost. -\end_layout - -\begin_layout Standard -This would be more neatly implemented inside tdb: a -\begin_inset Quotes eld -\end_inset - -soft -\begin_inset Quotes erd -\end_inset - - transaction commit (ie. - syncless) which meant that data may be reverted on a crash. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\end_layout - -\begin_layout Standard -None. -\end_layout - -\begin_layout Standard -Unfortunately any transaction scheme which overwrites old data requires - a sync before that overwrite to avoid the possibility of corruption. -\end_layout - -\begin_layout Standard -It seems possible to use a scheme similar to that described in -\begin_inset CommandInset ref -LatexCommand ref -reference "sub:TDB-Does-Not" - -\end_inset - -,where transactions are committed without overwriting existing data, and - an array of top-level pointers were available in the header. 
- If the transaction is -\begin_inset Quotes eld -\end_inset - -soft -\begin_inset Quotes erd -\end_inset - - then we would not need a sync at all: existing processes would pick up - the new hash table and free list and work with that. -\end_layout - -\begin_layout Standard -At some later point, a sync would allow recovery of the old data into the - free lists (perhaps when the array of top-level pointers filled). - On crash, tdb_open() would examine the array of top levels, and apply the - transactions until it encountered an invalid checksum. -\end_layout - -\begin_layout Subsection -Tracing Is Fragile, Replay Is External -\end_layout - -\begin_layout Standard -The current TDB has compile-time-enabled tracing code, but it often breaks - as it is not enabled by default. - In a similar way, the ctdb code has an external wrapper which does replay - tracing so it can coordinate cluster-wide transactions. -\end_layout - -\begin_layout Subsubsection -Proposed Solution -\begin_inset CommandInset label -LatexCommand label -name "replay-attribute" - -\end_inset - - -\end_layout - -\begin_layout Standard -Tridge points out that an attribute can be later added to tdb_open (see -\begin_inset CommandInset ref -LatexCommand ref -reference "attributes" - -\end_inset - -) to provide replay/trace hooks, which could become the basis for this and - future parallel transactions and snapshot support. -\end_layout - -\begin_layout Subsubsection -Status -\end_layout - -\begin_layout Standard -Deferred. 
-\end_layout - -\end_body -\end_document diff --git a/ccan/ntdb/doc/design.pdf b/ccan/ntdb/doc/design.pdf deleted file mode 100644 index 83819146..00000000 Binary files a/ccan/ntdb/doc/design.pdf and /dev/null differ diff --git a/ccan/ntdb/doc/design.txt b/ccan/ntdb/doc/design.txt deleted file mode 100644 index bd680f09..00000000 --- a/ccan/ntdb/doc/design.txt +++ /dev/null @@ -1,1270 +0,0 @@ -NTDB: Redesigning The Trivial DataBase - -Rusty Russell, IBM Corporation - -19 June 2012 - -Abstract - -The Trivial DataBase on-disk format is 32 bits; with usage cases -heading towards the 4G limit, that must change. This required -breakage provides an opportunity to revisit TDB's other design -decisions and reassess them. - -1 Introduction - -The Trivial DataBase was originally written by Andrew Tridgell as -a simple key/data pair storage system with the same API as dbm, -but allowing multiple readers and writers while being small -enough (< 1000 lines of C) to include in SAMBA. The simple design -created in 1999 has proven surprisingly robust and performant, -used in Samba versions 3 and 4 as well as numerous other -projects. Its useful life was greatly increased by the -(backwards-compatible!) addition of transaction support in 2005. - -The wider variety and greater demands of TDB-using code has led -to some organic growth of the API, as well as some compromises on -the implementation. None of these, by themselves, are seen as -show-stoppers, but the cumulative effect is a loss of elegance -over the initial, simple TDB implementation. 
Here is a table of -the approximate number of lines of implementation code and number -of API functions at the end of each year: - - -+-----------+----------------+--------------------------------+ -| Year End | API Functions | Lines of C Code Implementation | -+-----------+----------------+--------------------------------+ -+-----------+----------------+--------------------------------+ -| 1999 | 13 | 1195 | -+-----------+----------------+--------------------------------+ -| 2000 | 24 | 1725 | -+-----------+----------------+--------------------------------+ -| 2001 | 32 | 2228 | -+-----------+----------------+--------------------------------+ -| 2002 | 35 | 2481 | -+-----------+----------------+--------------------------------+ -| 2003 | 35 | 2552 | -+-----------+----------------+--------------------------------+ -| 2004 | 40 | 2584 | -+-----------+----------------+--------------------------------+ -| 2005 | 38 | 2647 | -+-----------+----------------+--------------------------------+ -| 2006 | 52 | 3754 | -+-----------+----------------+--------------------------------+ -| 2007 | 66 | 4398 | -+-----------+----------------+--------------------------------+ -| 2008 | 71 | 4768 | -+-----------+----------------+--------------------------------+ -| 2009 | 73 | 5715 | -+-----------+----------------+--------------------------------+ - - -This review is an attempt to catalog and address all the known -issues with TDB and create solutions which address the problems -without significantly increasing complexity; all involved are far -too aware of the dangers of second system syndrome in rewriting a -successful project like this. - -Note: the final decision was to make ntdb a separate library, -with a separate 'ntdb' namespace so both can potentially be -linked together. This document still refers to “tdb” everywhere, -for simplicity. 
- -2 API Issues - -2.1 tdb_open_ex Is Not Expandable - -The tdb_open() call was expanded to tdb_open_ex(), which added an -optional hashing function and an optional logging function -argument. Additional arguments to open would require the -introduction of a tdb_open_ex2 call etc. - -2.1.1 Proposed Solution - -tdb_open() will take a linked-list of attributes: - -enum tdb_attribute { - - TDB_ATTRIBUTE_LOG = 0, - - TDB_ATTRIBUTE_HASH = 1 - -}; - -struct tdb_attribute_base { - - enum tdb_attribute attr; - - union tdb_attribute *next; - -}; - -struct tdb_attribute_log { - - struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG -*/ - - tdb_log_func log_fn; - - void *log_private; - -}; - -struct tdb_attribute_hash { - - struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH -*/ - - tdb_hash_func hash_fn; - - void *hash_private; - -}; - -union tdb_attribute { - - struct tdb_attribute_base base; - - struct tdb_attribute_log log; - - struct tdb_attribute_hash hash; - -}; - -This allows future attributes to be added, even if this expands -the size of the union. - -2.1.2 Status - -Complete. - -2.2 tdb_traverse Makes Impossible Guarantees - -tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, -and it was thought that it was important to guarantee that all -records which exist at the start and end of the traversal would -be included, and no record would be included twice. - -This adds complexity (see[Reliable-Traversal-Adds]) and does not -work anyway for records which are altered (in particular, those -which are expanded may be effectively deleted and re-added behind -the traversal). - -2.2.1 Proposed Solution - -Abandon the guarantee. You will see every record if no changes -occur during your traversal, otherwise you will see some subset. -You can prevent changes by using a transaction or the locking -API. - -2.2.2 Status - -Complete. Delete-during-traverse will still delete every record, -too (assuming no other changes). 
- -2.3 Nesting of Transactions Is Fraught - -TDB has alternated between allowing nested transactions and not -allowing them. Various paths in the Samba codebase assume that -transactions will nest, and in a sense they can: the operation is -only committed to disk when the outer transaction is committed. -There are two problems, however: - -1. Canceling the inner transaction will cause the outer - transaction commit to fail, and will not undo any operations - since the inner transaction began. This problem is soluble with - some additional internal code. - -2. An inner transaction commit can be cancelled by the outer - transaction. This is desirable in the way which Samba's - database initialization code uses transactions, but could be a - surprise to any users expecting a successful transaction commit - to expose changes to others. - -The current solution is to specify the behavior at tdb_open(), -with the default currently that nested transactions are allowed. -This flag can also be changed at runtime. - -2.3.1 Proposed Solution - -Given the usage patterns, it seems that the “least-surprise” -behavior of disallowing nested transactions should become the -default. Additionally, it seems the outer transaction is the only -code which knows whether inner transactions should be allowed, so -a flag to indicate this could be added to tdb_transaction_start. -However, this behavior can be simulated with a wrapper which uses -tdb_add_flags() and tdb_remove_flags(), so the API should not be -expanded for this relatively-obscure case. - -2.3.2 Status - -Complete; the nesting flag has been removed. - -2.4 Incorrect Hash Function is Not Detected - -tdb_open_ex() allows the calling code to specify a different hash -function to use, but does not check that all other processes -accessing this tdb are using the same hash function. The result -is that records are missing from tdb_fetch(). - -2.4.1 Proposed Solution - -The header should contain an example hash result (eg. 
the hash of -0xdeadbeef), and tdb_open_ex() should check that the given hash -function produces the same answer, or fail the tdb_open call. - -2.4.2 Status - -Complete. - -2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation - -In response to scalability issues with the free list ([TDB-Freelist-Is] -) two API workarounds have been incorporated in TDB: -tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The -latter actually calls the former with an argument of “5”. - -This code allows deleted records to accumulate without putting -them in the free list. On delete we iterate through each chain -and free them in a batch if there are more than max_dead entries. -These are never otherwise recycled except as a side-effect of a -tdb_repack. - -2.5.1 Proposed Solution - -With the scalability problems of the freelist solved, this API -can be removed. The TDB_VOLATILE flag may still be useful as a -hint that store and delete of records will be at least as common -as fetch in order to allow some internal tuning, but initially -will become a no-op. - -2.5.2 Status - -Complete. Unknown flags cause tdb_open() to fail as well, so they -can be detected at runtime. - -2.6 TDB Files Cannot Be Opened Multiple Times - In The Same Process - -No process can open the same TDB twice; we check and disallow it. -This is an unfortunate side-effect of fcntl locks, which operate -on a per-file rather than per-file-descriptor basis, and do not -nest. Thus, closing any file descriptor on a file clears all the -locks obtained by this process, even if they were placed using a -different file descriptor! - -Note that even if this were solved, deadlock could occur if -operations were nested: this is a more manageable programming -error in most cases. - -2.6.1 Proposed Solution - -We could lobby POSIX to fix the perverse rules, or at least lobby -Linux to violate them so that the most common implementation does -not have this restriction. 
This would be a generally good idea -for other fcntl lock users. - -Samba uses a wrapper which hands out the same tdb_context to -multiple callers if this happens, and does simple reference -counting. We should do this inside the tdb library, which already -emulates lock nesting internally; it would need to recognize when -deadlock occurs within a single process. This would create a new -failure mode for tdb operations (while we currently handle -locking failures, they are impossible in normal use and a process -encountering them can do little but give up). - -I do not see benefit in an additional tdb_open flag to indicate -whether re-opening is allowed, although there may be some -benefit to adding a call to detect when a tdb_context is shared, -to allow others to create such an API. - -2.6.2 Status - -Complete. - -2.7 TDB API Is Not POSIX Thread-safe - -The TDB API uses an error code which can be queried after an -operation to determine what went wrong. This programming model -does not work with threads, unless specific additional guarantees -are given by the implementation. In addition, even -otherwise-independent threads cannot open the same TDB (as in[TDB-Files-Cannot] -). - -2.7.1 Proposed Solution - -Rearchitecting the API to include a tdb_errcode pointer would be a -great deal of churn, but fortunately most functions return 0 on -success and -1 on error: we can change these to return 0 on -success and a negative error code on error, and the API remains -similar to previous. The tdb_fetch, tdb_firstkey and tdb_nextkey -functions need to take a TDB_DATA pointer and return an error -code. It is also simpler to have tdb_nextkey replace its key -argument in place, freeing up any old .dptr. - -Internal locking is required to make sure that fcntl locks do not -overlap between threads, and also that the global list of tdbs is -maintained. 
- -The aim is that building tdb with -DTDB_PTHREAD will result in a -pthread-safe version of the library, and otherwise no overhead -will exist. Alternatively, a hooking mechanism similar to that -proposed for[Proposed-Solution-locking-hook] could be used to -enable pthread locking at runtime. - -2.7.2 Status - -Incomplete; API has been changed but thread safety has not been -implemented. - -2.8 *_nonblock Functions And *_mark Functions Expose - Implementation - -CTDB[footnote: -Clustered TDB, see http://ctdb.samba.org -] wishes to operate on TDB in a non-blocking manner. This is -currently done as follows: - -1. Call the _nonblock variant of an API function (eg. - tdb_lockall_nonblock). If this fails: - -2. Fork a child process, and wait for it to call the normal - variant (eg. tdb_lockall). - -3. If the child succeeds, call the _mark variant to indicate we - already have the locks (eg. tdb_lockall_mark). - -4. Upon completion, tell the child to release the locks (eg. - tdb_unlockall). - -5. Indicate to tdb that it should consider the locks removed (eg. - tdb_unlockall_mark). - -There are several issues with this approach. Firstly, adding two -new variants of each function clutters the API for an obscure -use, and so not all functions have three variants. Secondly, it -assumes that all paths of the functions ask for the same locks, -otherwise the parent process will have to get a lock which the -child doesn't have under some circumstances. I don't believe this -is currently the case, but it constrains the implementation. - -2.8.1 Proposed Solution - -Implement a hook for locking methods, so that the caller can -control the calls to create and remove fcntl locks. In this -scenario, ctdbd would operate as follows: - -1. Call the normal API function, eg tdb_lockall(). - -2. When the lock callback comes in, check if the child has the - lock. Initially, this is always false. If so, return 0. - Otherwise, try to obtain it in non-blocking mode. 
If that - fails, return EWOULDBLOCK. - -3. Release locks in the unlock callback as normal. - -4. If tdb_lockall() fails, see if we recorded a lock failure; if - so, call the child to repeat the operation. - -5. The child records what locks it obtains, and returns that - information to the parent. - -6. When the child has succeeded, goto 1. - -This is flexible enough to handle any potential locking scenario, -even when lock requirements change. It can be optimized so that -the parent does not release locks, just tells the child which -locks it doesn't need to obtain. - -It also keeps the complexity out of the API, and in ctdbd where -it is needed. - -2.8.2 Status - -Complete. - -2.9 tdb_chainlock Functions Expose Implementation - -tdb_chainlock locks some number of records, including the record -indicated by the given key. This gave atomicity guarantees; -no-one can start a transaction, alter, read or delete that key -while the lock is held. - -It also makes the same guarantee for any other key in the chain, -which is an internal implementation detail and potentially a -cause for deadlock. - -2.9.1 Proposed Solution - -None. It would be nice to have an explicit single entry lock -which affected no other keys. Unfortunately, this won't work for -an entry which doesn't exist. Thus while chainlock may be -implemented more efficiently for the existing case, it will still -have overlap issues with the non-existing case. So it is best to -keep the current (lack of) guarantee about which records will be -affected to avoid constraining our implementation. - -2.10 Signal Handling is Not Race-Free - -The tdb_setalarm_sigptr() call allows the caller's signal handler -to indicate that the tdb locking code should return with a -failure, rather than trying again when a signal is received (and -errno == EAGAIN). This is usually used to implement timeouts. 
- -Unfortunately, this does not work in the case where the signal is -received before the tdb code enters the fcntl() call to place the -lock: the code will sleep within the fcntl() code, unaware that -the signal wants it to exit. In the case of long timeouts, this -does not happen in practice. - -2.10.1 Proposed Solution - -The locking hooks proposed in[Proposed-Solution-locking-hook] -would allow the user to decide on whether to fail the lock -acquisition on a signal. This allows the caller to choose their -own compromise: they could narrow the race by checking -immediately before the fcntl call.[footnote: -It may be possible to make this race-free in some implementations -by having the signal handler alter the struct flock to make it -invalid. This will cause the fcntl() lock call to fail with -EINVAL if the signal occurs before the kernel is entered, -otherwise EAGAIN. -] - -2.10.2 Status - -Complete. - -2.11 The API Uses Gratuitous Typedefs, Capitals - -typedefs are useful for providing source compatibility when types -can differ across implementations, or arguably in the case of -function pointer definitions which are hard for humans to parse. -Otherwise it is simply obfuscation and pollutes the namespace. - -Capitalization is usually reserved for compile-time constants and -macros. - - TDB_CONTEXT There is no reason to use this over 'struct - tdb_context'; the definition isn't visible to the API user - anyway. - - TDB_DATA There is no reason to use this over struct TDB_DATA; - the struct needs to be understood by the API user. - - struct TDB_DATA This would normally be called 'struct - tdb_data'. - - enum TDB_ERROR Similarly, this would normally be enum - tdb_error. - -2.11.1 Proposed Solution - -None. Introducing lower case variants would please pedants like -myself, but if it were done the existing ones should be kept. -There is little point forcing a purely cosmetic change upon tdb -users. 
- -2.12 tdb_log_func Doesn't Take The - Private Pointer - -For API compatibility reasons, the logging function needs to call -tdb_get_logging_private() to retrieve the pointer registered by -the tdb_open_ex for logging. - -2.12.1 Proposed Solution - -It should simply take an extra argument, since we are prepared to -break the API/ABI. - -2.12.2 Status - -Complete. - -2.13 Various Callback Functions Are Not Typesafe - -The callback functions in tdb_set_logging_function (after[tdb_log_func-Doesnt-Take] - is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read -and tdb_check all take void * and must internally convert it to -the argument type they were expecting. - -If this type changes, the compiler will not produce warnings on -the callers, since it only sees void *. - -2.13.1 Proposed Solution - -With careful use of macros, we can create callback functions -which give a warning when used on gcc and the types of the -callback and its private argument differ. Unsupported compilers -will not give a warning, which is no worse than now. In addition, -the callbacks become clearer, as they need not use void * for -their parameter. - -See CCAN's typesafe_cb module at -http://ccan.ozlabs.org/info/typesafe_cb.html - -2.13.2 Status - -Complete. - -2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, - tdb_reopen_all Problematic - -The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB -file should be cleared if the caller discovers it is the only -process with the TDB open. However, if any caller does not -specify TDB_CLEAR_IF_FIRST it will not be detected, so will have -the TDB erased underneath them (usually resulting in a crash). - -There is a similar issue on fork(); if the parent exits (or -otherwise closes the tdb) before the child calls tdb_reopen_all() -to establish the lock used to indicate the TDB is opened by -someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe -it alone has opened the TDB and will erase it. 
- -2.14.1 Proposed Solution - -Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but -see[TDB_CLEAR_IF_FIRST-Imposes-Performance]. - -2.14.2 Status - -Complete. An open hook is provided to replicate this -functionality if required. - -2.15 Extending The Header Is Difficult - -We have reserved (zeroed) words in the TDB header, which can be -used for future features. If the future features are compulsory, -the version number must be updated to prevent old code from -accessing the database. But if the future feature is optional, we -have no way of telling if older code is accessing the database or -not. - -2.15.1 Proposed Solution - -The header should contain a “format variant” value (64-bit). This -is divided into two 32-bit parts: - -1. The lower part reflects the format variant understood by code - accessing the database. - -2. The upper part reflects the format variant you must understand - to write to the database (otherwise you can only open for - reading). - -The latter field can only be written at creation time, the former -should be written under the OPEN_LOCK when opening the database -for writing, if the variant of the code is lower than the current -lowest variant. - -This should allow backwards-compatible features to be added, and -detection if older code (which doesn't understand the feature) -writes to the database. - -2.15.2 Status - -Complete. - -2.16 Record Headers Are Not Expandable - -If we later want to add (say) checksums on keys and data, it -would require another format change, which we'd like to avoid. - -2.16.1 Proposed Solution - -We often have extra padding at the tail of a record. If we ensure -that the first byte (if any) of this padding is zero, we will -have a way for future changes to detect code which doesn't -understand a new format: the new code would write (say) a 1 at -the tail, and thus if there is no tail or the first byte is 0, we -would know the extension is not present on that record. - -2.16.2 Status - -Complete. 
- -2.17 TDB Does Not Use Talloc - -Many users of TDB (particularly Samba) use the talloc allocator, -and thus have to wrap TDB in a talloc context to use it -conveniently. - -2.17.1 Proposed Solution - -The allocation within TDB is not complicated enough to justify -the use of talloc, and I am reluctant to force another -(excellent) library on TDB users. Nonetheless a compromise is -possible. An attribute (see[attributes]) can be added later to -tdb_open() to provide an alternate allocation mechanism, -specifically for talloc but usable by any other allocator (which -would ignore the “context” argument). - -This would form a talloc hierarchy as expected, but the caller -would still have to attach a destructor to the tdb context -returned from tdb_open to close it. All TDB_DATA fields would be -children of the tdb_context, and the caller would still have to -manage them (using talloc_free() or talloc_steal()). - -2.17.2 Status - -Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute. - -3 Performance And Scalability Issues - -3.1 TDB_CLEAR_IF_FIRST - Imposes Performance Penalty - -When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is -placed at offset 4 (aka. the ACTIVE_LOCK). While these locks -never conflict in normal tdb usage, they do add substantial -overhead for most fcntl lock implementations when the kernel -scans to detect if a lock conflict exists. This is often a singly -linked list, making the time to acquire and release a fcntl lock -O(N) where N is the number of processes with the TDB open, not -the number actually doing work. - -In a Samba server it is common to have huge numbers of clients -sitting idle, and thus they have weaned themselves off the -TDB_CLEAR_IF_FIRST flag.[footnote: -There is a flag to tdb_reopen_all() which is used for this -optimization: if the parent process will outlive the child, the -child does not need the ACTIVE_LOCK. This is a workaround for -this very performance issue. 
-] - -3.1.1 Proposed Solution - -Remove the flag. It was a neat idea, but even trivial servers -tend to know when they are initializing for the first time and -can simply unlink the old tdb at that point. - -3.1.2 Status - -Complete. - -3.2 TDB Files Have a 4G Limit - -This seems to be becoming an issue (so much for“trivial”!), -particularly for ldb. - -3.2.1 Proposed Solution - -A new, incompatible TDB format which uses 64 bit offsets -internally rather than 32 bit as now. For simplicity of endian -conversion (which TDB does on the fly if required), all values -will be 64 bit on disk. In practice, some upper bits may be used -for other purposes, but at least 56 bits will be available for -file offsets. - -tdb_open() will automatically detect the old version, and even -create them if TDB_VERSION6 is specified to tdb_open. - -32 bit processes will still be able to access TDBs larger than 4G -(assuming that their off_t allows them to seek to 64 bits), they -will gracefully fall back as they fail to mmap. This can happen -already with large TDBs. - -Old versions of tdb will fail to open the new TDB files (since 28 -August 2009, commit 398d0c29290: prior to that any unrecognized -file format would be erased and initialized as a fresh tdb!) - -3.2.2 Status - -Complete. - -3.3 TDB Records Have a 4G Limit - -This has not been a reported problem, and the API uses size_t -which can be 64 bit on 64 bit platforms. However, other limits -may have made such an issue moot. - -3.3.1 Proposed Solution - -Record sizes will be 64 bit, with an error returned on 32 bit -platforms which try to access such records (the current -implementation would return TDB_ERR_OOM in a similar case). It -seems unlikely that 32 bit keys will be a limitation, so the -implementation may not support this (see[sub:Records-Incur-A]). - -3.3.2 Status - -Complete. 
- -3.4 Hash Size Is Determined At TDB Creation Time - -TDB contains a number of hash chains in the header; the number is -specified at creation time, and defaults to 131. This is such a -bottleneck on large databases (as each hash chain gets quite -long), that LDB uses 10,000 for this hash. In general it is -impossible to know what the 'right' answer is at database -creation time. - -3.4.1 Proposed Solution - -After comprehensive performance testing on various scalable hash -variants[footnote: -http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 -This was annoying because I was previously convinced that an -expanding tree of hashes would be very close to optimal. -], it became clear that it is hard to beat a straight linear hash -table which doubles in size when it reaches saturation. -Unfortunately, altering the hash table introduces serious locking -complications: the entire hash table needs to be locked to -enlarge the hash table, and others might be holding locks. -Particularly insidious are insertions done under tdb_chainlock. - -Thus an expanding layered hash will be used: an array of hash -groups, with each hash group exploding into pointers to lower -hash groups once it fills, turning into a hash tree. This has -implications for locking: we must lock the entire group in case -we need to expand it, yet we don't know how deep the tree is at -that point. - -Note that bits from the hash table entries should be stolen to -hold more hash bits to reduce the penalty of collisions. We can -use the otherwise-unused lower 3 bits. If we limit the size of -the database to 64 exabytes, we can use the top 8 bits of the -hash entry as well. These 11 bits would reduce false positives -down to 1 in 2000 which is more than we need: we can use one of -the bits to indicate that the extra hash bits are valid. This -means we can choose not to re-hash all entries when we expand a -hash group; simply use the next bits we need and mark them -invalid. 
- -3.4.2 Status - -Ignore. Scaling the hash automatically proved inefficient at -small hash sizes; we default to a 8192-element hash (changable -via NTDB_ATTRIBUTE_HASHSIZE), and when buckets clash we expand to -an array of hash entries. This scales slightly better than the -tdb chain (due to the 8 top bits containing extra hash). - -3.5 TDB Freelist Is Highly Contended - -TDB uses a single linked list for the free list. Allocation -occurs as follows, using heuristics which have evolved over time: - -1. Get the free list lock for this whole operation. - -2. Multiply length by 1.25, so we always over-allocate by 25%. - -3. Set the slack multiplier to 1. - -4. Examine the current freelist entry: if it is > length but < - the current best case, remember it as the best case. - -5. Multiply the slack multiplier by 1.05. - -6. If our best fit so far is less than length * slack multiplier, - return it. The slack will be turned into a new free record if - it's large enough. - -7. Otherwise, go onto the next freelist entry. - -Deleting a record occurs as follows: - -1. Lock the hash chain for this whole operation. - -2. Walk the chain to find the record, keeping the prev pointer - offset. - -3. If max_dead is non-zero: - - (a) Walk the hash chain again and count the dead records. - - (b) If it's more than max_dead, bulk free all the dead ones - (similar to steps 4 and below, but the lock is only obtained - once). - - (c) Simply mark this record as dead and return. - -4. Get the free list lock for the remainder of this operation. - -5. Examine the following block to see if it is - free; if so, enlarge the current block and remove that block - from the free list. This was disabled, as removal from the free - list was O(entries-in-free-list). - -6. Examine the preceeding block to see if it is free: for this - reason, each block has a 32-bit tailer which indicates its - length. If it is free, expand it to cover our new block and - return. - -7. 
Otherwise, prepend ourselves to the free list. - -Disabling right-merging (step[right-merging]) causes -fragmentation; the other heuristics proved insufficient to -address this, so the final answer to this was that when we expand -the TDB file inside a transaction commit, we repack the entire -tdb. - -The single list lock limits our allocation rate; due to the other -issues this is not currently seen as a bottleneck. - -3.5.1 Proposed Solution - -The first step is to remove all the current heuristics, as they -obviously interact, then examine them once the lock contention is -addressed. - -The free list must be split to reduce contention. Assuming -perfect free merging, we can at most have 1 free list entry for -each entry. This implies that the number of free lists is related -to the size of the hash table, but as it is rare to walk a large -number of free list entries we can use far fewer, say 1/32 of the -number of hash buckets. - -It seems tempting to try to reuse the hash implementation which -we use for records here, but we have two ways of searching for -free entries: for allocation we search by size (and possibly -zone) which produces too many clashes for our hash table to -handle well, and for coalescing we search by address. Thus an -array of doubly-linked free lists seems preferable. - -There are various benefits in using per-size free lists (see[sub:TDB-Becomes-Fragmented] -) but it's not clear this would reduce contention in the common -case where all processes are allocating/freeing the same size. -Thus we almost certainly need to divide in other ways: the most -obvious is to divide the file into zones, and using a free list -(or table of free lists) for each. This approximates address -ordering. 
- -Unfortunately it is difficult to know what heuristics should be -used to determine zone sizes, and our transaction code relies on -being able to create a“recovery area” by simply appending to the -file (difficult if it would need to create a new zone header). -Thus we use a linked-list of free tables; currently we only ever -create one, but if there is more than one we choose one at random -to use. In future we may use heuristics to add new free tables on -contention. We only expand the file when all free tables are -exhausted. - -The basic algorithm is as follows. Freeing is simple: - -1. Identify the correct free list. - -2. Lock the corresponding list. - -3. Re-check the list (we didn't have a lock, sizes could have - changed): relock if necessary. - -4. Place the freed entry in the list. - -Allocation is a little more complicated, as we perform delayed -coalescing at this point: - -1. Pick a free table; usually the previous one. - -2. Lock the corresponding list. - -3. If the top entry is -large enough, remove it from the list and - return it. - -4. Otherwise, coalesce entries in the list.If there was no entry - large enough, unlock the list and try the next largest list - -5. If no list has an entry which meets our needs, try the next - free table. - -6. If no zone satisfies, expand the file. - -This optimizes rapid insert/delete of free list entries by not -coalescing them all the time.. First-fit address ordering -ordering seems to be fairly good for keeping fragmentation low -(see[sub:TDB-Becomes-Fragmented]). Note that address ordering -does not need a tailer to coalesce, though if we needed one we -could have one cheaply: see[sub:Records-Incur-A]. - -Each free entry has the free table number in the header: less -than 255. It also contains a doubly-linked list for easy -deletion. - -3.6 TDB Becomes Fragmented - -Much of this is a result of allocation strategy[footnote: -The Memory Fragmentation Problem: Solved? 
Johnstone & Wilson 1995 -ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps -] and deliberate hobbling of coalescing; internal fragmentation -(aka overallocation) is deliberately set at 25%, and external -fragmentation is only cured by the decision to repack the entire -db when a transaction commit needs to enlarge the file. - -3.6.1 Proposed Solution - -The 25% overhead on allocation works in practice for ldb because -indexes tend to expand by one record at a time. This internal -fragmentation can be resolved by having an“expanded” bit in the -header to note entries that have previously expanded, and -allocating more space for them. - -There are is a spectrum of possible solutions for external -fragmentation: one is to use a fragmentation-avoiding allocation -strategy such as best-fit address-order allocator. The other end -of the spectrum would be to use a bump allocator (very fast and -simple) and simply repack the file when we reach the end. - -There are three problems with efficient fragmentation-avoiding -allocators: they are non-trivial, they tend to use a single free -list for each size, and there's no evidence that tdb allocation -patterns will match those recorded for general allocators (though -it seems likely). - -Thus we don't spend too much effort on external fragmentation; we -will be no worse than the current code if we need to repack on -occasion. More effort is spent on reducing freelist contention, -and reducing overhead. 
- -3.7 Records Incur A 28-Byte Overhead - -Each TDB record has a header as follows: - -struct tdb_record { - - tdb_off_t next; /* offset of the next record in the list -*/ - - tdb_len_t rec_len; /* total byte length of record */ - - tdb_len_t key_len; /* byte length of key */ - - tdb_len_t data_len; /* byte length of data */ - - uint32_t full_hash; /* the full 32 bit hash of the key */ - - uint32_t magic; /* try to catch errors */ - - /* the following union is implied: - - union { - - char record[rec_len]; - - struct { - - char key[key_len]; - - char data[data_len]; - - } - - uint32_t totalsize; (tailer) - - } - - */ - -}; - -Naively, this would double to a 56-byte overhead on a 64 bit -implementation. - -3.7.1 Proposed Solution - -We can use various techniques to reduce this for an allocated -block: - -1. The 'next' pointer is not required, as we are using a flat - hash table. - -2. 'rec_len' can instead be expressed as an addition to key_len - and data_len (it accounts for wasted or overallocated length in - the record). Since the record length is always a multiple of 8, - we can conveniently fit it in 32 bits (representing up to 35 - bits). - -3. 'key_len' and 'data_len' can be reduced. I'm unwilling to - restrict 'data_len' to 32 bits, but instead we can combine the - two into one 64-bit field and using a 5 bit value which - indicates at what bit to divide the two. Keys are unlikely to - scale as fast as data, so I'm assuming a maximum key size of 32 - bits. - -4. 'full_hash' is used to avoid a memcmp on the“miss” case, but - this is diminishing returns after a handful of bits (at 10 - bits, it reduces 99.9% of false memcmp). As an aside, as the - lower bits are already incorporated in the hash table - resolution, the upper bits should be used here. Note that it's - not clear that these bits will be a win, given the extra bits - in the hash table itself (see[sub:Hash-Size-Solution]). - -5. 
'magic' does not need to be enlarged: it currently reflects - one of 5 values (used, free, dead, recovery, and - unused_recovery). It is useful for quick sanity checking - however, and should not be eliminated. - -6. 'tailer' is only used to coalesce free blocks (so a block to - the right can find the header to check if this block is free). - This can be replaced by a single 'free' bit in the header of - the following block (and the tailer only exists in free - blocks).[footnote: -This technique from Thomas Standish. Data Structure Techniques. -Addison-Wesley, Reading, Massachusetts, 1980. -] The current proposed coalescing algorithm doesn't need this, - however. - -This produces a 16 byte used header like this: - -struct tdb_used_record { - - uint32_t used_magic : 16, - - - - key_data_divide: 5, - - top_hash: 11; - - uint32_t extra_octets; - - uint64_t key_and_data_len; - -}; - -And a free record like this: - -struct tdb_free_record { - - uint64_t free_magic: 8, - - prev : 56; - - - - uint64_t free_table: 8, - - total_length : 56 - - uint64_t next;; - -}; - -Note that by limiting valid offsets to 56 bits, we can pack -everything we need into 3 64-byte words, meaning our minimum -record size is 8 bytes. - -3.7.2 Status - -Complete. - -3.8 Transaction Commit Requires 4 fdatasync - -The current transaction algorithm is: - -1. write_recovery_data(); - -2. sync(); - -3. write_recovery_header(); - -4. sync(); - -5. overwrite_with_new_data(); - -6. sync(); - -7. remove_recovery_header(); - -8. sync(); - -On current ext3, each sync flushes all data to disk, so the next -3 syncs are relatively expensive. But this could become a -performance bottleneck on other filesystems such as ext4. - -3.8.1 Proposed Solution - -Neil Brown points out that this is overzealous, and only one sync -is needed: - -1. Bundle the recovery data, a transaction counter and a strong - checksum of the new data. - -2. Strong checksum that whole bundle. - -3. Store the bundle in the database. - -4. 
Overwrite the oldest of the two recovery pointers in the - header (identified using the transaction counter) with the - offset of this bundle. - -5. sync. - -6. Write the new data to the file. - -Checking for recovery means identifying the latest bundle with a -valid checksum and using the new data checksum to ensure that it -has been applied. This is more expensive than the current check, -but need only be done at open. For running databases, a separate -header field can be used to indicate a transaction in progress; -we need only check for recovery if this is set. - -3.8.2 Status - -Deferred. - -3.9 TDB Does Not Have Snapshot Support - -3.9.1 Proposed Solution - -None. At some point you say“use a real database” (but see[replay-attribute] -). - -But as a thought experiment, if we implemented transactions to -only overwrite free entries (this is tricky: there must not be a -header in each entry which indicates whether it is free, but use -of presence in metadata elsewhere), and a pointer to the hash -table, we could create an entirely new commit without destroying -existing data. Then it would be easy to implement snapshots in a -similar way. - -This would not allow arbitrary changes to the database, such as -tdb_repack does, and would require more space (since we have to -preserve the current and future entries at once). If we used hash -trees rather than one big hash table, we might only have to -rewrite some sections of the hash, too. - -We could then implement snapshots using a similar method, using -multiple different hash tables/free tables. - -3.9.2 Status - -Deferred. - -3.10 Transactions Cannot Operate in Parallel - -This would be useless for ldb, as it hits the index records with -just about every update. It would add significant complexity in -resolving clashes, and cause the all transaction callers to write -their code to loop in the case where the transactions spuriously -failed. - -3.10.1 Proposed Solution - -None (but see[replay-attribute]). 
We could solve a small part of -the problem by providing read-only transactions. These would -allow one write transaction to begin, but it could not commit -until all r/o transactions are done. This would require a new -RO_TRANSACTION_LOCK, which would be upgraded on commit. - -3.10.2 Status - -Deferred. - -3.11 Default Hash Function Is Suboptimal - -The Knuth-inspired multiplicative hash used by tdb is fairly slow -(especially if we expand it to 64 bits), and works best when the -hash bucket size is a prime number (which also means a slow -modulus). In addition, it is highly predictable which could -potentially lead to a Denial of Service attack in some TDB uses. - -3.11.1 Proposed Solution - -The Jenkins lookup3 hash[footnote: -http://burtleburtle.net/bob/c/lookup3.c -] is a fast and superbly-mixing hash. It's used by the Linux -kernel and almost everything else. This has the particular -properties that it takes an initial seed, and produces two 32 bit -hash numbers, which we can combine into a 64-bit hash. - -The seed should be created at tdb-creation time from some random -source, and placed in the header. This is far from foolproof, but -adds a little bit of protection against hash bombing. - -3.11.2 Status - -Complete. - -3.12 Reliable Traversal Adds Complexity - -We lock a record during traversal iteration, and try to grab that -lock in the delete code. If that grab on delete fails, we simply -mark it deleted and continue onwards; traversal checks for this -condition and does the delete when it moves off the record. - -If traversal terminates, the dead record may be left -indefinitely. - -3.12.1 Proposed Solution - -Remove reliability guarantees; see[traverse-Proposed-Solution]. - -3.12.2 Status - -Complete. - -3.13 Fcntl Locking Adds Overhead - -Placing a fcntl lock means a system call, as does removing one. -This is actually one reason why transactions can be faster -(everything is locked once at transaction start). 
In the -uncontended case, this overhead can theoretically be eliminated. - -3.13.1 Proposed Solution - -None. - -We tried this before with spinlock support, in the early days of -TDB, and it didn't make much difference except in manufactured -benchmarks. - -We could use spinlocks (with futex kernel support under Linux), -but it means that we lose automatic cleanup when a process dies -with a lock. There is a method of auto-cleanup under Linux, but -it's not supported by other operating systems. We could -reintroduce a clear-if-first-style lock and sweep for dead -futexes on open, but that wouldn't help the normal case of one -concurrent opener dying. Increasingly elaborate repair schemes -could be considered, but they require an ABI change (everyone -must use them) anyway, so there's no need to do this at the same -time as everything else. - -3.14 Some Transactions Don't Require Durability - -Volker points out that gencache uses a CLEAR_IF_FIRST tdb for -normal (fast) usage, and occasionally empties the results into a -transactional TDB. This kind of usage prioritizes performance -over durability: as long as we are consistent, data can be lost. - -This would be more neatly implemented inside tdb: a“soft” -transaction commit (ie. syncless) which meant that data may be -reverted on a crash. - -3.14.1 Proposed Solution - -None. - -Unfortunately any transaction scheme which overwrites old data -requires a sync before that overwrite to avoid the possibility of -corruption. - -It seems possible to use a scheme similar to that described in[sub:TDB-Does-Not] -,where transactions are committed without overwriting existing -data, and an array of top-level pointers were available in the -header. If the transaction is“soft” then we would not need a sync -at all: existing processes would pick up the new hash table and -free list and work with that. 
- -At some later point, a sync would allow recovery of the old data -into the free lists (perhaps when the array of top-level pointers -filled). On crash, tdb_open() would examine the array of top -levels, and apply the transactions until it encountered an -invalid checksum. - -3.15 Tracing Is Fragile, Replay Is External - -The current TDB has compile-time-enabled tracing code, but it -often breaks as it is not enabled by default. In a similar way, -the ctdb code has an external wrapper which does replay tracing -so it can coordinate cluster-wide transactions. - -3.15.1 Proposed Solution - -Tridge points out that an attribute can be later added to -tdb_open (see[attributes]) to provide replay/trace hooks, which -could become the basis for this and future parallel transactions -and snapshot support. - -3.15.2 Status - -Deferred. diff --git a/ccan/ntdb/free.c b/ccan/ntdb/free.c deleted file mode 100644 index 0d0e25f1..00000000 --- a/ccan/ntdb/free.c +++ /dev/null @@ -1,972 +0,0 @@ - /* - Trivial Database 2: free list/block handling - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include "private.h" -#include -#include -#include -#include - -static unsigned fls64(uint64_t val) -{ - return ilog64(val); -} - -/* In which bucket would we find a particular record size? 
(ignoring header) */ -unsigned int size_to_bucket(ntdb_len_t data_len) -{ - unsigned int bucket; - - /* We can't have records smaller than this. */ - assert(data_len >= NTDB_MIN_DATA_LEN); - - /* Ignoring the header... */ - if (data_len - NTDB_MIN_DATA_LEN <= 64) { - /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */ - bucket = (data_len - NTDB_MIN_DATA_LEN) / 8; - } else { - /* After that we go power of 2. */ - bucket = fls64(data_len - NTDB_MIN_DATA_LEN) + 2; - } - - if (unlikely(bucket >= NTDB_FREE_BUCKETS)) - bucket = NTDB_FREE_BUCKETS - 1; - return bucket; -} - -ntdb_off_t first_ftable(struct ntdb_context *ntdb) -{ - return ntdb_read_off(ntdb, offsetof(struct ntdb_header, free_table)); -} - -ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable) -{ - return ntdb_read_off(ntdb, ftable + offsetof(struct ntdb_freetable,next)); -} - -enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb) -{ - /* Use reservoir sampling algorithm to select a free list at random. */ - unsigned int rnd, max = 0, count = 0; - ntdb_off_t off; - - ntdb->ftable_off = off = first_ftable(ntdb); - ntdb->ftable = 0; - - while (off) { - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - - rnd = random(); - if (rnd >= max) { - ntdb->ftable_off = off; - ntdb->ftable = count; - max = rnd; - } - - off = next_ftable(ntdb, off); - count++; - } - return NTDB_SUCCESS; -} - -/* Offset of a given bucket. */ -ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket) -{ - return ftable_off + offsetof(struct ntdb_freetable, buckets) - + bucket * sizeof(ntdb_off_t); -} - -/* Returns free_buckets + 1, or list number to search, or -ve error. */ -static ntdb_off_t find_free_head(struct ntdb_context *ntdb, - ntdb_off_t ftable_off, - ntdb_off_t bucket) -{ - /* Speculatively search for a non-zero bucket. 
*/ - return ntdb_find_nonzero_off(ntdb, bucket_off(ftable_off, 0), - bucket, NTDB_FREE_BUCKETS); -} - -static void check_list(struct ntdb_context *ntdb, ntdb_off_t b_off) -{ -#ifdef CCAN_NTDB_DEBUG - ntdb_off_t off, prev = 0, first; - struct ntdb_free_record r; - - first = off = (ntdb_read_off(ntdb, b_off) & NTDB_OFF_MASK); - while (off != 0) { - ntdb_read_convert(ntdb, off, &r, sizeof(r)); - if (frec_magic(&r) != NTDB_FREE_MAGIC) - abort(); - if (prev && frec_prev(&r) != prev) - abort(); - prev = off; - off = r.next; - } - - if (first) { - ntdb_read_convert(ntdb, first, &r, sizeof(r)); - if (frec_prev(&r) != prev) - abort(); - } -#endif -} - -/* Remove from free bucket. */ -static enum NTDB_ERROR remove_from_list(struct ntdb_context *ntdb, - ntdb_off_t b_off, ntdb_off_t r_off, - const struct ntdb_free_record *r) -{ - ntdb_off_t off, prev_next, head; - enum NTDB_ERROR ecode; - - /* Is this only element in list? Zero out bucket, and we're done. */ - if (frec_prev(r) == r_off) - return ntdb_write_off(ntdb, b_off, 0); - - /* off = &r->prev->next */ - off = frec_prev(r) + offsetof(struct ntdb_free_record, next); - - /* Get prev->next */ - prev_next = ntdb_read_off(ntdb, off); - if (NTDB_OFF_IS_ERR(prev_next)) - return NTDB_OFF_TO_ERR(prev_next); - - /* If prev->next == 0, we were head: update bucket to point to next. */ - if (prev_next == 0) { - /* We must preserve upper bits. 
*/ - head = ntdb_read_off(ntdb, b_off); - if (NTDB_OFF_IS_ERR(head)) - return NTDB_OFF_TO_ERR(head); - - if ((head & NTDB_OFF_MASK) != r_off) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "remove_from_list:" - " %llu head %llu on list %llu", - (long long)r_off, - (long long)head, - (long long)b_off); - } - head = ((head & ~NTDB_OFF_MASK) | r->next); - ecode = ntdb_write_off(ntdb, b_off, head); - if (ecode != NTDB_SUCCESS) - return ecode; - } else { - /* r->prev->next = r->next */ - ecode = ntdb_write_off(ntdb, off, r->next); - if (ecode != NTDB_SUCCESS) - return ecode; - } - - /* If we were the tail, off = &head->prev. */ - if (r->next == 0) { - head = ntdb_read_off(ntdb, b_off); - if (NTDB_OFF_IS_ERR(head)) - return NTDB_OFF_TO_ERR(head); - head &= NTDB_OFF_MASK; - off = head + offsetof(struct ntdb_free_record, magic_and_prev); - } else { - /* off = &r->next->prev */ - off = r->next + offsetof(struct ntdb_free_record, - magic_and_prev); - } - -#ifdef CCAN_NTDB_DEBUG - /* *off == r */ - if ((ntdb_read_off(ntdb, off) & NTDB_OFF_MASK) != r_off) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "remove_from_list:" - " %llu bad prev in list %llu", - (long long)r_off, (long long)b_off); - } -#endif - /* r->next->prev = r->prev */ - return ntdb_write_off(ntdb, off, r->magic_and_prev); -} - -/* Enqueue in this free bucket: sets coalesce if we've added 128 - * entries to it. 
*/ -static enum NTDB_ERROR enqueue_in_free(struct ntdb_context *ntdb, - ntdb_off_t b_off, - ntdb_off_t off, - ntdb_len_t len, - bool *coalesce) -{ - struct ntdb_free_record new; - enum NTDB_ERROR ecode; - ntdb_off_t prev, head; - uint64_t magic = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL)); - - head = ntdb_read_off(ntdb, b_off); - if (NTDB_OFF_IS_ERR(head)) - return NTDB_OFF_TO_ERR(head); - - /* We only need to set ftable_and_len; rest is set in enqueue_in_free */ - new.ftable_and_len = ((uint64_t)ntdb->ftable - << (64 - NTDB_OFF_UPPER_STEAL)) - | len; - - /* new->next = head. */ - new.next = (head & NTDB_OFF_MASK); - - /* First element? Prev points to ourselves. */ - if (!new.next) { - new.magic_and_prev = (magic | off); - } else { - /* new->prev = next->prev */ - prev = ntdb_read_off(ntdb, - new.next + offsetof(struct ntdb_free_record, - magic_and_prev)); - new.magic_and_prev = prev; - if (frec_magic(&new) != NTDB_FREE_MAGIC) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "enqueue_in_free: %llu bad head" - " prev %llu", - (long long)new.next, - (long long)prev); - } - /* next->prev = new. 
*/ - ecode = ntdb_write_off(ntdb, new.next - + offsetof(struct ntdb_free_record, - magic_and_prev), - off | magic); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - -#ifdef CCAN_NTDB_DEBUG - prev = ntdb_read_off(ntdb, frec_prev(&new) - + offsetof(struct ntdb_free_record, next)); - if (prev != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "enqueue_in_free:" - " %llu bad tail next ptr %llu", - (long long)frec_prev(&new) - + offsetof(struct ntdb_free_record, - next), - (long long)prev); - } -#endif - } - - /* Update enqueue count, but don't set high bit: see NTDB_OFF_IS_ERR */ - if (*coalesce) - head += (1ULL << (64 - NTDB_OFF_UPPER_STEAL)); - head &= ~(NTDB_OFF_MASK | (1ULL << 63)); - head |= off; - - ecode = ntdb_write_off(ntdb, b_off, head); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* It's time to coalesce if counter wrapped. */ - if (*coalesce) - *coalesce = ((head & ~NTDB_OFF_MASK) == 0); - - return ntdb_write_convert(ntdb, off, &new, sizeof(new)); -} - -static ntdb_off_t ftable_offset(struct ntdb_context *ntdb, unsigned int ftable) -{ - ntdb_off_t off; - unsigned int i; - - if (likely(ntdb->ftable == ftable)) - return ntdb->ftable_off; - - off = first_ftable(ntdb); - for (i = 0; i < ftable; i++) { - if (NTDB_OFF_IS_ERR(off)) { - break; - } - off = next_ftable(ntdb, off); - } - return off; -} - -/* Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and - * need to blatt the *protect record (which is set to an error). 
*/ -static ntdb_len_t coalesce(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_off_t b_off, - ntdb_len_t data_len, - ntdb_off_t *protect) -{ - ntdb_off_t end; - struct ntdb_free_record rec; - enum NTDB_ERROR ecode; - - ntdb->stats.alloc_coalesce_tried++; - end = off + sizeof(struct ntdb_used_record) + data_len; - - while (end < ntdb->file->map_size) { - const struct ntdb_free_record *r; - ntdb_off_t nb_off; - unsigned ftable, bucket; - - r = ntdb_access_read(ntdb, end, sizeof(*r), true); - if (NTDB_PTR_IS_ERR(r)) { - ecode = NTDB_PTR_ERR(r); - goto err; - } - - if (frec_magic(r) != NTDB_FREE_MAGIC - || frec_ftable(r) == NTDB_FTABLE_NONE) { - ntdb_access_release(ntdb, r); - break; - } - - ftable = frec_ftable(r); - bucket = size_to_bucket(frec_len(r)); - nb_off = ftable_offset(ntdb, ftable); - if (NTDB_OFF_IS_ERR(nb_off)) { - ntdb_access_release(ntdb, r); - ecode = NTDB_OFF_TO_ERR(nb_off); - goto err; - } - nb_off = bucket_off(nb_off, bucket); - ntdb_access_release(ntdb, r); - - /* We may be violating lock order here, so best effort. */ - if (ntdb_lock_free_bucket(ntdb, nb_off, NTDB_LOCK_NOWAIT) - != NTDB_SUCCESS) { - ntdb->stats.alloc_coalesce_lockfail++; - break; - } - - /* Now we have lock, re-check. */ - ecode = ntdb_read_convert(ntdb, end, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - ntdb_unlock_free_bucket(ntdb, nb_off); - goto err; - } - - if (unlikely(frec_magic(&rec) != NTDB_FREE_MAGIC)) { - ntdb->stats.alloc_coalesce_race++; - ntdb_unlock_free_bucket(ntdb, nb_off); - break; - } - - if (unlikely(frec_ftable(&rec) != ftable) - || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) { - ntdb->stats.alloc_coalesce_race++; - ntdb_unlock_free_bucket(ntdb, nb_off); - break; - } - - /* Did we just mess up a record you were hoping to use? 
*/ - if (end == *protect) { - ntdb->stats.alloc_coalesce_iterate_clash++; - *protect = NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST); - } - - ecode = remove_from_list(ntdb, nb_off, end, &rec); - check_list(ntdb, nb_off); - if (ecode != NTDB_SUCCESS) { - ntdb_unlock_free_bucket(ntdb, nb_off); - goto err; - } - - end += sizeof(struct ntdb_used_record) + frec_len(&rec); - ntdb_unlock_free_bucket(ntdb, nb_off); - ntdb->stats.alloc_coalesce_num_merged++; - } - - /* Didn't find any adjacent free? */ - if (end == off + sizeof(struct ntdb_used_record) + data_len) - return 0; - - /* Before we expand, check this isn't one you wanted protected? */ - if (off == *protect) { - *protect = NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS); - ntdb->stats.alloc_coalesce_iterate_clash++; - } - - /* OK, expand initial record */ - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - goto err; - } - - if (frec_len(&rec) != data_len) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "coalesce: expected data len %zu not %zu", - (size_t)data_len, (size_t)frec_len(&rec)); - goto err; - } - - ecode = remove_from_list(ntdb, b_off, off, &rec); - check_list(ntdb, b_off); - if (ecode != NTDB_SUCCESS) { - goto err; - } - - /* Try locking violation first. We don't allow coalesce recursion! */ - ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_NOWAIT, false); - if (ecode != NTDB_SUCCESS) { - /* Need to drop lock. Can't rely on anything stable. */ - ntdb->stats.alloc_coalesce_lockfail++; - *protect = NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT); - - /* We have to drop this to avoid deadlocks, so make sure record - * doesn't get coalesced by someone else! 
*/ - rec.ftable_and_len = (NTDB_FTABLE_NONE - << (64 - NTDB_OFF_UPPER_STEAL)) - | (end - off - sizeof(struct ntdb_used_record)); - ecode = ntdb_write_off(ntdb, - off + offsetof(struct ntdb_free_record, - ftable_and_len), - rec.ftable_and_len); - if (ecode != NTDB_SUCCESS) { - goto err; - } - - ntdb_unlock_free_bucket(ntdb, b_off); - - ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_WAIT, - false); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - } else if (NTDB_OFF_IS_ERR(*protect)) { - /* For simplicity, we always drop lock if they can't continue */ - ntdb_unlock_free_bucket(ntdb, b_off); - } - ntdb->stats.alloc_coalesce_succeeded++; - - /* Return usable length. */ - return end - off - sizeof(struct ntdb_used_record); - -err: - /* To unify error paths, we *always* unlock bucket on error. */ - ntdb_unlock_free_bucket(ntdb, b_off); - return NTDB_ERR_TO_OFF(ecode); -} - -/* List is locked: we unlock it. */ -static enum NTDB_ERROR coalesce_list(struct ntdb_context *ntdb, - ntdb_off_t ftable_off, - ntdb_off_t b_off, - unsigned int limit) -{ - enum NTDB_ERROR ecode; - ntdb_off_t off; - - off = ntdb_read_off(ntdb, b_off); - if (NTDB_OFF_IS_ERR(off)) { - ecode = NTDB_OFF_TO_ERR(off); - goto unlock_err; - } - /* A little bit of paranoia: counter should be 0. */ - off &= NTDB_OFF_MASK; - - while (off && limit--) { - struct ntdb_free_record rec; - ntdb_len_t coal; - ntdb_off_t next; - - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - next = rec.next; - coal = coalesce(ntdb, off, b_off, frec_len(&rec), &next); - if (NTDB_OFF_IS_ERR(coal)) { - /* This has already unlocked on error. */ - return NTDB_OFF_TO_ERR(coal); - } - if (NTDB_OFF_IS_ERR(next)) { - /* Coalescing had to unlock, so stop. */ - return NTDB_SUCCESS; - } - /* Keep going if we're doing well... 
*/ - limit += size_to_bucket(coal / 16 + NTDB_MIN_DATA_LEN); - off = next; - } - - /* Now, move those elements to the tail of the list so we get something - * else next time. */ - if (off) { - struct ntdb_free_record oldhrec, newhrec, oldtrec, newtrec; - ntdb_off_t oldhoff, oldtoff, newtoff; - - /* The record we were up to is the new head. */ - ecode = ntdb_read_convert(ntdb, off, &newhrec, sizeof(newhrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - /* Get the new tail. */ - newtoff = frec_prev(&newhrec); - ecode = ntdb_read_convert(ntdb, newtoff, &newtrec, - sizeof(newtrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - /* Get the old head. */ - oldhoff = ntdb_read_off(ntdb, b_off); - if (NTDB_OFF_IS_ERR(oldhoff)) { - ecode = NTDB_OFF_TO_ERR(oldhoff); - goto unlock_err; - } - - /* This could happen if they all coalesced away. */ - if (oldhoff == off) - goto out; - - ecode = ntdb_read_convert(ntdb, oldhoff, &oldhrec, - sizeof(oldhrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - /* Get the old tail. */ - oldtoff = frec_prev(&oldhrec); - ecode = ntdb_read_convert(ntdb, oldtoff, &oldtrec, - sizeof(oldtrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - /* Old tail's next points to old head. */ - oldtrec.next = oldhoff; - - /* Old head's prev points to old tail. */ - oldhrec.magic_and_prev - = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL)) - | oldtoff; - - /* New tail's next is 0. */ - newtrec.next = 0; - - /* Write out the modified versions. */ - ecode = ntdb_write_convert(ntdb, oldtoff, &oldtrec, - sizeof(oldtrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - ecode = ntdb_write_convert(ntdb, oldhoff, &oldhrec, - sizeof(oldhrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - ecode = ntdb_write_convert(ntdb, newtoff, &newtrec, - sizeof(newtrec)); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - - /* And finally link in new head. 
*/ - ecode = ntdb_write_off(ntdb, b_off, off); - if (ecode != NTDB_SUCCESS) - goto unlock_err; - } -out: - ntdb_unlock_free_bucket(ntdb, b_off); - return NTDB_SUCCESS; - -unlock_err: - ntdb_unlock_free_bucket(ntdb, b_off); - return ecode; -} - -/* List must not be locked if coalesce_ok is set. */ -enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len_with_header, - enum ntdb_lock_flags waitflag, - bool coalesce_ok) -{ - ntdb_off_t b_off; - ntdb_len_t len; - enum NTDB_ERROR ecode; - - assert(len_with_header >= sizeof(struct ntdb_free_record)); - - len = len_with_header - sizeof(struct ntdb_used_record); - - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len)); - ecode = ntdb_lock_free_bucket(ntdb, b_off, waitflag); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - ecode = enqueue_in_free(ntdb, b_off, off, len, &coalesce_ok); - check_list(ntdb, b_off); - - /* Coalescing unlocks free list. */ - if (!ecode && coalesce_ok) - ecode = coalesce_list(ntdb, ntdb->ftable_off, b_off, 2); - else - ntdb_unlock_free_bucket(ntdb, b_off); - return ecode; -} - -static size_t adjust_size(size_t keylen, size_t datalen) -{ - size_t size = keylen + datalen; - - if (size < NTDB_MIN_DATA_LEN) - size = NTDB_MIN_DATA_LEN; - - /* Round to next uint64_t boundary. */ - return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL); -} - -/* If we have enough left over to be useful, split that off. */ -static size_t record_leftover(size_t keylen, size_t datalen, - bool want_extra, size_t total_len) -{ - ssize_t leftover; - - if (want_extra) - datalen += datalen / 2; - leftover = total_len - adjust_size(keylen, datalen); - - if (leftover < (ssize_t)sizeof(struct ntdb_free_record)) - return 0; - - return leftover; -} - -/* We need size bytes to put our key and data in. 
*/ -static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb, - ntdb_off_t ftable_off, - ntdb_off_t bucket, - size_t keylen, size_t datalen, - bool want_extra, - unsigned magic) -{ - ntdb_off_t off, b_off,best_off; - struct ntdb_free_record best = { 0 }; - double multiplier; - size_t size = adjust_size(keylen, datalen); - enum NTDB_ERROR ecode; - - ntdb->stats.allocs++; - b_off = bucket_off(ftable_off, bucket); - - /* FIXME: Try non-blocking wait first, to measure contention. */ - /* Lock this bucket. */ - ecode = ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - best.ftable_and_len = -1ULL; - best_off = 0; - - /* Get slack if we're after extra. */ - if (want_extra) - multiplier = 1.5; - else - multiplier = 1.0; - - /* Walk the list to see if any are large enough, getting less fussy - * as we go. */ - off = ntdb_read_off(ntdb, b_off); - if (NTDB_OFF_IS_ERR(off)) { - ecode = NTDB_OFF_TO_ERR(off); - goto unlock_err; - } - off &= NTDB_OFF_MASK; - - while (off) { - const struct ntdb_free_record *r; - ntdb_off_t next; - - r = ntdb_access_read(ntdb, off, sizeof(*r), true); - if (NTDB_PTR_IS_ERR(r)) { - ecode = NTDB_PTR_ERR(r); - goto unlock_err; - } - - if (frec_magic(r) != NTDB_FREE_MAGIC) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "lock_and_alloc:" - " %llu non-free 0x%llx", - (long long)off, - (long long)r->magic_and_prev); - ntdb_access_release(ntdb, r); - goto unlock_err; - } - - if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) { - best_off = off; - best = *r; - } - - if (frec_len(&best) <= size * multiplier && best_off) { - ntdb_access_release(ntdb, r); - break; - } - - multiplier *= 1.01; - - next = r->next; - ntdb_access_release(ntdb, r); - off = next; - } - - /* If we found anything at all, use it. */ - if (best_off) { - struct ntdb_used_record rec; - size_t leftover; - - /* We're happy with this size: take it. 
*/ - ecode = remove_from_list(ntdb, b_off, best_off, &best); - check_list(ntdb, b_off); - if (ecode != NTDB_SUCCESS) { - goto unlock_err; - } - - leftover = record_leftover(keylen, datalen, want_extra, - frec_len(&best)); - - assert(keylen + datalen + leftover <= frec_len(&best)); - /* We need to mark non-free before we drop lock, otherwise - * coalesce() could try to merge it! */ - ecode = set_header(ntdb, &rec, magic, keylen, datalen, - frec_len(&best) - leftover); - if (ecode != NTDB_SUCCESS) { - goto unlock_err; - } - - ecode = ntdb_write_convert(ntdb, best_off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - goto unlock_err; - } - - /* For futureproofing, we put a 0 in any unused space. */ - if (rec_extra_padding(&rec)) { - ecode = ntdb->io->twrite(ntdb, best_off + sizeof(rec) - + keylen + datalen, "", 1); - if (ecode != NTDB_SUCCESS) { - goto unlock_err; - } - } - - /* Bucket of leftover will be <= current bucket, so nested - * locking is allowed. */ - if (leftover) { - ntdb->stats.alloc_leftover++; - ecode = add_free_record(ntdb, - best_off + sizeof(rec) - + frec_len(&best) - leftover, - leftover, NTDB_LOCK_WAIT, false); - if (ecode != NTDB_SUCCESS) { - best_off = NTDB_ERR_TO_OFF(ecode); - } - } - ntdb_unlock_free_bucket(ntdb, b_off); - - return best_off; - } - - ntdb_unlock_free_bucket(ntdb, b_off); - return 0; - -unlock_err: - ntdb_unlock_free_bucket(ntdb, b_off); - return NTDB_ERR_TO_OFF(ecode); -} - -/* Get a free block from current free list, or 0 if none, -ve on error. */ -static ntdb_off_t get_free(struct ntdb_context *ntdb, - size_t keylen, size_t datalen, bool want_extra, - unsigned magic) -{ - ntdb_off_t off, ftable_off; - ntdb_off_t start_b, b, ftable; - bool wrapped = false; - - /* If they are growing, add 50% to get to higher bucket. 
*/ - if (want_extra) - start_b = size_to_bucket(adjust_size(keylen, - datalen + datalen / 2)); - else - start_b = size_to_bucket(adjust_size(keylen, datalen)); - - ftable_off = ntdb->ftable_off; - ftable = ntdb->ftable; - while (!wrapped || ftable_off != ntdb->ftable_off) { - /* Start at exact size bucket, and search up... */ - for (b = find_free_head(ntdb, ftable_off, start_b); - b < NTDB_FREE_BUCKETS; - b = find_free_head(ntdb, ftable_off, b + 1)) { - /* Try getting one from list. */ - off = lock_and_alloc(ntdb, ftable_off, - b, keylen, datalen, want_extra, - magic); - if (NTDB_OFF_IS_ERR(off)) - return off; - if (off != 0) { - if (b == start_b) - ntdb->stats.alloc_bucket_exact++; - if (b == NTDB_FREE_BUCKETS - 1) - ntdb->stats.alloc_bucket_max++; - /* Worked? Stay using this list. */ - ntdb->ftable_off = ftable_off; - ntdb->ftable = ftable; - return off; - } - /* Didn't work. Try next bucket. */ - } - - if (NTDB_OFF_IS_ERR(b)) { - return b; - } - - /* Hmm, try next table. */ - ftable_off = next_ftable(ntdb, ftable_off); - if (NTDB_OFF_IS_ERR(ftable_off)) { - return ftable_off; - } - ftable++; - - if (ftable_off == 0) { - wrapped = true; - ftable_off = first_ftable(ntdb); - if (NTDB_OFF_IS_ERR(ftable_off)) { - return ftable_off; - } - ftable = 0; - } - } - - return 0; -} - -enum NTDB_ERROR set_header(struct ntdb_context *ntdb, - struct ntdb_used_record *rec, - unsigned magic, uint64_t keylen, uint64_t datalen, - uint64_t actuallen) -{ - uint64_t keybits = (fls64(keylen) + 1) / 2; - - rec->magic_and_meta = ((actuallen - (keylen + datalen)) << 11) - | (keybits << 43) - | ((uint64_t)magic << 48); - rec->key_and_data_len = (keylen | (datalen << (keybits*2))); - - /* Encoding can fail on big values. 
*/ - if (rec_key_length(rec) != keylen - || rec_data_length(rec) != datalen - || rec_extra_padding(rec) != actuallen - (keylen + datalen)) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "Could not encode k=%llu,d=%llu,a=%llu", - (long long)keylen, (long long)datalen, - (long long)actuallen); - } - return NTDB_SUCCESS; -} - -/* You need 'size', this tells you how much you should expand by. */ -ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size) -{ - ntdb_off_t new_size, top_size; - - /* limit size in order to avoid using up huge amounts of memory for - * in memory tdbs if an oddball huge record creeps in */ - if (size > 100 * 1024) { - top_size = map_size + size * 2; - } else { - top_size = map_size + size * 100; - } - - /* always make room for at least top_size more records, and at - least 25% more space. if the DB is smaller than 100MiB, - otherwise grow it by 10% only. */ - if (map_size > 100 * 1024 * 1024) { - new_size = map_size * 1.10; - } else { - new_size = map_size * 1.25; - } - - if (new_size < top_size) - new_size = top_size; - - /* We always make the file a multiple of transaction page - * size. This guarantees that the transaction recovery area - * is always aligned, otherwise the transaction code can overwrite - * itself. */ - new_size = (new_size + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1); - return new_size - map_size; -} - -/* Expand the database. */ -static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size) -{ - uint64_t old_size; - ntdb_len_t wanted; - enum NTDB_ERROR ecode; - - /* Need to hold a hash lock to expand DB: transactions rely on it. */ - if (!(ntdb->flags & NTDB_NOLOCK) - && !ntdb->file->allrecord_lock.count && !ntdb_has_hash_locks(ntdb)) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_expand: must hold lock during expand"); - } - - /* Only one person can expand file at a time. 
*/ - ecode = ntdb_lock_expand(ntdb, F_WRLCK); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* Someone else may have expanded the file, so retry. */ - old_size = ntdb->file->map_size; - ntdb_oob(ntdb, ntdb->file->map_size, 1, true); - if (ntdb->file->map_size != old_size) { - ntdb_unlock_expand(ntdb, F_WRLCK); - return NTDB_SUCCESS; - } - - /* We need room for the record header too. */ - size = adjust_size(0, sizeof(struct ntdb_used_record) + size); - /* Overallocate. */ - wanted = ntdb_expand_adjust(old_size, size); - - ecode = ntdb->io->expand_file(ntdb, wanted); - if (ecode != NTDB_SUCCESS) { - ntdb_unlock_expand(ntdb, F_WRLCK); - return ecode; - } - - /* We need to drop this lock before adding free record. */ - ntdb_unlock_expand(ntdb, F_WRLCK); - - ntdb->stats.expands++; - return add_free_record(ntdb, old_size, wanted, NTDB_LOCK_WAIT, true); -} - -/* This won't fail: it will expand the database if it has to. */ -ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, - unsigned magic, bool growing) -{ - ntdb_off_t off; - - for (;;) { - enum NTDB_ERROR ecode; - off = get_free(ntdb, keylen, datalen, growing, magic); - if (likely(off != 0)) - break; - - ecode = ntdb_expand(ntdb, adjust_size(keylen, datalen)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - } - - return off; -} diff --git a/ccan/ntdb/hash.c b/ccan/ntdb/hash.c deleted file mode 100644 index b223668d..00000000 --- a/ccan/ntdb/hash.c +++ /dev/null @@ -1,624 +0,0 @@ - /* - Trivial Database 2: hash handling - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. 
- - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include "private.h" -#include - -/* Default hash function. */ -uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed, - void *unused) -{ - return hash_stable((const unsigned char *)key, length, seed); -} - -uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len) -{ - return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data); -} - -static ntdb_bool_err key_matches(struct ntdb_context *ntdb, - const struct ntdb_used_record *rec, - ntdb_off_t off, - const NTDB_DATA *key, - const char **rptr) -{ - ntdb_bool_err ret = false; - const char *rkey; - - if (rec_key_length(rec) != key->dsize) { - ntdb->stats.compare_wrong_keylen++; - return ret; - } - - rkey = ntdb_access_read(ntdb, off + sizeof(*rec), - key->dsize + rec_data_length(rec), false); - if (NTDB_PTR_IS_ERR(rkey)) { - return (ntdb_bool_err)NTDB_PTR_ERR(rkey); - } - if (memcmp(rkey, key->dptr, key->dsize) == 0) { - if (rptr) { - *rptr = rkey; - } else { - ntdb_access_release(ntdb, rkey); - } - return true; - } - ntdb->stats.compare_wrong_keycmp++; - ntdb_access_release(ntdb, rkey); - return ret; -} - -/* Does entry match? */ -static ntdb_bool_err match(struct ntdb_context *ntdb, - uint32_t hash, - const NTDB_DATA *key, - ntdb_off_t val, - struct ntdb_used_record *rec, - const char **rptr) -{ - ntdb_off_t off; - enum NTDB_ERROR ecode; - - ntdb->stats.compares++; - - /* Top bits of offset == next bits of hash. 
*/ - if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL) - != bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) { - ntdb->stats.compare_wrong_offsetbits++; - return false; - } - - off = val & NTDB_OFF_MASK; - ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec)); - if (ecode != NTDB_SUCCESS) { - return (ntdb_bool_err)ecode; - } - - return key_matches(ntdb, rec, off, key, rptr); -} - -static bool is_chain(ntdb_off_t val) -{ - return val & (1ULL << NTDB_OFF_CHAIN_BIT); -} - -static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx) -{ - return base + sizeof(struct ntdb_used_record) - + idx * sizeof(ntdb_off_t); -} - -/* This is the core routine which searches the hashtable for an entry. - * On error, no locks are held and -ve is returned. - * Otherwise, hinfo is filled in. - * If not found, the return value is 0. - * If found, the return value is the offset, and *rec is the record. */ -ntdb_off_t find_and_lock(struct ntdb_context *ntdb, - NTDB_DATA key, - int ltype, - struct hash_info *h, - struct ntdb_used_record *rec, - const char **rptr) -{ - ntdb_off_t off, val; - const ntdb_off_t *arr = NULL; - ntdb_len_t i; - bool found_empty; - enum NTDB_ERROR ecode; - struct ntdb_used_record chdr; - ntdb_bool_err berr; - - h->h = ntdb_hash(ntdb, key.dptr, key.dsize); - - h->table = NTDB_HASH_OFFSET; - h->table_size = 1 << ntdb->hash_bits; - h->bucket = bits_from(h->h, 0, ntdb->hash_bits); - h->old_val = 0; - - ecode = ntdb_lock_hash(ntdb, h->bucket, ltype); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - off = hbucket_off(h->table, h->bucket); - val = ntdb_read_off(ntdb, off); - if (NTDB_OFF_IS_ERR(val)) { - ecode = NTDB_OFF_TO_ERR(val); - goto fail; - } - - /* Directly in hash table? 
*/ - if (!likely(is_chain(val))) { - if (val) { - berr = match(ntdb, h->h, &key, val, rec, rptr); - if (berr < 0) { - ecode = NTDB_OFF_TO_ERR(berr); - goto fail; - } - if (berr) { - return val & NTDB_OFF_MASK; - } - /* If you want to insert here, make a chain. */ - h->old_val = val; - } - return 0; - } - - /* Nope? Iterate through chain. */ - h->table = val & NTDB_OFF_MASK; - - ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - - if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "find_and_lock:" - " corrupt record %#x at %llu", - rec_magic(&chdr), (long long)off); - goto fail; - } - - h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t); - - arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0), - rec_data_length(&chdr), true); - if (NTDB_PTR_IS_ERR(arr)) { - ecode = NTDB_PTR_ERR(arr); - goto fail; - } - - found_empty = false; - for (i = 0; i < h->table_size; i++) { - if (arr[i] == 0) { - if (!found_empty) { - h->bucket = i; - found_empty = true; - } - } else { - berr = match(ntdb, h->h, &key, arr[i], rec, rptr); - if (berr < 0) { - ecode = NTDB_OFF_TO_ERR(berr); - ntdb_access_release(ntdb, arr); - goto fail; - } - if (berr) { - /* We found it! */ - h->bucket = i; - off = arr[i] & NTDB_OFF_MASK; - ntdb_access_release(ntdb, arr); - return off; - } - } - } - if (!found_empty) { - /* Set to any non-zero value */ - h->old_val = 1; - h->bucket = i; - } - - ntdb_access_release(ntdb, arr); - return 0; - -fail: - ntdb_unlock_hash(ntdb, h->bucket, ltype); - return NTDB_ERR_TO_OFF(ecode); -} - -static ntdb_off_t encode_offset(const struct ntdb_context *ntdb, - ntdb_off_t new_off, uint32_t hash) -{ - ntdb_off_t extra; - - assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0); - assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0); - /* We pack extra hash bits into the upper bits of the offset. 
*/ - extra = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL); - extra <<= (64 - NTDB_OFF_UPPER_STEAL); - - return new_off | extra; -} - -/* Simply overwrite the hash entry we found before. */ -enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, - const struct hash_info *h, - ntdb_off_t new_off) -{ - return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), - encode_offset(ntdb, new_off, h->h)); -} - -enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, - const struct hash_info *h) -{ - return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), 0); -} - - -enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, - const struct hash_info *h, - ntdb_off_t new_off) -{ - enum NTDB_ERROR ecode; - ntdb_off_t chain; - struct ntdb_used_record chdr; - const ntdb_off_t *old; - ntdb_off_t *new; - - /* We hit an empty bucket during search? That's where it goes. */ - if (!h->old_val) { - return replace_in_hash(ntdb, h, new_off); - } - - /* Full at top-level? Create a 2-element chain. */ - if (h->table == NTDB_HASH_OFFSET) { - ntdb_off_t pair[2]; - - /* One element is old value, the other is the new value. */ - pair[0] = h->old_val; - pair[1] = encode_offset(ntdb, new_off, h->h); - - chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true); - if (NTDB_OFF_IS_ERR(chain)) { - return NTDB_OFF_TO_ERR(chain); - } - ecode = ntdb_write_convert(ntdb, - chain - + sizeof(struct ntdb_used_record), - pair, sizeof(pair)); - if (ecode == NTDB_SUCCESS) { - ecode = ntdb_write_off(ntdb, - hbucket_off(h->table, h->bucket), - chain - | (1ULL << NTDB_OFF_CHAIN_BIT)); - } - return ecode; - } - - /* Full bucket. Expand. */ - ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (rec_extra_padding(&chdr) >= sizeof(new_off)) { - /* Expand in place. 
*/ - uint64_t dlen = rec_data_length(&chdr); - - ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0, - dlen + sizeof(new_off), - dlen + rec_extra_padding(&chdr)); - - if (ecode != NTDB_SUCCESS) { - return ecode; - } - /* find_and_lock set up h to point to last bucket. */ - ecode = replace_in_hash(ntdb, h, new_off); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - /* For futureproofing, we always make the first byte of padding - * a zero. */ - if (rec_extra_padding(&chdr)) { - ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr) - + dlen + sizeof(new_off), - "", 1); - } - return ecode; - } - - /* We need to reallocate the chain. */ - chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t), - NTDB_CHAIN_MAGIC, true); - if (NTDB_OFF_IS_ERR(chain)) { - return NTDB_OFF_TO_ERR(chain); - } - - /* Map both and copy across old buckets. */ - old = ntdb_access_read(ntdb, hbucket_off(h->table, 0), - h->table_size*sizeof(ntdb_off_t), true); - if (NTDB_PTR_IS_ERR(old)) { - return NTDB_PTR_ERR(old); - } - new = ntdb_access_write(ntdb, hbucket_off(chain, 0), - (h->table_size + 1)*sizeof(ntdb_off_t), true); - if (NTDB_PTR_IS_ERR(new)) { - ntdb_access_release(ntdb, old); - return NTDB_PTR_ERR(new); - } - - memcpy(new, old, h->bucket * sizeof(ntdb_off_t)); - new[h->bucket] = encode_offset(ntdb, new_off, h->h); - ntdb_access_release(ntdb, old); - - ecode = ntdb_access_commit(ntdb, new); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* Free the old chain. 
*/ - ecode = add_free_record(ntdb, h->table, - sizeof(struct ntdb_used_record) - + rec_data_length(&chdr) - + rec_extra_padding(&chdr), - NTDB_LOCK_WAIT, true); - - /* Replace top-level to point to new chain */ - return ntdb_write_off(ntdb, - hbucket_off(NTDB_HASH_OFFSET, - bits_from(h->h, 0, ntdb->hash_bits)), - chain | (1ULL << NTDB_OFF_CHAIN_BIT)); -} - -/* Traverse support: returns offset of record, or 0 or -ve error. */ -static ntdb_off_t iterate_chain(struct ntdb_context *ntdb, - ntdb_off_t val, - struct hash_info *h) -{ - ntdb_off_t i; - enum NTDB_ERROR ecode; - struct ntdb_used_record chdr; - - /* First load up chain header. */ - h->table = val & NTDB_OFF_MASK; - ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "get_table:" - " corrupt record %#x at %llu", - rec_magic(&chdr), - (long long)h->table); - } - - /* Chain length is implied by data length. */ - h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t); - - i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket, - h->table_size); - if (NTDB_OFF_IS_ERR(i)) { - return i; - } - - if (i != h->table_size) { - /* Return to next bucket. */ - h->bucket = i + 1; - val = ntdb_read_off(ntdb, hbucket_off(h->table, i)); - if (NTDB_OFF_IS_ERR(val)) { - return val; - } - return val & NTDB_OFF_MASK; - } - - /* Go back up to hash table. */ - h->table = NTDB_HASH_OFFSET; - h->table_size = 1 << ntdb->hash_bits; - h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1; - return 0; -} - -/* Keeps hash locked unless returns 0 or error. */ -static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb, - struct hash_info *h) -{ - ntdb_off_t val, i; - enum NTDB_ERROR ecode; - - if (h->table != NTDB_HASH_OFFSET) { - /* We're in a chain. 
*/ - i = bits_from(h->h, 0, ntdb->hash_bits); - ecode = ntdb_lock_hash(ntdb, i, F_RDLCK); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - /* We dropped lock, bucket might have moved! */ - val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i)); - if (NTDB_OFF_IS_ERR(val)) { - goto unlock; - } - - /* We don't remove chains: there should still be one there! */ - if (!val || !is_chain(val)) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "iterate_hash:" - " vanished hchain %llu at %llu", - (long long)val, - (long long)i); - val = NTDB_ERR_TO_OFF(ecode); - goto unlock; - } - - /* Find next bucket in the chain. */ - val = iterate_chain(ntdb, val, h); - if (NTDB_OFF_IS_ERR(val)) { - goto unlock; - } - if (val != 0) { - return val; - } - ntdb_unlock_hash(ntdb, i, F_RDLCK); - - /* OK, we've reset h back to top level. */ - } - - /* We do this unlocked, then re-check. */ - for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), - h->bucket, h->table_size); - i != h->table_size; - i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), - i+1, h->table_size)) { - ecode = ntdb_lock_hash(ntdb, i, F_RDLCK); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - val = ntdb_read_off(ntdb, hbucket_off(h->table, i)); - if (NTDB_OFF_IS_ERR(val)) { - goto unlock; - } - - /* Lost race, and it's empty? */ - if (!val) { - ntdb->stats.traverse_val_vanished++; - ntdb_unlock_hash(ntdb, i, F_RDLCK); - continue; - } - - if (!is_chain(val)) { - /* So caller knows what lock to free. */ - h->h = i; - /* Return to next bucket. 
*/ - h->bucket = i + 1; - val &= NTDB_OFF_MASK; - return val; - } - - /* Start at beginning of chain */ - h->bucket = 0; - h->h = i; - - val = iterate_chain(ntdb, val, h); - if (NTDB_OFF_IS_ERR(val)) { - goto unlock; - } - if (val != 0) { - return val; - } - - /* Otherwise, bucket has been set to i+1 */ - ntdb_unlock_hash(ntdb, i, F_RDLCK); - } - return 0; - -unlock: - ntdb_unlock_hash(ntdb, i, F_RDLCK); - return val; -} - -/* Return success if we find something, NTDB_ERR_NOEXIST if none. */ -enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb, - struct hash_info *h, - NTDB_DATA *kbuf, size_t *dlen) -{ - ntdb_off_t off; - struct ntdb_used_record rec; - enum NTDB_ERROR ecode; - - off = lock_and_iterate_hash(ntdb, h); - - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } else if (off == 0) { - return NTDB_ERR_NOEXIST; - } - - /* The hash for this key is still locked. */ - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - goto unlock; - } - if (rec_magic(&rec) != NTDB_USED_MAGIC) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "next_in_hash:" - " corrupt record at %llu", - (long long)off); - goto unlock; - } - - kbuf->dsize = rec_key_length(&rec); - - /* They want data as well? 
*/ - if (dlen) { - *dlen = rec_data_length(&rec); - kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec), - kbuf->dsize + *dlen); - } else { - kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec), - kbuf->dsize); - } - if (NTDB_PTR_IS_ERR(kbuf->dptr)) { - ecode = NTDB_PTR_ERR(kbuf->dptr); - goto unlock; - } - ecode = NTDB_SUCCESS; - -unlock: - ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK); - return ecode; - -} - -enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb, - struct hash_info *h, - NTDB_DATA *kbuf, size_t *dlen) -{ - h->table = NTDB_HASH_OFFSET; - h->table_size = 1 << ntdb->hash_bits; - h->bucket = 0; - - return next_in_hash(ntdb, h, kbuf, dlen); -} - -/* Even if the entry isn't in this hash bucket, you'd have to lock this - * bucket to find it. */ -static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb, - const NTDB_DATA *key, int ltype) -{ - uint32_t h = ntdb_hash(ntdb, key->dptr, key->dsize); - - return ntdb_lock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), ltype); -} - -/* lock/unlock one hash chain. 
This is meant to be used to reduce - contention - it cannot guarantee how many records will be locked */ -_PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key) -{ - return chainlock(ntdb, &key, F_WRLCK); -} - -_PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key) -{ - uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - - ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_WRLCK); -} - -_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, - NTDB_DATA key) -{ - return chainlock(ntdb, &key, F_RDLCK); -} - -_PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key) -{ - uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - - ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_RDLCK); -} diff --git a/ccan/ntdb/io.c b/ccan/ntdb/io.c deleted file mode 100644 index 7645cddc..00000000 --- a/ccan/ntdb/io.c +++ /dev/null @@ -1,750 +0,0 @@ - /* - Unix SMB/CIFS implementation. - - trivial database library - - Copyright (C) Andrew Tridgell 1999-2005 - Copyright (C) Paul `Rusty' Russell 2000 - Copyright (C) Jeremy Allison 2000-2003 - Copyright (C) Rusty Russell 2010 - - ** NOTE! The following LGPL license applies to the ntdb - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . 
-*/ -#include "private.h" -#include - -static void free_old_mmaps(struct ntdb_context *ntdb) -{ - struct ntdb_old_mmap *i; - - assert(ntdb->file->direct_count == 0); - - while ((i = ntdb->file->old_mmaps) != NULL) { - ntdb->file->old_mmaps = i->next; - if (ntdb->flags & NTDB_INTERNAL) { - ntdb->free_fn(i->map_ptr, ntdb->alloc_data); - } else { - munmap(i->map_ptr, i->map_size); - } - ntdb->free_fn(i, ntdb->alloc_data); - } -} - -static enum NTDB_ERROR save_old_map(struct ntdb_context *ntdb) -{ - struct ntdb_old_mmap *old; - - assert(ntdb->file->direct_count); - - old = ntdb->alloc_fn(ntdb->file, sizeof(*old), ntdb->alloc_data); - if (!old) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "save_old_map alloc failed"); - } - old->next = ntdb->file->old_mmaps; - old->map_ptr = ntdb->file->map_ptr; - old->map_size = ntdb->file->map_size; - ntdb->file->old_mmaps = old; - - return NTDB_SUCCESS; -} - -enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb) -{ - if (ntdb->file->fd == -1) { - return NTDB_SUCCESS; - } - - if (!ntdb->file->map_ptr) { - return NTDB_SUCCESS; - } - - /* We can't unmap now if there are accessors. */ - if (ntdb->file->direct_count) { - return save_old_map(ntdb); - } else { - munmap(ntdb->file->map_ptr, ntdb->file->map_size); - ntdb->file->map_ptr = NULL; - } - return NTDB_SUCCESS; -} - -enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb) -{ - int mmap_flags; - - if (ntdb->flags & NTDB_INTERNAL) - return NTDB_SUCCESS; - -#ifndef HAVE_INCOHERENT_MMAP - if (ntdb->flags & NTDB_NOMMAP) - return NTDB_SUCCESS; -#endif - - if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) - mmap_flags = PROT_READ; - else - mmap_flags = PROT_READ | PROT_WRITE; - - /* size_t can be smaller than off_t. */ - if ((size_t)ntdb->file->map_size == ntdb->file->map_size) { - ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size, - mmap_flags, - MAP_SHARED, ntdb->file->fd, 0); - } else - ntdb->file->map_ptr = MAP_FAILED; - - /* - * NB. 
When mmap fails it returns MAP_FAILED *NOT* NULL !!!! - */ - if (ntdb->file->map_ptr == MAP_FAILED) { - ntdb->file->map_ptr = NULL; -#ifdef HAVE_INCOHERENT_MMAP - /* Incoherent mmap means everyone must mmap! */ - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_mmap failed for size %lld (%s)", - (long long)ntdb->file->map_size, - strerror(errno)); -#else - ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, - "ntdb_mmap failed for size %lld (%s)", - (long long)ntdb->file->map_size, strerror(errno)); -#endif - } - return NTDB_SUCCESS; -} - -/* check for an out of bounds access - if it is out of bounds then - see if the database has been expanded by someone else and expand - if necessary - note that "len" is the minimum length needed for the db. - - If probe is true, len being too large isn't a failure. -*/ -static enum NTDB_ERROR ntdb_normal_oob(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, - bool probe) -{ - struct stat st; - enum NTDB_ERROR ecode; - - if (len + off < len) { - if (probe) - return NTDB_SUCCESS; - - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_oob off %llu len %llu wrap\n", - (long long)off, (long long)len); - } - - if (ntdb->flags & NTDB_INTERNAL) { - if (probe) - return NTDB_SUCCESS; - - ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_oob len %lld beyond internal" - " alloc size %lld", - (long long)(off + len), - (long long)ntdb->file->map_size); - return NTDB_ERR_IO; - } - - ecode = ntdb_lock_expand(ntdb, F_RDLCK); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (fstat(ntdb->file->fd, &st) != 0) { - ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "Failed to fstat file: %s", strerror(errno)); - ntdb_unlock_expand(ntdb, F_RDLCK); - return NTDB_ERR_IO; - } - - ntdb_unlock_expand(ntdb, F_RDLCK); - - if (st.st_size < off + len) { - if (probe) - return NTDB_SUCCESS; - - ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_oob len %llu beyond eof at %llu", - (long long)(off + len), (long 
long)st.st_size); - return NTDB_ERR_IO; - } - - /* Unmap, update size, remap */ - ecode = ntdb_munmap(ntdb); - if (ecode) { - return ecode; - } - - ntdb->file->map_size = st.st_size; - return ntdb_mmap(ntdb); -} - -/* Endian conversion: we only ever deal with 8 byte quantities */ -void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size) -{ - assert(size % 8 == 0); - if (unlikely((ntdb->flags & NTDB_CONVERT)) && buf) { - uint64_t i, *p = (uint64_t *)buf; - for (i = 0; i < size / 8; i++) - p[i] = bswap_64(p[i]); - } - return buf; -} - -/* Return first non-zero offset in offset array, or end, or -ve error. */ -/* FIXME: Return the off? */ -uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb, - ntdb_off_t base, uint64_t start, uint64_t end) -{ - uint64_t i; - const uint64_t *val; - - /* Zero vs non-zero is the same unconverted: minor optimization. */ - val = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t), - (end - start) * sizeof(ntdb_off_t), false); - if (NTDB_PTR_IS_ERR(val)) { - return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val)); - } - - for (i = 0; i < (end - start); i++) { - if (val[i]) - break; - } - ntdb_access_release(ntdb, val); - return start + i; -} - -/* Return first zero offset in num offset array, or num, or -ve error. */ -uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off, - uint64_t num) -{ - uint64_t i; - const uint64_t *val; - - /* Zero vs non-zero is the same unconverted: minor optimization. 
*/ - val = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false); - if (NTDB_PTR_IS_ERR(val)) { - return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val)); - } - - for (i = 0; i < num; i++) { - if (!val[i]) - break; - } - ntdb_access_release(ntdb, val); - return i; -} - -enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len) -{ - char buf[8192] = { 0 }; - void *p = ntdb->io->direct(ntdb, off, len, true); - enum NTDB_ERROR ecode = NTDB_SUCCESS; - - assert(!(ntdb->flags & NTDB_RDONLY)); - if (NTDB_PTR_IS_ERR(p)) { - return NTDB_PTR_ERR(p); - } - if (p) { - memset(p, 0, len); - return ecode; - } - while (len) { - unsigned todo = len < sizeof(buf) ? len : sizeof(buf); - ecode = ntdb->io->twrite(ntdb, off, buf, todo); - if (ecode != NTDB_SUCCESS) { - break; - } - len -= todo; - off += todo; - } - return ecode; -} - -/* write a lump of data at a specified offset */ -static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off, - const void *buf, ntdb_len_t len) -{ - enum NTDB_ERROR ecode; - - if (ntdb->flags & NTDB_RDONLY) { - return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, - "Write to read-only database"); - } - - ecode = ntdb_oob(ntdb, off, len, false); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (ntdb->file->map_ptr) { - memcpy(off + (char *)ntdb->file->map_ptr, buf, len); - } else { -#ifdef HAVE_INCOHERENT_MMAP - return NTDB_ERR_IO; -#else - ssize_t ret; - ret = pwrite(ntdb->file->fd, buf, len, off); - if (ret != len) { - /* This shouldn't happen: we avoid sparse files. 
*/ - if (ret >= 0) - errno = ENOSPC; - - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_write: %zi at %zu len=%zu (%s)", - ret, (size_t)off, (size_t)len, - strerror(errno)); - } -#endif - } - return NTDB_SUCCESS; -} - -/* read a lump of data at a specified offset */ -static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off, - void *buf, ntdb_len_t len) -{ - enum NTDB_ERROR ecode; - - ecode = ntdb_oob(ntdb, off, len, false); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (ntdb->file->map_ptr) { - memcpy(buf, off + (char *)ntdb->file->map_ptr, len); - } else { -#ifdef HAVE_INCOHERENT_MMAP - return NTDB_ERR_IO; -#else - ssize_t r = pread(ntdb->file->fd, buf, len, off); - if (r != len) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_read failed with %zi at %zu " - "len=%zu (%s) map_size=%zu", - r, (size_t)off, (size_t)len, - strerror(errno), - (size_t)ntdb->file->map_size); - } -#endif - } - return NTDB_SUCCESS; -} - -enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off, - const void *rec, size_t len) -{ - enum NTDB_ERROR ecode; - - if (unlikely((ntdb->flags & NTDB_CONVERT))) { - void *conv = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data); - if (!conv) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_write: no memory converting" - " %zu bytes", len); - } - memcpy(conv, rec, len); - ecode = ntdb->io->twrite(ntdb, off, - ntdb_convert(ntdb, conv, len), len); - ntdb->free_fn(conv, ntdb->alloc_data); - } else { - ecode = ntdb->io->twrite(ntdb, off, rec, len); - } - return ecode; -} - -enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off, - void *rec, size_t len) -{ - enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len); - ntdb_convert(ntdb, rec, len); - return ecode; -} - -static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, - ntdb_len_t len, unsigned int prefix) -{ - unsigned char *buf; - enum NTDB_ERROR ecode; - - /* 
some systems don't like zero length malloc */ - buf = ntdb->alloc_fn(ntdb, prefix + len ? prefix + len : 1, - ntdb->alloc_data); - if (!buf) { - ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_alloc_read alloc failed len=%zu", - (size_t)(prefix + len)); - return NTDB_ERR_PTR(NTDB_ERR_OOM); - } else { - ecode = ntdb->io->tread(ntdb, offset, buf+prefix, len); - if (unlikely(ecode != NTDB_SUCCESS)) { - ntdb->free_fn(buf, ntdb->alloc_data); - return NTDB_ERR_PTR(ecode); - } - } - return buf; -} - -/* read a lump of data, allocating the space for it */ -void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len) -{ - return _ntdb_alloc_read(ntdb, offset, len, 0); -} - -static enum NTDB_ERROR fill(struct ntdb_context *ntdb, - const void *buf, size_t size, - ntdb_off_t off, ntdb_len_t len) -{ - while (len) { - size_t n = len > size ? size : len; - ssize_t ret = pwrite(ntdb->file->fd, buf, n, off); - if (ret != n) { - if (ret >= 0) - errno = ENOSPC; - - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "fill failed:" - " %zi at %zu len=%zu (%s)", - ret, (size_t)off, (size_t)len, - strerror(errno)); - } - len -= n; - off += n; - } - return NTDB_SUCCESS; -} - -/* expand a file. we prefer to use ftruncate, as that is what posix - says to use for mmap expansion */ -static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb, - ntdb_len_t addition) -{ - char buf[8192]; - enum NTDB_ERROR ecode; - - assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0); - if (ntdb->flags & NTDB_RDONLY) { - return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, - "Expand on read-only database"); - } - - if (ntdb->flags & NTDB_INTERNAL) { - char *new; - - /* Can't free it if we have direct accesses. 
*/ - if (ntdb->file->direct_count) { - ecode = save_old_map(ntdb); - if (ecode) { - return ecode; - } - new = ntdb->alloc_fn(ntdb->file, - ntdb->file->map_size + addition, - ntdb->alloc_data); - if (new) { - memcpy(new, ntdb->file->map_ptr, - ntdb->file->map_size); - } - } else { - new = ntdb->expand_fn(ntdb->file->map_ptr, - ntdb->file->map_size + addition, - ntdb->alloc_data); - } - if (!new) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "No memory to expand database"); - } - ntdb->file->map_ptr = new; - ntdb->file->map_size += addition; - return NTDB_SUCCESS; - } else { - /* Unmap before trying to write; old NTDB claimed OpenBSD had - * problem with this otherwise. */ - ecode = ntdb_munmap(ntdb); - if (ecode) { - return ecode; - } - - /* If this fails, we try to fill anyway. */ - if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition)) - ; - - /* now fill the file with something. This ensures that the - file isn't sparse, which would be very bad if we ran out of - disk. 
This must be done with write, not via mmap */ - memset(buf, 0x43, sizeof(buf)); - ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size, - addition); - if (ecode != NTDB_SUCCESS) - return ecode; - ntdb->file->map_size += addition; - return ntdb_mmap(ntdb); - } -} - -const void *ntdb_access_read(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, bool convert) -{ - void *ret = NULL; - - if (likely(!(ntdb->flags & NTDB_CONVERT))) { - ret = ntdb->io->direct(ntdb, off, len, false); - - if (NTDB_PTR_IS_ERR(ret)) { - return ret; - } - } - if (!ret) { - struct ntdb_access_hdr *hdr; - hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr)); - if (NTDB_PTR_IS_ERR(hdr)) { - return hdr; - } - hdr->next = ntdb->access; - ntdb->access = hdr; - ret = hdr + 1; - if (convert) { - ntdb_convert(ntdb, (void *)ret, len); - } - } else { - ntdb->file->direct_count++; - } - - return ret; -} - -void *ntdb_access_write(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, bool convert) -{ - void *ret = NULL; - - if (ntdb->flags & NTDB_RDONLY) { - ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, - "Write to read-only database"); - return NTDB_ERR_PTR(NTDB_ERR_RDONLY); - } - - if (likely(!(ntdb->flags & NTDB_CONVERT))) { - ret = ntdb->io->direct(ntdb, off, len, true); - - if (NTDB_PTR_IS_ERR(ret)) { - return ret; - } - } - - if (!ret) { - struct ntdb_access_hdr *hdr; - hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr)); - if (NTDB_PTR_IS_ERR(hdr)) { - return hdr; - } - hdr->next = ntdb->access; - ntdb->access = hdr; - hdr->off = off; - hdr->len = len; - hdr->convert = convert; - ret = hdr + 1; - if (convert) - ntdb_convert(ntdb, (void *)ret, len); - } else { - ntdb->file->direct_count++; - } - return ret; -} - -static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p) -{ - struct ntdb_access_hdr **hp; - - for (hp = &ntdb->access; *hp; hp = &(*hp)->next) { - if (*hp + 1 == p) - return hp; - } - return NULL; -} - -void 
ntdb_access_release(struct ntdb_context *ntdb, const void *p) -{ - struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p); - - if (hp) { - hdr = *hp; - *hp = hdr->next; - ntdb->free_fn(hdr, ntdb->alloc_data); - } else { - if (--ntdb->file->direct_count == 0) { - free_old_mmaps(ntdb); - } - } -} - -enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p) -{ - struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p); - enum NTDB_ERROR ecode; - - if (hp) { - hdr = *hp; - if (hdr->convert) - ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len); - else - ecode = ntdb_write(ntdb, hdr->off, p, hdr->len); - *hp = hdr->next; - ntdb->free_fn(hdr, ntdb->alloc_data); - } else { - if (--ntdb->file->direct_count == 0) { - free_old_mmaps(ntdb); - } - ecode = NTDB_SUCCESS; - } - - return ecode; -} - -static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len, - bool write_mode) -{ - enum NTDB_ERROR ecode; - - if (unlikely(!ntdb->file->map_ptr)) - return NULL; - - ecode = ntdb_oob(ntdb, off, len, false); - if (unlikely(ecode != NTDB_SUCCESS)) - return NTDB_ERR_PTR(ecode); - return (char *)ntdb->file->map_ptr + off; -} - -static ntdb_off_t ntdb_read_normal_off(struct ntdb_context *ntdb, - ntdb_off_t off) -{ - ntdb_off_t ret; - enum NTDB_ERROR ecode; - ntdb_off_t *p; - - p = ntdb_direct(ntdb, off, sizeof(*p), false); - if (NTDB_PTR_IS_ERR(p)) { - return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p)); - } - if (likely(p)) { - return *p; - } - - ecode = ntdb_read(ntdb, off, &ret, sizeof(ret)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - return ret; -} - -static ntdb_off_t ntdb_read_convert_off(struct ntdb_context *ntdb, - ntdb_off_t off) -{ - ntdb_off_t ret; - enum NTDB_ERROR ecode; - - ecode = ntdb_read_convert(ntdb, off, &ret, sizeof(ret)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - return ret; -} - -static enum NTDB_ERROR ntdb_write_normal_off(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_off_t val) -{ - 
ntdb_off_t *p; - - p = ntdb_direct(ntdb, off, sizeof(*p), true); - if (NTDB_PTR_IS_ERR(p)) { - return NTDB_PTR_ERR(p); - } - if (likely(p)) { - *p = val; - return NTDB_SUCCESS; - } - return ntdb_write(ntdb, off, &val, sizeof(val)); -} - -static enum NTDB_ERROR ntdb_write_convert_off(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_off_t val) -{ - return ntdb_write_convert(ntdb, off, &val, sizeof(val)); -} - -void ntdb_inc_seqnum(struct ntdb_context *ntdb) -{ - ntdb_off_t seq; - - if (likely(!(ntdb->flags & NTDB_CONVERT))) { - int64_t *direct; - - direct = ntdb->io->direct(ntdb, - offsetof(struct ntdb_header, seqnum), - sizeof(*direct), true); - if (likely(direct)) { - /* Don't let it go negative, even briefly */ - if (unlikely((*direct) + 1) < 0) - *direct = 0; - (*direct)++; - return; - } - } - - seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum)); - if (!NTDB_OFF_IS_ERR(seq)) { - seq++; - if (unlikely((int64_t)seq < 0)) - seq = 0; - ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq); - } -} - -static const struct ntdb_methods io_methods = { - ntdb_read, - ntdb_write, - ntdb_normal_oob, - ntdb_expand_file, - ntdb_direct, - ntdb_read_normal_off, - ntdb_write_normal_off, -}; - -static const struct ntdb_methods io_convert_methods = { - ntdb_read, - ntdb_write, - ntdb_normal_oob, - ntdb_expand_file, - ntdb_direct, - ntdb_read_convert_off, - ntdb_write_convert_off, -}; - -/* - initialise the default methods table -*/ -void ntdb_io_init(struct ntdb_context *ntdb) -{ - if (ntdb->flags & NTDB_CONVERT) - ntdb->io = &io_convert_methods; - else - ntdb->io = &io_methods; -} diff --git a/ccan/ntdb/lock.c b/ccan/ntdb/lock.c deleted file mode 100644 index 71d6d852..00000000 --- a/ccan/ntdb/lock.c +++ /dev/null @@ -1,886 +0,0 @@ - /* - Unix SMB/CIFS implementation. - - trivial database library - - Copyright (C) Andrew Tridgell 1999-2005 - Copyright (C) Paul `Rusty' Russell 2000 - Copyright (C) Jeremy Allison 2000-2003 - - ** NOTE! 
The following LGPL license applies to the ntdb - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ - -#include "private.h" -#include - -/* If we were threaded, we could wait for unlock, but we're not, so fail. */ -enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call) -{ - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "%s: lock owned by another ntdb in this process.", - call); -} - -/* If we fork, we no longer really own locks. */ -bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log) -{ - /* No locks? No problem! */ - if (ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0) { - return true; - } - - /* No fork? No problem! */ - if (ntdb->file->locker == getpid()) { - return true; - } - - if (log) { - ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "%s: fork() detected after lock acquisition!" 
- " (%u vs %u)", call, - (unsigned int)ntdb->file->locker, - (unsigned int)getpid()); - } - return false; -} - -int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, - void *unused) -{ - struct flock fl; - int ret; - - do { - fl.l_type = rw; - fl.l_whence = SEEK_SET; - fl.l_start = off; - fl.l_len = len; - - if (waitflag) - ret = fcntl(fd, F_SETLKW, &fl); - else - ret = fcntl(fd, F_SETLK, &fl); - } while (ret != 0 && errno == EINTR); - return ret; -} - -int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused) -{ - struct flock fl; - int ret; - - do { - fl.l_type = F_UNLCK; - fl.l_whence = SEEK_SET; - fl.l_start = off; - fl.l_len = len; - - ret = fcntl(fd, F_SETLKW, &fl); - } while (ret != 0 && errno == EINTR); - return ret; -} - -static int lock(struct ntdb_context *ntdb, - int rw, off_t off, off_t len, bool waitflag) -{ - int ret; - if (ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0) { - ntdb->file->locker = getpid(); - } - - ntdb->stats.lock_lowlevel++; - ret = ntdb->lock_fn(ntdb->file->fd, rw, off, len, waitflag, - ntdb->lock_data); - if (!waitflag) { - ntdb->stats.lock_nonblock++; - if (ret != 0) - ntdb->stats.lock_nonblock_fail++; - } - return ret; -} - -static int unlock(struct ntdb_context *ntdb, int rw, off_t off, off_t len) -{ -#if 0 /* Check they matched up locks and unlocks correctly. */ - char line[80]; - FILE *locks; - bool found = false; - - locks = fopen("/proc/locks", "r"); - - while (fgets(line, 80, locks)) { - char *p; - int type, start, l; - - /* eg. 
1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */ - p = strchr(line, ':') + 1; - if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY "))) - continue; - p += strlen(" FLOCK ADVISORY "); - if (strncmp(p, "READ ", strlen("READ ")) == 0) - type = F_RDLCK; - else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0) - type = F_WRLCK; - else - abort(); - p += 6; - if (atoi(p) != getpid()) - continue; - p = strchr(strchr(p, ' ') + 1, ' ') + 1; - start = atoi(p); - p = strchr(p, ' ') + 1; - if (strncmp(p, "EOF", 3) == 0) - l = 0; - else - l = atoi(p) - start + 1; - - if (off == start) { - if (len != l) { - fprintf(stderr, "Len %u should be %u: %s", - (int)len, l, line); - abort(); - } - if (type != rw) { - fprintf(stderr, "Type %s wrong: %s", - rw == F_RDLCK ? "READ" : "WRITE", line); - abort(); - } - found = true; - break; - } - } - - if (!found) { - fprintf(stderr, "Unlock on %u@%u not found!", - (int)off, (int)len); - abort(); - } - - fclose(locks); -#endif - - return ntdb->unlock_fn(ntdb->file->fd, rw, off, len, ntdb->lock_data); -} - -/* a byte range locking function - return 0 on success - this functions locks len bytes at the specified offset. - - note that a len of zero means lock to end of file -*/ -static enum NTDB_ERROR ntdb_brlock(struct ntdb_context *ntdb, - int rw_type, ntdb_off_t offset, ntdb_off_t len, - enum ntdb_lock_flags flags) -{ - int ret; - - if (rw_type == F_WRLCK && (ntdb->flags & NTDB_RDONLY)) { - return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, - "Write lock attempted on read-only database"); - } - - if (ntdb->flags & NTDB_NOLOCK) { - return NTDB_SUCCESS; - } - - /* A 32 bit system cannot open a 64-bit file, but it could have - * expanded since then: check here. 
*/ - if ((size_t)(offset + len) != offset + len) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_brlock: lock on giant offset %llu", - (long long)(offset + len)); - } - - ret = lock(ntdb, rw_type, offset, len, flags & NTDB_LOCK_WAIT); - if (ret != 0) { - /* Generic lock error. errno set by fcntl. - * EAGAIN is an expected return from non-blocking - * locks. */ - if (!(flags & NTDB_LOCK_PROBE) - && (errno != EAGAIN && errno != EINTR)) { - ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_brlock failed (fd=%d) at" - " offset %zu rw_type=%d flags=%d len=%zu:" - " %s", - ntdb->file->fd, (size_t)offset, rw_type, - flags, (size_t)len, strerror(errno)); - } - return NTDB_ERR_LOCK; - } - return NTDB_SUCCESS; -} - -static enum NTDB_ERROR ntdb_brunlock(struct ntdb_context *ntdb, - int rw_type, ntdb_off_t offset, size_t len) -{ - if (ntdb->flags & NTDB_NOLOCK) { - return NTDB_SUCCESS; - } - - if (!check_lock_pid(ntdb, "ntdb_brunlock", false)) - return NTDB_ERR_LOCK; - - if (unlock(ntdb, rw_type, offset, len) == -1) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_brunlock failed (fd=%d) at offset %zu" - " rw_type=%d len=%zu: %s", - ntdb->file->fd, (size_t)offset, rw_type, - (size_t)len, strerror(errno)); - } - return NTDB_SUCCESS; -} - -/* - upgrade a read lock to a write lock. This needs to be handled in a - special way as some OSes (such as solaris) have too conservative - deadlock detection and claim a deadlock when progress can be - made. For those OSes we may loop for a while. 
-*/ -enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start) -{ - int count = 1000; - - if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true)) - return NTDB_ERR_LOCK; - - if (ntdb->file->allrecord_lock.count != 1) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_allrecord_upgrade failed:" - " count %u too high", - ntdb->file->allrecord_lock.count); - } - - if (ntdb->file->allrecord_lock.off != 1) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_allrecord_upgrade failed:" - " already upgraded?"); - } - - if (ntdb->file->allrecord_lock.owner != ntdb) { - return owner_conflict(ntdb, "ntdb_allrecord_upgrade"); - } - - while (count--) { - struct timeval tv; - if (ntdb_brlock(ntdb, F_WRLCK, start, 0, - NTDB_LOCK_WAIT|NTDB_LOCK_PROBE) == NTDB_SUCCESS) { - ntdb->file->allrecord_lock.ltype = F_WRLCK; - ntdb->file->allrecord_lock.off = 0; - return NTDB_SUCCESS; - } - if (errno != EDEADLK) { - break; - } - /* sleep for as short a time as we can - more portable than usleep() */ - tv.tv_sec = 0; - tv.tv_usec = 1; - select(0, NULL, NULL, NULL, &tv); - } - - if (errno != EAGAIN && errno != EINTR) - ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_allrecord_upgrade failed"); - return NTDB_ERR_LOCK; -} - -static struct ntdb_lock *find_nestlock(struct ntdb_context *ntdb, ntdb_off_t offset, - const struct ntdb_context *owner) -{ - unsigned int i; - - for (i=0; ifile->num_lockrecs; i++) { - if (ntdb->file->lockrecs[i].off == offset) { - if (owner && ntdb->file->lockrecs[i].owner != owner) - return NULL; - return &ntdb->file->lockrecs[i]; - } - } - return NULL; -} - -enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb) -{ - enum NTDB_ERROR ecode; - - if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true)) - return NTDB_ERR_LOCK; - - ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK, - false); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - ecode = 
ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); - if (ecode != NTDB_SUCCESS) { - ntdb_allrecord_unlock(ntdb, F_WRLCK); - return ecode; - } - ecode = ntdb_transaction_recover(ntdb); - ntdb_unlock_open(ntdb, F_WRLCK); - ntdb_allrecord_unlock(ntdb, F_WRLCK); - - return ecode; -} - -/* lock an offset in the database. */ -static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb, - ntdb_off_t offset, int ltype, - enum ntdb_lock_flags flags) -{ - struct ntdb_lock *new_lck; - enum NTDB_ERROR ecode; - - assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits) - + ntdb->file->map_size / 8)); - - if (ntdb->flags & NTDB_NOLOCK) - return NTDB_SUCCESS; - - if (!check_lock_pid(ntdb, "ntdb_nest_lock", true)) { - return NTDB_ERR_LOCK; - } - - ntdb->stats.locks++; - - new_lck = find_nestlock(ntdb, offset, NULL); - if (new_lck) { - if (new_lck->owner != ntdb) { - return owner_conflict(ntdb, "ntdb_nest_lock"); - } - - if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_nest_lock:" - " offset %zu has read lock", - (size_t)offset); - } - /* Just increment the struct, posix locks don't stack. 
*/ - new_lck->count++; - return NTDB_SUCCESS; - } - -#if 0 - if (ntdb->file->num_lockrecs - && offset >= NTDB_HASH_LOCK_START - && offset < NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_nest_lock: already have a hash lock?"); - } -#endif - if (ntdb->file->lockrecs == NULL) { - new_lck = ntdb->alloc_fn(ntdb->file, sizeof(*ntdb->file->lockrecs), - ntdb->alloc_data); - } else { - new_lck = (struct ntdb_lock *)ntdb->expand_fn( - ntdb->file->lockrecs, - sizeof(*ntdb->file->lockrecs) - * (ntdb->file->num_lockrecs+1), - ntdb->alloc_data); - } - if (new_lck == NULL) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_nest_lock:" - " unable to allocate %zu lock struct", - ntdb->file->num_lockrecs + 1); - } - ntdb->file->lockrecs = new_lck; - - /* Since fcntl locks don't nest, we do a lock for the first one, - and simply bump the count for future ones */ - ecode = ntdb_brlock(ntdb, ltype, offset, 1, flags); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* First time we grab a lock, perhaps someone died in commit? 
*/ - if (!(flags & NTDB_LOCK_NOCHECK) - && ntdb->file->num_lockrecs == 0) { - ntdb_bool_err berr = ntdb_needs_recovery(ntdb); - if (berr != false) { - ntdb_brunlock(ntdb, ltype, offset, 1); - - if (berr < 0) - return NTDB_OFF_TO_ERR(berr); - ecode = ntdb_lock_and_recover(ntdb); - if (ecode == NTDB_SUCCESS) { - ecode = ntdb_brlock(ntdb, ltype, offset, 1, - flags); - } - if (ecode != NTDB_SUCCESS) { - return ecode; - } - } - } - - ntdb->file->lockrecs[ntdb->file->num_lockrecs].owner = ntdb; - ntdb->file->lockrecs[ntdb->file->num_lockrecs].off = offset; - ntdb->file->lockrecs[ntdb->file->num_lockrecs].count = 1; - ntdb->file->lockrecs[ntdb->file->num_lockrecs].ltype = ltype; - ntdb->file->num_lockrecs++; - - return NTDB_SUCCESS; -} - -static enum NTDB_ERROR ntdb_nest_unlock(struct ntdb_context *ntdb, - ntdb_off_t off, int ltype) -{ - struct ntdb_lock *lck; - enum NTDB_ERROR ecode; - - if (ntdb->flags & NTDB_NOLOCK) - return NTDB_SUCCESS; - - lck = find_nestlock(ntdb, off, ntdb); - if ((lck == NULL) || (lck->count == 0)) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_nest_unlock: no lock for %zu", - (size_t)off); - } - - if (lck->count > 1) { - lck->count--; - return NTDB_SUCCESS; - } - - /* - * This lock has count==1 left, so we need to unlock it in the - * kernel. We don't bother with decrementing the in-memory array - * element, we're about to overwrite it with the last array element - * anyway. - */ - ecode = ntdb_brunlock(ntdb, ltype, off, 1); - - /* - * Shrink the array by overwriting the element just unlocked with the - * last array element. 
- */ - *lck = ntdb->file->lockrecs[--ntdb->file->num_lockrecs]; - - return ecode; -} - -/* - get the transaction lock - */ -enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype) -{ - return ntdb_nest_lock(ntdb, NTDB_TRANSACTION_LOCK, ltype, NTDB_LOCK_WAIT); -} - -/* - release the transaction lock - */ -void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype) -{ - ntdb_nest_unlock(ntdb, NTDB_TRANSACTION_LOCK, ltype); -} - -/* We only need to lock individual bytes, but Linux merges consecutive locks - * so we lock in contiguous ranges. */ -static enum NTDB_ERROR ntdb_lock_gradual(struct ntdb_context *ntdb, - int ltype, enum ntdb_lock_flags flags, - ntdb_off_t off, ntdb_off_t len) -{ - enum NTDB_ERROR ecode; - enum ntdb_lock_flags nb_flags = (flags & ~NTDB_LOCK_WAIT); - - if (len <= 1) { - /* 0 would mean to end-of-file... */ - assert(len != 0); - /* Single hash. Just do blocking lock. */ - return ntdb_brlock(ntdb, ltype, off, len, flags); - } - - /* First we try non-blocking. */ - ecode = ntdb_brlock(ntdb, ltype, off, len, nb_flags); - if (ecode != NTDB_ERR_LOCK) { - return ecode; - } - - /* Try locking first half, then second. */ - ecode = ntdb_lock_gradual(ntdb, ltype, flags, off, len / 2); - if (ecode != NTDB_SUCCESS) - return ecode; - - ecode = ntdb_lock_gradual(ntdb, ltype, flags, - off + len / 2, len - len / 2); - if (ecode != NTDB_SUCCESS) { - ntdb_brunlock(ntdb, ltype, off, len / 2); - } - return ecode; -} - -/* lock/unlock entire database. It can only be upgradable if you have some - * other way of guaranteeing exclusivity (ie. transaction write lock). 
*/ -enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype, - enum ntdb_lock_flags flags, bool upgradable) -{ - enum NTDB_ERROR ecode; - ntdb_bool_err berr; - - if (ntdb->flags & NTDB_NOLOCK) { - return NTDB_SUCCESS; - } - - if (!check_lock_pid(ntdb, "ntdb_allrecord_lock", true)) { - return NTDB_ERR_LOCK; - } - - if (ntdb->file->allrecord_lock.count) { - if (ntdb->file->allrecord_lock.owner != ntdb) { - return owner_conflict(ntdb, "ntdb_allrecord_lock"); - } - - if (ltype == F_RDLCK - || ntdb->file->allrecord_lock.ltype == F_WRLCK) { - ntdb->file->allrecord_lock.count++; - return NTDB_SUCCESS; - } - - /* a global lock of a different type exists */ - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "ntdb_allrecord_lock: already have %s lock", - ntdb->file->allrecord_lock.ltype == F_RDLCK - ? "read" : "write"); - } - - if (ntdb_has_hash_locks(ntdb)) { - /* can't combine global and chain locks */ - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "ntdb_allrecord_lock:" - " already have chain lock"); - } - - if (upgradable && ltype != F_RDLCK) { - /* ntdb error: you can't upgrade a write lock! */ - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_allrecord_lock:" - " can't upgrade a write lock"); - } - - ntdb->stats.locks++; -again: - /* Lock hashes, gradually. */ - ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START, - 1 << ntdb->hash_bits); - if (ecode != NTDB_SUCCESS) - return ecode; - - /* Lock free tables: there to end of file. */ - ecode = ntdb_brlock(ntdb, ltype, - NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits), - 0, flags); - if (ecode != NTDB_SUCCESS) { - ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, - 1 << ntdb->hash_bits); - return ecode; - } - - ntdb->file->allrecord_lock.owner = ntdb; - ntdb->file->allrecord_lock.count = 1; - /* If it's upgradable, it's actually exclusive so we can treat - * it as a write lock. */ - ntdb->file->allrecord_lock.ltype = upgradable ? 
F_WRLCK : ltype; - ntdb->file->allrecord_lock.off = upgradable; - - /* Now check for needing recovery. */ - if (flags & NTDB_LOCK_NOCHECK) - return NTDB_SUCCESS; - - berr = ntdb_needs_recovery(ntdb); - if (likely(berr == false)) - return NTDB_SUCCESS; - - ntdb_allrecord_unlock(ntdb, ltype); - if (berr < 0) - return NTDB_OFF_TO_ERR(berr); - ecode = ntdb_lock_and_recover(ntdb); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - goto again; -} - -enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb, - int ltype, enum ntdb_lock_flags flags) -{ - return ntdb_nest_lock(ntdb, NTDB_OPEN_LOCK, ltype, flags); -} - -void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype) -{ - ntdb_nest_unlock(ntdb, NTDB_OPEN_LOCK, ltype); -} - -bool ntdb_has_open_lock(struct ntdb_context *ntdb) -{ - return !(ntdb->flags & NTDB_NOLOCK) - && find_nestlock(ntdb, NTDB_OPEN_LOCK, ntdb) != NULL; -} - -enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype) -{ - /* Lock doesn't protect data, so don't check (we recurse if we do!) */ - return ntdb_nest_lock(ntdb, NTDB_EXPANSION_LOCK, ltype, - NTDB_LOCK_WAIT | NTDB_LOCK_NOCHECK); -} - -void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype) -{ - ntdb_nest_unlock(ntdb, NTDB_EXPANSION_LOCK, ltype); -} - -/* unlock entire db */ -void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype) -{ - if (ntdb->flags & NTDB_NOLOCK) - return; - - if (ntdb->file->allrecord_lock.count == 0) { - ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "ntdb_allrecord_unlock: not locked!"); - return; - } - - if (ntdb->file->allrecord_lock.owner != ntdb) { - ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "ntdb_allrecord_unlock: not locked by us!"); - return; - } - - /* Upgradable locks are marked as write locks. 
*/ - if (ntdb->file->allrecord_lock.ltype != ltype - && (!ntdb->file->allrecord_lock.off || ltype != F_RDLCK)) { - ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_allrecord_unlock: have %s lock", - ntdb->file->allrecord_lock.ltype == F_RDLCK - ? "read" : "write"); - return; - } - - if (ntdb->file->allrecord_lock.count > 1) { - ntdb->file->allrecord_lock.count--; - return; - } - - ntdb->file->allrecord_lock.count = 0; - ntdb->file->allrecord_lock.ltype = 0; - - ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, 0); -} - -bool ntdb_has_expansion_lock(struct ntdb_context *ntdb) -{ - return find_nestlock(ntdb, NTDB_EXPANSION_LOCK, ntdb) != NULL; -} - -bool ntdb_has_hash_locks(struct ntdb_context *ntdb) -{ - unsigned int i; - - for (i=0; ifile->num_lockrecs; i++) { - if (ntdb->file->lockrecs[i].off >= NTDB_HASH_LOCK_START - && ntdb->file->lockrecs[i].off < (NTDB_HASH_LOCK_START - + (1 << ntdb->hash_bits))) - return true; - } - return false; -} - -static bool ntdb_has_free_lock(struct ntdb_context *ntdb) -{ - unsigned int i; - - if (ntdb->flags & NTDB_NOLOCK) - return false; - - for (i=0; ifile->num_lockrecs; i++) { - if (ntdb->file->lockrecs[i].off - > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)) - return true; - } - return false; -} - -enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb, - unsigned int h, - int ltype) -{ - unsigned l = NTDB_HASH_LOCK_START + h; - - assert(h < (1 << ntdb->hash_bits)); - - /* a allrecord lock allows us to avoid per chain locks */ - if (ntdb->file->allrecord_lock.count) { - if (!check_lock_pid(ntdb, "ntdb_lock_hashes", true)) - return NTDB_ERR_LOCK; - - if (ntdb->file->allrecord_lock.owner != ntdb) - return owner_conflict(ntdb, "ntdb_lock_hashes"); - if (ltype == ntdb->file->allrecord_lock.ltype - || ltype == F_RDLCK) { - return NTDB_SUCCESS; - } - - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "ntdb_lock_hashes:" - " already have %s allrecordlock", - ntdb->file->allrecord_lock.ltype == F_RDLCK - ? 
"read" : "write"); - } - - if (ntdb_has_free_lock(ntdb)) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_lock_hashes: already have free lock"); - } - - if (ntdb_has_expansion_lock(ntdb)) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_lock_hashes:" - " already have expansion lock"); - } - - return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT); -} - -enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb, - unsigned int h, int ltype) -{ - unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1)); - - if (ntdb->flags & NTDB_NOLOCK) - return 0; - - /* a allrecord lock allows us to avoid per chain locks */ - if (ntdb->file->allrecord_lock.count) { - if (ntdb->file->allrecord_lock.ltype == F_RDLCK - && ltype == F_WRLCK) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_unlock_hashes RO allrecord!"); - } - if (ntdb->file->allrecord_lock.owner != ntdb) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, - "ntdb_unlock_hashes:" - " not locked by us!"); - } - return NTDB_SUCCESS; - } - - return ntdb_nest_unlock(ntdb, l, ltype); -} - -/* Hash locks use NTDB_HASH_LOCK_START + .. - * Then we begin; bucket offsets are sizeof(ntdb_len_t) apart, so we divide. - * The result is that on 32 bit systems we don't use lock values > 2^31 on - * files that are less than 4GB. 
- */ -static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb, - ntdb_off_t b_off) -{ - return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits) - + b_off / sizeof(ntdb_off_t); -} - -enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off, - enum ntdb_lock_flags waitflag) -{ - assert(b_off >= sizeof(struct ntdb_header)); - - if (ntdb->flags & NTDB_NOLOCK) - return 0; - - /* a allrecord lock allows us to avoid per chain locks */ - if (ntdb->file->allrecord_lock.count) { - if (!check_lock_pid(ntdb, "ntdb_lock_free_bucket", true)) - return NTDB_ERR_LOCK; - - if (ntdb->file->allrecord_lock.owner != ntdb) { - return owner_conflict(ntdb, "ntdb_lock_free_bucket"); - } - - if (ntdb->file->allrecord_lock.ltype == F_WRLCK) - return 0; - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_lock_free_bucket with" - " read-only allrecordlock!"); - } - -#if 0 /* FIXME */ - if (ntdb_has_expansion_lock(ntdb)) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_lock_free_bucket:" - " already have expansion lock"); - } -#endif - - return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK, - waitflag); -} - -void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off) -{ - if (ntdb->file->allrecord_lock.count) - return; - - ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK); -} - -_PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb) -{ - return ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false); -} - -_PUBLIC_ void ntdb_unlockall(struct ntdb_context *ntdb) -{ - ntdb_allrecord_unlock(ntdb, F_WRLCK); -} - -_PUBLIC_ enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb) -{ - return ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); -} - -_PUBLIC_ void ntdb_unlockall_read(struct ntdb_context *ntdb) -{ - ntdb_allrecord_unlock(ntdb, F_RDLCK); -} - -void ntdb_lock_cleanup(struct ntdb_context *ntdb) -{ - unsigned int i; - - /* We don't want to warn: they're allowed to close 
ntdb after fork. */ - if (!check_lock_pid(ntdb, "ntdb_close", false)) - return; - - while (ntdb->file->allrecord_lock.count - && ntdb->file->allrecord_lock.owner == ntdb) { - ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype); - } - - for (i=0; ifile->num_lockrecs; i++) { - if (ntdb->file->lockrecs[i].owner == ntdb) { - ntdb_nest_unlock(ntdb, - ntdb->file->lockrecs[i].off, - ntdb->file->lockrecs[i].ltype); - i--; - } - } -} diff --git a/ccan/ntdb/man/ntdb.3.xml b/ccan/ntdb/man/ntdb.3.xml deleted file mode 100644 index 3bac9746..00000000 --- a/ccan/ntdb/man/ntdb.3.xml +++ /dev/null @@ -1,132 +0,0 @@ - - - - - ntdb - 3 - Samba - System Administration tools - 4.1 - - - ntdb -A not-so trivial keyword/data database system - - -#include <ntdb.h> - - DESCRIPTION - - If you have previously used the tdb library from Samba, much of - this will seem familiar, but there are some API changes which a - compiler will warn you about if you simply replace 'tdb' with - 'ntdb' in your code! The on-disk format for ntdb is - incompatible with tdb. - - - tdb's API was based on gdbm, and ntdb continues this tradition, - with enhancements. A differences guide is available in the text - file lib/ntdb/doc/TDB_porting.txt in the - SAMBA source tree. - - - NTDB API OVERVIEW - - The complete API is documented in the ntdb.h header, which is - kept up-to-date and recommended reading. - - - Normal usage is to call ntdb_open() to create or open an ntdb - file. ntdb_store() is used to add records, ntdb_fetch() is used - to fetch them. Traversals are supported via callback - (ntdb_traverse()) or iteration (ntdb_firstkey() and - ntdb_nextkey()). Transactions are supported for batching - updates or reads atomically, using ntdb_transaction_start() and - ntdb_transaction_commit(). 
- - Use With Talloc - - ntdb_open() takes an optional linked list of attributes: - in particular you can specify an alternate allocator (such as - talloc): - - -#include <talloc.h> -#include <ntdb.h> - -static void *my_alloc(const void *owner, size_t len, void *priv) -{ - return talloc_size(owner, len); -} - -static void *my_expand(void *old, size_t newlen, void *priv) -{ - return talloc_realloc_size(NULL, old, newlen); -} - -static void my_free(void *old, void *priv) -{ - talloc_free(old); -} - -/* This opens an ntdb file as a talloc object with given parent. */ -struct ntdb_context *ntdb_open_talloc(const void *parent, - const char *filename) -{ - struct ntdb_context *ntdb; - union ntdb_attribute alloc; - - alloc.base.attr = NTDB_ATTRIBUTE_ALLOCATOR; - alloc.base.next = NULL; - alloc.alloc.alloc = my_alloc; - alloc.alloc.expand = my_expand; - alloc.alloc.free = my_free; - - ntdb = ntdb_open(filename, NTDB_DEFAULT, O_RDWR|O_CREAT, 0600, - &alloc); - if (ntdb) { - talloc_steal(parent, ntdb); - talloc_set_name(ntdb, "%s", filename); - } - return ntdb; -} - - - - SEE ALSO - - - - - - AUTHOR - The original tdb software was created by Andrew Tridgell, and - is now developed by the - Samba Team as an Open Source project similar to the way the - Linux kernel is developed. ntdb was derived from tdb, but mostly - rewritten by Rusty Russell. - - - - COPYRIGHT/LICENSE - - Copyright (C) Rusty Russell 2013, IBM Corporation - - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as - published by the Free Software Foundation; either version 3 of the - License, or (at your option) any later version. - - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - - You should have received a copy of the GNU General Public License - along with this program; if not, see http://www.gnu.org/licenses/. - - - diff --git a/ccan/ntdb/man/ntdbbackup.8.xml b/ccan/ntdb/man/ntdbbackup.8.xml deleted file mode 100644 index 04ae85f9..00000000 --- a/ccan/ntdb/man/ntdbbackup.8.xml +++ /dev/null @@ -1,150 +0,0 @@ - - - - - - ntdbbackup - 8 - Samba - System Administration tools - 4.1 - - - - - ntdbbackup - tool for backing up and for validating the integrity of samba .ntdb files - - - - - ntdbbackup - -s suffix - -v - -h - - - - - DESCRIPTION - - This tool is part of the samba - 1 suite. - - ntdbbackup is a tool that may be used to backup samba .ntdb - files. This tool may also be used to verify the integrity of the .ntdb files prior - to samba startup or during normal operation. If it finds file damage and it finds - a prior backup the backup file will be restored. - - - - - - OPTIONS - - - - - -h - - Get help information. - - - - - -s suffix - - The -s option allows the administrator to specify a file - backup extension. This way it is possible to keep a history of ntdb backup - files by using a new suffix for each backup. - - - - - -v - - The -v will check the database for damages (corrupt data) - which if detected causes the backup to be restored. - - - - - - - - - COMMANDS - - GENERAL INFORMATION - - - The ntdbbackup utility can safely be run at any time. It was designed so - that it can be used at any time to validate the integrity of ntdb files, even during Samba - operation. Typical usage for the command will be: - - - ntdbbackup [-s suffix] *.ntdb - - - Before restarting samba the following command may be run to validate .ntdb files: - - - ntdbbackup -v [-s suffix] *.ntdb - - - Note that Samba 4 can use .tdb files instead, so you should - use tdbbackup on those files. - - - - Samba .tdb and .ntdb files are stored in various locations, be sure to run backup all - .(n)tdb files on the system. 
Important files includes: - - - - - secrets.(n)tdb - usual location is in the /usr/local/samba/private - directory, or on some systems in /etc/samba. - - - - passdb.(n)tdb - usual location is in the /usr/local/samba/private - directory, or on some systems in /etc/samba. - - - - *.tdb and *.ntdb located in the /usr/local/samba/var directory or on some - systems in the /var/cache or /var/lib/samba directories. - - - - - - - VERSION - - This man page is correct for version 4 of the Samba suite. - - - - SEE ALSO - - - tdbbackup(8), ntdbrestore(8) - - - - - AUTHOR - - - The original Samba software and related utilities were created by Andrew Tridgell. - Samba is now developed by the Samba Team as an Open Source project similar to the way - the Linux kernel is developed. - - - The ntdbbackup man page was written by Rusty Russell, - based on the tdbbackup man page by John H Terpstra. - - - diff --git a/ccan/ntdb/man/ntdbdump.8.xml b/ccan/ntdb/man/ntdbdump.8.xml deleted file mode 100644 index 45d1be19..00000000 --- a/ccan/ntdb/man/ntdbdump.8.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - - - ntdbdump - 8 - Samba - System Administration tools - 4.1 - - - - - ntdbdump - tool for printing the contents of an NTDB file - - - - - ntdbdump - -k keyname - -e - -h - filename - - - - - DESCRIPTION - - This tool is part of the samba - 1 suite. - - ntdbdump is a very simple utility that 'dumps' the - contents of a NTDB (New Trivial DataBase) file to standard output in a - human-readable format. - - - This tool can be used when debugging problems with NTDB files. It is - intended for those who are somewhat familiar with Samba internals. - - - - - OPTIONS - - - - - -h - - Get help information. - - - - - -k keyname - - The -k option restricts dumping to a single key, if found. - - - - - - - - SEE ALSO - - - tdbdump(8), ntdbtool(8) - - - - - VERSION - - This man page is correct for version 4 of the Samba suite. 
- - - - AUTHOR - - - The original Samba software and related utilities were created by Andrew Tridgell. - Samba is now developed by the Samba Team as an Open Source project similar to the way - the Linux kernel is developed. - - - The ntdbdump man page was written by Rusty Russell, base on the tdbdump man page by Jelmer Vernooij. - - - diff --git a/ccan/ntdb/man/ntdbrestore.8.xml b/ccan/ntdb/man/ntdbrestore.8.xml deleted file mode 100644 index 713aabaa..00000000 --- a/ccan/ntdb/man/ntdbrestore.8.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - - - ntdbrestore - 8 - Samba - System Administration tools - 4.1 - - - - - ntdbrestore - tool for creating a NTDB file out of a ntdbdump output - - - - - ntdbrestore - ntdbfilename - - - - - DESCRIPTION - - This tool is part of the samba - 1 suite. - - ntdbrestore is a very simple utility that 'restores' the - contents of dump file into NTDB (New Trivial DataBase) file. The dump file is obtained from the ntdbdump or tdbdump - commands. - - - This tool wait on the standard input for the content of the dump and will write the ntdb in the ntdbfilename - parameter. - - This tool can be used to translate between ntdb and tdb files by dumping and restoring. - - - - - - VERSION - - This man page is correct for version 4 of the Samba suite. - - - - SEE ALSO - - - ntdbdump(8), tdbrestore(8) - - - - - AUTHOR - - - The original Samba software and related utilities were created by Andrew Tridgell. - Samba is now developed by the Samba Team as an Open Source project similar to the way - the Linux kernel is developed. - - ntdbrestore was written by Rusty Russell based on tdbrestore, which was initially written by Volker Lendecke based on an - idea by Simon McVittie. - - - The ntdbrestore man page was written by Rusty Russell, based on the tdbrestore man page by Matthieu Patou. 
- - - diff --git a/ccan/ntdb/man/ntdbtool.8.xml b/ccan/ntdb/man/ntdbtool.8.xml deleted file mode 100644 index 7e6530c4..00000000 --- a/ccan/ntdb/man/ntdbtool.8.xml +++ /dev/null @@ -1,247 +0,0 @@ - - - - - - ntdbtool - 8 - Samba - System Administration tools - 4.1 - - - - - ntdbtool - manipulate the contents NTDB files - - - - - - ntdbtool - - NTDBFILE - - - COMMANDS - - - - - - - DESCRIPTION - - This tool is part of the - samba - 1 suite. - - ntdbtool a tool for displaying and - altering the contents of Samba NTDB (New Trivial DataBase) files. Each - of the commands listed below can be entered interactively or - provided on the command line. - - - - - - COMMANDS - - - - - - NTDBFILE - Create a new database named - NTDBFILE. - - - - - - NTDBFILE - Open an existing database named - NTDBFILE. - - - - - - Erase the current database. - - - - - - Dump the current database as strings. - - - - - - Dump the current database as connection records. - - - - - - Dump the current database keys as strings. - - - - - - Dump the current database keys as hex values. - - - - - - Print summary information about the - current database. - - - - - - KEY - DATA - - Insert a record into the - current database. - - - - - - KEY - NTDBFILE - - Move a record from the - current database into NTDBFILE. - - - - - - KEY - DATA - - Store (replace) a record in the - current database. - - - - - - KEY - - Show a record by key. - - - - - - KEY - - Delete a record by key. - - - - - - - Print the current database hash table and free list. - - - - - - - Print the current database and free list. - - - - - - COMMAND - - Execute the given system command. - - - - - - - - Print the first record in the current database. - - - - - - - - Print the next record in the current database. - - - - - - - - Check the integrity of the current database. - - - - - - - - Repack a database using a temporary file to remove fragmentation. - - - - - - - - Exit ntdbtool. 
- - - - - - - - SEE ALSO - - - tdbtool(8) - - - - - CAVEATS - The contents of the Samba NTDB files are private - to the implementation and should not be altered with - ntdbtool. - - - - - VERSION - This man page is correct for version 4.0 of the Samba suite. - - - - AUTHOR - - The original Samba software and related utilities were - created by Andrew Tridgell. Samba is now developed by the - Samba Team as an Open Source project similar to the way the - Linux kernel is developed. - - - diff --git a/ccan/ntdb/ntdb.c b/ccan/ntdb/ntdb.c deleted file mode 100644 index 51fbbcae..00000000 --- a/ccan/ntdb/ntdb.c +++ /dev/null @@ -1,601 +0,0 @@ - /* - Trivial Database 2: fetch, store and misc routines. - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . 
-*/ -#include "private.h" -#ifndef HAVE_LIBREPLACE -#include -#endif - -static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb, - ntdb_off_t off, - ntdb_len_t keylen, - ntdb_len_t datalen, - struct ntdb_used_record *rec) -{ - uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); - enum NTDB_ERROR ecode; - - ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen, - keylen + dataroom); - if (ecode == NTDB_SUCCESS) { - ecode = ntdb_write_convert(ntdb, off, rec, sizeof(*rec)); - } - return ecode; -} - -static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb, - struct hash_info *h, - NTDB_DATA key, NTDB_DATA dbuf, - ntdb_off_t old_off, ntdb_len_t old_room, - bool growing) -{ - ntdb_off_t new_off; - enum NTDB_ERROR ecode; - - /* Allocate a new record. */ - new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing); - if (NTDB_OFF_IS_ERR(new_off)) { - return NTDB_OFF_TO_ERR(new_off); - } - - /* We didn't like the existing one: remove it. */ - if (old_off) { - ntdb->stats.frees++; - ecode = add_free_record(ntdb, old_off, - sizeof(struct ntdb_used_record) - + key.dsize + old_room, - NTDB_LOCK_WAIT, true); - if (ecode == NTDB_SUCCESS) - ecode = replace_in_hash(ntdb, h, new_off); - } else { - ecode = add_to_hash(ntdb, h, new_off); - } - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - new_off += sizeof(struct ntdb_used_record); - ecode = ntdb->io->twrite(ntdb, new_off, key.dptr, key.dsize); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - new_off += key.dsize; - ecode = ntdb->io->twrite(ntdb, new_off, dbuf.dptr, dbuf.dsize); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - if (ntdb->flags & NTDB_SEQNUM) - ntdb_inc_seqnum(ntdb); - - return NTDB_SUCCESS; -} - -static enum NTDB_ERROR update_data(struct ntdb_context *ntdb, - ntdb_off_t off, - NTDB_DATA dbuf, - ntdb_len_t extra) -{ - enum NTDB_ERROR ecode; - - ecode = ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize); - if (ecode == NTDB_SUCCESS && extra) { - /* Put 
a zero in; future versions may append other data. */ - ecode = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1); - } - if (ntdb->flags & NTDB_SEQNUM) - ntdb_inc_seqnum(ntdb); - - return ecode; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, - NTDB_DATA key, NTDB_DATA dbuf, int flag) -{ - struct hash_info h; - ntdb_off_t off; - ntdb_len_t old_room = 0; - struct ntdb_used_record rec; - enum NTDB_ERROR ecode; - - off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - - /* Now we have lock on this hash bucket. */ - if (flag == NTDB_INSERT) { - if (off) { - ecode = NTDB_ERR_EXISTS; - goto out; - } - } else { - if (off) { - old_room = rec_data_length(&rec) - + rec_extra_padding(&rec); - if (old_room >= dbuf.dsize) { - /* Can modify in-place. Easy! */ - ecode = update_rec_hdr(ntdb, off, - key.dsize, dbuf.dsize, - &rec); - if (ecode != NTDB_SUCCESS) { - goto out; - } - ecode = update_data(ntdb, - off + sizeof(rec) - + key.dsize, dbuf, - old_room - dbuf.dsize); - if (ecode != NTDB_SUCCESS) { - goto out; - } - ntdb_unlock_hash(ntdb, h.h, F_WRLCK); - return NTDB_SUCCESS; - } - } else { - if (flag == NTDB_MODIFY) { - /* if the record doesn't exist and we - are in NTDB_MODIFY mode then we should fail - the store */ - ecode = NTDB_ERR_NOEXIST; - goto out; - } - } - } - - /* If we didn't use the old record, this implies we're growing. 
*/ - ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off); -out: - ntdb_unlock_hash(ntdb, h.h, F_WRLCK); - return ecode; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, - NTDB_DATA key, NTDB_DATA dbuf) -{ - struct hash_info h; - ntdb_off_t off; - struct ntdb_used_record rec; - ntdb_len_t old_room = 0, old_dlen; - unsigned char *newdata; - NTDB_DATA new_dbuf; - enum NTDB_ERROR ecode; - - off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - - if (off) { - old_dlen = rec_data_length(&rec); - old_room = old_dlen + rec_extra_padding(&rec); - - /* Fast path: can append in place. */ - if (rec_extra_padding(&rec) >= dbuf.dsize) { - ecode = update_rec_hdr(ntdb, off, key.dsize, - old_dlen + dbuf.dsize, &rec); - if (ecode != NTDB_SUCCESS) { - goto out; - } - - off += sizeof(rec) + key.dsize + old_dlen; - ecode = update_data(ntdb, off, dbuf, - rec_extra_padding(&rec)); - goto out; - } - - /* Slow path. */ - newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize, - ntdb->alloc_data); - if (!newdata) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_append:" - " failed to allocate %zu bytes", - (size_t)(key.dsize + old_dlen - + dbuf.dsize)); - goto out; - } - ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize, - newdata, old_dlen); - if (ecode != NTDB_SUCCESS) { - goto out_free_newdata; - } - memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize); - new_dbuf.dptr = newdata; - new_dbuf.dsize = old_dlen + dbuf.dsize; - } else { - newdata = NULL; - new_dbuf = dbuf; - } - - /* If they're using ntdb_append(), it implies they're growing record. 
*/ - ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true); - -out_free_newdata: - ntdb->free_fn(newdata, ntdb->alloc_data); -out: - ntdb_unlock_hash(ntdb, h.h, F_WRLCK); - return ecode; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key, - NTDB_DATA *data) -{ - ntdb_off_t off; - struct ntdb_used_record rec; - struct hash_info h; - enum NTDB_ERROR ecode; - const char *keyp; - - off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - - if (!off) { - ecode = NTDB_ERR_NOEXIST; - } else { - data->dsize = rec_data_length(&rec); - data->dptr = ntdb->alloc_fn(ntdb, data->dsize, ntdb->alloc_data); - if (unlikely(!data->dptr)) { - ecode = NTDB_ERR_OOM; - } else { - memcpy(data->dptr, keyp + key.dsize, data->dsize); - ecode = NTDB_SUCCESS; - } - ntdb_access_release(ntdb, keyp); - } - - ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - return ecode; -} - -_PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key) -{ - ntdb_off_t off; - struct ntdb_used_record rec; - struct hash_info h; - - off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL); - if (NTDB_OFF_IS_ERR(off)) { - return false; - } - ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - - return off ? true : false; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key) -{ - ntdb_off_t off; - struct ntdb_used_record rec; - struct hash_info h; - enum NTDB_ERROR ecode; - - off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - - if (!off) { - ecode = NTDB_ERR_NOEXIST; - goto unlock; - } - - ecode = delete_from_hash(ntdb, &h); - if (ecode != NTDB_SUCCESS) { - goto unlock; - } - - /* Free the deleted entry. 
*/ - ntdb->stats.frees++; - ecode = add_free_record(ntdb, off, - sizeof(struct ntdb_used_record) - + rec_key_length(&rec) - + rec_data_length(&rec) - + rec_extra_padding(&rec), - NTDB_LOCK_WAIT, true); - - if (ntdb->flags & NTDB_SEQNUM) - ntdb_inc_seqnum(ntdb); - -unlock: - ntdb_unlock_hash(ntdb, h.h, F_WRLCK); - return ecode; -} - -_PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb) -{ - return ntdb->flags; -} - -static bool inside_transaction(const struct ntdb_context *ntdb) -{ - return ntdb->transaction != NULL; -} - -static bool readonly_changable(struct ntdb_context *ntdb, const char *caller) -{ - if (inside_transaction(ntdb)) { - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "%s: can't change" - " NTDB_RDONLY inside transaction", - caller); - return false; - } - return true; -} - -_PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag) -{ - if (ntdb->flags & NTDB_INTERNAL) { - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_add_flag: internal db"); - return; - } - switch (flag) { - case NTDB_NOLOCK: - ntdb->flags |= NTDB_NOLOCK; - break; - case NTDB_NOMMAP: - if (ntdb->file->direct_count) { - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_add_flag: Can't get NTDB_NOMMAP from" - " ntdb_parse_record!"); - return; - } - ntdb->flags |= NTDB_NOMMAP; -#ifndef HAVE_INCOHERENT_MMAP - ntdb_munmap(ntdb); -#endif - break; - case NTDB_NOSYNC: - ntdb->flags |= NTDB_NOSYNC; - break; - case NTDB_SEQNUM: - ntdb->flags |= NTDB_SEQNUM; - break; - case NTDB_ALLOW_NESTING: - ntdb->flags |= NTDB_ALLOW_NESTING; - break; - case NTDB_RDONLY: - if (readonly_changable(ntdb, "ntdb_add_flag")) - ntdb->flags |= NTDB_RDONLY; - break; - default: - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_add_flag: Unknown flag %u", flag); - } -} - -_PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag) -{ - if (ntdb->flags & NTDB_INTERNAL) { - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - 
"ntdb_remove_flag: internal db"); - return; - } - switch (flag) { - case NTDB_NOLOCK: - ntdb->flags &= ~NTDB_NOLOCK; - break; - case NTDB_NOMMAP: - ntdb->flags &= ~NTDB_NOMMAP; -#ifndef HAVE_INCOHERENT_MMAP - /* If mmap incoherent, we were mmaping anyway. */ - ntdb_mmap(ntdb); -#endif - break; - case NTDB_NOSYNC: - ntdb->flags &= ~NTDB_NOSYNC; - break; - case NTDB_SEQNUM: - ntdb->flags &= ~NTDB_SEQNUM; - break; - case NTDB_ALLOW_NESTING: - ntdb->flags &= ~NTDB_ALLOW_NESTING; - break; - case NTDB_RDONLY: - if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) { - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_remove_flag: can't" - " remove NTDB_RDONLY on ntdb" - " opened with O_RDONLY"); - break; - } - if (readonly_changable(ntdb, "ntdb_remove_flag")) - ntdb->flags &= ~NTDB_RDONLY; - break; - default: - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_remove_flag: Unknown flag %u", - flag); - } -} - -_PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode) -{ - /* Gcc warns if you miss a case in the switch, so use that. */ - switch (NTDB_ERR_TO_OFF(ecode)) { - case NTDB_ERR_TO_OFF(NTDB_SUCCESS): return "Success"; - case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): return "Corrupt database"; - case NTDB_ERR_TO_OFF(NTDB_ERR_IO): return "IO Error"; - case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): return "Locking error"; - case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): return "Out of memory"; - case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS): return "Record exists"; - case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): return "Invalid parameter"; - case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST): return "Record does not exist"; - case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY): return "write not permitted"; - } - return "Invalid error code"; -} - -enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb, - enum NTDB_ERROR ecode, - enum ntdb_log_level level, - const char *fmt, ...) -{ - char *message; - va_list ap; - size_t len; - /* ntdb_open paths care about errno, so save it. 
*/ - int saved_errno = errno; - - if (!ntdb->log_fn) - return ecode; - - va_start(ap, fmt); - len = vsnprintf(NULL, 0, fmt, ap); - va_end(ap); - - message = ntdb->alloc_fn(ntdb, len + 1, ntdb->alloc_data); - if (!message) { - ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM, - "out of memory formatting message:", ntdb->log_data); - ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data); - } else { - va_start(ap, fmt); - vsnprintf(message, len+1, fmt, ap); - va_end(ap); - ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data); - ntdb->free_fn(message, ntdb->alloc_data); - } - errno = saved_errno; - return ecode; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb, - NTDB_DATA key, - enum NTDB_ERROR (*parse)(NTDB_DATA k, - NTDB_DATA d, - void *data), - void *data) -{ - ntdb_off_t off; - struct ntdb_used_record rec; - struct hash_info h; - enum NTDB_ERROR ecode; - const char *keyp; - - off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - - if (!off) { - ecode = NTDB_ERR_NOEXIST; - } else { - unsigned int old_flags; - NTDB_DATA d = ntdb_mkdata(keyp + key.dsize, - rec_data_length(&rec)); - - /* - * Make sure they don't try to write db, since they - * have read lock! They can if they've done - * ntdb_lockall(): if it was ntdb_lockall_read, that'll - * stop them doing a write operation anyway. 
- */ - old_flags = ntdb->flags; - if (!ntdb->file->allrecord_lock.count && - !(ntdb->flags & NTDB_NOLOCK)) { - ntdb->flags |= NTDB_RDONLY; - } - ecode = parse(key, d, data); - ntdb->flags = old_flags; - ntdb_access_release(ntdb, keyp); - } - - ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - return ecode; -} - -_PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb) -{ - return ntdb->name; -} - -_PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb) -{ - return ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum)); -} - - -_PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb) -{ - return ntdb->file->fd; -} - -struct traverse_state { - enum NTDB_ERROR error; - struct ntdb_context *dest_db; -}; - -/* - traverse function for repacking - */ -static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data, - struct traverse_state *state) -{ - state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT); - if (state->error != NTDB_SUCCESS) { - return -1; - } - return 0; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb) -{ - struct ntdb_context *tmp_db; - struct traverse_state state; - - state.error = ntdb_transaction_start(ntdb); - if (state.error != NTDB_SUCCESS) { - return state.error; - } - - tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL); - if (tmp_db == NULL) { - state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - __location__ - " Failed to create tmp_db"); - ntdb_transaction_cancel(ntdb); - return state.error; - } - - state.dest_db = tmp_db; - if (ntdb_traverse(ntdb, repack_traverse, &state) < 0) { - goto fail; - } - - state.error = ntdb_wipe_all(ntdb); - if (state.error != NTDB_SUCCESS) { - goto fail; - } - - state.dest_db = ntdb; - if (ntdb_traverse(tmp_db, repack_traverse, &state) < 0) { - goto fail; - } - - ntdb_close(tmp_db); - return ntdb_transaction_commit(ntdb); - -fail: - ntdb_transaction_cancel(ntdb); - ntdb_close(tmp_db); - return state.error; -} diff --git 
a/ccan/ntdb/ntdb.h b/ccan/ntdb/ntdb.h deleted file mode 100644 index a0c229c1..00000000 --- a/ccan/ntdb/ntdb.h +++ /dev/null @@ -1,933 +0,0 @@ -#ifndef CCAN_NTDB_H -#define CCAN_NTDB_H - -/* - NTDB: trivial database library version 2 - - Copyright (C) Andrew Tridgell 1999-2004 - Copyright (C) Rusty Russell 2010-2012 - - ** NOTE! The following LGPL license applies to the ntdb - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef HAVE_LIBREPLACE -#include -#include -#else -#include "config.h" -#if HAVE_FILE_OFFSET_BITS -#define _FILE_OFFSET_BITS 64 -#endif - -#ifndef _PUBLIC_ -#ifdef HAVE_VISIBILITY_ATTR -#define _PUBLIC_ __attribute__((visibility("default"))) -#else -#define _PUBLIC_ -#endif -#endif - -/* For mode_t */ -#include -/* For O_* flags. */ -#include -/* For sig_atomic_t. */ -#include -/* For uint64_t */ -#include -/* For bool */ -#include -/* For memcmp */ -#include -#endif - -#include -#include -#include - -union ntdb_attribute; -struct ntdb_context; - -/** - * struct TDB_DATA - (n)tdb data blob - * - * To ease compatibility, we use 'struct TDB_DATA' from tdb.h, so if - * you want to include both tdb.h and ntdb.h, you need to #include - * tdb.h first. 
- */ -#ifndef __TDB_H__ -struct TDB_DATA { - unsigned char *dptr; - size_t dsize; -}; -#endif - -typedef struct TDB_DATA NTDB_DATA; - -/** - * ntdb_open - open a database file - * @name: the file name (or database name if flags contains NTDB_INTERNAL) - * @ntdb_flags: options for this database - * @open_flags: flags argument for ntdb's open() call. - * @mode: mode argument for ntdb's open() call. - * @attributes: linked list of extra attributes for this ntdb. - * - * This call opens (and potentially creates) a database file. - * Multiple processes can have the NTDB file open at once. - * - * On failure it will return NULL, and set errno: it may also call - * any log attribute found in @attributes. - * - * See also: - * union ntdb_attribute - */ -struct ntdb_context *ntdb_open(const char *name, int ntdb_flags, - int open_flags, mode_t mode, - union ntdb_attribute *attributes); - - -/* flags for ntdb_open() */ -#define NTDB_DEFAULT 0 /* just a readability place holder */ -#define NTDB_INTERNAL 2 /* don't store on disk */ -#define NTDB_NOLOCK 4 /* don't do any locking */ -#define NTDB_NOMMAP 8 /* don't use mmap */ -#define NTDB_CONVERT 16 /* convert endian */ -#define NTDB_NOSYNC 64 /* don't use synchronous transactions */ -#define NTDB_SEQNUM 128 /* maintain a sequence number */ -#define NTDB_ALLOW_NESTING 256 /* fake nested transactions */ -#define NTDB_RDONLY 512 /* implied by O_RDONLY */ -#define NTDB_CANT_CHECK 2048 /* has a feature which we don't understand */ - -/** - * ntdb_close - close and free a ntdb. - * @ntdb: the ntdb context returned from ntdb_open() - * - * This always succeeds, in that @ntdb is unusable after this call. But if - * some unexpected error occurred while closing, it will return non-zero - * (the only clue as to cause will be via the log attribute). 
- */ -int ntdb_close(struct ntdb_context *ntdb); - -/** - * enum NTDB_ERROR - error returns for NTDB - * - * See Also: - * ntdb_errorstr() - */ -enum NTDB_ERROR { - NTDB_SUCCESS = 0, /* No error. */ - NTDB_ERR_CORRUPT = -1, /* We read the db, and it was bogus. */ - NTDB_ERR_IO = -2, /* We couldn't read/write the db. */ - NTDB_ERR_LOCK = -3, /* Locking failed. */ - NTDB_ERR_OOM = -4, /* Out of Memory. */ - NTDB_ERR_EXISTS = -5, /* The key already exists. */ - NTDB_ERR_NOEXIST = -6, /* The key does not exist. */ - NTDB_ERR_EINVAL = -7, /* You're using it wrong. */ - NTDB_ERR_RDONLY = -8, /* The database is read-only. */ - NTDB_ERR_LAST = NTDB_ERR_RDONLY -}; - -/** - * ntdb_store - store a key/value pair in a ntdb. - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key - * @dbuf: the data to associate with the key. - * @flag: NTDB_REPLACE, NTDB_INSERT or NTDB_MODIFY. - * - * This inserts (or overwrites) a key/value pair in the NTDB. If flag - * is NTDB_REPLACE, it doesn't matter whether the key exists or not; - * NTDB_INSERT means it must not exist (returns NTDB_ERR_EXISTS otherwise), - * and NTDB_MODIFY means it must exist (returns NTDB_ERR_NOEXIST otherwise). - * - * On success, this returns NTDB_SUCCESS. - * - * See also: - * ntdb_fetch, ntdb_transaction_start, ntdb_append, ntdb_delete. - */ -enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, - NTDB_DATA key, - NTDB_DATA dbuf, - int flag); - -/* flags to ntdb_store() */ -#define NTDB_REPLACE 1 /* A readability place holder */ -#define NTDB_INSERT 2 /* Don't overwrite an existing entry */ -#define NTDB_MODIFY 3 /* Don't create an existing entry */ - -/** - * ntdb_fetch - fetch a value from a ntdb. - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key - * @data: pointer to data. - * - * This looks up a key in the database and sets it in @data. - * - * If it returns NTDB_SUCCESS, the key was found: it is your - * responsibility to call free() on @data->dptr. 
- * - * Otherwise, it returns an error (usually, NTDB_ERR_NOEXIST) and @data is - * undefined. - */ -enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key, - NTDB_DATA *data); - -/** - * ntdb_errorstr - map the ntdb error onto a constant readable string - * @ecode: the enum NTDB_ERROR to map. - * - * This is useful for displaying errors to users. - */ -const char *ntdb_errorstr(enum NTDB_ERROR ecode); - -/** - * ntdb_append - append a value to a key/value pair in a ntdb. - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key - * @dbuf: the data to append. - * - * This is equivalent to fetching a record, reallocating .dptr to add the - * data, and writing it back, only it's much more efficient. If the key - * doesn't exist, it's equivalent to ntdb_store (with an additional hint that - * you expect to expand the record in future). - * - * See Also: - * ntdb_fetch(), ntdb_store() - */ -enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, - NTDB_DATA key, NTDB_DATA dbuf); - -/** - * ntdb_delete - delete a key from a ntdb. - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key to delete. - * - * Returns NTDB_SUCCESS on success, or an error (usually NTDB_ERR_NOEXIST). - * - * See Also: - * ntdb_fetch(), ntdb_store() - */ -enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key); - -/** - * ntdb_exists - does a key exist in the database? - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key to search for. - * - * Returns true if it exists, or false if it doesn't or any other error. - */ -bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key); - -/** - * ntdb_deq - are NTDB_DATA equal? 
- * @a: one NTDB_DATA - * @b: another NTDB_DATA - */ -static inline bool ntdb_deq(NTDB_DATA a, NTDB_DATA b) -{ - return a.dsize == b.dsize && memcmp(a.dptr, b.dptr, a.dsize) == 0; -} - -/** - * ntdb_mkdata - make a NTDB_DATA from const data - * @p: the constant pointer - * @len: the length - * - * As the dptr member of NTDB_DATA is not constant, you need to - * cast it. This function keeps thost casts in one place, as well as - * suppressing the warning some compilers give when casting away a - * qualifier (eg. gcc with -Wcast-qual) - */ -static inline NTDB_DATA ntdb_mkdata(const void *p, size_t len) -{ - NTDB_DATA d; - d.dptr = cast_const(void *, p); - d.dsize = len; - return d; -} - -/** - * ntdb_transaction_start - start a transaction - * @ntdb: the ntdb context returned from ntdb_open() - * - * This begins a series of atomic operations. Other processes will be able - * to read the ntdb, but not alter it (they will block), nor will they see - * any changes until ntdb_transaction_commit() is called. - * - * Note that if the NTDB_ALLOW_NESTING flag is set, a ntdb_transaction_start() - * within a transaction will succeed, but it's not a real transaction: - * (1) An inner transaction which is committed is not actually committed until - * the outer transaction is; if the outer transaction is cancelled, the - * inner ones are discarded. - * (2) ntdb_transaction_cancel() marks the outer transaction as having an error, - * so the final ntdb_transaction_commit() will fail. - * (3) the outer transaction will see the results of the inner transaction. - * - * See Also: - * ntdb_transaction_cancel, ntdb_transaction_commit. - */ -enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb); - -/** - * ntdb_transaction_cancel - abandon a transaction - * @ntdb: the ntdb context returned from ntdb_open() - * - * This aborts a transaction, discarding any changes which were made. - * ntdb_close() does this implicitly. 
- */ -void ntdb_transaction_cancel(struct ntdb_context *ntdb); - -/** - * ntdb_transaction_commit - commit a transaction - * @ntdb: the ntdb context returned from ntdb_open() - * - * This completes a transaction, writing any changes which were made. - * - * fsync() is used to commit the transaction (unless NTDB_NOSYNC is set), - * making it robust against machine crashes, but very slow compared to - * other NTDB operations. - * - * A failure can only be caused by unexpected errors (eg. I/O or - * memory); this is no point looping on transaction failure. - * - * See Also: - * ntdb_transaction_prepare_commit() - */ -enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb); - -/** - * ntdb_transaction_prepare_commit - prepare to commit a transaction - * @ntdb: the ntdb context returned from ntdb_open() - * - * This ensures we have the resources to commit a transaction (using - * ntdb_transaction_commit): if this succeeds then a transaction will only - * fail if the write() or fsync() calls fail. - * - * If this fails you must still call ntdb_transaction_cancel() to cancel - * the transaction. - * - * See Also: - * ntdb_transaction_commit() - */ -enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb); - -/** - * ntdb_traverse - traverse a NTDB - * @ntdb: the ntdb context returned from ntdb_open() - * @fn: the function to call for every key/value pair (or NULL) - * @p: the pointer to hand to @f - * - * This walks the NTDB until all they keys have been traversed, or @fn - * returns non-zero. If the traverse function or other processes are - * changing data or adding or deleting keys, the traverse may be - * unreliable: keys may be skipped or (rarely) visited twice. - * - * There is one specific exception: the special case of deleting the - * current key does not undermine the reliability of the traversal. - * - * On success, returns the number of keys iterated. On error returns - * a negative enum NTDB_ERROR value. 
- */ -#define ntdb_traverse(ntdb, fn, p) \ - ntdb_traverse_(ntdb, typesafe_cb_preargs(int, void *, (fn), (p), \ - struct ntdb_context *, \ - NTDB_DATA, NTDB_DATA), (p)) - -int64_t ntdb_traverse_(struct ntdb_context *ntdb, - int (*fn)(struct ntdb_context *, - NTDB_DATA, NTDB_DATA, void *), void *p); - -/** - * ntdb_parse_record - operate directly on data in the database. - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key whose record we should hand to @parse - * @parse: the function to call for the data - * @data: the private pointer to hand to @parse (types must match). - * - * This avoids a copy for many cases, by handing you a pointer into - * the memory-mapped database. It also locks the record to prevent - * other accesses at the same time, so it won't change. - * - * Within the @parse callback you can perform read operations on the - * database, but no write operations: no ntdb_store() or - * ntdb_delete(), for example. The exception is if you call - * ntdb_lockall() before ntdb_parse_record(). - * - * Never alter the data handed to parse()! - */ -#define ntdb_parse_record(ntdb, key, parse, data) \ - ntdb_parse_record_((ntdb), (key), \ - typesafe_cb_preargs(enum NTDB_ERROR, void *, \ - (parse), (data), \ - NTDB_DATA, NTDB_DATA), (data)) - -enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb, - NTDB_DATA key, - enum NTDB_ERROR (*parse)(NTDB_DATA k, - NTDB_DATA d, - void *data), - void *data); - -/** - * ntdb_get_seqnum - get a database sequence number - * @ntdb: the ntdb context returned from ntdb_open() - * - * This returns a sequence number: any change to the database from a - * ntdb context opened with the NTDB_SEQNUM flag will cause that number - * to increment. Note that the incrementing is unreliable (it is done - * without locking), so this is only useful as an optimization. - * - * For example, you may have a regular database backup routine which - * does not operate if the sequence number is unchanged. 
In the - * unlikely event of a failed increment, it will be backed up next - * time any way. - * - * Returns an enum NTDB_ERROR (ie. negative) on error. - */ -int64_t ntdb_get_seqnum(struct ntdb_context *ntdb); - -/** - * ntdb_firstkey - get the "first" key in a NTDB - * @ntdb: the ntdb context returned from ntdb_open() - * @key: pointer to key. - * - * This returns an arbitrary key in the database; with ntdb_nextkey() it allows - * open-coded traversal of the database, though it is slightly less efficient - * than ntdb_traverse. - * - * It is your responsibility to free @key->dptr on success. - * - * Returns NTDB_ERR_NOEXIST if the database is empty. - */ -enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key); - -/** - * ntdb_nextkey - get the "next" key in a NTDB - * @ntdb: the ntdb context returned from ntdb_open() - * @key: a key returned by ntdb_firstkey() or ntdb_nextkey(). - * - * This returns another key in the database; it will free @key.dptr for - * your convenience. - * - * Returns NTDB_ERR_NOEXIST if there are no more keys. - */ -enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key); - -/** - * ntdb_chainlock - lock a record in the NTDB - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key to lock. - * - * This prevents any access occurring to a group of keys including @key, - * even if @key does not exist. This allows primitive atomic updates of - * records without using transactions. - * - * You cannot begin a transaction while holding a ntdb_chainlock(), nor can - * you do any operations on any other keys in the database. This also means - * that you cannot hold more than one ntdb_chainlock() at a time. - * - * See Also: - * ntdb_chainunlock() - */ -enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key); - -/** - * ntdb_chainunlock - unlock a record in the NTDB - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key to unlock. 
- * - * The key must have previously been locked by ntdb_chainlock(). - */ -void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key); - -/** - * ntdb_chainlock_read - lock a record in the NTDB, for reading - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key to lock. - * - * This prevents any changes from occurring to a group of keys including @key, - * even if @key does not exist. This allows primitive atomic updates of - * records without using transactions. - * - * You cannot begin a transaction while holding a ntdb_chainlock_read(), nor can - * you do any operations on any other keys in the database. This also means - * that you cannot hold more than one ntdb_chainlock()/read() at a time. - * - * See Also: - * ntdb_chainlock() - */ -enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key); - -/** - * ntdb_chainunlock_read - unlock a record in the NTDB for reading - * @ntdb: the ntdb context returned from ntdb_open() - * @key: the key to unlock. - * - * The key must have previously been locked by ntdb_chainlock_read(). - */ -void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key); - -/** - * ntdb_lockall - lock the entire NTDB - * @ntdb: the ntdb context returned from ntdb_open() - * - * You cannot hold a ntdb_chainlock while calling this. It nests, so you - * must call ntdb_unlockall as many times as you call ntdb_lockall. - */ -enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb); - -/** - * ntdb_unlockall - unlock the entire NTDB - * @ntdb: the ntdb context returned from ntdb_open() - */ -void ntdb_unlockall(struct ntdb_context *ntdb); - -/** - * ntdb_lockall_read - lock the entire NTDB for reading - * @ntdb: the ntdb context returned from ntdb_open() - * - * This prevents others writing to the database, eg. ntdb_delete, ntdb_store, - * ntdb_append, but not ntdb_fetch. - * - * You cannot hold a ntdb_chainlock while calling this. 
It nests, so you - * must call ntdb_unlockall_read as many times as you call ntdb_lockall_read. - */ -enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb); - -/** - * ntdb_unlockall_read - unlock the entire NTDB for reading - * @ntdb: the ntdb context returned from ntdb_open() - */ -void ntdb_unlockall_read(struct ntdb_context *ntdb); - -/** - * ntdb_wipe_all - wipe the database clean - * @ntdb: the ntdb context returned from ntdb_open() - * - * Completely erase the database. This is faster than iterating through - * each key and doing ntdb_delete. - */ -enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb); - -/** - * ntdb_repack - repack the database - * @ntdb: the ntdb context returned from ntdb_open() - * - * This repacks the database; if it is suffering from a great deal of - * fragmentation this might help. However, it can take twice the - * memory of the existing NTDB. - */ -enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb); - -/** - * ntdb_check - check a NTDB for consistency - * @ntdb: the ntdb context returned from ntdb_open() - * @check: function to check each key/data pair (or NULL) - * @data: argument for @check, must match type. - * - * This performs a consistency check of the open database, optionally calling - * a check() function on each record so you can do your own data consistency - * checks as well. If check() returns an error, that is returned from - * ntdb_check(). - * - * Note that the NTDB uses a feature which we don't understand which - * indicates we can't run ntdb_check(), this will log a warning to that - * effect and return NTDB_SUCCESS. You can detect this condition by - * looking for NTDB_CANT_CHECK in ntdb_get_flags(). - * - * Returns NTDB_SUCCESS or an error. 
- */ -#define ntdb_check(ntdb, check, data) \ - ntdb_check_((ntdb), typesafe_cb_preargs(enum NTDB_ERROR, void *, \ - (check), (data), \ - NTDB_DATA, \ - NTDB_DATA), \ - (data)) - -enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb, - enum NTDB_ERROR (*check)(NTDB_DATA k, - NTDB_DATA d, - void *data), - void *data); - -/** - * enum ntdb_summary_flags - flags for ntdb_summary. - */ -enum ntdb_summary_flags { - NTDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */ -}; - -/** - * ntdb_summary - return a string describing the NTDB state - * @ntdb: the ntdb context returned from ntdb_open() - * @flags: flags to control the summary output. - * @summary: pointer to string to allocate. - * - * This returns a developer-readable string describing the overall - * state of the ntdb, such as the percentage used and sizes of records. - * It is designed to provide information about the ntdb at a glance - * without displaying any keys or data in the database. - * - * On success, sets @summary to point to a malloc()'ed nul-terminated - * multi-line string. It is your responsibility to free() it. - */ -enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, - enum ntdb_summary_flags flags, - char **summary); - - -/** - * ntdb_get_flags - return the flags for a ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * - * This returns the flags on the current ntdb. Some of these are caused by - * the flags argument to ntdb_open(), others (such as NTDB_CONVERT) are - * intuited. - */ -unsigned int ntdb_get_flags(struct ntdb_context *ntdb); - -/** - * ntdb_add_flag - set a flag for a ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING. - * - * You can use this to set a flag on the NTDB. You cannot set these flags - * on a NTDB_INTERNAL ntdb. 
- */ -void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag); - -/** - * ntdb_remove_flag - unset a flag for a ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING. - * - * You can use this to clear a flag on the NTDB. You cannot clear flags - * on a NTDB_INTERNAL ntdb. - */ -void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag); - -/** - * enum ntdb_attribute_type - descriminator for union ntdb_attribute. - */ -enum ntdb_attribute_type { - NTDB_ATTRIBUTE_LOG = 0, - NTDB_ATTRIBUTE_HASH = 1, - NTDB_ATTRIBUTE_SEED = 2, - NTDB_ATTRIBUTE_STATS = 3, - NTDB_ATTRIBUTE_OPENHOOK = 4, - NTDB_ATTRIBUTE_FLOCK = 5, - NTDB_ATTRIBUTE_ALLOCATOR = 6, - NTDB_ATTRIBUTE_HASHSIZE = 7 -}; - -/** - * ntdb_get_attribute - get an attribute for an existing ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * @attr: the union ntdb_attribute to set. - * - * This gets an attribute from a NTDB which has previously been set (or - * may return the default values). Set @attr.base.attr to the - * attribute type you want get. - */ -enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb, - union ntdb_attribute *attr); - -/** - * ntdb_set_attribute - set an attribute for an existing ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * @attr: the union ntdb_attribute to set. - * - * This sets an attribute on a NTDB, overriding any previous attribute - * of the same type. It returns NTDB_ERR_EINVAL if the attribute is - * unknown or invalid. - * - * Note that NTDB_ATTRIBUTE_HASH, NTDB_ATTRIBUTE_SEED, and - * NTDB_ATTRIBUTE_OPENHOOK cannot currently be set after ntdb_open. - */ -enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb, - const union ntdb_attribute *attr); - -/** - * ntdb_unset_attribute - reset an attribute for an existing ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * @type: the attribute type to unset. 
- * - * This unsets an attribute on a NTDB, returning it to the defaults - * (where applicable). - * - * Note that it only makes sense for NTDB_ATTRIBUTE_LOG and NTDB_ATTRIBUTE_FLOCK - * to be unset. - */ -void ntdb_unset_attribute(struct ntdb_context *ntdb, - enum ntdb_attribute_type type); - -/** - * ntdb_name - get the name of a ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * - * This returns a copy of the name string, made at ntdb_open() time. - * - * This is mostly useful for logging. - */ -const char *ntdb_name(const struct ntdb_context *ntdb); - -/** - * ntdb_fd - get the file descriptor of a ntdb - * @ntdb: the ntdb context returned from ntdb_open() - * - * This returns the file descriptor for the underlying database file, or -1 - * for NTDB_INTERNAL. - */ -int ntdb_fd(const struct ntdb_context *ntdb); - -/** - * ntdb_foreach - iterate through every open NTDB. - * @fn: the function to call for every NTDB - * @p: the pointer to hand to @fn - * - * NTDB internally keeps track of all open TDBs; this function allows you to - * iterate through them. If @fn returns non-zero, traversal stops. - */ -#define ntdb_foreach(fn, p) \ - ntdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p), \ - struct ntdb_context *), (p)) - -void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p); - -/** - * struct ntdb_attribute_base - common fields for all ntdb attributes. - */ -struct ntdb_attribute_base { - enum ntdb_attribute_type attr; - union ntdb_attribute *next; -}; - -/** - * enum ntdb_log_level - log levels for ntdb_attribute_log - * @NTDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors - * or internal consistency failures. - * @NTDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters - * or writing to a read-only database. - * @NTDB_LOG_WARNING: used for informational messages on issues which - * are unusual but handled by NTDB internally, such - * as a failure to mmap or failure to open /dev/urandom. 
- * It's also used when ntdb_open() fails without O_CREAT - * because a file does not exist. - */ -enum ntdb_log_level { - NTDB_LOG_ERROR, - NTDB_LOG_USE_ERROR, - NTDB_LOG_WARNING -}; - -/** - * struct ntdb_attribute_log - log function attribute - * - * This attribute provides a hook for you to log errors. - */ -struct ntdb_attribute_log { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_LOG */ - void (*fn)(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data); - void *data; -}; - -/** - * struct ntdb_attribute_hash - hash function attribute - * - * This attribute allows you to provide an alternative hash function. - * This hash function will be handed keys from the database; it will also - * be handed the 8-byte NTDB_HASH_MAGIC value for checking the header (the - * ntdb_open() will fail if the hash value doesn't match the header). - * - * Note that if your hash function gives different results on - * different machine endians, your ntdb will no longer work across - * different architectures! - */ -struct ntdb_attribute_hash { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */ - uint32_t (*fn)(const void *key, size_t len, uint32_t seed, - void *data); - void *data; -}; - -/** - * struct ntdb_attribute_seed - hash function seed attribute - * - * The hash function seed is normally taken from /dev/urandom (or equivalent) - * but can be set manually here. This is mainly for testing purposes. - */ -struct ntdb_attribute_seed { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_SEED */ - uint64_t seed; -}; - -/** - * struct ntdb_attribute_stats - ntdb operational statistics - * - * This attribute records statistics of various low-level NTDB operations. - * This can be used to assist performance evaluation. This is only - * useful for ntdb_get_attribute(). 
- * - * New fields will be added at the end, hence the "size" argument which - * indicates how large your structure is: it must be filled in before - * calling ntdb_get_attribute(), which will overwrite it with the size - * ntdb knows about. - */ -struct ntdb_attribute_stats { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_STATS */ - size_t size; /* = sizeof(struct ntdb_attribute_stats) */ - uint64_t allocs; - uint64_t alloc_subhash; - uint64_t alloc_chain; - uint64_t alloc_bucket_exact; - uint64_t alloc_bucket_max; - uint64_t alloc_leftover; - uint64_t alloc_coalesce_tried; - uint64_t alloc_coalesce_iterate_clash; - uint64_t alloc_coalesce_lockfail; - uint64_t alloc_coalesce_race; - uint64_t alloc_coalesce_succeeded; - uint64_t alloc_coalesce_num_merged; - uint64_t compares; - uint64_t compare_wrong_offsetbits; - uint64_t compare_wrong_keylen; - uint64_t compare_wrong_rechash; - uint64_t compare_wrong_keycmp; - uint64_t transactions; - uint64_t transaction_cancel; - uint64_t transaction_nest; - uint64_t transaction_expand_file; - uint64_t transaction_read_direct; - uint64_t transaction_read_direct_fail; - uint64_t transaction_write_direct; - uint64_t transaction_write_direct_fail; - uint64_t traverses; - uint64_t traverse_val_vanished; - uint64_t expands; - uint64_t frees; - uint64_t locks; - uint64_t lock_lowlevel; - uint64_t lock_nonblock; - uint64_t lock_nonblock_fail; -}; - -/** - * struct ntdb_attribute_openhook - ntdb special effects hook for open - * - * This attribute contains a function to call once we have the OPEN_LOCK - * for the ntdb, but before we've examined its contents. If this succeeds, - * the ntdb will be populated if it's then zero-length. - * - * This is a hack to allow support for TDB-style TDB_CLEAR_IF_FIRST - * behaviour. 
- */ -struct ntdb_attribute_openhook { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_OPENHOOK */ - enum NTDB_ERROR (*fn)(int fd, void *data); - void *data; -}; - -/** - * struct ntdb_attribute_flock - ntdb special effects hook for file locking - * - * This attribute contains function to call to place locks on a file; it can - * be used to support non-blocking operations or lock proxying. - * - * They should return 0 on success, -1 on failure and set errno. - * - * An error will be logged on error if errno is neither EAGAIN nor EINTR - * (normally it would only return EAGAIN if waitflag is false, and - * loop internally on EINTR). - */ -struct ntdb_attribute_flock { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_FLOCK */ - int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *); - int (*unlock)(int fd, int rw, off_t off, off_t len, void *); - void *data; -}; - -/** - * struct ntdb_attribute_hashsize - ntdb hashsize setting. - * - * This attribute is only settable on ntdb_open; it indicates that we create - * a hashtable of the given size, rather than the default. - */ -struct ntdb_attribute_hashsize { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASHSIZE */ - uint32_t size; -}; - -/** - * struct ntdb_attribute_allocator - allocator for ntdb to use. - * - * You can replace malloc/free with your own allocation functions. - * The allocator takes an "owner" pointer, which is either NULL (for - * the initial struct ntdb_context and struct ntdb_file), or a - * previously allocated pointer. This is useful for relationship - * tracking, such as the talloc library. - * - * The expand function is realloc, but only ever used to expand an - * existing allocation. - * - * Be careful mixing allocators: two ntdb_contexts which have the same file - * open will share the same struct ntdb_file. This may be allocated by one - * ntdb's allocator, and freed by the other. 
- */ -struct ntdb_attribute_allocator { - struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_ALLOCATOR */ - void *(*alloc)(const void *owner, size_t len, void *priv_data); - void *(*expand)(void *old, size_t newlen, void *priv_data); - void (*free)(void *old, void *priv_data); - void *priv_data; -}; - -/** - * union ntdb_attribute - ntdb attributes. - * - * This represents all the known attributes. - * - * See also: - * struct ntdb_attribute_log, struct ntdb_attribute_hash, - * struct ntdb_attribute_seed, struct ntdb_attribute_stats, - * struct ntdb_attribute_openhook, struct ntdb_attribute_flock, - * struct ntdb_attribute_allocator alloc. - */ -union ntdb_attribute { - struct ntdb_attribute_base base; - struct ntdb_attribute_log log; - struct ntdb_attribute_hash hash; - struct ntdb_attribute_seed seed; - struct ntdb_attribute_stats stats; - struct ntdb_attribute_openhook openhook; - struct ntdb_attribute_flock flock; - struct ntdb_attribute_allocator alloc; - struct ntdb_attribute_hashsize hashsize; -}; - -#ifdef __cplusplus -} -#endif - -#endif /* ntdb.h */ diff --git a/ccan/ntdb/ntdb.pc.in b/ccan/ntdb/ntdb.pc.in deleted file mode 100644 index 36a7d513..00000000 --- a/ccan/ntdb/ntdb.pc.in +++ /dev/null @@ -1,11 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: ntdb -Description: A (not-so) trivial database -Version: @PACKAGE_VERSION@ -Libs: @LIB_RPATH@ -L${libdir} -lntdb -Cflags: -I${includedir} -URL: http://tdb.samba.org/ diff --git a/ccan/ntdb/open.c b/ccan/ntdb/open.c deleted file mode 100644 index 2a265afe..00000000 --- a/ccan/ntdb/open.c +++ /dev/null @@ -1,911 +0,0 @@ - /* - Trivial Database 2: opening and closing TDBs - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later 
version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include "private.h" -#include - -/* all tdbs, to detect double-opens (fcntl file don't nest!) */ -static struct ntdb_context *tdbs = NULL; - -static struct ntdb_file *find_file(dev_t device, ino_t ino) -{ - struct ntdb_context *i; - - for (i = tdbs; i; i = i->next) { - if (i->file->device == device && i->file->inode == ino) { - i->file->refcnt++; - return i->file; - } - } - return NULL; -} - -static bool read_all(int fd, void *buf, size_t len) -{ - while (len) { - ssize_t ret; - ret = read(fd, buf, len); - if (ret < 0) - return false; - if (ret == 0) { - /* ETOOSHORT? */ - errno = EWOULDBLOCK; - return false; - } - buf = (char *)buf + ret; - len -= ret; - } - return true; -} - -static uint32_t random_number(struct ntdb_context *ntdb) -{ - int fd; - uint32_t ret = 0; - struct timeval now; - - fd = open("/dev/urandom", O_RDONLY); - if (fd >= 0) { - if (read_all(fd, &ret, sizeof(ret))) { - close(fd); - return ret; - } - close(fd); - } - /* FIXME: Untested! Based on Wikipedia protocol description! */ - fd = open("/dev/egd-pool", O_RDWR); - if (fd >= 0) { - /* Command is 1, next byte is size we want to read. */ - char cmd[2] = { 1, sizeof(uint32_t) }; - if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) { - char reply[1 + sizeof(uint32_t)]; - int r = read(fd, reply, sizeof(reply)); - if (r > 1) { - /* Copy at least some bytes. */ - memcpy(&ret, reply+1, r - 1); - if (reply[0] == sizeof(uint32_t) - && r == sizeof(reply)) { - close(fd); - return ret; - } - } - } - close(fd); - } - - /* Fallback: pid and time. 
*/ - gettimeofday(&now, NULL); - ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec; - ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, - "ntdb_open: random from getpid and time"); - return ret; -} - -static void ntdb_context_init(struct ntdb_context *ntdb) -{ - /* Initialize the NTDB fields here */ - ntdb_io_init(ntdb); - ntdb->transaction = NULL; - ntdb->access = NULL; -} - -/* initialise a new database: - * - * struct ntdb_header; - * struct { - * struct ntdb_used_record hash_header; - * ntdb_off_t hash_buckets[1 << ntdb->hash_bits]; - * } hash; - * struct ntdb_freetable ftable; - * struct { - * struct ntdb_free_record free_header; - * char forty_three[...]; - * } remainder; - */ -#define NEW_DATABASE_HDR_SIZE(hbits) \ - (sizeof(struct ntdb_header) \ - + sizeof(struct ntdb_used_record) + (sizeof(ntdb_off_t) << hbits) \ - + sizeof(struct ntdb_freetable) \ - + sizeof(struct ntdb_free_record)) - -static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb, - struct ntdb_attribute_seed *seed, - struct ntdb_header *rhdr) -{ - /* We make it up in memory, then write it out if not internal */ - struct ntdb_freetable *ftable; - struct ntdb_used_record *htable; - struct ntdb_header *hdr; - struct ntdb_free_record *remainder; - char *mem; - unsigned int magic_len; - ssize_t rlen; - size_t dbsize, hashsize, hdrsize, remaindersize; - enum NTDB_ERROR ecode; - - hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits; - - /* Always make db a multiple of NTDB_PGSIZE */ - hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits); - dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1); - - mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data); - if (!mem) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_new_database: failed to allocate"); - } - - hdr = (void *)mem; - htable = (void *)(mem + sizeof(*hdr)); - ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize); - remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize - + 
sizeof(*ftable)); - - /* Fill in the header */ - hdr->version = NTDB_VERSION; - if (seed) - hdr->hash_seed = seed->seed; - else - hdr->hash_seed = random_number(ntdb); - hdr->hash_test = NTDB_HASH_MAGIC; - hdr->hash_test = ntdb->hash_fn(&hdr->hash_test, - sizeof(hdr->hash_test), - hdr->hash_seed, - ntdb->hash_data); - hdr->hash_bits = ntdb->hash_bits; - hdr->recovery = 0; - hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK; - hdr->seqnum = 0; - hdr->capabilities = 0; - memset(hdr->reserved, 0, sizeof(hdr->reserved)); - - /* Hash is all zero after header. */ - set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize); - memset(htable + 1, 0, hashsize); - - /* Free is empty. */ - hdr->free_table = (char *)ftable - (char *)hdr; - memset(ftable, 0, sizeof(*ftable)); - ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0, - sizeof(*ftable) - sizeof(ftable->hdr), - sizeof(*ftable) - sizeof(ftable->hdr)); - if (ecode != NTDB_SUCCESS) { - goto out; - } - - /* Rest of database is a free record, containing junk. */ - remaindersize = dbsize - hdrsize; - remainder->ftable_and_len - = (remaindersize + sizeof(*remainder) - - sizeof(struct ntdb_used_record)); - remainder->next = 0; - remainder->magic_and_prev - = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL)) - | ((char *)remainder - (char *)hdr); - memset(remainder + 1, 0x43, remaindersize); - - /* Put in our single free entry. */ - ftable->buckets[size_to_bucket(remaindersize)] = - (char *)remainder - (char *)hdr; - - /* Magic food */ - memset(hdr->magic_food, 0, sizeof(hdr->magic_food)); - strcpy(hdr->magic_food, NTDB_MAGIC_FOOD); - - /* This creates an endian-converted database, as if read from disk */ - magic_len = sizeof(hdr->magic_food); - ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len); - - /* Return copy of header. 
*/ - *rhdr = *hdr; - - if (ntdb->flags & NTDB_INTERNAL) { - ntdb->file->map_size = dbsize; - ntdb->file->map_ptr = hdr; - return NTDB_SUCCESS; - } - if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_new_database:" - " failed to seek: %s", strerror(errno)); - goto out; - } - - if (ftruncate(ntdb->file->fd, 0) == -1) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_new_database:" - " failed to truncate: %s", strerror(errno)); - goto out; - } - - rlen = write(ntdb->file->fd, hdr, dbsize); - if (rlen != dbsize) { - if (rlen >= 0) - errno = ENOSPC; - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_new_database: %zi writing header: %s", - rlen, strerror(errno)); - goto out; - } - -out: - ntdb->free_fn(hdr, ntdb->alloc_data); - return ecode; -} - -static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb) -{ - ntdb->file = ntdb->alloc_fn(NULL, sizeof(*ntdb->file), ntdb->alloc_data); - if (!ntdb->file) - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_open: cannot alloc ntdb_file structure"); - ntdb->file->num_lockrecs = 0; - ntdb->file->lockrecs = NULL; - ntdb->file->allrecord_lock.count = 0; - ntdb->file->refcnt = 1; - ntdb->file->map_ptr = NULL; - ntdb->file->direct_count = 0; - ntdb->file->old_mmaps = NULL; - return NTDB_SUCCESS; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb, - const union ntdb_attribute *attr) -{ - switch (attr->base.attr) { - case NTDB_ATTRIBUTE_LOG: - ntdb->log_fn = attr->log.fn; - ntdb->log_data = attr->log.data; - break; - case NTDB_ATTRIBUTE_HASH: - case NTDB_ATTRIBUTE_SEED: - case NTDB_ATTRIBUTE_OPENHOOK: - case NTDB_ATTRIBUTE_HASHSIZE: - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_set_attribute:" - " cannot set %s after opening", - attr->base.attr == NTDB_ATTRIBUTE_HASH - ? "NTDB_ATTRIBUTE_HASH" - : attr->base.attr == NTDB_ATTRIBUTE_SEED - ? 
"NTDB_ATTRIBUTE_SEED" - : attr->base.attr == NTDB_ATTRIBUTE_OPENHOOK - ? "NTDB_ATTRIBUTE_OPENHOOK" - : "NTDB_ATTRIBUTE_HASHSIZE"); - case NTDB_ATTRIBUTE_STATS: - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_set_attribute:" - " cannot set NTDB_ATTRIBUTE_STATS"); - case NTDB_ATTRIBUTE_FLOCK: - ntdb->lock_fn = attr->flock.lock; - ntdb->unlock_fn = attr->flock.unlock; - ntdb->lock_data = attr->flock.data; - break; - case NTDB_ATTRIBUTE_ALLOCATOR: - ntdb->alloc_fn = attr->alloc.alloc; - ntdb->expand_fn = attr->alloc.expand; - ntdb->free_fn = attr->alloc.free; - ntdb->alloc_data = attr->alloc.priv_data; - break; - default: - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_set_attribute:" - " unknown attribute type %u", - attr->base.attr); - } - return NTDB_SUCCESS; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb, - union ntdb_attribute *attr) -{ - switch (attr->base.attr) { - case NTDB_ATTRIBUTE_LOG: - if (!ntdb->log_fn) - return NTDB_ERR_NOEXIST; - attr->log.fn = ntdb->log_fn; - attr->log.data = ntdb->log_data; - break; - case NTDB_ATTRIBUTE_HASH: - attr->hash.fn = ntdb->hash_fn; - attr->hash.data = ntdb->hash_data; - break; - case NTDB_ATTRIBUTE_SEED: - attr->seed.seed = ntdb->hash_seed; - break; - case NTDB_ATTRIBUTE_OPENHOOK: - if (!ntdb->openhook) - return NTDB_ERR_NOEXIST; - attr->openhook.fn = ntdb->openhook; - attr->openhook.data = ntdb->openhook_data; - break; - case NTDB_ATTRIBUTE_STATS: { - size_t size = attr->stats.size; - if (size > ntdb->stats.size) - size = ntdb->stats.size; - memcpy(&attr->stats, &ntdb->stats, size); - break; - } - case NTDB_ATTRIBUTE_FLOCK: - attr->flock.lock = ntdb->lock_fn; - attr->flock.unlock = ntdb->unlock_fn; - attr->flock.data = ntdb->lock_data; - break; - case NTDB_ATTRIBUTE_ALLOCATOR: - attr->alloc.alloc = ntdb->alloc_fn; - attr->alloc.expand = ntdb->expand_fn; - attr->alloc.free = ntdb->free_fn; - attr->alloc.priv_data = ntdb->alloc_data; - break; - 
case NTDB_ATTRIBUTE_HASHSIZE: - attr->hashsize.size = 1 << ntdb->hash_bits; - break; - default: - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_get_attribute:" - " unknown attribute type %u", - attr->base.attr); - } - attr->base.next = NULL; - return NTDB_SUCCESS; -} - -_PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb, - enum ntdb_attribute_type type) -{ - switch (type) { - case NTDB_ATTRIBUTE_LOG: - ntdb->log_fn = NULL; - break; - case NTDB_ATTRIBUTE_OPENHOOK: - ntdb->openhook = NULL; - break; - case NTDB_ATTRIBUTE_HASH: - case NTDB_ATTRIBUTE_SEED: - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_unset_attribute: cannot unset %s after opening", - type == NTDB_ATTRIBUTE_HASH - ? "NTDB_ATTRIBUTE_HASH" - : "NTDB_ATTRIBUTE_SEED"); - break; - case NTDB_ATTRIBUTE_STATS: - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_unset_attribute:" - "cannot unset NTDB_ATTRIBUTE_STATS"); - break; - case NTDB_ATTRIBUTE_FLOCK: - ntdb->lock_fn = ntdb_fcntl_lock; - ntdb->unlock_fn = ntdb_fcntl_unlock; - break; - default: - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_unset_attribute: unknown attribute type %u", - type); - } -} - -/* The top three bits of the capability tell us whether it matters. 
*/ -enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller, - ntdb_off_t type) -{ - if (type & NTDB_CAP_NOOPEN) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "%s: file has unknown capability %llu", - caller, type & NTDB_CAP_NOOPEN); - } - - if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) { - return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR, - "%s: file has unknown capability %llu" - " (cannot write to it)", - caller, type & NTDB_CAP_NOOPEN); - } - - if (type & NTDB_CAP_NOCHECK) { - ntdb->flags |= NTDB_CANT_CHECK; - } - return NTDB_SUCCESS; -} - -static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb, - ntdb_off_t capabilities) -{ - ntdb_off_t off, next; - enum NTDB_ERROR ecode = NTDB_SUCCESS; - const struct ntdb_capability *cap; - - /* Check capability list. */ - for (off = capabilities; off && ecode == NTDB_SUCCESS; off = next) { - cap = ntdb_access_read(ntdb, off, sizeof(*cap), true); - if (NTDB_PTR_IS_ERR(cap)) { - return NTDB_PTR_ERR(cap); - } - - switch (cap->type & NTDB_CAP_TYPE_MASK) { - /* We don't understand any capabilities (yet). */ - default: - ecode = unknown_capability(ntdb, "ntdb_open", cap->type); - } - next = cap->next; - ntdb_access_release(ntdb, cap); - } - return ecode; -} - -static void *default_alloc(const void *owner, size_t len, void *priv_data) -{ - return malloc(len); -} - -static void *default_expand(void *ptr, size_t len, void *priv_data) -{ - return realloc(ptr, len); -} - -static void default_free(void *ptr, void *priv_data) -{ - free(ptr); -} - -/* First allocation needs manual search of attributes. 
*/ -static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr, - const char *name) -{ - size_t len = sizeof(struct ntdb_context) + strlen(name) + 1; - - while (attr) { - if (attr->base.attr == NTDB_ATTRIBUTE_ALLOCATOR) { - return attr->alloc.alloc(NULL, len, - attr->alloc.priv_data); - } - attr = attr->base.next; - } - return default_alloc(NULL, len, NULL); -} - -static unsigned int next_pow2(uint64_t size) -{ - unsigned int bits = 1; - - while ((1ULL << bits) < size) - bits++; - return bits; -} - -_PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags, - int open_flags, mode_t mode, - union ntdb_attribute *attr) -{ - struct ntdb_context *ntdb; - struct stat st; - int saved_errno = 0; - uint64_t hash_test; - unsigned v; - ssize_t rlen; - struct ntdb_header hdr; - struct ntdb_attribute_seed *seed = NULL; - ntdb_bool_err berr; - enum NTDB_ERROR ecode; - int openlock; - - ntdb = alloc_ntdb(attr, name); - if (!ntdb) { - /* Can't log this */ - errno = ENOMEM; - return NULL; - } - /* Set name immediately for logging functions. */ - ntdb->name = strcpy((char *)(ntdb + 1), name); - ntdb->flags = ntdb_flags; - ntdb->log_fn = NULL; - ntdb->open_flags = open_flags; - ntdb->file = NULL; - ntdb->openhook = NULL; - ntdb->lock_fn = ntdb_fcntl_lock; - ntdb->unlock_fn = ntdb_fcntl_unlock; - ntdb->hash_fn = ntdb_jenkins_hash; - memset(&ntdb->stats, 0, sizeof(ntdb->stats)); - ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS; - ntdb->stats.size = sizeof(ntdb->stats); - ntdb->alloc_fn = default_alloc; - ntdb->expand_fn = default_expand; - ntdb->free_fn = default_free; - ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. 
*/ - - while (attr) { - switch (attr->base.attr) { - case NTDB_ATTRIBUTE_HASH: - ntdb->hash_fn = attr->hash.fn; - ntdb->hash_data = attr->hash.data; - break; - case NTDB_ATTRIBUTE_SEED: - seed = &attr->seed; - break; - case NTDB_ATTRIBUTE_OPENHOOK: - ntdb->openhook = attr->openhook.fn; - ntdb->openhook_data = attr->openhook.data; - break; - case NTDB_ATTRIBUTE_HASHSIZE: - ntdb->hash_bits = next_pow2(attr->hashsize.size); - if (ntdb->hash_bits > 31) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_open: hash_size %u" - " too large", - attr->hashsize.size); - goto fail; - } - break; - default: - /* These are set as normal. */ - ecode = ntdb_set_attribute(ntdb, attr); - if (ecode != NTDB_SUCCESS) - goto fail; - } - attr = attr->base.next; - } - - if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT - | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING - | NTDB_RDONLY)) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_open: unknown flags %u", ntdb_flags); - goto fail; - } - - if (seed) { - if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_open:" - " cannot set NTDB_ATTRIBUTE_SEED" - " without O_CREAT."); - goto fail; - } - } - - if ((open_flags & O_ACCMODE) == O_WRONLY) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_open: can't open ntdb %s write-only", - name); - goto fail; - } - - if ((open_flags & O_ACCMODE) == O_RDONLY) { - openlock = F_RDLCK; - ntdb->flags |= NTDB_RDONLY; - } else { - if (ntdb_flags & NTDB_RDONLY) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, - NTDB_LOG_USE_ERROR, - "ntdb_open: can't use NTDB_RDONLY" - " without O_RDONLY"); - goto fail; - } - openlock = F_WRLCK; - } - - /* internal databases don't need any of the rest. 
*/ - if (ntdb->flags & NTDB_INTERNAL) { - ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP); - ecode = ntdb_new_file(ntdb); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - ntdb->file->fd = -1; - ecode = ntdb_new_database(ntdb, seed, &hdr); - if (ecode == NTDB_SUCCESS) { - ntdb_convert(ntdb, &hdr.hash_seed, - sizeof(hdr.hash_seed)); - ntdb->hash_seed = hdr.hash_seed; - ntdb_context_init(ntdb); - ntdb_ftable_init(ntdb); - } - if (ecode != NTDB_SUCCESS) { - goto fail; - } - return ntdb; - } - - if (stat(name, &st) != -1) - ntdb->file = find_file(st.st_dev, st.st_ino); - - if (!ntdb->file) { - ecode = ntdb_new_file(ntdb); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - - /* Set this now, as ntdb_nest_lock examines it. */ - ntdb->file->map_size = 0; - - if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) { - enum ntdb_log_level lvl; - /* errno set by open(2) */ - saved_errno = errno; - - /* Probing for files like this is a common pattern. */ - if (!(open_flags & O_CREAT) && errno == ENOENT) { - lvl = NTDB_LOG_WARNING; - } else { - lvl = NTDB_LOG_ERROR; - } - ntdb_logerr(ntdb, NTDB_ERR_IO, lvl, - "ntdb_open: could not open file %s: %s", - name, strerror(errno)); - - goto fail_errno; - } - - /* ensure there is only one process initialising at once: - * do it immediately to reduce the create/openlock race. */ - ecode = ntdb_lock_open(ntdb, openlock, - NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); - if (ecode != NTDB_SUCCESS) { - saved_errno = errno; - goto fail_errno; - } - - /* on exec, don't inherit the fd */ - v = fcntl(ntdb->file->fd, F_GETFD, 0); - fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC); - - if (fstat(ntdb->file->fd, &st) == -1) { - saved_errno = errno; - ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open: could not stat open %s: %s", - name, strerror(errno)); - goto fail_errno; - } - - ntdb->file->device = st.st_dev; - ntdb->file->inode = st.st_ino; - - /* call their open hook if they gave us one. 
*/ - if (ntdb->openhook) { - ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data); - if (ecode != NTDB_SUCCESS) { - ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_open: open hook failed"); - goto fail; - } - open_flags |= O_CREAT; - } - } else { - /* ensure there is only one process initialising at once */ - ecode = ntdb_lock_open(ntdb, openlock, - NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); - if (ecode != NTDB_SUCCESS) { - saved_errno = errno; - goto fail_errno; - } - } - - /* If they used O_TRUNC, read will return 0. */ - rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0); - if (rlen == 0 && (open_flags & O_CREAT)) { - ecode = ntdb_new_database(ntdb, seed, &hdr); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - } else if (rlen < 0) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open: error %s reading %s", - strerror(errno), name); - goto fail; - } else if (rlen < sizeof(hdr) - || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open: %s is not a ntdb file", name); - goto fail; - } - - if (hdr.version != NTDB_VERSION) { - if (hdr.version == bswap_64(NTDB_VERSION)) - ntdb->flags |= NTDB_CONVERT; - else { - /* wrong version */ - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open:" - " %s is unknown version 0x%llx", - name, (long long)hdr.version); - goto fail; - } - } else if (ntdb->flags & NTDB_CONVERT) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open:" - " %s does not need NTDB_CONVERT", - name); - goto fail; - } - - ntdb_context_init(ntdb); - - ntdb_convert(ntdb, &hdr, sizeof(hdr)); - ntdb->hash_bits = hdr.hash_bits; - ntdb->hash_seed = hdr.hash_seed; - hash_test = NTDB_HASH_MAGIC; - hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test)); - if (hdr.hash_test != hash_test) { - /* wrong hash variant */ - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open:" - " %s uses a different hash function", - name); - goto fail; - } 
- - ecode = capabilities_ok(ntdb, hdr.capabilities); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - - /* Clear any features we don't understand. */ - if ((open_flags & O_ACCMODE) != O_RDONLY) { - hdr.features_used &= NTDB_FEATURE_MASK; - ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header, - features_used), - &hdr.features_used, - sizeof(hdr.features_used)); - if (ecode != NTDB_SUCCESS) - goto fail; - } - - ntdb_unlock_open(ntdb, openlock); - - /* This makes sure we have current map_size and mmap. */ - ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true); - if (unlikely(ecode != NTDB_SUCCESS)) - goto fail; - - if (ntdb->file->map_size % NTDB_PGSIZE != 0) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open:" - " %s size %llu isn't a multiple of %u", - name, (long long)ntdb->file->map_size, - NTDB_PGSIZE); - goto fail; - } - - /* Now it's fully formed, recover if necessary. */ - berr = ntdb_needs_recovery(ntdb); - if (unlikely(berr != false)) { - if (berr < 0) { - ecode = NTDB_OFF_TO_ERR(berr); - goto fail; - } - ecode = ntdb_lock_and_recover(ntdb); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - } - - ecode = ntdb_ftable_init(ntdb); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - - ntdb->next = tdbs; - tdbs = ntdb; - return ntdb; - - fail: - /* Map ecode to some logical errno. 
*/ - switch (NTDB_ERR_TO_OFF(ecode)) { - case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): - case NTDB_ERR_TO_OFF(NTDB_ERR_IO): - saved_errno = EIO; - break; - case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): - saved_errno = EWOULDBLOCK; - break; - case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): - saved_errno = ENOMEM; - break; - case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): - saved_errno = EINVAL; - break; - default: - saved_errno = EINVAL; - break; - } - -fail_errno: -#ifdef NTDB_TRACE - close(ntdb->tracefd); -#endif - if (ntdb->file) { - ntdb_lock_cleanup(ntdb); - if (--ntdb->file->refcnt == 0) { - assert(ntdb->file->num_lockrecs == 0); - if (ntdb->file->map_ptr) { - if (ntdb->flags & NTDB_INTERNAL) { - ntdb->free_fn(ntdb->file->map_ptr, - ntdb->alloc_data); - } else - ntdb_munmap(ntdb); - } - if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0) - ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_open: failed to close ntdb fd" - " on error: %s", strerror(errno)); - ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data); - ntdb->free_fn(ntdb->file, ntdb->alloc_data); - } - } - - ntdb->free_fn(ntdb, ntdb->alloc_data); - errno = saved_errno; - return NULL; -} - -_PUBLIC_ int ntdb_close(struct ntdb_context *ntdb) -{ - int ret = 0; - struct ntdb_context **i; - - ntdb_trace(ntdb, "ntdb_close"); - - if (ntdb->transaction) { - ntdb_transaction_cancel(ntdb); - } - - ntdb_lock_cleanup(ntdb); - if (--ntdb->file->refcnt == 0) { - if (ntdb->file->map_ptr) { - if (ntdb->flags & NTDB_INTERNAL) { - ntdb->free_fn(ntdb->file->map_ptr, - ntdb->alloc_data); - } else { - ntdb_munmap(ntdb); - } - } - ret = close(ntdb->file->fd); - ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data); - ntdb->free_fn(ntdb->file, ntdb->alloc_data); - } - - /* Remove from tdbs list */ - for (i = &tdbs; *i; i = &(*i)->next) { - if (*i == ntdb) { - *i = ntdb->next; - break; - } - } - -#ifdef NTDB_TRACE - close(ntdb->tracefd); -#endif - ntdb->free_fn(ntdb, ntdb->alloc_data); - - return ret; -} - -_PUBLIC_ void ntdb_foreach_(int 
(*fn)(struct ntdb_context *, void *), void *p) -{ - struct ntdb_context *i; - - for (i = tdbs; i; i = i->next) { - if (fn(i, p) != 0) - break; - } -} diff --git a/ccan/ntdb/private.h b/ccan/ntdb/private.h deleted file mode 100644 index 24925375..00000000 --- a/ccan/ntdb/private.h +++ /dev/null @@ -1,677 +0,0 @@ -#ifndef NTDB_PRIVATE_H -#define NTDB_PRIVATE_H -/* - Trivial Database 2: private types and prototypes - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ - -#include "config.h" -#ifndef HAVE_CCAN -#error You need ccan to build ntdb! 
-#endif -#include "ntdb.h" -#include -#include -#include - -#ifdef HAVE_LIBREPLACE -#include "replace.h" -#include "system/filesys.h" -#include "system/time.h" -#include "system/shmem.h" -#include "system/select.h" -#include "system/wait.h" -#else -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif -#include - -#ifndef TEST_IT -#define TEST_IT(cond) -#endif - -/* #define NTDB_TRACE 1 */ - -#ifndef __STRING -#define __STRING(x) #x -#endif - -#ifndef __STRINGSTRING -#define __STRINGSTRING(x) __STRING(x) -#endif - -#ifndef __location__ -#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__) -#endif - -typedef uint64_t ntdb_len_t; -typedef uint64_t ntdb_off_t; - -#define NTDB_MAGIC_FOOD "NTDB file\n" -#define NTDB_VERSION ((uint64_t)(0x26011967 + 7)) -#define NTDB_USED_MAGIC ((uint64_t)0x1999) -#define NTDB_HTABLE_MAGIC ((uint64_t)0x1888) -#define NTDB_CHAIN_MAGIC ((uint64_t)0x1777) -#define NTDB_FTABLE_MAGIC ((uint64_t)0x1666) -#define NTDB_CAP_MAGIC ((uint64_t)0x1555) -#define NTDB_FREE_MAGIC ((uint64_t)0xFE) -#define NTDB_HASH_MAGIC (0xA1ABE11A01092008ULL) -#define NTDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL) -#define NTDB_RECOVERY_INVALID_MAGIC (0x0ULL) - -/* Capability bits. */ -#define NTDB_CAP_TYPE_MASK 0x1FFFFFFFFFFFFFFFULL -#define NTDB_CAP_NOCHECK 0x8000000000000000ULL -#define NTDB_CAP_NOWRITE 0x4000000000000000ULL -#define NTDB_CAP_NOOPEN 0x2000000000000000ULL - -#define NTDB_OFF_IS_ERR(off) unlikely(off >= (ntdb_off_t)(long)NTDB_ERR_LAST) -#define NTDB_OFF_TO_ERR(off) ((enum NTDB_ERROR)(long)(off)) -#define NTDB_ERR_TO_OFF(ecode) ((ntdb_off_t)(long)(ecode)) - -/* Packing errors into pointers and v.v. 
*/ -#define NTDB_PTR_IS_ERR(ptr) \ - unlikely((unsigned long)(ptr) >= (unsigned long)NTDB_ERR_LAST) -#define NTDB_PTR_ERR(p) ((enum NTDB_ERROR)(long)(p)) -#define NTDB_ERR_PTR(err) ((void *)(long)(err)) - -/* This doesn't really need to be pagesize, but we use it for similar - * reasons. */ -#define NTDB_PGSIZE 16384 - -/* Common case of returning true, false or -ve error. */ -typedef int ntdb_bool_err; - -/* Prevent others from opening the file. */ -#define NTDB_OPEN_LOCK 0 -/* Expanding file. */ -#define NTDB_EXPANSION_LOCK 2 -/* Doing a transaction. */ -#define NTDB_TRANSACTION_LOCK 8 -/* Hash chain locks. */ -#define NTDB_HASH_LOCK_START 64 - -/* Extend file by least 100 times larger than needed. */ -#define NTDB_EXTENSION_FACTOR 100 - -/* We steal this many upper bits, giving a maximum offset of 64 exabytes. */ -#define NTDB_OFF_UPPER_STEAL 8 - -/* And we use the lower bit, too. */ -#define NTDB_OFF_CHAIN_BIT 0 - -/* Hash table sits just after the header. */ -#define NTDB_HASH_OFFSET (sizeof(struct ntdb_header)) - -/* Additional features we understand. Currently: none. */ -#define NTDB_FEATURE_MASK ((uint64_t)0) - -/* The bit number where we store the extra hash bits. */ -/* Convenience mask to get actual offset. 
*/ -#define NTDB_OFF_MASK \ - (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - (1<magic_and_meta >> 43) & ((1 << 5)-1)) * 2; -} - -static inline uint64_t rec_key_length(const struct ntdb_used_record *r) -{ - return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1); -} - -static inline uint64_t rec_data_length(const struct ntdb_used_record *r) -{ - return r->key_and_data_len >> rec_key_bits(r); -} - -static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r) -{ - return (r->magic_and_meta >> 11) & 0xFFFFFFFF; -} - -static inline uint16_t rec_magic(const struct ntdb_used_record *r) -{ - return (r->magic_and_meta >> 48); -} - -struct ntdb_free_record { - uint64_t magic_and_prev; /* NTDB_OFF_UPPER_STEAL bits magic, then prev */ - uint64_t ftable_and_len; /* Len not counting these two fields. */ - /* This is why the minimum record size is 8 bytes. */ - uint64_t next; -}; - -static inline uint64_t frec_prev(const struct ntdb_free_record *f) -{ - return f->magic_and_prev & ((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1); -} - -static inline uint64_t frec_magic(const struct ntdb_free_record *f) -{ - return f->magic_and_prev >> (64 - NTDB_OFF_UPPER_STEAL); -} - -static inline uint64_t frec_len(const struct ntdb_free_record *f) -{ - return f->ftable_and_len & ((1ULL << (64 - NTDB_OFF_UPPER_STEAL))-1); -} - -static inline unsigned frec_ftable(const struct ntdb_free_record *f) -{ - return f->ftable_and_len >> (64 - NTDB_OFF_UPPER_STEAL); -} - -struct ntdb_recovery_record { - uint64_t magic; - /* Length of record (add this header to get total length). */ - uint64_t max_len; - /* Length used. */ - uint64_t len; - /* Old length of file before transaction. */ - uint64_t eof; -}; - -/* this is stored at the front of every database */ -struct ntdb_header { - char magic_food[64]; /* for /etc/magic */ - /* FIXME: Make me 32 bit? */ - uint64_t version; /* version of the code */ - uint64_t hash_bits; /* bits for toplevel hash table. 
*/ - uint64_t hash_test; /* result of hashing HASH_MAGIC. */ - uint64_t hash_seed; /* "random" seed written at creation time. */ - ntdb_off_t free_table; /* (First) free table. */ - ntdb_off_t recovery; /* Transaction recovery area. */ - - uint64_t features_used; /* Features all writers understand */ - uint64_t features_offered; /* Features offered */ - - uint64_t seqnum; /* Sequence number for NTDB_SEQNUM */ - - ntdb_off_t capabilities; /* Optional linked list of capabilities. */ - ntdb_off_t reserved[22]; - - /* - * Hash table is next: - * - * struct ntdb_used_record htable_hdr; - * ntdb_off_t htable[1 << hash_bits]; - */ -}; - -struct ntdb_freetable { - struct ntdb_used_record hdr; - ntdb_off_t next; - ntdb_off_t buckets[NTDB_FREE_BUCKETS]; -}; - -struct ntdb_capability { - struct ntdb_used_record hdr; - ntdb_off_t type; - ntdb_off_t next; - /* ... */ -}; - -/* Information about a particular (locked) hash entry. */ -struct hash_info { - /* Full hash value of entry. */ - uint32_t h; - /* Start of hash table / chain. */ - ntdb_off_t table; - /* Number of entries in this table/chain. */ - ntdb_off_t table_size; - /* Bucket we (or an empty space) were found in. */ - ntdb_off_t bucket; - /* Old value that was in that entry (if not found) */ - ntdb_off_t old_val; -}; - -enum ntdb_lock_flags { - /* WAIT == F_SETLKW, NOWAIT == F_SETLK */ - NTDB_LOCK_NOWAIT = 0, - NTDB_LOCK_WAIT = 1, - /* If set, don't log an error on failure. */ - NTDB_LOCK_PROBE = 2, - /* If set, don't check for recovery (used by recovery code). */ - NTDB_LOCK_NOCHECK = 4, -}; - -struct ntdb_lock { - struct ntdb_context *owner; - off_t off; - uint32_t count; - uint32_t ltype; -}; - -/* This is only needed for ntdb_access_commit, but used everywhere to - * simplify. 
*/ -struct ntdb_access_hdr { - struct ntdb_access_hdr *next; - ntdb_off_t off; - ntdb_len_t len; - bool convert; -}; - -/* mmaps we are keeping around because they are still direct accessed */ -struct ntdb_old_mmap { - struct ntdb_old_mmap *next; - - void *map_ptr; - ntdb_len_t map_size; -}; - -struct ntdb_file { - /* How many are sharing us? */ - unsigned int refcnt; - - /* Mmap (if any), or malloc (for NTDB_INTERNAL). */ - void *map_ptr; - - /* How much space has been mapped (<= current file size) */ - ntdb_len_t map_size; - - /* The file descriptor (-1 for NTDB_INTERNAL). */ - int fd; - - /* How many are accessing directly? */ - unsigned int direct_count; - - /* Old maps, still direct accessed. */ - struct ntdb_old_mmap *old_mmaps; - - /* Lock information */ - pid_t locker; - struct ntdb_lock allrecord_lock; - size_t num_lockrecs; - struct ntdb_lock *lockrecs; - - /* Identity of this file. */ - dev_t device; - ino_t inode; -}; - -struct ntdb_methods { - enum NTDB_ERROR (*tread)(struct ntdb_context *, ntdb_off_t, void *, - ntdb_len_t); - enum NTDB_ERROR (*twrite)(struct ntdb_context *, ntdb_off_t, const void *, - ntdb_len_t); - enum NTDB_ERROR (*oob)(struct ntdb_context *, ntdb_off_t, ntdb_len_t, bool); - enum NTDB_ERROR (*expand_file)(struct ntdb_context *, ntdb_len_t); - void *(*direct)(struct ntdb_context *, ntdb_off_t, size_t, bool); - ntdb_off_t (*read_off)(struct ntdb_context *ntdb, ntdb_off_t off); - enum NTDB_ERROR (*write_off)(struct ntdb_context *ntdb, ntdb_off_t off, - ntdb_off_t val); -}; - -/* - internal prototypes -*/ -/* Get bits from a value. 
*/ -static inline uint32_t bits_from(uint64_t val, unsigned start, unsigned num) -{ - assert(num <= 32); - return (val >> start) & ((1U << num) - 1); -} - - -/* hash.c: */ -uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed, - void *unused); - -enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb, - struct hash_info *h, - NTDB_DATA *kbuf, size_t *dlen); - -enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb, - struct hash_info *h, - NTDB_DATA *kbuf, size_t *dlen); - -/* Hash random memory. */ -uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len); - -/* Find and lock a hash entry (or where it would be). */ -ntdb_off_t find_and_lock(struct ntdb_context *ntdb, - NTDB_DATA key, - int ltype, - struct hash_info *h, - struct ntdb_used_record *rec, - const char **rkey); - -enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, - const struct hash_info *h, - ntdb_off_t new_off); - -enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, - const struct hash_info *h, - ntdb_off_t new_off); - -enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, - const struct hash_info *h); - -/* For ntdb_check */ -bool is_subhash(ntdb_off_t val); -enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller, - ntdb_off_t type); - -/* free.c: */ -enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb); - -/* check.c needs these to iterate through free lists. */ -ntdb_off_t first_ftable(struct ntdb_context *ntdb); -ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable); - -/* This returns space or -ve error number. */ -ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, - unsigned magic, bool growing); - -/* Put this record in a free list. */ -enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len_with_header, - enum ntdb_lock_flags waitflag, - bool coalesce_ok); - -/* Set up header for a used/ftable/htable/chain/capability record. 
*/ -enum NTDB_ERROR set_header(struct ntdb_context *ntdb, - struct ntdb_used_record *rec, - unsigned magic, uint64_t keylen, uint64_t datalen, - uint64_t actuallen); - -/* Used by ntdb_check to verify. */ -unsigned int size_to_bucket(ntdb_len_t data_len); -ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket); - -/* Used by ntdb_summary */ -ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off); - -/* Adjust expansion, used by create_recovery_area */ -ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size); - -/* io.c: */ -/* Initialize ntdb->methods. */ -void ntdb_io_init(struct ntdb_context *ntdb); - -/* Convert endian of the buffer if required. */ -void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size); - -/* Unmap and try to map the ntdb. */ -enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb); -enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb); - -/* Either alloc a copy, or give direct access. Release frees or noop. */ -const void *ntdb_access_read(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, bool convert); -void *ntdb_access_write(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, bool convert); - -/* Release result of ntdb_access_read/write. */ -void ntdb_access_release(struct ntdb_context *ntdb, const void *p); -/* Commit result of ntdb_acces_write. */ -enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p); - -/* Clear an ondisk area. */ -enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len); - -/* Return a non-zero offset between >= start < end in this array (or end). */ -ntdb_off_t ntdb_find_nonzero_off(struct ntdb_context *ntdb, - ntdb_off_t base, - uint64_t start, - uint64_t end); - -/* Return a zero offset in this array, or num. */ -ntdb_off_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off, - uint64_t num); - -/* Allocate and make a copy of some offset. 
*/ -void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len); - -/* Writes a converted copy of a record. */ -enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off, - const void *rec, size_t len); - -/* Reads record and converts it */ -enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off, - void *rec, size_t len); - -/* Bump the seqnum (caller checks for ntdb->flags & NTDB_SEQNUM) */ -void ntdb_inc_seqnum(struct ntdb_context *ntdb); - -/* lock.c: */ -/* Print message because another ntdb owns a lock we want. */ -enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call); - -/* If we fork, we no longer really own locks. */ -bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log); - -/* Lock/unlock a hash bucket. */ -enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb, - unsigned int hbucket, - int ltype); -enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb, - unsigned int hash, int ltype); - -/* For closing the file. */ -void ntdb_lock_cleanup(struct ntdb_context *ntdb); - -/* Lock/unlock a particular free bucket. */ -enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off, - enum ntdb_lock_flags waitflag); -void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off); - -/* Serialize transaction start. */ -enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype); -void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype); - -/* Do we have any hash locks (ie. via ntdb_chainlock) ? */ -bool ntdb_has_hash_locks(struct ntdb_context *ntdb); - -/* Lock entire database. */ -enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype, - enum ntdb_lock_flags flags, bool upgradable); -void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype); -enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start); - -/* Serialize db open. 
*/ -enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb, - int ltype, enum ntdb_lock_flags flags); -void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype); -bool ntdb_has_open_lock(struct ntdb_context *ntdb); - -/* Serialize db expand. */ -enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype); -void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype); -bool ntdb_has_expansion_lock(struct ntdb_context *ntdb); - -/* If it needs recovery, grab all the locks and do it. */ -enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb); - -/* Default lock and unlock functions. */ -int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *); -int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *); - -/* transaction.c: */ -enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb); -ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb); - -struct ntdb_context { - /* Single list of all TDBs, to detect multiple opens. */ - struct ntdb_context *next; - - /* Filename of the database. */ - const char *name; - - /* Logging function */ - void (*log_fn)(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data); - void *log_data; - - /* Open flags passed to ntdb_open. */ - int open_flags; - - /* low level (fnctl) lock functions. */ - int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *); - int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *); - void *lock_data; - - /* the ntdb flags passed to ntdb_open. */ - uint32_t flags; - - /* Our statistics. */ - struct ntdb_attribute_stats stats; - - /* The actual file information */ - struct ntdb_file *file; - - /* Hash function. */ - uint32_t (*hash_fn)(const void *key, size_t len, uint32_t seed, void *); - void *hash_data; - uint32_t hash_seed; - /* Bits in toplevel hash table. */ - unsigned int hash_bits; - - /* Allocate and free functions. 
*/ - void *(*alloc_fn)(const void *owner, size_t len, void *priv_data); - void *(*expand_fn)(void *old, size_t newlen, void *priv_data); - void (*free_fn)(void *old, void *priv_data); - void *alloc_data; - - /* Our open hook, if any. */ - enum NTDB_ERROR (*openhook)(int fd, void *data); - void *openhook_data; - - /* Set if we are in a transaction. */ - struct ntdb_transaction *transaction; - - /* What free table are we using? */ - ntdb_off_t ftable_off; - unsigned int ftable; - - /* IO methods: changes for transactions. */ - const struct ntdb_methods *io; - - /* Direct access information */ - struct ntdb_access_hdr *access; -}; - -/* ntdb.c: */ -enum NTDB_ERROR COLD PRINTF_FMT(4, 5) - ntdb_logerr(struct ntdb_context *ntdb, - enum NTDB_ERROR ecode, - enum ntdb_log_level level, - const char *fmt, ...); - -static inline enum NTDB_ERROR ntdb_oob(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, - bool probe) -{ - if (likely(off + len >= off) - && likely(off + len <= ntdb->file->map_size) - && likely(!probe)) { - return NTDB_SUCCESS; - } - return ntdb->io->oob(ntdb, off, len, probe); -} - -/* Convenience routine to get an offset. */ -static inline ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb, - ntdb_off_t off) -{ - return ntdb->io->read_off(ntdb, off); -} - -/* Write an offset at an offset. 
*/ -static inline enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb, - ntdb_off_t off, - ntdb_off_t val) -{ - return ntdb->io->write_off(ntdb, off, val); -} - -#ifdef NTDB_TRACE -void ntdb_trace(struct ntdb_context *ntdb, const char *op); -void ntdb_trace_seqnum(struct ntdb_context *ntdb, uint32_t seqnum, const char *op); -void ntdb_trace_open(struct ntdb_context *ntdb, const char *op, - unsigned hash_size, unsigned ntdb_flags, unsigned open_flags); -void ntdb_trace_ret(struct ntdb_context *ntdb, const char *op, int ret); -void ntdb_trace_retrec(struct ntdb_context *ntdb, const char *op, NTDB_DATA ret); -void ntdb_trace_1rec(struct ntdb_context *ntdb, const char *op, - NTDB_DATA rec); -void ntdb_trace_1rec_ret(struct ntdb_context *ntdb, const char *op, - NTDB_DATA rec, int ret); -void ntdb_trace_1rec_retrec(struct ntdb_context *ntdb, const char *op, - NTDB_DATA rec, NTDB_DATA ret); -void ntdb_trace_2rec_flag_ret(struct ntdb_context *ntdb, const char *op, - NTDB_DATA rec1, NTDB_DATA rec2, unsigned flag, - int ret); -void ntdb_trace_2rec_retrec(struct ntdb_context *ntdb, const char *op, - NTDB_DATA rec1, NTDB_DATA rec2, NTDB_DATA ret); -#else -#define ntdb_trace(ntdb, op) -#define ntdb_trace_seqnum(ntdb, seqnum, op) -#define ntdb_trace_open(ntdb, op, hash_size, ntdb_flags, open_flags) -#define ntdb_trace_ret(ntdb, op, ret) -#define ntdb_trace_retrec(ntdb, op, ret) -#define ntdb_trace_1rec(ntdb, op, rec) -#define ntdb_trace_1rec_ret(ntdb, op, rec, ret) -#define ntdb_trace_1rec_retrec(ntdb, op, rec, ret) -#define ntdb_trace_2rec_flag_ret(ntdb, op, rec1, rec2, flag, ret) -#define ntdb_trace_2rec_retrec(ntdb, op, rec1, rec2, ret) -#endif /* !NTDB_TRACE */ - -#endif diff --git a/ccan/ntdb/python/pyntdb.c b/ccan/ntdb/python/pyntdb.c deleted file mode 100644 index 72e62647..00000000 --- a/ccan/ntdb/python/pyntdb.c +++ /dev/null @@ -1,643 +0,0 @@ -/* - Unix SMB/CIFS implementation. - - Python interface to ntdb. Simply modified from tdb version. 
- - Copyright (C) 2004-2006 Tim Potter - Copyright (C) 2007-2008 Jelmer Vernooij - Copyright (C) 2011 Rusty Russell - - ** NOTE! The following LGPL license applies to the ntdb - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ - -#include -#include "replace.h" -#include "system/filesys.h" - -/* Include ntdb headers */ -#include - -typedef struct { - PyObject_HEAD - struct ntdb_context *ctx; - bool closed; -} PyNtdbObject; - -static PyTypeObject PyNtdb; - -static void PyErr_SetTDBError(enum NTDB_ERROR e) -{ - PyErr_SetObject(PyExc_RuntimeError, - Py_BuildValue("(i,s)", e, ntdb_errorstr(e))); -} - -static NTDB_DATA PyString_AsNtdb_Data(PyObject *data) -{ - NTDB_DATA ret; - ret.dptr = (unsigned char *)PyString_AsString(data); - ret.dsize = PyString_Size(data); - return ret; -} - -static PyObject *PyString_FromNtdb_Data(NTDB_DATA data) -{ - PyObject *ret = PyString_FromStringAndSize((const char *)data.dptr, - data.dsize); - free(data.dptr); - return ret; -} - -#define PyErr_NTDB_ERROR_IS_ERR_RAISE(ret) \ - if (ret != NTDB_SUCCESS) { \ - PyErr_SetTDBError(ret); \ - return NULL; \ - } - -#define PyNtdb_CHECK_CLOSED(pyobj) \ - if (pyobj->closed) {\ - PyErr_SetObject(PyExc_RuntimeError, \ - Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed")); \ - return NULL; \ - } - -static void stderr_log(struct ntdb_context 
*ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) -{ - fprintf(stderr, "%s:%s:%s\n", - ntdb_name(ntdb), ntdb_errorstr(ecode), message); -} - -static PyObject *py_ntdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs) -{ - char *name = NULL; - int ntdb_flags = NTDB_DEFAULT, flags = O_RDWR, mode = 0600; - struct ntdb_context *ctx; - PyNtdbObject *ret; - union ntdb_attribute logattr; - const char *kwnames[] = { "name", "ntdb_flags", "flags", "mode", NULL }; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii", cast_const2(char **, kwnames), &name, &ntdb_flags, &flags, &mode)) - return NULL; - - if (name == NULL) { - ntdb_flags |= NTDB_INTERNAL; - name = ""; - } - - logattr.log.base.attr = NTDB_ATTRIBUTE_LOG; - logattr.log.base.next = NULL; - logattr.log.fn = stderr_log; - ctx = ntdb_open(name, ntdb_flags, flags, mode, &logattr); - if (ctx == NULL) { - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - ret = PyObject_New(PyNtdbObject, &PyNtdb); - if (!ret) { - ntdb_close(ctx); - return NULL; - } - - ret->ctx = ctx; - ret->closed = false; - return (PyObject *)ret; -} - -static PyObject *obj_transaction_cancel(PyNtdbObject *self) -{ - PyNtdb_CHECK_CLOSED(self); - ntdb_transaction_cancel(self->ctx); - Py_RETURN_NONE; -} - -static PyObject *obj_transaction_commit(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - PyNtdb_CHECK_CLOSED(self); - ret = ntdb_transaction_commit(self->ctx); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_transaction_prepare_commit(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - PyNtdb_CHECK_CLOSED(self); - ret = ntdb_transaction_prepare_commit(self->ctx); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_transaction_start(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - PyNtdb_CHECK_CLOSED(self); - ret = ntdb_transaction_start(self->ctx); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject 
*obj_lockall(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - PyNtdb_CHECK_CLOSED(self); - ret = ntdb_lockall(self->ctx); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_unlockall(PyNtdbObject *self) -{ - PyNtdb_CHECK_CLOSED(self); - ntdb_unlockall(self->ctx); - Py_RETURN_NONE; -} - -static PyObject *obj_lockall_read(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - PyNtdb_CHECK_CLOSED(self); - ret = ntdb_lockall_read(self->ctx); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_unlockall_read(PyNtdbObject *self) -{ - PyNtdb_CHECK_CLOSED(self); - ntdb_unlockall_read(self->ctx); - Py_RETURN_NONE; -} - -static PyObject *obj_close(PyNtdbObject *self) -{ - int ret; - if (self->closed) - Py_RETURN_NONE; - ret = ntdb_close(self->ctx); - self->closed = true; - if (ret != 0) { - PyErr_SetTDBError(NTDB_ERR_IO); - return NULL; - } - Py_RETURN_NONE; -} - -static PyObject *obj_get(PyNtdbObject *self, PyObject *args) -{ - NTDB_DATA key, data; - PyObject *py_key; - enum NTDB_ERROR ret; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "O", &py_key)) - return NULL; - - key = PyString_AsNtdb_Data(py_key); - ret = ntdb_fetch(self->ctx, key, &data); - if (ret == NTDB_ERR_NOEXIST) - Py_RETURN_NONE; - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - return PyString_FromNtdb_Data(data); -} - -static PyObject *obj_append(PyNtdbObject *self, PyObject *args) -{ - NTDB_DATA key, data; - PyObject *py_key, *py_data; - enum NTDB_ERROR ret; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "OO", &py_key, &py_data)) - return NULL; - - key = PyString_AsNtdb_Data(py_key); - data = PyString_AsNtdb_Data(py_data); - - ret = ntdb_append(self->ctx, key, data); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_firstkey(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - NTDB_DATA key; - - PyNtdb_CHECK_CLOSED(self); - - ret = ntdb_firstkey(self->ctx, &key); - if (ret == NTDB_ERR_NOEXIST) - 
Py_RETURN_NONE; - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - - return PyString_FromNtdb_Data(key); -} - -static PyObject *obj_nextkey(PyNtdbObject *self, PyObject *args) -{ - NTDB_DATA key; - PyObject *py_key; - enum NTDB_ERROR ret; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "O", &py_key)) - return NULL; - - /* Malloc here, since ntdb_nextkey frees. */ - key.dsize = PyString_Size(py_key); - key.dptr = malloc(key.dsize); - memcpy(key.dptr, PyString_AsString(py_key), key.dsize); - - ret = ntdb_nextkey(self->ctx, &key); - if (ret == NTDB_ERR_NOEXIST) - Py_RETURN_NONE; - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - - return PyString_FromNtdb_Data(key); -} - -static PyObject *obj_delete(PyNtdbObject *self, PyObject *args) -{ - NTDB_DATA key; - PyObject *py_key; - enum NTDB_ERROR ret; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "O", &py_key)) - return NULL; - - key = PyString_AsNtdb_Data(py_key); - ret = ntdb_delete(self->ctx, key); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_has_key(PyNtdbObject *self, PyObject *args) -{ - NTDB_DATA key; - PyObject *py_key; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "O", &py_key)) - return NULL; - - key = PyString_AsNtdb_Data(py_key); - if (ntdb_exists(self->ctx, key)) - return Py_True; - return Py_False; -} - -static PyObject *obj_store(PyNtdbObject *self, PyObject *args) -{ - NTDB_DATA key, value; - enum NTDB_ERROR ret; - int flag = NTDB_REPLACE; - PyObject *py_key, *py_value; - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "OO|i", &py_key, &py_value, &flag)) - return NULL; - - key = PyString_AsNtdb_Data(py_key); - value = PyString_AsNtdb_Data(py_value); - - ret = ntdb_store(self->ctx, key, value, flag); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_add_flag(PyNtdbObject *self, PyObject *args) -{ - unsigned flag; - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "I", &flag)) - return NULL; 
- - ntdb_add_flag(self->ctx, flag); - Py_RETURN_NONE; -} - -static PyObject *obj_remove_flag(PyNtdbObject *self, PyObject *args) -{ - unsigned flag; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyArg_ParseTuple(args, "I", &flag)) - return NULL; - - ntdb_remove_flag(self->ctx, flag); - Py_RETURN_NONE; -} - -typedef struct { - PyObject_HEAD - NTDB_DATA current; - bool end; - PyNtdbObject *iteratee; -} PyNtdbIteratorObject; - -static PyObject *ntdb_iter_next(PyNtdbIteratorObject *self) -{ - enum NTDB_ERROR e; - PyObject *ret; - if (self->end) - return NULL; - ret = PyString_FromStringAndSize((const char *)self->current.dptr, - self->current.dsize); - e = ntdb_nextkey(self->iteratee->ctx, &self->current); - if (e == NTDB_ERR_NOEXIST) - self->end = true; - else - PyErr_NTDB_ERROR_IS_ERR_RAISE(e); - return ret; -} - -static void ntdb_iter_dealloc(PyNtdbIteratorObject *self) -{ - Py_DECREF(self->iteratee); - PyObject_Del(self); -} - -PyTypeObject PyNtdbIterator = { - .tp_name = "Iterator", - .tp_basicsize = sizeof(PyNtdbIteratorObject), - .tp_iternext = (iternextfunc)ntdb_iter_next, - .tp_dealloc = (destructor)ntdb_iter_dealloc, - .tp_flags = Py_TPFLAGS_DEFAULT, - .tp_iter = PyObject_SelfIter, -}; - -static PyObject *ntdb_object_iter(PyNtdbObject *self) -{ - PyNtdbIteratorObject *ret; - enum NTDB_ERROR e; - PyNtdb_CHECK_CLOSED(self); - - ret = PyObject_New(PyNtdbIteratorObject, &PyNtdbIterator); - if (!ret) - return NULL; - e = ntdb_firstkey(self->ctx, &ret->current); - if (e == NTDB_ERR_NOEXIST) { - ret->end = true; - } else { - PyErr_NTDB_ERROR_IS_ERR_RAISE(e); - ret->end = false; - } - ret->iteratee = self; - Py_INCREF(self); - return (PyObject *)ret; -} - -static PyObject *obj_clear(PyNtdbObject *self) -{ - enum NTDB_ERROR ret; - PyNtdb_CHECK_CLOSED(self); - ret = ntdb_wipe_all(self->ctx); - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - Py_RETURN_NONE; -} - -static PyObject *obj_enable_seqnum(PyNtdbObject *self) -{ - PyNtdb_CHECK_CLOSED(self); - ntdb_add_flag(self->ctx, 
NTDB_SEQNUM); - Py_RETURN_NONE; -} - -static PyMethodDef ntdb_object_methods[] = { - { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS, - "S.transaction_cancel() -> None\n" - "Cancel the currently active transaction." }, - { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS, - "S.transaction_commit() -> None\n" - "Commit the currently active transaction." }, - { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS, - "S.transaction_prepare_commit() -> None\n" - "Prepare to commit the currently active transaction" }, - { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS, - "S.transaction_start() -> None\n" - "Start a new transaction." }, - { "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL }, - { "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL }, - { "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL }, - { "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL }, - { "close", (PyCFunction)obj_close, METH_NOARGS, NULL }, - { "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n" - "Fetch a value." }, - { "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n" - "Append data to an existing key." }, - { "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n" - "Return the first key in this database." }, - { "nextkey", (PyCFunction)obj_nextkey, METH_NOARGS, "S.nextkey(key) -> data\n" - "Return the next key in this database." }, - { "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n" - "Delete an entry." }, - { "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> None\n" - "Check whether key exists in this database." }, - { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None" - "Store data." 
}, - { "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" }, - { "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" }, - { "iterkeys", (PyCFunction)ntdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" }, - { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n" - "Wipe the entire database." }, - { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS, - "S.enable_seqnum() -> None" }, - { NULL } -}; - -static PyObject *obj_get_flags(PyNtdbObject *self, void *closure) -{ - PyNtdb_CHECK_CLOSED(self); - return PyInt_FromLong(ntdb_get_flags(self->ctx)); -} - -static PyObject *obj_get_filename(PyNtdbObject *self, void *closure) -{ - PyNtdb_CHECK_CLOSED(self); - return PyString_FromString(ntdb_name(self->ctx)); -} - -static PyObject *obj_get_seqnum(PyNtdbObject *self, void *closure) -{ - PyNtdb_CHECK_CLOSED(self); - return PyInt_FromLong(ntdb_get_seqnum(self->ctx)); -} - - -static PyGetSetDef ntdb_object_getsetters[] = { - { cast_const(char *, "flags"), (getter)obj_get_flags, NULL, NULL }, - { cast_const(char *, "filename"), (getter)obj_get_filename, NULL, - cast_const(char *, "The filename of this NTDB file.")}, - { cast_const(char *, "seqnum"), (getter)obj_get_seqnum, NULL, NULL }, - { NULL } -}; - -static PyObject *ntdb_object_repr(PyNtdbObject *self) -{ - if (ntdb_get_flags(self->ctx) & NTDB_INTERNAL) { - return PyString_FromString("Ntdb()"); - } else { - return PyString_FromFormat("Ntdb('%s')", ntdb_name(self->ctx)); - } -} - -static void ntdb_object_dealloc(PyNtdbObject *self) -{ - if (!self->closed) - ntdb_close(self->ctx); - self->ob_type->tp_free(self); -} - -static PyObject *obj_getitem(PyNtdbObject *self, PyObject *key) -{ - NTDB_DATA tkey, val; - enum NTDB_ERROR ret; - - PyNtdb_CHECK_CLOSED(self); - - if (!PyString_Check(key)) { - PyErr_SetString(PyExc_TypeError, "Expected string as key"); - return NULL; - } - - tkey.dptr = (unsigned char *)PyString_AsString(key); - 
tkey.dsize = PyString_Size(key); - - ret = ntdb_fetch(self->ctx, tkey, &val); - if (ret == NTDB_ERR_NOEXIST) { - PyErr_SetString(PyExc_KeyError, "No such NTDB entry"); - return NULL; - } else { - PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); - return PyString_FromNtdb_Data(val); - } -} - -static int obj_setitem(PyNtdbObject *self, PyObject *key, PyObject *value) -{ - NTDB_DATA tkey, tval; - enum NTDB_ERROR ret; - if (self->closed) { - PyErr_SetObject(PyExc_RuntimeError, - Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed")); - return -1; - } - - if (!PyString_Check(key)) { - PyErr_SetString(PyExc_TypeError, "Expected string as key"); - return -1; - } - - tkey = PyString_AsNtdb_Data(key); - - if (value == NULL) { - ret = ntdb_delete(self->ctx, tkey); - } else { - if (!PyString_Check(value)) { - PyErr_SetString(PyExc_TypeError, "Expected string as value"); - return -1; - } - - tval = PyString_AsNtdb_Data(value); - - ret = ntdb_store(self->ctx, tkey, tval, NTDB_REPLACE); - } - - if (ret != NTDB_SUCCESS) { - PyErr_SetTDBError(ret); - return -1; - } - - return ret; -} - -static PyMappingMethods ntdb_object_mapping = { - .mp_subscript = (binaryfunc)obj_getitem, - .mp_ass_subscript = (objobjargproc)obj_setitem, -}; - -static PyTypeObject PyNtdb = { - .tp_name = "ntdb.Ntdb", - .tp_basicsize = sizeof(PyNtdbObject), - .tp_methods = ntdb_object_methods, - .tp_getset = ntdb_object_getsetters, - .tp_new = py_ntdb_open, - .tp_doc = "A NTDB file", - .tp_repr = (reprfunc)ntdb_object_repr, - .tp_dealloc = (destructor)ntdb_object_dealloc, - .tp_as_mapping = &ntdb_object_mapping, - .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER, - .tp_iter = (getiterfunc)ntdb_object_iter, -}; - -static PyMethodDef ntdb_methods[] = { - { "open", (PyCFunction)py_ntdb_open, METH_VARARGS|METH_KEYWORDS, "open(name, hash_size=0, ntdb_flags=NTDB_DEFAULT, flags=O_RDWR, mode=0600)\n" - "Open a NTDB file." 
}, - { NULL } -}; - -void initntdb(void); -void initntdb(void) -{ - PyObject *m; - - if (PyType_Ready(&PyNtdb) < 0) - return; - - if (PyType_Ready(&PyNtdbIterator) < 0) - return; - - m = Py_InitModule3("ntdb", ntdb_methods, "NTDB is a simple key-value database similar to GDBM that supports multiple writers."); - if (m == NULL) - return; - - PyModule_AddObject(m, "REPLACE", PyInt_FromLong(NTDB_REPLACE)); - PyModule_AddObject(m, "INSERT", PyInt_FromLong(NTDB_INSERT)); - PyModule_AddObject(m, "MODIFY", PyInt_FromLong(NTDB_MODIFY)); - - PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(NTDB_DEFAULT)); - PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(NTDB_INTERNAL)); - PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(NTDB_NOLOCK)); - PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(NTDB_NOMMAP)); - PyModule_AddObject(m, "CONVERT", PyInt_FromLong(NTDB_CONVERT)); - PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(NTDB_NOSYNC)); - PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(NTDB_SEQNUM)); - PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(NTDB_ALLOW_NESTING)); - - PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText")); - - PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION)); - - Py_INCREF(&PyNtdb); - PyModule_AddObject(m, "Ntdb", (PyObject *)&PyNtdb); - - Py_INCREF(&PyNtdbIterator); -} diff --git a/ccan/ntdb/summary.c b/ccan/ntdb/summary.c deleted file mode 100644 index 5a75dc5b..00000000 --- a/ccan/ntdb/summary.c +++ /dev/null @@ -1,321 +0,0 @@ - /* - Trivial Database 2: human-readable summary code - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. 
- - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include "private.h" -#include - -#define SUMMARY_FORMAT \ - "Size of file/data: %zu/%zu\n" \ - "Number of records: %zu\n" \ - "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \ - "Smallest/average/largest data: %zu/%zu/%zu\n%s" \ - "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \ - "Number of free records: %zu\n" \ - "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \ - "Number of uncoalesced records: %zu\n" \ - "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ - "Toplevel hash used: %u of %u\n" \ - "Number of hashes: %zu\n" \ - "Smallest/average/largest hash chains: %zu/%zu/%zu\n%s" \ - "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" - -#define BUCKET_SUMMARY_FORMAT_A \ - "Free bucket %zu: total entries %zu.\n" \ - "Smallest/average/largest length: %zu/%zu/%zu\n%s" -#define BUCKET_SUMMARY_FORMAT_B \ - "Free bucket %zu-%zu: total entries %zu.\n" \ - "Smallest/average/largest length: %zu/%zu/%zu\n%s" -#define CAPABILITY_FORMAT \ - "Capability %llu%s\n" - -#define HISTO_WIDTH 70 -#define HISTO_HEIGHT 20 - -static ntdb_off_t count_hash(struct ntdb_context *ntdb, - ntdb_off_t hash_off, - ntdb_off_t num) -{ - const ntdb_off_t *h; - ntdb_off_t i, count = 0; - - h = ntdb_access_read(ntdb, hash_off, sizeof(*h) * num, true); - if (NTDB_PTR_IS_ERR(h)) { - return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(h)); - } - for (i = 0; i < num; i++) - count += (h[i] != 0); - - ntdb_access_release(ntdb, h); - return count; -} - -static enum NTDB_ERROR summarize(struct ntdb_context *ntdb, - struct tally *ftables, - struct tally *fr, - struct tally *keys, - 
struct tally *data, - struct tally *extra, - struct tally *uncoal, - struct tally *hashes, - size_t *num_caps) -{ - ntdb_off_t off; - ntdb_len_t len; - ntdb_len_t unc = 0; - - for (off = sizeof(struct ntdb_header); - off < ntdb->file->map_size; - off += len) { - const union { - struct ntdb_used_record u; - struct ntdb_free_record f; - struct ntdb_recovery_record r; - } *p; - /* We might not be able to get the whole thing. */ - p = ntdb_access_read(ntdb, off, sizeof(p->f), true); - if (NTDB_PTR_IS_ERR(p)) { - return NTDB_PTR_ERR(p); - } - if (frec_magic(&p->f) != NTDB_FREE_MAGIC) { - if (unc > 1) { - tally_add(uncoal, unc); - unc = 0; - } - } - - if (p->r.magic == NTDB_RECOVERY_INVALID_MAGIC - || p->r.magic == NTDB_RECOVERY_MAGIC) { - len = sizeof(p->r) + p->r.max_len; - } else if (frec_magic(&p->f) == NTDB_FREE_MAGIC) { - len = frec_len(&p->f); - tally_add(fr, len); - len += sizeof(p->u); - unc++; - } else if (rec_magic(&p->u) == NTDB_USED_MAGIC) { - len = sizeof(p->u) - + rec_key_length(&p->u) - + rec_data_length(&p->u) - + rec_extra_padding(&p->u); - - tally_add(keys, rec_key_length(&p->u)); - tally_add(data, rec_data_length(&p->u)); - tally_add(extra, rec_extra_padding(&p->u)); - } else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) { - ntdb_off_t count = count_hash(ntdb, - off + sizeof(p->u), - 1 << ntdb->hash_bits); - if (NTDB_OFF_IS_ERR(count)) { - return NTDB_OFF_TO_ERR(count); - } - tally_add(hashes, count); - tally_add(extra, rec_extra_padding(&p->u)); - len = sizeof(p->u) - + rec_data_length(&p->u) - + rec_extra_padding(&p->u); - } else if (rec_magic(&p->u) == NTDB_FTABLE_MAGIC) { - len = sizeof(p->u) - + rec_data_length(&p->u) - + rec_extra_padding(&p->u); - tally_add(ftables, rec_data_length(&p->u)); - tally_add(extra, rec_extra_padding(&p->u)); - } else if (rec_magic(&p->u) == NTDB_CHAIN_MAGIC) { - len = sizeof(p->u) - + rec_data_length(&p->u) - + rec_extra_padding(&p->u); - tally_add(hashes, - rec_data_length(&p->u)/sizeof(ntdb_off_t)); - tally_add(extra, 
rec_extra_padding(&p->u)); - } else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) { - len = sizeof(p->u) - + rec_data_length(&p->u) - + rec_extra_padding(&p->u); - (*num_caps)++; - } else { - len = dead_space(ntdb, off); - if (NTDB_OFF_IS_ERR(len)) { - return NTDB_OFF_TO_ERR(len); - } - } - ntdb_access_release(ntdb, p); - } - if (unc) - tally_add(uncoal, unc); - return NTDB_SUCCESS; -} - -static void add_capabilities(struct ntdb_context *ntdb, char *summary) -{ - ntdb_off_t off, next; - const struct ntdb_capability *cap; - size_t count = 0; - - /* Append to summary. */ - summary += strlen(summary); - - off = ntdb_read_off(ntdb, offsetof(struct ntdb_header, capabilities)); - if (NTDB_OFF_IS_ERR(off)) - return; - - /* Walk capability list. */ - for (; off; off = next) { - cap = ntdb_access_read(ntdb, off, sizeof(*cap), true); - if (NTDB_PTR_IS_ERR(cap)) { - break; - } - count++; - sprintf(summary, CAPABILITY_FORMAT, - cap->type & NTDB_CAP_TYPE_MASK, - /* Noopen? How did we get here? */ - (cap->type & NTDB_CAP_NOOPEN) ? " (unopenable)" - : ((cap->type & NTDB_CAP_NOWRITE) - && (cap->type & NTDB_CAP_NOCHECK)) ? " (uncheckable,read-only)" - : (cap->type & NTDB_CAP_NOWRITE) ? " (read-only)" - : (cap->type & NTDB_CAP_NOCHECK) ? 
" (uncheckable)" - : ""); - summary += strlen(summary); - next = cap->next; - ntdb_access_release(ntdb, cap); - } -} - -_PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, - enum ntdb_summary_flags flags, - char **summary) -{ - ntdb_len_t len; - size_t num_caps = 0; - struct tally *ftables, *freet, *keys, *data, *extra, *uncoal, *hashes; - char *freeg, *keysg, *datag, *extrag, *uncoalg, *hashesg; - enum NTDB_ERROR ecode; - - freeg = keysg = datag = extrag = uncoalg = hashesg = NULL; - - ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - ecode = ntdb_lock_expand(ntdb, F_RDLCK); - if (ecode != NTDB_SUCCESS) { - ntdb_allrecord_unlock(ntdb, F_RDLCK); - return ecode; - } - - /* Start stats off empty. */ - ftables = tally_new(HISTO_HEIGHT); - freet = tally_new(HISTO_HEIGHT); - keys = tally_new(HISTO_HEIGHT); - data = tally_new(HISTO_HEIGHT); - extra = tally_new(HISTO_HEIGHT); - uncoal = tally_new(HISTO_HEIGHT); - hashes = tally_new(HISTO_HEIGHT); - if (!ftables || !freet || !keys || !data || !extra - || !uncoal || !hashes) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_summary: failed to allocate" - " tally structures"); - goto unlock; - } - - ecode = summarize(ntdb, ftables, freet, keys, data, extra, - uncoal, hashes, &num_caps); - if (ecode != NTDB_SUCCESS) { - goto unlock; - } - - if (flags & NTDB_SUMMARY_HISTOGRAMS) { - freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT); - keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT); - datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT); - extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT); - uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT); - hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); - } - - /* 20 is max length of a %llu. */ - len = strlen(SUMMARY_FORMAT) + 33*20 + 1 - + (freeg ? strlen(freeg) : 0) - + (keysg ? strlen(keysg) : 0) - + (datag ? 
strlen(datag) : 0) - + (extrag ? strlen(extrag) : 0) - + (uncoalg ? strlen(uncoalg) : 0) - + (hashesg ? strlen(hashesg) : 0) - + num_caps * (strlen(CAPABILITY_FORMAT) + 20 - + strlen(" (uncheckable,read-only)")); - - *summary = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data); - if (!*summary) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_summary: failed to allocate string"); - goto unlock; - } - - sprintf(*summary, SUMMARY_FORMAT, - (size_t)ntdb->file->map_size, - tally_total(keys, NULL) + tally_total(data, NULL), - tally_num(keys), - tally_min(keys), tally_mean(keys), tally_max(keys), - keysg ? keysg : "", - tally_min(data), tally_mean(data), tally_max(data), - datag ? datag : "", - tally_min(extra), tally_mean(extra), tally_max(extra), - extrag ? extrag : "", - tally_num(freet), - tally_min(freet), tally_mean(freet), tally_max(freet), - freeg ? freeg : "", - tally_total(uncoal, NULL), - tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal), - uncoalg ? uncoalg : "", - (unsigned)count_hash(ntdb, sizeof(struct ntdb_header), - 1 << ntdb->hash_bits), - 1 << ntdb->hash_bits, - tally_num(hashes), - tally_min(hashes), tally_mean(hashes), tally_max(hashes), - hashesg ? 
hashesg : "", - tally_total(keys, NULL) * 100.0 / ntdb->file->map_size, - tally_total(data, NULL) * 100.0 / ntdb->file->map_size, - tally_total(extra, NULL) * 100.0 / ntdb->file->map_size, - tally_total(freet, NULL) * 100.0 / ntdb->file->map_size, - (tally_num(keys) + tally_num(freet) + tally_num(hashes)) - * sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size, - tally_num(ftables) * sizeof(struct ntdb_freetable) - * 100.0 / ntdb->file->map_size, - (tally_total(hashes, NULL) * sizeof(ntdb_off_t) - + (sizeof(ntdb_off_t) << ntdb->hash_bits)) - * 100.0 / ntdb->file->map_size); - - add_capabilities(ntdb, *summary); - -unlock: - ntdb->free_fn(freeg, ntdb->alloc_data); - ntdb->free_fn(keysg, ntdb->alloc_data); - ntdb->free_fn(datag, ntdb->alloc_data); - ntdb->free_fn(extrag, ntdb->alloc_data); - ntdb->free_fn(uncoalg, ntdb->alloc_data); - ntdb->free_fn(hashesg, ntdb->alloc_data); - ntdb->free_fn(freet, ntdb->alloc_data); - ntdb->free_fn(keys, ntdb->alloc_data); - ntdb->free_fn(data, ntdb->alloc_data); - ntdb->free_fn(extra, ntdb->alloc_data); - ntdb->free_fn(uncoal, ntdb->alloc_data); - ntdb->free_fn(ftables, ntdb->alloc_data); - ntdb->free_fn(hashes, ntdb->alloc_data); - - ntdb_allrecord_unlock(ntdb, F_RDLCK); - ntdb_unlock_expand(ntdb, F_RDLCK); - return ecode; -} diff --git a/ccan/ntdb/test/api-12-store.c b/ccan/ntdb/test/api-12-store.c deleted file mode 100644 index f5b3b72a..00000000 --- a/ccan/ntdb/test/api-12-store.c +++ /dev/null @@ -1,55 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "helpapi-external-agent.h" - -/* We use the same seed which we saw a failure on. 
*/ -static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) -{ - return hash64_stable((const unsigned char *)key, len, - *(uint64_t *)p); -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - uint64_t seed = 16014841315512641303ULL; - union ntdb_attribute fixed_hattr - = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = fixedhash, - .data = &seed } }; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; - NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; - - fixed_hattr.base.next = &tap_log_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-12-store.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr); - ok1(ntdb); - if (!ntdb) - continue; - - /* We seemed to lose some keys. - * Insert and check they're in there! */ - for (j = 0; j < 500; j++) { - NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(d, data)); - free(d.dptr); - } - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-13-delete.c b/ccan/ntdb/test/api-13-delete.c deleted file mode 100644 index 44820ff1..00000000 --- a/ccan/ntdb/test/api-13-delete.c +++ /dev/null @@ -1,202 +0,0 @@ -#include "../private.h" // For NTDB_TOPLEVEL_HASH_BITS -#include -#include "../ntdb.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -/* We rig the hash so adjacent-numbered records always clash. */ -static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv) -{ - return *((const unsigned int *)key) / 2; -} - -/* We use the same seed which we saw a failure on. 
*/ -static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) -{ - return hash64_stable((const unsigned char *)key, len, - *(uint64_t *)p); -} - -static bool store_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA d, data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < 1000; i++) { - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return false; - ntdb_fetch(ntdb, key, &d); - if (!ntdb_deq(d, data)) - return false; - free(d.dptr); - } - return true; -} - -static void test_val(struct ntdb_context *ntdb, uint64_t val) -{ - uint64_t v; - NTDB_DATA key = { (unsigned char *)&v, sizeof(v) }; - NTDB_DATA d, data = { (unsigned char *)&v, sizeof(v) }; - - /* Insert an entry, then delete it. */ - v = val; - /* Delete should fail. */ - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Insert should succeed. */ - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Delete should succeed. */ - ok1(ntdb_delete(ntdb, key) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Re-add it, then add collision. */ - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - v = val + 1; - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Can find both? */ - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - v = val; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - - /* Delete second one. */ - v = val + 1; - ok1(ntdb_delete(ntdb, key) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Re-add */ - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Now, try deleting first one. */ - v = val; - ok1(ntdb_delete(ntdb, key) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Can still find second? 
*/ - v = val + 1; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - - /* Now, this will be ideally placed. */ - v = val + 2; - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* This will collide with both. */ - v = val; - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - - /* We can still find them all, right? */ - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - v = val + 1; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - v = val + 2; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - - /* And if we delete val + 1, that val + 2 should not move! */ - v = val + 1; - ok1(ntdb_delete(ntdb, key) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - v = val; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - v = val + 2; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == data.dsize); - free(d.dptr); - - /* Delete those two, so we are empty. */ - ok1(ntdb_delete(ntdb, key) == 0); - v = val; - ok1(ntdb_delete(ntdb, key) == 0); - - ok1(ntdb_check(ntdb, NULL, NULL) == 0); -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - uint64_t seed = 16014841315512641303ULL; - union ntdb_attribute clash_hattr - = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = clash } }; - union ntdb_attribute fixed_hattr - = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = fixedhash, - .data = &seed } }; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - /* These two values gave trouble before. 
*/ - int vals[] = { 755, 837 }; - - clash_hattr.base.next = &tap_log_attr; - fixed_hattr.base.next = &tap_log_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) - * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Check start of hash table. */ - test_val(ntdb, 0); - - /* Check end of hash table. */ - test_val(ntdb, -1ULL); - - /* Check mixed bitpattern. */ - test_val(ntdb, 0x123456789ABCDEF0ULL); - - ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0)); - ntdb_close(ntdb); - - /* Deleting these entries in the db gave problems. */ - ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(store_records(ntdb)); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) { - NTDB_DATA key; - - key.dptr = (unsigned char *)&vals[j]; - key.dsize = sizeof(vals[j]); - ok1(ntdb_delete(ntdb, key) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - } - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-14-exists.c b/ccan/ntdb/test/api-14-exists.c deleted file mode 100644 index 37f40b00..00000000 --- a/ccan/ntdb/test/api-14-exists.c +++ /dev/null @@ -1,53 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -static bool test_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < 1000; i++) { - if (ntdb_exists(ntdb, key)) - return false; - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return 
false; - if (!ntdb_exists(ntdb, key)) - return false; - } - - for (i = 0; i < 1000; i++) { - if (!ntdb_exists(ntdb, key)) - return false; - if (ntdb_delete(ntdb, key) != 0) - return false; - if (ntdb_exists(ntdb, key)) - return false; - } - return true; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-14-exists.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (ok1(ntdb)) - ok1(test_records(ntdb)); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-16-wipe_all.c b/ccan/ntdb/test/api-16-wipe_all.c deleted file mode 100644 index fb70523f..00000000 --- a/ccan/ntdb/test/api-16-wipe_all.c +++ /dev/null @@ -1,45 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -static bool add_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < 1000; i++) { - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return false; - } - return true; -} - - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-16-wipe_all.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (ok1(ntdb)) { - NTDB_DATA key; - 
ok1(add_records(ntdb)); - ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS); - ok1(ntdb_firstkey(ntdb, &key) == NTDB_ERR_NOEXIST); - ntdb_close(ntdb); - } - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-20-alloc-attr.c b/ccan/ntdb/test/api-20-alloc-attr.c deleted file mode 100644 index 868764fa..00000000 --- a/ccan/ntdb/test/api-20-alloc-attr.c +++ /dev/null @@ -1,107 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include -#include - -#include "logging.h" -#include "helpapi-external-agent.h" - -static const struct ntdb_context *curr_ntdb; -static const struct ntdb_file *curr_file; - -static int owner_null_count, - owner_weird_count, alloc_count, free_count, expand_count; - -static void *test_alloc(const void *owner, size_t len, void *priv_data) -{ - void *ret; - - if (!owner) { - owner_null_count++; - } else if (owner != curr_ntdb && owner != curr_file) { - owner_weird_count++; - } - - alloc_count++; - ret = malloc(len); - - /* The first time, this is the current ntdb, next is - * for the file struct. 
*/ - if (!owner) { - if (!curr_ntdb) { - curr_ntdb = ret; - } else if (!curr_file) { - curr_file = ret; - } - } - assert(priv_data == &owner_weird_count); - return ret; -} - -static void *test_expand(void *old, size_t newlen, void *priv_data) -{ - expand_count++; - - assert(priv_data == &owner_weird_count); - return realloc(old, newlen); -} - -static void test_free(void *old, void *priv_data) -{ - assert(priv_data == &owner_weird_count); - if (old) { - free_count++; - } - free(old); -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - union ntdb_attribute alloc_attr; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; - NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; - - alloc_attr.base.next = &tap_log_attr; - alloc_attr.base.attr = NTDB_ATTRIBUTE_ALLOCATOR; - - alloc_attr.alloc.alloc = test_alloc; - alloc_attr.alloc.expand = test_expand; - alloc_attr.alloc.free = test_free; - alloc_attr.alloc.priv_data = &owner_weird_count; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 700 * 3 + 4) + 1); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - curr_ntdb = NULL; - curr_file = NULL; - ntdb = ntdb_open("run-20-alloc-attr.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &alloc_attr); - ok1(ntdb); - if (!ntdb) - continue; - - for (j = 0; j < 700; j++) { - NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(d, data)); - test_free(d.dptr, &owner_weird_count); - } - ntdb_close(ntdb); - - ok1(owner_null_count == 2+i*2); - ok1(owner_weird_count == 0); - ok1(alloc_count == free_count); - ok1(expand_count != 0); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-21-parse_record.c 
b/ccan/ntdb/test/api-21-parse_record.c deleted file mode 100644 index a841a1a7..00000000 --- a/ccan/ntdb/test/api-21-parse_record.c +++ /dev/null @@ -1,67 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expected) -{ - if (!ntdb_deq(data, *expected)) - return NTDB_ERR_EINVAL; - return NTDB_SUCCESS; -} - -static enum NTDB_ERROR parse_err(NTDB_DATA key, NTDB_DATA data, void *unused) -{ - return 100; -} - -static bool test_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < 1000; i++) { - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return false; - } - - for (i = 0; i < 1000; i++) { - if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_SUCCESS) - return false; - } - - if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_ERR_NOEXIST) - return false; - - /* Test error return from parse function. 
*/ - i = 0; - if (ntdb_parse_record(ntdb, key, parse_err, NULL) != 100) - return false; - - return true; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-21-parse_record.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (ok1(ntdb)) - ok1(test_records(ntdb)); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-55-transaction.c b/ccan/ntdb/test/api-55-transaction.c deleted file mode 100644 index 21dd1c46..00000000 --- a/ccan/ntdb/test/api-55-transaction.c +++ /dev/null @@ -1,72 +0,0 @@ -#include "../private.h" // struct ntdb_context -#include "../ntdb.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - unsigned char *buffer; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data; - - buffer = malloc(1000); - for (i = 0; i < 1000; i++) - buffer[i] = i; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-55-transaction.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(ntdb_transaction_start(ntdb) == 0); - data.dptr = buffer; - data.dsize = 1000; - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); - ok1(data.dsize == 1000); - ok1(memcmp(data.dptr, buffer, data.dsize) == 0); - free(data.dptr); - - /* Cancelling a 
transaction means no store */ - ntdb_transaction_cancel(ntdb); - ok1(ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_ERR_NOEXIST); - - /* Commit the transaction. */ - ok1(ntdb_transaction_start(ntdb) == 0); - data.dptr = buffer; - data.dsize = 1000; - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); - ok1(data.dsize == 1000); - ok1(memcmp(data.dptr, buffer, data.dsize) == 0); - free(data.dptr); - ok1(ntdb_transaction_commit(ntdb) == 0); - ok1(ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); - ok1(data.dsize == 1000); - ok1(memcmp(data.dptr, buffer, data.dsize) == 0); - free(data.dptr); - - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - free(buffer); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-60-noop-transaction.c b/ccan/ntdb/test/api-60-noop-transaction.c deleted file mode 100644 index 53a4ade6..00000000 --- a/ccan/ntdb/test/api-60-noop-transaction.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "../private.h" // struct ntdb_context -#include "../ntdb.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4), d; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 12 + 1); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-60-transaction.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - - 
ok1(ntdb_transaction_start(ntdb) == 0); - /* Do an identical replace. */ - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - ok1(ntdb_transaction_commit(ntdb) == 0); - - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(data, d)); - free(d.dptr); - ntdb_close(ntdb); - - /* Reopen, fetch. */ - ntdb = ntdb_open("api-60-transaction.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(data, d)); - free(d.dptr); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-80-tdb_fd.c b/ccan/ntdb/test/api-80-tdb_fd.c deleted file mode 100644 index 0d37754e..00000000 --- a/ccan/ntdb/test/api-80-tdb_fd.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 3); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-80-ntdb_fd.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - continue; - - if (flags[i] & NTDB_INTERNAL) - ok1(ntdb_fd(ntdb) == -1); - else - ok1(ntdb_fd(ntdb) > 2); - ntdb_close(ntdb); - ok1(tap_log_messages == 0); - } - return exit_status(); -} diff --git a/ccan/ntdb/test/api-81-seqnum.c b/ccan/ntdb/test/api-81-seqnum.c deleted file mode 100644 index 33ced82c..00000000 --- a/ccan/ntdb/test/api-81-seqnum.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include 
"tap-interface.h" -#include -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i, seq; - struct ntdb_context *ntdb; - NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-81-seqnum.ntdb", - flags[i]|NTDB_SEQNUM|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - continue; - - seq = 0; - ok1(ntdb_get_seqnum(ntdb) == seq); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - /* Fetch doesn't change seqnum */ - if (ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS)) - free(d.dptr); - ok1(ntdb_get_seqnum(ntdb) == seq); - ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - - ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - /* Empty append works */ - ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - - ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - - if (!(flags[i] & NTDB_INTERNAL)) { - ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == ++seq); - ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS); - ok1(ntdb_get_seqnum(ntdb) == seq); - - ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_get_seqnum(ntdb) == seq + 
1); - ntdb_transaction_cancel(ntdb); - ok1(ntdb_get_seqnum(ntdb) == seq); - } - ntdb_close(ntdb); - ok1(tap_log_messages == 0); - } - return exit_status(); -} diff --git a/ccan/ntdb/test/api-82-lockattr.c b/ccan/ntdb/test/api-82-lockattr.c deleted file mode 100644 index 3b7ba8c8..00000000 --- a/ccan/ntdb/test/api-82-lockattr.c +++ /dev/null @@ -1,238 +0,0 @@ -#include "../private.h" // for ntdb_fcntl_unlock -#include "../ntdb.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "helpapi-external-agent.h" - -static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag, - void *_err) -{ - int *lock_err = _err; - struct flock fl; - int ret; - - if (*lock_err) { - errno = *lock_err; - return -1; - } - - do { - fl.l_type = rw; - fl.l_whence = SEEK_SET; - fl.l_start = off; - fl.l_len = len; - - if (waitflag) - ret = fcntl(fd, F_SETLKW, &fl); - else - ret = fcntl(fd, F_SETLK, &fl); - } while (ret != 0 && errno == EINTR); - - return ret; -} - -static int trav_err; -static int trav(struct ntdb_context *ntdb, NTDB_DATA k, NTDB_DATA d, int *terr) -{ - *terr = trav_err; - return 0; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - union ntdb_attribute lock_attr; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - int lock_err; - - lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK; - lock_attr.base.next = &tap_log_attr; - lock_attr.flock.lock = mylock; - lock_attr.flock.unlock = ntdb_fcntl_unlock; - lock_attr.flock.data = &lock_err; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 81); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - NTDB_DATA d; - - /* Nonblocking open; expect no error message. 
*/ - lock_err = EAGAIN; - ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); - ok(errno == lock_err, "Errno is %u", errno); - ok1(!ntdb); - ok1(tap_log_messages == 0); - - lock_err = EINTR; - ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); - ok(errno == lock_err, "Errno is %u", errno); - ok1(!ntdb); - ok1(tap_log_messages == 0); - - /* Forced fail open. */ - lock_err = ENOMEM; - ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); - ok1(errno == lock_err); - ok1(!ntdb); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - lock_err = 0; - ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); - if (!ok1(ntdb)) - continue; - ok1(tap_log_messages == 0); - - /* Nonblocking store. */ - lock_err = EAGAIN; - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - /* Nonblocking fetch. */ - lock_err = EAGAIN; - ok1(!ntdb_exists(ntdb, key)); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(!ntdb_exists(ntdb, key)); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(!ntdb_exists(ntdb, key)); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - lock_err = EAGAIN; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - /* Nonblocking delete. 
*/ - lock_err = EAGAIN; - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - /* Nonblocking locks. */ - lock_err = EAGAIN; - ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - lock_err = EAGAIN; - ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - lock_err = EAGAIN; - ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK); - /* This actually does divide and conquer. */ - ok1(tap_log_messages > 0); - tap_log_messages = 0; - - lock_err = EAGAIN; - ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages > 0); - tap_log_messages = 0; - - /* Nonblocking traverse; go nonblock partway through. */ - lock_err = 0; - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - /* Need two entries to ensure two lock attempts! 
*/ - ok1(ntdb_store(ntdb, ntdb_mkdata("key2", 4), data, - NTDB_REPLACE) == 0); - trav_err = EAGAIN; - ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - trav_err = EINTR; - lock_err = 0; - ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - trav_err = ENOMEM; - lock_err = 0; - ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - /* Nonblocking transactions. */ - lock_err = EAGAIN; - ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = EINTR; - ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - lock_err = ENOMEM; - ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - tap_log_messages = 0; - - /* Nonblocking transaction prepare. */ - lock_err = 0; - ok1(ntdb_transaction_start(ntdb) == 0); - ok1(ntdb_delete(ntdb, key) == 0); - - lock_err = EAGAIN; - ok1(ntdb_transaction_prepare_commit(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - - lock_err = 0; - ok1(ntdb_transaction_prepare_commit(ntdb) == 0); - ok1(ntdb_transaction_commit(ntdb) == 0); - - /* And the transaction was committed, right? */ - ok1(!ntdb_exists(ntdb, key)); - ntdb_close(ntdb); - ok1(tap_log_messages == 0); - } - return exit_status(); -} diff --git a/ccan/ntdb/test/api-83-openhook.c b/ccan/ntdb/test/api-83-openhook.c deleted file mode 100644 index cdd015a5..00000000 --- a/ccan/ntdb/test/api-83-openhook.c +++ /dev/null @@ -1,104 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "external-agent.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -#define KEY_STR "key" - -static enum NTDB_ERROR clear_if_first(int fd, void *arg) -{ -/* We hold a lock offset 4 always, so we can tell if anyone is holding it. - * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag). 
*/ - struct flock fl; - - if (arg != clear_if_first) - return NTDB_ERR_CORRUPT; - - fl.l_type = F_WRLCK; - fl.l_whence = SEEK_SET; - fl.l_start = 4; - fl.l_len = 1; - - if (fcntl(fd, F_SETLK, &fl) == 0) { - /* We must be first ones to open it! */ - diag("truncating file!"); - if (ftruncate(fd, 0) != 0) { - return NTDB_ERR_IO; - } - } - fl.l_type = F_RDLCK; - if (fcntl(fd, F_SETLKW, &fl) != 0) { - return NTDB_ERR_IO; - } - return NTDB_SUCCESS; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb, *ntdb2; - struct agent *agent; - union ntdb_attribute cif; - NTDB_DATA key = ntdb_mkdata(KEY_STR, strlen(KEY_STR)); - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - - cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK; - cif.openhook.base.next = &tap_log_attr; - cif.openhook.fn = clear_if_first; - cif.openhook.data = clear_if_first; - - agent = prepare_external_agent(); - plan_tests(sizeof(flags) / sizeof(flags[0]) * 16); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - /* Create it */ - ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, NULL); - ok1(ntdb); - ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0); - ntdb_close(ntdb); - - /* Now, open with CIF, should clear it. */ - ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR, 0, &cif); - ok1(ntdb); - ok1(!ntdb_exists(ntdb, key)); - ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0); - - /* Agent should not clear it, since it's still open. */ - ok1(external_agent_operation(agent, OPEN_WITH_HOOK, - "run-83-openhook.ntdb") == SUCCESS); - ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) - == SUCCESS); - ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS); - - /* Still exists for us too. */ - ok1(ntdb_exists(ntdb, key)); - - /* Nested open should not erase db. 
*/ - ntdb2 = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR, 0, &cif); - ok1(ntdb_exists(ntdb2, key)); - ok1(ntdb_exists(ntdb, key)); - ntdb_close(ntdb2); - - ok1(ntdb_exists(ntdb, key)); - - /* Close it, now agent should clear it. */ - ntdb_close(ntdb); - - ok1(external_agent_operation(agent, OPEN_WITH_HOOK, - "run-83-openhook.ntdb") == SUCCESS); - ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) - == FAILED); - ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS); - - ok1(tap_log_messages == 0); - } - - free_external_agent(agent); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-91-get-stats.c b/ccan/ntdb/test/api-91-get-stats.c deleted file mode 100644 index 120b62ed..00000000 --- a/ccan/ntdb/test/api-91-get-stats.c +++ /dev/null @@ -1,58 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 11); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - union ntdb_attribute *attr; - NTDB_DATA key = ntdb_mkdata("key", 3), data; - - ntdb = ntdb_open("run-91-get-stats.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - /* Force an expansion */ - data.dsize = 65536; - data.dptr = calloc(data.dsize, 1); - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - free(data.dptr); - - /* Use malloc so valgrind will catch overruns. 
*/ - attr = malloc(sizeof *attr); - attr->stats.base.attr = NTDB_ATTRIBUTE_STATS; - attr->stats.size = sizeof(*attr); - - ok1(ntdb_get_attribute(ntdb, attr) == 0); - ok1(attr->stats.size == sizeof(*attr)); - ok1(attr->stats.allocs > 0); - ok1(attr->stats.expands > 0); - ok1(attr->stats.locks > 0); - free(attr); - - /* Try short one. */ - attr = malloc(offsetof(struct ntdb_attribute_stats, allocs) - + sizeof(attr->stats.allocs)); - attr->stats.base.attr = NTDB_ATTRIBUTE_STATS; - attr->stats.size = offsetof(struct ntdb_attribute_stats, allocs) - + sizeof(attr->stats.allocs); - ok1(ntdb_get_attribute(ntdb, attr) == 0); - ok1(attr->stats.size == sizeof(*attr)); - ok1(attr->stats.allocs > 0); - free(attr); - ok1(tap_log_messages == 0); - - ntdb_close(ntdb); - - } - return exit_status(); -} diff --git a/ccan/ntdb/test/api-92-get-set-readonly.c b/ccan/ntdb/test/api-92-get-set-readonly.c deleted file mode 100644 index dda5acb9..00000000 --- a/ccan/ntdb/test/api-92-get-set-readonly.c +++ /dev/null @@ -1,106 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 48); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - /* RW -> R0 */ - ntdb = ntdb_open("run-92-get-set-readonly.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY)); - - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); - - ntdb_add_flag(ntdb, NTDB_RDONLY); - ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY); - - /* Can't store, append, delete. 
*/ - ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 1); - ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 2); - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 3); - - /* Can't start a transaction, or any write lock. */ - ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 4); - ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 5); - ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 6); - ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 7); - - /* Back to RW. */ - ntdb_remove_flag(ntdb, NTDB_RDONLY); - ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY)); - - ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_SUCCESS); - ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); - ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); - - ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); - ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS); - - ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS); - ntdb_chainunlock(ntdb, key); - ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); - ntdb_unlockall(ntdb); - ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS); - ok1(tap_log_messages == 7); - - ntdb_close(ntdb); - - /* R0 -> RW */ - ntdb = ntdb_open("run-92-get-set-readonly.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDONLY, 0600, &tap_log_attr); - ok1(ntdb); - ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY); - - /* Can't store, append, delete. */ - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 8); - ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 9); - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 10); - - /* Can't start a transaction, or any write lock. 
*/ - ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 11); - ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 12); - ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 13); - ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY); - ok1(tap_log_messages == 14); - - /* Can't remove NTDB_RDONLY since we opened with O_RDONLY */ - ntdb_remove_flag(ntdb, NTDB_RDONLY); - ok1(tap_log_messages == 15); - ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY); - ntdb_close(ntdb); - - ok1(tap_log_messages == 15); - tap_log_messages = 0; - } - return exit_status(); -} diff --git a/ccan/ntdb/test/api-93-repack.c b/ccan/ntdb/test/api-93-repack.c deleted file mode 100644 index 437c0f85..00000000 --- a/ccan/ntdb/test/api-93-repack.c +++ /dev/null @@ -1,80 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -#define NUM_TESTS 1000 - -static bool store_all(struct ntdb_context *ntdb) -{ - unsigned int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA dbuf = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < NUM_TESTS; i++) { - if (ntdb_store(ntdb, key, dbuf, NTDB_INSERT) != NTDB_SUCCESS) - return false; - } - return true; -} - -static int mark_entry(struct ntdb_context *ntdb, - NTDB_DATA key, NTDB_DATA data, bool found[]) -{ - unsigned int num; - - if (key.dsize != sizeof(num)) - return -1; - memcpy(&num, key.dptr, key.dsize); - if (num >= NUM_TESTS) - return -1; - if (found[num]) - return -1; - found[num] = true; - return 0; -} - -static bool is_all_set(bool found[], unsigned int num) -{ - unsigned int i; - - for (i = 0; i < num; i++) - if (!found[i]) - return false; - return true; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - bool found[NUM_TESTS]; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT - }; - - 
plan_tests(sizeof(flags) / sizeof(flags[0]) * 6 + 1); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-93-repack.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - break; - - ok1(store_all(ntdb)); - - ok1(ntdb_repack(ntdb) == NTDB_SUCCESS); - memset(found, 0, sizeof(found)); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - ok1(ntdb_traverse(ntdb, mark_entry, found) == NUM_TESTS); - ok1(is_all_set(found, NUM_TESTS)); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-94-expand-during-parse.c b/ccan/ntdb/test/api-94-expand-during-parse.c deleted file mode 100644 index 39b19988..00000000 --- a/ccan/ntdb/test/api-94-expand-during-parse.c +++ /dev/null @@ -1,87 +0,0 @@ -/* We use direct access to hand to the parse function: what if db expands? */ -#include "config.h" -#include "../ntdb.h" -#include "tap-interface.h" -#include "logging.h" -#include "../private.h" /* To establish size, esp. for NTDB_INTERNAL dbs */ -#include "helpapi-external-agent.h" - -static struct ntdb_context *ntdb; - -static off_t ntdb_size(void) -{ - return ntdb->file->map_size; -} - -struct parse_info { - unsigned int depth; - NTDB_DATA expected; -}; - -static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, - struct parse_info *pinfo) -{ - off_t flen; - unsigned int i; - - if (!ntdb_deq(data, pinfo->expected)) - return NTDB_ERR_EINVAL; - - flen = ntdb_size(); - - for (i = 0; ntdb_size() == flen; i++) { - NTDB_DATA add = ntdb_mkdata(&i, sizeof(i)); - - /* This is technically illegal parse(), which is why we - * grabbed allrecord lock.*/ - ntdb_store(ntdb, add, add, NTDB_INSERT); - } - - /* Access the record again. */ - if (!ntdb_deq(data, pinfo->expected)) - return NTDB_ERR_EINVAL; - - /* Recurse! Woot! 
*/ - if (pinfo->depth != 0) { - enum NTDB_ERROR ecode; - - pinfo->depth--; - ecode = ntdb_parse_record(ntdb, key, parse, pinfo); - if (ecode) { - return ecode; - } - } - - /* Access the record one more time. */ - if (!ntdb_deq(data, pinfo->expected)) - return NTDB_ERR_EINVAL; - - return NTDB_SUCCESS; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - struct parse_info pinfo; - NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5); - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-94-expand-during-parse.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); - ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); - pinfo.expected = data; - pinfo.depth = 3; - ok1(ntdb_parse_record(ntdb, key, parse, &pinfo) == NTDB_SUCCESS); - ntdb_unlockall(ntdb); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-95-read-only-during-parse.c b/ccan/ntdb/test/api-95-read-only-during-parse.c deleted file mode 100644 index 0b0eb69f..00000000 --- a/ccan/ntdb/test/api-95-read-only-during-parse.c +++ /dev/null @@ -1,93 +0,0 @@ -/* Make sure write operations fail during ntdb_parse(). */ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -static struct ntdb_context *ntdb; - -/* We could get either of these. 
*/ -static bool xfail(enum NTDB_ERROR ecode) -{ - return ecode == NTDB_ERR_RDONLY || ecode == NTDB_ERR_LOCK; -} - -static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, - NTDB_DATA *expected) -{ - NTDB_DATA add = ntdb_mkdata("another", strlen("another")); - - if (!ntdb_deq(data, *expected)) { - return NTDB_ERR_EINVAL; - } - - /* These should all fail.*/ - if (!xfail(ntdb_store(ntdb, add, add, NTDB_INSERT))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_append(ntdb, key, add))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_delete(ntdb, key))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_transaction_start(ntdb))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_chainlock(ntdb, key))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_lockall(ntdb))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_wipe_all(ntdb))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - if (!xfail(ntdb_repack(ntdb))) { - return NTDB_ERR_EINVAL; - } - tap_log_messages--; - - /* Access the record one more time. 
*/ - if (!ntdb_deq(data, *expected)) { - return NTDB_ERR_EINVAL; - } - - return NTDB_SUCCESS; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5); - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-95-read-only-during-parse.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); - ok1(ntdb_parse_record(ntdb, key, parse, &data) == NTDB_SUCCESS); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-add-remove-flags.c b/ccan/ntdb/test/api-add-remove-flags.c deleted file mode 100644 index a09046e2..00000000 --- a/ccan/ntdb/test/api-add-remove-flags.c +++ /dev/null @@ -1,88 +0,0 @@ -#include "../private.h" // for ntdb_context -#include "../ntdb.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(87); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-add-remove-flags.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(ntdb_get_flags(ntdb) == ntdb->flags); - tap_log_messages = 0; - ntdb_add_flag(ntdb, NTDB_NOLOCK); - if (flags[i] & NTDB_INTERNAL) - ok1(tap_log_messages == 1); - else { - ok1(tap_log_messages == 0); - ok1(ntdb_get_flags(ntdb) & NTDB_NOLOCK); - } - - tap_log_messages = 0; - ntdb_add_flag(ntdb, NTDB_NOMMAP); - if (flags[i] & NTDB_INTERNAL) - ok1(tap_log_messages == 1); - else { - ok1(tap_log_messages == 0); - 
ok1(ntdb_get_flags(ntdb) & NTDB_NOMMAP); - ok1(ntdb->file->map_ptr == NULL); - } - - tap_log_messages = 0; - ntdb_add_flag(ntdb, NTDB_NOSYNC); - if (flags[i] & NTDB_INTERNAL) - ok1(tap_log_messages == 1); - else { - ok1(tap_log_messages == 0); - ok1(ntdb_get_flags(ntdb) & NTDB_NOSYNC); - } - - ok1(ntdb_get_flags(ntdb) == ntdb->flags); - - tap_log_messages = 0; - ntdb_remove_flag(ntdb, NTDB_NOLOCK); - if (flags[i] & NTDB_INTERNAL) - ok1(tap_log_messages == 1); - else { - ok1(tap_log_messages == 0); - ok1(!(ntdb_get_flags(ntdb) & NTDB_NOLOCK)); - } - - tap_log_messages = 0; - ntdb_remove_flag(ntdb, NTDB_NOMMAP); - if (flags[i] & NTDB_INTERNAL) - ok1(tap_log_messages == 1); - else { - ok1(tap_log_messages == 0); - ok1(!(ntdb_get_flags(ntdb) & NTDB_NOMMAP)); - ok1(ntdb->file->map_ptr != NULL); - } - - tap_log_messages = 0; - ntdb_remove_flag(ntdb, NTDB_NOSYNC); - if (flags[i] & NTDB_INTERNAL) - ok1(tap_log_messages == 1); - else { - ok1(tap_log_messages == 0); - ok1(!(ntdb_get_flags(ntdb) & NTDB_NOSYNC)); - } - - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-check-callback.c b/ccan/ntdb/test/api-check-callback.c deleted file mode 100644 index eaf60d48..00000000 --- a/ccan/ntdb/test/api-check-callback.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -#define NUM_RECORDS 1000 - -static bool store_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < NUM_RECORDS; i++) - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return false; - return true; -} - -static enum NTDB_ERROR check(NTDB_DATA key, - NTDB_DATA data, - bool *array) -{ - int val; - - if (key.dsize != sizeof(val)) { - diag("Wrong key size: %zu\n", key.dsize); - return 
NTDB_ERR_CORRUPT; - } - - if (key.dsize != data.dsize - || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) { - diag("Key and data differ\n"); - return NTDB_ERR_CORRUPT; - } - - memcpy(&val, key.dptr, sizeof(val)); - if (val >= NUM_RECORDS || val < 0) { - diag("check value %i\n", val); - return NTDB_ERR_CORRUPT; - } - - if (array[val]) { - diag("Value %i already seen\n", val); - return NTDB_ERR_CORRUPT; - } - - array[val] = true; - return NTDB_SUCCESS; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - return 0; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - bool array[NUM_RECORDS]; - - ntdb = ntdb_open("run-check-callback.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(store_records(ntdb)); - for (j = 0; j < NUM_RECORDS; j++) - array[j] = false; - ok1(ntdb_check(ntdb, check, array) == NTDB_SUCCESS); - for (j = 0; j < NUM_RECORDS; j++) - if (!array[j]) - break; - ok1(j == NUM_RECORDS); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-firstkey-nextkey.c b/ccan/ntdb/test/api-firstkey-nextkey.c deleted file mode 100644 index 6d9ad67a..00000000 --- a/ccan/ntdb/test/api-firstkey-nextkey.c +++ /dev/null @@ -1,158 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -#define NUM_RECORDS 1000 - -static bool store_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < NUM_RECORDS; i++) - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return false; - 
return true; -} - -struct trav_data { - unsigned int records[NUM_RECORDS]; - unsigned int calls; -}; - -static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p) -{ - struct trav_data *td = p; - int val; - - memcpy(&val, dbuf.dptr, dbuf.dsize); - td->records[td->calls++] = val; - return 0; -} - -/* Since ntdb_nextkey frees dptr, we need to clone it. */ -static NTDB_DATA dup_key(NTDB_DATA key) -{ - void *p = malloc(key.dsize); - memcpy(p, key.dptr, key.dsize); - key.dptr = p; - return key; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - int num; - struct trav_data td; - NTDB_DATA k; - struct ntdb_context *ntdb; - union ntdb_attribute seed_attr; - enum NTDB_ERROR ecode; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; - seed_attr.base.next = &tap_log_attr; - seed_attr.seed.seed = 6334326220117065685ULL; - - plan_tests(sizeof(flags) / sizeof(flags[0]) - * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("api-firstkey-nextkey.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, - &seed_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(ntdb_firstkey(ntdb, &k) == NTDB_ERR_NOEXIST); - - /* One entry... */ - k.dptr = (unsigned char *)# - k.dsize = sizeof(num); - num = 0; - ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0); - ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS); - ok1(k.dsize == sizeof(num)); - ok1(memcmp(k.dptr, &num, sizeof(num)) == 0); - ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST); - - /* Two entries. 
*/ - k.dptr = (unsigned char *)# - k.dsize = sizeof(num); - num = 1; - ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0); - ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS); - ok1(k.dsize == sizeof(num)); - memcpy(&num, k.dptr, sizeof(num)); - ok1(num == 0 || num == 1); - ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS); - ok1(k.dsize == sizeof(j)); - memcpy(&j, k.dptr, sizeof(j)); - ok1(j == 0 || j == 1); - ok1(j != num); - ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST); - - /* Clean up. */ - k.dptr = (unsigned char *)# - k.dsize = sizeof(num); - num = 0; - ok1(ntdb_delete(ntdb, k) == 0); - num = 1; - ok1(ntdb_delete(ntdb, k) == 0); - - /* Now lots of records. */ - ok1(store_records(ntdb)); - td.calls = 0; - - num = ntdb_traverse(ntdb, trav, &td); - ok1(num == NUM_RECORDS); - ok1(td.calls == NUM_RECORDS); - - /* Simple loop should match ntdb_traverse */ - for (j = 0, ecode = ntdb_firstkey(ntdb, &k); j < td.calls; j++) { - int val; - - ok1(ecode == NTDB_SUCCESS); - ok1(k.dsize == sizeof(val)); - memcpy(&val, k.dptr, k.dsize); - ok1(td.records[j] == val); - ecode = ntdb_nextkey(ntdb, &k); - } - - /* But arbitrary orderings should work too. */ - for (j = td.calls-1; j > 0; j--) { - k.dptr = (unsigned char *)&td.records[j-1]; - k.dsize = sizeof(td.records[j-1]); - k = dup_key(k); - ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS); - ok1(k.dsize == sizeof(td.records[j])); - ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0); - free(k.dptr); - } - - /* Even delete should work. 
*/ - for (j = 0, ecode = ntdb_firstkey(ntdb, &k); - ecode != NTDB_ERR_NOEXIST; - j++) { - ok1(ecode == NTDB_SUCCESS); - ok1(k.dsize == 4); - ok1(ntdb_delete(ntdb, k) == 0); - ecode = ntdb_nextkey(ntdb, &k); - } - - diag("delete using first/nextkey gave %u of %u records", - j, NUM_RECORDS); - ok1(j == NUM_RECORDS); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-fork-test.c b/ccan/ntdb/test/api-fork-test.c deleted file mode 100644 index 32c6ebe4..00000000 --- a/ccan/ntdb/test/api-fork-test.c +++ /dev/null @@ -1,195 +0,0 @@ -/* Test forking while holding lock. - * - * There are only five ways to do this currently: - * (1) grab a ntdb_chainlock, then fork. - * (2) grab a ntdb_lockall, then fork. - * (3) grab a ntdb_lockall_read, then fork. - * (4) start a transaction, then fork. - * (5) fork from inside a ntdb_parse() callback. - * - * Note that we don't hold a lock across ntdb_traverse callbacks, so - * that doesn't matter. - */ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -static bool am_child = false; - -static enum NTDB_ERROR fork_in_parse(NTDB_DATA key, NTDB_DATA data, - struct ntdb_context *ntdb) -{ - int status; - - if (fork() == 0) { - am_child = true; - - /* We expect this to fail. 
*/ - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) - exit(1); - - if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) - exit(1); - - if (tap_log_messages != 2) - exit(2); - - return NTDB_SUCCESS; - } - wait(&status); - ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); - return NTDB_SUCCESS; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 14); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - int status; - - tap_log_messages = 0; - - ntdb = ntdb_open("run-fork-test.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - continue; - - /* Put a record in here. */ - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_SUCCESS); - - ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS); - if (fork() == 0) { - /* We expect this to fail. */ - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) - return 1; - - if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) - return 1; - - if (tap_log_messages != 2) - return 2; - - /* Child can do this without any complaints. */ - ntdb_chainunlock(ntdb, key); - if (tap_log_messages != 2) - return 3; - ntdb_close(ntdb); - if (tap_log_messages != 2) - return 4; - return 0; - } - wait(&status); - ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); - ntdb_chainunlock(ntdb, key); - - ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); - if (fork() == 0) { - /* We expect this to fail. */ - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) - return 1; - - if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) - return 1; - - if (tap_log_messages != 2) - return 2; - - /* Child can do this without any complaints. 
*/ - ntdb_unlockall(ntdb); - if (tap_log_messages != 2) - return 3; - ntdb_close(ntdb); - if (tap_log_messages != 2) - return 4; - return 0; - } - wait(&status); - ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); - ntdb_unlockall(ntdb); - - ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS); - if (fork() == 0) { - /* We expect this to fail. */ - /* This would always fail anyway... */ - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) - return 1; - - if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) - return 1; - - if (tap_log_messages != 2) - return 2; - - /* Child can do this without any complaints. */ - ntdb_unlockall_read(ntdb); - if (tap_log_messages != 2) - return 3; - ntdb_close(ntdb); - if (tap_log_messages != 2) - return 4; - return 0; - } - wait(&status); - ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); - ntdb_unlockall_read(ntdb); - - ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); - /* If transactions is empty, noop "commit" succeeds. */ - ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); - if (fork() == 0) { - int last_log_messages; - - /* We expect this to fail. */ - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) - return 1; - - if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) - return 1; - - if (tap_log_messages != 2) - return 2; - - if (ntdb_transaction_prepare_commit(ntdb) - != NTDB_ERR_LOCK) - return 3; - if (tap_log_messages == 2) - return 4; - - last_log_messages = tap_log_messages; - /* Child can do this without any complaints. */ - ntdb_transaction_cancel(ntdb); - if (tap_log_messages != last_log_messages) - return 4; - ntdb_close(ntdb); - if (tap_log_messages != last_log_messages) - return 4; - return 0; - } - wait(&status); - ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); - ntdb_transaction_cancel(ntdb); - - ok1(ntdb_parse_record(ntdb, key, fork_in_parse, ntdb) - == NTDB_SUCCESS); - ntdb_close(ntdb); - if (am_child) { - /* Child can return from parse without complaints. 
*/ - if (tap_log_messages != 2) - exit(3); - exit(0); - } - ok1(tap_log_messages == 0); - } - return exit_status(); -} diff --git a/ccan/ntdb/test/api-locktimeout.c b/ccan/ntdb/test/api-locktimeout.c deleted file mode 100644 index 235409b3..00000000 --- a/ccan/ntdb/test/api-locktimeout.c +++ /dev/null @@ -1,190 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "external-agent.h" -#include "helpapi-external-agent.h" - -#undef alarm -#define alarm fast_alarm - -/* Speed things up by doing things in milliseconds. */ -static unsigned int fast_alarm(unsigned int milli_seconds) -{ - struct itimerval it; - - it.it_interval.tv_sec = it.it_interval.tv_usec = 0; - it.it_value.tv_sec = milli_seconds / 1000; - it.it_value.tv_usec = milli_seconds * 1000; - setitimer(ITIMER_REAL, &it, NULL); - return 0; -} - -#define CatchSignal(sig, handler) signal((sig), (handler)) - -static void do_nothing(int signum) -{ -} - -/* This example code is taken from SAMBA, so try not to change it. */ -static struct flock flock_struct; - -/* Return a value which is none of v1, v2 or v3. */ -static inline short int invalid_value(short int v1, short int v2, short int v3) -{ - short int try = (v1+v2+v3)^((v1+v2+v3) << 16); - while (try == v1 || try == v2 || try == v3) - try++; - return try; -} - -/* We invalidate in as many ways as we can, so the OS rejects it */ -static void invalidate_flock_struct(int signum) -{ - flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK); - flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END); - flock_struct.l_start = -1; - /* A large negative. 
*/ - flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1); -} - -static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag, - void *_timeout) -{ - int ret, saved_errno = errno; - unsigned int timeout = *(unsigned int *)_timeout; - - flock_struct.l_type = rw; - flock_struct.l_whence = SEEK_SET; - flock_struct.l_start = off; - flock_struct.l_len = len; - - CatchSignal(SIGALRM, invalidate_flock_struct); - alarm(timeout); - - for (;;) { - if (waitflag) - ret = fcntl(fd, F_SETLKW, &flock_struct); - else - ret = fcntl(fd, F_SETLK, &flock_struct); - - if (ret == 0) - break; - - /* Not signalled? Something else went wrong. */ - if (flock_struct.l_len == len) { - if (errno == EAGAIN || errno == EINTR) - continue; - saved_errno = errno; - break; - } else { - saved_errno = EINTR; - break; - } - } - - alarm(0); - errno = saved_errno; - return ret; -} - -static int ntdb_chainlock_with_timeout_internal(struct ntdb_context *ntdb, - NTDB_DATA key, - unsigned int timeout, - int rw_type) -{ - union ntdb_attribute locking; - enum NTDB_ERROR ecode; - - if (timeout) { - locking.base.attr = NTDB_ATTRIBUTE_FLOCK; - ecode = ntdb_get_attribute(ntdb, &locking); - if (ecode != NTDB_SUCCESS) - return ecode; - - /* Replace locking function with our own. 
*/ - locking.flock.data = &timeout; - locking.flock.lock = timeout_lock; - - ecode = ntdb_set_attribute(ntdb, &locking); - if (ecode != NTDB_SUCCESS) - return ecode; - } - if (rw_type == F_RDLCK) - ecode = ntdb_chainlock_read(ntdb, key); - else - ecode = ntdb_chainlock(ntdb, key); - - if (timeout) { - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); - } - return ecode; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - NTDB_DATA key = ntdb_mkdata("hello", 5); - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - struct agent *agent; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 15); - - agent = prepare_external_agent(); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - enum NTDB_ERROR ecode; - ntdb = ntdb_open("run-locktimeout.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - break; - - /* Simple cases: should succeed. */ - ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, - F_RDLCK); - ok1(ecode == NTDB_SUCCESS); - ok1(tap_log_messages == 0); - - ntdb_chainunlock_read(ntdb, key); - ok1(tap_log_messages == 0); - - ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, - F_WRLCK); - ok1(ecode == NTDB_SUCCESS); - ok1(tap_log_messages == 0); - - ntdb_chainunlock(ntdb, key); - ok1(tap_log_messages == 0); - - /* OK, get agent to start transaction, then we should time out. */ - ok1(external_agent_operation(agent, OPEN, "run-locktimeout.ntdb") - == SUCCESS); - ok1(external_agent_operation(agent, TRANSACTION_START, "") - == SUCCESS); - ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, - F_WRLCK); - ok1(ecode == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - - /* Even if we get a different signal, should be fine. 
*/ - CatchSignal(SIGUSR1, do_nothing); - external_agent_operation(agent, SEND_SIGNAL, ""); - ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, - F_WRLCK); - ok1(ecode == NTDB_ERR_LOCK); - ok1(tap_log_messages == 0); - - ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "") - == SUCCESS); - ok1(external_agent_operation(agent, CLOSE, "") - == SUCCESS); - ntdb_close(ntdb); - } - free_external_agent(agent); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-missing-entries.c b/ccan/ntdb/test/api-missing-entries.c deleted file mode 100644 index a6427c07..00000000 --- a/ccan/ntdb/test/api-missing-entries.c +++ /dev/null @@ -1,43 +0,0 @@ -/* Another test revealed that we lost an entry. This reproduces it. */ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -#define NUM_RECORDS 1189 - -/* We use the same seed which we saw this failure on. */ -static uint32_t failhash(const void *key, size_t len, uint32_t seed, void *p) -{ - return hash64_stable((const unsigned char *)key, len, - 699537674708983027ULL); -} - -int main(int argc, char *argv[]) -{ - int i; - struct ntdb_context *ntdb; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = failhash } }; - - hattr.base.next = &tap_log_attr; - plan_tests(1 + NUM_RECORDS + 2); - - ntdb = ntdb_open("run-missing-entries.ntdb", NTDB_INTERNAL, - O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); - if (ok1(ntdb)) { - for (i = 0; i < NUM_RECORDS; i++) { - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - } - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-open-multiple-times.c b/ccan/ntdb/test/api-open-multiple-times.c deleted file mode 100644 index 
59a03629..00000000 --- a/ccan/ntdb/test/api-open-multiple-times.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "../private.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb, *ntdb2; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 30); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-open-multiple-times.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ntdb2 = ntdb_open("run-open-multiple-times.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT, 0600, &tap_log_attr); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_check(ntdb2, NULL, NULL) == 0); - ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr); - - /* Store in one, fetch in the other. */ - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(d, data)); - free(d.dptr); - - /* Vice versa, with delete. */ - ok1(ntdb_delete(ntdb2, key) == 0); - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST); - - /* OK, now close first one, check second still good. */ - ok1(ntdb_close(ntdb) == 0); - - ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr); - ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == 0); - ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(d, data)); - free(d.dptr); - - /* Reopen */ - ntdb = ntdb_open("run-open-multiple-times.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT, 0600, &tap_log_attr); - ok1(ntdb); - - ok1(ntdb_transaction_start(ntdb2) == 0); - - /* Anything in the other one should fail. 
*/ - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 1); - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 2); - ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 3); - ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); - ok1(tap_log_messages == 4); - - /* Transaciton should work as normal. */ - ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == NTDB_SUCCESS); - - /* Now... try closing with locks held. */ - ok1(ntdb_close(ntdb2) == 0); - - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(ntdb_deq(d, data)); - free(d.dptr); - ok1(ntdb_close(ntdb) == 0); - ok1(tap_log_messages == 4); - tap_log_messages = 0; - } - - return exit_status(); -} diff --git a/ccan/ntdb/test/api-record-expand.c b/ccan/ntdb/test/api-record-expand.c deleted file mode 100644 index b92f13fe..00000000 --- a/ccan/ntdb/test/api-record-expand.c +++ /dev/null @@ -1,50 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -#define MAX_SIZE 10000 -#define SIZE_STEP 131 - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data; - - data.dptr = malloc(MAX_SIZE); - memset(data.dptr, 0x24, MAX_SIZE); - - plan_tests(sizeof(flags) / sizeof(flags[0]) - * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-record-expand.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - data.dsize = 0; - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - for (data.dsize = 0; - data.dsize < MAX_SIZE; - data.dsize += 
SIZE_STEP) { - memset(data.dptr, data.dsize, data.dsize); - ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - } - ntdb_close(ntdb); - } - ok1(tap_log_messages == 0); - free(data.dptr); - - return exit_status(); -} diff --git a/ccan/ntdb/test/api-simple-delete.c b/ccan/ntdb/test/api-simple-delete.c deleted file mode 100644 index 0b886c3a..00000000 --- a/ccan/ntdb/test/api-simple-delete.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-simple-delete.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (ntdb) { - /* Delete should fail. */ - ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Insert should succeed. */ - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Delete should now work. 
*/ - ok1(ntdb_delete(ntdb, key) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - } - } - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/api-summary.c b/ccan/ntdb/test/api-summary.c deleted file mode 100644 index 7701f26a..00000000 --- a/ccan/ntdb/test/api-summary.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "config.h" -#include "../ntdb.h" -#include "../private.h" -#include "tap-interface.h" -#include "logging.h" -#include "helpapi-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; - NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; - char *summary; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-summary.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Put some stuff in there. */ - for (j = 0; j < 500; j++) { - /* Make sure padding varies to we get some graphs! 
*/ - data.dsize = j % (sizeof(j) + 1); - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - fail("Storing in ntdb"); - } - - for (j = 0; - j <= NTDB_SUMMARY_HISTOGRAMS; - j += NTDB_SUMMARY_HISTOGRAMS) { - ok1(ntdb_summary(ntdb, j, &summary) == NTDB_SUCCESS); - ok1(strstr(summary, "Number of records: 500\n")); - ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n")); - ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n")); - if (j == NTDB_SUMMARY_HISTOGRAMS) { - ok1(strstr(summary, "|") - && strstr(summary, "*")); - } else { - ok1(!strstr(summary, "|") - && !strstr(summary, "*")); - } - free(summary); - } - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/external-agent.c b/ccan/ntdb/test/external-agent.c deleted file mode 100644 index a06b70f7..00000000 --- a/ccan/ntdb/test/external-agent.c +++ /dev/null @@ -1,261 +0,0 @@ -#include "external-agent.h" -#include "logging.h" -#include "lock-tracking.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "tap-interface.h" -#include -#include - -static struct ntdb_context *ntdb; - -void (*external_agent_free)(void *) = free; - -static enum NTDB_ERROR clear_if_first(int fd, void *arg) -{ -/* We hold a lock offset 4 always, so we can tell if anyone is holding it. - * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag). */ - struct flock fl; - - fl.l_type = F_WRLCK; - fl.l_whence = SEEK_SET; - fl.l_start = 4; - fl.l_len = 1; - - if (fcntl(fd, F_SETLK, &fl) == 0) { - /* We must be first ones to open it! 
*/ - diag("agent truncating file!"); - if (ftruncate(fd, 0) != 0) { - return NTDB_ERR_IO; - } - } - fl.l_type = F_RDLCK; - if (fcntl(fd, F_SETLKW, &fl) != 0) { - return NTDB_ERR_IO; - } - return NTDB_SUCCESS; -} - -static enum agent_return do_operation(enum operation op, const char *name) -{ - NTDB_DATA k, d; - enum agent_return ret; - NTDB_DATA data; - enum NTDB_ERROR ecode; - union ntdb_attribute cif; - const char *eq; - - if (op != OPEN && op != OPEN_WITH_HOOK && !ntdb) { - diag("external: No ntdb open!"); - return OTHER_FAILURE; - } - - diag("external: %s", operation_name(op)); - - eq = strchr(name, '='); - if (eq) { - k = ntdb_mkdata(name, eq - name); - d = ntdb_mkdata(eq + 1, strlen(eq+1)); - } else { - k = ntdb_mkdata(name, strlen(name)); - d.dsize = 0; - d.dptr = NULL; - } - - locking_would_block = 0; - switch (op) { - case OPEN: - if (ntdb) { - diag("Already have ntdb %s open", ntdb_name(ntdb)); - return OTHER_FAILURE; - } - ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &tap_log_attr); - if (!ntdb) { - if (!locking_would_block) - diag("Opening ntdb gave %s", strerror(errno)); - forget_locking(); - ret = OTHER_FAILURE; - } else - ret = SUCCESS; - break; - case OPEN_WITH_HOOK: - if (ntdb) { - diag("Already have ntdb %s open", ntdb_name(ntdb)); - return OTHER_FAILURE; - } - cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK; - cif.openhook.base.next = &tap_log_attr; - cif.openhook.fn = clear_if_first; - ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &cif); - if (!ntdb) { - if (!locking_would_block) - diag("Opening ntdb gave %s", strerror(errno)); - forget_locking(); - ret = OTHER_FAILURE; - } else - ret = SUCCESS; - break; - case FETCH: - ecode = ntdb_fetch(ntdb, k, &data); - if (ecode == NTDB_ERR_NOEXIST) { - ret = FAILED; - } else if (ecode < 0) { - ret = OTHER_FAILURE; - } else if (!ntdb_deq(data, d)) { - ret = OTHER_FAILURE; - external_agent_free(data.dptr); - } else { - ret = SUCCESS; - external_agent_free(data.dptr); - } - break; - case STORE: - ret = 
ntdb_store(ntdb, k, d, 0) == 0 ? SUCCESS : OTHER_FAILURE; - break; - case TRANSACTION_START: - ret = ntdb_transaction_start(ntdb) == 0 ? SUCCESS : OTHER_FAILURE; - break; - case TRANSACTION_COMMIT: - ret = ntdb_transaction_commit(ntdb)==0 ? SUCCESS : OTHER_FAILURE; - break; - case NEEDS_RECOVERY: - ret = external_agent_needs_rec(ntdb); - break; - case CHECK: - ret = ntdb_check(ntdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE; - break; - case CLOSE: - ret = ntdb_close(ntdb) == 0 ? SUCCESS : OTHER_FAILURE; - ntdb = NULL; - break; - case SEND_SIGNAL: - /* We do this async */ - ret = SUCCESS; - break; - default: - ret = OTHER_FAILURE; - } - - if (locking_would_block) - ret = WOULD_HAVE_BLOCKED; - - return ret; -} - -struct agent { - int cmdfd, responsefd; -}; - -/* Do this before doing any ntdb stuff. Return handle, or NULL. */ -struct agent *prepare_external_agent(void) -{ - int pid, ret; - int command[2], response[2]; - char name[1+PATH_MAX]; - - if (pipe(command) != 0 || pipe(response) != 0) - return NULL; - - pid = fork(); - if (pid < 0) - return NULL; - - if (pid != 0) { - struct agent *agent = malloc(sizeof(*agent)); - - close(command[0]); - close(response[1]); - agent->cmdfd = command[1]; - agent->responsefd = response[0]; - return agent; - } - - close(command[1]); - close(response[0]); - - /* We want to fail, not block. */ - nonblocking_locks = true; - log_prefix = "external: "; - while ((ret = read(command[0], name, sizeof(name))) > 0) { - enum agent_return result; - - result = do_operation(name[0], name+1); - if (write(response[1], &result, sizeof(result)) - != sizeof(result)) - err(1, "Writing response"); - if (name[0] == SEND_SIGNAL) { - struct timeval ten_ms; - ten_ms.tv_sec = 0; - ten_ms.tv_usec = 10000; - select(0, NULL, NULL, NULL, &ten_ms); - kill(getppid(), SIGUSR1); - } - } - exit(0); -} - -/* Ask the external agent to try to do an operation. 
*/ -enum agent_return external_agent_operation(struct agent *agent, - enum operation op, - const char *name) -{ - enum agent_return res; - unsigned int len; - char *string; - - if (!name) - name = ""; - len = 1 + strlen(name) + 1; - string = malloc(len); - - string[0] = op; - strcpy(string+1, name); - - if (write(agent->cmdfd, string, len) != len - || read(agent->responsefd, &res, sizeof(res)) != sizeof(res)) - res = AGENT_DIED; - - free(string); - return res; -} - -const char *agent_return_name(enum agent_return ret) -{ - return ret == SUCCESS ? "SUCCESS" - : ret == WOULD_HAVE_BLOCKED ? "WOULD_HAVE_BLOCKED" - : ret == AGENT_DIED ? "AGENT_DIED" - : ret == FAILED ? "FAILED" - : ret == OTHER_FAILURE ? "OTHER_FAILURE" - : "**INVALID**"; -} - -const char *operation_name(enum operation op) -{ - switch (op) { - case OPEN: return "OPEN"; - case OPEN_WITH_HOOK: return "OPEN_WITH_HOOK"; - case FETCH: return "FETCH"; - case STORE: return "STORE"; - case CHECK: return "CHECK"; - case TRANSACTION_START: return "TRANSACTION_START"; - case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT"; - case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; - case SEND_SIGNAL: return "SEND_SIGNAL"; - case CLOSE: return "CLOSE"; - } - return "**INVALID**"; -} - -void free_external_agent(struct agent *agent) -{ - close(agent->cmdfd); - close(agent->responsefd); - free(agent); -} diff --git a/ccan/ntdb/test/external-agent.h b/ccan/ntdb/test/external-agent.h deleted file mode 100644 index 559a92cd..00000000 --- a/ccan/ntdb/test/external-agent.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef NTDB_TEST_EXTERNAL_AGENT_H -#define NTDB_TEST_EXTERNAL_AGENT_H - -/* For locking tests, we need a different process to try things at - * various times. */ -enum operation { - OPEN, - OPEN_WITH_HOOK, - FETCH, - STORE, - TRANSACTION_START, - TRANSACTION_COMMIT, - NEEDS_RECOVERY, - CHECK, - SEND_SIGNAL, - CLOSE, -}; - -/* Do this before doing any ntdb stuff. Return handle, or -1. 
*/ -struct agent *prepare_external_agent(void); - -enum agent_return { - SUCCESS, - WOULD_HAVE_BLOCKED, - AGENT_DIED, - FAILED, /* For fetch, or NEEDS_RECOVERY */ - OTHER_FAILURE, -}; - -/* Ask the external agent to try to do an operation. - * name == ntdb name for OPEN/OPEN_WITH_CLEAR_IF_FIRST, - * = for FETCH/STORE. - */ -enum agent_return external_agent_operation(struct agent *handle, - enum operation op, - const char *name); - -/* Hook into free() on ntdb_data in external agent. */ -extern void (*external_agent_free)(void *); - -/* Mapping enum -> string. */ -const char *agent_return_name(enum agent_return ret); -const char *operation_name(enum operation op); - -void free_external_agent(struct agent *agent); - -/* Internal use: */ -struct ntdb_context; -enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb); - -#endif /* NTDB_TEST_EXTERNAL_AGENT_H */ diff --git a/ccan/ntdb/test/failtest_helper.c b/ccan/ntdb/test/failtest_helper.c deleted file mode 100644 index ab7e61b0..00000000 --- a/ccan/ntdb/test/failtest_helper.c +++ /dev/null @@ -1,90 +0,0 @@ -#include "failtest_helper.h" -#include "logging.h" -#include -#include "tap-interface.h" - -bool failtest_suppress = false; - -bool failmatch(const struct failtest_call *call, - const char *file, int line, enum failtest_call_type type) -{ - return call->type == type - && call->line == line - && ((strcmp(call->file, file) == 0) - || (strends(call->file, file) - && (call->file[strlen(call->file) - strlen(file) - 1] - == '/'))); -} - -static bool is_nonblocking_lock(const struct failtest_call *call) -{ - return call->type == FAILTEST_FCNTL && call->u.fcntl.cmd == F_SETLK; -} - -static bool is_unlock(const struct failtest_call *call) -{ - return call->type == FAILTEST_FCNTL - && call->u.fcntl.arg.fl.l_type == F_UNLCK; -} - -bool exit_check_log(struct tlist_calls *history) -{ - const struct failtest_call *i; - unsigned int malloc_count = 0; - - tlist_for_each(history, i, list) { - if (!i->fail) - continue; 
- /* Failing the /dev/urandom open doesn't count: we fall back. */ - if (failmatch(i, URANDOM_OPEN)) - continue; - - /* Similarly with read fail. */ - if (failmatch(i, URANDOM_READ)) - continue; - - /* Initial allocation of ntdb doesn't log. */ - if (i->type == FAILTEST_MALLOC) { - if (malloc_count++ == 0) { - continue; - } - } - - /* We don't block "failures" on non-blocking locks. */ - if (is_nonblocking_lock(i)) - continue; - - if (!tap_log_messages) - diag("We didn't log for %s:%u", i->file, i->line); - return tap_log_messages != 0; - } - return true; -} - -/* Some places we soldier on despite errors: only fail them once. */ -enum failtest_result -block_repeat_failures(struct tlist_calls *history) -{ - const struct failtest_call *last; - - last = tlist_tail(history, list); - - if (failtest_suppress) - return FAIL_DONT_FAIL; - - if (failmatch(last, URANDOM_OPEN) - || failmatch(last, URANDOM_READ)) { - return FAIL_PROBE; - } - - /* We handle mmap failing, by falling back to read/write, so - * don't try all possible paths. */ - if (last->type == FAILTEST_MMAP) - return FAIL_PROBE; - - /* Unlock or non-blocking lock is fail-once. */ - if (is_unlock(last) || is_nonblocking_lock(last)) - return FAIL_PROBE; - - return FAIL_OK; -} diff --git a/ccan/ntdb/test/failtest_helper.h b/ccan/ntdb/test/failtest_helper.h deleted file mode 100644 index 8d1c3745..00000000 --- a/ccan/ntdb/test/failtest_helper.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef NTDB_TEST_FAILTEST_HELPER_H -#define NTDB_TEST_FAILTEST_HELPER_H -#include -#include - -/* FIXME: Check these! */ -#define URANDOM_OPEN "open.c", 62, FAILTEST_OPEN -#define URANDOM_READ "open.c", 42, FAILTEST_READ - -bool exit_check_log(struct tlist_calls *history); -bool failmatch(const struct failtest_call *call, - const char *file, int line, enum failtest_call_type type); -enum failtest_result block_repeat_failures(struct tlist_calls *history); - -/* Set this to suppress failure. 
*/ -extern bool failtest_suppress; - -#endif /* NTDB_TEST_LOGGING_H */ diff --git a/ccan/ntdb/test/helpapi-external-agent.h b/ccan/ntdb/test/helpapi-external-agent.h deleted file mode 100644 index eb813990..00000000 --- a/ccan/ntdb/test/helpapi-external-agent.h +++ /dev/null @@ -1,7 +0,0 @@ -#include "external-agent.h" - -/* This isn't possible with via the ntdb API, but this makes it link. */ -enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb) -{ - return FAILED; -} diff --git a/ccan/ntdb/test/helprun-external-agent.h b/ccan/ntdb/test/helprun-external-agent.h deleted file mode 100644 index 12610411..00000000 --- a/ccan/ntdb/test/helprun-external-agent.h +++ /dev/null @@ -1,7 +0,0 @@ -#include "external-agent.h" -#include "../private.h" - -enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb) -{ - return ntdb_needs_recovery(ntdb) ? SUCCESS : FAILED; -} diff --git a/ccan/ntdb/test/helprun-layout.h b/ccan/ntdb/test/helprun-layout.h deleted file mode 100644 index 1bacd5e7..00000000 --- a/ccan/ntdb/test/helprun-layout.h +++ /dev/null @@ -1,341 +0,0 @@ -/* NTDB tools to create various canned database layouts. 
*/ -#include "layout.h" -#include -#include -#include -#include -#include "logging.h" - -struct ntdb_layout *new_ntdb_layout(void) -{ - struct ntdb_layout *layout = malloc(sizeof(*layout)); - layout->num_elems = 0; - layout->elem = NULL; - return layout; -} - -static void add(struct ntdb_layout *layout, union ntdb_layout_elem elem) -{ - layout->elem = realloc(layout->elem, - sizeof(layout->elem[0]) - * (layout->num_elems+1)); - layout->elem[layout->num_elems++] = elem; -} - -void ntdb_layout_add_freetable(struct ntdb_layout *layout) -{ - union ntdb_layout_elem elem; - elem.base.type = FREETABLE; - add(layout, elem); -} - -void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len, - unsigned ftable) -{ - union ntdb_layout_elem elem; - elem.base.type = FREE; - elem.free.len = len; - elem.free.ftable_num = ftable; - add(layout, elem); -} - -void ntdb_layout_add_capability(struct ntdb_layout *layout, - uint64_t type, - bool write_breaks, - bool check_breaks, - bool open_breaks, - ntdb_len_t extra) -{ - union ntdb_layout_elem elem; - elem.base.type = CAPABILITY; - elem.capability.type = type; - if (write_breaks) - elem.capability.type |= NTDB_CAP_NOWRITE; - if (open_breaks) - elem.capability.type |= NTDB_CAP_NOOPEN; - if (check_breaks) - elem.capability.type |= NTDB_CAP_NOCHECK; - elem.capability.extra = extra; - add(layout, elem); -} - -static NTDB_DATA dup_key(NTDB_DATA key) -{ - NTDB_DATA ret; - ret.dsize = key.dsize; - ret.dptr = malloc(ret.dsize); - memcpy(ret.dptr, key.dptr, ret.dsize); - return ret; -} - -void ntdb_layout_add_used(struct ntdb_layout *layout, - NTDB_DATA key, NTDB_DATA data, - ntdb_len_t extra) -{ - union ntdb_layout_elem elem; - elem.base.type = DATA; - elem.used.key = dup_key(key); - elem.used.data = dup_key(data); - elem.used.extra = extra; - add(layout, elem); -} - -static ntdb_len_t free_record_len(ntdb_len_t len) -{ - return sizeof(struct ntdb_used_record) + len; -} - -static ntdb_len_t data_record_len(struct tle_used *used) -{ - 
ntdb_len_t len; - len = sizeof(struct ntdb_used_record) - + used->key.dsize + used->data.dsize + used->extra; - assert(len >= sizeof(struct ntdb_free_record)); - return len; -} - -static ntdb_len_t capability_len(struct tle_capability *cap) -{ - return sizeof(struct ntdb_capability) + cap->extra; -} - -static ntdb_len_t freetable_len(struct tle_freetable *ftable) -{ - return sizeof(struct ntdb_freetable); -} - -static void set_free_record(void *mem, ntdb_len_t len) -{ - /* We do all the work in add_to_freetable */ -} - -static void add_zero_pad(struct ntdb_used_record *u, size_t len, size_t extra) -{ - if (extra) - ((char *)(u + 1))[len] = '\0'; -} - -static void set_data_record(void *mem, struct ntdb_context *ntdb, - struct tle_used *used) -{ - struct ntdb_used_record *u = mem; - - set_header(ntdb, u, NTDB_USED_MAGIC, used->key.dsize, used->data.dsize, - used->key.dsize + used->data.dsize + used->extra); - memcpy(u + 1, used->key.dptr, used->key.dsize); - memcpy((char *)(u + 1) + used->key.dsize, - used->data.dptr, used->data.dsize); - add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra); -} - -static void set_capability(void *mem, struct ntdb_context *ntdb, - struct tle_capability *cap, struct ntdb_header *hdr, - ntdb_off_t last_cap) -{ - struct ntdb_capability *c = mem; - ntdb_len_t len = sizeof(*c) - sizeof(struct ntdb_used_record) + cap->extra; - - c->type = cap->type; - c->next = 0; - set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len); - - /* Append to capability list. 
*/ - if (!last_cap) { - hdr->capabilities = cap->base.off; - } else { - c = (struct ntdb_capability *)((char *)hdr + last_cap); - c->next = cap->base.off; - } -} - -static void set_freetable(void *mem, struct ntdb_context *ntdb, - struct tle_freetable *freetable, struct ntdb_header *hdr, - ntdb_off_t last_ftable) -{ - struct ntdb_freetable *ftable = mem; - memset(ftable, 0, sizeof(*ftable)); - set_header(ntdb, &ftable->hdr, NTDB_FTABLE_MAGIC, 0, - sizeof(*ftable) - sizeof(ftable->hdr), - sizeof(*ftable) - sizeof(ftable->hdr)); - - if (last_ftable) { - ftable = (struct ntdb_freetable *)((char *)hdr + last_ftable); - ftable->next = freetable->base.off; - } else { - hdr->free_table = freetable->base.off; - } -} - -static void add_to_freetable(struct ntdb_context *ntdb, - ntdb_off_t eoff, - ntdb_off_t elen, - unsigned ftable, - struct tle_freetable *freetable) -{ - ntdb->ftable_off = freetable->base.off; - ntdb->ftable = ftable; - add_free_record(ntdb, eoff, sizeof(struct ntdb_used_record) + elen, - NTDB_LOCK_WAIT, false); -} - -static ntdb_off_t hbucket_offset(ntdb_len_t idx) -{ - return sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record) - + idx * sizeof(ntdb_off_t); -} - -/* FIXME: Our hash table handling here is primitive: we don't expand! 
*/ -static void add_to_hashtable(struct ntdb_context *ntdb, - ntdb_off_t eoff, - NTDB_DATA key) -{ - ntdb_off_t b_off; - uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - - b_off = hbucket_offset(h & ((1 << ntdb->hash_bits)-1)); - if (ntdb_read_off(ntdb, b_off) != 0) - abort(); - - ntdb_write_off(ntdb, b_off, encode_offset(ntdb, eoff, h)); -} - -static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num) -{ - unsigned i; - - for (i = 0; i < layout->num_elems; i++) { - if (layout->elem[i].base.type != FREETABLE) - continue; - if (num == 0) - return &layout->elem[i].ftable; - num--; - } - abort(); -} - -/* FIXME: Support NTDB_CONVERT */ -struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, - void (*freefn)(void *), - union ntdb_attribute *attr) -{ - unsigned int i; - ntdb_off_t off, hdrlen, len, last_ftable, last_cap; - char *mem; - struct ntdb_context *ntdb; - - /* Now populate our header, cribbing from a real NTDB header. */ - ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr); - - off = sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record) - + (sizeof(ntdb_off_t) << ntdb->hash_bits); - hdrlen = off; - - /* First pass of layout: calc lengths */ - for (i = 0; i < layout->num_elems; i++) { - union ntdb_layout_elem *e = &layout->elem[i]; - e->base.off = off; - switch (e->base.type) { - case FREETABLE: - len = freetable_len(&e->ftable); - break; - case FREE: - len = free_record_len(e->free.len); - break; - case DATA: - len = data_record_len(&e->used); - break; - case CAPABILITY: - len = capability_len(&e->capability); - break; - default: - abort(); - } - off += len; - } - - mem = malloc(off); - /* Fill with some weird pattern. */ - memset(mem, 0x99, off); - memcpy(mem, ntdb->file->map_ptr, hdrlen); - - /* Mug the ntdb we have to make it use this. 
*/ - freefn(ntdb->file->map_ptr); - ntdb->file->map_ptr = mem; - ntdb->file->map_size = off; - - last_ftable = 0; - last_cap = 0; - for (i = 0; i < layout->num_elems; i++) { - union ntdb_layout_elem *e = &layout->elem[i]; - switch (e->base.type) { - case FREETABLE: - set_freetable(mem + e->base.off, ntdb, &e->ftable, - (struct ntdb_header *)mem, last_ftable); - last_ftable = e->base.off; - break; - case FREE: - set_free_record(mem + e->base.off, e->free.len); - break; - case DATA: - set_data_record(mem + e->base.off, ntdb, &e->used); - break; - case CAPABILITY: - set_capability(mem + e->base.off, ntdb, &e->capability, - (struct ntdb_header *)mem, last_cap); - last_cap = e->base.off; - break; - } - } - /* Must have a free table! */ - assert(last_ftable); - - /* Now fill the free and hash tables. */ - for (i = 0; i < layout->num_elems; i++) { - union ntdb_layout_elem *e = &layout->elem[i]; - switch (e->base.type) { - case FREE: - add_to_freetable(ntdb, e->base.off, e->free.len, - e->free.ftable_num, - find_ftable(layout, e->free.ftable_num)); - break; - case DATA: - add_to_hashtable(ntdb, e->base.off, e->used.key); - break; - default: - break; - } - } - - ntdb->ftable_off = find_ftable(layout, 0)->base.off; - return ntdb; -} - -void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *), - union ntdb_attribute *attr, const char *filename) -{ - struct ntdb_context *ntdb = ntdb_layout_get(layout, freefn, attr); - int fd; - - fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT, 0600); - if (fd < 0) - err(1, "opening %s for writing", filename); - if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size) - != ntdb->file->map_size) - err(1, "writing %s", filename); - close(fd); - ntdb_close(ntdb); -} - -void ntdb_layout_free(struct ntdb_layout *layout) -{ - unsigned int i; - - for (i = 0; i < layout->num_elems; i++) { - if (layout->elem[i].base.type == DATA) { - free(layout->elem[i].used.key.dptr); - free(layout->elem[i].used.data.dptr); - } - } - free(layout->elem); 
- free(layout); -} diff --git a/ccan/ntdb/test/layout.h b/ccan/ntdb/test/layout.h deleted file mode 100644 index ea84382c..00000000 --- a/ccan/ntdb/test/layout.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef NTDB_TEST_LAYOUT_H -#define NTDB_TEST_LAYOUT_H -#include "../private.h" - -struct ntdb_layout *new_ntdb_layout(void); -void ntdb_layout_add_freetable(struct ntdb_layout *layout); -void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len, - unsigned ftable); -void ntdb_layout_add_used(struct ntdb_layout *layout, - NTDB_DATA key, NTDB_DATA data, - ntdb_len_t extra); -void ntdb_layout_add_capability(struct ntdb_layout *layout, - uint64_t type, - bool write_breaks, - bool check_breaks, - bool open_breaks, - ntdb_len_t extra); - -#if 0 /* FIXME: Allow allocation of subtables */ -void ntdb_layout_add_hashtable(struct ntdb_layout *layout, - int htable_parent, /* -1 == toplevel */ - unsigned int bucket, - ntdb_len_t extra); -#endif -/* freefn is needed if we're using failtest_free. */ -struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, - void (*freefn)(void *), - union ntdb_attribute *attr); -void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *), - union ntdb_attribute *attr, const char *filename); - -void ntdb_layout_free(struct ntdb_layout *layout); - -enum layout_type { - FREETABLE, FREE, DATA, CAPABILITY -}; - -/* Shared by all union members. 
*/ -struct tle_base { - enum layout_type type; - ntdb_off_t off; -}; - -struct tle_freetable { - struct tle_base base; -}; - -struct tle_free { - struct tle_base base; - ntdb_len_t len; - unsigned ftable_num; -}; - -struct tle_used { - struct tle_base base; - NTDB_DATA key; - NTDB_DATA data; - ntdb_len_t extra; -}; - -struct tle_capability { - struct tle_base base; - uint64_t type; - ntdb_len_t extra; -}; - -union ntdb_layout_elem { - struct tle_base base; - struct tle_freetable ftable; - struct tle_free free; - struct tle_used used; - struct tle_capability capability; -}; - -struct ntdb_layout { - unsigned int num_elems; - union ntdb_layout_elem *elem; -}; - -#include "helprun-layout.h" -#endif /* NTDB_TEST_LAYOUT_H */ diff --git a/ccan/ntdb/test/lock-tracking.c b/ccan/ntdb/test/lock-tracking.c deleted file mode 100644 index 2d654e4b..00000000 --- a/ccan/ntdb/test/lock-tracking.c +++ /dev/null @@ -1,147 +0,0 @@ -/* We save the locks so we can reaquire them. */ -#include "../private.h" /* For NTDB_HASH_LOCK_START, etc. */ -#include -#include -#include -#include -#include "tap-interface.h" -#include "lock-tracking.h" - -struct lock { - struct lock *next; - unsigned int off; - unsigned int len; - int type; -}; -static struct lock *locks; -int locking_errors = 0; -bool suppress_lockcheck = false; -bool nonblocking_locks; -int locking_would_block = 0; -void (*unlock_callback)(int fd); - -int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ) -{ - va_list ap; - int ret, arg3; - struct flock *fl; - bool may_block = false; - - if (cmd != F_SETLK && cmd != F_SETLKW) { - /* This may be totally bogus, but we don't know in general. 
*/ - va_start(ap, cmd); - arg3 = va_arg(ap, int); - va_end(ap); - - return fcntl(fd, cmd, arg3); - } - - va_start(ap, cmd); - fl = va_arg(ap, struct flock *); - va_end(ap); - - if (cmd == F_SETLKW && nonblocking_locks) { - cmd = F_SETLK; - may_block = true; - } - ret = fcntl(fd, cmd, fl); - - /* Detect when we failed, but might have been OK if we waited. */ - if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) { - locking_would_block++; - } - - if (fl->l_type == F_UNLCK) { - struct lock **l; - struct lock *old = NULL; - - for (l = &locks; *l; l = &(*l)->next) { - if ((*l)->off == fl->l_start - && (*l)->len == fl->l_len) { - if (ret == 0) { - old = *l; - *l = (*l)->next; - free(old); - } - break; - } - } - if (!old && !suppress_lockcheck) { - diag("Unknown unlock %u@%u - %i", - (int)fl->l_len, (int)fl->l_start, ret); - locking_errors++; - } - } else { - struct lock *new, *i; - unsigned int fl_end = fl->l_start + fl->l_len; - if (fl->l_len == 0) - fl_end = (unsigned int)-1; - - /* Check for overlaps: we shouldn't do this. */ - for (i = locks; i; i = i->next) { - unsigned int i_end = i->off + i->len; - if (i->len == 0) - i_end = (unsigned int)-1; - - if (fl->l_start >= i->off && fl->l_start < i_end) - break; - if (fl_end > i->off && fl_end < i_end) - break; - - /* ntdb_allrecord_lock does this, handle adjacent: */ - if (fl->l_start > NTDB_HASH_LOCK_START - && fl->l_start == i_end && fl->l_type == i->type) { - if (ret == 0) { - i->len = fl->l_len - ? i->len + fl->l_len - : 0; - } - goto done; - } - } - if (i) { - /* Special case: upgrade of allrecord lock. */ - if (i->type == F_RDLCK && fl->l_type == F_WRLCK - && i->off == NTDB_HASH_LOCK_START - && fl->l_start == NTDB_HASH_LOCK_START - && i->len == 0 - && fl->l_len == 0) { - if (ret == 0) - i->type = F_WRLCK; - goto done; - } - if (!suppress_lockcheck) { - diag("%s lock %u@%u overlaps %u@%u", - fl->l_type == F_WRLCK ? 
"write" : "read", - (int)fl->l_len, (int)fl->l_start, - i->len, (int)i->off); - locking_errors++; - } - } - - if (ret == 0) { - new = malloc(sizeof *new); - new->off = fl->l_start; - new->len = fl->l_len; - new->type = fl->l_type; - new->next = locks; - locks = new; - } - } -done: - if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback) - unlock_callback(fd); - return ret; -} - -unsigned int forget_locking(void) -{ - unsigned int num = 0; - while (locks) { - struct lock *next = locks->next; - free(locks); - locks = next; - num++; - } - return num; -} diff --git a/ccan/ntdb/test/lock-tracking.h b/ccan/ntdb/test/lock-tracking.h deleted file mode 100644 index f2c9c446..00000000 --- a/ccan/ntdb/test/lock-tracking.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef LOCK_TRACKING_H -#define LOCK_TRACKING_H -#include - -/* Set this if you want a callback after fnctl unlock. */ -extern void (*unlock_callback)(int fd); - -/* Replacement fcntl. */ -int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ); - -/* Discard locking info: returns number of locks outstanding. */ -unsigned int forget_locking(void); - -/* Number of errors in locking. */ -extern int locking_errors; - -/* Suppress lock checking. */ -extern bool suppress_lockcheck; - -/* Make all locks non-blocking. */ -extern bool nonblocking_locks; - -/* Number of times we failed a lock because we made it non-blocking. 
*/ -extern int locking_would_block; -#endif /* LOCK_TRACKING_H */ diff --git a/ccan/ntdb/test/logging.c b/ccan/ntdb/test/logging.c deleted file mode 100644 index 2819dd7c..00000000 --- a/ccan/ntdb/test/logging.c +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include -#include "tap-interface.h" -#include "logging.h" - -unsigned tap_log_messages; -const char *log_prefix = ""; -char *log_last = NULL; -bool suppress_logging; - -union ntdb_attribute tap_log_attr = { - .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG }, - .fn = tap_log_fn } -}; - -void tap_log_fn(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, void *priv) -{ - if (suppress_logging) - return; - - diag("ntdb log level %u: %s: %s%s", - level, ntdb_errorstr(ecode), log_prefix, message); - if (log_last) - free(log_last); - log_last = strdup(message); - tap_log_messages++; -} diff --git a/ccan/ntdb/test/logging.h b/ccan/ntdb/test/logging.h deleted file mode 100644 index f8e1eb0e..00000000 --- a/ccan/ntdb/test/logging.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef NTDB_TEST_LOGGING_H -#define NTDB_TEST_LOGGING_H -#include "../ntdb.h" -#include -#include - -extern bool suppress_logging; -extern const char *log_prefix; -extern unsigned tap_log_messages; -extern union ntdb_attribute tap_log_attr; -extern char *log_last; - -void tap_log_fn(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, void *priv); -#endif /* NTDB_TEST_LOGGING_H */ diff --git a/ccan/ntdb/test/no-fsync.h b/ccan/ntdb/test/no-fsync.h deleted file mode 100644 index f0c098e2..00000000 --- a/ccan/ntdb/test/no-fsync.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef NTDB_NO_FSYNC_H -#define NTDB_NO_FSYNC_H -/* Obey $TDB_NO_FSYNC, a bit like tdb does (only note our NTDB_NOSYNC - * does less) */ -#define MAYBE_NOSYNC (getenv("TDB_NO_FSYNC") ? 
NTDB_NOSYNC : 0) -#endif diff --git a/ccan/ntdb/test/ntdb-source.h b/ccan/ntdb/test/ntdb-source.h deleted file mode 100644 index 88e517eb..00000000 --- a/ccan/ntdb/test/ntdb-source.h +++ /dev/null @@ -1,11 +0,0 @@ -#include "config.h" -#include "../check.c" -#include "../free.c" -#include "../hash.c" -#include "../io.c" -#include "../lock.c" -#include "../open.c" -#include "../summary.c" -#include "../ntdb.c" -#include "../transaction.c" -#include "../traverse.c" diff --git a/ccan/ntdb/test/python-api.py b/ccan/ntdb/test/python-api.py deleted file mode 100644 index b728ad02..00000000 --- a/ccan/ntdb/test/python-api.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python -# Some simple tests for the Python bindings for TDB -# Note that this tests the interface of the Python bindings -# It does not test tdb itself. -# -# Copyright (C) 2007-2013 Jelmer Vernooij -# Published under the GNU LGPLv3 or later - -import ntdb -from unittest import TestCase -import os, tempfile - - -class OpenTdbTests(TestCase): - - def test_nonexistent_read(self): - self.assertRaises(IOError, ntdb.Ntdb, "/some/nonexistent/file", 0, - ntdb.DEFAULT, os.O_RDWR) - -class CloseTdbTests(TestCase): - - def test_double_close(self): - self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT, - os.O_CREAT|os.O_RDWR) - self.assertNotEqual(None, self.ntdb) - - # ensure that double close does not crash python - self.ntdb.close() - self.ntdb.close() - - # Check that further operations do not crash python - self.assertRaises(RuntimeError, lambda: self.ntdb.transaction_start()) - - self.assertRaises(RuntimeError, lambda: self.ntdb["bar"]) - - -class InternalTdbTests(TestCase): - - def test_repr(self): - self.ntdb = ntdb.Ntdb() - - # repr used to crash on internal db - self.assertEquals(repr(self.ntdb), "Ntdb()") - - -class SimpleTdbTests(TestCase): - - def setUp(self): - super(SimpleTdbTests, self).setUp() - self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT, - os.O_CREAT|os.O_RDWR) - 
self.assertNotEqual(None, self.ntdb) - - def tearDown(self): - del self.ntdb - - def test_repr(self): - self.assertTrue(repr(self.ntdb).startswith("Ntdb('")) - - def test_lockall(self): - self.ntdb.lock_all() - - def test_unlockall(self): - self.ntdb.lock_all() - self.ntdb.unlock_all() - - def test_lockall_read(self): - self.ntdb.read_lock_all() - self.ntdb.read_unlock_all() - - def test_store(self): - self.ntdb.store("bar", "bla") - self.assertEquals("bla", self.ntdb.get("bar")) - - def test_getitem(self): - self.ntdb["bar"] = "foo" - self.assertEquals("foo", self.ntdb["bar"]) - - def test_delete(self): - self.ntdb["bar"] = "foo" - del self.ntdb["bar"] - self.assertRaises(KeyError, lambda: self.ntdb["bar"]) - - def test_contains(self): - self.ntdb["bla"] = "bloe" - self.assertTrue("bla" in self.ntdb) - - def test_keyerror(self): - self.assertRaises(KeyError, lambda: self.ntdb["bla"]) - - def test_name(self): - self.ntdb.filename - - def test_iterator(self): - self.ntdb["bla"] = "1" - self.ntdb["brainslug"] = "2" - l = list(self.ntdb) - l.sort() - self.assertEquals(["bla", "brainslug"], l) - - def test_transaction_cancel(self): - self.ntdb["bloe"] = "2" - self.ntdb.transaction_start() - self.ntdb["bloe"] = "1" - self.ntdb.transaction_cancel() - self.assertEquals("2", self.ntdb["bloe"]) - - def test_transaction_commit(self): - self.ntdb["bloe"] = "2" - self.ntdb.transaction_start() - self.ntdb["bloe"] = "1" - self.ntdb.transaction_commit() - self.assertEquals("1", self.ntdb["bloe"]) - - def test_transaction_prepare_commit(self): - self.ntdb["bloe"] = "2" - self.ntdb.transaction_start() - self.ntdb["bloe"] = "1" - self.ntdb.transaction_prepare_commit() - self.ntdb.transaction_commit() - self.assertEquals("1", self.ntdb["bloe"]) - - def test_iterkeys(self): - self.ntdb["bloe"] = "2" - self.ntdb["bla"] = "25" - i = self.ntdb.iterkeys() - self.assertEquals(set(["bloe", "bla"]), set([i.next(), i.next()])) - - def test_clear(self): - self.ntdb["bloe"] = "2" - 
self.ntdb["bla"] = "25" - self.assertEquals(2, len(list(self.ntdb))) - self.ntdb.clear() - self.assertEquals(0, len(list(self.ntdb))) - - def test_len(self): - self.assertEquals(0, len(list(self.ntdb))) - self.ntdb["entry"] = "value" - self.assertEquals(1, len(list(self.ntdb))) - - def test_add_flags(self): - self.ntdb.add_flag(ntdb.NOMMAP) - self.ntdb.remove_flag(ntdb.NOMMAP) - - -class VersionTests(TestCase): - - def test_present(self): - self.assertTrue(isinstance(ntdb.__version__, str)) - - -if __name__ == '__main__': - import unittest - unittest.TestProgram() diff --git a/ccan/ntdb/test/run-001-encode.c b/ccan/ntdb/test/run-001-encode.c deleted file mode 100644 index 81f3e170..00000000 --- a/ccan/ntdb/test/run-001-encode.c +++ /dev/null @@ -1,40 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_used_record rec; - struct ntdb_context ntdb = { .log_fn = tap_log_fn }; - - plan_tests(64 + 32 + 48*5 + 1); - - /* We should be able to encode any data value. */ - for (i = 0; i < 64; i++) - ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i, - 1ULL << i) == 0); - - /* And any key and data with < 64 bits between them. */ - for (i = 0; i < 32; i++) { - ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i; - ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen, - klen + dlen) == 0); - } - - /* We should neatly encode all values. */ - for (i = 0; i < 48; i++) { - uint64_t klen = 1ULL << (i < 16 ? i : 15); - uint64_t dlen = 1ULL << i; - uint64_t xlen = 1ULL << (i < 32 ? 
i : 31); - ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen, - klen+dlen+xlen) == 0); - ok1(rec_key_length(&rec) == klen); - ok1(rec_data_length(&rec) == dlen); - ok1(rec_extra_padding(&rec) == xlen); - ok1(rec_magic(&rec) == NTDB_USED_MAGIC); - } - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-001-fls.c b/ccan/ntdb/test/run-001-fls.c deleted file mode 100644 index 6ed46fe0..00000000 --- a/ccan/ntdb/test/run-001-fls.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "helprun-external-agent.h" - -static unsigned int dumb_fls(uint64_t num) -{ - int i; - - for (i = 63; i >= 0; i--) { - if (num & (1ULL << i)) - break; - } - return i + 1; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - - plan_tests(64 * 64 + 2); - - ok1(fls64(0) == 0); - ok1(dumb_fls(0) == 0); - - for (i = 0; i < 64; i++) { - for (j = 0; j < 64; j++) { - uint64_t val = (1ULL << i) | (1ULL << j); - ok(fls64(val) == dumb_fls(val), - "%llu -> %u should be %u", (long long)val, - fls64(val), dumb_fls(val)); - } - } - return exit_status(); -} diff --git a/ccan/ntdb/test/run-01-new_database.c b/ccan/ntdb/test/run-01-new_database.c deleted file mode 100644 index 11fb0248..00000000 --- a/ccan/ntdb/test/run-01-new_database.c +++ /dev/null @@ -1,41 +0,0 @@ -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 3); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-new_database.ntdb", 
flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - failtest_exit(exit_status()); - - failtest_suppress = true; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - failtest_suppress = false; - ntdb_close(ntdb); - if (!ok1(tap_log_messages == 0)) - break; - } - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-02-expand.c b/ccan/ntdb/test/run-02-expand.c deleted file mode 100644 index 55927d9b..00000000 --- a/ccan/ntdb/test/run-02-expand.c +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - uint64_t val; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1); - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - failtest_suppress = true; - ntdb = ntdb_open("run-expand.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - break; - - val = ntdb->file->map_size; - /* Need some hash lock for expand. 
*/ - ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0); - failtest_suppress = false; - if (!ok1(ntdb_expand(ntdb, 1) == 0)) { - failtest_suppress = true; - ntdb_close(ntdb); - break; - } - failtest_suppress = true; - - ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR); - ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - val = ntdb->file->map_size; - ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0); - failtest_suppress = false; - if (!ok1(ntdb_expand(ntdb, 1024) == 0)) { - failtest_suppress = true; - ntdb_close(ntdb); - break; - } - failtest_suppress = true; - ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0); - ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-03-coalesce.c b/ccan/ntdb/test/run-03-coalesce.c deleted file mode 100644 index e86ee656..00000000 --- a/ccan/ntdb/test/run-03-coalesce.c +++ /dev/null @@ -1,179 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "layout.h" -#include "helprun-external-agent.h" - -static ntdb_len_t free_record_length(struct ntdb_context *ntdb, ntdb_off_t off) -{ - struct ntdb_free_record f; - enum NTDB_ERROR ecode; - - ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f)); - if (ecode != NTDB_SUCCESS) - return ecode; - if (frec_magic(&f) != NTDB_FREE_MAGIC) - return NTDB_ERR_CORRUPT; - return frec_len(&f); -} - -int main(int argc, char *argv[]) -{ - ntdb_off_t b_off, test; - struct ntdb_context *ntdb; - struct ntdb_layout *layout; - NTDB_DATA data, key; - ntdb_len_t len; - - /* FIXME: Test NTDB_CONVERT */ - /* FIXME: Test lock order fail. 
*/ - - plan_tests(42); - data = ntdb_mkdata("world", 5); - key = ntdb_mkdata("hello", 5); - - /* No coalescing can be done due to EOF */ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - len = 15560; - ntdb_layout_add_free(layout, len, 0); - ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); - /* NOMMAP is for lockcheck. */ - ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, - O_RDWR, 0, &tap_log_attr); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == len); - - /* Figure out which bucket free entry is. */ - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len)); - /* Lock and fail to coalesce. */ - ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); - test = layout->elem[1].base.off; - ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, len, &test) - == 0); - ntdb_unlock_free_bucket(ntdb, b_off); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == len); - ok1(test == layout->elem[1].base.off); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - ntdb_layout_free(layout); - - /* No coalescing can be done due to used record */ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_free(layout, 15528, 0); - ntdb_layout_add_used(layout, key, data, 6); - ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); - /* NOMMAP is for lockcheck. */ - ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, - O_RDWR, 0, &tap_log_attr); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Figure out which bucket free entry is. */ - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(15528)); - /* Lock and fail to coalesce. 
*/ - ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); - test = layout->elem[1].base.off; - ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 15528, &test) - == 0); - ntdb_unlock_free_bucket(ntdb, b_off); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528); - ok1(test == layout->elem[1].base.off); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - ntdb_layout_free(layout); - - /* Coalescing can be done due to two free records, then EOF */ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_free(layout, 1024, 0); - ntdb_layout_add_free(layout, 14520, 0); - ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); - /* NOMMAP is for lockcheck. */ - ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, - O_RDWR, 0, &tap_log_attr); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); - ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14520); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Figure out which bucket (first) free entry is. */ - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024)); - /* Lock and coalesce. */ - ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); - test = layout->elem[2].base.off; - ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) - == 1024 + sizeof(struct ntdb_used_record) + 14520); - /* Should tell us it's erased this one... 
*/ - ok1(test == NTDB_ERR_NOEXIST); - ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); - ok1(free_record_length(ntdb, layout->elem[1].base.off) - == 1024 + sizeof(struct ntdb_used_record) + 14520); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - ntdb_layout_free(layout); - - /* Coalescing can be done due to two free records, then data */ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_free(layout, 1024, 0); - ntdb_layout_add_free(layout, 14488, 0); - ntdb_layout_add_used(layout, key, data, 6); - ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); - /* NOMMAP is for lockcheck. */ - ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, - O_RDWR, 0, &tap_log_attr); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); - ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14488); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Figure out which bucket free entry is. */ - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024)); - /* Lock and coalesce. */ - ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); - test = layout->elem[2].base.off; - ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) - == 1024 + sizeof(struct ntdb_used_record) + 14488); - ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); - ok1(free_record_length(ntdb, layout->elem[1].base.off) - == 1024 + sizeof(struct ntdb_used_record) + 14488); - ok1(test == NTDB_ERR_NOEXIST); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - ntdb_layout_free(layout); - - /* Coalescing can be done due to three free records, then EOF */ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_free(layout, 1024, 0); - ntdb_layout_add_free(layout, 512, 0); - ntdb_layout_add_free(layout, 13992, 0); - ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); - /* NOMMAP is for lockcheck. 
*/ - ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, - O_RDWR, 0, &tap_log_attr); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); - ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512); - ok1(free_record_length(ntdb, layout->elem[3].base.off) == 13992); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Figure out which bucket free entry is. */ - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024)); - /* Lock and coalesce. */ - ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); - test = layout->elem[2].base.off; - ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) - == 1024 + sizeof(struct ntdb_used_record) + 512 - + sizeof(struct ntdb_used_record) + 13992); - ok1(ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0); - ok1(free_record_length(ntdb, layout->elem[1].base.off) - == 1024 + sizeof(struct ntdb_used_record) + 512 - + sizeof(struct ntdb_used_record) + 13992); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - ntdb_layout_free(layout); - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-04-basichash.c b/ccan/ntdb/test/run-04-basichash.c deleted file mode 100644 index 9936d859..00000000 --- a/ccan/ntdb/test/run-04-basichash.c +++ /dev/null @@ -1,322 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -/* We rig the hash so all records clash. 
*/ -static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv) -{ - return *((const unsigned int *)key) << 20; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - unsigned int v; - struct ntdb_used_record rec; - NTDB_DATA key = { (unsigned char *)&v, sizeof(v) }; - NTDB_DATA dbuf = { (unsigned char *)&v, sizeof(v) }; - union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = clash } }; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT, - }; - - hattr.base.next = &tap_log_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 137 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - struct hash_info h; - ntdb_off_t new_off, new_off2, off; - - ntdb = ntdb_open("run-04-basichash.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); - ok1(ntdb); - if (!ntdb) - continue; - - v = 0; - /* Should not find it. */ - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in top table, bucket 0. */ - ok1(h.table == NTDB_HASH_OFFSET); - ok1(h.table_size == (1 << ntdb->hash_bits)); - ok1(h.bucket == 0); - ok1(h.old_val == 0); - - /* Should have lock on bucket 0 */ - ok1(h.h == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - /* FIXME: Check lock length */ - - /* Allocate a new record. */ - new_off = alloc(ntdb, key.dsize, dbuf.dsize, - NTDB_USED_MAGIC, false); - ok1(!NTDB_OFF_IS_ERR(new_off)); - - /* We should be able to add it now. */ - ok1(add_to_hash(ntdb, &h, new_off) == 0); - - /* Make sure we fill it in for later finding. 
*/ - off = new_off + sizeof(struct ntdb_used_record); - ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); - off += key.dsize; - ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); - - /* We should be able to unlock that OK. */ - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* Database should be consistent. */ - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Now, this should give a successful lookup. */ - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located it in top table, bucket 0. */ - ok1(h.table == NTDB_HASH_OFFSET); - ok1(h.table_size == (1 << ntdb->hash_bits)); - ok1(h.bucket == 0); - - /* Should have lock on bucket 0 */ - ok1(h.h == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - /* FIXME: Check lock length */ - - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* Database should be consistent. */ - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Test expansion. */ - v = 1; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located clash in toplevel bucket 0. 
*/ - ok1(h.table == NTDB_HASH_OFFSET); - ok1(h.table_size == (1 << ntdb->hash_bits)); - ok1(h.bucket == 0); - ok1((h.old_val & NTDB_OFF_MASK) == new_off); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - /* FIXME: Check lock length */ - - new_off2 = alloc(ntdb, key.dsize, dbuf.dsize, - NTDB_USED_MAGIC, false); - ok1(!NTDB_OFF_IS_ERR(new_off2)); - - off = new_off2 + sizeof(struct ntdb_used_record); - ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); - off += key.dsize; - ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); - - /* We should be able to add it now. */ - ok1(add_to_hash(ntdb, &h, new_off2) == 0); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* Should be happy with expansion. */ - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Should be able to find both. */ - v = 1; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in chain. */ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 2); - ok1(h.bucket == 1); - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - v = 0; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in chain. 
*/ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 2); - ok1(h.bucket == 0); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - /* FIXME: Check lock length */ - - /* Simple delete should work. */ - ok1(delete_from_hash(ntdb, &h) == 0); - ok1(add_free_record(ntdb, new_off, - sizeof(struct ntdb_used_record) - + rec_key_length(&rec) - + rec_data_length(&rec) - + rec_extra_padding(&rec), - NTDB_LOCK_NOWAIT, false) == 0); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Should still be able to find other record. */ - v = 1; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in chain. */ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 2); - ok1(h.bucket == 1); - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* Now should find empty space. */ - v = 0; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in chain, bucket 0. */ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 2); - ok1(h.bucket == 0); - ok1(h.old_val == 0); - - /* Adding another record should work. */ - v = 2; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in chain, bucket 0. 
*/ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 2); - ok1(h.bucket == 0); - ok1(h.old_val == 0); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - - new_off = alloc(ntdb, key.dsize, dbuf.dsize, - NTDB_USED_MAGIC, false); - ok1(!NTDB_OFF_IS_ERR(new_off2)); - ok1(add_to_hash(ntdb, &h, new_off) == 0); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - off = new_off + sizeof(struct ntdb_used_record); - ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); - off += key.dsize; - ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); - - /* Adding another record should cause expansion. */ - v = 3; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should not have located space in chain. */ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 2); - ok1(h.bucket == 2); - ok1(h.old_val != 0); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - - new_off = alloc(ntdb, key.dsize, dbuf.dsize, - NTDB_USED_MAGIC, false); - ok1(!NTDB_OFF_IS_ERR(new_off2)); - off = new_off + sizeof(struct ntdb_used_record); - ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); - off += key.dsize; - ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); - ok1(add_to_hash(ntdb, &h, new_off) == 0); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* Retrieve it and check. */ - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); - /* Should have created correct hash. 
*/ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have appended to chain, bucket 2. */ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 3); - ok1(h.bucket == 2); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* YA record: relocation. */ - v = 4; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should not have located space in chain. */ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 3); - ok1(h.bucket == 3); - ok1(h.old_val != 0); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - - new_off = alloc(ntdb, key.dsize, dbuf.dsize, - NTDB_USED_MAGIC, false); - ok1(!NTDB_OFF_IS_ERR(new_off2)); - off = new_off + sizeof(struct ntdb_used_record); - ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); - off += key.dsize; - ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); - ok1(add_to_hash(ntdb, &h, new_off) == 0); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - /* Retrieve it and check. */ - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have appended to chain, bucket 2. 
*/ - ok1(h.table > NTDB_HASH_OFFSET); - ok1(h.table_size == 4); - ok1(h.bucket == 3); - - /* Should have lock on bucket 0 */ - ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); - ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); - ok1((ntdb->flags & NTDB_NOLOCK) - || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-05-readonly-open.c b/ccan/ntdb/test/run-05-readonly-open.c deleted file mode 100644 index 057fa088..00000000 --- a/ccan/ntdb/test/run-05-readonly-open.c +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4), d; - union ntdb_attribute seed_attr; - unsigned int msgs = 0; - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - - seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; - seed_attr.base.next = &tap_log_attr; - seed_attr.seed.seed = 0; - - failtest_suppress = true; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 11); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-05-readonly-open.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, - &seed_attr); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ntdb_close(ntdb); - - failtest_suppress = false; - ntdb = ntdb_open("run-05-readonly-open.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDONLY, 0600, &tap_log_attr); - if (!ok1(ntdb)) - break; - ok1(tap_log_messages == msgs); - /* Fetch should succeed, stores should fail. 
*/ - if (!ok1(ntdb_fetch(ntdb, key, &d) == 0)) - goto fail; - ok1(ntdb_deq(d, data)); - free(d.dptr); - if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) - == NTDB_ERR_RDONLY)) - goto fail; - ok1(tap_log_messages == ++msgs); - if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) - == NTDB_ERR_RDONLY)) - goto fail; - ok1(tap_log_messages == ++msgs); - failtest_suppress = true; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - ok1(tap_log_messages == msgs); - /* SIGH: failtest bug, it doesn't save the ntdb file because - * we have it read-only. If we go around again, it gets - * changed underneath us and things get screwy. */ - if (failtest_has_failed()) - break; - } - failtest_exit(exit_status()); - -fail: - failtest_suppress = true; - ntdb_close(ntdb); - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-10-simple-store.c b/ccan/ntdb/test/run-10-simple-store.c deleted file mode 100644 index d3f3b7fd..00000000 --- a/ccan/ntdb/test/run-10-simple-store.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - - failtest_suppress = true; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-10-simple-store.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if 
(!ok1(ntdb)) - break; - /* Modify should fail. */ - failtest_suppress = false; - if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) - == NTDB_ERR_NOEXIST)) - goto fail; - failtest_suppress = true; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Insert should succeed. */ - failtest_suppress = false; - if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0)) - goto fail; - failtest_suppress = true; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Second insert should fail. */ - failtest_suppress = false; - if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) - == NTDB_ERR_EXISTS)) - goto fail; - failtest_suppress = true; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - } - ok1(tap_log_messages == 0); - failtest_exit(exit_status()); - -fail: - failtest_suppress = true; - ntdb_close(ntdb); - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-11-simple-fetch.c b/ccan/ntdb/test/run-11-simple-fetch.c deleted file mode 100644 index fba76202..00000000 --- a/ccan/ntdb/test/run-11-simple-fetch.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - - failtest_suppress = true; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-11-simple-fetch.ntdb", - flags[i]|MAYBE_NOSYNC, - 
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (ntdb) { - NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ - - /* fetch should fail. */ - failtest_suppress = false; - if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST)) - goto fail; - failtest_suppress = true; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Insert should succeed. */ - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Fetch should now work. */ - failtest_suppress = false; - if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS)) - goto fail; - failtest_suppress = true; - ok1(ntdb_deq(d, data)); - free(d.dptr); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - } - } - ok1(tap_log_messages == 0); - failtest_exit(exit_status()); - -fail: - failtest_suppress = true; - ntdb_close(ntdb); - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-12-check.c b/ccan/ntdb/test/run-12-check.c deleted file mode 100644 index c2354cb5..00000000 --- a/ccan/ntdb/test/run-12-check.c +++ /dev/null @@ -1,53 +0,0 @@ -#include "../private.h" -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, - NTDB_INTERNAL|NTDB_CONVERT, - NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - - failtest_suppress = true; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-12-check.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, 
&tap_log_attr); - ok1(ntdb); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - - /* This is what we really want to test: ntdb_check(). */ - failtest_suppress = false; - if (!ok1(ntdb_check(ntdb, NULL, NULL) == 0)) - goto fail; - failtest_suppress = true; - - ntdb_close(ntdb); - } - ok1(tap_log_messages == 0); - failtest_exit(exit_status()); - -fail: - failtest_suppress = true; - ntdb_close(ntdb); - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-15-append.c b/ccan/ntdb/test/run-15-append.c deleted file mode 100644 index fb8d7c20..00000000 --- a/ccan/ntdb/test/run-15-append.c +++ /dev/null @@ -1,131 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "helprun-external-agent.h" - -#define MAX_SIZE 13100 -#define SIZE_STEP 131 - -static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key) -{ - ntdb_off_t off; - struct ntdb_used_record urec; - struct hash_info h; - - off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec, NULL); - if (NTDB_OFF_IS_ERR(off)) - return 0; - ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - return off; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j, moves; - struct ntdb_context *ntdb; - unsigned char *buffer; - ntdb_off_t oldoff = 0, newoff; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data; - - buffer = malloc(MAX_SIZE); - for (i = 0; i < MAX_SIZE; i++) - buffer[i] = i; - - plan_tests(sizeof(flags) / sizeof(flags[0]) - * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7) - + 1); - - /* Using ntdb_store. 
*/ - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - moves = 0; - for (j = 0; j < MAX_SIZE; j += SIZE_STEP) { - data.dptr = buffer; - data.dsize = j; - ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); - ok1(data.dsize == j); - ok1(memcmp(data.dptr, buffer, data.dsize) == 0); - free(data.dptr); - newoff = ntdb_offset(ntdb, key); - if (newoff != oldoff) - moves++; - oldoff = newoff; - } - ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0)); - /* We should increase by 50% each time... */ - ok(moves <= ilog64(j / SIZE_STEP)*2, - "Moved %u times", moves); - ntdb_close(ntdb); - } - - /* Using ntdb_append. */ - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - size_t prev_len = 0; - ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - moves = 0; - for (j = 0; j < MAX_SIZE; j += SIZE_STEP) { - data.dptr = buffer + prev_len; - data.dsize = j - prev_len; - ok1(ntdb_append(ntdb, key, data) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); - ok1(data.dsize == j); - ok1(memcmp(data.dptr, buffer, data.dsize) == 0); - free(data.dptr); - prev_len = data.dsize; - newoff = ntdb_offset(ntdb, key); - if (newoff != oldoff) - moves++; - oldoff = newoff; - } - ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0)); - /* We should increase by 50% each time... 
*/ - ok(moves <= ilog64(j / SIZE_STEP)*2, - "Moved %u times", moves); - ntdb_close(ntdb); - } - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Huge initial store. */ - data.dptr = buffer; - data.dsize = MAX_SIZE; - ok1(ntdb_append(ntdb, key, data) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); - ok1(data.dsize == MAX_SIZE); - ok1(memcmp(data.dptr, buffer, data.dsize) == 0); - free(data.dptr); - ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 - && ntdb->file->num_lockrecs == 0)); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - free(buffer); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-25-hashoverload.c b/ccan/ntdb/test/run-25-hashoverload.c deleted file mode 100644 index 5a2c9cd7..00000000 --- a/ccan/ntdb/test/run-25-hashoverload.c +++ /dev/null @@ -1,94 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -#define OVERLOAD 100 - -static uint32_t badhash(const void *key, size_t len, uint32_t seed, void *priv) -{ - return 0; -} - -static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p) -{ - if (p) - return ntdb_delete(ntdb, key); - return 0; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; - NTDB_DATA dbuf = { (unsigned char *)&j, sizeof(j) }; - union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = badhash } }; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT, - }; - - hattr.base.next = &tap_log_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * (7 * OVERLOAD + 11) + 1); - for (i = 0; i < sizeof(flags) / 
sizeof(flags[0]); i++) { - NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ - - ntdb = ntdb_open("run-25-hashoverload.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Overload a bucket. */ - for (j = 0; j < OVERLOAD; j++) { - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - } - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Check we can find them all. */ - for (j = 0; j < OVERLOAD; j++) { - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == sizeof(j)); - ok1(d.dptr != NULL); - ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); - free(d.dptr); - } - - /* Traverse through them. */ - ok1(ntdb_traverse(ntdb, trav, NULL) == OVERLOAD); - - /* Delete the first 99. */ - for (j = 0; j < OVERLOAD-1; j++) - ok1(ntdb_delete(ntdb, key) == 0); - - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == sizeof(j)); - ok1(d.dptr != NULL); - ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); - free(d.dptr); - - /* Traverse through them. */ - ok1(ntdb_traverse(ntdb, trav, NULL) == 1); - - /* Re-add */ - for (j = 0; j < OVERLOAD-1; j++) { - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - } - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Now try deleting as we go. 
*/ - ok1(ntdb_traverse(ntdb, trav, trav) == OVERLOAD); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb_traverse(ntdb, trav, NULL) == 0); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-30-exhaust-before-expand.c b/ccan/ntdb/test/run-30-exhaust-before-expand.c deleted file mode 100644 index e44b32c8..00000000 --- a/ccan/ntdb/test/run-30-exhaust-before-expand.c +++ /dev/null @@ -1,77 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static bool empty_freetable(struct ntdb_context *ntdb) -{ - struct ntdb_freetable ftab; - unsigned int i; - - /* Now, free table should be completely exhausted in zone 0 */ - if (ntdb_read_convert(ntdb, ntdb->ftable_off, &ftab, sizeof(ftab)) != 0) - abort(); - - for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) { - if (ftab.buckets[i]) - return false; - } - return true; -} - - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - NTDB_DATA k, d; - uint64_t size; - bool was_empty = false; - - k.dptr = (void *)&j; - k.dsize = sizeof(j); - - ntdb = ntdb_open("run-30-exhaust-before-expand.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* There's one empty record in initial db. */ - ok1(!empty_freetable(ntdb)); - - size = ntdb->file->map_size; - - /* Create one record to chew up most space. 
*/ - d.dsize = size - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 32; - d.dptr = calloc(d.dsize, 1); - j = 0; - ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); - ok1(ntdb->file->map_size == size); - free(d.dptr); - - /* Now insert minimal-length records until we expand. */ - for (j = 1; ntdb->file->map_size == size; j++) { - was_empty = empty_freetable(ntdb); - if (ntdb_store(ntdb, k, k, NTDB_INSERT) != 0) - err(1, "Failed to store record %i", j); - } - - /* Would have been empty before expansion, but no longer. */ - ok1(was_empty); - ok1(!empty_freetable(ntdb)); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-35-convert.c b/ccan/ntdb/test/run-35-convert.c deleted file mode 100644 index 4899dc66..00000000 --- a/ccan/ntdb/test/run-35-convert.c +++ /dev/null @@ -1,65 +0,0 @@ -#include "../private.h" -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include "logging.h" -#include "failtest_helper.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i, messages = 0; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 4); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-35-convert.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - if (!ok1(ntdb)) - failtest_exit(exit_status()); - - ntdb_close(ntdb); - /* We can fail in log message formatting or open. 
That's OK */ - if (failtest_has_failed()) { - failtest_exit(exit_status()); - } - /* If we say NTDB_CONVERT, it must be converted */ - ntdb = ntdb_open("run-35-convert.ntdb", - flags[i]|NTDB_CONVERT|MAYBE_NOSYNC, - O_RDWR, 0600, &tap_log_attr); - if (flags[i] & NTDB_CONVERT) { - if (!ntdb) - failtest_exit(exit_status()); - ok1(ntdb_get_flags(ntdb) & NTDB_CONVERT); - ntdb_close(ntdb); - } else { - if (!ok1(!ntdb && errno == EIO)) - failtest_exit(exit_status()); - ok1(tap_log_messages == ++messages); - if (!ok1(log_last && strstr(log_last, "NTDB_CONVERT"))) - failtest_exit(exit_status()); - } - - /* If don't say NTDB_CONVERT, it *may* be converted */ - ntdb = ntdb_open("run-35-convert.ntdb", - (flags[i] & ~NTDB_CONVERT)|MAYBE_NOSYNC, - O_RDWR, 0600, &tap_log_attr); - if (!ntdb) - failtest_exit(exit_status()); - ok1(ntdb_get_flags(ntdb) == (flags[i]|MAYBE_NOSYNC)); - ntdb_close(ntdb); - } - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. - */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-50-multiple-freelists.c b/ccan/ntdb/test/run-50-multiple-freelists.c deleted file mode 100644 index 4a7cf899..00000000 --- a/ccan/ntdb/test/run-50-multiple-freelists.c +++ /dev/null @@ -1,71 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "layout.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - ntdb_off_t off; - struct ntdb_context *ntdb; - struct ntdb_layout *layout; - NTDB_DATA key, data; - union ntdb_attribute seed; - - /* This seed value previously tickled a layout.c bug. */ - seed.base.attr = NTDB_ATTRIBUTE_SEED; - seed.seed.seed = 0xb1142bc054d035b4ULL; - seed.base.next = &tap_log_attr; - - plan_tests(11); - key = ntdb_mkdata("Hello", 5); - data = ntdb_mkdata("world", 5); - - /* Create a NTDB with three free tables. 
*/ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_free(layout, 80, 0); - /* Used record prevent coalescing. */ - ntdb_layout_add_used(layout, key, data, 6); - ntdb_layout_add_free(layout, 160, 1); - key.dsize--; - ntdb_layout_add_used(layout, key, data, 7); - ntdb_layout_add_free(layout, 320, 2); - key.dsize--; - ntdb_layout_add_used(layout, key, data, 8); - ntdb_layout_add_free(layout, 40, 0); - ntdb = ntdb_layout_get(layout, free, &seed); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC); - ok1(off == layout->elem[3].base.off); - ok1(ntdb->ftable_off == layout->elem[0].base.off); - - off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC); - ok1(off == layout->elem[5].base.off); - ok1(ntdb->ftable_off == layout->elem[1].base.off); - - off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC); - ok1(off == layout->elem[7].base.off); - ok1(ntdb->ftable_off == layout->elem[2].base.off); - - off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC); - ok1(off == layout->elem[9].base.off); - ok1(ntdb->ftable_off == layout->elem[0].base.off); - - /* Now we fail. 
*/ - off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC); - ok1(off == 0); - - ntdb_close(ntdb); - ntdb_layout_free(layout); - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-56-open-during-transaction.c b/ccan/ntdb/test/run-56-open-during-transaction.c deleted file mode 100644 index c28fbfd3..00000000 --- a/ccan/ntdb/test/run-56-open-during-transaction.c +++ /dev/null @@ -1,166 +0,0 @@ -#include "../private.h" -#include -#include "lock-tracking.h" - -static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); -static ssize_t write_check(int fd, const void *buf, size_t count); -static int ftruncate_check(int fd, off_t length); - -#define pwrite pwrite_check -#define write write_check -#define fcntl fcntl_with_lockcheck -#define ftruncate ftruncate_check - -#include "ntdb-source.h" -#include "tap-interface.h" -#include -#include -#include -#include "external-agent.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static struct agent *agent; -static bool opened; -static int errors = 0; -#define TEST_DBNAME "run-56-open-during-transaction.ntdb" - -#undef write -#undef pwrite -#undef fcntl -#undef ftruncate - -static bool is_same(const char *snapshot, const char *latest, off_t len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (snapshot[i] != latest[i]) - return false; - } - return true; -} - -static bool compare_file(int fd, const char *snapshot, off_t snapshot_len) -{ - char *contents; - bool ret; - - /* over-length read serves as length check. 
*/ - contents = malloc(snapshot_len+1); - ret = pread(fd, contents, snapshot_len+1, 0) == snapshot_len - && is_same(snapshot, contents, snapshot_len); - free(contents); - return ret; -} - -static void check_file_intact(int fd) -{ - enum agent_return ret; - struct stat st; - char *contents; - - fstat(fd, &st); - contents = malloc(st.st_size); - if (pread(fd, contents, st.st_size, 0) != st.st_size) { - diag("Read fail"); - errors++; - return; - } - - /* Ask agent to open file. */ - ret = external_agent_operation(agent, OPEN, TEST_DBNAME); - - /* It's OK to open it, but it must not have changed! */ - if (!compare_file(fd, contents, st.st_size)) { - diag("Agent changed file after opening %s", - agent_return_name(ret)); - errors++; - } - - if (ret == SUCCESS) { - ret = external_agent_operation(agent, CLOSE, NULL); - if (ret != SUCCESS) { - diag("Agent failed to close ntdb: %s", - agent_return_name(ret)); - errors++; - } - } else if (ret != WOULD_HAVE_BLOCKED) { - diag("Agent opening file gave %s", - agent_return_name(ret)); - errors++; - } - - free(contents); -} - -static void after_unlock(int fd) -{ - if (opened) - check_file_intact(fd); -} - -static ssize_t pwrite_check(int fd, - const void *buf, size_t count, off_t offset) -{ - if (opened) - check_file_intact(fd); - - return pwrite(fd, buf, count, offset); -} - -static ssize_t write_check(int fd, const void *buf, size_t count) -{ - if (opened) - check_file_intact(fd); - - return write(fd, buf, count); -} - -static int ftruncate_check(int fd, off_t length) -{ - if (opened) - check_file_intact(fd); - - return ftruncate(fd, length); - -} - -int main(int argc, char *argv[]) -{ - const int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - int i; - struct ntdb_context *ntdb; - NTDB_DATA key, data; - - plan_tests(sizeof(flags)/sizeof(flags[0]) * 5); - agent = prepare_external_agent(); - if (!agent) - err(1, "preparing agent"); - - unlock_callback = after_unlock; - for (i = 0; i < 
sizeof(flags)/sizeof(flags[0]); i++) { - diag("Test with %s and %s\n", - (flags[i] & NTDB_CONVERT) ? "CONVERT" : "DEFAULT", - (flags[i] & NTDB_NOMMAP) ? "no mmap" : "mmap"); - unlink(TEST_DBNAME); - ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - - opened = true; - ok1(ntdb_transaction_start(ntdb) == 0); - key = ntdb_mkdata("hi", strlen("hi")); - data = ntdb_mkdata("world", strlen("world")); - - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb_transaction_commit(ntdb) == 0); - ok(!errors, "We had %u open errors", errors); - - opened = false; - ntdb_close(ntdb); - } - - return exit_status(); -} diff --git a/ccan/ntdb/test/run-57-die-during-transaction.c b/ccan/ntdb/test/run-57-die-during-transaction.c deleted file mode 100644 index 9a86fca0..00000000 --- a/ccan/ntdb/test/run-57-die-during-transaction.c +++ /dev/null @@ -1,322 +0,0 @@ -#include "../private.h" -#include -#include "lock-tracking.h" -#include "tap-interface.h" -#include -#include -static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); -static ssize_t write_check(int fd, const void *buf, size_t count); -static int ftruncate_check(int fd, off_t length); - -#define pwrite pwrite_check -#define write write_check -#define fcntl fcntl_with_lockcheck -#define ftruncate ftruncate_check - -/* There's a malloc inside transaction_setup_recovery, and valgrind complains - * when we longjmp and leak it. 
*/ -#define MAX_ALLOCATIONS 10 -static void *allocated[MAX_ALLOCATIONS]; -static unsigned max_alloc = 0; - -static void *malloc_noleak(size_t len) -{ - unsigned int i; - - for (i = 0; i < MAX_ALLOCATIONS; i++) - if (!allocated[i]) { - allocated[i] = malloc(len); - if (i > max_alloc) { - max_alloc = i; - diag("max_alloc: %i", max_alloc); - } - return allocated[i]; - } - diag("Too many allocations!"); - abort(); -} - -static void *realloc_noleak(void *p, size_t size) -{ - unsigned int i; - - for (i = 0; i < MAX_ALLOCATIONS; i++) { - if (allocated[i] == p) { - if (i > max_alloc) { - max_alloc = i; - diag("max_alloc: %i", max_alloc); - } - return allocated[i] = realloc(p, size); - } - } - diag("Untracked realloc!"); - abort(); -} - -static void free_noleak(void *p) -{ - unsigned int i; - - /* We don't catch asprintf, so don't complain if we miss one. */ - for (i = 0; i < MAX_ALLOCATIONS; i++) { - if (allocated[i] == p) { - allocated[i] = NULL; - break; - } - } - free(p); -} - -static void free_all(void) -{ - unsigned int i; - - for (i = 0; i < MAX_ALLOCATIONS; i++) { - free(allocated[i]); - allocated[i] = NULL; - } -} - -#define malloc malloc_noleak -#define free(x) free_noleak(x) -#define realloc realloc_noleak - -#include "ntdb-source.h" - -#undef malloc -#undef free -#undef realloc -#undef write -#undef pwrite -#undef fcntl -#undef ftruncate - -#include -#include -#include -#include -#include "external-agent.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static bool in_transaction; -static int target, current; -static jmp_buf jmpbuf; -#define TEST_DBNAME "run-57-die-during-transaction.ntdb" -#define KEY_STRING "helloworld" -#define DATA_STRING "Helloworld" - -static void maybe_die(int fd) -{ - if (in_transaction && current++ == target) { - longjmp(jmpbuf, 1); - } -} - -static ssize_t pwrite_check(int fd, - const void *buf, size_t count, off_t offset) -{ - ssize_t ret; - - maybe_die(fd); - - ret = pwrite(fd, buf, count, offset); - if (ret != count) 
- return ret; - - maybe_die(fd); - return ret; -} - -static ssize_t write_check(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - maybe_die(fd); - - ret = write(fd, buf, count); - if (ret != count) - return ret; - - maybe_die(fd); - return ret; -} - -static int ftruncate_check(int fd, off_t length) -{ - int ret; - - maybe_die(fd); - - ret = ftruncate(fd, length); - - maybe_die(fd); - return ret; -} - -static bool test_death(enum operation op, struct agent *agent, - bool pre_create_recovery) -{ - struct ntdb_context *ntdb = NULL; - NTDB_DATA key, data; - enum agent_return ret; - int needed_recovery = 0; - - current = target = 0; - /* Big long data to force a change. */ - data = ntdb_mkdata(DATA_STRING, strlen(DATA_STRING)); - -reset: - unlink(TEST_DBNAME); - ntdb = ntdb_open(TEST_DBNAME, NTDB_NOMMAP|MAYBE_NOSYNC, - O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr); - if (!ntdb) { - diag("Failed opening NTDB: %s", strerror(errno)); - return false; - } - - if (setjmp(jmpbuf) != 0) { - /* We're partway through. Simulate our death. */ - close(ntdb->file->fd); - forget_locking(); - in_transaction = false; - - ret = external_agent_operation(agent, NEEDS_RECOVERY, ""); - if (ret == SUCCESS) - needed_recovery++; - else if (ret != FAILED) { - diag("Step %u agent NEEDS_RECOVERY = %s", current, - agent_return_name(ret)); - return false; - } - - /* Could be key, or data. 
*/ - ret = external_agent_operation(agent, op, - KEY_STRING "=" KEY_STRING); - if (ret != SUCCESS) { - ret = external_agent_operation(agent, op, - KEY_STRING - "=" DATA_STRING); - } - if (ret != SUCCESS) { - diag("Step %u op %s failed = %s", current, - operation_name(op), - agent_return_name(ret)); - return false; - } - - ret = external_agent_operation(agent, NEEDS_RECOVERY, ""); - if (ret != FAILED) { - diag("Still needs recovery after step %u = %s", - current, agent_return_name(ret)); - return false; - } - - ret = external_agent_operation(agent, CHECK, ""); - if (ret != SUCCESS) { - diag("Step %u check failed = %s", current, - agent_return_name(ret)); - return false; - } - - ret = external_agent_operation(agent, CLOSE, ""); - if (ret != SUCCESS) { - diag("Step %u close failed = %s", current, - agent_return_name(ret)); - return false; - } - - /* Suppress logging as this tries to use closed fd. */ - suppress_logging = true; - suppress_lockcheck = true; - ntdb_close(ntdb); - suppress_logging = false; - suppress_lockcheck = false; - target++; - current = 0; - free_all(); - goto reset; - } - - /* Put key for agent to fetch. */ - key = ntdb_mkdata(KEY_STRING, strlen(KEY_STRING)); - - if (pre_create_recovery) { - /* Using a transaction now means we allocate the recovery - * area immediately. That makes the later transaction smaller - * and thus tickles a bug we had. */ - if (ntdb_transaction_start(ntdb) != 0) - return false; - } - if (ntdb_store(ntdb, key, key, NTDB_INSERT) != 0) - return false; - if (pre_create_recovery) { - if (ntdb_transaction_commit(ntdb) != 0) - return false; - } - - /* This is the key we insert in transaction. 
*/ - key.dsize--; - - ret = external_agent_operation(agent, OPEN, TEST_DBNAME); - if (ret != SUCCESS) - errx(1, "Agent failed to open: %s", agent_return_name(ret)); - - ret = external_agent_operation(agent, FETCH, KEY_STRING "=" KEY_STRING); - if (ret != SUCCESS) - errx(1, "Agent failed find key: %s", agent_return_name(ret)); - - in_transaction = true; - if (ntdb_transaction_start(ntdb) != 0) - return false; - - if (ntdb_store(ntdb, key, data, NTDB_INSERT) != 0) - return false; - - if (ntdb_transaction_commit(ntdb) != 0) - return false; - - in_transaction = false; - - /* We made it! */ - diag("Completed %u runs", current); - ntdb_close(ntdb); - ret = external_agent_operation(agent, CLOSE, ""); - if (ret != SUCCESS) { - diag("Step %u close failed = %s", current, - agent_return_name(ret)); - return false; - } - - ok1(needed_recovery); - ok1(locking_errors == 0); - ok1(forget_locking() == 0); - locking_errors = 0; - return true; -} - -int main(int argc, char *argv[]) -{ - enum operation ops[] = { FETCH, STORE, TRANSACTION_START }; - struct agent *agent; - int i, j; - - plan_tests(24); - unlock_callback = maybe_die; - - external_agent_free = free_noleak; - agent = prepare_external_agent(); - if (!agent) - err(1, "preparing agent"); - - for (j = 0; j < 2; j++) { - for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { - diag("Testing %s after death (%s recovery area)", - operation_name(ops[i]), j ? 
"with" : "without"); - ok1(test_death(ops[i], agent, j)); - } - } - - free_external_agent(agent); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-64-bit-tdb.c b/ccan/ntdb/test/run-64-bit-tdb.c deleted file mode 100644 index 9fcc6c9b..00000000 --- a/ccan/ntdb/test/run-64-bit-tdb.c +++ /dev/null @@ -1,89 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -/* The largest 32-bit value which is still a multiple of NTDB_PGSIZE */ -#define ALMOST_4G ((uint32_t)-NTDB_PGSIZE) -/* And this pushes it over 32 bits */ -#define A_LITTLE_BIT (NTDB_PGSIZE * 2) - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - if (sizeof(off_t) <= 4) { - plan_tests(1); - pass("No 64 bit off_t"); - return exit_status(); - } - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 16); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - off_t old_size; - NTDB_DATA k, d; - struct hash_info h; - struct ntdb_used_record rec; - ntdb_off_t off; - - ntdb = ntdb_open("run-64-bit-ntdb.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - old_size = ntdb->file->map_size; - - /* Add a fake record to chew up the existing free space. */ - k = ntdb_mkdata("fake", 4); - d.dsize = ntdb->file->map_size - - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8; - d.dptr = malloc(d.dsize); - memset(d.dptr, 0, d.dsize); - ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); - ok1(ntdb->file->map_size == old_size); - free(d.dptr); - - /* This makes a sparse file */ - ok1(ftruncate(ntdb->file->fd, ALMOST_4G) == 0); - ok1(add_free_record(ntdb, old_size, ALMOST_4G - old_size, - NTDB_LOCK_WAIT, false) == NTDB_SUCCESS); - - /* Now add a little record past the 4G barrier. 
*/ - ok1(ntdb_expand_file(ntdb, A_LITTLE_BIT) == NTDB_SUCCESS); - ok1(add_free_record(ntdb, ALMOST_4G, A_LITTLE_BIT, - NTDB_LOCK_WAIT, false) - == NTDB_SUCCESS); - - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - - /* Test allocation path. */ - k = ntdb_mkdata("key", 4); - d = ntdb_mkdata("data", 5); - ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - - /* Make sure it put it at end as we expected. */ - off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL); - ok1(off >= ALMOST_4G); - ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - - ok1(ntdb_fetch(ntdb, k, &d) == 0); - ok1(d.dsize == 5); - ok1(strcmp((char *)d.dptr, "data") == 0); - free(d.dptr); - - ok1(ntdb_delete(ntdb, k) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - - ntdb_close(ntdb); - } - - /* We might get messages about mmap failing, so don't test - * tap_log_messages */ - return exit_status(); -} diff --git a/ccan/ntdb/test/run-90-get-set-attributes.c b/ccan/ntdb/test/run-90-get-set-attributes.c deleted file mode 100644 index aafd4613..00000000 --- a/ccan/ntdb/test/run-90-get-set-attributes.c +++ /dev/null @@ -1,162 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag, - void *unused) -{ - return 0; -} - -static int myunlock(int fd, int rw, off_t off, off_t len, void *unused) -{ - return 0; -} - -static uint32_t hash_fn(const void *key, size_t len, uint32_t seed, - void *priv) -{ - return 0; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - union ntdb_attribute seed_attr; - union ntdb_attribute hash_attr; - union ntdb_attribute lock_attr; - - seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; - seed_attr.base.next = &hash_attr; - seed_attr.seed.seed = 100; - - hash_attr.base.attr = 
NTDB_ATTRIBUTE_HASH; - hash_attr.base.next = &lock_attr; - hash_attr.hash.fn = hash_fn; - hash_attr.hash.data = &hash_attr; - - lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK; - lock_attr.base.next = &tap_log_attr; - lock_attr.flock.lock = mylock; - lock_attr.flock.unlock = myunlock; - lock_attr.flock.data = &lock_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 50); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - union ntdb_attribute attr; - - /* First open with no attributes. */ - ntdb = ntdb_open("run-90-get-set-attributes.ntdb", - flags[i] |MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, NULL); - ok1(ntdb); - - /* Get log on no attributes will fail */ - attr.base.attr = NTDB_ATTRIBUTE_LOG; - ok1(ntdb_get_attribute(ntdb, &attr) == NTDB_ERR_NOEXIST); - /* These always work. */ - attr.base.attr = NTDB_ATTRIBUTE_HASH; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH); - ok1(attr.hash.fn == ntdb_jenkins_hash); - attr.base.attr = NTDB_ATTRIBUTE_FLOCK; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK); - ok1(attr.flock.lock == ntdb_fcntl_lock); - ok1(attr.flock.unlock == ntdb_fcntl_unlock); - attr.base.attr = NTDB_ATTRIBUTE_SEED; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED); - /* This is possible, just astronomically unlikely. */ - ok1(attr.seed.seed != 0); - - /* Unset attributes. */ - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG); - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); - - /* Set them. */ - ok1(ntdb_set_attribute(ntdb, &tap_log_attr) == 0); - ok1(ntdb_set_attribute(ntdb, &lock_attr) == 0); - /* These should fail. */ - ok1(ntdb_set_attribute(ntdb, &seed_attr) == NTDB_ERR_EINVAL); - ok1(tap_log_messages == 1); - ok1(ntdb_set_attribute(ntdb, &hash_attr) == NTDB_ERR_EINVAL); - ok1(tap_log_messages == 2); - tap_log_messages = 0; - - /* Getting them should work as expected. 
*/ - attr.base.attr = NTDB_ATTRIBUTE_LOG; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG); - ok1(attr.log.fn == tap_log_attr.log.fn); - ok1(attr.log.data == tap_log_attr.log.data); - - attr.base.attr = NTDB_ATTRIBUTE_FLOCK; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK); - ok1(attr.flock.lock == mylock); - ok1(attr.flock.unlock == myunlock); - ok1(attr.flock.data == &lock_attr); - - /* Unset them again. */ - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); - ok1(tap_log_messages == 0); - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG); - ok1(tap_log_messages == 0); - - ntdb_close(ntdb); - ok1(tap_log_messages == 0); - - /* Now open with all attributes. */ - ntdb = ntdb_open("run-90-get-set-attributes.ntdb", - flags[i] | MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, - &seed_attr); - - ok1(ntdb); - - /* Get will succeed */ - attr.base.attr = NTDB_ATTRIBUTE_LOG; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG); - ok1(attr.log.fn == tap_log_attr.log.fn); - ok1(attr.log.data == tap_log_attr.log.data); - - attr.base.attr = NTDB_ATTRIBUTE_HASH; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH); - ok1(attr.hash.fn == hash_fn); - ok1(attr.hash.data == &hash_attr); - - attr.base.attr = NTDB_ATTRIBUTE_FLOCK; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK); - ok1(attr.flock.lock == mylock); - ok1(attr.flock.unlock == myunlock); - ok1(attr.flock.data == &lock_attr); - - attr.base.attr = NTDB_ATTRIBUTE_SEED; - ok1(ntdb_get_attribute(ntdb, &attr) == 0); - ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED); - ok1(attr.seed.seed == seed_attr.seed.seed); - - /* Unset attributes. 
*/ - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_HASH); - ok1(tap_log_messages == 1); - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_SEED); - ok1(tap_log_messages == 2); - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); - ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG); - ok1(tap_log_messages == 2); - tap_log_messages = 0; - - ntdb_close(ntdb); - - } - return exit_status(); -} diff --git a/ccan/ntdb/test/run-capabilities.c b/ccan/ntdb/test/run-capabilities.c deleted file mode 100644 index dc2df2ab..00000000 --- a/ccan/ntdb/test/run-capabilities.c +++ /dev/null @@ -1,284 +0,0 @@ -#include -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "layout.h" -#include "failtest_helper.h" -#include -#include "helprun-external-agent.h" - -static size_t len_of(bool breaks_check, bool breaks_write, bool breaks_open) -{ - size_t len = 0; - if (breaks_check) - len += 8; - if (breaks_write) - len += 16; - if (breaks_open) - len += 32; - return len; -} - -/* Creates a NTDB with various capabilities. */ -static void create_ntdb(const char *name, - unsigned int cap, - bool breaks_check, - bool breaks_write, - bool breaks_open, ...) -{ - NTDB_DATA key, data; - va_list ap; - struct ntdb_layout *layout; - struct ntdb_context *ntdb; - int fd, clen; - union ntdb_attribute seed_attr; - - /* Force a seed which doesn't allow records to clash! 
*/ - seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; - seed_attr.base.next = &tap_log_attr; - seed_attr.seed.seed = 0; - - key = ntdb_mkdata("Hello", 5); - data = ntdb_mkdata("world", 5); - - /* Create a NTDB with some data, and some capabilities */ - layout = new_ntdb_layout(); - ntdb_layout_add_freetable(layout); - ntdb_layout_add_used(layout, key, data, 6); - clen = len_of(breaks_check, breaks_write, breaks_open); - ntdb_layout_add_free(layout, 15496 - clen, 0); - ntdb_layout_add_capability(layout, cap, - breaks_write, breaks_check, breaks_open, - clen); - - va_start(ap, breaks_open); - while ((cap = va_arg(ap, int)) != 0) { - breaks_check = va_arg(ap, int); - breaks_write = va_arg(ap, int); - breaks_open = va_arg(ap, int); - - key.dsize--; - ntdb_layout_add_used(layout, key, data, 11 - key.dsize); - clen = len_of(breaks_check, breaks_write, breaks_open); - ntdb_layout_add_free(layout, 16304 - clen, 0); - ntdb_layout_add_capability(layout, cap, - breaks_write, breaks_check, - breaks_open, clen); - } - va_end(ap); - - /* We open-code this, because we need to use the failtest write. */ - ntdb = ntdb_layout_get(layout, failtest_free, &seed_attr); - - fd = open(name, O_RDWR|O_TRUNC|O_CREAT, 0600); - if (fd < 0) - err(1, "opening %s for writing", name); - if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size) - != ntdb->file->map_size) - err(1, "writing %s", name); - close(fd); - ntdb_close(ntdb); - ntdb_layout_free(layout); -} - -/* Note all the "goto out" early exits: they're to shorten failtest time. */ -int main(int argc, char *argv[]) -{ - struct ntdb_context *ntdb; - char *summary; - - failtest_init(argc, argv); - failtest_hook = block_repeat_failures; - failtest_exit_check = exit_check_log; - plan_tests(60); - - failtest_suppress = true; - /* Capability says you can ignore it? 
*/ - create_ntdb("run-capabilities.ntdb", 1, false, false, false, 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - if (!ok1(ntdb)) - goto out; - ok1(tap_log_messages == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - ok1(tap_log_messages == 0); - ntdb_close(ntdb); - - /* Two capabilitues say you can ignore them? */ - create_ntdb("run-capabilities.ntdb", - 1, false, false, false, - 2, false, false, false, 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - if (!ok1(ntdb)) - goto out; - ok1(tap_log_messages == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - ok1(tap_log_messages == 0); - ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); - ok1(strstr(summary, "Capability 1\n")); - free(summary); - ntdb_close(ntdb); - - /* Capability says you can't check. */ - create_ntdb("run-capabilities.ntdb", - 1, false, false, false, - 2, true, false, false, 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - if (!ok1(ntdb)) - goto out; - ok1(tap_log_messages == 0); - ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - /* We expect a warning! */ - ok1(tap_log_messages == 1); - ok1(strstr(log_last, "capabilit")); - ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); - ok1(strstr(summary, "Capability 1\n")); - ok1(strstr(summary, "Capability 2 (uncheckable)\n")); - free(summary); - ntdb_close(ntdb); - - /* Capability says you can't write. */ - create_ntdb("run-capabilities.ntdb", - 1, false, false, false, - 2, false, true, false, 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - /* We expect a message. 
*/ - ok1(!ntdb); - if (!ok1(tap_log_messages == 2)) - goto out; - if (!ok1(strstr(log_last, "unknown"))) - goto out; - ok1(strstr(log_last, "write")); - - /* We can open it read-only though! */ - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0, - &tap_log_attr); - failtest_suppress = true; - if (!ok1(ntdb)) - goto out; - ok1(tap_log_messages == 2); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - ok1(tap_log_messages == 2); - ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); - ok1(strstr(summary, "Capability 1\n")); - ok1(strstr(summary, "Capability 2 (read-only)\n")); - free(summary); - ntdb_close(ntdb); - - /* Capability says you can't open. */ - create_ntdb("run-capabilities.ntdb", - 1, false, false, false, - 2, false, false, true, 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - /* We expect a message. */ - ok1(!ntdb); - if (!ok1(tap_log_messages == 3)) - goto out; - if (!ok1(strstr(log_last, "unknown"))) - goto out; - - /* Combine capabilities correctly. */ - create_ntdb("run-capabilities.ntdb", - 1, false, false, false, - 2, true, false, false, - 3, false, true, false, 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - /* We expect a message. */ - ok1(!ntdb); - if (!ok1(tap_log_messages == 4)) - goto out; - if (!ok1(strstr(log_last, "unknown"))) - goto out; - ok1(strstr(log_last, "write")); - - /* We can open it read-only though! */ - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0, - &tap_log_attr); - failtest_suppress = true; - if (!ok1(ntdb)) - goto out; - ok1(tap_log_messages == 4); - ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - /* We expect a warning! 
*/ - ok1(tap_log_messages == 5); - ok1(strstr(log_last, "unknown")); - ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); - ok1(strstr(summary, "Capability 1\n")); - ok1(strstr(summary, "Capability 2 (uncheckable)\n")); - ok1(strstr(summary, "Capability 3 (read-only)\n")); - free(summary); - ntdb_close(ntdb); - - /* Two capability flags in one. */ - create_ntdb("run-capabilities.ntdb", - 1, false, false, false, - 2, true, true, false, - 0); - - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, - &tap_log_attr); - failtest_suppress = true; - /* We expect a message. */ - ok1(!ntdb); - if (!ok1(tap_log_messages == 6)) - goto out; - if (!ok1(strstr(log_last, "unknown"))) - goto out; - ok1(strstr(log_last, "write")); - - /* We can open it read-only though! */ - failtest_suppress = false; - ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0, - &tap_log_attr); - failtest_suppress = true; - if (!ok1(ntdb)) - goto out; - ok1(tap_log_messages == 6); - ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK); - ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); - /* We expect a warning! */ - ok1(tap_log_messages == 7); - ok1(strstr(log_last, "unknown")); - ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); - ok1(strstr(summary, "Capability 1\n")); - ok1(strstr(summary, "Capability 2 (uncheckable,read-only)\n")); - free(summary); - ntdb_close(ntdb); - -out: - failtest_exit(exit_status()); - - /* - * We will never reach this but the compiler complains if we do not - * return in this function. 
- */ - return EFAULT; -} diff --git a/ccan/ntdb/test/run-expand-in-transaction.c b/ccan/ntdb/test/run-expand-in-transaction.c deleted file mode 100644 index 20a28ee6..00000000 --- a/ccan/ntdb/test/run-expand-in-transaction.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = ntdb_mkdata("key", 3); - NTDB_DATA data = ntdb_mkdata("data", 4); - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1); - - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - size_t size; - NTDB_DATA k, d; - ntdb = ntdb_open("run-expand-in-transaction.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - size = ntdb->file->map_size; - /* Add a fake record to chew up the existing free space. 
*/ - k = ntdb_mkdata("fake", 4); - d.dsize = ntdb->file->map_size - - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8; - d.dptr = malloc(d.dsize); - memset(d.dptr, 0, d.dsize); - ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); - ok1(ntdb->file->map_size == size); - free(d.dptr); - ok1(ntdb_transaction_start(ntdb) == 0); - ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); - ok1(ntdb->file->map_size > size); - ok1(ntdb_transaction_commit(ntdb) == 0); - ok1(ntdb->file->map_size > size); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-features.c b/ccan/ntdb/test/run-features.c deleted file mode 100644 index 631ce876..00000000 --- a/ccan/ntdb/test/run-features.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; - NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - uint64_t features; - ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Put some stuff in there. */ - for (j = 0; j < 100; j++) { - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - fail("Storing in ntdb"); - } - - /* Mess with features fields in hdr. 
*/ - features = (~NTDB_FEATURE_MASK ^ 1); - ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header, - features_used), - &features, sizeof(features)) == 0); - ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header, - features_offered), - &features, sizeof(features)) == 0); - ntdb_close(ntdb); - - ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR, 0, &tap_log_attr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Should not have changed features offered. */ - ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header, - features_offered), - &features, sizeof(features)) == 0); - ok1(features == (~NTDB_FEATURE_MASK ^ 1)); - - /* Should have cleared unknown bits in features_used. */ - ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header, - features_used), - &features, sizeof(features)) == 0); - ok1(features == (1 & NTDB_FEATURE_MASK)); - - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-lockall.c b/ccan/ntdb/test/run-lockall.c deleted file mode 100644 index a4cd1e47..00000000 --- a/ccan/ntdb/test/run-lockall.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "../private.h" -#include -#include "lock-tracking.h" - -#define fcntl fcntl_with_lockcheck -#include "ntdb-source.h" - -#include "tap-interface.h" -#include -#include -#include -#include "external-agent.h" -#include "logging.h" -#include "helprun-external-agent.h" - -#define TEST_DBNAME "run-lockall.ntdb" -#define KEY_STR "key" - -#undef fcntl - -int main(int argc, char *argv[]) -{ - struct agent *agent; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - int i; - - plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1); - agent = prepare_external_agent(); - if (!agent) - err(1, "preparing agent"); - - for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) { - enum agent_return ret; - struct ntdb_context *ntdb; - - ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, 
&tap_log_attr); - ok1(ntdb); - - ret = external_agent_operation(agent, OPEN, TEST_DBNAME); - ok1(ret == SUCCESS); - - ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); - ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) - == WOULD_HAVE_BLOCKED); - ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) - == WOULD_HAVE_BLOCKED); - /* Test nesting. */ - ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); - ntdb_unlockall(ntdb); - ntdb_unlockall(ntdb); - - ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) - == SUCCESS); - - ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS); - ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) - == WOULD_HAVE_BLOCKED); - ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) - == SUCCESS); - ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS); - ntdb_unlockall_read(ntdb); - ntdb_unlockall_read(ntdb); - - ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) - == SUCCESS); - ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS); - ntdb_close(ntdb); - } - - free_external_agent(agent); - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-remap-in-read_traverse.c b/ccan/ntdb/test/run-remap-in-read_traverse.c deleted file mode 100644 index 6fe537d4..00000000 --- a/ccan/ntdb/test/run-remap-in-read_traverse.c +++ /dev/null @@ -1,58 +0,0 @@ -#include "ntdb-source.h" -/* We had a bug where we marked the ntdb read-only for a ntdb_traverse_read. - * If we then expanded the ntdb, we would remap read-only, and later SEGV. 
*/ -#include "tap-interface.h" -#include "external-agent.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static bool file_larger(int fd, ntdb_len_t size) -{ - struct stat st; - - fstat(fd, &st); - return st.st_size != size; -} - -static unsigned add_records_to_grow(struct agent *agent, int fd, ntdb_len_t size) -{ - unsigned int i; - - for (i = 0; !file_larger(fd, size); i++) { - char data[50]; - sprintf(data, "%i=%i", i, i); - if (external_agent_operation(agent, STORE, data) != SUCCESS) - return 0; - } - diag("Added %u records to grow file", i); - return i; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct agent *agent; - struct ntdb_context *ntdb; - NTDB_DATA d = ntdb_mkdata("hello", 5); - const char filename[] = "run-remap-in-read_traverse.ntdb"; - - plan_tests(4); - - agent = prepare_external_agent(); - - ntdb = ntdb_open(filename, MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - - ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS); - i = add_records_to_grow(agent, ntdb->file->fd, ntdb->file->map_size); - - /* Do a traverse. */ - ok1(ntdb_traverse(ntdb, NULL, NULL) == i); - - /* Now store something! */ - ok1(ntdb_store(ntdb, d, d, NTDB_INSERT) == 0); - ok1(tap_log_messages == 0); - ntdb_close(ntdb); - free_external_agent(agent); - return exit_status(); -} diff --git a/ccan/ntdb/test/run-seed.c b/ccan/ntdb/test/run-seed.c deleted file mode 100644 index 5ca6678a..00000000 --- a/ccan/ntdb/test/run-seed.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static int log_count = 0; - -/* Normally we get a log when setting random seed. 
*/ -static void my_log_fn(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, void *priv) -{ - log_count++; -} - -static union ntdb_attribute log_attr = { - .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG }, - .fn = my_log_fn } -}; - -int main(int argc, char *argv[]) -{ - unsigned int i; - struct ntdb_context *ntdb; - union ntdb_attribute attr; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - - attr.seed.base.attr = NTDB_ATTRIBUTE_SEED; - attr.seed.base.next = &log_attr; - attr.seed.seed = 42; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - struct ntdb_header hdr; - int fd; - ntdb = ntdb_open("run-seed.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &attr); - ok1(ntdb); - if (!ntdb) - continue; - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(ntdb->hash_seed == 42); - ok1(log_count == 0); - ntdb_close(ntdb); - - if (flags[i] & NTDB_INTERNAL) - continue; - - fd = open("run-seed.ntdb", O_RDONLY); - ok1(fd >= 0); - ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)); - if (flags[i] & NTDB_CONVERT) - ok1(bswap_64(hdr.hash_seed) == 42); - else - ok1(hdr.hash_seed == 42); - close(fd); - } - return exit_status(); -} diff --git a/ccan/ntdb/test/run-tdb_errorstr.c b/ccan/ntdb/test/run-tdb_errorstr.c deleted file mode 100644 index 499eb42e..00000000 --- a/ccan/ntdb/test/run-tdb_errorstr.c +++ /dev/null @@ -1,53 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "helprun-external-agent.h" - -int main(int argc, char *argv[]) -{ - enum NTDB_ERROR e; - plan_tests(NTDB_ERR_RDONLY*-1 + 2); - - for (e = NTDB_SUCCESS; e >= NTDB_ERR_RDONLY; e--) { - switch (e) { - case NTDB_SUCCESS: - ok1(!strcmp(ntdb_errorstr(e), - "Success")); - break; - case NTDB_ERR_IO: - ok1(!strcmp(ntdb_errorstr(e), - "IO Error")); - break; - case NTDB_ERR_LOCK: - 
ok1(!strcmp(ntdb_errorstr(e), - "Locking error")); - break; - case NTDB_ERR_OOM: - ok1(!strcmp(ntdb_errorstr(e), - "Out of memory")); - break; - case NTDB_ERR_EXISTS: - ok1(!strcmp(ntdb_errorstr(e), - "Record exists")); - break; - case NTDB_ERR_EINVAL: - ok1(!strcmp(ntdb_errorstr(e), - "Invalid parameter")); - break; - case NTDB_ERR_NOEXIST: - ok1(!strcmp(ntdb_errorstr(e), - "Record does not exist")); - break; - case NTDB_ERR_RDONLY: - ok1(!strcmp(ntdb_errorstr(e), - "write not permitted")); - break; - case NTDB_ERR_CORRUPT: - ok1(!strcmp(ntdb_errorstr(e), - "Corrupt database")); - break; - } - } - ok1(!strcmp(ntdb_errorstr(e), "Invalid error code")); - - return exit_status(); -} diff --git a/ccan/ntdb/test/run-tdb_foreach.c b/ccan/ntdb/test/run-tdb_foreach.c deleted file mode 100644 index 532474b9..00000000 --- a/ccan/ntdb/test/run-tdb_foreach.c +++ /dev/null @@ -1,91 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -static int drop_count(struct ntdb_context *ntdb, unsigned int *count) -{ - if (--(*count) == 0) - return 1; - return 0; -} - -static int set_found(struct ntdb_context *ntdb, bool found[3]) -{ - unsigned int idx; - - if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach0.ntdb") == 0) - idx = 0; - else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach1.ntdb") == 0) - idx = 1; - else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach2.ntdb") == 0) - idx = 2; - else - abort(); - - if (found[idx]) - abort(); - found[idx] = true; - return 0; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, count; - bool found[3]; - struct ntdb_context *ntdb0, *ntdb1, *ntdb; - int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 8); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb0 = ntdb_open("run-ntdb_foreach0.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ntdb1 = 
ntdb_open("run-ntdb_foreach1.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - ntdb = ntdb_open("run-ntdb_foreach2.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); - - memset(found, 0, sizeof(found)); - ntdb_foreach(set_found, found); - ok1(found[0] && found[1] && found[2]); - - /* Test premature iteration termination */ - count = 1; - ntdb_foreach(drop_count, &count); - ok1(count == 0); - - ntdb_close(ntdb1); - memset(found, 0, sizeof(found)); - ntdb_foreach(set_found, found); - ok1(found[0] && !found[1] && found[2]); - - ntdb_close(ntdb); - memset(found, 0, sizeof(found)); - ntdb_foreach(set_found, found); - ok1(found[0] && !found[1] && !found[2]); - - ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb", - flags[i]|MAYBE_NOSYNC, - O_RDWR, 0600, &tap_log_attr); - memset(found, 0, sizeof(found)); - ntdb_foreach(set_found, found); - ok1(found[0] && found[1] && !found[2]); - - ntdb_close(ntdb0); - memset(found, 0, sizeof(found)); - ntdb_foreach(set_found, found); - ok1(!found[0] && found[1] && !found[2]); - - ntdb_close(ntdb1); - memset(found, 0, sizeof(found)); - ntdb_foreach(set_found, found); - ok1(!found[0] && !found[1] && !found[2]); - ok1(tap_log_messages == 0); - } - - return exit_status(); -} diff --git a/ccan/ntdb/test/run-traverse.c b/ccan/ntdb/test/run-traverse.c deleted file mode 100644 index 29b517db..00000000 --- a/ccan/ntdb/test/run-traverse.c +++ /dev/null @@ -1,204 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" -#include "helprun-external-agent.h" - -#define NUM_RECORDS 1000 - -/* We use the same seed which we saw a failure on. 
*/ -static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) -{ - return hash64_stable((const unsigned char *)key, len, - *(uint64_t *)p); -} - -static bool store_records(struct ntdb_context *ntdb) -{ - int i; - NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; - NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; - - for (i = 0; i < NUM_RECORDS; i++) - if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) - return false; - return true; -} - -struct trav_data { - unsigned int calls, call_limit; - int low, high; - bool mismatch; - bool delete; - enum NTDB_ERROR delete_error; -}; - -static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, - struct trav_data *td) -{ - int val; - - td->calls++; - if (key.dsize != sizeof(val) || dbuf.dsize != sizeof(val) - || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) { - td->mismatch = true; - return -1; - } - memcpy(&val, dbuf.dptr, dbuf.dsize); - if (val < td->low) - td->low = val; - if (val > td->high) - td->high = val; - - if (td->delete) { - td->delete_error = ntdb_delete(ntdb, key); - if (td->delete_error != NTDB_SUCCESS) { - return -1; - } - } - - if (td->calls == td->call_limit) - return 1; - return 0; -} - -struct trav_grow_data { - unsigned int calls; - unsigned int num_large; - bool mismatch; - enum NTDB_ERROR error; -}; - -static int trav_grow(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, - struct trav_grow_data *tgd) -{ - int val; - unsigned char buffer[128] = { 0 }; - - tgd->calls++; - if (key.dsize != sizeof(val) || dbuf.dsize < sizeof(val) - || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) { - tgd->mismatch = true; - return -1; - } - - if (dbuf.dsize > sizeof(val)) - /* We must have seen this before! */ - tgd->num_large++; - - /* Make a big difference to the database. 
*/ - dbuf.dptr = buffer; - dbuf.dsize = sizeof(buffer); - tgd->error = ntdb_append(ntdb, key, dbuf); - if (tgd->error != NTDB_SUCCESS) { - return -1; - } - return 0; -} - -int main(int argc, char *argv[]) -{ - unsigned int i; - int num; - struct trav_data td; - struct trav_grow_data tgd; - struct ntdb_context *ntdb; - uint64_t seed = 16014841315512641303ULL; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT }; - union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = fixedhash, - .data = &seed } }; - - hattr.base.next = &tap_log_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - ntdb = ntdb_open("run-traverse.ntdb", flags[i]|MAYBE_NOSYNC, - O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); - ok1(ntdb); - if (!ntdb) - continue; - - ok1(ntdb_traverse(ntdb, NULL, NULL) == 0); - - ok1(store_records(ntdb)); - num = ntdb_traverse(ntdb, NULL, NULL); - ok1(num == NUM_RECORDS); - - /* Full traverse. */ - td.calls = 0; - td.call_limit = UINT_MAX; - td.low = INT_MAX; - td.high = INT_MIN; - td.mismatch = false; - td.delete = false; - - num = ntdb_traverse(ntdb, trav, &td); - ok1(num == NUM_RECORDS); - ok1(!td.mismatch); - ok1(td.calls == NUM_RECORDS); - ok1(td.low == 0); - ok1(td.high == NUM_RECORDS-1); - - /* Short traverse. */ - td.calls = 0; - td.call_limit = NUM_RECORDS / 2; - td.low = INT_MAX; - td.high = INT_MIN; - td.mismatch = false; - td.delete = false; - - num = ntdb_traverse(ntdb, trav, &td); - ok1(num == NUM_RECORDS / 2); - ok1(!td.mismatch); - ok1(td.calls == NUM_RECORDS / 2); - ok1(td.low <= NUM_RECORDS / 2); - ok1(td.high > NUM_RECORDS / 2); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(tap_log_messages == 0); - - /* Deleting traverse (delete everything). 
*/ - td.calls = 0; - td.call_limit = UINT_MAX; - td.low = INT_MAX; - td.high = INT_MIN; - td.mismatch = false; - td.delete = true; - td.delete_error = NTDB_SUCCESS; - num = ntdb_traverse(ntdb, trav, &td); - ok1(num == NUM_RECORDS); - ok1(td.delete_error == NTDB_SUCCESS); - ok1(!td.mismatch); - ok1(td.calls == NUM_RECORDS); - ok1(td.low == 0); - ok1(td.high == NUM_RECORDS - 1); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Now it's empty! */ - ok1(ntdb_traverse(ntdb, NULL, NULL) == 0); - - /* Re-add. */ - ok1(store_records(ntdb)); - ok1(ntdb_traverse(ntdb, NULL, NULL) == NUM_RECORDS); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Grow. This will cause us to be reshuffled. */ - tgd.calls = 0; - tgd.num_large = 0; - tgd.mismatch = false; - tgd.error = NTDB_SUCCESS; - ok1(ntdb_traverse(ntdb, trav_grow, &tgd) > 1); - ok1(tgd.error == 0); - ok1(!tgd.mismatch); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - ok1(tgd.num_large < tgd.calls); - diag("growing db: %u calls, %u repeats", - tgd.calls, tgd.num_large); - - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/ccan/ntdb/test/tap-interface.c b/ccan/ntdb/test/tap-interface.c deleted file mode 100644 index 077ec2cd..00000000 --- a/ccan/ntdb/test/tap-interface.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "tap-interface.h" - -unsigned tap_ok_count, tap_ok_target = -1U; diff --git a/ccan/ntdb/test/tap-interface.h b/ccan/ntdb/test/tap-interface.h deleted file mode 100644 index 5363c32b..00000000 --- a/ccan/ntdb/test/tap-interface.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - Unix SMB/CIFS implementation. - Simplistic implementation of tap interface. - - Copyright (C) Rusty Russell 2012 - - ** NOTE! The following LGPL license applies to the talloc - ** library. 
This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include -#include -#include "no-fsync.h" - -#ifndef __location__ -#define __TAP_STRING_LINE1__(s) #s -#define __TAP_STRING_LINE2__(s) __TAP_STRING_LINE1__(s) -#define __TAP_STRING_LINE3__ __TAP_STRING_LINE2__(__LINE__) -#define __location__ __FILE__ ":" __TAP_STRING_LINE3__ -#endif - -extern unsigned tap_ok_count, tap_ok_target; -#define plan_tests(num) do { tap_ok_target = (num); } while(0) -#define ok(e, ...) ((e) ? (printf("."), tap_ok_count++, true) : (warnx(__VA_ARGS__), false)) -#define ok1(e) ok((e), "%s:%s", __location__, #e) -#define pass(...) (printf("."), tap_ok_count++) -#define fail(...) warnx(__VA_ARGS__) -#define diag(...) do { printf(__VA_ARGS__); printf("\n"); } while(0) -#define exit_status() (tap_ok_count == tap_ok_target ? 0 : 1) diff --git a/ccan/ntdb/tools/Makefile b/ccan/ntdb/tools/Makefile deleted file mode 100644 index 087c256d..00000000 --- a/ccan/ntdb/tools/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -OBJS:=../../ntdb.o ../../hash.o ../../tally.o -CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg -LDFLAGS:=-L../../.. 
- -default: ntdbtorture ntdbtool ntdbdump ntdbrestore mkntdb speed growtdb-bench - -ntdbdump: ntdbdump.c $(OBJS) -ntdbrestore: ntdbrestore.c $(OBJS) -ntdbtorture: ntdbtorture.c $(OBJS) -ntdbtool: ntdbtool.c $(OBJS) -mkntdb: mkntdb.c $(OBJS) -speed: speed.c $(OBJS) -growtdb-bench: growtdb-bench.c $(OBJS) - -clean: - rm -f ntdbtorture ntdbdump ntdbrestore ntdbtool mkntdb speed growtdb-bench diff --git a/ccan/ntdb/tools/growtdb-bench.c b/ccan/ntdb/tools/growtdb-bench.c deleted file mode 100644 index 28c1de83..00000000 --- a/ccan/ntdb/tools/growtdb-bench.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "ntdb.h" -#include -#include -#include -#include -#include - -static void logfn(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) -{ - fprintf(stderr, "ntdb:%s:%s:%s\n", - ntdb_name(ntdb), ntdb_errorstr(ecode), message); -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j, users, groups; - NTDB_DATA idxkey, idxdata; - NTDB_DATA k, d, gk; - char cmd[100]; - struct ntdb_context *ntdb; - enum NTDB_ERROR ecode; - union ntdb_attribute log; - - if (argc != 3) { - printf("Usage: growtdb-bench \n"); - exit(1); - } - users = atoi(argv[1]); - groups = atoi(argv[2]); - - sprintf(cmd, "cat /proc/%i/statm", getpid()); - - log.base.attr = NTDB_ATTRIBUTE_LOG; - log.base.next = NULL; - log.log.fn = logfn; - - ntdb = ntdb_open("/tmp/growtdb.ntdb", NTDB_DEFAULT, - O_RDWR|O_CREAT|O_TRUNC, 0600, &log); - - idxkey.dptr = (unsigned char *)"User index"; - idxkey.dsize = strlen("User index"); - idxdata.dsize = 51; - idxdata.dptr = calloc(idxdata.dsize, 1); - if (idxdata.dptr == NULL) { - fprintf(stderr, "Unable to allocate memory for idxdata.dptr\n"); - return -1; - } - - /* Create users. 
*/ - k.dsize = 48; - k.dptr = calloc(k.dsize, 1); - if (k.dptr == NULL) { - fprintf(stderr, "Unable to allocate memory for k.dptr\n"); - return -1; - } - d.dsize = 64; - d.dptr = calloc(d.dsize, 1); - if (d.dptr == NULL) { - fprintf(stderr, "Unable to allocate memory for d.dptr\n"); - return -1; - } - - ntdb_transaction_start(ntdb); - for (i = 0; i < users; i++) { - memcpy(k.dptr, &i, sizeof(i)); - ecode = ntdb_store(ntdb, k, d, NTDB_INSERT); - if (ecode != NTDB_SUCCESS) - errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode)); - - /* This simulates a growing index record. */ - ecode = ntdb_append(ntdb, idxkey, idxdata); - if (ecode != NTDB_SUCCESS) - errx(1, "ntdb append failed: %s", ntdb_errorstr(ecode)); - } - if ((ecode = ntdb_transaction_commit(ntdb)) != 0) - errx(1, "ntdb commit1 failed: %s", ntdb_errorstr(ecode)); - - if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0) - errx(1, "ntdb_check failed after initial insert!"); - - system(cmd); - - /* Now put them all in groups: add 32 bytes to each record for - * a group. */ - gk.dsize = 48; - gk.dptr = calloc(k.dsize, 1); - if (gk.dptr == NULL) { - fprintf(stderr, "Unable to allocate memory for gk.dptr\n"); - return -1; - } - gk.dptr[gk.dsize-1] = 1; - - d.dsize = 32; - for (i = 0; i < groups; i++) { - ntdb_transaction_start(ntdb); - /* Create the "group". */ - memcpy(gk.dptr, &i, sizeof(i)); - ecode = ntdb_store(ntdb, gk, d, NTDB_INSERT); - if (ecode != NTDB_SUCCESS) - errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode)); - - /* Now populate it. */ - for (j = 0; j < users; j++) { - /* Append to the user. */ - memcpy(k.dptr, &j, sizeof(j)); - if ((ecode = ntdb_append(ntdb, k, d)) != 0) - errx(1, "ntdb append failed: %s", - ntdb_errorstr(ecode)); - - /* Append to the group. 
*/ - if ((ecode = ntdb_append(ntdb, gk, d)) != 0) - errx(1, "ntdb append failed: %s", - ntdb_errorstr(ecode)); - } - if ((ecode = ntdb_transaction_commit(ntdb)) != 0) - errx(1, "ntdb commit2 failed: %s", ntdb_errorstr(ecode)); - if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0) - errx(1, "ntdb_check failed after iteration %i!", i); - system(cmd); - } - - return 0; -} diff --git a/ccan/ntdb/tools/mkntdb.c b/ccan/ntdb/tools/mkntdb.c deleted file mode 100644 index e728987a..00000000 --- a/ccan/ntdb/tools/mkntdb.c +++ /dev/null @@ -1,29 +0,0 @@ -#include "ntdb.h" -#include -#include -#include -#include - -int main(int argc, char *argv[]) -{ - unsigned int i, num_recs; - struct ntdb_context *ntdb; - - if (argc != 3 || (num_recs = atoi(argv[2])) == 0) - errx(1, "Usage: mktdb "); - - ntdb = ntdb_open(argv[1], NTDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL); - if (!ntdb) - err(1, "Opening %s", argv[1]); - - for (i = 0; i < num_recs; i++) { - NTDB_DATA d; - - d.dptr = (void *)&i; - d.dsize = sizeof(i); - if (ntdb_store(ntdb, d, d, NTDB_INSERT) != 0) - err(1, "Failed to store record %i", i); - } - printf("Done\n"); - return 0; -} diff --git a/ccan/ntdb/tools/ntdbbackup.c b/ccan/ntdb/tools/ntdbbackup.c deleted file mode 100644 index c632f0ed..00000000 --- a/ccan/ntdb/tools/ntdbbackup.c +++ /dev/null @@ -1,340 +0,0 @@ -/* - Unix SMB/CIFS implementation. - low level ntdb backup and restore utility - Copyright (C) Andrew Tridgell 2002 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -/* - - This program is meant for backup/restore of ntdb databases. Typical usage would be: - tdbbackup *.ntdb - when Samba shuts down cleanly, which will make a backup of all the local databases - to *.bak files. Then on Samba startup you would use: - tdbbackup -v *.ntdb - and this will check the databases for corruption and if corruption is detected then - the backup will be restored. - - You may also like to do a backup on a regular basis while Samba is - running, perhaps using cron. - - The reason this program is needed is to cope with power failures - while Samba is running. A power failure could lead to database - corruption and Samba will then not start correctly. - - Note that many of the databases in Samba are transient and thus - don't need to be backed up, so you can optimise the above a little - by only running the backup on the critical databases. - - */ - -#include "config.h" -#include "ntdb.h" -#include "private.h" - -#ifdef HAVE_GETOPT_H -#include -#endif - -static int failed; - -static void ntdb_log(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) -{ - fprintf(stderr, "%s:%s\n", ntdb_errorstr(ecode), message); -} - -static char *add_suffix(const char *name, const char *suffix) -{ - char *ret; - int len = strlen(name) + strlen(suffix) + 1; - ret = (char *)malloc(len); - if (!ret) { - fprintf(stderr,"Out of memory!\n"); - exit(1); - } - snprintf(ret, len, "%s%s", name, suffix); - return ret; -} - -static int copy_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) -{ - struct ntdb_context *ntdb_new = (struct ntdb_context *)state; - enum NTDB_ERROR err; - - err = ntdb_store(ntdb_new, key, dbuf, NTDB_INSERT); - if (err) { - fprintf(stderr,"Failed to insert into %s: %s\n", - ntdb_name(ntdb_new), ntdb_errorstr(err)); - failed = 1; - return 1; - } - 
return 0; -} - - -static int test_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) -{ - return 0; -} - -/* - carefully backup a ntdb, validating the contents and - only doing the backup if its OK - this function is also used for restore -*/ -static int backup_ntdb(const char *old_name, const char *new_name) -{ - struct ntdb_context *ntdb; - struct ntdb_context *ntdb_new; - char *tmp_name; - struct stat st; - int count1, count2; - enum NTDB_ERROR err; - union ntdb_attribute log_attr; - - tmp_name = add_suffix(new_name, ".tmp"); - - /* stat the old ntdb to find its permissions */ - if (stat(old_name, &st) != 0) { - perror(old_name); - free(tmp_name); - return 1; - } - - log_attr.base.attr = NTDB_ATTRIBUTE_LOG; - log_attr.base.next = NULL; - log_attr.log.fn = ntdb_log; - - /* open the old ntdb */ - ntdb = ntdb_open(old_name, NTDB_DEFAULT, O_RDWR, 0, &log_attr); - if (!ntdb) { - printf("Failed to open %s\n", old_name); - free(tmp_name); - return 1; - } - - unlink(tmp_name); - ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, - O_RDWR|O_CREAT|O_EXCL, st.st_mode & 0777, - &log_attr); - if (!ntdb_new) { - perror(tmp_name); - free(tmp_name); - return 1; - } - - err = ntdb_transaction_start(ntdb); - if (err) { - fprintf(stderr, "Failed to start transaction on old ntdb: %s\n", - ntdb_errorstr(err)); - ntdb_close(ntdb); - ntdb_close(ntdb_new); - unlink(tmp_name); - free(tmp_name); - return 1; - } - - /* lock the backup ntdb so that nobody else can change it */ - err = ntdb_lockall(ntdb_new); - if (err) { - fprintf(stderr, "Failed to lock backup ntdb: %s\n", - ntdb_errorstr(err)); - ntdb_close(ntdb); - ntdb_close(ntdb_new); - unlink(tmp_name); - free(tmp_name); - return 1; - } - - failed = 0; - - /* traverse and copy */ - count1 = ntdb_traverse(ntdb, copy_fn, (void *)ntdb_new); - if (count1 < 0 || failed) { - fprintf(stderr,"failed to copy %s\n", old_name); - ntdb_close(ntdb); - ntdb_close(ntdb_new); - unlink(tmp_name); - free(tmp_name); - return 1; - } - - 
/* close the old ntdb */ - ntdb_close(ntdb); - - /* copy done, unlock the backup ntdb */ - ntdb_unlockall(ntdb_new); - -#ifdef HAVE_FDATASYNC - if (fdatasync(ntdb_fd(ntdb_new)) != 0) { -#else - if (fsync(ntdb_fd(ntdb_new)) != 0) { -#endif - /* not fatal */ - fprintf(stderr, "failed to fsync backup file\n"); - } - - /* close the new ntdb and re-open read-only */ - ntdb_close(ntdb_new); - - /* we don't need the hash attr any more */ - log_attr.base.next = NULL; - - ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, O_RDONLY, 0, &log_attr); - if (!ntdb_new) { - fprintf(stderr,"failed to reopen %s\n", tmp_name); - unlink(tmp_name); - perror(tmp_name); - free(tmp_name); - return 1; - } - - /* traverse the new ntdb to confirm */ - count2 = ntdb_traverse(ntdb_new, test_fn, NULL); - if (count2 != count1) { - fprintf(stderr,"failed to copy %s\n", old_name); - ntdb_close(ntdb_new); - unlink(tmp_name); - free(tmp_name); - return 1; - } - - /* close the new ntdb and rename it to .bak */ - ntdb_close(ntdb_new); - if (rename(tmp_name, new_name) != 0) { - perror(new_name); - free(tmp_name); - return 1; - } - - free(tmp_name); - - return 0; -} - -/* - verify a ntdb and if it is corrupt then restore from *.bak -*/ -static int verify_ntdb(const char *fname, const char *bak_name) -{ - struct ntdb_context *ntdb; - int count = -1; - union ntdb_attribute log_attr; - - log_attr.base.attr = NTDB_ATTRIBUTE_LOG; - log_attr.base.next = NULL; - log_attr.log.fn = ntdb_log; - - /* open the ntdb */ - ntdb = ntdb_open(fname, NTDB_DEFAULT, O_RDONLY, 0, &log_attr); - - /* traverse the ntdb, then close it */ - if (ntdb) { - count = ntdb_traverse(ntdb, test_fn, NULL); - ntdb_close(ntdb); - } - - /* count is < 0 means an error */ - if (count < 0) { - printf("restoring %s\n", fname); - return backup_ntdb(bak_name, fname); - } - - printf("%s : %d records\n", fname, count); - - return 0; -} - -/* - see if one file is newer than another -*/ -static int file_newer(const char *fname1, const char *fname2) -{ - 
struct stat st1, st2; - if (stat(fname1, &st1) != 0) { - return 0; - } - if (stat(fname2, &st2) != 0) { - return 1; - } - return (st1.st_mtime > st2.st_mtime); -} - -static void usage(void) -{ - printf("Usage: ntdbbackup [options] \n\n"); - printf(" -h this help message\n"); - printf(" -v verify mode (restore if corrupt)\n"); - printf(" -s suffix set the backup suffix\n"); - printf(" -v verify mode (restore if corrupt)\n"); -} - - - int main(int argc, char *argv[]) -{ - int i; - int ret = 0; - int c; - int verify = 0; - const char *suffix = ".bak"; - - while ((c = getopt(argc, argv, "vhs:")) != -1) { - switch (c) { - case 'h': - usage(); - exit(0); - case 'v': - verify = 1; - break; - case 's': - suffix = optarg; - break; - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - usage(); - exit(1); - } - - for (i=0; i. -*/ -#include "config.h" -#include "ntdb.h" -#include "private.h" - -static void print_data(NTDB_DATA d) -{ - unsigned char *p = (unsigned char *)d.dptr; - int len = d.dsize; - while (len--) { - if (isprint(*p) && !strchr("\"\\", *p)) { - fputc(*p, stdout); - } else { - printf("\\%02X", *p); - } - p++; - } -} - -static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) -{ - printf("{\n"); - printf("key(%d) = \"", (int)key.dsize); - print_data(key); - printf("\"\n"); - printf("data(%d) = \"", (int)dbuf.dsize); - print_data(dbuf); - printf("\"\n"); - printf("}\n"); - return 0; -} - -static int dump_ntdb(const char *fname, const char *keyname) -{ - struct ntdb_context *ntdb; - NTDB_DATA key, value; - - ntdb = ntdb_open(fname, 0, O_RDONLY, 0, NULL); - if (!ntdb) { - printf("Failed to open %s\n", fname); - return 1; - } - - if (!keyname) { - ntdb_traverse(ntdb, traverse_fn, NULL); - } else { - key = ntdb_mkdata(keyname, strlen(keyname)); - if (ntdb_fetch(ntdb, key, &value) != 0) { - return 1; - } else { - print_data(value); - free(value.dptr); - } - } - - return 0; -} - -static void usage( void) -{ - printf( 
"Usage: ntdbdump [options] \n\n"); - printf( " -h this help message\n"); - printf( " -k keyname dumps value of keyname\n"); -} - - int main(int argc, char *argv[]) -{ - char *fname, *keyname=NULL; - int c; - - if (argc < 2) { - printf("Usage: ntdbdump \n"); - exit(1); - } - - while ((c = getopt( argc, argv, "hk:")) != -1) { - switch (c) { - case 'h': - usage(); - exit( 0); - case 'k': - keyname = optarg; - break; - default: - usage(); - exit( 1); - } - } - - fname = argv[optind]; - - return dump_ntdb(fname, keyname); -} diff --git a/ccan/ntdb/tools/ntdbrestore.c b/ccan/ntdb/tools/ntdbrestore.c deleted file mode 100644 index 695af79c..00000000 --- a/ccan/ntdb/tools/ntdbrestore.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - ntdbrestore -- construct a ntdb from (n)tdbdump output. - Copyright (C) Rusty Russell 2012 - Copyright (C) Volker Lendecke 2010 - Copyright (C) Simon McVittie 2005 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "config.h" -#include "ntdb.h" -#include "private.h" -#include - -static int read_linehead(FILE *f) -{ - int i, c; - int num_bytes; - char prefix[128]; - - while (1) { - c = getc(f); - if (c == EOF) { - return -1; - } - if (c == '(') { - break; - } - } - for (i=0; idptr = (unsigned char *)malloc(size); - if (d->dptr == NULL) { - return -1; - } - d->dsize = size; - - for (i=0; idptr[i] = (low|high); - } else { - d->dptr[i] = c; - } - } - return 0; -} - -static int swallow(FILE *f, const char *s, int *eof) -{ - char line[128]; - - if (fgets(line, sizeof(line), f) == NULL) { - if (eof != NULL) { - *eof = 1; - } - return -1; - } - if (strcmp(line, s) != 0) { - return -1; - } - return 0; -} - -static bool read_rec(FILE *f, struct ntdb_context *ntdb, int *eof) -{ - int length; - NTDB_DATA key, data; - bool ret = false; - enum NTDB_ERROR e; - - key.dptr = NULL; - data.dptr = NULL; - - if (swallow(f, "{\n", eof) == -1) { - goto fail; - } - length = read_linehead(f); - if (length == -1) { - goto fail; - } - if (read_data(f, &key, length) == -1) { - goto fail; - } - if (swallow(f, "\"\n", NULL) == -1) { - goto fail; - } - length = read_linehead(f); - if (length == -1) { - goto fail; - } - if (read_data(f, &data, length) == -1) { - goto fail; - } - if ((swallow(f, "\"\n", NULL) == -1) - || (swallow(f, "}\n", NULL) == -1)) { - goto fail; - } - e = ntdb_store(ntdb, key, data, NTDB_INSERT); - if (e != NTDB_SUCCESS) { - fprintf(stderr, "NTDB error: %s\n", ntdb_errorstr(e)); - goto fail; - } - - ret = true; -fail: - free(key.dptr); - free(data.dptr); - return ret; -} - -static int restore_ntdb(const char *fname, unsigned int hsize) -{ - struct ntdb_context *ntdb; - union ntdb_attribute hashsize; - - hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE; - hashsize.base.next = NULL; - hashsize.hashsize.size = hsize; - - ntdb = ntdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, - hsize ? 
&hashsize : NULL); - if (!ntdb) { - perror("ntdb_open"); - fprintf(stderr, "Failed to open %s\n", fname); - return 1; - } - - while (1) { - int eof = 0; - if (!read_rec(stdin, ntdb, &eof)) { - if (eof) { - break; - } - return 1; - } - } - if (ntdb_close(ntdb)) { - fprintf(stderr, "Error closing ntdb\n"); - return 1; - } - fprintf(stderr, "EOF\n"); - return 0; -} - -int main(int argc, char *argv[]) -{ - unsigned int hsize = 0; - const char *execname = argv[0]; - - if (argv[1] && strcmp(argv[1], "-h") == 0) { - if (argv[2]) { - hsize = atoi(argv[2]); - } - if (hsize == 0) { - fprintf(stderr, "-h requires a integer value" - " (eg. 128 or 131072)\n"); - exit(1); - } - argv += 2; - argc -= 2; - } - if (argc != 2) { - printf("Usage: %s [-h ] dbname < tdbdump_output\n", - execname); - exit(1); - } - - - return restore_ntdb(argv[1], hsize); -} diff --git a/ccan/ntdb/tools/ntdbtool.c b/ccan/ntdb/tools/ntdbtool.c deleted file mode 100644 index 144cd92f..00000000 --- a/ccan/ntdb/tools/ntdbtool.c +++ /dev/null @@ -1,794 +0,0 @@ -/* - Unix SMB/CIFS implementation. - Samba database functions - Copyright (C) Andrew Tridgell 1999-2000 - Copyright (C) Paul `Rusty' Russell 2000 - Copyright (C) Jeremy Allison 2000 - Copyright (C) Andrew Esh 2001 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "config.h" -#include "ntdb.h" -#include "private.h" - -static int do_command(void); -const char *cmdname; -char *arg1, *arg2; -size_t arg1len, arg2len; -int bIterate = 0; -char *line; -NTDB_DATA iterate_kbuf; -char cmdline[1024]; -static int disable_mmap; - -enum commands { - CMD_CREATE_NTDB, - CMD_OPEN_NTDB, - CMD_TRANSACTION_START, - CMD_TRANSACTION_COMMIT, - CMD_TRANSACTION_CANCEL, - CMD_ERASE, - CMD_DUMP, - CMD_INSERT, - CMD_MOVE, - CMD_STORE, - CMD_SHOW, - CMD_KEYS, - CMD_HEXKEYS, - CMD_DELETE, -#if 0 - CMD_LIST_HASH_FREE, - CMD_LIST_FREE, -#endif - CMD_INFO, - CMD_MMAP, - CMD_SPEED, - CMD_FIRST, - CMD_NEXT, - CMD_SYSTEM, - CMD_CHECK, - CMD_QUIT, - CMD_HELP -}; - -typedef struct { - const char *name; - enum commands cmd; -} COMMAND_TABLE; - -COMMAND_TABLE cmd_table[] = { - {"create", CMD_CREATE_NTDB}, - {"open", CMD_OPEN_NTDB}, -#if 0 - {"transaction_start", CMD_TRANSACTION_START}, - {"transaction_commit", CMD_TRANSACTION_COMMIT}, - {"transaction_cancel", CMD_TRANSACTION_CANCEL}, -#endif - {"erase", CMD_ERASE}, - {"dump", CMD_DUMP}, - {"insert", CMD_INSERT}, - {"move", CMD_MOVE}, - {"store", CMD_STORE}, - {"show", CMD_SHOW}, - {"keys", CMD_KEYS}, - {"hexkeys", CMD_HEXKEYS}, - {"delete", CMD_DELETE}, -#if 0 - {"list", CMD_LIST_HASH_FREE}, - {"free", CMD_LIST_FREE}, -#endif - {"info", CMD_INFO}, - {"speed", CMD_SPEED}, - {"mmap", CMD_MMAP}, - {"first", CMD_FIRST}, - {"1", CMD_FIRST}, - {"next", CMD_NEXT}, - {"n", CMD_NEXT}, - {"check", CMD_CHECK}, - {"quit", CMD_QUIT}, - {"q", CMD_QUIT}, - {"!", CMD_SYSTEM}, - {NULL, CMD_HELP} -}; - -struct timeval tp1,tp2; - -static void _start_timer(void) -{ - gettimeofday(&tp1,NULL); -} - -static double _end_timer(void) -{ - gettimeofday(&tp2,NULL); - return((tp2.tv_sec - tp1.tv_sec) + - (tp2.tv_usec - tp1.tv_usec)*1.0e-6); -} - -static void ntdb_log(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) -{ - fprintf(stderr, "ntdb:%s:%s:%s\n", - 
ntdb_name(ntdb), ntdb_errorstr(ecode), message); -} - -/* a ntdb tool for manipulating a ntdb database */ - -static struct ntdb_context *ntdb; - -static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state); -static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state); -static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state); - -static void print_asc(const char *buf,int len) -{ - int i; - - /* We're probably printing ASCII strings so don't try to display - the trailing NULL character. */ - - if (buf[len - 1] == 0) - len--; - - for (i=0;i8) printf(" "); - while (n--) printf(" "); - - n = i%16; - if (n > 8) n = 8; - print_asc(&buf[i-(i%16)],n); printf(" "); - n = (i%16) - n; - if (n>0) print_asc(&buf[i-n],n); - printf("\n"); - } -} - -static void help(void) -{ - printf("\n" -"tdbtool: \n" -" create dbname : create a database\n" -" open dbname : open an existing database\n" -" openjh dbname : open an existing database (jenkins hash)\n" -" transaction_start : start a transaction\n" -" transaction_commit : commit a transaction\n" -" transaction_cancel : cancel a transaction\n" -" erase : erase the database\n" -" dump : dump the database as strings\n" -" keys : dump the database keys as strings\n" -" hexkeys : dump the database keys as hex values\n" -" info : print summary info about the database\n" -" insert key data : insert a record\n" -" move key file : move a record to a destination ntdb\n" -" store key data : store a record (replace)\n" -" show key : show a record by key\n" -" delete key : delete a record by key\n" -#if 0 -" list : print the database hash table and freelist\n" -" free : print the database freelist\n" -#endif -" check : check the integrity of an opened database\n" -" speed : perform speed tests on the database\n" -" ! 
command : execute system command\n" -" 1 | first : print the first record\n" -" n | next : print the next record\n" -" q | quit : terminate\n" -" \\n : repeat 'next' command\n" -"\n"); -} - -static void terror(enum NTDB_ERROR err, const char *why) -{ - if (err != NTDB_SUCCESS) - printf("%s:%s\n", ntdb_errorstr(err), why); - else - printf("%s\n", why); -} - -static void create_ntdb(const char *tdbname) -{ - union ntdb_attribute log_attr; - log_attr.base.attr = NTDB_ATTRIBUTE_LOG; - log_attr.base.next = NULL; - log_attr.log.fn = ntdb_log; - - if (ntdb) ntdb_close(ntdb); - ntdb = ntdb_open(tdbname, (disable_mmap?NTDB_NOMMAP:0), - O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr); - if (!ntdb) { - printf("Could not create %s: %s\n", tdbname, strerror(errno)); - } -} - -static void open_ntdb(const char *tdbname) -{ - union ntdb_attribute log_attr; - log_attr.base.attr = NTDB_ATTRIBUTE_LOG; - log_attr.base.next = NULL; - log_attr.log.fn = ntdb_log; - - if (ntdb) ntdb_close(ntdb); - ntdb = ntdb_open(tdbname, disable_mmap?NTDB_NOMMAP:0, O_RDWR, 0600, - &log_attr); - if (!ntdb) { - printf("Could not open %s: %s\n", tdbname, strerror(errno)); - } -} - -static void insert_ntdb(char *keyname, size_t keylen, char* data, size_t datalen) -{ - NTDB_DATA key, dbuf; - enum NTDB_ERROR ecode; - - if ((keyname == NULL) || (keylen == 0)) { - terror(NTDB_SUCCESS, "need key"); - return; - } - - key.dptr = (unsigned char *)keyname; - key.dsize = keylen; - dbuf.dptr = (unsigned char *)data; - dbuf.dsize = datalen; - - ecode = ntdb_store(ntdb, key, dbuf, NTDB_INSERT); - if (ecode) { - terror(ecode, "insert failed"); - } -} - -static void store_ntdb(char *keyname, size_t keylen, char* data, size_t datalen) -{ - NTDB_DATA key, dbuf; - enum NTDB_ERROR ecode; - - if ((keyname == NULL) || (keylen == 0)) { - terror(NTDB_SUCCESS, "need key"); - return; - } - - if ((data == NULL) || (datalen == 0)) { - terror(NTDB_SUCCESS, "need data"); - return; - } - - key.dptr = (unsigned char *)keyname; - key.dsize = 
keylen; - dbuf.dptr = (unsigned char *)data; - dbuf.dsize = datalen; - - printf("Storing key:\n"); - print_rec(ntdb, key, dbuf, NULL); - - ecode = ntdb_store(ntdb, key, dbuf, NTDB_REPLACE); - if (ecode) { - terror(ecode, "store failed"); - } -} - -static void show_ntdb(char *keyname, size_t keylen) -{ - NTDB_DATA key, dbuf; - enum NTDB_ERROR ecode; - - if ((keyname == NULL) || (keylen == 0)) { - terror(NTDB_SUCCESS, "need key"); - return; - } - - key.dptr = (unsigned char *)keyname; - key.dsize = keylen; - - ecode = ntdb_fetch(ntdb, key, &dbuf); - if (ecode) { - terror(ecode, "fetch failed"); - return; - } - - print_rec(ntdb, key, dbuf, NULL); - - free( dbuf.dptr ); -} - -static void delete_ntdb(char *keyname, size_t keylen) -{ - NTDB_DATA key; - enum NTDB_ERROR ecode; - - if ((keyname == NULL) || (keylen == 0)) { - terror(NTDB_SUCCESS, "need key"); - return; - } - - key.dptr = (unsigned char *)keyname; - key.dsize = keylen; - - ecode = ntdb_delete(ntdb, key); - if (ecode) { - terror(ecode, "delete failed"); - } -} - -static void move_rec(char *keyname, size_t keylen, char* tdbname) -{ - NTDB_DATA key, dbuf; - struct ntdb_context *dst_ntdb; - enum NTDB_ERROR ecode; - - if ((keyname == NULL) || (keylen == 0)) { - terror(NTDB_SUCCESS, "need key"); - return; - } - - if ( !tdbname ) { - terror(NTDB_SUCCESS, "need destination ntdb name"); - return; - } - - key.dptr = (unsigned char *)keyname; - key.dsize = keylen; - - ecode = ntdb_fetch(ntdb, key, &dbuf); - if (ecode) { - terror(ecode, "fetch failed"); - return; - } - - print_rec(ntdb, key, dbuf, NULL); - - dst_ntdb = ntdb_open(tdbname, 0, O_RDWR, 0600, NULL); - if ( !dst_ntdb ) { - terror(NTDB_SUCCESS, "unable to open destination ntdb"); - return; - } - - ecode = ntdb_store( dst_ntdb, key, dbuf, NTDB_REPLACE); - if (ecode) - terror(ecode, "failed to move record"); - else - printf("record moved\n"); - - ntdb_close( dst_ntdb ); -} - -static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void 
*state) -{ - printf("\nkey %d bytes\n", (int)key.dsize); - print_asc((const char *)key.dptr, key.dsize); - printf("\ndata %d bytes\n", (int)dbuf.dsize); - print_data((const char *)dbuf.dptr, dbuf.dsize); - return 0; -} - -static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) -{ - printf("key %d bytes: ", (int)key.dsize); - print_asc((const char *)key.dptr, key.dsize); - printf("\n"); - return 0; -} - -static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) -{ - printf("key %d bytes\n", (int)key.dsize); - print_data((const char *)key.dptr, key.dsize); - printf("\n"); - return 0; -} - -static int total_bytes; - -static int traverse_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) -{ - total_bytes += dbuf.dsize; - return 0; -} - -static void info_ntdb(void) -{ - enum NTDB_ERROR ecode; - char *summary; - - ecode = ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &summary); - - if (ecode) { - terror(ecode, "Getting summary"); - } else { - printf("%s", summary); - free(summary); - } -} - -static void speed_ntdb(const char *tlimit) -{ - unsigned timelimit = tlimit?atoi(tlimit):0; - double t; - int ops; - if (timelimit == 0) timelimit = 5; - - ops = 0; - printf("Testing store speed for %u seconds\n", timelimit); - _start_timer(); - do { - long int r = random(); - NTDB_DATA key, dbuf; - key = ntdb_mkdata("store test", strlen("store test")); - dbuf.dptr = (unsigned char *)&r; - dbuf.dsize = sizeof(r); - ntdb_store(ntdb, key, dbuf, NTDB_REPLACE); - t = _end_timer(); - ops++; - } while (t < timelimit); - printf("%10.3f ops/sec\n", ops/t); - - ops = 0; - printf("Testing fetch speed for %u seconds\n", timelimit); - _start_timer(); - do { - long int r = random(); - NTDB_DATA key, dbuf; - key = ntdb_mkdata("store test", strlen("store test")); - dbuf.dptr = (unsigned char *)&r; - dbuf.dsize = sizeof(r); - ntdb_fetch(ntdb, key, &dbuf); - t = _end_timer(); - ops++; - } while 
(t < timelimit); - printf("%10.3f ops/sec\n", ops/t); - - ops = 0; - printf("Testing transaction speed for %u seconds\n", timelimit); - _start_timer(); - do { - long int r = random(); - NTDB_DATA key, dbuf; - key = ntdb_mkdata("transaction test", strlen("transaction test")); - dbuf.dptr = (unsigned char *)&r; - dbuf.dsize = sizeof(r); - ntdb_transaction_start(ntdb); - ntdb_store(ntdb, key, dbuf, NTDB_REPLACE); - ntdb_transaction_commit(ntdb); - t = _end_timer(); - ops++; - } while (t < timelimit); - printf("%10.3f ops/sec\n", ops/t); - - ops = 0; - printf("Testing traverse speed for %u seconds\n", timelimit); - _start_timer(); - do { - ntdb_traverse(ntdb, traverse_fn, NULL); - t = _end_timer(); - ops++; - } while (t < timelimit); - printf("%10.3f ops/sec\n", ops/t); -} - -static void toggle_mmap(void) -{ - disable_mmap = !disable_mmap; - if (disable_mmap) { - printf("mmap is disabled\n"); - } else { - printf("mmap is enabled\n"); - } -} - -static char *ntdb_getline(const char *prompt) -{ - static char thisline[1024]; - char *p; - fputs(prompt, stdout); - thisline[0] = 0; - p = fgets(thisline, sizeof(thisline)-1, stdin); - if (p) p = strchr(p, '\n'); - if (p) *p = 0; - return p?thisline:NULL; -} - -static int do_delete_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, - void *state) -{ - return ntdb_delete(the_ntdb, key); -} - -static void first_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey) -{ - NTDB_DATA dbuf; - enum NTDB_ERROR ecode; - ecode = ntdb_firstkey(the_ntdb, pkey); - if (!ecode) - ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf); - if (ecode) terror(ecode, "fetch failed"); - else { - print_rec(the_ntdb, *pkey, dbuf, NULL); - } -} - -static void next_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey) -{ - NTDB_DATA dbuf; - enum NTDB_ERROR ecode; - ecode = ntdb_nextkey(the_ntdb, pkey); - - if (!ecode) - ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf); - if (ecode) - terror(ecode, "fetch failed"); - else - print_rec(the_ntdb, *pkey, dbuf, 
NULL); -} - -static void check_db(struct ntdb_context *the_ntdb) -{ - if (!the_ntdb) { - printf("Error: No database opened!\n"); - } else { - if (ntdb_check(the_ntdb, NULL, NULL) != 0) - printf("Integrity check for the opened database failed.\n"); - else - printf("Database integrity is OK.\n"); - } -} - -static int do_command(void) -{ - COMMAND_TABLE *ctp = cmd_table; - enum commands mycmd = CMD_HELP; - int cmd_len; - - if (cmdname && strlen(cmdname) == 0) { - mycmd = CMD_NEXT; - } else { - while (ctp->name) { - cmd_len = strlen(ctp->name); - if (strncmp(ctp->name,cmdname,cmd_len) == 0) { - mycmd = ctp->cmd; - break; - } - ctp++; - } - } - - switch (mycmd) { - case CMD_CREATE_NTDB: - bIterate = 0; - create_ntdb(arg1); - return 0; - case CMD_OPEN_NTDB: - bIterate = 0; - open_ntdb(arg1); - return 0; - case CMD_SYSTEM: - /* Shell command */ - if (system(arg1) == -1) { - terror(NTDB_SUCCESS, "system() call failed\n"); - } - return 0; - case CMD_QUIT: - return 1; - default: - /* all the rest require a open database */ - if (!ntdb) { - bIterate = 0; - terror(NTDB_SUCCESS, "database not open"); - help(); - return 0; - } - switch (mycmd) { - case CMD_TRANSACTION_START: - bIterate = 0; - ntdb_transaction_start(ntdb); - return 0; - case CMD_TRANSACTION_COMMIT: - bIterate = 0; - ntdb_transaction_commit(ntdb); - return 0; - case CMD_TRANSACTION_CANCEL: - bIterate = 0; - ntdb_transaction_cancel(ntdb); - return 0; - case CMD_ERASE: - bIterate = 0; - ntdb_traverse(ntdb, do_delete_fn, NULL); - return 0; - case CMD_DUMP: - bIterate = 0; - ntdb_traverse(ntdb, print_rec, NULL); - return 0; - case CMD_INSERT: - bIterate = 0; - insert_ntdb(arg1, arg1len,arg2,arg2len); - return 0; - case CMD_MOVE: - bIterate = 0; - move_rec(arg1,arg1len,arg2); - return 0; - case CMD_STORE: - bIterate = 0; - store_ntdb(arg1,arg1len,arg2,arg2len); - return 0; - case CMD_SHOW: - bIterate = 0; - show_ntdb(arg1, arg1len); - return 0; - case CMD_KEYS: - ntdb_traverse(ntdb, print_key, NULL); - return 0; - case 
CMD_HEXKEYS: - ntdb_traverse(ntdb, print_hexkey, NULL); - return 0; - case CMD_DELETE: - bIterate = 0; - delete_ntdb(arg1,arg1len); - return 0; -#if 0 - case CMD_LIST_HASH_FREE: - ntdb_dump_all(ntdb); - return 0; - case CMD_LIST_FREE: - ntdb_printfreelist(ntdb); - return 0; -#endif - case CMD_INFO: - info_ntdb(); - return 0; - case CMD_SPEED: - speed_ntdb(arg1); - return 0; - case CMD_MMAP: - toggle_mmap(); - return 0; - case CMD_FIRST: - bIterate = 1; - first_record(ntdb, &iterate_kbuf); - return 0; - case CMD_NEXT: - if (bIterate) - next_record(ntdb, &iterate_kbuf); - return 0; - case CMD_CHECK: - check_db(ntdb); - return 0; - case CMD_HELP: - help(); - return 0; - case CMD_CREATE_NTDB: - case CMD_OPEN_NTDB: - case CMD_SYSTEM: - case CMD_QUIT: - /* - * unhandled commands. cases included here to avoid compiler - * warnings. - */ - return 0; - } - } - - return 0; -} - -static char *convert_string(char *instring, size_t *sizep) -{ - size_t length = 0; - char *outp, *inp; - char temp[3]; - - outp = inp = instring; - - while (*inp) { - if (*inp == '\\') { - inp++; - if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) { - temp[0] = *inp++; - temp[1] = '\0'; - if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) { - temp[1] = *inp++; - temp[2] = '\0'; - } - *outp++ = (char)strtol((const char *)temp,NULL,16); - } else { - *outp++ = *inp++; - } - } else { - *outp++ = *inp++; - } - length++; - } - *sizep = length; - return instring; -} - -int main(int argc, char *argv[]) -{ - cmdname = ""; - arg1 = NULL; - arg1len = 0; - arg2 = NULL; - arg2len = 0; - - if (argv[1]) { - cmdname = "open"; - arg1 = argv[1]; - do_command(); - cmdname = ""; - arg1 = NULL; - } - - switch (argc) { - case 1: - case 2: - /* Interactive mode */ - while ((cmdname = ntdb_getline("ntdb> "))) { - arg2 = arg1 = NULL; - if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) { - arg1++; - arg2 = arg1; - while (*arg2) { - if (*arg2 == ' ') { - *arg2++ = '\0'; - break; - } - if ((*arg2++ == '\\') && 
(*arg2 == ' ')) { - arg2++; - } - } - } - if (arg1) arg1 = convert_string(arg1,&arg1len); - if (arg2) arg2 = convert_string(arg2,&arg2len); - if (do_command()) break; - } - break; - case 5: - arg2 = convert_string(argv[4],&arg2len); - case 4: - arg1 = convert_string(argv[3],&arg1len); - case 3: - cmdname = argv[2]; - default: - do_command(); - break; - } - - if (ntdb) ntdb_close(ntdb); - - return 0; -} diff --git a/ccan/ntdb/tools/ntdbtorture.c b/ccan/ntdb/tools/ntdbtorture.c deleted file mode 100644 index 9fd25ca7..00000000 --- a/ccan/ntdb/tools/ntdbtorture.c +++ /dev/null @@ -1,535 +0,0 @@ -/* this tests ntdb by doing lots of ops from several simultaneous - writers - that stresses the locking code. -*/ - -#include "config.h" -#include "ntdb.h" -#include "private.h" -#include - -//#define REOPEN_PROB 30 -#define DELETE_PROB 8 -#define STORE_PROB 4 -#define APPEND_PROB 6 -#define TRANSACTION_PROB 10 -#define TRANSACTION_PREPARE_PROB 2 -#define LOCKSTORE_PROB 5 -#define TRAVERSE_PROB 20 -#define TRAVERSE_MOD_PROB 100 -#define TRAVERSE_ABORT_PROB 500 -#define CULL_PROB 100 -#define KEYLEN 3 -#define DATALEN 100 - -static struct ntdb_context *db; -static int in_transaction; -static int in_traverse; -static int error_count; -#if TRANSACTION_PROB -static int always_transaction = 0; -#endif -static int loopnum; -static int count_pipe; -static union ntdb_attribute log_attr; -static union ntdb_attribute seed_attr; -static union ntdb_attribute hsize_attr; - -static void ntdb_log(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) -{ - printf("ntdb:%s:%s:%s\n", - ntdb_name(ntdb), ntdb_errorstr(ecode), message); - fflush(stdout); -#if 0 - { - char str[200]; - signal(SIGUSR1, SIG_IGN); - sprintf(str,"xterm -e gdb /proc/%u/exe %u", (unsigned int)getpid(), (unsigned int)getpid()); - system(str); - } -#endif -} - -#include "../private.h" - -static void segv_handler(int sig, siginfo_t *info, void *p) -{ - char 
string[100]; - - sprintf(string, "%u: death at %p (map_ptr %p, map_size %zu)\n", - (unsigned int)getpid(), info->si_addr, db->file->map_ptr, - (size_t)db->file->map_size); - if (write(2, string, strlen(string)) > 0) - sleep(60); - _exit(11); -} - -static void warn_on_err(enum NTDB_ERROR e, struct ntdb_context *ntdb, - const char *why) -{ - if (e != NTDB_SUCCESS) { - fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), why, - ntdb ? ntdb_errorstr(e) : "(no ntdb)"); - error_count++; - } -} - -static char *randbuf(int len) -{ - char *buf; - int i; - buf = (char *)malloc(len+1); - if (buf == NULL) { - perror("randbuf: unable to allocate memory for buffer.\n"); - exit(1); - } - - for (i=0;i -#include -#include -#include -#include -#include -#include -#include -#include -#include "ntdb.h" - -/* Nanoseconds per operation */ -static size_t normalize(const struct timeval *start, - const struct timeval *stop, - unsigned int num) -{ - struct timeval diff; - - timersub(stop, start, &diff); - - /* Floating point is more accurate here. 
*/ - return (double)(diff.tv_sec * 1000000 + diff.tv_usec) - / num * 1000; -} - -static size_t file_size(void) -{ - struct stat st; - - if (stat("/tmp/speed.ntdb", &st) != 0) - return -1; - return st.st_size; -} - -static int count_record(struct ntdb_context *ntdb, - NTDB_DATA key, NTDB_DATA data, void *p) -{ - int *total = p; - *total += *(int *)data.dptr; - return 0; -} - -static void dump_and_clear_stats(struct ntdb_context **ntdb, - int flags, - union ntdb_attribute *attr) -{ - union ntdb_attribute stats; - enum NTDB_ERROR ecode; - - stats.base.attr = NTDB_ATTRIBUTE_STATS; - stats.stats.size = sizeof(stats.stats); - ecode = ntdb_get_attribute(*ntdb, &stats); - if (ecode != NTDB_SUCCESS) - errx(1, "Getting stats: %s", ntdb_errorstr(ecode)); - - printf("allocs = %llu\n", - (unsigned long long)stats.stats.allocs); - printf(" alloc_subhash = %llu\n", - (unsigned long long)stats.stats.alloc_subhash); - printf(" alloc_chain = %llu\n", - (unsigned long long)stats.stats.alloc_chain); - printf(" alloc_bucket_exact = %llu\n", - (unsigned long long)stats.stats.alloc_bucket_exact); - printf(" alloc_bucket_max = %llu\n", - (unsigned long long)stats.stats.alloc_bucket_max); - printf(" alloc_leftover = %llu\n", - (unsigned long long)stats.stats.alloc_leftover); - printf(" alloc_coalesce_tried = %llu\n", - (unsigned long long)stats.stats.alloc_coalesce_tried); - printf(" alloc_coalesce_iterate_clash = %llu\n", - (unsigned long long)stats.stats.alloc_coalesce_iterate_clash); - printf(" alloc_coalesce_lockfail = %llu\n", - (unsigned long long)stats.stats.alloc_coalesce_lockfail); - printf(" alloc_coalesce_race = %llu\n", - (unsigned long long)stats.stats.alloc_coalesce_race); - printf(" alloc_coalesce_succeeded = %llu\n", - (unsigned long long)stats.stats.alloc_coalesce_succeeded); - printf(" alloc_coalesce_num_merged = %llu\n", - (unsigned long long)stats.stats.alloc_coalesce_num_merged); - printf("compares = %llu\n", - (unsigned long long)stats.stats.compares); - printf(" 
compare_wrong_offsetbits = %llu\n", - (unsigned long long)stats.stats.compare_wrong_offsetbits); - printf(" compare_wrong_keylen = %llu\n", - (unsigned long long)stats.stats.compare_wrong_keylen); - printf(" compare_wrong_rechash = %llu\n", - (unsigned long long)stats.stats.compare_wrong_rechash); - printf(" compare_wrong_keycmp = %llu\n", - (unsigned long long)stats.stats.compare_wrong_keycmp); - printf("transactions = %llu\n", - (unsigned long long)stats.stats.transactions); - printf(" transaction_cancel = %llu\n", - (unsigned long long)stats.stats.transaction_cancel); - printf(" transaction_nest = %llu\n", - (unsigned long long)stats.stats.transaction_nest); - printf(" transaction_expand_file = %llu\n", - (unsigned long long)stats.stats.transaction_expand_file); - printf(" transaction_read_direct = %llu\n", - (unsigned long long)stats.stats.transaction_read_direct); - printf(" transaction_read_direct_fail = %llu\n", - (unsigned long long)stats.stats.transaction_read_direct_fail); - printf(" transaction_write_direct = %llu\n", - (unsigned long long)stats.stats.transaction_write_direct); - printf(" transaction_write_direct_fail = %llu\n", - (unsigned long long)stats.stats.transaction_write_direct_fail); - printf("expands = %llu\n", - (unsigned long long)stats.stats.expands); - printf("frees = %llu\n", - (unsigned long long)stats.stats.frees); - printf("locks = %llu\n", - (unsigned long long)stats.stats.locks); - printf(" lock_lowlevel = %llu\n", - (unsigned long long)stats.stats.lock_lowlevel); - printf(" lock_nonblock = %llu\n", - (unsigned long long)stats.stats.lock_nonblock); - printf(" lock_nonblock_fail = %llu\n", - (unsigned long long)stats.stats.lock_nonblock_fail); - - /* Now clear. 
*/ - ntdb_close(*ntdb); - *ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR, 0, attr); -} - -static void ntdb_log(struct ntdb_context *ntdb, - enum ntdb_log_level level, - enum NTDB_ERROR ecode, - const char *message, - void *data) -{ - fprintf(stderr, "ntdb:%s:%s:%s\n", - ntdb_name(ntdb), ntdb_errorstr(ecode), message); -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j, num = 1000, stage = 0, stopat = -1; - int flags = NTDB_DEFAULT; - bool transaction = false, summary = false; - NTDB_DATA key, data; - struct ntdb_context *ntdb; - struct timeval start, stop; - union ntdb_attribute seed, log; - bool do_stats = false; - enum NTDB_ERROR ecode; - - /* Try to keep benchmarks even. */ - seed.base.attr = NTDB_ATTRIBUTE_SEED; - seed.base.next = NULL; - seed.seed.seed = 0; - - log.base.attr = NTDB_ATTRIBUTE_LOG; - log.base.next = &seed; - log.log.fn = ntdb_log; - - if (argv[1] && strcmp(argv[1], "--internal") == 0) { - flags = NTDB_INTERNAL; - argc--; - argv++; - } - if (argv[1] && strcmp(argv[1], "--transaction") == 0) { - transaction = true; - argc--; - argv++; - } - if (argv[1] && strcmp(argv[1], "--no-sync") == 0) { - flags |= NTDB_NOSYNC; - argc--; - argv++; - } - if (argv[1] && strcmp(argv[1], "--summary") == 0) { - summary = true; - argc--; - argv++; - } - if (argv[1] && strcmp(argv[1], "--stats") == 0) { - do_stats = true; - argc--; - argv++; - } - - ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR|O_CREAT|O_TRUNC, - 0600, &log); - if (!ntdb) - err(1, "Opening /tmp/speed.ntdb"); - - key.dptr = (void *)&i; - key.dsize = sizeof(i); - data = key; - - if (argv[1]) { - num = atoi(argv[1]); - argv++; - argc--; - } - - if (argv[1]) { - stopat = atoi(argv[1]); - argv++; - argc--; - } - - /* Add 1000 records. 
*/ - printf("Adding %u records: ", num); fflush(stdout); - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - gettimeofday(&start, NULL); - for (i = 0; i < num; i++) - if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0) - errx(1, "Inserting key %u in ntdb: %s", - i, ntdb_errorstr(ecode)); - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - - if (++stage == stopat) - exit(0); - - /* Finding 1000 records. */ - printf("Finding %u records: ", num); fflush(stdout); - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - gettimeofday(&start, NULL); - for (i = 0; i < num; i++) { - NTDB_DATA dbuf; - if ((ecode = ntdb_fetch(ntdb, key, &dbuf)) != NTDB_SUCCESS - || *(int *)dbuf.dptr != i) { - errx(1, "Fetching key %u in ntdb gave %u", - i, ecode ? ecode : *(int *)dbuf.dptr); - } - } - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - if (++stage == stopat) - exit(0); - - /* Missing 1000 records. 
*/ - printf("Missing %u records: ", num); fflush(stdout); - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - gettimeofday(&start, NULL); - for (i = num; i < num*2; i++) { - NTDB_DATA dbuf; - ecode = ntdb_fetch(ntdb, key, &dbuf); - if (ecode != NTDB_ERR_NOEXIST) - errx(1, "Fetching key %u in ntdb gave %s", - i, ntdb_errorstr(ecode)); - } - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - if (++stage == stopat) - exit(0); - - /* Traverse 1000 records. */ - printf("Traversing %u records: ", num); fflush(stdout); - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - i = 0; - gettimeofday(&start, NULL); - if (ntdb_traverse(ntdb, count_record, &i) != num) - errx(1, "Traverse returned wrong number of records"); - if (i != (num - 1) * (num / 2)) - errx(1, "Traverse tallied to %u", i); - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - if (++stage == stopat) - exit(0); - - /* Delete 1000 records (not in order). 
*/ - printf("Deleting %u records: ", num); fflush(stdout); - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - gettimeofday(&start, NULL); - for (j = 0; j < num; j++) { - i = (j + 100003) % num; - if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS) - errx(1, "Deleting key %u in ntdb: %s", - i, ntdb_errorstr(ecode)); - } - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - if (++stage == stopat) - exit(0); - - /* Re-add 1000 records (not in order). */ - printf("Re-adding %u records: ", num); fflush(stdout); - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - gettimeofday(&start, NULL); - for (j = 0; j < num; j++) { - i = (j + 100003) % num; - if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0) - errx(1, "Inserting key %u in ntdb: %s", - i, ntdb_errorstr(ecode)); - } - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - if (++stage == stopat) - exit(0); - - /* Append 1000 records. 
*/ - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - printf("Appending %u records: ", num); fflush(stdout); - gettimeofday(&start, NULL); - for (i = 0; i < num; i++) - if ((ecode = ntdb_append(ntdb, key, data)) != NTDB_SUCCESS) - errx(1, "Appending key %u in ntdb: %s", - i, ntdb_errorstr(ecode)); - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (++stage == stopat) - exit(0); - - /* Churn 1000 records: not in order! */ - if (transaction && (ecode = ntdb_transaction_start(ntdb))) - errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); - printf("Churning %u records: ", num); fflush(stdout); - gettimeofday(&start, NULL); - for (j = 0; j < num; j++) { - i = (j + 1000019) % num; - if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS) - errx(1, "Deleting key %u in ntdb: %s", - i, ntdb_errorstr(ecode)); - i += num; - if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0) - errx(1, "Inserting key %u in ntdb: %s", - i, ntdb_errorstr(ecode)); - } - gettimeofday(&stop, NULL); - if (transaction && (ecode = ntdb_transaction_commit(ntdb))) - errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); - printf(" %zu ns (%zu bytes)\n", - normalize(&start, &stop, num), file_size()); - - if (ntdb_check(ntdb, NULL, NULL)) - errx(1, "ntdb_check failed!"); - if (summary) { - char *sumstr = NULL; - ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); - printf("%s\n", sumstr); - free(sumstr); - } - if (do_stats) - dump_and_clear_stats(&ntdb, flags, &log); - if (++stage == stopat) - exit(0); - - return 0; -} 
diff --git a/ccan/ntdb/transaction.c b/ccan/ntdb/transaction.c deleted file mode 100644 index f2762166..00000000 --- a/ccan/ntdb/transaction.c +++ /dev/null @@ -1,1317 +0,0 @@ - /* - Unix SMB/CIFS implementation. - - trivial database library - - Copyright (C) Andrew Tridgell 2005 - Copyright (C) Rusty Russell 2010 - - ** NOTE! The following LGPL license applies to the ntdb - ** library. This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ - -#include "private.h" -#include -#define SAFE_FREE(ntdb, x) do { if ((x) != NULL) {ntdb->free_fn((void *)x, ntdb->alloc_data); (x)=NULL;} } while(0) - -/* - transaction design: - - - only allow a single transaction at a time per database. This makes - using the transaction API simpler, as otherwise the caller would - have to cope with temporary failures in transactions that conflict - with other current transactions - - - keep the transaction recovery information in the same file as the - database, using a special 'transaction recovery' record pointed at - by the header. This removes the need for extra journal files as - used by some other databases - - - dynamically allocated the transaction recover record, re-using it - for subsequent transactions. 
If a larger record is needed then - ntdb_free() the old record to place it on the normal ntdb freelist - before allocating the new record - - - during transactions, keep a linked list of writes all that have - been performed by intercepting all ntdb_write() calls. The hooked - transaction versions of ntdb_read() and ntdb_write() check this - linked list and try to use the elements of the list in preference - to the real database. - - - don't allow any locks to be held when a transaction starts, - otherwise we can end up with deadlock (plus lack of lock nesting - in POSIX locks would mean the lock is lost) - - - if the caller gains a lock during the transaction but doesn't - release it then fail the commit - - - allow for nested calls to ntdb_transaction_start(), re-using the - existing transaction record. If the inner transaction is canceled - then a subsequent commit will fail - - - keep a mirrored copy of the ntdb hash chain heads to allow for the - fast hash heads scan on traverse, updating the mirrored copy in - the transaction version of ntdb_write - - - allow callers to mix transaction and non-transaction use of ntdb, - although once a transaction is started then an exclusive lock is - gained until the transaction is committed or canceled - - - the commit stategy involves first saving away all modified data - into a linearised buffer in the transaction recovery area, then - marking the transaction recovery area with a magic value to - indicate a valid recovery record. In total 4 fsync/msync calls are - needed per commit to prevent race conditions. It might be possible - to reduce this to 3 or even 2 with some more work. - - - check for a valid recovery record on open of the ntdb, while the - open lock is held. Automatically recover from the transaction - recovery area if needed, then continue with the open as - usual. This allows for smooth crash recovery with no administrator - intervention. 
- - - if NTDB_NOSYNC is passed to flags in ntdb_open then transactions are - still available, but fsync/msync calls are made. This means we - still are safe against unexpected death during transaction commit, - but not against machine reboots. -*/ - -/* - hold the context of any current transaction -*/ -struct ntdb_transaction { - /* the original io methods - used to do IOs to the real db */ - const struct ntdb_methods *io_methods; - - /* the list of transaction blocks. When a block is first - written to, it gets created in this list */ - uint8_t **blocks; - size_t num_blocks; - - /* non-zero when an internal transaction error has - occurred. All write operations will then fail until the - transaction is ended */ - int transaction_error; - - /* when inside a transaction we need to keep track of any - nested ntdb_transaction_start() calls, as these are allowed, - but don't create a new transaction */ - unsigned int nesting; - - /* set when a prepare has already occurred */ - bool prepared; - ntdb_off_t magic_offset; - - /* old file size before transaction */ - ntdb_len_t old_map_size; -}; - -/* - read while in a transaction. 
We need to check first if the data is in our list - of transaction elements, then if not do a real read -*/ -static enum NTDB_ERROR transaction_read(struct ntdb_context *ntdb, ntdb_off_t off, - void *buf, ntdb_len_t len) -{ - size_t blk; - enum NTDB_ERROR ecode; - - /* break it down into block sized ops */ - while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) { - ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE); - ecode = transaction_read(ntdb, off, buf, len2); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - len -= len2; - off += len2; - buf = (void *)(len2 + (char *)buf); - } - - if (len == 0) { - return NTDB_SUCCESS; - } - - blk = off / NTDB_PGSIZE; - - /* see if we have it in the block list */ - if (ntdb->transaction->num_blocks <= blk || - ntdb->transaction->blocks[blk] == NULL) { - /* nope, do a real read */ - ecode = ntdb->transaction->io_methods->tread(ntdb, off, buf, len); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - return 0; - } - - /* now copy it out of this block */ - memcpy(buf, ntdb->transaction->blocks[blk] + (off % NTDB_PGSIZE), len); - return NTDB_SUCCESS; - -fail: - ntdb->transaction->transaction_error = 1; - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "transaction_read: failed at off=%zu len=%zu", - (size_t)off, (size_t)len); -} - - -/* - write while in a transaction -*/ -static enum NTDB_ERROR transaction_write(struct ntdb_context *ntdb, ntdb_off_t off, - const void *buf, ntdb_len_t len) -{ - size_t blk; - enum NTDB_ERROR ecode; - - /* Only a commit is allowed on a prepared transaction */ - if (ntdb->transaction->prepared) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR, - "transaction_write: transaction already" - " prepared, write not allowed"); - goto fail; - } - - /* break it up into block sized chunks */ - while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) { - ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE); - ecode = transaction_write(ntdb, off, buf, len2); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - 
len -= len2; - off += len2; - if (buf != NULL) { - buf = (const void *)(len2 + (const char *)buf); - } - } - - if (len == 0) { - return NTDB_SUCCESS; - } - - blk = off / NTDB_PGSIZE; - off = off % NTDB_PGSIZE; - - if (ntdb->transaction->num_blocks <= blk) { - uint8_t **new_blocks; - /* expand the blocks array */ - if (ntdb->transaction->blocks == NULL) { - new_blocks = (uint8_t **)ntdb->alloc_fn(ntdb, - (blk+1)*sizeof(uint8_t *), ntdb->alloc_data); - } else { - new_blocks = (uint8_t **)ntdb->expand_fn( - ntdb->transaction->blocks, - (blk+1)*sizeof(uint8_t *), ntdb->alloc_data); - } - if (new_blocks == NULL) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "transaction_write:" - " failed to allocate"); - goto fail; - } - memset(&new_blocks[ntdb->transaction->num_blocks], 0, - (1+(blk - ntdb->transaction->num_blocks))*sizeof(uint8_t *)); - ntdb->transaction->blocks = new_blocks; - ntdb->transaction->num_blocks = blk+1; - } - - /* allocate and fill a block? */ - if (ntdb->transaction->blocks[blk] == NULL) { - ntdb->transaction->blocks[blk] = (uint8_t *) - ntdb->alloc_fn(ntdb->transaction->blocks, NTDB_PGSIZE, - ntdb->alloc_data); - if (ntdb->transaction->blocks[blk] == NULL) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "transaction_write:" - " failed to allocate"); - goto fail; - } - memset(ntdb->transaction->blocks[blk], 0, NTDB_PGSIZE); - if (ntdb->transaction->old_map_size > blk * NTDB_PGSIZE) { - ntdb_len_t len2 = NTDB_PGSIZE; - if (len2 + (blk * NTDB_PGSIZE) > ntdb->transaction->old_map_size) { - len2 = ntdb->transaction->old_map_size - (blk * NTDB_PGSIZE); - } - ecode = ntdb->transaction->io_methods->tread(ntdb, - blk * NTDB_PGSIZE, - ntdb->transaction->blocks[blk], - len2); - if (ecode != NTDB_SUCCESS) { - ecode = ntdb_logerr(ntdb, ecode, - NTDB_LOG_ERROR, - "transaction_write:" - " failed to" - " read old block: %s", - strerror(errno)); - SAFE_FREE(ntdb, ntdb->transaction->blocks[blk]); - goto fail; - } - } - } - - /* overwrite 
part of an existing block */ - if (buf == NULL) { - memset(ntdb->transaction->blocks[blk] + off, 0, len); - } else { - memcpy(ntdb->transaction->blocks[blk] + off, buf, len); - } - return NTDB_SUCCESS; - -fail: - ntdb->transaction->transaction_error = 1; - return ecode; -} - - -/* - write while in a transaction - this variant never expands the transaction blocks, it only - updates existing blocks. This means it cannot change the recovery size -*/ -static void transaction_write_existing(struct ntdb_context *ntdb, ntdb_off_t off, - const void *buf, ntdb_len_t len) -{ - size_t blk; - - /* break it up into block sized chunks */ - while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) { - ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE); - transaction_write_existing(ntdb, off, buf, len2); - len -= len2; - off += len2; - if (buf != NULL) { - buf = (const void *)(len2 + (const char *)buf); - } - } - - if (len == 0) { - return; - } - - blk = off / NTDB_PGSIZE; - off = off % NTDB_PGSIZE; - - if (ntdb->transaction->num_blocks <= blk || - ntdb->transaction->blocks[blk] == NULL) { - return; - } - - /* overwrite part of an existing block */ - memcpy(ntdb->transaction->blocks[blk] + off, buf, len); -} - - -/* - out of bounds check during a transaction -*/ -static enum NTDB_ERROR transaction_oob(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_len_t len, bool probe) -{ - if ((off + len >= off && off + len <= ntdb->file->map_size) || probe) { - return NTDB_SUCCESS; - } - - ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_oob len %lld beyond transaction size %lld", - (long long)(off + len), - (long long)ntdb->file->map_size); - return NTDB_ERR_IO; -} - -/* - transaction version of ntdb_expand(). 
-*/ -static enum NTDB_ERROR transaction_expand_file(struct ntdb_context *ntdb, - ntdb_off_t addition) -{ - enum NTDB_ERROR ecode; - - assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0); - - /* add a write to the transaction elements, so subsequent - reads see the zero data */ - ecode = transaction_write(ntdb, ntdb->file->map_size, NULL, addition); - if (ecode == NTDB_SUCCESS) { - ntdb->file->map_size += addition; - } - return ecode; -} - -static void *transaction_direct(struct ntdb_context *ntdb, ntdb_off_t off, - size_t len, bool write_mode) -{ - size_t blk = off / NTDB_PGSIZE, end_blk; - - /* This is wrong for zero-length blocks, but will fail gracefully */ - end_blk = (off + len - 1) / NTDB_PGSIZE; - - /* Can only do direct if in single block and we've already copied. */ - if (write_mode) { - ntdb->stats.transaction_write_direct++; - if (blk != end_blk - || blk >= ntdb->transaction->num_blocks - || ntdb->transaction->blocks[blk] == NULL) { - ntdb->stats.transaction_write_direct_fail++; - return NULL; - } - return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE; - } - - ntdb->stats.transaction_read_direct++; - /* Single which we have copied? */ - if (blk == end_blk - && blk < ntdb->transaction->num_blocks - && ntdb->transaction->blocks[blk]) - return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE; - - /* Otherwise must be all not copied. 
*/ - while (blk <= end_blk) { - if (blk >= ntdb->transaction->num_blocks) - break; - if (ntdb->transaction->blocks[blk]) { - ntdb->stats.transaction_read_direct_fail++; - return NULL; - } - blk++; - } - return ntdb->transaction->io_methods->direct(ntdb, off, len, false); -} - -static ntdb_off_t transaction_read_off(struct ntdb_context *ntdb, - ntdb_off_t off) -{ - ntdb_off_t ret; - enum NTDB_ERROR ecode; - - ecode = transaction_read(ntdb, off, &ret, sizeof(ret)); - ntdb_convert(ntdb, &ret, sizeof(ret)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - return ret; -} - -static enum NTDB_ERROR transaction_write_off(struct ntdb_context *ntdb, - ntdb_off_t off, ntdb_off_t val) -{ - ntdb_convert(ntdb, &val, sizeof(val)); - return transaction_write(ntdb, off, &val, sizeof(val)); -} - -static const struct ntdb_methods transaction_methods = { - transaction_read, - transaction_write, - transaction_oob, - transaction_expand_file, - transaction_direct, - transaction_read_off, - transaction_write_off, -}; - -/* - sync to disk -*/ -static enum NTDB_ERROR transaction_sync(struct ntdb_context *ntdb, - ntdb_off_t offset, ntdb_len_t length) -{ - if (ntdb->flags & NTDB_NOSYNC) { - return NTDB_SUCCESS; - } - - if (fsync(ntdb->file->fd) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_transaction: fsync failed: %s", - strerror(errno)); - } -#ifdef MS_SYNC - if (ntdb->file->map_ptr) { - ntdb_off_t moffset = offset & ~(getpagesize()-1); - if (msync(moffset + (char *)ntdb->file->map_ptr, - length + (offset - moffset), MS_SYNC) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, - "ntdb_transaction: msync failed: %s", - strerror(errno)); - } - } -#endif - return NTDB_SUCCESS; -} - -static void free_transaction_blocks(struct ntdb_context *ntdb) -{ - int i; - - /* free all the transaction blocks */ - for (i=0;itransaction->num_blocks;i++) { - if (ntdb->transaction->blocks[i] != NULL) { - ntdb->free_fn(ntdb->transaction->blocks[i], - 
ntdb->alloc_data); - } - } - SAFE_FREE(ntdb, ntdb->transaction->blocks); - ntdb->transaction->num_blocks = 0; -} - -static void _ntdb_transaction_cancel(struct ntdb_context *ntdb) -{ - enum NTDB_ERROR ecode; - - if (ntdb->transaction == NULL) { - ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_transaction_cancel: no transaction"); - return; - } - - if (ntdb->transaction->nesting != 0) { - ntdb->transaction->transaction_error = 1; - ntdb->transaction->nesting--; - return; - } - - ntdb->file->map_size = ntdb->transaction->old_map_size; - - free_transaction_blocks(ntdb); - - if (ntdb->transaction->magic_offset) { - const struct ntdb_methods *methods = ntdb->transaction->io_methods; - uint64_t invalid = NTDB_RECOVERY_INVALID_MAGIC; - - /* remove the recovery marker */ - ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset, - &invalid, sizeof(invalid)); - if (ecode == NTDB_SUCCESS) - ecode = transaction_sync(ntdb, - ntdb->transaction->magic_offset, - sizeof(invalid)); - if (ecode != NTDB_SUCCESS) { - ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_cancel: failed to remove" - " recovery magic"); - } - } - - if (ntdb->file->allrecord_lock.count) - ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype); - - /* restore the normal io methods */ - ntdb->io = ntdb->transaction->io_methods; - - ntdb_transaction_unlock(ntdb, F_WRLCK); - - if (ntdb_has_open_lock(ntdb)) - ntdb_unlock_open(ntdb, F_WRLCK); - - SAFE_FREE(ntdb, ntdb->transaction); -} - -/* - start a ntdb transaction. 
No token is returned, as only a single - transaction is allowed to be pending per ntdb_context -*/ -_PUBLIC_ enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb) -{ - enum NTDB_ERROR ecode; - - ntdb->stats.transactions++; - /* some sanity checks */ - if (ntdb->flags & NTDB_INTERNAL) { - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_transaction_start:" - " cannot start a transaction on an" - " internal ntdb"); - } - - if (ntdb->flags & NTDB_RDONLY) { - return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, - "ntdb_transaction_start:" - " cannot start a transaction on a" - " read-only ntdb"); - } - - /* cope with nested ntdb_transaction_start() calls */ - if (ntdb->transaction != NULL) { - if (!(ntdb->flags & NTDB_ALLOW_NESTING)) { - return ntdb_logerr(ntdb, NTDB_ERR_IO, - NTDB_LOG_USE_ERROR, - "ntdb_transaction_start:" - " already inside transaction"); - } - ntdb->transaction->nesting++; - ntdb->stats.transaction_nest++; - return 0; - } - - if (ntdb_has_hash_locks(ntdb)) { - /* the caller must not have any locks when starting a - transaction as otherwise we'll be screwed by lack - of nested locks in POSIX */ - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, - NTDB_LOG_USE_ERROR, - "ntdb_transaction_start:" - " cannot start a transaction with locks" - " held"); - } - - ntdb->transaction = (struct ntdb_transaction *) - ntdb->alloc_fn(ntdb, sizeof(struct ntdb_transaction), - ntdb->alloc_data); - if (ntdb->transaction == NULL) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_transaction_start:" - " cannot allocate"); - } - memset(ntdb->transaction, 0, sizeof(*ntdb->transaction)); - - /* get the transaction write lock. This is a blocking lock. 
As - discussed with Volker, there are a number of ways we could - make this async, which we will probably do in the future */ - ecode = ntdb_transaction_lock(ntdb, F_WRLCK); - if (ecode != NTDB_SUCCESS) { - SAFE_FREE(ntdb, ntdb->transaction->blocks); - SAFE_FREE(ntdb, ntdb->transaction); - return ecode; - } - - /* get a read lock over entire file. This is upgraded to a write - lock during the commit */ - ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, true); - if (ecode != NTDB_SUCCESS) { - goto fail_allrecord_lock; - } - - /* make sure we know about any file expansions already done by - anyone else */ - ntdb_oob(ntdb, ntdb->file->map_size, 1, true); - ntdb->transaction->old_map_size = ntdb->file->map_size; - - /* finally hook the io methods, replacing them with - transaction specific methods */ - ntdb->transaction->io_methods = ntdb->io; - ntdb->io = &transaction_methods; - return NTDB_SUCCESS; - -fail_allrecord_lock: - ntdb_transaction_unlock(ntdb, F_WRLCK); - SAFE_FREE(ntdb, ntdb->transaction->blocks); - SAFE_FREE(ntdb, ntdb->transaction); - return ecode; -} - - -/* - cancel the current transaction -*/ -_PUBLIC_ void ntdb_transaction_cancel(struct ntdb_context *ntdb) -{ - ntdb->stats.transaction_cancel++; - _ntdb_transaction_cancel(ntdb); -} - -/* - work out how much space the linearised recovery data will consume (worst case) -*/ -static ntdb_len_t ntdb_recovery_size(struct ntdb_context *ntdb) -{ - ntdb_len_t recovery_size = 0; - int i; - - recovery_size = 0; - for (i=0;itransaction->num_blocks;i++) { - if (i * NTDB_PGSIZE >= ntdb->transaction->old_map_size) { - break; - } - if (ntdb->transaction->blocks[i] == NULL) { - continue; - } - recovery_size += 2*sizeof(ntdb_off_t) + NTDB_PGSIZE; - } - - return recovery_size; -} - -static enum NTDB_ERROR ntdb_recovery_area(struct ntdb_context *ntdb, - const struct ntdb_methods *methods, - ntdb_off_t *recovery_offset, - struct ntdb_recovery_record *rec) -{ - enum NTDB_ERROR ecode; - - *recovery_offset = 
ntdb_read_off(ntdb, - offsetof(struct ntdb_header, recovery)); - if (NTDB_OFF_IS_ERR(*recovery_offset)) { - return NTDB_OFF_TO_ERR(*recovery_offset); - } - - if (*recovery_offset == 0) { - rec->max_len = 0; - return NTDB_SUCCESS; - } - - ecode = methods->tread(ntdb, *recovery_offset, rec, sizeof(*rec)); - if (ecode != NTDB_SUCCESS) - return ecode; - - ntdb_convert(ntdb, rec, sizeof(*rec)); - /* ignore invalid recovery regions: can happen in crash */ - if (rec->magic != NTDB_RECOVERY_MAGIC && - rec->magic != NTDB_RECOVERY_INVALID_MAGIC) { - *recovery_offset = 0; - rec->max_len = 0; - } - return NTDB_SUCCESS; -} - -static unsigned int same(const unsigned char *new, - const unsigned char *old, - unsigned int length) -{ - unsigned int i; - - for (i = 0; i < length; i++) { - if (new[i] != old[i]) - break; - } - return i; -} - -static unsigned int different(const unsigned char *new, - const unsigned char *old, - unsigned int length, - unsigned int min_same, - unsigned int *samelen) -{ - unsigned int i; - - *samelen = 0; - for (i = 0; i < length; i++) { - if (new[i] == old[i]) { - (*samelen)++; - } else { - if (*samelen >= min_same) { - return i - *samelen; - } - *samelen = 0; - } - } - - if (*samelen < min_same) - *samelen = 0; - return length - *samelen; -} - -/* Allocates recovery blob, without ntdb_recovery_record at head set up. 
*/ -static struct ntdb_recovery_record *alloc_recovery(struct ntdb_context *ntdb, - ntdb_len_t *len) -{ - struct ntdb_recovery_record *rec; - size_t i; - enum NTDB_ERROR ecode; - unsigned char *p; - const struct ntdb_methods *old_methods = ntdb->io; - - rec = ntdb->alloc_fn(ntdb, sizeof(*rec) + ntdb_recovery_size(ntdb), - ntdb->alloc_data); - if (!rec) { - ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "transaction_setup_recovery:" - " cannot allocate"); - return NTDB_ERR_PTR(NTDB_ERR_OOM); - } - - /* We temporarily revert to the old I/O methods, so we can use - * ntdb_access_read */ - ntdb->io = ntdb->transaction->io_methods; - - /* build the recovery data into a single blob to allow us to do a single - large write, which should be more efficient */ - p = (unsigned char *)(rec + 1); - for (i=0;itransaction->num_blocks;i++) { - ntdb_off_t offset; - ntdb_len_t length; - unsigned int off; - const unsigned char *buffer; - - if (ntdb->transaction->blocks[i] == NULL) { - continue; - } - - offset = i * NTDB_PGSIZE; - length = NTDB_PGSIZE; - if (offset >= ntdb->transaction->old_map_size) { - continue; - } - - if (offset + length > ntdb->file->map_size) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_transaction_setup_recovery:" - " transaction data over new region" - " boundary"); - goto fail; - } - buffer = ntdb_access_read(ntdb, offset, length, false); - if (NTDB_PTR_IS_ERR(buffer)) { - ecode = NTDB_PTR_ERR(buffer); - goto fail; - } - - /* Skip over anything the same at the start. 
*/ - off = same(ntdb->transaction->blocks[i], buffer, length); - offset += off; - - while (off < length) { - ntdb_len_t len1; - unsigned int samelen; - - len1 = different(ntdb->transaction->blocks[i] + off, - buffer + off, length - off, - sizeof(offset) + sizeof(len1) + 1, - &samelen); - - memcpy(p, &offset, sizeof(offset)); - memcpy(p + sizeof(offset), &len1, sizeof(len1)); - ntdb_convert(ntdb, p, sizeof(offset) + sizeof(len1)); - p += sizeof(offset) + sizeof(len1); - memcpy(p, buffer + off, len1); - p += len1; - off += len1 + samelen; - offset += len1 + samelen; - } - ntdb_access_release(ntdb, buffer); - } - - *len = p - (unsigned char *)(rec + 1); - ntdb->io = old_methods; - return rec; - -fail: - ntdb->free_fn(rec, ntdb->alloc_data); - ntdb->io = old_methods; - return NTDB_ERR_PTR(ecode); -} - -static ntdb_off_t create_recovery_area(struct ntdb_context *ntdb, - ntdb_len_t rec_length, - struct ntdb_recovery_record *rec) -{ - ntdb_off_t off, recovery_off; - ntdb_len_t addition; - enum NTDB_ERROR ecode; - const struct ntdb_methods *methods = ntdb->transaction->io_methods; - - /* round up to a multiple of page size. Overallocate, since each - * such allocation forces us to expand the file. */ - rec->max_len = ntdb_expand_adjust(ntdb->file->map_size, rec_length); - - /* Round up to a page. */ - rec->max_len = ((sizeof(*rec) + rec->max_len + NTDB_PGSIZE-1) - & ~(NTDB_PGSIZE-1)) - - sizeof(*rec); - - off = ntdb->file->map_size; - - /* Restore ->map_size before calling underlying expand_file. 
- Also so that we don't try to expand the file again in the - transaction commit, which would destroy the recovery - area */ - addition = (ntdb->file->map_size - ntdb->transaction->old_map_size) + - sizeof(*rec) + rec->max_len; - ntdb->file->map_size = ntdb->transaction->old_map_size; - ntdb->stats.transaction_expand_file++; - ecode = methods->expand_file(ntdb, addition); - if (ecode != NTDB_SUCCESS) { - ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_recovery_allocate:" - " failed to create recovery area"); - return NTDB_ERR_TO_OFF(ecode); - } - - /* we have to reset the old map size so that we don't try to - expand the file again in the transaction commit, which - would destroy the recovery area */ - ntdb->transaction->old_map_size = ntdb->file->map_size; - - /* write the recovery header offset and sync - we can sync without a race here - as the magic ptr in the recovery record has not been set */ - recovery_off = off; - ntdb_convert(ntdb, &recovery_off, sizeof(recovery_off)); - ecode = methods->twrite(ntdb, offsetof(struct ntdb_header, recovery), - &recovery_off, sizeof(ntdb_off_t)); - if (ecode != NTDB_SUCCESS) { - ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_recovery_allocate:" - " failed to write recovery head"); - return NTDB_ERR_TO_OFF(ecode); - } - transaction_write_existing(ntdb, offsetof(struct ntdb_header, recovery), - &recovery_off, - sizeof(ntdb_off_t)); - return off; -} - -/* - setup the recovery data that will be used on a crash during commit -*/ -static enum NTDB_ERROR transaction_setup_recovery(struct ntdb_context *ntdb) -{ - ntdb_len_t recovery_size = 0; - ntdb_off_t recovery_off = 0; - ntdb_off_t old_map_size = ntdb->transaction->old_map_size; - struct ntdb_recovery_record *recovery; - const struct ntdb_methods *methods = ntdb->transaction->io_methods; - uint64_t magic; - enum NTDB_ERROR ecode; - - recovery = alloc_recovery(ntdb, &recovery_size); - if (NTDB_PTR_IS_ERR(recovery)) - return NTDB_PTR_ERR(recovery); - - /* If we didn't actually 
change anything we overwrote? */ - if (recovery_size == 0) { - /* In theory, we could have just appended data. */ - if (ntdb->transaction->num_blocks * NTDB_PGSIZE - < ntdb->transaction->old_map_size) { - free_transaction_blocks(ntdb); - } - ntdb->free_fn(recovery, ntdb->alloc_data); - return NTDB_SUCCESS; - } - - ecode = ntdb_recovery_area(ntdb, methods, &recovery_off, recovery); - if (ecode) { - ntdb->free_fn(recovery, ntdb->alloc_data); - return ecode; - } - - if (recovery->max_len < recovery_size) { - /* Not large enough. Free up old recovery area. */ - if (recovery_off) { - ntdb->stats.frees++; - ecode = add_free_record(ntdb, recovery_off, - sizeof(*recovery) - + recovery->max_len, - NTDB_LOCK_WAIT, true); - ntdb->free_fn(recovery, ntdb->alloc_data); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_recovery_allocate:" - " failed to free previous" - " recovery area"); - } - - /* Refresh recovery after add_free_record above. */ - recovery = alloc_recovery(ntdb, &recovery_size); - if (NTDB_PTR_IS_ERR(recovery)) - return NTDB_PTR_ERR(recovery); - } - - recovery_off = create_recovery_area(ntdb, recovery_size, - recovery); - if (NTDB_OFF_IS_ERR(recovery_off)) { - ntdb->free_fn(recovery, ntdb->alloc_data); - return NTDB_OFF_TO_ERR(recovery_off); - } - } - - /* Now we know size, convert rec header. 
*/ - recovery->magic = NTDB_RECOVERY_INVALID_MAGIC; - recovery->len = recovery_size; - recovery->eof = old_map_size; - ntdb_convert(ntdb, recovery, sizeof(*recovery)); - - /* write the recovery data to the recovery area */ - ecode = methods->twrite(ntdb, recovery_off, recovery, - sizeof(*recovery) + recovery_size); - if (ecode != NTDB_SUCCESS) { - ntdb->free_fn(recovery, ntdb->alloc_data); - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_setup_recovery:" - " failed to write recovery data"); - } - transaction_write_existing(ntdb, recovery_off, recovery, recovery_size); - - ntdb->free_fn(recovery, ntdb->alloc_data); - - /* as we don't have ordered writes, we have to sync the recovery - data before we update the magic to indicate that the recovery - data is present */ - ecode = transaction_sync(ntdb, recovery_off, recovery_size); - if (ecode != NTDB_SUCCESS) - return ecode; - - magic = NTDB_RECOVERY_MAGIC; - ntdb_convert(ntdb, &magic, sizeof(magic)); - - ntdb->transaction->magic_offset - = recovery_off + offsetof(struct ntdb_recovery_record, magic); - - ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset, - &magic, sizeof(magic)); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_setup_recovery:" - " failed to write recovery magic"); - } - transaction_write_existing(ntdb, ntdb->transaction->magic_offset, - &magic, sizeof(magic)); - - /* ensure the recovery magic marker is on disk */ - return transaction_sync(ntdb, ntdb->transaction->magic_offset, - sizeof(magic)); -} - -static enum NTDB_ERROR _ntdb_transaction_prepare_commit(struct ntdb_context *ntdb) -{ - const struct ntdb_methods *methods; - enum NTDB_ERROR ecode; - - if (ntdb->transaction == NULL) { - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_transaction_prepare_commit:" - " no transaction"); - } - - if (ntdb->transaction->prepared) { - _ntdb_transaction_cancel(ntdb); - return ntdb_logerr(ntdb, 
NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_transaction_prepare_commit:" - " transaction already prepared"); - } - - if (ntdb->transaction->transaction_error) { - _ntdb_transaction_cancel(ntdb); - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR, - "ntdb_transaction_prepare_commit:" - " transaction error pending"); - } - - - if (ntdb->transaction->nesting != 0) { - return NTDB_SUCCESS; - } - - /* check for a null transaction */ - if (ntdb->transaction->blocks == NULL) { - return NTDB_SUCCESS; - } - - methods = ntdb->transaction->io_methods; - - /* upgrade the main transaction lock region to a write lock */ - ecode = ntdb_allrecord_upgrade(ntdb, NTDB_HASH_LOCK_START); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* get the open lock - this prevents new users attaching to the database - during the commit */ - ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* Sets up ntdb->transaction->recovery and - * ntdb->transaction->magic_offset. 
*/ - ecode = transaction_setup_recovery(ntdb); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - ntdb->transaction->prepared = true; - - /* expand the file to the new size if needed */ - if (ntdb->file->map_size != ntdb->transaction->old_map_size) { - ntdb_len_t add; - - add = ntdb->file->map_size - ntdb->transaction->old_map_size; - /* Restore original map size for ntdb_expand_file */ - ntdb->file->map_size = ntdb->transaction->old_map_size; - ecode = methods->expand_file(ntdb, add); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - } - - /* Keep the open lock until the actual commit */ - return NTDB_SUCCESS; -} - -/* - prepare to commit the current transaction -*/ -_PUBLIC_ enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb) -{ - return _ntdb_transaction_prepare_commit(ntdb); -} - -/* - commit the current transaction -*/ -_PUBLIC_ enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb) -{ - const struct ntdb_methods *methods; - int i; - enum NTDB_ERROR ecode; - - if (ntdb->transaction == NULL) { - return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, - "ntdb_transaction_commit:" - " no transaction"); - } - - ntdb_trace(ntdb, "ntdb_transaction_commit"); - - if (ntdb->transaction->nesting != 0) { - ntdb->transaction->nesting--; - return NTDB_SUCCESS; - } - - if (!ntdb->transaction->prepared) { - ecode = _ntdb_transaction_prepare_commit(ntdb); - if (ecode != NTDB_SUCCESS) { - _ntdb_transaction_cancel(ntdb); - return ecode; - } - } - - /* check for a null transaction (prepare_commit may do this!) 
*/ - if (ntdb->transaction->blocks == NULL) { - _ntdb_transaction_cancel(ntdb); - return NTDB_SUCCESS; - } - - methods = ntdb->transaction->io_methods; - - /* perform all the writes */ - for (i=0;itransaction->num_blocks;i++) { - ntdb_off_t offset; - ntdb_len_t length; - - if (ntdb->transaction->blocks[i] == NULL) { - continue; - } - - offset = i * NTDB_PGSIZE; - length = NTDB_PGSIZE; - - ecode = methods->twrite(ntdb, offset, - ntdb->transaction->blocks[i], length); - if (ecode != NTDB_SUCCESS) { - /* we've overwritten part of the data and - possibly expanded the file, so we need to - run the crash recovery code */ - ntdb->io = methods; - ntdb_transaction_recover(ntdb); - - _ntdb_transaction_cancel(ntdb); - - return ecode; - } - SAFE_FREE(ntdb, ntdb->transaction->blocks[i]); - } - - SAFE_FREE(ntdb, ntdb->transaction->blocks); - ntdb->transaction->num_blocks = 0; - - /* ensure the new data is on disk */ - ecode = transaction_sync(ntdb, 0, ntdb->file->map_size); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - /* - TODO: maybe write to some dummy hdr field, or write to magic - offset without mmap, before the last sync, instead of the - utime() call - */ - - /* on some systems (like Linux 2.6.x) changes via mmap/msync - don't change the mtime of the file, this means the file may - not be backed up (as ntdb rounding to block sizes means that - file size changes are quite rare too). The following forces - mtime changes when a transaction completes */ -#if HAVE_UTIME - utime(ntdb->name, NULL); -#endif - - /* use a transaction cancel to free memory and remove the - transaction locks: it "restores" map_size, too. */ - ntdb->transaction->old_map_size = ntdb->file->map_size; - _ntdb_transaction_cancel(ntdb); - - return NTDB_SUCCESS; -} - - -/* - recover from an aborted transaction. 
Must be called with exclusive - database write access already established (including the open - lock to prevent new processes attaching) -*/ -enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb) -{ - ntdb_off_t recovery_head, recovery_eof; - unsigned char *data, *p; - struct ntdb_recovery_record rec; - enum NTDB_ERROR ecode; - - /* find the recovery area */ - recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery)); - if (NTDB_OFF_IS_ERR(recovery_head)) { - ecode = NTDB_OFF_TO_ERR(recovery_head); - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to read recovery head"); - } - - if (recovery_head == 0) { - /* we have never allocated a recovery record */ - return NTDB_SUCCESS; - } - - /* read the recovery record */ - ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to read recovery record"); - } - - if (rec.magic != NTDB_RECOVERY_MAGIC) { - /* there is no valid recovery data */ - return NTDB_SUCCESS; - } - - if (ntdb->flags & NTDB_RDONLY) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " attempt to recover read only database"); - } - - recovery_eof = rec.eof; - - data = (unsigned char *)ntdb->alloc_fn(ntdb, rec.len, ntdb->alloc_data); - if (data == NULL) { - return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to allocate recovery data"); - } - - /* read the full recovery data */ - ecode = ntdb->io->tread(ntdb, recovery_head + sizeof(rec), data, - rec.len); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to read recovery data"); - } - - /* recover the file data */ - p = data; - while (p+sizeof(ntdb_off_t)+sizeof(ntdb_len_t) < data + rec.len) { - ntdb_off_t ofs; - ntdb_len_t len; - 
ntdb_convert(ntdb, p, sizeof(ofs) + sizeof(len)); - memcpy(&ofs, p, sizeof(ofs)); - memcpy(&len, p + sizeof(ofs), sizeof(len)); - p += sizeof(ofs) + sizeof(len); - - ecode = ntdb->io->twrite(ntdb, ofs, p, len); - if (ecode != NTDB_SUCCESS) { - ntdb->free_fn(data, ntdb->alloc_data); - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to recover %zu bytes" - " at offset %zu", - (size_t)len, (size_t)ofs); - } - p += len; - } - - ntdb->free_fn(data, ntdb->alloc_data); - - ecode = transaction_sync(ntdb, 0, ntdb->file->map_size); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to sync recovery"); - } - - /* if the recovery area is after the recovered eof then remove it */ - if (recovery_eof <= recovery_head) { - ecode = ntdb_write_off(ntdb, offsetof(struct ntdb_header, - recovery), - 0); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to remove recovery head"); - } - } - - /* remove the recovery magic */ - ecode = ntdb_write_off(ntdb, - recovery_head - + offsetof(struct ntdb_recovery_record, magic), - NTDB_RECOVERY_INVALID_MAGIC); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to remove recovery magic"); - } - - ecode = transaction_sync(ntdb, 0, recovery_eof); - if (ecode != NTDB_SUCCESS) { - return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, - "ntdb_transaction_recover:" - " failed to sync2 recovery"); - } - - ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, - "ntdb_transaction_recover: recovered %zu byte database", - (size_t)recovery_eof); - - /* all done */ - return NTDB_SUCCESS; -} - -ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb) -{ - ntdb_off_t recovery_head; - struct ntdb_recovery_record rec; - enum NTDB_ERROR ecode; - - /* find the recovery area */ - recovery_head = ntdb_read_off(ntdb, 
offsetof(struct ntdb_header,recovery)); - if (NTDB_OFF_IS_ERR(recovery_head)) { - return recovery_head; - } - - if (recovery_head == 0) { - /* we have never allocated a recovery record */ - return false; - } - - /* read the recovery record */ - ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - return (rec.magic == NTDB_RECOVERY_MAGIC); -} diff --git a/ccan/ntdb/traverse.c b/ccan/ntdb/traverse.c deleted file mode 100644 index 2e6763cb..00000000 --- a/ccan/ntdb/traverse.c +++ /dev/null @@ -1,100 +0,0 @@ - /* - Trivial Database 2: traverse function. - Copyright (C) Rusty Russell 2010 - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . 
-*/ -#include "private.h" -#include - -_PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb, - int (*fn)(struct ntdb_context *, - NTDB_DATA, NTDB_DATA, void *), - void *p) -{ - enum NTDB_ERROR ecode; - struct hash_info h; - NTDB_DATA k, d; - int64_t count = 0; - - k.dptr = NULL; - for (ecode = first_in_hash(ntdb, &h, &k, &d.dsize); - ecode == NTDB_SUCCESS; - ecode = next_in_hash(ntdb, &h, &k, &d.dsize)) { - d.dptr = k.dptr + k.dsize; - - count++; - if (fn && fn(ntdb, k, d, p)) { - ntdb->free_fn(k.dptr, ntdb->alloc_data); - return count; - } - ntdb->free_fn(k.dptr, ntdb->alloc_data); - } - - if (ecode != NTDB_ERR_NOEXIST) { - return NTDB_ERR_TO_OFF(ecode); - } - return count; -} - -_PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key) -{ - struct hash_info h; - - return first_in_hash(ntdb, &h, key, NULL); -} - -/* We lock twice, not very efficient. We could keep last key & h cached. */ -_PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key) -{ - struct hash_info h; - struct ntdb_used_record rec; - ntdb_off_t off; - - off = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, NULL); - ntdb->free_fn(key->dptr, ntdb->alloc_data); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - - /* If we found something, skip to next. */ - if (off) - h.bucket++; - return next_in_hash(ntdb, &h, key, NULL); -} - -static int wipe_one(struct ntdb_context *ntdb, - NTDB_DATA key, NTDB_DATA data, enum NTDB_ERROR *ecode) -{ - *ecode = ntdb_delete(ntdb, key); - return (*ecode != NTDB_SUCCESS); -} - -_PUBLIC_ enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb) -{ - enum NTDB_ERROR ecode; - int64_t count; - - ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false); - if (ecode != NTDB_SUCCESS) - return ecode; - - /* FIXME: Be smarter. 
*/ - count = ntdb_traverse(ntdb, wipe_one, &ecode); - if (count < 0) - ecode = NTDB_OFF_TO_ERR(count); - ntdb_allrecord_unlock(ntdb, F_WRLCK); - return ecode; -} diff --git a/ccan/ntdb/wscript b/ccan/ntdb/wscript deleted file mode 100644 index a21c1a30..00000000 --- a/ccan/ntdb/wscript +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env python - -APPNAME = 'ntdb' -VERSION = '1.0' - -blddir = 'bin' - -import sys, os - -# find the buildtools directory -srcdir = '.' -while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5: - srcdir = srcdir + '/..' -sys.path.insert(0, srcdir + '/buildtools/wafsamba') - -import wafsamba, samba_dist, Options, Logs, glob - -samba_dist.DIST_DIRS('lib/ntdb:. lib/replace:lib/replace lib/ccan:lib/ccan buildtools:buildtools') - -def set_options(opt): - opt.BUILTIN_DEFAULT('replace,ccan') - opt.PRIVATE_EXTENSION_DEFAULT('ntdb', noextension='ntdb') - opt.RECURSE('lib/replace') - opt.add_option('--valgrind', - help=("use valgrind on tests programs"), - action="store_true", dest='VALGRIND', default=False) - opt.add_option('--valgrind-log', - help=("where to put the valgrind log"), - action="store", dest='VALGRINDLOG', default=None) - - if opt.IN_LAUNCH_DIR(): - opt.add_option('--disable-python', - help=("disable the pyntdb module"), - action="store_true", dest='disable_python', default=False) - -def configure(conf): - conf.RECURSE('lib/replace') - conf.RECURSE('lib/ccan') - - conf.env.NTDB_TEST_RUN_SRC=['test/run-001-encode.c', - 'test/run-001-fls.c', - 'test/run-01-new_database.c', - 'test/run-02-expand.c', - 'test/run-03-coalesce.c', - 'test/run-04-basichash.c', - 'test/run-05-readonly-open.c', - 'test/run-10-simple-store.c', - 'test/run-11-simple-fetch.c', - 'test/run-12-check.c', - 'test/run-15-append.c', - 'test/run-25-hashoverload.c', - 'test/run-30-exhaust-before-expand.c', - 'test/run-35-convert.c', - 'test/run-50-multiple-freelists.c', - 'test/run-56-open-during-transaction.c', - 'test/run-57-die-during-transaction.c', 
- 'test/run-64-bit-tdb.c', - 'test/run-90-get-set-attributes.c', - 'test/run-capabilities.c', - 'test/run-expand-in-transaction.c', - 'test/run-features.c', - 'test/run-lockall.c', - 'test/run-remap-in-read_traverse.c', - 'test/run-seed.c', - 'test/run-tdb_errorstr.c', - 'test/run-tdb_foreach.c', - 'test/run-traverse.c'] - conf.env.NTDB_TEST_API_SRC=['test/api-12-store.c', - 'test/api-13-delete.c', - 'test/api-14-exists.c', - 'test/api-16-wipe_all.c', - 'test/api-20-alloc-attr.c', - 'test/api-21-parse_record.c', - 'test/api-55-transaction.c', - 'test/api-60-noop-transaction.c', - 'test/api-80-tdb_fd.c', - 'test/api-81-seqnum.c', - 'test/api-82-lockattr.c', - 'test/api-83-openhook.c', - 'test/api-91-get-stats.c', - 'test/api-92-get-set-readonly.c', - 'test/api-93-repack.c', - 'test/api-94-expand-during-parse.c', - 'test/api-95-read-only-during-parse.c', - 'test/api-add-remove-flags.c', - 'test/api-check-callback.c', - 'test/api-firstkey-nextkey.c', - 'test/api-fork-test.c', - 'test/api-locktimeout.c', - 'test/api-missing-entries.c', - 'test/api-open-multiple-times.c', - 'test/api-record-expand.c', - 'test/api-simple-delete.c', - 'test/api-summary.c'] - conf.env.NTDB_TEST_API_PY=['test/python-api.py'] - conf.env.NTDB_TEST_API_HELPER_SRC=['test/helpapi-external-agent.c'] - conf.env.NTDB_TEST_RUN_HELPER_SRC=['test/helprun-external-agent.c', - 'test/helprun-layout.c'] - conf.env.NTDB_TEST_HELPER_SRC=['test/external-agent.c', - 'test/failtest_helper.c', - 'test/lock-tracking.c', - 'test/logging.c', - 'test/tap-interface.c'] - - conf.env.standalone_ntdb = conf.IN_LAUNCH_DIR() - conf.env.disable_python = getattr(Options.options, 'disable_python', False) - - if not conf.env.standalone_ntdb: - if conf.CHECK_BUNDLED_SYSTEM('ntdb', minversion=VERSION, - implied_deps='replace'): - conf.define('USING_SYSTEM_NTDB', 1) - if conf.CHECK_BUNDLED_SYSTEM_PYTHON('pyntdb', 'ntdb', minversion=VERSION): - conf.define('USING_SYSTEM_PYNTDB', 1) - - if not conf.env.disable_python: - # also 
disable if we don't have the python libs installed - conf.find_program('python', var='PYTHON') - conf.check_tool('python') - conf.check_python_version((2,4,2)) - conf.SAMBA_CHECK_PYTHON_HEADERS(mandatory=False) - if not conf.env.HAVE_PYTHON_H: - Logs.warn('Disabling pyntdb as python devel libs not found') - conf.env.disable_python = True - - conf.CHECK_XSLTPROC_MANPAGES() - - # This make #include work. - conf.ADD_EXTRA_INCLUDES('''#lib''') - - conf.SAMBA_CONFIG_H() - -def build(bld): - bld.RECURSE('lib/replace') - bld.RECURSE('lib/ccan') - - if bld.env.standalone_ntdb: - bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' - private_library = False - else: - private_library = True - - SRC = '''check.c free.c hash.c io.c lock.c open.c - summary.c ntdb.c transaction.c traverse.c''' - - if not bld.CONFIG_SET('USING_SYSTEM_NTDB'): - NTDB_CCAN='ccan-likely ccan-ilog ccan-hash ccan-tally' - bld.SAMBA_LIBRARY('ntdb', - SRC, - deps='replace ' + NTDB_CCAN , - includes='.', - abi_directory='ABI', - abi_match='ntdb_*', - hide_symbols=True, - vnum=VERSION, - public_headers='ntdb.h', - public_headers_install=not private_library, - pc_files='ntdb.pc', - private_library=private_library, - manpages='man/ntdb.3') - - bld.SAMBA_BINARY('ntdbtorture', - 'tools/ntdbtorture.c', - deps='ntdb ccan-err', - install=False) - - bld.SAMBA_BINARY('ntdbtool', - 'tools/ntdbtool.c', - deps='ntdb', manpages='man/ntdbtool.8') - - bld.SAMBA_BINARY('ntdbdump', - 'tools/ntdbdump.c', - deps='ntdb', manpages='man/ntdbdump.8') - - bld.SAMBA_BINARY('ntdbrestore', - 'tools/ntdbrestore.c', - deps='ntdb', manpages='man/ntdbrestore.8') - - bld.SAMBA_BINARY('ntdbbackup', - 'tools/ntdbbackup.c', - deps='ntdb', manpages='man/ntdbbackup.8') - - if bld.env.DEVELOPER_MODE: - # FIXME: We need CCAN for some API tests, but waf thinks it's - # already available via ntdb. It is, but not publicly. - # Workaround is to build a private, non-hiding version. 
- bld.SAMBA_SUBSYSTEM('ntdb-testing', - SRC, - deps='replace ' + NTDB_CCAN, - includes='.') - - bld.SAMBA_SUBSYSTEM('ntdb-test-helpers', - bld.env.NTDB_TEST_HELPER_SRC, - deps='replace', - allow_warnings=True) - bld.SAMBA_SUBSYSTEM('ntdb-run-helpers', - bld.env.NTDB_TEST_RUN_HELPER_SRC, - deps='replace') - bld.SAMBA_SUBSYSTEM('ntdb-api-helpers', - bld.env.NTDB_TEST_API_HELPER_SRC, - deps='replace') - - for f in bld.env.NTDB_TEST_RUN_SRC: - base = os.path.splitext(os.path.basename(f))[0] - bld.SAMBA_BINARY('ntdb-' + base, f, - deps=NTDB_CCAN + ' ccan-failtest ntdb-test-helpers ntdb-run-helpers', - install=False) - - for f in bld.env.NTDB_TEST_API_SRC: - base = os.path.splitext(os.path.basename(f))[0] - bld.SAMBA_BINARY('ntdb-' + base, f, - deps='ntdb-test-helpers ntdb-api-helpers ntdb-testing', - install=False) - - if not bld.CONFIG_SET('USING_SYSTEM_PYNTDB'): - bld.SAMBA_PYTHON('pyntdb', - source='pyntdb.c', - deps='ntdb', - enabled=not bld.env.disable_python, - realname='ntdb.so', - cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION) - -def testonly(ctx): - '''run ntdb testsuite''' - import Utils, samba_utils, shutil - ecode = 0; - - env = samba_utils.LOAD_ENVIRONMENT() - - if env.standalone_ntdb: - # FIXME: This is horrible :( - test_prefix = "%s/st" % (Utils.g_module.blddir) - shutil.rmtree(test_prefix, ignore_errors=True) - os.makedirs(test_prefix) - - # Create scratch directory for tests. 
- testdir = os.path.join(test_prefix, 'ntdb-tests') - samba_utils.mkdir_p(testdir) - # Symlink back to source dir so it can find tests in test/ - link = os.path.join(testdir, 'test') - if not os.path.exists(link): - os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link) - - if env.options['VALGRIND']: - os.environ['VALGRIND'] = 'valgrind -q --num-callers=30 --error-exitcode=11' - if env.options['VALGRINDLOG']: - os.environ['VALGRIND'] += ' --log-file=%s' % Options.options.VALGRINDLOG - - for f in env.NTDB_TEST_RUN_SRC + env.NTDB_TEST_API_SRC: - name = "ntdb-" + os.path.splitext(os.path.basename(f))[0] - cmd = "cd " + testdir + " && $VALGRIND " + os.path.abspath(os.path.join(Utils.g_module.blddir, name)) + " > test-output 2>&1" - print("..." + f) - ret = samba_utils.RUN_COMMAND(cmd) - if ret != 0: - print("%s (%s) failed:" % (name, f)) - samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output')) - ecode = ret; - break; - if not env.disable_python: - for f in env.NTDB_TEST_API_PY: - print("..." + f) - cmd = "cd " + testdir + " && PYTHONPATH=%s %s %s > test-output 2>&1" % ( - os.path.abspath(os.path.join(Utils.g_module.blddir, "python")), - env["PYTHON"], os.path.abspath(f)) - ret = samba_utils.RUN_COMMAND(cmd) - if ret != 0: - print("%s (%s) failed:" % (name, f)) - samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output')) - ecode = ret - break - - sys.exit(ecode) - -# WAF doesn't build the unit tests for this, maybe because they don't link with ntdb? 
-# This forces it -def test(ctx): - import Scripting - Scripting.commands.append('build') - Scripting.commands.append('testonly') - -def dist(): - '''makes a tarball for distribution''' - samba_dist.dist() - -def reconfigure(ctx): - '''reconfigure if config scripts have changed''' - import samba_utils - samba_utils.reconfigure(ctx) diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/ABI/ntdb-0.9.sigs b/junkcode/rusty@rustcorp.com.au-ntdb/ABI/ntdb-0.9.sigs new file mode 100644 index 00000000..6b12ddbd --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/ABI/ntdb-0.9.sigs @@ -0,0 +1,38 @@ +ntdb_add_flag: void (struct ntdb_context *, unsigned int) +ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA) +ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) +ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) +ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA) +ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA) +ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) +ntdb_close: int (struct ntdb_context *) +ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) +ntdb_errorstr: const char *(enum NTDB_ERROR) +ntdb_exists: bool (struct ntdb_context *, NTDB_DATA) +ntdb_fd: int (const struct ntdb_context *) +ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *) +ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) +ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *) +ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *) +ntdb_get_flags: unsigned int (struct ntdb_context *) +ntdb_get_seqnum: int64_t (struct ntdb_context *) +ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *) +ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *) +ntdb_name: const char *(const struct ntdb_context *) +ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) 
+ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *) +ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) +ntdb_remove_flag: void (struct ntdb_context *, unsigned int) +ntdb_repack: enum NTDB_ERROR (struct ntdb_context *) +ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *) +ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int) +ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **) +ntdb_transaction_cancel: void (struct ntdb_context *) +ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *) +ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *) +ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *) +ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *) +ntdb_unlockall: void (struct ntdb_context *) +ntdb_unlockall_read: void (struct ntdb_context *) +ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type) +ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *) diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/ABI/ntdb-1.0.sigs b/junkcode/rusty@rustcorp.com.au-ntdb/ABI/ntdb-1.0.sigs new file mode 100644 index 00000000..6b12ddbd --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/ABI/ntdb-1.0.sigs @@ -0,0 +1,38 @@ +ntdb_add_flag: void (struct ntdb_context *, unsigned int) +ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA) +ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) +ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) +ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA) +ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA) +ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) +ntdb_close: int 
(struct ntdb_context *) +ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA) +ntdb_errorstr: const char *(enum NTDB_ERROR) +ntdb_exists: bool (struct ntdb_context *, NTDB_DATA) +ntdb_fd: int (const struct ntdb_context *) +ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *) +ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) +ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *) +ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *) +ntdb_get_flags: unsigned int (struct ntdb_context *) +ntdb_get_seqnum: int64_t (struct ntdb_context *) +ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *) +ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *) +ntdb_name: const char *(const struct ntdb_context *) +ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *) +ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *) +ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *) +ntdb_remove_flag: void (struct ntdb_context *, unsigned int) +ntdb_repack: enum NTDB_ERROR (struct ntdb_context *) +ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *) +ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int) +ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **) +ntdb_transaction_cancel: void (struct ntdb_context *) +ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *) +ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *) +ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *) +ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *) +ntdb_unlockall: void (struct ntdb_context *) +ntdb_unlockall_read: void (struct ntdb_context *) +ntdb_unset_attribute: void (struct ntdb_context *, 
enum ntdb_attribute_type) +ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *) diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/LICENSE b/junkcode/rusty@rustcorp.com.au-ntdb/LICENSE new file mode 120000 index 00000000..74550445 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/LICENSE @@ -0,0 +1 @@ +../../licenses/LGPL-3 \ No newline at end of file diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/Makefile b/junkcode/rusty@rustcorp.com.au-ntdb/Makefile new file mode 100644 index 00000000..3ce5fd16 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/Makefile @@ -0,0 +1,80 @@ +CC=gcc +CFLAGS=-g -O0 -Wall -W -I../../ -I./ +LIBS= + +LIBNTDB_OBJ = ccan_hash.o ccan_tally.o check.o free.o hash.o io.o lock.o open.o summary.o ntdb.o transaction.o traverse.o + +all: ntdbtorture ntdbtool ntdbdump ntdbrestore ntdbbackup + +ntdbtorture: tools/ntdbtorture.c libntdb.a + $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) + +ntdbtool: tools/ntdbtool.c libntdb.a + $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) + +ntdbdump: tools/ntdbdump.c libntdb.a + $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) + +ntdbrestore: tools/ntdbrestore.c libntdb.a + $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) + +ntdbbackup: tools/ntdbbackup.c libntdb.a + $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS) + +libntdb.a: $(LIBNTDB_OBJ) + @echo Creating library $@ + ar r libntdb.a $(LIBNTDB_OBJ) + ranlib libntdb.a + +check.o: check.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c check.c -o $@ + +free.o: free.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c free.c -o $@ + +hash.o: hash.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c hash.c -o $@ + +io.o: io.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c io.c -o $@ + +lock.o: lock.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c lock.c -o $@ + +open.o: open.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c open.c -o $@ + +summary.o: summary.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c summary.c -o $@ + +ntdb.o: ntdb.c + @echo 
Compiling $@ + $(CC) $(CFLAGS) -c ntdb.c -o $@ + +transaction.o: transaction.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c transaction.c -o $@ + +traverse.o: traverse.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c traverse.c -o $@ + +ccan_hash.o: ../hash/hash.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c ../hash/hash.c -o $@ + +ccan_tally.o: ../tally/tally.c + @echo Compiling $@ + $(CC) $(CFLAGS) -c ../tally/tally.c -o $@ + +clean: + rm -f *.o + rm -f *.a + rm -f tools/ntdbtorture tools/ntdbtool tools/ntdbdump tools/ntdbrestore tools/ntdbbackup diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/_info b/junkcode/rusty@rustcorp.com.au-ntdb/_info new file mode 100644 index 00000000..5aedb81a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/_info @@ -0,0 +1,72 @@ +#include "config.h" +#include +#include + +/** + * ntdb - Next Generation Trivial Database + * + * This package provides an experimental persistent keyword/data store. + * Its main advantage over tdb is that it's 64-bit. + * + * Example: + * #include + * #include + * #include + * #include + * + * int main(int argc, char *argv[]) + * { + * NTDB_DATA key = ntdb_mkdata("key", 3); + * NTDB_DATA val = ntdb_mkdata("val", 3); + * struct ntdb_context *ntdb; + * + * ntdb = ntdb_open("example.ntdb", NTDB_DEFAULT, + * O_RDWR | O_CREAT | O_TRUNC, 0600, NULL); + * if (ntdb == NULL) + * errx(1, "failed to open database file"); + * + * ntdb_store(ntdb, key, val, NTDB_INSERT); + * + * ntdb_close(ntdb); + * + * return 0; + * } + * + * License: LGPL (v3 or any later version) + * Authors: Rusty Russell + * Andrew Tridgell + * Jeremy Allison + * Jelmer Vernooij + * Volker Lendecke + * Andrew Esh + * Simon McVittie + * Tim Potter + * Maintainer: Rusty Russell + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/asearch\n"); + printf("ccan/build_assert\n"); + printf("ccan/cast\n"); + printf("ccan/compiler\n"); + printf("ccan/endian\n"); + 
printf("ccan/hash\n"); + printf("ccan/ilog\n"); + printf("ccan/likely\n"); + printf("ccan/tally\n"); + printf("ccan/typesafe_cb\n"); + return 0; + } + + if (strcmp(argv[1], "testdepends") == 0) { + printf("ccan/failtest\n"); + printf("ccan/err\n"); + return 0; + } + + return 1; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/check.c b/junkcode/rusty@rustcorp.com.au-ntdb/check.c new file mode 100644 index 00000000..f2423945 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/check.c @@ -0,0 +1,726 @@ + /* + Trivial Database 2: free list/block handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include +#include + +/* We keep an ordered array of offsets. 
*/ +static bool append(struct ntdb_context *ntdb, + ntdb_off_t **arr, size_t *num, ntdb_off_t off) +{ + ntdb_off_t *new; + + if (*num == 0) { + new = ntdb->alloc_fn(ntdb, sizeof(ntdb_off_t), ntdb->alloc_data); + } else { + new = ntdb->expand_fn(*arr, (*num + 1) * sizeof(ntdb_off_t), + ntdb->alloc_data); + } + if (!new) + return false; + new[(*num)++] = off; + *arr = new; + return true; +} + +static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, + ntdb_off_t *recovery, + uint64_t *features, + size_t *num_capabilities) +{ + uint64_t hash_test; + struct ntdb_header hdr; + enum NTDB_ERROR ecode; + ntdb_off_t off, next; + + ecode = ntdb_read_convert(ntdb, 0, &hdr, sizeof(hdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + /* magic food should not be converted, so convert back. */ + ntdb_convert(ntdb, hdr.magic_food, sizeof(hdr.magic_food)); + + hash_test = NTDB_HASH_MAGIC; + hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test)); + if (hdr.hash_test != hash_test) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "check: hash test %llu should be %llu", + (long long)hdr.hash_test, + (long long)hash_test); + } + + if (strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "check: bad magic '%.*s'", + (unsigned)sizeof(hdr.magic_food), + hdr.magic_food); + } + + /* Features which are used must be a subset of features offered. 
*/ + if (hdr.features_used & ~hdr.features_offered) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "check: features used (0x%llx) which" + " are not offered (0x%llx)", + (long long)hdr.features_used, + (long long)hdr.features_offered); + } + + *features = hdr.features_offered; + *recovery = hdr.recovery; + if (*recovery) { + if (*recovery < sizeof(hdr) + || *recovery > ntdb->file->map_size) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check:" + " invalid recovery offset %zu", + (size_t)*recovery); + } + } + + for (off = hdr.capabilities; off && ecode == NTDB_SUCCESS; off = next) { + const struct ntdb_capability *cap; + enum NTDB_ERROR e; + + cap = ntdb_access_read(ntdb, off, sizeof(*cap), true); + if (NTDB_PTR_IS_ERR(cap)) { + return NTDB_PTR_ERR(cap); + } + + /* All capabilities are unknown. */ + e = unknown_capability(ntdb, "ntdb_check", cap->type); + next = cap->next; + ntdb_access_release(ntdb, cap); + if (e) + return e; + (*num_capabilities)++; + } + + /* Don't check reserved: they *can* be used later. */ + return NTDB_SUCCESS; +} + +static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b, void *ctx) +{ + /* Can overflow an int. */ + return *a > *b ? 1 + : *a < *b ? -1 + : 0; +} + +static enum NTDB_ERROR check_entry(struct ntdb_context *ntdb, + ntdb_off_t off_and_hash, + ntdb_len_t bucket, + ntdb_off_t used[], + size_t num_used, + size_t *num_found, + enum NTDB_ERROR (*check)(NTDB_DATA, + NTDB_DATA, + void *), + void *data) +{ + enum NTDB_ERROR ecode; + const struct ntdb_used_record *r; + const unsigned char *kptr; + ntdb_len_t klen, dlen; + uint32_t hash; + ntdb_off_t off = off_and_hash & NTDB_OFF_MASK; + ntdb_off_t *p; + + /* Empty bucket is fine. */ + if (!off_and_hash) { + return NTDB_SUCCESS; + } + + /* This can't point to a chain, we handled those at toplevel. 
*/ + if (off_and_hash & (1ULL << NTDB_OFF_CHAIN_BIT)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Invalid chain bit in offset " + " %llu", (long long)off_and_hash); + } + + p = asearch(&off, used, num_used, off_cmp, NULL); + if (!p) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Invalid offset" + " %llu in hash", (long long)off); + } + /* Mark it invalid. */ + *p ^= 1; + (*num_found)++; + + r = ntdb_access_read(ntdb, off, sizeof(*r), true); + if (NTDB_PTR_IS_ERR(r)) { + return NTDB_PTR_ERR(r); + } + klen = rec_key_length(r); + dlen = rec_data_length(r); + ntdb_access_release(ntdb, r); + + kptr = ntdb_access_read(ntdb, off + sizeof(*r), klen + dlen, false); + if (NTDB_PTR_IS_ERR(kptr)) { + return NTDB_PTR_ERR(kptr); + } + + hash = ntdb_hash(ntdb, kptr, klen); + + /* Are we in the right chain? */ + if (bits_from(hash, 0, ntdb->hash_bits) != bucket) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: Bad bucket %u vs %llu", + bits_from(hash, 0, ntdb->hash_bits), + (long long)bucket); + /* Next 8 bits should be the same as top bits of bucket. 
*/ + } else if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL) + != bits_from(off_and_hash, 64-NTDB_OFF_UPPER_STEAL, + NTDB_OFF_UPPER_STEAL)) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: Bad hash bits %llu vs %llu", + (long long)off_and_hash, + (long long)hash); + } else if (check) { + NTDB_DATA k, d; + + k = ntdb_mkdata(kptr, klen); + d = ntdb_mkdata(kptr + klen, dlen); + ecode = check(k, d, data); + } else { + ecode = NTDB_SUCCESS; + } + ntdb_access_release(ntdb, kptr); + + return ecode; +} + +static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb, + ntdb_off_t off, + ntdb_len_t bucket, + ntdb_off_t used[], + size_t num_used, + size_t *num_found, + enum NTDB_ERROR (*check)(NTDB_DATA, + NTDB_DATA, + void *), + void *data) +{ + struct ntdb_used_record rec; + enum NTDB_ERROR ecode; + const ntdb_off_t *entries; + ntdb_len_t i, num; + + /* This is a used entry. */ + (*num_found)++; + + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash chain magic %llu", + (long long)rec_magic(&rec)); + } + + if (rec_data_length(&rec) % sizeof(ntdb_off_t)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash chain data length %llu", + (long long)rec_data_length(&rec)); + } + + if (rec_key_length(&rec) != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash chain key length %llu", + (long long)rec_key_length(&rec)); + } + + off += sizeof(rec); + num = rec_data_length(&rec) / sizeof(ntdb_off_t); + entries = ntdb_access_read(ntdb, off, rec_data_length(&rec), true); + if (NTDB_PTR_IS_ERR(entries)) { + return NTDB_PTR_ERR(entries); + } + + /* Check each non-deleted entry in chain. 
*/ + for (i = 0; i < num; i++) { + ecode = check_entry(ntdb, entries[i], bucket, + used, num_used, num_found, check, data); + if (ecode) { + break; + } + } + + ntdb_access_release(ntdb, entries); + return ecode; +} + +static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb, + ntdb_off_t used[], + size_t num_used, + size_t num_other_used, + enum NTDB_ERROR (*check)(NTDB_DATA, + NTDB_DATA, + void *), + void *data) +{ + enum NTDB_ERROR ecode; + struct ntdb_used_record rec; + const ntdb_off_t *entries; + ntdb_len_t i; + /* Free tables and capabilities also show up as used, as do we. */ + size_t num_found = num_other_used + 1; + + ecode = ntdb_read_convert(ntdb, NTDB_HASH_OFFSET, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash table magic %llu", + (long long)rec_magic(&rec)); + } + + if (rec_data_length(&rec) != (sizeof(ntdb_off_t) << ntdb->hash_bits)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash table data length %llu", + (long long)rec_data_length(&rec)); + } + + if (rec_key_length(&rec) != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash table key length %llu", + (long long)rec_key_length(&rec)); + } + + entries = ntdb_access_read(ntdb, NTDB_HASH_OFFSET + sizeof(rec), + rec_data_length(&rec), true); + if (NTDB_PTR_IS_ERR(entries)) { + return NTDB_PTR_ERR(entries); + } + + for (i = 0; i < (1 << ntdb->hash_bits); i++) { + ntdb_off_t off = entries[i] & NTDB_OFF_MASK; + if (entries[i] & (1ULL << NTDB_OFF_CHAIN_BIT)) { + ecode = check_hash_chain(ntdb, off, i, + used, num_used, &num_found, + check, data); + } else { + ecode = check_entry(ntdb, entries[i], i, + used, num_used, &num_found, + check, data); + } + if (ecode) { + break; + } + } + ntdb_access_release(ntdb, entries); + + if (ecode == NTDB_SUCCESS && num_found != num_used) { + 
ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Not all entries are in hash"); + } + return ecode; +} + +static enum NTDB_ERROR check_free(struct ntdb_context *ntdb, + ntdb_off_t off, + const struct ntdb_free_record *frec, + ntdb_off_t prev, unsigned int ftable, + unsigned int bucket) +{ + enum NTDB_ERROR ecode; + + if (frec_magic(frec) != NTDB_FREE_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: offset %llu bad magic 0x%llx", + (long long)off, + (long long)frec->magic_and_prev); + } + if (frec_ftable(frec) != ftable) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: offset %llu bad freetable %u", + (long long)off, frec_ftable(frec)); + + } + + ecode = ntdb_oob(ntdb, off, + frec_len(frec) + sizeof(struct ntdb_used_record), + false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + if (size_to_bucket(frec_len(frec)) != bucket) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: offset %llu in wrong bucket" + " (%u vs %u)", + (long long)off, + bucket, size_to_bucket(frec_len(frec))); + } + if (prev && prev != frec_prev(frec)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: offset %llu bad prev" + " (%llu vs %llu)", + (long long)off, + (long long)prev, (long long)frec_len(frec)); + } + return NTDB_SUCCESS; +} + +static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb, + ntdb_off_t ftable_off, + unsigned ftable_num, + ntdb_off_t fr[], + size_t num_free, + size_t *num_found) +{ + struct ntdb_freetable ft; + ntdb_off_t h; + unsigned int i; + enum NTDB_ERROR ecode; + + ecode = ntdb_read_convert(ntdb, ftable_off, &ft, sizeof(ft)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&ft.hdr) != NTDB_FTABLE_MAGIC + || rec_key_length(&ft.hdr) != 0 + || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Invalid header 
on free table"); + } + + for (i = 0; i < NTDB_FREE_BUCKETS; i++) { + ntdb_off_t off, prev = 0, *p, first = 0; + struct ntdb_free_record f; + + h = bucket_off(ftable_off, i); + for (off = ntdb_read_off(ntdb, h); off; off = f.next) { + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + if (!first) { + off &= NTDB_OFF_MASK; + first = off; + } + ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + ecode = check_free(ntdb, off, &f, prev, ftable_num, i); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* FIXME: Check hash bits */ + p = asearch(&off, fr, num_free, off_cmp, NULL); + if (!p) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: Invalid offset" + " %llu in free table", + (long long)off); + } + /* Mark it invalid. */ + *p ^= 1; + (*num_found)++; + prev = off; + } + + if (first) { + /* Now we can check first back pointer. */ + ecode = ntdb_read_convert(ntdb, first, &f, sizeof(f)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + ecode = check_free(ntdb, first, &f, prev, ftable_num, i); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + } + } + return NTDB_SUCCESS; +} + +/* Slow, but should be very rare. 
*/ +ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off) +{ + size_t len; + enum NTDB_ERROR ecode; + + for (len = 0; off + len < ntdb->file->map_size; len++) { + char c; + ecode = ntdb->io->tread(ntdb, off + len, &c, 1); /* examine each byte in turn, not just the first */ + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + if (c != 0 && c != 0x43) + break; + } + return len; +} + +static enum NTDB_ERROR check_linear(struct ntdb_context *ntdb, + ntdb_off_t **used, size_t *num_used, + ntdb_off_t **fr, size_t *num_free, + uint64_t features, ntdb_off_t recovery) +{ + ntdb_off_t off; + ntdb_len_t len; + enum NTDB_ERROR ecode; + bool found_recovery = false; + + for (off = sizeof(struct ntdb_header); + off < ntdb->file->map_size; + off += len) { + union { + struct ntdb_used_record u; + struct ntdb_free_record f; + struct ntdb_recovery_record r; + } rec; + /* r is larger: only get that if we need to. */ + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.f)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* If we crash after ftruncate, we can get zeroes or fill. 
*/ + if (rec.r.magic == NTDB_RECOVERY_INVALID_MAGIC + || rec.r.magic == 0x4343434343434343ULL) { + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + if (recovery == off) { + found_recovery = true; + len = sizeof(rec.r) + rec.r.max_len; + } else { + len = dead_space(ntdb, off); + if (NTDB_OFF_IS_ERR(len)) { + return NTDB_OFF_TO_ERR(len); + } + if (len < sizeof(rec.r)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: invalid" + " dead space at %zu", + (size_t)off); + } + + ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, + "Dead space at %zu-%zu (of %zu)", + (size_t)off, (size_t)(off + len), + (size_t)ntdb->file->map_size); + } + } else if (rec.r.magic == NTDB_RECOVERY_MAGIC) { + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + if (recovery != off) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: unexpected" + " recovery record at offset" + " %zu", + (size_t)off); + } + if (rec.r.len > rec.r.max_len) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: invalid recovery" + " length %zu", + (size_t)rec.r.len); + } + if (rec.r.eof > ntdb->file->map_size) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: invalid old EOF" + " %zu", (size_t)rec.r.eof); + } + found_recovery = true; + len = sizeof(rec.r) + rec.r.max_len; + } else if (frec_magic(&rec.f) == NTDB_FREE_MAGIC) { + len = sizeof(rec.u) + frec_len(&rec.f); + if (off + len > ntdb->file->map_size) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: free overlength" + " %llu at offset %llu", + (long long)len, + (long long)off); + } + /* This record should be in free lists. 
*/ + if (frec_ftable(&rec.f) != NTDB_FTABLE_NONE + && !append(ntdb, fr, num_free, off)) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, + NTDB_LOG_ERROR, + "ntdb_check: tracking %zu'th" + " free record.", *num_free); + } + } else if (rec_magic(&rec.u) == NTDB_USED_MAGIC + || rec_magic(&rec.u) == NTDB_CHAIN_MAGIC + || rec_magic(&rec.u) == NTDB_HTABLE_MAGIC + || rec_magic(&rec.u) == NTDB_FTABLE_MAGIC + || rec_magic(&rec.u) == NTDB_CAP_MAGIC) { + uint64_t klen, dlen, extra; + + /* This record is used! */ + if (!append(ntdb, used, num_used, off)) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, + NTDB_LOG_ERROR, + "ntdb_check: tracking %zu'th" + " used record.", *num_used); + } + + klen = rec_key_length(&rec.u); + dlen = rec_data_length(&rec.u); + extra = rec_extra_padding(&rec.u); + + len = sizeof(rec.u) + klen + dlen + extra; + if (off + len > ntdb->file->map_size) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: used overlength" + " %llu at offset %llu", + (long long)len, + (long long)off); + } + + if (len < sizeof(rec.f)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: too short record" + " %llu at %llu", + (long long)len, + (long long)off); + } + + /* Check that records have correct 0 at end (but may + * not in future). */ + if (extra && !features + && rec_magic(&rec.u) != NTDB_CAP_MAGIC) { + const char *p; + char c; + p = ntdb_access_read(ntdb, off + sizeof(rec.u) + + klen + dlen, 1, false); + if (NTDB_PTR_IS_ERR(p)) + return NTDB_PTR_ERR(p); + c = *p; + ntdb_access_release(ntdb, p); + + if (c != '\0') { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check:" + " non-zero extra" + " at %llu", + (long long)off); + } + } + } else { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: Bad magic 0x%llx" + " at offset %zu", + (long long)rec_magic(&rec.u), + (size_t)off); + } + } + + /* We must have found recovery area if there was one. 
*/ + if (recovery != 0 && !found_recovery) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: expected a recovery area at %zu", + (size_t)recovery); + } + + return NTDB_SUCCESS; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb, + enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *), + void *data) +{ + ntdb_off_t *fr = NULL, *used = NULL; + ntdb_off_t ft = 0, recovery = 0; + size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0, + num_capabilities = 0; + uint64_t features = 0; + enum NTDB_ERROR ecode; + + if (ntdb->flags & NTDB_CANT_CHECK) { + return ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, + "ntdb_check: database has unknown capability," + " cannot check."); + } + + ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + ecode = ntdb_lock_expand(ntdb, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + ntdb_allrecord_unlock(ntdb, F_RDLCK); + return ecode; + } + + ecode = check_header(ntdb, &recovery, &features, &num_capabilities); + if (ecode != NTDB_SUCCESS) + goto out; + + /* First we do a linear scan, checking all records. */ + ecode = check_linear(ntdb, &used, &num_used, &fr, &num_free, features, + recovery); + if (ecode != NTDB_SUCCESS) + goto out; + + for (ft = first_ftable(ntdb); ft; ft = next_ftable(ntdb, ft)) { + if (NTDB_OFF_IS_ERR(ft)) { + ecode = NTDB_OFF_TO_ERR(ft); + goto out; + } + ecode = check_free_table(ntdb, ft, num_ftables, fr, num_free, + &num_found); + if (ecode != NTDB_SUCCESS) + goto out; + num_ftables++; + } + + /* FIXME: Check key uniqueness? 
*/ + ecode = check_hash(ntdb, used, num_used, num_ftables + num_capabilities, + check, data); + if (ecode != NTDB_SUCCESS) + goto out; + + if (num_found != num_free) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Not all entries are in" + " free table"); + } + +out: + ntdb_allrecord_unlock(ntdb, F_RDLCK); + ntdb_unlock_expand(ntdb, F_RDLCK); + ntdb->free_fn(fr, ntdb->alloc_data); + ntdb->free_fn(used, ntdb->alloc_data); + return ecode; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/doc/TDB_porting.txt b/junkcode/rusty@rustcorp.com.au-ntdb/doc/TDB_porting.txt new file mode 100644 index 00000000..5daf94b7 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/doc/TDB_porting.txt @@ -0,0 +1,483 @@ +Interface differences between TDB and NTDB. + +- ntdb shares 'struct TDB_DATA' with tdb, but TDB defines the TDB_DATA + typedef, whereas ntdb defines NTDB_DATA (ie. both are compatible). + If you include both ntdb.h and tdb.h, #include tdb.h first, + otherwise you'll get a compile error when tdb.h re-defined struct + TDB_DATA. + + Example: + #include + #include + +- ntdb functions return NTDB_SUCCESS (ie 0) on success, and a negative + error on failure, whereas tdb functions returned 0 on success, and + -1 on failure. tdb then used tdb_error() to determine the error; + this API is nasty if we ever want to support threads, so is not supported. + + Example: + #include + #include + + void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d) + { + if (tdb_store(tdb, key, d) == -1) { + printf("store failed: %s\n", tdb_errorstr(tdb)); + } + } + + void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d) + { + enum NTDB_ERROR e; + + e = ntdb_store(ntdb, key, d); + if (e) { + printf("store failed: %s\n", ntdb_errorstr(e)); + } + } + +- ntdb's ntdb_fetch() returns an error, tdb's returned the data directly + (or tdb_null, and you were supposed to check tdb_error() to find out why). 
+ + Example: + #include <tdb.h> + #include <ntdb.h> + + void tdb_example(struct tdb_context *tdb, TDB_DATA key) + { + TDB_DATA data; + + data = tdb_fetch(tdb, key); + if (!data.dptr) { + printf("fetch failed: %s\n", tdb_errorstr(tdb)); + } + } + + void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key) + { + NTDB_DATA data; + enum NTDB_ERROR e; + + e = ntdb_fetch(ntdb, key, &data); + if (e) { + printf("fetch failed: %s\n", ntdb_errorstr(e)); + } + } + +- ntdb's ntdb_nextkey() frees the old key's dptr; in tdb you needed to do + this manually. + + Example: + #include <tdb.h> + #include <ntdb.h> + + void tdb_example(struct tdb_context *tdb) + { + TDB_DATA key, next, data; + + for (key = tdb_firstkey(tdb); key.dptr; key = next) { + printf("Got key!\n"); + next = tdb_nextkey(tdb, key); + free(key.dptr); + } + } + + + void ntdb_example(struct ntdb_context *ntdb) + { + NTDB_DATA k, data; + enum NTDB_ERROR e; + + for (e = ntdb_firstkey(ntdb,&k); !e; e = ntdb_nextkey(ntdb,&k)) + printf("Got key!\n"); + } + +- Unlike tdb_open/tdb_open_ex, ntdb_open does not allow NULL names, + even for NTDB_INTERNAL dbs, and thus ntdb_name() never returns NULL. + + Example: + #include <tdb.h> + #include <ntdb.h> + + struct tdb_context *tdb_example(void) + { + return tdb_open(NULL, 0, TDB_INTERNAL, O_RDWR, 0); + } + + struct ntdb_context *ntdb_example(void) + { + return ntdb_open("example", NTDB_INTERNAL, O_RDWR, 0, NULL); + } + +- ntdb uses a linked list of attribute structures to implement logging and + alternate hashes. tdb used tdb_open_ex, which was not extensible. 
+ + Example: + #include <tdb.h> + #include <ntdb.h> + + /* Custom hash function */ + static unsigned int my_tdb_hash_func(TDB_DATA *key) + { + return key->dsize; + } + + struct tdb_context *tdb_example(void) + { + return tdb_open_ex("example.tdb", 0, TDB_DEFAULT, + O_CREAT|O_RDWR, 0600, NULL, my_tdb_hash_func); + } + + /* Custom hash function */ + static unsigned int my_ntdb_hash_func(const void *key, size_t len, + uint32_t seed, void *data) + { + return len; + } + + struct ntdb_context *ntdb_example(void) + { + union ntdb_attribute hash; + + hash.base.attr = NTDB_ATTRIBUTE_HASH; + hash.base.next = NULL; + hash.hash.fn = my_ntdb_hash_func; + return ntdb_open("example.ntdb", NTDB_DEFAULT, + O_CREAT|O_RDWR, 0600, &hash); + } + +- tdb's tdb_open/tdb_open_ex took an explicit hash size, defaulting to + 131. ntdb uses an attribute for this, defaulting to 8192. + + Example: + #include <tdb.h> + #include <ntdb.h> + + struct tdb_context *tdb_example(void) + { + return tdb_open("example.tdb", 10007, TDB_DEFAULT, + O_CREAT|O_RDWR, 0600); + } + + struct ntdb_context *ntdb_example(void) + { + union ntdb_attribute hashsize; + + hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE; + hashsize.base.next = NULL; + hashsize.hashsize.size = 16384; + return ntdb_open("example.ntdb", NTDB_DEFAULT, + O_CREAT|O_RDWR, 0600, &hashsize); + } + +- ntdb's log function is simpler than tdb's log function. The string + is already formatted, is not terminated by a '\n', and it takes an + enum ntdb_log_level not a tdb_debug_level, and which has only three + values: NTDB_LOG_ERROR, NTDB_LOG_USE_ERROR and NTDB_LOG_WARNING. + + #include <tdb.h> + #include <ntdb.h> + + static void tdb_log(struct tdb_context *tdb, + enum tdb_debug_level level, const char *fmt, ...) 
+ { + va_list ap; + const char *name; + + switch (level) { + case TDB_DEBUG_FATAL: + fprintf(stderr, "FATAL: "); + break; + case TDB_DEBUG_ERROR: + fprintf(stderr, "ERROR: "); + break; + case TDB_DEBUG_WARNING: + fprintf(stderr, "WARNING: "); + break; + case TDB_DEBUG_TRACE: + /* Don't print out tracing. */ + return; + } + + name = tdb_name(tdb); + if (!name) { + name = "unnamed"; + } + + fprintf(stderr, "tdb(%s):", name); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } + + struct tdb_context *tdb_example(void) + { + struct tdb_logging_context lctx; + + lctx.log_fn = tdb_log; + return tdb_open_ex("example.tdb", 0, TDB_DEFAULT, + O_CREAT|O_RDWR, 0600, &lctx, NULL); + } + + static void ntdb_log(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) + { + switch (level) { + case NTDB_LOG_ERROR: + fprintf(stderr, "ERROR: "); + break; + case NTDB_LOG_USE_ERROR: + /* We made a mistake, so abort. */ + abort(); + break; + case NTDB_LOG_WARNING: + fprintf(stderr, "WARNING: "); + break; + } + + fprintf(stderr, "ntdb(%s):%s:%s\n", + ntdb_name(ntdb), ntdb_errorstr(ecode), message); + } + + struct ntdb_context *ntdb_example(void) + { + union ntdb_attribute log; + + log.base.attr = NTDB_ATTRIBUTE_LOG; + log.base.next = NULL; + log.log.fn = ntdb_log; + return ntdb_open("example.ntdb", NTDB_DEFAULT, + O_CREAT|O_RDWR, 0600, &log); + } + +- ntdb provides ntdb_deq() for comparing two NTDB_DATA, and ntdb_mkdata() for + creating an NTDB_DATA. 
+ + #include <tdb.h> + #include <ntdb.h> + + void tdb_example(struct tdb_context *tdb) + { + TDB_DATA data, key; + + key.dsize = strlen("hello"); + key.dptr = "hello"; + data = tdb_fetch(tdb, key); + if (data.dsize == key.dsize + && !memcmp(data.dptr, key.dptr, key.dsize)) { + printf("key is same as data\n"); + } + free(data.dptr); + } + + void ntdb_example(struct ntdb_context *ntdb) + { + NTDB_DATA data, key; + + key = ntdb_mkdata("hello", strlen("hello")); + if (ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS) { + if (ntdb_deq(key, data)) { + printf("key is same as data\n"); + } + free(data.dptr); + } + } + +- ntdb's ntdb_parse_record() takes a type-checked callback data + pointer, not a void * (though a void * pointer still works). The + callback function is allowed to do read operations on the database, + or write operations if you first call ntdb_lockall(). TDB's + tdb_parse_record() did not allow any database access within the + callback, and could crash if you tried. + + Example: + #include <tdb.h> + #include <ntdb.h> + + static int tdb_parser(TDB_DATA key, TDB_DATA data, void *private_data) + { + TDB_DATA *expect = private_data; + + return data.dsize == expect->dsize + && !memcmp(data.dptr, expect->dptr, data.dsize); + } + + void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d) + { + switch (tdb_parse_record(tdb, key, tdb_parser, &d)) { + case -1: + printf("parse failed: %s\n", tdb_errorstr(tdb)); + break; + case 0: + printf("data was different!\n"); + break; + case 1: + printf("data was same!\n"); + break; + } + } + + static int ntdb_parser(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expect) + { + return ntdb_deq(data, *expect); + } + + void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d) + { + enum NTDB_ERROR e; + + e = ntdb_parse_record(ntdb, key, ntdb_parser, &d); + switch (e) { + case 0: + printf("data was different!\n"); + break; + case 1: + printf("data was same!\n"); + break; + default: + printf("parse failed: %s\n", ntdb_errorstr(e)); + break; + } + } + +- ntdb 
does locking on read-only databases (ie. O_RDONLY passed to ntdb_open). + tdb did not: use the NTDB_NOLOCK flag if you want to suppress locking. + + Example: + #include <tdb.h> + #include <ntdb.h> + + struct tdb_context *tdb_example(void) + { + return tdb_open("example.tdb", 0, TDB_DEFAULT, O_RDONLY, 0); + } + + struct ntdb_context *ntdb_example(void) + { + return ntdb_open("example.ntdb", NTDB_NOLOCK, O_RDONLY, 0, NULL); + } + +- Failure inside a transaction (such as a lock function failing) does + not implicitly cancel the transaction; you still need to call + ntdb_transaction_cancel(). + + #include <tdb.h> + #include <ntdb.h> + + void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d) + { + if (tdb_transaction_start(tdb) == -1) { + printf("transaction failed: %s\n", tdb_errorstr(tdb)); + return; + } + + if (tdb_store(tdb, key, d) == -1) { + printf("store failed: %s\n", tdb_errorstr(tdb)); + return; + } + if (tdb_transaction_commit(tdb) == -1) { + printf("commit failed: %s\n", tdb_errorstr(tdb)); + } + } + + void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d) + { + enum NTDB_ERROR e; + + e = ntdb_transaction_start(ntdb); + if (e) { + printf("transaction failed: %s\n", ntdb_errorstr(e)); + return; + } + + e = ntdb_store(ntdb, key, d); + if (e) { + printf("store failed: %s\n", ntdb_errorstr(e)); + ntdb_transaction_cancel(ntdb); + } + + e = ntdb_transaction_commit(ntdb); + if (e) { + printf("commit failed: %s\n", ntdb_errorstr(e)); + } + } + +- There is no NTDB_CLEAR_IF_FIRST flag; it has severe scalability and + API problems. If necessary, you can emulate this by using the open + hook and placing a 1-byte lock at offset 4. If your program forks + and exits, you will need to place this lock again in the child before + the parent exits. 
+ + Example: + + #include + #include + + struct tdb_context *tdb_example(void) + { + return tdb_open("example.tdb", 0, TDB_CLEAR_IF_FIRST, + O_CREAT|O_RDWR, 0600); + } + + static enum NTDB_ERROR clear_if_first(int fd, void *unused) + { + /* We hold a lock offset 4 always, so we can tell if + * anyone else is. */ + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 4; /* ACTIVE_LOCK */ + fl.l_len = 1; + + if (fcntl(fd, F_SETLK, &fl) == 0) { + /* We must be first ones to open it! Clear it. */ + if (ftruncate(fd, 0) != 0) { + return NTDB_ERR_IO; + } + } + fl.l_type = F_RDLCK; + if (fcntl(fd, F_SETLKW, &fl) != 0) { + return NTDB_ERR_IO; + } + return NTDB_SUCCESS; + } + + struct ntdb_context *ntdb_example(void) + { + union ntdb_attribute open_attr; + + open_attr.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK; + open_attr.openhook.base.next = NULL; + open_attr.openhook.fn = clear_if_first; + + return ntdb_open("example.ntdb", NTDB_DEFAULT, + O_CREAT|O_RDWR, 0600, &open_attr); + } + +- ntdb traversals are not reliable if the database is changed during + the traversal, ie your traversal may not cover all elements, or may + cover elements multiple times. As a special exception, deleting the + current record within ntdb_traverse() is reliable. + +- There is no ntdb_traverse_read, since ntdb_traverse does not hold + a lock across the entire traversal anyway. If you want to make sure + that your traversal function does not write to the database, you can + set and clear the NTDB_RDONLY flag around the traversal. + +- ntdb does not need tdb_reopen() or tdb_reopen_all(). 
If you call + fork() during certain operations, the child should close the + ntdb, or complete the operations before continuing to use the ntdb: + + ntdb_transaction_start(): child must call ntdb_transaction_cancel() + ntdb_lockall(): child must call ntdb_unlockall() + ntdb_lockall_read(): child must call ntdb_unlockall_read() + ntdb_chainlock(): child must call ntdb_chainunlock() + ntdb_parse_record() callback: child must return from ntdb_parse_record() + +- ntdb will not open a non-ntdb file, even if O_CREAT is specified. tdb + will overwrite an unknown file in that case. diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.lyx b/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.lyx new file mode 100644 index 00000000..5a10ee35 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.lyx @@ -0,0 +1,2727 @@ +#LyX 2.0 created this file. For more info see http://www.lyx.org/ +\lyxformat 413 +\begin_document +\begin_header +\textclass article +\use_default_options true +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding auto +\fontencoding global +\font_roman default +\font_sans default +\font_typewriter default +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 +\font_tt_scale 100 + +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\paperfontsize default +\use_hyperref false +\papersize default +\use_geometry false +\use_amsmath 1 +\use_esint 1 +\use_mhchem 1 +\use_mathdots 1 +\cite_engine basic +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\use_refstyle 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\quotes_language english +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes true +\output_changes true +\html_math_output 0 
+\html_css_as_file 0 +\html_be_strict false +\end_header + +\begin_body + +\begin_layout Title +NTDB: Redesigning The Trivial DataBase +\end_layout + +\begin_layout Author +Rusty Russell, IBM Corporation +\end_layout + +\begin_layout Date +19 June 2012 +\end_layout + +\begin_layout Abstract +The Trivial DataBase on-disk format is 32 bits; with usage cases heading + towards the 4G limit, that must change. + This required breakage provides an opportunity to revisit TDB's other design + decisions and reassess them. +\end_layout + +\begin_layout Section +Introduction +\end_layout + +\begin_layout Standard +The Trivial DataBase was originally written by Andrew Tridgell as a simple + key/data pair storage system with the same API as dbm, but allowing multiple + readers and writers while being small enough (< 1000 lines of C) to include + in SAMBA. + The simple design created in 1999 has proven surprisingly robust and performant +, used in Samba versions 3 and 4 as well as numerous other projects. + Its useful life was greatly increased by the (backwards-compatible!) addition + of transaction support in 2005. +\end_layout + +\begin_layout Standard +The wider variety and greater demands of TDB-using code have led to some + organic growth of the API, as well as some compromises on the implementation. + None of these, by themselves, are seen as show-stoppers, but the cumulative + effect is a loss of elegance over the initial, simple TDB implementation. 
+ Here is a table of the approximate number of lines of implementation code + and number of API functions at the end of each year: +\end_layout + +\begin_layout Standard +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +Year End +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +API Functions +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +Lines of C Code Implementation +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +1999 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +13 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1195 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2000 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +24 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1725 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2001 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +32 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2228 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2002 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2481 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2003 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2552 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2004 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +40 +\end_layout + +\end_inset + + +\begin_inset Text + 
+\begin_layout Plain Layout +2584 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2005 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +38 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2647 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2006 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +52 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3754 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2007 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +66 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4398 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2008 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +71 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4768 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2009 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +73 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +5715 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +This review is an attempt to catalog and address all the known issues with + TDB and create solutions which address the problems without significantly + increasing complexity; all involved are far too aware of the dangers of + second system syndrome in rewriting a successful project like this. +\end_layout + +\begin_layout Standard +Note: the final decision was to make ntdb a separate library, with a separate + 'ntdb' namespace so both can potentially be linked together. 
+ This document still refers to +\begin_inset Quotes eld +\end_inset + +tdb +\begin_inset Quotes erd +\end_inset + + everywhere, for simplicity. +\end_layout + +\begin_layout Section +API Issues +\end_layout + +\begin_layout Subsection +tdb_open_ex Is Not Expandable +\end_layout + +\begin_layout Standard +The tdb_open() call was expanded to tdb_open_ex(), which added an optional + hashing function and an optional logging function argument. + Additional arguments to open would require the introduction of a tdb_open_ex2 + call etc. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\begin_inset CommandInset label +LatexCommand label +name "attributes" + +\end_inset + + +\end_layout + +\begin_layout Standard +tdb_open() will take a linked-list of attributes: +\end_layout + +\begin_layout LyX-Code +enum tdb_attribute { +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_LOG = 0, +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_HASH = 1 +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_base { +\end_layout + +\begin_layout LyX-Code + enum tdb_attribute attr; +\end_layout + +\begin_layout LyX-Code + union tdb_attribute *next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_log { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ +\end_layout + +\begin_layout LyX-Code + tdb_log_func log_fn; +\end_layout + +\begin_layout LyX-Code + void *log_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_hash { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ +\end_layout + +\begin_layout LyX-Code + tdb_hash_func hash_fn; +\end_layout + +\begin_layout LyX-Code + void *hash_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +union tdb_attribute { 
+\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_log log; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_hash hash; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +This allows future attributes to be added, even if this expands the size + of the union. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +tdb_traverse Makes Impossible Guarantees +\end_layout + +\begin_layout Standard +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it + was thought that it was important to guarantee that all records which exist + at the start and end of the traversal would be included, and no record + would be included twice. +\end_layout + +\begin_layout Standard +This adds complexity (see +\begin_inset CommandInset ref +LatexCommand ref +reference "Reliable-Traversal-Adds" + +\end_inset + +) and does not work anyway for records which are altered (in particular, + those which are expanded may be effectively deleted and re-added behind + the traversal). +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "traverse-Proposed-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Abandon the guarantee. + You will see every record if no changes occur during your traversal, otherwise + you will see some subset. + You can prevent changes by using a transaction or the locking API. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. + Delete-during-traverse will still delete every record, too (assuming no + other changes). +\end_layout + +\begin_layout Subsection +Nesting of Transactions Is Fraught +\end_layout + +\begin_layout Standard +TDB has alternated between allowing nested transactions and not allowing + them. 
+ Various paths in the Samba codebase assume that transactions will nest, + and in a sense they can: the operation is only committed to disk when the + outer transaction is committed. + There are two problems, however: +\end_layout + +\begin_layout Enumerate +Canceling the inner transaction will cause the outer transaction commit + to fail, and will not undo any operations since the inner transaction began. + This problem is soluble with some additional internal code. +\end_layout + +\begin_layout Enumerate +An inner transaction commit can be cancelled by the outer transaction. + This is desirable in the way which Samba's database initialization code + uses transactions, but could be a surprise to any users expecting a successful + transaction commit to expose changes to others. +\end_layout + +\begin_layout Standard +The current solution is to specify the behavior at tdb_open(), with the + default currently that nested transactions are allowed. + This flag can also be changed at runtime. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Given the usage patterns, it seems that the +\begin_inset Quotes eld +\end_inset + +least-surprise +\begin_inset Quotes erd +\end_inset + + behavior of disallowing nested transactions should become the default. + Additionally, it seems the outer transaction is the only code which knows + whether inner transactions should be allowed, so a flag to indicate this + could be added to tdb_transaction_start. + However, this behavior can be simulated with a wrapper which uses tdb_add_flags +() and tdb_remove_flags(), so the API should not be expanded for this relatively +-obscure case. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete; the nesting flag has been removed. 
+\end_layout + +\begin_layout Subsection +Incorrect Hash Function is Not Detected +\end_layout + +\begin_layout Standard +tdb_open_ex() allows the calling code to specify a different hash function + to use, but does not check that all other processes accessing this tdb + are using the same hash function. + The result is that records are missing from tdb_fetch(). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain an example hash result (eg. + the hash of 0xdeadbeef), and tdb_open_ex() should check that the given + hash function produces the same answer, or fail the tdb_open call. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +tdb_set_max_dead/TDB_VOLATILE Expose Implementation +\end_layout + +\begin_layout Standard +In response to scalability issues with the free list ( +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Freelist-Is" + +\end_inset + +) two API workarounds have been incorporated in TDB: tdb_set_max_dead() + and the TDB_VOLATILE flag to tdb_open. + The latter actually calls the former with an argument of +\begin_inset Quotes eld +\end_inset + +5 +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +This code allows deleted records to accumulate without putting them in the + free list. + On delete we iterate through each chain and free them in a batch if there + are more than max_dead entries. + These are never otherwise recycled except as a side-effect of a tdb_repack. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With the scalability problems of the freelist solved, this API can be removed. 
+ The TDB_VOLATILE flag may still be useful as a hint that store and delete + of records will be at least as common as fetch in order to allow some internal + tuning, but initially will become a no-op. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. + Unknown flags cause tdb_open() to fail as well, so they can be detected + at runtime. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Files-Cannot" + +\end_inset + +TDB Files Cannot Be Opened Multiple Times In The Same Process +\end_layout + +\begin_layout Standard +No process can open the same TDB twice; we check and disallow it. + This is an unfortunate side-effect of fcntl locks, which operate on a per-file + rather than per-file-descriptor basis, and do not nest. + Thus, closing any file descriptor on a file clears all the locks obtained + by this process, even if they were placed using a different file descriptor! +\end_layout + +\begin_layout Standard +Note that even if this were solved, deadlock could occur if operations were + nested: this is a more manageable programming error in most cases. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could lobby POSIX to fix the perverse rules, or at least lobby Linux + to violate them so that the most common implementation does not have this + restriction. + This would be a generally good idea for other fcntl lock users. +\end_layout + +\begin_layout Standard +Samba uses a wrapper which hands out the same tdb_context to multiple callers + if this happens, and does simple reference counting. + We should do this inside the tdb library, which already emulates lock nesting + internally; it would need to recognize when deadlock occurs within a single + process. 
+ This would create a new failure mode for tdb operations (while we currently + handle locking failures, they are impossible in normal use and a process + encountering them can do little but give up). +\end_layout + +\begin_layout Standard +I do not see benefit in an additional tdb_open flag to indicate whether + re-opening is allowed, although there may be some benefit to adding a + call to detect when a tdb_context is shared, to allow others to create such + an API. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB API Is Not POSIX Thread-safe +\end_layout + +\begin_layout Standard +The TDB API uses an error code which can be queried after an operation to + determine what went wrong. + This programming model does not work with threads, unless specific additional + guarantees are given by the implementation. + In addition, even otherwise-independent threads cannot open the same TDB + (as in +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Files-Cannot" + +\end_inset + +). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Rearchitecting the API to include a tdb_errcode pointer would be a great + deal of churn, but fortunately most functions return 0 on success and -1 + on error: we can change these to return 0 on success and a negative error + code on error, and the API remains similar to previous. + The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA + pointer and return an error code. + It is also simpler to have tdb_nextkey replace its key argument in place, + freeing up any old .dptr. +\end_layout + +\begin_layout Standard +Internal locking is required to make sure that fcntl locks do not overlap + between threads, and also that the global list of tdbs is maintained.
+\end_layout + +\begin_layout Standard +The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe + version of the library, and otherwise no overhead will exist. + Alternatively, a hooking mechanism similar to that proposed for +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + could be used to enable pthread locking at runtime. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete; API has been changed but thread safety has not been implemented. +\end_layout + +\begin_layout Subsection +*_nonblock Functions And *_mark Functions Expose Implementation +\end_layout + +\begin_layout Standard +CTDB +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +Clustered TDB, see http://ctdb.samba.org +\end_layout + +\end_inset + + wishes to operate on TDB in a non-blocking manner. + This is currently done as follows: +\end_layout + +\begin_layout Enumerate +Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). + If this fails: +\end_layout + +\begin_layout Enumerate +Fork a child process, and wait for it to call the normal variant (eg. + tdb_lockall). +\end_layout + +\begin_layout Enumerate +If the child succeeds, call the _mark variant to indicate we already have + the locks (eg. + tdb_lockall_mark). +\end_layout + +\begin_layout Enumerate +Upon completion, tell the child to release the locks (eg. + tdb_unlockall). +\end_layout + +\begin_layout Enumerate +Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). +\end_layout + +\begin_layout Standard +There are several issues with this approach. + Firstly, adding two new variants of each function clutters the API for + an obscure use, and so not all functions have three variants. 
+ Secondly, it assumes that all paths of the functions ask for the same locks, + otherwise the parent process will have to get a lock which the child doesn't + have under some circumstances. + I don't believe this is currently the case, but it constrains the implementatio +n. +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "Proposed-Solution-locking-hook" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Implement a hook for locking methods, so that the caller can control the + calls to create and remove fcntl locks. + In this scenario, ctdbd would operate as follows: +\end_layout + +\begin_layout Enumerate +Call the normal API function, eg tdb_lockall(). +\end_layout + +\begin_layout Enumerate +When the lock callback comes in, check if the child has the lock. + Initially, this is always false. + If so, return 0. + Otherwise, try to obtain it in non-blocking mode. + If that fails, return EWOULDBLOCK. +\end_layout + +\begin_layout Enumerate +Release locks in the unlock callback as normal. +\end_layout + +\begin_layout Enumerate +If tdb_lockall() fails, see if we recorded a lock failure; if so, call the + child to repeat the operation. +\end_layout + +\begin_layout Enumerate +The child records what locks it obtains, and returns that information to + the parent. +\end_layout + +\begin_layout Enumerate +When the child has succeeded, goto 1. +\end_layout + +\begin_layout Standard +This is flexible enough to handle any potential locking scenario, even when + lock requirements change. + It can be optimized so that the parent does not release locks, just tells + the child which locks it doesn't need to obtain. +\end_layout + +\begin_layout Standard +It also keeps the complexity out of the API, and in ctdbd where it is needed. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. 
+\end_layout + +\begin_layout Subsection +tdb_chainlock Functions Expose Implementation +\end_layout + +\begin_layout Standard +tdb_chainlock locks some number of records, including the record indicated + by the given key. + This gave atomicity guarantees; no-one can start a transaction, alter, + read or delete that key while the lock is held. +\end_layout + +\begin_layout Standard +It also makes the same guarantee for any other key in the chain, which is + an internal implementation detail and potentially a cause for deadlock. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + It would be nice to have an explicit single entry lock which affected no + other keys. + Unfortunately, this won't work for an entry which doesn't exist. + Thus while chainlock may be implemented more efficiently for the existing + case, it will still have overlap issues with the non-existing case. + So it is best to keep the current (lack of) guarantee about which records + will be affected to avoid constraining our implementation. +\end_layout + +\begin_layout Subsection +Signal Handling is Not Race-Free +\end_layout + +\begin_layout Standard +The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate + that the tdb locking code should return with a failure, rather than trying + again when a signal is received (and errno == EAGAIN). + This is usually used to implement timeouts. +\end_layout + +\begin_layout Standard +Unfortunately, this does not work in the case where the signal is received + before the tdb code enters the fcntl() call to place the lock: the code + will sleep within the fcntl() code, unaware that the signal wants it to + exit. + In the case of long timeouts, this does not happen in practice.
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The locking hooks proposed in +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + would allow the user to decide on whether to fail the lock acquisition + on a signal. + This allows the caller to choose their own compromise: they could narrow + the race by checking immediately before the fcntl call. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +It may be possible to make this race-free in some implementations by having + the signal handler alter the struct flock to make it invalid. + This will cause the fcntl() lock call to fail with EINVAL if the signal + occurs before the kernel is entered, otherwise EAGAIN. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +The API Uses Gratuitous Typedefs, Capitals +\end_layout + +\begin_layout Standard +typedefs are useful for providing source compatibility when types can differ + across implementations, or arguably in the case of function pointer definitions + which are hard for humans to parse. + Otherwise it is simply obfuscation and pollutes the namespace. +\end_layout + +\begin_layout Standard +Capitalization is usually reserved for compile-time constants and macros. +\end_layout + +\begin_layout Description +TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the + definition isn't visible to the API user anyway. +\end_layout + +\begin_layout Description +TDB_DATA There is no reason to use this over struct TDB_DATA; the struct + needs to be understood by the API user. +\end_layout + +\begin_layout Description +struct +\begin_inset space ~ +\end_inset + +TDB_DATA This would normally be called 'struct tdb_data'. 
+\end_layout + +\begin_layout Description +enum +\begin_inset space ~ +\end_inset + +TDB_ERROR Similarly, this would normally be enum tdb_error. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + Introducing lower case variants would please pedants like myself, but if + it were done the existing ones should be kept. + There is little point forcing a purely cosmetic change upon tdb users. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "tdb_log_func-Doesnt-Take" + +\end_inset + +tdb_log_func Doesn't Take The Private Pointer +\end_layout + +\begin_layout Standard +For API compatibility reasons, the logging function needs to call tdb_get_loggin +g_private() to retrieve the pointer registered by the tdb_open_ex for logging. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +It should simply take an extra argument, since we are prepared to break + the API/ABI. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Various Callback Functions Are Not Typesafe +\end_layout + +\begin_layout Standard +The callback functions in tdb_set_logging_function (after +\begin_inset CommandInset ref +LatexCommand ref +reference "tdb_log_func-Doesnt-Take" + +\end_inset + + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check + all take void * and must internally convert it to the argument type they + were expecting. +\end_layout + +\begin_layout Standard +If this type changes, the compiler will not produce warnings on the callers, + since it only sees void *. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With careful use of macros, we can create callback functions which give + a warning when used on gcc and the types of the callback and its private + argument differ. + Unsupported compilers will not give a warning, which is no worse than now. + In addition, the callbacks become clearer, as they need not use void * + for their parameter. +\end_layout + +\begin_layout Standard +See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic +\end_layout + +\begin_layout Standard +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should + be cleared if the caller discovers it is the only process with the TDB + open. + However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not + be detected, so will have the TDB erased underneath them (usually resulting + in a crash). +\end_layout + +\begin_layout Standard +There is a similar issue on fork(); if the parent exits (or otherwise closes + the tdb) before the child calls tdb_reopen_all() to establish the lock + used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener + at that moment will believe it alone has opened the TDB and will erase + it. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove TDB_CLEAR_IF_FIRST. + Other workarounds are possible, but see +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. + An open hook is provided to replicate this functionality if required. 
+\end_layout + +\begin_layout Subsection +Extending The Header Is Difficult +\end_layout + +\begin_layout Standard +We have reserved (zeroed) words in the TDB header, which can be used for + future features. + If the future features are compulsory, the version number must be updated + to prevent old code from accessing the database. + But if the future feature is optional, we have no way of telling if older + code is accessing the database or not. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain a +\begin_inset Quotes eld +\end_inset + +format variant +\begin_inset Quotes erd +\end_inset + + value (64-bit). + This is divided into two 32-bit parts: +\end_layout + +\begin_layout Enumerate +The lower part reflects the format variant understood by code accessing + the database. +\end_layout + +\begin_layout Enumerate +The upper part reflects the format variant you must understand to write + to the database (otherwise you can only open for reading). +\end_layout + +\begin_layout Standard +The latter field can only be written at creation time, the former should + be written under the OPEN_LOCK when opening the database for writing, if + the variant of the code is lower than the current lowest variant. +\end_layout + +\begin_layout Standard +This should allow backwards-compatible features to be added, and detection + if older code (which doesn't understand the feature) writes to the database. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Record Headers Are Not Expandible +\end_layout + +\begin_layout Standard +If we later want to add (say) checksums on keys and data, it would require + another format change, which we'd like to avoid. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We often have extra padding at the tail of a record. 
+ If we ensure that the first byte (if any) of this padding is zero, we will + have a way for future changes to detect code which doesn't understand a + new format: the new code would write (say) a 1 at the tail, and thus if + there is no tail or the first byte is 0, we would know the extension is + not present on that record. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Does Not Use Talloc +\end_layout + +\begin_layout Standard +Many users of TDB (particularly Samba) use the talloc allocator, and thus + have to wrap TDB in a talloc context to use it conveniently. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The allocation within TDB is not complicated enough to justify the use of + talloc, and I am reluctant to force another (excellent) library on TDB + users. + Nonetheless a compromise is possible. + An attribute (see +\begin_inset CommandInset ref +LatexCommand ref +reference "attributes" + +\end_inset + +) can be added later to tdb_open() to provide an alternate allocation mechanism, + specifically for talloc but usable by any other allocator (which would + ignore the +\begin_inset Quotes eld +\end_inset + +context +\begin_inset Quotes erd +\end_inset + + argument). +\end_layout + +\begin_layout Standard +This would form a talloc hierarchy as expected, but the caller would still + have to attach a destructor to the tdb context returned from tdb_open to + close it. + All TDB_DATA fields would be children of the tdb_context, and the caller + would still have to manage them (using talloc_free() or talloc_steal()). +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute.
+\end_layout + +\begin_layout Section +Performance And Scalability Issues +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +TDB_CLEAR_IF_FIRST Imposes Performance Penalty +\end_layout + +\begin_layout Standard +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset + 4 (aka. + the ACTIVE_LOCK). + While these locks never conflict in normal tdb usage, they do add substantial + overhead for most fcntl lock implementations when the kernel scans to detect + if a lock conflict exists. + This is often a single linked list, making the time to acquire and release + a fcntl lock O(N) where N is the number of processes with the TDB open, + not the number actually doing work. +\end_layout + +\begin_layout Standard +In a Samba server it is common to have huge numbers of clients sitting idle, + and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +There is a flag to tdb_reopen_all() which is used for this optimization: + if the parent process will outlive the child, the child does not need the + ACTIVE_LOCK. + This is a workaround for this very performance issue. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove the flag. + It was a neat idea, but even trivial servers tend to know when they are + initializing for the first time and can simply unlink the old tdb at that + point. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Files Have a 4G Limit +\end_layout + +\begin_layout Standard +This seems to be becoming an issue (so much for +\begin_inset Quotes eld +\end_inset + +trivial +\begin_inset Quotes erd +\end_inset + +!), particularly for ldb. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +A new, incompatible TDB format which uses 64 bit offsets internally rather + than 32 bit as now. + For simplicity of endian conversion (which TDB does on the fly if required), + all values will be 64 bit on disk. + In practice, some upper bits may be used for other purposes, but at least + 56 bits will be available for file offsets. +\end_layout + +\begin_layout Standard +tdb_open() will automatically detect the old version, and even create them + if TDB_VERSION6 is specified to tdb_open. +\end_layout + +\begin_layout Standard +32 bit processes will still be able to access TDBs larger than 4G (assuming + that their off_t allows them to seek to 64 bits), they will gracefully + fall back as they fail to mmap. + This can happen already with large TDBs. +\end_layout + +\begin_layout Standard +Old versions of tdb will fail to open the new TDB files (since 28 August + 2009, commit 398d0c29290: prior to that any unrecognized file format would + be erased and initialized as a fresh tdb!) +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Records Have a 4G Limit +\end_layout + +\begin_layout Standard +This has not been a reported problem, and the API uses size_t which can + be 64 bit on 64 bit platforms. + However, other limits may have made such an issue moot. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Record sizes will be 64 bit, with an error returned on 32 bit platforms + which try to access such records (the current implementation would return + TDB_ERR_OOM in a similar case). + It seems unlikely that 32 bit keys will be a limitation, so the implementation + may not support this (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +). 
+\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Hash Size Is Determined At TDB Creation Time +\end_layout + +\begin_layout Standard +TDB contains a number of hash chains in the header; the number is specified + at creation time, and defaults to 131. + This is such a bottleneck on large databases (as each hash chain gets quite + long), that LDB uses 10,000 for this hash. + In general it is impossible to know what the 'right' answer is at database + creation time. +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Hash-Size-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +After comprehensive performance testing on various scalable hash variants +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying + because I was previously convinced that an expanding tree of hashes would + be very close to optimal. +\end_layout + +\end_inset + +, it became clear that it is hard to beat a straight linear hash table which + doubles in size when it reaches saturation. + Unfortunately, altering the hash table introduces serious locking complications +: the entire hash table needs to be locked to enlarge the hash table, and + others might be holding locks. + Particularly insidious are insertions done under tdb_chainlock. +\end_layout + +\begin_layout Standard +Thus an expanding layered hash will be used: an array of hash groups, with + each hash group exploding into pointers to lower hash groups once it fills, + turning into a hash tree. + This has implications for locking: we must lock the entire group in case + we need to expand it, yet we don't know how deep the tree is at that point. 
+\end_layout + +\begin_layout Standard +Note that bits from the hash table entries should be stolen to hold more + hash bits to reduce the penalty of collisions. + We can use the otherwise-unused lower 3 bits. + If we limit the size of the database to 64 exabytes, we can use the top + 8 bits of the hash entry as well. + These 11 bits would reduce false positives down to 1 in 2000 which is more + than we need: we can use one of the bits to indicate that the extra hash + bits are valid. + This means we can choose not to re-hash all entries when we expand a hash + group; simply use the next bits we need and mark them invalid. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Ignore. + Scaling the hash automatically proved inefficient at small hash sizes; + we default to an 8192-element hash (changeable via NTDB_ATTRIBUTE_HASHSIZE), + and when buckets clash we expand to an array of hash entries. + This scales slightly better than the tdb chain (due to the 8 top bits containin +g extra hash). +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Freelist-Is" + +\end_inset + +TDB Freelist Is Highly Contended +\end_layout + +\begin_layout Standard +TDB uses a single linked list for the free list. + Allocation occurs as follows, using heuristics which have evolved over + time: +\end_layout + +\begin_layout Enumerate +Get the free list lock for this whole operation. +\end_layout + +\begin_layout Enumerate +Multiply length by 1.25, so we always over-allocate by 25%. +\end_layout + +\begin_layout Enumerate +Set the slack multiplier to 1. +\end_layout + +\begin_layout Enumerate +Examine the current freelist entry: if it is > length but < the current + best case, remember it as the best case. +\end_layout + +\begin_layout Enumerate +Multiply the slack multiplier by 1.05. +\end_layout + +\begin_layout Enumerate +If our best fit so far is less than length * slack multiplier, return it.
+ The slack will be turned into a new free record if it's large enough. +\end_layout + +\begin_layout Enumerate +Otherwise, go onto the next freelist entry. +\end_layout + +\begin_layout Standard +Deleting a record occurs as follows: +\end_layout + +\begin_layout Enumerate +Lock the hash chain for this whole operation. +\end_layout + +\begin_layout Enumerate +Walk the chain to find the record, keeping the prev pointer offset. +\end_layout + +\begin_layout Enumerate +If max_dead is non-zero: +\end_layout + +\begin_deeper +\begin_layout Enumerate +Walk the hash chain again and count the dead records. +\end_layout + +\begin_layout Enumerate +If it's more than max_dead, bulk free all the dead ones (similar to steps + 4 and below, but the lock is only obtained once). +\end_layout + +\begin_layout Enumerate +Simply mark this record as dead and return. +\end_layout + +\end_deeper +\begin_layout Enumerate +Get the free list lock for the remainder of this operation. +\end_layout + +\begin_layout Enumerate +\begin_inset CommandInset label +LatexCommand label +name "right-merging" + +\end_inset + +Examine the following block to see if it is free; if so, enlarge the current + block and remove that block from the free list. + This was disabled, as removal from the free list was O(entries-in-free-list). +\end_layout + +\begin_layout Enumerate +Examine the preceding block to see if it is free: for this reason, each + block has a 32-bit tailer which indicates its length. + If it is free, expand it to cover our new block and return. +\end_layout + +\begin_layout Enumerate +Otherwise, prepend ourselves to the free list. +\end_layout + +\begin_layout Standard +Disabling right-merging (step +\begin_inset CommandInset ref +LatexCommand ref +reference "right-merging" + +\end_inset + +) causes fragmentation; the other heuristics proved insufficient to address + this, so the final answer to this was that when we expand the TDB file + inside a transaction commit, we repack the entire tdb.
+\end_layout + +\begin_layout Standard +The single list lock limits our allocation rate; due to the other issues + this is not currently seen as a bottleneck. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The first step is to remove all the current heuristics, as they obviously + interact, then examine them once the lock contention is addressed. +\end_layout + +\begin_layout Standard +The free list must be split to reduce contention. + Assuming perfect free merging, we can at most have 1 free list entry for + each entry. + This implies that the number of free lists is related to the size of the + hash table, but as it is rare to walk a large number of free list entries + we can use far fewer, say 1/32 of the number of hash buckets. +\end_layout + +\begin_layout Standard +It seems tempting to try to reuse the hash implementation which we use for + records here, but we have two ways of searching for free entries: for allocatio +n we search by size (and possibly zone) which produces too many clashes + for our hash table to handle well, and for coalescing we search by address. + Thus an array of doubly-linked free lists seems preferable. +\end_layout + +\begin_layout Standard +There are various benefits in using per-size free lists (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +) but it's not clear this would reduce contention in the common case where + all processes are allocating/freeing the same size. + Thus we almost certainly need to divide in other ways: the most obvious + is to divide the file into zones, and using a free list (or table of free + lists) for each. + This approximates address ordering. 
+\end_layout + +\begin_layout Standard +Unfortunately it is difficult to know what heuristics should be used to + determine zone sizes, and our transaction code relies on being able to + create a +\begin_inset Quotes eld +\end_inset + +recovery area +\begin_inset Quotes erd +\end_inset + + by simply appending to the file (difficult if it would need to create a + new zone header). + Thus we use a linked-list of free tables; currently we only ever create + one, but if there is more than one we choose one at random to use. + In future we may use heuristics to add new free tables on contention. + We only expand the file when all free tables are exhausted. +\end_layout + +\begin_layout Standard +The basic algorithm is as follows. + Freeing is simple: +\end_layout + +\begin_layout Enumerate +Identify the correct free list. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the list (we didn't have a lock, sizes could have changed): relock + if necessary. +\end_layout + +\begin_layout Enumerate +Place the freed entry in the list. +\end_layout + +\begin_layout Standard +Allocation is a little more complicated, as we perform delayed coalescing + at this point: +\end_layout + +\begin_layout Enumerate +Pick a free table; usually the previous one. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +If the top entry is large enough, remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, coalesce entries in the list. If there was no entry large enough, + unlock the list and try the next largest list. +\end_layout + +\begin_layout Enumerate +If no list has an entry which meets our needs, try the next free table. +\end_layout + +\begin_layout Enumerate +If no zone satisfies, expand the file.
+\end_layout + +\begin_layout Standard +This optimizes rapid insert/delete of free list entries by not coalescing + them all the time. + First-fit address ordering seems to be fairly good for keeping + fragmentation low (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +). + Note that address ordering does not need a tailer to coalesce, though if + we needed one we could have one cheaply: see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +. +\end_layout + +\begin_layout Standard +Each free entry has the free table number in the header: less than 255. + It also contains a doubly-linked list for easy deletion. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Becomes-Fragmented" + +\end_inset + +TDB Becomes Fragmented +\end_layout + +\begin_layout Standard +Much of this is a result of allocation strategy +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute +xas.edu/pub/garbage/malloc/ismm98.ps +\end_layout + +\end_inset + + and deliberate hobbling of coalescing; internal fragmentation (aka overallocati +on) is deliberately set at 25%, and external fragmentation is only cured + by the decision to repack the entire db when a transaction commit needs + to enlarge the file. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The 25% overhead on allocation works in practice for ldb because indexes + tend to expand by one record at a time. + This internal fragmentation can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them.
+\end_layout + +\begin_layout Standard +There is a spectrum of possible solutions for external fragmentation: + one is to use a fragmentation-avoiding allocation strategy such as a best-fit + address-order allocator. + The other end of the spectrum would be to use a bump allocator (very fast + and simple) and simply repack the file when we reach the end. +\end_layout + +\begin_layout Standard +There are three problems with efficient fragmentation-avoiding allocators: + they are non-trivial, they tend to use a single free list for each size, + and there's no evidence that tdb allocation patterns will match those recorded + for general allocators (though it seems likely). +\end_layout + +\begin_layout Standard +Thus we don't spend too much effort on external fragmentation; we will be + no worse than the current code if we need to repack on occasion. + More effort is spent on reducing freelist contention, and reducing overhead. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Records-Incur-A" + +\end_inset + +Records Incur A 28-Byte Overhead +\end_layout + +\begin_layout Standard +Each TDB record has a header as follows: +\end_layout + +\begin_layout LyX-Code +struct tdb_record { +\end_layout + +\begin_layout LyX-Code + tdb_off_t next; /* offset of the next record in the list */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t rec_len; /* total byte length of record */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t key_len; /* byte length of key */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t data_len; /* byte length of data */ +\end_layout + +\begin_layout LyX-Code + uint32_t full_hash; /* the full 32 bit hash of the key */ +\end_layout + +\begin_layout LyX-Code + uint32_t magic; /* try to catch errors */ +\end_layout + +\begin_layout LyX-Code + /* the following union is implied: +\end_layout + +\begin_layout LyX-Code + union { +\end_layout + +\begin_layout LyX-Code + char record[rec_len];
+\end_layout + +\begin_layout LyX-Code + struct { +\end_layout + +\begin_layout LyX-Code + char key[key_len]; +\end_layout + +\begin_layout LyX-Code + char data[data_len]; +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + uint32_t totalsize; (tailer) +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + */ +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Naively, this would double to a 56-byte overhead on a 64 bit implementation. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We can use various techniques to reduce this for an allocated block: +\end_layout + +\begin_layout Enumerate +The 'next' pointer is not required, as we are using a flat hash table. +\end_layout + +\begin_layout Enumerate +'rec_len' can instead be expressed as an addition to key_len and data_len + (it accounts for wasted or overallocated length in the record). + Since the record length is always a multiple of 8, we can conveniently + fit it in 32 bits (representing up to 35 bits). +\end_layout + +\begin_layout Enumerate +'key_len' and 'data_len' can be reduced. + I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine + the two into one 64-bit field and using a 5 bit value which indicates at + what bit to divide the two. + Keys are unlikely to scale as fast as data, so I'm assuming a maximum key + size of 32 bits. +\end_layout + +\begin_layout Enumerate +'full_hash' is used to avoid a memcmp on the +\begin_inset Quotes eld +\end_inset + +miss +\begin_inset Quotes erd +\end_inset + + case, but this is diminishing returns after a handful of bits (at 10 bits, + it reduces 99.9% of false memcmp). + As an aside, as the lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. 
+ Note that it's not clear that these bits will be a win, given the extra + bits in the hash table itself (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Hash-Size-Solution" + +\end_inset + +). +\end_layout + +\begin_layout Enumerate +'magic' does not need to be enlarged: it currently reflects one of 5 values + (used, free, dead, recovery, and unused_recovery). + It is useful for quick sanity checking however, and should not be eliminated. +\end_layout + +\begin_layout Enumerate +'tailer' is only used to coalesce free blocks (so a block to the right can + find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of the following + block (and the tailer only exists in free blocks). +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +This technique from Thomas Standish. + Data Structure Techniques. + Addison-Wesley, Reading, Massachusetts, 1980. +\end_layout + +\end_inset + + The current proposed coalescing algorithm doesn't need this, however. 
+\end_layout + +\begin_layout Standard +This produces a 16 byte used header like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_used_record { +\end_layout + +\begin_layout LyX-Code + uint32_t used_magic : 16, +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout LyX-Code + key_data_divide: 5, +\end_layout + +\begin_layout LyX-Code + top_hash: 11; +\end_layout + +\begin_layout LyX-Code + uint32_t extra_octets; +\end_layout + +\begin_layout LyX-Code + uint64_t key_and_data_len; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +And a free record like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_free_record { +\end_layout + +\begin_layout LyX-Code + uint64_t free_magic: 8, +\end_layout + +\begin_layout LyX-Code + prev : 56; +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout LyX-Code + uint64_t free_table: 8, +\end_layout + +\begin_layout LyX-Code + total_length : 56; +\end_layout + +\begin_layout LyX-Code + uint64_t next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Note that by limiting valid offsets to 56 bits, we can pack everything we + need into 3 64-bit words, meaning our minimum record size is 8 bytes. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete.
+\end_layout + +\begin_layout Subsection +Transaction Commit Requires 4 fdatasync +\end_layout + +\begin_layout Standard +The current transaction algorithm is: +\end_layout + +\begin_layout Enumerate +write_recovery_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +write_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +overwrite_with_new_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +remove_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Standard +On current ext3, each sync flushes all data to disk, so the next 3 syncs + are relatively expensive. + But this could become a performance bottleneck on other filesystems such + as ext4. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Neil Brown points out that this is overzealous, and only one sync is needed: +\end_layout + +\begin_layout Enumerate +Bundle the recovery data, a transaction counter and a strong checksum of + the new data. +\end_layout + +\begin_layout Enumerate +Strong checksum that whole bundle. +\end_layout + +\begin_layout Enumerate +Store the bundle in the database. +\end_layout + +\begin_layout Enumerate +Overwrite the oldest of the two recovery pointers in the header (identified + using the transaction counter) with the offset of this bundle. +\end_layout + +\begin_layout Enumerate +sync. +\end_layout + +\begin_layout Enumerate +Write the new data to the file. +\end_layout + +\begin_layout Standard +Checking for recovery means identifying the latest bundle with a valid checksum + and using the new data checksum to ensure that it has been applied. + This is more expensive than the current check, but need only be done at + open. 
+ For running databases, a separate header field can be used to indicate + a transaction in progress; we need only check for recovery if this is set. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Does-Not" + +\end_inset + +TDB Does Not Have Snapshot Support +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + + (but see +\begin_inset CommandInset ref +LatexCommand ref +reference "replay-attribute" + +\end_inset + +). +\end_layout + +\begin_layout Standard +But as a thought experiment, if we implemented transactions to only overwrite + free entries (this is tricky: there must not be a header in each entry + which indicates whether it is free, but use of presence in metadata elsewhere), + and a pointer to the hash table, we could create an entirely new commit + without destroying existing data. + Then it would be easy to implement snapshots in a similar way. +\end_layout + +\begin_layout Standard +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\end_layout + +\begin_layout Standard +We could then implement snapshots using a similar method, using multiple + different hash tables/free tables. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. 
+\end_layout + +\begin_layout Subsection +Transactions Cannot Operate in Parallel +\end_layout + +\begin_layout Standard +This would be useless for ldb, as it hits the index records with just about + every update. + It would add significant complexity in resolving clashes, and cause + all transaction callers to write their code to loop in the case where the + transactions spuriously failed. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None (but see +\begin_inset CommandInset ref +LatexCommand ref +reference "replay-attribute" + +\end_inset + +). + We could solve a small part of the problem by providing read-only transactions. + These would allow one write transaction to begin, but it could not commit + until all r/o transactions are done. + This would require a new RO_TRANSACTION_LOCK, which would be upgraded on + commit. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +Default Hash Function Is Suboptimal +\end_layout + +\begin_layout Standard +The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially + if we expand it to 64 bits), and works best when the hash bucket size is + a prime number (which also means a slow modulus). + In addition, it is highly predictable which could potentially lead to a + Denial of Service attack in some TDB uses. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The Jenkins lookup3 hash +\begin_inset Foot +status open + +\begin_layout Plain Layout +http://burtleburtle.net/bob/c/lookup3.c +\end_layout + +\end_inset + + is a fast and superbly-mixing hash. + It's used by the Linux kernel and almost everything else. + This has the particular properties that it takes an initial seed, and produces + two 32 bit hash numbers, which we can combine into a 64-bit hash.
+\end_layout + +\begin_layout Standard +The seed should be created at tdb-creation time from some random source, + and placed in the header. + This is far from foolproof, but adds a little bit of protection against + hash bombing. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "Reliable-Traversal-Adds" + +\end_inset + +Reliable Traversal Adds Complexity +\end_layout + +\begin_layout Standard +We lock a record during traversal iteration, and try to grab that lock in + the delete code. + If that grab on delete fails, we simply mark it deleted and continue onwards; + traversal checks for this condition and does the delete when it moves off + the record. +\end_layout + +\begin_layout Standard +If traversal terminates, the dead record may be left indefinitely. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove reliability guarantees; see +\begin_inset CommandInset ref +LatexCommand ref +reference "traverse-Proposed-Solution" + +\end_inset + +. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Fcntl Locking Adds Overhead +\end_layout + +\begin_layout Standard +Placing a fcntl lock means a system call, as does removing one. + This is actually one reason why transactions can be faster (everything + is locked once at transaction start). + In the uncontended case, this overhead can theoretically be eliminated. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +We tried this before with spinlock support, in the early days of TDB, and + it didn't make much difference except in manufactured benchmarks. 
+\end_layout + +\begin_layout Standard +We could use spinlocks (with futex kernel support under Linux), but it means + that we lose automatic cleanup when a process dies with a lock. + There is a method of auto-cleanup under Linux, but it's not supported by + other operating systems. + We could reintroduce a clear-if-first-style lock and sweep for dead futexes + on open, but that wouldn't help the normal case of one concurrent opener + dying. + Increasingly elaborate repair schemes could be considered, but they require + an ABI change (everyone must use them) anyway, so there's no need to do + this at the same time as everything else. +\end_layout + +\begin_layout Subsection +Some Transactions Don't Require Durability +\end_layout + +\begin_layout Standard +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast) + usage, and occasionally empties the results into a transactional TDB. + This kind of usage prioritizes performance over durability: as long as + we are consistent, data can be lost. +\end_layout + +\begin_layout Standard +This would be more neatly implemented inside tdb: a +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + transaction commit (ie. + syncless) which meant that data may be reverted on a crash. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +Unfortunately any transaction scheme which overwrites old data requires + a sync before that overwrite to avoid the possibility of corruption. +\end_layout + +\begin_layout Standard +It seems possible to use a scheme similar to that described in +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Does-Not" + +\end_inset + +,where transactions are committed without overwriting existing data, and + an array of top-level pointers were available in the header. 
+ If the transaction is +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + then we would not need a sync at all: existing processes would pick up + the new hash table and free list and work with that. +\end_layout + +\begin_layout Standard +At some later point, a sync would allow recovery of the old data into the + free lists (perhaps when the array of top-level pointers filled). + On crash, tdb_open() would examine the array of top levels, and apply the + transactions until it encountered an invalid checksum. +\end_layout + +\begin_layout Subsection +Tracing Is Fragile, Replay Is External +\end_layout + +\begin_layout Standard +The current TDB has compile-time-enabled tracing code, but it often breaks + as it is not enabled by default. + In a similar way, the ctdb code has an external wrapper which does replay + tracing so it can coordinate cluster-wide transactions. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\begin_inset CommandInset label +LatexCommand label +name "replay-attribute" + +\end_inset + + +\end_layout + +\begin_layout Standard +Tridge points out that an attribute can be later added to tdb_open (see +\begin_inset CommandInset ref +LatexCommand ref +reference "attributes" + +\end_inset + +) to provide replay/trace hooks, which could become the basis for this and + future parallel transactions and snapshot support. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. 
+\end_layout + +\end_body +\end_document diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.pdf b/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.pdf new file mode 100644 index 00000000..83819146 Binary files /dev/null and b/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.pdf differ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.txt b/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.txt new file mode 100644 index 00000000..bd680f09 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/doc/design.txt @@ -0,0 +1,1270 @@ +NTDB: Redesigning The Trivial DataBase + +Rusty Russell, IBM Corporation + +19 June 2012 + +Abstract + +The Trivial DataBase on-disk format is 32 bits; with usage cases +heading towards the 4G limit, that must change. This required +breakage provides an opportunity to revisit TDB's other design +decisions and reassess them. + +1 Introduction + +The Trivial DataBase was originally written by Andrew Tridgell as +a simple key/data pair storage system with the same API as dbm, +but allowing multiple readers and writers while being small +enough (< 1000 lines of C) to include in SAMBA. The simple design +created in 1999 has proven surprisingly robust and performant, +used in Samba versions 3 and 4 as well as numerous other +projects. Its useful life was greatly increased by the +(backwards-compatible!) addition of transaction support in 2005. + +The wider variety and greater demands of TDB-using code has led +to some organic growth of the API, as well as some compromises on +the implementation. None of these, by themselves, are seen as +show-stoppers, but the cumulative effect is a loss of elegance +over the initial, simple TDB implementation.
Here is a table of +the approximate number of lines of implementation code and number +of API functions at the end of each year: + + ++-----------+----------------+--------------------------------+ +| Year End | API Functions | Lines of C Code Implementation | ++-----------+----------------+--------------------------------+ ++-----------+----------------+--------------------------------+ +| 1999 | 13 | 1195 | ++-----------+----------------+--------------------------------+ +| 2000 | 24 | 1725 | ++-----------+----------------+--------------------------------+ +| 2001 | 32 | 2228 | ++-----------+----------------+--------------------------------+ +| 2002 | 35 | 2481 | ++-----------+----------------+--------------------------------+ +| 2003 | 35 | 2552 | ++-----------+----------------+--------------------------------+ +| 2004 | 40 | 2584 | ++-----------+----------------+--------------------------------+ +| 2005 | 38 | 2647 | ++-----------+----------------+--------------------------------+ +| 2006 | 52 | 3754 | ++-----------+----------------+--------------------------------+ +| 2007 | 66 | 4398 | ++-----------+----------------+--------------------------------+ +| 2008 | 71 | 4768 | ++-----------+----------------+--------------------------------+ +| 2009 | 73 | 5715 | ++-----------+----------------+--------------------------------+ + + +This review is an attempt to catalog and address all the known +issues with TDB and create solutions which address the problems +without significantly increasing complexity; all involved are far +too aware of the dangers of second system syndrome in rewriting a +successful project like this. + +Note: the final decision was to make ntdb a separate library, +with a separate 'ntdb' namespace so both can potentially be +linked together. This document still refers to “tdb” everywhere, +for simplicity.
+ +2 API Issues + +2.1 tdb_open_ex Is Not Expandable + +The tdb_open() call was expanded to tdb_open_ex(), which added an +optional hashing function and an optional logging function +argument. Additional arguments to open would require the +introduction of a tdb_open_ex2 call etc. + +2.1.1 Proposed Solution + +tdb_open() will take a linked-list of attributes: + +enum tdb_attribute { + + TDB_ATTRIBUTE_LOG = 0, + + TDB_ATTRIBUTE_HASH = 1 + +}; + +struct tdb_attribute_base { + + enum tdb_attribute attr; + + union tdb_attribute *next; + +}; + +struct tdb_attribute_log { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG +*/ + + tdb_log_func log_fn; + + void *log_private; + +}; + +struct tdb_attribute_hash { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH +*/ + + tdb_hash_func hash_fn; + + void *hash_private; + +}; + +union tdb_attribute { + + struct tdb_attribute_base base; + + struct tdb_attribute_log log; + + struct tdb_attribute_hash hash; + +}; + +This allows future attributes to be added, even if this expands +the size of the union. + +2.1.2 Status + +Complete. + +2.2 tdb_traverse Makes Impossible Guarantees + +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, +and it was thought that it was important to guarantee that all +records which exist at the start and end of the traversal would +be included, and no record would be included twice. + +This adds complexity (see[Reliable-Traversal-Adds]) and does not +work anyway for records which are altered (in particular, those +which are expanded may be effectively deleted and re-added behind +the traversal). + +2.2.1 Proposed Solution + +Abandon the guarantee. You will see every record if no changes +occur during your traversal, otherwise you will see some subset. +You can prevent changes by using a transaction or the locking +API. + +2.2.2 Status + +Complete. Delete-during-traverse will still delete every record, +too (assuming no other changes). 
+ +2.3 Nesting of Transactions Is Fraught + +TDB has alternated between allowing nested transactions and not +allowing them. Various paths in the Samba codebase assume that +transactions will nest, and in a sense they can: the operation is +only committed to disk when the outer transaction is committed. +There are two problems, however: + +1. Canceling the inner transaction will cause the outer + transaction commit to fail, and will not undo any operations + since the inner transaction began. This problem is soluble with + some additional internal code. + +2. An inner transaction commit can be cancelled by the outer + transaction. This is desirable in the way which Samba's + database initialization code uses transactions, but could be a + surprise to any users expecting a successful transaction commit + to expose changes to others. + +The current solution is to specify the behavior at tdb_open(), +with the default currently that nested transactions are allowed. +This flag can also be changed at runtime. + +2.3.1 Proposed Solution + +Given the usage patterns, it seems that the “least-surprise” +behavior of disallowing nested transactions should become the +default. Additionally, it seems the outer transaction is the only +code which knows whether inner transactions should be allowed, so +a flag to indicate this could be added to tdb_transaction_start. +However, this behavior can be simulated with a wrapper which uses +tdb_add_flags() and tdb_remove_flags(), so the API should not be +expanded for this relatively-obscure case. + +2.3.2 Status + +Complete; the nesting flag has been removed. + +2.4 Incorrect Hash Function is Not Detected + +tdb_open_ex() allows the calling code to specify a different hash +function to use, but does not check that all other processes +accessing this tdb are using the same hash function. The result +is that records are missing from tdb_fetch(). + +2.4.1 Proposed Solution + +The header should contain an example hash result (eg.
the hash of +0xdeadbeef), and tdb_open_ex() should check that the given hash +function produces the same answer, or fail the tdb_open call. + +2.4.2 Status + +Complete. + +2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation + +In response to scalability issues with the free list ([TDB-Freelist-Is] +) two API workarounds have been incorporated in TDB: +tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The +latter actually calls the former with an argument of “5”. + +This code allows deleted records to accumulate without putting +them in the free list. On delete we iterate through each chain +and free them in a batch if there are more than max_dead entries. +These are never otherwise recycled except as a side-effect of a +tdb_repack. + +2.5.1 Proposed Solution + +With the scalability problems of the freelist solved, this API +can be removed. The TDB_VOLATILE flag may still be useful as a +hint that store and delete of records will be at least as common +as fetch in order to allow some internal tuning, but initially +will become a no-op. + +2.5.2 Status + +Complete. Unknown flags cause tdb_open() to fail as well, so they +can be detected at runtime. + +2.6 TDB Files Cannot Be Opened Multiple Times + In The Same Process + +No process can open the same TDB twice; we check and disallow it. +This is an unfortunate side-effect of fcntl locks, which operate +on a per-file rather than per-file-descriptor basis, and do not +nest. Thus, closing any file descriptor on a file clears all the +locks obtained by this process, even if they were placed using a +different file descriptor! + +Note that even if this were solved, deadlock could occur if +operations were nested: this is a more manageable programming +error in most cases. + +2.6.1 Proposed Solution + +We could lobby POSIX to fix the perverse rules, or at least lobby +Linux to violate them so that the most common implementation does +not have this restriction.
This would be a generally good idea +for other fcntl lock users. + +Samba uses a wrapper which hands out the same tdb_context to +multiple callers if this happens, and does simple reference +counting. We should do this inside the tdb library, which already +emulates lock nesting internally; it would need to recognize when +deadlock occurs within a single process. This would create a new +failure mode for tdb operations (while we currently handle +locking failures, they are impossible in normal use and a process +encountering them can do little but give up). + +I do not see benefit in an additional tdb_open flag to indicate +whether re-opening is allowed, though there may be some +benefit to adding a call to detect when a tdb_context is shared, +to allow others to create such an API. + +2.6.2 Status + +Complete. + +2.7 TDB API Is Not POSIX Thread-safe + +The TDB API uses an error code which can be queried after an +operation to determine what went wrong. This programming model +does not work with threads, unless specific additional guarantees +are given by the implementation. In addition, even +otherwise-independent threads cannot open the same TDB (as in[TDB-Files-Cannot] +). + +2.7.1 Proposed Solution + +Rearchitecting the API to include a tdb_errcode pointer would be a +great deal of churn, but fortunately most functions return 0 on +success and -1 on error: we can change these to return 0 on +success and a negative error code on error, and the API remains +similar to previous. The tdb_fetch, tdb_firstkey and tdb_nextkey +functions need to take a TDB_DATA pointer and return an error +code. It is also simpler to have tdb_nextkey replace its key +argument in place, freeing up any old .dptr. + +Internal locking is required to make sure that fcntl locks do not +overlap between threads, and also that the global list of tdbs is +maintained.
+ +The aim is that building tdb with -DTDB_PTHREAD will result in a +pthread-safe version of the library, and otherwise no overhead +will exist. Alternatively, a hooking mechanism similar to that +proposed for[Proposed-Solution-locking-hook] could be used to +enable pthread locking at runtime. + +2.7.2 Status + +Incomplete; API has been changed but thread safety has not been +implemented. + +2.8 *_nonblock Functions And *_mark Functions Expose + Implementation + +CTDB[footnote: +Clustered TDB, see http://ctdb.samba.org +] wishes to operate on TDB in a non-blocking manner. This is +currently done as follows: + +1. Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). If this fails: + +2. Fork a child process, and wait for it to call the normal + variant (eg. tdb_lockall). + +3. If the child succeeds, call the _mark variant to indicate we + already have the locks (eg. tdb_lockall_mark). + +4. Upon completion, tell the child to release the locks (eg. + tdb_unlockall). + +5. Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). + +There are several issues with this approach. Firstly, adding two +new variants of each function clutters the API for an obscure +use, and so not all functions have three variants. Secondly, it +assumes that all paths of the functions ask for the same locks, +otherwise the parent process will have to get a lock which the +child doesn't have under some circumstances. I don't believe this +is currently the case, but it constrains the implementation. + +2.8.1 Proposed Solution + +Implement a hook for locking methods, so that the caller can +control the calls to create and remove fcntl locks. In this +scenario, ctdbd would operate as follows: + +1. Call the normal API function, eg tdb_lockall(). + +2. When the lock callback comes in, check if the child has the + lock. Initially, this is always false. If so, return 0. + Otherwise, try to obtain it in non-blocking mode. 
If that + fails, return EWOULDBLOCK. + +3. Release locks in the unlock callback as normal. + +4. If tdb_lockall() fails, see if we recorded a lock failure; if + so, call the child to repeat the operation. + +5. The child records what locks it obtains, and returns that + information to the parent. + +6. When the child has succeeded, goto 1. + +This is flexible enough to handle any potential locking scenario, +even when lock requirements change. It can be optimized so that +the parent does not release locks, just tells the child which +locks it doesn't need to obtain. + +It also keeps the complexity out of the API, and in ctdbd where +it is needed. + +2.8.2 Status + +Complete. + +2.9 tdb_chainlock Functions Expose Implementation + +tdb_chainlock locks some number of records, including the record +indicated by the given key. This gave atomicity guarantees; +no-one can start a transaction, alter, read or delete that key +while the lock is held. + +It also makes the same guarantee for any other key in the chain, +which is an internal implementation detail and potentially a +cause for deadlock. + +2.9.1 Proposed Solution + +None. It would be nice to have an explicit single entry lock +which affected no other keys. Unfortunately, this won't work for +an entry which doesn't exist. Thus while chainlock may be +implemented more efficiently for the existing case, it will still +have overlap issues with the non-existing case. So it is best to +keep the current (lack of) guarantee about which records will be +affected to avoid constraining our implementation. + +2.10 Signal Handling is Not Race-Free + +The tdb_setalarm_sigptr() call allows the caller's signal handler +to indicate that the tdb locking code should return with a +failure, rather than trying again when a signal is received (and +errno == EAGAIN). This is usually used to implement timeouts. 
+ +Unfortunately, this does not work in the case where the signal is +received before the tdb code enters the fcntl() call to place the +lock: the code will sleep within the fcntl() code, unaware that +the signal wants it to exit. In the case of long timeouts, this +does not happen in practice. + +2.10.1 Proposed Solution + +The locking hooks proposed in[Proposed-Solution-locking-hook] +would allow the user to decide on whether to fail the lock +acquisition on a signal. This allows the caller to choose their +own compromise: they could narrow the race by checking +immediately before the fcntl call.[footnote: +It may be possible to make this race-free in some implementations +by having the signal handler alter the struct flock to make it +invalid. This will cause the fcntl() lock call to fail with +EINVAL if the signal occurs before the kernel is entered, +otherwise EAGAIN. +] + +2.10.2 Status + +Complete. + +2.11 The API Uses Gratuitous Typedefs, Capitals + +typedefs are useful for providing source compatibility when types +can differ across implementations, or arguably in the case of +function pointer definitions which are hard for humans to parse. +Otherwise it is simply obfuscation and pollutes the namespace. + +Capitalization is usually reserved for compile-time constants and +macros. + + TDB_CONTEXT There is no reason to use this over 'struct + tdb_context'; the definition isn't visible to the API user + anyway. + + TDB_DATA There is no reason to use this over struct TDB_DATA; + the struct needs to be understood by the API user. + + struct TDB_DATA This would normally be called 'struct + tdb_data'. + + enum TDB_ERROR Similarly, this would normally be enum + tdb_error. + +2.11.1 Proposed Solution + +None. Introducing lower case variants would please pedants like +myself, but if it were done the existing ones should be kept. +There is little point forcing a purely cosmetic change upon tdb +users. 
+ +2.12 tdb_log_func Doesn't Take The + Private Pointer + +For API compatibility reasons, the logging function needs to call +tdb_get_logging_private() to retrieve the pointer registered by +the tdb_open_ex for logging. + +2.12.1 Proposed Solution + +It should simply take an extra argument, since we are prepared to +break the API/ABI. + +2.12.2 Status + +Complete. + +2.13 Various Callback Functions Are Not Typesafe + +The callback functions in tdb_set_logging_function (after[tdb_log_func-Doesnt-Take] + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read +and tdb_check all take void * and must internally convert it to +the argument type they were expecting. + +If this type changes, the compiler will not produce warnings on +the callers, since it only sees void *. + +2.13.1 Proposed Solution + +With careful use of macros, we can create callback functions +which give a warning when used on gcc and the types of the +callback and its private argument differ. Unsupported compilers +will not give a warning, which is no worse than now. In addition, +the callbacks become clearer, as they need not use void * for +their parameter. + +See CCAN's typesafe_cb module at +http://ccan.ozlabs.org/info/typesafe_cb.html + +2.13.2 Status + +Complete. + +2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, + tdb_reopen_all Problematic + +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB +file should be cleared if the caller discovers it is the only +process with the TDB open. However, if any caller does not +specify TDB_CLEAR_IF_FIRST it will not be detected, so will have +the TDB erased underneath them (usually resulting in a crash). + +There is a similar issue on fork(); if the parent exits (or +otherwise closes the tdb) before the child calls tdb_reopen_all() +to establish the lock used to indicate the TDB is opened by +someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe +it alone has opened the TDB and will erase it. 
+ +2.14.1 Proposed Solution + +Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but +see[TDB_CLEAR_IF_FIRST-Imposes-Performance]. + +2.14.2 Status + +Complete. An open hook is provided to replicate this +functionality if required. + +2.15 Extending The Header Is Difficult + +We have reserved (zeroed) words in the TDB header, which can be +used for future features. If the future features are compulsory, +the version number must be updated to prevent old code from +accessing the database. But if the future feature is optional, we +have no way of telling if older code is accessing the database or +not. + +2.15.1 Proposed Solution + +The header should contain a “format variant” value (64-bit). This +is divided into two 32-bit parts: + +1. The lower part reflects the format variant understood by code + accessing the database. + +2. The upper part reflects the format variant you must understand + to write to the database (otherwise you can only open for + reading). + +The latter field can only be written at creation time, the former +should be written under the OPEN_LOCK when opening the database +for writing, if the variant of the code is lower than the current +lowest variant. + +This should allow backwards-compatible features to be added, and +detection if older code (which doesn't understand the feature) +writes to the database. + +2.15.2 Status + +Complete. + +2.16 Record Headers Are Not Expandable + +If we later want to add (say) checksums on keys and data, it +would require another format change, which we'd like to avoid. + +2.16.1 Proposed Solution + +We often have extra padding at the tail of a record. If we ensure +that the first byte (if any) of this padding is zero, we will +have a way for future changes to detect code which doesn't +understand a new format: the new code would write (say) a 1 at +the tail, and thus if there is no tail or the first byte is 0, we +would know the extension is not present on that record. + +2.16.2 Status + +Complete. 
+ +2.17 TDB Does Not Use Talloc + +Many users of TDB (particularly Samba) use the talloc allocator, +and thus have to wrap TDB in a talloc context to use it +conveniently. + +2.17.1 Proposed Solution + +The allocation within TDB is not complicated enough to justify +the use of talloc, and I am reluctant to force another +(excellent) library on TDB users. Nonetheless a compromise is +possible. An attribute (see[attributes]) can be added later to +tdb_open() to provide an alternate allocation mechanism, +specifically for talloc but usable by any other allocator (which +would ignore the “context” argument). + +This would form a talloc hierarchy as expected, but the caller +would still have to attach a destructor to the tdb context +returned from tdb_open to close it. All TDB_DATA fields would be +children of the tdb_context, and the caller would still have to +manage them (using talloc_free() or talloc_steal()). + +2.17.2 Status + +Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute. + +3 Performance And Scalability Issues + +3.1 TDB_CLEAR_IF_FIRST + Imposes Performance Penalty + +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is +placed at offset 4 (aka. the ACTIVE_LOCK). While these locks +never conflict in normal tdb usage, they do add substantial +overhead for most fcntl lock implementations when the kernel +scans to detect if a lock conflict exists. This is often a single +linked list, making the time to acquire and release a fcntl lock +O(N) where N is the number of processes with the TDB open, not +the number actually doing work. + +In a Samba server it is common to have huge numbers of clients +sitting idle, and thus they have weaned themselves off the +TDB_CLEAR_IF_FIRST flag.[footnote: +There is a flag to tdb_reopen_all() which is used for this +optimization: if the parent process will outlive the child, the +child does not need the ACTIVE_LOCK. This is a workaround for +this very performance issue. 
+] + +3.1.1 Proposed Solution + +Remove the flag. It was a neat idea, but even trivial servers +tend to know when they are initializing for the first time and +can simply unlink the old tdb at that point. + +3.1.2 Status + +Complete. + +3.2 TDB Files Have a 4G Limit + +This seems to be becoming an issue (so much for“trivial”!), +particularly for ldb. + +3.2.1 Proposed Solution + +A new, incompatible TDB format which uses 64 bit offsets +internally rather than 32 bit as now. For simplicity of endian +conversion (which TDB does on the fly if required), all values +will be 64 bit on disk. In practice, some upper bits may be used +for other purposes, but at least 56 bits will be available for +file offsets. + +tdb_open() will automatically detect the old version, and even +create them if TDB_VERSION6 is specified to tdb_open. + +32 bit processes will still be able to access TDBs larger than 4G +(assuming that their off_t allows them to seek to 64 bits), they +will gracefully fall back as they fail to mmap. This can happen +already with large TDBs. + +Old versions of tdb will fail to open the new TDB files (since 28 +August 2009, commit 398d0c29290: prior to that any unrecognized +file format would be erased and initialized as a fresh tdb!) + +3.2.2 Status + +Complete. + +3.3 TDB Records Have a 4G Limit + +This has not been a reported problem, and the API uses size_t +which can be 64 bit on 64 bit platforms. However, other limits +may have made such an issue moot. + +3.3.1 Proposed Solution + +Record sizes will be 64 bit, with an error returned on 32 bit +platforms which try to access such records (the current +implementation would return TDB_ERR_OOM in a similar case). It +seems unlikely that 32 bit keys will be a limitation, so the +implementation may not support this (see[sub:Records-Incur-A]). + +3.3.2 Status + +Complete. 
+ +3.4 Hash Size Is Determined At TDB Creation Time + +TDB contains a number of hash chains in the header; the number is +specified at creation time, and defaults to 131. This is such a +bottleneck on large databases (as each hash chain gets quite +long), that LDB uses 10,000 for this hash. In general it is +impossible to know what the 'right' answer is at database +creation time. + +3.4.1 Proposed Solution + +After comprehensive performance testing on various scalable hash +variants[footnote: +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 +This was annoying because I was previously convinced that an +expanding tree of hashes would be very close to optimal. +], it became clear that it is hard to beat a straight linear hash +table which doubles in size when it reaches saturation. +Unfortunately, altering the hash table introduces serious locking +complications: the entire hash table needs to be locked to +enlarge the hash table, and others might be holding locks. +Particularly insidious are insertions done under tdb_chainlock. + +Thus an expanding layered hash will be used: an array of hash +groups, with each hash group exploding into pointers to lower +hash groups once it fills, turning into a hash tree. This has +implications for locking: we must lock the entire group in case +we need to expand it, yet we don't know how deep the tree is at +that point. + +Note that bits from the hash table entries should be stolen to +hold more hash bits to reduce the penalty of collisions. We can +use the otherwise-unused lower 3 bits. If we limit the size of +the database to 64 exabytes, we can use the top 8 bits of the +hash entry as well. These 11 bits would reduce false positives +down to 1 in 2000 which is more than we need: we can use one of +the bits to indicate that the extra hash bits are valid. This +means we can choose not to re-hash all entries when we expand a +hash group; simply use the next bits we need and mark them +invalid. 
+ +3.4.2 Status + +Ignore. Scaling the hash automatically proved inefficient at +small hash sizes; we default to an 8192-element hash (changeable +via NTDB_ATTRIBUTE_HASHSIZE), and when buckets clash we expand to +an array of hash entries. This scales slightly better than the +tdb chain (due to the 8 top bits containing extra hash). + +3.5 TDB Freelist Is Highly Contended + +TDB uses a single linked list for the free list. Allocation +occurs as follows, using heuristics which have evolved over time: + +1. Get the free list lock for this whole operation. + +2. Multiply length by 1.25, so we always over-allocate by 25%. + +3. Set the slack multiplier to 1. + +4. Examine the current freelist entry: if it is > length but < + the current best case, remember it as the best case. + +5. Multiply the slack multiplier by 1.05. + +6. If our best fit so far is less than length * slack multiplier, + return it. The slack will be turned into a new free record if + it's large enough. + +7. Otherwise, go onto the next freelist entry. + +Deleting a record occurs as follows: + +1. Lock the hash chain for this whole operation. + +2. Walk the chain to find the record, keeping the prev pointer + offset. + +3. If max_dead is non-zero: + + (a) Walk the hash chain again and count the dead records. + + (b) If it's more than max_dead, bulk free all the dead ones + (similar to steps 4 and below, but the lock is only obtained + once). + + (c) Simply mark this record as dead and return. + +4. Get the free list lock for the remainder of this operation. + +5. Examine the following block to see if it is + free; if so, enlarge the current block and remove that block + from the free list. This was disabled, as removal from the free + list was O(entries-in-free-list). + +6. Examine the preceding block to see if it is free: for this + reason, each block has a 32-bit tailer which indicates its + length. If it is free, expand it to cover our new block and + return. + +7. 
Otherwise, prepend ourselves to the free list. + +Disabling right-merging (step[right-merging]) causes +fragmentation; the other heuristics proved insufficient to +address this, so the final answer to this was that when we expand +the TDB file inside a transaction commit, we repack the entire +tdb. + +The single list lock limits our allocation rate; due to the other +issues this is not currently seen as a bottleneck. + +3.5.1 Proposed Solution + +The first step is to remove all the current heuristics, as they +obviously interact, then examine them once the lock contention is +addressed. + +The free list must be split to reduce contention. Assuming +perfect free merging, we can at most have 1 free list entry for +each entry. This implies that the number of free lists is related +to the size of the hash table, but as it is rare to walk a large +number of free list entries we can use far fewer, say 1/32 of the +number of hash buckets. + +It seems tempting to try to reuse the hash implementation which +we use for records here, but we have two ways of searching for +free entries: for allocation we search by size (and possibly +zone) which produces too many clashes for our hash table to +handle well, and for coalescing we search by address. Thus an +array of doubly-linked free lists seems preferable. + +There are various benefits in using per-size free lists (see[sub:TDB-Becomes-Fragmented] +) but it's not clear this would reduce contention in the common +case where all processes are allocating/freeing the same size. +Thus we almost certainly need to divide in other ways: the most +obvious is to divide the file into zones, and using a free list +(or table of free lists) for each. This approximates address +ordering. 
+ +Unfortunately it is difficult to know what heuristics should be +used to determine zone sizes, and our transaction code relies on +being able to create a “recovery area” by simply appending to the +file (difficult if it would need to create a new zone header). +Thus we use a linked-list of free tables; currently we only ever +create one, but if there is more than one we choose one at random +to use. In future we may use heuristics to add new free tables on +contention. We only expand the file when all free tables are +exhausted. + +The basic algorithm is as follows. Freeing is simple: + +1. Identify the correct free list. + +2. Lock the corresponding list. + +3. Re-check the list (we didn't have a lock, sizes could have + changed): relock if necessary. + +4. Place the freed entry in the list. + +Allocation is a little more complicated, as we perform delayed +coalescing at this point: + +1. Pick a free table; usually the previous one. + +2. Lock the corresponding list. + +3. If the top entry is large enough, remove it from the list and + return it. + +4. Otherwise, coalesce entries in the list. If there was no entry + large enough, unlock the list and try the next largest list. + +5. If no list has an entry which meets our needs, try the next + free table. + +6. If no zone satisfies, expand the file. + +This optimizes rapid insert/delete of free list entries by not +coalescing them all the time. First-fit address +ordering seems to be fairly good for keeping fragmentation low +(see[sub:TDB-Becomes-Fragmented]). Note that address ordering +does not need a tailer to coalesce, though if we needed one we +could have one cheaply: see[sub:Records-Incur-A]. + +Each free entry has the free table number in the header: less +than 255. It also contains a doubly-linked list for easy +deletion. + +3.6 TDB Becomes Fragmented + +Much of this is a result of allocation strategy[footnote: +The Memory Fragmentation Problem: Solved? 
Johnstone & Wilson 1995 +ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps +] and deliberate hobbling of coalescing; internal fragmentation +(aka overallocation) is deliberately set at 25%, and external +fragmentation is only cured by the decision to repack the entire +db when a transaction commit needs to enlarge the file. + +3.6.1 Proposed Solution + +The 25% overhead on allocation works in practice for ldb because +indexes tend to expand by one record at a time. This internal +fragmentation can be resolved by having an “expanded” bit in the +header to note entries that have previously expanded, and +allocating more space for them. + +There is a spectrum of possible solutions for external +fragmentation: one is to use a fragmentation-avoiding allocation +strategy such as a best-fit address-order allocator. The other end +of the spectrum would be to use a bump allocator (very fast and +simple) and simply repack the file when we reach the end. + +There are three problems with efficient fragmentation-avoiding +allocators: they are non-trivial, they tend to use a single free +list for each size, and there's no evidence that tdb allocation +patterns will match those recorded for general allocators (though +it seems likely). + +Thus we don't spend too much effort on external fragmentation; we +will be no worse than the current code if we need to repack on +occasion. More effort is spent on reducing freelist contention, +and reducing overhead. 
+ +3.7 Records Incur A 28-Byte Overhead + +Each TDB record has a header as follows: + +struct tdb_record { + + tdb_off_t next; /* offset of the next record in the list +*/ + + tdb_len_t rec_len; /* total byte length of record */ + + tdb_len_t key_len; /* byte length of key */ + + tdb_len_t data_len; /* byte length of data */ + + uint32_t full_hash; /* the full 32 bit hash of the key */ + + uint32_t magic; /* try to catch errors */ + + /* the following union is implied: + + union { + + char record[rec_len]; + + struct { + + char key[key_len]; + + char data[data_len]; + + } + + uint32_t totalsize; (tailer) + + } + + */ + +}; + +Naively, this would double to a 56-byte overhead on a 64 bit +implementation. + +3.7.1 Proposed Solution + +We can use various techniques to reduce this for an allocated +block: + +1. The 'next' pointer is not required, as we are using a flat + hash table. + +2. 'rec_len' can instead be expressed as an addition to key_len + and data_len (it accounts for wasted or overallocated length in + the record). Since the record length is always a multiple of 8, + we can conveniently fit it in 32 bits (representing up to 35 + bits). + +3. 'key_len' and 'data_len' can be reduced. I'm unwilling to + restrict 'data_len' to 32 bits, but instead we can combine the + two into one 64-bit field and using a 5 bit value which + indicates at what bit to divide the two. Keys are unlikely to + scale as fast as data, so I'm assuming a maximum key size of 32 + bits. + +4. 'full_hash' is used to avoid a memcmp on the“miss” case, but + this is diminishing returns after a handful of bits (at 10 + bits, it reduces 99.9% of false memcmp). As an aside, as the + lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. Note that it's + not clear that these bits will be a win, given the extra bits + in the hash table itself (see[sub:Hash-Size-Solution]). + +5. 
'magic' does not need to be enlarged: it currently reflects + one of 5 values (used, free, dead, recovery, and + unused_recovery). It is useful for quick sanity checking + however, and should not be eliminated. + +6. 'tailer' is only used to coalesce free blocks (so a block to + the right can find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of + the following block (and the tailer only exists in free + blocks).[footnote: +This technique from Thomas Standish. Data Structure Techniques. +Addison-Wesley, Reading, Massachusetts, 1980. +] The current proposed coalescing algorithm doesn't need this, + however. + +This produces a 16 byte used header like this: + +struct tdb_used_record { + + uint32_t used_magic : 16, + + + + key_data_divide: 5, + + top_hash: 11; + + uint32_t extra_octets; + + uint64_t key_and_data_len; + +}; + +And a free record like this: + +struct tdb_free_record { + + uint64_t free_magic: 8, + + prev : 56; + + + + uint64_t free_table: 8, + + total_length : 56; + + uint64_t next; + +}; + +Note that by limiting valid offsets to 56 bits, we can pack +everything we need into 3 64-bit words, meaning our minimum +record size is 8 bytes. + +3.7.2 Status + +Complete. + +3.8 Transaction Commit Requires 4 fdatasync + +The current transaction algorithm is: + +1. write_recovery_data(); + +2. sync(); + +3. write_recovery_header(); + +4. sync(); + +5. overwrite_with_new_data(); + +6. sync(); + +7. remove_recovery_header(); + +8. sync(); + +On current ext3, each sync flushes all data to disk, so the next +3 syncs are relatively expensive. But this could become a +performance bottleneck on other filesystems such as ext4. + +3.8.1 Proposed Solution + +Neil Brown points out that this is overzealous, and only one sync +is needed: + +1. Bundle the recovery data, a transaction counter and a strong + checksum of the new data. + +2. Strong checksum that whole bundle. + +3. Store the bundle in the database. + +4. 
Overwrite the oldest of the two recovery pointers in the + header (identified using the transaction counter) with the + offset of this bundle. + +5. sync. + +6. Write the new data to the file. + +Checking for recovery means identifying the latest bundle with a +valid checksum and using the new data checksum to ensure that it +has been applied. This is more expensive than the current check, +but need only be done at open. For running databases, a separate +header field can be used to indicate a transaction in progress; +we need only check for recovery if this is set. + +3.8.2 Status + +Deferred. + +3.9 TDB Does Not Have Snapshot Support + +3.9.1 Proposed Solution + +None. At some point you say “use a real database” (but see[replay-attribute] +). + +But as a thought experiment, if we implemented transactions to +only overwrite free entries (this is tricky: there must not be a +header in each entry which indicates whether it is free, but use +of presence in metadata elsewhere), and a pointer to the hash +table, we could create an entirely new commit without destroying +existing data. Then it would be easy to implement snapshots in a +similar way. + +This would not allow arbitrary changes to the database, such as +tdb_repack does, and would require more space (since we have to +preserve the current and future entries at once). If we used hash +trees rather than one big hash table, we might only have to +rewrite some sections of the hash, too. + +We could then implement snapshots using a similar method, using +multiple different hash tables/free tables. + +3.9.2 Status + +Deferred. + +3.10 Transactions Cannot Operate in Parallel + +This would be useless for ldb, as it hits the index records with +just about every update. It would add significant complexity in +resolving clashes, and cause all the transaction callers to write +their code to loop in the case where the transactions spuriously +failed. + +3.10.1 Proposed Solution + +None (but see[replay-attribute]). 
We could solve a small part of +the problem by providing read-only transactions. These would +allow one write transaction to begin, but it could not commit +until all r/o transactions are done. This would require a new +RO_TRANSACTION_LOCK, which would be upgraded on commit. + +3.10.2 Status + +Deferred. + +3.11 Default Hash Function Is Suboptimal + +The Knuth-inspired multiplicative hash used by tdb is fairly slow +(especially if we expand it to 64 bits), and works best when the +hash bucket size is a prime number (which also means a slow +modulus). In addition, it is highly predictable which could +potentially lead to a Denial of Service attack in some TDB uses. + +3.11.1 Proposed Solution + +The Jenkins lookup3 hash[footnote: +http://burtleburtle.net/bob/c/lookup3.c +] is a fast and superbly-mixing hash. It's used by the Linux +kernel and almost everything else. This has the particular +properties that it takes an initial seed, and produces two 32 bit +hash numbers, which we can combine into a 64-bit hash. + +The seed should be created at tdb-creation time from some random +source, and placed in the header. This is far from foolproof, but +adds a little bit of protection against hash bombing. + +3.11.2 Status + +Complete. + +3.12 Reliable Traversal Adds Complexity + +We lock a record during traversal iteration, and try to grab that +lock in the delete code. If that grab on delete fails, we simply +mark it deleted and continue onwards; traversal checks for this +condition and does the delete when it moves off the record. + +If traversal terminates, the dead record may be left +indefinitely. + +3.12.1 Proposed Solution + +Remove reliability guarantees; see[traverse-Proposed-Solution]. + +3.12.2 Status + +Complete. + +3.13 Fcntl Locking Adds Overhead + +Placing a fcntl lock means a system call, as does removing one. +This is actually one reason why transactions can be faster +(everything is locked once at transaction start). 
In the +uncontended case, this overhead can theoretically be eliminated. + +3.13.1 Proposed Solution + +None. + +We tried this before with spinlock support, in the early days of +TDB, and it didn't make much difference except in manufactured +benchmarks. + +We could use spinlocks (with futex kernel support under Linux), +but it means that we lose automatic cleanup when a process dies +with a lock. There is a method of auto-cleanup under Linux, but +it's not supported by other operating systems. We could +reintroduce a clear-if-first-style lock and sweep for dead +futexes on open, but that wouldn't help the normal case of one +concurrent opener dying. Increasingly elaborate repair schemes +could be considered, but they require an ABI change (everyone +must use them) anyway, so there's no need to do this at the same +time as everything else. + +3.14 Some Transactions Don't Require Durability + +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for +normal (fast) usage, and occasionally empties the results into a +transactional TDB. This kind of usage prioritizes performance +over durability: as long as we are consistent, data can be lost. + +This would be more neatly implemented inside tdb: a“soft” +transaction commit (ie. syncless) which meant that data may be +reverted on a crash. + +3.14.1 Proposed Solution + +None. + +Unfortunately any transaction scheme which overwrites old data +requires a sync before that overwrite to avoid the possibility of +corruption. + +It seems possible to use a scheme similar to that described in[sub:TDB-Does-Not] +,where transactions are committed without overwriting existing +data, and an array of top-level pointers were available in the +header. If the transaction is“soft” then we would not need a sync +at all: existing processes would pick up the new hash table and +free list and work with that. 
+ +At some later point, a sync would allow recovery of the old data +into the free lists (perhaps when the array of top-level pointers +filled). On crash, tdb_open() would examine the array of top +levels, and apply the transactions until it encountered an +invalid checksum. + +3.15 Tracing Is Fragile, Replay Is External + +The current TDB has compile-time-enabled tracing code, but it +often breaks as it is not enabled by default. In a similar way, +the ctdb code has an external wrapper which does replay tracing +so it can coordinate cluster-wide transactions. + +3.15.1 Proposed Solution + +Tridge points out that an attribute can be later added to +tdb_open (see[attributes]) to provide replay/trace hooks, which +could become the basis for this and future parallel transactions +and snapshot support. + +3.15.2 Status + +Deferred. diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/free.c b/junkcode/rusty@rustcorp.com.au-ntdb/free.c new file mode 100644 index 00000000..0d0e25f1 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/free.c @@ -0,0 +1,972 @@ + /* + Trivial Database 2: free list/block handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include +#include +#include +#include + +static unsigned fls64(uint64_t val) +{ + return ilog64(val); +} + +/* In which bucket would we find a particular record size? 
(ignoring header) */ +unsigned int size_to_bucket(ntdb_len_t data_len) +{ + unsigned int bucket; + + /* We can't have records smaller than this. */ + assert(data_len >= NTDB_MIN_DATA_LEN); + + /* Ignoring the header... */ + if (data_len - NTDB_MIN_DATA_LEN <= 64) { + /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */ + bucket = (data_len - NTDB_MIN_DATA_LEN) / 8; + } else { + /* After that we go power of 2. */ + bucket = fls64(data_len - NTDB_MIN_DATA_LEN) + 2; + } + + if (unlikely(bucket >= NTDB_FREE_BUCKETS)) + bucket = NTDB_FREE_BUCKETS - 1; + return bucket; +} + +ntdb_off_t first_ftable(struct ntdb_context *ntdb) +{ + return ntdb_read_off(ntdb, offsetof(struct ntdb_header, free_table)); +} + +ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable) +{ + return ntdb_read_off(ntdb, ftable + offsetof(struct ntdb_freetable,next)); +} + +enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb) +{ + /* Use reservoir sampling algorithm to select a free list at random. */ + unsigned int rnd, max = 0, count = 0; + ntdb_off_t off; + + ntdb->ftable_off = off = first_ftable(ntdb); + ntdb->ftable = 0; + + while (off) { + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + + rnd = random(); + if (rnd >= max) { + ntdb->ftable_off = off; + ntdb->ftable = count; + max = rnd; + } + + off = next_ftable(ntdb, off); + count++; + } + return NTDB_SUCCESS; +} + +/* Offset of a given bucket. */ +ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket) +{ + return ftable_off + offsetof(struct ntdb_freetable, buckets) + + bucket * sizeof(ntdb_off_t); +} + +/* Returns free_buckets + 1, or list number to search, or -ve error. */ +static ntdb_off_t find_free_head(struct ntdb_context *ntdb, + ntdb_off_t ftable_off, + ntdb_off_t bucket) +{ + /* Speculatively search for a non-zero bucket. 
*/ + return ntdb_find_nonzero_off(ntdb, bucket_off(ftable_off, 0), + bucket, NTDB_FREE_BUCKETS); +} + +static void check_list(struct ntdb_context *ntdb, ntdb_off_t b_off) +{ +#ifdef CCAN_NTDB_DEBUG + ntdb_off_t off, prev = 0, first; + struct ntdb_free_record r; + + first = off = (ntdb_read_off(ntdb, b_off) & NTDB_OFF_MASK); + while (off != 0) { + ntdb_read_convert(ntdb, off, &r, sizeof(r)); + if (frec_magic(&r) != NTDB_FREE_MAGIC) + abort(); + if (prev && frec_prev(&r) != prev) + abort(); + prev = off; + off = r.next; + } + + if (first) { + ntdb_read_convert(ntdb, first, &r, sizeof(r)); + if (frec_prev(&r) != prev) + abort(); + } +#endif +} + +/* Remove from free bucket. */ +static enum NTDB_ERROR remove_from_list(struct ntdb_context *ntdb, + ntdb_off_t b_off, ntdb_off_t r_off, + const struct ntdb_free_record *r) +{ + ntdb_off_t off, prev_next, head; + enum NTDB_ERROR ecode; + + /* Is this only element in list? Zero out bucket, and we're done. */ + if (frec_prev(r) == r_off) + return ntdb_write_off(ntdb, b_off, 0); + + /* off = &r->prev->next */ + off = frec_prev(r) + offsetof(struct ntdb_free_record, next); + + /* Get prev->next */ + prev_next = ntdb_read_off(ntdb, off); + if (NTDB_OFF_IS_ERR(prev_next)) + return NTDB_OFF_TO_ERR(prev_next); + + /* If prev->next == 0, we were head: update bucket to point to next. */ + if (prev_next == 0) { + /* We must preserve upper bits. 
*/ + head = ntdb_read_off(ntdb, b_off); + if (NTDB_OFF_IS_ERR(head)) + return NTDB_OFF_TO_ERR(head); + + if ((head & NTDB_OFF_MASK) != r_off) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "remove_from_list:" + " %llu head %llu on list %llu", + (long long)r_off, + (long long)head, + (long long)b_off); + } + head = ((head & ~NTDB_OFF_MASK) | r->next); + ecode = ntdb_write_off(ntdb, b_off, head); + if (ecode != NTDB_SUCCESS) + return ecode; + } else { + /* r->prev->next = r->next */ + ecode = ntdb_write_off(ntdb, off, r->next); + if (ecode != NTDB_SUCCESS) + return ecode; + } + + /* If we were the tail, off = &head->prev. */ + if (r->next == 0) { + head = ntdb_read_off(ntdb, b_off); + if (NTDB_OFF_IS_ERR(head)) + return NTDB_OFF_TO_ERR(head); + head &= NTDB_OFF_MASK; + off = head + offsetof(struct ntdb_free_record, magic_and_prev); + } else { + /* off = &r->next->prev */ + off = r->next + offsetof(struct ntdb_free_record, + magic_and_prev); + } + +#ifdef CCAN_NTDB_DEBUG + /* *off == r */ + if ((ntdb_read_off(ntdb, off) & NTDB_OFF_MASK) != r_off) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "remove_from_list:" + " %llu bad prev in list %llu", + (long long)r_off, (long long)b_off); + } +#endif + /* r->next->prev = r->prev */ + return ntdb_write_off(ntdb, off, r->magic_and_prev); +} + +/* Enqueue in this free bucket: sets coalesce if we've added 128 + * entries to it. 
*/ +static enum NTDB_ERROR enqueue_in_free(struct ntdb_context *ntdb, + ntdb_off_t b_off, + ntdb_off_t off, + ntdb_len_t len, + bool *coalesce) +{ + struct ntdb_free_record new; + enum NTDB_ERROR ecode; + ntdb_off_t prev, head; + uint64_t magic = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL)); + + head = ntdb_read_off(ntdb, b_off); + if (NTDB_OFF_IS_ERR(head)) + return NTDB_OFF_TO_ERR(head); + + /* We only need to set ftable_and_len; rest is set in enqueue_in_free */ + new.ftable_and_len = ((uint64_t)ntdb->ftable + << (64 - NTDB_OFF_UPPER_STEAL)) + | len; + + /* new->next = head. */ + new.next = (head & NTDB_OFF_MASK); + + /* First element? Prev points to ourselves. */ + if (!new.next) { + new.magic_and_prev = (magic | off); + } else { + /* new->prev = next->prev */ + prev = ntdb_read_off(ntdb, + new.next + offsetof(struct ntdb_free_record, + magic_and_prev)); + new.magic_and_prev = prev; + if (frec_magic(&new) != NTDB_FREE_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "enqueue_in_free: %llu bad head" + " prev %llu", + (long long)new.next, + (long long)prev); + } + /* next->prev = new. 
*/ + ecode = ntdb_write_off(ntdb, new.next + + offsetof(struct ntdb_free_record, + magic_and_prev), + off | magic); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + +#ifdef CCAN_NTDB_DEBUG + prev = ntdb_read_off(ntdb, frec_prev(&new) + + offsetof(struct ntdb_free_record, next)); + if (prev != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "enqueue_in_free:" + " %llu bad tail next ptr %llu", + (long long)frec_prev(&new) + + offsetof(struct ntdb_free_record, + next), + (long long)prev); + } +#endif + } + + /* Update enqueue count, but don't set high bit: see NTDB_OFF_IS_ERR */ + if (*coalesce) + head += (1ULL << (64 - NTDB_OFF_UPPER_STEAL)); + head &= ~(NTDB_OFF_MASK | (1ULL << 63)); + head |= off; + + ecode = ntdb_write_off(ntdb, b_off, head); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* It's time to coalesce if counter wrapped. */ + if (*coalesce) + *coalesce = ((head & ~NTDB_OFF_MASK) == 0); + + return ntdb_write_convert(ntdb, off, &new, sizeof(new)); +} + +static ntdb_off_t ftable_offset(struct ntdb_context *ntdb, unsigned int ftable) +{ + ntdb_off_t off; + unsigned int i; + + if (likely(ntdb->ftable == ftable)) + return ntdb->ftable_off; + + off = first_ftable(ntdb); + for (i = 0; i < ftable; i++) { + if (NTDB_OFF_IS_ERR(off)) { + break; + } + off = next_ftable(ntdb, off); + } + return off; +} + +/* Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and + * need to blatt the *protect record (which is set to an error). 
*/ +static ntdb_len_t coalesce(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_off_t b_off, + ntdb_len_t data_len, + ntdb_off_t *protect) +{ + ntdb_off_t end; + struct ntdb_free_record rec; + enum NTDB_ERROR ecode; + + ntdb->stats.alloc_coalesce_tried++; + end = off + sizeof(struct ntdb_used_record) + data_len; + + while (end < ntdb->file->map_size) { + const struct ntdb_free_record *r; + ntdb_off_t nb_off; + unsigned ftable, bucket; + + r = ntdb_access_read(ntdb, end, sizeof(*r), true); + if (NTDB_PTR_IS_ERR(r)) { + ecode = NTDB_PTR_ERR(r); + goto err; + } + + if (frec_magic(r) != NTDB_FREE_MAGIC + || frec_ftable(r) == NTDB_FTABLE_NONE) { + ntdb_access_release(ntdb, r); + break; + } + + ftable = frec_ftable(r); + bucket = size_to_bucket(frec_len(r)); + nb_off = ftable_offset(ntdb, ftable); + if (NTDB_OFF_IS_ERR(nb_off)) { + ntdb_access_release(ntdb, r); + ecode = NTDB_OFF_TO_ERR(nb_off); + goto err; + } + nb_off = bucket_off(nb_off, bucket); + ntdb_access_release(ntdb, r); + + /* We may be violating lock order here, so best effort. */ + if (ntdb_lock_free_bucket(ntdb, nb_off, NTDB_LOCK_NOWAIT) + != NTDB_SUCCESS) { + ntdb->stats.alloc_coalesce_lockfail++; + break; + } + + /* Now we have lock, re-check. */ + ecode = ntdb_read_convert(ntdb, end, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + ntdb_unlock_free_bucket(ntdb, nb_off); + goto err; + } + + if (unlikely(frec_magic(&rec) != NTDB_FREE_MAGIC)) { + ntdb->stats.alloc_coalesce_race++; + ntdb_unlock_free_bucket(ntdb, nb_off); + break; + } + + if (unlikely(frec_ftable(&rec) != ftable) + || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) { + ntdb->stats.alloc_coalesce_race++; + ntdb_unlock_free_bucket(ntdb, nb_off); + break; + } + + /* Did we just mess up a record you were hoping to use? 
*/ + if (end == *protect) { + ntdb->stats.alloc_coalesce_iterate_clash++; + *protect = NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST); + } + + ecode = remove_from_list(ntdb, nb_off, end, &rec); + check_list(ntdb, nb_off); + if (ecode != NTDB_SUCCESS) { + ntdb_unlock_free_bucket(ntdb, nb_off); + goto err; + } + + end += sizeof(struct ntdb_used_record) + frec_len(&rec); + ntdb_unlock_free_bucket(ntdb, nb_off); + ntdb->stats.alloc_coalesce_num_merged++; + } + + /* Didn't find any adjacent free? */ + if (end == off + sizeof(struct ntdb_used_record) + data_len) + return 0; + + /* Before we expand, check this isn't one you wanted protected? */ + if (off == *protect) { + *protect = NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS); + ntdb->stats.alloc_coalesce_iterate_clash++; + } + + /* OK, expand initial record */ + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + goto err; + } + + if (frec_len(&rec) != data_len) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "coalesce: expected data len %zu not %zu", + (size_t)data_len, (size_t)frec_len(&rec)); + goto err; + } + + ecode = remove_from_list(ntdb, b_off, off, &rec); + check_list(ntdb, b_off); + if (ecode != NTDB_SUCCESS) { + goto err; + } + + /* Try locking violation first. We don't allow coalesce recursion! */ + ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_NOWAIT, false); + if (ecode != NTDB_SUCCESS) { + /* Need to drop lock. Can't rely on anything stable. */ + ntdb->stats.alloc_coalesce_lockfail++; + *protect = NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT); + + /* We have to drop this to avoid deadlocks, so make sure record + * doesn't get coalesced by someone else! 
*/ + rec.ftable_and_len = (NTDB_FTABLE_NONE + << (64 - NTDB_OFF_UPPER_STEAL)) + | (end - off - sizeof(struct ntdb_used_record)); + ecode = ntdb_write_off(ntdb, + off + offsetof(struct ntdb_free_record, + ftable_and_len), + rec.ftable_and_len); + if (ecode != NTDB_SUCCESS) { + goto err; + } + + ntdb_unlock_free_bucket(ntdb, b_off); + + ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_WAIT, + false); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + } else if (NTDB_OFF_IS_ERR(*protect)) { + /* For simplicity, we always drop lock if they can't continue */ + ntdb_unlock_free_bucket(ntdb, b_off); + } + ntdb->stats.alloc_coalesce_succeeded++; + + /* Return usable length. */ + return end - off - sizeof(struct ntdb_used_record); + +err: + /* To unify error paths, we *always* unlock bucket on error. */ + ntdb_unlock_free_bucket(ntdb, b_off); + return NTDB_ERR_TO_OFF(ecode); +} + +/* List is locked: we unlock it. */ +static enum NTDB_ERROR coalesce_list(struct ntdb_context *ntdb, + ntdb_off_t ftable_off, + ntdb_off_t b_off, + unsigned int limit) +{ + enum NTDB_ERROR ecode; + ntdb_off_t off; + + off = ntdb_read_off(ntdb, b_off); + if (NTDB_OFF_IS_ERR(off)) { + ecode = NTDB_OFF_TO_ERR(off); + goto unlock_err; + } + /* A little bit of paranoia: counter should be 0. */ + off &= NTDB_OFF_MASK; + + while (off && limit--) { + struct ntdb_free_record rec; + ntdb_len_t coal; + ntdb_off_t next; + + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + next = rec.next; + coal = coalesce(ntdb, off, b_off, frec_len(&rec), &next); + if (NTDB_OFF_IS_ERR(coal)) { + /* This has already unlocked on error. */ + return NTDB_OFF_TO_ERR(coal); + } + if (NTDB_OFF_IS_ERR(next)) { + /* Coalescing had to unlock, so stop. */ + return NTDB_SUCCESS; + } + /* Keep going if we're doing well... 
*/ + limit += size_to_bucket(coal / 16 + NTDB_MIN_DATA_LEN); + off = next; + } + + /* Now, move those elements to the tail of the list so we get something + * else next time. */ + if (off) { + struct ntdb_free_record oldhrec, newhrec, oldtrec, newtrec; + ntdb_off_t oldhoff, oldtoff, newtoff; + + /* The record we were up to is the new head. */ + ecode = ntdb_read_convert(ntdb, off, &newhrec, sizeof(newhrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + /* Get the new tail. */ + newtoff = frec_prev(&newhrec); + ecode = ntdb_read_convert(ntdb, newtoff, &newtrec, + sizeof(newtrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + /* Get the old head. */ + oldhoff = ntdb_read_off(ntdb, b_off); + if (NTDB_OFF_IS_ERR(oldhoff)) { + ecode = NTDB_OFF_TO_ERR(oldhoff); + goto unlock_err; + } + + /* This could happen if they all coalesced away. */ + if (oldhoff == off) + goto out; + + ecode = ntdb_read_convert(ntdb, oldhoff, &oldhrec, + sizeof(oldhrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + /* Get the old tail. */ + oldtoff = frec_prev(&oldhrec); + ecode = ntdb_read_convert(ntdb, oldtoff, &oldtrec, + sizeof(oldtrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + /* Old tail's next points to old head. */ + oldtrec.next = oldhoff; + + /* Old head's prev points to old tail. */ + oldhrec.magic_and_prev + = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL)) + | oldtoff; + + /* New tail's next is 0. */ + newtrec.next = 0; + + /* Write out the modified versions. */ + ecode = ntdb_write_convert(ntdb, oldtoff, &oldtrec, + sizeof(oldtrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + ecode = ntdb_write_convert(ntdb, oldhoff, &oldhrec, + sizeof(oldhrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + ecode = ntdb_write_convert(ntdb, newtoff, &newtrec, + sizeof(newtrec)); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + + /* And finally link in new head. 
*/ + ecode = ntdb_write_off(ntdb, b_off, off); + if (ecode != NTDB_SUCCESS) + goto unlock_err; + } +out: + ntdb_unlock_free_bucket(ntdb, b_off); + return NTDB_SUCCESS; + +unlock_err: + ntdb_unlock_free_bucket(ntdb, b_off); + return ecode; +} + +/* List must not be locked if coalesce_ok is set. */ +enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len_with_header, + enum ntdb_lock_flags waitflag, + bool coalesce_ok) +{ + ntdb_off_t b_off; + ntdb_len_t len; + enum NTDB_ERROR ecode; + + assert(len_with_header >= sizeof(struct ntdb_free_record)); + + len = len_with_header - sizeof(struct ntdb_used_record); + + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len)); + ecode = ntdb_lock_free_bucket(ntdb, b_off, waitflag); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + ecode = enqueue_in_free(ntdb, b_off, off, len, &coalesce_ok); + check_list(ntdb, b_off); + + /* Coalescing unlocks free list. */ + if (!ecode && coalesce_ok) + ecode = coalesce_list(ntdb, ntdb->ftable_off, b_off, 2); + else + ntdb_unlock_free_bucket(ntdb, b_off); + return ecode; +} + +static size_t adjust_size(size_t keylen, size_t datalen) +{ + size_t size = keylen + datalen; + + if (size < NTDB_MIN_DATA_LEN) + size = NTDB_MIN_DATA_LEN; + + /* Round to next uint64_t boundary. */ + return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL); +} + +/* If we have enough left over to be useful, split that off. */ +static size_t record_leftover(size_t keylen, size_t datalen, + bool want_extra, size_t total_len) +{ + ssize_t leftover; + + if (want_extra) + datalen += datalen / 2; + leftover = total_len - adjust_size(keylen, datalen); + + if (leftover < (ssize_t)sizeof(struct ntdb_free_record)) + return 0; + + return leftover; +} + +/* We need size bytes to put our key and data in. 
*/ +static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb, + ntdb_off_t ftable_off, + ntdb_off_t bucket, + size_t keylen, size_t datalen, + bool want_extra, + unsigned magic) +{ + ntdb_off_t off, b_off,best_off; + struct ntdb_free_record best = { 0 }; + double multiplier; + size_t size = adjust_size(keylen, datalen); + enum NTDB_ERROR ecode; + + ntdb->stats.allocs++; + b_off = bucket_off(ftable_off, bucket); + + /* FIXME: Try non-blocking wait first, to measure contention. */ + /* Lock this bucket. */ + ecode = ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + + best.ftable_and_len = -1ULL; + best_off = 0; + + /* Get slack if we're after extra. */ + if (want_extra) + multiplier = 1.5; + else + multiplier = 1.0; + + /* Walk the list to see if any are large enough, getting less fussy + * as we go. */ + off = ntdb_read_off(ntdb, b_off); + if (NTDB_OFF_IS_ERR(off)) { + ecode = NTDB_OFF_TO_ERR(off); + goto unlock_err; + } + off &= NTDB_OFF_MASK; + + while (off) { + const struct ntdb_free_record *r; + ntdb_off_t next; + + r = ntdb_access_read(ntdb, off, sizeof(*r), true); + if (NTDB_PTR_IS_ERR(r)) { + ecode = NTDB_PTR_ERR(r); + goto unlock_err; + } + + if (frec_magic(r) != NTDB_FREE_MAGIC) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "lock_and_alloc:" + " %llu non-free 0x%llx", + (long long)off, + (long long)r->magic_and_prev); + ntdb_access_release(ntdb, r); + goto unlock_err; + } + + if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) { + best_off = off; + best = *r; + } + + if (frec_len(&best) <= size * multiplier && best_off) { + ntdb_access_release(ntdb, r); + break; + } + + multiplier *= 1.01; + + next = r->next; + ntdb_access_release(ntdb, r); + off = next; + } + + /* If we found anything at all, use it. */ + if (best_off) { + struct ntdb_used_record rec; + size_t leftover; + + /* We're happy with this size: take it. 
*/ + ecode = remove_from_list(ntdb, b_off, best_off, &best); + check_list(ntdb, b_off); + if (ecode != NTDB_SUCCESS) { + goto unlock_err; + } + + leftover = record_leftover(keylen, datalen, want_extra, + frec_len(&best)); + + assert(keylen + datalen + leftover <= frec_len(&best)); + /* We need to mark non-free before we drop lock, otherwise + * coalesce() could try to merge it! */ + ecode = set_header(ntdb, &rec, magic, keylen, datalen, + frec_len(&best) - leftover); + if (ecode != NTDB_SUCCESS) { + goto unlock_err; + } + + ecode = ntdb_write_convert(ntdb, best_off, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + goto unlock_err; + } + + /* For futureproofing, we put a 0 in any unused space. */ + if (rec_extra_padding(&rec)) { + ecode = ntdb->io->twrite(ntdb, best_off + sizeof(rec) + + keylen + datalen, "", 1); + if (ecode != NTDB_SUCCESS) { + goto unlock_err; + } + } + + /* Bucket of leftover will be <= current bucket, so nested + * locking is allowed. */ + if (leftover) { + ntdb->stats.alloc_leftover++; + ecode = add_free_record(ntdb, + best_off + sizeof(rec) + + frec_len(&best) - leftover, + leftover, NTDB_LOCK_WAIT, false); + if (ecode != NTDB_SUCCESS) { + best_off = NTDB_ERR_TO_OFF(ecode); + } + } + ntdb_unlock_free_bucket(ntdb, b_off); + + return best_off; + } + + ntdb_unlock_free_bucket(ntdb, b_off); + return 0; + +unlock_err: + ntdb_unlock_free_bucket(ntdb, b_off); + return NTDB_ERR_TO_OFF(ecode); +} + +/* Get a free block from current free list, or 0 if none, -ve on error. */ +static ntdb_off_t get_free(struct ntdb_context *ntdb, + size_t keylen, size_t datalen, bool want_extra, + unsigned magic) +{ + ntdb_off_t off, ftable_off; + ntdb_off_t start_b, b, ftable; + bool wrapped = false; + + /* If they are growing, add 50% to get to higher bucket. 
*/ + if (want_extra) + start_b = size_to_bucket(adjust_size(keylen, + datalen + datalen / 2)); + else + start_b = size_to_bucket(adjust_size(keylen, datalen)); + + ftable_off = ntdb->ftable_off; + ftable = ntdb->ftable; + while (!wrapped || ftable_off != ntdb->ftable_off) { + /* Start at exact size bucket, and search up... */ + for (b = find_free_head(ntdb, ftable_off, start_b); + b < NTDB_FREE_BUCKETS; + b = find_free_head(ntdb, ftable_off, b + 1)) { + /* Try getting one from list. */ + off = lock_and_alloc(ntdb, ftable_off, + b, keylen, datalen, want_extra, + magic); + if (NTDB_OFF_IS_ERR(off)) + return off; + if (off != 0) { + if (b == start_b) + ntdb->stats.alloc_bucket_exact++; + if (b == NTDB_FREE_BUCKETS - 1) + ntdb->stats.alloc_bucket_max++; + /* Worked? Stay using this list. */ + ntdb->ftable_off = ftable_off; + ntdb->ftable = ftable; + return off; + } + /* Didn't work. Try next bucket. */ + } + + if (NTDB_OFF_IS_ERR(b)) { + return b; + } + + /* Hmm, try next table. */ + ftable_off = next_ftable(ntdb, ftable_off); + if (NTDB_OFF_IS_ERR(ftable_off)) { + return ftable_off; + } + ftable++; + + if (ftable_off == 0) { + wrapped = true; + ftable_off = first_ftable(ntdb); + if (NTDB_OFF_IS_ERR(ftable_off)) { + return ftable_off; + } + ftable = 0; + } + } + + return 0; +} + +enum NTDB_ERROR set_header(struct ntdb_context *ntdb, + struct ntdb_used_record *rec, + unsigned magic, uint64_t keylen, uint64_t datalen, + uint64_t actuallen) +{ + uint64_t keybits = (fls64(keylen) + 1) / 2; + + rec->magic_and_meta = ((actuallen - (keylen + datalen)) << 11) + | (keybits << 43) + | ((uint64_t)magic << 48); + rec->key_and_data_len = (keylen | (datalen << (keybits*2))); + + /* Encoding can fail on big values. 
*/ + if (rec_key_length(rec) != keylen + || rec_data_length(rec) != datalen + || rec_extra_padding(rec) != actuallen - (keylen + datalen)) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "Could not encode k=%llu,d=%llu,a=%llu", + (long long)keylen, (long long)datalen, + (long long)actuallen); + } + return NTDB_SUCCESS; +} + +/* You need 'size', this tells you how much you should expand by. */ +ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size) +{ + ntdb_off_t new_size, top_size; + + /* limit size in order to avoid using up huge amounts of memory for + * in memory tdbs if an oddball huge record creeps in */ + if (size > 100 * 1024) { + top_size = map_size + size * 2; + } else { + top_size = map_size + size * 100; + } + + /* always make room for at least top_size more records, and at + least 25% more space. if the DB is smaller than 100MiB, + otherwise grow it by 10% only. */ + if (map_size > 100 * 1024 * 1024) { + new_size = map_size * 1.10; + } else { + new_size = map_size * 1.25; + } + + if (new_size < top_size) + new_size = top_size; + + /* We always make the file a multiple of transaction page + * size. This guarantees that the transaction recovery area + * is always aligned, otherwise the transaction code can overwrite + * itself. */ + new_size = (new_size + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1); + return new_size - map_size; +} + +/* Expand the database. */ +static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size) +{ + uint64_t old_size; + ntdb_len_t wanted; + enum NTDB_ERROR ecode; + + /* Need to hold a hash lock to expand DB: transactions rely on it. */ + if (!(ntdb->flags & NTDB_NOLOCK) + && !ntdb->file->allrecord_lock.count && !ntdb_has_hash_locks(ntdb)) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_expand: must hold lock during expand"); + } + + /* Only one person can expand file at a time. 
*/ + ecode = ntdb_lock_expand(ntdb, F_WRLCK); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* Someone else may have expanded the file, so retry. */ + old_size = ntdb->file->map_size; + ntdb_oob(ntdb, ntdb->file->map_size, 1, true); + if (ntdb->file->map_size != old_size) { + ntdb_unlock_expand(ntdb, F_WRLCK); + return NTDB_SUCCESS; + } + + /* We need room for the record header too. */ + size = adjust_size(0, sizeof(struct ntdb_used_record) + size); + /* Overallocate. */ + wanted = ntdb_expand_adjust(old_size, size); + + ecode = ntdb->io->expand_file(ntdb, wanted); + if (ecode != NTDB_SUCCESS) { + ntdb_unlock_expand(ntdb, F_WRLCK); + return ecode; + } + + /* We need to drop this lock before adding free record. */ + ntdb_unlock_expand(ntdb, F_WRLCK); + + ntdb->stats.expands++; + return add_free_record(ntdb, old_size, wanted, NTDB_LOCK_WAIT, true); +} + +/* This won't fail: it will expand the database if it has to. */ +ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, + unsigned magic, bool growing) +{ + ntdb_off_t off; + + for (;;) { + enum NTDB_ERROR ecode; + off = get_free(ntdb, keylen, datalen, growing, magic); + if (likely(off != 0)) + break; + + ecode = ntdb_expand(ntdb, adjust_size(keylen, datalen)); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + } + + return off; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/hash.c b/junkcode/rusty@rustcorp.com.au-ntdb/hash.c new file mode 100644 index 00000000..b223668d --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/hash.c @@ -0,0 +1,624 @@ + /* + Trivial Database 2: hash handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include + +/* Default hash function. */ +uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed, + void *unused) +{ + return hash_stable((const unsigned char *)key, length, seed); +} + +uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len) +{ + return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data); +} + +static ntdb_bool_err key_matches(struct ntdb_context *ntdb, + const struct ntdb_used_record *rec, + ntdb_off_t off, + const NTDB_DATA *key, + const char **rptr) +{ + ntdb_bool_err ret = false; + const char *rkey; + + if (rec_key_length(rec) != key->dsize) { + ntdb->stats.compare_wrong_keylen++; + return ret; + } + + rkey = ntdb_access_read(ntdb, off + sizeof(*rec), + key->dsize + rec_data_length(rec), false); + if (NTDB_PTR_IS_ERR(rkey)) { + return (ntdb_bool_err)NTDB_PTR_ERR(rkey); + } + if (memcmp(rkey, key->dptr, key->dsize) == 0) { + if (rptr) { + *rptr = rkey; + } else { + ntdb_access_release(ntdb, rkey); + } + return true; + } + ntdb->stats.compare_wrong_keycmp++; + ntdb_access_release(ntdb, rkey); + return ret; +} + +/* Does entry match? */ +static ntdb_bool_err match(struct ntdb_context *ntdb, + uint32_t hash, + const NTDB_DATA *key, + ntdb_off_t val, + struct ntdb_used_record *rec, + const char **rptr) +{ + ntdb_off_t off; + enum NTDB_ERROR ecode; + + ntdb->stats.compares++; + + /* Top bits of offset == next bits of hash. 
*/ + if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL) + != bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) { + ntdb->stats.compare_wrong_offsetbits++; + return false; + } + + off = val & NTDB_OFF_MASK; + ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec)); + if (ecode != NTDB_SUCCESS) { + return (ntdb_bool_err)ecode; + } + + return key_matches(ntdb, rec, off, key, rptr); +} + +static bool is_chain(ntdb_off_t val) +{ + return val & (1ULL << NTDB_OFF_CHAIN_BIT); +} + +static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx) +{ + return base + sizeof(struct ntdb_used_record) + + idx * sizeof(ntdb_off_t); +} + +/* This is the core routine which searches the hashtable for an entry. + * On error, no locks are held and -ve is returned. + * Otherwise, hinfo is filled in. + * If not found, the return value is 0. + * If found, the return value is the offset, and *rec is the record. */ +ntdb_off_t find_and_lock(struct ntdb_context *ntdb, + NTDB_DATA key, + int ltype, + struct hash_info *h, + struct ntdb_used_record *rec, + const char **rptr) +{ + ntdb_off_t off, val; + const ntdb_off_t *arr = NULL; + ntdb_len_t i; + bool found_empty; + enum NTDB_ERROR ecode; + struct ntdb_used_record chdr; + ntdb_bool_err berr; + + h->h = ntdb_hash(ntdb, key.dptr, key.dsize); + + h->table = NTDB_HASH_OFFSET; + h->table_size = 1 << ntdb->hash_bits; + h->bucket = bits_from(h->h, 0, ntdb->hash_bits); + h->old_val = 0; + + ecode = ntdb_lock_hash(ntdb, h->bucket, ltype); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + + off = hbucket_off(h->table, h->bucket); + val = ntdb_read_off(ntdb, off); + if (NTDB_OFF_IS_ERR(val)) { + ecode = NTDB_OFF_TO_ERR(val); + goto fail; + } + + /* Directly in hash table? 
*/ + if (!likely(is_chain(val))) { + if (val) { + berr = match(ntdb, h->h, &key, val, rec, rptr); + if (berr < 0) { + ecode = NTDB_OFF_TO_ERR(berr); + goto fail; + } + if (berr) { + return val & NTDB_OFF_MASK; + } + /* If you want to insert here, make a chain. */ + h->old_val = val; + } + return 0; + } + + /* Nope? Iterate through chain. */ + h->table = val & NTDB_OFF_MASK; + + ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + + if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "find_and_lock:" + " corrupt record %#x at %llu", + rec_magic(&chdr), (long long)off); + goto fail; + } + + h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t); + + arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0), + rec_data_length(&chdr), true); + if (NTDB_PTR_IS_ERR(arr)) { + ecode = NTDB_PTR_ERR(arr); + goto fail; + } + + found_empty = false; + for (i = 0; i < h->table_size; i++) { + if (arr[i] == 0) { + if (!found_empty) { + h->bucket = i; + found_empty = true; + } + } else { + berr = match(ntdb, h->h, &key, arr[i], rec, rptr); + if (berr < 0) { + ecode = NTDB_OFF_TO_ERR(berr); + ntdb_access_release(ntdb, arr); + goto fail; + } + if (berr) { + /* We found it! */ + h->bucket = i; + off = arr[i] & NTDB_OFF_MASK; + ntdb_access_release(ntdb, arr); + return off; + } + } + } + if (!found_empty) { + /* Set to any non-zero value */ + h->old_val = 1; + h->bucket = i; + } + + ntdb_access_release(ntdb, arr); + return 0; + +fail: + ntdb_unlock_hash(ntdb, h->bucket, ltype); + return NTDB_ERR_TO_OFF(ecode); +} + +static ntdb_off_t encode_offset(const struct ntdb_context *ntdb, + ntdb_off_t new_off, uint32_t hash) +{ + ntdb_off_t extra; + + assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0); + assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0); + /* We pack extra hash bits into the upper bits of the offset. 
*/ + extra = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL); + extra <<= (64 - NTDB_OFF_UPPER_STEAL); + + return new_off | extra; +} + +/* Simply overwrite the hash entry we found before. */ +enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, + const struct hash_info *h, + ntdb_off_t new_off) +{ + return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), + encode_offset(ntdb, new_off, h->h)); +} + +enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, + const struct hash_info *h) +{ + return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), 0); +} + + +enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, + const struct hash_info *h, + ntdb_off_t new_off) +{ + enum NTDB_ERROR ecode; + ntdb_off_t chain; + struct ntdb_used_record chdr; + const ntdb_off_t *old; + ntdb_off_t *new; + + /* We hit an empty bucket during search? That's where it goes. */ + if (!h->old_val) { + return replace_in_hash(ntdb, h, new_off); + } + + /* Full at top-level? Create a 2-element chain. */ + if (h->table == NTDB_HASH_OFFSET) { + ntdb_off_t pair[2]; + + /* One element is old value, the other is the new value. */ + pair[0] = h->old_val; + pair[1] = encode_offset(ntdb, new_off, h->h); + + chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true); + if (NTDB_OFF_IS_ERR(chain)) { + return NTDB_OFF_TO_ERR(chain); + } + ecode = ntdb_write_convert(ntdb, + chain + + sizeof(struct ntdb_used_record), + pair, sizeof(pair)); + if (ecode == NTDB_SUCCESS) { + ecode = ntdb_write_off(ntdb, + hbucket_off(h->table, h->bucket), + chain + | (1ULL << NTDB_OFF_CHAIN_BIT)); + } + return ecode; + } + + /* Full bucket. Expand. */ + ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (rec_extra_padding(&chdr) >= sizeof(new_off)) { + /* Expand in place. 
*/ + uint64_t dlen = rec_data_length(&chdr); + + ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0, + dlen + sizeof(new_off), + dlen + rec_extra_padding(&chdr)); + + if (ecode != NTDB_SUCCESS) { + return ecode; + } + /* find_and_lock set up h to point to last bucket. */ + ecode = replace_in_hash(ntdb, h, new_off); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + /* For futureproofing, we always make the first byte of padding + * a zero. */ + if (rec_extra_padding(&chdr)) { + ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr) + + dlen + sizeof(new_off), + "", 1); + } + return ecode; + } + + /* We need to reallocate the chain. */ + chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t), + NTDB_CHAIN_MAGIC, true); + if (NTDB_OFF_IS_ERR(chain)) { + return NTDB_OFF_TO_ERR(chain); + } + + /* Map both and copy across old buckets. */ + old = ntdb_access_read(ntdb, hbucket_off(h->table, 0), + h->table_size*sizeof(ntdb_off_t), true); + if (NTDB_PTR_IS_ERR(old)) { + return NTDB_PTR_ERR(old); + } + new = ntdb_access_write(ntdb, hbucket_off(chain, 0), + (h->table_size + 1)*sizeof(ntdb_off_t), true); + if (NTDB_PTR_IS_ERR(new)) { + ntdb_access_release(ntdb, old); + return NTDB_PTR_ERR(new); + } + + memcpy(new, old, h->bucket * sizeof(ntdb_off_t)); + new[h->bucket] = encode_offset(ntdb, new_off, h->h); + ntdb_access_release(ntdb, old); + + ecode = ntdb_access_commit(ntdb, new); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* Free the old chain. 
*/ + ecode = add_free_record(ntdb, h->table, + sizeof(struct ntdb_used_record) + + rec_data_length(&chdr) + + rec_extra_padding(&chdr), + NTDB_LOCK_WAIT, true); + + /* Replace top-level to point to new chain */ + return ntdb_write_off(ntdb, + hbucket_off(NTDB_HASH_OFFSET, + bits_from(h->h, 0, ntdb->hash_bits)), + chain | (1ULL << NTDB_OFF_CHAIN_BIT)); +} + +/* Traverse support: returns offset of record, or 0 or -ve error. */ +static ntdb_off_t iterate_chain(struct ntdb_context *ntdb, + ntdb_off_t val, + struct hash_info *h) +{ + ntdb_off_t i; + enum NTDB_ERROR ecode; + struct ntdb_used_record chdr; + + /* First load up chain header. */ + h->table = val & NTDB_OFF_MASK; + ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "get_table:" + " corrupt record %#x at %llu", + rec_magic(&chdr), + (long long)h->table); + } + + /* Chain length is implied by data length. */ + h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t); + + i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket, + h->table_size); + if (NTDB_OFF_IS_ERR(i)) { + return i; + } + + if (i != h->table_size) { + /* Return to next bucket. */ + h->bucket = i + 1; + val = ntdb_read_off(ntdb, hbucket_off(h->table, i)); + if (NTDB_OFF_IS_ERR(val)) { + return val; + } + return val & NTDB_OFF_MASK; + } + + /* Go back up to hash table. */ + h->table = NTDB_HASH_OFFSET; + h->table_size = 1 << ntdb->hash_bits; + h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1; + return 0; +} + +/* Keeps hash locked unless returns 0 or error. */ +static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb, + struct hash_info *h) +{ + ntdb_off_t val, i; + enum NTDB_ERROR ecode; + + if (h->table != NTDB_HASH_OFFSET) { + /* We're in a chain. 
*/ + i = bits_from(h->h, 0, ntdb->hash_bits); + ecode = ntdb_lock_hash(ntdb, i, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + + /* We dropped lock, bucket might have moved! */ + val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i)); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; + } + + /* We don't remove chains: there should still be one there! */ + if (!val || !is_chain(val)) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "iterate_hash:" + " vanished hchain %llu at %llu", + (long long)val, + (long long)i); + val = NTDB_ERR_TO_OFF(ecode); + goto unlock; + } + + /* Find next bucket in the chain. */ + val = iterate_chain(ntdb, val, h); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; + } + if (val != 0) { + return val; + } + ntdb_unlock_hash(ntdb, i, F_RDLCK); + + /* OK, we've reset h back to top level. */ + } + + /* We do this unlocked, then re-check. */ + for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), + h->bucket, h->table_size); + i != h->table_size; + i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), + i+1, h->table_size)) { + ecode = ntdb_lock_hash(ntdb, i, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + + val = ntdb_read_off(ntdb, hbucket_off(h->table, i)); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; + } + + /* Lost race, and it's empty? */ + if (!val) { + ntdb->stats.traverse_val_vanished++; + ntdb_unlock_hash(ntdb, i, F_RDLCK); + continue; + } + + if (!is_chain(val)) { + /* So caller knows what lock to free. */ + h->h = i; + /* Return to next bucket. 
*/ + h->bucket = i + 1; + val &= NTDB_OFF_MASK; + return val; + } + + /* Start at beginning of chain */ + h->bucket = 0; + h->h = i; + + val = iterate_chain(ntdb, val, h); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; + } + if (val != 0) { + return val; + } + + /* Otherwise, bucket has been set to i+1 */ + ntdb_unlock_hash(ntdb, i, F_RDLCK); + } + return 0; + +unlock: + ntdb_unlock_hash(ntdb, i, F_RDLCK); + return val; +} + +/* Return success if we find something, NTDB_ERR_NOEXIST if none. */ +enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb, + struct hash_info *h, + NTDB_DATA *kbuf, size_t *dlen) +{ + ntdb_off_t off; + struct ntdb_used_record rec; + enum NTDB_ERROR ecode; + + off = lock_and_iterate_hash(ntdb, h); + + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } else if (off == 0) { + return NTDB_ERR_NOEXIST; + } + + /* The hash for this key is still locked. */ + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + goto unlock; + } + if (rec_magic(&rec) != NTDB_USED_MAGIC) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "next_in_hash:" + " corrupt record at %llu", + (long long)off); + goto unlock; + } + + kbuf->dsize = rec_key_length(&rec); + + /* They want data as well? 
*/ + if (dlen) { + *dlen = rec_data_length(&rec); + kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec), + kbuf->dsize + *dlen); + } else { + kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec), + kbuf->dsize); + } + if (NTDB_PTR_IS_ERR(kbuf->dptr)) { + ecode = NTDB_PTR_ERR(kbuf->dptr); + goto unlock; + } + ecode = NTDB_SUCCESS; + +unlock: + ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK); + return ecode; + +} + +enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb, + struct hash_info *h, + NTDB_DATA *kbuf, size_t *dlen) +{ + h->table = NTDB_HASH_OFFSET; + h->table_size = 1 << ntdb->hash_bits; + h->bucket = 0; + + return next_in_hash(ntdb, h, kbuf, dlen); +} + +/* Even if the entry isn't in this hash bucket, you'd have to lock this + * bucket to find it. */ +static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb, + const NTDB_DATA *key, int ltype) +{ + uint32_t h = ntdb_hash(ntdb, key->dptr, key->dsize); + + return ntdb_lock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), ltype); +} + +/* lock/unlock one hash chain. 
This is meant to be used to reduce + contention - it cannot guarantee how many records will be locked */ +_PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key) +{ + return chainlock(ntdb, &key, F_WRLCK); +} + +_PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key) +{ + uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); + + ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_WRLCK); +} + +_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, + NTDB_DATA key) +{ + return chainlock(ntdb, &key, F_RDLCK); +} + +_PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key) +{ + uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); + + ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_RDLCK); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/io.c b/junkcode/rusty@rustcorp.com.au-ntdb/io.c new file mode 100644 index 00000000..7645cddc --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/io.c @@ -0,0 +1,750 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 1999-2005 + Copyright (C) Paul `Rusty' Russell 2000 + Copyright (C) Jeremy Allison 2000-2003 + Copyright (C) Rusty Russell 2010 + + ** NOTE! The following LGPL license applies to the ntdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include + +static void free_old_mmaps(struct ntdb_context *ntdb) +{ + struct ntdb_old_mmap *i; + + assert(ntdb->file->direct_count == 0); + + while ((i = ntdb->file->old_mmaps) != NULL) { + ntdb->file->old_mmaps = i->next; + if (ntdb->flags & NTDB_INTERNAL) { + ntdb->free_fn(i->map_ptr, ntdb->alloc_data); + } else { + munmap(i->map_ptr, i->map_size); + } + ntdb->free_fn(i, ntdb->alloc_data); + } +} + +static enum NTDB_ERROR save_old_map(struct ntdb_context *ntdb) +{ + struct ntdb_old_mmap *old; + + assert(ntdb->file->direct_count); + + old = ntdb->alloc_fn(ntdb->file, sizeof(*old), ntdb->alloc_data); + if (!old) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "save_old_map alloc failed"); + } + old->next = ntdb->file->old_mmaps; + old->map_ptr = ntdb->file->map_ptr; + old->map_size = ntdb->file->map_size; + ntdb->file->old_mmaps = old; + + return NTDB_SUCCESS; +} + +enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb) +{ + if (ntdb->file->fd == -1) { + return NTDB_SUCCESS; + } + + if (!ntdb->file->map_ptr) { + return NTDB_SUCCESS; + } + + /* We can't unmap now if there are accessors. */ + if (ntdb->file->direct_count) { + return save_old_map(ntdb); + } else { + munmap(ntdb->file->map_ptr, ntdb->file->map_size); + ntdb->file->map_ptr = NULL; + } + return NTDB_SUCCESS; +} + +enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb) +{ + int mmap_flags; + + if (ntdb->flags & NTDB_INTERNAL) + return NTDB_SUCCESS; + +#ifndef HAVE_INCOHERENT_MMAP + if (ntdb->flags & NTDB_NOMMAP) + return NTDB_SUCCESS; +#endif + + if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) + mmap_flags = PROT_READ; + else + mmap_flags = PROT_READ | PROT_WRITE; + + /* size_t can be smaller than off_t. 
*/ + if ((size_t)ntdb->file->map_size == ntdb->file->map_size) { + ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size, + mmap_flags, + MAP_SHARED, ntdb->file->fd, 0); + } else + ntdb->file->map_ptr = MAP_FAILED; + + /* + * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! + */ + if (ntdb->file->map_ptr == MAP_FAILED) { + ntdb->file->map_ptr = NULL; +#ifdef HAVE_INCOHERENT_MMAP + /* Incoherent mmap means everyone must mmap! */ + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_mmap failed for size %lld (%s)", + (long long)ntdb->file->map_size, + strerror(errno)); +#else + ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, + "ntdb_mmap failed for size %lld (%s)", + (long long)ntdb->file->map_size, strerror(errno)); +#endif + } + return NTDB_SUCCESS; +} + +/* check for an out of bounds access - if it is out of bounds then + see if the database has been expanded by someone else and expand + if necessary + note that "len" is the minimum length needed for the db. + + If probe is true, len being too large isn't a failure. 
+*/ +static enum NTDB_ERROR ntdb_normal_oob(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, + bool probe) +{ + struct stat st; + enum NTDB_ERROR ecode; + + if (len + off < len) { + if (probe) + return NTDB_SUCCESS; + + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob off %llu len %llu wrap\n", + (long long)off, (long long)len); + } + + if (ntdb->flags & NTDB_INTERNAL) { + if (probe) + return NTDB_SUCCESS; + + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob len %lld beyond internal" + " alloc size %lld", + (long long)(off + len), + (long long)ntdb->file->map_size); + return NTDB_ERR_IO; + } + + ecode = ntdb_lock_expand(ntdb, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (fstat(ntdb->file->fd, &st) != 0) { + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "Failed to fstat file: %s", strerror(errno)); + ntdb_unlock_expand(ntdb, F_RDLCK); + return NTDB_ERR_IO; + } + + ntdb_unlock_expand(ntdb, F_RDLCK); + + if (st.st_size < off + len) { + if (probe) + return NTDB_SUCCESS; + + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob len %llu beyond eof at %llu", + (long long)(off + len), (long long)st.st_size); + return NTDB_ERR_IO; + } + + /* Unmap, update size, remap */ + ecode = ntdb_munmap(ntdb); + if (ecode) { + return ecode; + } + + ntdb->file->map_size = st.st_size; + return ntdb_mmap(ntdb); +} + +/* Endian conversion: we only ever deal with 8 byte quantities */ +void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size) +{ + assert(size % 8 == 0); + if (unlikely((ntdb->flags & NTDB_CONVERT)) && buf) { + uint64_t i, *p = (uint64_t *)buf; + for (i = 0; i < size / 8; i++) + p[i] = bswap_64(p[i]); + } + return buf; +} + +/* Return first non-zero offset in offset array, or end, or -ve error. */ +/* FIXME: Return the off? 
*/ +uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb, + ntdb_off_t base, uint64_t start, uint64_t end) +{ + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t), + (end - start) * sizeof(ntdb_off_t), false); + if (NTDB_PTR_IS_ERR(val)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val)); + } + + for (i = 0; i < (end - start); i++) { + if (val[i]) + break; + } + ntdb_access_release(ntdb, val); + return start + i; +} + +/* Return first zero offset in num offset array, or num, or -ve error. */ +uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off, + uint64_t num) +{ + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false); + if (NTDB_PTR_IS_ERR(val)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val)); + } + + for (i = 0; i < num; i++) { + if (!val[i]) + break; + } + ntdb_access_release(ntdb, val); + return i; +} + +enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len) +{ + char buf[8192] = { 0 }; + void *p = ntdb->io->direct(ntdb, off, len, true); + enum NTDB_ERROR ecode = NTDB_SUCCESS; + + assert(!(ntdb->flags & NTDB_RDONLY)); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_PTR_ERR(p); + } + if (p) { + memset(p, 0, len); + return ecode; + } + while (len) { + unsigned todo = len < sizeof(buf) ? 
len : sizeof(buf); + ecode = ntdb->io->twrite(ntdb, off, buf, todo); + if (ecode != NTDB_SUCCESS) { + break; + } + len -= todo; + off += todo; + } + return ecode; +} + +/* write a lump of data at a specified offset */ +static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off, + const void *buf, ntdb_len_t len) +{ + enum NTDB_ERROR ecode; + + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Write to read-only database"); + } + + ecode = ntdb_oob(ntdb, off, len, false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (ntdb->file->map_ptr) { + memcpy(off + (char *)ntdb->file->map_ptr, buf, len); + } else { +#ifdef HAVE_INCOHERENT_MMAP + return NTDB_ERR_IO; +#else + ssize_t ret; + ret = pwrite(ntdb->file->fd, buf, len, off); + if (ret != len) { + /* This shouldn't happen: we avoid sparse files. */ + if (ret >= 0) + errno = ENOSPC; + + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_write: %zi at %zu len=%zu (%s)", + ret, (size_t)off, (size_t)len, + strerror(errno)); + } +#endif + } + return NTDB_SUCCESS; +} + +/* read a lump of data at a specified offset */ +static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off, + void *buf, ntdb_len_t len) +{ + enum NTDB_ERROR ecode; + + ecode = ntdb_oob(ntdb, off, len, false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (ntdb->file->map_ptr) { + memcpy(buf, off + (char *)ntdb->file->map_ptr, len); + } else { +#ifdef HAVE_INCOHERENT_MMAP + return NTDB_ERR_IO; +#else + ssize_t r = pread(ntdb->file->fd, buf, len, off); + if (r != len) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_read failed with %zi at %zu " + "len=%zu (%s) map_size=%zu", + r, (size_t)off, (size_t)len, + strerror(errno), + (size_t)ntdb->file->map_size); + } +#endif + } + return NTDB_SUCCESS; +} + +enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off, + const void *rec, size_t len) +{ + enum 
NTDB_ERROR ecode; + + if (unlikely((ntdb->flags & NTDB_CONVERT))) { + void *conv = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data); + if (!conv) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_write: no memory converting" + " %zu bytes", len); + } + memcpy(conv, rec, len); + ecode = ntdb->io->twrite(ntdb, off, + ntdb_convert(ntdb, conv, len), len); + ntdb->free_fn(conv, ntdb->alloc_data); + } else { + ecode = ntdb->io->twrite(ntdb, off, rec, len); + } + return ecode; +} + +enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off, + void *rec, size_t len) +{ + enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len); + ntdb_convert(ntdb, rec, len); + return ecode; +} + +static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, + ntdb_len_t len, unsigned int prefix) +{ + unsigned char *buf; + enum NTDB_ERROR ecode; + + /* some systems don't like zero length malloc */ + buf = ntdb->alloc_fn(ntdb, prefix + len ? prefix + len : 1, + ntdb->alloc_data); + if (!buf) { + ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_alloc_read alloc failed len=%zu", + (size_t)(prefix + len)); + return NTDB_ERR_PTR(NTDB_ERR_OOM); + } else { + ecode = ntdb->io->tread(ntdb, offset, buf+prefix, len); + if (unlikely(ecode != NTDB_SUCCESS)) { + ntdb->free_fn(buf, ntdb->alloc_data); + return NTDB_ERR_PTR(ecode); + } + } + return buf; +} + +/* read a lump of data, allocating the space for it */ +void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len) +{ + return _ntdb_alloc_read(ntdb, offset, len, 0); +} + +static enum NTDB_ERROR fill(struct ntdb_context *ntdb, + const void *buf, size_t size, + ntdb_off_t off, ntdb_len_t len) +{ + while (len) { + size_t n = len > size ? 
size : len; + ssize_t ret = pwrite(ntdb->file->fd, buf, n, off); + if (ret != n) { + if (ret >= 0) + errno = ENOSPC; + + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "fill failed:" + " %zi at %zu len=%zu (%s)", + ret, (size_t)off, (size_t)len, + strerror(errno)); + } + len -= n; + off += n; + } + return NTDB_SUCCESS; +} + +/* expand a file. we prefer to use ftruncate, as that is what posix + says to use for mmap expansion */ +static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb, + ntdb_len_t addition) +{ + char buf[8192]; + enum NTDB_ERROR ecode; + + assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0); + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Expand on read-only database"); + } + + if (ntdb->flags & NTDB_INTERNAL) { + char *new; + + /* Can't free it if we have direct accesses. */ + if (ntdb->file->direct_count) { + ecode = save_old_map(ntdb); + if (ecode) { + return ecode; + } + new = ntdb->alloc_fn(ntdb->file, + ntdb->file->map_size + addition, + ntdb->alloc_data); + if (new) { + memcpy(new, ntdb->file->map_ptr, + ntdb->file->map_size); + } + } else { + new = ntdb->expand_fn(ntdb->file->map_ptr, + ntdb->file->map_size + addition, + ntdb->alloc_data); + } + if (!new) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "No memory to expand database"); + } + ntdb->file->map_ptr = new; + ntdb->file->map_size += addition; + return NTDB_SUCCESS; + } else { + /* Unmap before trying to write; old NTDB claimed OpenBSD had + * problem with this otherwise. */ + ecode = ntdb_munmap(ntdb); + if (ecode) { + return ecode; + } + + /* If this fails, we try to fill anyway. */ + if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition)) + ; + + /* now fill the file with something. This ensures that the + file isn't sparse, which would be very bad if we ran out of + disk. 
This must be done with write, not via mmap */ + memset(buf, 0x43, sizeof(buf)); + ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size, + addition); + if (ecode != NTDB_SUCCESS) + return ecode; + ntdb->file->map_size += addition; + return ntdb_mmap(ntdb); + } +} + +const void *ntdb_access_read(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool convert) +{ + void *ret = NULL; + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + ret = ntdb->io->direct(ntdb, off, len, false); + + if (NTDB_PTR_IS_ERR(ret)) { + return ret; + } + } + if (!ret) { + struct ntdb_access_hdr *hdr; + hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr)); + if (NTDB_PTR_IS_ERR(hdr)) { + return hdr; + } + hdr->next = ntdb->access; + ntdb->access = hdr; + ret = hdr + 1; + if (convert) { + ntdb_convert(ntdb, (void *)ret, len); + } + } else { + ntdb->file->direct_count++; + } + + return ret; +} + +void *ntdb_access_write(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool convert) +{ + void *ret = NULL; + + if (ntdb->flags & NTDB_RDONLY) { + ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Write to read-only database"); + return NTDB_ERR_PTR(NTDB_ERR_RDONLY); + } + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + ret = ntdb->io->direct(ntdb, off, len, true); + + if (NTDB_PTR_IS_ERR(ret)) { + return ret; + } + } + + if (!ret) { + struct ntdb_access_hdr *hdr; + hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr)); + if (NTDB_PTR_IS_ERR(hdr)) { + return hdr; + } + hdr->next = ntdb->access; + ntdb->access = hdr; + hdr->off = off; + hdr->len = len; + hdr->convert = convert; + ret = hdr + 1; + if (convert) + ntdb_convert(ntdb, (void *)ret, len); + } else { + ntdb->file->direct_count++; + } + return ret; +} + +static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p) +{ + struct ntdb_access_hdr **hp; + + for (hp = &ntdb->access; *hp; hp = &(*hp)->next) { + if (*hp + 1 == p) + return hp; + } + return NULL; +} + +void 
ntdb_access_release(struct ntdb_context *ntdb, const void *p) +{ + struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p); + + if (hp) { + hdr = *hp; + *hp = hdr->next; + ntdb->free_fn(hdr, ntdb->alloc_data); + } else { + if (--ntdb->file->direct_count == 0) { + free_old_mmaps(ntdb); + } + } +} + +enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p) +{ + struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p); + enum NTDB_ERROR ecode; + + if (hp) { + hdr = *hp; + if (hdr->convert) + ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len); + else + ecode = ntdb_write(ntdb, hdr->off, p, hdr->len); + *hp = hdr->next; + ntdb->free_fn(hdr, ntdb->alloc_data); + } else { + if (--ntdb->file->direct_count == 0) { + free_old_mmaps(ntdb); + } + ecode = NTDB_SUCCESS; + } + + return ecode; +} + +static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len, + bool write_mode) +{ + enum NTDB_ERROR ecode; + + if (unlikely(!ntdb->file->map_ptr)) + return NULL; + + ecode = ntdb_oob(ntdb, off, len, false); + if (unlikely(ecode != NTDB_SUCCESS)) + return NTDB_ERR_PTR(ecode); + return (char *)ntdb->file->map_ptr + off; +} + +static ntdb_off_t ntdb_read_normal_off(struct ntdb_context *ntdb, + ntdb_off_t off) +{ + ntdb_off_t ret; + enum NTDB_ERROR ecode; + ntdb_off_t *p; + + p = ntdb_direct(ntdb, off, sizeof(*p), false); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p)); + } + if (likely(p)) { + return *p; + } + + ecode = ntdb_read(ntdb, off, &ret, sizeof(ret)); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + return ret; +} + +static ntdb_off_t ntdb_read_convert_off(struct ntdb_context *ntdb, + ntdb_off_t off) +{ + ntdb_off_t ret; + enum NTDB_ERROR ecode; + + ecode = ntdb_read_convert(ntdb, off, &ret, sizeof(ret)); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + return ret; +} + +static enum NTDB_ERROR ntdb_write_normal_off(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_off_t val) +{ + 
ntdb_off_t *p; + + p = ntdb_direct(ntdb, off, sizeof(*p), true); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_PTR_ERR(p); + } + if (likely(p)) { + *p = val; + return NTDB_SUCCESS; + } + return ntdb_write(ntdb, off, &val, sizeof(val)); +} + +static enum NTDB_ERROR ntdb_write_convert_off(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_off_t val) +{ + return ntdb_write_convert(ntdb, off, &val, sizeof(val)); +} + +void ntdb_inc_seqnum(struct ntdb_context *ntdb) +{ + ntdb_off_t seq; + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + int64_t *direct; + + direct = ntdb->io->direct(ntdb, + offsetof(struct ntdb_header, seqnum), + sizeof(*direct), true); + if (likely(direct)) { + /* Don't let it go negative, even briefly */ + if (unlikely((*direct) + 1) < 0) + *direct = 0; + (*direct)++; + return; + } + } + + seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum)); + if (!NTDB_OFF_IS_ERR(seq)) { + seq++; + if (unlikely((int64_t)seq < 0)) + seq = 0; + ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq); + } +} + +static const struct ntdb_methods io_methods = { + ntdb_read, + ntdb_write, + ntdb_normal_oob, + ntdb_expand_file, + ntdb_direct, + ntdb_read_normal_off, + ntdb_write_normal_off, +}; + +static const struct ntdb_methods io_convert_methods = { + ntdb_read, + ntdb_write, + ntdb_normal_oob, + ntdb_expand_file, + ntdb_direct, + ntdb_read_convert_off, + ntdb_write_convert_off, +}; + +/* + initialise the default methods table +*/ +void ntdb_io_init(struct ntdb_context *ntdb) +{ + if (ntdb->flags & NTDB_CONVERT) + ntdb->io = &io_convert_methods; + else + ntdb->io = &io_methods; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/lock.c b/junkcode/rusty@rustcorp.com.au-ntdb/lock.c new file mode 100644 index 00000000..71d6d852 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/lock.c @@ -0,0 +1,886 @@ + /* + Unix SMB/CIFS implementation. 
+ + trivial database library + + Copyright (C) Andrew Tridgell 1999-2005 + Copyright (C) Paul `Rusty' Russell 2000 + Copyright (C) Jeremy Allison 2000-2003 + + ** NOTE! The following LGPL license applies to the ntdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#include "private.h" +#include + +/* If we were threaded, we could wait for unlock, but we're not, so fail. */ +enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call) +{ + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "%s: lock owned by another ntdb in this process.", + call); +} + +/* If we fork, we no longer really own locks. */ +bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log) +{ + /* No locks? No problem! */ + if (ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0) { + return true; + } + + /* No fork? No problem! */ + if (ntdb->file->locker == getpid()) { + return true; + } + + if (log) { + ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "%s: fork() detected after lock acquisition!" 
+ " (%u vs %u)", call, + (unsigned int)ntdb->file->locker, + (unsigned int)getpid()); + } + return false; +} + +int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *unused) +{ + struct flock fl; + int ret; + + do { + fl.l_type = rw; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + + if (waitflag) + ret = fcntl(fd, F_SETLKW, &fl); + else + ret = fcntl(fd, F_SETLK, &fl); + } while (ret != 0 && errno == EINTR); + return ret; +} + +int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused) +{ + struct flock fl; + int ret; + + do { + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + + ret = fcntl(fd, F_SETLKW, &fl); + } while (ret != 0 && errno == EINTR); + return ret; +} + +static int lock(struct ntdb_context *ntdb, + int rw, off_t off, off_t len, bool waitflag) +{ + int ret; + if (ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0) { + ntdb->file->locker = getpid(); + } + + ntdb->stats.lock_lowlevel++; + ret = ntdb->lock_fn(ntdb->file->fd, rw, off, len, waitflag, + ntdb->lock_data); + if (!waitflag) { + ntdb->stats.lock_nonblock++; + if (ret != 0) + ntdb->stats.lock_nonblock_fail++; + } + return ret; +} + +static int unlock(struct ntdb_context *ntdb, int rw, off_t off, off_t len) +{ +#if 0 /* Check they matched up locks and unlocks correctly. */ + char line[80]; + FILE *locks; + bool found = false; + + locks = fopen("/proc/locks", "r"); + + while (fgets(line, 80, locks)) { + char *p; + int type, start, l; + + /* eg. 
1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */ + p = strchr(line, ':') + 1; + if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY "))) + continue; + p += strlen(" FLOCK ADVISORY "); + if (strncmp(p, "READ ", strlen("READ ")) == 0) + type = F_RDLCK; + else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0) + type = F_WRLCK; + else + abort(); + p += 6; + if (atoi(p) != getpid()) + continue; + p = strchr(strchr(p, ' ') + 1, ' ') + 1; + start = atoi(p); + p = strchr(p, ' ') + 1; + if (strncmp(p, "EOF", 3) == 0) + l = 0; + else + l = atoi(p) - start + 1; + + if (off == start) { + if (len != l) { + fprintf(stderr, "Len %u should be %u: %s", + (int)len, l, line); + abort(); + } + if (type != rw) { + fprintf(stderr, "Type %s wrong: %s", + rw == F_RDLCK ? "READ" : "WRITE", line); + abort(); + } + found = true; + break; + } + } + + if (!found) { + fprintf(stderr, "Unlock on %u@%u not found!", + (int)off, (int)len); + abort(); + } + + fclose(locks); +#endif + + return ntdb->unlock_fn(ntdb->file->fd, rw, off, len, ntdb->lock_data); +} + +/* a byte range locking function - return 0 on success + this functions locks len bytes at the specified offset. + + note that a len of zero means lock to end of file +*/ +static enum NTDB_ERROR ntdb_brlock(struct ntdb_context *ntdb, + int rw_type, ntdb_off_t offset, ntdb_off_t len, + enum ntdb_lock_flags flags) +{ + int ret; + + if (rw_type == F_WRLCK && (ntdb->flags & NTDB_RDONLY)) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Write lock attempted on read-only database"); + } + + if (ntdb->flags & NTDB_NOLOCK) { + return NTDB_SUCCESS; + } + + /* A 32 bit system cannot open a 64-bit file, but it could have + * expanded since then: check here. 
*/ + if ((size_t)(offset + len) != offset + len) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_brlock: lock on giant offset %llu", + (long long)(offset + len)); + } + + ret = lock(ntdb, rw_type, offset, len, flags & NTDB_LOCK_WAIT); + if (ret != 0) { + /* Generic lock error. errno set by fcntl. + * EAGAIN is an expected return from non-blocking + * locks. */ + if (!(flags & NTDB_LOCK_PROBE) + && (errno != EAGAIN && errno != EINTR)) { + ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_brlock failed (fd=%d) at" + " offset %zu rw_type=%d flags=%d len=%zu:" + " %s", + ntdb->file->fd, (size_t)offset, rw_type, + flags, (size_t)len, strerror(errno)); + } + return NTDB_ERR_LOCK; + } + return NTDB_SUCCESS; +} + +static enum NTDB_ERROR ntdb_brunlock(struct ntdb_context *ntdb, + int rw_type, ntdb_off_t offset, size_t len) +{ + if (ntdb->flags & NTDB_NOLOCK) { + return NTDB_SUCCESS; + } + + if (!check_lock_pid(ntdb, "ntdb_brunlock", false)) + return NTDB_ERR_LOCK; + + if (unlock(ntdb, rw_type, offset, len) == -1) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_brunlock failed (fd=%d) at offset %zu" + " rw_type=%d len=%zu: %s", + ntdb->file->fd, (size_t)offset, rw_type, + (size_t)len, strerror(errno)); + } + return NTDB_SUCCESS; +} + +/* + upgrade a read lock to a write lock. This needs to be handled in a + special way as some OSes (such as solaris) have too conservative + deadlock detection and claim a deadlock when progress can be + made. For those OSes we may loop for a while. 
+*/ +enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start) +{ + int count = 1000; + + if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true)) + return NTDB_ERR_LOCK; + + if (ntdb->file->allrecord_lock.count != 1) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_allrecord_upgrade failed:" + " count %u too high", + ntdb->file->allrecord_lock.count); + } + + if (ntdb->file->allrecord_lock.off != 1) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_allrecord_upgrade failed:" + " already upgraded?"); + } + + if (ntdb->file->allrecord_lock.owner != ntdb) { + return owner_conflict(ntdb, "ntdb_allrecord_upgrade"); + } + + while (count--) { + struct timeval tv; + if (ntdb_brlock(ntdb, F_WRLCK, start, 0, + NTDB_LOCK_WAIT|NTDB_LOCK_PROBE) == NTDB_SUCCESS) { + ntdb->file->allrecord_lock.ltype = F_WRLCK; + ntdb->file->allrecord_lock.off = 0; + return NTDB_SUCCESS; + } + if (errno != EDEADLK) { + break; + } + /* sleep for as short a time as we can - more portable than usleep() */ + tv.tv_sec = 0; + tv.tv_usec = 1; + select(0, NULL, NULL, NULL, &tv); + } + + if (errno != EAGAIN && errno != EINTR) + ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_allrecord_upgrade failed"); + return NTDB_ERR_LOCK; +} + +static struct ntdb_lock *find_nestlock(struct ntdb_context *ntdb, ntdb_off_t offset, + const struct ntdb_context *owner) +{ + unsigned int i; + + for (i=0; ifile->num_lockrecs; i++) { + if (ntdb->file->lockrecs[i].off == offset) { + if (owner && ntdb->file->lockrecs[i].owner != owner) + return NULL; + return &ntdb->file->lockrecs[i]; + } + } + return NULL; +} + +enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb) +{ + enum NTDB_ERROR ecode; + + if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true)) + return NTDB_ERR_LOCK; + + ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK, + false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + ecode = 
ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); + if (ecode != NTDB_SUCCESS) { + ntdb_allrecord_unlock(ntdb, F_WRLCK); + return ecode; + } + ecode = ntdb_transaction_recover(ntdb); + ntdb_unlock_open(ntdb, F_WRLCK); + ntdb_allrecord_unlock(ntdb, F_WRLCK); + + return ecode; +} + +/* lock an offset in the database. */ +static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb, + ntdb_off_t offset, int ltype, + enum ntdb_lock_flags flags) +{ + struct ntdb_lock *new_lck; + enum NTDB_ERROR ecode; + + assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits) + + ntdb->file->map_size / 8)); + + if (ntdb->flags & NTDB_NOLOCK) + return NTDB_SUCCESS; + + if (!check_lock_pid(ntdb, "ntdb_nest_lock", true)) { + return NTDB_ERR_LOCK; + } + + ntdb->stats.locks++; + + new_lck = find_nestlock(ntdb, offset, NULL); + if (new_lck) { + if (new_lck->owner != ntdb) { + return owner_conflict(ntdb, "ntdb_nest_lock"); + } + + if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_nest_lock:" + " offset %zu has read lock", + (size_t)offset); + } + /* Just increment the struct, posix locks don't stack. 
*/ + new_lck->count++; + return NTDB_SUCCESS; + } + +#if 0 + if (ntdb->file->num_lockrecs + && offset >= NTDB_HASH_LOCK_START + && offset < NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_nest_lock: already have a hash lock?"); + } +#endif + if (ntdb->file->lockrecs == NULL) { + new_lck = ntdb->alloc_fn(ntdb->file, sizeof(*ntdb->file->lockrecs), + ntdb->alloc_data); + } else { + new_lck = (struct ntdb_lock *)ntdb->expand_fn( + ntdb->file->lockrecs, + sizeof(*ntdb->file->lockrecs) + * (ntdb->file->num_lockrecs+1), + ntdb->alloc_data); + } + if (new_lck == NULL) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_nest_lock:" + " unable to allocate %zu lock struct", + ntdb->file->num_lockrecs + 1); + } + ntdb->file->lockrecs = new_lck; + + /* Since fcntl locks don't nest, we do a lock for the first one, + and simply bump the count for future ones */ + ecode = ntdb_brlock(ntdb, ltype, offset, 1, flags); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* First time we grab a lock, perhaps someone died in commit? 
*/ + if (!(flags & NTDB_LOCK_NOCHECK) + && ntdb->file->num_lockrecs == 0) { + ntdb_bool_err berr = ntdb_needs_recovery(ntdb); + if (berr != false) { + ntdb_brunlock(ntdb, ltype, offset, 1); + + if (berr < 0) + return NTDB_OFF_TO_ERR(berr); + ecode = ntdb_lock_and_recover(ntdb); + if (ecode == NTDB_SUCCESS) { + ecode = ntdb_brlock(ntdb, ltype, offset, 1, + flags); + } + if (ecode != NTDB_SUCCESS) { + return ecode; + } + } + } + + ntdb->file->lockrecs[ntdb->file->num_lockrecs].owner = ntdb; + ntdb->file->lockrecs[ntdb->file->num_lockrecs].off = offset; + ntdb->file->lockrecs[ntdb->file->num_lockrecs].count = 1; + ntdb->file->lockrecs[ntdb->file->num_lockrecs].ltype = ltype; + ntdb->file->num_lockrecs++; + + return NTDB_SUCCESS; +} + +static enum NTDB_ERROR ntdb_nest_unlock(struct ntdb_context *ntdb, + ntdb_off_t off, int ltype) +{ + struct ntdb_lock *lck; + enum NTDB_ERROR ecode; + + if (ntdb->flags & NTDB_NOLOCK) + return NTDB_SUCCESS; + + lck = find_nestlock(ntdb, off, ntdb); + if ((lck == NULL) || (lck->count == 0)) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_nest_unlock: no lock for %zu", + (size_t)off); + } + + if (lck->count > 1) { + lck->count--; + return NTDB_SUCCESS; + } + + /* + * This lock has count==1 left, so we need to unlock it in the + * kernel. We don't bother with decrementing the in-memory array + * element, we're about to overwrite it with the last array element + * anyway. + */ + ecode = ntdb_brunlock(ntdb, ltype, off, 1); + + /* + * Shrink the array by overwriting the element just unlocked with the + * last array element. 
+ */ + *lck = ntdb->file->lockrecs[--ntdb->file->num_lockrecs]; + + return ecode; +} + +/* + get the transaction lock + */ +enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype) +{ + return ntdb_nest_lock(ntdb, NTDB_TRANSACTION_LOCK, ltype, NTDB_LOCK_WAIT); +} + +/* + release the transaction lock + */ +void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype) +{ + ntdb_nest_unlock(ntdb, NTDB_TRANSACTION_LOCK, ltype); +} + +/* We only need to lock individual bytes, but Linux merges consecutive locks + * so we lock in contiguous ranges. */ +static enum NTDB_ERROR ntdb_lock_gradual(struct ntdb_context *ntdb, + int ltype, enum ntdb_lock_flags flags, + ntdb_off_t off, ntdb_off_t len) +{ + enum NTDB_ERROR ecode; + enum ntdb_lock_flags nb_flags = (flags & ~NTDB_LOCK_WAIT); + + if (len <= 1) { + /* 0 would mean to end-of-file... */ + assert(len != 0); + /* Single hash. Just do blocking lock. */ + return ntdb_brlock(ntdb, ltype, off, len, flags); + } + + /* First we try non-blocking. */ + ecode = ntdb_brlock(ntdb, ltype, off, len, nb_flags); + if (ecode != NTDB_ERR_LOCK) { + return ecode; + } + + /* Try locking first half, then second. */ + ecode = ntdb_lock_gradual(ntdb, ltype, flags, off, len / 2); + if (ecode != NTDB_SUCCESS) + return ecode; + + ecode = ntdb_lock_gradual(ntdb, ltype, flags, + off + len / 2, len - len / 2); + if (ecode != NTDB_SUCCESS) { + ntdb_brunlock(ntdb, ltype, off, len / 2); + } + return ecode; +} + +/* lock/unlock entire database. It can only be upgradable if you have some + * other way of guaranteeing exclusivity (ie. transaction write lock). 
*/ +enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype, + enum ntdb_lock_flags flags, bool upgradable) +{ + enum NTDB_ERROR ecode; + ntdb_bool_err berr; + + if (ntdb->flags & NTDB_NOLOCK) { + return NTDB_SUCCESS; + } + + if (!check_lock_pid(ntdb, "ntdb_allrecord_lock", true)) { + return NTDB_ERR_LOCK; + } + + if (ntdb->file->allrecord_lock.count) { + if (ntdb->file->allrecord_lock.owner != ntdb) { + return owner_conflict(ntdb, "ntdb_allrecord_lock"); + } + + if (ltype == F_RDLCK + || ntdb->file->allrecord_lock.ltype == F_WRLCK) { + ntdb->file->allrecord_lock.count++; + return NTDB_SUCCESS; + } + + /* a global lock of a different type exists */ + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "ntdb_allrecord_lock: already have %s lock", + ntdb->file->allrecord_lock.ltype == F_RDLCK + ? "read" : "write"); + } + + if (ntdb_has_hash_locks(ntdb)) { + /* can't combine global and chain locks */ + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "ntdb_allrecord_lock:" + " already have chain lock"); + } + + if (upgradable && ltype != F_RDLCK) { + /* ntdb error: you can't upgrade a write lock! */ + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_allrecord_lock:" + " can't upgrade a write lock"); + } + + ntdb->stats.locks++; +again: + /* Lock hashes, gradually. */ + ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START, + 1 << ntdb->hash_bits); + if (ecode != NTDB_SUCCESS) + return ecode; + + /* Lock free tables: there to end of file. */ + ecode = ntdb_brlock(ntdb, ltype, + NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits), + 0, flags); + if (ecode != NTDB_SUCCESS) { + ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, + 1 << ntdb->hash_bits); + return ecode; + } + + ntdb->file->allrecord_lock.owner = ntdb; + ntdb->file->allrecord_lock.count = 1; + /* If it's upgradable, it's actually exclusive so we can treat + * it as a write lock. */ + ntdb->file->allrecord_lock.ltype = upgradable ? 
F_WRLCK : ltype; + ntdb->file->allrecord_lock.off = upgradable; + + /* Now check for needing recovery. */ + if (flags & NTDB_LOCK_NOCHECK) + return NTDB_SUCCESS; + + berr = ntdb_needs_recovery(ntdb); + if (likely(berr == false)) + return NTDB_SUCCESS; + + ntdb_allrecord_unlock(ntdb, ltype); + if (berr < 0) + return NTDB_OFF_TO_ERR(berr); + ecode = ntdb_lock_and_recover(ntdb); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + goto again; +} + +enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb, + int ltype, enum ntdb_lock_flags flags) +{ + return ntdb_nest_lock(ntdb, NTDB_OPEN_LOCK, ltype, flags); +} + +void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype) +{ + ntdb_nest_unlock(ntdb, NTDB_OPEN_LOCK, ltype); +} + +bool ntdb_has_open_lock(struct ntdb_context *ntdb) +{ + return !(ntdb->flags & NTDB_NOLOCK) + && find_nestlock(ntdb, NTDB_OPEN_LOCK, ntdb) != NULL; +} + +enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype) +{ + /* Lock doesn't protect data, so don't check (we recurse if we do!) */ + return ntdb_nest_lock(ntdb, NTDB_EXPANSION_LOCK, ltype, + NTDB_LOCK_WAIT | NTDB_LOCK_NOCHECK); +} + +void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype) +{ + ntdb_nest_unlock(ntdb, NTDB_EXPANSION_LOCK, ltype); +} + +/* unlock entire db */ +void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype) +{ + if (ntdb->flags & NTDB_NOLOCK) + return; + + if (ntdb->file->allrecord_lock.count == 0) { + ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "ntdb_allrecord_unlock: not locked!"); + return; + } + + if (ntdb->file->allrecord_lock.owner != ntdb) { + ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "ntdb_allrecord_unlock: not locked by us!"); + return; + } + + /* Upgradable locks are marked as write locks. 
*/ + if (ntdb->file->allrecord_lock.ltype != ltype + && (!ntdb->file->allrecord_lock.off || ltype != F_RDLCK)) { + ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_allrecord_unlock: have %s lock", + ntdb->file->allrecord_lock.ltype == F_RDLCK + ? "read" : "write"); + return; + } + + if (ntdb->file->allrecord_lock.count > 1) { + ntdb->file->allrecord_lock.count--; + return; + } + + ntdb->file->allrecord_lock.count = 0; + ntdb->file->allrecord_lock.ltype = 0; + + ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, 0); +} + +bool ntdb_has_expansion_lock(struct ntdb_context *ntdb) +{ + return find_nestlock(ntdb, NTDB_EXPANSION_LOCK, ntdb) != NULL; +} + +bool ntdb_has_hash_locks(struct ntdb_context *ntdb) +{ + unsigned int i; + + for (i=0; ifile->num_lockrecs; i++) { + if (ntdb->file->lockrecs[i].off >= NTDB_HASH_LOCK_START + && ntdb->file->lockrecs[i].off < (NTDB_HASH_LOCK_START + + (1 << ntdb->hash_bits))) + return true; + } + return false; +} + +static bool ntdb_has_free_lock(struct ntdb_context *ntdb) +{ + unsigned int i; + + if (ntdb->flags & NTDB_NOLOCK) + return false; + + for (i=0; ifile->num_lockrecs; i++) { + if (ntdb->file->lockrecs[i].off + > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)) + return true; + } + return false; +} + +enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb, + unsigned int h, + int ltype) +{ + unsigned l = NTDB_HASH_LOCK_START + h; + + assert(h < (1 << ntdb->hash_bits)); + + /* a allrecord lock allows us to avoid per chain locks */ + if (ntdb->file->allrecord_lock.count) { + if (!check_lock_pid(ntdb, "ntdb_lock_hashes", true)) + return NTDB_ERR_LOCK; + + if (ntdb->file->allrecord_lock.owner != ntdb) + return owner_conflict(ntdb, "ntdb_lock_hashes"); + if (ltype == ntdb->file->allrecord_lock.ltype + || ltype == F_RDLCK) { + return NTDB_SUCCESS; + } + + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "ntdb_lock_hashes:" + " already have %s allrecordlock", + ntdb->file->allrecord_lock.ltype == F_RDLCK + ? 
"read" : "write"); + } + + if (ntdb_has_free_lock(ntdb)) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_lock_hashes: already have free lock"); + } + + if (ntdb_has_expansion_lock(ntdb)) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_lock_hashes:" + " already have expansion lock"); + } + + return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT); +} + +enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb, + unsigned int h, int ltype) +{ + unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1)); + + if (ntdb->flags & NTDB_NOLOCK) + return 0; + + /* a allrecord lock allows us to avoid per chain locks */ + if (ntdb->file->allrecord_lock.count) { + if (ntdb->file->allrecord_lock.ltype == F_RDLCK + && ltype == F_WRLCK) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_unlock_hashes RO allrecord!"); + } + if (ntdb->file->allrecord_lock.owner != ntdb) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR, + "ntdb_unlock_hashes:" + " not locked by us!"); + } + return NTDB_SUCCESS; + } + + return ntdb_nest_unlock(ntdb, l, ltype); +} + +/* Hash locks use NTDB_HASH_LOCK_START + .. + * Then we begin; bucket offsets are sizeof(ntdb_len_t) apart, so we divide. + * The result is that on 32 bit systems we don't use lock values > 2^31 on + * files that are less than 4GB. 
+ */ +static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb, + ntdb_off_t b_off) +{ + return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits) + + b_off / sizeof(ntdb_off_t); +} + +enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off, + enum ntdb_lock_flags waitflag) +{ + assert(b_off >= sizeof(struct ntdb_header)); + + if (ntdb->flags & NTDB_NOLOCK) + return 0; + + /* a allrecord lock allows us to avoid per chain locks */ + if (ntdb->file->allrecord_lock.count) { + if (!check_lock_pid(ntdb, "ntdb_lock_free_bucket", true)) + return NTDB_ERR_LOCK; + + if (ntdb->file->allrecord_lock.owner != ntdb) { + return owner_conflict(ntdb, "ntdb_lock_free_bucket"); + } + + if (ntdb->file->allrecord_lock.ltype == F_WRLCK) + return 0; + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_lock_free_bucket with" + " read-only allrecordlock!"); + } + +#if 0 /* FIXME */ + if (ntdb_has_expansion_lock(ntdb)) { + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, + "ntdb_lock_free_bucket:" + " already have expansion lock"); + } +#endif + + return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK, + waitflag); +} + +void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off) +{ + if (ntdb->file->allrecord_lock.count) + return; + + ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK); +} + +_PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb) +{ + return ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false); +} + +_PUBLIC_ void ntdb_unlockall(struct ntdb_context *ntdb) +{ + ntdb_allrecord_unlock(ntdb, F_WRLCK); +} + +_PUBLIC_ enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb) +{ + return ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); +} + +_PUBLIC_ void ntdb_unlockall_read(struct ntdb_context *ntdb) +{ + ntdb_allrecord_unlock(ntdb, F_RDLCK); +} + +void ntdb_lock_cleanup(struct ntdb_context *ntdb) +{ + unsigned int i; + + /* We don't want to warn: they're allowed to close 
ntdb after fork. */ + if (!check_lock_pid(ntdb, "ntdb_close", false)) + return; + + while (ntdb->file->allrecord_lock.count + && ntdb->file->allrecord_lock.owner == ntdb) { + ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype); + } + + for (i=0; ifile->num_lockrecs; i++) { + if (ntdb->file->lockrecs[i].owner == ntdb) { + ntdb_nest_unlock(ntdb, + ntdb->file->lockrecs[i].off, + ntdb->file->lockrecs[i].ltype); + i--; + } + } +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdb.3.xml b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdb.3.xml new file mode 100644 index 00000000..3bac9746 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdb.3.xml @@ -0,0 +1,132 @@ + + + + + ntdb + 3 + Samba + System Administration tools + 4.1 + + + ntdb +A not-so trivial keyword/data database system + + +#include <ntdb.h> + + DESCRIPTION + + If you have previously used the tdb library from Samba, much of + this will seem familiar, but there are some API changes which a + compiler will warn you about if you simply replace 'tdb' with + 'ntdb' in your code! The on-disk format for ntdb is + incompatible with tdb. + + + tdb's API was based on gdbm, and ntdb continues this tradition, + with enhancements. A differences guide is available in the text + file lib/ntdb/doc/TDB_porting.txt in the + SAMBA source tree. + + + NTDB API OVERVIEW + + The complete API is documented in the ntdb.h header, which is + kept up-to-date and recommended reading. + + + Normal usage is to call ntdb_open() to create or open an ntdb + file. ntdb_store() is used to add records, ntdb_fetch() is used + to fetch them. Traversals are supported via callback + (ntdb_traverse()) or iteration (ntdb_firstkey() and + ntdb_nextkey()). Transactions are supported for batching + updates or reads atomically, using ntdb_transaction_start() and + ntdb_transaction_commit(). 
+ + Use With Talloc + + ntdb_open() takes an optional linked list of attributes: + in particular you can specify an alternate allocator (such as + talloc): + + +#include <talloc.h> +#include <ntdb.h> + +static void *my_alloc(const void *owner, size_t len, void *priv) +{ + return talloc_size(owner, len); +} + +static void *my_expand(void *old, size_t newlen, void *priv) +{ + return talloc_realloc_size(NULL, old, newlen); +} + +static void my_free(void *old, void *priv) +{ + talloc_free(old); +} + +/* This opens an ntdb file as a talloc object with given parent. */ +struct ntdb_context *ntdb_open_talloc(const void *parent, + const char *filename) +{ + struct ntdb_context *ntdb; + union ntdb_attribute alloc; + + alloc.base.attr = NTDB_ATTRIBUTE_ALLOCATOR; + alloc.base.next = NULL; + alloc.alloc.alloc = my_alloc; + alloc.alloc.expand = my_expand; + alloc.alloc.free = my_free; + + ntdb = ntdb_open(filename, NTDB_DEFAULT, O_RDWR|O_CREAT, 0600, + &alloc); + if (ntdb) { + talloc_steal(parent, ntdb); + talloc_set_name(ntdb, "%s", filename); + } + return ntdb; +} + + + + SEE ALSO + + + + + + AUTHOR + The original tdb software was created by Andrew Tridgell, and + is now developed by the + Samba Team as an Open Source project similar to the way the + Linux kernel is developed. ntdb was derived from tdb, but mostly + rewritten by Rusty Russell. + + + + COPYRIGHT/LICENSE + + Copyright (C) Rusty Russell 2013, IBM Corporation + + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 3 of the + License, or (at your option) any later version. + + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + + You should have received a copy of the GNU General Public License + along with this program; if not, see http://www.gnu.org/licenses/. + + + diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbbackup.8.xml b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbbackup.8.xml new file mode 100644 index 00000000..04ae85f9 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbbackup.8.xml @@ -0,0 +1,150 @@ + + + + + + ntdbbackup + 8 + Samba + System Administration tools + 4.1 + + + + + ntdbbackup + tool for backing up and for validating the integrity of samba .ntdb files + + + + + ntdbbackup + -s suffix + -v + -h + + + + + DESCRIPTION + + This tool is part of the samba + 1 suite. + + ntdbbackup is a tool that may be used to backup samba .ntdb + files. This tool may also be used to verify the integrity of the .ntdb files prior + to samba startup or during normal operation. If it finds file damage and it finds + a prior backup the backup file will be restored. + + + + + + OPTIONS + + + + + -h + + Get help information. + + + + + -s suffix + + The -s option allows the administrator to specify a file + backup extension. This way it is possible to keep a history of ntdb backup + files by using a new suffix for each backup. + + + + + -v + + The -v will check the database for damages (corrupt data) + which if detected causes the backup to be restored. + + + + + + + + + COMMANDS + + GENERAL INFORMATION + + + The ntdbbackup utility can safely be run at any time. It was designed so + that it can be used at any time to validate the integrity of ntdb files, even during Samba + operation. Typical usage for the command will be: + + + ntdbbackup [-s suffix] *.ntdb + + + Before restarting samba the following command may be run to validate .ntdb files: + + + ntdbbackup -v [-s suffix] *.ntdb + + + Note that Samba 4 can use .tdb files instead, so you should + use tdbbackup on those files. 
+ + + + Samba .tdb and .ntdb files are stored in various locations, be sure to back up all + .(n)tdb files on the system. Important files include: + + + + + secrets.(n)tdb - usual location is in the /usr/local/samba/private + directory, or on some systems in /etc/samba. + + + + passdb.(n)tdb - usual location is in the /usr/local/samba/private + directory, or on some systems in /etc/samba. + + + + *.tdb and *.ntdb located in the /usr/local/samba/var directory or on some + systems in the /var/cache or /var/lib/samba directories. + + + + + + + VERSION + + This man page is correct for version 4 of the Samba suite. + + + + SEE ALSO + + + tdbbackup(8), ntdbrestore(8) + + + + + AUTHOR + + + The original Samba software and related utilities were created by Andrew Tridgell. + Samba is now developed by the Samba Team as an Open Source project similar to the way + the Linux kernel is developed. + + + The ntdbbackup man page was written by Rusty Russell, + based on the tdbbackup man page by John H Terpstra. + + + diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbdump.8.xml b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbdump.8.xml new file mode 100644 index 00000000..45d1be19 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbdump.8.xml @@ -0,0 +1,93 @@ + + + + + + ntdbdump + 8 + Samba + System Administration tools + 4.1 + + + + + ntdbdump + tool for printing the contents of an NTDB file + + + + + ntdbdump + -k keyname + -e + -h + filename + + + + + DESCRIPTION + + This tool is part of the samba + 1 suite. + + ntdbdump is a very simple utility that 'dumps' the + contents of an NTDB (New Trivial DataBase) file to standard output in a + human-readable format. + + + This tool can be used when debugging problems with NTDB files. It is + intended for those who are somewhat familiar with Samba internals. + + + + + OPTIONS + + + + + -h + + Get help information. + + + + + -k keyname + + The -k option restricts dumping to a single key, if found.
+ + + + + + + + SEE ALSO + + + tdbdump(8), ntdbtool(8) + + + + + VERSION + + This man page is correct for version 4 of the Samba suite. + + + + AUTHOR + + + The original Samba software and related utilities were created by Andrew Tridgell. + Samba is now developed by the Samba Team as an Open Source project similar to the way + the Linux kernel is developed. + + + The ntdbdump man page was written by Rusty Russell, based on the tdbdump man page by Jelmer Vernooij. + + + diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbrestore.8.xml b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbrestore.8.xml new file mode 100644 index 00000000..713aabaa --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbrestore.8.xml @@ -0,0 +1,74 @@ + + + + + + ntdbrestore + 8 + Samba + System Administration tools + 4.1 + + + + + ntdbrestore + tool for creating an NTDB file from ntdbdump output + + + + + ntdbrestore + ntdbfilename + + + + + DESCRIPTION + + This tool is part of the samba + 1 suite. + + ntdbrestore is a very simple utility that 'restores' the + contents of a dump file into an NTDB (New Trivial DataBase) file. The dump file is obtained from the ntdbdump or tdbdump + commands. + + + This tool waits on standard input for the content of the dump and writes the ntdb to the file named by the ntdbfilename + parameter. + + This tool can be used to translate between ntdb and tdb files by dumping and restoring. + + + + + + VERSION + + This man page is correct for version 4 of the Samba suite. + + + + SEE ALSO + + + ntdbdump(8), tdbrestore(8) + + + + + AUTHOR + + + The original Samba software and related utilities were created by Andrew Tridgell. + Samba is now developed by the Samba Team as an Open Source project similar to the way + the Linux kernel is developed. + + ntdbrestore was written by Rusty Russell based on tdbrestore, which was initially written by Volker Lendecke based on an + idea by Simon McVittie.
+ + + The ntdbrestore man page was written by Rusty Russell, based on the tdbrestore man page by Matthieu Patou. + + + diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbtool.8.xml b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbtool.8.xml new file mode 100644 index 00000000..7e6530c4 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/man/ntdbtool.8.xml @@ -0,0 +1,247 @@ + + + + + + ntdbtool + 8 + Samba + System Administration tools + 4.1 + + + + + ntdbtool + manipulate the contents of NTDB files + + + + + + ntdbtool + + NTDBFILE + + + COMMANDS + + + + + + + DESCRIPTION + + This tool is part of the + samba + 1 suite. + + ntdbtool is a tool for displaying and + altering the contents of Samba NTDB (New Trivial DataBase) files. Each + of the commands listed below can be entered interactively or + provided on the command line. + + + + + + COMMANDS + + + + + + NTDBFILE + Create a new database named + NTDBFILE. + + + + + + NTDBFILE + Open an existing database named + NTDBFILE. + + + + + + Erase the current database. + + + + + + Dump the current database as strings. + + + + + + Dump the current database as connection records. + + + + + + Dump the current database keys as strings. + + + + + + Dump the current database keys as hex values. + + + + + + Print summary information about the + current database. + + + + + + KEY + DATA + + Insert a record into the + current database. + + + + + + KEY + NTDBFILE + + Move a record from the + current database into NTDBFILE. + + + + + + KEY + DATA + + Store (replace) a record in the + current database. + + + + + + KEY + + Show a record by key. + + + + + + KEY + + Delete a record by key. + + + + + + + Print the current database hash table and free list. + + + + + + + Print the current database and free list. + + + + + + COMMAND + + Execute the given system command. + + + + + + + + Print the first record in the current database. + + + + + + + + Print the next record in the current database.
+ + + + + + + + Check the integrity of the current database. + + + + + + + + Repack a database using a temporary file to remove fragmentation. + + + + + + + + Exit ntdbtool. + + + + + + + + SEE ALSO + + + tdbtool(8) + + + + + CAVEATS + The contents of the Samba NTDB files are private + to the implementation and should not be altered with + ntdbtool. + + + + + VERSION + This man page is correct for version 4.0 of the Samba suite. + + + + AUTHOR + + The original Samba software and related utilities were + created by Andrew Tridgell. Samba is now developed by the + Samba Team as an Open Source project similar to the way the + Linux kernel is developed. + + + diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.c b/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.c new file mode 100644 index 00000000..51fbbcae --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.c @@ -0,0 +1,601 @@ + /* + Trivial Database 2: fetch, store and misc routines. + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . 
+*/ +#include "private.h" +#ifndef HAVE_LIBREPLACE +#include +#endif + +static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb, + ntdb_off_t off, + ntdb_len_t keylen, + ntdb_len_t datalen, + struct ntdb_used_record *rec) +{ + uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); + enum NTDB_ERROR ecode; + + ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen, + keylen + dataroom); + if (ecode == NTDB_SUCCESS) { + ecode = ntdb_write_convert(ntdb, off, rec, sizeof(*rec)); + } + return ecode; +} + +static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb, + struct hash_info *h, + NTDB_DATA key, NTDB_DATA dbuf, + ntdb_off_t old_off, ntdb_len_t old_room, + bool growing) +{ + ntdb_off_t new_off; + enum NTDB_ERROR ecode; + + /* Allocate a new record. */ + new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing); + if (NTDB_OFF_IS_ERR(new_off)) { + return NTDB_OFF_TO_ERR(new_off); + } + + /* We didn't like the existing one: remove it. */ + if (old_off) { + ntdb->stats.frees++; + ecode = add_free_record(ntdb, old_off, + sizeof(struct ntdb_used_record) + + key.dsize + old_room, + NTDB_LOCK_WAIT, true); + if (ecode == NTDB_SUCCESS) + ecode = replace_in_hash(ntdb, h, new_off); + } else { + ecode = add_to_hash(ntdb, h, new_off); + } + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + new_off += sizeof(struct ntdb_used_record); + ecode = ntdb->io->twrite(ntdb, new_off, key.dptr, key.dsize); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + new_off += key.dsize; + ecode = ntdb->io->twrite(ntdb, new_off, dbuf.dptr, dbuf.dsize); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (ntdb->flags & NTDB_SEQNUM) + ntdb_inc_seqnum(ntdb); + + return NTDB_SUCCESS; +} + +static enum NTDB_ERROR update_data(struct ntdb_context *ntdb, + ntdb_off_t off, + NTDB_DATA dbuf, + ntdb_len_t extra) +{ + enum NTDB_ERROR ecode; + + ecode = ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize); + if (ecode == NTDB_SUCCESS && extra) { + /* Put 
a zero in; future versions may append other data. */ + ecode = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1); + } + if (ntdb->flags & NTDB_SEQNUM) + ntdb_inc_seqnum(ntdb); + + return ecode; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, + NTDB_DATA key, NTDB_DATA dbuf, int flag) +{ + struct hash_info h; + ntdb_off_t off; + ntdb_len_t old_room = 0; + struct ntdb_used_record rec; + enum NTDB_ERROR ecode; + + off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + + /* Now we have lock on this hash bucket. */ + if (flag == NTDB_INSERT) { + if (off) { + ecode = NTDB_ERR_EXISTS; + goto out; + } + } else { + if (off) { + old_room = rec_data_length(&rec) + + rec_extra_padding(&rec); + if (old_room >= dbuf.dsize) { + /* Can modify in-place. Easy! */ + ecode = update_rec_hdr(ntdb, off, + key.dsize, dbuf.dsize, + &rec); + if (ecode != NTDB_SUCCESS) { + goto out; + } + ecode = update_data(ntdb, + off + sizeof(rec) + + key.dsize, dbuf, + old_room - dbuf.dsize); + if (ecode != NTDB_SUCCESS) { + goto out; + } + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); + return NTDB_SUCCESS; + } + } else { + if (flag == NTDB_MODIFY) { + /* if the record doesn't exist and we + are in NTDB_MODIFY mode then we should fail + the store */ + ecode = NTDB_ERR_NOEXIST; + goto out; + } + } + } + + /* If we didn't use the old record, this implies we're growing. 
*/ + ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off); +out: + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); + return ecode; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, + NTDB_DATA key, NTDB_DATA dbuf) +{ + struct hash_info h; + ntdb_off_t off; + struct ntdb_used_record rec; + ntdb_len_t old_room = 0, old_dlen; + unsigned char *newdata; + NTDB_DATA new_dbuf; + enum NTDB_ERROR ecode; + + off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + + if (off) { + old_dlen = rec_data_length(&rec); + old_room = old_dlen + rec_extra_padding(&rec); + + /* Fast path: can append in place. */ + if (rec_extra_padding(&rec) >= dbuf.dsize) { + ecode = update_rec_hdr(ntdb, off, key.dsize, + old_dlen + dbuf.dsize, &rec); + if (ecode != NTDB_SUCCESS) { + goto out; + } + + off += sizeof(rec) + key.dsize + old_dlen; + ecode = update_data(ntdb, off, dbuf, + rec_extra_padding(&rec)); + goto out; + } + + /* Slow path. */ + newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize, + ntdb->alloc_data); + if (!newdata) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_append:" + " failed to allocate %zu bytes", + (size_t)(key.dsize + old_dlen + + dbuf.dsize)); + goto out; + } + ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize, + newdata, old_dlen); + if (ecode != NTDB_SUCCESS) { + goto out_free_newdata; + } + memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize); + new_dbuf.dptr = newdata; + new_dbuf.dsize = old_dlen + dbuf.dsize; + } else { + newdata = NULL; + new_dbuf = dbuf; + } + + /* If they're using ntdb_append(), it implies they're growing record. 
*/ + ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true); + +out_free_newdata: + ntdb->free_fn(newdata, ntdb->alloc_data); +out: + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); + return ecode; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key, + NTDB_DATA *data) +{ + ntdb_off_t off; + struct ntdb_used_record rec; + struct hash_info h; + enum NTDB_ERROR ecode; + const char *keyp; + + off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + + if (!off) { + ecode = NTDB_ERR_NOEXIST; + } else { + data->dsize = rec_data_length(&rec); + data->dptr = ntdb->alloc_fn(ntdb, data->dsize, ntdb->alloc_data); + if (unlikely(!data->dptr)) { + ecode = NTDB_ERR_OOM; + } else { + memcpy(data->dptr, keyp + key.dsize, data->dsize); + ecode = NTDB_SUCCESS; + } + ntdb_access_release(ntdb, keyp); + } + + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); + return ecode; +} + +_PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key) +{ + ntdb_off_t off; + struct ntdb_used_record rec; + struct hash_info h; + + off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL); + if (NTDB_OFF_IS_ERR(off)) { + return false; + } + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); + + return off ? true : false; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key) +{ + ntdb_off_t off; + struct ntdb_used_record rec; + struct hash_info h; + enum NTDB_ERROR ecode; + + off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + + if (!off) { + ecode = NTDB_ERR_NOEXIST; + goto unlock; + } + + ecode = delete_from_hash(ntdb, &h); + if (ecode != NTDB_SUCCESS) { + goto unlock; + } + + /* Free the deleted entry. 
*/ + ntdb->stats.frees++; + ecode = add_free_record(ntdb, off, + sizeof(struct ntdb_used_record) + + rec_key_length(&rec) + + rec_data_length(&rec) + + rec_extra_padding(&rec), + NTDB_LOCK_WAIT, true); + + if (ntdb->flags & NTDB_SEQNUM) + ntdb_inc_seqnum(ntdb); + +unlock: + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); + return ecode; +} + +_PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb) +{ + return ntdb->flags; +} + +static bool inside_transaction(const struct ntdb_context *ntdb) +{ + return ntdb->transaction != NULL; +} + +static bool readonly_changable(struct ntdb_context *ntdb, const char *caller) +{ + if (inside_transaction(ntdb)) { + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "%s: can't change" + " NTDB_RDONLY inside transaction", + caller); + return false; + } + return true; +} + +_PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag) +{ + if (ntdb->flags & NTDB_INTERNAL) { + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_add_flag: internal db"); + return; + } + switch (flag) { + case NTDB_NOLOCK: + ntdb->flags |= NTDB_NOLOCK; + break; + case NTDB_NOMMAP: + if (ntdb->file->direct_count) { + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_add_flag: Can't get NTDB_NOMMAP from" + " ntdb_parse_record!"); + return; + } + ntdb->flags |= NTDB_NOMMAP; +#ifndef HAVE_INCOHERENT_MMAP + ntdb_munmap(ntdb); +#endif + break; + case NTDB_NOSYNC: + ntdb->flags |= NTDB_NOSYNC; + break; + case NTDB_SEQNUM: + ntdb->flags |= NTDB_SEQNUM; + break; + case NTDB_ALLOW_NESTING: + ntdb->flags |= NTDB_ALLOW_NESTING; + break; + case NTDB_RDONLY: + if (readonly_changable(ntdb, "ntdb_add_flag")) + ntdb->flags |= NTDB_RDONLY; + break; + default: + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_add_flag: Unknown flag %u", flag); + } +} + +_PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag) +{ + if (ntdb->flags & NTDB_INTERNAL) { + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + 
"ntdb_remove_flag: internal db"); + return; + } + switch (flag) { + case NTDB_NOLOCK: + ntdb->flags &= ~NTDB_NOLOCK; + break; + case NTDB_NOMMAP: + ntdb->flags &= ~NTDB_NOMMAP; +#ifndef HAVE_INCOHERENT_MMAP + /* If mmap incoherent, we were mmaping anyway. */ + ntdb_mmap(ntdb); +#endif + break; + case NTDB_NOSYNC: + ntdb->flags &= ~NTDB_NOSYNC; + break; + case NTDB_SEQNUM: + ntdb->flags &= ~NTDB_SEQNUM; + break; + case NTDB_ALLOW_NESTING: + ntdb->flags &= ~NTDB_ALLOW_NESTING; + break; + case NTDB_RDONLY: + if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) { + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_remove_flag: can't" + " remove NTDB_RDONLY on ntdb" + " opened with O_RDONLY"); + break; + } + if (readonly_changable(ntdb, "ntdb_remove_flag")) + ntdb->flags &= ~NTDB_RDONLY; + break; + default: + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_remove_flag: Unknown flag %u", + flag); + } +} + +_PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode) +{ + /* Gcc warns if you miss a case in the switch, so use that. */ + switch (NTDB_ERR_TO_OFF(ecode)) { + case NTDB_ERR_TO_OFF(NTDB_SUCCESS): return "Success"; + case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): return "Corrupt database"; + case NTDB_ERR_TO_OFF(NTDB_ERR_IO): return "IO Error"; + case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): return "Locking error"; + case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): return "Out of memory"; + case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS): return "Record exists"; + case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): return "Invalid parameter"; + case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST): return "Record does not exist"; + case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY): return "write not permitted"; + } + return "Invalid error code"; +} + +enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb, + enum NTDB_ERROR ecode, + enum ntdb_log_level level, + const char *fmt, ...) +{ + char *message; + va_list ap; + size_t len; + /* ntdb_open paths care about errno, so save it. 
*/ + int saved_errno = errno; + + if (!ntdb->log_fn) + return ecode; + + va_start(ap, fmt); + len = vsnprintf(NULL, 0, fmt, ap); + va_end(ap); + + message = ntdb->alloc_fn(ntdb, len + 1, ntdb->alloc_data); + if (!message) { + ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM, + "out of memory formatting message:", ntdb->log_data); + ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data); + } else { + va_start(ap, fmt); + vsnprintf(message, len+1, fmt, ap); + va_end(ap); + ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data); + ntdb->free_fn(message, ntdb->alloc_data); + } + errno = saved_errno; + return ecode; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb, + NTDB_DATA key, + enum NTDB_ERROR (*parse)(NTDB_DATA k, + NTDB_DATA d, + void *data), + void *data) +{ + ntdb_off_t off; + struct ntdb_used_record rec; + struct hash_info h; + enum NTDB_ERROR ecode; + const char *keyp; + + off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + + if (!off) { + ecode = NTDB_ERR_NOEXIST; + } else { + unsigned int old_flags; + NTDB_DATA d = ntdb_mkdata(keyp + key.dsize, + rec_data_length(&rec)); + + /* + * Make sure they don't try to write db, since they + * have read lock! They can if they've done + * ntdb_lockall(): if it was ntdb_lockall_read, that'll + * stop them doing a write operation anyway. 
+ */ + old_flags = ntdb->flags; + if (!ntdb->file->allrecord_lock.count && + !(ntdb->flags & NTDB_NOLOCK)) { + ntdb->flags |= NTDB_RDONLY; + } + ecode = parse(key, d, data); + ntdb->flags = old_flags; + ntdb_access_release(ntdb, keyp); + } + + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); + return ecode; +} + +_PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb) +{ + return ntdb->name; +} + +_PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb) +{ + return ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum)); +} + + +_PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb) +{ + return ntdb->file->fd; +} + +struct traverse_state { + enum NTDB_ERROR error; + struct ntdb_context *dest_db; +}; + +/* + traverse function for repacking + */ +static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data, + struct traverse_state *state) +{ + state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT); + if (state->error != NTDB_SUCCESS) { + return -1; + } + return 0; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb) +{ + struct ntdb_context *tmp_db; + struct traverse_state state; + + state.error = ntdb_transaction_start(ntdb); + if (state.error != NTDB_SUCCESS) { + return state.error; + } + + tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL); + if (tmp_db == NULL) { + state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + __location__ + " Failed to create tmp_db"); + ntdb_transaction_cancel(ntdb); + return state.error; + } + + state.dest_db = tmp_db; + if (ntdb_traverse(ntdb, repack_traverse, &state) < 0) { + goto fail; + } + + state.error = ntdb_wipe_all(ntdb); + if (state.error != NTDB_SUCCESS) { + goto fail; + } + + state.dest_db = ntdb; + if (ntdb_traverse(tmp_db, repack_traverse, &state) < 0) { + goto fail; + } + + ntdb_close(tmp_db); + return ntdb_transaction_commit(ntdb); + +fail: + ntdb_transaction_cancel(ntdb); + ntdb_close(tmp_db); + return state.error; +} diff --git 
a/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.h b/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.h new file mode 100644 index 00000000..a0c229c1 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.h @@ -0,0 +1,933 @@ +#ifndef CCAN_NTDB_H +#define CCAN_NTDB_H + +/* + NTDB: trivial database library version 2 + + Copyright (C) Andrew Tridgell 1999-2004 + Copyright (C) Rusty Russell 2010-2012 + + ** NOTE! The following LGPL license applies to the ntdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_LIBREPLACE +#include +#include +#else +#include "config.h" +#if HAVE_FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif + +#ifndef _PUBLIC_ +#ifdef HAVE_VISIBILITY_ATTR +#define _PUBLIC_ __attribute__((visibility("default"))) +#else +#define _PUBLIC_ +#endif +#endif + +/* For mode_t */ +#include +/* For O_* flags. */ +#include +/* For sig_atomic_t. */ +#include +/* For uint64_t */ +#include +/* For bool */ +#include +/* For memcmp */ +#include +#endif + +#include +#include +#include + +union ntdb_attribute; +struct ntdb_context; + +/** + * struct TDB_DATA - (n)tdb data blob + * + * To ease compatibility, we use 'struct TDB_DATA' from tdb.h, so if + * you want to include both tdb.h and ntdb.h, you need to #include + * tdb.h first. 
+ */ +#ifndef __TDB_H__ +struct TDB_DATA { + unsigned char *dptr; + size_t dsize; +}; +#endif + +typedef struct TDB_DATA NTDB_DATA; + +/** + * ntdb_open - open a database file + * @name: the file name (or database name if flags contains NTDB_INTERNAL) + * @ntdb_flags: options for this database + * @open_flags: flags argument for ntdb's open() call. + * @mode: mode argument for ntdb's open() call. + * @attributes: linked list of extra attributes for this ntdb. + * + * This call opens (and potentially creates) a database file. + * Multiple processes can have the NTDB file open at once. + * + * On failure it will return NULL, and set errno: it may also call + * any log attribute found in @attributes. + * + * See also: + * union ntdb_attribute + */ +struct ntdb_context *ntdb_open(const char *name, int ntdb_flags, + int open_flags, mode_t mode, + union ntdb_attribute *attributes); + + +/* flags for ntdb_open() */ +#define NTDB_DEFAULT 0 /* just a readability place holder */ +#define NTDB_INTERNAL 2 /* don't store on disk */ +#define NTDB_NOLOCK 4 /* don't do any locking */ +#define NTDB_NOMMAP 8 /* don't use mmap */ +#define NTDB_CONVERT 16 /* convert endian */ +#define NTDB_NOSYNC 64 /* don't use synchronous transactions */ +#define NTDB_SEQNUM 128 /* maintain a sequence number */ +#define NTDB_ALLOW_NESTING 256 /* fake nested transactions */ +#define NTDB_RDONLY 512 /* implied by O_RDONLY */ +#define NTDB_CANT_CHECK 2048 /* has a feature which we don't understand */ + +/** + * ntdb_close - close and free a ntdb. + * @ntdb: the ntdb context returned from ntdb_open() + * + * This always succeeds, in that @ntdb is unusable after this call. But if + * some unexpected error occurred while closing, it will return non-zero + * (the only clue as to cause will be via the log attribute). 
+ */ +int ntdb_close(struct ntdb_context *ntdb); + +/** + * enum NTDB_ERROR - error returns for NTDB + * + * See Also: + * ntdb_errorstr() + */ +enum NTDB_ERROR { + NTDB_SUCCESS = 0, /* No error. */ + NTDB_ERR_CORRUPT = -1, /* We read the db, and it was bogus. */ + NTDB_ERR_IO = -2, /* We couldn't read/write the db. */ + NTDB_ERR_LOCK = -3, /* Locking failed. */ + NTDB_ERR_OOM = -4, /* Out of Memory. */ + NTDB_ERR_EXISTS = -5, /* The key already exists. */ + NTDB_ERR_NOEXIST = -6, /* The key does not exist. */ + NTDB_ERR_EINVAL = -7, /* You're using it wrong. */ + NTDB_ERR_RDONLY = -8, /* The database is read-only. */ + NTDB_ERR_LAST = NTDB_ERR_RDONLY +}; + +/** + * ntdb_store - store a key/value pair in a ntdb. + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key + * @dbuf: the data to associate with the key. + * @flag: NTDB_REPLACE, NTDB_INSERT or NTDB_MODIFY. + * + * This inserts (or overwrites) a key/value pair in the NTDB. If flag + * is NTDB_REPLACE, it doesn't matter whether the key exists or not; + * NTDB_INSERT means it must not exist (returns NTDB_ERR_EXISTS otherwise), + * and NTDB_MODIFY means it must exist (returns NTDB_ERR_NOEXIST otherwise). + * + * On success, this returns NTDB_SUCCESS. + * + * See also: + * ntdb_fetch, ntdb_transaction_start, ntdb_append, ntdb_delete. + */ +enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, + NTDB_DATA key, + NTDB_DATA dbuf, + int flag); + +/* flags to ntdb_store() */ +#define NTDB_REPLACE 1 /* A readability place holder */ +#define NTDB_INSERT 2 /* Don't overwrite an existing entry */ +#define NTDB_MODIFY 3 /* Don't create an existing entry */ + +/** + * ntdb_fetch - fetch a value from a ntdb. + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key + * @data: pointer to data. + * + * This looks up a key in the database and sets it in @data. + * + * If it returns NTDB_SUCCESS, the key was found: it is your + * responsibility to call free() on @data->dptr. 
+ * + * Otherwise, it returns an error (usually, NTDB_ERR_NOEXIST) and @data is + * undefined. + */ +enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key, + NTDB_DATA *data); + +/** + * ntdb_errorstr - map the ntdb error onto a constant readable string + * @ecode: the enum NTDB_ERROR to map. + * + * This is useful for displaying errors to users. + */ +const char *ntdb_errorstr(enum NTDB_ERROR ecode); + +/** + * ntdb_append - append a value to a key/value pair in a ntdb. + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key + * @dbuf: the data to append. + * + * This is equivalent to fetching a record, reallocating .dptr to add the + * data, and writing it back, only it's much more efficient. If the key + * doesn't exist, it's equivalent to ntdb_store (with an additional hint that + * you expect to expand the record in future). + * + * See Also: + * ntdb_fetch(), ntdb_store() + */ +enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, + NTDB_DATA key, NTDB_DATA dbuf); + +/** + * ntdb_delete - delete a key from a ntdb. + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key to delete. + * + * Returns NTDB_SUCCESS on success, or an error (usually NTDB_ERR_NOEXIST). + * + * See Also: + * ntdb_fetch(), ntdb_store() + */ +enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key); + +/** + * ntdb_exists - does a key exist in the database? + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key to search for. + * + * Returns true if it exists, or false if it doesn't or any other error. + */ +bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key); + +/** + * ntdb_deq - are NTDB_DATA equal? 
+ * @a: one NTDB_DATA + * @b: another NTDB_DATA + */ +static inline bool ntdb_deq(NTDB_DATA a, NTDB_DATA b) +{ + return a.dsize == b.dsize && memcmp(a.dptr, b.dptr, a.dsize) == 0; +} + +/** + * ntdb_mkdata - make a NTDB_DATA from const data + * @p: the constant pointer + * @len: the length + * + * As the dptr member of NTDB_DATA is not constant, you need to + * cast it. This function keeps those casts in one place, as well as + * suppressing the warning some compilers give when casting away a + * qualifier (eg. gcc with -Wcast-qual) + */ +static inline NTDB_DATA ntdb_mkdata(const void *p, size_t len) +{ + NTDB_DATA d; + d.dptr = cast_const(void *, p); + d.dsize = len; + return d; +} + +/** + * ntdb_transaction_start - start a transaction + * @ntdb: the ntdb context returned from ntdb_open() + * + * This begins a series of atomic operations. Other processes will be able + * to read the ntdb, but not alter it (they will block), nor will they see + * any changes until ntdb_transaction_commit() is called. + * + * Note that if the NTDB_ALLOW_NESTING flag is set, a ntdb_transaction_start() + * within a transaction will succeed, but it's not a real transaction: + * (1) An inner transaction which is committed is not actually committed until + * the outer transaction is; if the outer transaction is cancelled, the + * inner ones are discarded. + * (2) ntdb_transaction_cancel() marks the outer transaction as having an error, + * so the final ntdb_transaction_commit() will fail. + * (3) the outer transaction will see the results of the inner transaction. + * + * See Also: + * ntdb_transaction_cancel, ntdb_transaction_commit. + */ +enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb); + +/** + * ntdb_transaction_cancel - abandon a transaction + * @ntdb: the ntdb context returned from ntdb_open() + * + * This aborts a transaction, discarding any changes which were made. + * ntdb_close() does this implicitly.
+ */ +void ntdb_transaction_cancel(struct ntdb_context *ntdb); + +/** + * ntdb_transaction_commit - commit a transaction + * @ntdb: the ntdb context returned from ntdb_open() + * + * This completes a transaction, writing any changes which were made. + * + * fsync() is used to commit the transaction (unless NTDB_NOSYNC is set), + * making it robust against machine crashes, but very slow compared to + * other NTDB operations. + * + * A failure can only be caused by unexpected errors (eg. I/O or + * memory); there is no point looping on transaction failure. + * + * See Also: + * ntdb_transaction_prepare_commit() + */ +enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb); + +/** + * ntdb_transaction_prepare_commit - prepare to commit a transaction + * @ntdb: the ntdb context returned from ntdb_open() + * + * This ensures we have the resources to commit a transaction (using + * ntdb_transaction_commit): if this succeeds then a transaction will only + * fail if the write() or fsync() calls fail. + * + * If this fails you must still call ntdb_transaction_cancel() to cancel + * the transaction. + * + * See Also: + * ntdb_transaction_commit() + */ +enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb); + +/** + * ntdb_traverse - traverse a NTDB + * @ntdb: the ntdb context returned from ntdb_open() + * @fn: the function to call for every key/value pair (or NULL) + * @p: the pointer to hand to @fn + * + * This walks the NTDB until all the keys have been traversed, or @fn + * returns non-zero. If the traverse function or other processes are + * changing data or adding or deleting keys, the traverse may be + * unreliable: keys may be skipped or (rarely) visited twice. + * + * There is one specific exception: the special case of deleting the + * current key does not undermine the reliability of the traversal. + * + * On success, returns the number of keys iterated. On error returns + * a negative enum NTDB_ERROR value.
+ */ +#define ntdb_traverse(ntdb, fn, p) \ + ntdb_traverse_(ntdb, typesafe_cb_preargs(int, void *, (fn), (p), \ + struct ntdb_context *, \ + NTDB_DATA, NTDB_DATA), (p)) + +int64_t ntdb_traverse_(struct ntdb_context *ntdb, + int (*fn)(struct ntdb_context *, + NTDB_DATA, NTDB_DATA, void *), void *p); + +/** + * ntdb_parse_record - operate directly on data in the database. + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key whose record we should hand to @parse + * @parse: the function to call for the data + * @data: the private pointer to hand to @parse (types must match). + * + * This avoids a copy for many cases, by handing you a pointer into + * the memory-mapped database. It also locks the record to prevent + * other accesses at the same time, so it won't change. + * + * Within the @parse callback you can perform read operations on the + * database, but no write operations: no ntdb_store() or + * ntdb_delete(), for example. The exception is if you call + * ntdb_lockall() before ntdb_parse_record(). + * + * Never alter the data handed to parse()! + */ +#define ntdb_parse_record(ntdb, key, parse, data) \ + ntdb_parse_record_((ntdb), (key), \ + typesafe_cb_preargs(enum NTDB_ERROR, void *, \ + (parse), (data), \ + NTDB_DATA, NTDB_DATA), (data)) + +enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb, + NTDB_DATA key, + enum NTDB_ERROR (*parse)(NTDB_DATA k, + NTDB_DATA d, + void *data), + void *data); + +/** + * ntdb_get_seqnum - get a database sequence number + * @ntdb: the ntdb context returned from ntdb_open() + * + * This returns a sequence number: any change to the database from a + * ntdb context opened with the NTDB_SEQNUM flag will cause that number + * to increment. Note that the incrementing is unreliable (it is done + * without locking), so this is only useful as an optimization. + * + * For example, you may have a regular database backup routine which + * does not operate if the sequence number is unchanged. 
In the + * unlikely event of a failed increment, it will be backed up next + * time any way. + * + * Returns an enum NTDB_ERROR (ie. negative) on error. + */ +int64_t ntdb_get_seqnum(struct ntdb_context *ntdb); + +/** + * ntdb_firstkey - get the "first" key in a NTDB + * @ntdb: the ntdb context returned from ntdb_open() + * @key: pointer to key. + * + * This returns an arbitrary key in the database; with ntdb_nextkey() it allows + * open-coded traversal of the database, though it is slightly less efficient + * than ntdb_traverse. + * + * It is your responsibility to free @key->dptr on success. + * + * Returns NTDB_ERR_NOEXIST if the database is empty. + */ +enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key); + +/** + * ntdb_nextkey - get the "next" key in a NTDB + * @ntdb: the ntdb context returned from ntdb_open() + * @key: a key returned by ntdb_firstkey() or ntdb_nextkey(). + * + * This returns another key in the database; it will free @key.dptr for + * your convenience. + * + * Returns NTDB_ERR_NOEXIST if there are no more keys. + */ +enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key); + +/** + * ntdb_chainlock - lock a record in the NTDB + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key to lock. + * + * This prevents any access occurring to a group of keys including @key, + * even if @key does not exist. This allows primitive atomic updates of + * records without using transactions. + * + * You cannot begin a transaction while holding a ntdb_chainlock(), nor can + * you do any operations on any other keys in the database. This also means + * that you cannot hold more than one ntdb_chainlock() at a time. + * + * See Also: + * ntdb_chainunlock() + */ +enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key); + +/** + * ntdb_chainunlock - unlock a record in the NTDB + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key to unlock. 
+ * + * The key must have previously been locked by ntdb_chainlock(). + */ +void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key); + +/** + * ntdb_chainlock_read - lock a record in the NTDB, for reading + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key to lock. + * + * This prevents any changes from occurring to a group of keys including @key, + * even if @key does not exist. This allows primitive atomic updates of + * records without using transactions. + * + * You cannot begin a transaction while holding a ntdb_chainlock_read(), nor can + * you do any operations on any other keys in the database. This also means + * that you cannot hold more than one ntdb_chainlock()/read() at a time. + * + * See Also: + * ntdb_chainlock() + */ +enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key); + +/** + * ntdb_chainunlock_read - unlock a record in the NTDB for reading + * @ntdb: the ntdb context returned from ntdb_open() + * @key: the key to unlock. + * + * The key must have previously been locked by ntdb_chainlock_read(). + */ +void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key); + +/** + * ntdb_lockall - lock the entire NTDB + * @ntdb: the ntdb context returned from ntdb_open() + * + * You cannot hold a ntdb_chainlock while calling this. It nests, so you + * must call ntdb_unlockall as many times as you call ntdb_lockall. + */ +enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb); + +/** + * ntdb_unlockall - unlock the entire NTDB + * @ntdb: the ntdb context returned from ntdb_open() + */ +void ntdb_unlockall(struct ntdb_context *ntdb); + +/** + * ntdb_lockall_read - lock the entire NTDB for reading + * @ntdb: the ntdb context returned from ntdb_open() + * + * This prevents others writing to the database, eg. ntdb_delete, ntdb_store, + * ntdb_append, but not ntdb_fetch. + * + * You cannot hold a ntdb_chainlock while calling this. 
It nests, so you + * must call ntdb_unlockall_read as many times as you call ntdb_lockall_read. + */ +enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb); + +/** + * ntdb_unlockall_read - unlock the entire NTDB for reading + * @ntdb: the ntdb context returned from ntdb_open() + */ +void ntdb_unlockall_read(struct ntdb_context *ntdb); + +/** + * ntdb_wipe_all - wipe the database clean + * @ntdb: the ntdb context returned from ntdb_open() + * + * Completely erase the database. This is faster than iterating through + * each key and doing ntdb_delete. + */ +enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb); + +/** + * ntdb_repack - repack the database + * @ntdb: the ntdb context returned from ntdb_open() + * + * This repacks the database; if it is suffering from a great deal of + * fragmentation this might help. However, it can take twice the + * memory of the existing NTDB. + */ +enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb); + +/** + * ntdb_check - check a NTDB for consistency + * @ntdb: the ntdb context returned from ntdb_open() + * @check: function to check each key/data pair (or NULL) + * @data: argument for @check, must match type. + * + * This performs a consistency check of the open database, optionally calling + * a check() function on each record so you can do your own data consistency + * checks as well. If check() returns an error, that is returned from + * ntdb_check(). + * + * Note that if the NTDB uses a feature which we don't understand which + * indicates we can't run ntdb_check(), this will log a warning to that + * effect and return NTDB_SUCCESS. You can detect this condition by + * looking for NTDB_CANT_CHECK in ntdb_get_flags(). + * + * Returns NTDB_SUCCESS or an error.
+ */ +#define ntdb_check(ntdb, check, data) \ + ntdb_check_((ntdb), typesafe_cb_preargs(enum NTDB_ERROR, void *, \ + (check), (data), \ + NTDB_DATA, \ + NTDB_DATA), \ + (data)) + +enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb, + enum NTDB_ERROR (*check)(NTDB_DATA k, + NTDB_DATA d, + void *data), + void *data); + +/** + * enum ntdb_summary_flags - flags for ntdb_summary. + */ +enum ntdb_summary_flags { + NTDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */ +}; + +/** + * ntdb_summary - return a string describing the NTDB state + * @ntdb: the ntdb context returned from ntdb_open() + * @flags: flags to control the summary output. + * @summary: pointer to string to allocate. + * + * This returns a developer-readable string describing the overall + * state of the ntdb, such as the percentage used and sizes of records. + * It is designed to provide information about the ntdb at a glance + * without displaying any keys or data in the database. + * + * On success, sets @summary to point to a malloc()'ed nul-terminated + * multi-line string. It is your responsibility to free() it. + */ +enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, + enum ntdb_summary_flags flags, + char **summary); + + +/** + * ntdb_get_flags - return the flags for a ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * + * This returns the flags on the current ntdb. Some of these are caused by + * the flags argument to ntdb_open(), others (such as NTDB_CONVERT) are + * intuited. + */ +unsigned int ntdb_get_flags(struct ntdb_context *ntdb); + +/** + * ntdb_add_flag - set a flag for a ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING. + * + * You can use this to set a flag on the NTDB. You cannot set these flags + * on a NTDB_INTERNAL ntdb. 
+ */ +void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag); + +/** + * ntdb_remove_flag - unset a flag for a ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING. + * + * You can use this to clear a flag on the NTDB. You cannot clear flags + * on a NTDB_INTERNAL ntdb. + */ +void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag); + +/** + * enum ntdb_attribute_type - discriminator for union ntdb_attribute. + */ +enum ntdb_attribute_type { + NTDB_ATTRIBUTE_LOG = 0, + NTDB_ATTRIBUTE_HASH = 1, + NTDB_ATTRIBUTE_SEED = 2, + NTDB_ATTRIBUTE_STATS = 3, + NTDB_ATTRIBUTE_OPENHOOK = 4, + NTDB_ATTRIBUTE_FLOCK = 5, + NTDB_ATTRIBUTE_ALLOCATOR = 6, + NTDB_ATTRIBUTE_HASHSIZE = 7 +}; + +/** + * ntdb_get_attribute - get an attribute for an existing ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * @attr: the union ntdb_attribute to fill in. + * + * This gets an attribute from a NTDB which has previously been set (or + * may return the default values). Set @attr.base.attr to the + * attribute type you want to get. + */ +enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb, + union ntdb_attribute *attr); + +/** + * ntdb_set_attribute - set an attribute for an existing ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * @attr: the union ntdb_attribute to set. + * + * This sets an attribute on a NTDB, overriding any previous attribute + * of the same type. It returns NTDB_ERR_EINVAL if the attribute is + * unknown or invalid. + * + * Note that NTDB_ATTRIBUTE_HASH, NTDB_ATTRIBUTE_SEED, + * NTDB_ATTRIBUTE_OPENHOOK and NTDB_ATTRIBUTE_HASHSIZE cannot currently + * be set after ntdb_open. + */ +enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb, + const union ntdb_attribute *attr); + +/** + * ntdb_unset_attribute - reset an attribute for an existing ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * @type: the attribute type to unset.
+ * + * This unsets an attribute on a NTDB, returning it to the defaults + * (where applicable). + * + * Note that it only makes sense for NTDB_ATTRIBUTE_LOG and NTDB_ATTRIBUTE_FLOCK + * to be unset. + */ +void ntdb_unset_attribute(struct ntdb_context *ntdb, + enum ntdb_attribute_type type); + +/** + * ntdb_name - get the name of a ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * + * This returns a copy of the name string, made at ntdb_open() time. + * + * This is mostly useful for logging. + */ +const char *ntdb_name(const struct ntdb_context *ntdb); + +/** + * ntdb_fd - get the file descriptor of a ntdb + * @ntdb: the ntdb context returned from ntdb_open() + * + * This returns the file descriptor for the underlying database file, or -1 + * for NTDB_INTERNAL. + */ +int ntdb_fd(const struct ntdb_context *ntdb); + +/** + * ntdb_foreach - iterate through every open NTDB. + * @fn: the function to call for every NTDB + * @p: the pointer to hand to @fn + * + * NTDB internally keeps track of all open TDBs; this function allows you to + * iterate through them. If @fn returns non-zero, traversal stops. + */ +#define ntdb_foreach(fn, p) \ + ntdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p), \ + struct ntdb_context *), (p)) + +void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p); + +/** + * struct ntdb_attribute_base - common fields for all ntdb attributes. + */ +struct ntdb_attribute_base { + enum ntdb_attribute_type attr; + union ntdb_attribute *next; +}; + +/** + * enum ntdb_log_level - log levels for ntdb_attribute_log + * @NTDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors + * or internal consistency failures. + * @NTDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters + * or writing to a read-only database. + * @NTDB_LOG_WARNING: used for informational messages on issues which + * are unusual but handled by NTDB internally, such + * as a failure to mmap or failure to open /dev/urandom. 
+ * It's also used when ntdb_open() fails without O_CREAT + * because a file does not exist. + */ +enum ntdb_log_level { + NTDB_LOG_ERROR, + NTDB_LOG_USE_ERROR, + NTDB_LOG_WARNING +}; + +/** + * struct ntdb_attribute_log - log function attribute + * + * This attribute provides a hook for you to log errors. + */ +struct ntdb_attribute_log { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_LOG */ + void (*fn)(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data); + void *data; +}; + +/** + * struct ntdb_attribute_hash - hash function attribute + * + * This attribute allows you to provide an alternative hash function. + * This hash function will be handed keys from the database; it will also + * be handed the 8-byte NTDB_HASH_MAGIC value for checking the header (the + * ntdb_open() will fail if the hash value doesn't match the header). + * + * Note that if your hash function gives different results on + * different machine endians, your ntdb will no longer work across + * different architectures! + */ +struct ntdb_attribute_hash { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */ + uint32_t (*fn)(const void *key, size_t len, uint32_t seed, + void *data); + void *data; +}; + +/** + * struct ntdb_attribute_seed - hash function seed attribute + * + * The hash function seed is normally taken from /dev/urandom (or equivalent) + * but can be set manually here. This is mainly for testing purposes. + */ +struct ntdb_attribute_seed { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_SEED */ + uint64_t seed; +}; + +/** + * struct ntdb_attribute_stats - ntdb operational statistics + * + * This attribute records statistics of various low-level NTDB operations. + * This can be used to assist performance evaluation. This is only + * useful for ntdb_get_attribute(). 
+ * + * New fields will be added at the end, hence the "size" argument which + * indicates how large your structure is: it must be filled in before + * calling ntdb_get_attribute(), which will overwrite it with the size + * ntdb knows about. + */ +struct ntdb_attribute_stats { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_STATS */ + size_t size; /* = sizeof(struct ntdb_attribute_stats) */ + uint64_t allocs; + uint64_t alloc_subhash; + uint64_t alloc_chain; + uint64_t alloc_bucket_exact; + uint64_t alloc_bucket_max; + uint64_t alloc_leftover; + uint64_t alloc_coalesce_tried; + uint64_t alloc_coalesce_iterate_clash; + uint64_t alloc_coalesce_lockfail; + uint64_t alloc_coalesce_race; + uint64_t alloc_coalesce_succeeded; + uint64_t alloc_coalesce_num_merged; + uint64_t compares; + uint64_t compare_wrong_offsetbits; + uint64_t compare_wrong_keylen; + uint64_t compare_wrong_rechash; + uint64_t compare_wrong_keycmp; + uint64_t transactions; + uint64_t transaction_cancel; + uint64_t transaction_nest; + uint64_t transaction_expand_file; + uint64_t transaction_read_direct; + uint64_t transaction_read_direct_fail; + uint64_t transaction_write_direct; + uint64_t transaction_write_direct_fail; + uint64_t traverses; + uint64_t traverse_val_vanished; + uint64_t expands; + uint64_t frees; + uint64_t locks; + uint64_t lock_lowlevel; + uint64_t lock_nonblock; + uint64_t lock_nonblock_fail; +}; + +/** + * struct ntdb_attribute_openhook - ntdb special effects hook for open + * + * This attribute contains a function to call once we have the OPEN_LOCK + * for the ntdb, but before we've examined its contents. If this succeeds, + * the ntdb will be populated if it's then zero-length. + * + * This is a hack to allow support for TDB-style TDB_CLEAR_IF_FIRST + * behaviour. 
+ */ +struct ntdb_attribute_openhook { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_OPENHOOK */ + enum NTDB_ERROR (*fn)(int fd, void *data); + void *data; +}; + +/** + * struct ntdb_attribute_flock - ntdb special effects hook for file locking + * + * This attribute contains function to call to place locks on a file; it can + * be used to support non-blocking operations or lock proxying. + * + * They should return 0 on success, -1 on failure and set errno. + * + * An error will be logged on error if errno is neither EAGAIN nor EINTR + * (normally it would only return EAGAIN if waitflag is false, and + * loop internally on EINTR). + */ +struct ntdb_attribute_flock { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_FLOCK */ + int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *); + int (*unlock)(int fd, int rw, off_t off, off_t len, void *); + void *data; +}; + +/** + * struct ntdb_attribute_hashsize - ntdb hashsize setting. + * + * This attribute is only settable on ntdb_open; it indicates that we create + * a hashtable of the given size, rather than the default. + */ +struct ntdb_attribute_hashsize { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASHSIZE */ + uint32_t size; +}; + +/** + * struct ntdb_attribute_allocator - allocator for ntdb to use. + * + * You can replace malloc/free with your own allocation functions. + * The allocator takes an "owner" pointer, which is either NULL (for + * the initial struct ntdb_context and struct ntdb_file), or a + * previously allocated pointer. This is useful for relationship + * tracking, such as the talloc library. + * + * The expand function is realloc, but only ever used to expand an + * existing allocation. + * + * Be careful mixing allocators: two ntdb_contexts which have the same file + * open will share the same struct ntdb_file. This may be allocated by one + * ntdb's allocator, and freed by the other. 
+ */ +struct ntdb_attribute_allocator { + struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_ALLOCATOR */ + void *(*alloc)(const void *owner, size_t len, void *priv_data); + void *(*expand)(void *old, size_t newlen, void *priv_data); + void (*free)(void *old, void *priv_data); + void *priv_data; +}; + +/** + * union ntdb_attribute - ntdb attributes. + * + * This represents all the known attributes. + * + * See also: + * struct ntdb_attribute_log, struct ntdb_attribute_hash, + * struct ntdb_attribute_seed, struct ntdb_attribute_stats, + * struct ntdb_attribute_openhook, struct ntdb_attribute_flock, + * struct ntdb_attribute_allocator alloc. + */ +union ntdb_attribute { + struct ntdb_attribute_base base; + struct ntdb_attribute_log log; + struct ntdb_attribute_hash hash; + struct ntdb_attribute_seed seed; + struct ntdb_attribute_stats stats; + struct ntdb_attribute_openhook openhook; + struct ntdb_attribute_flock flock; + struct ntdb_attribute_allocator alloc; + struct ntdb_attribute_hashsize hashsize; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* ntdb.h */ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.pc.in b/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.pc.in new file mode 100644 index 00000000..36a7d513 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/ntdb.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: ntdb +Description: A (not-so) trivial database +Version: @PACKAGE_VERSION@ +Libs: @LIB_RPATH@ -L${libdir} -lntdb +Cflags: -I${includedir} +URL: http://tdb.samba.org/ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/open.c b/junkcode/rusty@rustcorp.com.au-ntdb/open.c new file mode 100644 index 00000000..2a265afe --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/open.c @@ -0,0 +1,911 @@ + /* + Trivial Database 2: opening and closing TDBs + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the 
GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include + +/* all tdbs, to detect double-opens (fcntl file don't nest!) */ +static struct ntdb_context *tdbs = NULL; + +static struct ntdb_file *find_file(dev_t device, ino_t ino) +{ + struct ntdb_context *i; + + for (i = tdbs; i; i = i->next) { + if (i->file->device == device && i->file->inode == ino) { + i->file->refcnt++; + return i->file; + } + } + return NULL; +} + +static bool read_all(int fd, void *buf, size_t len) +{ + while (len) { + ssize_t ret; + ret = read(fd, buf, len); + if (ret < 0) + return false; + if (ret == 0) { + /* ETOOSHORT? */ + errno = EWOULDBLOCK; + return false; + } + buf = (char *)buf + ret; + len -= ret; + } + return true; +} + +static uint32_t random_number(struct ntdb_context *ntdb) +{ + int fd; + uint32_t ret = 0; + struct timeval now; + + fd = open("/dev/urandom", O_RDONLY); + if (fd >= 0) { + if (read_all(fd, &ret, sizeof(ret))) { + close(fd); + return ret; + } + close(fd); + } + /* FIXME: Untested! Based on Wikipedia protocol description! */ + fd = open("/dev/egd-pool", O_RDWR); + if (fd >= 0) { + /* Command is 1, next byte is size we want to read. */ + char cmd[2] = { 1, sizeof(uint32_t) }; + if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) { + char reply[1 + sizeof(uint32_t)]; + int r = read(fd, reply, sizeof(reply)); + if (r > 1) { + /* Copy at least some bytes. 
*/ + memcpy(&ret, reply+1, r - 1); + if (reply[0] == sizeof(uint32_t) + && r == sizeof(reply)) { + close(fd); + return ret; + } + } + } + close(fd); + } + + /* Fallback: pid and time. */ + gettimeofday(&now, NULL); + ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec; + ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, + "ntdb_open: random from getpid and time"); + return ret; +} + +static void ntdb_context_init(struct ntdb_context *ntdb) +{ + /* Initialize the NTDB fields here */ + ntdb_io_init(ntdb); + ntdb->transaction = NULL; + ntdb->access = NULL; +} + +/* initialise a new database: + * + * struct ntdb_header; + * struct { + * struct ntdb_used_record hash_header; + * ntdb_off_t hash_buckets[1 << ntdb->hash_bits]; + * } hash; + * struct ntdb_freetable ftable; + * struct { + * struct ntdb_free_record free_header; + * char forty_three[...]; + * } remainder; + */ +#define NEW_DATABASE_HDR_SIZE(hbits) \ + (sizeof(struct ntdb_header) \ + + sizeof(struct ntdb_used_record) + (sizeof(ntdb_off_t) << hbits) \ + + sizeof(struct ntdb_freetable) \ + + sizeof(struct ntdb_free_record)) + +static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb, + struct ntdb_attribute_seed *seed, + struct ntdb_header *rhdr) +{ + /* We make it up in memory, then write it out if not internal */ + struct ntdb_freetable *ftable; + struct ntdb_used_record *htable; + struct ntdb_header *hdr; + struct ntdb_free_record *remainder; + char *mem; + unsigned int magic_len; + ssize_t rlen; + size_t dbsize, hashsize, hdrsize, remaindersize; + enum NTDB_ERROR ecode; + + hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits; + + /* Always make db a multiple of NTDB_PGSIZE */ + hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits); + dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1); + + mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data); + if (!mem) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_new_database: failed to allocate"); + } + + hdr = (void *)mem; + 
htable = (void *)(mem + sizeof(*hdr)); + ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize); + remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize + + sizeof(*ftable)); + + /* Fill in the header */ + hdr->version = NTDB_VERSION; + if (seed) + hdr->hash_seed = seed->seed; + else + hdr->hash_seed = random_number(ntdb); + hdr->hash_test = NTDB_HASH_MAGIC; + hdr->hash_test = ntdb->hash_fn(&hdr->hash_test, + sizeof(hdr->hash_test), + hdr->hash_seed, + ntdb->hash_data); + hdr->hash_bits = ntdb->hash_bits; + hdr->recovery = 0; + hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK; + hdr->seqnum = 0; + hdr->capabilities = 0; + memset(hdr->reserved, 0, sizeof(hdr->reserved)); + + /* Hash is all zero after header. */ + set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize); + memset(htable + 1, 0, hashsize); + + /* Free is empty. */ + hdr->free_table = (char *)ftable - (char *)hdr; + memset(ftable, 0, sizeof(*ftable)); + ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0, + sizeof(*ftable) - sizeof(ftable->hdr), + sizeof(*ftable) - sizeof(ftable->hdr)); + if (ecode != NTDB_SUCCESS) { + goto out; + } + + /* Rest of database is a free record, containing junk. */ + remaindersize = dbsize - hdrsize; + remainder->ftable_and_len + = (remaindersize + sizeof(*remainder) + - sizeof(struct ntdb_used_record)); + remainder->next = 0; + remainder->magic_and_prev + = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL)) + | ((char *)remainder - (char *)hdr); + memset(remainder + 1, 0x43, remaindersize); + + /* Put in our single free entry. 
*/ + ftable->buckets[size_to_bucket(remaindersize)] = + (char *)remainder - (char *)hdr; + + /* Magic food */ + memset(hdr->magic_food, 0, sizeof(hdr->magic_food)); + strcpy(hdr->magic_food, NTDB_MAGIC_FOOD); + + /* This creates an endian-converted database, as if read from disk */ + magic_len = sizeof(hdr->magic_food); + ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len); + + /* Return copy of header. */ + *rhdr = *hdr; + + if (ntdb->flags & NTDB_INTERNAL) { + ntdb->file->map_size = dbsize; + ntdb->file->map_ptr = hdr; + return NTDB_SUCCESS; + } + if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_new_database:" + " failed to seek: %s", strerror(errno)); + goto out; + } + + if (ftruncate(ntdb->file->fd, 0) == -1) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_new_database:" + " failed to truncate: %s", strerror(errno)); + goto out; + } + + rlen = write(ntdb->file->fd, hdr, dbsize); + if (rlen != dbsize) { + if (rlen >= 0) + errno = ENOSPC; + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_new_database: %zi writing header: %s", + rlen, strerror(errno)); + goto out; + } + +out: + ntdb->free_fn(hdr, ntdb->alloc_data); + return ecode; +} + +static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb) +{ + ntdb->file = ntdb->alloc_fn(NULL, sizeof(*ntdb->file), ntdb->alloc_data); + if (!ntdb->file) + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_open: cannot alloc ntdb_file structure"); + ntdb->file->num_lockrecs = 0; + ntdb->file->lockrecs = NULL; + ntdb->file->allrecord_lock.count = 0; + ntdb->file->refcnt = 1; + ntdb->file->map_ptr = NULL; + ntdb->file->direct_count = 0; + ntdb->file->old_mmaps = NULL; + return NTDB_SUCCESS; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb, + const union ntdb_attribute *attr) +{ + switch (attr->base.attr) { + case NTDB_ATTRIBUTE_LOG: + ntdb->log_fn = attr->log.fn; + 
ntdb->log_data = attr->log.data; + break; + case NTDB_ATTRIBUTE_HASH: + case NTDB_ATTRIBUTE_SEED: + case NTDB_ATTRIBUTE_OPENHOOK: + case NTDB_ATTRIBUTE_HASHSIZE: + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_set_attribute:" + " cannot set %s after opening", + attr->base.attr == NTDB_ATTRIBUTE_HASH + ? "NTDB_ATTRIBUTE_HASH" + : attr->base.attr == NTDB_ATTRIBUTE_SEED + ? "NTDB_ATTRIBUTE_SEED" + : attr->base.attr == NTDB_ATTRIBUTE_OPENHOOK + ? "NTDB_ATTRIBUTE_OPENHOOK" + : "NTDB_ATTRIBUTE_HASHSIZE"); + case NTDB_ATTRIBUTE_STATS: + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_set_attribute:" + " cannot set NTDB_ATTRIBUTE_STATS"); + case NTDB_ATTRIBUTE_FLOCK: + ntdb->lock_fn = attr->flock.lock; + ntdb->unlock_fn = attr->flock.unlock; + ntdb->lock_data = attr->flock.data; + break; + case NTDB_ATTRIBUTE_ALLOCATOR: + ntdb->alloc_fn = attr->alloc.alloc; + ntdb->expand_fn = attr->alloc.expand; + ntdb->free_fn = attr->alloc.free; + ntdb->alloc_data = attr->alloc.priv_data; + break; + default: + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_set_attribute:" + " unknown attribute type %u", + attr->base.attr); + } + return NTDB_SUCCESS; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb, + union ntdb_attribute *attr) +{ + switch (attr->base.attr) { + case NTDB_ATTRIBUTE_LOG: + if (!ntdb->log_fn) + return NTDB_ERR_NOEXIST; + attr->log.fn = ntdb->log_fn; + attr->log.data = ntdb->log_data; + break; + case NTDB_ATTRIBUTE_HASH: + attr->hash.fn = ntdb->hash_fn; + attr->hash.data = ntdb->hash_data; + break; + case NTDB_ATTRIBUTE_SEED: + attr->seed.seed = ntdb->hash_seed; + break; + case NTDB_ATTRIBUTE_OPENHOOK: + if (!ntdb->openhook) + return NTDB_ERR_NOEXIST; + attr->openhook.fn = ntdb->openhook; + attr->openhook.data = ntdb->openhook_data; + break; + case NTDB_ATTRIBUTE_STATS: { + size_t size = attr->stats.size; + if (size > ntdb->stats.size) + size = ntdb->stats.size; + 
memcpy(&attr->stats, &ntdb->stats, size); + break; + } + case NTDB_ATTRIBUTE_FLOCK: + attr->flock.lock = ntdb->lock_fn; + attr->flock.unlock = ntdb->unlock_fn; + attr->flock.data = ntdb->lock_data; + break; + case NTDB_ATTRIBUTE_ALLOCATOR: + attr->alloc.alloc = ntdb->alloc_fn; + attr->alloc.expand = ntdb->expand_fn; + attr->alloc.free = ntdb->free_fn; + attr->alloc.priv_data = ntdb->alloc_data; + break; + case NTDB_ATTRIBUTE_HASHSIZE: + attr->hashsize.size = 1 << ntdb->hash_bits; + break; + default: + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_get_attribute:" + " unknown attribute type %u", + attr->base.attr); + } + attr->base.next = NULL; + return NTDB_SUCCESS; +} + +_PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb, + enum ntdb_attribute_type type) +{ + switch (type) { + case NTDB_ATTRIBUTE_LOG: + ntdb->log_fn = NULL; + break; + case NTDB_ATTRIBUTE_OPENHOOK: + ntdb->openhook = NULL; + break; + case NTDB_ATTRIBUTE_HASH: + case NTDB_ATTRIBUTE_SEED: + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_unset_attribute: cannot unset %s after opening", + type == NTDB_ATTRIBUTE_HASH + ? "NTDB_ATTRIBUTE_HASH" + : "NTDB_ATTRIBUTE_SEED"); + break; + case NTDB_ATTRIBUTE_STATS: + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_unset_attribute:" + "cannot unset NTDB_ATTRIBUTE_STATS"); + break; + case NTDB_ATTRIBUTE_FLOCK: + ntdb->lock_fn = ntdb_fcntl_lock; + ntdb->unlock_fn = ntdb_fcntl_unlock; + break; + default: + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_unset_attribute: unknown attribute type %u", + type); + } +} + +/* The top three bits of the capability tell us whether it matters. 
*/ +enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller, + ntdb_off_t type) +{ + if (type & NTDB_CAP_NOOPEN) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "%s: file has unknown capability %llu", + caller, type & NTDB_CAP_NOOPEN); + } + + if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR, + "%s: file has unknown capability %llu" + " (cannot write to it)", + caller, type & NTDB_CAP_NOOPEN); + } + + if (type & NTDB_CAP_NOCHECK) { + ntdb->flags |= NTDB_CANT_CHECK; + } + return NTDB_SUCCESS; +} + +static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb, + ntdb_off_t capabilities) +{ + ntdb_off_t off, next; + enum NTDB_ERROR ecode = NTDB_SUCCESS; + const struct ntdb_capability *cap; + + /* Check capability list. */ + for (off = capabilities; off && ecode == NTDB_SUCCESS; off = next) { + cap = ntdb_access_read(ntdb, off, sizeof(*cap), true); + if (NTDB_PTR_IS_ERR(cap)) { + return NTDB_PTR_ERR(cap); + } + + switch (cap->type & NTDB_CAP_TYPE_MASK) { + /* We don't understand any capabilities (yet). */ + default: + ecode = unknown_capability(ntdb, "ntdb_open", cap->type); + } + next = cap->next; + ntdb_access_release(ntdb, cap); + } + return ecode; +} + +static void *default_alloc(const void *owner, size_t len, void *priv_data) +{ + return malloc(len); +} + +static void *default_expand(void *ptr, size_t len, void *priv_data) +{ + return realloc(ptr, len); +} + +static void default_free(void *ptr, void *priv_data) +{ + free(ptr); +} + +/* First allocation needs manual search of attributes. 
*/ +static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr, + const char *name) +{ + size_t len = sizeof(struct ntdb_context) + strlen(name) + 1; + + while (attr) { + if (attr->base.attr == NTDB_ATTRIBUTE_ALLOCATOR) { + return attr->alloc.alloc(NULL, len, + attr->alloc.priv_data); + } + attr = attr->base.next; + } + return default_alloc(NULL, len, NULL); +} + +static unsigned int next_pow2(uint64_t size) +{ + unsigned int bits = 1; + + while ((1ULL << bits) < size) + bits++; + return bits; +} + +_PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags, + int open_flags, mode_t mode, + union ntdb_attribute *attr) +{ + struct ntdb_context *ntdb; + struct stat st; + int saved_errno = 0; + uint64_t hash_test; + unsigned v; + ssize_t rlen; + struct ntdb_header hdr; + struct ntdb_attribute_seed *seed = NULL; + ntdb_bool_err berr; + enum NTDB_ERROR ecode; + int openlock; + + ntdb = alloc_ntdb(attr, name); + if (!ntdb) { + /* Can't log this */ + errno = ENOMEM; + return NULL; + } + /* Set name immediately for logging functions. */ + ntdb->name = strcpy((char *)(ntdb + 1), name); + ntdb->flags = ntdb_flags; + ntdb->log_fn = NULL; + ntdb->open_flags = open_flags; + ntdb->file = NULL; + ntdb->openhook = NULL; + ntdb->lock_fn = ntdb_fcntl_lock; + ntdb->unlock_fn = ntdb_fcntl_unlock; + ntdb->hash_fn = ntdb_jenkins_hash; + memset(&ntdb->stats, 0, sizeof(ntdb->stats)); + ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS; + ntdb->stats.size = sizeof(ntdb->stats); + ntdb->alloc_fn = default_alloc; + ntdb->expand_fn = default_expand; + ntdb->free_fn = default_free; + ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. 
*/ + + while (attr) { + switch (attr->base.attr) { + case NTDB_ATTRIBUTE_HASH: + ntdb->hash_fn = attr->hash.fn; + ntdb->hash_data = attr->hash.data; + break; + case NTDB_ATTRIBUTE_SEED: + seed = &attr->seed; + break; + case NTDB_ATTRIBUTE_OPENHOOK: + ntdb->openhook = attr->openhook.fn; + ntdb->openhook_data = attr->openhook.data; + break; + case NTDB_ATTRIBUTE_HASHSIZE: + ntdb->hash_bits = next_pow2(attr->hashsize.size); + if (ntdb->hash_bits > 31) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_open: hash_size %u" + " too large", + attr->hashsize.size); + goto fail; + } + break; + default: + /* These are set as normal. */ + ecode = ntdb_set_attribute(ntdb, attr); + if (ecode != NTDB_SUCCESS) + goto fail; + } + attr = attr->base.next; + } + + if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT + | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING + | NTDB_RDONLY)) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_open: unknown flags %u", ntdb_flags); + goto fail; + } + + if (seed) { + if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_open:" + " cannot set NTDB_ATTRIBUTE_SEED" + " without O_CREAT."); + goto fail; + } + } + + if ((open_flags & O_ACCMODE) == O_WRONLY) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_open: can't open ntdb %s write-only", + name); + goto fail; + } + + if ((open_flags & O_ACCMODE) == O_RDONLY) { + openlock = F_RDLCK; + ntdb->flags |= NTDB_RDONLY; + } else { + if (ntdb_flags & NTDB_RDONLY) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, + NTDB_LOG_USE_ERROR, + "ntdb_open: can't use NTDB_RDONLY" + " without O_RDONLY"); + goto fail; + } + openlock = F_WRLCK; + } + + /* internal databases don't need any of the rest. 
*/ + if (ntdb->flags & NTDB_INTERNAL) { + ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP); + ecode = ntdb_new_file(ntdb); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + ntdb->file->fd = -1; + ecode = ntdb_new_database(ntdb, seed, &hdr); + if (ecode == NTDB_SUCCESS) { + ntdb_convert(ntdb, &hdr.hash_seed, + sizeof(hdr.hash_seed)); + ntdb->hash_seed = hdr.hash_seed; + ntdb_context_init(ntdb); + ntdb_ftable_init(ntdb); + } + if (ecode != NTDB_SUCCESS) { + goto fail; + } + return ntdb; + } + + if (stat(name, &st) != -1) + ntdb->file = find_file(st.st_dev, st.st_ino); + + if (!ntdb->file) { + ecode = ntdb_new_file(ntdb); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + + /* Set this now, as ntdb_nest_lock examines it. */ + ntdb->file->map_size = 0; + + if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) { + enum ntdb_log_level lvl; + /* errno set by open(2) */ + saved_errno = errno; + + /* Probing for files like this is a common pattern. */ + if (!(open_flags & O_CREAT) && errno == ENOENT) { + lvl = NTDB_LOG_WARNING; + } else { + lvl = NTDB_LOG_ERROR; + } + ntdb_logerr(ntdb, NTDB_ERR_IO, lvl, + "ntdb_open: could not open file %s: %s", + name, strerror(errno)); + + goto fail_errno; + } + + /* ensure there is only one process initialising at once: + * do it immediately to reduce the create/openlock race. */ + ecode = ntdb_lock_open(ntdb, openlock, + NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); + if (ecode != NTDB_SUCCESS) { + saved_errno = errno; + goto fail_errno; + } + + /* on exec, don't inherit the fd */ + v = fcntl(ntdb->file->fd, F_GETFD, 0); + fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC); + + if (fstat(ntdb->file->fd, &st) == -1) { + saved_errno = errno; + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open: could not stat open %s: %s", + name, strerror(errno)); + goto fail_errno; + } + + ntdb->file->device = st.st_dev; + ntdb->file->inode = st.st_ino; + + /* call their open hook if they gave us one. 
*/ + if (ntdb->openhook) { + ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data); + if (ecode != NTDB_SUCCESS) { + ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_open: open hook failed"); + goto fail; + } + open_flags |= O_CREAT; + } + } else { + /* ensure there is only one process initialising at once */ + ecode = ntdb_lock_open(ntdb, openlock, + NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); + if (ecode != NTDB_SUCCESS) { + saved_errno = errno; + goto fail_errno; + } + } + + /* If they used O_TRUNC, read will return 0. */ + rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0); + if (rlen == 0 && (open_flags & O_CREAT)) { + ecode = ntdb_new_database(ntdb, seed, &hdr); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + } else if (rlen < 0) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open: error %s reading %s", + strerror(errno), name); + goto fail; + } else if (rlen < sizeof(hdr) + || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open: %s is not a ntdb file", name); + goto fail; + } + + if (hdr.version != NTDB_VERSION) { + if (hdr.version == bswap_64(NTDB_VERSION)) + ntdb->flags |= NTDB_CONVERT; + else { + /* wrong version */ + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open:" + " %s is unknown version 0x%llx", + name, (long long)hdr.version); + goto fail; + } + } else if (ntdb->flags & NTDB_CONVERT) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open:" + " %s does not need NTDB_CONVERT", + name); + goto fail; + } + + ntdb_context_init(ntdb); + + ntdb_convert(ntdb, &hdr, sizeof(hdr)); + ntdb->hash_bits = hdr.hash_bits; + ntdb->hash_seed = hdr.hash_seed; + hash_test = NTDB_HASH_MAGIC; + hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test)); + if (hdr.hash_test != hash_test) { + /* wrong hash variant */ + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open:" + " %s uses a different hash function", + name); + goto fail; + } 
+ + ecode = capabilities_ok(ntdb, hdr.capabilities); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + + /* Clear any features we don't understand. */ + if ((open_flags & O_ACCMODE) != O_RDONLY) { + hdr.features_used &= NTDB_FEATURE_MASK; + ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header, + features_used), + &hdr.features_used, + sizeof(hdr.features_used)); + if (ecode != NTDB_SUCCESS) + goto fail; + } + + ntdb_unlock_open(ntdb, openlock); + + /* This makes sure we have current map_size and mmap. */ + ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true); + if (unlikely(ecode != NTDB_SUCCESS)) + goto fail; + + if (ntdb->file->map_size % NTDB_PGSIZE != 0) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open:" + " %s size %llu isn't a multiple of %u", + name, (long long)ntdb->file->map_size, + NTDB_PGSIZE); + goto fail; + } + + /* Now it's fully formed, recover if necessary. */ + berr = ntdb_needs_recovery(ntdb); + if (unlikely(berr != false)) { + if (berr < 0) { + ecode = NTDB_OFF_TO_ERR(berr); + goto fail; + } + ecode = ntdb_lock_and_recover(ntdb); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + } + + ecode = ntdb_ftable_init(ntdb); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + + ntdb->next = tdbs; + tdbs = ntdb; + return ntdb; + + fail: + /* Map ecode to some logical errno. 
*/ + switch (NTDB_ERR_TO_OFF(ecode)) { + case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): + case NTDB_ERR_TO_OFF(NTDB_ERR_IO): + saved_errno = EIO; + break; + case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): + saved_errno = EWOULDBLOCK; + break; + case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): + saved_errno = ENOMEM; + break; + case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): + saved_errno = EINVAL; + break; + default: + saved_errno = EINVAL; + break; + } + +fail_errno: +#ifdef NTDB_TRACE + close(ntdb->tracefd); +#endif + if (ntdb->file) { + ntdb_lock_cleanup(ntdb); + if (--ntdb->file->refcnt == 0) { + assert(ntdb->file->num_lockrecs == 0); + if (ntdb->file->map_ptr) { + if (ntdb->flags & NTDB_INTERNAL) { + ntdb->free_fn(ntdb->file->map_ptr, + ntdb->alloc_data); + } else + ntdb_munmap(ntdb); + } + if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0) + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_open: failed to close ntdb fd" + " on error: %s", strerror(errno)); + ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data); + ntdb->free_fn(ntdb->file, ntdb->alloc_data); + } + } + + ntdb->free_fn(ntdb, ntdb->alloc_data); + errno = saved_errno; + return NULL; +} + +_PUBLIC_ int ntdb_close(struct ntdb_context *ntdb) +{ + int ret = 0; + struct ntdb_context **i; + + ntdb_trace(ntdb, "ntdb_close"); + + if (ntdb->transaction) { + ntdb_transaction_cancel(ntdb); + } + + ntdb_lock_cleanup(ntdb); + if (--ntdb->file->refcnt == 0) { + if (ntdb->file->map_ptr) { + if (ntdb->flags & NTDB_INTERNAL) { + ntdb->free_fn(ntdb->file->map_ptr, + ntdb->alloc_data); + } else { + ntdb_munmap(ntdb); + } + } + ret = close(ntdb->file->fd); + ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data); + ntdb->free_fn(ntdb->file, ntdb->alloc_data); + } + + /* Remove from tdbs list */ + for (i = &tdbs; *i; i = &(*i)->next) { + if (*i == ntdb) { + *i = ntdb->next; + break; + } + } + +#ifdef NTDB_TRACE + close(ntdb->tracefd); +#endif + ntdb->free_fn(ntdb, ntdb->alloc_data); + + return ret; +} + +_PUBLIC_ void ntdb_foreach_(int 
(*fn)(struct ntdb_context *, void *), void *p) +{ + struct ntdb_context *i; + + for (i = tdbs; i; i = i->next) { + if (fn(i, p) != 0) + break; + } +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/private.h b/junkcode/rusty@rustcorp.com.au-ntdb/private.h new file mode 100644 index 00000000..24925375 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/private.h @@ -0,0 +1,677 @@ +#ifndef NTDB_PRIVATE_H +#define NTDB_PRIVATE_H +/* + Trivial Database 2: private types and prototypes + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#include "config.h" +#ifndef HAVE_CCAN +#error You need ccan to build ntdb! 
+#endif +#include "ntdb.h" +#include +#include +#include + +#ifdef HAVE_LIBREPLACE +#include "replace.h" +#include "system/filesys.h" +#include "system/time.h" +#include "system/shmem.h" +#include "system/select.h" +#include "system/wait.h" +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif +#include + +#ifndef TEST_IT +#define TEST_IT(cond) +#endif + +/* #define NTDB_TRACE 1 */ + +#ifndef __STRING +#define __STRING(x) #x +#endif + +#ifndef __STRINGSTRING +#define __STRINGSTRING(x) __STRING(x) +#endif + +#ifndef __location__ +#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__) +#endif + +typedef uint64_t ntdb_len_t; +typedef uint64_t ntdb_off_t; + +#define NTDB_MAGIC_FOOD "NTDB file\n" +#define NTDB_VERSION ((uint64_t)(0x26011967 + 7)) +#define NTDB_USED_MAGIC ((uint64_t)0x1999) +#define NTDB_HTABLE_MAGIC ((uint64_t)0x1888) +#define NTDB_CHAIN_MAGIC ((uint64_t)0x1777) +#define NTDB_FTABLE_MAGIC ((uint64_t)0x1666) +#define NTDB_CAP_MAGIC ((uint64_t)0x1555) +#define NTDB_FREE_MAGIC ((uint64_t)0xFE) +#define NTDB_HASH_MAGIC (0xA1ABE11A01092008ULL) +#define NTDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL) +#define NTDB_RECOVERY_INVALID_MAGIC (0x0ULL) + +/* Capability bits. */ +#define NTDB_CAP_TYPE_MASK 0x1FFFFFFFFFFFFFFFULL +#define NTDB_CAP_NOCHECK 0x8000000000000000ULL +#define NTDB_CAP_NOWRITE 0x4000000000000000ULL +#define NTDB_CAP_NOOPEN 0x2000000000000000ULL + +#define NTDB_OFF_IS_ERR(off) unlikely(off >= (ntdb_off_t)(long)NTDB_ERR_LAST) +#define NTDB_OFF_TO_ERR(off) ((enum NTDB_ERROR)(long)(off)) +#define NTDB_ERR_TO_OFF(ecode) ((ntdb_off_t)(long)(ecode)) + +/* Packing errors into pointers and v.v. 
*/ +#define NTDB_PTR_IS_ERR(ptr) \ + unlikely((unsigned long)(ptr) >= (unsigned long)NTDB_ERR_LAST) +#define NTDB_PTR_ERR(p) ((enum NTDB_ERROR)(long)(p)) +#define NTDB_ERR_PTR(err) ((void *)(long)(err)) + +/* This doesn't really need to be pagesize, but we use it for similar + * reasons. */ +#define NTDB_PGSIZE 16384 + +/* Common case of returning true, false or -ve error. */ +typedef int ntdb_bool_err; + +/* Prevent others from opening the file. */ +#define NTDB_OPEN_LOCK 0 +/* Expanding file. */ +#define NTDB_EXPANSION_LOCK 2 +/* Doing a transaction. */ +#define NTDB_TRANSACTION_LOCK 8 +/* Hash chain locks. */ +#define NTDB_HASH_LOCK_START 64 + +/* Extend the file by at least 100 times more than needed. */ +#define NTDB_EXTENSION_FACTOR 100 + +/* We steal this many upper bits, leaving 56 bits of offset: a maximum offset of 64 petabytes. */ +#define NTDB_OFF_UPPER_STEAL 8 + +/* And we use the lower bit, too. */ +#define NTDB_OFF_CHAIN_BIT 0 + +/* Hash table sits just after the header. */ +#define NTDB_HASH_OFFSET (sizeof(struct ntdb_header)) + +/* Additional features we understand. Currently: none. */ +#define NTDB_FEATURE_MASK ((uint64_t)0) + +/* The bit number where we store the extra hash bits. */ +/* Convenience mask to get actual offset. 
*/ +#define NTDB_OFF_MASK \ + (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - (1<magic_and_meta >> 43) & ((1 << 5)-1)) * 2; +} + +static inline uint64_t rec_key_length(const struct ntdb_used_record *r) +{ + return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1); +} + +static inline uint64_t rec_data_length(const struct ntdb_used_record *r) +{ + return r->key_and_data_len >> rec_key_bits(r); +} + +static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r) +{ + return (r->magic_and_meta >> 11) & 0xFFFFFFFF; +} + +static inline uint16_t rec_magic(const struct ntdb_used_record *r) +{ + return (r->magic_and_meta >> 48); +} + +struct ntdb_free_record { + uint64_t magic_and_prev; /* NTDB_OFF_UPPER_STEAL bits magic, then prev */ + uint64_t ftable_and_len; /* Len not counting these two fields. */ + /* This is why the minimum record size is 8 bytes. */ + uint64_t next; +}; + +static inline uint64_t frec_prev(const struct ntdb_free_record *f) +{ + return f->magic_and_prev & ((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1); +} + +static inline uint64_t frec_magic(const struct ntdb_free_record *f) +{ + return f->magic_and_prev >> (64 - NTDB_OFF_UPPER_STEAL); +} + +static inline uint64_t frec_len(const struct ntdb_free_record *f) +{ + return f->ftable_and_len & ((1ULL << (64 - NTDB_OFF_UPPER_STEAL))-1); +} + +static inline unsigned frec_ftable(const struct ntdb_free_record *f) +{ + return f->ftable_and_len >> (64 - NTDB_OFF_UPPER_STEAL); +} + +struct ntdb_recovery_record { + uint64_t magic; + /* Length of record (add this header to get total length). */ + uint64_t max_len; + /* Length used. */ + uint64_t len; + /* Old length of file before transaction. */ + uint64_t eof; +}; + +/* this is stored at the front of every database */ +struct ntdb_header { + char magic_food[64]; /* for /etc/magic */ + /* FIXME: Make me 32 bit? */ + uint64_t version; /* version of the code */ + uint64_t hash_bits; /* bits for toplevel hash table. 
*/ + uint64_t hash_test; /* result of hashing HASH_MAGIC. */ + uint64_t hash_seed; /* "random" seed written at creation time. */ + ntdb_off_t free_table; /* (First) free table. */ + ntdb_off_t recovery; /* Transaction recovery area. */ + + uint64_t features_used; /* Features all writers understand */ + uint64_t features_offered; /* Features offered */ + + uint64_t seqnum; /* Sequence number for NTDB_SEQNUM */ + + ntdb_off_t capabilities; /* Optional linked list of capabilities. */ + ntdb_off_t reserved[22]; + + /* + * Hash table is next: + * + * struct ntdb_used_record htable_hdr; + * ntdb_off_t htable[1 << hash_bits]; + */ +}; + +struct ntdb_freetable { + struct ntdb_used_record hdr; + ntdb_off_t next; + ntdb_off_t buckets[NTDB_FREE_BUCKETS]; +}; + +struct ntdb_capability { + struct ntdb_used_record hdr; + ntdb_off_t type; + ntdb_off_t next; + /* ... */ +}; + +/* Information about a particular (locked) hash entry. */ +struct hash_info { + /* Full hash value of entry. */ + uint32_t h; + /* Start of hash table / chain. */ + ntdb_off_t table; + /* Number of entries in this table/chain. */ + ntdb_off_t table_size; + /* Bucket we (or an empty space) were found in. */ + ntdb_off_t bucket; + /* Old value that was in that entry (if not found) */ + ntdb_off_t old_val; +}; + +enum ntdb_lock_flags { + /* WAIT == F_SETLKW, NOWAIT == F_SETLK */ + NTDB_LOCK_NOWAIT = 0, + NTDB_LOCK_WAIT = 1, + /* If set, don't log an error on failure. */ + NTDB_LOCK_PROBE = 2, + /* If set, don't check for recovery (used by recovery code). */ + NTDB_LOCK_NOCHECK = 4, +}; + +struct ntdb_lock { + struct ntdb_context *owner; + off_t off; + uint32_t count; + uint32_t ltype; +}; + +/* This is only needed for ntdb_access_commit, but used everywhere to + * simplify. 
*/ +struct ntdb_access_hdr { + struct ntdb_access_hdr *next; + ntdb_off_t off; + ntdb_len_t len; + bool convert; +}; + +/* mmaps we are keeping around because they are still direct accessed */ +struct ntdb_old_mmap { + struct ntdb_old_mmap *next; + + void *map_ptr; + ntdb_len_t map_size; +}; + +struct ntdb_file { + /* How many are sharing us? */ + unsigned int refcnt; + + /* Mmap (if any), or malloc (for NTDB_INTERNAL). */ + void *map_ptr; + + /* How much space has been mapped (<= current file size) */ + ntdb_len_t map_size; + + /* The file descriptor (-1 for NTDB_INTERNAL). */ + int fd; + + /* How many are accessing directly? */ + unsigned int direct_count; + + /* Old maps, still direct accessed. */ + struct ntdb_old_mmap *old_mmaps; + + /* Lock information */ + pid_t locker; + struct ntdb_lock allrecord_lock; + size_t num_lockrecs; + struct ntdb_lock *lockrecs; + + /* Identity of this file. */ + dev_t device; + ino_t inode; +}; + +struct ntdb_methods { + enum NTDB_ERROR (*tread)(struct ntdb_context *, ntdb_off_t, void *, + ntdb_len_t); + enum NTDB_ERROR (*twrite)(struct ntdb_context *, ntdb_off_t, const void *, + ntdb_len_t); + enum NTDB_ERROR (*oob)(struct ntdb_context *, ntdb_off_t, ntdb_len_t, bool); + enum NTDB_ERROR (*expand_file)(struct ntdb_context *, ntdb_len_t); + void *(*direct)(struct ntdb_context *, ntdb_off_t, size_t, bool); + ntdb_off_t (*read_off)(struct ntdb_context *ntdb, ntdb_off_t off); + enum NTDB_ERROR (*write_off)(struct ntdb_context *ntdb, ntdb_off_t off, + ntdb_off_t val); +}; + +/* + internal prototypes +*/ +/* Get bits from a value. 
*/ +static inline uint32_t bits_from(uint64_t val, unsigned start, unsigned num) +{ + assert(num <= 32); + return (val >> start) & ((1U << num) - 1); +} + + +/* hash.c: */ +uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed, + void *unused); + +enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb, + struct hash_info *h, + NTDB_DATA *kbuf, size_t *dlen); + +enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb, + struct hash_info *h, + NTDB_DATA *kbuf, size_t *dlen); + +/* Hash random memory. */ +uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len); + +/* Find and lock a hash entry (or where it would be). */ +ntdb_off_t find_and_lock(struct ntdb_context *ntdb, + NTDB_DATA key, + int ltype, + struct hash_info *h, + struct ntdb_used_record *rec, + const char **rkey); + +enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, + const struct hash_info *h, + ntdb_off_t new_off); + +enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, + const struct hash_info *h, + ntdb_off_t new_off); + +enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, + const struct hash_info *h); + +/* For ntdb_check */ +bool is_subhash(ntdb_off_t val); +enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller, + ntdb_off_t type); + +/* free.c: */ +enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb); + +/* check.c needs these to iterate through free lists. */ +ntdb_off_t first_ftable(struct ntdb_context *ntdb); +ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable); + +/* This returns space or -ve error number. */ +ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, + unsigned magic, bool growing); + +/* Put this record in a free list. */ +enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len_with_header, + enum ntdb_lock_flags waitflag, + bool coalesce_ok); + +/* Set up header for a used/ftable/htable/chain/capability record. 
*/ +enum NTDB_ERROR set_header(struct ntdb_context *ntdb, + struct ntdb_used_record *rec, + unsigned magic, uint64_t keylen, uint64_t datalen, + uint64_t actuallen); + +/* Used by ntdb_check to verify. */ +unsigned int size_to_bucket(ntdb_len_t data_len); +ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket); + +/* Used by ntdb_summary */ +ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off); + +/* Adjust expansion, used by create_recovery_area */ +ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size); + +/* io.c: */ +/* Initialize ntdb->methods. */ +void ntdb_io_init(struct ntdb_context *ntdb); + +/* Convert endian of the buffer if required. */ +void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size); + +/* Unmap and try to map the ntdb. */ +enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb); +enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb); + +/* Either alloc a copy, or give direct access. Release frees or noop. */ +const void *ntdb_access_read(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool convert); +void *ntdb_access_write(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool convert); + +/* Release result of ntdb_access_read/write. */ +void ntdb_access_release(struct ntdb_context *ntdb, const void *p); +/* Commit result of ntdb_access_write. */ +enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p); + +/* Clear an ondisk area. */ +enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len); + +/* Return a non-zero offset >= start and < end in this array (or end). */ +ntdb_off_t ntdb_find_nonzero_off(struct ntdb_context *ntdb, + ntdb_off_t base, + uint64_t start, + uint64_t end); + +/* Return a zero offset in this array, or num. */ +ntdb_off_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off, + uint64_t num); + +/* Allocate and make a copy of some offset. 
*/ +void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len); + +/* Writes a converted copy of a record. */ +enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off, + const void *rec, size_t len); + +/* Reads record and converts it */ +enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off, + void *rec, size_t len); + +/* Bump the seqnum (caller checks for ntdb->flags & NTDB_SEQNUM) */ +void ntdb_inc_seqnum(struct ntdb_context *ntdb); + +/* lock.c: */ +/* Print message because another ntdb owns a lock we want. */ +enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call); + +/* If we fork, we no longer really own locks. */ +bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log); + +/* Lock/unlock a hash bucket. */ +enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb, + unsigned int hbucket, + int ltype); +enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb, + unsigned int hash, int ltype); + +/* For closing the file. */ +void ntdb_lock_cleanup(struct ntdb_context *ntdb); + +/* Lock/unlock a particular free bucket. */ +enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off, + enum ntdb_lock_flags waitflag); +void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off); + +/* Serialize transaction start. */ +enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype); +void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype); + +/* Do we have any hash locks (ie. via ntdb_chainlock) ? */ +bool ntdb_has_hash_locks(struct ntdb_context *ntdb); + +/* Lock entire database. */ +enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype, + enum ntdb_lock_flags flags, bool upgradable); +void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype); +enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start); + +/* Serialize db open. 
*/ +enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb, + int ltype, enum ntdb_lock_flags flags); +void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype); +bool ntdb_has_open_lock(struct ntdb_context *ntdb); + +/* Serialize db expand. */ +enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype); +void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype); +bool ntdb_has_expansion_lock(struct ntdb_context *ntdb); + +/* If it needs recovery, grab all the locks and do it. */ +enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb); + +/* Default lock and unlock functions. */ +int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *); +int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *); + +/* transaction.c: */ +enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb); +ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb); + +struct ntdb_context { + /* Single list of all TDBs, to detect multiple opens. */ + struct ntdb_context *next; + + /* Filename of the database. */ + const char *name; + + /* Logging function */ + void (*log_fn)(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data); + void *log_data; + + /* Open flags passed to ntdb_open. */ + int open_flags; + + /* Low-level (fcntl) lock functions. */ + int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *); + int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *); + void *lock_data; + + /* the ntdb flags passed to ntdb_open. */ + uint32_t flags; + + /* Our statistics. */ + struct ntdb_attribute_stats stats; + + /* The actual file information */ + struct ntdb_file *file; + + /* Hash function. */ + uint32_t (*hash_fn)(const void *key, size_t len, uint32_t seed, void *); + void *hash_data; + uint32_t hash_seed; + /* Bits in toplevel hash table. */ + unsigned int hash_bits; + + /* Allocate and free functions. 
*/ + void *(*alloc_fn)(const void *owner, size_t len, void *priv_data); + void *(*expand_fn)(void *old, size_t newlen, void *priv_data); + void (*free_fn)(void *old, void *priv_data); + void *alloc_data; + + /* Our open hook, if any. */ + enum NTDB_ERROR (*openhook)(int fd, void *data); + void *openhook_data; + + /* Set if we are in a transaction. */ + struct ntdb_transaction *transaction; + + /* What free table are we using? */ + ntdb_off_t ftable_off; + unsigned int ftable; + + /* IO methods: changes for transactions. */ + const struct ntdb_methods *io; + + /* Direct access information */ + struct ntdb_access_hdr *access; +}; + +/* ntdb.c: */ +enum NTDB_ERROR COLD PRINTF_FMT(4, 5) + ntdb_logerr(struct ntdb_context *ntdb, + enum NTDB_ERROR ecode, + enum ntdb_log_level level, + const char *fmt, ...); + +static inline enum NTDB_ERROR ntdb_oob(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, + bool probe) +{ + if (likely(off + len >= off) + && likely(off + len <= ntdb->file->map_size) + && likely(!probe)) { + return NTDB_SUCCESS; + } + return ntdb->io->oob(ntdb, off, len, probe); +} + +/* Convenience routine to get an offset. */ +static inline ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb, + ntdb_off_t off) +{ + return ntdb->io->read_off(ntdb, off); +} + +/* Write an offset at an offset. 
*/ +static inline enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb, + ntdb_off_t off, + ntdb_off_t val) +{ + return ntdb->io->write_off(ntdb, off, val); +} + +#ifdef NTDB_TRACE +void ntdb_trace(struct ntdb_context *ntdb, const char *op); +void ntdb_trace_seqnum(struct ntdb_context *ntdb, uint32_t seqnum, const char *op); +void ntdb_trace_open(struct ntdb_context *ntdb, const char *op, + unsigned hash_size, unsigned ntdb_flags, unsigned open_flags); +void ntdb_trace_ret(struct ntdb_context *ntdb, const char *op, int ret); +void ntdb_trace_retrec(struct ntdb_context *ntdb, const char *op, NTDB_DATA ret); +void ntdb_trace_1rec(struct ntdb_context *ntdb, const char *op, + NTDB_DATA rec); +void ntdb_trace_1rec_ret(struct ntdb_context *ntdb, const char *op, + NTDB_DATA rec, int ret); +void ntdb_trace_1rec_retrec(struct ntdb_context *ntdb, const char *op, + NTDB_DATA rec, NTDB_DATA ret); +void ntdb_trace_2rec_flag_ret(struct ntdb_context *ntdb, const char *op, + NTDB_DATA rec1, NTDB_DATA rec2, unsigned flag, + int ret); +void ntdb_trace_2rec_retrec(struct ntdb_context *ntdb, const char *op, + NTDB_DATA rec1, NTDB_DATA rec2, NTDB_DATA ret); +#else +#define ntdb_trace(ntdb, op) +#define ntdb_trace_seqnum(ntdb, seqnum, op) +#define ntdb_trace_open(ntdb, op, hash_size, ntdb_flags, open_flags) +#define ntdb_trace_ret(ntdb, op, ret) +#define ntdb_trace_retrec(ntdb, op, ret) +#define ntdb_trace_1rec(ntdb, op, rec) +#define ntdb_trace_1rec_ret(ntdb, op, rec, ret) +#define ntdb_trace_1rec_retrec(ntdb, op, rec, ret) +#define ntdb_trace_2rec_flag_ret(ntdb, op, rec1, rec2, flag, ret) +#define ntdb_trace_2rec_retrec(ntdb, op, rec1, rec2, ret) +#endif /* !NTDB_TRACE */ + +#endif diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/python/pyntdb.c b/junkcode/rusty@rustcorp.com.au-ntdb/python/pyntdb.c new file mode 100644 index 00000000..72e62647 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/python/pyntdb.c @@ -0,0 +1,643 @@ +/* + Unix SMB/CIFS implementation. 
+ + Python interface to ntdb. Simply modified from tdb version. + + Copyright (C) 2004-2006 Tim Potter + Copyright (C) 2007-2008 Jelmer Vernooij + Copyright (C) 2011 Rusty Russell + + ** NOTE! The following LGPL license applies to the ntdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#include +#include "replace.h" +#include "system/filesys.h" + +/* Include ntdb headers */ +#include + +typedef struct { + PyObject_HEAD + struct ntdb_context *ctx; + bool closed; +} PyNtdbObject; + +static PyTypeObject PyNtdb; + +static void PyErr_SetTDBError(enum NTDB_ERROR e) +{ + PyErr_SetObject(PyExc_RuntimeError, + Py_BuildValue("(i,s)", e, ntdb_errorstr(e))); +} + +static NTDB_DATA PyString_AsNtdb_Data(PyObject *data) +{ + NTDB_DATA ret; + ret.dptr = (unsigned char *)PyString_AsString(data); + ret.dsize = PyString_Size(data); + return ret; +} + +static PyObject *PyString_FromNtdb_Data(NTDB_DATA data) +{ + PyObject *ret = PyString_FromStringAndSize((const char *)data.dptr, + data.dsize); + free(data.dptr); + return ret; +} + +#define PyErr_NTDB_ERROR_IS_ERR_RAISE(ret) \ + if (ret != NTDB_SUCCESS) { \ + PyErr_SetTDBError(ret); \ + return NULL; \ + } + +#define PyNtdb_CHECK_CLOSED(pyobj) \ + if (pyobj->closed) {\ + PyErr_SetObject(PyExc_RuntimeError, \ + Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed")); \ + 
return NULL; \ + } + +static void stderr_log(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) +{ + fprintf(stderr, "%s:%s:%s\n", + ntdb_name(ntdb), ntdb_errorstr(ecode), message); +} + +static PyObject *py_ntdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + char *name = NULL; + int ntdb_flags = NTDB_DEFAULT, flags = O_RDWR, mode = 0600; + struct ntdb_context *ctx; + PyNtdbObject *ret; + union ntdb_attribute logattr; + const char *kwnames[] = { "name", "ntdb_flags", "flags", "mode", NULL }; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii", cast_const2(char **, kwnames), &name, &ntdb_flags, &flags, &mode)) + return NULL; + + if (name == NULL) { + ntdb_flags |= NTDB_INTERNAL; + name = ""; + } + + logattr.log.base.attr = NTDB_ATTRIBUTE_LOG; + logattr.log.base.next = NULL; + logattr.log.fn = stderr_log; + ctx = ntdb_open(name, ntdb_flags, flags, mode, &logattr); + if (ctx == NULL) { + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + ret = PyObject_New(PyNtdbObject, &PyNtdb); + if (!ret) { + ntdb_close(ctx); + return NULL; + } + + ret->ctx = ctx; + ret->closed = false; + return (PyObject *)ret; +} + +static PyObject *obj_transaction_cancel(PyNtdbObject *self) +{ + PyNtdb_CHECK_CLOSED(self); + ntdb_transaction_cancel(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_transaction_commit(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + PyNtdb_CHECK_CLOSED(self); + ret = ntdb_transaction_commit(self->ctx); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_transaction_prepare_commit(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + PyNtdb_CHECK_CLOSED(self); + ret = ntdb_transaction_prepare_commit(self->ctx); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_transaction_start(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + PyNtdb_CHECK_CLOSED(self); + ret = ntdb_transaction_start(self->ctx); + 
PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_lockall(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + PyNtdb_CHECK_CLOSED(self); + ret = ntdb_lockall(self->ctx); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_unlockall(PyNtdbObject *self) +{ + PyNtdb_CHECK_CLOSED(self); + ntdb_unlockall(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_lockall_read(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + PyNtdb_CHECK_CLOSED(self); + ret = ntdb_lockall_read(self->ctx); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_unlockall_read(PyNtdbObject *self) +{ + PyNtdb_CHECK_CLOSED(self); + ntdb_unlockall_read(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_close(PyNtdbObject *self) +{ + int ret; + if (self->closed) + Py_RETURN_NONE; + ret = ntdb_close(self->ctx); + self->closed = true; + if (ret != 0) { + PyErr_SetTDBError(NTDB_ERR_IO); + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject *obj_get(PyNtdbObject *self, PyObject *args) +{ + NTDB_DATA key, data; + PyObject *py_key; + enum NTDB_ERROR ret; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + key = PyString_AsNtdb_Data(py_key); + ret = ntdb_fetch(self->ctx, key, &data); + if (ret == NTDB_ERR_NOEXIST) + Py_RETURN_NONE; + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + return PyString_FromNtdb_Data(data); +} + +static PyObject *obj_append(PyNtdbObject *self, PyObject *args) +{ + NTDB_DATA key, data; + PyObject *py_key, *py_data; + enum NTDB_ERROR ret; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "OO", &py_key, &py_data)) + return NULL; + + key = PyString_AsNtdb_Data(py_key); + data = PyString_AsNtdb_Data(py_data); + + ret = ntdb_append(self->ctx, key, data); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_firstkey(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + NTDB_DATA key; + + PyNtdb_CHECK_CLOSED(self); + + 
ret = ntdb_firstkey(self->ctx, &key); + if (ret == NTDB_ERR_NOEXIST) + Py_RETURN_NONE; + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + + return PyString_FromNtdb_Data(key); +} + +static PyObject *obj_nextkey(PyNtdbObject *self, PyObject *args) +{ + NTDB_DATA key; + PyObject *py_key; + enum NTDB_ERROR ret; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + /* Malloc here, since ntdb_nextkey frees. */ + key.dsize = PyString_Size(py_key); + key.dptr = malloc(key.dsize); + memcpy(key.dptr, PyString_AsString(py_key), key.dsize); + + ret = ntdb_nextkey(self->ctx, &key); + if (ret == NTDB_ERR_NOEXIST) + Py_RETURN_NONE; + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + + return PyString_FromNtdb_Data(key); +} + +static PyObject *obj_delete(PyNtdbObject *self, PyObject *args) +{ + NTDB_DATA key; + PyObject *py_key; + enum NTDB_ERROR ret; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + key = PyString_AsNtdb_Data(py_key); + ret = ntdb_delete(self->ctx, key); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +/* S.has_key(key): return True if ntdb_exists() finds key, otherwise False. + * Raises (returns NULL) if the database is closed or the arguments are bad. + * Py_True/Py_False are returned via Py_RETURN_TRUE/FALSE so the caller gets + * a new reference rather than a borrowed one. */ +static PyObject *obj_has_key(PyNtdbObject *self, PyObject *args) +{ + NTDB_DATA key; + PyObject *py_key; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + key = PyString_AsNtdb_Data(py_key); + if (ntdb_exists(self->ctx, key)) + Py_RETURN_TRUE; + Py_RETURN_FALSE; +} + +static PyObject *obj_store(PyNtdbObject *self, PyObject *args) +{ + NTDB_DATA key, value; + enum NTDB_ERROR ret; + int flag = NTDB_REPLACE; + PyObject *py_key, *py_value; + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "OO|i", &py_key, &py_value, &flag)) + return NULL; + + key = PyString_AsNtdb_Data(py_key); + value = PyString_AsNtdb_Data(py_value); + + ret = ntdb_store(self->ctx, key, value, flag); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_add_flag(PyNtdbObject *self, PyObject *args) +{ + unsigned flag; + 
PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "I", &flag)) + return NULL; + + ntdb_add_flag(self->ctx, flag); + Py_RETURN_NONE; +} + +static PyObject *obj_remove_flag(PyNtdbObject *self, PyObject *args) +{ + unsigned flag; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyArg_ParseTuple(args, "I", &flag)) + return NULL; + + ntdb_remove_flag(self->ctx, flag); + Py_RETURN_NONE; +} + +typedef struct { + PyObject_HEAD + NTDB_DATA current; + bool end; + PyNtdbObject *iteratee; +} PyNtdbIteratorObject; + +static PyObject *ntdb_iter_next(PyNtdbIteratorObject *self) +{ + enum NTDB_ERROR e; + PyObject *ret; + if (self->end) + return NULL; + ret = PyString_FromStringAndSize((const char *)self->current.dptr, + self->current.dsize); + e = ntdb_nextkey(self->iteratee->ctx, &self->current); + if (e == NTDB_ERR_NOEXIST) + self->end = true; + else + PyErr_NTDB_ERROR_IS_ERR_RAISE(e); + return ret; +} + +static void ntdb_iter_dealloc(PyNtdbIteratorObject *self) +{ + Py_DECREF(self->iteratee); + PyObject_Del(self); +} + +PyTypeObject PyNtdbIterator = { + .tp_name = "Iterator", + .tp_basicsize = sizeof(PyNtdbIteratorObject), + .tp_iternext = (iternextfunc)ntdb_iter_next, + .tp_dealloc = (destructor)ntdb_iter_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, +}; + +static PyObject *ntdb_object_iter(PyNtdbObject *self) +{ + PyNtdbIteratorObject *ret; + enum NTDB_ERROR e; + PyNtdb_CHECK_CLOSED(self); + + ret = PyObject_New(PyNtdbIteratorObject, &PyNtdbIterator); + if (!ret) + return NULL; + e = ntdb_firstkey(self->ctx, &ret->current); + if (e == NTDB_ERR_NOEXIST) { + ret->end = true; + } else { + PyErr_NTDB_ERROR_IS_ERR_RAISE(e); + ret->end = false; + } + ret->iteratee = self; + Py_INCREF(self); + return (PyObject *)ret; +} + +static PyObject *obj_clear(PyNtdbObject *self) +{ + enum NTDB_ERROR ret; + PyNtdb_CHECK_CLOSED(self); + ret = ntdb_wipe_all(self->ctx); + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject 
*obj_enable_seqnum(PyNtdbObject *self) +{ + PyNtdb_CHECK_CLOSED(self); + ntdb_add_flag(self->ctx, NTDB_SEQNUM); + Py_RETURN_NONE; +} + +static PyMethodDef ntdb_object_methods[] = { + { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS, + "S.transaction_cancel() -> None\n" + "Cancel the currently active transaction." }, + { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS, + "S.transaction_commit() -> None\n" + "Commit the currently active transaction." }, + { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS, + "S.transaction_prepare_commit() -> None\n" + "Prepare to commit the currently active transaction" }, + { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS, + "S.transaction_start() -> None\n" + "Start a new transaction." }, + { "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL }, + { "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL }, + { "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL }, + { "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL }, + { "close", (PyCFunction)obj_close, METH_NOARGS, NULL }, + { "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n" + "Fetch a value." }, + { "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n" + "Append data to an existing key." }, + { "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n" + "Return the first key in this database." }, + { "nextkey", (PyCFunction)obj_nextkey, METH_NOARGS, "S.nextkey(key) -> data\n" + "Return the next key in this database." }, + { "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n" + "Delete an entry." }, + { "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> None\n" + "Check whether key exists in this database." }, + { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None" + "Store data." 
}, + { "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" }, + { "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" }, + { "iterkeys", (PyCFunction)ntdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" }, + { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n" + "Wipe the entire database." }, + { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS, + "S.enable_seqnum() -> None" }, + { NULL } +}; + +static PyObject *obj_get_flags(PyNtdbObject *self, void *closure) +{ + PyNtdb_CHECK_CLOSED(self); + return PyInt_FromLong(ntdb_get_flags(self->ctx)); +} + +static PyObject *obj_get_filename(PyNtdbObject *self, void *closure) +{ + PyNtdb_CHECK_CLOSED(self); + return PyString_FromString(ntdb_name(self->ctx)); +} + +static PyObject *obj_get_seqnum(PyNtdbObject *self, void *closure) +{ + PyNtdb_CHECK_CLOSED(self); + return PyInt_FromLong(ntdb_get_seqnum(self->ctx)); +} + + +static PyGetSetDef ntdb_object_getsetters[] = { + { cast_const(char *, "flags"), (getter)obj_get_flags, NULL, NULL }, + { cast_const(char *, "filename"), (getter)obj_get_filename, NULL, + cast_const(char *, "The filename of this NTDB file.")}, + { cast_const(char *, "seqnum"), (getter)obj_get_seqnum, NULL, NULL }, + { NULL } +}; + +static PyObject *ntdb_object_repr(PyNtdbObject *self) +{ + if (ntdb_get_flags(self->ctx) & NTDB_INTERNAL) { + return PyString_FromString("Ntdb()"); + } else { + return PyString_FromFormat("Ntdb('%s')", ntdb_name(self->ctx)); + } +} + +static void ntdb_object_dealloc(PyNtdbObject *self) +{ + if (!self->closed) + ntdb_close(self->ctx); + self->ob_type->tp_free(self); +} + +static PyObject *obj_getitem(PyNtdbObject *self, PyObject *key) +{ + NTDB_DATA tkey, val; + enum NTDB_ERROR ret; + + PyNtdb_CHECK_CLOSED(self); + + if (!PyString_Check(key)) { + PyErr_SetString(PyExc_TypeError, "Expected string as key"); + return NULL; + } + + tkey.dptr = (unsigned char *)PyString_AsString(key); + 
tkey.dsize = PyString_Size(key); + + ret = ntdb_fetch(self->ctx, tkey, &val); + if (ret == NTDB_ERR_NOEXIST) { + PyErr_SetString(PyExc_KeyError, "No such NTDB entry"); + return NULL; + } else { + PyErr_NTDB_ERROR_IS_ERR_RAISE(ret); + return PyString_FromNtdb_Data(val); + } +} + +static int obj_setitem(PyNtdbObject *self, PyObject *key, PyObject *value) +{ + NTDB_DATA tkey, tval; + enum NTDB_ERROR ret; + if (self->closed) { + PyErr_SetObject(PyExc_RuntimeError, + Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed")); + return -1; + } + + if (!PyString_Check(key)) { + PyErr_SetString(PyExc_TypeError, "Expected string as key"); + return -1; + } + + tkey = PyString_AsNtdb_Data(key); + + if (value == NULL) { + ret = ntdb_delete(self->ctx, tkey); + } else { + if (!PyString_Check(value)) { + PyErr_SetString(PyExc_TypeError, "Expected string as value"); + return -1; + } + + tval = PyString_AsNtdb_Data(value); + + ret = ntdb_store(self->ctx, tkey, tval, NTDB_REPLACE); + } + + if (ret != NTDB_SUCCESS) { + PyErr_SetTDBError(ret); + return -1; + } + + return ret; +} + +static PyMappingMethods ntdb_object_mapping = { + .mp_subscript = (binaryfunc)obj_getitem, + .mp_ass_subscript = (objobjargproc)obj_setitem, +}; + +static PyTypeObject PyNtdb = { + .tp_name = "ntdb.Ntdb", + .tp_basicsize = sizeof(PyNtdbObject), + .tp_methods = ntdb_object_methods, + .tp_getset = ntdb_object_getsetters, + .tp_new = py_ntdb_open, + .tp_doc = "A NTDB file", + .tp_repr = (reprfunc)ntdb_object_repr, + .tp_dealloc = (destructor)ntdb_object_dealloc, + .tp_as_mapping = &ntdb_object_mapping, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER, + .tp_iter = (getiterfunc)ntdb_object_iter, +}; + +static PyMethodDef ntdb_methods[] = { + { "open", (PyCFunction)py_ntdb_open, METH_VARARGS|METH_KEYWORDS, "open(name, hash_size=0, ntdb_flags=NTDB_DEFAULT, flags=O_RDWR, mode=0600)\n" + "Open a NTDB file." 
}, + { NULL } +}; + +void initntdb(void); +void initntdb(void) +{ + PyObject *m; + + if (PyType_Ready(&PyNtdb) < 0) + return; + + if (PyType_Ready(&PyNtdbIterator) < 0) + return; + + m = Py_InitModule3("ntdb", ntdb_methods, "NTDB is a simple key-value database similar to GDBM that supports multiple writers."); + if (m == NULL) + return; + + PyModule_AddObject(m, "REPLACE", PyInt_FromLong(NTDB_REPLACE)); + PyModule_AddObject(m, "INSERT", PyInt_FromLong(NTDB_INSERT)); + PyModule_AddObject(m, "MODIFY", PyInt_FromLong(NTDB_MODIFY)); + + PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(NTDB_DEFAULT)); + PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(NTDB_INTERNAL)); + PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(NTDB_NOLOCK)); + PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(NTDB_NOMMAP)); + PyModule_AddObject(m, "CONVERT", PyInt_FromLong(NTDB_CONVERT)); + PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(NTDB_NOSYNC)); + PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(NTDB_SEQNUM)); + PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(NTDB_ALLOW_NESTING)); + + PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText")); + + PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION)); + + Py_INCREF(&PyNtdb); + PyModule_AddObject(m, "Ntdb", (PyObject *)&PyNtdb); + + Py_INCREF(&PyNtdbIterator); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/summary.c b/junkcode/rusty@rustcorp.com.au-ntdb/summary.c new file mode 100644 index 00000000..5a75dc5b --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/summary.c @@ -0,0 +1,321 @@ + /* + Trivial Database 2: human-readable summary code + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include + +#define SUMMARY_FORMAT \ + "Size of file/data: %zu/%zu\n" \ + "Number of records: %zu\n" \ + "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \ + "Smallest/average/largest data: %zu/%zu/%zu\n%s" \ + "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \ + "Number of free records: %zu\n" \ + "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \ + "Number of uncoalesced records: %zu\n" \ + "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ + "Toplevel hash used: %u of %u\n" \ + "Number of hashes: %zu\n" \ + "Smallest/average/largest hash chains: %zu/%zu/%zu\n%s" \ + "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" + +#define BUCKET_SUMMARY_FORMAT_A \ + "Free bucket %zu: total entries %zu.\n" \ + "Smallest/average/largest length: %zu/%zu/%zu\n%s" +#define BUCKET_SUMMARY_FORMAT_B \ + "Free bucket %zu-%zu: total entries %zu.\n" \ + "Smallest/average/largest length: %zu/%zu/%zu\n%s" +#define CAPABILITY_FORMAT \ + "Capability %llu%s\n" + +#define HISTO_WIDTH 70 +#define HISTO_HEIGHT 20 + +static ntdb_off_t count_hash(struct ntdb_context *ntdb, + ntdb_off_t hash_off, + ntdb_off_t num) +{ + const ntdb_off_t *h; + ntdb_off_t i, count = 0; + + h = ntdb_access_read(ntdb, hash_off, sizeof(*h) * num, true); + if (NTDB_PTR_IS_ERR(h)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(h)); + } + for (i = 0; i < num; i++) + count += (h[i] != 0); + + ntdb_access_release(ntdb, h); + return count; +} + +static enum NTDB_ERROR summarize(struct ntdb_context *ntdb, + struct tally *ftables, + struct tally *fr, + struct tally *keys, + 
struct tally *data, + struct tally *extra, + struct tally *uncoal, + struct tally *hashes, + size_t *num_caps) +{ + ntdb_off_t off; + ntdb_len_t len; + ntdb_len_t unc = 0; + + for (off = sizeof(struct ntdb_header); + off < ntdb->file->map_size; + off += len) { + const union { + struct ntdb_used_record u; + struct ntdb_free_record f; + struct ntdb_recovery_record r; + } *p; + /* We might not be able to get the whole thing. */ + p = ntdb_access_read(ntdb, off, sizeof(p->f), true); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_PTR_ERR(p); + } + if (frec_magic(&p->f) != NTDB_FREE_MAGIC) { + if (unc > 1) { + tally_add(uncoal, unc); + unc = 0; + } + } + + if (p->r.magic == NTDB_RECOVERY_INVALID_MAGIC + || p->r.magic == NTDB_RECOVERY_MAGIC) { + len = sizeof(p->r) + p->r.max_len; + } else if (frec_magic(&p->f) == NTDB_FREE_MAGIC) { + len = frec_len(&p->f); + tally_add(fr, len); + len += sizeof(p->u); + unc++; + } else if (rec_magic(&p->u) == NTDB_USED_MAGIC) { + len = sizeof(p->u) + + rec_key_length(&p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + + tally_add(keys, rec_key_length(&p->u)); + tally_add(data, rec_data_length(&p->u)); + tally_add(extra, rec_extra_padding(&p->u)); + } else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) { + ntdb_off_t count = count_hash(ntdb, + off + sizeof(p->u), + 1 << ntdb->hash_bits); + if (NTDB_OFF_IS_ERR(count)) { + return NTDB_OFF_TO_ERR(count); + } + tally_add(hashes, count); + tally_add(extra, rec_extra_padding(&p->u)); + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + } else if (rec_magic(&p->u) == NTDB_FTABLE_MAGIC) { + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + tally_add(ftables, rec_data_length(&p->u)); + tally_add(extra, rec_extra_padding(&p->u)); + } else if (rec_magic(&p->u) == NTDB_CHAIN_MAGIC) { + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + tally_add(hashes, + rec_data_length(&p->u)/sizeof(ntdb_off_t)); + tally_add(extra, 
rec_extra_padding(&p->u)); + } else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) { + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + (*num_caps)++; + } else { + len = dead_space(ntdb, off); + if (NTDB_OFF_IS_ERR(len)) { + return NTDB_OFF_TO_ERR(len); + } + } + ntdb_access_release(ntdb, p); + } + if (unc) + tally_add(uncoal, unc); + return NTDB_SUCCESS; +} + +static void add_capabilities(struct ntdb_context *ntdb, char *summary) +{ + ntdb_off_t off, next; + const struct ntdb_capability *cap; + size_t count = 0; + + /* Append to summary. */ + summary += strlen(summary); + + off = ntdb_read_off(ntdb, offsetof(struct ntdb_header, capabilities)); + if (NTDB_OFF_IS_ERR(off)) + return; + + /* Walk capability list. */ + for (; off; off = next) { + cap = ntdb_access_read(ntdb, off, sizeof(*cap), true); + if (NTDB_PTR_IS_ERR(cap)) { + break; + } + count++; + sprintf(summary, CAPABILITY_FORMAT, + cap->type & NTDB_CAP_TYPE_MASK, + /* Noopen? How did we get here? */ + (cap->type & NTDB_CAP_NOOPEN) ? " (unopenable)" + : ((cap->type & NTDB_CAP_NOWRITE) + && (cap->type & NTDB_CAP_NOCHECK)) ? " (uncheckable,read-only)" + : (cap->type & NTDB_CAP_NOWRITE) ? " (read-only)" + : (cap->type & NTDB_CAP_NOCHECK) ? 
" (uncheckable)" + : ""); + summary += strlen(summary); + next = cap->next; + ntdb_access_release(ntdb, cap); + } +} + +_PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, + enum ntdb_summary_flags flags, + char **summary) +{ + ntdb_len_t len; + size_t num_caps = 0; + struct tally *ftables, *freet, *keys, *data, *extra, *uncoal, *hashes; + char *freeg, *keysg, *datag, *extrag, *uncoalg, *hashesg; + enum NTDB_ERROR ecode; + + freeg = keysg = datag = extrag = uncoalg = hashesg = NULL; + + ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + ecode = ntdb_lock_expand(ntdb, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + ntdb_allrecord_unlock(ntdb, F_RDLCK); + return ecode; + } + + /* Start stats off empty. */ + ftables = tally_new(HISTO_HEIGHT); + freet = tally_new(HISTO_HEIGHT); + keys = tally_new(HISTO_HEIGHT); + data = tally_new(HISTO_HEIGHT); + extra = tally_new(HISTO_HEIGHT); + uncoal = tally_new(HISTO_HEIGHT); + hashes = tally_new(HISTO_HEIGHT); + if (!ftables || !freet || !keys || !data || !extra + || !uncoal || !hashes) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_summary: failed to allocate" + " tally structures"); + goto unlock; + } + + ecode = summarize(ntdb, ftables, freet, keys, data, extra, + uncoal, hashes, &num_caps); + if (ecode != NTDB_SUCCESS) { + goto unlock; + } + + if (flags & NTDB_SUMMARY_HISTOGRAMS) { + freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT); + keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT); + datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT); + extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT); + uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT); + hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); + } + + /* 20 is max length of a %llu. */ + len = strlen(SUMMARY_FORMAT) + 33*20 + 1 + + (freeg ? strlen(freeg) : 0) + + (keysg ? strlen(keysg) : 0) + + (datag ? 
strlen(datag) : 0) + + (extrag ? strlen(extrag) : 0) + + (uncoalg ? strlen(uncoalg) : 0) + + (hashesg ? strlen(hashesg) : 0) + + num_caps * (strlen(CAPABILITY_FORMAT) + 20 + + strlen(" (uncheckable,read-only)")); + + *summary = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data); + if (!*summary) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_summary: failed to allocate string"); + goto unlock; + } + + sprintf(*summary, SUMMARY_FORMAT, + (size_t)ntdb->file->map_size, + tally_total(keys, NULL) + tally_total(data, NULL), + tally_num(keys), + tally_min(keys), tally_mean(keys), tally_max(keys), + keysg ? keysg : "", + tally_min(data), tally_mean(data), tally_max(data), + datag ? datag : "", + tally_min(extra), tally_mean(extra), tally_max(extra), + extrag ? extrag : "", + tally_num(freet), + tally_min(freet), tally_mean(freet), tally_max(freet), + freeg ? freeg : "", + tally_total(uncoal, NULL), + tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal), + uncoalg ? uncoalg : "", + (unsigned)count_hash(ntdb, sizeof(struct ntdb_header), + 1 << ntdb->hash_bits), + 1 << ntdb->hash_bits, + tally_num(hashes), + tally_min(hashes), tally_mean(hashes), tally_max(hashes), + hashesg ? 
hashesg : "", + tally_total(keys, NULL) * 100.0 / ntdb->file->map_size, + tally_total(data, NULL) * 100.0 / ntdb->file->map_size, + tally_total(extra, NULL) * 100.0 / ntdb->file->map_size, + tally_total(freet, NULL) * 100.0 / ntdb->file->map_size, + (tally_num(keys) + tally_num(freet) + tally_num(hashes)) + * sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size, + tally_num(ftables) * sizeof(struct ntdb_freetable) + * 100.0 / ntdb->file->map_size, + (tally_total(hashes, NULL) * sizeof(ntdb_off_t) + + (sizeof(ntdb_off_t) << ntdb->hash_bits)) + * 100.0 / ntdb->file->map_size); + + add_capabilities(ntdb, *summary); + +unlock: + ntdb->free_fn(freeg, ntdb->alloc_data); + ntdb->free_fn(keysg, ntdb->alloc_data); + ntdb->free_fn(datag, ntdb->alloc_data); + ntdb->free_fn(extrag, ntdb->alloc_data); + ntdb->free_fn(uncoalg, ntdb->alloc_data); + ntdb->free_fn(hashesg, ntdb->alloc_data); + ntdb->free_fn(freet, ntdb->alloc_data); + ntdb->free_fn(keys, ntdb->alloc_data); + ntdb->free_fn(data, ntdb->alloc_data); + ntdb->free_fn(extra, ntdb->alloc_data); + ntdb->free_fn(uncoal, ntdb->alloc_data); + ntdb->free_fn(ftables, ntdb->alloc_data); + ntdb->free_fn(hashes, ntdb->alloc_data); + + ntdb_allrecord_unlock(ntdb, F_RDLCK); + ntdb_unlock_expand(ntdb, F_RDLCK); + return ecode; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-12-store.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-12-store.c new file mode 100644 index 00000000..f5b3b72a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-12-store.c @@ -0,0 +1,55 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "helpapi-external-agent.h" + +/* We use the same seed which we saw a failure on. 
*/ +static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) +{ + return hash64_stable((const unsigned char *)key, len, + *(uint64_t *)p); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + uint64_t seed = 16014841315512641303ULL; + union ntdb_attribute fixed_hattr + = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = fixedhash, + .data = &seed } }; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; + NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; + + fixed_hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-12-store.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr); + ok1(ntdb); + if (!ntdb) + continue; + + /* We seemed to lose some keys. + * Insert and check they're in there! */ + for (j = 0; j < 500; j++) { + NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(d, data)); + free(d.dptr); + } + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-13-delete.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-13-delete.c new file mode 100644 index 00000000..44820ff1 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-13-delete.c @@ -0,0 +1,202 @@ +#include "../private.h" // For NTDB_TOPLEVEL_HASH_BITS +#include +#include "../ntdb.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +/* We rig the hash so adjacent-numbered records always clash. 
*/ +static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv) +{ + return *((const unsigned int *)key) / 2; +} + +/* We use the same seed which we saw a failure on. */ +static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) +{ + return hash64_stable((const unsigned char *)key, len, + *(uint64_t *)p); +} + +static bool store_records(struct ntdb_context *ntdb) +{ + int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA d, data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + ntdb_fetch(ntdb, key, &d); + if (!ntdb_deq(d, data)) + return false; + free(d.dptr); + } + return true; +} + +static void test_val(struct ntdb_context *ntdb, uint64_t val) +{ + uint64_t v; + NTDB_DATA key = { (unsigned char *)&v, sizeof(v) }; + NTDB_DATA d, data = { (unsigned char *)&v, sizeof(v) }; + + /* Insert an entry, then delete it. */ + v = val; + /* Delete should fail. */ + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Insert should succeed. */ + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Delete should succeed. */ + ok1(ntdb_delete(ntdb, key) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Re-add it, then add collision. */ + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + v = val + 1; + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Can find both? */ + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* Delete second one. 
*/ + v = val + 1; + ok1(ntdb_delete(ntdb, key) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Re-add */ + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Now, try deleting first one. */ + v = val; + ok1(ntdb_delete(ntdb, key) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Can still find second? */ + v = val + 1; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* Now, this will be ideally placed. */ + v = val + 2; + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* This will collide with both. */ + v = val; + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + + /* We can still find them all, right? */ + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val + 1; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val + 2; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* And if we delete val + 1, that val + 2 should not move! */ + v = val + 1; + ok1(ntdb_delete(ntdb, key) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + v = val; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val + 2; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* Delete those two, so we are empty. 
*/ + ok1(ntdb_delete(ntdb, key) == 0); + v = val; + ok1(ntdb_delete(ntdb, key) == 0); + + ok1(ntdb_check(ntdb, NULL, NULL) == 0); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + uint64_t seed = 16014841315512641303ULL; + union ntdb_attribute clash_hattr + = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = clash } }; + union ntdb_attribute fixed_hattr + = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = fixedhash, + .data = &seed } }; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + /* These two values gave trouble before. */ + int vals[] = { 755, 837 }; + + clash_hattr.base.next = &tap_log_attr; + fixed_hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr); + ok1(ntdb); + if (!ntdb) + continue; + + /* Check start of hash table. */ + test_val(ntdb, 0); + + /* Check end of hash table. */ + test_val(ntdb, -1ULL); + + /* Check mixed bitpattern. */ + test_val(ntdb, 0x123456789ABCDEF0ULL); + + ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0)); + ntdb_close(ntdb); + + /* Deleting these entries in the db gave problems. 
*/ + ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(store_records(ntdb)); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) { + NTDB_DATA key; + + key.dptr = (unsigned char *)&vals[j]; + key.dsize = sizeof(vals[j]); + ok1(ntdb_delete(ntdb, key) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + } + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-14-exists.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-14-exists.c new file mode 100644 index 00000000..37f40b00 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-14-exists.c @@ -0,0 +1,53 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +static bool test_records(struct ntdb_context *ntdb) +{ + int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (ntdb_exists(ntdb, key)) + return false; + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + if (!ntdb_exists(ntdb, key)) + return false; + } + + for (i = 0; i < 1000; i++) { + if (!ntdb_exists(ntdb, key)) + return false; + if (ntdb_delete(ntdb, key) != 0) + return false; + if (ntdb_exists(ntdb, key)) + return false; + } + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-14-exists.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (ok1(ntdb)) 
+ ok1(test_records(ntdb)); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-16-wipe_all.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-16-wipe_all.c new file mode 100644 index 00000000..fb70523f --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-16-wipe_all.c @@ -0,0 +1,45 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +static bool add_records(struct ntdb_context *ntdb) +{ + int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + } + return true; +} + + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-16-wipe_all.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (ok1(ntdb)) { + NTDB_DATA key; + ok1(add_records(ntdb)); + ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS); + ok1(ntdb_firstkey(ntdb, &key) == NTDB_ERR_NOEXIST); + ntdb_close(ntdb); + } + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-20-alloc-attr.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-20-alloc-attr.c new file mode 100644 index 00000000..868764fa --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-20-alloc-attr.c @@ -0,0 +1,107 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include +#include + +#include "logging.h" +#include "helpapi-external-agent.h" 
+ +static const struct ntdb_context *curr_ntdb; +static const struct ntdb_file *curr_file; + +static int owner_null_count, + owner_weird_count, alloc_count, free_count, expand_count; + +static void *test_alloc(const void *owner, size_t len, void *priv_data) +{ + void *ret; + + if (!owner) { + owner_null_count++; + } else if (owner != curr_ntdb && owner != curr_file) { + owner_weird_count++; + } + + alloc_count++; + ret = malloc(len); + + /* The first time, this is the current ntdb, next is + * for the file struct. */ + if (!owner) { + if (!curr_ntdb) { + curr_ntdb = ret; + } else if (!curr_file) { + curr_file = ret; + } + } + assert(priv_data == &owner_weird_count); + return ret; +} + +static void *test_expand(void *old, size_t newlen, void *priv_data) +{ + expand_count++; + + assert(priv_data == &owner_weird_count); + return realloc(old, newlen); +} + +static void test_free(void *old, void *priv_data) +{ + assert(priv_data == &owner_weird_count); + if (old) { + free_count++; + } + free(old); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + union ntdb_attribute alloc_attr; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; + NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; + + alloc_attr.base.next = &tap_log_attr; + alloc_attr.base.attr = NTDB_ATTRIBUTE_ALLOCATOR; + + alloc_attr.alloc.alloc = test_alloc; + alloc_attr.alloc.expand = test_expand; + alloc_attr.alloc.free = test_free; + alloc_attr.alloc.priv_data = &owner_weird_count; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 700 * 3 + 4) + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + curr_ntdb = NULL; + curr_file = NULL; + ntdb = ntdb_open("run-20-alloc-attr.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &alloc_attr); + ok1(ntdb); + if (!ntdb) + continue; + + for (j = 0; j < 700; j++) { 
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(d, data)); + test_free(d.dptr, &owner_weird_count); + } + ntdb_close(ntdb); + + ok1(owner_null_count == 2+i*2); + ok1(owner_weird_count == 0); + ok1(alloc_count == free_count); + ok1(expand_count != 0); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-21-parse_record.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-21-parse_record.c new file mode 100644 index 00000000..a841a1a7 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-21-parse_record.c @@ -0,0 +1,67 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expected) +{ + if (!ntdb_deq(data, *expected)) + return NTDB_ERR_EINVAL; + return NTDB_SUCCESS; +} + +static enum NTDB_ERROR parse_err(NTDB_DATA key, NTDB_DATA data, void *unused) +{ + return 100; +} + +static bool test_records(struct ntdb_context *ntdb) +{ + int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + } + + for (i = 0; i < 1000; i++) { + if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_SUCCESS) + return false; + } + + if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_ERR_NOEXIST) + return false; + + /* Test error return from parse function. 
*/ + i = 0; + if (ntdb_parse_record(ntdb, key, parse_err, NULL) != 100) + return false; + + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-21-parse_record.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (ok1(ntdb)) + ok1(test_records(ntdb)); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-55-transaction.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-55-transaction.c new file mode 100644 index 00000000..21dd1c46 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-55-transaction.c @@ -0,0 +1,72 @@ +#include "../private.h" // struct ntdb_context +#include "../ntdb.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + unsigned char *buffer; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data; + + buffer = malloc(1000); + for (i = 0; i < 1000; i++) + buffer[i] = i; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-55-transaction.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(ntdb_transaction_start(ntdb) == 0); + data.dptr = buffer; + data.dsize = 1000; + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); + ok1(data.dsize == 1000); + ok1(memcmp(data.dptr, 
buffer, data.dsize) == 0); + free(data.dptr); + + /* Cancelling a transaction means no store */ + ntdb_transaction_cancel(ntdb); + ok1(ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_ERR_NOEXIST); + + /* Commit the transaction. */ + ok1(ntdb_transaction_start(ntdb) == 0); + data.dptr = buffer; + data.dsize = 1000; + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); + ok1(data.dsize == 1000); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + ok1(ntdb_transaction_commit(ntdb) == 0); + ok1(ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); + ok1(data.dsize == 1000); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + free(buffer); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-60-noop-transaction.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-60-noop-transaction.c new file mode 100644 index 00000000..53a4ade6 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-60-noop-transaction.c @@ -0,0 +1,56 @@ +#include "../private.h" // struct ntdb_context +#include "../ntdb.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4), d; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 12 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-60-transaction.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, 
&tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + + ok1(ntdb_transaction_start(ntdb) == 0); + /* Do an identical replace. */ + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + ok1(ntdb_transaction_commit(ntdb) == 0); + + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(data, d)); + free(d.dptr); + ntdb_close(ntdb); + + /* Reopen, fetch. */ + ntdb = ntdb_open("api-60-transaction.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(data, d)); + free(d.dptr); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-80-tdb_fd.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-80-tdb_fd.c new file mode 100644 index 00000000..0d37754e --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-80-tdb_fd.c @@ -0,0 +1,31 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 3); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-80-ntdb_fd.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + continue; + + if (flags[i] & NTDB_INTERNAL) + ok1(ntdb_fd(ntdb) == -1); + else + ok1(ntdb_fd(ntdb) > 2); + ntdb_close(ntdb); + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-81-seqnum.c 
b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-81-seqnum.c new file mode 100644 index 00000000..33ced82c --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-81-seqnum.c @@ -0,0 +1,69 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i, seq; + struct ntdb_context *ntdb; + NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-81-seqnum.ntdb", + flags[i]|NTDB_SEQNUM|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + continue; + + seq = 0; + ok1(ntdb_get_seqnum(ntdb) == seq); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + /* Fetch doesn't change seqnum */ + if (ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS)) + free(d.dptr); + ok1(ntdb_get_seqnum(ntdb) == seq); + ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + + ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + /* Empty append works */ + ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + + ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + + if (!(flags[i] & NTDB_INTERNAL)) { + ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); + ok1(ntdb_get_seqnum(ntdb) == ++seq); + ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); 
+ ok1(ntdb_get_seqnum(ntdb) == ++seq); + ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS); + ok1(ntdb_get_seqnum(ntdb) == seq); + + ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_get_seqnum(ntdb) == seq + 1); + ntdb_transaction_cancel(ntdb); + ok1(ntdb_get_seqnum(ntdb) == seq); + } + ntdb_close(ntdb); + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-82-lockattr.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-82-lockattr.c new file mode 100644 index 00000000..3b7ba8c8 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-82-lockattr.c @@ -0,0 +1,238 @@ +#include "../private.h" // for ntdb_fcntl_unlock +#include "../ntdb.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "helpapi-external-agent.h" + +static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *_err) +{ + int *lock_err = _err; + struct flock fl; + int ret; + + if (*lock_err) { + errno = *lock_err; + return -1; + } + + do { + fl.l_type = rw; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + + if (waitflag) + ret = fcntl(fd, F_SETLKW, &fl); + else + ret = fcntl(fd, F_SETLK, &fl); + } while (ret != 0 && errno == EINTR); + + return ret; +} + +static int trav_err; +static int trav(struct ntdb_context *ntdb, NTDB_DATA k, NTDB_DATA d, int *terr) +{ + *terr = trav_err; + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + union ntdb_attribute lock_attr; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + int lock_err; + + lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK; + lock_attr.base.next = &tap_log_attr; + lock_attr.flock.lock = mylock; + lock_attr.flock.unlock = ntdb_fcntl_unlock; + lock_attr.flock.data = &lock_err; + + 
plan_tests(sizeof(flags) / sizeof(flags[0]) * 81); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + NTDB_DATA d; + + /* Nonblocking open; expect no error message. */ + lock_err = EAGAIN; + ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + ok(errno == lock_err, "Errno is %u", errno); + ok1(!ntdb); + ok1(tap_log_messages == 0); + + lock_err = EINTR; + ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + ok(errno == lock_err, "Errno is %u", errno); + ok1(!ntdb); + ok1(tap_log_messages == 0); + + /* Forced fail open. */ + lock_err = ENOMEM; + ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + ok1(errno == lock_err); + ok1(!ntdb); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = 0; + ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + if (!ok1(ntdb)) + continue; + ok1(tap_log_messages == 0); + + /* Nonblocking store. */ + lock_err = EAGAIN; + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking fetch. 
*/ + lock_err = EAGAIN; + ok1(!ntdb_exists(ntdb, key)); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(!ntdb_exists(ntdb, key)); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(!ntdb_exists(ntdb, key)); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking delete. */ + lock_err = EAGAIN; + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking locks. */ + lock_err = EAGAIN; + ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK); + /* This actually does divide and conquer. 
*/ + ok1(tap_log_messages > 0); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages > 0); + tap_log_messages = 0; + + /* Nonblocking traverse; go nonblock partway through. */ + lock_err = 0; + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + /* Need two entries to ensure two lock attempts! */ + ok1(ntdb_store(ntdb, ntdb_mkdata("key2", 4), data, + NTDB_REPLACE) == 0); + trav_err = EAGAIN; + ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + trav_err = EINTR; + lock_err = 0; + ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + trav_err = ENOMEM; + lock_err = 0; + ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking transactions. */ + lock_err = EAGAIN; + ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking transaction prepare. */ + lock_err = 0; + ok1(ntdb_transaction_start(ntdb) == 0); + ok1(ntdb_delete(ntdb, key) == 0); + + lock_err = EAGAIN; + ok1(ntdb_transaction_prepare_commit(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + + lock_err = 0; + ok1(ntdb_transaction_prepare_commit(ntdb) == 0); + ok1(ntdb_transaction_commit(ntdb) == 0); + + /* And the transaction was committed, right? 
*/ + ok1(!ntdb_exists(ntdb, key)); + ntdb_close(ntdb); + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-83-openhook.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-83-openhook.c new file mode 100644 index 00000000..cdd015a5 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-83-openhook.c @@ -0,0 +1,104 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "external-agent.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +#define KEY_STR "key" + +static enum NTDB_ERROR clear_if_first(int fd, void *arg) +{ +/* We hold a lock offset 4 always, so we can tell if anyone is holding it. + * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag). */ + struct flock fl; + + if (arg != clear_if_first) + return NTDB_ERR_CORRUPT; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 4; + fl.l_len = 1; + + if (fcntl(fd, F_SETLK, &fl) == 0) { + /* We must be first ones to open it! 
*/ + diag("truncating file!"); + if (ftruncate(fd, 0) != 0) { + return NTDB_ERR_IO; + } + } + fl.l_type = F_RDLCK; + if (fcntl(fd, F_SETLKW, &fl) != 0) { + return NTDB_ERR_IO; + } + return NTDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb, *ntdb2; + struct agent *agent; + union ntdb_attribute cif; + NTDB_DATA key = ntdb_mkdata(KEY_STR, strlen(KEY_STR)); + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + + cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK; + cif.openhook.base.next = &tap_log_attr; + cif.openhook.fn = clear_if_first; + cif.openhook.data = clear_if_first; + + agent = prepare_external_agent(); + plan_tests(sizeof(flags) / sizeof(flags[0]) * 16); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + /* Create it */ + ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, NULL); + ok1(ntdb); + ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0); + ntdb_close(ntdb); + + /* Now, open with CIF, should clear it. */ + ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR, 0, &cif); + ok1(ntdb); + ok1(!ntdb_exists(ntdb, key)); + ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0); + + /* Agent should not clear it, since it's still open. */ + ok1(external_agent_operation(agent, OPEN_WITH_HOOK, + "run-83-openhook.ntdb") == SUCCESS); + ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) + == SUCCESS); + ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS); + + /* Still exists for us too. */ + ok1(ntdb_exists(ntdb, key)); + + /* Nested open should not erase db. */ + ntdb2 = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR, 0, &cif); + ok1(ntdb_exists(ntdb2, key)); + ok1(ntdb_exists(ntdb, key)); + ntdb_close(ntdb2); + + ok1(ntdb_exists(ntdb, key)); + + /* Close it, now agent should clear it. 
*/ + ntdb_close(ntdb); + + ok1(external_agent_operation(agent, OPEN_WITH_HOOK, + "run-83-openhook.ntdb") == SUCCESS); + ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) + == FAILED); + ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS); + + ok1(tap_log_messages == 0); + } + + free_external_agent(agent); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-91-get-stats.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-91-get-stats.c new file mode 100644 index 00000000..120b62ed --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-91-get-stats.c @@ -0,0 +1,58 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 11); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + union ntdb_attribute *attr; + NTDB_DATA key = ntdb_mkdata("key", 3), data; + + ntdb = ntdb_open("run-91-get-stats.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + /* Force an expansion */ + data.dsize = 65536; + data.dptr = calloc(data.dsize, 1); + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + free(data.dptr); + + /* Use malloc so valgrind will catch overruns. */ + attr = malloc(sizeof *attr); + attr->stats.base.attr = NTDB_ATTRIBUTE_STATS; + attr->stats.size = sizeof(*attr); + + ok1(ntdb_get_attribute(ntdb, attr) == 0); + ok1(attr->stats.size == sizeof(*attr)); + ok1(attr->stats.allocs > 0); + ok1(attr->stats.expands > 0); + ok1(attr->stats.locks > 0); + free(attr); + + /* Try short one. 
*/ + attr = malloc(offsetof(struct ntdb_attribute_stats, allocs) + + sizeof(attr->stats.allocs)); + attr->stats.base.attr = NTDB_ATTRIBUTE_STATS; + attr->stats.size = offsetof(struct ntdb_attribute_stats, allocs) + + sizeof(attr->stats.allocs); + ok1(ntdb_get_attribute(ntdb, attr) == 0); + ok1(attr->stats.size == sizeof(*attr)); + ok1(attr->stats.allocs > 0); + free(attr); + ok1(tap_log_messages == 0); + + ntdb_close(ntdb); + + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-92-get-set-readonly.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-92-get-set-readonly.c new file mode 100644 index 00000000..dda5acb9 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-92-get-set-readonly.c @@ -0,0 +1,106 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 48); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + /* RW -> R0 */ + ntdb = ntdb_open("run-92-get-set-readonly.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY)); + + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); + + ntdb_add_flag(ntdb, NTDB_RDONLY); + ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY); + + /* Can't store, append, delete. */ + ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 1); + ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 2); + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 3); + + /* Can't start a transaction, or any write lock. 
*/ + ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 4); + ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 5); + ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 6); + ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 7); + + /* Back to RW. */ + ntdb_remove_flag(ntdb, NTDB_RDONLY); + ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY)); + + ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_SUCCESS); + ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS); + ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); + + ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); + ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS); + + ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS); + ntdb_chainunlock(ntdb, key); + ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); + ntdb_unlockall(ntdb); + ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS); + ok1(tap_log_messages == 7); + + ntdb_close(ntdb); + + /* R0 -> RW */ + ntdb = ntdb_open("run-92-get-set-readonly.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDONLY, 0600, &tap_log_attr); + ok1(ntdb); + ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY); + + /* Can't store, append, delete. */ + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 8); + ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 9); + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 10); + + /* Can't start a transaction, or any write lock. 
*/ + ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 11); + ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 12); + ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 13); + ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY); + ok1(tap_log_messages == 14); + + /* Can't remove NTDB_RDONLY since we opened with O_RDONLY */ + ntdb_remove_flag(ntdb, NTDB_RDONLY); + ok1(tap_log_messages == 15); + ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY); + ntdb_close(ntdb); + + ok1(tap_log_messages == 15); + tap_log_messages = 0; + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-93-repack.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-93-repack.c new file mode 100644 index 00000000..437c0f85 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-93-repack.c @@ -0,0 +1,80 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +#define NUM_TESTS 1000 + +static bool store_all(struct ntdb_context *ntdb) +{ + unsigned int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA dbuf = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_TESTS; i++) { + if (ntdb_store(ntdb, key, dbuf, NTDB_INSERT) != NTDB_SUCCESS) + return false; + } + return true; +} + +static int mark_entry(struct ntdb_context *ntdb, + NTDB_DATA key, NTDB_DATA data, bool found[]) +{ + unsigned int num; + + if (key.dsize != sizeof(num)) + return -1; + memcpy(&num, key.dptr, key.dsize); + if (num >= NUM_TESTS) + return -1; + if (found[num]) + return -1; + found[num] = true; + return 0; +} + +static bool is_all_set(bool found[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) + if (!found[i]) + return false; + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + bool found[NUM_TESTS]; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, 
NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT + }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 6 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-93-repack.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + break; + + ok1(store_all(ntdb)); + + ok1(ntdb_repack(ntdb) == NTDB_SUCCESS); + memset(found, 0, sizeof(found)); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + ok1(ntdb_traverse(ntdb, mark_entry, found) == NUM_TESTS); + ok1(is_all_set(found, NUM_TESTS)); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-94-expand-during-parse.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-94-expand-during-parse.c new file mode 100644 index 00000000..39b19988 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-94-expand-during-parse.c @@ -0,0 +1,87 @@ +/* We use direct access to hand to the parse function: what if db expands? */ +#include "config.h" +#include "../ntdb.h" +#include "tap-interface.h" +#include "logging.h" +#include "../private.h" /* To establish size, esp. for NTDB_INTERNAL dbs */ +#include "helpapi-external-agent.h" + +static struct ntdb_context *ntdb; + +static off_t ntdb_size(void) +{ + return ntdb->file->map_size; +} + +struct parse_info { + unsigned int depth; + NTDB_DATA expected; +}; + +static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, + struct parse_info *pinfo) +{ + off_t flen; + unsigned int i; + + if (!ntdb_deq(data, pinfo->expected)) + return NTDB_ERR_EINVAL; + + flen = ntdb_size(); + + for (i = 0; ntdb_size() == flen; i++) { + NTDB_DATA add = ntdb_mkdata(&i, sizeof(i)); + + /* This is technically illegal parse(), which is why we + * grabbed allrecord lock.*/ + ntdb_store(ntdb, add, add, NTDB_INSERT); + } + + /* Access the record again. */ + if (!ntdb_deq(data, pinfo->expected)) + return NTDB_ERR_EINVAL; + + /* Recurse! 
Woot! */ + if (pinfo->depth != 0) { + enum NTDB_ERROR ecode; + + pinfo->depth--; + ecode = ntdb_parse_record(ntdb, key, parse, pinfo); + if (ecode) { + return ecode; + } + } + + /* Access the record one more time. */ + if (!ntdb_deq(data, pinfo->expected)) + return NTDB_ERR_EINVAL; + + return NTDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + struct parse_info pinfo; + NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-94-expand-during-parse.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); + ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); + pinfo.expected = data; + pinfo.depth = 3; + ok1(ntdb_parse_record(ntdb, key, parse, &pinfo) == NTDB_SUCCESS); + ntdb_unlockall(ntdb); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-95-read-only-during-parse.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-95-read-only-during-parse.c new file mode 100644 index 00000000..0b0eb69f --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-95-read-only-during-parse.c @@ -0,0 +1,93 @@ +/* Make sure write operations fail during ntdb_parse(). */ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +static struct ntdb_context *ntdb; + +/* We could get either of these. 
*/ +static bool xfail(enum NTDB_ERROR ecode) +{ + return ecode == NTDB_ERR_RDONLY || ecode == NTDB_ERR_LOCK; +} + +static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, + NTDB_DATA *expected) +{ + NTDB_DATA add = ntdb_mkdata("another", strlen("another")); + + if (!ntdb_deq(data, *expected)) { + return NTDB_ERR_EINVAL; + } + + /* These should all fail.*/ + if (!xfail(ntdb_store(ntdb, add, add, NTDB_INSERT))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_append(ntdb, key, add))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_delete(ntdb, key))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_transaction_start(ntdb))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_chainlock(ntdb, key))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_lockall(ntdb))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_wipe_all(ntdb))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + if (!xfail(ntdb_repack(ntdb))) { + return NTDB_ERR_EINVAL; + } + tap_log_messages--; + + /* Access the record one more time. 
*/ + if (!ntdb_deq(data, *expected)) { + return NTDB_ERR_EINVAL; + } + + return NTDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-95-read-only-during-parse.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS); + ok1(ntdb_parse_record(ntdb, key, parse, &data) == NTDB_SUCCESS); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-add-remove-flags.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-add-remove-flags.c new file mode 100644 index 00000000..a09046e2 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-add-remove-flags.c @@ -0,0 +1,88 @@ +#include "../private.h" // for ntdb_context +#include "../ntdb.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(87); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-add-remove-flags.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(ntdb_get_flags(ntdb) == ntdb->flags); + tap_log_messages = 0; + ntdb_add_flag(ntdb, NTDB_NOLOCK); + if (flags[i] & NTDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(ntdb_get_flags(ntdb) & NTDB_NOLOCK); + } + + tap_log_messages = 0; + ntdb_add_flag(ntdb, NTDB_NOMMAP); + if (flags[i] & 
NTDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(ntdb_get_flags(ntdb) & NTDB_NOMMAP); + ok1(ntdb->file->map_ptr == NULL); + } + + tap_log_messages = 0; + ntdb_add_flag(ntdb, NTDB_NOSYNC); + if (flags[i] & NTDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(ntdb_get_flags(ntdb) & NTDB_NOSYNC); + } + + ok1(ntdb_get_flags(ntdb) == ntdb->flags); + + tap_log_messages = 0; + ntdb_remove_flag(ntdb, NTDB_NOLOCK); + if (flags[i] & NTDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(!(ntdb_get_flags(ntdb) & NTDB_NOLOCK)); + } + + tap_log_messages = 0; + ntdb_remove_flag(ntdb, NTDB_NOMMAP); + if (flags[i] & NTDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(!(ntdb_get_flags(ntdb) & NTDB_NOMMAP)); + ok1(ntdb->file->map_ptr != NULL); + } + + tap_log_messages = 0; + ntdb_remove_flag(ntdb, NTDB_NOSYNC); + if (flags[i] & NTDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(!(ntdb_get_flags(ntdb) & NTDB_NOSYNC)); + } + + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-check-callback.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-check-callback.c new file mode 100644 index 00000000..eaf60d48 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-check-callback.c @@ -0,0 +1,87 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +#define NUM_RECORDS 1000 + +static bool store_records(struct ntdb_context *ntdb) +{ + int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_RECORDS; i++) + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + return true; +} + +static enum NTDB_ERROR 
check(NTDB_DATA key, + NTDB_DATA data, + bool *array) +{ + int val; + + if (key.dsize != sizeof(val)) { + diag("Wrong key size: %zu\n", key.dsize); + return NTDB_ERR_CORRUPT; + } + + if (key.dsize != data.dsize + || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) { + diag("Key and data differ\n"); + return NTDB_ERR_CORRUPT; + } + + memcpy(&val, key.dptr, sizeof(val)); + if (val >= NUM_RECORDS || val < 0) { + diag("check value %i\n", val); + return NTDB_ERR_CORRUPT; + } + + if (array[val]) { + diag("Value %i already seen\n", val); + return NTDB_ERR_CORRUPT; + } + + array[val] = true; + return NTDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + return 0; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + bool array[NUM_RECORDS]; + + ntdb = ntdb_open("run-check-callback.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(store_records(ntdb)); + for (j = 0; j < NUM_RECORDS; j++) + array[j] = false; + ok1(ntdb_check(ntdb, check, array) == NTDB_SUCCESS); + for (j = 0; j < NUM_RECORDS; j++) + if (!array[j]) + break; + ok1(j == NUM_RECORDS); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-firstkey-nextkey.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-firstkey-nextkey.c new file mode 100644 index 00000000..6d9ad67a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-firstkey-nextkey.c @@ -0,0 +1,158 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +#define NUM_RECORDS 1000 + +static bool store_records(struct ntdb_context *ntdb) +{ + 
int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_RECORDS; i++) + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + return true; +} + +struct trav_data { + unsigned int records[NUM_RECORDS]; + unsigned int calls; +}; + +static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p) +{ + struct trav_data *td = p; + int val; + + memcpy(&val, dbuf.dptr, dbuf.dsize); + td->records[td->calls++] = val; + return 0; +} + +/* Since ntdb_nextkey frees dptr, we need to clone it. */ +static NTDB_DATA dup_key(NTDB_DATA key) +{ + void *p = malloc(key.dsize); + memcpy(p, key.dptr, key.dsize); + key.dptr = p; + return key; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + int num; + struct trav_data td; + NTDB_DATA k; + struct ntdb_context *ntdb; + union ntdb_attribute seed_attr; + enum NTDB_ERROR ecode; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; + seed_attr.base.next = &tap_log_attr; + seed_attr.seed.seed = 6334326220117065685ULL; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("api-firstkey-nextkey.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, + &seed_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(ntdb_firstkey(ntdb, &k) == NTDB_ERR_NOEXIST); + + /* One entry... */ + k.dptr = (unsigned char *)&num; + k.dsize = sizeof(num); + num = 0; + ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0); + ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS); + ok1(k.dsize == sizeof(num)); + ok1(memcmp(k.dptr, &num, sizeof(num)) == 0); + ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST); + + /* Two entries.
*/ + k.dptr = (unsigned char *)&num; + k.dsize = sizeof(num); + num = 1; + ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0); + ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS); + ok1(k.dsize == sizeof(num)); + memcpy(&num, k.dptr, sizeof(num)); + ok1(num == 0 || num == 1); + ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS); + ok1(k.dsize == sizeof(j)); + memcpy(&j, k.dptr, sizeof(j)); + ok1(j == 0 || j == 1); + ok1(j != num); + ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST); + + /* Clean up. */ + k.dptr = (unsigned char *)&num; + k.dsize = sizeof(num); + num = 0; + ok1(ntdb_delete(ntdb, k) == 0); + num = 1; + ok1(ntdb_delete(ntdb, k) == 0); + + /* Now lots of records. */ + ok1(store_records(ntdb)); + td.calls = 0; + + num = ntdb_traverse(ntdb, trav, &td); + ok1(num == NUM_RECORDS); + ok1(td.calls == NUM_RECORDS); + + /* Simple loop should match ntdb_traverse */ + for (j = 0, ecode = ntdb_firstkey(ntdb, &k); j < td.calls; j++) { + int val; + + ok1(ecode == NTDB_SUCCESS); + ok1(k.dsize == sizeof(val)); + memcpy(&val, k.dptr, k.dsize); + ok1(td.records[j] == val); + ecode = ntdb_nextkey(ntdb, &k); + } + + /* But arbitrary orderings should work too. */ + for (j = td.calls-1; j > 0; j--) { + k.dptr = (unsigned char *)&td.records[j-1]; + k.dsize = sizeof(td.records[j-1]); + k = dup_key(k); + ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS); + ok1(k.dsize == sizeof(td.records[j])); + ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0); + free(k.dptr); + } + + /* Even delete should work.
*/ + for (j = 0, ecode = ntdb_firstkey(ntdb, &k); + ecode != NTDB_ERR_NOEXIST; + j++) { + ok1(ecode == NTDB_SUCCESS); + ok1(k.dsize == 4); + ok1(ntdb_delete(ntdb, k) == 0); + ecode = ntdb_nextkey(ntdb, &k); + } + + diag("delete using first/nextkey gave %u of %u records", + j, NUM_RECORDS); + ok1(j == NUM_RECORDS); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-fork-test.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-fork-test.c new file mode 100644 index 00000000..32c6ebe4 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-fork-test.c @@ -0,0 +1,195 @@ +/* Test forking while holding lock. + * + * There are only five ways to do this currently: + * (1) grab a ntdb_chainlock, then fork. + * (2) grab a ntdb_lockall, then fork. + * (3) grab a ntdb_lockall_read, then fork. + * (4) start a transaction, then fork. + * (5) fork from inside a ntdb_parse() callback. + * + * Note that we don't hold a lock across ntdb_traverse callbacks, so + * that doesn't matter. + */ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +static bool am_child = false; + +static enum NTDB_ERROR fork_in_parse(NTDB_DATA key, NTDB_DATA data, + struct ntdb_context *ntdb) +{ + int status; + + if (fork() == 0) { + am_child = true; + + /* We expect this to fail. 
*/ + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) + exit(1); + + if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) + exit(1); + + if (tap_log_messages != 2) + exit(2); + + return NTDB_SUCCESS; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + return NTDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 14); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + int status; + + tap_log_messages = 0; + + ntdb = ntdb_open("run-fork-test.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + continue; + + /* Put a record in here. */ + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_SUCCESS); + + ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. */ + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) + return 1; + + if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + /* Child can do this without any complaints. */ + ntdb_chainunlock(ntdb, key); + if (tap_log_messages != 2) + return 3; + ntdb_close(ntdb); + if (tap_log_messages != 2) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + ntdb_chainunlock(ntdb, key); + + ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. */ + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) + return 1; + + if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + /* Child can do this without any complaints. 
*/ + ntdb_unlockall(ntdb); + if (tap_log_messages != 2) + return 3; + ntdb_close(ntdb); + if (tap_log_messages != 2) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + ntdb_unlockall(ntdb); + + ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. */ + /* This would always fail anyway... */ + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) + return 1; + + if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + /* Child can do this without any complaints. */ + ntdb_unlockall_read(ntdb); + if (tap_log_messages != 2) + return 3; + ntdb_close(ntdb); + if (tap_log_messages != 2) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + ntdb_unlockall_read(ntdb); + + ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS); + /* If transactions is empty, noop "commit" succeeds. */ + ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS); + if (fork() == 0) { + int last_log_messages; + + /* We expect this to fail. */ + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK) + return 1; + + if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + if (ntdb_transaction_prepare_commit(ntdb) + != NTDB_ERR_LOCK) + return 3; + if (tap_log_messages == 2) + return 4; + + last_log_messages = tap_log_messages; + /* Child can do this without any complaints. */ + ntdb_transaction_cancel(ntdb); + if (tap_log_messages != last_log_messages) + return 4; + ntdb_close(ntdb); + if (tap_log_messages != last_log_messages) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + ntdb_transaction_cancel(ntdb); + + ok1(ntdb_parse_record(ntdb, key, fork_in_parse, ntdb) + == NTDB_SUCCESS); + ntdb_close(ntdb); + if (am_child) { + /* Child can return from parse without complaints. 
*/ + if (tap_log_messages != 2) + exit(3); + exit(0); + } + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-locktimeout.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-locktimeout.c new file mode 100644 index 00000000..235409b3 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-locktimeout.c @@ -0,0 +1,190 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "external-agent.h" +#include "helpapi-external-agent.h" + +#undef alarm +#define alarm fast_alarm + +/* Speed things up by doing things in milliseconds. */ +static unsigned int fast_alarm(unsigned int milli_seconds) +{ + struct itimerval it; + + it.it_interval.tv_sec = it.it_interval.tv_usec = 0; + it.it_value.tv_sec = milli_seconds / 1000; + it.it_value.tv_usec = milli_seconds * 1000; + setitimer(ITIMER_REAL, &it, NULL); + return 0; +} + +#define CatchSignal(sig, handler) signal((sig), (handler)) + +static void do_nothing(int signum) +{ +} + +/* This example code is taken from SAMBA, so try not to change it. */ +static struct flock flock_struct; + +/* Return a value which is none of v1, v2 or v3. */ +static inline short int invalid_value(short int v1, short int v2, short int v3) +{ + short int try = (v1+v2+v3)^((v1+v2+v3) << 16); + while (try == v1 || try == v2 || try == v3) + try++; + return try; +} + +/* We invalidate in as many ways as we can, so the OS rejects it */ +static void invalidate_flock_struct(int signum) +{ + flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK); + flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END); + flock_struct.l_start = -1; + /* A large negative. 
*/ + flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1); +} + +static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *_timeout) +{ + int ret, saved_errno = errno; + unsigned int timeout = *(unsigned int *)_timeout; + + flock_struct.l_type = rw; + flock_struct.l_whence = SEEK_SET; + flock_struct.l_start = off; + flock_struct.l_len = len; + + CatchSignal(SIGALRM, invalidate_flock_struct); + alarm(timeout); + + for (;;) { + if (waitflag) + ret = fcntl(fd, F_SETLKW, &flock_struct); + else + ret = fcntl(fd, F_SETLK, &flock_struct); + + if (ret == 0) + break; + + /* Not signalled? Something else went wrong. */ + if (flock_struct.l_len == len) { + if (errno == EAGAIN || errno == EINTR) + continue; + saved_errno = errno; + break; + } else { + saved_errno = EINTR; + break; + } + } + + alarm(0); + errno = saved_errno; + return ret; +} + +static int ntdb_chainlock_with_timeout_internal(struct ntdb_context *ntdb, + NTDB_DATA key, + unsigned int timeout, + int rw_type) +{ + union ntdb_attribute locking; + enum NTDB_ERROR ecode; + + if (timeout) { + locking.base.attr = NTDB_ATTRIBUTE_FLOCK; + ecode = ntdb_get_attribute(ntdb, &locking); + if (ecode != NTDB_SUCCESS) + return ecode; + + /* Replace locking function with our own. 
*/ + locking.flock.data = &timeout; + locking.flock.lock = timeout_lock; + + ecode = ntdb_set_attribute(ntdb, &locking); + if (ecode != NTDB_SUCCESS) + return ecode; + } + if (rw_type == F_RDLCK) + ecode = ntdb_chainlock_read(ntdb, key); + else + ecode = ntdb_chainlock(ntdb, key); + + if (timeout) { + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); + } + return ecode; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + NTDB_DATA key = ntdb_mkdata("hello", 5); + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + struct agent *agent; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 15); + + agent = prepare_external_agent(); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + enum NTDB_ERROR ecode; + ntdb = ntdb_open("run-locktimeout.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + break; + + /* Simple cases: should succeed. */ + ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, + F_RDLCK); + ok1(ecode == NTDB_SUCCESS); + ok1(tap_log_messages == 0); + + ntdb_chainunlock_read(ntdb, key); + ok1(tap_log_messages == 0); + + ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, + F_WRLCK); + ok1(ecode == NTDB_SUCCESS); + ok1(tap_log_messages == 0); + + ntdb_chainunlock(ntdb, key); + ok1(tap_log_messages == 0); + + /* OK, get agent to start transaction, then we should time out. */ + ok1(external_agent_operation(agent, OPEN, "run-locktimeout.ntdb") + == SUCCESS); + ok1(external_agent_operation(agent, TRANSACTION_START, "") + == SUCCESS); + ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, + F_WRLCK); + ok1(ecode == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + + /* Even if we get a different signal, should be fine. 
*/ + CatchSignal(SIGUSR1, do_nothing); + external_agent_operation(agent, SEND_SIGNAL, ""); + ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20, + F_WRLCK); + ok1(ecode == NTDB_ERR_LOCK); + ok1(tap_log_messages == 0); + + ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "") + == SUCCESS); + ok1(external_agent_operation(agent, CLOSE, "") + == SUCCESS); + ntdb_close(ntdb); + } + free_external_agent(agent); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-missing-entries.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-missing-entries.c new file mode 100644 index 00000000..a6427c07 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-missing-entries.c @@ -0,0 +1,43 @@ +/* Another test revealed that we lost an entry. This reproduces it. */ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +#define NUM_RECORDS 1189 + +/* We use the same seed which we saw this failure on. 
*/ +static uint32_t failhash(const void *key, size_t len, uint32_t seed, void *p) +{ + return hash64_stable((const unsigned char *)key, len, + 699537674708983027ULL); +} + +int main(int argc, char *argv[]) +{ + int i; + struct ntdb_context *ntdb; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = failhash } }; + + hattr.base.next = &tap_log_attr; + plan_tests(1 + NUM_RECORDS + 2); + + ntdb = ntdb_open("run-missing-entries.ntdb", NTDB_INTERNAL, + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + if (ok1(ntdb)) { + for (i = 0; i < NUM_RECORDS; i++) { + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + } + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-open-multiple-times.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-open-multiple-times.c new file mode 100644 index 00000000..59a03629 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-open-multiple-times.c @@ -0,0 +1,87 @@ +#include "config.h" +#include "../ntdb.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "../private.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb, *ntdb2; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 30); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-open-multiple-times.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ntdb2 = 
ntdb_open("run-open-multiple-times.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT, 0600, &tap_log_attr); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_check(ntdb2, NULL, NULL) == 0); + ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr); + + /* Store in one, fetch in the other. */ + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(d, data)); + free(d.dptr); + + /* Vice versa, with delete. */ + ok1(ntdb_delete(ntdb2, key) == 0); + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST); + + /* OK, now close first one, check second still good. */ + ok1(ntdb_close(ntdb) == 0); + + ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr); + ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == 0); + ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(d, data)); + free(d.dptr); + + /* Reopen */ + ntdb = ntdb_open("run-open-multiple-times.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT, 0600, &tap_log_attr); + ok1(ntdb); + + ok1(ntdb_transaction_start(ntdb2) == 0); + + /* Anything in the other one should fail. */ + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 1); + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 2); + ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 3); + ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK); + ok1(tap_log_messages == 4); + + /* Transaction should work as normal. */ + ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == NTDB_SUCCESS); + + /* Now... try closing with locks held.
*/ + ok1(ntdb_close(ntdb2) == 0); + + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(ntdb_deq(d, data)); + free(d.dptr); + ok1(ntdb_close(ntdb) == 0); + ok1(tap_log_messages == 4); + tap_log_messages = 0; + } + + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-record-expand.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-record-expand.c new file mode 100644 index 00000000..b92f13fe --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-record-expand.c @@ -0,0 +1,50 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +#define MAX_SIZE 10000 +#define SIZE_STEP 131 + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data; + + data.dptr = malloc(MAX_SIZE); + memset(data.dptr, 0x24, MAX_SIZE); + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-record-expand.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + data.dsize = 0; + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + for (data.dsize = 0; + data.dsize < MAX_SIZE; + data.dsize += SIZE_STEP) { + memset(data.dptr, data.dsize, data.dsize); + ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + } + ntdb_close(ntdb); + } + ok1(tap_log_messages == 0); + free(data.dptr); + + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-simple-delete.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-simple-delete.c new file mode 100644 index 
00000000..0b886c3a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-simple-delete.c @@ -0,0 +1,39 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-simple-delete.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (ntdb) { + /* Delete should fail. */ + ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Insert should succeed. */ + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Delete should now work. 
*/ + ok1(ntdb_delete(ntdb, key) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + } + } + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/api-summary.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-summary.c new file mode 100644 index 00000000..7701f26a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/api-summary.c @@ -0,0 +1,56 @@ +#include "config.h" +#include "../ntdb.h" +#include "../private.h" +#include "tap-interface.h" +#include "logging.h" +#include "helpapi-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; + NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; + char *summary; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-summary.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + /* Put some stuff in there. */ + for (j = 0; j < 500; j++) { + /* Make sure padding varies so we get some graphs!
*/ + data.dsize = j % (sizeof(j) + 1); + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + fail("Storing in ntdb"); + } + + for (j = 0; + j <= NTDB_SUMMARY_HISTOGRAMS; + j += NTDB_SUMMARY_HISTOGRAMS) { + ok1(ntdb_summary(ntdb, j, &summary) == NTDB_SUCCESS); + ok1(strstr(summary, "Number of records: 500\n")); + ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n")); + ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n")); + if (j == NTDB_SUMMARY_HISTOGRAMS) { + ok1(strstr(summary, "|") + && strstr(summary, "*")); + } else { + ok1(!strstr(summary, "|") + && !strstr(summary, "*")); + } + free(summary); + } + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/external-agent.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/external-agent.c new file mode 100644 index 00000000..a06b70f7 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/external-agent.c @@ -0,0 +1,261 @@ +#include "external-agent.h" +#include "logging.h" +#include "lock-tracking.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tap-interface.h" +#include +#include + +static struct ntdb_context *ntdb; + +void (*external_agent_free)(void *) = free; + +static enum NTDB_ERROR clear_if_first(int fd, void *arg) +{ +/* We hold a lock offset 4 always, so we can tell if anyone is holding it. + * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag). */ + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 4; + fl.l_len = 1; + + if (fcntl(fd, F_SETLK, &fl) == 0) { + /* We must be first ones to open it! 
*/ + diag("agent truncating file!"); + if (ftruncate(fd, 0) != 0) { + return NTDB_ERR_IO; + } + } + fl.l_type = F_RDLCK; + if (fcntl(fd, F_SETLKW, &fl) != 0) { + return NTDB_ERR_IO; + } + return NTDB_SUCCESS; +} + +static enum agent_return do_operation(enum operation op, const char *name) +{ + NTDB_DATA k, d; + enum agent_return ret; + NTDB_DATA data; + enum NTDB_ERROR ecode; + union ntdb_attribute cif; + const char *eq; + + if (op != OPEN && op != OPEN_WITH_HOOK && !ntdb) { + diag("external: No ntdb open!"); + return OTHER_FAILURE; + } + + diag("external: %s", operation_name(op)); + + eq = strchr(name, '='); + if (eq) { + k = ntdb_mkdata(name, eq - name); + d = ntdb_mkdata(eq + 1, strlen(eq+1)); + } else { + k = ntdb_mkdata(name, strlen(name)); + d.dsize = 0; + d.dptr = NULL; + } + + locking_would_block = 0; + switch (op) { + case OPEN: + if (ntdb) { + diag("Already have ntdb %s open", ntdb_name(ntdb)); + return OTHER_FAILURE; + } + ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &tap_log_attr); + if (!ntdb) { + if (!locking_would_block) + diag("Opening ntdb gave %s", strerror(errno)); + forget_locking(); + ret = OTHER_FAILURE; + } else + ret = SUCCESS; + break; + case OPEN_WITH_HOOK: + if (ntdb) { + diag("Already have ntdb %s open", ntdb_name(ntdb)); + return OTHER_FAILURE; + } + cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK; + cif.openhook.base.next = &tap_log_attr; + cif.openhook.fn = clear_if_first; + ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &cif); + if (!ntdb) { + if (!locking_would_block) + diag("Opening ntdb gave %s", strerror(errno)); + forget_locking(); + ret = OTHER_FAILURE; + } else + ret = SUCCESS; + break; + case FETCH: + ecode = ntdb_fetch(ntdb, k, &data); + if (ecode == NTDB_ERR_NOEXIST) { + ret = FAILED; + } else if (ecode < 0) { + ret = OTHER_FAILURE; + } else if (!ntdb_deq(data, d)) { + ret = OTHER_FAILURE; + external_agent_free(data.dptr); + } else { + ret = SUCCESS; + external_agent_free(data.dptr); + } + break; + case STORE: + ret = 
ntdb_store(ntdb, k, d, 0) == 0 ? SUCCESS : OTHER_FAILURE; + break; + case TRANSACTION_START: + ret = ntdb_transaction_start(ntdb) == 0 ? SUCCESS : OTHER_FAILURE; + break; + case TRANSACTION_COMMIT: + ret = ntdb_transaction_commit(ntdb)==0 ? SUCCESS : OTHER_FAILURE; + break; + case NEEDS_RECOVERY: + ret = external_agent_needs_rec(ntdb); + break; + case CHECK: + ret = ntdb_check(ntdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE; + break; + case CLOSE: + ret = ntdb_close(ntdb) == 0 ? SUCCESS : OTHER_FAILURE; + ntdb = NULL; + break; + case SEND_SIGNAL: + /* We do this async */ + ret = SUCCESS; + break; + default: + ret = OTHER_FAILURE; + } + + if (locking_would_block) + ret = WOULD_HAVE_BLOCKED; + + return ret; +} + +struct agent { + int cmdfd, responsefd; +}; + +/* Do this before doing any ntdb stuff. Return handle, or NULL. */ +struct agent *prepare_external_agent(void) +{ + int pid, ret; + int command[2], response[2]; + char name[1+PATH_MAX]; + + if (pipe(command) != 0 || pipe(response) != 0) + return NULL; + + pid = fork(); + if (pid < 0) + return NULL; + + if (pid != 0) { + struct agent *agent = malloc(sizeof(*agent)); + + close(command[0]); + close(response[1]); + agent->cmdfd = command[1]; + agent->responsefd = response[0]; + return agent; + } + + close(command[1]); + close(response[0]); + + /* We want to fail, not block. */ + nonblocking_locks = true; + log_prefix = "external: "; + while ((ret = read(command[0], name, sizeof(name))) > 0) { + enum agent_return result; + + result = do_operation(name[0], name+1); + if (write(response[1], &result, sizeof(result)) + != sizeof(result)) + err(1, "Writing response"); + if (name[0] == SEND_SIGNAL) { + struct timeval ten_ms; + ten_ms.tv_sec = 0; + ten_ms.tv_usec = 10000; + select(0, NULL, NULL, NULL, &ten_ms); + kill(getppid(), SIGUSR1); + } + } + exit(0); +} + +/* Ask the external agent to try to do an operation. 
*/ +enum agent_return external_agent_operation(struct agent *agent, + enum operation op, + const char *name) +{ + enum agent_return res; + unsigned int len; + char *string; + + if (!name) + name = ""; + len = 1 + strlen(name) + 1; + string = malloc(len); + + string[0] = op; + strcpy(string+1, name); + + if (write(agent->cmdfd, string, len) != len + || read(agent->responsefd, &res, sizeof(res)) != sizeof(res)) + res = AGENT_DIED; + + free(string); + return res; +} + +const char *agent_return_name(enum agent_return ret) +{ + return ret == SUCCESS ? "SUCCESS" + : ret == WOULD_HAVE_BLOCKED ? "WOULD_HAVE_BLOCKED" + : ret == AGENT_DIED ? "AGENT_DIED" + : ret == FAILED ? "FAILED" + : ret == OTHER_FAILURE ? "OTHER_FAILURE" + : "**INVALID**"; +} + +const char *operation_name(enum operation op) +{ + switch (op) { + case OPEN: return "OPEN"; + case OPEN_WITH_HOOK: return "OPEN_WITH_HOOK"; + case FETCH: return "FETCH"; + case STORE: return "STORE"; + case CHECK: return "CHECK"; + case TRANSACTION_START: return "TRANSACTION_START"; + case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT"; + case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; + case SEND_SIGNAL: return "SEND_SIGNAL"; + case CLOSE: return "CLOSE"; + } + return "**INVALID**"; +} + +void free_external_agent(struct agent *agent) +{ + close(agent->cmdfd); + close(agent->responsefd); + free(agent); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/external-agent.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/external-agent.h new file mode 100644 index 00000000..559a92cd --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/external-agent.h @@ -0,0 +1,51 @@ +#ifndef NTDB_TEST_EXTERNAL_AGENT_H +#define NTDB_TEST_EXTERNAL_AGENT_H + +/* For locking tests, we need a different process to try things at + * various times. 
*/ +enum operation { + OPEN, + OPEN_WITH_HOOK, + FETCH, + STORE, + TRANSACTION_START, + TRANSACTION_COMMIT, + NEEDS_RECOVERY, + CHECK, + SEND_SIGNAL, + CLOSE, +}; + +/* Do this before doing any ntdb stuff. Return handle, or NULL. */ +struct agent *prepare_external_agent(void); + +enum agent_return { + SUCCESS, + WOULD_HAVE_BLOCKED, + AGENT_DIED, + FAILED, /* For fetch, or NEEDS_RECOVERY */ + OTHER_FAILURE, +}; + +/* Ask the external agent to try to do an operation. + * name == ntdb name for OPEN/OPEN_WITH_HOOK, + * key=data for FETCH/STORE. + */ +enum agent_return external_agent_operation(struct agent *handle, + enum operation op, + const char *name); + +/* Hook into free() on ntdb_data in external agent. */ +extern void (*external_agent_free)(void *); + +/* Mapping enum -> string. */ +const char *agent_return_name(enum agent_return ret); +const char *operation_name(enum operation op); + +void free_external_agent(struct agent *agent); + +/* Internal use: */ +struct ntdb_context; +enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb); + +#endif /* NTDB_TEST_EXTERNAL_AGENT_H */ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/failtest_helper.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/failtest_helper.c new file mode 100644 index 00000000..ab7e61b0 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/failtest_helper.c @@ -0,0 +1,90 @@ +#include "failtest_helper.h" +#include "logging.h" +#include +#include "tap-interface.h" + +bool failtest_suppress = false; + +bool failmatch(const struct failtest_call *call, + const char *file, int line, enum failtest_call_type type) +{ + return call->type == type + && call->line == line + && ((strcmp(call->file, file) == 0) + || (strends(call->file, file) + && (call->file[strlen(call->file) - strlen(file) - 1] + == '/'))); +} + +static bool is_nonblocking_lock(const struct failtest_call *call) +{ + return call->type == FAILTEST_FCNTL && call->u.fcntl.cmd == F_SETLK; +} + +static bool is_unlock(const
struct failtest_call *call) +{ + return call->type == FAILTEST_FCNTL + && call->u.fcntl.arg.fl.l_type == F_UNLCK; +} + +bool exit_check_log(struct tlist_calls *history) +{ + const struct failtest_call *i; + unsigned int malloc_count = 0; + + tlist_for_each(history, i, list) { + if (!i->fail) + continue; + /* Failing the /dev/urandom open doesn't count: we fall back. */ + if (failmatch(i, URANDOM_OPEN)) + continue; + + /* Similarly with read fail. */ + if (failmatch(i, URANDOM_READ)) + continue; + + /* Initial allocation of ntdb doesn't log. */ + if (i->type == FAILTEST_MALLOC) { + if (malloc_count++ == 0) { + continue; + } + } + + /* We don't block "failures" on non-blocking locks. */ + if (is_nonblocking_lock(i)) + continue; + + if (!tap_log_messages) + diag("We didn't log for %s:%u", i->file, i->line); + return tap_log_messages != 0; + } + return true; +} + +/* Some places we soldier on despite errors: only fail them once. */ +enum failtest_result +block_repeat_failures(struct tlist_calls *history) +{ + const struct failtest_call *last; + + last = tlist_tail(history, list); + + if (failtest_suppress) + return FAIL_DONT_FAIL; + + if (failmatch(last, URANDOM_OPEN) + || failmatch(last, URANDOM_READ)) { + return FAIL_PROBE; + } + + /* We handle mmap failing, by falling back to read/write, so + * don't try all possible paths. */ + if (last->type == FAILTEST_MMAP) + return FAIL_PROBE; + + /* Unlock or non-blocking lock is fail-once. */ + if (is_unlock(last) || is_nonblocking_lock(last)) + return FAIL_PROBE; + + return FAIL_OK; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/failtest_helper.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/failtest_helper.h new file mode 100644 index 00000000..8d1c3745 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/failtest_helper.h @@ -0,0 +1,18 @@ +#ifndef NTDB_TEST_FAILTEST_HELPER_H +#define NTDB_TEST_FAILTEST_HELPER_H +#include +#include + +/* FIXME: Check these! 
*/ +#define URANDOM_OPEN "open.c", 62, FAILTEST_OPEN +#define URANDOM_READ "open.c", 42, FAILTEST_READ + +bool exit_check_log(struct tlist_calls *history); +bool failmatch(const struct failtest_call *call, + const char *file, int line, enum failtest_call_type type); +enum failtest_result block_repeat_failures(struct tlist_calls *history); + +/* Set this to suppress failure. */ +extern bool failtest_suppress; + +#endif /* NTDB_TEST_FAILTEST_HELPER_H */ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/helpapi-external-agent.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/helpapi-external-agent.h new file mode 100644 index 00000000..eb813990 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/helpapi-external-agent.h @@ -0,0 +1,7 @@ +#include "external-agent.h" + +/* This isn't possible via the ntdb API, but this makes it link. */ +enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb) +{ + return FAILED; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/helprun-external-agent.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/helprun-external-agent.h new file mode 100644 index 00000000..12610411 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/helprun-external-agent.h @@ -0,0 +1,7 @@ +#include "external-agent.h" +#include "../private.h" + +enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb) +{ + return ntdb_needs_recovery(ntdb) ? SUCCESS : FAILED; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/helprun-layout.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/helprun-layout.h new file mode 100644 index 00000000..1bacd5e7 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/helprun-layout.h @@ -0,0 +1,341 @@ +/* NTDB tools to create various canned database layouts.
*/ +#include "layout.h" +#include +#include +#include +#include +#include "logging.h" + +struct ntdb_layout *new_ntdb_layout(void) +{ + struct ntdb_layout *layout = malloc(sizeof(*layout)); + layout->num_elems = 0; + layout->elem = NULL; + return layout; +} + +static void add(struct ntdb_layout *layout, union ntdb_layout_elem elem) +{ + layout->elem = realloc(layout->elem, + sizeof(layout->elem[0]) + * (layout->num_elems+1)); + layout->elem[layout->num_elems++] = elem; +} + +void ntdb_layout_add_freetable(struct ntdb_layout *layout) +{ + union ntdb_layout_elem elem; + elem.base.type = FREETABLE; + add(layout, elem); +} + +void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len, + unsigned ftable) +{ + union ntdb_layout_elem elem; + elem.base.type = FREE; + elem.free.len = len; + elem.free.ftable_num = ftable; + add(layout, elem); +} + +void ntdb_layout_add_capability(struct ntdb_layout *layout, + uint64_t type, + bool write_breaks, + bool check_breaks, + bool open_breaks, + ntdb_len_t extra) +{ + union ntdb_layout_elem elem; + elem.base.type = CAPABILITY; + elem.capability.type = type; + if (write_breaks) + elem.capability.type |= NTDB_CAP_NOWRITE; + if (open_breaks) + elem.capability.type |= NTDB_CAP_NOOPEN; + if (check_breaks) + elem.capability.type |= NTDB_CAP_NOCHECK; + elem.capability.extra = extra; + add(layout, elem); +} + +static NTDB_DATA dup_key(NTDB_DATA key) +{ + NTDB_DATA ret; + ret.dsize = key.dsize; + ret.dptr = malloc(ret.dsize); + memcpy(ret.dptr, key.dptr, ret.dsize); + return ret; +} + +void ntdb_layout_add_used(struct ntdb_layout *layout, + NTDB_DATA key, NTDB_DATA data, + ntdb_len_t extra) +{ + union ntdb_layout_elem elem; + elem.base.type = DATA; + elem.used.key = dup_key(key); + elem.used.data = dup_key(data); + elem.used.extra = extra; + add(layout, elem); +} + +static ntdb_len_t free_record_len(ntdb_len_t len) +{ + return sizeof(struct ntdb_used_record) + len; +} + +static ntdb_len_t data_record_len(struct tle_used *used) +{ + 
ntdb_len_t len; + len = sizeof(struct ntdb_used_record) + + used->key.dsize + used->data.dsize + used->extra; + assert(len >= sizeof(struct ntdb_free_record)); + return len; +} + +static ntdb_len_t capability_len(struct tle_capability *cap) +{ + return sizeof(struct ntdb_capability) + cap->extra; +} + +static ntdb_len_t freetable_len(struct tle_freetable *ftable) +{ + return sizeof(struct ntdb_freetable); +} + +static void set_free_record(void *mem, ntdb_len_t len) +{ + /* We do all the work in add_to_freetable */ +} + +static void add_zero_pad(struct ntdb_used_record *u, size_t len, size_t extra) +{ + if (extra) + ((char *)(u + 1))[len] = '\0'; +} + +static void set_data_record(void *mem, struct ntdb_context *ntdb, + struct tle_used *used) +{ + struct ntdb_used_record *u = mem; + + set_header(ntdb, u, NTDB_USED_MAGIC, used->key.dsize, used->data.dsize, + used->key.dsize + used->data.dsize + used->extra); + memcpy(u + 1, used->key.dptr, used->key.dsize); + memcpy((char *)(u + 1) + used->key.dsize, + used->data.dptr, used->data.dsize); + add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra); +} + +static void set_capability(void *mem, struct ntdb_context *ntdb, + struct tle_capability *cap, struct ntdb_header *hdr, + ntdb_off_t last_cap) +{ + struct ntdb_capability *c = mem; + ntdb_len_t len = sizeof(*c) - sizeof(struct ntdb_used_record) + cap->extra; + + c->type = cap->type; + c->next = 0; + set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len); + + /* Append to capability list. 
*/ + if (!last_cap) { + hdr->capabilities = cap->base.off; + } else { + c = (struct ntdb_capability *)((char *)hdr + last_cap); + c->next = cap->base.off; + } +} + +static void set_freetable(void *mem, struct ntdb_context *ntdb, + struct tle_freetable *freetable, struct ntdb_header *hdr, + ntdb_off_t last_ftable) +{ + struct ntdb_freetable *ftable = mem; + memset(ftable, 0, sizeof(*ftable)); + set_header(ntdb, &ftable->hdr, NTDB_FTABLE_MAGIC, 0, + sizeof(*ftable) - sizeof(ftable->hdr), + sizeof(*ftable) - sizeof(ftable->hdr)); + + if (last_ftable) { + ftable = (struct ntdb_freetable *)((char *)hdr + last_ftable); + ftable->next = freetable->base.off; + } else { + hdr->free_table = freetable->base.off; + } +} + +static void add_to_freetable(struct ntdb_context *ntdb, + ntdb_off_t eoff, + ntdb_off_t elen, + unsigned ftable, + struct tle_freetable *freetable) +{ + ntdb->ftable_off = freetable->base.off; + ntdb->ftable = ftable; + add_free_record(ntdb, eoff, sizeof(struct ntdb_used_record) + elen, + NTDB_LOCK_WAIT, false); +} + +static ntdb_off_t hbucket_offset(ntdb_len_t idx) +{ + return sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record) + + idx * sizeof(ntdb_off_t); +} + +/* FIXME: Our hash table handling here is primitive: we don't expand! 
*/ +static void add_to_hashtable(struct ntdb_context *ntdb, + ntdb_off_t eoff, + NTDB_DATA key) +{ + ntdb_off_t b_off; + uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); + + b_off = hbucket_offset(h & ((1 << ntdb->hash_bits)-1)); + if (ntdb_read_off(ntdb, b_off) != 0) + abort(); + + ntdb_write_off(ntdb, b_off, encode_offset(ntdb, eoff, h)); +} + +static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num) +{ + unsigned i; + + for (i = 0; i < layout->num_elems; i++) { + if (layout->elem[i].base.type != FREETABLE) + continue; + if (num == 0) + return &layout->elem[i].ftable; + num--; + } + abort(); +} + +/* FIXME: Support NTDB_CONVERT */ +struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, + void (*freefn)(void *), + union ntdb_attribute *attr) +{ + unsigned int i; + ntdb_off_t off, hdrlen, len, last_ftable, last_cap; + char *mem; + struct ntdb_context *ntdb; + + /* Now populate our header, cribbing from a real NTDB header. */ + ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr); + + off = sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record) + + (sizeof(ntdb_off_t) << ntdb->hash_bits); + hdrlen = off; + + /* First pass of layout: calc lengths */ + for (i = 0; i < layout->num_elems; i++) { + union ntdb_layout_elem *e = &layout->elem[i]; + e->base.off = off; + switch (e->base.type) { + case FREETABLE: + len = freetable_len(&e->ftable); + break; + case FREE: + len = free_record_len(e->free.len); + break; + case DATA: + len = data_record_len(&e->used); + break; + case CAPABILITY: + len = capability_len(&e->capability); + break; + default: + abort(); + } + off += len; + } + + mem = malloc(off); + /* Fill with some weird pattern. */ + memset(mem, 0x99, off); + memcpy(mem, ntdb->file->map_ptr, hdrlen); + + /* Mug the ntdb we have to make it use this. 
*/ + freefn(ntdb->file->map_ptr); + ntdb->file->map_ptr = mem; + ntdb->file->map_size = off; + + last_ftable = 0; + last_cap = 0; + for (i = 0; i < layout->num_elems; i++) { + union ntdb_layout_elem *e = &layout->elem[i]; + switch (e->base.type) { + case FREETABLE: + set_freetable(mem + e->base.off, ntdb, &e->ftable, + (struct ntdb_header *)mem, last_ftable); + last_ftable = e->base.off; + break; + case FREE: + set_free_record(mem + e->base.off, e->free.len); + break; + case DATA: + set_data_record(mem + e->base.off, ntdb, &e->used); + break; + case CAPABILITY: + set_capability(mem + e->base.off, ntdb, &e->capability, + (struct ntdb_header *)mem, last_cap); + last_cap = e->base.off; + break; + } + } + /* Must have a free table! */ + assert(last_ftable); + + /* Now fill the free and hash tables. */ + for (i = 0; i < layout->num_elems; i++) { + union ntdb_layout_elem *e = &layout->elem[i]; + switch (e->base.type) { + case FREE: + add_to_freetable(ntdb, e->base.off, e->free.len, + e->free.ftable_num, + find_ftable(layout, e->free.ftable_num)); + break; + case DATA: + add_to_hashtable(ntdb, e->base.off, e->used.key); + break; + default: + break; + } + } + + ntdb->ftable_off = find_ftable(layout, 0)->base.off; + return ntdb; +} + +void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *), + union ntdb_attribute *attr, const char *filename) +{ + struct ntdb_context *ntdb = ntdb_layout_get(layout, freefn, attr); + int fd; + + fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT, 0600); + if (fd < 0) + err(1, "opening %s for writing", filename); + if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size) + != ntdb->file->map_size) + err(1, "writing %s", filename); + close(fd); + ntdb_close(ntdb); +} + +void ntdb_layout_free(struct ntdb_layout *layout) +{ + unsigned int i; + + for (i = 0; i < layout->num_elems; i++) { + if (layout->elem[i].base.type == DATA) { + free(layout->elem[i].used.key.dptr); + free(layout->elem[i].used.data.dptr); + } + } + free(layout->elem); 
+ free(layout); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/layout.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/layout.h new file mode 100644 index 00000000..ea84382c --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/layout.h @@ -0,0 +1,81 @@ +#ifndef NTDB_TEST_LAYOUT_H +#define NTDB_TEST_LAYOUT_H +#include "../private.h" + +struct ntdb_layout *new_ntdb_layout(void); +void ntdb_layout_add_freetable(struct ntdb_layout *layout); +void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len, + unsigned ftable); +void ntdb_layout_add_used(struct ntdb_layout *layout, + NTDB_DATA key, NTDB_DATA data, + ntdb_len_t extra); +void ntdb_layout_add_capability(struct ntdb_layout *layout, + uint64_t type, + bool write_breaks, + bool check_breaks, + bool open_breaks, + ntdb_len_t extra); + +#if 0 /* FIXME: Allow allocation of subtables */ +void ntdb_layout_add_hashtable(struct ntdb_layout *layout, + int htable_parent, /* -1 == toplevel */ + unsigned int bucket, + ntdb_len_t extra); +#endif +/* freefn is needed if we're using failtest_free. */ +struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, + void (*freefn)(void *), + union ntdb_attribute *attr); +void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *), + union ntdb_attribute *attr, const char *filename); + +void ntdb_layout_free(struct ntdb_layout *layout); + +enum layout_type { + FREETABLE, FREE, DATA, CAPABILITY +}; + +/* Shared by all union members. 
*/ +struct tle_base { + enum layout_type type; + ntdb_off_t off; +}; + +struct tle_freetable { + struct tle_base base; +}; + +struct tle_free { + struct tle_base base; + ntdb_len_t len; + unsigned ftable_num; +}; + +struct tle_used { + struct tle_base base; + NTDB_DATA key; + NTDB_DATA data; + ntdb_len_t extra; +}; + +struct tle_capability { + struct tle_base base; + uint64_t type; + ntdb_len_t extra; +}; + +union ntdb_layout_elem { + struct tle_base base; + struct tle_freetable ftable; + struct tle_free free; + struct tle_used used; + struct tle_capability capability; +}; + +struct ntdb_layout { + unsigned int num_elems; + union ntdb_layout_elem *elem; +}; + +#include "helprun-layout.h" +#endif /* NTDB_TEST_LAYOUT_H */ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/lock-tracking.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/lock-tracking.c new file mode 100644 index 00000000..2d654e4b --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/lock-tracking.c @@ -0,0 +1,147 @@ +/* We save the locks so we can reaquire them. */ +#include "../private.h" /* For NTDB_HASH_LOCK_START, etc. */ +#include +#include +#include +#include +#include "tap-interface.h" +#include "lock-tracking.h" + +struct lock { + struct lock *next; + unsigned int off; + unsigned int len; + int type; +}; +static struct lock *locks; +int locking_errors = 0; +bool suppress_lockcheck = false; +bool nonblocking_locks; +int locking_would_block = 0; +void (*unlock_callback)(int fd); + +int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ) +{ + va_list ap; + int ret, arg3; + struct flock *fl; + bool may_block = false; + + if (cmd != F_SETLK && cmd != F_SETLKW) { + /* This may be totally bogus, but we don't know in general. 
*/ + va_start(ap, cmd); + arg3 = va_arg(ap, int); + va_end(ap); + + return fcntl(fd, cmd, arg3); + } + + va_start(ap, cmd); + fl = va_arg(ap, struct flock *); + va_end(ap); + + if (cmd == F_SETLKW && nonblocking_locks) { + cmd = F_SETLK; + may_block = true; + } + ret = fcntl(fd, cmd, fl); + + /* Detect when we failed, but might have been OK if we waited. */ + if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) { + locking_would_block++; + } + + if (fl->l_type == F_UNLCK) { + struct lock **l; + struct lock *old = NULL; + + for (l = &locks; *l; l = &(*l)->next) { + if ((*l)->off == fl->l_start + && (*l)->len == fl->l_len) { + if (ret == 0) { + old = *l; + *l = (*l)->next; + free(old); + } + break; + } + } + if (!old && !suppress_lockcheck) { + diag("Unknown unlock %u@%u - %i", + (int)fl->l_len, (int)fl->l_start, ret); + locking_errors++; + } + } else { + struct lock *new, *i; + unsigned int fl_end = fl->l_start + fl->l_len; + if (fl->l_len == 0) + fl_end = (unsigned int)-1; + + /* Check for overlaps: we shouldn't do this. */ + for (i = locks; i; i = i->next) { + unsigned int i_end = i->off + i->len; + if (i->len == 0) + i_end = (unsigned int)-1; + + if (fl->l_start >= i->off && fl->l_start < i_end) + break; + if (fl_end > i->off && fl_end < i_end) + break; + + /* ntdb_allrecord_lock does this, handle adjacent: */ + if (fl->l_start > NTDB_HASH_LOCK_START + && fl->l_start == i_end && fl->l_type == i->type) { + if (ret == 0) { + i->len = fl->l_len + ? i->len + fl->l_len + : 0; + } + goto done; + } + } + if (i) { + /* Special case: upgrade of allrecord lock. */ + if (i->type == F_RDLCK && fl->l_type == F_WRLCK + && i->off == NTDB_HASH_LOCK_START + && fl->l_start == NTDB_HASH_LOCK_START + && i->len == 0 + && fl->l_len == 0) { + if (ret == 0) + i->type = F_WRLCK; + goto done; + } + if (!suppress_lockcheck) { + diag("%s lock %u@%u overlaps %u@%u", + fl->l_type == F_WRLCK ? 
"write" : "read", + (int)fl->l_len, (int)fl->l_start, + i->len, (int)i->off); + locking_errors++; + } + } + + if (ret == 0) { + new = malloc(sizeof *new); + new->off = fl->l_start; + new->len = fl->l_len; + new->type = fl->l_type; + new->next = locks; + locks = new; + } + } +done: + if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback) + unlock_callback(fd); + return ret; +} + +unsigned int forget_locking(void) +{ + unsigned int num = 0; + while (locks) { + struct lock *next = locks->next; + free(locks); + locks = next; + num++; + } + return num; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/lock-tracking.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/lock-tracking.h new file mode 100644 index 00000000..f2c9c446 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/lock-tracking.h @@ -0,0 +1,25 @@ +#ifndef LOCK_TRACKING_H +#define LOCK_TRACKING_H +#include + +/* Set this if you want a callback after fnctl unlock. */ +extern void (*unlock_callback)(int fd); + +/* Replacement fcntl. */ +int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ); + +/* Discard locking info: returns number of locks outstanding. */ +unsigned int forget_locking(void); + +/* Number of errors in locking. */ +extern int locking_errors; + +/* Suppress lock checking. */ +extern bool suppress_lockcheck; + +/* Make all locks non-blocking. */ +extern bool nonblocking_locks; + +/* Number of times we failed a lock because we made it non-blocking. 
*/ +extern int locking_would_block; +#endif /* LOCK_TRACKING_H */ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/logging.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/logging.c new file mode 100644 index 00000000..2819dd7c --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/logging.c @@ -0,0 +1,30 @@ +#include +#include +#include "tap-interface.h" +#include "logging.h" + +unsigned tap_log_messages; +const char *log_prefix = ""; +char *log_last = NULL; +bool suppress_logging; + +union ntdb_attribute tap_log_attr = { + .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG }, + .fn = tap_log_fn } +}; + +void tap_log_fn(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, void *priv) +{ + if (suppress_logging) + return; + + diag("ntdb log level %u: %s: %s%s", + level, ntdb_errorstr(ecode), log_prefix, message); + if (log_last) + free(log_last); + log_last = strdup(message); + tap_log_messages++; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/logging.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/logging.h new file mode 100644 index 00000000..f8e1eb0e --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/logging.h @@ -0,0 +1,17 @@ +#ifndef NTDB_TEST_LOGGING_H +#define NTDB_TEST_LOGGING_H +#include "../ntdb.h" +#include +#include + +extern bool suppress_logging; +extern const char *log_prefix; +extern unsigned tap_log_messages; +extern union ntdb_attribute tap_log_attr; +extern char *log_last; + +void tap_log_fn(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, void *priv); +#endif /* NTDB_TEST_LOGGING_H */ diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/no-fsync.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/no-fsync.h new file mode 100644 index 00000000..f0c098e2 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/no-fsync.h @@ -0,0 +1,6 @@ +#ifndef NTDB_NO_FSYNC_H +#define NTDB_NO_FSYNC_H +/* Obey $TDB_NO_FSYNC, a bit like tdb 
does (only note our NTDB_NOSYNC + * does less) */ +#define MAYBE_NOSYNC (getenv("TDB_NO_FSYNC") ? NTDB_NOSYNC : 0) +#endif diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/ntdb-source.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/ntdb-source.h new file mode 100644 index 00000000..88e517eb --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/ntdb-source.h @@ -0,0 +1,11 @@ +#include "config.h" +#include "../check.c" +#include "../free.c" +#include "../hash.c" +#include "../io.c" +#include "../lock.c" +#include "../open.c" +#include "../summary.c" +#include "../ntdb.c" +#include "../transaction.c" +#include "../traverse.c" diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/python-api.py b/junkcode/rusty@rustcorp.com.au-ntdb/test/python-api.py new file mode 100644 index 00000000..b728ad02 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/python-api.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# Some simple tests for the Python bindings for TDB +# Note that this tests the interface of the Python bindings +# It does not test tdb itself. 
+# +# Copyright (C) 2007-2013 Jelmer Vernooij +# Published under the GNU LGPLv3 or later + +import ntdb +from unittest import TestCase +import os, tempfile + + +class OpenTdbTests(TestCase): + + def test_nonexistent_read(self): + self.assertRaises(IOError, ntdb.Ntdb, "/some/nonexistent/file", 0, + ntdb.DEFAULT, os.O_RDWR) + +class CloseTdbTests(TestCase): + + def test_double_close(self): + self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT, + os.O_CREAT|os.O_RDWR) + self.assertNotEqual(None, self.ntdb) + + # ensure that double close does not crash python + self.ntdb.close() + self.ntdb.close() + + # Check that further operations do not crash python + self.assertRaises(RuntimeError, lambda: self.ntdb.transaction_start()) + + self.assertRaises(RuntimeError, lambda: self.ntdb["bar"]) + + +class InternalTdbTests(TestCase): + + def test_repr(self): + self.ntdb = ntdb.Ntdb() + + # repr used to crash on internal db + self.assertEquals(repr(self.ntdb), "Ntdb()") + + +class SimpleTdbTests(TestCase): + + def setUp(self): + super(SimpleTdbTests, self).setUp() + self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT, + os.O_CREAT|os.O_RDWR) + self.assertNotEqual(None, self.ntdb) + + def tearDown(self): + del self.ntdb + + def test_repr(self): + self.assertTrue(repr(self.ntdb).startswith("Ntdb('")) + + def test_lockall(self): + self.ntdb.lock_all() + + def test_unlockall(self): + self.ntdb.lock_all() + self.ntdb.unlock_all() + + def test_lockall_read(self): + self.ntdb.read_lock_all() + self.ntdb.read_unlock_all() + + def test_store(self): + self.ntdb.store("bar", "bla") + self.assertEquals("bla", self.ntdb.get("bar")) + + def test_getitem(self): + self.ntdb["bar"] = "foo" + self.assertEquals("foo", self.ntdb["bar"]) + + def test_delete(self): + self.ntdb["bar"] = "foo" + del self.ntdb["bar"] + self.assertRaises(KeyError, lambda: self.ntdb["bar"]) + + def test_contains(self): + self.ntdb["bla"] = "bloe" + self.assertTrue("bla" in self.ntdb) + + def 
test_keyerror(self): + self.assertRaises(KeyError, lambda: self.ntdb["bla"]) + + def test_name(self): + self.ntdb.filename + + def test_iterator(self): + self.ntdb["bla"] = "1" + self.ntdb["brainslug"] = "2" + l = list(self.ntdb) + l.sort() + self.assertEquals(["bla", "brainslug"], l) + + def test_transaction_cancel(self): + self.ntdb["bloe"] = "2" + self.ntdb.transaction_start() + self.ntdb["bloe"] = "1" + self.ntdb.transaction_cancel() + self.assertEquals("2", self.ntdb["bloe"]) + + def test_transaction_commit(self): + self.ntdb["bloe"] = "2" + self.ntdb.transaction_start() + self.ntdb["bloe"] = "1" + self.ntdb.transaction_commit() + self.assertEquals("1", self.ntdb["bloe"]) + + def test_transaction_prepare_commit(self): + self.ntdb["bloe"] = "2" + self.ntdb.transaction_start() + self.ntdb["bloe"] = "1" + self.ntdb.transaction_prepare_commit() + self.ntdb.transaction_commit() + self.assertEquals("1", self.ntdb["bloe"]) + + def test_iterkeys(self): + self.ntdb["bloe"] = "2" + self.ntdb["bla"] = "25" + i = self.ntdb.iterkeys() + self.assertEquals(set(["bloe", "bla"]), set([i.next(), i.next()])) + + def test_clear(self): + self.ntdb["bloe"] = "2" + self.ntdb["bla"] = "25" + self.assertEquals(2, len(list(self.ntdb))) + self.ntdb.clear() + self.assertEquals(0, len(list(self.ntdb))) + + def test_len(self): + self.assertEquals(0, len(list(self.ntdb))) + self.ntdb["entry"] = "value" + self.assertEquals(1, len(list(self.ntdb))) + + def test_add_flags(self): + self.ntdb.add_flag(ntdb.NOMMAP) + self.ntdb.remove_flag(ntdb.NOMMAP) + + +class VersionTests(TestCase): + + def test_present(self): + self.assertTrue(isinstance(ntdb.__version__, str)) + + +if __name__ == '__main__': + import unittest + unittest.TestProgram() diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-001-encode.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-001-encode.c new file mode 100644 index 00000000..81f3e170 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-001-encode.c @@ -0,0 
+1,40 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_used_record rec; + struct ntdb_context ntdb = { .log_fn = tap_log_fn }; + + plan_tests(64 + 32 + 48*5 + 1); + + /* We should be able to encode any data value. */ + for (i = 0; i < 64; i++) + ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i, + 1ULL << i) == 0); + + /* And any key and data with < 64 bits between them. */ + for (i = 0; i < 32; i++) { + ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i; + ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen, + klen + dlen) == 0); + } + + /* We should neatly encode all values. */ + for (i = 0; i < 48; i++) { + uint64_t klen = 1ULL << (i < 16 ? i : 15); + uint64_t dlen = 1ULL << i; + uint64_t xlen = 1ULL << (i < 32 ? i : 31); + ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen, + klen+dlen+xlen) == 0); + ok1(rec_key_length(&rec) == klen); + ok1(rec_data_length(&rec) == dlen); + ok1(rec_extra_padding(&rec) == xlen); + ok1(rec_magic(&rec) == NTDB_USED_MAGIC); + } + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-001-fls.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-001-fls.c new file mode 100644 index 00000000..6ed46fe0 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-001-fls.c @@ -0,0 +1,34 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "helprun-external-agent.h" + +static unsigned int dumb_fls(uint64_t num) +{ + int i; + + for (i = 63; i >= 0; i--) { + if (num & (1ULL << i)) + break; + } + return i + 1; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + + plan_tests(64 * 64 + 2); + + ok1(fls64(0) == 0); + ok1(dumb_fls(0) == 0); + + for (i = 0; i < 64; i++) { + for (j = 0; j < 64; j++) { + uint64_t val = (1ULL << i) | (1ULL << j); + ok(fls64(val) == dumb_fls(val), + "%llu -> %u should be %u", 
(long long)val, + fls64(val), dumb_fls(val)); + } + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-01-new_database.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-01-new_database.c new file mode 100644 index 00000000..11fb0248 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-01-new_database.c @@ -0,0 +1,41 @@ +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 3); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-new_database.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + failtest_exit(exit_status()); + + failtest_suppress = true; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + failtest_suppress = false; + ntdb_close(ntdb); + if (!ok1(tap_log_messages == 0)) + break; + } + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. 
+ */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-02-expand.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-02-expand.c new file mode 100644 index 00000000..55927d9b --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-02-expand.c @@ -0,0 +1,69 @@ +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + uint64_t val; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1); + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + failtest_suppress = true; + ntdb = ntdb_open("run-expand.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + break; + + val = ntdb->file->map_size; + /* Need some hash lock for expand. 
*/ + ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0); + failtest_suppress = false; + if (!ok1(ntdb_expand(ntdb, 1) == 0)) { + failtest_suppress = true; + ntdb_close(ntdb); + break; + } + failtest_suppress = true; + + ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR); + ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + val = ntdb->file->map_size; + ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0); + failtest_suppress = false; + if (!ok1(ntdb_expand(ntdb, 1024) == 0)) { + failtest_suppress = true; + ntdb_close(ntdb); + break; + } + failtest_suppress = true; + ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0); + ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. + */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-03-coalesce.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-03-coalesce.c new file mode 100644 index 00000000..e86ee656 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-03-coalesce.c @@ -0,0 +1,179 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "layout.h" +#include "helprun-external-agent.h" + +static ntdb_len_t free_record_length(struct ntdb_context *ntdb, ntdb_off_t off) +{ + struct ntdb_free_record f; + enum NTDB_ERROR ecode; + + ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f)); + if (ecode != NTDB_SUCCESS) + return ecode; + if (frec_magic(&f) != NTDB_FREE_MAGIC) + return NTDB_ERR_CORRUPT; + return frec_len(&f); +} + +int main(int argc, char *argv[]) +{ + ntdb_off_t b_off, test; + struct ntdb_context *ntdb; + struct ntdb_layout *layout; + NTDB_DATA data, key; + ntdb_len_t len; + + /* FIXME: Test NTDB_CONVERT */ + /* FIXME: Test lock order fail. 
*/ + + plan_tests(42); + data = ntdb_mkdata("world", 5); + key = ntdb_mkdata("hello", 5); + + /* No coalescing can be done due to EOF */ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + len = 15560; + ntdb_layout_add_free(layout, len, 0); + ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); + /* NOMMAP is for lockcheck. */ + ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, + O_RDWR, 0, &tap_log_attr); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == len); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len)); + /* Lock and fail to coalesce. */ + ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); + test = layout->elem[1].base.off; + ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, len, &test) + == 0); + ntdb_unlock_free_bucket(ntdb, b_off); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == len); + ok1(test == layout->elem[1].base.off); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + ntdb_layout_free(layout); + + /* No coalescing can be done due to used record */ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_free(layout, 15528, 0); + ntdb_layout_add_used(layout, key, data, 6); + ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); + /* NOMMAP is for lockcheck. */ + ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, + O_RDWR, 0, &tap_log_attr); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(15528)); + /* Lock and fail to coalesce. 
*/ + ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); + test = layout->elem[1].base.off; + ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 15528, &test) + == 0); + ntdb_unlock_free_bucket(ntdb, b_off); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528); + ok1(test == layout->elem[1].base.off); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + ntdb_layout_free(layout); + + /* Coalescing can be done due to two free records, then EOF */ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_free(layout, 1024, 0); + ntdb_layout_add_free(layout, 14520, 0); + ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); + /* NOMMAP is for lockcheck. */ + ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, + O_RDWR, 0, &tap_log_attr); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); + ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14520); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Figure out which bucket (first) free entry is. */ + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024)); + /* Lock and coalesce. */ + ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); + test = layout->elem[2].base.off; + ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) + == 1024 + sizeof(struct ntdb_used_record) + 14520); + /* Should tell us it's erased this one... 
*/ + ok1(test == NTDB_ERR_NOEXIST); + ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); + ok1(free_record_length(ntdb, layout->elem[1].base.off) + == 1024 + sizeof(struct ntdb_used_record) + 14520); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + ntdb_layout_free(layout); + + /* Coalescing can be done due to two free records, then data */ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_free(layout, 1024, 0); + ntdb_layout_add_free(layout, 14488, 0); + ntdb_layout_add_used(layout, key, data, 6); + ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); + /* NOMMAP is for lockcheck. */ + ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, + O_RDWR, 0, &tap_log_attr); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); + ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14488); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024)); + /* Lock and coalesce. */ + ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); + test = layout->elem[2].base.off; + ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) + == 1024 + sizeof(struct ntdb_used_record) + 14488); + ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); + ok1(free_record_length(ntdb, layout->elem[1].base.off) + == 1024 + sizeof(struct ntdb_used_record) + 14488); + ok1(test == NTDB_ERR_NOEXIST); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + ntdb_layout_free(layout); + + /* Coalescing can be done due to three free records, then EOF */ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_free(layout, 1024, 0); + ntdb_layout_add_free(layout, 512, 0); + ntdb_layout_add_free(layout, 13992, 0); + ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); + /* NOMMAP is for lockcheck. 
*/ + ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC, + O_RDWR, 0, &tap_log_attr); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); + ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512); + ok1(free_record_length(ntdb, layout->elem[3].base.off) == 13992); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024)); + /* Lock and coalesce. */ + ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); + test = layout->elem[2].base.off; + ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) + == 1024 + sizeof(struct ntdb_used_record) + 512 + + sizeof(struct ntdb_used_record) + 13992); + ok1(ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0); + ok1(free_record_length(ntdb, layout->elem[1].base.off) + == 1024 + sizeof(struct ntdb_used_record) + 512 + + sizeof(struct ntdb_used_record) + 13992); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + ntdb_layout_free(layout); + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-04-basichash.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-04-basichash.c new file mode 100644 index 00000000..9936d859 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-04-basichash.c @@ -0,0 +1,322 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +/* We rig the hash so all records clash. 
*/ +static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv) +{ + return *((const unsigned int *)key) << 20; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + unsigned int v; + struct ntdb_used_record rec; + NTDB_DATA key = { (unsigned char *)&v, sizeof(v) }; + NTDB_DATA dbuf = { (unsigned char *)&v, sizeof(v) }; + union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = clash } }; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT, + }; + + hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 137 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct hash_info h; + ntdb_off_t new_off, new_off2, off; + + ntdb = ntdb_open("run-04-basichash.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(ntdb); + if (!ntdb) + continue; + + v = 0; + /* Should not find it. */ + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in top table, bucket 0. */ + ok1(h.table == NTDB_HASH_OFFSET); + ok1(h.table_size == (1 << ntdb->hash_bits)); + ok1(h.bucket == 0); + ok1(h.old_val == 0); + + /* Should have lock on bucket 0 */ + ok1(h.h == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + /* Allocate a new record. */ + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off)); + + /* We should be able to add it now. */ + ok1(add_to_hash(ntdb, &h, new_off) == 0); + + /* Make sure we fill it in for later finding. 
*/ + off = new_off + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + + /* We should be able to unlock that OK. */ + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Database should be consistent. */ + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Now, this should give a successful lookup. */ + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located it in top table, bucket 0. */ + ok1(h.table == NTDB_HASH_OFFSET); + ok1(h.table_size == (1 << ntdb->hash_bits)); + ok1(h.bucket == 0); + + /* Should have lock on bucket 0 */ + ok1(h.h == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Database should be consistent. */ + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Test expansion. */ + v = 1; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located clash in toplevel bucket 0. 
*/ + ok1(h.table == NTDB_HASH_OFFSET); + ok1(h.table_size == (1 << ntdb->hash_bits)); + ok1(h.bucket == 0); + ok1((h.old_val & NTDB_OFF_MASK) == new_off); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + new_off2 = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); + + off = new_off2 + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + + /* We should be able to add it now. */ + ok1(add_to_hash(ntdb, &h, new_off2) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Should be happy with expansion. */ + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Should be able to find both. */ + v = 1; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 1); + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + v = 0; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain. 
*/ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 0); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + /* Simple delete should work. */ + ok1(delete_from_hash(ntdb, &h) == 0); + ok1(add_free_record(ntdb, new_off, + sizeof(struct ntdb_used_record) + + rec_key_length(&rec) + + rec_data_length(&rec) + + rec_extra_padding(&rec), + NTDB_LOCK_NOWAIT, false) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Should still be able to find other record. */ + v = 1; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 1); + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Now should find empty space. */ + v = 0; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain, bucket 0. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 0); + ok1(h.old_val == 0); + + /* Adding another record should work. */ + v = 2; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain, bucket 0. 
*/ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 0); + ok1(h.old_val == 0); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); + ok1(add_to_hash(ntdb, &h, new_off) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + off = new_off + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + + /* Adding another record should cause expansion. */ + v = 3; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should not have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 2); + ok1(h.old_val != 0); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); + off = new_off + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + ok1(add_to_hash(ntdb, &h, new_off) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Retrieve it and check. */ + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); + /* Should have created correct hash. 
*/ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have appended to chain, bucket 2. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 3); + ok1(h.bucket == 2); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* YA record: relocation. */ + v = 4; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should not have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 3); + ok1(h.bucket == 3); + ok1(h.old_val != 0); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); + off = new_off + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + ok1(add_to_hash(ntdb, &h, new_off) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Retrieve it and check. */ + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have appended to chain, bucket 2. 
*/ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 4); + ok1(h.bucket == 3); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-05-readonly-open.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-05-readonly-open.c new file mode 100644 index 00000000..057fa088 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-05-readonly-open.c @@ -0,0 +1,80 @@ +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4), d; + union ntdb_attribute seed_attr; + unsigned int msgs = 0; + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + + seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; + seed_attr.base.next = &tap_log_attr; + seed_attr.seed.seed = 0; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 11); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-05-readonly-open.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, + &seed_attr); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ntdb_close(ntdb); + + failtest_suppress = false; + ntdb = ntdb_open("run-05-readonly-open.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDONLY, 0600, &tap_log_attr); + if (!ok1(ntdb)) + break; + ok1(tap_log_messages == 
msgs); + /* Fetch should succeed, stores should fail. */ + if (!ok1(ntdb_fetch(ntdb, key, &d) == 0)) + goto fail; + ok1(ntdb_deq(d, data)); + free(d.dptr); + if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) + == NTDB_ERR_RDONLY)) + goto fail; + ok1(tap_log_messages == ++msgs); + if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) + == NTDB_ERR_RDONLY)) + goto fail; + ok1(tap_log_messages == ++msgs); + failtest_suppress = true; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + ok1(tap_log_messages == msgs); + /* SIGH: failtest bug, it doesn't save the ntdb file because + * we have it read-only. If we go around again, it gets + * changed underneath us and things get screwy. */ + if (failtest_has_failed()) + break; + } + failtest_exit(exit_status()); + +fail: + failtest_suppress = true; + ntdb_close(ntdb); + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. + */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-10-simple-store.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-10-simple-store.c new file mode 100644 index 00000000..d3f3b7fd --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-10-simple-store.c @@ -0,0 +1,66 @@ +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) 
{ + ntdb = ntdb_open("run-10-simple-store.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + break; + /* Modify should fail. */ + failtest_suppress = false; + if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) + == NTDB_ERR_NOEXIST)) + goto fail; + failtest_suppress = true; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Insert should succeed. */ + failtest_suppress = false; + if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0)) + goto fail; + failtest_suppress = true; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Second insert should fail. */ + failtest_suppress = false; + if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) + == NTDB_ERR_EXISTS)) + goto fail; + failtest_suppress = true; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + } + ok1(tap_log_messages == 0); + failtest_exit(exit_status()); + +fail: + failtest_suppress = true; + ntdb_close(ntdb); + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. 
+ */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-11-simple-fetch.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-11-simple-fetch.c new file mode 100644 index 00000000..fba76202 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-11-simple-fetch.c @@ -0,0 +1,66 @@ +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-11-simple-fetch.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (ntdb) { + NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ + + /* fetch should fail. */ + failtest_suppress = false; + if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST)) + goto fail; + failtest_suppress = true; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Insert should succeed. */ + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Fetch should now work. 
*/ + failtest_suppress = false; + if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS)) + goto fail; + failtest_suppress = true; + ok1(ntdb_deq(d, data)); + free(d.dptr); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + } + } + ok1(tap_log_messages == 0); + failtest_exit(exit_status()); + +fail: + failtest_suppress = true; + ntdb_close(ntdb); + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. + */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-12-check.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-12-check.c new file mode 100644 index 00000000..c2354cb5 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-12-check.c @@ -0,0 +1,53 @@ +#include "../private.h" +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, + NTDB_INTERNAL|NTDB_CONVERT, + NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-12-check.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + + /* This is what we really want to test: ntdb_check(). 
*/ + failtest_suppress = false; + if (!ok1(ntdb_check(ntdb, NULL, NULL) == 0)) + goto fail; + failtest_suppress = true; + + ntdb_close(ntdb); + } + ok1(tap_log_messages == 0); + failtest_exit(exit_status()); + +fail: + failtest_suppress = true; + ntdb_close(ntdb); + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. + */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-15-append.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-15-append.c new file mode 100644 index 00000000..fb8d7c20 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-15-append.c @@ -0,0 +1,131 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "helprun-external-agent.h" + +#define MAX_SIZE 13100 +#define SIZE_STEP 131 + +static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key) +{ + ntdb_off_t off; + struct ntdb_used_record urec; + struct hash_info h; + + off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec, NULL); + if (NTDB_OFF_IS_ERR(off)) + return 0; + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); + return off; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j, moves; + struct ntdb_context *ntdb; + unsigned char *buffer; + ntdb_off_t oldoff = 0, newoff; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data; + + buffer = malloc(MAX_SIZE); + for (i = 0; i < MAX_SIZE; i++) + buffer[i] = i; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7) + + 1); + + /* Using ntdb_store. 
*/ + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + moves = 0; + for (j = 0; j < MAX_SIZE; j += SIZE_STEP) { + data.dptr = buffer; + data.dsize = j; + ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); + ok1(data.dsize == j); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + newoff = ntdb_offset(ntdb, key); + if (newoff != oldoff) + moves++; + oldoff = newoff; + } + ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0)); + /* We should increase by 50% each time... */ + ok(moves <= ilog64(j / SIZE_STEP)*2, + "Moved %u times", moves); + ntdb_close(ntdb); + } + + /* Using ntdb_append. */ + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + size_t prev_len = 0; + ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + moves = 0; + for (j = 0; j < MAX_SIZE; j += SIZE_STEP) { + data.dptr = buffer + prev_len; + data.dsize = j - prev_len; + ok1(ntdb_append(ntdb, key, data) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); + ok1(data.dsize == j); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + prev_len = data.dsize; + newoff = ntdb_offset(ntdb, key); + if (newoff != oldoff) + moves++; + oldoff = newoff; + } + ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0)); + /* We should increase by 50% each time... 
*/ + ok(moves <= ilog64(j / SIZE_STEP)*2, + "Moved %u times", moves); + ntdb_close(ntdb); + } + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + /* Huge initial store. */ + data.dptr = buffer; + data.dsize = MAX_SIZE; + ok1(ntdb_append(ntdb, key, data) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS); + ok1(data.dsize == MAX_SIZE); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0 + && ntdb->file->num_lockrecs == 0)); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + free(buffer); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-25-hashoverload.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-25-hashoverload.c new file mode 100644 index 00000000..5a2c9cd7 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-25-hashoverload.c @@ -0,0 +1,94 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +#define OVERLOAD 100 + +static uint32_t badhash(const void *key, size_t len, uint32_t seed, void *priv) +{ + return 0; +} + +static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p) +{ + if (p) + return ntdb_delete(ntdb, key); + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; + NTDB_DATA dbuf = { (unsigned char *)&j, sizeof(j) }; + union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = badhash } }; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT, + }; + + hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / 
sizeof(flags[0]) * (7 * OVERLOAD + 11) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ + + ntdb = ntdb_open("run-25-hashoverload.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(ntdb); + if (!ntdb) + continue; + + /* Overload a bucket. */ + for (j = 0; j < OVERLOAD; j++) { + ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); + } + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Check we can find them all. */ + for (j = 0; j < OVERLOAD; j++) { + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == sizeof(j)); + ok1(d.dptr != NULL); + ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); + free(d.dptr); + } + + /* Traverse through them. */ + ok1(ntdb_traverse(ntdb, trav, NULL) == OVERLOAD); + + /* Delete the first 99. */ + for (j = 0; j < OVERLOAD-1; j++) + ok1(ntdb_delete(ntdb, key) == 0); + + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == sizeof(j)); + ok1(d.dptr != NULL); + ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); + free(d.dptr); + + /* Traverse through them. */ + ok1(ntdb_traverse(ntdb, trav, NULL) == 1); + + /* Re-add */ + for (j = 0; j < OVERLOAD-1; j++) { + ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); + } + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Now try deleting as we go. 
*/ + ok1(ntdb_traverse(ntdb, trav, trav) == OVERLOAD); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb_traverse(ntdb, trav, NULL) == 0); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-30-exhaust-before-expand.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-30-exhaust-before-expand.c new file mode 100644 index 00000000..e44b32c8 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-30-exhaust-before-expand.c @@ -0,0 +1,77 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static bool empty_freetable(struct ntdb_context *ntdb) +{ + struct ntdb_freetable ftab; + unsigned int i; + + /* Now, free table should be completely exhausted in zone 0 */ + if (ntdb_read_convert(ntdb, ntdb->ftable_off, &ftab, sizeof(ftab)) != 0) + abort(); + + for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) { + if (ftab.buckets[i]) + return false; + } + return true; +} + + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + NTDB_DATA k, d; + uint64_t size; + bool was_empty = false; + + k.dptr = (void *)&j; + k.dsize = sizeof(j); + + ntdb = ntdb_open("run-30-exhaust-before-expand.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* There's one empty record in initial db. */ + ok1(!empty_freetable(ntdb)); + + size = ntdb->file->map_size; + + /* Create one record to chew up most space. 
*/ + d.dsize = size - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 32; + d.dptr = calloc(d.dsize, 1); + j = 0; + ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); + ok1(ntdb->file->map_size == size); + free(d.dptr); + + /* Now insert minimal-length records until we expand. */ + for (j = 1; ntdb->file->map_size == size; j++) { + was_empty = empty_freetable(ntdb); + if (ntdb_store(ntdb, k, k, NTDB_INSERT) != 0) + err(1, "Failed to store record %i", j); + } + + /* Would have been empty before expansion, but no longer. */ + ok1(was_empty); + ok1(!empty_freetable(ntdb)); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-35-convert.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-35-convert.c new file mode 100644 index 00000000..4899dc66 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-35-convert.c @@ -0,0 +1,65 @@ +#include "../private.h" +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include "logging.h" +#include "failtest_helper.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i, messages = 0; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-35-convert.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(ntdb)) + failtest_exit(exit_status()); + + ntdb_close(ntdb); + /* We can fail in log message formatting or open. 
That's OK */ + if (failtest_has_failed()) { + failtest_exit(exit_status()); + } + /* If we say NTDB_CONVERT, it must be converted */ + ntdb = ntdb_open("run-35-convert.ntdb", + flags[i]|NTDB_CONVERT|MAYBE_NOSYNC, + O_RDWR, 0600, &tap_log_attr); + if (flags[i] & NTDB_CONVERT) { + if (!ntdb) + failtest_exit(exit_status()); + ok1(ntdb_get_flags(ntdb) & NTDB_CONVERT); + ntdb_close(ntdb); + } else { + if (!ok1(!ntdb && errno == EIO)) + failtest_exit(exit_status()); + ok1(tap_log_messages == ++messages); + if (!ok1(log_last && strstr(log_last, "NTDB_CONVERT"))) + failtest_exit(exit_status()); + } + + /* If don't say NTDB_CONVERT, it *may* be converted */ + ntdb = ntdb_open("run-35-convert.ntdb", + (flags[i] & ~NTDB_CONVERT)|MAYBE_NOSYNC, + O_RDWR, 0600, &tap_log_attr); + if (!ntdb) + failtest_exit(exit_status()); + ok1(ntdb_get_flags(ntdb) == (flags[i]|MAYBE_NOSYNC)); + ntdb_close(ntdb); + } + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. + */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-50-multiple-freelists.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-50-multiple-freelists.c new file mode 100644 index 00000000..4a7cf899 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-50-multiple-freelists.c @@ -0,0 +1,71 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "layout.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + ntdb_off_t off; + struct ntdb_context *ntdb; + struct ntdb_layout *layout; + NTDB_DATA key, data; + union ntdb_attribute seed; + + /* This seed value previously tickled a layout.c bug. */ + seed.base.attr = NTDB_ATTRIBUTE_SEED; + seed.seed.seed = 0xb1142bc054d035b4ULL; + seed.base.next = &tap_log_attr; + + plan_tests(11); + key = ntdb_mkdata("Hello", 5); + data = ntdb_mkdata("world", 5); + + /* Create a NTDB with three free tables. 
*/ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_free(layout, 80, 0); + /* Used record prevent coalescing. */ + ntdb_layout_add_used(layout, key, data, 6); + ntdb_layout_add_free(layout, 160, 1); + key.dsize--; + ntdb_layout_add_used(layout, key, data, 7); + ntdb_layout_add_free(layout, 320, 2); + key.dsize--; + ntdb_layout_add_used(layout, key, data, 8); + ntdb_layout_add_free(layout, 40, 0); + ntdb = ntdb_layout_get(layout, free, &seed); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0, + NTDB_USED_MAGIC); + ok1(off == layout->elem[3].base.off); + ok1(ntdb->ftable_off == layout->elem[0].base.off); + + off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0, + NTDB_USED_MAGIC); + ok1(off == layout->elem[5].base.off); + ok1(ntdb->ftable_off == layout->elem[1].base.off); + + off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0, + NTDB_USED_MAGIC); + ok1(off == layout->elem[7].base.off); + ok1(ntdb->ftable_off == layout->elem[2].base.off); + + off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0, + NTDB_USED_MAGIC); + ok1(off == layout->elem[9].base.off); + ok1(ntdb->ftable_off == layout->elem[0].base.off); + + /* Now we fail. 
*/ + off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC); + ok1(off == 0); + + ntdb_close(ntdb); + ntdb_layout_free(layout); + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-56-open-during-transaction.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-56-open-during-transaction.c new file mode 100644 index 00000000..c28fbfd3 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-56-open-during-transaction.c @@ -0,0 +1,166 @@ +#include "../private.h" +#include +#include "lock-tracking.h" + +static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +#include "ntdb-source.h" +#include "tap-interface.h" +#include +#include +#include +#include "external-agent.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static struct agent *agent; +static bool opened; +static int errors = 0; +#define TEST_DBNAME "run-56-open-during-transaction.ntdb" + +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +static bool is_same(const char *snapshot, const char *latest, off_t len) +{ + unsigned i; + + for (i = 0; i < len; i++) { + if (snapshot[i] != latest[i]) + return false; + } + return true; +} + +static bool compare_file(int fd, const char *snapshot, off_t snapshot_len) +{ + char *contents; + bool ret; + + /* over-length read serves as length check. 
*/ + contents = malloc(snapshot_len+1); + ret = pread(fd, contents, snapshot_len+1, 0) == snapshot_len + && is_same(snapshot, contents, snapshot_len); + free(contents); + return ret; +} + +static void check_file_intact(int fd) +{ + enum agent_return ret; + struct stat st; + char *contents; + + fstat(fd, &st); + contents = malloc(st.st_size); + if (pread(fd, contents, st.st_size, 0) != st.st_size) { + diag("Read fail"); + errors++; + return; + } + + /* Ask agent to open file. */ + ret = external_agent_operation(agent, OPEN, TEST_DBNAME); + + /* It's OK to open it, but it must not have changed! */ + if (!compare_file(fd, contents, st.st_size)) { + diag("Agent changed file after opening %s", + agent_return_name(ret)); + errors++; + } + + if (ret == SUCCESS) { + ret = external_agent_operation(agent, CLOSE, NULL); + if (ret != SUCCESS) { + diag("Agent failed to close ntdb: %s", + agent_return_name(ret)); + errors++; + } + } else if (ret != WOULD_HAVE_BLOCKED) { + diag("Agent opening file gave %s", + agent_return_name(ret)); + errors++; + } + + free(contents); +} + +static void after_unlock(int fd) +{ + if (opened) + check_file_intact(fd); +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + if (opened) + check_file_intact(fd); + + return pwrite(fd, buf, count, offset); +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + if (opened) + check_file_intact(fd); + + return write(fd, buf, count); +} + +static int ftruncate_check(int fd, off_t length) +{ + if (opened) + check_file_intact(fd); + + return ftruncate(fd, length); + +} + +int main(int argc, char *argv[]) +{ + const int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + int i; + struct ntdb_context *ntdb; + NTDB_DATA key, data; + + plan_tests(sizeof(flags)/sizeof(flags[0]) * 5); + agent = prepare_external_agent(); + if (!agent) + err(1, "preparing agent"); + + unlock_callback = after_unlock; + for (i = 0; i < 
sizeof(flags)/sizeof(flags[0]); i++) { + diag("Test with %s and %s\n", + (flags[i] & NTDB_CONVERT) ? "CONVERT" : "DEFAULT", + (flags[i] & NTDB_NOMMAP) ? "no mmap" : "mmap"); + unlink(TEST_DBNAME); + ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + + opened = true; + ok1(ntdb_transaction_start(ntdb) == 0); + key = ntdb_mkdata("hi", strlen("hi")); + data = ntdb_mkdata("world", strlen("world")); + + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb_transaction_commit(ntdb) == 0); + ok(!errors, "We had %u open errors", errors); + + opened = false; + ntdb_close(ntdb); + } + + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-57-die-during-transaction.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-57-die-during-transaction.c new file mode 100644 index 00000000..9a86fca0 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-57-die-during-transaction.c @@ -0,0 +1,322 @@ +#include "../private.h" +#include +#include "lock-tracking.h" +#include "tap-interface.h" +#include +#include +static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +/* There's a malloc inside transaction_setup_recovery, and valgrind complains + * when we longjmp and leak it. 
*/ +#define MAX_ALLOCATIONS 10 +static void *allocated[MAX_ALLOCATIONS]; +static unsigned max_alloc = 0; + +static void *malloc_noleak(size_t len) +{ + unsigned int i; + + for (i = 0; i < MAX_ALLOCATIONS; i++) + if (!allocated[i]) { + allocated[i] = malloc(len); + if (i > max_alloc) { + max_alloc = i; + diag("max_alloc: %i", max_alloc); + } + return allocated[i]; + } + diag("Too many allocations!"); + abort(); +} + +static void *realloc_noleak(void *p, size_t size) +{ + unsigned int i; + + for (i = 0; i < MAX_ALLOCATIONS; i++) { + if (allocated[i] == p) { + if (i > max_alloc) { + max_alloc = i; + diag("max_alloc: %i", max_alloc); + } + return allocated[i] = realloc(p, size); + } + } + diag("Untracked realloc!"); + abort(); +} + +static void free_noleak(void *p) +{ + unsigned int i; + + /* We don't catch asprintf, so don't complain if we miss one. */ + for (i = 0; i < MAX_ALLOCATIONS; i++) { + if (allocated[i] == p) { + allocated[i] = NULL; + break; + } + } + free(p); +} + +static void free_all(void) +{ + unsigned int i; + + for (i = 0; i < MAX_ALLOCATIONS; i++) { + free(allocated[i]); + allocated[i] = NULL; + } +} + +#define malloc malloc_noleak +#define free(x) free_noleak(x) +#define realloc realloc_noleak + +#include "ntdb-source.h" + +#undef malloc +#undef free +#undef realloc +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +#include +#include +#include +#include +#include "external-agent.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static bool in_transaction; +static int target, current; +static jmp_buf jmpbuf; +#define TEST_DBNAME "run-57-die-during-transaction.ntdb" +#define KEY_STRING "helloworld" +#define DATA_STRING "Helloworld" + +static void maybe_die(int fd) +{ + if (in_transaction && current++ == target) { + longjmp(jmpbuf, 1); + } +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + ssize_t ret; + + maybe_die(fd); + + ret = pwrite(fd, buf, count, offset); + if (ret != count) 
+ return ret; + + maybe_die(fd); + return ret; +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + maybe_die(fd); + + ret = write(fd, buf, count); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static int ftruncate_check(int fd, off_t length) +{ + int ret; + + maybe_die(fd); + + ret = ftruncate(fd, length); + + maybe_die(fd); + return ret; +} + +static bool test_death(enum operation op, struct agent *agent, + bool pre_create_recovery) +{ + struct ntdb_context *ntdb = NULL; + NTDB_DATA key, data; + enum agent_return ret; + int needed_recovery = 0; + + current = target = 0; + /* Big long data to force a change. */ + data = ntdb_mkdata(DATA_STRING, strlen(DATA_STRING)); + +reset: + unlink(TEST_DBNAME); + ntdb = ntdb_open(TEST_DBNAME, NTDB_NOMMAP|MAYBE_NOSYNC, + O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr); + if (!ntdb) { + diag("Failed opening NTDB: %s", strerror(errno)); + return false; + } + + if (setjmp(jmpbuf) != 0) { + /* We're partway through. Simulate our death. */ + close(ntdb->file->fd); + forget_locking(); + in_transaction = false; + + ret = external_agent_operation(agent, NEEDS_RECOVERY, ""); + if (ret == SUCCESS) + needed_recovery++; + else if (ret != FAILED) { + diag("Step %u agent NEEDS_RECOVERY = %s", current, + agent_return_name(ret)); + return false; + } + + /* Could be key, or data. 
*/ + ret = external_agent_operation(agent, op, + KEY_STRING "=" KEY_STRING); + if (ret != SUCCESS) { + ret = external_agent_operation(agent, op, + KEY_STRING + "=" DATA_STRING); + } + if (ret != SUCCESS) { + diag("Step %u op %s failed = %s", current, + operation_name(op), + agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, NEEDS_RECOVERY, ""); + if (ret != FAILED) { + diag("Still needs recovery after step %u = %s", + current, agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, CHECK, ""); + if (ret != SUCCESS) { + diag("Step %u check failed = %s", current, + agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, CLOSE, ""); + if (ret != SUCCESS) { + diag("Step %u close failed = %s", current, + agent_return_name(ret)); + return false; + } + + /* Suppress logging as this tries to use closed fd. */ + suppress_logging = true; + suppress_lockcheck = true; + ntdb_close(ntdb); + suppress_logging = false; + suppress_lockcheck = false; + target++; + current = 0; + free_all(); + goto reset; + } + + /* Put key for agent to fetch. */ + key = ntdb_mkdata(KEY_STRING, strlen(KEY_STRING)); + + if (pre_create_recovery) { + /* Using a transaction now means we allocate the recovery + * area immediately. That makes the later transaction smaller + * and thus tickles a bug we had. */ + if (ntdb_transaction_start(ntdb) != 0) + return false; + } + if (ntdb_store(ntdb, key, key, NTDB_INSERT) != 0) + return false; + if (pre_create_recovery) { + if (ntdb_transaction_commit(ntdb) != 0) + return false; + } + + /* This is the key we insert in transaction. 
*/ + key.dsize--; + + ret = external_agent_operation(agent, OPEN, TEST_DBNAME); + if (ret != SUCCESS) + errx(1, "Agent failed to open: %s", agent_return_name(ret)); + + ret = external_agent_operation(agent, FETCH, KEY_STRING "=" KEY_STRING); + if (ret != SUCCESS) + errx(1, "Agent failed find key: %s", agent_return_name(ret)); + + in_transaction = true; + if (ntdb_transaction_start(ntdb) != 0) + return false; + + if (ntdb_store(ntdb, key, data, NTDB_INSERT) != 0) + return false; + + if (ntdb_transaction_commit(ntdb) != 0) + return false; + + in_transaction = false; + + /* We made it! */ + diag("Completed %u runs", current); + ntdb_close(ntdb); + ret = external_agent_operation(agent, CLOSE, ""); + if (ret != SUCCESS) { + diag("Step %u close failed = %s", current, + agent_return_name(ret)); + return false; + } + + ok1(needed_recovery); + ok1(locking_errors == 0); + ok1(forget_locking() == 0); + locking_errors = 0; + return true; +} + +int main(int argc, char *argv[]) +{ + enum operation ops[] = { FETCH, STORE, TRANSACTION_START }; + struct agent *agent; + int i, j; + + plan_tests(24); + unlock_callback = maybe_die; + + external_agent_free = free_noleak; + agent = prepare_external_agent(); + if (!agent) + err(1, "preparing agent"); + + for (j = 0; j < 2; j++) { + for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { + diag("Testing %s after death (%s recovery area)", + operation_name(ops[i]), j ? 
"with" : "without"); + ok1(test_death(ops[i], agent, j)); + } + } + + free_external_agent(agent); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-64-bit-tdb.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-64-bit-tdb.c new file mode 100644 index 00000000..9fcc6c9b --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-64-bit-tdb.c @@ -0,0 +1,89 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +/* The largest 32-bit value which is still a multiple of NTDB_PGSIZE */ +#define ALMOST_4G ((uint32_t)-NTDB_PGSIZE) +/* And this pushes it over 32 bits */ +#define A_LITTLE_BIT (NTDB_PGSIZE * 2) + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + if (sizeof(off_t) <= 4) { + plan_tests(1); + pass("No 64 bit off_t"); + return exit_status(); + } + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 16); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + off_t old_size; + NTDB_DATA k, d; + struct hash_info h; + struct ntdb_used_record rec; + ntdb_off_t off; + + ntdb = ntdb_open("run-64-bit-ntdb.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + old_size = ntdb->file->map_size; + + /* Add a fake record to chew up the existing free space. */ + k = ntdb_mkdata("fake", 4); + d.dsize = ntdb->file->map_size + - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8; + d.dptr = malloc(d.dsize); + memset(d.dptr, 0, d.dsize); + ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); + ok1(ntdb->file->map_size == old_size); + free(d.dptr); + + /* This makes a sparse file */ + ok1(ftruncate(ntdb->file->fd, ALMOST_4G) == 0); + ok1(add_free_record(ntdb, old_size, ALMOST_4G - old_size, + NTDB_LOCK_WAIT, false) == NTDB_SUCCESS); + + /* Now add a little record past the 4G barrier. 
*/ + ok1(ntdb_expand_file(ntdb, A_LITTLE_BIT) == NTDB_SUCCESS); + ok1(add_free_record(ntdb, ALMOST_4G, A_LITTLE_BIT, + NTDB_LOCK_WAIT, false) + == NTDB_SUCCESS); + + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + + /* Test allocation path. */ + k = ntdb_mkdata("key", 4); + d = ntdb_mkdata("data", 5); + ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + + /* Make sure it put it at end as we expected. */ + off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL); + ok1(off >= ALMOST_4G); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); + + ok1(ntdb_fetch(ntdb, k, &d) == 0); + ok1(d.dsize == 5); + ok1(strcmp((char *)d.dptr, "data") == 0); + free(d.dptr); + + ok1(ntdb_delete(ntdb, k) == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + + ntdb_close(ntdb); + } + + /* We might get messages about mmap failing, so don't test + * tap_log_messages */ + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-90-get-set-attributes.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-90-get-set-attributes.c new file mode 100644 index 00000000..aafd4613 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-90-get-set-attributes.c @@ -0,0 +1,162 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *unused) +{ + return 0; +} + +static int myunlock(int fd, int rw, off_t off, off_t len, void *unused) +{ + return 0; +} + +static uint32_t hash_fn(const void *key, size_t len, uint32_t seed, + void *priv) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + union ntdb_attribute seed_attr; + union ntdb_attribute hash_attr; + union ntdb_attribute lock_attr; + + seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; + 
seed_attr.base.next = &hash_attr; + seed_attr.seed.seed = 100; + + hash_attr.base.attr = NTDB_ATTRIBUTE_HASH; + hash_attr.base.next = &lock_attr; + hash_attr.hash.fn = hash_fn; + hash_attr.hash.data = &hash_attr; + + lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK; + lock_attr.base.next = &tap_log_attr; + lock_attr.flock.lock = mylock; + lock_attr.flock.unlock = myunlock; + lock_attr.flock.data = &lock_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 50); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + union ntdb_attribute attr; + + /* First open with no attributes. */ + ntdb = ntdb_open("run-90-get-set-attributes.ntdb", + flags[i] |MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, NULL); + ok1(ntdb); + + /* Get log on no attributes will fail */ + attr.base.attr = NTDB_ATTRIBUTE_LOG; + ok1(ntdb_get_attribute(ntdb, &attr) == NTDB_ERR_NOEXIST); + /* These always work. */ + attr.base.attr = NTDB_ATTRIBUTE_HASH; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH); + ok1(attr.hash.fn == ntdb_jenkins_hash); + attr.base.attr = NTDB_ATTRIBUTE_FLOCK; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK); + ok1(attr.flock.lock == ntdb_fcntl_lock); + ok1(attr.flock.unlock == ntdb_fcntl_unlock); + attr.base.attr = NTDB_ATTRIBUTE_SEED; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED); + /* This is possible, just astronomically unlikely. */ + ok1(attr.seed.seed != 0); + + /* Unset attributes. */ + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG); + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); + + /* Set them. */ + ok1(ntdb_set_attribute(ntdb, &tap_log_attr) == 0); + ok1(ntdb_set_attribute(ntdb, &lock_attr) == 0); + /* These should fail. 
*/ + ok1(ntdb_set_attribute(ntdb, &seed_attr) == NTDB_ERR_EINVAL); + ok1(tap_log_messages == 1); + ok1(ntdb_set_attribute(ntdb, &hash_attr) == NTDB_ERR_EINVAL); + ok1(tap_log_messages == 2); + tap_log_messages = 0; + + /* Getting them should work as expected. */ + attr.base.attr = NTDB_ATTRIBUTE_LOG; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG); + ok1(attr.log.fn == tap_log_attr.log.fn); + ok1(attr.log.data == tap_log_attr.log.data); + + attr.base.attr = NTDB_ATTRIBUTE_FLOCK; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK); + ok1(attr.flock.lock == mylock); + ok1(attr.flock.unlock == myunlock); + ok1(attr.flock.data == &lock_attr); + + /* Unset them again. */ + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); + ok1(tap_log_messages == 0); + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG); + ok1(tap_log_messages == 0); + + ntdb_close(ntdb); + ok1(tap_log_messages == 0); + + /* Now open with all attributes. 
*/ + ntdb = ntdb_open("run-90-get-set-attributes.ntdb", + flags[i] | MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, + &seed_attr); + + ok1(ntdb); + + /* Get will succeed */ + attr.base.attr = NTDB_ATTRIBUTE_LOG; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG); + ok1(attr.log.fn == tap_log_attr.log.fn); + ok1(attr.log.data == tap_log_attr.log.data); + + attr.base.attr = NTDB_ATTRIBUTE_HASH; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH); + ok1(attr.hash.fn == hash_fn); + ok1(attr.hash.data == &hash_attr); + + attr.base.attr = NTDB_ATTRIBUTE_FLOCK; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK); + ok1(attr.flock.lock == mylock); + ok1(attr.flock.unlock == myunlock); + ok1(attr.flock.data == &lock_attr); + + attr.base.attr = NTDB_ATTRIBUTE_SEED; + ok1(ntdb_get_attribute(ntdb, &attr) == 0); + ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED); + ok1(attr.seed.seed == seed_attr.seed.seed); + + /* Unset attributes. 
*/ + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_HASH); + ok1(tap_log_messages == 1); + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_SEED); + ok1(tap_log_messages == 2); + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK); + ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG); + ok1(tap_log_messages == 2); + tap_log_messages = 0; + + ntdb_close(ntdb); + + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-capabilities.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-capabilities.c new file mode 100644 index 00000000..dc2df2ab --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-capabilities.c @@ -0,0 +1,284 @@ +#include +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "layout.h" +#include "failtest_helper.h" +#include +#include "helprun-external-agent.h" + +static size_t len_of(bool breaks_check, bool breaks_write, bool breaks_open) +{ + size_t len = 0; + if (breaks_check) + len += 8; + if (breaks_write) + len += 16; + if (breaks_open) + len += 32; + return len; +} + +/* Creates a NTDB with various capabilities. */ +static void create_ntdb(const char *name, + unsigned int cap, + bool breaks_check, + bool breaks_write, + bool breaks_open, ...) +{ + NTDB_DATA key, data; + va_list ap; + struct ntdb_layout *layout; + struct ntdb_context *ntdb; + int fd, clen; + union ntdb_attribute seed_attr; + + /* Force a seed which doesn't allow records to clash! 
*/ + seed_attr.base.attr = NTDB_ATTRIBUTE_SEED; + seed_attr.base.next = &tap_log_attr; + seed_attr.seed.seed = 0; + + key = ntdb_mkdata("Hello", 5); + data = ntdb_mkdata("world", 5); + + /* Create a NTDB with some data, and some capabilities */ + layout = new_ntdb_layout(); + ntdb_layout_add_freetable(layout); + ntdb_layout_add_used(layout, key, data, 6); + clen = len_of(breaks_check, breaks_write, breaks_open); + ntdb_layout_add_free(layout, 15496 - clen, 0); + ntdb_layout_add_capability(layout, cap, + breaks_write, breaks_check, breaks_open, + clen); + + va_start(ap, breaks_open); + while ((cap = va_arg(ap, int)) != 0) { + breaks_check = va_arg(ap, int); + breaks_write = va_arg(ap, int); + breaks_open = va_arg(ap, int); + + key.dsize--; + ntdb_layout_add_used(layout, key, data, 11 - key.dsize); + clen = len_of(breaks_check, breaks_write, breaks_open); + ntdb_layout_add_free(layout, 16304 - clen, 0); + ntdb_layout_add_capability(layout, cap, + breaks_write, breaks_check, + breaks_open, clen); + } + va_end(ap); + + /* We open-code this, because we need to use the failtest write. */ + ntdb = ntdb_layout_get(layout, failtest_free, &seed_attr); + + fd = open(name, O_RDWR|O_TRUNC|O_CREAT, 0600); + if (fd < 0) + err(1, "opening %s for writing", name); + if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size) + != ntdb->file->map_size) + err(1, "writing %s", name); + close(fd); + ntdb_close(ntdb); + ntdb_layout_free(layout); +} + +/* Note all the "goto out" early exits: they're to shorten failtest time. */ +int main(int argc, char *argv[]) +{ + struct ntdb_context *ntdb; + char *summary; + + failtest_init(argc, argv); + failtest_hook = block_repeat_failures; + failtest_exit_check = exit_check_log; + plan_tests(60); + + failtest_suppress = true; + /* Capability says you can ignore it? 
*/ + create_ntdb("run-capabilities.ntdb", 1, false, false, false, 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + if (!ok1(ntdb)) + goto out; + ok1(tap_log_messages == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + ok1(tap_log_messages == 0); + ntdb_close(ntdb); + + /* Two capabilitues say you can ignore them? */ + create_ntdb("run-capabilities.ntdb", + 1, false, false, false, + 2, false, false, false, 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + if (!ok1(ntdb)) + goto out; + ok1(tap_log_messages == 0); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + ok1(tap_log_messages == 0); + ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); + ok1(strstr(summary, "Capability 1\n")); + free(summary); + ntdb_close(ntdb); + + /* Capability says you can't check. */ + create_ntdb("run-capabilities.ntdb", + 1, false, false, false, + 2, true, false, false, 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + if (!ok1(ntdb)) + goto out; + ok1(tap_log_messages == 0); + ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + /* We expect a warning! */ + ok1(tap_log_messages == 1); + ok1(strstr(log_last, "capabilit")); + ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); + ok1(strstr(summary, "Capability 1\n")); + ok1(strstr(summary, "Capability 2 (uncheckable)\n")); + free(summary); + ntdb_close(ntdb); + + /* Capability says you can't write. */ + create_ntdb("run-capabilities.ntdb", + 1, false, false, false, + 2, false, true, false, 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + /* We expect a message. 
*/ + ok1(!ntdb); + if (!ok1(tap_log_messages == 2)) + goto out; + if (!ok1(strstr(log_last, "unknown"))) + goto out; + ok1(strstr(log_last, "write")); + + /* We can open it read-only though! */ + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0, + &tap_log_attr); + failtest_suppress = true; + if (!ok1(ntdb)) + goto out; + ok1(tap_log_messages == 2); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + ok1(tap_log_messages == 2); + ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); + ok1(strstr(summary, "Capability 1\n")); + ok1(strstr(summary, "Capability 2 (read-only)\n")); + free(summary); + ntdb_close(ntdb); + + /* Capability says you can't open. */ + create_ntdb("run-capabilities.ntdb", + 1, false, false, false, + 2, false, false, true, 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + /* We expect a message. */ + ok1(!ntdb); + if (!ok1(tap_log_messages == 3)) + goto out; + if (!ok1(strstr(log_last, "unknown"))) + goto out; + + /* Combine capabilities correctly. */ + create_ntdb("run-capabilities.ntdb", + 1, false, false, false, + 2, true, false, false, + 3, false, true, false, 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + /* We expect a message. */ + ok1(!ntdb); + if (!ok1(tap_log_messages == 4)) + goto out; + if (!ok1(strstr(log_last, "unknown"))) + goto out; + ok1(strstr(log_last, "write")); + + /* We can open it read-only though! */ + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0, + &tap_log_attr); + failtest_suppress = true; + if (!ok1(ntdb)) + goto out; + ok1(tap_log_messages == 4); + ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + /* We expect a warning! 
*/ + ok1(tap_log_messages == 5); + ok1(strstr(log_last, "unknown")); + ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); + ok1(strstr(summary, "Capability 1\n")); + ok1(strstr(summary, "Capability 2 (uncheckable)\n")); + ok1(strstr(summary, "Capability 3 (read-only)\n")); + free(summary); + ntdb_close(ntdb); + + /* Two capability flags in one. */ + create_ntdb("run-capabilities.ntdb", + 1, false, false, false, + 2, true, true, false, + 0); + + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0, + &tap_log_attr); + failtest_suppress = true; + /* We expect a message. */ + ok1(!ntdb); + if (!ok1(tap_log_messages == 6)) + goto out; + if (!ok1(strstr(log_last, "unknown"))) + goto out; + ok1(strstr(log_last, "write")); + + /* We can open it read-only though! */ + failtest_suppress = false; + ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0, + &tap_log_attr); + failtest_suppress = true; + if (!ok1(ntdb)) + goto out; + ok1(tap_log_messages == 6); + ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK); + ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); + /* We expect a warning! */ + ok1(tap_log_messages == 7); + ok1(strstr(log_last, "unknown")); + ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS); + ok1(strstr(summary, "Capability 1\n")); + ok1(strstr(summary, "Capability 2 (uncheckable,read-only)\n")); + free(summary); + ntdb_close(ntdb); + +out: + failtest_exit(exit_status()); + + /* + * We will never reach this but the compiler complains if we do not + * return in this function. 
+ */ + return EFAULT; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-expand-in-transaction.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-expand-in-transaction.c new file mode 100644 index 00000000..20a28ee6 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-expand-in-transaction.c @@ -0,0 +1,48 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = ntdb_mkdata("key", 3); + NTDB_DATA data = ntdb_mkdata("data", 4); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + size_t size; + NTDB_DATA k, d; + ntdb = ntdb_open("run-expand-in-transaction.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + size = ntdb->file->map_size; + /* Add a fake record to chew up the existing free space. 
*/ + k = ntdb_mkdata("fake", 4); + d.dsize = ntdb->file->map_size + - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8; + d.dptr = malloc(d.dsize); + memset(d.dptr, 0, d.dsize); + ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); + ok1(ntdb->file->map_size == size); + free(d.dptr); + ok1(ntdb_transaction_start(ntdb) == 0); + ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0); + ok1(ntdb->file->map_size > size); + ok1(ntdb_transaction_commit(ntdb) == 0); + ok1(ntdb->file->map_size > size); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-features.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-features.c new file mode 100644 index 00000000..631ce876 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-features.c @@ -0,0 +1,63 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct ntdb_context *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + NTDB_DATA key = { (unsigned char *)&j, sizeof(j) }; + NTDB_DATA data = { (unsigned char *)&j, sizeof(j) }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + uint64_t features; + ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + /* Put some stuff in there. */ + for (j = 0; j < 100; j++) { + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + fail("Storing in ntdb"); + } + + /* Mess with features fields in hdr. 
*/ + features = (~NTDB_FEATURE_MASK ^ 1); + ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header, + features_used), + &features, sizeof(features)) == 0); + ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header, + features_offered), + &features, sizeof(features)) == 0); + ntdb_close(ntdb); + + ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR, 0, &tap_log_attr); + ok1(ntdb); + if (!ntdb) + continue; + + /* Should not have changed features offered. */ + ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header, + features_offered), + &features, sizeof(features)) == 0); + ok1(features == (~NTDB_FEATURE_MASK ^ 1)); + + /* Should have cleared unknown bits in features_used. */ + ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header, + features_used), + &features, sizeof(features)) == 0); + ok1(features == (1 & NTDB_FEATURE_MASK)); + + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-lockall.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-lockall.c new file mode 100644 index 00000000..a4cd1e47 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-lockall.c @@ -0,0 +1,75 @@ +#include "../private.h" +#include +#include "lock-tracking.h" + +#define fcntl fcntl_with_lockcheck +#include "ntdb-source.h" + +#include "tap-interface.h" +#include +#include +#include +#include "external-agent.h" +#include "logging.h" +#include "helprun-external-agent.h" + +#define TEST_DBNAME "run-lockall.ntdb" +#define KEY_STR "key" + +#undef fcntl + +int main(int argc, char *argv[]) +{ + struct agent *agent; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + int i; + + plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1); + agent = prepare_external_agent(); + if (!agent) + err(1, "preparing agent"); + + for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) { + enum agent_return ret; + struct ntdb_context *ntdb; + + ntdb = 
ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(ntdb); + + ret = external_agent_operation(agent, OPEN, TEST_DBNAME); + ok1(ret == SUCCESS); + + ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); + ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) + == WOULD_HAVE_BLOCKED); + ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) + == WOULD_HAVE_BLOCKED); + /* Test nesting. */ + ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS); + ntdb_unlockall(ntdb); + ntdb_unlockall(ntdb); + + ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) + == SUCCESS); + + ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS); + ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) + == WOULD_HAVE_BLOCKED); + ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR) + == SUCCESS); + ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS); + ntdb_unlockall_read(ntdb); + ntdb_unlockall_read(ntdb); + + ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR) + == SUCCESS); + ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS); + ntdb_close(ntdb); + } + + free_external_agent(agent); + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-remap-in-read_traverse.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-remap-in-read_traverse.c new file mode 100644 index 00000000..6fe537d4 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-remap-in-read_traverse.c @@ -0,0 +1,58 @@ +#include "ntdb-source.h" +/* We had a bug where we marked the ntdb read-only for a ntdb_traverse_read. + * If we then expanded the ntdb, we would remap read-only, and later SEGV. 
*/ +#include "tap-interface.h" +#include "external-agent.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static bool file_larger(int fd, ntdb_len_t size) +{ + struct stat st; + + fstat(fd, &st); + return st.st_size != size; +} + +static unsigned add_records_to_grow(struct agent *agent, int fd, ntdb_len_t size) +{ + unsigned int i; + + for (i = 0; !file_larger(fd, size); i++) { + char data[50]; + sprintf(data, "%i=%i", i, i); + if (external_agent_operation(agent, STORE, data) != SUCCESS) + return 0; + } + diag("Added %u records to grow file", i); + return i; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct agent *agent; + struct ntdb_context *ntdb; + NTDB_DATA d = ntdb_mkdata("hello", 5); + const char filename[] = "run-remap-in-read_traverse.ntdb"; + + plan_tests(4); + + agent = prepare_external_agent(); + + ntdb = ntdb_open(filename, MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + + ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS); + i = add_records_to_grow(agent, ntdb->file->fd, ntdb->file->map_size); + + /* Do a traverse. */ + ok1(ntdb_traverse(ntdb, NULL, NULL) == i); + + /* Now store something! */ + ok1(ntdb_store(ntdb, d, d, NTDB_INSERT) == 0); + ok1(tap_log_messages == 0); + ntdb_close(ntdb); + free_external_agent(agent); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-seed.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-seed.c new file mode 100644 index 00000000..5ca6678a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-seed.c @@ -0,0 +1,62 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static int log_count = 0; + +/* Normally we get a log when setting random seed. 
*/ +static void my_log_fn(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, void *priv) +{ + log_count++; +} + +static union ntdb_attribute log_attr = { + .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG }, + .fn = my_log_fn } +}; + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct ntdb_context *ntdb; + union ntdb_attribute attr; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + + attr.seed.base.attr = NTDB_ATTRIBUTE_SEED; + attr.seed.base.next = &log_attr; + attr.seed.seed = 42; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct ntdb_header hdr; + int fd; + ntdb = ntdb_open("run-seed.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &attr); + ok1(ntdb); + if (!ntdb) + continue; + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(ntdb->hash_seed == 42); + ok1(log_count == 0); + ntdb_close(ntdb); + + if (flags[i] & NTDB_INTERNAL) + continue; + + fd = open("run-seed.ntdb", O_RDONLY); + ok1(fd >= 0); + ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)); + if (flags[i] & NTDB_CONVERT) + ok1(bswap_64(hdr.hash_seed) == 42); + else + ok1(hdr.hash_seed == 42); + close(fd); + } + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-tdb_errorstr.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-tdb_errorstr.c new file mode 100644 index 00000000..499eb42e --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-tdb_errorstr.c @@ -0,0 +1,53 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "helprun-external-agent.h" + +int main(int argc, char *argv[]) +{ + enum NTDB_ERROR e; + plan_tests(NTDB_ERR_RDONLY*-1 + 2); + + for (e = NTDB_SUCCESS; e >= NTDB_ERR_RDONLY; e--) { + switch (e) { + case NTDB_SUCCESS: + ok1(!strcmp(ntdb_errorstr(e), + "Success")); + break; + case NTDB_ERR_IO: + 
ok1(!strcmp(ntdb_errorstr(e), + "IO Error")); + break; + case NTDB_ERR_LOCK: + ok1(!strcmp(ntdb_errorstr(e), + "Locking error")); + break; + case NTDB_ERR_OOM: + ok1(!strcmp(ntdb_errorstr(e), + "Out of memory")); + break; + case NTDB_ERR_EXISTS: + ok1(!strcmp(ntdb_errorstr(e), + "Record exists")); + break; + case NTDB_ERR_EINVAL: + ok1(!strcmp(ntdb_errorstr(e), + "Invalid parameter")); + break; + case NTDB_ERR_NOEXIST: + ok1(!strcmp(ntdb_errorstr(e), + "Record does not exist")); + break; + case NTDB_ERR_RDONLY: + ok1(!strcmp(ntdb_errorstr(e), + "write not permitted")); + break; + case NTDB_ERR_CORRUPT: + ok1(!strcmp(ntdb_errorstr(e), + "Corrupt database")); + break; + } + } + ok1(!strcmp(ntdb_errorstr(e), "Invalid error code")); + + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-tdb_foreach.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-tdb_foreach.c new file mode 100644 index 00000000..532474b9 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-tdb_foreach.c @@ -0,0 +1,91 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +static int drop_count(struct ntdb_context *ntdb, unsigned int *count) +{ + if (--(*count) == 0) + return 1; + return 0; +} + +static int set_found(struct ntdb_context *ntdb, bool found[3]) +{ + unsigned int idx; + + if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach0.ntdb") == 0) + idx = 0; + else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach1.ntdb") == 0) + idx = 1; + else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach2.ntdb") == 0) + idx = 2; + else + abort(); + + if (found[idx]) + abort(); + found[idx] = true; + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, count; + bool found[3]; + struct ntdb_context *ntdb0, *ntdb1, *ntdb; + int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 8); + for (i = 0; i < sizeof(flags) / 
sizeof(flags[0]); i++) { + ntdb0 = ntdb_open("run-ntdb_foreach0.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ntdb = ntdb_open("run-ntdb_foreach2.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + + memset(found, 0, sizeof(found)); + ntdb_foreach(set_found, found); + ok1(found[0] && found[1] && found[2]); + + /* Test premature iteration termination */ + count = 1; + ntdb_foreach(drop_count, &count); + ok1(count == 0); + + ntdb_close(ntdb1); + memset(found, 0, sizeof(found)); + ntdb_foreach(set_found, found); + ok1(found[0] && !found[1] && found[2]); + + ntdb_close(ntdb); + memset(found, 0, sizeof(found)); + ntdb_foreach(set_found, found); + ok1(found[0] && !found[1] && !found[2]); + + ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb", + flags[i]|MAYBE_NOSYNC, + O_RDWR, 0600, &tap_log_attr); + memset(found, 0, sizeof(found)); + ntdb_foreach(set_found, found); + ok1(found[0] && found[1] && !found[2]); + + ntdb_close(ntdb0); + memset(found, 0, sizeof(found)); + ntdb_foreach(set_found, found); + ok1(!found[0] && found[1] && !found[2]); + + ntdb_close(ntdb1); + memset(found, 0, sizeof(found)); + ntdb_foreach(set_found, found); + ok1(!found[0] && !found[1] && !found[2]); + ok1(tap_log_messages == 0); + } + + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/run-traverse.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-traverse.c new file mode 100644 index 00000000..29b517db --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/run-traverse.c @@ -0,0 +1,204 @@ +#include "ntdb-source.h" +#include "tap-interface.h" +#include "logging.h" +#include "helprun-external-agent.h" + +#define NUM_RECORDS 1000 + +/* We use the same seed which we saw a failure on. 
*/ +static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) +{ + return hash64_stable((const unsigned char *)key, len, + *(uint64_t *)p); +} + +static bool store_records(struct ntdb_context *ntdb) +{ + int i; + NTDB_DATA key = { (unsigned char *)&i, sizeof(i) }; + NTDB_DATA data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_RECORDS; i++) + if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0) + return false; + return true; +} + +struct trav_data { + unsigned int calls, call_limit; + int low, high; + bool mismatch; + bool delete; + enum NTDB_ERROR delete_error; +}; + +static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, + struct trav_data *td) +{ + int val; + + td->calls++; + if (key.dsize != sizeof(val) || dbuf.dsize != sizeof(val) + || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) { + td->mismatch = true; + return -1; + } + memcpy(&val, dbuf.dptr, dbuf.dsize); + if (val < td->low) + td->low = val; + if (val > td->high) + td->high = val; + + if (td->delete) { + td->delete_error = ntdb_delete(ntdb, key); + if (td->delete_error != NTDB_SUCCESS) { + return -1; + } + } + + if (td->calls == td->call_limit) + return 1; + return 0; +} + +struct trav_grow_data { + unsigned int calls; + unsigned int num_large; + bool mismatch; + enum NTDB_ERROR error; +}; + +static int trav_grow(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, + struct trav_grow_data *tgd) +{ + int val; + unsigned char buffer[128] = { 0 }; + + tgd->calls++; + if (key.dsize != sizeof(val) || dbuf.dsize < sizeof(val) + || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) { + tgd->mismatch = true; + return -1; + } + + if (dbuf.dsize > sizeof(val)) + /* We must have seen this before! */ + tgd->num_large++; + + /* Make a big difference to the database. 
*/ + dbuf.dptr = buffer; + dbuf.dsize = sizeof(buffer); + tgd->error = ntdb_append(ntdb, key, dbuf); + if (tgd->error != NTDB_SUCCESS) { + return -1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + int num; + struct trav_data td; + struct trav_grow_data tgd; + struct ntdb_context *ntdb; + uint64_t seed = 16014841315512641303ULL; + int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, + NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, + NTDB_NOMMAP|NTDB_CONVERT }; + union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, + .fn = fixedhash, + .data = &seed } }; + + hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + ntdb = ntdb_open("run-traverse.ntdb", flags[i]|MAYBE_NOSYNC, + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(ntdb); + if (!ntdb) + continue; + + ok1(ntdb_traverse(ntdb, NULL, NULL) == 0); + + ok1(store_records(ntdb)); + num = ntdb_traverse(ntdb, NULL, NULL); + ok1(num == NUM_RECORDS); + + /* Full traverse. */ + td.calls = 0; + td.call_limit = UINT_MAX; + td.low = INT_MAX; + td.high = INT_MIN; + td.mismatch = false; + td.delete = false; + + num = ntdb_traverse(ntdb, trav, &td); + ok1(num == NUM_RECORDS); + ok1(!td.mismatch); + ok1(td.calls == NUM_RECORDS); + ok1(td.low == 0); + ok1(td.high == NUM_RECORDS-1); + + /* Short traverse. */ + td.calls = 0; + td.call_limit = NUM_RECORDS / 2; + td.low = INT_MAX; + td.high = INT_MIN; + td.mismatch = false; + td.delete = false; + + num = ntdb_traverse(ntdb, trav, &td); + ok1(num == NUM_RECORDS / 2); + ok1(!td.mismatch); + ok1(td.calls == NUM_RECORDS / 2); + ok1(td.low <= NUM_RECORDS / 2); + ok1(td.high > NUM_RECORDS / 2); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(tap_log_messages == 0); + + /* Deleting traverse (delete everything). 
*/ + td.calls = 0; + td.call_limit = UINT_MAX; + td.low = INT_MAX; + td.high = INT_MIN; + td.mismatch = false; + td.delete = true; + td.delete_error = NTDB_SUCCESS; + num = ntdb_traverse(ntdb, trav, &td); + ok1(num == NUM_RECORDS); + ok1(td.delete_error == NTDB_SUCCESS); + ok1(!td.mismatch); + ok1(td.calls == NUM_RECORDS); + ok1(td.low == 0); + ok1(td.high == NUM_RECORDS - 1); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Now it's empty! */ + ok1(ntdb_traverse(ntdb, NULL, NULL) == 0); + + /* Re-add. */ + ok1(store_records(ntdb)); + ok1(ntdb_traverse(ntdb, NULL, NULL) == NUM_RECORDS); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + + /* Grow. This will cause us to be reshuffled. */ + tgd.calls = 0; + tgd.num_large = 0; + tgd.mismatch = false; + tgd.error = NTDB_SUCCESS; + ok1(ntdb_traverse(ntdb, trav_grow, &tgd) > 1); + ok1(tgd.error == 0); + ok1(!tgd.mismatch); + ok1(ntdb_check(ntdb, NULL, NULL) == 0); + ok1(tgd.num_large < tgd.calls); + diag("growing db: %u calls, %u repeats", + tgd.calls, tgd.num_large); + + ntdb_close(ntdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/tap-interface.c b/junkcode/rusty@rustcorp.com.au-ntdb/test/tap-interface.c new file mode 100644 index 00000000..077ec2cd --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/tap-interface.c @@ -0,0 +1,3 @@ +#include "tap-interface.h" + +unsigned tap_ok_count, tap_ok_target = -1U; diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/test/tap-interface.h b/junkcode/rusty@rustcorp.com.au-ntdb/test/tap-interface.h new file mode 100644 index 00000000..5363c32b --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/test/tap-interface.h @@ -0,0 +1,42 @@ +/* + Unix SMB/CIFS implementation. + Simplistic implementation of tap interface. + + Copyright (C) Rusty Russell 2012 + + ** NOTE! The following LGPL license applies to the talloc + ** library. 
This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include +#include +#include "no-fsync.h" + +#ifndef __location__ +#define __TAP_STRING_LINE1__(s) #s +#define __TAP_STRING_LINE2__(s) __TAP_STRING_LINE1__(s) +#define __TAP_STRING_LINE3__ __TAP_STRING_LINE2__(__LINE__) +#define __location__ __FILE__ ":" __TAP_STRING_LINE3__ +#endif + +extern unsigned tap_ok_count, tap_ok_target; +#define plan_tests(num) do { tap_ok_target = (num); } while(0) +#define ok(e, ...) ((e) ? (printf("."), tap_ok_count++, true) : (warnx(__VA_ARGS__), false)) +#define ok1(e) ok((e), "%s:%s", __location__, #e) +#define pass(...) (printf("."), tap_ok_count++) +#define fail(...) warnx(__VA_ARGS__) +#define diag(...) do { printf(__VA_ARGS__); printf("\n"); } while(0) +#define exit_status() (tap_ok_count == tap_ok_target ? 0 : 1) diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/Makefile b/junkcode/rusty@rustcorp.com.au-ntdb/tools/Makefile new file mode 100644 index 00000000..087c256d --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/Makefile @@ -0,0 +1,16 @@ +OBJS:=../../ntdb.o ../../hash.o ../../tally.o +CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg +LDFLAGS:=-L../../.. 
+ +default: ntdbtorture ntdbtool ntdbdump ntdbrestore mkntdb speed growtdb-bench + +ntdbdump: ntdbdump.c $(OBJS) +ntdbrestore: ntdbrestore.c $(OBJS) +ntdbtorture: ntdbtorture.c $(OBJS) +ntdbtool: ntdbtool.c $(OBJS) +mkntdb: mkntdb.c $(OBJS) +speed: speed.c $(OBJS) +growtdb-bench: growtdb-bench.c $(OBJS) + +clean: + rm -f ntdbtorture ntdbdump ntdbrestore ntdbtool mkntdb speed growtdb-bench diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/growtdb-bench.c b/junkcode/rusty@rustcorp.com.au-ntdb/tools/growtdb-bench.c new file mode 100644 index 00000000..28c1de83 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/growtdb-bench.c @@ -0,0 +1,127 @@ +#include "ntdb.h" +#include +#include +#include +#include +#include + +static void logfn(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) +{ + fprintf(stderr, "ntdb:%s:%s:%s\n", + ntdb_name(ntdb), ntdb_errorstr(ecode), message); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j, users, groups; + NTDB_DATA idxkey, idxdata; + NTDB_DATA k, d, gk; + char cmd[100]; + struct ntdb_context *ntdb; + enum NTDB_ERROR ecode; + union ntdb_attribute log; + + if (argc != 3) { + printf("Usage: growtdb-bench \n"); + exit(1); + } + users = atoi(argv[1]); + groups = atoi(argv[2]); + + sprintf(cmd, "cat /proc/%i/statm", getpid()); + + log.base.attr = NTDB_ATTRIBUTE_LOG; + log.base.next = NULL; + log.log.fn = logfn; + + ntdb = ntdb_open("/tmp/growtdb.ntdb", NTDB_DEFAULT, + O_RDWR|O_CREAT|O_TRUNC, 0600, &log); + + idxkey.dptr = (unsigned char *)"User index"; + idxkey.dsize = strlen("User index"); + idxdata.dsize = 51; + idxdata.dptr = calloc(idxdata.dsize, 1); + if (idxdata.dptr == NULL) { + fprintf(stderr, "Unable to allocate memory for idxdata.dptr\n"); + return -1; + } + + /* Create users. 
*/ + k.dsize = 48; + k.dptr = calloc(k.dsize, 1); + if (k.dptr == NULL) { + fprintf(stderr, "Unable to allocate memory for k.dptr\n"); + return -1; + } + d.dsize = 64; + d.dptr = calloc(d.dsize, 1); + if (d.dptr == NULL) { + fprintf(stderr, "Unable to allocate memory for d.dptr\n"); + return -1; + } + + ntdb_transaction_start(ntdb); + for (i = 0; i < users; i++) { + memcpy(k.dptr, &i, sizeof(i)); + ecode = ntdb_store(ntdb, k, d, NTDB_INSERT); + if (ecode != NTDB_SUCCESS) + errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode)); + + /* This simulates a growing index record. */ + ecode = ntdb_append(ntdb, idxkey, idxdata); + if (ecode != NTDB_SUCCESS) + errx(1, "ntdb append failed: %s", ntdb_errorstr(ecode)); + } + if ((ecode = ntdb_transaction_commit(ntdb)) != 0) + errx(1, "ntdb commit1 failed: %s", ntdb_errorstr(ecode)); + + if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0) + errx(1, "ntdb_check failed after initial insert!"); + + system(cmd); + + /* Now put them all in groups: add 32 bytes to each record for + * a group. */ + gk.dsize = 48; + gk.dptr = calloc(k.dsize, 1); + if (gk.dptr == NULL) { + fprintf(stderr, "Unable to allocate memory for gk.dptr\n"); + return -1; + } + gk.dptr[gk.dsize-1] = 1; + + d.dsize = 32; + for (i = 0; i < groups; i++) { + ntdb_transaction_start(ntdb); + /* Create the "group". */ + memcpy(gk.dptr, &i, sizeof(i)); + ecode = ntdb_store(ntdb, gk, d, NTDB_INSERT); + if (ecode != NTDB_SUCCESS) + errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode)); + + /* Now populate it. */ + for (j = 0; j < users; j++) { + /* Append to the user. */ + memcpy(k.dptr, &j, sizeof(j)); + if ((ecode = ntdb_append(ntdb, k, d)) != 0) + errx(1, "ntdb append failed: %s", + ntdb_errorstr(ecode)); + + /* Append to the group. 
*/ + if ((ecode = ntdb_append(ntdb, gk, d)) != 0) + errx(1, "ntdb append failed: %s", + ntdb_errorstr(ecode)); + } + if ((ecode = ntdb_transaction_commit(ntdb)) != 0) + errx(1, "ntdb commit2 failed: %s", ntdb_errorstr(ecode)); + if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0) + errx(1, "ntdb_check failed after iteration %i!", i); + system(cmd); + } + + return 0; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/mkntdb.c b/junkcode/rusty@rustcorp.com.au-ntdb/tools/mkntdb.c new file mode 100644 index 00000000..e728987a --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/mkntdb.c @@ -0,0 +1,29 @@ +#include "ntdb.h" +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + unsigned int i, num_recs; + struct ntdb_context *ntdb; + + if (argc != 3 || (num_recs = atoi(argv[2])) == 0) + errx(1, "Usage: mktdb "); + + ntdb = ntdb_open(argv[1], NTDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL); + if (!ntdb) + err(1, "Opening %s", argv[1]); + + for (i = 0; i < num_recs; i++) { + NTDB_DATA d; + + d.dptr = (void *)&i; + d.dsize = sizeof(i); + if (ntdb_store(ntdb, d, d, NTDB_INSERT) != 0) + err(1, "Failed to store record %i", i); + } + printf("Done\n"); + return 0; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbbackup.c b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbbackup.c new file mode 100644 index 00000000..c632f0ed --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbbackup.c @@ -0,0 +1,340 @@ +/* + Unix SMB/CIFS implementation. + low level ntdb backup and restore utility + Copyright (C) Andrew Tridgell 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* + + This program is meant for backup/restore of ntdb databases. Typical usage would be: + tdbbackup *.ntdb + when Samba shuts down cleanly, which will make a backup of all the local databases + to *.bak files. Then on Samba startup you would use: + tdbbackup -v *.ntdb + and this will check the databases for corruption and if corruption is detected then + the backup will be restored. + + You may also like to do a backup on a regular basis while Samba is + running, perhaps using cron. + + The reason this program is needed is to cope with power failures + while Samba is running. A power failure could lead to database + corruption and Samba will then not start correctly. + + Note that many of the databases in Samba are transient and thus + don't need to be backed up, so you can optimise the above a little + by only running the backup on the critical databases. 
+ + */ + +#include "config.h" +#include "ntdb.h" +#include "private.h" + +#ifdef HAVE_GETOPT_H +#include +#endif + +static int failed; + +static void ntdb_log(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) +{ + fprintf(stderr, "%s:%s\n", ntdb_errorstr(ecode), message); +} + +static char *add_suffix(const char *name, const char *suffix) +{ + char *ret; + int len = strlen(name) + strlen(suffix) + 1; + ret = (char *)malloc(len); + if (!ret) { + fprintf(stderr,"Out of memory!\n"); + exit(1); + } + snprintf(ret, len, "%s%s", name, suffix); + return ret; +} + +static int copy_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) +{ + struct ntdb_context *ntdb_new = (struct ntdb_context *)state; + enum NTDB_ERROR err; + + err = ntdb_store(ntdb_new, key, dbuf, NTDB_INSERT); + if (err) { + fprintf(stderr,"Failed to insert into %s: %s\n", + ntdb_name(ntdb_new), ntdb_errorstr(err)); + failed = 1; + return 1; + } + return 0; +} + + +static int test_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) +{ + return 0; +} + +/* + carefully backup a ntdb, validating the contents and + only doing the backup if its OK + this function is also used for restore +*/ +static int backup_ntdb(const char *old_name, const char *new_name) +{ + struct ntdb_context *ntdb; + struct ntdb_context *ntdb_new; + char *tmp_name; + struct stat st; + int count1, count2; + enum NTDB_ERROR err; + union ntdb_attribute log_attr; + + tmp_name = add_suffix(new_name, ".tmp"); + + /* stat the old ntdb to find its permissions */ + if (stat(old_name, &st) != 0) { + perror(old_name); + free(tmp_name); + return 1; + } + + log_attr.base.attr = NTDB_ATTRIBUTE_LOG; + log_attr.base.next = NULL; + log_attr.log.fn = ntdb_log; + + /* open the old ntdb */ + ntdb = ntdb_open(old_name, NTDB_DEFAULT, O_RDWR, 0, &log_attr); + if (!ntdb) { + printf("Failed to open %s\n", old_name); + free(tmp_name); + return 1; + } + + 
unlink(tmp_name); + ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, + O_RDWR|O_CREAT|O_EXCL, st.st_mode & 0777, + &log_attr); + if (!ntdb_new) { + perror(tmp_name); + free(tmp_name); + return 1; + } + + err = ntdb_transaction_start(ntdb); + if (err) { + fprintf(stderr, "Failed to start transaction on old ntdb: %s\n", + ntdb_errorstr(err)); + ntdb_close(ntdb); + ntdb_close(ntdb_new); + unlink(tmp_name); + free(tmp_name); + return 1; + } + + /* lock the backup ntdb so that nobody else can change it */ + err = ntdb_lockall(ntdb_new); + if (err) { + fprintf(stderr, "Failed to lock backup ntdb: %s\n", + ntdb_errorstr(err)); + ntdb_close(ntdb); + ntdb_close(ntdb_new); + unlink(tmp_name); + free(tmp_name); + return 1; + } + + failed = 0; + + /* traverse and copy */ + count1 = ntdb_traverse(ntdb, copy_fn, (void *)ntdb_new); + if (count1 < 0 || failed) { + fprintf(stderr,"failed to copy %s\n", old_name); + ntdb_close(ntdb); + ntdb_close(ntdb_new); + unlink(tmp_name); + free(tmp_name); + return 1; + } + + /* close the old ntdb */ + ntdb_close(ntdb); + + /* copy done, unlock the backup ntdb */ + ntdb_unlockall(ntdb_new); + +#ifdef HAVE_FDATASYNC + if (fdatasync(ntdb_fd(ntdb_new)) != 0) { +#else + if (fsync(ntdb_fd(ntdb_new)) != 0) { +#endif + /* not fatal */ + fprintf(stderr, "failed to fsync backup file\n"); + } + + /* close the new ntdb and re-open read-only */ + ntdb_close(ntdb_new); + + /* we don't need the hash attr any more */ + log_attr.base.next = NULL; + + ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, O_RDONLY, 0, &log_attr); + if (!ntdb_new) { + fprintf(stderr,"failed to reopen %s\n", tmp_name); + unlink(tmp_name); + perror(tmp_name); + free(tmp_name); + return 1; + } + + /* traverse the new ntdb to confirm */ + count2 = ntdb_traverse(ntdb_new, test_fn, NULL); + if (count2 != count1) { + fprintf(stderr,"failed to copy %s\n", old_name); + ntdb_close(ntdb_new); + unlink(tmp_name); + free(tmp_name); + return 1; + } + + /* close the new ntdb and rename it to .bak */ + 
ntdb_close(ntdb_new); + if (rename(tmp_name, new_name) != 0) { + perror(new_name); + free(tmp_name); + return 1; + } + + free(tmp_name); + + return 0; +} + +/* + verify a ntdb and if it is corrupt then restore from *.bak +*/ +static int verify_ntdb(const char *fname, const char *bak_name) +{ + struct ntdb_context *ntdb; + int count = -1; + union ntdb_attribute log_attr; + + log_attr.base.attr = NTDB_ATTRIBUTE_LOG; + log_attr.base.next = NULL; + log_attr.log.fn = ntdb_log; + + /* open the ntdb */ + ntdb = ntdb_open(fname, NTDB_DEFAULT, O_RDONLY, 0, &log_attr); + + /* traverse the ntdb, then close it */ + if (ntdb) { + count = ntdb_traverse(ntdb, test_fn, NULL); + ntdb_close(ntdb); + } + + /* count is < 0 means an error */ + if (count < 0) { + printf("restoring %s\n", fname); + return backup_ntdb(bak_name, fname); + } + + printf("%s : %d records\n", fname, count); + + return 0; +} + +/* + see if one file is newer than another +*/ +static int file_newer(const char *fname1, const char *fname2) +{ + struct stat st1, st2; + if (stat(fname1, &st1) != 0) { + return 0; + } + if (stat(fname2, &st2) != 0) { + return 1; + } + return (st1.st_mtime > st2.st_mtime); +} + +static void usage(void) +{ + printf("Usage: ntdbbackup [options] \n\n"); + printf(" -h this help message\n"); + printf(" -v verify mode (restore if corrupt)\n"); + printf(" -s suffix set the backup suffix\n"); + printf(" -v verify mode (restore if corrupt)\n"); +} + + + int main(int argc, char *argv[]) +{ + int i; + int ret = 0; + int c; + int verify = 0; + const char *suffix = ".bak"; + + while ((c = getopt(argc, argv, "vhs:")) != -1) { + switch (c) { + case 'h': + usage(); + exit(0); + case 'v': + verify = 1; + break; + case 's': + suffix = optarg; + break; + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + usage(); + exit(1); + } + + for (i=0; i. 
+*/ +#include "config.h" +#include "ntdb.h" +#include "private.h" + +static void print_data(NTDB_DATA d) +{ + unsigned char *p = (unsigned char *)d.dptr; + int len = d.dsize; + while (len--) { + if (isprint(*p) && !strchr("\"\\", *p)) { + fputc(*p, stdout); + } else { + printf("\\%02X", *p); + } + p++; + } +} + +static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) +{ + printf("{\n"); + printf("key(%d) = \"", (int)key.dsize); + print_data(key); + printf("\"\n"); + printf("data(%d) = \"", (int)dbuf.dsize); + print_data(dbuf); + printf("\"\n"); + printf("}\n"); + return 0; +} + +static int dump_ntdb(const char *fname, const char *keyname) +{ + struct ntdb_context *ntdb; + NTDB_DATA key, value; + + ntdb = ntdb_open(fname, 0, O_RDONLY, 0, NULL); + if (!ntdb) { + printf("Failed to open %s\n", fname); + return 1; + } + + if (!keyname) { + ntdb_traverse(ntdb, traverse_fn, NULL); + } else { + key = ntdb_mkdata(keyname, strlen(keyname)); + if (ntdb_fetch(ntdb, key, &value) != 0) { + return 1; + } else { + print_data(value); + free(value.dptr); + } + } + + return 0; +} + +static void usage( void) +{ + printf( "Usage: ntdbdump [options] \n\n"); + printf( " -h this help message\n"); + printf( " -k keyname dumps value of keyname\n"); +} + + int main(int argc, char *argv[]) +{ + char *fname, *keyname=NULL; + int c; + + if (argc < 2) { + printf("Usage: ntdbdump \n"); + exit(1); + } + + while ((c = getopt( argc, argv, "hk:")) != -1) { + switch (c) { + case 'h': + usage(); + exit( 0); + case 'k': + keyname = optarg; + break; + default: + usage(); + exit( 1); + } + } + + fname = argv[optind]; + + return dump_ntdb(fname, keyname); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbrestore.c b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbrestore.c new file mode 100644 index 00000000..695af79c --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbrestore.c @@ -0,0 +1,242 @@ +/* + ntdbrestore -- construct a ntdb from 
(n)tdbdump output. + Copyright (C) Rusty Russell 2012 + Copyright (C) Volker Lendecke 2010 + Copyright (C) Simon McVittie 2005 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "config.h" +#include "ntdb.h" +#include "private.h" +#include + +static int read_linehead(FILE *f) +{ + int i, c; + int num_bytes; + char prefix[128]; + + while (1) { + c = getc(f); + if (c == EOF) { + return -1; + } + if (c == '(') { + break; + } + } + for (i=0; idptr = (unsigned char *)malloc(size); + if (d->dptr == NULL) { + return -1; + } + d->dsize = size; + + for (i=0; idptr[i] = (low|high); + } else { + d->dptr[i] = c; + } + } + return 0; +} + +static int swallow(FILE *f, const char *s, int *eof) +{ + char line[128]; + + if (fgets(line, sizeof(line), f) == NULL) { + if (eof != NULL) { + *eof = 1; + } + return -1; + } + if (strcmp(line, s) != 0) { + return -1; + } + return 0; +} + +static bool read_rec(FILE *f, struct ntdb_context *ntdb, int *eof) +{ + int length; + NTDB_DATA key, data; + bool ret = false; + enum NTDB_ERROR e; + + key.dptr = NULL; + data.dptr = NULL; + + if (swallow(f, "{\n", eof) == -1) { + goto fail; + } + length = read_linehead(f); + if (length == -1) { + goto fail; + } + if (read_data(f, &key, length) == -1) { + goto fail; + } + if (swallow(f, "\"\n", NULL) == -1) { + goto fail; + } + length = read_linehead(f); + if (length == -1) { + goto fail; + } + if (read_data(f, &data, length) == -1) { + goto fail; + 
} + if ((swallow(f, "\"\n", NULL) == -1) + || (swallow(f, "}\n", NULL) == -1)) { + goto fail; + } + e = ntdb_store(ntdb, key, data, NTDB_INSERT); + if (e != NTDB_SUCCESS) { + fprintf(stderr, "NTDB error: %s\n", ntdb_errorstr(e)); + goto fail; + } + + ret = true; +fail: + free(key.dptr); + free(data.dptr); + return ret; +} + +static int restore_ntdb(const char *fname, unsigned int hsize) +{ + struct ntdb_context *ntdb; + union ntdb_attribute hashsize; + + hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE; + hashsize.base.next = NULL; + hashsize.hashsize.size = hsize; + + ntdb = ntdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, + hsize ? &hashsize : NULL); + if (!ntdb) { + perror("ntdb_open"); + fprintf(stderr, "Failed to open %s\n", fname); + return 1; + } + + while (1) { + int eof = 0; + if (!read_rec(stdin, ntdb, &eof)) { + if (eof) { + break; + } + return 1; + } + } + if (ntdb_close(ntdb)) { + fprintf(stderr, "Error closing ntdb\n"); + return 1; + } + fprintf(stderr, "EOF\n"); + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int hsize = 0; + const char *execname = argv[0]; + + if (argv[1] && strcmp(argv[1], "-h") == 0) { + if (argv[2]) { + hsize = atoi(argv[2]); + } + if (hsize == 0) { + fprintf(stderr, "-h requires a integer value" + " (eg. 128 or 131072)\n"); + exit(1); + } + argv += 2; + argc -= 2; + } + if (argc != 2) { + printf("Usage: %s [-h ] dbname < tdbdump_output\n", + execname); + exit(1); + } + + + return restore_ntdb(argv[1], hsize); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbtool.c b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbtool.c new file mode 100644 index 00000000..144cd92f --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbtool.c @@ -0,0 +1,794 @@ +/* + Unix SMB/CIFS implementation. 
+ Samba database functions + Copyright (C) Andrew Tridgell 1999-2000 + Copyright (C) Paul `Rusty' Russell 2000 + Copyright (C) Jeremy Allison 2000 + Copyright (C) Andrew Esh 2001 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "config.h" +#include "ntdb.h" +#include "private.h" + +static int do_command(void); +const char *cmdname; +char *arg1, *arg2; +size_t arg1len, arg2len; +int bIterate = 0; +char *line; +NTDB_DATA iterate_kbuf; +char cmdline[1024]; +static int disable_mmap; + +enum commands { + CMD_CREATE_NTDB, + CMD_OPEN_NTDB, + CMD_TRANSACTION_START, + CMD_TRANSACTION_COMMIT, + CMD_TRANSACTION_CANCEL, + CMD_ERASE, + CMD_DUMP, + CMD_INSERT, + CMD_MOVE, + CMD_STORE, + CMD_SHOW, + CMD_KEYS, + CMD_HEXKEYS, + CMD_DELETE, +#if 0 + CMD_LIST_HASH_FREE, + CMD_LIST_FREE, +#endif + CMD_INFO, + CMD_MMAP, + CMD_SPEED, + CMD_FIRST, + CMD_NEXT, + CMD_SYSTEM, + CMD_CHECK, + CMD_QUIT, + CMD_HELP +}; + +typedef struct { + const char *name; + enum commands cmd; +} COMMAND_TABLE; + +COMMAND_TABLE cmd_table[] = { + {"create", CMD_CREATE_NTDB}, + {"open", CMD_OPEN_NTDB}, +#if 0 + {"transaction_start", CMD_TRANSACTION_START}, + {"transaction_commit", CMD_TRANSACTION_COMMIT}, + {"transaction_cancel", CMD_TRANSACTION_CANCEL}, +#endif + {"erase", CMD_ERASE}, + {"dump", CMD_DUMP}, + {"insert", CMD_INSERT}, + {"move", CMD_MOVE}, + {"store", CMD_STORE}, + {"show", CMD_SHOW}, + {"keys", CMD_KEYS}, + {"hexkeys", CMD_HEXKEYS}, + 
{"delete", CMD_DELETE}, +#if 0 + {"list", CMD_LIST_HASH_FREE}, + {"free", CMD_LIST_FREE}, +#endif + {"info", CMD_INFO}, + {"speed", CMD_SPEED}, + {"mmap", CMD_MMAP}, + {"first", CMD_FIRST}, + {"1", CMD_FIRST}, + {"next", CMD_NEXT}, + {"n", CMD_NEXT}, + {"check", CMD_CHECK}, + {"quit", CMD_QUIT}, + {"q", CMD_QUIT}, + {"!", CMD_SYSTEM}, + {NULL, CMD_HELP} +}; + +struct timeval tp1,tp2; + +static void _start_timer(void) +{ + gettimeofday(&tp1,NULL); +} + +static double _end_timer(void) +{ + gettimeofday(&tp2,NULL); + return((tp2.tv_sec - tp1.tv_sec) + + (tp2.tv_usec - tp1.tv_usec)*1.0e-6); +} + +static void ntdb_log(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) +{ + fprintf(stderr, "ntdb:%s:%s:%s\n", + ntdb_name(ntdb), ntdb_errorstr(ecode), message); +} + +/* a ntdb tool for manipulating a ntdb database */ + +static struct ntdb_context *ntdb; + +static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state); +static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state); +static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state); + +static void print_asc(const char *buf,int len) +{ + int i; + + /* We're probably printing ASCII strings so don't try to display + the trailing NULL character. 
*/ + + if (buf[len - 1] == 0) + len--; + + for (i=0;i8) printf(" "); + while (n--) printf(" "); + + n = i%16; + if (n > 8) n = 8; + print_asc(&buf[i-(i%16)],n); printf(" "); + n = (i%16) - n; + if (n>0) print_asc(&buf[i-n],n); + printf("\n"); + } +} + +static void help(void) +{ + printf("\n" +"tdbtool: \n" +" create dbname : create a database\n" +" open dbname : open an existing database\n" +" openjh dbname : open an existing database (jenkins hash)\n" +" transaction_start : start a transaction\n" +" transaction_commit : commit a transaction\n" +" transaction_cancel : cancel a transaction\n" +" erase : erase the database\n" +" dump : dump the database as strings\n" +" keys : dump the database keys as strings\n" +" hexkeys : dump the database keys as hex values\n" +" info : print summary info about the database\n" +" insert key data : insert a record\n" +" move key file : move a record to a destination ntdb\n" +" store key data : store a record (replace)\n" +" show key : show a record by key\n" +" delete key : delete a record by key\n" +#if 0 +" list : print the database hash table and freelist\n" +" free : print the database freelist\n" +#endif +" check : check the integrity of an opened database\n" +" speed : perform speed tests on the database\n" +" ! 
command : execute system command\n" +" 1 | first : print the first record\n" +" n | next : print the next record\n" +" q | quit : terminate\n" +" \\n : repeat 'next' command\n" +"\n"); +} + +static void terror(enum NTDB_ERROR err, const char *why) +{ + if (err != NTDB_SUCCESS) + printf("%s:%s\n", ntdb_errorstr(err), why); + else + printf("%s\n", why); +} + +static void create_ntdb(const char *tdbname) +{ + union ntdb_attribute log_attr; + log_attr.base.attr = NTDB_ATTRIBUTE_LOG; + log_attr.base.next = NULL; + log_attr.log.fn = ntdb_log; + + if (ntdb) ntdb_close(ntdb); + ntdb = ntdb_open(tdbname, (disable_mmap?NTDB_NOMMAP:0), + O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr); + if (!ntdb) { + printf("Could not create %s: %s\n", tdbname, strerror(errno)); + } +} + +static void open_ntdb(const char *tdbname) +{ + union ntdb_attribute log_attr; + log_attr.base.attr = NTDB_ATTRIBUTE_LOG; + log_attr.base.next = NULL; + log_attr.log.fn = ntdb_log; + + if (ntdb) ntdb_close(ntdb); + ntdb = ntdb_open(tdbname, disable_mmap?NTDB_NOMMAP:0, O_RDWR, 0600, + &log_attr); + if (!ntdb) { + printf("Could not open %s: %s\n", tdbname, strerror(errno)); + } +} + +static void insert_ntdb(char *keyname, size_t keylen, char* data, size_t datalen) +{ + NTDB_DATA key, dbuf; + enum NTDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(NTDB_SUCCESS, "need key"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + dbuf.dptr = (unsigned char *)data; + dbuf.dsize = datalen; + + ecode = ntdb_store(ntdb, key, dbuf, NTDB_INSERT); + if (ecode) { + terror(ecode, "insert failed"); + } +} + +static void store_ntdb(char *keyname, size_t keylen, char* data, size_t datalen) +{ + NTDB_DATA key, dbuf; + enum NTDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(NTDB_SUCCESS, "need key"); + return; + } + + if ((data == NULL) || (datalen == 0)) { + terror(NTDB_SUCCESS, "need data"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = 
keylen; + dbuf.dptr = (unsigned char *)data; + dbuf.dsize = datalen; + + printf("Storing key:\n"); + print_rec(ntdb, key, dbuf, NULL); + + ecode = ntdb_store(ntdb, key, dbuf, NTDB_REPLACE); + if (ecode) { + terror(ecode, "store failed"); + } +} + +static void show_ntdb(char *keyname, size_t keylen) +{ + NTDB_DATA key, dbuf; + enum NTDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(NTDB_SUCCESS, "need key"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + + ecode = ntdb_fetch(ntdb, key, &dbuf); + if (ecode) { + terror(ecode, "fetch failed"); + return; + } + + print_rec(ntdb, key, dbuf, NULL); + + free( dbuf.dptr ); +} + +static void delete_ntdb(char *keyname, size_t keylen) +{ + NTDB_DATA key; + enum NTDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(NTDB_SUCCESS, "need key"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + + ecode = ntdb_delete(ntdb, key); + if (ecode) { + terror(ecode, "delete failed"); + } +} + +static void move_rec(char *keyname, size_t keylen, char* tdbname) +{ + NTDB_DATA key, dbuf; + struct ntdb_context *dst_ntdb; + enum NTDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(NTDB_SUCCESS, "need key"); + return; + } + + if ( !tdbname ) { + terror(NTDB_SUCCESS, "need destination ntdb name"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + + ecode = ntdb_fetch(ntdb, key, &dbuf); + if (ecode) { + terror(ecode, "fetch failed"); + return; + } + + print_rec(ntdb, key, dbuf, NULL); + + dst_ntdb = ntdb_open(tdbname, 0, O_RDWR, 0600, NULL); + if ( !dst_ntdb ) { + terror(NTDB_SUCCESS, "unable to open destination ntdb"); + return; + } + + ecode = ntdb_store( dst_ntdb, key, dbuf, NTDB_REPLACE); + if (ecode) + terror(ecode, "failed to move record"); + else + printf("record moved\n"); + + ntdb_close( dst_ntdb ); +} + +static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void 
*state) +{ + printf("\nkey %d bytes\n", (int)key.dsize); + print_asc((const char *)key.dptr, key.dsize); + printf("\ndata %d bytes\n", (int)dbuf.dsize); + print_data((const char *)dbuf.dptr, dbuf.dsize); + return 0; +} + +static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) +{ + printf("key %d bytes: ", (int)key.dsize); + print_asc((const char *)key.dptr, key.dsize); + printf("\n"); + return 0; +} + +static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) +{ + printf("key %d bytes\n", (int)key.dsize); + print_data((const char *)key.dptr, key.dsize); + printf("\n"); + return 0; +} + +static int total_bytes; + +static int traverse_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state) +{ + total_bytes += dbuf.dsize; + return 0; +} + +static void info_ntdb(void) +{ + enum NTDB_ERROR ecode; + char *summary; + + ecode = ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &summary); + + if (ecode) { + terror(ecode, "Getting summary"); + } else { + printf("%s", summary); + free(summary); + } +} + +static void speed_ntdb(const char *tlimit) +{ + unsigned timelimit = tlimit?atoi(tlimit):0; + double t; + int ops; + if (timelimit == 0) timelimit = 5; + + ops = 0; + printf("Testing store speed for %u seconds\n", timelimit); + _start_timer(); + do { + long int r = random(); + NTDB_DATA key, dbuf; + key = ntdb_mkdata("store test", strlen("store test")); + dbuf.dptr = (unsigned char *)&r; + dbuf.dsize = sizeof(r); + ntdb_store(ntdb, key, dbuf, NTDB_REPLACE); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); + + ops = 0; + printf("Testing fetch speed for %u seconds\n", timelimit); + _start_timer(); + do { + long int r = random(); + NTDB_DATA key, dbuf; + key = ntdb_mkdata("store test", strlen("store test")); + dbuf.dptr = (unsigned char *)&r; + dbuf.dsize = sizeof(r); + ntdb_fetch(ntdb, key, &dbuf); + t = _end_timer(); + ops++; + } while 
(t < timelimit); + printf("%10.3f ops/sec\n", ops/t); + + ops = 0; + printf("Testing transaction speed for %u seconds\n", timelimit); + _start_timer(); + do { + long int r = random(); + NTDB_DATA key, dbuf; + key = ntdb_mkdata("transaction test", strlen("transaction test")); + dbuf.dptr = (unsigned char *)&r; + dbuf.dsize = sizeof(r); + ntdb_transaction_start(ntdb); + ntdb_store(ntdb, key, dbuf, NTDB_REPLACE); + ntdb_transaction_commit(ntdb); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); + + ops = 0; + printf("Testing traverse speed for %u seconds\n", timelimit); + _start_timer(); + do { + ntdb_traverse(ntdb, traverse_fn, NULL); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); +} + +static void toggle_mmap(void) +{ + disable_mmap = !disable_mmap; + if (disable_mmap) { + printf("mmap is disabled\n"); + } else { + printf("mmap is enabled\n"); + } +} + +static char *ntdb_getline(const char *prompt) +{ + static char thisline[1024]; + char *p; + fputs(prompt, stdout); + thisline[0] = 0; + p = fgets(thisline, sizeof(thisline)-1, stdin); + if (p) p = strchr(p, '\n'); + if (p) *p = 0; + return p?thisline:NULL; +} + +static int do_delete_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, + void *state) +{ + return ntdb_delete(the_ntdb, key); +} + +static void first_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey) +{ + NTDB_DATA dbuf; + enum NTDB_ERROR ecode; + ecode = ntdb_firstkey(the_ntdb, pkey); + if (!ecode) + ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf); + if (ecode) terror(ecode, "fetch failed"); + else { + print_rec(the_ntdb, *pkey, dbuf, NULL); + } +} + +static void next_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey) +{ + NTDB_DATA dbuf; + enum NTDB_ERROR ecode; + ecode = ntdb_nextkey(the_ntdb, pkey); + + if (!ecode) + ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf); + if (ecode) + terror(ecode, "fetch failed"); + else + print_rec(the_ntdb, *pkey, dbuf, 
NULL); +} + +static void check_db(struct ntdb_context *the_ntdb) +{ + if (!the_ntdb) { + printf("Error: No database opened!\n"); + } else { + if (ntdb_check(the_ntdb, NULL, NULL) != 0) + printf("Integrity check for the opened database failed.\n"); + else + printf("Database integrity is OK.\n"); + } +} + +static int do_command(void) +{ + COMMAND_TABLE *ctp = cmd_table; + enum commands mycmd = CMD_HELP; + int cmd_len; + + if (cmdname && strlen(cmdname) == 0) { + mycmd = CMD_NEXT; + } else { + while (ctp->name) { + cmd_len = strlen(ctp->name); + if (strncmp(ctp->name,cmdname,cmd_len) == 0) { + mycmd = ctp->cmd; + break; + } + ctp++; + } + } + + switch (mycmd) { + case CMD_CREATE_NTDB: + bIterate = 0; + create_ntdb(arg1); + return 0; + case CMD_OPEN_NTDB: + bIterate = 0; + open_ntdb(arg1); + return 0; + case CMD_SYSTEM: + /* Shell command */ + if (system(arg1) == -1) { + terror(NTDB_SUCCESS, "system() call failed\n"); + } + return 0; + case CMD_QUIT: + return 1; + default: + /* all the rest require a open database */ + if (!ntdb) { + bIterate = 0; + terror(NTDB_SUCCESS, "database not open"); + help(); + return 0; + } + switch (mycmd) { + case CMD_TRANSACTION_START: + bIterate = 0; + ntdb_transaction_start(ntdb); + return 0; + case CMD_TRANSACTION_COMMIT: + bIterate = 0; + ntdb_transaction_commit(ntdb); + return 0; + case CMD_TRANSACTION_CANCEL: + bIterate = 0; + ntdb_transaction_cancel(ntdb); + return 0; + case CMD_ERASE: + bIterate = 0; + ntdb_traverse(ntdb, do_delete_fn, NULL); + return 0; + case CMD_DUMP: + bIterate = 0; + ntdb_traverse(ntdb, print_rec, NULL); + return 0; + case CMD_INSERT: + bIterate = 0; + insert_ntdb(arg1, arg1len,arg2,arg2len); + return 0; + case CMD_MOVE: + bIterate = 0; + move_rec(arg1,arg1len,arg2); + return 0; + case CMD_STORE: + bIterate = 0; + store_ntdb(arg1,arg1len,arg2,arg2len); + return 0; + case CMD_SHOW: + bIterate = 0; + show_ntdb(arg1, arg1len); + return 0; + case CMD_KEYS: + ntdb_traverse(ntdb, print_key, NULL); + return 0; + case 
CMD_HEXKEYS: + ntdb_traverse(ntdb, print_hexkey, NULL); + return 0; + case CMD_DELETE: + bIterate = 0; + delete_ntdb(arg1,arg1len); + return 0; +#if 0 + case CMD_LIST_HASH_FREE: + ntdb_dump_all(ntdb); + return 0; + case CMD_LIST_FREE: + ntdb_printfreelist(ntdb); + return 0; +#endif + case CMD_INFO: + info_ntdb(); + return 0; + case CMD_SPEED: + speed_ntdb(arg1); + return 0; + case CMD_MMAP: + toggle_mmap(); + return 0; + case CMD_FIRST: + bIterate = 1; + first_record(ntdb, &iterate_kbuf); + return 0; + case CMD_NEXT: + if (bIterate) + next_record(ntdb, &iterate_kbuf); + return 0; + case CMD_CHECK: + check_db(ntdb); + return 0; + case CMD_HELP: + help(); + return 0; + case CMD_CREATE_NTDB: + case CMD_OPEN_NTDB: + case CMD_SYSTEM: + case CMD_QUIT: + /* + * unhandled commands. cases included here to avoid compiler + * warnings. + */ + return 0; + } + } + + return 0; +} + +static char *convert_string(char *instring, size_t *sizep) +{ + size_t length = 0; + char *outp, *inp; + char temp[3]; + + outp = inp = instring; + + while (*inp) { + if (*inp == '\\') { + inp++; + if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) { + temp[0] = *inp++; + temp[1] = '\0'; + if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) { + temp[1] = *inp++; + temp[2] = '\0'; + } + *outp++ = (char)strtol((const char *)temp,NULL,16); + } else { + *outp++ = *inp++; + } + } else { + *outp++ = *inp++; + } + length++; + } + *sizep = length; + return instring; +} + +int main(int argc, char *argv[]) +{ + cmdname = ""; + arg1 = NULL; + arg1len = 0; + arg2 = NULL; + arg2len = 0; + + if (argv[1]) { + cmdname = "open"; + arg1 = argv[1]; + do_command(); + cmdname = ""; + arg1 = NULL; + } + + switch (argc) { + case 1: + case 2: + /* Interactive mode */ + while ((cmdname = ntdb_getline("ntdb> "))) { + arg2 = arg1 = NULL; + if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) { + arg1++; + arg2 = arg1; + while (*arg2) { + if (*arg2 == ' ') { + *arg2++ = '\0'; + break; + } + if ((*arg2++ == '\\') && 
(*arg2 == ' ')) { + arg2++; + } + } + } + if (arg1) arg1 = convert_string(arg1,&arg1len); + if (arg2) arg2 = convert_string(arg2,&arg2len); + if (do_command()) break; + } + break; + case 5: + arg2 = convert_string(argv[4],&arg2len); + case 4: + arg1 = convert_string(argv[3],&arg1len); + case 3: + cmdname = argv[2]; + default: + do_command(); + break; + } + + if (ntdb) ntdb_close(ntdb); + + return 0; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbtorture.c b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbtorture.c new file mode 100644 index 00000000..9fd25ca7 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/tools/ntdbtorture.c @@ -0,0 +1,535 @@ +/* this tests ntdb by doing lots of ops from several simultaneous + writers - that stresses the locking code. +*/ + +#include "config.h" +#include "ntdb.h" +#include "private.h" +#include + +//#define REOPEN_PROB 30 +#define DELETE_PROB 8 +#define STORE_PROB 4 +#define APPEND_PROB 6 +#define TRANSACTION_PROB 10 +#define TRANSACTION_PREPARE_PROB 2 +#define LOCKSTORE_PROB 5 +#define TRAVERSE_PROB 20 +#define TRAVERSE_MOD_PROB 100 +#define TRAVERSE_ABORT_PROB 500 +#define CULL_PROB 100 +#define KEYLEN 3 +#define DATALEN 100 + +static struct ntdb_context *db; +static int in_transaction; +static int in_traverse; +static int error_count; +#if TRANSACTION_PROB +static int always_transaction = 0; +#endif +static int loopnum; +static int count_pipe; +static union ntdb_attribute log_attr; +static union ntdb_attribute seed_attr; +static union ntdb_attribute hsize_attr; + +static void ntdb_log(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) +{ + printf("ntdb:%s:%s:%s\n", + ntdb_name(ntdb), ntdb_errorstr(ecode), message); + fflush(stdout); +#if 0 + { + char str[200]; + signal(SIGUSR1, SIG_IGN); + sprintf(str,"xterm -e gdb /proc/%u/exe %u", (unsigned int)getpid(), (unsigned int)getpid()); + system(str); + } +#endif +} + +#include "../private.h" + 
+static void segv_handler(int sig, siginfo_t *info, void *p) +{ + char string[100]; + + sprintf(string, "%u: death at %p (map_ptr %p, map_size %zu)\n", + (unsigned int)getpid(), info->si_addr, db->file->map_ptr, + (size_t)db->file->map_size); + if (write(2, string, strlen(string)) > 0) + sleep(60); + _exit(11); +} + +static void warn_on_err(enum NTDB_ERROR e, struct ntdb_context *ntdb, + const char *why) +{ + if (e != NTDB_SUCCESS) { + fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), why, + ntdb ? ntdb_errorstr(e) : "(no ntdb)"); + error_count++; + } +} + +static char *randbuf(int len) +{ + char *buf; + int i; + buf = (char *)malloc(len+1); + if (buf == NULL) { + perror("randbuf: unable to allocate memory for buffer.\n"); + exit(1); + } + + for (i=0;i +#include +#include +#include +#include +#include +#include +#include +#include +#include "ntdb.h" + +/* Nanoseconds per operation */ +static size_t normalize(const struct timeval *start, + const struct timeval *stop, + unsigned int num) +{ + struct timeval diff; + + timersub(stop, start, &diff); + + /* Floating point is more accurate here. 
*/ + return (double)(diff.tv_sec * 1000000 + diff.tv_usec) + / num * 1000; +} + +static size_t file_size(void) +{ + struct stat st; + + if (stat("/tmp/speed.ntdb", &st) != 0) + return -1; + return st.st_size; +} + +static int count_record(struct ntdb_context *ntdb, + NTDB_DATA key, NTDB_DATA data, void *p) +{ + int *total = p; + *total += *(int *)data.dptr; + return 0; +} + +static void dump_and_clear_stats(struct ntdb_context **ntdb, + int flags, + union ntdb_attribute *attr) +{ + union ntdb_attribute stats; + enum NTDB_ERROR ecode; + + stats.base.attr = NTDB_ATTRIBUTE_STATS; + stats.stats.size = sizeof(stats.stats); + ecode = ntdb_get_attribute(*ntdb, &stats); + if (ecode != NTDB_SUCCESS) + errx(1, "Getting stats: %s", ntdb_errorstr(ecode)); + + printf("allocs = %llu\n", + (unsigned long long)stats.stats.allocs); + printf(" alloc_subhash = %llu\n", + (unsigned long long)stats.stats.alloc_subhash); + printf(" alloc_chain = %llu\n", + (unsigned long long)stats.stats.alloc_chain); + printf(" alloc_bucket_exact = %llu\n", + (unsigned long long)stats.stats.alloc_bucket_exact); + printf(" alloc_bucket_max = %llu\n", + (unsigned long long)stats.stats.alloc_bucket_max); + printf(" alloc_leftover = %llu\n", + (unsigned long long)stats.stats.alloc_leftover); + printf(" alloc_coalesce_tried = %llu\n", + (unsigned long long)stats.stats.alloc_coalesce_tried); + printf(" alloc_coalesce_iterate_clash = %llu\n", + (unsigned long long)stats.stats.alloc_coalesce_iterate_clash); + printf(" alloc_coalesce_lockfail = %llu\n", + (unsigned long long)stats.stats.alloc_coalesce_lockfail); + printf(" alloc_coalesce_race = %llu\n", + (unsigned long long)stats.stats.alloc_coalesce_race); + printf(" alloc_coalesce_succeeded = %llu\n", + (unsigned long long)stats.stats.alloc_coalesce_succeeded); + printf(" alloc_coalesce_num_merged = %llu\n", + (unsigned long long)stats.stats.alloc_coalesce_num_merged); + printf("compares = %llu\n", + (unsigned long long)stats.stats.compares); + printf(" 
compare_wrong_offsetbits = %llu\n", + (unsigned long long)stats.stats.compare_wrong_offsetbits); + printf(" compare_wrong_keylen = %llu\n", + (unsigned long long)stats.stats.compare_wrong_keylen); + printf(" compare_wrong_rechash = %llu\n", + (unsigned long long)stats.stats.compare_wrong_rechash); + printf(" compare_wrong_keycmp = %llu\n", + (unsigned long long)stats.stats.compare_wrong_keycmp); + printf("transactions = %llu\n", + (unsigned long long)stats.stats.transactions); + printf(" transaction_cancel = %llu\n", + (unsigned long long)stats.stats.transaction_cancel); + printf(" transaction_nest = %llu\n", + (unsigned long long)stats.stats.transaction_nest); + printf(" transaction_expand_file = %llu\n", + (unsigned long long)stats.stats.transaction_expand_file); + printf(" transaction_read_direct = %llu\n", + (unsigned long long)stats.stats.transaction_read_direct); + printf(" transaction_read_direct_fail = %llu\n", + (unsigned long long)stats.stats.transaction_read_direct_fail); + printf(" transaction_write_direct = %llu\n", + (unsigned long long)stats.stats.transaction_write_direct); + printf(" transaction_write_direct_fail = %llu\n", + (unsigned long long)stats.stats.transaction_write_direct_fail); + printf("expands = %llu\n", + (unsigned long long)stats.stats.expands); + printf("frees = %llu\n", + (unsigned long long)stats.stats.frees); + printf("locks = %llu\n", + (unsigned long long)stats.stats.locks); + printf(" lock_lowlevel = %llu\n", + (unsigned long long)stats.stats.lock_lowlevel); + printf(" lock_nonblock = %llu\n", + (unsigned long long)stats.stats.lock_nonblock); + printf(" lock_nonblock_fail = %llu\n", + (unsigned long long)stats.stats.lock_nonblock_fail); + + /* Now clear. 
*/ + ntdb_close(*ntdb); + *ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR, 0, attr); +} + +static void ntdb_log(struct ntdb_context *ntdb, + enum ntdb_log_level level, + enum NTDB_ERROR ecode, + const char *message, + void *data) +{ + fprintf(stderr, "ntdb:%s:%s:%s\n", + ntdb_name(ntdb), ntdb_errorstr(ecode), message); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j, num = 1000, stage = 0, stopat = -1; + int flags = NTDB_DEFAULT; + bool transaction = false, summary = false; + NTDB_DATA key, data; + struct ntdb_context *ntdb; + struct timeval start, stop; + union ntdb_attribute seed, log; + bool do_stats = false; + enum NTDB_ERROR ecode; + + /* Try to keep benchmarks even. */ + seed.base.attr = NTDB_ATTRIBUTE_SEED; + seed.base.next = NULL; + seed.seed.seed = 0; + + log.base.attr = NTDB_ATTRIBUTE_LOG; + log.base.next = &seed; + log.log.fn = ntdb_log; + + if (argv[1] && strcmp(argv[1], "--internal") == 0) { + flags = NTDB_INTERNAL; + argc--; + argv++; + } + if (argv[1] && strcmp(argv[1], "--transaction") == 0) { + transaction = true; + argc--; + argv++; + } + if (argv[1] && strcmp(argv[1], "--no-sync") == 0) { + flags |= NTDB_NOSYNC; + argc--; + argv++; + } + if (argv[1] && strcmp(argv[1], "--summary") == 0) { + summary = true; + argc--; + argv++; + } + if (argv[1] && strcmp(argv[1], "--stats") == 0) { + do_stats = true; + argc--; + argv++; + } + + ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR|O_CREAT|O_TRUNC, + 0600, &log); + if (!ntdb) + err(1, "Opening /tmp/speed.ntdb"); + + key.dptr = (void *)&i; + key.dsize = sizeof(i); + data = key; + + if (argv[1]) { + num = atoi(argv[1]); + argv++; + argc--; + } + + if (argv[1]) { + stopat = atoi(argv[1]); + argv++; + argc--; + } + + /* Add 1000 records. 
*/ + printf("Adding %u records: ", num); fflush(stdout); + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) + if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0) + errx(1, "Inserting key %u in ntdb: %s", + i, ntdb_errorstr(ecode)); + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + + if (++stage == stopat) + exit(0); + + /* Finding 1000 records. */ + printf("Finding %u records: ", num); fflush(stdout); + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) { + NTDB_DATA dbuf; + if ((ecode = ntdb_fetch(ntdb, key, &dbuf)) != NTDB_SUCCESS + || *(int *)dbuf.dptr != i) { + errx(1, "Fetching key %u in ntdb gave %u", + i, ecode ? ecode : *(int *)dbuf.dptr); + } + } + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + if (++stage == stopat) + exit(0); + + /* Missing 1000 records. 
*/ + printf("Missing %u records: ", num); fflush(stdout); + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + gettimeofday(&start, NULL); + for (i = num; i < num*2; i++) { + NTDB_DATA dbuf; + ecode = ntdb_fetch(ntdb, key, &dbuf); + if (ecode != NTDB_ERR_NOEXIST) + errx(1, "Fetching key %u in ntdb gave %s", + i, ntdb_errorstr(ecode)); + } + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + if (++stage == stopat) + exit(0); + + /* Traverse 1000 records. */ + printf("Traversing %u records: ", num); fflush(stdout); + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + i = 0; + gettimeofday(&start, NULL); + if (ntdb_traverse(ntdb, count_record, &i) != num) + errx(1, "Traverse returned wrong number of records"); + if (i != (num - 1) * (num / 2)) + errx(1, "Traverse tallied to %u", i); + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + if (++stage == stopat) + exit(0); + + /* Delete 1000 records (not in order). 
*/ + printf("Deleting %u records: ", num); fflush(stdout); + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + gettimeofday(&start, NULL); + for (j = 0; j < num; j++) { + i = (j + 100003) % num; + if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS) + errx(1, "Deleting key %u in ntdb: %s", + i, ntdb_errorstr(ecode)); + } + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + if (++stage == stopat) + exit(0); + + /* Re-add 1000 records (not in order). */ + printf("Re-adding %u records: ", num); fflush(stdout); + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + gettimeofday(&start, NULL); + for (j = 0; j < num; j++) { + i = (j + 100003) % num; + if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0) + errx(1, "Inserting key %u in ntdb: %s", + i, ntdb_errorstr(ecode)); + } + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + if (++stage == stopat) + exit(0); + + /* Append 1000 records. 
*/ + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + printf("Appending %u records: ", num); fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) + if ((ecode = ntdb_append(ntdb, key, data)) != NTDB_SUCCESS) + errx(1, "Appending key %u in ntdb: %s", + i, ntdb_errorstr(ecode)); + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (++stage == stopat) + exit(0); + + /* Churn 1000 records: not in order! */ + if (transaction && (ecode = ntdb_transaction_start(ntdb))) + errx(1, "starting transaction: %s", ntdb_errorstr(ecode)); + printf("Churning %u records: ", num); fflush(stdout); + gettimeofday(&start, NULL); + for (j = 0; j < num; j++) { + i = (j + 1000019) % num; + if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS) + errx(1, "Deleting key %u in ntdb: %s", + i, ntdb_errorstr(ecode)); + i += num; + if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0) + errx(1, "Inserting key %u in ntdb: %s", + i, ntdb_errorstr(ecode)); + } + gettimeofday(&stop, NULL); + if (transaction && (ecode = ntdb_transaction_commit(ntdb))) + errx(1, "committing transaction: %s", ntdb_errorstr(ecode)); + printf(" %zu ns (%zu bytes)\n", + normalize(&start, &stop, num), file_size()); + + if (ntdb_check(ntdb, NULL, NULL)) + errx(1, "ntdb_check failed!"); + if (summary) { + char *sumstr = NULL; + ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr); + printf("%s\n", sumstr); + free(sumstr); + } + if (do_stats) + dump_and_clear_stats(&ntdb, flags, &log); + if (++stage == stopat) + exit(0); + + return 0; +} 
diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/transaction.c b/junkcode/rusty@rustcorp.com.au-ntdb/transaction.c new file mode 100644 index 00000000..f2762166 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/transaction.c @@ -0,0 +1,1317 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 2005 + Copyright (C) Rusty Russell 2010 + + ** NOTE! The following LGPL license applies to the ntdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "private.h" +#include +#define SAFE_FREE(ntdb, x) do { if ((x) != NULL) {ntdb->free_fn((void *)x, ntdb->alloc_data); (x)=NULL;} } while(0) + +/* + transaction design: + + - only allow a single transaction at a time per database. This makes + using the transaction API simpler, as otherwise the caller would + have to cope with temporary failures in transactions that conflict + with other current transactions + + - keep the transaction recovery information in the same file as the + database, using a special 'transaction recovery' record pointed at + by the header. This removes the need for extra journal files as + used by some other databases + + - dynamically allocate the transaction recovery record, re-using it + for subsequent transactions.
If a larger record is needed then + ntdb_free() the old record to place it on the normal ntdb freelist + before allocating the new record + + - during transactions, keep a linked list of all writes that have + been performed by intercepting all ntdb_write() calls. The hooked + transaction versions of ntdb_read() and ntdb_write() check this + linked list and try to use the elements of the list in preference + to the real database. + + - don't allow any locks to be held when a transaction starts, + otherwise we can end up with deadlock (plus lack of lock nesting + in POSIX locks would mean the lock is lost) + + - if the caller gains a lock during the transaction but doesn't + release it then fail the commit + + - allow for nested calls to ntdb_transaction_start(), re-using the + existing transaction record. If the inner transaction is canceled + then a subsequent commit will fail + + - keep a mirrored copy of the ntdb hash chain heads to allow for the + fast hash heads scan on traverse, updating the mirrored copy in + the transaction version of ntdb_write + + - allow callers to mix transaction and non-transaction use of ntdb, + although once a transaction is started then an exclusive lock is + gained until the transaction is committed or canceled + + - the commit strategy involves first saving away all modified data + into a linearised buffer in the transaction recovery area, then + marking the transaction recovery area with a magic value to + indicate a valid recovery record. In total 4 fsync/msync calls are + needed per commit to prevent race conditions. It might be possible + to reduce this to 3 or even 2 with some more work. + + - check for a valid recovery record on open of the ntdb, while the + open lock is held. Automatically recover from the transaction + recovery area if needed, then continue with the open as + usual. This allows for smooth crash recovery with no administrator + intervention.
+ + - if NTDB_NOSYNC is passed to flags in ntdb_open then transactions are + still available, but no fsync/msync calls are made. This means we + still are safe against unexpected death during transaction commit, + but not against machine reboots. +*/ + +/* + hold the context of any current transaction +*/ +struct ntdb_transaction { + /* the original io methods - used to do IOs to the real db */ + const struct ntdb_methods *io_methods; + + /* the list of transaction blocks. When a block is first + written to, it gets created in this list */ + uint8_t **blocks; + size_t num_blocks; + + /* non-zero when an internal transaction error has + occurred. All write operations will then fail until the + transaction is ended */ + int transaction_error; + + /* when inside a transaction we need to keep track of any + nested ntdb_transaction_start() calls, as these are allowed, + but don't create a new transaction */ + unsigned int nesting; + + /* set when a prepare has already occurred */ + bool prepared; + ntdb_off_t magic_offset; + + /* old file size before transaction */ + ntdb_len_t old_map_size; +}; + +/* + read while in a transaction.
We need to check first if the data is in our list + of transaction elements, then if not do a real read +*/ +static enum NTDB_ERROR transaction_read(struct ntdb_context *ntdb, ntdb_off_t off, + void *buf, ntdb_len_t len) +{ + size_t blk; + enum NTDB_ERROR ecode; + + /* break it down into block sized ops */ + while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) { + ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE); + ecode = transaction_read(ntdb, off, buf, len2); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + len -= len2; + off += len2; + buf = (void *)(len2 + (char *)buf); + } + + if (len == 0) { + return NTDB_SUCCESS; + } + + blk = off / NTDB_PGSIZE; + + /* see if we have it in the block list */ + if (ntdb->transaction->num_blocks <= blk || + ntdb->transaction->blocks[blk] == NULL) { + /* nope, do a real read */ + ecode = ntdb->transaction->io_methods->tread(ntdb, off, buf, len); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + return 0; + } + + /* now copy it out of this block */ + memcpy(buf, ntdb->transaction->blocks[blk] + (off % NTDB_PGSIZE), len); + return NTDB_SUCCESS; + +fail: + ntdb->transaction->transaction_error = 1; + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "transaction_read: failed at off=%zu len=%zu", + (size_t)off, (size_t)len); +} + + +/* + write while in a transaction +*/ +static enum NTDB_ERROR transaction_write(struct ntdb_context *ntdb, ntdb_off_t off, + const void *buf, ntdb_len_t len) +{ + size_t blk; + enum NTDB_ERROR ecode; + + /* Only a commit is allowed on a prepared transaction */ + if (ntdb->transaction->prepared) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR, + "transaction_write: transaction already" + " prepared, write not allowed"); + goto fail; + } + + /* break it up into block sized chunks */ + while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) { + ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE); + ecode = transaction_write(ntdb, off, buf, len2); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + 
len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); + } + } + + if (len == 0) { + return NTDB_SUCCESS; + } + + blk = off / NTDB_PGSIZE; + off = off % NTDB_PGSIZE; + + if (ntdb->transaction->num_blocks <= blk) { + uint8_t **new_blocks; + /* expand the blocks array */ + if (ntdb->transaction->blocks == NULL) { + new_blocks = (uint8_t **)ntdb->alloc_fn(ntdb, + (blk+1)*sizeof(uint8_t *), ntdb->alloc_data); + } else { + new_blocks = (uint8_t **)ntdb->expand_fn( + ntdb->transaction->blocks, + (blk+1)*sizeof(uint8_t *), ntdb->alloc_data); + } + if (new_blocks == NULL) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "transaction_write:" + " failed to allocate"); + goto fail; + } + memset(&new_blocks[ntdb->transaction->num_blocks], 0, + (1+(blk - ntdb->transaction->num_blocks))*sizeof(uint8_t *)); + ntdb->transaction->blocks = new_blocks; + ntdb->transaction->num_blocks = blk+1; + } + + /* allocate and fill a block? */ + if (ntdb->transaction->blocks[blk] == NULL) { + ntdb->transaction->blocks[blk] = (uint8_t *) + ntdb->alloc_fn(ntdb->transaction->blocks, NTDB_PGSIZE, + ntdb->alloc_data); + if (ntdb->transaction->blocks[blk] == NULL) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "transaction_write:" + " failed to allocate"); + goto fail; + } + memset(ntdb->transaction->blocks[blk], 0, NTDB_PGSIZE); + if (ntdb->transaction->old_map_size > blk * NTDB_PGSIZE) { + ntdb_len_t len2 = NTDB_PGSIZE; + if (len2 + (blk * NTDB_PGSIZE) > ntdb->transaction->old_map_size) { + len2 = ntdb->transaction->old_map_size - (blk * NTDB_PGSIZE); + } + ecode = ntdb->transaction->io_methods->tread(ntdb, + blk * NTDB_PGSIZE, + ntdb->transaction->blocks[blk], + len2); + if (ecode != NTDB_SUCCESS) { + ecode = ntdb_logerr(ntdb, ecode, + NTDB_LOG_ERROR, + "transaction_write:" + " failed to" + " read old block: %s", + strerror(errno)); + SAFE_FREE(ntdb, ntdb->transaction->blocks[blk]); + goto fail; + } + } + } + + /* overwrite 
part of an existing block */ + if (buf == NULL) { + memset(ntdb->transaction->blocks[blk] + off, 0, len); + } else { + memcpy(ntdb->transaction->blocks[blk] + off, buf, len); + } + return NTDB_SUCCESS; + +fail: + ntdb->transaction->transaction_error = 1; + return ecode; +} + + +/* + write while in a transaction - this variant never expands the transaction blocks, it only + updates existing blocks. This means it cannot change the recovery size +*/ +static void transaction_write_existing(struct ntdb_context *ntdb, ntdb_off_t off, + const void *buf, ntdb_len_t len) +{ + size_t blk; + + /* break it up into block sized chunks */ + while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) { + ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE); + transaction_write_existing(ntdb, off, buf, len2); + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); + } + } + + if (len == 0) { + return; + } + + blk = off / NTDB_PGSIZE; + off = off % NTDB_PGSIZE; + + if (ntdb->transaction->num_blocks <= blk || + ntdb->transaction->blocks[blk] == NULL) { + return; + } + + /* overwrite part of an existing block */ + memcpy(ntdb->transaction->blocks[blk] + off, buf, len); +} + + +/* + out of bounds check during a transaction +*/ +static enum NTDB_ERROR transaction_oob(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool probe) +{ + if ((off + len >= off && off + len <= ntdb->file->map_size) || probe) { + return NTDB_SUCCESS; + } + + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob len %lld beyond transaction size %lld", + (long long)(off + len), + (long long)ntdb->file->map_size); + return NTDB_ERR_IO; +} + +/* + transaction version of ntdb_expand(). 
+*/ +static enum NTDB_ERROR transaction_expand_file(struct ntdb_context *ntdb, + ntdb_off_t addition) +{ + enum NTDB_ERROR ecode; + + assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0); + + /* add a write to the transaction elements, so subsequent + reads see the zero data */ + ecode = transaction_write(ntdb, ntdb->file->map_size, NULL, addition); + if (ecode == NTDB_SUCCESS) { + ntdb->file->map_size += addition; + } + return ecode; +} + +static void *transaction_direct(struct ntdb_context *ntdb, ntdb_off_t off, + size_t len, bool write_mode) +{ + size_t blk = off / NTDB_PGSIZE, end_blk; + + /* This is wrong for zero-length blocks, but will fail gracefully */ + end_blk = (off + len - 1) / NTDB_PGSIZE; + + /* Can only do direct if in single block and we've already copied. */ + if (write_mode) { + ntdb->stats.transaction_write_direct++; + if (blk != end_blk + || blk >= ntdb->transaction->num_blocks + || ntdb->transaction->blocks[blk] == NULL) { + ntdb->stats.transaction_write_direct_fail++; + return NULL; + } + return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE; + } + + ntdb->stats.transaction_read_direct++; + /* Single which we have copied? */ + if (blk == end_blk + && blk < ntdb->transaction->num_blocks + && ntdb->transaction->blocks[blk]) + return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE; + + /* Otherwise must be all not copied. 
*/ + while (blk <= end_blk) { + if (blk >= ntdb->transaction->num_blocks) + break; + if (ntdb->transaction->blocks[blk]) { + ntdb->stats.transaction_read_direct_fail++; + return NULL; + } + blk++; + } + return ntdb->transaction->io_methods->direct(ntdb, off, len, false); +} + +static ntdb_off_t transaction_read_off(struct ntdb_context *ntdb, + ntdb_off_t off) +{ + ntdb_off_t ret; + enum NTDB_ERROR ecode; + + ecode = transaction_read(ntdb, off, &ret, sizeof(ret)); + ntdb_convert(ntdb, &ret, sizeof(ret)); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + return ret; +} + +static enum NTDB_ERROR transaction_write_off(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_off_t val) +{ + ntdb_convert(ntdb, &val, sizeof(val)); + return transaction_write(ntdb, off, &val, sizeof(val)); +} + +static const struct ntdb_methods transaction_methods = { + transaction_read, + transaction_write, + transaction_oob, + transaction_expand_file, + transaction_direct, + transaction_read_off, + transaction_write_off, +}; + +/* + sync to disk +*/ +static enum NTDB_ERROR transaction_sync(struct ntdb_context *ntdb, + ntdb_off_t offset, ntdb_len_t length) +{ + if (ntdb->flags & NTDB_NOSYNC) { + return NTDB_SUCCESS; + } + + if (fsync(ntdb->file->fd) != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_transaction: fsync failed: %s", + strerror(errno)); + } +#ifdef MS_SYNC + if (ntdb->file->map_ptr) { + ntdb_off_t moffset = offset & ~(getpagesize()-1); + if (msync(moffset + (char *)ntdb->file->map_ptr, + length + (offset - moffset), MS_SYNC) != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_transaction: msync failed: %s", + strerror(errno)); + } + } +#endif + return NTDB_SUCCESS; +} + +static void free_transaction_blocks(struct ntdb_context *ntdb) +{ + int i; + + /* free all the transaction blocks */ + for (i=0;itransaction->num_blocks;i++) { + if (ntdb->transaction->blocks[i] != NULL) { + ntdb->free_fn(ntdb->transaction->blocks[i], + 
ntdb->alloc_data); + } + } + SAFE_FREE(ntdb, ntdb->transaction->blocks); + ntdb->transaction->num_blocks = 0; +} + +static void _ntdb_transaction_cancel(struct ntdb_context *ntdb) +{ + enum NTDB_ERROR ecode; + + if (ntdb->transaction == NULL) { + ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_transaction_cancel: no transaction"); + return; + } + + if (ntdb->transaction->nesting != 0) { + ntdb->transaction->transaction_error = 1; + ntdb->transaction->nesting--; + return; + } + + ntdb->file->map_size = ntdb->transaction->old_map_size; + + free_transaction_blocks(ntdb); + + if (ntdb->transaction->magic_offset) { + const struct ntdb_methods *methods = ntdb->transaction->io_methods; + uint64_t invalid = NTDB_RECOVERY_INVALID_MAGIC; + + /* remove the recovery marker */ + ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset, + &invalid, sizeof(invalid)); + if (ecode == NTDB_SUCCESS) + ecode = transaction_sync(ntdb, + ntdb->transaction->magic_offset, + sizeof(invalid)); + if (ecode != NTDB_SUCCESS) { + ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_cancel: failed to remove" + " recovery magic"); + } + } + + if (ntdb->file->allrecord_lock.count) + ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype); + + /* restore the normal io methods */ + ntdb->io = ntdb->transaction->io_methods; + + ntdb_transaction_unlock(ntdb, F_WRLCK); + + if (ntdb_has_open_lock(ntdb)) + ntdb_unlock_open(ntdb, F_WRLCK); + + SAFE_FREE(ntdb, ntdb->transaction); +} + +/* + start a ntdb transaction. 
No token is returned, as only a single + transaction is allowed to be pending per ntdb_context +*/ +_PUBLIC_ enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb) +{ + enum NTDB_ERROR ecode; + + ntdb->stats.transactions++; + /* some sanity checks */ + if (ntdb->flags & NTDB_INTERNAL) { + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_transaction_start:" + " cannot start a transaction on an" + " internal ntdb"); + } + + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "ntdb_transaction_start:" + " cannot start a transaction on a" + " read-only ntdb"); + } + + /* cope with nested ntdb_transaction_start() calls */ + if (ntdb->transaction != NULL) { + if (!(ntdb->flags & NTDB_ALLOW_NESTING)) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, + NTDB_LOG_USE_ERROR, + "ntdb_transaction_start:" + " already inside transaction"); + } + ntdb->transaction->nesting++; + ntdb->stats.transaction_nest++; + return 0; + } + + if (ntdb_has_hash_locks(ntdb)) { + /* the caller must not have any locks when starting a + transaction as otherwise we'll be screwed by lack + of nested locks in POSIX */ + return ntdb_logerr(ntdb, NTDB_ERR_LOCK, + NTDB_LOG_USE_ERROR, + "ntdb_transaction_start:" + " cannot start a transaction with locks" + " held"); + } + + ntdb->transaction = (struct ntdb_transaction *) + ntdb->alloc_fn(ntdb, sizeof(struct ntdb_transaction), + ntdb->alloc_data); + if (ntdb->transaction == NULL) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_transaction_start:" + " cannot allocate"); + } + memset(ntdb->transaction, 0, sizeof(*ntdb->transaction)); + + /* get the transaction write lock. This is a blocking lock. 
As + discussed with Volker, there are a number of ways we could + make this async, which we will probably do in the future */ + ecode = ntdb_transaction_lock(ntdb, F_WRLCK); + if (ecode != NTDB_SUCCESS) { + SAFE_FREE(ntdb, ntdb->transaction->blocks); + SAFE_FREE(ntdb, ntdb->transaction); + return ecode; + } + + /* get a read lock over entire file. This is upgraded to a write + lock during the commit */ + ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, true); + if (ecode != NTDB_SUCCESS) { + goto fail_allrecord_lock; + } + + /* make sure we know about any file expansions already done by + anyone else */ + ntdb_oob(ntdb, ntdb->file->map_size, 1, true); + ntdb->transaction->old_map_size = ntdb->file->map_size; + + /* finally hook the io methods, replacing them with + transaction specific methods */ + ntdb->transaction->io_methods = ntdb->io; + ntdb->io = &transaction_methods; + return NTDB_SUCCESS; + +fail_allrecord_lock: + ntdb_transaction_unlock(ntdb, F_WRLCK); + SAFE_FREE(ntdb, ntdb->transaction->blocks); + SAFE_FREE(ntdb, ntdb->transaction); + return ecode; +} + + +/* + cancel the current transaction +*/ +_PUBLIC_ void ntdb_transaction_cancel(struct ntdb_context *ntdb) +{ + ntdb->stats.transaction_cancel++; + _ntdb_transaction_cancel(ntdb); +} + +/* + work out how much space the linearised recovery data will consume (worst case) +*/ +static ntdb_len_t ntdb_recovery_size(struct ntdb_context *ntdb) +{ + ntdb_len_t recovery_size = 0; + int i; + + recovery_size = 0; + for (i=0;itransaction->num_blocks;i++) { + if (i * NTDB_PGSIZE >= ntdb->transaction->old_map_size) { + break; + } + if (ntdb->transaction->blocks[i] == NULL) { + continue; + } + recovery_size += 2*sizeof(ntdb_off_t) + NTDB_PGSIZE; + } + + return recovery_size; +} + +static enum NTDB_ERROR ntdb_recovery_area(struct ntdb_context *ntdb, + const struct ntdb_methods *methods, + ntdb_off_t *recovery_offset, + struct ntdb_recovery_record *rec) +{ + enum NTDB_ERROR ecode; + + *recovery_offset = 
ntdb_read_off(ntdb, + offsetof(struct ntdb_header, recovery)); + if (NTDB_OFF_IS_ERR(*recovery_offset)) { + return NTDB_OFF_TO_ERR(*recovery_offset); + } + + if (*recovery_offset == 0) { + rec->max_len = 0; + return NTDB_SUCCESS; + } + + ecode = methods->tread(ntdb, *recovery_offset, rec, sizeof(*rec)); + if (ecode != NTDB_SUCCESS) + return ecode; + + ntdb_convert(ntdb, rec, sizeof(*rec)); + /* ignore invalid recovery regions: can happen in crash */ + if (rec->magic != NTDB_RECOVERY_MAGIC && + rec->magic != NTDB_RECOVERY_INVALID_MAGIC) { + *recovery_offset = 0; + rec->max_len = 0; + } + return NTDB_SUCCESS; +} + +static unsigned int same(const unsigned char *new, + const unsigned char *old, + unsigned int length) +{ + unsigned int i; + + for (i = 0; i < length; i++) { + if (new[i] != old[i]) + break; + } + return i; +} + +static unsigned int different(const unsigned char *new, + const unsigned char *old, + unsigned int length, + unsigned int min_same, + unsigned int *samelen) +{ + unsigned int i; + + *samelen = 0; + for (i = 0; i < length; i++) { + if (new[i] == old[i]) { + (*samelen)++; + } else { + if (*samelen >= min_same) { + return i - *samelen; + } + *samelen = 0; + } + } + + if (*samelen < min_same) + *samelen = 0; + return length - *samelen; +} + +/* Allocates recovery blob, without ntdb_recovery_record at head set up. 
*/ +static struct ntdb_recovery_record *alloc_recovery(struct ntdb_context *ntdb, + ntdb_len_t *len) +{ + struct ntdb_recovery_record *rec; + size_t i; + enum NTDB_ERROR ecode; + unsigned char *p; + const struct ntdb_methods *old_methods = ntdb->io; + + rec = ntdb->alloc_fn(ntdb, sizeof(*rec) + ntdb_recovery_size(ntdb), + ntdb->alloc_data); + if (!rec) { + ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "transaction_setup_recovery:" + " cannot allocate"); + return NTDB_ERR_PTR(NTDB_ERR_OOM); + } + + /* We temporarily revert to the old I/O methods, so we can use + * ntdb_access_read */ + ntdb->io = ntdb->transaction->io_methods; + + /* build the recovery data into a single blob to allow us to do a single + large write, which should be more efficient */ + p = (unsigned char *)(rec + 1); + for (i=0;i<ntdb->transaction->num_blocks;i++) { + ntdb_off_t offset; + ntdb_len_t length; + unsigned int off; + const unsigned char *buffer; + + if (ntdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * NTDB_PGSIZE; + length = NTDB_PGSIZE; + if (offset >= ntdb->transaction->old_map_size) { + continue; + } + + if (offset + length > ntdb->file->map_size) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_transaction_setup_recovery:" + " transaction data over new region" + " boundary"); + goto fail; + } + buffer = ntdb_access_read(ntdb, offset, length, false); + if (NTDB_PTR_IS_ERR(buffer)) { + ecode = NTDB_PTR_ERR(buffer); + goto fail; + } + + /* Skip over anything the same at the start.
*/ + off = same(ntdb->transaction->blocks[i], buffer, length); + offset += off; + + while (off < length) { + ntdb_len_t len1; + unsigned int samelen; + + len1 = different(ntdb->transaction->blocks[i] + off, + buffer + off, length - off, + sizeof(offset) + sizeof(len1) + 1, + &samelen); + + memcpy(p, &offset, sizeof(offset)); + memcpy(p + sizeof(offset), &len1, sizeof(len1)); + ntdb_convert(ntdb, p, sizeof(offset) + sizeof(len1)); + p += sizeof(offset) + sizeof(len1); + memcpy(p, buffer + off, len1); + p += len1; + off += len1 + samelen; + offset += len1 + samelen; + } + ntdb_access_release(ntdb, buffer); + } + + *len = p - (unsigned char *)(rec + 1); + ntdb->io = old_methods; + return rec; + +fail: + ntdb->free_fn(rec, ntdb->alloc_data); + ntdb->io = old_methods; + return NTDB_ERR_PTR(ecode); +} + +static ntdb_off_t create_recovery_area(struct ntdb_context *ntdb, + ntdb_len_t rec_length, + struct ntdb_recovery_record *rec) +{ + ntdb_off_t off, recovery_off; + ntdb_len_t addition; + enum NTDB_ERROR ecode; + const struct ntdb_methods *methods = ntdb->transaction->io_methods; + + /* round up to a multiple of page size. Overallocate, since each + * such allocation forces us to expand the file. */ + rec->max_len = ntdb_expand_adjust(ntdb->file->map_size, rec_length); + + /* Round up to a page. */ + rec->max_len = ((sizeof(*rec) + rec->max_len + NTDB_PGSIZE-1) + & ~(NTDB_PGSIZE-1)) + - sizeof(*rec); + + off = ntdb->file->map_size; + + /* Restore ->map_size before calling underlying expand_file. 
+ Also so that we don't try to expand the file again in the + transaction commit, which would destroy the recovery + area */ + addition = (ntdb->file->map_size - ntdb->transaction->old_map_size) + + sizeof(*rec) + rec->max_len; + ntdb->file->map_size = ntdb->transaction->old_map_size; + ntdb->stats.transaction_expand_file++; + ecode = methods->expand_file(ntdb, addition); + if (ecode != NTDB_SUCCESS) { + ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_recovery_allocate:" + " failed to create recovery area"); + return NTDB_ERR_TO_OFF(ecode); + } + + /* we have to reset the old map size so that we don't try to + expand the file again in the transaction commit, which + would destroy the recovery area */ + ntdb->transaction->old_map_size = ntdb->file->map_size; + + /* write the recovery header offset and sync - we can sync without a race here + as the magic ptr in the recovery record has not been set */ + recovery_off = off; + ntdb_convert(ntdb, &recovery_off, sizeof(recovery_off)); + ecode = methods->twrite(ntdb, offsetof(struct ntdb_header, recovery), + &recovery_off, sizeof(ntdb_off_t)); + if (ecode != NTDB_SUCCESS) { + ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_recovery_allocate:" + " failed to write recovery head"); + return NTDB_ERR_TO_OFF(ecode); + } + transaction_write_existing(ntdb, offsetof(struct ntdb_header, recovery), + &recovery_off, + sizeof(ntdb_off_t)); + return off; +} + +/* + setup the recovery data that will be used on a crash during commit +*/ +static enum NTDB_ERROR transaction_setup_recovery(struct ntdb_context *ntdb) +{ + ntdb_len_t recovery_size = 0; + ntdb_off_t recovery_off = 0; + ntdb_off_t old_map_size = ntdb->transaction->old_map_size; + struct ntdb_recovery_record *recovery; + const struct ntdb_methods *methods = ntdb->transaction->io_methods; + uint64_t magic; + enum NTDB_ERROR ecode; + + recovery = alloc_recovery(ntdb, &recovery_size); + if (NTDB_PTR_IS_ERR(recovery)) + return NTDB_PTR_ERR(recovery); + + /* If we didn't actually 
change anything we overwrote? */ + if (recovery_size == 0) { + /* In theory, we could have just appended data. */ + if (ntdb->transaction->num_blocks * NTDB_PGSIZE + < ntdb->transaction->old_map_size) { + free_transaction_blocks(ntdb); + } + ntdb->free_fn(recovery, ntdb->alloc_data); + return NTDB_SUCCESS; + } + + ecode = ntdb_recovery_area(ntdb, methods, &recovery_off, recovery); + if (ecode) { + ntdb->free_fn(recovery, ntdb->alloc_data); + return ecode; + } + + if (recovery->max_len < recovery_size) { + /* Not large enough. Free up old recovery area. */ + if (recovery_off) { + ntdb->stats.frees++; + ecode = add_free_record(ntdb, recovery_off, + sizeof(*recovery) + + recovery->max_len, + NTDB_LOCK_WAIT, true); + ntdb->free_fn(recovery, ntdb->alloc_data); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_recovery_allocate:" + " failed to free previous" + " recovery area"); + } + + /* Refresh recovery after add_free_record above. */ + recovery = alloc_recovery(ntdb, &recovery_size); + if (NTDB_PTR_IS_ERR(recovery)) + return NTDB_PTR_ERR(recovery); + } + + recovery_off = create_recovery_area(ntdb, recovery_size, + recovery); + if (NTDB_OFF_IS_ERR(recovery_off)) { + ntdb->free_fn(recovery, ntdb->alloc_data); + return NTDB_OFF_TO_ERR(recovery_off); + } + } + + /* Now we know size, convert rec header. 
*/ + recovery->magic = NTDB_RECOVERY_INVALID_MAGIC; + recovery->len = recovery_size; + recovery->eof = old_map_size; + ntdb_convert(ntdb, recovery, sizeof(*recovery)); + + /* write the recovery data to the recovery area */ + ecode = methods->twrite(ntdb, recovery_off, recovery, + sizeof(*recovery) + recovery_size); + if (ecode != NTDB_SUCCESS) { + ntdb->free_fn(recovery, ntdb->alloc_data); + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_setup_recovery:" + " failed to write recovery data"); + } + transaction_write_existing(ntdb, recovery_off, recovery, recovery_size); + + ntdb->free_fn(recovery, ntdb->alloc_data); + + /* as we don't have ordered writes, we have to sync the recovery + data before we update the magic to indicate that the recovery + data is present */ + ecode = transaction_sync(ntdb, recovery_off, recovery_size); + if (ecode != NTDB_SUCCESS) + return ecode; + + magic = NTDB_RECOVERY_MAGIC; + ntdb_convert(ntdb, &magic, sizeof(magic)); + + ntdb->transaction->magic_offset + = recovery_off + offsetof(struct ntdb_recovery_record, magic); + + ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset, + &magic, sizeof(magic)); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_setup_recovery:" + " failed to write recovery magic"); + } + transaction_write_existing(ntdb, ntdb->transaction->magic_offset, + &magic, sizeof(magic)); + + /* ensure the recovery magic marker is on disk */ + return transaction_sync(ntdb, ntdb->transaction->magic_offset, + sizeof(magic)); +} + +static enum NTDB_ERROR _ntdb_transaction_prepare_commit(struct ntdb_context *ntdb) +{ + const struct ntdb_methods *methods; + enum NTDB_ERROR ecode; + + if (ntdb->transaction == NULL) { + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_transaction_prepare_commit:" + " no transaction"); + } + + if (ntdb->transaction->prepared) { + _ntdb_transaction_cancel(ntdb); + return ntdb_logerr(ntdb, 
NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_transaction_prepare_commit:" + " transaction already prepared"); + } + + if (ntdb->transaction->transaction_error) { + _ntdb_transaction_cancel(ntdb); + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR, + "ntdb_transaction_prepare_commit:" + " transaction error pending"); + } + + + if (ntdb->transaction->nesting != 0) { + return NTDB_SUCCESS; + } + + /* check for a null transaction */ + if (ntdb->transaction->blocks == NULL) { + return NTDB_SUCCESS; + } + + methods = ntdb->transaction->io_methods; + + /* upgrade the main transaction lock region to a write lock */ + ecode = ntdb_allrecord_upgrade(ntdb, NTDB_HASH_LOCK_START); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* get the open lock - this prevents new users attaching to the database + during the commit */ + ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* Sets up ntdb->transaction->recovery and + * ntdb->transaction->magic_offset. 
*/ + ecode = transaction_setup_recovery(ntdb); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + ntdb->transaction->prepared = true; + + /* expand the file to the new size if needed */ + if (ntdb->file->map_size != ntdb->transaction->old_map_size) { + ntdb_len_t add; + + add = ntdb->file->map_size - ntdb->transaction->old_map_size; + /* Restore original map size for ntdb_expand_file */ + ntdb->file->map_size = ntdb->transaction->old_map_size; + ecode = methods->expand_file(ntdb, add); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + } + + /* Keep the open lock until the actual commit */ + return NTDB_SUCCESS; +} + +/* + prepare to commit the current transaction +*/ +_PUBLIC_ enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb) +{ + return _ntdb_transaction_prepare_commit(ntdb); +} + +/* + commit the current transaction +*/ +_PUBLIC_ enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb) +{ + const struct ntdb_methods *methods; + int i; + enum NTDB_ERROR ecode; + + if (ntdb->transaction == NULL) { + return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR, + "ntdb_transaction_commit:" + " no transaction"); + } + + ntdb_trace(ntdb, "ntdb_transaction_commit"); + + if (ntdb->transaction->nesting != 0) { + ntdb->transaction->nesting--; + return NTDB_SUCCESS; + } + + if (!ntdb->transaction->prepared) { + ecode = _ntdb_transaction_prepare_commit(ntdb); + if (ecode != NTDB_SUCCESS) { + _ntdb_transaction_cancel(ntdb); + return ecode; + } + } + + /* check for a null transaction (prepare_commit may do this!) 
*/ + if (ntdb->transaction->blocks == NULL) { + _ntdb_transaction_cancel(ntdb); + return NTDB_SUCCESS; + } + + methods = ntdb->transaction->io_methods; + + /* perform all the writes */ + for (i=0;i<ntdb->transaction->num_blocks;i++) { + ntdb_off_t offset; + ntdb_len_t length; + + if (ntdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * NTDB_PGSIZE; + length = NTDB_PGSIZE; + + ecode = methods->twrite(ntdb, offset, + ntdb->transaction->blocks[i], length); + if (ecode != NTDB_SUCCESS) { + /* we've overwritten part of the data and + possibly expanded the file, so we need to + run the crash recovery code */ + ntdb->io = methods; + ntdb_transaction_recover(ntdb); + + _ntdb_transaction_cancel(ntdb); + + return ecode; + } + SAFE_FREE(ntdb, ntdb->transaction->blocks[i]); + } + + SAFE_FREE(ntdb, ntdb->transaction->blocks); + ntdb->transaction->num_blocks = 0; + + /* ensure the new data is on disk */ + ecode = transaction_sync(ntdb, 0, ntdb->file->map_size); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + /* + TODO: maybe write to some dummy hdr field, or write to magic + offset without mmap, before the last sync, instead of the + utime() call + */ + + /* on some systems (like Linux 2.6.x) changes via mmap/msync + don't change the mtime of the file, this means the file may + not be backed up (as ntdb rounding to block sizes means that + file size changes are quite rare too). The following forces + mtime changes when a transaction completes */ +#if HAVE_UTIME + utime(ntdb->name, NULL); +#endif + + /* use a transaction cancel to free memory and remove the + transaction locks: it "restores" map_size, too. */ + ntdb->transaction->old_map_size = ntdb->file->map_size; + _ntdb_transaction_cancel(ntdb); + + return NTDB_SUCCESS; +} + + +/* + recover from an aborted transaction.
Must be called with exclusive + database write access already established (including the open + lock to prevent new processes attaching) +*/ +enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb) +{ + ntdb_off_t recovery_head, recovery_eof; + unsigned char *data, *p; + struct ntdb_recovery_record rec; + enum NTDB_ERROR ecode; + + /* find the recovery area */ + recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery)); + if (NTDB_OFF_IS_ERR(recovery_head)) { + ecode = NTDB_OFF_TO_ERR(recovery_head); + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to read recovery head"); + } + + if (recovery_head == 0) { + /* we have never allocated a recovery record */ + return NTDB_SUCCESS; + } + + /* read the recovery record */ + ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to read recovery record"); + } + + if (rec.magic != NTDB_RECOVERY_MAGIC) { + /* there is no valid recovery data */ + return NTDB_SUCCESS; + } + + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " attempt to recover read only database"); + } + + recovery_eof = rec.eof; + + data = (unsigned char *)ntdb->alloc_fn(ntdb, rec.len, ntdb->alloc_data); + if (data == NULL) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to allocate recovery data"); + } + + /* read the full recovery data */ + ecode = ntdb->io->tread(ntdb, recovery_head + sizeof(rec), data, + rec.len); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to read recovery data"); + } + + /* recover the file data */ + p = data; + while (p+sizeof(ntdb_off_t)+sizeof(ntdb_len_t) < data + rec.len) { + ntdb_off_t ofs; + ntdb_len_t len; + 
ntdb_convert(ntdb, p, sizeof(ofs) + sizeof(len)); + memcpy(&ofs, p, sizeof(ofs)); + memcpy(&len, p + sizeof(ofs), sizeof(len)); + p += sizeof(ofs) + sizeof(len); + + ecode = ntdb->io->twrite(ntdb, ofs, p, len); + if (ecode != NTDB_SUCCESS) { + ntdb->free_fn(data, ntdb->alloc_data); + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to recover %zu bytes" + " at offset %zu", + (size_t)len, (size_t)ofs); + } + p += len; + } + + ntdb->free_fn(data, ntdb->alloc_data); + + ecode = transaction_sync(ntdb, 0, ntdb->file->map_size); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to sync recovery"); + } + + /* if the recovery area is after the recovered eof then remove it */ + if (recovery_eof <= recovery_head) { + ecode = ntdb_write_off(ntdb, offsetof(struct ntdb_header, + recovery), + 0); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to remove recovery head"); + } + } + + /* remove the recovery magic */ + ecode = ntdb_write_off(ntdb, + recovery_head + + offsetof(struct ntdb_recovery_record, magic), + NTDB_RECOVERY_INVALID_MAGIC); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to remove recovery magic"); + } + + ecode = transaction_sync(ntdb, 0, recovery_eof); + if (ecode != NTDB_SUCCESS) { + return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR, + "ntdb_transaction_recover:" + " failed to sync2 recovery"); + } + + ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, + "ntdb_transaction_recover: recovered %zu byte database", + (size_t)recovery_eof); + + /* all done */ + return NTDB_SUCCESS; +} + +ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb) +{ + ntdb_off_t recovery_head; + struct ntdb_recovery_record rec; + enum NTDB_ERROR ecode; + + /* find the recovery area */ + recovery_head = ntdb_read_off(ntdb, 
offsetof(struct ntdb_header,recovery)); + if (NTDB_OFF_IS_ERR(recovery_head)) { + return recovery_head; + } + + if (recovery_head == 0) { + /* we have never allocated a recovery record */ + return false; + } + + /* read the recovery record */ + ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + + return (rec.magic == NTDB_RECOVERY_MAGIC); +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/traverse.c b/junkcode/rusty@rustcorp.com.au-ntdb/traverse.c new file mode 100644 index 00000000..2e6763cb --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/traverse.c @@ -0,0 +1,100 @@ + /* + Trivial Database 2: traverse function. + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/ +#include "private.h" +#include + +_PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb, + int (*fn)(struct ntdb_context *, + NTDB_DATA, NTDB_DATA, void *), + void *p) +{ + enum NTDB_ERROR ecode; + struct hash_info h; + NTDB_DATA k, d; + int64_t count = 0; + + k.dptr = NULL; + for (ecode = first_in_hash(ntdb, &h, &k, &d.dsize); + ecode == NTDB_SUCCESS; + ecode = next_in_hash(ntdb, &h, &k, &d.dsize)) { + d.dptr = k.dptr + k.dsize; + + count++; + if (fn && fn(ntdb, k, d, p)) { + ntdb->free_fn(k.dptr, ntdb->alloc_data); + return count; + } + ntdb->free_fn(k.dptr, ntdb->alloc_data); + } + + if (ecode != NTDB_ERR_NOEXIST) { + return NTDB_ERR_TO_OFF(ecode); + } + return count; +} + +_PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key) +{ + struct hash_info h; + + return first_in_hash(ntdb, &h, key, NULL); +} + +/* We lock twice, not very efficient. We could keep last key & h cached. */ +_PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key) +{ + struct hash_info h; + struct ntdb_used_record rec; + ntdb_off_t off; + + off = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, NULL); + ntdb->free_fn(key->dptr, ntdb->alloc_data); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); + + /* If we found something, skip to next. */ + if (off) + h.bucket++; + return next_in_hash(ntdb, &h, key, NULL); +} + +static int wipe_one(struct ntdb_context *ntdb, + NTDB_DATA key, NTDB_DATA data, enum NTDB_ERROR *ecode) +{ + *ecode = ntdb_delete(ntdb, key); + return (*ecode != NTDB_SUCCESS); +} + +_PUBLIC_ enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb) +{ + enum NTDB_ERROR ecode; + int64_t count; + + ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false); + if (ecode != NTDB_SUCCESS) + return ecode; + + /* FIXME: Be smarter. 
*/ + count = ntdb_traverse(ntdb, wipe_one, &ecode); + if (count < 0) + ecode = NTDB_OFF_TO_ERR(count); + ntdb_allrecord_unlock(ntdb, F_WRLCK); + return ecode; +} diff --git a/junkcode/rusty@rustcorp.com.au-ntdb/wscript b/junkcode/rusty@rustcorp.com.au-ntdb/wscript new file mode 100644 index 00000000..a21c1a30 --- /dev/null +++ b/junkcode/rusty@rustcorp.com.au-ntdb/wscript @@ -0,0 +1,287 @@ +#!/usr/bin/env python + +APPNAME = 'ntdb' +VERSION = '1.0' + +blddir = 'bin' + +import sys, os + +# find the buildtools directory +srcdir = '.' +while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5: + srcdir = srcdir + '/..' +sys.path.insert(0, srcdir + '/buildtools/wafsamba') + +import wafsamba, samba_dist, Options, Logs, glob + +samba_dist.DIST_DIRS('lib/ntdb:. lib/replace:lib/replace lib/ccan:lib/ccan buildtools:buildtools') + +def set_options(opt): + opt.BUILTIN_DEFAULT('replace,ccan') + opt.PRIVATE_EXTENSION_DEFAULT('ntdb', noextension='ntdb') + opt.RECURSE('lib/replace') + opt.add_option('--valgrind', + help=("use valgrind on tests programs"), + action="store_true", dest='VALGRIND', default=False) + opt.add_option('--valgrind-log', + help=("where to put the valgrind log"), + action="store", dest='VALGRINDLOG', default=None) + + if opt.IN_LAUNCH_DIR(): + opt.add_option('--disable-python', + help=("disable the pyntdb module"), + action="store_true", dest='disable_python', default=False) + +def configure(conf): + conf.RECURSE('lib/replace') + conf.RECURSE('lib/ccan') + + conf.env.NTDB_TEST_RUN_SRC=['test/run-001-encode.c', + 'test/run-001-fls.c', + 'test/run-01-new_database.c', + 'test/run-02-expand.c', + 'test/run-03-coalesce.c', + 'test/run-04-basichash.c', + 'test/run-05-readonly-open.c', + 'test/run-10-simple-store.c', + 'test/run-11-simple-fetch.c', + 'test/run-12-check.c', + 'test/run-15-append.c', + 'test/run-25-hashoverload.c', + 'test/run-30-exhaust-before-expand.c', + 'test/run-35-convert.c', + 'test/run-50-multiple-freelists.c', + 
'test/run-56-open-during-transaction.c', + 'test/run-57-die-during-transaction.c', + 'test/run-64-bit-tdb.c', + 'test/run-90-get-set-attributes.c', + 'test/run-capabilities.c', + 'test/run-expand-in-transaction.c', + 'test/run-features.c', + 'test/run-lockall.c', + 'test/run-remap-in-read_traverse.c', + 'test/run-seed.c', + 'test/run-tdb_errorstr.c', + 'test/run-tdb_foreach.c', + 'test/run-traverse.c'] + conf.env.NTDB_TEST_API_SRC=['test/api-12-store.c', + 'test/api-13-delete.c', + 'test/api-14-exists.c', + 'test/api-16-wipe_all.c', + 'test/api-20-alloc-attr.c', + 'test/api-21-parse_record.c', + 'test/api-55-transaction.c', + 'test/api-60-noop-transaction.c', + 'test/api-80-tdb_fd.c', + 'test/api-81-seqnum.c', + 'test/api-82-lockattr.c', + 'test/api-83-openhook.c', + 'test/api-91-get-stats.c', + 'test/api-92-get-set-readonly.c', + 'test/api-93-repack.c', + 'test/api-94-expand-during-parse.c', + 'test/api-95-read-only-during-parse.c', + 'test/api-add-remove-flags.c', + 'test/api-check-callback.c', + 'test/api-firstkey-nextkey.c', + 'test/api-fork-test.c', + 'test/api-locktimeout.c', + 'test/api-missing-entries.c', + 'test/api-open-multiple-times.c', + 'test/api-record-expand.c', + 'test/api-simple-delete.c', + 'test/api-summary.c'] + conf.env.NTDB_TEST_API_PY=['test/python-api.py'] + conf.env.NTDB_TEST_API_HELPER_SRC=['test/helpapi-external-agent.c'] + conf.env.NTDB_TEST_RUN_HELPER_SRC=['test/helprun-external-agent.c', + 'test/helprun-layout.c'] + conf.env.NTDB_TEST_HELPER_SRC=['test/external-agent.c', + 'test/failtest_helper.c', + 'test/lock-tracking.c', + 'test/logging.c', + 'test/tap-interface.c'] + + conf.env.standalone_ntdb = conf.IN_LAUNCH_DIR() + conf.env.disable_python = getattr(Options.options, 'disable_python', False) + + if not conf.env.standalone_ntdb: + if conf.CHECK_BUNDLED_SYSTEM('ntdb', minversion=VERSION, + implied_deps='replace'): + conf.define('USING_SYSTEM_NTDB', 1) + if conf.CHECK_BUNDLED_SYSTEM_PYTHON('pyntdb', 'ntdb', minversion=VERSION): + 
conf.define('USING_SYSTEM_PYNTDB', 1) + + if not conf.env.disable_python: + # also disable if we don't have the python libs installed + conf.find_program('python', var='PYTHON') + conf.check_tool('python') + conf.check_python_version((2,4,2)) + conf.SAMBA_CHECK_PYTHON_HEADERS(mandatory=False) + if not conf.env.HAVE_PYTHON_H: + Logs.warn('Disabling pyntdb as python devel libs not found') + conf.env.disable_python = True + + conf.CHECK_XSLTPROC_MANPAGES() + + # This make #include work. + conf.ADD_EXTRA_INCLUDES('''#lib''') + + conf.SAMBA_CONFIG_H() + +def build(bld): + bld.RECURSE('lib/replace') + bld.RECURSE('lib/ccan') + + if bld.env.standalone_ntdb: + bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' + private_library = False + else: + private_library = True + + SRC = '''check.c free.c hash.c io.c lock.c open.c + summary.c ntdb.c transaction.c traverse.c''' + + if not bld.CONFIG_SET('USING_SYSTEM_NTDB'): + NTDB_CCAN='ccan-likely ccan-ilog ccan-hash ccan-tally' + bld.SAMBA_LIBRARY('ntdb', + SRC, + deps='replace ' + NTDB_CCAN , + includes='.', + abi_directory='ABI', + abi_match='ntdb_*', + hide_symbols=True, + vnum=VERSION, + public_headers='ntdb.h', + public_headers_install=not private_library, + pc_files='ntdb.pc', + private_library=private_library, + manpages='man/ntdb.3') + + bld.SAMBA_BINARY('ntdbtorture', + 'tools/ntdbtorture.c', + deps='ntdb ccan-err', + install=False) + + bld.SAMBA_BINARY('ntdbtool', + 'tools/ntdbtool.c', + deps='ntdb', manpages='man/ntdbtool.8') + + bld.SAMBA_BINARY('ntdbdump', + 'tools/ntdbdump.c', + deps='ntdb', manpages='man/ntdbdump.8') + + bld.SAMBA_BINARY('ntdbrestore', + 'tools/ntdbrestore.c', + deps='ntdb', manpages='man/ntdbrestore.8') + + bld.SAMBA_BINARY('ntdbbackup', + 'tools/ntdbbackup.c', + deps='ntdb', manpages='man/ntdbbackup.8') + + if bld.env.DEVELOPER_MODE: + # FIXME: We need CCAN for some API tests, but waf thinks it's + # already available via ntdb. It is, but not publicly. 
+ # Workaround is to build a private, non-hiding version. + bld.SAMBA_SUBSYSTEM('ntdb-testing', + SRC, + deps='replace ' + NTDB_CCAN, + includes='.') + + bld.SAMBA_SUBSYSTEM('ntdb-test-helpers', + bld.env.NTDB_TEST_HELPER_SRC, + deps='replace', + allow_warnings=True) + bld.SAMBA_SUBSYSTEM('ntdb-run-helpers', + bld.env.NTDB_TEST_RUN_HELPER_SRC, + deps='replace') + bld.SAMBA_SUBSYSTEM('ntdb-api-helpers', + bld.env.NTDB_TEST_API_HELPER_SRC, + deps='replace') + + for f in bld.env.NTDB_TEST_RUN_SRC: + base = os.path.splitext(os.path.basename(f))[0] + bld.SAMBA_BINARY('ntdb-' + base, f, + deps=NTDB_CCAN + ' ccan-failtest ntdb-test-helpers ntdb-run-helpers', + install=False) + + for f in bld.env.NTDB_TEST_API_SRC: + base = os.path.splitext(os.path.basename(f))[0] + bld.SAMBA_BINARY('ntdb-' + base, f, + deps='ntdb-test-helpers ntdb-api-helpers ntdb-testing', + install=False) + + if not bld.CONFIG_SET('USING_SYSTEM_PYNTDB'): + bld.SAMBA_PYTHON('pyntdb', + source='pyntdb.c', + deps='ntdb', + enabled=not bld.env.disable_python, + realname='ntdb.so', + cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION) + +def testonly(ctx): + '''run ntdb testsuite''' + import Utils, samba_utils, shutil + ecode = 0; + + env = samba_utils.LOAD_ENVIRONMENT() + + if env.standalone_ntdb: + # FIXME: This is horrible :( + test_prefix = "%s/st" % (Utils.g_module.blddir) + shutil.rmtree(test_prefix, ignore_errors=True) + os.makedirs(test_prefix) + + # Create scratch directory for tests. 
+ testdir = os.path.join(test_prefix, 'ntdb-tests') + samba_utils.mkdir_p(testdir) + # Symlink back to source dir so it can find tests in test/ + link = os.path.join(testdir, 'test') + if not os.path.exists(link): + os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link) + + if env.options['VALGRIND']: + os.environ['VALGRIND'] = 'valgrind -q --num-callers=30 --error-exitcode=11' + if env.options['VALGRINDLOG']: + os.environ['VALGRIND'] += ' --log-file=%s' % Options.options.VALGRINDLOG + + for f in env.NTDB_TEST_RUN_SRC + env.NTDB_TEST_API_SRC: + name = "ntdb-" + os.path.splitext(os.path.basename(f))[0] + cmd = "cd " + testdir + " && $VALGRIND " + os.path.abspath(os.path.join(Utils.g_module.blddir, name)) + " > test-output 2>&1" + print("..." + f) + ret = samba_utils.RUN_COMMAND(cmd) + if ret != 0: + print("%s (%s) failed:" % (name, f)) + samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output')) + ecode = ret; + break; + if not env.disable_python: + for f in env.NTDB_TEST_API_PY: + print("..." + f) + cmd = "cd " + testdir + " && PYTHONPATH=%s %s %s > test-output 2>&1" % ( + os.path.abspath(os.path.join(Utils.g_module.blddir, "python")), + env["PYTHON"], os.path.abspath(f)) + ret = samba_utils.RUN_COMMAND(cmd) + if ret != 0: + print("%s (%s) failed:" % (name, f)) + samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output')) + ecode = ret + break + + sys.exit(ecode) + +# WAF doesn't build the unit tests for this, maybe because they don't link with ntdb? +# This forces it +def test(ctx): + import Scripting + Scripting.commands.append('build') + Scripting.commands.append('testonly') + +def dist(): + '''makes a tarball for distribution''' + samba_dist.dist() + +def reconfigure(ctx): + '''reconfigure if config scripts have changed''' + import samba_utils + samba_utils.reconfigure(ctx)