+++ /dev/null
-ntdb_add_flag: void (struct ntdb_context *, unsigned int)
-ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA)
-ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
-ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
-ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA)
-ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA)
-ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
-ntdb_close: int (struct ntdb_context *)
-ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
-ntdb_errorstr: const char *(enum NTDB_ERROR)
-ntdb_exists: bool (struct ntdb_context *, NTDB_DATA)
-ntdb_fd: int (const struct ntdb_context *)
-ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *)
-ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
-ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *)
-ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *)
-ntdb_get_flags: unsigned int (struct ntdb_context *)
-ntdb_get_seqnum: int64_t (struct ntdb_context *)
-ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_name: const char *(const struct ntdb_context *)
-ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
-ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *)
-ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
-ntdb_remove_flag: void (struct ntdb_context *, unsigned int)
-ntdb_repack: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *)
-ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int)
-ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **)
-ntdb_transaction_cancel: void (struct ntdb_context *)
-ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *)
-ntdb_unlockall: void (struct ntdb_context *)
-ntdb_unlockall_read: void (struct ntdb_context *)
-ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type)
-ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *)
+++ /dev/null
-ntdb_add_flag: void (struct ntdb_context *, unsigned int)
-ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA)
-ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
-ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
-ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA)
-ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA)
-ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
-ntdb_close: int (struct ntdb_context *)
-ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
-ntdb_errorstr: const char *(enum NTDB_ERROR)
-ntdb_exists: bool (struct ntdb_context *, NTDB_DATA)
-ntdb_fd: int (const struct ntdb_context *)
-ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *)
-ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
-ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *)
-ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *)
-ntdb_get_flags: unsigned int (struct ntdb_context *)
-ntdb_get_seqnum: int64_t (struct ntdb_context *)
-ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_name: const char *(const struct ntdb_context *)
-ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
-ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *)
-ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
-ntdb_remove_flag: void (struct ntdb_context *, unsigned int)
-ntdb_repack: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *)
-ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int)
-ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **)
-ntdb_transaction_cancel: void (struct ntdb_context *)
-ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *)
-ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *)
-ntdb_unlockall: void (struct ntdb_context *)
-ntdb_unlockall_read: void (struct ntdb_context *)
-ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type)
-ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *)
+++ /dev/null
-../../licenses/LGPL-3
\ No newline at end of file
+++ /dev/null
-# Standalone build of libntdb.a and the ntdb command-line tools.
-CC=gcc
-CFLAGS=-g -O0 -Wall -W -I../../ -I./
-LIBS=
-
-LIBNTDB_OBJ = ccan_hash.o ccan_tally.o check.o free.o hash.o io.o lock.o open.o summary.o ntdb.o transaction.o traverse.o
-
-TOOLS = ntdbtorture ntdbtool ntdbdump ntdbrestore ntdbbackup
-TOOL_BINS = $(addprefix tools/,$(TOOLS))
-
-# None of these names is a file produced by this Makefile.
-.PHONY: all clean $(TOOLS)
-
-all: $(TOOLS)
-
-# "make ntdbtool" etc. still work; the real target is the binary in tools/,
-# which is only relinked when its source or the library changes.
-$(TOOLS): %: tools/%
-
-$(TOOL_BINS): tools/%: tools/%.c libntdb.a
-	$(CC) $(CFLAGS) -o $@ $< libntdb.a $(LIBS)
-
-libntdb.a: $(LIBNTDB_OBJ)
-	@echo Creating library $@
-	ar r libntdb.a $(LIBNTDB_OBJ)
-	ranlib libntdb.a
-
-# Objects built from sources in this directory.
-%.o: %.c
-	@echo Compiling $@
-	$(CC) $(CFLAGS) -c $< -o $@
-
-# Objects built from sibling ccan modules.
-ccan_hash.o: ../hash/hash.c
-	@echo Compiling $@
-	$(CC) $(CFLAGS) -c ../hash/hash.c -o $@
-
-ccan_tally.o: ../tally/tally.c
-	@echo Compiling $@
-	$(CC) $(CFLAGS) -c ../tally/tally.c -o $@
-
-clean:
-	rm -f *.o
-	rm -f *.a
-	rm -f $(TOOL_BINS)
+++ /dev/null
-#include "config.h"
-#include <stdio.h>
-#include <string.h>
-
-/**
- * ntdb - Next Generation Trivial Database
- *
- * This package provides an experimental persistent keyword/data store.
- * Its main advantage over tdb is that it's 64-bit.
- *
- * Example:
- * #include <stdio.h>
- * #include <err.h>
- * #include <unistd.h>
- * #include <ccan/ntdb/ntdb.h>
- *
- * int main(int argc, char *argv[])
- * {
- * NTDB_DATA key = ntdb_mkdata("key", 3);
- * NTDB_DATA val = ntdb_mkdata("val", 3);
- * struct ntdb_context *ntdb;
- *
- * ntdb = ntdb_open("example.ntdb", NTDB_DEFAULT,
- * O_RDWR | O_CREAT | O_TRUNC, 0600, NULL);
- * if (ntdb == NULL)
- * errx(1, "failed to open database file");
- *
- * ntdb_store(ntdb, key, val, NTDB_INSERT);
- *
- * ntdb_close(ntdb);
- *
- * return 0;
- * }
- *
- * License: LGPL (v3 or any later version)
- * Authors: Rusty Russell
- * Andrew Tridgell
- * Jeremy Allison
- * Jelmer Vernooij
- * Volker Lendecke
- * Andrew Esh
- * Simon McVittie
- * Tim Potter
- * Maintainer: Rusty Russell <rusty@rustcorp.com.au>
- */
-int main(int argc, char *argv[])
-{
- /*
-  * Module metadata query: takes exactly one argument naming the list to
-  * print ("depends" or "testdepends"), one module path per line.
-  * Returns 0 after printing a list, 1 for any other invocation.
-  */
- if (argc != 2)
- return 1;
-
- /* Modules this code depends on. */
- if (strcmp(argv[1], "depends") == 0) {
- printf("ccan/asearch\n");
- printf("ccan/build_assert\n");
- printf("ccan/cast\n");
- printf("ccan/compiler\n");
- printf("ccan/endian\n");
- printf("ccan/hash\n");
- printf("ccan/ilog\n");
- printf("ccan/likely\n");
- printf("ccan/tally\n");
- printf("ccan/typesafe_cb\n");
- return 0;
- }
-
- /* Additional modules listed for the "testdepends" query. */
- if (strcmp(argv[1], "testdepends") == 0) {
- printf("ccan/failtest\n");
- printf("ccan/err\n");
- return 0;
- }
-
- /* Unrecognised query. */
- return 1;
-}
+++ /dev/null
- /*
- Trivial Database 2: consistency checking
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-#include <ccan/asearch/asearch.h>
-
-/*
- * We keep an ordered array of offsets.  This helper only ever adds to the
- * end: it grows *arr by one slot (allocating it on first use), stores off
- * there and bumps *num.  Returns false, leaving *arr and *num untouched,
- * if the allocator fails.
- */
-static bool append(struct ntdb_context *ntdb,
-		   ntdb_off_t **arr, size_t *num, ntdb_off_t off)
-{
-	size_t count = *num;
-	ntdb_off_t *grown;
-
-	if (count != 0) {
-		grown = ntdb->expand_fn(*arr,
-					(count + 1) * sizeof(ntdb_off_t),
-					ntdb->alloc_data);
-	} else {
-		grown = ntdb->alloc_fn(ntdb, sizeof(ntdb_off_t),
-				       ntdb->alloc_data);
-	}
-	if (grown == NULL)
-		return false;
-
-	grown[count] = off;
-	*num = count + 1;
-	*arr = grown;
-	return true;
-}
-
-/*
- * Validate the database header and walk its capability list.
- *
- * Checks the stored hash self-test value, the magic string, that the
- * features used are a subset of those offered, and that a non-zero
- * recovery offset lies between the end of the header and the end of the
- * mapped file.
- *
- * @recovery: set to the header's recovery offset.
- * @features: set to the header's features_offered.
- * @num_capabilities: incremented once for each capability record walked.
- *
- * Returns NTDB_SUCCESS, or the first error found.
- */
-static enum NTDB_ERROR check_header(struct ntdb_context *ntdb,
-				    ntdb_off_t *recovery,
-				    uint64_t *features,
-				    size_t *num_capabilities)
-{
-	uint64_t hash_test;
-	struct ntdb_header hdr;
-	enum NTDB_ERROR ecode;
-	ntdb_off_t off, next;
-
-	ecode = ntdb_read_convert(ntdb, 0, &hdr, sizeof(hdr));
-	if (ecode != NTDB_SUCCESS) {
-		return ecode;
-	}
-	/* magic food should not be converted, so convert back. */
-	ntdb_convert(ntdb, hdr.magic_food, sizeof(hdr.magic_food));
-
-	hash_test = NTDB_HASH_MAGIC;
-	hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
-	if (hdr.hash_test != hash_test) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "check: hash test %llu should be %llu",
-				   (unsigned long long)hdr.hash_test,
-				   (unsigned long long)hash_test);
-	}
-
-	if (strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
-		/* A "%.*s" precision argument must have type int. */
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "check: bad magic '%.*s'",
-				   (int)sizeof(hdr.magic_food),
-				   hdr.magic_food);
-	}
-
-	/* Features which are used must be a subset of features offered. */
-	if (hdr.features_used & ~hdr.features_offered) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "check: features used (0x%llx) which"
-				   " are not offered (0x%llx)",
-				   (unsigned long long)hdr.features_used,
-				   (unsigned long long)hdr.features_offered);
-	}
-
-	*features = hdr.features_offered;
-	*recovery = hdr.recovery;
-	if (*recovery) {
-		if (*recovery < sizeof(hdr)
-		    || *recovery > ntdb->file->map_size) {
-			return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-					   "ntdb_check:"
-					   " invalid recovery offset %zu",
-					   (size_t)*recovery);
-		}
-	}
-
-	/* Follow the capability chain until the terminating zero offset. */
-	for (off = hdr.capabilities; off; off = next) {
-		const struct ntdb_capability *cap;
-		enum NTDB_ERROR e;
-
-		cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
-		if (NTDB_PTR_IS_ERR(cap)) {
-			return NTDB_PTR_ERR(cap);
-		}
-
-		/* All capabilities are unknown. */
-		e = unknown_capability(ntdb, "ntdb_check", cap->type);
-		/* Grab the link before releasing the mapping. */
-		next = cap->next;
-		ntdb_access_release(ntdb, cap);
-		if (e)
-			return e;
-		(*num_capabilities)++;
-	}
-
-	/* Don't check reserved: they *can* be used later. */
-	return NTDB_SUCCESS;
-}
-
-/*
- * Three-way comparison of two offsets for asearch(): returns 1, -1 or 0.
- * Compares rather than subtracts, since a difference can overflow an int.
- * ctx is unused.
- */
-static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b, void *ctx)
-{
-	const ntdb_off_t lhs = *a, rhs = *b;
-
-	if (lhs > rhs)
-		return 1;
-	if (lhs < rhs)
-		return -1;
-	return 0;
-}
-
-/*
- * Check one hash bucket entry.
- *
- * off_and_hash is the raw bucket value; zero means an empty bucket, which
- * is accepted.  Otherwise the offset part must appear in used[] (searched
- * with asearch/off_cmp); the matching slot has its low bit flipped and
- * *num_found is incremented.  The record's key is then hashed and compared
- * against both the bucket index and the hash bits stored in the upper
- * part of off_and_hash.  If check is non-NULL it is called with the key,
- * the data and the caller's data pointer, and its result is returned.
- */
-static enum NTDB_ERROR check_entry(struct ntdb_context *ntdb,
- ntdb_off_t off_and_hash,
- ntdb_len_t bucket,
- ntdb_off_t used[],
- size_t num_used,
- size_t *num_found,
- enum NTDB_ERROR (*check)(NTDB_DATA,
- NTDB_DATA,
- void *),
- void *data)
-{
- enum NTDB_ERROR ecode;
- const struct ntdb_used_record *r;
- const unsigned char *kptr;
- ntdb_len_t klen, dlen;
- uint32_t hash;
- ntdb_off_t off = off_and_hash & NTDB_OFF_MASK;
- ntdb_off_t *p;
-
- /* Empty bucket is fine. */
- if (!off_and_hash) {
- return NTDB_SUCCESS;
- }
-
- /* This can't point to a chain, we handled those at toplevel. */
- if (off_and_hash & (1ULL << NTDB_OFF_CHAIN_BIT)) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Invalid chain bit in offset "
- " %llu", (long long)off_and_hash);
- }
-
- p = asearch(&off, used, num_used, off_cmp, NULL);
- if (!p) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Invalid offset"
- " %llu in hash", (long long)off);
- }
- /* Mark it invalid. */
- *p ^= 1;
- (*num_found)++;
-
- /* Read just the record header to learn the key and data lengths. */
- r = ntdb_access_read(ntdb, off, sizeof(*r), true);
- if (NTDB_PTR_IS_ERR(r)) {
- return NTDB_PTR_ERR(r);
- }
- klen = rec_key_length(r);
- dlen = rec_data_length(r);
- ntdb_access_release(ntdb, r);
-
- /* Key and data follow the header contiguously. */
- kptr = ntdb_access_read(ntdb, off + sizeof(*r), klen + dlen, false);
- if (NTDB_PTR_IS_ERR(kptr)) {
- return NTDB_PTR_ERR(kptr);
- }
-
- hash = ntdb_hash(ntdb, kptr, klen);
-
- /* Are we in the right chain? */
- if (bits_from(hash, 0, ntdb->hash_bits) != bucket) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: Bad bucket %u vs %llu",
- bits_from(hash, 0, ntdb->hash_bits),
- (long long)bucket);
- /* Next 8 bits should be the same as top bits of bucket. */
- } else if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
- != bits_from(off_and_hash, 64-NTDB_OFF_UPPER_STEAL,
- NTDB_OFF_UPPER_STEAL)) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: Bad hash bits %llu vs %llu",
- (long long)off_and_hash,
- (long long)hash);
- } else if (check) {
- NTDB_DATA k, d;
-
- k = ntdb_mkdata(kptr, klen);
- d = ntdb_mkdata(kptr + klen, dlen);
- ecode = check(k, d, data);
- } else {
- ecode = NTDB_SUCCESS;
- }
- /* Every branch above falls through here so the mapping is released. */
- ntdb_access_release(ntdb, kptr);
-
- return ecode;
-}
-
-/*
- * Check a hash chain record at off belonging to the given bucket.
- *
- * The chain record itself counts as one found used record.  Its header
- * must carry NTDB_CHAIN_MAGIC, have a zero key length and a data length
- * that is a multiple of sizeof(ntdb_off_t).  Each slot in its data is then
- * verified with check_entry(); the first error stops the walk and is
- * returned.
- */
-static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb,
- ntdb_off_t off,
- ntdb_len_t bucket,
- ntdb_off_t used[],
- size_t num_used,
- size_t *num_found,
- enum NTDB_ERROR (*check)(NTDB_DATA,
- NTDB_DATA,
- void *),
- void *data)
-{
- struct ntdb_used_record rec;
- enum NTDB_ERROR ecode;
- const ntdb_off_t *entries;
- ntdb_len_t i, num;
-
- /* This is a used entry. */
- (*num_found)++;
-
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash chain magic %llu",
- (long long)rec_magic(&rec));
- }
-
- if (rec_data_length(&rec) % sizeof(ntdb_off_t)) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash chain data length %llu",
- (long long)rec_data_length(&rec));
- }
-
- if (rec_key_length(&rec) != 0) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash chain key length %llu",
- (long long)rec_key_length(&rec));
- }
-
- /* Skip the header: the slots are the record's data. */
- off += sizeof(rec);
- num = rec_data_length(&rec) / sizeof(ntdb_off_t);
- entries = ntdb_access_read(ntdb, off, rec_data_length(&rec), true);
- if (NTDB_PTR_IS_ERR(entries)) {
- return NTDB_PTR_ERR(entries);
- }
-
- /* Check each non-deleted entry in chain. */
- for (i = 0; i < num; i++) {
- ecode = check_entry(ntdb, entries[i], bucket,
- used, num_used, num_found, check, data);
- if (ecode) {
- break;
- }
- }
-
- /* If num is zero, ecode still holds NTDB_SUCCESS from the header read. */
- ntdb_access_release(ntdb, entries);
- return ecode;
-}
-
-/*
- * Check the top-level hash table and everything reachable from it.
- *
- * The table record at NTDB_HASH_OFFSET must carry NTDB_HTABLE_MAGIC, have
- * no key, and hold exactly one ntdb_off_t per bucket.  Each bucket is then
- * checked either as a chain (chain bit set) or as a single entry.
- *
- * @used/@num_used: sorted offsets of every used record found by the scan.
- * @num_other_used: used records that are not reachable through the hash
- *	(free tables and capabilities); the hash table itself adds one more.
- * @check/@data: optional per-record callback, passed down to check_entry().
- *
- * Returns NTDB_SUCCESS only if every bucket checks out and the number of
- * records accounted for equals num_used.
- */
-static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb,
-				  ntdb_off_t used[],
-				  size_t num_used,
-				  size_t num_other_used,
-				  enum NTDB_ERROR (*check)(NTDB_DATA,
-							   NTDB_DATA,
-							   void *),
-				  void *data)
-{
-	enum NTDB_ERROR ecode;
-	struct ntdb_used_record rec;
-	const ntdb_off_t *entries;
-	ntdb_len_t i, num_buckets;
-	/* Free tables and capabilities also show up as used, as do we. */
-	size_t num_found = num_other_used + 1;
-
-	ecode = ntdb_read_convert(ntdb, NTDB_HASH_OFFSET, &rec, sizeof(rec));
-	if (ecode != NTDB_SUCCESS) {
-		return ecode;
-	}
-
-	if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: Bad hash table magic %llu",
-				   (long long)rec_magic(&rec));
-	}
-
-	if (rec_data_length(&rec) != (sizeof(ntdb_off_t) << ntdb->hash_bits)) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: Bad hash table data length %llu",
-				   (long long)rec_data_length(&rec));
-	}
-
-	if (rec_key_length(&rec) != 0) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: Bad hash table key length %llu",
-				   (long long)rec_key_length(&rec));
-	}
-
-	entries = ntdb_access_read(ntdb, NTDB_HASH_OFFSET + sizeof(rec),
-				   rec_data_length(&rec), true);
-	if (NTDB_PTR_IS_ERR(entries)) {
-		return NTDB_PTR_ERR(entries);
-	}
-
-	/*
-	 * Compute the bucket count in the index's own type: a plain int
-	 * "1 << hash_bits" would overflow once hash_bits reaches the width
-	 * of int, unlike the size_t shift used for the length check above.
-	 */
-	num_buckets = (ntdb_len_t)1 << ntdb->hash_bits;
-	for (i = 0; i < num_buckets; i++) {
-		ntdb_off_t off = entries[i] & NTDB_OFF_MASK;
-		if (entries[i] & (1ULL << NTDB_OFF_CHAIN_BIT)) {
-			ecode = check_hash_chain(ntdb, off, i,
-						 used, num_used, &num_found,
-						 check, data);
-		} else {
-			ecode = check_entry(ntdb, entries[i], i,
-					    used, num_used, &num_found,
-					    check, data);
-		}
-		if (ecode) {
-			break;
-		}
-	}
-	ntdb_access_release(ntdb, entries);
-
-	if (ecode == NTDB_SUCCESS && num_found != num_used) {
-		ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				    "ntdb_check: Not all entries are in hash");
-	}
-	return ecode;
-}
-
-/*
- * Validate a single free record that was found on a free list.
- *
- * @off: offset of the record (used for bounds checking and messages).
- * @frec: the already-read free record.
- * @prev: expected back pointer; zero skips the back-pointer check.
- * @ftable: index of the free table whose list is being walked.
- * @bucket: index of the bucket whose list is being walked.
- *
- * Returns NTDB_SUCCESS, or an error if the magic, free table number,
- * extent, bucket or back pointer is wrong.
- */
-static enum NTDB_ERROR check_free(struct ntdb_context *ntdb,
-				  ntdb_off_t off,
-				  const struct ntdb_free_record *frec,
-				  ntdb_off_t prev, unsigned int ftable,
-				  unsigned int bucket)
-{
-	enum NTDB_ERROR ecode;
-
-	if (frec_magic(frec) != NTDB_FREE_MAGIC) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: offset %llu bad magic 0x%llx",
-				   (long long)off,
-				   (long long)frec->magic_and_prev);
-	}
-	if (frec_ftable(frec) != ftable) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: offset %llu bad freetable %u",
-				   (long long)off, frec_ftable(frec));
-
-	}
-
-	/* The whole record, header included, must lie inside the file. */
-	ecode = ntdb_oob(ntdb, off,
-			 frec_len(frec) + sizeof(struct ntdb_used_record),
-			 false);
-	if (ecode != NTDB_SUCCESS) {
-		return ecode;
-	}
-	if (size_to_bucket(frec_len(frec)) != bucket) {
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: offset %llu in wrong bucket"
-				   " (%u vs %u)",
-				   (long long)off,
-				   bucket, size_to_bucket(frec_len(frec)));
-	}
-	if (prev && prev != frec_prev(frec)) {
-		/* Report the stored back pointer, i.e. the value just compared. */
-		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
-				   "ntdb_check: offset %llu bad prev"
-				   " (%llu vs %llu)",
-				   (long long)off,
-				   (long long)prev, (long long)frec_prev(frec));
-	}
-	return NTDB_SUCCESS;
-}
-
-/*
- * Check one free table and every free list hanging off it.
- *
- * The table header at ftable_off must carry NTDB_FTABLE_MAGIC, have no
- * key, and have a data length equal to the table body.  For each of the
- * NTDB_FREE_BUCKETS buckets the list is walked via the records' next
- * pointers; every record is validated with check_free() and must appear
- * in fr[] (searched with asearch/off_cmp), where its slot has the low bit
- * flipped and *num_found is incremented.
- *
- * ftable_num is the index of this table, compared against the table
- * number stored in each free record.
- */
-static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb,
- ntdb_off_t ftable_off,
- unsigned ftable_num,
- ntdb_off_t fr[],
- size_t num_free,
- size_t *num_found)
-{
- struct ntdb_freetable ft;
- ntdb_off_t h;
- unsigned int i;
- enum NTDB_ERROR ecode;
-
- ecode = ntdb_read_convert(ntdb, ftable_off, &ft, sizeof(ft));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (rec_magic(&ft.hdr) != NTDB_FTABLE_MAGIC
- || rec_key_length(&ft.hdr) != 0
- || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Invalid header on free table");
- }
-
- for (i = 0; i < NTDB_FREE_BUCKETS; i++) {
- ntdb_off_t off, prev = 0, *p, first = 0;
- struct ntdb_free_record f;
-
- h = bucket_off(ftable_off, i);
- for (off = ntdb_read_off(ntdb, h); off; off = f.next) {
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- }
- /* Only the value read from the bucket head is masked. */
- if (!first) {
- off &= NTDB_OFF_MASK;
- first = off;
- }
- ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- /* prev is 0 for the first record, which skips its prev check here. */
- ecode = check_free(ntdb, off, &f, prev, ftable_num, i);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* FIXME: Check hash bits */
- p = asearch(&off, fr, num_free, off_cmp, NULL);
- if (!p) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: Invalid offset"
- " %llu in free table",
- (long long)off);
- }
- /* Mark it invalid. */
- *p ^= 1;
- (*num_found)++;
- prev = off;
- }
-
- if (first) {
- /* Now we can check first back pointer. */
- /* prev now holds the last record walked in this list. */
- ecode = ntdb_read_convert(ntdb, first, &f, sizeof(f));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- ecode = check_free(ntdb, first, &f, prev, ftable_num, i);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- }
- }
- return NTDB_SUCCESS;
-}
-
-/*
- * Count how many consecutive bytes starting at off are "dead": either
- * zero or the 0x43 fill byte.  The count stops at the first other byte or
- * at the end of the mapped file.
- *
- * Returns the length, or an error encoded with NTDB_ERR_TO_OFF() if a
- * read fails.
- *
- * Slow (one byte per read), but should be very rare.
- */
-ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off)
-{
-	size_t len;
-	enum NTDB_ERROR ecode;
-
-	for (len = 0; off + len < ntdb->file->map_size; len++) {
-		char c;
-
-		/*
-		 * Examine the byte at the current position.  Reading at
-		 * plain "off" would test the first byte over and over and
-		 * report everything up to end of file as dead.
-		 */
-		ecode = ntdb->io->tread(ntdb, off + len, &c, 1);
-		if (ecode != NTDB_SUCCESS) {
-			return NTDB_ERR_TO_OFF(ecode);
-		}
-		if (c != 0 && c != 0x43)
-			break;
-	}
-	return len;
-}
-
-/*
- * Scan the file record by record, starting right after the header.
- *
- * Each record is classified by its magic:
- *  - invalid-recovery magic or 0x43 fill: either the (unused) recovery
- *    area, when located at the recovery offset, or dead space;
- *  - recovery magic: must sit at the recovery offset and be consistent;
- *  - free magic: appended to *fr unless its table is NTDB_FTABLE_NONE;
- *  - used, chain, hash table, free table or capability magic: appended
- *    to *used, and its length and padding are checked;
- *  - anything else is reported as corruption.
- *
- * @used/@num_used, @fr/@num_free: growable arrays filled with offsets.
- * @features: header feature bits; the padding check runs only when zero.
- * @recovery: recovery offset from the header, or 0 if there is none.
- *
- * Returns NTDB_SUCCESS, or the first error found.
- */
-static enum NTDB_ERROR check_linear(struct ntdb_context *ntdb,
- ntdb_off_t **used, size_t *num_used,
- ntdb_off_t **fr, size_t *num_free,
- uint64_t features, ntdb_off_t recovery)
-{
- ntdb_off_t off;
- ntdb_len_t len;
- enum NTDB_ERROR ecode;
- bool found_recovery = false;
-
- /* Every branch below sets len, the distance to the next record. */
- for (off = sizeof(struct ntdb_header);
- off < ntdb->file->map_size;
- off += len) {
- union {
- struct ntdb_used_record u;
- struct ntdb_free_record f;
- struct ntdb_recovery_record r;
- } rec;
- /* r is larger: only get that if we need to. */
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.f));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* If we crash after ftruncate, we can get zeroes or fill. */
- if (rec.r.magic == NTDB_RECOVERY_INVALID_MAGIC
- || rec.r.magic == 0x4343434343434343ULL) {
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- if (recovery == off) {
- found_recovery = true;
- len = sizeof(rec.r) + rec.r.max_len;
- } else {
- /* Not the recovery area: measure the run of dead bytes. */
- len = dead_space(ntdb, off);
- if (NTDB_OFF_IS_ERR(len)) {
- return NTDB_OFF_TO_ERR(len);
- }
- if (len < sizeof(rec.r)) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: invalid"
- " dead space at %zu",
- (size_t)off);
- }
-
- /* Dead space is only warned about, not treated as an error. */
- ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
- "Dead space at %zu-%zu (of %zu)",
- (size_t)off, (size_t)(off + len),
- (size_t)ntdb->file->map_size);
- }
- } else if (rec.r.magic == NTDB_RECOVERY_MAGIC) {
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- if (recovery != off) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: unexpected"
- " recovery record at offset"
- " %zu",
- (size_t)off);
- }
- if (rec.r.len > rec.r.max_len) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: invalid recovery"
- " length %zu",
- (size_t)rec.r.len);
- }
- if (rec.r.eof > ntdb->file->map_size) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: invalid old EOF"
- " %zu", (size_t)rec.r.eof);
- }
- found_recovery = true;
- len = sizeof(rec.r) + rec.r.max_len;
- } else if (frec_magic(&rec.f) == NTDB_FREE_MAGIC) {
- len = sizeof(rec.u) + frec_len(&rec.f);
- if (off + len > ntdb->file->map_size) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: free overlength"
- " %llu at offset %llu",
- (long long)len,
- (long long)off);
- }
- /* This record should be in free lists. */
- if (frec_ftable(&rec.f) != NTDB_FTABLE_NONE
- && !append(ntdb, fr, num_free, off)) {
- return ntdb_logerr(ntdb, NTDB_ERR_OOM,
- NTDB_LOG_ERROR,
- "ntdb_check: tracking %zu'th"
- " free record.", *num_free);
- }
- } else if (rec_magic(&rec.u) == NTDB_USED_MAGIC
- || rec_magic(&rec.u) == NTDB_CHAIN_MAGIC
- || rec_magic(&rec.u) == NTDB_HTABLE_MAGIC
- || rec_magic(&rec.u) == NTDB_FTABLE_MAGIC
- || rec_magic(&rec.u) == NTDB_CAP_MAGIC) {
- uint64_t klen, dlen, extra;
-
- /* This record is used! */
- if (!append(ntdb, used, num_used, off)) {
- return ntdb_logerr(ntdb, NTDB_ERR_OOM,
- NTDB_LOG_ERROR,
- "ntdb_check: tracking %zu'th"
- " used record.", *num_used);
- }
-
- klen = rec_key_length(&rec.u);
- dlen = rec_data_length(&rec.u);
- extra = rec_extra_padding(&rec.u);
-
- len = sizeof(rec.u) + klen + dlen + extra;
- if (off + len > ntdb->file->map_size) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: used overlength"
- " %llu at offset %llu",
- (long long)len,
- (long long)off);
- }
-
- /* A used record must be at least as large as a free record. */
- if (len < sizeof(rec.f)) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: too short record"
- " %llu at %llu",
- (long long)len,
- (long long)off);
- }
-
- /* Check that records have correct 0 at end (but may
- * not in future). */
- if (extra && !features
- && rec_magic(&rec.u) != NTDB_CAP_MAGIC) {
- const char *p;
- char c;
- /* Only the first padding byte is examined. */
- p = ntdb_access_read(ntdb, off + sizeof(rec.u)
- + klen + dlen, 1, false);
- if (NTDB_PTR_IS_ERR(p))
- return NTDB_PTR_ERR(p);
- c = *p;
- ntdb_access_release(ntdb, p);
-
- if (c != '\0') {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check:"
- " non-zero extra"
- " at %llu",
- (long long)off);
- }
- }
- } else {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: Bad magic 0x%llx"
- " at offset %zu",
- (long long)rec_magic(&rec.u),
- (size_t)off);
- }
- }
-
- /* We must have found recovery area if there was one. */
- if (recovery != 0 && !found_recovery) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: expected a recovery area at %zu",
- (size_t)recovery);
- }
-
- return NTDB_SUCCESS;
-}
-
-/*
- * Check the consistency of an entire database.
- *
- * Holds the all-record read lock and the expand read lock while it:
- *  1. validates the header and capability list (check_header);
- *  2. scans every record, collecting used and free offsets (check_linear);
- *  3. walks every free table (check_free_table);
- *  4. walks the hash table (check_hash), calling check(key, data, data_ptr)
- *     for each record when check is non-NULL;
- *  5. verifies that the free lists accounted for every free record found
- *     by the scan.
- *
- * If the NTDB_CANT_CHECK flag is set, only a warning is logged and the
- * result of that ntdb_logerr() call is returned without checking anything.
- *
- * Returns NTDB_SUCCESS, or the first error found.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb,
- enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *),
- void *data)
-{
- ntdb_off_t *fr = NULL, *used = NULL;
- ntdb_off_t ft = 0, recovery = 0;
- size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0,
- num_capabilities = 0;
- uint64_t features = 0;
- enum NTDB_ERROR ecode;
-
- if (ntdb->flags & NTDB_CANT_CHECK) {
- return ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
- "ntdb_check: database has unknown capability,"
- " cannot check.");
- }
-
- ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- ecode = ntdb_lock_expand(ntdb, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- ntdb_allrecord_unlock(ntdb, F_RDLCK);
- return ecode;
- }
-
- ecode = check_header(ntdb, &recovery, &features, &num_capabilities);
- if (ecode != NTDB_SUCCESS)
- goto out;
-
- /* First we do a linear scan, checking all records. */
- ecode = check_linear(ntdb, &used, &num_used, &fr, &num_free, features,
- recovery);
- if (ecode != NTDB_SUCCESS)
- goto out;
-
- /* num_ftables doubles as the index of the table being checked. */
- for (ft = first_ftable(ntdb); ft; ft = next_ftable(ntdb, ft)) {
- if (NTDB_OFF_IS_ERR(ft)) {
- ecode = NTDB_OFF_TO_ERR(ft);
- goto out;
- }
- ecode = check_free_table(ntdb, ft, num_ftables, fr, num_free,
- &num_found);
- if (ecode != NTDB_SUCCESS)
- goto out;
- num_ftables++;
- }
-
- /* FIXME: Check key uniqueness? */
- ecode = check_hash(ntdb, used, num_used, num_ftables + num_capabilities,
- check, data);
- if (ecode != NTDB_SUCCESS)
- goto out;
-
- /* num_found counts free records reached through the free tables. */
- if (num_found != num_free) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Not all entries are in"
- " free table");
- }
-
-out:
- /* Common exit: drop both locks and free the offset arrays (may be NULL). */
- ntdb_allrecord_unlock(ntdb, F_RDLCK);
- ntdb_unlock_expand(ntdb, F_RDLCK);
- ntdb->free_fn(fr, ntdb->alloc_data);
- ntdb->free_fn(used, ntdb->alloc_data);
- return ecode;
-}
+++ /dev/null
-Interface differences between TDB and NTDB.
-
-- ntdb shares 'struct TDB_DATA' with tdb, but TDB defines the TDB_DATA
- typedef, whereas ntdb defines NTDB_DATA (ie. both are compatible).
- If you include both ntdb.h and tdb.h, #include tdb.h first,
- otherwise you'll get a compile error when tdb.h re-defines struct
- TDB_DATA.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
-- ntdb functions return NTDB_SUCCESS (ie 0) on success, and a negative
- error on failure, whereas tdb functions returned 0 on success, and
- -1 on failure. tdb then used tdb_error() to determine the error;
- this API is nasty if we ever want to support threads, so is not supported.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d)
- {
- if (tdb_store(tdb, key, d, TDB_INSERT) == -1) {
- printf("store failed: %s\n", tdb_errorstr(tdb));
- }
- }
-
- void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d)
- {
- enum NTDB_ERROR e;
-
- e = ntdb_store(ntdb, key, d, NTDB_INSERT);
- if (e) {
- printf("store failed: %s\n", ntdb_errorstr(e));
- }
- }
-
-- ntdb's ntdb_fetch() returns an error, tdb's returned the data directly
- (or tdb_null, and you were supposed to check tdb_error() to find out why).
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- void tdb_example(struct tdb_context *tdb, TDB_DATA key)
- {
- TDB_DATA data;
-
- data = tdb_fetch(tdb, key);
- if (!data.dptr) {
- printf("fetch failed: %s\n", tdb_errorstr(tdb));
- }
- }
-
- void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key)
- {
- NTDB_DATA data;
- enum NTDB_ERROR e;
-
- e = ntdb_fetch(ntdb, key, &data);
- if (e) {
- printf("fetch failed: %s\n", ntdb_errorstr(e));
- }
- }
-
-- ntdb's ntdb_nextkey() frees the old key's dptr, in tdb you needed to do
- this manually.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- void tdb_example(struct tdb_context *tdb)
- {
- TDB_DATA key, next, data;
-
- for (key = tdb_firstkey(tdb); key.dptr; key = next) {
- printf("Got key!\n");
- next = tdb_nextkey(tdb, key);
- free(key.dptr);
- }
- }
-
-
- void ntdb_example(struct ntdb_context *ntdb)
- {
- NTDB_DATA k, data;
- enum NTDB_ERROR e;
-
- for (e = ntdb_firstkey(ntdb,&k); !e; e = ntdb_nextkey(ntdb,&k))
- printf("Got key!\n");
- }
-
-- Unlike tdb_open/tdb_open_ex, ntdb_open does not allow NULL names,
- even for NTDB_INTERNAL dbs, and thus ntdb_name() never returns NULL.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- struct tdb_context *tdb_example(void)
- {
- return tdb_open(NULL, 0, TDB_INTERNAL, O_RDWR, 0);
- }
-
- struct ntdb_context *ntdb_example(void)
- {
- return ntdb_open("example", NTDB_INTERNAL, O_RDWR, 0, NULL);
- }
-
-- ntdb uses a linked list of attribute structures to implement logging and
- alternate hashes. tdb used tdb_open_ex, which was not extensible.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- /* Custom hash function */
- static unsigned int my_tdb_hash_func(TDB_DATA *key)
- {
- return key->dsize;
- }
-
- struct tdb_context *tdb_example(void)
- {
- return tdb_open_ex("example.tdb", 0, TDB_DEFAULT,
- O_CREAT|O_RDWR, 0600, NULL, my_tdb_hash_func);
- }
-
- /* Custom hash function */
- static unsigned int my_ntdb_hash_func(const void *key, size_t len,
- uint32_t seed, void *data)
- {
- return len;
- }
-
- struct ntdb_context *ntdb_example(void)
- {
- union ntdb_attribute hash;
-
- hash.base.attr = NTDB_ATTRIBUTE_HASH;
- hash.base.next = NULL;
- hash.hash.fn = my_ntdb_hash_func;
- return ntdb_open("example.ntdb", NTDB_DEFAULT,
- O_CREAT|O_RDWR, 0600, &hash);
- }
-
-- tdb's tdb_open/tdb_open_ex took an explicit hash size, defaulting to
- 131. ntdb uses an attribute for this, defaulting to 8192.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- struct tdb_context *tdb_example(void)
- {
- return tdb_open("example.tdb", 10007, TDB_DEFAULT,
- O_CREAT|O_RDWR, 0600);
- }
-
- struct ntdb_context *ntdb_example(void)
- {
- union ntdb_attribute hashsize;
-
- hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE;
- hashsize.base.next = NULL;
- hashsize.hashsize.size = 16384;
- return ntdb_open("example.ntdb", NTDB_DEFAULT,
- O_CREAT|O_RDWR, 0600, &hashsize);
- }
-
-- ntdb's log function is simpler than tdb's log function. The string
- is already formatted, is not terminated by a '\n', and it takes an
- enum ntdb_log_level (not a tdb_debug_level), which has only three
- values: NTDB_LOG_ERROR, NTDB_LOG_USE_ERROR and NTDB_LOG_WARNING.
-
- #include <tdb.h>
- #include <ntdb.h>
-
- static void tdb_log(struct tdb_context *tdb,
- enum tdb_debug_level level, const char *fmt, ...)
- {
- va_list ap;
- const char *name;
-
- switch (level) {
- case TDB_DEBUG_FATAL:
- fprintf(stderr, "FATAL: ");
- break;
- case TDB_DEBUG_ERROR:
- fprintf(stderr, "ERROR: ");
- break;
- case TDB_DEBUG_WARNING:
- fprintf(stderr, "WARNING: ");
- break;
- case TDB_DEBUG_TRACE:
- /* Don't print out tracing. */
- return;
- }
-
- name = tdb_name(tdb);
- if (!name) {
- name = "unnamed";
- }
-
- fprintf(stderr, "tdb(%s):", name);
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- }
-
- struct tdb_context *tdb_example(void)
- {
- struct tdb_logging_context lctx;
-
- lctx.log_fn = tdb_log;
- return tdb_open_ex("example.tdb", 0, TDB_DEFAULT,
- O_CREAT|O_RDWR, 0600, &lctx, NULL);
- }
-
- static void ntdb_log(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
- {
- switch (level) {
- case NTDB_LOG_ERROR:
- fprintf(stderr, "ERROR: ");
- break;
- case NTDB_LOG_USE_ERROR:
- /* We made a mistake, so abort. */
- abort();
- break;
- case NTDB_LOG_WARNING:
- fprintf(stderr, "WARNING: ");
- break;
- }
-
- fprintf(stderr, "ntdb(%s):%s:%s\n",
- ntdb_name(ntdb), ntdb_errorstr(ecode), message);
- }
-
- struct ntdb_context *ntdb_example(void)
- {
- union ntdb_attribute log;
-
- log.base.attr = NTDB_ATTRIBUTE_LOG;
- log.base.next = NULL;
- log.log.fn = ntdb_log;
- return ntdb_open("example.ntdb", NTDB_DEFAULT,
- O_CREAT|O_RDWR, 0600, &log);
- }
-
-- ntdb provides ntdb_deq() for comparing two NTDB_DATA, and ntdb_mkdata() for
- creating an NTDB_DATA.
-
- #include <tdb.h>
- #include <ntdb.h>
-
- void tdb_example(struct tdb_context *tdb)
- {
- TDB_DATA data, key;
-
- key.dsize = strlen("hello");
- key.dptr = "hello";
- data = tdb_fetch(tdb, key);
- if (data.dsize == key.dsize
- && !memcmp(data.dptr, key.dptr, key.dsize)) {
- printf("key is same as data\n");
- }
- free(data.dptr);
- }
-
- void ntdb_example(struct ntdb_context *ntdb)
- {
- NTDB_DATA data, key;
-
- key = ntdb_mkdata("hello", strlen("hello"));
- if (ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS) {
- if (ntdb_deq(key, data)) {
- printf("key is same as data\n");
- }
- free(data.dptr);
- }
- }
-
-- ntdb's ntdb_parse_record() takes a type-checked callback data
- pointer, not a void * (though a void * pointer still works). The
- callback function is allowed to do read operations on the database,
- or write operations if you first call ntdb_lockall(). TDB's
- tdb_parse_record() did not allow any database access within the
- callback, and could crash if you tried.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- static int tdb_parser(TDB_DATA key, TDB_DATA data, void *private_data)
- {
- TDB_DATA *expect = private_data;
-
- return data.dsize == expect->dsize
- && !memcmp(data.dptr, expect->dptr, data.dsize);
- }
-
- void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d)
- {
- switch (tdb_parse_record(tdb, key, tdb_parser, &d)) {
- case -1:
- printf("parse failed: %s\n", tdb_errorstr(tdb));
- break;
- case 0:
- printf("data was different!\n");
- break;
- case 1:
- printf("data was same!\n");
- break;
- }
- }
-
- static int ntdb_parser(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expect)
- {
- return ntdb_deq(data, *expect);
- }
-
- void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d)
- {
- enum NTDB_ERROR e;
-
- e = ntdb_parse_record(ntdb, key, ntdb_parser, &d);
- switch (e) {
- case 0:
- printf("data was different!\n");
- break;
- case 1:
- printf("data was same!\n");
- break;
- default:
- printf("parse failed: %s\n", ntdb_errorstr(e));
- break;
- }
- }
-
-- ntdb does locking on read-only databases (ie. O_RDONLY passed to ntdb_open).
- tdb did not: use the NTDB_NOLOCK flag if you want to suppress locking.
-
- Example:
- #include <tdb.h>
- #include <ntdb.h>
-
- struct tdb_context *tdb_example(void)
- {
- return tdb_open("example.tdb", 0, TDB_DEFAULT, O_RDONLY, 0);
- }
-
- struct ntdb_context *ntdb_example(void)
- {
- return ntdb_open("example.ntdb", NTDB_NOLOCK, O_RDONLY, 0, NULL);
- }
-
-- Failure inside a transaction (such as a lock function failing) does
- not implicitly cancel the transaction; you still need to call
- ntdb_transaction_cancel().
-
- #include <tdb.h>
- #include <ntdb.h>
-
- void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d)
- {
- if (tdb_transaction_start(tdb) == -1) {
- printf("transaction failed: %s\n", tdb_errorstr(tdb));
- return;
- }
-
- if (tdb_store(tdb, key, d, TDB_REPLACE) == -1) {
- printf("store failed: %s\n", tdb_errorstr(tdb));
- return;
- }
- if (tdb_transaction_commit(tdb) == -1) {
- printf("commit failed: %s\n", tdb_errorstr(tdb));
- }
- }
-
- void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d)
- {
- enum NTDB_ERROR e;
-
- e = ntdb_transaction_start(ntdb);
- if (e) {
- printf("transaction failed: %s\n", ntdb_errorstr(e));
- return;
- }
-
- e = ntdb_store(ntdb, key, d, NTDB_REPLACE);
- if (e) {
- printf("store failed: %s\n", ntdb_errorstr(e));
- ntdb_transaction_cancel(ntdb);
- return;
- }
-
- e = ntdb_transaction_commit(ntdb);
- if (e) {
- printf("commit failed: %s\n", ntdb_errorstr(e));
- }
- }
-
-- There is no NTDB_CLEAR_IF_FIRST flag; it has severe scalability and
- API problems. If necessary, you can emulate this by using the open
- hook and placing a 1-byte lock at offset 4. If your program forks
- and exits, you will need to place this lock again in the child before
- the parent exits.
-
- Example:
-
- #include <tdb.h>
- #include <ntdb.h>
-
- struct tdb_context *tdb_example(void)
- {
- return tdb_open("example.tdb", 0, TDB_CLEAR_IF_FIRST,
- O_CREAT|O_RDWR, 0600);
- }
-
- static enum NTDB_ERROR clear_if_first(int fd, void *unused)
- {
- /* We always hold a lock at offset 4, so we can tell if
- * anyone else is. */
- struct flock fl;
-
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 4; /* ACTIVE_LOCK */
- fl.l_len = 1;
-
- if (fcntl(fd, F_SETLK, &fl) == 0) {
- /* We must be first ones to open it! Clear it. */
- if (ftruncate(fd, 0) != 0) {
- return NTDB_ERR_IO;
- }
- }
- fl.l_type = F_RDLCK;
- if (fcntl(fd, F_SETLKW, &fl) != 0) {
- return NTDB_ERR_IO;
- }
- return NTDB_SUCCESS;
- }
-
- struct ntdb_context *ntdb_example(void)
- {
- union ntdb_attribute open_attr;
-
- open_attr.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
- open_attr.openhook.base.next = NULL;
- open_attr.openhook.fn = clear_if_first;
-
- return ntdb_open("example.ntdb", NTDB_DEFAULT,
- O_CREAT|O_RDWR, 0600, &open_attr);
- }
-
-- ntdb traversals are not reliable if the database is changed during
- the traversal, ie your traversal may not cover all elements, or may
- cover elements multiple times. As a special exception, deleting the
- current record within ntdb_traverse() is reliable.
-
-- There is no ntdb_traverse_read, since ntdb_traverse does not hold
- a lock across the entire traversal anyway. If you want to make sure
- that your traversal function does not write to the database, you can
- set and clear the NTDB_RDONLY flag around the traversal.
-
-- ntdb does not need tdb_reopen() or tdb_reopen_all(). If you call
- fork() during certain operations, the child should close the
- ntdb, or complete the operations before continuing to use the ntdb:
-
- ntdb_transaction_start(): child must ntdb_transaction_cancel()
- ntdb_lockall(): child must call ntdb_unlockall()
- ntdb_lockall_read(): child must call ntdb_unlockall_read()
- ntdb_chainlock(): child must call ntdb_chainunlock()
- ntdb_parse_record() callback: child must return from ntdb_parse_record()
-
-- ntdb will not open a non-ntdb file, even if O_CREAT is specified. tdb
- will overwrite an unknown file in that case.
+++ /dev/null
-#LyX 2.0 created this file. For more info see http://www.lyx.org/
-\lyxformat 413
-\begin_document
-\begin_header
-\textclass article
-\use_default_options true
-\maintain_unincluded_children false
-\language english
-\language_package default
-\inputencoding auto
-\fontencoding global
-\font_roman default
-\font_sans default
-\font_typewriter default
-\font_default_family default
-\use_non_tex_fonts false
-\font_sc false
-\font_osf false
-\font_sf_scale 100
-\font_tt_scale 100
-
-\graphics default
-\default_output_format default
-\output_sync 0
-\bibtex_command default
-\index_command default
-\paperfontsize default
-\use_hyperref false
-\papersize default
-\use_geometry false
-\use_amsmath 1
-\use_esint 1
-\use_mhchem 1
-\use_mathdots 1
-\cite_engine basic
-\use_bibtopic false
-\use_indices false
-\paperorientation portrait
-\suppress_date false
-\use_refstyle 0
-\index Index
-\shortcut idx
-\color #008000
-\end_index
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation indent
-\paragraph_indentation default
-\quotes_language english
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tracking_changes true
-\output_changes true
-\html_math_output 0
-\html_css_as_file 0
-\html_be_strict false
-\end_header
-
-\begin_body
-
-\begin_layout Title
-NTDB: Redesigning The Trivial DataBase
-\end_layout
-
-\begin_layout Author
-Rusty Russell, IBM Corporation
-\end_layout
-
-\begin_layout Date
-19 June 2012
-\end_layout
-
-\begin_layout Abstract
-The Trivial DataBase on-disk format is 32 bits; with usage cases heading
- towards the 4G limit, that must change.
- This required breakage provides an opportunity to revisit TDB's other design
- decisions and reassess them.
-\end_layout
-
-\begin_layout Section
-Introduction
-\end_layout
-
-\begin_layout Standard
-The Trivial DataBase was originally written by Andrew Tridgell as a simple
- key/data pair storage system with the same API as dbm, but allowing multiple
- readers and writers while being small enough (< 1000 lines of C) to include
- in SAMBA.
- The simple design created in 1999 has proven surprisingly robust and performant
-, used in Samba versions 3 and 4 as well as numerous other projects.
- Its useful life was greatly increased by the (backwards-compatible!) addition
- of transaction support in 2005.
-\end_layout
-
-\begin_layout Standard
-The wider variety and greater demands of TDB-using code has led to some
- organic growth of the API, as well as some compromises on the implementation.
- None of these, by themselves, are seen as show-stoppers, but the cumulative
- effect is a loss of elegance over the initial, simple TDB implementation.
- Here is a table of the approximate number of lines of implementation code
- and number of API functions at the end of each year:
-\end_layout
-
-\begin_layout Standard
-\begin_inset Tabular
-<lyxtabular version="3" rows="12" columns="3">
-<features tabularvalignment="middle">
-<column alignment="center" valignment="top" width="0">
-<column alignment="center" valignment="top" width="0">
-<column alignment="center" valignment="top" width="0">
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-Year End
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-API Functions
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-Lines of C Code Implementation
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1999
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-13
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1195
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2000
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-24
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-1725
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2001
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-32
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2228
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2002
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-35
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2481
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2003
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-35
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2552
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2004
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-40
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2584
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2005
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-38
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2647
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2006
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-52
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-3754
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2007
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-66
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-4398
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2008
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-71
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-4768
-\end_layout
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-2009
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-73
-\end_layout
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\begin_layout Plain Layout
-5715
-\end_layout
-
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-This review is an attempt to catalog and address all the known issues with
- TDB and create solutions which address the problems without significantly
- increasing complexity; all involved are far too aware of the dangers of
- second system syndrome in rewriting a successful project like this.
-\end_layout
-
-\begin_layout Standard
-Note: the final decision was to make ntdb a separate library, with a separate
- 'ntdb' namespace so both can potentially be linked together.
- This document still refers to
-\begin_inset Quotes eld
-\end_inset
-
-tdb
-\begin_inset Quotes erd
-\end_inset
-
- everywhere, for simplicity.
-\end_layout
-
-\begin_layout Section
-API Issues
-\end_layout
-
-\begin_layout Subsection
-tdb_open_ex Is Not Expandable
-\end_layout
-
-\begin_layout Standard
-The tdb_open() call was expanded to tdb_open_ex(), which added an optional
- hashing function and an optional logging function argument.
- Additional arguments to open would require the introduction of a tdb_open_ex2
- call etc.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\begin_inset CommandInset label
-LatexCommand label
-name "attributes"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-tdb_open() will take a linked-list of attributes:
-\end_layout
-
-\begin_layout LyX-Code
-enum tdb_attribute {
-\end_layout
-
-\begin_layout LyX-Code
- TDB_ATTRIBUTE_LOG = 0,
-\end_layout
-
-\begin_layout LyX-Code
- TDB_ATTRIBUTE_HASH = 1
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_base {
-\end_layout
-
-\begin_layout LyX-Code
- enum tdb_attribute attr;
-\end_layout
-
-\begin_layout LyX-Code
- union tdb_attribute *next;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_log {
-\end_layout
-
-\begin_layout LyX-Code
- struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
-\end_layout
-
-\begin_layout LyX-Code
- tdb_log_func log_fn;
-\end_layout
-
-\begin_layout LyX-Code
- void *log_private;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_attribute_hash {
-\end_layout
-
-\begin_layout LyX-Code
- struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
-\end_layout
-
-\begin_layout LyX-Code
- tdb_hash_func hash_fn;
-\end_layout
-
-\begin_layout LyX-Code
- void *hash_private;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout LyX-Code
-union tdb_attribute {
-\end_layout
-
-\begin_layout LyX-Code
- struct tdb_attribute_base base;
-\end_layout
-
-\begin_layout LyX-Code
- struct tdb_attribute_log log;
-\end_layout
-
-\begin_layout LyX-Code
- struct tdb_attribute_hash hash;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-This allows future attributes to be added, even if this expands the size
- of the union.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_traverse Makes Impossible Guarantees
-\end_layout
-
-\begin_layout Standard
-tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it
- was thought that it was important to guarantee that all records which exist
- at the start and end of the traversal would be included, and no record
- would be included twice.
-\end_layout
-
-\begin_layout Standard
-This adds complexity (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Reliable-Traversal-Adds"
-
-\end_inset
-
-) and does not work anyway for records which are altered (in particular,
- those which are expanded may be effectively deleted and re-added behind
- the traversal).
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "traverse-Proposed-Solution"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Abandon the guarantee.
- You will see every record if no changes occur during your traversal, otherwise
- you will see some subset.
- You can prevent changes by using a transaction or the locking API.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
- Delete-during-traverse will still delete every record, too (assuming no
- other changes).
-\end_layout
-
-\begin_layout Subsection
-Nesting of Transactions Is Fraught
-\end_layout
-
-\begin_layout Standard
-TDB has alternated between allowing nested transactions and not allowing
- them.
- Various paths in the Samba codebase assume that transactions will nest,
- and in a sense they can: the operation is only committed to disk when the
- outer transaction is committed.
- There are two problems, however:
-\end_layout
-
-\begin_layout Enumerate
-Canceling the inner transaction will cause the outer transaction commit
- to fail, and will not undo any operations since the inner transaction began.
- This problem is soluble with some additional internal code.
-\end_layout
-
-\begin_layout Enumerate
-An inner transaction commit can be cancelled by the outer transaction.
- This is desirable in the way which Samba's database initialization code
- uses transactions, but could be a surprise to any users expecting a successful
- transaction commit to expose changes to others.
-\end_layout
-
-\begin_layout Standard
-The current solution is to specify the behavior at tdb_open(), with the
- default currently that nested transactions are allowed.
- This flag can also be changed at runtime.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Given the usage patterns, it seems that the
-\begin_inset Quotes eld
-\end_inset
-
-least-surprise
-\begin_inset Quotes erd
-\end_inset
-
- behavior of disallowing nested transactions should become the default.
- Additionally, it seems the outer transaction is the only code which knows
- whether inner transactions should be allowed, so a flag to indicate this
- could be added to tdb_transaction_start.
- However, this behavior can be simulated with a wrapper which uses tdb_add_flags
-() and tdb_remove_flags(), so the API should not be expanded for this relatively
--obscure case.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete; the nesting flag has been removed.
-\end_layout
-
-\begin_layout Subsection
-Incorrect Hash Function is Not Detected
-\end_layout
-
-\begin_layout Standard
-tdb_open_ex() allows the calling code to specify a different hash function
- to use, but does not check that all other processes accessing this tdb
- are using the same hash function.
- The result is that records are missing from tdb_fetch().
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The header should contain an example hash result (eg.
- the hash of 0xdeadbeef), and tdb_open_ex() should check that the given
- hash function produces the same answer, or fail the tdb_open call.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_set_max_dead/TDB_VOLATILE Expose Implementation
-\end_layout
-
-\begin_layout Standard
-In response to scalability issues with the free list (
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB-Freelist-Is"
-
-\end_inset
-
-) two API workarounds have been incorporated in TDB: tdb_set_max_dead()
- and the TDB_VOLATILE flag to tdb_open.
- The latter actually calls the former with an argument of
-\begin_inset Quotes eld
-\end_inset
-
-5
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-This code allows deleted records to accumulate without putting them in the
- free list.
- On delete we iterate through each chain and free them in a batch if there
- are more than max_dead entries.
- These are never otherwise recycled except as a side-effect of a tdb_repack.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-With the scalability problems of the freelist solved, this API can be removed.
- The TDB_VOLATILE flag may still be useful as a hint that store and delete
- of records will be at least as common as fetch in order to allow some internal
- tuning, but initially will become a no-op.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
- Unknown flags cause tdb_open() to fail as well, so they can be detected
- at runtime.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB-Files-Cannot"
-
-\end_inset
-
-TDB Files Cannot Be Opened Multiple Times In The Same Process
-\end_layout
-
-\begin_layout Standard
-No process can open the same TDB twice; we check and disallow it.
- This is an unfortunate side-effect of fcntl locks, which operate on a per-file
- rather than per-file-descriptor basis, and do not nest.
- Thus, closing any file descriptor on a file clears all the locks obtained
- by this process, even if they were placed using a different file descriptor!
-\end_layout
-
-\begin_layout Standard
-Note that even if this were solved, deadlock could occur if operations were
- nested: this is a more manageable programming error in most cases.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We could lobby POSIX to fix the perverse rules, or at least lobby Linux
- to violate them so that the most common implementation does not have this
- restriction.
- This would be a generally good idea for other fcntl lock users.
-\end_layout
-
-\begin_layout Standard
-Samba uses a wrapper which hands out the same tdb_context to multiple callers
- if this happens, and does simple reference counting.
- We should do this inside the tdb library, which already emulates lock nesting
- internally; it would need to recognize when deadlock occurs within a single
- process.
- This would create a new failure mode for tdb operations (while we currently
- handle locking failures, they are impossible in normal use and a process
- encountering them can do little but give up).
-\end_layout
-
-\begin_layout Standard
-I do not see benefit in an additional tdb_open flag to indicate whether
- re-opening is allowed, although there may be some benefit to adding a
- call to detect when a tdb_context is shared, to allow others to create such
- an API.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB API Is Not POSIX Thread-safe
-\end_layout
-
-\begin_layout Standard
-The TDB API uses an error code which can be queried after an operation to
- determine what went wrong.
- This programming model does not work with threads, unless specific additional
- guarantees are given by the implementation.
- In addition, even otherwise-independent threads cannot open the same TDB
- (as in
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB-Files-Cannot"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Rearchitecting the API to include a tdb_errcode pointer would be a great
- deal of churn, but fortunately most functions return 0 on success and -1
- on error: we can change these to return 0 on success and a negative error
- code on error, and the API remains similar to previous.
- The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA
- pointer and return an error code.
- It is also simpler to have tdb_nextkey replace its key argument in place,
- freeing up any old .dptr.
-\end_layout
-
-\begin_layout Standard
-Internal locking is required to make sure that fcntl locks do not overlap
- between threads, and also that the global list of tdbs is maintained.
-\end_layout
-
-\begin_layout Standard
-The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe
- version of the library, and otherwise no overhead will exist.
- Alternatively, a hooking mechanism similar to that proposed for
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Proposed-Solution-locking-hook"
-
-\end_inset
-
- could be used to enable pthread locking at runtime.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Incomplete; API has been changed but thread safety has not been implemented.
-\end_layout
-
-\begin_layout Subsection
-*_nonblock Functions And *_mark Functions Expose Implementation
-\end_layout
-
-\begin_layout Standard
-CTDB
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-Clustered TDB, see http://ctdb.samba.org
-\end_layout
-
-\end_inset
-
- wishes to operate on TDB in a non-blocking manner.
- This is currently done as follows:
-\end_layout
-
-\begin_layout Enumerate
-Call the _nonblock variant of an API function (eg.
- tdb_lockall_nonblock).
- If this fails:
-\end_layout
-
-\begin_layout Enumerate
-Fork a child process, and wait for it to call the normal variant (eg.
- tdb_lockall).
-\end_layout
-
-\begin_layout Enumerate
-If the child succeeds, call the _mark variant to indicate we already have
- the locks (eg.
- tdb_lockall_mark).
-\end_layout
-
-\begin_layout Enumerate
-Upon completion, tell the child to release the locks (eg.
- tdb_unlockall).
-\end_layout
-
-\begin_layout Enumerate
-Indicate to tdb that it should consider the locks removed (eg.
- tdb_unlockall_mark).
-\end_layout
-
-\begin_layout Standard
-There are several issues with this approach.
- Firstly, adding two new variants of each function clutters the API for
- an obscure use, and so not all functions have three variants.
- Secondly, it assumes that all paths of the functions ask for the same locks,
- otherwise the parent process will have to get a lock which the child doesn't
- have under some circumstances.
- I don't believe this is currently the case, but it constrains the implementatio
-n.
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "Proposed-Solution-locking-hook"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Implement a hook for locking methods, so that the caller can control the
- calls to create and remove fcntl locks.
- In this scenario, ctdbd would operate as follows:
-\end_layout
-
-\begin_layout Enumerate
-Call the normal API function, eg tdb_lockall().
-\end_layout
-
-\begin_layout Enumerate
-When the lock callback comes in, check if the child has the lock.
- Initially, this is always false.
- If the child does have the lock, return 0.
- Otherwise, try to obtain it in non-blocking mode.
- If that fails, return EWOULDBLOCK.
-\end_layout
-
-\begin_layout Enumerate
-Release locks in the unlock callback as normal.
-\end_layout
-
-\begin_layout Enumerate
-If tdb_lockall() fails, see if we recorded a lock failure; if so, call the
- child to repeat the operation.
-\end_layout
-
-\begin_layout Enumerate
-The child records what locks it obtains, and returns that information to
- the parent.
-\end_layout
-
-\begin_layout Enumerate
-When the child has succeeded, goto 1.
-\end_layout
-
-\begin_layout Standard
-This is flexible enough to handle any potential locking scenario, even when
- lock requirements change.
- It can be optimized so that the parent does not release locks, just tells
- the child which locks it doesn't need to obtain.
-\end_layout
-
-\begin_layout Standard
-It also keeps the complexity out of the API, and in ctdbd where it is needed.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-tdb_chainlock Functions Expose Implementation
-\end_layout
-
-\begin_layout Standard
-tdb_chainlock locks some number of records, including the record indicated
- by the given key.
- This gives atomicity guarantees; no-one can start a transaction, alter,
- read or delete that key while the lock is held.
-\end_layout
-
-\begin_layout Standard
-It also makes the same guarantee for any other key in the chain, which is
- an internal implementation detail and potentially a cause for deadlock.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- It would be nice to have an explicit single entry lock which affected no
- other keys.
- Unfortunately, this won't work for an entry which doesn't exist.
- Thus while chainlock may be implemented more efficiently for the existing
- case, it will still have overlap issues with the non-existing case.
- So it is best to keep the current (lack of) guarantee about which records
- will be affected to avoid constraining our implementation.
-\end_layout
-
-\begin_layout Subsection
-Signal Handling is Not Race-Free
-\end_layout
-
-\begin_layout Standard
-The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate
- that the tdb locking code should return with a failure, rather than trying
- again when a signal is received (and errno == EAGAIN).
- This is usually used to implement timeouts.
-\end_layout
-
-\begin_layout Standard
-Unfortunately, this does not work in the case where the signal is received
- before the tdb code enters the fcntl() call to place the lock: the code
- will sleep within the fcntl() code, unaware that the signal wants it to
- exit.
- In the case of long timeouts, this does not happen in practice.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The locking hooks proposed in
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "Proposed-Solution-locking-hook"
-
-\end_inset
-
- would allow the user to decide on whether to fail the lock acquisition
- on a signal.
- This allows the caller to choose their own compromise: they could narrow
- the race by checking immediately before the fcntl call.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-It may be possible to make this race-free in some implementations by having
- the signal handler alter the struct flock to make it invalid.
- This will cause the fcntl() lock call to fail with EINVAL if the signal
- occurs before the kernel is entered, otherwise EAGAIN.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-The API Uses Gratuitous Typedefs, Capitals
-\end_layout
-
-\begin_layout Standard
-typedefs are useful for providing source compatibility when types can differ
- across implementations, or arguably in the case of function pointer definitions
- which are hard for humans to parse.
- Otherwise it is simply obfuscation and pollutes the namespace.
-\end_layout
-
-\begin_layout Standard
-Capitalization is usually reserved for compile-time constants and macros.
-\end_layout
-
-\begin_layout Description
-TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the
- definition isn't visible to the API user anyway.
-\end_layout
-
-\begin_layout Description
-TDB_DATA There is no reason to use this over struct TDB_DATA; the struct
- needs to be understood by the API user.
-\end_layout
-
-\begin_layout Description
-struct
-\begin_inset space ~
-\end_inset
-
-TDB_DATA This would normally be called 'struct tdb_data'.
-\end_layout
-
-\begin_layout Description
-enum
-\begin_inset space ~
-\end_inset
-
-TDB_ERROR Similarly, this would normally be enum tdb_error.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- Introducing lower case variants would please pedants like myself, but if
- it were done the existing ones should be kept.
- There is little point forcing a purely cosmetic change upon tdb users.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "tdb_log_func-Doesnt-Take"
-
-\end_inset
-
-tdb_log_func Doesn't Take The Private Pointer
-\end_layout
-
-\begin_layout Standard
-For API compatibility reasons, the logging function needs to call tdb_get_loggin
-g_private() to retrieve the pointer registered by the tdb_open_ex for logging.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-It should simply take an extra argument, since we are prepared to break
- the API/ABI.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Various Callback Functions Are Not Typesafe
-\end_layout
-
-\begin_layout Standard
-The callback functions in tdb_set_logging_function (after
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "tdb_log_func-Doesnt-Take"
-
-\end_inset
-
- is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check
- all take void * and must internally convert it to the argument type they
- were expecting.
-\end_layout
-
-\begin_layout Standard
-If this type changes, the compiler will not produce warnings on the callers,
- since it only sees void *.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-With careful use of macros, we can create callback functions which give
- a warning when used on gcc and the types of the callback and its private
- argument differ.
- Unsupported compilers will not give a warning, which is no worse than now.
- In addition, the callbacks become clearer, as they need not use void *
- for their parameter.
-\end_layout
-
-\begin_layout Standard
-See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic
-\end_layout
-
-\begin_layout Standard
-The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should
- be cleared if the caller discovers it is the only process with the TDB
- open.
- However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not
- be detected, so will have the TDB erased underneath them (usually resulting
- in a crash).
-\end_layout
-
-\begin_layout Standard
-There is a similar issue on fork(); if the parent exits (or otherwise closes
- the tdb) before the child calls tdb_reopen_all() to establish the lock
- used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener
- at that moment will believe it alone has opened the TDB and will erase
- it.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove TDB_CLEAR_IF_FIRST.
- Other workarounds are possible, but see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "TDB_CLEAR_IF_FIRST-Imposes-Performance"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
- An open hook is provided to replicate this functionality if required.
-\end_layout
-
-\begin_layout Subsection
-Extending The Header Is Difficult
-\end_layout
-
-\begin_layout Standard
-We have reserved (zeroed) words in the TDB header, which can be used for
- future features.
- If the future features are compulsory, the version number must be updated
- to prevent old code from accessing the database.
- But if the future feature is optional, we have no way of telling if older
- code is accessing the database or not.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The header should contain a
-\begin_inset Quotes eld
-\end_inset
-
-format variant
-\begin_inset Quotes erd
-\end_inset
-
- value (64-bit).
- This is divided into two 32-bit parts:
-\end_layout
-
-\begin_layout Enumerate
-The lower part reflects the format variant understood by code accessing
- the database.
-\end_layout
-
-\begin_layout Enumerate
-The upper part reflects the format variant you must understand to write
- to the database (otherwise you can only open for reading).
-\end_layout
-
-\begin_layout Standard
-The latter field can only be written at creation time, the former should
- be written under the OPEN_LOCK when opening the database for writing, if
- the variant of the code is lower than the current lowest variant.
-\end_layout
-
-\begin_layout Standard
-This should allow backwards-compatible features to be added, and detection
- if older code (which doesn't understand the feature) writes to the database.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Record Headers Are Not Expandable
-\end_layout
-
-\begin_layout Standard
-If we later want to add (say) checksums on keys and data, it would require
- another format change, which we'd like to avoid.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We often have extra padding at the tail of a record.
- If we ensure that the first byte (if any) of this padding is zero, we will
- have a way for future changes to detect code which doesn't understand a
- new format: the new code would write (say) a 1 at the tail, and thus if
- there is no tail or the first byte is 0, we would know the extension is
- not present on that record.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Does Not Use Talloc
-\end_layout
-
-\begin_layout Standard
-Many users of TDB (particularly Samba) use the talloc allocator, and thus
- have to wrap TDB in a talloc context to use it conveniently.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The allocation within TDB is not complicated enough to justify the use of
- talloc, and I am reluctant to force another (excellent) library on TDB
- users.
- Nonetheless a compromise is possible.
- An attribute (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "attributes"
-
-\end_inset
-
-) can be added later to tdb_open() to provide an alternate allocation mechanism,
- specifically for talloc but usable by any other allocator (which would
- ignore the
-\begin_inset Quotes eld
-\end_inset
-
-context
-\begin_inset Quotes erd
-\end_inset
-
- argument).
-\end_layout
-
-\begin_layout Standard
-This would form a talloc hierarchy as expected, but the caller would still
- have to attach a destructor to the tdb context returned from tdb_open to
- close it.
- All TDB_DATA fields would be children of the tdb_context, and the caller
- would still have to manage them (using talloc_free() or talloc_steal()).
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute.
-\end_layout
-
-\begin_layout Section
-Performance And Scalability Issues
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB_CLEAR_IF_FIRST-Imposes-Performance"
-
-\end_inset
-
-TDB_CLEAR_IF_FIRST Imposes Performance Penalty
-\end_layout
-
-\begin_layout Standard
-When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset
- 4 (aka.
- the ACTIVE_LOCK).
- While these locks never conflict in normal tdb usage, they do add substantial
- overhead for most fcntl lock implementations when the kernel scans to detect
- if a lock conflict exists.
- This is often a single linked list, making the time to acquire and release
- a fcntl lock O(N) where N is the number of processes with the TDB open,
- not the number actually doing work.
-\end_layout
-
-\begin_layout Standard
-In a Samba server it is common to have huge numbers of clients sitting idle,
- and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag.
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-There is a flag to tdb_reopen_all() which is used for this optimization:
- if the parent process will outlive the child, the child does not need the
- ACTIVE_LOCK.
- This is a workaround for this very performance issue.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove the flag.
- It was a neat idea, but even trivial servers tend to know when they are
- initializing for the first time and can simply unlink the old tdb at that
- point.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Files Have a 4G Limit
-\end_layout
-
-\begin_layout Standard
-This seems to be becoming an issue (so much for
-\begin_inset Quotes eld
-\end_inset
-
-trivial
-\begin_inset Quotes erd
-\end_inset
-
-!), particularly for ldb.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-A new, incompatible TDB format which uses 64 bit offsets internally rather
- than 32 bit as now.
- For simplicity of endian conversion (which TDB does on the fly if required),
- all values will be 64 bit on disk.
- In practice, some upper bits may be used for other purposes, but at least
- 56 bits will be available for file offsets.
-\end_layout
-
-\begin_layout Standard
-tdb_open() will automatically detect old-version files, and even create them
- if TDB_VERSION6 is specified to tdb_open.
-\end_layout
-
-\begin_layout Standard
-32 bit processes will still be able to access TDBs larger than 4G (assuming
- that their off_t allows them to seek to 64 bits); they will gracefully
- fall back as they fail to mmap.
- This can happen already with large TDBs.
-\end_layout
-
-\begin_layout Standard
-Old versions of tdb will fail to open the new TDB files (since 28 August
- 2009, commit 398d0c29290: prior to that any unrecognized file format would
- be erased and initialized as a fresh tdb!)
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-TDB Records Have a 4G Limit
-\end_layout
-
-\begin_layout Standard
-This has not been a reported problem, and the API uses size_t which can
- be 64 bit on 64 bit platforms.
- However, other limits may have made such an issue moot.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Record sizes will be 64 bit, with an error returned on 32 bit platforms
- which try to access such records (the current implementation would return
- TDB_ERR_OOM in a similar case).
- It seems unlikely that 32 bit keys will be a limitation, so the implementation
- may not support this (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Records-Incur-A"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Hash Size Is Determined At TDB Creation Time
-\end_layout
-
-\begin_layout Standard
-TDB contains a number of hash chains in the header; the number is specified
- at creation time, and defaults to 131.
- This is such a bottleneck on large databases (as each hash chain gets quite
- long), that LDB uses 10,000 for this hash.
- In general it is impossible to know what the 'right' answer is at database
- creation time.
-\end_layout
-
-\begin_layout Subsubsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:Hash-Size-Solution"
-
-\end_inset
-
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-After comprehensive performance testing on various scalable hash variants
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying
- because I was previously convinced that an expanding tree of hashes would
- be very close to optimal.
-\end_layout
-
-\end_inset
-
-, it became clear that it is hard to beat a straight linear hash table which
- doubles in size when it reaches saturation.
- Unfortunately, altering the hash table introduces serious locking complications
-: the entire hash table needs to be locked to enlarge the hash table, and
- others might be holding locks.
- Particularly insidious are insertions done under tdb_chainlock.
-\end_layout
-
-\begin_layout Standard
-Thus an expanding layered hash will be used: an array of hash groups, with
- each hash group exploding into pointers to lower hash groups once it fills,
- turning into a hash tree.
- This has implications for locking: we must lock the entire group in case
- we need to expand it, yet we don't know how deep the tree is at that point.
-\end_layout
-
-\begin_layout Standard
-Note that bits from the hash table entries should be stolen to hold more
- hash bits to reduce the penalty of collisions.
- We can use the otherwise-unused lower 3 bits.
- If we limit the size of the database to 64 exabytes, we can use the top
- 8 bits of the hash entry as well.
- These 11 bits would reduce false positives down to 1 in 2000 which is more
- than we need: we can use one of the bits to indicate that the extra hash
- bits are valid.
- This means we can choose not to re-hash all entries when we expand a hash
- group; simply use the next bits we need and mark them invalid.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Ignore.
- Scaling the hash automatically proved inefficient at small hash sizes;
- we default to an 8192-element hash (changeable via NTDB_ATTRIBUTE_HASHSIZE),
- and when buckets clash we expand to an array of hash entries.
- This scales slightly better than the tdb chain (due to the 8 top bits containin
-g extra hash).
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "TDB-Freelist-Is"
-
-\end_inset
-
-TDB Freelist Is Highly Contended
-\end_layout
-
-\begin_layout Standard
-TDB uses a single linked list for the free list.
- Allocation occurs as follows, using heuristics which have evolved over
- time:
-\end_layout
-
-\begin_layout Enumerate
-Get the free list lock for this whole operation.
-\end_layout
-
-\begin_layout Enumerate
-Multiply length by 1.25, so we always over-allocate by 25%.
-\end_layout
-
-\begin_layout Enumerate
-Set the slack multiplier to 1.
-\end_layout
-
-\begin_layout Enumerate
-Examine the current freelist entry: if it is > length but < the current
- best case, remember it as the best case.
-\end_layout
-
-\begin_layout Enumerate
-Multiply the slack multiplier by 1.05.
-\end_layout
-
-\begin_layout Enumerate
-If our best fit so far is less than length * slack multiplier, return it.
- The slack will be turned into a new free record if it's large enough.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, go onto the next freelist entry.
-\end_layout
-
-\begin_layout Standard
-Deleting a record occurs as follows:
-\end_layout
-
-\begin_layout Enumerate
-Lock the hash chain for this whole operation.
-\end_layout
-
-\begin_layout Enumerate
-Walk the chain to find the record, keeping the prev pointer offset.
-\end_layout
-
-\begin_layout Enumerate
-If max_dead is non-zero:
-\end_layout
-
-\begin_deeper
-\begin_layout Enumerate
-Walk the hash chain again and count the dead records.
-\end_layout
-
-\begin_layout Enumerate
-If it's more than max_dead, bulk free all the dead ones (similar to steps
- 4 and below, but the lock is only obtained once).
-\end_layout
-
-\begin_layout Enumerate
-Simply mark this record as dead and return.
-\end_layout
-
-\end_deeper
-\begin_layout Enumerate
-Get the free list lock for the remainder of this operation.
-\end_layout
-
-\begin_layout Enumerate
-\begin_inset CommandInset label
-LatexCommand label
-name "right-merging"
-
-\end_inset
-
-Examine the following block to see if it is free; if so, enlarge the current
- block and remove that block from the free list.
- This was disabled, as removal from the free list was O(entries-in-free-list).
-\end_layout
-
-\begin_layout Enumerate
-Examine the preceding block to see if it is free: for this reason, each
- block has a 32-bit tailer which indicates its length.
- If it is free, expand it to cover our new block and return.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, prepend ourselves to the free list.
-\end_layout
-
-\begin_layout Standard
-Disabling right-merging (step
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "right-merging"
-
-\end_inset
-
-) causes fragmentation; the other heuristics proved insufficient to address
- this, so the final answer to this was that when we expand the TDB file
- inside a transaction commit, we repack the entire tdb.
-\end_layout
-
-\begin_layout Standard
-The single list lock limits our allocation rate; due to the other issues
- this is not currently seen as a bottleneck.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The first step is to remove all the current heuristics, as they obviously
- interact, then examine them once the lock contention is addressed.
-\end_layout
-
-\begin_layout Standard
-The free list must be split to reduce contention.
- Assuming perfect free merging, we can at most have 1 free list entry for
- each entry.
- This implies that the number of free lists is related to the size of the
- hash table, but as it is rare to walk a large number of free list entries
- we can use far fewer, say 1/32 of the number of hash buckets.
-\end_layout
-
-\begin_layout Standard
-It seems tempting to try to reuse the hash implementation which we use for
- records here, but we have two ways of searching for free entries: for allocatio
-n we search by size (and possibly zone) which produces too many clashes
- for our hash table to handle well, and for coalescing we search by address.
- Thus an array of doubly-linked free lists seems preferable.
-\end_layout
-
-\begin_layout Standard
-There are various benefits in using per-size free lists (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-) but it's not clear this would reduce contention in the common case where
- all processes are allocating/freeing the same size.
- Thus we almost certainly need to divide in other ways: the most obvious
- is to divide the file into zones, and using a free list (or table of free
- lists) for each.
- This approximates address ordering.
-\end_layout
-
-\begin_layout Standard
-Unfortunately it is difficult to know what heuristics should be used to
- determine zone sizes, and our transaction code relies on being able to
- create a
-\begin_inset Quotes eld
-\end_inset
-
-recovery area
-\begin_inset Quotes erd
-\end_inset
-
- by simply appending to the file (difficult if it would need to create a
- new zone header).
- Thus we use a linked-list of free tables; currently we only ever create
- one, but if there is more than one we choose one at random to use.
- In future we may use heuristics to add new free tables on contention.
- We only expand the file when all free tables are exhausted.
-\end_layout
-
-\begin_layout Standard
-The basic algorithm is as follows.
- Freeing is simple:
-\end_layout
-
-\begin_layout Enumerate
-Identify the correct free list.
-\end_layout
-
-\begin_layout Enumerate
-Lock the corresponding list.
-\end_layout
-
-\begin_layout Enumerate
-Re-check the list (we didn't have a lock, sizes could have changed): relock
- if necessary.
-\end_layout
-
-\begin_layout Enumerate
-Place the freed entry in the list.
-\end_layout
-
-\begin_layout Standard
-Allocation is a little more complicated, as we perform delayed coalescing
- at this point:
-\end_layout
-
-\begin_layout Enumerate
-Pick a free table; usually the previous one.
-\end_layout
-
-\begin_layout Enumerate
-Lock the corresponding list.
-\end_layout
-
-\begin_layout Enumerate
-If the top entry is large enough, remove it from the list and return it.
-\end_layout
-
-\begin_layout Enumerate
-Otherwise, coalesce entries in the list.
- If there was no entry large enough, unlock the list and try the next largest
- list.
-\end_layout
-
-\begin_layout Enumerate
-If no list has an entry which meets our needs, try the next free table.
-\end_layout
-
-\begin_layout Enumerate
-If no free table satisfies, expand the file.
-\end_layout
-
-\begin_layout Standard
-This optimizes rapid insert/delete of free list entries by not coalescing
- them all the time.
- First-fit address ordering seems to be fairly good for keeping
- fragmentation low (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-).
- Note that address ordering does not need a tailer to coalesce, though if
- we needed one we could have one cheaply: see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Records-Incur-A"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-Each free entry has the free table number in the header: less than 255.
- It also contains a doubly-linked list for easy deletion.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:TDB-Becomes-Fragmented"
-
-\end_inset
-
-TDB Becomes Fragmented
-\end_layout
-
-\begin_layout Standard
-Much of this is a result of allocation strategy
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute
-xas.edu/pub/garbage/malloc/ismm98.ps
-\end_layout
-
-\end_inset
-
- and deliberate hobbling of coalescing; internal fragmentation (aka overallocati
-on) is deliberately set at 25%, and external fragmentation is only cured
- by the decision to repack the entire db when a transaction commit needs
- to enlarge the file.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The 25% overhead on allocation works in practice for ldb because indexes
- tend to expand by one record at a time.
- This internal fragmentation can be resolved by having an
-\begin_inset Quotes eld
-\end_inset
-
-expanded
-\begin_inset Quotes erd
-\end_inset
-
- bit in the header to note entries that have previously expanded, and allocating
- more space for them.
-\end_layout
-
-\begin_layout Standard
-There is a spectrum of possible solutions for external fragmentation:
- one is to use a fragmentation-avoiding allocation strategy such as best-fit
- address-order allocator.
- The other end of the spectrum would be to use a bump allocator (very fast
- and simple) and simply repack the file when we reach the end.
-\end_layout
-
-\begin_layout Standard
-There are three problems with efficient fragmentation-avoiding allocators:
- they are non-trivial, they tend to use a single free list for each size,
- and there's no evidence that tdb allocation patterns will match those recorded
- for general allocators (though it seems likely).
-\end_layout
-
-\begin_layout Standard
-Thus we don't spend too much effort on external fragmentation; we will be
- no worse than the current code if we need to repack on occasion.
- More effort is spent on reducing freelist contention, and reducing overhead.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:Records-Incur-A"
-
-\end_inset
-
-Records Incur A 28-Byte Overhead
-\end_layout
-
-\begin_layout Standard
-Each TDB record has a header as follows:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_record {
-\end_layout
-
-\begin_layout LyX-Code
- tdb_off_t next; /* offset of the next record in the list */
-\end_layout
-
-\begin_layout LyX-Code
- tdb_len_t rec_len; /* total byte length of record */
-\end_layout
-
-\begin_layout LyX-Code
- tdb_len_t key_len; /* byte length of key */
-\end_layout
-
-\begin_layout LyX-Code
- tdb_len_t data_len; /* byte length of data */
-\end_layout
-
-\begin_layout LyX-Code
- uint32_t full_hash; /* the full 32 bit hash of the key */
-\end_layout
-
-\begin_layout LyX-Code
- uint32_t magic; /* try to catch errors */
-\end_layout
-
-\begin_layout LyX-Code
- /* the following union is implied:
-\end_layout
-
-\begin_layout LyX-Code
- union {
-\end_layout
-
-\begin_layout LyX-Code
- char record[rec_len];
-\end_layout
-
-\begin_layout LyX-Code
- struct {
-\end_layout
-
-\begin_layout LyX-Code
- char key[key_len];
-\end_layout
-
-\begin_layout LyX-Code
- char data[data_len];
-\end_layout
-
-\begin_layout LyX-Code
- }
-\end_layout
-
-\begin_layout LyX-Code
- uint32_t totalsize; (tailer)
-\end_layout
-
-\begin_layout LyX-Code
- }
-\end_layout
-
-\begin_layout LyX-Code
- */
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-Naively, this would double to a 56-byte overhead on a 64 bit implementation.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-We can use various techniques to reduce this for an allocated block:
-\end_layout
-
-\begin_layout Enumerate
-The 'next' pointer is not required, as we are using a flat hash table.
-\end_layout
-
-\begin_layout Enumerate
-'rec_len' can instead be expressed as an addition to key_len and data_len
- (it accounts for wasted or overallocated length in the record).
- Since the record length is always a multiple of 8, we can conveniently
- fit it in 32 bits (representing up to 35 bits).
-\end_layout
-
-\begin_layout Enumerate
-'key_len' and 'data_len' can be reduced.
- I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine
- the two into one 64-bit field and use a 5 bit value which indicates at
- what bit to divide the two.
- Keys are unlikely to scale as fast as data, so I'm assuming a maximum key
- size of 32 bits.
-\end_layout
-
-\begin_layout Enumerate
-'full_hash' is used to avoid a memcmp on the
-\begin_inset Quotes eld
-\end_inset
-
-miss
-\begin_inset Quotes erd
-\end_inset
-
- case, but this is diminishing returns after a handful of bits (at 10 bits,
- it reduces 99.9% of false memcmp).
- As an aside, as the lower bits are already incorporated in the hash table
- resolution, the upper bits should be used here.
- Note that it's not clear that these bits will be a win, given the extra
- bits in the hash table itself (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:Hash-Size-Solution"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Enumerate
-'magic' does not need to be enlarged: it currently reflects one of 5 values
- (used, free, dead, recovery, and unused_recovery).
- It is useful for quick sanity checking however, and should not be eliminated.
-\end_layout
-
-\begin_layout Enumerate
-'tailer' is only used to coalesce free blocks (so a block to the right can
- find the header to check if this block is free).
- This can be replaced by a single 'free' bit in the header of the following
- block (and the tailer only exists in free blocks).
-\begin_inset Foot
-status collapsed
-
-\begin_layout Plain Layout
-This technique from Thomas Standish.
- Data Structure Techniques.
- Addison-Wesley, Reading, Massachusetts, 1980.
-\end_layout
-
-\end_inset
-
- The current proposed coalescing algorithm doesn't need this, however.
-\end_layout
-
-\begin_layout Standard
-This produces a 16 byte used header like this:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_used_record {
-\end_layout
-
-\begin_layout LyX-Code
- uint32_t used_magic : 16,
-\end_layout
-
-\begin_layout LyX-Code
-
-\end_layout
-
-\begin_layout LyX-Code
- key_data_divide: 5,
-\end_layout
-
-\begin_layout LyX-Code
- top_hash: 11;
-\end_layout
-
-\begin_layout LyX-Code
- uint32_t extra_octets;
-\end_layout
-
-\begin_layout LyX-Code
- uint64_t key_and_data_len;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-And a free record like this:
-\end_layout
-
-\begin_layout LyX-Code
-struct tdb_free_record {
-\end_layout
-
-\begin_layout LyX-Code
- uint64_t free_magic: 8,
-\end_layout
-
-\begin_layout LyX-Code
- prev : 56;
-\end_layout
-
-\begin_layout LyX-Code
-
-\end_layout
-
-\begin_layout LyX-Code
- uint64_t free_table: 8,
-\end_layout
-
-\begin_layout LyX-Code
- total_length : 56;
-\end_layout
-
-\begin_layout LyX-Code
- uint64_t next;
-\end_layout
-
-\begin_layout LyX-Code
-};
-\end_layout
-
-\begin_layout Standard
-Note that by limiting valid offsets to 56 bits, we can pack everything we
- need into 3 64-bit words, meaning our minimum record size is 8 bytes.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Transaction Commit Requires 4 fdatasync
-\end_layout
-
-\begin_layout Standard
-The current transaction algorithm is:
-\end_layout
-
-\begin_layout Enumerate
-write_recovery_data();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-write_recovery_header();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-overwrite_with_new_data();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Enumerate
-remove_recovery_header();
-\end_layout
-
-\begin_layout Enumerate
-sync();
-\end_layout
-
-\begin_layout Standard
-On current ext3, each sync flushes all data to disk, so the next 3 syncs
- are relatively cheap.
- But this could become a performance bottleneck on other filesystems such
- as ext4.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Neil Brown points out that this is overzealous, and only one sync is needed:
-\end_layout
-
-\begin_layout Enumerate
-Bundle the recovery data, a transaction counter and a strong checksum of
- the new data.
-\end_layout
-
-\begin_layout Enumerate
-Strong checksum that whole bundle.
-\end_layout
-
-\begin_layout Enumerate
-Store the bundle in the database.
-\end_layout
-
-\begin_layout Enumerate
-Overwrite the oldest of the two recovery pointers in the header (identified
- using the transaction counter) with the offset of this bundle.
-\end_layout
-
-\begin_layout Enumerate
-sync.
-\end_layout
-
-\begin_layout Enumerate
-Write the new data to the file.
-\end_layout
-
-\begin_layout Standard
-Checking for recovery means identifying the latest bundle with a valid checksum
- and using the new data checksum to ensure that it has been applied.
- This is more expensive than the current check, but need only be done at
- open.
- For running databases, a separate header field can be used to indicate
- a transaction in progress; we need only check for recovery if this is set.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "sub:TDB-Does-Not"
-
-\end_inset
-
-TDB Does Not Have Snapshot Support
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
- At some point you say
-\begin_inset Quotes eld
-\end_inset
-
-use a real database
-\begin_inset Quotes erd
-\end_inset
-
- (but see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "replay-attribute"
-
-\end_inset
-
-).
-\end_layout
-
-\begin_layout Standard
-But as a thought experiment, if we implemented transactions to only overwrite
- free entries (this is tricky: there must not be a header in each entry
- which indicates whether it is free; instead, that must be determined by
- presence in metadata elsewhere), and a pointer to the hash table, we could
- create an entirely new commit without destroying existing data.
- Then it would be easy to implement snapshots in a similar way.
-\end_layout
-
-\begin_layout Standard
-This would not allow arbitrary changes to the database, such as tdb_repack
- does, and would require more space (since we have to preserve the current
- and future entries at once).
- If we used hash trees rather than one big hash table, we might only have
- to rewrite some sections of the hash, too.
-\end_layout
-
-\begin_layout Standard
-We could then implement snapshots using a similar method, using multiple
- different hash tables/free tables.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-Transactions Cannot Operate in Parallel
-\end_layout
-
-\begin_layout Standard
-This would be useless for ldb, as it hits the index records with just about
- every update.
- It would add significant complexity in resolving clashes, and cause
- all transaction callers to write their code to loop in the case where the
- transactions spuriously failed.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None (but see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "replay-attribute"
-
-\end_inset
-
-).
- We could solve a small part of the problem by providing read-only transactions.
- These would allow one write transaction to begin, but it could not commit
- until all r/o transactions are done.
- This would require a new RO_TRANSACTION_LOCK, which would be upgraded on
- commit.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\begin_layout Subsection
-Default Hash Function Is Suboptimal
-\end_layout
-
-\begin_layout Standard
-The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially
- if we expand it to 64 bits), and works best when the hash bucket size is
- a prime number (which also means a slow modulus).
- In addition, it is highly predictable which could potentially lead to a
- Denial of Service attack in some TDB uses.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-The Jenkins lookup3 hash
-\begin_inset Foot
-status open
-
-\begin_layout Plain Layout
-http://burtleburtle.net/bob/c/lookup3.c
-\end_layout
-
-\end_inset
-
- is a fast and superbly-mixing hash.
- It's used by the Linux kernel and almost everything else.
- This has the particular properties that it takes an initial seed, and produces
- two 32 bit hash numbers, which we can combine into a 64-bit hash.
-\end_layout
-
-\begin_layout Standard
-The seed should be created at tdb-creation time from some random source,
- and placed in the header.
- This is far from foolproof, but adds a little bit of protection against
- hash bombing.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-\begin_inset CommandInset label
-LatexCommand label
-name "Reliable-Traversal-Adds"
-
-\end_inset
-
-Reliable Traversal Adds Complexity
-\end_layout
-
-\begin_layout Standard
-We lock a record during traversal iteration, and try to grab that lock in
- the delete code.
- If that grab on delete fails, we simply mark it deleted and continue onwards;
- traversal checks for this condition and does the delete when it moves off
- the record.
-\end_layout
-
-\begin_layout Standard
-If traversal terminates, the dead record may be left indefinitely.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-Remove reliability guarantees; see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "traverse-Proposed-Solution"
-
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Complete.
-\end_layout
-
-\begin_layout Subsection
-Fcntl Locking Adds Overhead
-\end_layout
-
-\begin_layout Standard
-Placing a fcntl lock means a system call, as does removing one.
- This is actually one reason why transactions can be faster (everything
- is locked once at transaction start).
- In the uncontended case, this overhead can theoretically be eliminated.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
-\end_layout
-
-\begin_layout Standard
-We tried this before with spinlock support, in the early days of TDB, and
- it didn't make much difference except in manufactured benchmarks.
-\end_layout
-
-\begin_layout Standard
-We could use spinlocks (with futex kernel support under Linux), but it means
- that we lose automatic cleanup when a process dies with a lock.
- There is a method of auto-cleanup under Linux, but it's not supported by
- other operating systems.
- We could reintroduce a clear-if-first-style lock and sweep for dead futexes
- on open, but that wouldn't help the normal case of one concurrent opener
- dying.
- Increasingly elaborate repair schemes could be considered, but they require
- an ABI change (everyone must use them) anyway, so there's no need to do
- this at the same time as everything else.
-\end_layout
-
-\begin_layout Subsection
-Some Transactions Don't Require Durability
-\end_layout
-
-\begin_layout Standard
-Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast)
- usage, and occasionally empties the results into a transactional TDB.
- This kind of usage prioritizes performance over durability: as long as
- we are consistent, data can be lost.
-\end_layout
-
-\begin_layout Standard
-This would be more neatly implemented inside tdb: a
-\begin_inset Quotes eld
-\end_inset
-
-soft
-\begin_inset Quotes erd
-\end_inset
-
- transaction commit (ie.
- syncless) which meant that data may be reverted on a crash.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\end_layout
-
-\begin_layout Standard
-None.
-\end_layout
-
-\begin_layout Standard
-Unfortunately any transaction scheme which overwrites old data requires
- a sync before that overwrite to avoid the possibility of corruption.
-\end_layout
-
-\begin_layout Standard
-It seems possible to use a scheme similar to that described in
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "sub:TDB-Does-Not"
-
-\end_inset
-
-, where transactions are committed without overwriting existing data, and
- an array of top-level pointers were available in the header.
- If the transaction is
-\begin_inset Quotes eld
-\end_inset
-
-soft
-\begin_inset Quotes erd
-\end_inset
-
- then we would not need a sync at all: existing processes would pick up
- the new hash table and free list and work with that.
-\end_layout
-
-\begin_layout Standard
-At some later point, a sync would allow recovery of the old data into the
- free lists (perhaps when the array of top-level pointers filled).
- On crash, tdb_open() would examine the array of top levels, and apply the
- transactions until it encountered an invalid checksum.
-\end_layout
-
-\begin_layout Subsection
-Tracing Is Fragile, Replay Is External
-\end_layout
-
-\begin_layout Standard
-The current TDB has compile-time-enabled tracing code, but it often breaks
- as it is not enabled by default.
- In a similar way, the ctdb code has an external wrapper which does replay
- tracing so it can coordinate cluster-wide transactions.
-\end_layout
-
-\begin_layout Subsubsection
-Proposed Solution
-\begin_inset CommandInset label
-LatexCommand label
-name "replay-attribute"
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Tridge points out that an attribute can be later added to tdb_open (see
-\begin_inset CommandInset ref
-LatexCommand ref
-reference "attributes"
-
-\end_inset
-
-) to provide replay/trace hooks, which could become the basis for this and
- future parallel transactions and snapshot support.
-\end_layout
-
-\begin_layout Subsubsection
-Status
-\end_layout
-
-\begin_layout Standard
-Deferred.
-\end_layout
-
-\end_body
-\end_document
+++ /dev/null
-NTDB: Redesigning The Trivial DataBase
-
-Rusty Russell, IBM Corporation
-
-19 June 2012
-
-Abstract
-
-The Trivial DataBase on-disk format is 32 bits; with usage cases
-heading towards the 4G limit, that must change. This required
-breakage provides an opportunity to revisit TDB's other design
-decisions and reassess them.
-
-1 Introduction
-
-The Trivial DataBase was originally written by Andrew Tridgell as
-a simple key/data pair storage system with the same API as dbm,
-but allowing multiple readers and writers while being small
-enough (< 1000 lines of C) to include in SAMBA. The simple design
-created in 1999 has proven surprisingly robust and performant,
-used in Samba versions 3 and 4 as well as numerous other
-projects. Its useful life was greatly increased by the
-(backwards-compatible!) addition of transaction support in 2005.
-
-The wider variety and greater demands of TDB-using code has led
-to some organic growth of the API, as well as some compromises on
-the implementation. None of these, by themselves, are seen as
-show-stoppers, but the cumulative effect is a loss of elegance
-over the initial, simple TDB implementation. Here is a table of
-the approximate number of lines of implementation code and number
-of API functions at the end of each year:
-
-
-+-----------+----------------+--------------------------------+
-| Year End | API Functions | Lines of C Code Implementation |
-+-----------+----------------+--------------------------------+
-+-----------+----------------+--------------------------------+
-| 1999 | 13 | 1195 |
-+-----------+----------------+--------------------------------+
-| 2000 | 24 | 1725 |
-+-----------+----------------+--------------------------------+
-| 2001 | 32 | 2228 |
-+-----------+----------------+--------------------------------+
-| 2002 | 35 | 2481 |
-+-----------+----------------+--------------------------------+
-| 2003 | 35 | 2552 |
-+-----------+----------------+--------------------------------+
-| 2004 | 40 | 2584 |
-+-----------+----------------+--------------------------------+
-| 2005 | 38 | 2647 |
-+-----------+----------------+--------------------------------+
-| 2006 | 52 | 3754 |
-+-----------+----------------+--------------------------------+
-| 2007 | 66 | 4398 |
-+-----------+----------------+--------------------------------+
-| 2008 | 71 | 4768 |
-+-----------+----------------+--------------------------------+
-| 2009 | 73 | 5715 |
-+-----------+----------------+--------------------------------+
-
-
-This review is an attempt to catalog and address all the known
-issues with TDB and create solutions which address the problems
-without significantly increasing complexity; all involved are far
-too aware of the dangers of second system syndrome in rewriting a
-successful project like this.
-
-Note: the final decision was to make ntdb a separate library,
-with a separate 'ntdb' namespace so both can potentially be
-linked together. This document still refers to “tdb” everywhere,
-for simplicity.
-
-2 API Issues
-
-2.1 tdb_open_ex Is Not Expandable
-
-The tdb_open() call was expanded to tdb_open_ex(), which added an
-optional hashing function and an optional logging function
-argument. Additional arguments to open would require the
-introduction of a tdb_open_ex2 call etc.
-
-2.1.1 Proposed Solution<attributes>
-
-tdb_open() will take a linked-list of attributes:
-
-enum tdb_attribute {
-
- TDB_ATTRIBUTE_LOG = 0,
-
- TDB_ATTRIBUTE_HASH = 1
-
-};
-
-struct tdb_attribute_base {
-
- enum tdb_attribute attr;
-
- union tdb_attribute *next;
-
-};
-
-struct tdb_attribute_log {
-
- struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG
-*/
-
- tdb_log_func log_fn;
-
- void *log_private;
-
-};
-
-struct tdb_attribute_hash {
-
- struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH
-*/
-
- tdb_hash_func hash_fn;
-
- void *hash_private;
-
-};
-
-union tdb_attribute {
-
- struct tdb_attribute_base base;
-
- struct tdb_attribute_log log;
-
- struct tdb_attribute_hash hash;
-
-};
-
-This allows future attributes to be added, even if this expands
-the size of the union.
-
-2.1.2 Status
-
-Complete.
-
-2.2 tdb_traverse Makes Impossible Guarantees
-
-tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions,
-and it was thought that it was important to guarantee that all
-records which exist at the start and end of the traversal would
-be included, and no record would be included twice.
-
-This adds complexity (see[Reliable-Traversal-Adds]) and does not
-work anyway for records which are altered (in particular, those
-which are expanded may be effectively deleted and re-added behind
-the traversal).
-
-2.2.1 <traverse-Proposed-Solution>Proposed Solution
-
-Abandon the guarantee. You will see every record if no changes
-occur during your traversal, otherwise you will see some subset.
-You can prevent changes by using a transaction or the locking
-API.
-
-2.2.2 Status
-
-Complete. Delete-during-traverse will still delete every record,
-too (assuming no other changes).
-
-2.3 Nesting of Transactions Is Fraught
-
-TDB has alternated between allowing nested transactions and not
-allowing them. Various paths in the Samba codebase assume that
-transactions will nest, and in a sense they can: the operation is
-only committed to disk when the outer transaction is committed.
-There are two problems, however:
-
-1. Canceling the inner transaction will cause the outer
- transaction commit to fail, and will not undo any operations
- since the inner transaction began. This problem is soluble with
- some additional internal code.
-
-2. An inner transaction commit can be cancelled by the outer
- transaction. This is desirable in the way which Samba's
- database initialization code uses transactions, but could be a
- surprise to any users expecting a successful transaction commit
- to expose changes to others.
-
-The current solution is to specify the behavior at tdb_open(),
-with the default currently that nested transactions are allowed.
-This flag can also be changed at runtime.
-
-2.3.1 Proposed Solution
-
-Given the usage patterns, it seems that the “least-surprise”
-behavior of disallowing nested transactions should become the
-default. Additionally, it seems the outer transaction is the only
-code which knows whether inner transactions should be allowed, so
-a flag to indicate this could be added to tdb_transaction_start.
-However, this behavior can be simulated with a wrapper which uses
-tdb_add_flags() and tdb_remove_flags(), so the API should not be
-expanded for this relatively-obscure case.
-
-2.3.2 Status
-
-Complete; the nesting flag has been removed.
-
-2.4 Incorrect Hash Function is Not Detected
-
-tdb_open_ex() allows the calling code to specify a different hash
-function to use, but does not check that all other processes
-accessing this tdb are using the same hash function. The result
-is that records are missing from tdb_fetch().
-
-2.4.1 Proposed Solution
-
-The header should contain an example hash result (eg. the hash of
-0xdeadbeef), and tdb_open_ex() should check that the given hash
-function produces the same answer, or fail the tdb_open call.
-
-2.4.2 Status
-
-Complete.
-
-2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation
-
-In response to scalability issues with the free list ([TDB-Freelist-Is]
-) two API workarounds have been incorporated in TDB:
-tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The
-latter actually calls the former with an argument of “5”.
-
-This code allows deleted records to accumulate without putting
-them in the free list. On delete we iterate through each chain
-and free them in a batch if there are more than max_dead entries.
-These are never otherwise recycled except as a side-effect of a
-tdb_repack.
-
-2.5.1 Proposed Solution
-
-With the scalability problems of the freelist solved, this API
-can be removed. The TDB_VOLATILE flag may still be useful as a
-hint that store and delete of records will be at least as common
-as fetch in order to allow some internal tuning, but initially
-will become a no-op.
-
-2.5.2 Status
-
-Complete. Unknown flags cause tdb_open() to fail as well, so they
-can be detected at runtime.
-
-2.6 <TDB-Files-Cannot>TDB Files Cannot Be Opened Multiple Times
- In The Same Process
-
-No process can open the same TDB twice; we check and disallow it.
-This is an unfortunate side-effect of fcntl locks, which operate
-on a per-file rather than per-file-descriptor basis, and do not
-nest. Thus, closing any file descriptor on a file clears all the
-locks obtained by this process, even if they were placed using a
-different file descriptor!
-
-Note that even if this were solved, deadlock could occur if
-operations were nested: this is a more manageable programming
-error in most cases.
-
-2.6.1 Proposed Solution
-
-We could lobby POSIX to fix the perverse rules, or at least lobby
-Linux to violate them so that the most common implementation does
-not have this restriction. This would be a generally good idea
-for other fcntl lock users.
-
-Samba uses a wrapper which hands out the same tdb_context to
-multiple callers if this happens, and does simple reference
-counting. We should do this inside the tdb library, which already
-emulates lock nesting internally; it would need to recognize when
-deadlock occurs within a single process. This would create a new
-failure mode for tdb operations (while we currently handle
-locking failures, they are impossible in normal use and a process
-encountering them can do little but give up).
-
-I do not see benefit in an additional tdb_open flag to indicate
-whether re-opening is allowed, although there may be some
-benefit to adding a call to detect when a tdb_context is shared,
-to allow others to create such an API.
-
-2.6.2 Status
-
-Complete.
-
-2.7 TDB API Is Not POSIX Thread-safe
-
-The TDB API uses an error code which can be queried after an
-operation to determine what went wrong. This programming model
-does not work with threads, unless specific additional guarantees
-are given by the implementation. In addition, even
-otherwise-independent threads cannot open the same TDB (as in[TDB-Files-Cannot]
-).
-
-2.7.1 Proposed Solution
-
-Rearchitecting the API to include a tdb_errcode pointer would be a
-great deal of churn, but fortunately most functions return 0 on
-success and -1 on error: we can change these to return 0 on
-success and a negative error code on error, and the API remains
-similar to previous. The tdb_fetch, tdb_firstkey and tdb_nextkey
-functions need to take a TDB_DATA pointer and return an error
-code. It is also simpler to have tdb_nextkey replace its key
-argument in place, freeing up any old .dptr.
-
-Internal locking is required to make sure that fcntl locks do not
-overlap between threads, and also that the global list of tdbs is
-maintained.
-
-The aim is that building tdb with -DTDB_PTHREAD will result in a
-pthread-safe version of the library, and otherwise no overhead
-will exist. Alternatively, a hooking mechanism similar to that
-proposed for[Proposed-Solution-locking-hook] could be used to
-enable pthread locking at runtime.
-
-2.7.2 Status
-
-Incomplete; API has been changed but thread safety has not been
-implemented.
-
-2.8 *_nonblock Functions And *_mark Functions Expose
- Implementation
-
-CTDB[footnote:
-Clustered TDB, see http://ctdb.samba.org
-] wishes to operate on TDB in a non-blocking manner. This is
-currently done as follows:
-
-1. Call the _nonblock variant of an API function (eg.
- tdb_lockall_nonblock). If this fails:
-
-2. Fork a child process, and wait for it to call the normal
- variant (eg. tdb_lockall).
-
-3. If the child succeeds, call the _mark variant to indicate we
- already have the locks (eg. tdb_lockall_mark).
-
-4. Upon completion, tell the child to release the locks (eg.
- tdb_unlockall).
-
-5. Indicate to tdb that it should consider the locks removed (eg.
- tdb_unlockall_mark).
-
-There are several issues with this approach. Firstly, adding two
-new variants of each function clutters the API for an obscure
-use, and so not all functions have three variants. Secondly, it
-assumes that all paths of the functions ask for the same locks,
-otherwise the parent process will have to get a lock which the
-child doesn't have under some circumstances. I don't believe this
-is currently the case, but it constrains the implementation.
-
-2.8.1 <Proposed-Solution-locking-hook>Proposed Solution
-
-Implement a hook for locking methods, so that the caller can
-control the calls to create and remove fcntl locks. In this
-scenario, ctdbd would operate as follows:
-
-1. Call the normal API function, eg tdb_lockall().
-
-2. When the lock callback comes in, check if the child has the
- lock. Initially, this is always false. If so, return 0.
- Otherwise, try to obtain it in non-blocking mode. If that
- fails, return EWOULDBLOCK.
-
-3. Release locks in the unlock callback as normal.
-
-4. If tdb_lockall() fails, see if we recorded a lock failure; if
- so, call the child to repeat the operation.
-
-5. The child records what locks it obtains, and returns that
- information to the parent.
-
-6. When the child has succeeded, goto 1.
-
-This is flexible enough to handle any potential locking scenario,
-even when lock requirements change. It can be optimized so that
-the parent does not release locks, just tells the child which
-locks it doesn't need to obtain.
-
-It also keeps the complexity out of the API, and in ctdbd where
-it is needed.
-
-2.8.2 Status
-
-Complete.
-
-2.9 tdb_chainlock Functions Expose Implementation
-
-tdb_chainlock locks some number of records, including the record
-indicated by the given key. This gave atomicity guarantees;
-no-one can start a transaction, alter, read or delete that key
-while the lock is held.
-
-It also makes the same guarantee for any other key in the chain,
-which is an internal implementation detail and potentially a
-cause for deadlock.
-
-2.9.1 Proposed Solution
-
-None. It would be nice to have an explicit single entry lock
-which affected no other keys. Unfortunately, this won't work for
-an entry which doesn't exist. Thus while chainlock may be
-implemented more efficiently for the existing case, it will still
-have overlap issues with the non-existing case. So it is best to
-keep the current (lack of) guarantee about which records will be
-affected to avoid constraining our implementation.
-
-2.10 Signal Handling is Not Race-Free
-
-The tdb_setalarm_sigptr() call allows the caller's signal handler
-to indicate that the tdb locking code should return with a
-failure, rather than trying again when a signal is received (and
-errno == EAGAIN). This is usually used to implement timeouts.
-
-Unfortunately, this does not work in the case where the signal is
-received before the tdb code enters the fcntl() call to place the
-lock: the code will sleep within the fcntl() code, unaware that
-the signal wants it to exit. In the case of long timeouts, this
-does not happen in practice.
-
-2.10.1 Proposed Solution
-
-The locking hooks proposed in[Proposed-Solution-locking-hook]
-would allow the user to decide on whether to fail the lock
-acquisition on a signal. This allows the caller to choose their
-own compromise: they could narrow the race by checking
-immediately before the fcntl call.[footnote:
-It may be possible to make this race-free in some implementations
-by having the signal handler alter the struct flock to make it
-invalid. This will cause the fcntl() lock call to fail with
-EINVAL if the signal occurs before the kernel is entered,
-otherwise EAGAIN.
-]
-
-2.10.2 Status
-
-Complete.
-
-2.11 The API Uses Gratuitous Typedefs, Capitals
-
-typedefs are useful for providing source compatibility when types
-can differ across implementations, or arguably in the case of
-function pointer definitions which are hard for humans to parse.
-Otherwise it is simply obfuscation and pollutes the namespace.
-
-Capitalization is usually reserved for compile-time constants and
-macros.
-
- TDB_CONTEXT There is no reason to use this over 'struct
- tdb_context'; the definition isn't visible to the API user
- anyway.
-
- TDB_DATA There is no reason to use this over struct TDB_DATA;
- the struct needs to be understood by the API user.
-
- struct TDB_DATA This would normally be called 'struct
- tdb_data'.
-
- enum TDB_ERROR Similarly, this would normally be enum
- tdb_error.
-
-2.11.1 Proposed Solution
-
-None. Introducing lower case variants would please pedants like
-myself, but if it were done the existing ones should be kept.
-There is little point forcing a purely cosmetic change upon tdb
-users.
-
-2.12 <tdb_log_func-Doesnt-Take>tdb_log_func Doesn't Take The
- Private Pointer
-
-For API compatibility reasons, the logging function needs to call
-tdb_get_logging_private() to retrieve the pointer registered by
-the tdb_open_ex for logging.
-
-2.12.1 Proposed Solution
-
-It should simply take an extra argument, since we are prepared to
-break the API/ABI.
-
-2.12.2 Status
-
-Complete.
-
-2.13 Various Callback Functions Are Not Typesafe
-
-The callback functions in tdb_set_logging_function (after[tdb_log_func-Doesnt-Take]
- is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read
-and tdb_check all take void * and must internally convert it to
-the argument type they were expecting.
-
-If this type changes, the compiler will not produce warnings on
-the callers, since it only sees void *.
-
-2.13.1 Proposed Solution
-
-With careful use of macros, we can create callback functions
-which give a warning when used on gcc and the types of the
-callback and its private argument differ. Unsupported compilers
-will not give a warning, which is no worse than now. In addition,
-the callbacks become clearer, as they need not use void * for
-their parameter.
-
-See CCAN's typesafe_cb module at
-http://ccan.ozlabs.org/info/typesafe_cb.html
-
-2.13.2 Status
-
-Complete.
-
-2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens,
- tdb_reopen_all Problematic
-
-The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB
-file should be cleared if the caller discovers it is the only
-process with the TDB open. However, if any caller does not
-specify TDB_CLEAR_IF_FIRST it will not be detected, so will have
-the TDB erased underneath them (usually resulting in a crash).
-
-There is a similar issue on fork(); if the parent exits (or
-otherwise closes the tdb) before the child calls tdb_reopen_all()
-to establish the lock used to indicate the TDB is opened by
-someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe
-it alone has opened the TDB and will erase it.
-
-2.14.1 Proposed Solution
-
-Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but
-see[TDB_CLEAR_IF_FIRST-Imposes-Performance].
-
-2.14.2 Status
-
-Complete. An open hook is provided to replicate this
-functionality if required.
-
-2.15 Extending The Header Is Difficult
-
-We have reserved (zeroed) words in the TDB header, which can be
-used for future features. If the future features are compulsory,
-the version number must be updated to prevent old code from
-accessing the database. But if the future feature is optional, we
-have no way of telling if older code is accessing the database or
-not.
-
-2.15.1 Proposed Solution
-
-The header should contain a “format variant” value (64-bit). This
-is divided into two 32-bit parts:
-
-1. The lower part reflects the format variant understood by code
- accessing the database.
-
-2. The upper part reflects the format variant you must understand
- to write to the database (otherwise you can only open for
- reading).
-
-The latter field can only be written at creation time, the former
-should be written under the OPEN_LOCK when opening the database
-for writing, if the variant of the code is lower than the current
-lowest variant.
-
-This should allow backwards-compatible features to be added, and
-detection if older code (which doesn't understand the feature)
-writes to the database.
-
-2.15.2 Status
-
-Complete.
-
-2.16 Record Headers Are Not Expandable
-
-If we later want to add (say) checksums on keys and data, it
-would require another format change, which we'd like to avoid.
-
-2.16.1 Proposed Solution
-
-We often have extra padding at the tail of a record. If we ensure
-that the first byte (if any) of this padding is zero, we will
-have a way for future changes to detect code which doesn't
-understand a new format: the new code would write (say) a 1 at
-the tail, and thus if there is no tail or the first byte is 0, we
-would know the extension is not present on that record.
-
-2.16.2 Status
-
-Complete.
-
-2.17 TDB Does Not Use Talloc
-
-Many users of TDB (particularly Samba) use the talloc allocator,
-and thus have to wrap TDB in a talloc context to use it
-conveniently.
-
-2.17.1 Proposed Solution
-
-The allocation within TDB is not complicated enough to justify
-the use of talloc, and I am reluctant to force another
-(excellent) library on TDB users. Nonetheless a compromise is
-possible. An attribute (see[attributes]) can be added later to
-tdb_open() to provide an alternate allocation mechanism,
-specifically for talloc but usable by any other allocator (which
-would ignore the “context” argument).
-
-This would form a talloc hierarchy as expected, but the caller
-would still have to attach a destructor to the tdb context
-returned from tdb_open to close it. All TDB_DATA fields would be
-children of the tdb_context, and the caller would still have to
-manage them (using talloc_free() or talloc_steal()).
-
-2.17.2 Status
-
-Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute.
-
-3 Performance And Scalability Issues
-
-3.1 <TDB_CLEAR_IF_FIRST-Imposes-Performance>TDB_CLEAR_IF_FIRST
- Imposes Performance Penalty
-
-When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is
-placed at offset 4 (aka. the ACTIVE_LOCK). While these locks
-never conflict in normal tdb usage, they do add substantial
-overhead for most fcntl lock implementations when the kernel
-scans to detect if a lock conflict exists. This is often a single
-linked list, making the time to acquire and release a fcntl lock
-O(N) where N is the number of processes with the TDB open, not
-the number actually doing work.
-
-In a Samba server it is common to have huge numbers of clients
-sitting idle, and thus they have weaned themselves off the
-TDB_CLEAR_IF_FIRST flag.[footnote:
-There is a flag to tdb_reopen_all() which is used for this
-optimization: if the parent process will outlive the child, the
-child does not need the ACTIVE_LOCK. This is a workaround for
-this very performance issue.
-]
-
-3.1.1 Proposed Solution
-
-Remove the flag. It was a neat idea, but even trivial servers
-tend to know when they are initializing for the first time and
-can simply unlink the old tdb at that point.
-
-3.1.2 Status
-
-Complete.
-
-3.2 TDB Files Have a 4G Limit
-
-This seems to be becoming an issue (so much for “trivial”!),
-particularly for ldb.
-
-3.2.1 Proposed Solution
-
-A new, incompatible TDB format which uses 64 bit offsets
-internally rather than 32 bit as now. For simplicity of endian
-conversion (which TDB does on the fly if required), all values
-will be 64 bit on disk. In practice, some upper bits may be used
-for other purposes, but at least 56 bits will be available for
-file offsets.
-
-tdb_open() will automatically detect the old version, and even
-create them if TDB_VERSION6 is specified to tdb_open.
-
-32 bit processes will still be able to access TDBs larger than 4G
-(assuming that their off_t allows them to seek to 64 bits), they
-will gracefully fall back as they fail to mmap. This can happen
-already with large TDBs.
-
-Old versions of tdb will fail to open the new TDB files (since 28
-August 2009, commit 398d0c29290: prior to that any unrecognized
-file format would be erased and initialized as a fresh tdb!)
-
-3.2.2 Status
-
-Complete.
-
-3.3 TDB Records Have a 4G Limit
-
-This has not been a reported problem, and the API uses size_t
-which can be 64 bit on 64 bit platforms. However, other limits
-may have made such an issue moot.
-
-3.3.1 Proposed Solution
-
-Record sizes will be 64 bit, with an error returned on 32 bit
-platforms which try to access such records (the current
-implementation would return TDB_ERR_OOM in a similar case). It
-seems unlikely that 32 bit keys will be a limitation, so the
-implementation may not support this (see[sub:Records-Incur-A]).
-
-3.3.2 Status
-
-Complete.
-
-3.4 Hash Size Is Determined At TDB Creation Time
-
-TDB contains a number of hash chains in the header; the number is
-specified at creation time, and defaults to 131. This is such a
-bottleneck on large databases (as each hash chain gets quite
-long), that LDB uses 10,000 for this hash. In general it is
-impossible to know what the 'right' answer is at database
-creation time.
-
-3.4.1 <sub:Hash-Size-Solution>Proposed Solution
-
-After comprehensive performance testing on various scalable hash
-variants[footnote:
-http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94
-This was annoying because I was previously convinced that an
-expanding tree of hashes would be very close to optimal.
-], it became clear that it is hard to beat a straight linear hash
-table which doubles in size when it reaches saturation.
-Unfortunately, altering the hash table introduces serious locking
-complications: the entire hash table needs to be locked to
-enlarge the hash table, and others might be holding locks.
-Particularly insidious are insertions done under tdb_chainlock.
-
-Thus an expanding layered hash will be used: an array of hash
-groups, with each hash group exploding into pointers to lower
-hash groups once it fills, turning into a hash tree. This has
-implications for locking: we must lock the entire group in case
-we need to expand it, yet we don't know how deep the tree is at
-that point.
-
-Note that bits from the hash table entries should be stolen to
-hold more hash bits to reduce the penalty of collisions. We can
-use the otherwise-unused lower 3 bits. If we limit the size of
-the database to 64 exabytes, we can use the top 8 bits of the
-hash entry as well. These 11 bits would reduce false positives
-down to 1 in 2000 which is more than we need: we can use one of
-the bits to indicate that the extra hash bits are valid. This
-means we can choose not to re-hash all entries when we expand a
-hash group; simply use the next bits we need and mark them
-invalid.
-
-3.4.2 Status
-
-Ignore. Scaling the hash automatically proved inefficient at
-small hash sizes; we default to a 8192-element hash (changeable
-via NTDB_ATTRIBUTE_HASHSIZE), and when buckets clash we expand to
-an array of hash entries. This scales slightly better than the
-tdb chain (due to the 8 top bits containing extra hash).
-
-3.5 <TDB-Freelist-Is>TDB Freelist Is Highly Contended
-
-TDB uses a single linked list for the free list. Allocation
-occurs as follows, using heuristics which have evolved over time:
-
-1. Get the free list lock for this whole operation.
-
-2. Multiply length by 1.25, so we always over-allocate by 25%.
-
-3. Set the slack multiplier to 1.
-
-4. Examine the current freelist entry: if it is > length but <
- the current best case, remember it as the best case.
-
-5. Multiply the slack multiplier by 1.05.
-
-6. If our best fit so far is less than length * slack multiplier,
- return it. The slack will be turned into a new free record if
- it's large enough.
-
-7. Otherwise, go onto the next freelist entry.
-
-Deleting a record occurs as follows:
-
-1. Lock the hash chain for this whole operation.
-
-2. Walk the chain to find the record, keeping the prev pointer
- offset.
-
-3. If max_dead is non-zero:
-
- (a) Walk the hash chain again and count the dead records.
-
- (b) If it's more than max_dead, bulk free all the dead ones
- (similar to steps 4 and below, but the lock is only obtained
- once).
-
- (c) Simply mark this record as dead and return.
-
-4. Get the free list lock for the remainder of this operation.
-
-5. <right-merging>Examine the following block to see if it is
- free; if so, enlarge the current block and remove that block
- from the free list. This was disabled, as removal from the free
- list was O(entries-in-free-list).
-
-6. Examine the preceding block to see if it is free: for this
- reason, each block has a 32-bit tailer which indicates its
- length. If it is free, expand it to cover our new block and
- return.
-
-7. Otherwise, prepend ourselves to the free list.
-
-Disabling right-merging (step[right-merging]) causes
-fragmentation; the other heuristics proved insufficient to
-address this, so the final answer to this was that when we expand
-the TDB file inside a transaction commit, we repack the entire
-tdb.
-
-The single list lock limits our allocation rate; due to the other
-issues this is not currently seen as a bottleneck.
-
-3.5.1 Proposed Solution
-
-The first step is to remove all the current heuristics, as they
-obviously interact, then examine them once the lock contention is
-addressed.
-
-The free list must be split to reduce contention. Assuming
-perfect free merging, we can at most have 1 free list entry for
-each entry. This implies that the number of free lists is related
-to the size of the hash table, but as it is rare to walk a large
-number of free list entries we can use far fewer, say 1/32 of the
-number of hash buckets.
-
-It seems tempting to try to reuse the hash implementation which
-we use for records here, but we have two ways of searching for
-free entries: for allocation we search by size (and possibly
-zone) which produces too many clashes for our hash table to
-handle well, and for coalescing we search by address. Thus an
-array of doubly-linked free lists seems preferable.
-
-There are various benefits in using per-size free lists (see[sub:TDB-Becomes-Fragmented]
-) but it's not clear this would reduce contention in the common
-case where all processes are allocating/freeing the same size.
-Thus we almost certainly need to divide in other ways: the most
-obvious is to divide the file into zones, and using a free list
-(or table of free lists) for each. This approximates address
-ordering.
-
-Unfortunately it is difficult to know what heuristics should be
-used to determine zone sizes, and our transaction code relies on
-being able to create a “recovery area” by simply appending to the
-file (difficult if it would need to create a new zone header).
-Thus we use a linked-list of free tables; currently we only ever
-create one, but if there is more than one we choose one at random
-to use. In future we may use heuristics to add new free tables on
-contention. We only expand the file when all free tables are
-exhausted.
-
-The basic algorithm is as follows. Freeing is simple:
-
-1. Identify the correct free list.
-
-2. Lock the corresponding list.
-
-3. Re-check the list (we didn't have a lock, sizes could have
- changed): relock if necessary.
-
-4. Place the freed entry in the list.
-
-Allocation is a little more complicated, as we perform delayed
-coalescing at this point:
-
-1. Pick a free table; usually the previous one.
-
-2. Lock the corresponding list.
-
-3. If the top entry is large enough, remove it from the list and
- return it.
-
-4. Otherwise, coalesce entries in the list. If there was no entry
- large enough, unlock the list and try the next largest list.
-
-5. If no list has an entry which meets our needs, try the next
- free table.
-
-6. If no zone satisfies, expand the file.
-
-This optimizes rapid insert/delete of free list entries by not
-coalescing them all the time. First-fit address
-ordering seems to be fairly good for keeping fragmentation low
-(see[sub:TDB-Becomes-Fragmented]). Note that address ordering
-does not need a tailer to coalesce, though if we needed one we
-could have one cheaply: see[sub:Records-Incur-A].
-
-Each free entry has the free table number in the header: less
-than 255. It also contains a doubly-linked list for easy
-deletion.
-
-3.6 <sub:TDB-Becomes-Fragmented>TDB Becomes Fragmented
-
-Much of this is a result of allocation strategy[footnote:
-The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995
-ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps
-] and deliberate hobbling of coalescing; internal fragmentation
-(aka overallocation) is deliberately set at 25%, and external
-fragmentation is only cured by the decision to repack the entire
-db when a transaction commit needs to enlarge the file.
-
-3.6.1 Proposed Solution
-
-The 25% overhead on allocation works in practice for ldb because
-indexes tend to expand by one record at a time. This internal
-fragmentation can be resolved by having an “expanded” bit in the
-header to note entries that have previously expanded, and
-allocating more space for them.
-
-There is a spectrum of possible solutions for external
-fragmentation: one is to use a fragmentation-avoiding allocation
-strategy such as best-fit address-order allocator. The other end
-of the spectrum would be to use a bump allocator (very fast and
-simple) and simply repack the file when we reach the end.
-
-There are three problems with efficient fragmentation-avoiding
-allocators: they are non-trivial, they tend to use a single free
-list for each size, and there's no evidence that tdb allocation
-patterns will match those recorded for general allocators (though
-it seems likely).
-
-Thus we don't spend too much effort on external fragmentation; we
-will be no worse than the current code if we need to repack on
-occasion. More effort is spent on reducing freelist contention,
-and reducing overhead.
-
-3.7 <sub:Records-Incur-A>Records Incur A 28-Byte Overhead
-
-Each TDB record has a header as follows:
-
-struct tdb_record {
-
- tdb_off_t next; /* offset of the next record in the list
-*/
-
- tdb_len_t rec_len; /* total byte length of record */
-
- tdb_len_t key_len; /* byte length of key */
-
- tdb_len_t data_len; /* byte length of data */
-
- uint32_t full_hash; /* the full 32 bit hash of the key */
-
- uint32_t magic; /* try to catch errors */
-
- /* the following union is implied:
-
- union {
-
- char record[rec_len];
-
- struct {
-
- char key[key_len];
-
- char data[data_len];
-
- }
-
- uint32_t totalsize; (tailer)
-
- }
-
- */
-
-};
-
-Naively, this would double to a 56-byte overhead on a 64 bit
-implementation.
-
-3.7.1 Proposed Solution
-
-We can use various techniques to reduce this for an allocated
-block:
-
-1. The 'next' pointer is not required, as we are using a flat
- hash table.
-
-2. 'rec_len' can instead be expressed as an addition to key_len
- and data_len (it accounts for wasted or overallocated length in
- the record). Since the record length is always a multiple of 8,
- we can conveniently fit it in 32 bits (representing up to 35
- bits).
-
-3. 'key_len' and 'data_len' can be reduced. I'm unwilling to
- restrict 'data_len' to 32 bits, but instead we can combine the
- two into one 64-bit field and using a 5 bit value which
- indicates at what bit to divide the two. Keys are unlikely to
- scale as fast as data, so I'm assuming a maximum key size of 32
- bits.
-
-4. 'full_hash' is used to avoid a memcmp on the “miss” case, but
- this is diminishing returns after a handful of bits (at 10
- bits, it reduces 99.9% of false memcmp). As an aside, as the
- lower bits are already incorporated in the hash table
- resolution, the upper bits should be used here. Note that it's
- not clear that these bits will be a win, given the extra bits
- in the hash table itself (see[sub:Hash-Size-Solution]).
-
-5. 'magic' does not need to be enlarged: it currently reflects
- one of 5 values (used, free, dead, recovery, and
- unused_recovery). It is useful for quick sanity checking
- however, and should not be eliminated.
-
-6. 'tailer' is only used to coalesce free blocks (so a block to
- the right can find the header to check if this block is free).
- This can be replaced by a single 'free' bit in the header of
- the following block (and the tailer only exists in free
- blocks).[footnote:
-This technique from Thomas Standish. Data Structure Techniques.
-Addison-Wesley, Reading, Massachusetts, 1980.
-] The current proposed coalescing algorithm doesn't need this,
- however.
-
-This produces a 16 byte used header like this:
-
-struct tdb_used_record {
-
- uint32_t used_magic : 16,
-
-
-
- key_data_divide: 5,
-
- top_hash: 11;
-
- uint32_t extra_octets;
-
- uint64_t key_and_data_len;
-
-};
-
-And a free record like this:
-
-struct tdb_free_record {
-
- uint64_t free_magic: 8,
-
- prev : 56;
-
-
-
- uint64_t free_table: 8,
-
- total_length : 56;
-
- uint64_t next;
-
-};
-
-Note that by limiting valid offsets to 56 bits, we can pack
-everything we need into 3 64-bit words, meaning our minimum
-record size is 8 bytes.
-
-3.7.2 Status
-
-Complete.
-
-3.8 Transaction Commit Requires 4 fdatasync
-
-The current transaction algorithm is:
-
-1. write_recovery_data();
-
-2. sync();
-
-3. write_recovery_header();
-
-4. sync();
-
-5. overwrite_with_new_data();
-
-6. sync();
-
-7. remove_recovery_header();
-
-8. sync();
-
-On current ext3, each sync flushes all data to disk, so the next
-3 syncs are relatively expensive. But this could become a
-performance bottleneck on other filesystems such as ext4.
-
-3.8.1 Proposed Solution
-
-Neil Brown points out that this is overzealous, and only one sync
-is needed:
-
-1. Bundle the recovery data, a transaction counter and a strong
- checksum of the new data.
-
-2. Strong checksum that whole bundle.
-
-3. Store the bundle in the database.
-
-4. Overwrite the oldest of the two recovery pointers in the
- header (identified using the transaction counter) with the
- offset of this bundle.
-
-5. sync.
-
-6. Write the new data to the file.
-
-Checking for recovery means identifying the latest bundle with a
-valid checksum and using the new data checksum to ensure that it
-has been applied. This is more expensive than the current check,
-but need only be done at open. For running databases, a separate
-header field can be used to indicate a transaction in progress;
-we need only check for recovery if this is set.
-
-3.8.2 Status
-
-Deferred.
-
-3.9 <sub:TDB-Does-Not>TDB Does Not Have Snapshot Support
-
-3.9.1 Proposed Solution
-
-None. At some point you say “use a real database” (but see[replay-attribute]
-).
-
-But as a thought experiment, if we implemented transactions to
-only overwrite free entries (this is tricky: there must not be a
-header in each entry which indicates whether it is free, but use
-of presence in metadata elsewhere), and a pointer to the hash
-table, we could create an entirely new commit without destroying
-existing data. Then it would be easy to implement snapshots in a
-similar way.
-
-This would not allow arbitrary changes to the database, such as
-tdb_repack does, and would require more space (since we have to
-preserve the current and future entries at once). If we used hash
-trees rather than one big hash table, we might only have to
-rewrite some sections of the hash, too.
-
-We could then implement snapshots using a similar method, using
-multiple different hash tables/free tables.
-
-3.9.2 Status
-
-Deferred.
-
-3.10 Transactions Cannot Operate in Parallel
-
-This would be useless for ldb, as it hits the index records with
-just about every update. It would add significant complexity in
-resolving clashes, and cause all the transaction callers to write
-their code to loop in the case where the transactions spuriously
-failed.
-
-3.10.1 Proposed Solution
-
-None (but see[replay-attribute]). We could solve a small part of
-the problem by providing read-only transactions. These would
-allow one write transaction to begin, but it could not commit
-until all r/o transactions are done. This would require a new
-RO_TRANSACTION_LOCK, which would be upgraded on commit.
-
-3.10.2 Status
-
-Deferred.
-
-3.11 Default Hash Function Is Suboptimal
-
-The Knuth-inspired multiplicative hash used by tdb is fairly slow
-(especially if we expand it to 64 bits), and works best when the
-hash bucket size is a prime number (which also means a slow
-modulus). In addition, it is highly predictable which could
-potentially lead to a Denial of Service attack in some TDB uses.
-
-3.11.1 Proposed Solution
-
-The Jenkins lookup3 hash[footnote:
-http://burtleburtle.net/bob/c/lookup3.c
-] is a fast and superbly-mixing hash. It's used by the Linux
-kernel and almost everything else. This has the particular
-properties that it takes an initial seed, and produces two 32 bit
-hash numbers, which we can combine into a 64-bit hash.
-
-The seed should be created at tdb-creation time from some random
-source, and placed in the header. This is far from foolproof, but
-adds a little bit of protection against hash bombing.
-
-3.11.2 Status
-
-Complete.
-
-3.12 <Reliable-Traversal-Adds>Reliable Traversal Adds Complexity
-
-We lock a record during traversal iteration, and try to grab that
-lock in the delete code. If that grab on delete fails, we simply
-mark it deleted and continue onwards; traversal checks for this
-condition and does the delete when it moves off the record.
-
-If traversal terminates, the dead record may be left
-indefinitely.
-
-3.12.1 Proposed Solution
-
-Remove reliability guarantees; see[traverse-Proposed-Solution].
-
-3.12.2 Status
-
-Complete.
-
-3.13 Fcntl Locking Adds Overhead
-
-Placing a fcntl lock means a system call, as does removing one.
-This is actually one reason why transactions can be faster
-(everything is locked once at transaction start). In the
-uncontended case, this overhead can theoretically be eliminated.
-
-3.13.1 Proposed Solution
-
-None.
-
-We tried this before with spinlock support, in the early days of
-TDB, and it didn't make much difference except in manufactured
-benchmarks.
-
-We could use spinlocks (with futex kernel support under Linux),
-but it means that we lose automatic cleanup when a process dies
-with a lock. There is a method of auto-cleanup under Linux, but
-it's not supported by other operating systems. We could
-reintroduce a clear-if-first-style lock and sweep for dead
-futexes on open, but that wouldn't help the normal case of one
-concurrent opener dying. Increasingly elaborate repair schemes
-could be considered, but they require an ABI change (everyone
-must use them) anyway, so there's no need to do this at the same
-time as everything else.
-
-3.14 Some Transactions Don't Require Durability
-
-Volker points out that gencache uses a CLEAR_IF_FIRST tdb for
-normal (fast) usage, and occasionally empties the results into a
-transactional TDB. This kind of usage prioritizes performance
-over durability: as long as we are consistent, data can be lost.
-
-This would be more neatly implemented inside tdb: a “soft”
-transaction commit (ie. syncless) which meant that data may be
-reverted on a crash.
-
-3.14.1 Proposed Solution
-
-None.
-
-Unfortunately any transaction scheme which overwrites old data
-requires a sync before that overwrite to avoid the possibility of
-corruption.
-
-It seems possible to use a scheme similar to that described in[sub:TDB-Does-Not]
-, where transactions are committed without overwriting existing
-data, and an array of top-level pointers were available in the
-header. If the transaction is “soft” then we would not need a sync
-at all: existing processes would pick up the new hash table and
-free list and work with that.
-
-At some later point, a sync would allow recovery of the old data
-into the free lists (perhaps when the array of top-level pointers
-filled). On crash, tdb_open() would examine the array of top
-levels, and apply the transactions until it encountered an
-invalid checksum.
-
-3.15 Tracing Is Fragile, Replay Is External
-
-The current TDB has compile-time-enabled tracing code, but it
-often breaks as it is not enabled by default. In a similar way,
-the ctdb code has an external wrapper which does replay tracing
-so it can coordinate cluster-wide transactions.
-
-3.15.1 Proposed Solution<replay-attribute>
-
-Tridge points out that an attribute can be later added to
-tdb_open (see[attributes]) to provide replay/trace hooks, which
-could become the basis for this and future parallel transactions
-and snapshot support.
-
-3.15.2 Status
-
-Deferred.
+++ /dev/null
- /*
- Trivial Database 2: free list/block handling
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-#include <ccan/ilog/ilog.h>
-#include <time.h>
-#include <limits.h>
-
-static unsigned fls64(uint64_t val) /* Local alias: forwards val to ilog64() from ccan/ilog. */
-{
- return ilog64(val); /* presumably the 1-based index of the highest set bit (0 for val == 0) -- confirm against ccan/ilog */
-}
-
-/* In which bucket would we find a particular record size? (ignoring header)
- *
- * data_len must be at least NTDB_MIN_DATA_LEN (asserted below).  Only the
- * excess over that minimum selects the bucket: an excess of up to 64 maps
- * linearly in steps of 8, anything larger uses fls64(excess) + 2.  The
- * result is clamped to NTDB_FREE_BUCKETS - 1, so the last bucket collects
- * every size that would otherwise index past the end.
- */
-unsigned int size_to_bucket(ntdb_len_t data_len)
-{
- unsigned int bucket;
-
- /* We can't have records smaller than this. */
- assert(data_len >= NTDB_MIN_DATA_LEN);
-
- /* Ignoring the header... */
- if (data_len - NTDB_MIN_DATA_LEN <= 64) {
- /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */
- bucket = (data_len - NTDB_MIN_DATA_LEN) / 8;
- } else {
- /* After that we go power of 2. */
- bucket = fls64(data_len - NTDB_MIN_DATA_LEN) + 2;
- }
-
- if (unlikely(bucket >= NTDB_FREE_BUCKETS))
- bucket = NTDB_FREE_BUCKETS - 1;
- return bucket;
-}
-
-ntdb_off_t first_ftable(struct ntdb_context *ntdb) /* Read the free_table field of struct ntdb_header. */
-{
- return ntdb_read_off(ntdb, offsetof(struct ntdb_header, free_table)); /* may be an error-encoded offset: callers test it with NTDB_OFF_IS_ERR() */
-}
-
-ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable) /* Read the next field of the ntdb_freetable stored at offset ftable. */
-{
- return ntdb_read_off(ntdb, ftable + offsetof(struct ntdb_freetable,next)); /* may be an error-encoded offset: callers test it with NTDB_OFF_IS_ERR() */
-}
-
-enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb) /* Choose which free table this context will use. */
-{
- /* Use reservoir sampling algorithm to select a free list at random.
-  *
-  * Each table on the chain starting at first_ftable() gets one random()
-  * draw; the table with the largest draw so far (the later table on a
-  * tie, because the comparison is >=) is recorded in ntdb->ftable_off
-  * (its offset) and ntdb->ftable (its position along the chain).
-  *
-  * Returns NTDB_SUCCESS once the chain ends at offset 0, or the error
-  * decoded from an offset for which NTDB_OFF_IS_ERR() is true.  On that
-  * error path ntdb->ftable_off and ntdb->ftable keep whatever was
-  * stored before the failure was noticed. */
- unsigned int rnd, max = 0, count = 0;
- ntdb_off_t off;
-
- ntdb->ftable_off = off = first_ftable(ntdb);
- ntdb->ftable = 0;
-
- while (off) {
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- }
-
- rnd = random();
- if (rnd >= max) {
- ntdb->ftable_off = off;
- ntdb->ftable = count;
- max = rnd;
- }
-
- off = next_ftable(ntdb, off);
- count++;
- }
- return NTDB_SUCCESS;
-}
-
-/* Offset of a given bucket: the location of entry `bucket` in the buckets
- * array of the struct ntdb_freetable that starts at ftable_off.  Each entry
- * is sizeof(ntdb_off_t) bytes wide. */
-ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket)
-{
- return ftable_off + offsetof(struct ntdb_freetable, buckets)
- + bucket * sizeof(ntdb_off_t);
-}
-
-/* Returns free_buckets + 1, or list number to search, or -ve error.
- *
- * Delegates to ntdb_find_nonzero_off() over the bucket array of the free
- * table at ftable_off, passing `bucket` as the starting index and
- * NTDB_FREE_BUCKETS as the limit.
- * NOTE(review): the exact "nothing found" value is decided by
- * ntdb_find_nonzero_off(), which is not visible here -- confirm it matches
- * the first sentence above. */
-static ntdb_off_t find_free_head(struct ntdb_context *ntdb,
- ntdb_off_t ftable_off,
- ntdb_off_t bucket)
-{
- /* Speculatively search for a non-zero bucket. */
- return ntdb_find_nonzero_off(ntdb, bucket_off(ftable_off, 0),
- bucket, NTDB_FREE_BUCKETS);
-}
-
-static void check_list(struct ntdb_context *ntdb, ntdb_off_t b_off) /* Sanity-walk the free list whose bucket word is at b_off; compiles to nothing unless CCAN_NTDB_DEBUG is defined. */
-{
-#ifdef CCAN_NTDB_DEBUG
- ntdb_off_t off, prev = 0, first;
- struct ntdb_free_record r;
-
- first = off = (ntdb_read_off(ntdb, b_off) & NTDB_OFF_MASK); /* list head; bits outside NTDB_OFF_MASK are dropped */
- while (off != 0) {
- ntdb_read_convert(ntdb, off, &r, sizeof(r)); /* result is not checked here */
- if (frec_magic(&r) != NTDB_FREE_MAGIC) /* every entry must carry the free magic */
- abort();
- if (prev && frec_prev(&r) != prev) /* every entry after the head must point back at its predecessor */
- abort();
- prev = off;
- off = r.next;
- }
-
- if (first) {
- ntdb_read_convert(ntdb, first, &r, sizeof(r));
- if (frec_prev(&r) != prev) /* the head's prev must be the last entry visited, i.e. the tail */
- abort();
- }
-#endif
-}
-
-/* Remove from free bucket.
- *
- * b_off is the offset of the bucket word, r_off the offset of the free
- * record being unlinked, and r that record's already-read contents.
- *
- * The list is singly terminated forwards (the tail's next is 0) but
- * circular backwards (the head's prev is the tail), so:
- *  - if r's prev is r itself it is the only entry and the bucket word is
- *    simply written as 0;
- *  - if prev->next reads as 0, prev is the tail and therefore r is the
- *    head: the bucket word is repointed at r->next while the bits outside
- *    NTDB_OFF_MASK are kept;
- *  - otherwise prev->next is set to r->next.
- * Finally the magic_and_prev word of r's successor (or of the head, when r
- * was the tail) is overwritten with r->magic_and_prev.
- *
- * Returns the result of the last write, an error decoded from a failed
- * read, or the ntdb_logerr() result for NTDB_ERR_CORRUPT when the bucket
- * head does not match r_off.
- * NOTE(review): the log formats use %llu while the arguments are cast to
- * (long long) -- confirm whether (unsigned long long) was intended. */
-static enum NTDB_ERROR remove_from_list(struct ntdb_context *ntdb,
- ntdb_off_t b_off, ntdb_off_t r_off,
- const struct ntdb_free_record *r)
-{
- ntdb_off_t off, prev_next, head;
- enum NTDB_ERROR ecode;
-
- /* Is this only element in list? Zero out bucket, and we're done. */
- if (frec_prev(r) == r_off)
- return ntdb_write_off(ntdb, b_off, 0);
-
- /* off = &r->prev->next */
- off = frec_prev(r) + offsetof(struct ntdb_free_record, next);
-
- /* Get prev->next */
- prev_next = ntdb_read_off(ntdb, off);
- if (NTDB_OFF_IS_ERR(prev_next))
- return NTDB_OFF_TO_ERR(prev_next);
-
- /* If prev->next == 0, we were head: update bucket to point to next. */
- if (prev_next == 0) {
- /* We must preserve upper bits. */
- head = ntdb_read_off(ntdb, b_off);
- if (NTDB_OFF_IS_ERR(head))
- return NTDB_OFF_TO_ERR(head);
-
- if ((head & NTDB_OFF_MASK) != r_off) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "remove_from_list:"
- " %llu head %llu on list %llu",
- (long long)r_off,
- (long long)head,
- (long long)b_off);
- }
- head = ((head & ~NTDB_OFF_MASK) | r->next);
- ecode = ntdb_write_off(ntdb, b_off, head);
- if (ecode != NTDB_SUCCESS)
- return ecode;
- } else {
- /* r->prev->next = r->next */
- ecode = ntdb_write_off(ntdb, off, r->next);
- if (ecode != NTDB_SUCCESS)
- return ecode;
- }
-
- /* If we were the tail, off = &head->prev. */
- if (r->next == 0) {
- head = ntdb_read_off(ntdb, b_off);
- if (NTDB_OFF_IS_ERR(head))
- return NTDB_OFF_TO_ERR(head);
- head &= NTDB_OFF_MASK;
- off = head + offsetof(struct ntdb_free_record, magic_and_prev);
- } else {
- /* off = &r->next->prev */
- off = r->next + offsetof(struct ntdb_free_record,
- magic_and_prev);
- }
-
-#ifdef CCAN_NTDB_DEBUG
- /* *off == r */
- if ((ntdb_read_off(ntdb, off) & NTDB_OFF_MASK) != r_off) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "remove_from_list:"
- " %llu bad prev in list %llu",
- (long long)r_off, (long long)b_off);
- }
-#endif
- /* r->next->prev = r->prev */
- return ntdb_write_off(ntdb, off, r->magic_and_prev);
-}
-
-/* Enqueue in this free bucket: sets coalesce if we've added 128
- * entries to it. */
-static enum NTDB_ERROR enqueue_in_free(struct ntdb_context *ntdb,
- ntdb_off_t b_off,
- ntdb_off_t off,
- ntdb_len_t len,
- bool *coalesce)
-{
- struct ntdb_free_record new;
- enum NTDB_ERROR ecode;
- ntdb_off_t prev, head;
- uint64_t magic = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL));
-
- head = ntdb_read_off(ntdb, b_off);
- if (NTDB_OFF_IS_ERR(head))
- return NTDB_OFF_TO_ERR(head);
-
- /* We only need to set ftable_and_len; rest is set in enqueue_in_free */
- new.ftable_and_len = ((uint64_t)ntdb->ftable
- << (64 - NTDB_OFF_UPPER_STEAL))
- | len;
-
- /* new->next = head. */
- new.next = (head & NTDB_OFF_MASK);
-
- /* First element? Prev points to ourselves. */
- if (!new.next) {
- new.magic_and_prev = (magic | off);
- } else {
- /* new->prev = next->prev */
- prev = ntdb_read_off(ntdb,
- new.next + offsetof(struct ntdb_free_record,
- magic_and_prev));
- new.magic_and_prev = prev;
- if (frec_magic(&new) != NTDB_FREE_MAGIC) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "enqueue_in_free: %llu bad head"
- " prev %llu",
- (long long)new.next,
- (long long)prev);
- }
- /* next->prev = new. */
- ecode = ntdb_write_off(ntdb, new.next
- + offsetof(struct ntdb_free_record,
- magic_and_prev),
- off | magic);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
-#ifdef CCAN_NTDB_DEBUG
- prev = ntdb_read_off(ntdb, frec_prev(&new)
- + offsetof(struct ntdb_free_record, next));
- if (prev != 0) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "enqueue_in_free:"
- " %llu bad tail next ptr %llu",
- (long long)frec_prev(&new)
- + offsetof(struct ntdb_free_record,
- next),
- (long long)prev);
- }
-#endif
- }
-
- /* Update enqueue count, but don't set high bit: see NTDB_OFF_IS_ERR */
- if (*coalesce)
- head += (1ULL << (64 - NTDB_OFF_UPPER_STEAL));
- head &= ~(NTDB_OFF_MASK | (1ULL << 63));
- head |= off;
-
- ecode = ntdb_write_off(ntdb, b_off, head);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* It's time to coalesce if counter wrapped. */
- if (*coalesce)
- *coalesce = ((head & ~NTDB_OFF_MASK) == 0);
-
- return ntdb_write_convert(ntdb, off, &new, sizeof(new));
-}
-
-static ntdb_off_t ftable_offset(struct ntdb_context *ntdb, unsigned int ftable)
-{
- ntdb_off_t off;
- unsigned int i;
-
- if (likely(ntdb->ftable == ftable))
- return ntdb->ftable_off;
-
- off = first_ftable(ntdb);
- for (i = 0; i < ftable; i++) {
- if (NTDB_OFF_IS_ERR(off)) {
- break;
- }
- off = next_ftable(ntdb, off);
- }
- return off;
-}
-
-/* Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and
- * need to blatt the *protect record (which is set to an error). */
-static ntdb_len_t coalesce(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_off_t b_off,
- ntdb_len_t data_len,
- ntdb_off_t *protect)
-{
- ntdb_off_t end;
- struct ntdb_free_record rec;
- enum NTDB_ERROR ecode;
-
- ntdb->stats.alloc_coalesce_tried++;
- end = off + sizeof(struct ntdb_used_record) + data_len;
-
- while (end < ntdb->file->map_size) {
- const struct ntdb_free_record *r;
- ntdb_off_t nb_off;
- unsigned ftable, bucket;
-
- r = ntdb_access_read(ntdb, end, sizeof(*r), true);
- if (NTDB_PTR_IS_ERR(r)) {
- ecode = NTDB_PTR_ERR(r);
- goto err;
- }
-
- if (frec_magic(r) != NTDB_FREE_MAGIC
- || frec_ftable(r) == NTDB_FTABLE_NONE) {
- ntdb_access_release(ntdb, r);
- break;
- }
-
- ftable = frec_ftable(r);
- bucket = size_to_bucket(frec_len(r));
- nb_off = ftable_offset(ntdb, ftable);
- if (NTDB_OFF_IS_ERR(nb_off)) {
- ntdb_access_release(ntdb, r);
- ecode = NTDB_OFF_TO_ERR(nb_off);
- goto err;
- }
- nb_off = bucket_off(nb_off, bucket);
- ntdb_access_release(ntdb, r);
-
- /* We may be violating lock order here, so best effort. */
- if (ntdb_lock_free_bucket(ntdb, nb_off, NTDB_LOCK_NOWAIT)
- != NTDB_SUCCESS) {
- ntdb->stats.alloc_coalesce_lockfail++;
- break;
- }
-
- /* Now we have lock, re-check. */
- ecode = ntdb_read_convert(ntdb, end, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- ntdb_unlock_free_bucket(ntdb, nb_off);
- goto err;
- }
-
- if (unlikely(frec_magic(&rec) != NTDB_FREE_MAGIC)) {
- ntdb->stats.alloc_coalesce_race++;
- ntdb_unlock_free_bucket(ntdb, nb_off);
- break;
- }
-
- if (unlikely(frec_ftable(&rec) != ftable)
- || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
- ntdb->stats.alloc_coalesce_race++;
- ntdb_unlock_free_bucket(ntdb, nb_off);
- break;
- }
-
- /* Did we just mess up a record you were hoping to use? */
- if (end == *protect) {
- ntdb->stats.alloc_coalesce_iterate_clash++;
- *protect = NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST);
- }
-
- ecode = remove_from_list(ntdb, nb_off, end, &rec);
- check_list(ntdb, nb_off);
- if (ecode != NTDB_SUCCESS) {
- ntdb_unlock_free_bucket(ntdb, nb_off);
- goto err;
- }
-
- end += sizeof(struct ntdb_used_record) + frec_len(&rec);
- ntdb_unlock_free_bucket(ntdb, nb_off);
- ntdb->stats.alloc_coalesce_num_merged++;
- }
-
- /* Didn't find any adjacent free? */
- if (end == off + sizeof(struct ntdb_used_record) + data_len)
- return 0;
-
- /* Before we expand, check this isn't one you wanted protected? */
- if (off == *protect) {
- *protect = NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS);
- ntdb->stats.alloc_coalesce_iterate_clash++;
- }
-
- /* OK, expand initial record */
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto err;
- }
-
- if (frec_len(&rec) != data_len) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "coalesce: expected data len %zu not %zu",
- (size_t)data_len, (size_t)frec_len(&rec));
- goto err;
- }
-
- ecode = remove_from_list(ntdb, b_off, off, &rec);
- check_list(ntdb, b_off);
- if (ecode != NTDB_SUCCESS) {
- goto err;
- }
-
- /* Try locking violation first. We don't allow coalesce recursion! */
- ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_NOWAIT, false);
- if (ecode != NTDB_SUCCESS) {
- /* Need to drop lock. Can't rely on anything stable. */
- ntdb->stats.alloc_coalesce_lockfail++;
- *protect = NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT);
-
- /* We have to drop this to avoid deadlocks, so make sure record
- * doesn't get coalesced by someone else! */
- rec.ftable_and_len = (NTDB_FTABLE_NONE
- << (64 - NTDB_OFF_UPPER_STEAL))
- | (end - off - sizeof(struct ntdb_used_record));
- ecode = ntdb_write_off(ntdb,
- off + offsetof(struct ntdb_free_record,
- ftable_and_len),
- rec.ftable_and_len);
- if (ecode != NTDB_SUCCESS) {
- goto err;
- }
-
- ntdb_unlock_free_bucket(ntdb, b_off);
-
- ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_WAIT,
- false);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- } else if (NTDB_OFF_IS_ERR(*protect)) {
- /* For simplicity, we always drop lock if they can't continue */
- ntdb_unlock_free_bucket(ntdb, b_off);
- }
- ntdb->stats.alloc_coalesce_succeeded++;
-
- /* Return usable length. */
- return end - off - sizeof(struct ntdb_used_record);
-
-err:
- /* To unify error paths, we *always* unlock bucket on error. */
- ntdb_unlock_free_bucket(ntdb, b_off);
- return NTDB_ERR_TO_OFF(ecode);
-}
-
-/* List is locked: we unlock it. */
-static enum NTDB_ERROR coalesce_list(struct ntdb_context *ntdb,
- ntdb_off_t ftable_off,
- ntdb_off_t b_off,
- unsigned int limit)
-{
- enum NTDB_ERROR ecode;
- ntdb_off_t off;
-
- off = ntdb_read_off(ntdb, b_off);
- if (NTDB_OFF_IS_ERR(off)) {
- ecode = NTDB_OFF_TO_ERR(off);
- goto unlock_err;
- }
- /* A little bit of paranoia: counter should be 0. */
- off &= NTDB_OFF_MASK;
-
- while (off && limit--) {
- struct ntdb_free_record rec;
- ntdb_len_t coal;
- ntdb_off_t next;
-
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- next = rec.next;
- coal = coalesce(ntdb, off, b_off, frec_len(&rec), &next);
- if (NTDB_OFF_IS_ERR(coal)) {
- /* This has already unlocked on error. */
- return NTDB_OFF_TO_ERR(coal);
- }
- if (NTDB_OFF_IS_ERR(next)) {
- /* Coalescing had to unlock, so stop. */
- return NTDB_SUCCESS;
- }
- /* Keep going if we're doing well... */
- limit += size_to_bucket(coal / 16 + NTDB_MIN_DATA_LEN);
- off = next;
- }
-
- /* Now, move those elements to the tail of the list so we get something
- * else next time. */
- if (off) {
- struct ntdb_free_record oldhrec, newhrec, oldtrec, newtrec;
- ntdb_off_t oldhoff, oldtoff, newtoff;
-
- /* The record we were up to is the new head. */
- ecode = ntdb_read_convert(ntdb, off, &newhrec, sizeof(newhrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- /* Get the new tail. */
- newtoff = frec_prev(&newhrec);
- ecode = ntdb_read_convert(ntdb, newtoff, &newtrec,
- sizeof(newtrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- /* Get the old head. */
- oldhoff = ntdb_read_off(ntdb, b_off);
- if (NTDB_OFF_IS_ERR(oldhoff)) {
- ecode = NTDB_OFF_TO_ERR(oldhoff);
- goto unlock_err;
- }
-
- /* This could happen if they all coalesced away. */
- if (oldhoff == off)
- goto out;
-
- ecode = ntdb_read_convert(ntdb, oldhoff, &oldhrec,
- sizeof(oldhrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- /* Get the old tail. */
- oldtoff = frec_prev(&oldhrec);
- ecode = ntdb_read_convert(ntdb, oldtoff, &oldtrec,
- sizeof(oldtrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- /* Old tail's next points to old head. */
- oldtrec.next = oldhoff;
-
- /* Old head's prev points to old tail. */
- oldhrec.magic_and_prev
- = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL))
- | oldtoff;
-
- /* New tail's next is 0. */
- newtrec.next = 0;
-
- /* Write out the modified versions. */
- ecode = ntdb_write_convert(ntdb, oldtoff, &oldtrec,
- sizeof(oldtrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- ecode = ntdb_write_convert(ntdb, oldhoff, &oldhrec,
- sizeof(oldhrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- ecode = ntdb_write_convert(ntdb, newtoff, &newtrec,
- sizeof(newtrec));
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
-
- /* And finally link in new head. */
- ecode = ntdb_write_off(ntdb, b_off, off);
- if (ecode != NTDB_SUCCESS)
- goto unlock_err;
- }
-out:
- ntdb_unlock_free_bucket(ntdb, b_off);
- return NTDB_SUCCESS;
-
-unlock_err:
- ntdb_unlock_free_bucket(ntdb, b_off);
- return ecode;
-}
-
-/* List must not be locked if coalesce_ok is set. */
-enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len_with_header,
- enum ntdb_lock_flags waitflag,
- bool coalesce_ok)
-{
- ntdb_off_t b_off;
- ntdb_len_t len;
- enum NTDB_ERROR ecode;
-
- assert(len_with_header >= sizeof(struct ntdb_free_record));
-
- len = len_with_header - sizeof(struct ntdb_used_record);
-
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len));
- ecode = ntdb_lock_free_bucket(ntdb, b_off, waitflag);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- ecode = enqueue_in_free(ntdb, b_off, off, len, &coalesce_ok);
- check_list(ntdb, b_off);
-
- /* Coalescing unlocks free list. */
- if (!ecode && coalesce_ok)
- ecode = coalesce_list(ntdb, ntdb->ftable_off, b_off, 2);
- else
- ntdb_unlock_free_bucket(ntdb, b_off);
- return ecode;
-}
-
-static size_t adjust_size(size_t keylen, size_t datalen)
-{
- size_t size = keylen + datalen;
-
- if (size < NTDB_MIN_DATA_LEN)
- size = NTDB_MIN_DATA_LEN;
-
- /* Round to next uint64_t boundary. */
- return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL);
-}
-
-/* If we have enough left over to be useful, split that off. */
-static size_t record_leftover(size_t keylen, size_t datalen,
- bool want_extra, size_t total_len)
-{
- ssize_t leftover;
-
- if (want_extra)
- datalen += datalen / 2;
- leftover = total_len - adjust_size(keylen, datalen);
-
- if (leftover < (ssize_t)sizeof(struct ntdb_free_record))
- return 0;
-
- return leftover;
-}
-
-/* We need size bytes to put our key and data in. */
-static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb,
- ntdb_off_t ftable_off,
- ntdb_off_t bucket,
- size_t keylen, size_t datalen,
- bool want_extra,
- unsigned magic)
-{
- ntdb_off_t off, b_off,best_off;
- struct ntdb_free_record best = { 0 };
- double multiplier;
- size_t size = adjust_size(keylen, datalen);
- enum NTDB_ERROR ecode;
-
- ntdb->stats.allocs++;
- b_off = bucket_off(ftable_off, bucket);
-
- /* FIXME: Try non-blocking wait first, to measure contention. */
- /* Lock this bucket. */
- ecode = ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- best.ftable_and_len = -1ULL;
- best_off = 0;
-
- /* Get slack if we're after extra. */
- if (want_extra)
- multiplier = 1.5;
- else
- multiplier = 1.0;
-
- /* Walk the list to see if any are large enough, getting less fussy
- * as we go. */
- off = ntdb_read_off(ntdb, b_off);
- if (NTDB_OFF_IS_ERR(off)) {
- ecode = NTDB_OFF_TO_ERR(off);
- goto unlock_err;
- }
- off &= NTDB_OFF_MASK;
-
- while (off) {
- const struct ntdb_free_record *r;
- ntdb_off_t next;
-
- r = ntdb_access_read(ntdb, off, sizeof(*r), true);
- if (NTDB_PTR_IS_ERR(r)) {
- ecode = NTDB_PTR_ERR(r);
- goto unlock_err;
- }
-
- if (frec_magic(r) != NTDB_FREE_MAGIC) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "lock_and_alloc:"
- " %llu non-free 0x%llx",
- (long long)off,
- (long long)r->magic_and_prev);
- ntdb_access_release(ntdb, r);
- goto unlock_err;
- }
-
- if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) {
- best_off = off;
- best = *r;
- }
-
- if (frec_len(&best) <= size * multiplier && best_off) {
- ntdb_access_release(ntdb, r);
- break;
- }
-
- multiplier *= 1.01;
-
- next = r->next;
- ntdb_access_release(ntdb, r);
- off = next;
- }
-
- /* If we found anything at all, use it. */
- if (best_off) {
- struct ntdb_used_record rec;
- size_t leftover;
-
- /* We're happy with this size: take it. */
- ecode = remove_from_list(ntdb, b_off, best_off, &best);
- check_list(ntdb, b_off);
- if (ecode != NTDB_SUCCESS) {
- goto unlock_err;
- }
-
- leftover = record_leftover(keylen, datalen, want_extra,
- frec_len(&best));
-
- assert(keylen + datalen + leftover <= frec_len(&best));
- /* We need to mark non-free before we drop lock, otherwise
- * coalesce() could try to merge it! */
- ecode = set_header(ntdb, &rec, magic, keylen, datalen,
- frec_len(&best) - leftover);
- if (ecode != NTDB_SUCCESS) {
- goto unlock_err;
- }
-
- ecode = ntdb_write_convert(ntdb, best_off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto unlock_err;
- }
-
- /* For futureproofing, we put a 0 in any unused space. */
- if (rec_extra_padding(&rec)) {
- ecode = ntdb->io->twrite(ntdb, best_off + sizeof(rec)
- + keylen + datalen, "", 1);
- if (ecode != NTDB_SUCCESS) {
- goto unlock_err;
- }
- }
-
- /* Bucket of leftover will be <= current bucket, so nested
- * locking is allowed. */
- if (leftover) {
- ntdb->stats.alloc_leftover++;
- ecode = add_free_record(ntdb,
- best_off + sizeof(rec)
- + frec_len(&best) - leftover,
- leftover, NTDB_LOCK_WAIT, false);
- if (ecode != NTDB_SUCCESS) {
- best_off = NTDB_ERR_TO_OFF(ecode);
- }
- }
- ntdb_unlock_free_bucket(ntdb, b_off);
-
- return best_off;
- }
-
- ntdb_unlock_free_bucket(ntdb, b_off);
- return 0;
-
-unlock_err:
- ntdb_unlock_free_bucket(ntdb, b_off);
- return NTDB_ERR_TO_OFF(ecode);
-}
-
-/* Get a free block from current free list, or 0 if none, -ve on error. */
-static ntdb_off_t get_free(struct ntdb_context *ntdb,
- size_t keylen, size_t datalen, bool want_extra,
- unsigned magic)
-{
- ntdb_off_t off, ftable_off;
- ntdb_off_t start_b, b, ftable;
- bool wrapped = false;
-
- /* If they are growing, add 50% to get to higher bucket. */
- if (want_extra)
- start_b = size_to_bucket(adjust_size(keylen,
- datalen + datalen / 2));
- else
- start_b = size_to_bucket(adjust_size(keylen, datalen));
-
- ftable_off = ntdb->ftable_off;
- ftable = ntdb->ftable;
- while (!wrapped || ftable_off != ntdb->ftable_off) {
- /* Start at exact size bucket, and search up... */
- for (b = find_free_head(ntdb, ftable_off, start_b);
- b < NTDB_FREE_BUCKETS;
- b = find_free_head(ntdb, ftable_off, b + 1)) {
- /* Try getting one from list. */
- off = lock_and_alloc(ntdb, ftable_off,
- b, keylen, datalen, want_extra,
- magic);
- if (NTDB_OFF_IS_ERR(off))
- return off;
- if (off != 0) {
- if (b == start_b)
- ntdb->stats.alloc_bucket_exact++;
- if (b == NTDB_FREE_BUCKETS - 1)
- ntdb->stats.alloc_bucket_max++;
- /* Worked? Stay using this list. */
- ntdb->ftable_off = ftable_off;
- ntdb->ftable = ftable;
- return off;
- }
- /* Didn't work. Try next bucket. */
- }
-
- if (NTDB_OFF_IS_ERR(b)) {
- return b;
- }
-
- /* Hmm, try next table. */
- ftable_off = next_ftable(ntdb, ftable_off);
- if (NTDB_OFF_IS_ERR(ftable_off)) {
- return ftable_off;
- }
- ftable++;
-
- if (ftable_off == 0) {
- wrapped = true;
- ftable_off = first_ftable(ntdb);
- if (NTDB_OFF_IS_ERR(ftable_off)) {
- return ftable_off;
- }
- ftable = 0;
- }
- }
-
- return 0;
-}
-
-enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
- struct ntdb_used_record *rec,
- unsigned magic, uint64_t keylen, uint64_t datalen,
- uint64_t actuallen)
-{
- uint64_t keybits = (fls64(keylen) + 1) / 2;
-
- rec->magic_and_meta = ((actuallen - (keylen + datalen)) << 11)
- | (keybits << 43)
- | ((uint64_t)magic << 48);
- rec->key_and_data_len = (keylen | (datalen << (keybits*2)));
-
- /* Encoding can fail on big values. */
- if (rec_key_length(rec) != keylen
- || rec_data_length(rec) != datalen
- || rec_extra_padding(rec) != actuallen - (keylen + datalen)) {
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "Could not encode k=%llu,d=%llu,a=%llu",
- (long long)keylen, (long long)datalen,
- (long long)actuallen);
- }
- return NTDB_SUCCESS;
-}
-
-/* You need 'size', this tells you how much you should expand by. */
-ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size)
-{
- ntdb_off_t new_size, top_size;
-
- /* limit size in order to avoid using up huge amounts of memory for
- * in memory tdbs if an oddball huge record creeps in */
- if (size > 100 * 1024) {
- top_size = map_size + size * 2;
- } else {
- top_size = map_size + size * 100;
- }
-
- /* always make room for at least top_size more records, and at
- least 25% more space. if the DB is smaller than 100MiB,
- otherwise grow it by 10% only. */
- if (map_size > 100 * 1024 * 1024) {
- new_size = map_size * 1.10;
- } else {
- new_size = map_size * 1.25;
- }
-
- if (new_size < top_size)
- new_size = top_size;
-
- /* We always make the file a multiple of transaction page
- * size. This guarantees that the transaction recovery area
- * is always aligned, otherwise the transaction code can overwrite
- * itself. */
- new_size = (new_size + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
- return new_size - map_size;
-}
-
-/* Expand the database. */
-static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size)
-{
- uint64_t old_size;
- ntdb_len_t wanted;
- enum NTDB_ERROR ecode;
-
- /* Need to hold a hash lock to expand DB: transactions rely on it. */
- if (!(ntdb->flags & NTDB_NOLOCK)
- && !ntdb->file->allrecord_lock.count && !ntdb_has_hash_locks(ntdb)) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_expand: must hold lock during expand");
- }
-
- /* Only one person can expand file at a time. */
- ecode = ntdb_lock_expand(ntdb, F_WRLCK);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* Someone else may have expanded the file, so retry. */
- old_size = ntdb->file->map_size;
- ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
- if (ntdb->file->map_size != old_size) {
- ntdb_unlock_expand(ntdb, F_WRLCK);
- return NTDB_SUCCESS;
- }
-
- /* We need room for the record header too. */
- size = adjust_size(0, sizeof(struct ntdb_used_record) + size);
- /* Overallocate. */
- wanted = ntdb_expand_adjust(old_size, size);
-
- ecode = ntdb->io->expand_file(ntdb, wanted);
- if (ecode != NTDB_SUCCESS) {
- ntdb_unlock_expand(ntdb, F_WRLCK);
- return ecode;
- }
-
- /* We need to drop this lock before adding free record. */
- ntdb_unlock_expand(ntdb, F_WRLCK);
-
- ntdb->stats.expands++;
- return add_free_record(ntdb, old_size, wanted, NTDB_LOCK_WAIT, true);
-}
-
-/* This won't fail: it will expand the database if it has to. */
-ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
- unsigned magic, bool growing)
-{
- ntdb_off_t off;
-
- for (;;) {
- enum NTDB_ERROR ecode;
- off = get_free(ntdb, keylen, datalen, growing, magic);
- if (likely(off != 0))
- break;
-
- ecode = ntdb_expand(ntdb, adjust_size(keylen, datalen));
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- }
-
- return off;
-}
+++ /dev/null
- /*
- Trivial Database 2: hash handling
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/hash/hash.h>
-
-/* Default hash function. */
-uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
- void *unused)
-{
- return hash_stable((const unsigned char *)key, length, seed);
-}
-
-uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len)
-{
- return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data);
-}
-
-static ntdb_bool_err key_matches(struct ntdb_context *ntdb,
- const struct ntdb_used_record *rec,
- ntdb_off_t off,
- const NTDB_DATA *key,
- const char **rptr)
-{
- ntdb_bool_err ret = false;
- const char *rkey;
-
- if (rec_key_length(rec) != key->dsize) {
- ntdb->stats.compare_wrong_keylen++;
- return ret;
- }
-
- rkey = ntdb_access_read(ntdb, off + sizeof(*rec),
- key->dsize + rec_data_length(rec), false);
- if (NTDB_PTR_IS_ERR(rkey)) {
- return (ntdb_bool_err)NTDB_PTR_ERR(rkey);
- }
- if (memcmp(rkey, key->dptr, key->dsize) == 0) {
- if (rptr) {
- *rptr = rkey;
- } else {
- ntdb_access_release(ntdb, rkey);
- }
- return true;
- }
- ntdb->stats.compare_wrong_keycmp++;
- ntdb_access_release(ntdb, rkey);
- return ret;
-}
-
-/* Does entry match? */
-static ntdb_bool_err match(struct ntdb_context *ntdb,
- uint32_t hash,
- const NTDB_DATA *key,
- ntdb_off_t val,
- struct ntdb_used_record *rec,
- const char **rptr)
-{
- ntdb_off_t off;
- enum NTDB_ERROR ecode;
-
- ntdb->stats.compares++;
-
- /* Top bits of offset == next bits of hash. */
- if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
- != bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) {
- ntdb->stats.compare_wrong_offsetbits++;
- return false;
- }
-
- off = val & NTDB_OFF_MASK;
- ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec));
- if (ecode != NTDB_SUCCESS) {
- return (ntdb_bool_err)ecode;
- }
-
- return key_matches(ntdb, rec, off, key, rptr);
-}
-
-static bool is_chain(ntdb_off_t val)
-{
- return val & (1ULL << NTDB_OFF_CHAIN_BIT);
-}
-
-static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx)
-{
- return base + sizeof(struct ntdb_used_record)
- + idx * sizeof(ntdb_off_t);
-}
-
-/* This is the core routine which searches the hashtable for an entry.
- * On error, no locks are held and -ve is returned.
- * Otherwise, hinfo is filled in.
- * If not found, the return value is 0.
- * If found, the return value is the offset, and *rec is the record. */
-ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
- NTDB_DATA key,
- int ltype,
- struct hash_info *h,
- struct ntdb_used_record *rec,
- const char **rptr)
-{
- ntdb_off_t off, val;
- const ntdb_off_t *arr = NULL;
- ntdb_len_t i;
- bool found_empty;
- enum NTDB_ERROR ecode;
- struct ntdb_used_record chdr;
- ntdb_bool_err berr;
-
- h->h = ntdb_hash(ntdb, key.dptr, key.dsize);
-
- h->table = NTDB_HASH_OFFSET;
- h->table_size = 1 << ntdb->hash_bits;
- h->bucket = bits_from(h->h, 0, ntdb->hash_bits);
- h->old_val = 0;
-
- ecode = ntdb_lock_hash(ntdb, h->bucket, ltype);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- off = hbucket_off(h->table, h->bucket);
- val = ntdb_read_off(ntdb, off);
- if (NTDB_OFF_IS_ERR(val)) {
- ecode = NTDB_OFF_TO_ERR(val);
- goto fail;
- }
-
- /* Directly in hash table? */
- if (!likely(is_chain(val))) {
- if (val) {
- berr = match(ntdb, h->h, &key, val, rec, rptr);
- if (berr < 0) {
- ecode = NTDB_OFF_TO_ERR(berr);
- goto fail;
- }
- if (berr) {
- return val & NTDB_OFF_MASK;
- }
- /* If you want to insert here, make a chain. */
- h->old_val = val;
- }
- return 0;
- }
-
- /* Nope? Iterate through chain. */
- h->table = val & NTDB_OFF_MASK;
-
- ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
-
- if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "find_and_lock:"
- " corrupt record %#x at %llu",
- rec_magic(&chdr), (long long)off);
- goto fail;
- }
-
- h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
-
- arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
- rec_data_length(&chdr), true);
- if (NTDB_PTR_IS_ERR(arr)) {
- ecode = NTDB_PTR_ERR(arr);
- goto fail;
- }
-
- found_empty = false;
- for (i = 0; i < h->table_size; i++) {
- if (arr[i] == 0) {
- if (!found_empty) {
- h->bucket = i;
- found_empty = true;
- }
- } else {
- berr = match(ntdb, h->h, &key, arr[i], rec, rptr);
- if (berr < 0) {
- ecode = NTDB_OFF_TO_ERR(berr);
- ntdb_access_release(ntdb, arr);
- goto fail;
- }
- if (berr) {
- /* We found it! */
- h->bucket = i;
- off = arr[i] & NTDB_OFF_MASK;
- ntdb_access_release(ntdb, arr);
- return off;
- }
- }
- }
- if (!found_empty) {
- /* Set to any non-zero value */
- h->old_val = 1;
- h->bucket = i;
- }
-
- ntdb_access_release(ntdb, arr);
- return 0;
-
-fail:
- ntdb_unlock_hash(ntdb, h->bucket, ltype);
- return NTDB_ERR_TO_OFF(ecode);
-}
-
-static ntdb_off_t encode_offset(const struct ntdb_context *ntdb,
- ntdb_off_t new_off, uint32_t hash)
-{
- ntdb_off_t extra;
-
- assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0);
- assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0);
- /* We pack extra hash bits into the upper bits of the offset. */
- extra = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL);
- extra <<= (64 - NTDB_OFF_UPPER_STEAL);
-
- return new_off | extra;
-}
-
-/* Simply overwrite the hash entry we found before. */
-enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
- const struct hash_info *h,
- ntdb_off_t new_off)
-{
- return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket),
- encode_offset(ntdb, new_off, h->h));
-}
-
-enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
- const struct hash_info *h)
-{
- return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), 0);
-}
-
-
-enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
- const struct hash_info *h,
- ntdb_off_t new_off)
-{
- enum NTDB_ERROR ecode;
- ntdb_off_t chain;
- struct ntdb_used_record chdr;
- const ntdb_off_t *old;
- ntdb_off_t *new;
-
- /* We hit an empty bucket during search? That's where it goes. */
- if (!h->old_val) {
- return replace_in_hash(ntdb, h, new_off);
- }
-
- /* Full at top-level? Create a 2-element chain. */
- if (h->table == NTDB_HASH_OFFSET) {
- ntdb_off_t pair[2];
-
- /* One element is old value, the other is the new value. */
- pair[0] = h->old_val;
- pair[1] = encode_offset(ntdb, new_off, h->h);
-
- chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true);
- if (NTDB_OFF_IS_ERR(chain)) {
- return NTDB_OFF_TO_ERR(chain);
- }
- ecode = ntdb_write_convert(ntdb,
- chain
- + sizeof(struct ntdb_used_record),
- pair, sizeof(pair));
- if (ecode == NTDB_SUCCESS) {
- ecode = ntdb_write_off(ntdb,
- hbucket_off(h->table, h->bucket),
- chain
- | (1ULL << NTDB_OFF_CHAIN_BIT));
- }
- return ecode;
- }
-
- /* Full bucket. Expand. */
- ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (rec_extra_padding(&chdr) >= sizeof(new_off)) {
- /* Expand in place. */
- uint64_t dlen = rec_data_length(&chdr);
-
- ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0,
- dlen + sizeof(new_off),
- dlen + rec_extra_padding(&chdr));
-
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- /* find_and_lock set up h to point to last bucket. */
- ecode = replace_in_hash(ntdb, h, new_off);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- /* For futureproofing, we always make the first byte of padding
- * a zero. */
- if (rec_extra_padding(&chdr)) {
- ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr)
- + dlen + sizeof(new_off),
- "", 1);
- }
- return ecode;
- }
-
- /* We need to reallocate the chain. */
- chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t),
- NTDB_CHAIN_MAGIC, true);
- if (NTDB_OFF_IS_ERR(chain)) {
- return NTDB_OFF_TO_ERR(chain);
- }
-
- /* Map both and copy across old buckets. */
- old = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
- h->table_size*sizeof(ntdb_off_t), true);
- if (NTDB_PTR_IS_ERR(old)) {
- return NTDB_PTR_ERR(old);
- }
- new = ntdb_access_write(ntdb, hbucket_off(chain, 0),
- (h->table_size + 1)*sizeof(ntdb_off_t), true);
- if (NTDB_PTR_IS_ERR(new)) {
- ntdb_access_release(ntdb, old);
- return NTDB_PTR_ERR(new);
- }
-
- memcpy(new, old, h->bucket * sizeof(ntdb_off_t));
- new[h->bucket] = encode_offset(ntdb, new_off, h->h);
- ntdb_access_release(ntdb, old);
-
- ecode = ntdb_access_commit(ntdb, new);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* Free the old chain. */
- ecode = add_free_record(ntdb, h->table,
- sizeof(struct ntdb_used_record)
- + rec_data_length(&chdr)
- + rec_extra_padding(&chdr),
- NTDB_LOCK_WAIT, true);
-
- /* Replace top-level to point to new chain */
- return ntdb_write_off(ntdb,
- hbucket_off(NTDB_HASH_OFFSET,
- bits_from(h->h, 0, ntdb->hash_bits)),
- chain | (1ULL << NTDB_OFF_CHAIN_BIT));
-}
-
-/* Traverse support: returns offset of record, or 0 or -ve error. */
-static ntdb_off_t iterate_chain(struct ntdb_context *ntdb,
- ntdb_off_t val,
- struct hash_info *h)
-{
- ntdb_off_t i;
- enum NTDB_ERROR ecode;
- struct ntdb_used_record chdr;
-
- /* First load up chain header. */
- h->table = val & NTDB_OFF_MASK;
- ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "get_table:"
- " corrupt record %#x at %llu",
- rec_magic(&chdr),
- (long long)h->table);
- }
-
- /* Chain length is implied by data length. */
- h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
-
- i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket,
- h->table_size);
- if (NTDB_OFF_IS_ERR(i)) {
- return i;
- }
-
- if (i != h->table_size) {
- /* Return to next bucket. */
- h->bucket = i + 1;
- val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
- if (NTDB_OFF_IS_ERR(val)) {
- return val;
- }
- return val & NTDB_OFF_MASK;
- }
-
- /* Go back up to hash table. */
- h->table = NTDB_HASH_OFFSET;
- h->table_size = 1 << ntdb->hash_bits;
- h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1;
- return 0;
-}
-
-/* Keeps hash locked unless returns 0 or error.
- *
- * Advances the traversal state in h to the next record and returns its
- * offset.  Returns 0 when the top-level table is exhausted, or an error
- * encoded as an offset.  On a non-zero, non-error return the read lock on
- * the top-level bucket selected by h->h is still held.
- */
-static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb,
- struct hash_info *h)
-{
- ntdb_off_t val, i;
- enum NTDB_ERROR ecode;
-
- if (h->table != NTDB_HASH_OFFSET) {
- /* We're in a chain. */
- i = bits_from(h->h, 0, ntdb->hash_bits);
- ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- /* We dropped lock, bucket might have moved! */
- val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i));
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
-
- /* We don't remove chains: there should still be one there! */
- if (!val || !is_chain(val)) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "iterate_hash:"
- " vanished hchain %llu at %llu",
- (long long)val,
- (long long)i);
- val = NTDB_ERR_TO_OFF(ecode);
- goto unlock;
- }
-
- /* Find next bucket in the chain. */
- val = iterate_chain(ntdb, val, h);
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
- /* Found a record: return it with the bucket lock still held. */
- if (val != 0) {
- return val;
- }
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
-
- /* OK, we've reset h back to top level. */
- }
-
- /* We do this unlocked, then re-check. */
- for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
- h->bucket, h->table_size);
- i != h->table_size;
- i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
- i+1, h->table_size)) {
- ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- /* Re-read the bucket now that it is locked. */
- val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
-
- /* Lost race, and it's empty? */
- if (!val) {
- ntdb->stats.traverse_val_vanished++;
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- continue;
- }
-
- if (!is_chain(val)) {
- /* So caller knows what lock to free. */
- h->h = i;
- /* Return to next bucket. */
- h->bucket = i + 1;
- val &= NTDB_OFF_MASK;
- return val;
- }
-
- /* Start at beginning of chain */
- h->bucket = 0;
- h->h = i;
-
- val = iterate_chain(ntdb, val, h);
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
- if (val != 0) {
- return val;
- }
-
- /* Otherwise, bucket has been set to i+1 */
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- }
- return 0;
-
-/* Error exit: drop the read lock on bucket i and hand back the error. */
-unlock:
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- return val;
-}
-
-/* Return success if we find something, NTDB_ERR_NOEXIST if none.
- *
- * On success kbuf->dsize is the key length and kbuf->dptr is a buffer
- * obtained from ntdb_alloc_read().  If dlen is non-NULL, *dlen receives the
- * data length and the buffer holds key and data read in one go; otherwise
- * only the key is read.
- */
-enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
- NTDB_DATA *kbuf, size_t *dlen)
-{
- ntdb_off_t off;
- struct ntdb_used_record rec;
- enum NTDB_ERROR ecode;
-
- off = lock_and_iterate_hash(ntdb, h);
-
- /* On error or end of table no lock is held, so return directly. */
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- } else if (off == 0) {
- return NTDB_ERR_NOEXIST;
- }
-
- /* The hash for this key is still locked. */
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto unlock;
- }
- if (rec_magic(&rec) != NTDB_USED_MAGIC) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "next_in_hash:"
- " corrupt record at %llu",
- (long long)off);
- goto unlock;
- }
-
- kbuf->dsize = rec_key_length(&rec);
-
- /* They want data as well? */
- if (dlen) {
- *dlen = rec_data_length(&rec);
- kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec),
- kbuf->dsize + *dlen);
- } else {
- kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec),
- kbuf->dsize);
- }
- if (NTDB_PTR_IS_ERR(kbuf->dptr)) {
- ecode = NTDB_PTR_ERR(kbuf->dptr);
- goto unlock;
- }
- ecode = NTDB_SUCCESS;
-
-/* Common exit: release the bucket lock left held by the iterator. */
-unlock:
- ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK);
- return ecode;
-
-}
-
-/*
- * Begin a traversal: point h at bucket zero of the top-level hash table
- * and fetch the first record through next_in_hash().
- */
-enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
-			      struct hash_info *h,
-			      NTDB_DATA *kbuf, size_t *dlen)
-{
-	/* Reset the cursor to the very start of the main table. */
-	h->bucket = 0;
-	h->table_size = 1 << ntdb->hash_bits;
-	h->table = NTDB_HASH_OFFSET;
-
-	return next_in_hash(ntdb, h, kbuf, dlen);
-}
-
-/* Lock the top-level hash bucket that key hashes to.  Even if the entry
- * isn't in this hash bucket, you'd have to lock this bucket to find it. */
-static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb,
-				 const NTDB_DATA *key, int ltype)
-{
-	uint32_t hash;
-
-	hash = ntdb_hash(ntdb, key->dptr, key->dsize);
-
-	/* Only the low hash_bits of the hash select the bucket. */
-	return ntdb_lock_hash(ntdb,
-			      bits_from(hash, 0, ntdb->hash_bits),
-			      ltype);
-}
-
-/* Write-lock the one hash chain that key belongs to.  This is meant to be
-   used to reduce contention - it cannot guarantee how many records will be
-   locked */
-_PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key)
-{
-	const NTDB_DATA *keyp = &key;
-
-	return chainlock(ntdb, keyp, F_WRLCK);
-}
-
-/* Drop the write lock on the hash chain that key belongs to. */
-_PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key)
-{
-	uint32_t hash;
-
-	hash = ntdb_hash(ntdb, key.dptr, key.dsize);
-
-	/* Same bucket selection as when the lock was taken. */
-	ntdb_unlock_hash(ntdb,
-			 bits_from(hash, 0, ntdb->hash_bits),
-			 F_WRLCK);
-}
-
-/* Read-lock the one hash chain that key belongs to. */
-_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb,
-					     NTDB_DATA key)
-{
-	const NTDB_DATA *keyp = &key;
-
-	return chainlock(ntdb, keyp, F_RDLCK);
-}
-
-/* Drop the read lock on the hash chain that key belongs to. */
-_PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
-{
-	uint32_t hash;
-
-	hash = ntdb_hash(ntdb, key.dptr, key.dsize);
-
-	/* Same bucket selection as when the lock was taken. */
-	ntdb_unlock_hash(ntdb,
-			 bits_from(hash, 0, ntdb->hash_bits),
-			 F_RDLCK);
-}
+++ /dev/null
- /*
- Unix SMB/CIFS implementation.
-
- trivial database library
-
- Copyright (C) Andrew Tridgell 1999-2005
- Copyright (C) Paul `Rusty' Russell 2000
- Copyright (C) Jeremy Allison 2000-2003
- Copyright (C) Rusty Russell 2010
-
- ** NOTE! The following LGPL license applies to the ntdb
- ** library. This does NOT imply that all of Samba is released
- ** under the LGPL
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-
-/*
- * Release every mapping parked on the old_mmaps list.  Must only be called
- * once no direct accessors remain (asserted).
- */
-static void free_old_mmaps(struct ntdb_context *ntdb)
-{
-	struct ntdb_old_mmap *cur, *next;
-
-	assert(ntdb->file->direct_count == 0);
-
-	for (cur = ntdb->file->old_mmaps; cur != NULL; cur = next) {
-		/* Unlink first so the list head never names a freed node. */
-		next = cur->next;
-		ntdb->file->old_mmaps = next;
-
-		/* Internal databases were allocated; file-backed ones mmap'd. */
-		if (!(ntdb->flags & NTDB_INTERNAL)) {
-			munmap(cur->map_ptr, cur->map_size);
-		} else {
-			ntdb->free_fn(cur->map_ptr, ntdb->alloc_data);
-		}
-		ntdb->free_fn(cur, ntdb->alloc_data);
-	}
-}
-
-/*
- * Record the current mapping at the head of the old_mmaps list so it can be
- * released later by free_old_mmaps().  Only valid while direct accessors
- * exist (asserted).  Returns NTDB_ERR_OOM if the list node can't be allocated.
- */
-static enum NTDB_ERROR save_old_map(struct ntdb_context *ntdb)
-{
-	struct ntdb_old_mmap *saved;
-
-	assert(ntdb->file->direct_count);
-
-	saved = ntdb->alloc_fn(ntdb->file, sizeof(*saved), ntdb->alloc_data);
-	if (saved == NULL) {
-		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-				   "save_old_map alloc failed");
-	}
-
-	saved->map_ptr = ntdb->file->map_ptr;
-	saved->map_size = ntdb->file->map_size;
-
-	/* Push onto the front of the list. */
-	saved->next = ntdb->file->old_mmaps;
-	ntdb->file->old_mmaps = saved;
-
-	return NTDB_SUCCESS;
-}
-
-/*
- * Drop the current file mapping, if any.  When direct accessors still hold
- * pointers into it, the mapping is parked via save_old_map() instead of
- * being unmapped.
- */
-enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb)
-{
-	/* Nothing to do without a file descriptor or without a mapping. */
-	if (ntdb->file->fd == -1 || !ntdb->file->map_ptr) {
-		return NTDB_SUCCESS;
-	}
-
-	/* We can't unmap now if there are accessors. */
-	if (ntdb->file->direct_count) {
-		return save_old_map(ntdb);
-	}
-
-	munmap(ntdb->file->map_ptr, ntdb->file->map_size);
-	ntdb->file->map_ptr = NULL;
-	return NTDB_SUCCESS;
-}
-
-/*
- * Map the database file into memory and store the result in
- * ntdb->file->map_ptr (NULL if no mapping was made).
- *
- * Internal databases are skipped; NTDB_NOMMAP databases are skipped unless
- * HAVE_INCOHERENT_MMAP is defined.  A failed mmap is only an error when
- * HAVE_INCOHERENT_MMAP is defined; otherwise it is logged as a warning and
- * NTDB_SUCCESS is returned with map_ptr left NULL.
- */
-enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb)
-{
- int mmap_flags;
-
- if (ntdb->flags & NTDB_INTERNAL)
- return NTDB_SUCCESS;
-
-#ifndef HAVE_INCOHERENT_MMAP
- if (ntdb->flags & NTDB_NOMMAP)
- return NTDB_SUCCESS;
-#endif
-
- /* Protection follows the mode the file was opened with. */
- if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY)
- mmap_flags = PROT_READ;
- else
- mmap_flags = PROT_READ | PROT_WRITE;
-
- /* size_t can be smaller than off_t. */
- if ((size_t)ntdb->file->map_size == ntdb->file->map_size) {
- ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size,
- mmap_flags,
- MAP_SHARED, ntdb->file->fd, 0);
- } else
- ntdb->file->map_ptr = MAP_FAILED;
-
- /*
- * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
- */
- if (ntdb->file->map_ptr == MAP_FAILED) {
- ntdb->file->map_ptr = NULL;
-#ifdef HAVE_INCOHERENT_MMAP
- /* Incoherent mmap means everyone must mmap! */
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_mmap failed for size %lld (%s)",
- (long long)ntdb->file->map_size,
- strerror(errno));
-#else
- /* NOTE(review): when the size_t check above fails, mmap was never
- * called, so errno here is whatever an earlier call left behind. */
- ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
- "ntdb_mmap failed for size %lld (%s)",
- (long long)ntdb->file->map_size, strerror(errno));
-#endif
- }
- return NTDB_SUCCESS;
-}
-
-/* check for an out of bounds access - if it is out of bounds then
- see if the database has been expanded by someone else and expand
- if necessary
- note that "len" is the minimum length needed for the db.
-
- If probe is true, len being too large isn't a failure.
-
- When the file turns out to be large enough, the old mapping is dropped,
- map_size is updated from fstat() and the file is mapped again.
-*/
-static enum NTDB_ERROR ntdb_normal_oob(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len,
- bool probe)
-{
- struct stat st;
- enum NTDB_ERROR ecode;
-
- /* Unsigned wrap-around of off + len. */
- if (len + off < len) {
- if (probe)
- return NTDB_SUCCESS;
-
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_oob off %llu len %llu wrap\n",
- (long long)off, (long long)len);
- }
-
- /* Internal databases have no file to re-check the size of. */
- if (ntdb->flags & NTDB_INTERNAL) {
- if (probe)
- return NTDB_SUCCESS;
-
- ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_oob len %lld beyond internal"
- " alloc size %lld",
- (long long)(off + len),
- (long long)ntdb->file->map_size);
- return NTDB_ERR_IO;
- }
-
- /* Hold the expand lock (shared) while the file size is sampled. */
- ecode = ntdb_lock_expand(ntdb, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (fstat(ntdb->file->fd, &st) != 0) {
- ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "Failed to fstat file: %s", strerror(errno));
- ntdb_unlock_expand(ntdb, F_RDLCK);
- return NTDB_ERR_IO;
- }
-
- ntdb_unlock_expand(ntdb, F_RDLCK);
-
- /* Still too small on disk: a real out-of-bounds access. */
- if (st.st_size < off + len) {
- if (probe)
- return NTDB_SUCCESS;
-
- ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_oob len %llu beyond eof at %llu",
- (long long)(off + len), (long long)st.st_size);
- return NTDB_ERR_IO;
- }
-
- /* Unmap, update size, remap */
- ecode = ntdb_munmap(ntdb);
- if (ecode) {
- return ecode;
- }
-
- ntdb->file->map_size = st.st_size;
- return ntdb_mmap(ntdb);
-}
-
-/* Endian conversion: we only ever deal with 8 byte quantities.
- * Byte-swaps buf in place, 64 bits at a time, when the database carries
- * NTDB_CONVERT; buf is returned in every case. */
-void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size)
-{
-	uint64_t *word = (uint64_t *)buf;
-	uint64_t remaining;
-
-	assert(size % 8 == 0);
-
-	/* Nothing to swap for native-endian databases or a NULL buffer. */
-	if (!unlikely((ntdb->flags & NTDB_CONVERT)) || !buf)
-		return buf;
-
-	for (remaining = size / 8; remaining != 0; remaining--, word++)
-		*word = bswap_64(*word);
-
-	return buf;
-}
-
-/* Return first non-zero offset in offset array, or end, or -ve error. */
-/* FIXME: Return the off? */
-uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
- ntdb_off_t base, uint64_t start, uint64_t end)
-{
- uint64_t i;
- const uint64_t *val;
-
- /* Zero vs non-zero is the same unconverted: minor optimization. */
- val = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t),
- (end - start) * sizeof(ntdb_off_t), false);
- if (NTDB_PTR_IS_ERR(val)) {
- return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val));
- }
-
- for (i = 0; i < (end - start); i++) {
- if (val[i])
- break;
- }
- ntdb_access_release(ntdb, val);
- return start + i;
-}
-
-/* Return first zero offset in num offset array, or num, or -ve error. */
-uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
-			    uint64_t num)
-{
-	const uint64_t *offs;
-	uint64_t idx = 0;
-
-	/* Zero vs non-zero is the same unconverted: minor optimization. */
-	offs = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false);
-	if (NTDB_PTR_IS_ERR(offs)) {
-		return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(offs));
-	}
-
-	/* Skip over occupied entries. */
-	while (idx < num && offs[idx])
-		idx++;
-
-	ntdb_access_release(ntdb, offs);
-	return idx;
-}
-
-/*
- * Zero len bytes of the database starting at off.  Uses a direct pointer
- * when the io layer can supply one, otherwise writes zeros in chunks of at
- * most 8192 bytes through io->twrite.
- */
-enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len)
-{
-	char zeros[8192] = { 0 };
-	void *direct = ntdb->io->direct(ntdb, off, len, true);
-	enum NTDB_ERROR ecode;
-
-	assert(!(ntdb->flags & NTDB_RDONLY));
-	if (NTDB_PTR_IS_ERR(direct)) {
-		return NTDB_PTR_ERR(direct);
-	}
-
-	/* Fast path: the region is directly addressable. */
-	if (direct) {
-		memset(direct, 0, len);
-		return NTDB_SUCCESS;
-	}
-
-	/* Slow path: push zeros out one buffer-full at a time. */
-	while (len != 0) {
-		unsigned chunk = sizeof(zeros);
-
-		if (len < sizeof(zeros))
-			chunk = len;
-
-		ecode = ntdb->io->twrite(ntdb, off, zeros, chunk);
-		if (ecode != NTDB_SUCCESS) {
-			return ecode;
-		}
-		off += chunk;
-		len -= chunk;
-	}
-	return NTDB_SUCCESS;
-}
-
-/* write a lump of data at a specified offset
- *
- * Refuses read-only databases, bounds-checks the range with ntdb_oob(),
- * then copies into the mapping when one exists.  Without a mapping it
- * falls back to pwrite(), except under HAVE_INCOHERENT_MMAP where that is
- * an I/O error. */
-static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off,
- const void *buf, ntdb_len_t len)
-{
- enum NTDB_ERROR ecode;
-
- if (ntdb->flags & NTDB_RDONLY) {
- return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
- "Write to read-only database");
- }
-
- ecode = ntdb_oob(ntdb, off, len, false);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (ntdb->file->map_ptr) {
- memcpy(off + (char *)ntdb->file->map_ptr, buf, len);
- } else {
-#ifdef HAVE_INCOHERENT_MMAP
- return NTDB_ERR_IO;
-#else
- ssize_t ret;
- ret = pwrite(ntdb->file->fd, buf, len, off);
- if (ret != len) {
- /* This shouldn't happen: we avoid sparse files. */
- /* A short write leaves errno unset, so report ENOSPC. */
- if (ret >= 0)
- errno = ENOSPC;
-
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_write: %zi at %zu len=%zu (%s)",
- ret, (size_t)off, (size_t)len,
- strerror(errno));
- }
-#endif
- }
- return NTDB_SUCCESS;
-}
-
-/* read a lump of data at a specified offset
- *
- * Bounds-checks the range with ntdb_oob(), then copies out of the mapping
- * when one exists.  Without a mapping it falls back to pread(), except
- * under HAVE_INCOHERENT_MMAP where that is an I/O error.  A short read is
- * treated as a failure. */
-static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off,
- void *buf, ntdb_len_t len)
-{
- enum NTDB_ERROR ecode;
-
- ecode = ntdb_oob(ntdb, off, len, false);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- if (ntdb->file->map_ptr) {
- memcpy(buf, off + (char *)ntdb->file->map_ptr, len);
- } else {
-#ifdef HAVE_INCOHERENT_MMAP
- return NTDB_ERR_IO;
-#else
- ssize_t r = pread(ntdb->file->fd, buf, len, off);
- if (r != len) {
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_read failed with %zi at %zu "
- "len=%zu (%s) map_size=%zu",
- r, (size_t)off, (size_t)len,
- strerror(errno),
- (size_t)ntdb->file->map_size);
- }
-#endif
- }
- return NTDB_SUCCESS;
-}
-
-/*
- * Write len bytes of rec at off through io->twrite.  For NTDB_CONVERT
- * databases the data is first copied to a scratch buffer and byte-swapped
- * there, so the caller's rec is never modified.
- */
-enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
-				   const void *rec, size_t len)
-{
-	enum NTDB_ERROR ecode;
-	void *scratch;
-
-	/* Common case: native endian, write straight through. */
-	if (!unlikely((ntdb->flags & NTDB_CONVERT))) {
-		return ntdb->io->twrite(ntdb, off, rec, len);
-	}
-
-	scratch = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data);
-	if (scratch == NULL) {
-		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-				   "ntdb_write: no memory converting"
-				   " %zu bytes", len);
-	}
-
-	memcpy(scratch, rec, len);
-	ntdb_convert(ntdb, scratch, len);
-	ecode = ntdb->io->twrite(ntdb, off, scratch, len);
-	ntdb->free_fn(scratch, ntdb->alloc_data);
-
-	return ecode;
-}
-
-/*
- * Read len bytes at off into rec through io->tread and convert them to host
- * byte order with ntdb_convert().
- *
- * The conversion is applied only after a successful read: on failure rec
- * may not have been filled in at all, and byte-swapping it would touch
- * indeterminate memory.  Returns the result of the read.
- */
-enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
-				  void *rec, size_t len)
-{
-	enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len);
-
-	if (ecode == NTDB_SUCCESS) {
-		ntdb_convert(ntdb, rec, len);
-	}
-	return ecode;
-}
-
-/*
- * Allocate prefix + len bytes and read len bytes from offset into the
- * buffer just past the prefix.  Returns the start of the allocation, or an
- * error pointer on allocation or read failure.
- */
-static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset,
-			      ntdb_len_t len, unsigned int prefix)
-{
-	const ntdb_len_t total = prefix + len;
-	unsigned char *buf;
-	enum NTDB_ERROR ecode;
-
-	/* some systems don't like zero length malloc */
-	buf = ntdb->alloc_fn(ntdb, total ? total : 1, ntdb->alloc_data);
-	if (buf == NULL) {
-		ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-			    "ntdb_alloc_read alloc failed len=%zu",
-			    (size_t)total);
-		return NTDB_ERR_PTR(NTDB_ERR_OOM);
-	}
-
-	ecode = ntdb->io->tread(ntdb, offset, buf + prefix, len);
-	if (unlikely(ecode != NTDB_SUCCESS)) {
-		/* Don't leak the buffer when the read fails. */
-		ntdb->free_fn(buf, ntdb->alloc_data);
-		return NTDB_ERR_PTR(ecode);
-	}
-	return buf;
-}
-
-/* Read len bytes at offset into a freshly allocated buffer (no prefix
- * space reserved); returns the buffer or an error pointer. */
-void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len)
-{
-	const unsigned int no_prefix = 0;
-
-	return _ntdb_alloc_read(ntdb, offset, len, no_prefix);
-}
-
-/*
- * Write len bytes to the file starting at off by repeating the size-byte
- * pattern in buf with pwrite().  A short write is reported as ENOSPC.
- */
-static enum NTDB_ERROR fill(struct ntdb_context *ntdb,
-			    const void *buf, size_t size,
-			    ntdb_off_t off, ntdb_len_t len)
-{
-	while (len) {
-		size_t chunk = size < len ? size : len;
-		ssize_t written = pwrite(ntdb->file->fd, buf, chunk, off);
-
-		if (written == chunk) {
-			off += chunk;
-			len -= chunk;
-			continue;
-		}
-
-		/* Short write without an errno: treat as out of space. */
-		if (written >= 0)
-			errno = ENOSPC;
-
-		return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
-				   "fill failed:"
-				   " %zi at %zu len=%zu (%s)",
-				   written, (size_t)off, (size_t)len,
-				   strerror(errno));
-	}
-	return NTDB_SUCCESS;
-}
-
-/* expand a file.  we prefer to use ftruncate, as that is what posix
-   says to use for mmap expansion
-
-   Grows the database by "addition" bytes; the resulting size must be a
-   multiple of NTDB_PGSIZE (asserted).  Read-only databases are refused.
-
-   Internal databases grow their in-memory buffer.  File-backed databases
-   are unmapped, extended, filled with a 0x43 byte pattern and remapped. */
-static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb,
-					ntdb_len_t addition)
-{
-	char buf[8192];
-	enum NTDB_ERROR ecode;
-
-	assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0);
-	if (ntdb->flags & NTDB_RDONLY) {
-		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
-				   "Expand on read-only database");
-	}
-
-	if (ntdb->flags & NTDB_INTERNAL) {
-		char *new;
-
-		/* Can't free it if we have direct accesses. */
-		if (ntdb->file->direct_count) {
-			/* Allocate the replacement BEFORE parking the old
-			 * buffer.  If the old buffer were queued first and
-			 * this allocation then failed, map_ptr would still
-			 * be in use while also sitting on old_mmaps, and
-			 * free_old_mmaps() would later free it under us. */
-			new = ntdb->alloc_fn(ntdb->file,
-					     ntdb->file->map_size + addition,
-					     ntdb->alloc_data);
-			if (new) {
-				ecode = save_old_map(ntdb);
-				if (ecode) {
-					ntdb->free_fn(new, ntdb->alloc_data);
-					return ecode;
-				}
-				memcpy(new, ntdb->file->map_ptr,
-				       ntdb->file->map_size);
-			}
-		} else {
-			new = ntdb->expand_fn(ntdb->file->map_ptr,
-					      ntdb->file->map_size + addition,
-					      ntdb->alloc_data);
-		}
-		if (!new) {
-			return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-					   "No memory to expand database");
-		}
-		ntdb->file->map_ptr = new;
-		ntdb->file->map_size += addition;
-		return NTDB_SUCCESS;
-	} else {
-		/* Unmap before trying to write; old NTDB claimed OpenBSD had
-		 * problem with this otherwise. */
-		ecode = ntdb_munmap(ntdb);
-		if (ecode) {
-			return ecode;
-		}
-
-		/* If this fails, we try to fill anyway. */
-		if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition))
-			;
-
-		/* now fill the file with something. This ensures that the
-		   file isn't sparse, which would be very bad if we ran out of
-		   disk. This must be done with write, not via mmap */
-		memset(buf, 0x43, sizeof(buf));
-		ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size,
-			     addition);
-		if (ecode != NTDB_SUCCESS)
-			return ecode;
-		ntdb->file->map_size += addition;
-		return ntdb_mmap(ntdb);
-	}
-}
-
-/*
- * Obtain read access to len bytes at off.
- *
- * For databases without NTDB_CONVERT a direct pointer is tried first; if
- * one is obtained, direct_count is bumped and it is returned.  Otherwise
- * the bytes are read into an allocated copy that is tracked on
- * ntdb->access, and byte-swapped when convert is true.  Returns the data
- * pointer or an error pointer.
- */
-const void *ntdb_access_read(struct ntdb_context *ntdb,
-			     ntdb_off_t off, ntdb_len_t len, bool convert)
-{
-	struct ntdb_access_hdr *hdr;
-	void *data;
-
-	if (likely(!(ntdb->flags & NTDB_CONVERT))) {
-		void *direct = ntdb->io->direct(ntdb, off, len, false);
-
-		if (NTDB_PTR_IS_ERR(direct)) {
-			return direct;
-		}
-		if (direct) {
-			ntdb->file->direct_count++;
-			return direct;
-		}
-	}
-
-	/* No direct pointer available: fall back to a private copy. */
-	hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
-	if (NTDB_PTR_IS_ERR(hdr)) {
-		return hdr;
-	}
-	hdr->next = ntdb->access;
-	ntdb->access = hdr;
-
-	/* The payload sits immediately after the tracking header. */
-	data = hdr + 1;
-	if (convert) {
-		ntdb_convert(ntdb, data, len);
-	}
-	return data;
-}
-
-/*
- * Obtain write access to len bytes at off.
- *
- * Read-only databases are refused.  For databases without NTDB_CONVERT a
- * direct pointer is tried first; if one is obtained, direct_count is bumped
- * and it is returned.  Otherwise the current bytes are read into an
- * allocated copy tracked on ntdb->access, whose header records off, len and
- * convert for ntdb_access_commit().
- */
-void *ntdb_access_write(struct ntdb_context *ntdb,
-			ntdb_off_t off, ntdb_len_t len, bool convert)
-{
-	struct ntdb_access_hdr *hdr;
-	void *data;
-
-	if (ntdb->flags & NTDB_RDONLY) {
-		ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
-			    "Write to read-only database");
-		return NTDB_ERR_PTR(NTDB_ERR_RDONLY);
-	}
-
-	if (likely(!(ntdb->flags & NTDB_CONVERT))) {
-		void *direct = ntdb->io->direct(ntdb, off, len, true);
-
-		if (NTDB_PTR_IS_ERR(direct)) {
-			return direct;
-		}
-		if (direct) {
-			ntdb->file->direct_count++;
-			return direct;
-		}
-	}
-
-	/* No direct pointer available: work on a private copy. */
-	hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
-	if (NTDB_PTR_IS_ERR(hdr)) {
-		return hdr;
-	}
-	hdr->next = ntdb->access;
-	ntdb->access = hdr;
-
-	/* Remember where the copy has to be written back to. */
-	hdr->off = off;
-	hdr->len = len;
-	hdr->convert = convert;
-
-	data = hdr + 1;
-	if (convert)
-		ntdb_convert(ntdb, data, len);
-	return data;
-}
-
-/*
- * Find the tracked access whose payload pointer is p.  Returns the address
- * of the list link pointing at its header (so the caller can unlink it), or
- * NULL if p is not a tracked copy.
- */
-static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p)
-{
-	struct ntdb_access_hdr **link = &ntdb->access;
-
-	while (*link) {
-		/* The payload lives directly behind its header. */
-		if (*link + 1 == p)
-			return link;
-		link = &(*link)->next;
-	}
-	return NULL;
-}
-
-/*
- * End an access started with ntdb_access_read().  A tracked copy is
- * unlinked and freed; anything else is taken to be a direct pointer, so
- * direct_count is dropped and parked mappings are freed once it hits zero.
- */
-void ntdb_access_release(struct ntdb_context *ntdb, const void *p)
-{
-	struct ntdb_access_hdr **link = find_hdr(ntdb, p);
-	struct ntdb_access_hdr *hdr;
-
-	if (!link) {
-		if (--ntdb->file->direct_count == 0) {
-			free_old_mmaps(ntdb);
-		}
-		return;
-	}
-
-	hdr = *link;
-	*link = hdr->next;
-	ntdb->free_fn(hdr, ntdb->alloc_data);
-}
-
-/*
- * End an access started with ntdb_access_write().  A tracked copy is
- * written back to the offset recorded in its header (converted if the
- * header says so), then unlinked and freed.  Anything else is taken to be
- * a direct pointer: direct_count is dropped and parked mappings are freed
- * once it hits zero.
- */
-enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p)
-{
-	struct ntdb_access_hdr **link = find_hdr(ntdb, p);
-	struct ntdb_access_hdr *hdr;
-	enum NTDB_ERROR ecode;
-
-	if (!link) {
-		if (--ntdb->file->direct_count == 0) {
-			free_old_mmaps(ntdb);
-		}
-		return NTDB_SUCCESS;
-	}
-
-	hdr = *link;
-	if (hdr->convert) {
-		ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len);
-	} else {
-		ecode = ntdb_write(ntdb, hdr->off, p, hdr->len);
-	}
-
-	/* The copy is released whether or not the write-back worked. */
-	*link = hdr->next;
-	ntdb->free_fn(hdr, ntdb->alloc_data);
-
-	return ecode;
-}
-
-/*
- * Return a pointer into the mapping for [off, off + len), NULL when the
- * database is not mapped, or an error pointer when the range fails the
- * ntdb_oob() check.  write_mode is not consulted here.
- */
-static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len,
-			 bool write_mode)
-{
-	enum NTDB_ERROR ecode;
-
-	if (unlikely(!ntdb->file->map_ptr))
-		return NULL;
-
-	ecode = ntdb_oob(ntdb, off, len, false);
-
-	/* map_ptr is read only after the bounds check has run. */
-	if (likely(ecode == NTDB_SUCCESS))
-		return (char *)ntdb->file->map_ptr + off;
-
-	return NTDB_ERR_PTR(ecode);
-}
-
-/*
- * Read one offset value at off without byte-swapping: straight from the
- * mapping when possible, otherwise through ntdb_read().  Errors come back
- * encoded as an offset.
- */
-static ntdb_off_t ntdb_read_normal_off(struct ntdb_context *ntdb,
-				       ntdb_off_t off)
-{
-	ntdb_off_t *direct;
-	ntdb_off_t val;
-	enum NTDB_ERROR ecode;
-
-	direct = ntdb_direct(ntdb, off, sizeof(*direct), false);
-	if (NTDB_PTR_IS_ERR(direct)) {
-		return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(direct));
-	}
-
-	if (unlikely(!direct)) {
-		/* Not mapped: do a regular read instead. */
-		ecode = ntdb_read(ntdb, off, &val, sizeof(val));
-		if (ecode != NTDB_SUCCESS) {
-			return NTDB_ERR_TO_OFF(ecode);
-		}
-		return val;
-	}
-
-	return *direct;
-}
-
-/*
- * Read one offset value at off via ntdb_read_convert().  Errors come back
- * encoded as an offset.
- */
-static ntdb_off_t ntdb_read_convert_off(struct ntdb_context *ntdb,
-					ntdb_off_t off)
-{
-	ntdb_off_t val;
-	enum NTDB_ERROR ecode = ntdb_read_convert(ntdb, off, &val, sizeof(val));
-
-	if (ecode == NTDB_SUCCESS) {
-		return val;
-	}
-	return NTDB_ERR_TO_OFF(ecode);
-}
-
-/*
- * Store one offset value at off without byte-swapping: straight into the
- * mapping when possible, otherwise through ntdb_write().
- */
-static enum NTDB_ERROR ntdb_write_normal_off(struct ntdb_context *ntdb,
-					     ntdb_off_t off, ntdb_off_t val)
-{
-	ntdb_off_t *direct = ntdb_direct(ntdb, off, sizeof(*direct), true);
-
-	if (NTDB_PTR_IS_ERR(direct)) {
-		return NTDB_PTR_ERR(direct);
-	}
-
-	if (unlikely(!direct)) {
-		/* Not mapped: do a regular write instead. */
-		return ntdb_write(ntdb, off, &val, sizeof(val));
-	}
-
-	*direct = val;
-	return NTDB_SUCCESS;
-}
-
-/* Store one offset value at off via ntdb_write_convert(). */
-static enum NTDB_ERROR ntdb_write_convert_off(struct ntdb_context *ntdb,
-					      ntdb_off_t off, ntdb_off_t val)
-{
-	const void *src = &val;
-
-	return ntdb_write_convert(ntdb, off, src, sizeof(val));
-}
-
-/*
- * Bump the sequence number stored in the database header.
- *
- * The counter is never allowed to go negative: when the increment would
- * wrap, it restarts from zero instead.  Failures are silently ignored;
- * the function has no way to report them.
- */
-void ntdb_inc_seqnum(struct ntdb_context *ntdb)
-{
-	ntdb_off_t seq;
-
-	if (likely(!(ntdb->flags & NTDB_CONVERT))) {
-		int64_t *direct;
-
-		direct = ntdb->io->direct(ntdb,
-					  offsetof(struct ntdb_header, seqnum),
-					  sizeof(*direct), true);
-		/* An error pointer is non-NULL too: never dereference it,
-		 * fall through to the slow path instead. */
-		if (likely(direct) && !NTDB_PTR_IS_ERR(direct)) {
-			/* Don't let it go negative, even briefly.  The sum
-			 * is formed unsigned so that the wrap test itself
-			 * cannot overflow, and the comparison sits inside
-			 * unlikely() so it is actually evaluated. */
-			if (unlikely((int64_t)((uint64_t)*direct + 1) < 0))
-				*direct = 0;
-			(*direct)++;
-			return;
-		}
-	}
-
-	/* Slow path: read, bump and write back through the io layer. */
-	seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
-	if (!NTDB_OFF_IS_ERR(seq)) {
-		seq++;
-		if (unlikely((int64_t)seq < 0))
-			seq = 0;
-		ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq);
-	}
-}
-
-/* I/O method table installed by ntdb_io_init() when NTDB_CONVERT is not
- * set.  The initializers are positional, so their order must match the
- * member order of struct ntdb_methods (declared outside this file).
- * Differs from io_convert_methods only in the last two entries, which are
- * the non-converting offset read/write helpers. */
-static const struct ntdb_methods io_methods = {
- ntdb_read,
- ntdb_write,
- ntdb_normal_oob,
- ntdb_expand_file,
- ntdb_direct,
- ntdb_read_normal_off,
- ntdb_write_normal_off,
-};
-
-/* I/O method table installed by ntdb_io_init() when NTDB_CONVERT is set.
- * The initializers are positional, so their order must match the member
- * order of struct ntdb_methods (declared outside this file).  Differs from
- * io_methods only in the last two entries, which are the converting offset
- * read/write helpers. */
-static const struct ntdb_methods io_convert_methods = {
- ntdb_read,
- ntdb_write,
- ntdb_normal_oob,
- ntdb_expand_file,
- ntdb_direct,
- ntdb_read_convert_off,
- ntdb_write_convert_off,
-};
-
-/*
-  initialise the default methods table: the converting table for
-  NTDB_CONVERT databases, the plain one otherwise
-*/
-void ntdb_io_init(struct ntdb_context *ntdb)
-{
-	ntdb->io = (ntdb->flags & NTDB_CONVERT) ? &io_convert_methods
-						: &io_methods;
-}
+++ /dev/null
- /*
- Unix SMB/CIFS implementation.
-
- trivial database library
-
- Copyright (C) Andrew Tridgell 1999-2005
- Copyright (C) Paul `Rusty' Russell 2000
- Copyright (C) Jeremy Allison 2000-2003
-
- ** NOTE! The following LGPL license applies to the ntdb
- ** library. This does NOT imply that all of Samba is released
- ** under the LGPL
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-#include <ccan/build_assert/build_assert.h>
-
-/* Log and return NTDB_ERR_LOCK for a lock that is held by another ntdb
- * context in this process.  If we were threaded, we could wait for unlock,
- * but we're not, so fail.  call names the operation for the log message. */
-enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call)
-{
-	enum NTDB_ERROR ecode;
-
-	ecode = ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
-			    "%s: lock owned by another ntdb in this process.",
-			    call);
-	return ecode;
-}
-
-/* If we fork, we no longer really own locks.
- *
- * Returns true when no locks are recorded or when the recorded locker is
- * the current process.  Otherwise returns false, logging the mismatch on
- * behalf of "call" when log is set. */
-bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log)
-{
-	bool holds_locks = ntdb->file->allrecord_lock.count != 0
-		|| ntdb->file->num_lockrecs != 0;
-
-	/* No locks? No problem! */
-	if (!holds_locks) {
-		return true;
-	}
-
-	/* No fork? No problem! */
-	if (ntdb->file->locker == getpid()) {
-		return true;
-	}
-
-	if (!log) {
-		return false;
-	}
-
-	ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
-		    "%s: fork() detected after lock acquisition!"
-		    " (%u vs %u)", call,
-		    (unsigned int)ntdb->file->locker,
-		    (unsigned int)getpid());
-	return false;
-}
-
-/*
- * fcntl()-based lock function: lock len bytes at off with lock type rw,
- * blocking when waitflag is set.  The call is retried while it fails with
- * EINTR.  Returns the final fcntl() result; "unused" is ignored.
- */
-int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
-		    void *unused)
-{
-	const int cmd = waitflag ? F_SETLKW : F_SETLK;
-	struct flock fl;
-	int ret;
-
-	for (;;) {
-		fl.l_type = rw;
-		fl.l_whence = SEEK_SET;
-		fl.l_start = off;
-		fl.l_len = len;
-
-		ret = fcntl(fd, cmd, &fl);
-		if (ret == 0 || errno != EINTR)
-			break;
-	}
-	return ret;
-}
-
-/*
- * fcntl()-based unlock function: release len bytes at off.  The call is
- * retried while it fails with EINTR.  Returns the final fcntl() result;
- * rw and "unused" are ignored.
- */
-int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
-{
-	struct flock fl;
-	int ret;
-
-	for (;;) {
-		fl.l_type = F_UNLCK;
-		fl.l_whence = SEEK_SET;
-		fl.l_start = off;
-		fl.l_len = len;
-
-		ret = fcntl(fd, F_SETLKW, &fl);
-		if (ret == 0 || errno != EINTR)
-			break;
-	}
-	return ret;
-}
-
-/*
- * Low-level lock: call the context's lock_fn and keep statistics.  When
- * no locks are recorded yet, the current pid is noted as the locker first.
- * Returns whatever lock_fn returns.
- */
-static int lock(struct ntdb_context *ntdb,
-		int rw, off_t off, off_t len, bool waitflag)
-{
-	bool first_lock = ntdb->file->allrecord_lock.count == 0
-		&& ntdb->file->num_lockrecs == 0;
-	int ret;
-
-	if (first_lock) {
-		ntdb->file->locker = getpid();
-	}
-
-	ntdb->stats.lock_lowlevel++;
-	ret = ntdb->lock_fn(ntdb->file->fd, rw, off, len, waitflag,
-			    ntdb->lock_data);
-
-	/* Only non-blocking attempts feed the nonblock counters. */
-	if (waitflag) {
-		return ret;
-	}
-
-	ntdb->stats.lock_nonblock++;
-	if (ret != 0)
-		ntdb->stats.lock_nonblock_fail++;
-	return ret;
-}
-
-/*
- * Low-level unlock: hand the request to the context's unlock_fn and return
- * its result.
- *
- * The "#if 0" body below is disabled debugging code that parses
- * /proc/locks to verify that a matching lock exists and aborts on a
- * mismatch; it is never compiled.
- */
-static int unlock(struct ntdb_context *ntdb, int rw, off_t off, off_t len)
-{
-#if 0 /* Check they matched up locks and unlocks correctly. */
- char line[80];
- FILE *locks;
- bool found = false;
-
- locks = fopen("/proc/locks", "r");
-
- while (fgets(line, 80, locks)) {
- char *p;
- int type, start, l;
-
- /* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
- p = strchr(line, ':') + 1;
- if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY ")))
- continue;
- p += strlen(" FLOCK ADVISORY ");
- if (strncmp(p, "READ ", strlen("READ ")) == 0)
- type = F_RDLCK;
- else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
- type = F_WRLCK;
- else
- abort();
- p += 6;
- if (atoi(p) != getpid())
- continue;
- p = strchr(strchr(p, ' ') + 1, ' ') + 1;
- start = atoi(p);
- p = strchr(p, ' ') + 1;
- if (strncmp(p, "EOF", 3) == 0)
- l = 0;
- else
- l = atoi(p) - start + 1;
-
- if (off == start) {
- if (len != l) {
- fprintf(stderr, "Len %u should be %u: %s",
- (int)len, l, line);
- abort();
- }
- if (type != rw) {
- fprintf(stderr, "Type %s wrong: %s",
- rw == F_RDLCK ? "READ" : "WRITE", line);
- abort();
- }
- found = true;
- break;
- }
- }
-
- if (!found) {
- fprintf(stderr, "Unlock on %u@%u not found!",
- (int)off, (int)len);
- abort();
- }
-
- fclose(locks);
-#endif
-
- return ntdb->unlock_fn(ntdb->file->fd, rw, off, len, ntdb->lock_data);
-}
-
-/* a byte range locking function - return 0 on success
- this functions locks len bytes at the specified offset.
-
- note that a len of zero means lock to end of file
-
- Write locks on a read-only database are refused.  With NTDB_NOLOCK set
- nothing is locked and NTDB_SUCCESS is returned.  A failed lock returns
- NTDB_ERR_LOCK; it is logged unless NTDB_LOCK_PROBE is set or errno is
- EAGAIN or EINTR.
-*/
-static enum NTDB_ERROR ntdb_brlock(struct ntdb_context *ntdb,
- int rw_type, ntdb_off_t offset, ntdb_off_t len,
- enum ntdb_lock_flags flags)
-{
- int ret;
-
- if (rw_type == F_WRLCK && (ntdb->flags & NTDB_RDONLY)) {
- return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
- "Write lock attempted on read-only database");
- }
-
- if (ntdb->flags & NTDB_NOLOCK) {
- return NTDB_SUCCESS;
- }
-
- /* A 32 bit system cannot open a 64-bit file, but it could have
- * expanded since then: check here. */
- if ((size_t)(offset + len) != offset + len) {
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_brlock: lock on giant offset %llu",
- (long long)(offset + len));
- }
-
- /* Block only if the caller asked for NTDB_LOCK_WAIT. */
- ret = lock(ntdb, rw_type, offset, len, flags & NTDB_LOCK_WAIT);
- if (ret != 0) {
- /* Generic lock error. errno set by fcntl.
- * EAGAIN is an expected return from non-blocking
- * locks. */
- if (!(flags & NTDB_LOCK_PROBE)
- && (errno != EAGAIN && errno != EINTR)) {
- ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_brlock failed (fd=%d) at"
- " offset %zu rw_type=%d flags=%d len=%zu:"
- " %s",
- ntdb->file->fd, (size_t)offset, rw_type,
- flags, (size_t)len, strerror(errno));
- }
- return NTDB_ERR_LOCK;
- }
- return NTDB_SUCCESS;
-}
-
-/*
- * Release a byte-range lock of len bytes at offset.  Does nothing when
- * NTDB_NOLOCK is set.  Fails with NTDB_ERR_LOCK (without logging) when
- * check_lock_pid() reports that this process doesn't own the locks, and
- * logs an error when the unlock call itself fails.
- */
-static enum NTDB_ERROR ntdb_brunlock(struct ntdb_context *ntdb,
-				     int rw_type, ntdb_off_t offset, size_t len)
-{
-	int ret;
-
-	if (ntdb->flags & NTDB_NOLOCK) {
-		return NTDB_SUCCESS;
-	}
-
-	if (!check_lock_pid(ntdb, "ntdb_brunlock", false))
-		return NTDB_ERR_LOCK;
-
-	ret = unlock(ntdb, rw_type, offset, len);
-	if (ret != -1) {
-		return NTDB_SUCCESS;
-	}
-
-	return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
-			   "ntdb_brunlock failed (fd=%d) at offset %zu"
-			   " rw_type=%d len=%zu: %s",
-			   ntdb->file->fd, (size_t)offset, rw_type,
-			   (size_t)len, strerror(errno));
-}
-
-/*
- upgrade a read lock to a write lock. This needs to be handled in a
- special way as some OSes (such as solaris) have too conservative
- deadlock detection and claim a deadlock when progress can be
- made. For those OSes we may loop for a while.
-
- Requires exactly one allrecord lock held by this context with its off
- field equal to 1.  On success the recorded lock type becomes F_WRLCK and
- off is set to 0.  The attempt is retried up to 1000 times, but only while
- it fails with EDEADLK.
-*/
-enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start)
-{
- int count = 1000;
-
- /* NOTE(review): the name logged here is
- * "ntdb_transaction_prepare_commit", not this function - presumably
- * the caller this is reached from; confirm. */
- if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true))
- return NTDB_ERR_LOCK;
-
- if (ntdb->file->allrecord_lock.count != 1) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_allrecord_upgrade failed:"
- " count %u too high",
- ntdb->file->allrecord_lock.count);
- }
-
- if (ntdb->file->allrecord_lock.off != 1) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_allrecord_upgrade failed:"
- " already upgraded?");
- }
-
- if (ntdb->file->allrecord_lock.owner != ntdb) {
- return owner_conflict(ntdb, "ntdb_allrecord_upgrade");
- }
-
- while (count--) {
- struct timeval tv;
- /* len 0: lock from start to end of file.  PROBE keeps
- * ntdb_brlock from logging each failed attempt. */
- if (ntdb_brlock(ntdb, F_WRLCK, start, 0,
- NTDB_LOCK_WAIT|NTDB_LOCK_PROBE) == NTDB_SUCCESS) {
- ntdb->file->allrecord_lock.ltype = F_WRLCK;
- ntdb->file->allrecord_lock.off = 0;
- return NTDB_SUCCESS;
- }
- if (errno != EDEADLK) {
- break;
- }
- /* sleep for as short a time as we can - more portable than usleep() */
- tv.tv_sec = 0;
- tv.tv_usec = 1;
- select(0, NULL, NULL, NULL, &tv);
- }
-
- /* EAGAIN and EINTR are not logged. */
- if (errno != EAGAIN && errno != EINTR)
- ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_allrecord_upgrade failed");
- return NTDB_ERR_LOCK;
-}
-
-/*
- * Look up the lock record for offset.  Returns the first record with that
- * offset, or NULL if there is none.  When owner is non-NULL and that
- * record belongs to a different context, NULL is returned as well.
- */
-static struct ntdb_lock *find_nestlock(struct ntdb_context *ntdb, ntdb_off_t offset,
-				       const struct ntdb_context *owner)
-{
-	unsigned int idx = 0;
-
-	while (idx < ntdb->file->num_lockrecs) {
-		struct ntdb_lock *rec = &ntdb->file->lockrecs[idx];
-
-		if (rec->off != offset) {
-			idx++;
-			continue;
-		}
-
-		/* First match decides; an owner mismatch ends the search. */
-		if (owner && rec->owner != owner)
-			return NULL;
-		return rec;
-	}
-	return NULL;
-}
-
-/*
- * Take the allrecord write lock and the open lock (both waiting, both with
- * NTDB_LOCK_NOCHECK), run ntdb_transaction_recover(), then release the
- * locks in reverse order.  Returns the first locking error, otherwise the
- * result of the recovery.
- */
-enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb)
-{
-	enum NTDB_ERROR ecode;
-
-	if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true))
-		return NTDB_ERR_LOCK;
-
-	ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK,
-				    false);
-	if (ecode != NTDB_SUCCESS) {
-		return ecode;
-	}
-
-	ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
-	if (ecode == NTDB_SUCCESS) {
-		/* Both locks held: safe to recover. */
-		ecode = ntdb_transaction_recover(ntdb);
-		ntdb_unlock_open(ntdb, F_WRLCK);
-	}
-
-	/* The allrecord lock is dropped on every path past this point. */
-	ntdb_allrecord_unlock(ntdb, F_WRLCK);
-
-	return ecode;
-}
-
-/* lock an offset in the database. */
-static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb,
- ntdb_off_t offset, int ltype,
- enum ntdb_lock_flags flags)
-{
- struct ntdb_lock *new_lck;
- enum NTDB_ERROR ecode;
-
- assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
- + ntdb->file->map_size / 8));
-
- if (ntdb->flags & NTDB_NOLOCK)
- return NTDB_SUCCESS;
-
- if (!check_lock_pid(ntdb, "ntdb_nest_lock", true)) {
- return NTDB_ERR_LOCK;
- }
-
- ntdb->stats.locks++;
-
- new_lck = find_nestlock(ntdb, offset, NULL);
- if (new_lck) {
- if (new_lck->owner != ntdb) {
- return owner_conflict(ntdb, "ntdb_nest_lock");
- }
-
- if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_nest_lock:"
- " offset %zu has read lock",
- (size_t)offset);
- }
- /* Just increment the struct, posix locks don't stack. */
- new_lck->count++;
- return NTDB_SUCCESS;
- }
-
-#if 0
- if (ntdb->file->num_lockrecs
- && offset >= NTDB_HASH_LOCK_START
- && offset < NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_nest_lock: already have a hash lock?");
- }
-#endif
- if (ntdb->file->lockrecs == NULL) {
- new_lck = ntdb->alloc_fn(ntdb->file, sizeof(*ntdb->file->lockrecs),
- ntdb->alloc_data);
- } else {
- new_lck = (struct ntdb_lock *)ntdb->expand_fn(
- ntdb->file->lockrecs,
- sizeof(*ntdb->file->lockrecs)
- * (ntdb->file->num_lockrecs+1),
- ntdb->alloc_data);
- }
- if (new_lck == NULL) {
- return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "ntdb_nest_lock:"
- " unable to allocate %zu lock struct",
- ntdb->file->num_lockrecs + 1);
- }
- ntdb->file->lockrecs = new_lck;
-
- /* Since fcntl locks don't nest, we do a lock for the first one,
- and simply bump the count for future ones */
- ecode = ntdb_brlock(ntdb, ltype, offset, 1, flags);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* First time we grab a lock, perhaps someone died in commit? */
- if (!(flags & NTDB_LOCK_NOCHECK)
- && ntdb->file->num_lockrecs == 0) {
- ntdb_bool_err berr = ntdb_needs_recovery(ntdb);
- if (berr != false) {
- ntdb_brunlock(ntdb, ltype, offset, 1);
-
- if (berr < 0)
- return NTDB_OFF_TO_ERR(berr);
- ecode = ntdb_lock_and_recover(ntdb);
- if (ecode == NTDB_SUCCESS) {
- ecode = ntdb_brlock(ntdb, ltype, offset, 1,
- flags);
- }
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- }
- }
-
- ntdb->file->lockrecs[ntdb->file->num_lockrecs].owner = ntdb;
- ntdb->file->lockrecs[ntdb->file->num_lockrecs].off = offset;
- ntdb->file->lockrecs[ntdb->file->num_lockrecs].count = 1;
- ntdb->file->lockrecs[ntdb->file->num_lockrecs].ltype = ltype;
- ntdb->file->num_lockrecs++;
-
- return NTDB_SUCCESS;
-}
-
-static enum NTDB_ERROR ntdb_nest_unlock(struct ntdb_context *ntdb,
- ntdb_off_t off, int ltype)
-{
- struct ntdb_lock *lck;
- enum NTDB_ERROR ecode;
-
- if (ntdb->flags & NTDB_NOLOCK)
- return NTDB_SUCCESS;
-
- lck = find_nestlock(ntdb, off, ntdb);
- if ((lck == NULL) || (lck->count == 0)) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_nest_unlock: no lock for %zu",
- (size_t)off);
- }
-
- if (lck->count > 1) {
- lck->count--;
- return NTDB_SUCCESS;
- }
-
- /*
- * This lock has count==1 left, so we need to unlock it in the
- * kernel. We don't bother with decrementing the in-memory array
- * element, we're about to overwrite it with the last array element
- * anyway.
- */
- ecode = ntdb_brunlock(ntdb, ltype, off, 1);
-
- /*
- * Shrink the array by overwriting the element just unlocked with the
- * last array element.
- */
- *lck = ntdb->file->lockrecs[--ntdb->file->num_lockrecs];
-
- return ecode;
-}
-
-/*
- get the transaction lock
- */
-enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype)
-{
- return ntdb_nest_lock(ntdb, NTDB_TRANSACTION_LOCK, ltype, NTDB_LOCK_WAIT);
-}
-
-/*
- release the transaction lock
- */
-void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype)
-{
- ntdb_nest_unlock(ntdb, NTDB_TRANSACTION_LOCK, ltype);
-}
-
-/* We only need to lock individual bytes, but Linux merges consecutive locks
- * so we lock in contiguous ranges. */
-static enum NTDB_ERROR ntdb_lock_gradual(struct ntdb_context *ntdb,
- int ltype, enum ntdb_lock_flags flags,
- ntdb_off_t off, ntdb_off_t len)
-{
- enum NTDB_ERROR ecode;
- enum ntdb_lock_flags nb_flags = (flags & ~NTDB_LOCK_WAIT);
-
- if (len <= 1) {
- /* 0 would mean to end-of-file... */
- assert(len != 0);
- /* Single hash. Just do blocking lock. */
- return ntdb_brlock(ntdb, ltype, off, len, flags);
- }
-
- /* First we try non-blocking. */
- ecode = ntdb_brlock(ntdb, ltype, off, len, nb_flags);
- if (ecode != NTDB_ERR_LOCK) {
- return ecode;
- }
-
- /* Try locking first half, then second. */
- ecode = ntdb_lock_gradual(ntdb, ltype, flags, off, len / 2);
- if (ecode != NTDB_SUCCESS)
- return ecode;
-
- ecode = ntdb_lock_gradual(ntdb, ltype, flags,
- off + len / 2, len - len / 2);
- if (ecode != NTDB_SUCCESS) {
- ntdb_brunlock(ntdb, ltype, off, len / 2);
- }
- return ecode;
-}
-
-/* lock/unlock entire database. It can only be upgradable if you have some
- * other way of guaranteeing exclusivity (ie. transaction write lock). */
-enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
- enum ntdb_lock_flags flags, bool upgradable)
-{
- enum NTDB_ERROR ecode;
- ntdb_bool_err berr;
-
- if (ntdb->flags & NTDB_NOLOCK) {
- return NTDB_SUCCESS;
- }
-
- if (!check_lock_pid(ntdb, "ntdb_allrecord_lock", true)) {
- return NTDB_ERR_LOCK;
- }
-
- if (ntdb->file->allrecord_lock.count) {
- if (ntdb->file->allrecord_lock.owner != ntdb) {
- return owner_conflict(ntdb, "ntdb_allrecord_lock");
- }
-
- if (ltype == F_RDLCK
- || ntdb->file->allrecord_lock.ltype == F_WRLCK) {
- ntdb->file->allrecord_lock.count++;
- return NTDB_SUCCESS;
- }
-
- /* a global lock of a different type exists */
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
- "ntdb_allrecord_lock: already have %s lock",
- ntdb->file->allrecord_lock.ltype == F_RDLCK
- ? "read" : "write");
- }
-
- if (ntdb_has_hash_locks(ntdb)) {
- /* can't combine global and chain locks */
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
- "ntdb_allrecord_lock:"
- " already have chain lock");
- }
-
- if (upgradable && ltype != F_RDLCK) {
- /* ntdb error: you can't upgrade a write lock! */
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_allrecord_lock:"
- " can't upgrade a write lock");
- }
-
- ntdb->stats.locks++;
-again:
- /* Lock hashes, gradually. */
- ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START,
- 1 << ntdb->hash_bits);
- if (ecode != NTDB_SUCCESS)
- return ecode;
-
- /* Lock free tables: there to end of file. */
- ecode = ntdb_brlock(ntdb, ltype,
- NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits),
- 0, flags);
- if (ecode != NTDB_SUCCESS) {
- ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START,
- 1 << ntdb->hash_bits);
- return ecode;
- }
-
- ntdb->file->allrecord_lock.owner = ntdb;
- ntdb->file->allrecord_lock.count = 1;
- /* If it's upgradable, it's actually exclusive so we can treat
- * it as a write lock. */
- ntdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
- ntdb->file->allrecord_lock.off = upgradable;
-
- /* Now check for needing recovery. */
- if (flags & NTDB_LOCK_NOCHECK)
- return NTDB_SUCCESS;
-
- berr = ntdb_needs_recovery(ntdb);
- if (likely(berr == false))
- return NTDB_SUCCESS;
-
- ntdb_allrecord_unlock(ntdb, ltype);
- if (berr < 0)
- return NTDB_OFF_TO_ERR(berr);
- ecode = ntdb_lock_and_recover(ntdb);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- goto again;
-}
-
-enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
- int ltype, enum ntdb_lock_flags flags)
-{
- return ntdb_nest_lock(ntdb, NTDB_OPEN_LOCK, ltype, flags);
-}
-
-void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype)
-{
- ntdb_nest_unlock(ntdb, NTDB_OPEN_LOCK, ltype);
-}
-
-bool ntdb_has_open_lock(struct ntdb_context *ntdb)
-{
- return !(ntdb->flags & NTDB_NOLOCK)
- && find_nestlock(ntdb, NTDB_OPEN_LOCK, ntdb) != NULL;
-}
-
-enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype)
-{
- /* Lock doesn't protect data, so don't check (we recurse if we do!) */
- return ntdb_nest_lock(ntdb, NTDB_EXPANSION_LOCK, ltype,
- NTDB_LOCK_WAIT | NTDB_LOCK_NOCHECK);
-}
-
-void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype)
-{
- ntdb_nest_unlock(ntdb, NTDB_EXPANSION_LOCK, ltype);
-}
-
-/* unlock entire db */
-void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype)
-{
- if (ntdb->flags & NTDB_NOLOCK)
- return;
-
- if (ntdb->file->allrecord_lock.count == 0) {
- ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
- "ntdb_allrecord_unlock: not locked!");
- return;
- }
-
- if (ntdb->file->allrecord_lock.owner != ntdb) {
- ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
- "ntdb_allrecord_unlock: not locked by us!");
- return;
- }
-
- /* Upgradable locks are marked as write locks. */
- if (ntdb->file->allrecord_lock.ltype != ltype
- && (!ntdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
- ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_allrecord_unlock: have %s lock",
- ntdb->file->allrecord_lock.ltype == F_RDLCK
- ? "read" : "write");
- return;
- }
-
- if (ntdb->file->allrecord_lock.count > 1) {
- ntdb->file->allrecord_lock.count--;
- return;
- }
-
- ntdb->file->allrecord_lock.count = 0;
- ntdb->file->allrecord_lock.ltype = 0;
-
- ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, 0);
-}
-
-bool ntdb_has_expansion_lock(struct ntdb_context *ntdb)
-{
- return find_nestlock(ntdb, NTDB_EXPANSION_LOCK, ntdb) != NULL;
-}
-
-bool ntdb_has_hash_locks(struct ntdb_context *ntdb)
-{
- unsigned int i;
-
- for (i=0; i<ntdb->file->num_lockrecs; i++) {
- if (ntdb->file->lockrecs[i].off >= NTDB_HASH_LOCK_START
- && ntdb->file->lockrecs[i].off < (NTDB_HASH_LOCK_START
- + (1 << ntdb->hash_bits)))
- return true;
- }
- return false;
-}
-
-static bool ntdb_has_free_lock(struct ntdb_context *ntdb)
-{
- unsigned int i;
-
- if (ntdb->flags & NTDB_NOLOCK)
- return false;
-
- for (i=0; i<ntdb->file->num_lockrecs; i++) {
- if (ntdb->file->lockrecs[i].off
- > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits))
- return true;
- }
- return false;
-}
-
-enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
- unsigned int h,
- int ltype)
-{
- unsigned l = NTDB_HASH_LOCK_START + h;
-
- assert(h < (1 << ntdb->hash_bits));
-
- /* a allrecord lock allows us to avoid per chain locks */
- if (ntdb->file->allrecord_lock.count) {
- if (!check_lock_pid(ntdb, "ntdb_lock_hashes", true))
- return NTDB_ERR_LOCK;
-
- if (ntdb->file->allrecord_lock.owner != ntdb)
- return owner_conflict(ntdb, "ntdb_lock_hashes");
- if (ltype == ntdb->file->allrecord_lock.ltype
- || ltype == F_RDLCK) {
- return NTDB_SUCCESS;
- }
-
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
- "ntdb_lock_hashes:"
- " already have %s allrecordlock",
- ntdb->file->allrecord_lock.ltype == F_RDLCK
- ? "read" : "write");
- }
-
- if (ntdb_has_free_lock(ntdb)) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_lock_hashes: already have free lock");
- }
-
- if (ntdb_has_expansion_lock(ntdb)) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_lock_hashes:"
- " already have expansion lock");
- }
-
- return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT);
-}
-
-enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
- unsigned int h, int ltype)
-{
- unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1));
-
- if (ntdb->flags & NTDB_NOLOCK)
- return 0;
-
- /* a allrecord lock allows us to avoid per chain locks */
- if (ntdb->file->allrecord_lock.count) {
- if (ntdb->file->allrecord_lock.ltype == F_RDLCK
- && ltype == F_WRLCK) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_unlock_hashes RO allrecord!");
- }
- if (ntdb->file->allrecord_lock.owner != ntdb) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
- "ntdb_unlock_hashes:"
- " not locked by us!");
- }
- return NTDB_SUCCESS;
- }
-
- return ntdb_nest_unlock(ntdb, l, ltype);
-}
-
-/* Hash locks use NTDB_HASH_LOCK_START + <number of hash entries>..
- * Then we begin; bucket offsets are sizeof(ntdb_len_t) apart, so we divide.
- * The result is that on 32 bit systems we don't use lock values > 2^31 on
- * files that are less than 4GB.
- */
-static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb,
- ntdb_off_t b_off)
-{
- return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
- + b_off / sizeof(ntdb_off_t);
-}
-
-enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
- enum ntdb_lock_flags waitflag)
-{
- assert(b_off >= sizeof(struct ntdb_header));
-
- if (ntdb->flags & NTDB_NOLOCK)
- return 0;
-
- /* a allrecord lock allows us to avoid per chain locks */
- if (ntdb->file->allrecord_lock.count) {
- if (!check_lock_pid(ntdb, "ntdb_lock_free_bucket", true))
- return NTDB_ERR_LOCK;
-
- if (ntdb->file->allrecord_lock.owner != ntdb) {
- return owner_conflict(ntdb, "ntdb_lock_free_bucket");
- }
-
- if (ntdb->file->allrecord_lock.ltype == F_WRLCK)
- return 0;
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_lock_free_bucket with"
- " read-only allrecordlock!");
- }
-
-#if 0 /* FIXME */
- if (ntdb_has_expansion_lock(ntdb)) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_lock_free_bucket:"
- " already have expansion lock");
- }
-#endif
-
- return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK,
- waitflag);
-}
-
-void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off)
-{
- if (ntdb->file->allrecord_lock.count)
- return;
-
- ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK);
-}
-
-_PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb)
-{
- return ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
-}
-
-_PUBLIC_ void ntdb_unlockall(struct ntdb_context *ntdb)
-{
- ntdb_allrecord_unlock(ntdb, F_WRLCK);
-}
-
-_PUBLIC_ enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb)
-{
- return ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
-}
-
-_PUBLIC_ void ntdb_unlockall_read(struct ntdb_context *ntdb)
-{
- ntdb_allrecord_unlock(ntdb, F_RDLCK);
-}
-
-void ntdb_lock_cleanup(struct ntdb_context *ntdb)
-{
- unsigned int i;
-
- /* We don't want to warn: they're allowed to close ntdb after fork. */
- if (!check_lock_pid(ntdb, "ntdb_close", false))
- return;
-
- while (ntdb->file->allrecord_lock.count
- && ntdb->file->allrecord_lock.owner == ntdb) {
- ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
- }
-
- for (i=0; i<ntdb->file->num_lockrecs; i++) {
- if (ntdb->file->lockrecs[i].owner == ntdb) {
- ntdb_nest_unlock(ntdb,
- ntdb->file->lockrecs[i].off,
- ntdb->file->lockrecs[i].ltype);
- i--;
- }
- }
-}
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
-<refentry>
- <refmeta>
- <refentrytitle>ntdb</refentrytitle>
- <manvolnum>3</manvolnum>
- <refmiscinfo class="source">Samba</refmiscinfo>
- <refmiscinfo class="manual">System Administration tools</refmiscinfo>
- <refmiscinfo class="version">4.1</refmiscinfo>
- </refmeta>
- <refnamediv>
- <refname>ntdb</refname>
-<refpurpose>A not-so trivial keyword/data database system</refpurpose>
- </refnamediv>
- <refsynopsisdiv>
-<synopsis>#include <ntdb.h></synopsis>
- </refsynopsisdiv>
- <refsect1><title>DESCRIPTION</title>
- <para>
- If you have previously used the tdb library from Samba, much of
- this will seem familiar, but there are some API changes which a
- compiler will warn you about if you simply replace 'tdb' with
- 'ntdb' in your code! The on-disk format for ntdb is
- incompatible with tdb.
- </para>
- <para>
- tdb's API was based on gdbm, and ntdb continues this tradition,
- with enhancements. A differences guide is available in the text
- file <filename>lib/ntdb/doc/TDB_porting.txt</filename> in the
- SAMBA source tree.
- </para>
- </refsect1>
- <refsect1><title>NTDB API OVERVIEW</title>
- <para>
- The complete API is documented in the ntdb.h header, which is
- kept up-to-date and recommended reading.
- </para>
- <para>
- Normal usage is to call ntdb_open() to create or open an ntdb
- file. ntdb_store() is used to add records, ntdb_fetch() is used
- to fetch them. Traversals are supported via callback
- (ntdb_traverse()) or iteration (ntdb_firstkey() and
- ntdb_nextkey()). Transactions are supported for batching
- updates or reads atomically, using ntdb_transaction_start() and
- ntdb_transaction_commit().
- </para>
- <refsect2><title>Use With Talloc</title>
- <para>
- ntdb_open() takes an optional linked list of attributes:
- in particular you can specify an alternate allocator (such as
- talloc):
- </para>
- <programlisting>
-#include <talloc.h>
-#include <ntdb.h>
-
-static void *my_alloc(const void *owner, size_t len, void *priv)
-{
- return talloc_size(owner, len);
-}
-
-static void *my_expand(void *old, size_t newlen, void *priv)
-{
- return talloc_realloc_size(NULL, old, newlen);
-}
-
-static void my_free(void *old, void *priv)
-{
- talloc_free(old);
-}
-
-/* This opens an ntdb file as a talloc object with given parent. */
-struct ntdb_context *ntdb_open_talloc(const void *parent,
- const char *filename)
-{
- struct ntdb_context *ntdb;
- union ntdb_attribute alloc;
-
- alloc.base.attr = NTDB_ATTRIBUTE_ALLOCATOR;
- alloc.base.next = NULL;
- alloc.alloc.alloc = my_alloc;
- alloc.alloc.expand = my_expand;
- alloc.alloc.free = my_free;
-
- ntdb = ntdb_open(filename, NTDB_DEFAULT, O_RDWR|O_CREAT, 0600,
- &alloc);
- if (ntdb) {
- talloc_steal(parent, ntdb);
- talloc_set_name(ntdb, "%s", filename);
- }
- return ntdb;
-}
-</programlisting>
- </refsect2>
- </refsect1>
- <refsect1><title>SEE ALSO</title>
- <para>
- <ulink url="http://tdb.samba.org/"/>
- </para>
- </refsect1>
-
- <refsect1><title>AUTHOR</title>
- <para> The original tdb software was created by Andrew Tridgell, and
- is now developed by the
- Samba Team as an Open Source project similar to the way the
- Linux kernel is developed. ntdb was derived from tdb, but mostly
- rewritten by Rusty Russell.
- </para>
- </refsect1>
-
- <refsect1><title>COPYRIGHT/LICENSE</title>
- <para>
- Copyright (C) Rusty Russell 2013, IBM Corporation
- </para>
- <para>
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as
- published by the Free Software Foundation; either version 3 of the
- License, or (at your option) any later version.
- </para>
- <para>
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- </para>
- <para>
- You should have received a copy of the GNU General Public License
- along with this program; if not, see http://www.gnu.org/licenses/.
- </para>
- </refsect1>
-</refentry>
+++ /dev/null
-<?xml version="1.0" encoding="iso-8859-1"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
-<refentry id="ntdbbackup.8">
-
-<refmeta>
- <refentrytitle>ntdbbackup</refentrytitle>
- <manvolnum>8</manvolnum>
- <refmiscinfo class="source">Samba</refmiscinfo>
- <refmiscinfo class="manual">System Administration tools</refmiscinfo>
- <refmiscinfo class="version">4.1</refmiscinfo>
-</refmeta>
-
-
-<refnamediv>
- <refname>ntdbbackup</refname>
- <refpurpose>tool for backing up and for validating the integrity of samba .ntdb files</refpurpose>
-</refnamediv>
-
-<refsynopsisdiv>
- <cmdsynopsis>
- <command>ntdbbackup</command>
- <arg choice="opt">-s suffix</arg>
- <arg choice="opt">-v</arg>
- <arg choice="opt">-h</arg>
- </cmdsynopsis>
-</refsynopsisdiv>
-
-<refsect1>
- <title>DESCRIPTION</title>
-
- <para>This tool is part of the <citerefentry><refentrytitle>samba</refentrytitle>
- <manvolnum>1</manvolnum></citerefentry> suite.</para>
-
- <para><command>ntdbbackup</command> is a tool that may be used to backup samba .ntdb
- files. This tool may also be used to verify the integrity of the .ntdb files prior
- to samba startup or during normal operation. If it finds file damage and it finds
- a prior backup the backup file will be restored.
- </para>
-</refsect1>
-
-
-<refsect1>
- <title>OPTIONS</title>
-
- <variablelist>
-
- <varlistentry>
- <term>-h</term>
- <listitem><para>
- Get help information.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>-s suffix</term>
- <listitem><para>
- The <command>-s</command> option allows the administrator to specify a file
- backup extension. This way it is possible to keep a history of ntdb backup
- files by using a new suffix for each backup.
- </para> </listitem>
- </varlistentry>
-
- <varlistentry>
- <term>-v</term>
- <listitem><para>
- The <command>-v</command> will check the database for damages (corrupt data)
- which if detected causes the backup to be restored.
- </para></listitem>
- </varlistentry>
-
- </variablelist>
-</refsect1>
-
-
-<refsect1>
- <title>COMMANDS</title>
-
- <para><emphasis>GENERAL INFORMATION</emphasis></para>
-
- <para>
- The <command>ntdbbackup</command> utility can safely be run at any time. It was designed so
- that it can be used at any time to validate the integrity of ntdb files, even during Samba
- operation. Typical usage for the command will be:
- </para>
-
- <para>ntdbbackup [-s suffix] *.ntdb</para>
-
- <para>
- Before restarting samba the following command may be run to validate .ntdb files:
- </para>
-
- <para>ntdbbackup -v [-s suffix] *.ntdb</para>
-
- <para>
- Note that Samba 4 can use .tdb files instead, so you should
- use <command>tdbbackup</command> on those files.
- </para>
-
- <para>
- Samba .tdb and .ntdb files are stored in various locations, be sure to run backup all
- .(n)tdb files on the system. Important files includes:
- </para>
-
- <itemizedlist>
- <listitem><para>
- <command>secrets.(n)tdb</command> - usual location is in the /usr/local/samba/private
- directory, or on some systems in /etc/samba.
- </para></listitem>
-
- <listitem><para>
- <command>passdb.(n)tdb</command> - usual location is in the /usr/local/samba/private
- directory, or on some systems in /etc/samba.
- </para></listitem>
-
- <listitem><para>
- <command>*.tdb</command> and <command>*.ntdb</command> located in the /usr/local/samba/var directory or on some
- systems in the /var/cache or /var/lib/samba directories.
- </para></listitem>
- </itemizedlist>
-
-</refsect1>
-
-<refsect1>
- <title>VERSION</title>
-
- <para>This man page is correct for version 4 of the Samba suite.</para>
-</refsect1>
-
-<refsect1>
- <title>SEE ALSO</title>
-
- <para>
- tdbbackup(8), ntdbrestore(8)
- </para>
-</refsect1>
-
-<refsect1>
- <title>AUTHOR</title>
-
- <para>
- The original Samba software and related utilities were created by Andrew Tridgell.
- Samba is now developed by the Samba Team as an Open Source project similar to the way
- the Linux kernel is developed.
- </para>
-
- <para>The ntdbbackup man page was written by Rusty Russell,
- based on the tdbbackup man page by John H Terpstra.</para>
-</refsect1>
-
-</refentry>
+++ /dev/null
-<?xml version="1.0" encoding="iso-8859-1"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
-<refentry id="ntdbdump.8">
-
-<refmeta>
- <refentrytitle>ntdbdump</refentrytitle>
- <manvolnum>8</manvolnum>
- <refmiscinfo class="source">Samba</refmiscinfo>
- <refmiscinfo class="manual">System Administration tools</refmiscinfo>
- <refmiscinfo class="version">4.1</refmiscinfo>
-</refmeta>
-
-
-<refnamediv>
- <refname>ntdbdump</refname>
- <refpurpose>tool for printing the contents of an NTDB file</refpurpose>
-</refnamediv>
-
-<refsynopsisdiv>
- <cmdsynopsis>
- <command>ntdbdump</command>
- <arg choice="opt">-k <replaceable>keyname</replaceable></arg>
- <arg choice="opt">-e</arg>
- <arg choice="opt">-h</arg>
- <arg choice="req">filename</arg>
- </cmdsynopsis>
-</refsynopsisdiv>
-
-<refsect1>
- <title>DESCRIPTION</title>
-
- <para>This tool is part of the <citerefentry><refentrytitle>samba</refentrytitle>
- <manvolnum>1</manvolnum></citerefentry> suite.</para>
-
- <para><command>ntdbdump</command> is a very simple utility that 'dumps' the
- contents of a NTDB (New Trivial DataBase) file to standard output in a
- human-readable format.
- </para>
-
- <para>This tool can be used when debugging problems with NTDB files. It is
- intended for those who are somewhat familiar with Samba internals.
- </para>
-</refsect1>
-
-<refsect1>
- <title>OPTIONS</title>
-
- <variablelist>
-
- <varlistentry>
- <term>-h</term>
- <listitem><para>
- Get help information.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>-k <replaceable>keyname</replaceable></term>
- <listitem><para>
- The <command>-k</command> option restricts dumping to a single key, if found.
- </para> </listitem>
- </varlistentry>
-
- </variablelist>
-</refsect1>
-
-<refsect1>
- <title>SEE ALSO</title>
-
- <para>
- tdbdump(8), ntdbtool(8)
- </para>
-</refsect1>
-
-<refsect1>
- <title>VERSION</title>
-
- <para>This man page is correct for version 4 of the Samba suite.</para>
-</refsect1>
-
-<refsect1>
- <title>AUTHOR</title>
-
- <para>
- The original Samba software and related utilities were created by Andrew Tridgell.
- Samba is now developed by the Samba Team as an Open Source project similar to the way
- the Linux kernel is developed.
- </para>
-
- <para>The ntdbdump man page was written by Rusty Russell, base on the tdbdump man page by Jelmer Vernooij.</para>
-</refsect1>
-
-</refentry>
+++ /dev/null
-<?xml version="1.0" encoding="iso-8859-1"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
-<refentry id="ntdbrestore.8">
-
-<refmeta>
- <refentrytitle>ntdbrestore</refentrytitle>
- <manvolnum>8</manvolnum>
- <refmiscinfo class="source">Samba</refmiscinfo>
- <refmiscinfo class="manual">System Administration tools</refmiscinfo>
- <refmiscinfo class="version">4.1</refmiscinfo>
-</refmeta>
-
-
-<refnamediv>
- <refname>ntdbrestore</refname>
- <refpurpose>tool for creating a NTDB file out of a ntdbdump output</refpurpose>
-</refnamediv>
-
-<refsynopsisdiv>
- <cmdsynopsis>
- <command>ntdbrestore</command>
- <arg choice="req">ntdbfilename</arg>
- </cmdsynopsis>
-</refsynopsisdiv>
-
-<refsect1>
- <title>DESCRIPTION</title>
-
- <para>This tool is part of the <citerefentry><refentrytitle>samba</refentrytitle>
- <manvolnum>1</manvolnum></citerefentry> suite.</para>
-
- <para><command>ntdbrestore</command> is a very simple utility that 'restores' the
- contents of dump file into NTDB (New Trivial DataBase) file. The dump file is obtained from the ntdbdump or tdbdump
- commands.
- </para>
-
- <para>This tool wait on the standard input for the content of the dump and will write the ntdb in the ntdbfilename
- parameter.
- </para>
- <para>This tool can be used to translate between ntdb and tdb files by dumping and restoring.
- </para>
-</refsect1>
-
-
-<refsect1>
- <title>VERSION</title>
-
- <para>This man page is correct for version 4 of the Samba suite.</para>
-</refsect1>
-
-<refsect1>
- <title>SEE ALSO</title>
-
- <para>
- ntdbdump(8), tdbrestore(8)
- </para>
-</refsect1>
-
-<refsect1>
- <title>AUTHOR</title>
-
- <para>
- The original Samba software and related utilities were created by Andrew Tridgell.
- Samba is now developed by the Samba Team as an Open Source project similar to the way
- the Linux kernel is developed.
-
- ntdbrestore was written by Rusty Russell based on tdbrestore, which was initially written by Volker Lendecke based on an
- idea by Simon McVittie.
- </para>
-
- <para>The ntdbrestore man page was written by Rusty Russell, based on the tdbrestore man page by Matthieu Patou.</para>
-</refsect1>
-
-</refentry>
+++ /dev/null
-<?xml version="1.0" encoding="iso-8859-1"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
-<refentry id="ntdbtool.8">
-
-<refmeta>
- <refentrytitle>ntdbtool</refentrytitle>
- <manvolnum>8</manvolnum>
- <refmiscinfo class="source">Samba</refmiscinfo>
- <refmiscinfo class="manual">System Administration tools</refmiscinfo>
- <refmiscinfo class="version">4.1</refmiscinfo>
-</refmeta>
-
-
-<refnamediv>
- <refname>ntdbtool</refname>
- <refpurpose>manipulate the contents NTDB files</refpurpose>
-</refnamediv>
-
-<refsynopsisdiv>
-
- <cmdsynopsis>
- <command>ntdbtool</command>
- <arg choice="plain">
- <replaceable>NTDBFILE</replaceable>
- </arg>
- <arg rep="repeat" choice="opt">
- <replaceable>COMMANDS</replaceable>
- </arg>
- </cmdsynopsis>
-
-</refsynopsisdiv>
-
-<refsect1>
- <title>DESCRIPTION</title>
-
- <para>This tool is part of the
- <citerefentry><refentrytitle>samba</refentrytitle>
- <manvolnum>1</manvolnum></citerefentry> suite.</para>
-
- <para><command>ntdbtool</command> a tool for displaying and
- altering the contents of Samba NTDB (New Trivial DataBase) files. Each
- of the commands listed below can be entered interactively or
- provided on the command line.</para>
-
-</refsect1>
-
-
-<refsect1>
- <title>COMMANDS</title>
-
- <variablelist>
-
- <varlistentry>
- <term><option>create</option>
- <replaceable>NTDBFILE</replaceable></term>
- <listitem><para>Create a new database named
- <replaceable>NTDBFILE</replaceable>.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>open</option>
- <replaceable>NTDBFILE</replaceable></term>
- <listitem><para>Open an existing database named
- <replaceable>NTDBFILE</replaceable>.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>erase</option></term>
- <listitem><para>Erase the current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>dump</option></term>
- <listitem><para>Dump the current database as strings.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>cdump</option></term>
- <listitem><para>Dump the current database as connection records.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>keys</option></term>
- <listitem><para>Dump the current database keys as strings.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>hexkeys</option></term>
- <listitem><para>Dump the current database keys as hex values.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>info</option></term>
- <listitem><para>Print summary information about the
- current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>insert</option>
- <replaceable>KEY</replaceable>
- <replaceable>DATA</replaceable>
- </term>
- <listitem><para>Insert a record into the
- current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>move</option>
- <replaceable>KEY</replaceable>
- <replaceable>NTDBFILE</replaceable>
- </term>
- <listitem><para>Move a record from the
- current database into <replaceable>NTDBFILE</replaceable>.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>store</option>
- <replaceable>KEY</replaceable>
- <replaceable>DATA</replaceable>
- </term>
- <listitem><para>Store (replace) a record in the
- current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>show</option>
- <replaceable>KEY</replaceable>
- </term>
- <listitem><para>Show a record by key.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>delete</option>
- <replaceable>KEY</replaceable>
- </term>
- <listitem><para>Delete a record by key.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>list</option>
- </term>
- <listitem><para>Print the current database hash table and free list.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>free</option>
- </term>
- <listitem><para>Print the current database and free list.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term><option>!</option>
- <replaceable>COMMAND</replaceable>
- </term>
- <listitem><para>Execute the given system command.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>
- <option>first</option>
- </term>
- <listitem><para>Print the first record in the current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>
- <option>next</option>
- </term>
- <listitem><para>Print the next record in the current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>
- <option>check</option>
- </term>
- <listitem><para>Check the integrity of the current database.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>
- <option>repack</option>
- </term>
- <listitem><para>Repack a database using a temporary file to remove fragmentation.
- </para></listitem>
- </varlistentry>
-
- <varlistentry>
- <term>
- <option>quit</option>
- </term>
- <listitem><para>Exit <command>ntdbtool</command>.
- </para></listitem>
- </varlistentry>
-
- </variablelist>
-</refsect1>
-
-<refsect1>
- <title>SEE ALSO</title>
-
- <para>
- tdbtool(8)
- </para>
-</refsect1>
-
-<refsect1>
- <title>CAVEATS</title>
- <para>The contents of the Samba NTDB files are private
- to the implementation and should not be altered with
- <command>ntdbtool</command>.
- </para>
-</refsect1>
-
-<refsect1>
- <title>VERSION</title>
- <para>This man page is correct for version 4.0 of the Samba suite.</para>
-</refsect1>
-
-<refsect1>
- <title>AUTHOR</title>
-
- <para> The original Samba software and related utilities were
- created by Andrew Tridgell. Samba is now developed by the
- Samba Team as an Open Source project similar to the way the
- Linux kernel is developed.</para>
-</refsect1>
-
-</refentry>
+++ /dev/null
- /*
- Trivial Database 2: fetch, store and misc routines.
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#ifndef HAVE_LIBREPLACE
-#include <stdarg.h>
-#endif
-
-/*
- * Re-stamp the header of the used record at @off with new key and data
- * lengths, then write the header back to the database.
- *
- * The room figure handed to set_header() is the new key length plus the
- * record's current data length and padding, both read from @rec before
- * set_header() is called.
- *
- * Returns NTDB_SUCCESS, or the error reported by set_header() or
- * ntdb_write_convert().
- */
-static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
-				      ntdb_off_t off,
-				      ntdb_len_t keylen,
-				      ntdb_len_t datalen,
-				      struct ntdb_used_record *rec)
-{
-	const uint64_t room = rec_data_length(rec) + rec_extra_padding(rec);
-	enum NTDB_ERROR err;
-
-	err = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
-			 keylen + room);
-	if (err != NTDB_SUCCESS) {
-		return err;
-	}
-
-	/* Header is consistent in memory: persist it. */
-	return ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
-}
-
-/*
- * Store @key/@dbuf in a freshly allocated record and point the hash
- * entry described by @h at it.
- *
- * If @old_off is non-zero the old record (header + key + @old_room bytes)
- * is handed to the free list and the hash entry is replaced; otherwise
- * the new record is added to the hash.  @growing is passed straight to
- * alloc().
- *
- * Bumps the sequence number when NTDB_SEQNUM is set.  Returns
- * NTDB_SUCCESS or the first error encountered.
- */
-static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
-				    struct hash_info *h,
-				    NTDB_DATA key, NTDB_DATA dbuf,
-				    ntdb_off_t old_off, ntdb_len_t old_room,
-				    bool growing)
-{
-	enum NTDB_ERROR err;
-	ntdb_off_t rec_off, cursor;
-
-	/* Get space for the new record first. */
-	rec_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing);
-	if (NTDB_OFF_IS_ERR(rec_off)) {
-		return NTDB_OFF_TO_ERR(rec_off);
-	}
-
-	if (!old_off) {
-		/* Brand new key. */
-		err = add_to_hash(ntdb, h, rec_off);
-	} else {
-		/* The existing record was unsuitable: release it. */
-		ntdb->stats.frees++;
-		err = add_free_record(ntdb, old_off,
-				      sizeof(struct ntdb_used_record)
-				      + key.dsize + old_room,
-				      NTDB_LOCK_WAIT, true);
-		if (err == NTDB_SUCCESS) {
-			err = replace_in_hash(ntdb, h, rec_off);
-		}
-	}
-	if (err != NTDB_SUCCESS) {
-		return err;
-	}
-
-	/* Key goes directly after the record header... */
-	cursor = rec_off + sizeof(struct ntdb_used_record);
-	err = ntdb->io->twrite(ntdb, cursor, key.dptr, key.dsize);
-	if (err != NTDB_SUCCESS) {
-		return err;
-	}
-
-	/* ...and the data directly after the key. */
-	cursor += key.dsize;
-	err = ntdb->io->twrite(ntdb, cursor, dbuf.dptr, dbuf.dsize);
-	if (err != NTDB_SUCCESS) {
-		return err;
-	}
-
-	if (ntdb->flags & NTDB_SEQNUM) {
-		ntdb_inc_seqnum(ntdb);
-	}
-	return NTDB_SUCCESS;
-}
-
-/*
- * Overwrite record data in place: write @dbuf at @off and, when @extra
- * is non-zero, follow it with a single zero byte.
- *
- * The sequence number is bumped (if NTDB_SEQNUM is set) whether or not
- * the writes succeeded.  Returns the result of the last write attempted.
- */
-static enum NTDB_ERROR update_data(struct ntdb_context *ntdb,
-				   ntdb_off_t off,
-				   NTDB_DATA dbuf,
-				   ntdb_len_t extra)
-{
-	enum NTDB_ERROR err = ntdb->io->twrite(ntdb, off,
-					       dbuf.dptr, dbuf.dsize);
-
-	if (extra && err == NTDB_SUCCESS) {
-		/* Put a zero in; future versions may append other data. */
-		err = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1);
-	}
-
-	if (ntdb->flags & NTDB_SEQNUM) {
-		ntdb_inc_seqnum(ntdb);
-	}
-	return err;
-}
-
-/*
- * Store @dbuf under @key.
- *
- * NTDB_INSERT fails with NTDB_ERR_EXISTS when the key is present;
- * NTDB_MODIFY fails with NTDB_ERR_NOEXIST when it is absent.  An existing
- * record is rewritten in place when its data room (data length plus
- * padding) can hold @dbuf; otherwise a new record replaces it.
- *
- * The hash lock taken by find_and_lock() is released on every path that
- * gets past the lookup.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
-				    NTDB_DATA key, NTDB_DATA dbuf, int flag)
-{
-	struct hash_info h;
-	struct ntdb_used_record rec;
-	ntdb_len_t old_room = 0;
-	ntdb_off_t off;
-	enum NTDB_ERROR ecode;
-
-	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
-	if (NTDB_OFF_IS_ERR(off)) {
-		return NTDB_OFF_TO_ERR(off);
-	}
-
-	/* Now we have lock on this hash bucket. */
-	if (off) {
-		if (flag == NTDB_INSERT) {
-			ecode = NTDB_ERR_EXISTS;
-			goto out;
-		}
-
-		old_room = rec_data_length(&rec) + rec_extra_padding(&rec);
-		if (old_room >= dbuf.dsize) {
-			/* Enough room: rewrite header, then data, in place. */
-			ecode = update_rec_hdr(ntdb, off,
-					       key.dsize, dbuf.dsize, &rec);
-			if (ecode == NTDB_SUCCESS) {
-				ecode = update_data(ntdb,
-						    off + sizeof(rec)
-						    + key.dsize, dbuf,
-						    old_room - dbuf.dsize);
-			}
-			goto out;
-		}
-	} else if (flag == NTDB_MODIFY) {
-		/* NTDB_MODIFY requires the record to exist already. */
-		ecode = NTDB_ERR_NOEXIST;
-		goto out;
-	}
-
-	/* If we didn't use the old record, this implies we're growing. */
-	ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off);
-out:
-	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
-	return ecode;
-}
-
-/*
- * Append @dbuf to the data stored under @key.
- *
- * If the key exists and its padding can hold @dbuf, the record is
- * extended in place.  If it exists but the padding is too small, the old
- * data is read into a temporary buffer, @dbuf is concatenated, and the
- * result is stored through replace_data().  If the key does not exist,
- * @dbuf is stored as a new record through replace_data().
- *
- * Returns NTDB_SUCCESS or the first error encountered; the hash lock
- * taken by find_and_lock() is released on every path past the lookup.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
- NTDB_DATA key, NTDB_DATA dbuf)
-{
- struct hash_info h;
- ntdb_off_t off;
- struct ntdb_used_record rec;
- ntdb_len_t old_room = 0, old_dlen;
- unsigned char *newdata;
- NTDB_DATA new_dbuf;
- enum NTDB_ERROR ecode;
-
- off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- }
-
- if (off) {
- old_dlen = rec_data_length(&rec);
- old_room = old_dlen + rec_extra_padding(&rec);
-
- /* Fast path: can append in place. */
- if (rec_extra_padding(&rec) >= dbuf.dsize) {
- ecode = update_rec_hdr(ntdb, off, key.dsize,
- old_dlen + dbuf.dsize, &rec);
- if (ecode != NTDB_SUCCESS) {
- goto out;
- }
-
- /* From here on, off is the write position: the end of the old data. */
- off += sizeof(rec) + key.dsize + old_dlen;
- /*
-  * rec was re-stamped by update_rec_hdr() above, so this padding
-  * value is read from the updated header (presumably what is left
-  * after the append; confirm against set_header()).
-  */
- ecode = update_data(ntdb, off, dbuf,
- rec_extra_padding(&rec));
- /* newdata was never assigned on this path, so skip the free label. */
- goto out;
- }
-
- /* Slow path. */
- /*
-  * The buffer is sized key + old data + new data, although only
-  * old_dlen + dbuf.dsize bytes are filled in below.
-  */
- newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize,
- ntdb->alloc_data);
- if (!newdata) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "ntdb_append:"
- " failed to allocate %zu bytes",
- (size_t)(key.dsize + old_dlen
- + dbuf.dsize));
- goto out;
- }
- /* Copy the existing data out of the database... */
- ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize,
- newdata, old_dlen);
- if (ecode != NTDB_SUCCESS) {
- goto out_free_newdata;
- }
- /* ...and tack the caller's data onto the end. */
- memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
- new_dbuf.dptr = newdata;
- new_dbuf.dsize = old_dlen + dbuf.dsize;
- } else {
- /* No existing record: store the caller's buffer as-is. */
- newdata = NULL;
- new_dbuf = dbuf;
- }
-
- /* If they're using ntdb_append(), it implies they're growing record. */
- ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true);
-
-out_free_newdata:
- /*
-  * newdata is NULL when the key did not exist; this assumes free_fn
-  * tolerates a NULL pointer (TODO confirm for custom allocators).
-  */
- ntdb->free_fn(newdata, ntdb->alloc_data);
-out:
- ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
- return ecode;
-}
-
-/*
- * Look up @key and return a freshly allocated copy of its data in @data.
- *
- * On NTDB_SUCCESS, data->dptr was obtained from ntdb->alloc_fn and
- * data->dsize is the record's data length.  Returns NTDB_ERR_NOEXIST when
- * the key is absent and NTDB_ERR_OOM when the copy cannot be allocated.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
-				    NTDB_DATA *data)
-{
-	struct hash_info h;
-	struct ntdb_used_record rec;
-	const char *keyp;
-	ntdb_off_t off;
-	enum NTDB_ERROR ecode = NTDB_ERR_NOEXIST;
-
-	off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
-	if (NTDB_OFF_IS_ERR(off)) {
-		return NTDB_OFF_TO_ERR(off);
-	}
-
-	if (off) {
-		data->dsize = rec_data_length(&rec);
-		data->dptr = ntdb->alloc_fn(ntdb, data->dsize,
-					    ntdb->alloc_data);
-		if (unlikely(data->dptr == NULL)) {
-			ecode = NTDB_ERR_OOM;
-		} else {
-			/* The data sits directly after the key bytes. */
-			memcpy(data->dptr, keyp + key.dsize, data->dsize);
-			ecode = NTDB_SUCCESS;
-		}
-		/* Done with the access handed out by find_and_lock(). */
-		ntdb_access_release(ntdb, keyp);
-	}
-
-	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
-	return ecode;
-}
-
-/*
- * Report whether @key is present.  A lookup error is reported as
- * "not present" (false).
- */
-_PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
-{
-	struct hash_info h;
-	struct ntdb_used_record rec;
-	ntdb_off_t found;
-
-	found = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
-	if (NTDB_OFF_IS_ERR(found)) {
-		return false;
-	}
-
-	/* Only the offset matters; drop the lock straight away. */
-	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
-	return found != 0;
-}
-
-/*
- * Delete the record stored under @key.
- *
- * The entry is removed from the hash and its space (header, key, data
- * and padding) is given to the free list.  Once the hash entry has been
- * removed the sequence number is bumped (if NTDB_SEQNUM is set), even if
- * adding the free record fails.
- *
- * Returns NTDB_ERR_NOEXIST when the key is absent.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
-{
-	struct hash_info h;
-	struct ntdb_used_record rec;
-	ntdb_off_t off;
-	enum NTDB_ERROR ecode;
-
-	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
-	if (NTDB_OFF_IS_ERR(off)) {
-		return NTDB_OFF_TO_ERR(off);
-	}
-
-	if (!off) {
-		ecode = NTDB_ERR_NOEXIST;
-	} else {
-		ecode = delete_from_hash(ntdb, &h);
-		if (ecode == NTDB_SUCCESS) {
-			/* Free the deleted entry. */
-			ntdb->stats.frees++;
-			ecode = add_free_record(ntdb, off,
-						sizeof(struct ntdb_used_record)
-						+ rec_key_length(&rec)
-						+ rec_data_length(&rec)
-						+ rec_extra_padding(&rec),
-						NTDB_LOCK_WAIT, true);
-
-			if (ntdb->flags & NTDB_SEQNUM) {
-				ntdb_inc_seqnum(ntdb);
-			}
-		}
-	}
-
-	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
-	return ecode;
-}
-
-/* Return the current value of ntdb->flags, unmodified. */
-_PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb)
-{
- return ntdb->flags;
-}
-
-/* True when ntdb->transaction is non-NULL. */
-static bool inside_transaction(const struct ntdb_context *ntdb)
-{
- return ntdb->transaction != NULL;
-}
-
-/*
- * Decide whether NTDB_RDONLY may be toggled right now.
- *
- * Returns true outside a transaction.  Inside one, logs a usage error
- * naming @caller and returns false.
- */
-static bool readonly_changable(struct ntdb_context *ntdb, const char *caller)
-{
-	if (!inside_transaction(ntdb)) {
-		return true;
-	}
-
-	ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-		    "%s: can't change"
-		    " NTDB_RDONLY inside transaction",
-		    caller);
-	return false;
-}
-
-/*
- * Set a single flag on an open ntdb.
- *
- * Internal databases reject every change.  NTDB_NOMMAP is refused while
- * ntdb->file->direct_count is non-zero; NTDB_RDONLY is refused inside a
- * transaction.  Unknown flags are logged as usage errors.  Nothing is
- * returned: failures are only reported through ntdb_logerr().
- */
-_PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag)
-{
-	if (ntdb->flags & NTDB_INTERNAL) {
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-			    "ntdb_add_flag: internal db");
-		return;
-	}
-
-	switch (flag) {
-	case NTDB_NOLOCK:
-	case NTDB_NOSYNC:
-	case NTDB_SEQNUM:
-	case NTDB_ALLOW_NESTING:
-		/* Plain flags: no side effects beyond setting the bit. */
-		ntdb->flags |= flag;
-		break;
-
-	case NTDB_NOMMAP:
-		if (ntdb->file->direct_count) {
-			ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-				    "ntdb_add_flag: Can't get NTDB_NOMMAP from"
-				    " ntdb_parse_record!");
-			return;
-		}
-		ntdb->flags |= NTDB_NOMMAP;
-#ifndef HAVE_INCOHERENT_MMAP
-		ntdb_munmap(ntdb);
-#endif
-		break;
-
-	case NTDB_RDONLY:
-		if (readonly_changable(ntdb, "ntdb_add_flag")) {
-			ntdb->flags |= NTDB_RDONLY;
-		}
-		break;
-
-	default:
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-			    "ntdb_add_flag: Unknown flag %u", flag);
-	}
-}
-
-/*
- * Clear a single flag on an open ntdb.
- *
- * Internal databases reject every change.  NTDB_RDONLY cannot be cleared
- * on a database opened with O_RDONLY, nor inside a transaction.  Unknown
- * flags are logged as usage errors.  Nothing is returned: failures are
- * only reported through ntdb_logerr().
- */
-_PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag)
-{
-	if (ntdb->flags & NTDB_INTERNAL) {
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-			    "ntdb_remove_flag: internal db");
-		return;
-	}
-
-	switch (flag) {
-	case NTDB_NOLOCK:
-		ntdb->flags &= ~NTDB_NOLOCK;
-		break;
-
-	case NTDB_NOSYNC:
-		ntdb->flags &= ~NTDB_NOSYNC;
-		break;
-
-	case NTDB_SEQNUM:
-		ntdb->flags &= ~NTDB_SEQNUM;
-		break;
-
-	case NTDB_ALLOW_NESTING:
-		ntdb->flags &= ~NTDB_ALLOW_NESTING;
-		break;
-
-	case NTDB_NOMMAP:
-		ntdb->flags &= ~NTDB_NOMMAP;
-#ifndef HAVE_INCOHERENT_MMAP
-		/* If mmap incoherent, we were mmaping anyway. */
-		ntdb_mmap(ntdb);
-#endif
-		break;
-
-	case NTDB_RDONLY:
-		if ((ntdb->open_flags & O_ACCMODE) != O_RDONLY) {
-			if (readonly_changable(ntdb, "ntdb_remove_flag")) {
-				ntdb->flags &= ~NTDB_RDONLY;
-			}
-		} else {
-			ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-				    "ntdb_remove_flag: can't"
-				    " remove NTDB_RDONLY on ntdb"
-				    " opened with O_RDONLY");
-		}
-		break;
-
-	default:
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-			    "ntdb_remove_flag: Unknown flag %u",
-			    flag);
-	}
-}
-
-/*
- * Map an enum NTDB_ERROR onto a constant, human-readable string.
- * Values that match no known code yield "Invalid error code".
- */
-_PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode)
-{
-	const char *msg = "Invalid error code";
-
-	/* Gcc warns if you miss a case in the switch, so use that. */
-	switch (NTDB_ERR_TO_OFF(ecode)) {
-	case NTDB_ERR_TO_OFF(NTDB_SUCCESS):
-		msg = "Success";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
-		msg = "Corrupt database";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
-		msg = "IO Error";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
-		msg = "Locking error";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
-		msg = "Out of memory";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS):
-		msg = "Record exists";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
-		msg = "Invalid parameter";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST):
-		msg = "Record does not exist";
-		break;
-	case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY):
-		msg = "write not permitted";
-		break;
-	}
-	return msg;
-}
-
-/*
- * Format a message and hand it to the ntdb's log callback.
- *
- * @ecode is returned unchanged so callers can write
- * "return ntdb_logerr(...)".  When no log_fn is installed nothing is
- * formatted.  errno is preserved across the call.
- *
- * If the message cannot be formatted (vsnprintf() reports an error) or
- * the buffer cannot be allocated, the raw format string is logged
- * instead, so the log callback never receives an uninitialised buffer.
- */
-enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb,
-				 enum NTDB_ERROR ecode,
-				 enum ntdb_log_level level,
-				 const char *fmt, ...)
-{
-	char *message;
-	va_list ap;
-	int len;
-	/* ntdb_open paths care about errno, so save it. */
-	int saved_errno = errno;
-
-	if (!ntdb->log_fn)
-		return ecode;
-
-	/* First pass: measure the formatted length. */
-	va_start(ap, fmt);
-	len = vsnprintf(NULL, 0, fmt, ap);
-	va_end(ap);
-
-	if (len < 0) {
-		/*
-		 * vsnprintf() failed.  Stored in a size_t this would wrap
-		 * len + 1 to 0 and leave the buffer unterminated, so fall
-		 * back to the unformatted string.
-		 */
-		ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
-		errno = saved_errno;
-		return ecode;
-	}
-
-	message = ntdb->alloc_fn(ntdb, (size_t)len + 1, ntdb->alloc_data);
-	if (!message) {
-		ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM,
-			     "out of memory formatting message:", ntdb->log_data);
-		ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
-	} else {
-		/* Second pass: format into the exactly-sized buffer. */
-		va_start(ap, fmt);
-		vsnprintf(message, (size_t)len + 1, fmt, ap);
-		va_end(ap);
-		ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data);
-		ntdb->free_fn(message, ntdb->alloc_data);
-	}
-	errno = saved_errno;
-	return ecode;
-}
-
-/*
- * Look up @key and run @parse on the record's data without copying it.
- *
- * @parse receives @key, a NTDB_DATA pointing at the bytes that follow
- * the key, and @data.  Its return value becomes this function's return
- * value.  Returns NTDB_ERR_NOEXIST when the key is absent.
- *
- * While @parse runs, NTDB_RDONLY is forced on unless an allrecord lock
- * is held or NTDB_NOLOCK is set; ntdb->flags is restored to its previous
- * value afterwards.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
-					    NTDB_DATA key,
-					    enum NTDB_ERROR (*parse)(NTDB_DATA k,
-								     NTDB_DATA d,
-								     void *data),
-					    void *data)
-{
-	struct hash_info h;
-	struct ntdb_used_record rec;
-	const char *keyp;
-	ntdb_off_t off;
-	enum NTDB_ERROR ecode = NTDB_ERR_NOEXIST;
-
-	off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
-	if (NTDB_OFF_IS_ERR(off)) {
-		return NTDB_OFF_TO_ERR(off);
-	}
-
-	if (off) {
-		NTDB_DATA d = ntdb_mkdata(keyp + key.dsize,
-					  rec_data_length(&rec));
-		unsigned int saved_flags = ntdb->flags;
-
-		/*
-		 * Make sure they don't try to write db, since they
-		 * have read lock!  They can if they've done
-		 * ntdb_lockall(): if it was ntdb_lockall_read, that'll
-		 * stop them doing a write operation anyway.
-		 */
-		if (!(ntdb->file->allrecord_lock.count ||
-		      (ntdb->flags & NTDB_NOLOCK))) {
-			ntdb->flags |= NTDB_RDONLY;
-		}
-
-		ecode = parse(key, d, data);
-
-		ntdb->flags = saved_flags;
-		ntdb_access_release(ntdb, keyp);
-	}
-
-	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
-	return ecode;
-}
-
-/* Return ntdb->name; the pointer still belongs to the context. */
-_PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb)
-{
- return ntdb->name;
-}
-
-/*
- * Read the seqnum field of struct ntdb_header through ntdb_read_off()
- * and return the result as-is (so any error value ntdb_read_off()
- * produces is passed through to the caller).
- */
-_PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb)
-{
- return ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
-}
-
-
-/* Return the file descriptor stored in ntdb->file->fd. */
-_PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb)
-{
- return ntdb->file->fd;
-}
-
-/* State shared between ntdb_repack() and its traverse callback. */
-struct traverse_state {
- /* Last status recorded; repack_traverse() stores its ntdb_store() result here. */
- enum NTDB_ERROR error;
- /* Database that repack_traverse() stores each record into. */
- struct ntdb_context *dest_db;
-};
-
-/*
-  Traverse callback for repacking: insert one key/data pair into
-  state->dest_db with NTDB_INSERT.  The store result is kept in
-  state->error; returns 0 on success and -1 on failure.
- */
-static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data,
-			   struct traverse_state *state)
-{
-	state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT);
-	return (state->error == NTDB_SUCCESS) ? 0 : -1;
-}
-
-/*
- * Copy every record of @src into state->dest_db using repack_traverse().
- *
- * A failure is reported whether it came from the callback (recorded in
- * state->error) or from the traversal itself (negative return from
- * ntdb_traverse()).  Returns NTDB_SUCCESS or the error, which is also
- * left in state->error.
- */
-static enum NTDB_ERROR repack_copy_all(struct ntdb_context *src,
-				       struct traverse_state *state)
-{
-	int64_t ret;
-
-	state->error = NTDB_SUCCESS;
-	ret = ntdb_traverse(src, repack_traverse, state);
-	if (ret < 0 && state->error == NTDB_SUCCESS) {
-		/* The traversal itself failed: the callback never saw it. */
-		state->error = NTDB_OFF_TO_ERR(ret);
-	}
-	return state->error;
-}
-
-/*
- * Repack the database inside a transaction.
- *
- * All records are copied into a temporary internal database, the
- * original is wiped, and the records are copied back.  Any failure
- * cancels the transaction and returns the error that caused it; on
- * success the result of ntdb_transaction_commit() is returned.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb)
-{
-	struct ntdb_context *tmp_db;
-	struct traverse_state state;
-
-	state.error = ntdb_transaction_start(ntdb);
-	if (state.error != NTDB_SUCCESS) {
-		return state.error;
-	}
-
-	tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
-	if (tmp_db == NULL) {
-		state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-					  __location__
-					  " Failed to create tmp_db");
-		ntdb_transaction_cancel(ntdb);
-		return state.error;
-	}
-
-	/* Pass 1: original -> temporary copy. */
-	state.dest_db = tmp_db;
-	if (repack_copy_all(ntdb, &state) != NTDB_SUCCESS) {
-		goto fail;
-	}
-
-	state.error = ntdb_wipe_all(ntdb);
-	if (state.error != NTDB_SUCCESS) {
-		goto fail;
-	}
-
-	/* Pass 2: temporary copy -> freshly wiped original. */
-	state.dest_db = ntdb;
-	if (repack_copy_all(tmp_db, &state) != NTDB_SUCCESS) {
-		goto fail;
-	}
-
-	ntdb_close(tmp_db);
-	return ntdb_transaction_commit(ntdb);
-
-fail:
-	/* state.error always holds a real error code here. */
-	ntdb_transaction_cancel(ntdb);
-	ntdb_close(tmp_db);
-	return state.error;
-}
+++ /dev/null
-#ifndef CCAN_NTDB_H
-#define CCAN_NTDB_H
-
-/*
- NTDB: trivial database library version 2
-
- Copyright (C) Andrew Tridgell 1999-2004
- Copyright (C) Rusty Russell 2010-2012
-
- ** NOTE! The following LGPL license applies to the ntdb
- ** library. This does NOT imply that all of Samba is released
- ** under the LGPL
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef HAVE_LIBREPLACE
-#include <replace.h>
-#include <system/filesys.h>
-#else
-#include "config.h"
-#if HAVE_FILE_OFFSET_BITS
-#define _FILE_OFFSET_BITS 64
-#endif
-
-#ifndef _PUBLIC_
-#ifdef HAVE_VISIBILITY_ATTR
-#define _PUBLIC_ __attribute__((visibility("default")))
-#else
-#define _PUBLIC_
-#endif
-#endif
-
-/* For mode_t */
-#include <sys/types.h>
-/* For O_* flags. */
-#include <sys/stat.h>
-/* For sig_atomic_t. */
-#include <signal.h>
-/* For uint64_t */
-#include <stdint.h>
-/* For bool */
-#include <stdbool.h>
-/* For memcmp */
-#include <string.h>
-#endif
-
-#include <ccan/compiler/compiler.h>
-#include <ccan/typesafe_cb/typesafe_cb.h>
-#include <ccan/cast/cast.h>
-
-union ntdb_attribute;
-struct ntdb_context;
-
-/**
- * struct TDB_DATA - (n)tdb data blob
- *
- * To ease compatibility, we use 'struct TDB_DATA' from tdb.h, so if
- * you want to include both tdb.h and ntdb.h, you need to #include
- * tdb.h first.
- */
-#ifndef __TDB_H__
-struct TDB_DATA {
- /* Start of the blob's bytes. */
- unsigned char *dptr;
- /* Number of bytes at dptr. */
- size_t dsize;
-};
-#endif
-
-typedef struct TDB_DATA NTDB_DATA;
-
-/**
- * ntdb_open - open a database file
- * @name: the file name (or database name if flags contains NTDB_INTERNAL)
- * @ntdb_flags: options for this database
- * @open_flags: flags argument for ntdb's open() call.
- * @mode: mode argument for ntdb's open() call.
- * @attributes: linked list of extra attributes for this ntdb.
- *
- * This call opens (and potentially creates) a database file.
- * Multiple processes can have the NTDB file open at once.
- *
- * On failure it will return NULL, and set errno: it may also call
- * any log attribute found in @attributes.
- *
- * See also:
- * union ntdb_attribute
- */
-struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
- int open_flags, mode_t mode,
- union ntdb_attribute *attributes);
-
-
-/* flags for ntdb_open() */
-#define NTDB_DEFAULT 0 /* just a readability place holder */
-#define NTDB_INTERNAL 2 /* don't store on disk */
-#define NTDB_NOLOCK 4 /* don't do any locking */
-#define NTDB_NOMMAP 8 /* don't use mmap */
-#define NTDB_CONVERT 16 /* convert endian */
-#define NTDB_NOSYNC 64 /* don't use synchronous transactions */
-#define NTDB_SEQNUM 128 /* maintain a sequence number */
-#define NTDB_ALLOW_NESTING 256 /* fake nested transactions */
-#define NTDB_RDONLY 512 /* implied by O_RDONLY */
-#define NTDB_CANT_CHECK 2048 /* has a feature which we don't understand */
-
-/**
- * ntdb_close - close and free a ntdb.
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This always succeeds, in that @ntdb is unusable after this call. But if
- * some unexpected error occurred while closing, it will return non-zero
- * (the only clue as to cause will be via the log attribute).
- */
-int ntdb_close(struct ntdb_context *ntdb);
-
-/**
- * enum NTDB_ERROR - error returns for NTDB
- *
- * See Also:
- * ntdb_errorstr()
- */
-enum NTDB_ERROR {
- NTDB_SUCCESS = 0, /* No error. */
- NTDB_ERR_CORRUPT = -1, /* We read the db, and it was bogus. */
- NTDB_ERR_IO = -2, /* We couldn't read/write the db. */
- NTDB_ERR_LOCK = -3, /* Locking failed. */
- NTDB_ERR_OOM = -4, /* Out of Memory. */
- NTDB_ERR_EXISTS = -5, /* The key already exists. */
- NTDB_ERR_NOEXIST = -6, /* The key does not exist. */
- NTDB_ERR_EINVAL = -7, /* You're using it wrong. */
- NTDB_ERR_RDONLY = -8, /* The database is read-only. */
- /* Alias for the most negative error code defined above. */
- NTDB_ERR_LAST = NTDB_ERR_RDONLY
-};
-
-/**
- * ntdb_store - store a key/value pair in a ntdb.
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key
- * @dbuf: the data to associate with the key.
- * @flag: NTDB_REPLACE, NTDB_INSERT or NTDB_MODIFY.
- *
- * This inserts (or overwrites) a key/value pair in the NTDB. If flag
- * is NTDB_REPLACE, it doesn't matter whether the key exists or not;
- * NTDB_INSERT means it must not exist (returns NTDB_ERR_EXISTS otherwise),
- * and NTDB_MODIFY means it must exist (returns NTDB_ERR_NOEXIST otherwise).
- *
- * On success, this returns NTDB_SUCCESS.
- *
- * See also:
- * ntdb_fetch, ntdb_transaction_start, ntdb_append, ntdb_delete.
- */
-enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
- NTDB_DATA key,
- NTDB_DATA dbuf,
- int flag);
-
-/* flags to ntdb_store() */
-#define NTDB_REPLACE 1 /* A readability place holder */
-#define NTDB_INSERT 2 /* Don't overwrite an existing entry */
-#define NTDB_MODIFY 3 /* Don't create a new entry */
-
-/**
- * ntdb_fetch - fetch a value from a ntdb.
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key
- * @data: pointer to data.
- *
- * This looks up a key in the database and sets it in @data.
- *
- * If it returns NTDB_SUCCESS, the key was found: it is your
- * responsibility to call free() on @data->dptr.
- *
- * Otherwise, it returns an error (usually, NTDB_ERR_NOEXIST) and @data is
- * undefined.
- */
-enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
- NTDB_DATA *data);
-
-/**
- * ntdb_errorstr - map the ntdb error onto a constant readable string
- * @ecode: the enum NTDB_ERROR to map.
- *
- * This is useful for displaying errors to users.
- */
-const char *ntdb_errorstr(enum NTDB_ERROR ecode);
-
-/**
- * ntdb_append - append a value to a key/value pair in a ntdb.
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key
- * @dbuf: the data to append.
- *
- * This is equivalent to fetching a record, reallocating .dptr to add the
- * data, and writing it back, only it's much more efficient. If the key
- * doesn't exist, it's equivalent to ntdb_store (with an additional hint that
- * you expect to expand the record in future).
- *
- * See Also:
- * ntdb_fetch(), ntdb_store()
- */
-enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
- NTDB_DATA key, NTDB_DATA dbuf);
-
-/**
- * ntdb_delete - delete a key from a ntdb.
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key to delete.
- *
- * Returns NTDB_SUCCESS on success, or an error (usually NTDB_ERR_NOEXIST).
- *
- * See Also:
- * ntdb_fetch(), ntdb_store()
- */
-enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key);
-
-/**
- * ntdb_exists - does a key exist in the database?
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key to search for.
- *
- * Returns true if it exists, or false if it doesn't or any other error.
- */
-bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key);
-
-/**
- * ntdb_deq - are NTDB_DATA equal?
- * @a: one NTDB_DATA
- * @b: another NTDB_DATA
- *
- * Two blobs are equal when they have the same length and the same bytes.
- * Zero-length blobs compare equal without their dptr being touched, so
- * a NULL dptr with a dsize of 0 is safe to pass.
- */
-static inline bool ntdb_deq(NTDB_DATA a, NTDB_DATA b)
-{
-	if (a.dsize != b.dsize) {
-		return false;
-	}
-	/*
-	 * memcmp() requires valid pointers even for a length of zero,
-	 * so never hand it the (possibly NULL) dptr of an empty blob.
-	 */
-	if (a.dsize == 0) {
-		return true;
-	}
-	return memcmp(a.dptr, b.dptr, a.dsize) == 0;
-}
-
-/**
- * ntdb_mkdata - make a NTDB_DATA from const data
- * @p: the constant pointer
- * @len: the length
- *
- * The dptr member of NTDB_DATA is not const-qualified, so wrapping
- * constant data needs a cast.  This helper keeps those casts in one
- * place and suppresses the warning some compilers give when a
- * qualifier is cast away (eg. gcc with -Wcast-qual).
- */
-static inline NTDB_DATA ntdb_mkdata(const void *p, size_t len)
-{
-	NTDB_DATA blob;
-
-	blob.dsize = len;
-	blob.dptr = cast_const(void *, p);
-	return blob;
-}
-
-/**
- * ntdb_transaction_start - start a transaction
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This begins a series of atomic operations. Other processes will be able
- * to read the ntdb, but not alter it (they will block), nor will they see
- * any changes until ntdb_transaction_commit() is called.
- *
- * Note that if the NTDB_ALLOW_NESTING flag is set, a ntdb_transaction_start()
- * within a transaction will succeed, but it's not a real transaction:
- * (1) An inner transaction which is committed is not actually committed until
- * the outer transaction is; if the outer transaction is cancelled, the
- * inner ones are discarded.
- * (2) ntdb_transaction_cancel() marks the outer transaction as having an error,
- * so the final ntdb_transaction_commit() will fail.
- * (3) the outer transaction will see the results of the inner transaction.
- *
- * See Also:
- * ntdb_transaction_cancel, ntdb_transaction_commit.
- */
-enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb);
-
-/**
- * ntdb_transaction_cancel - abandon a transaction
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This aborts a transaction, discarding any changes which were made.
- * ntdb_close() does this implicitly.
- */
-void ntdb_transaction_cancel(struct ntdb_context *ntdb);
-
-/**
- * ntdb_transaction_commit - commit a transaction
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This completes a transaction, writing any changes which were made.
- *
- * fsync() is used to commit the transaction (unless NTDB_NOSYNC is set),
- * making it robust against machine crashes, but very slow compared to
- * other NTDB operations.
- *
- * A failure can only be caused by unexpected errors (eg. I/O or
- * memory); there is no point looping on transaction failure.
- *
- * See Also:
- * ntdb_transaction_prepare_commit()
- */
-enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb);
-
-/**
- * ntdb_transaction_prepare_commit - prepare to commit a transaction
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This ensures we have the resources to commit a transaction (using
- * ntdb_transaction_commit): if this succeeds then a transaction will only
- * fail if the write() or fsync() calls fail.
- *
- * If this fails you must still call ntdb_transaction_cancel() to cancel
- * the transaction.
- *
- * See Also:
- * ntdb_transaction_commit()
- */
-enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb);
-
-/**
- * ntdb_traverse - traverse a NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- * @fn: the function to call for every key/value pair (or NULL)
- * @p: the pointer to hand to @fn
- *
- * This walks the NTDB until all the keys have been traversed, or @fn
- * returns non-zero. If the traverse function or other processes are
- * changing data or adding or deleting keys, the traverse may be
- * unreliable: keys may be skipped or (rarely) visited twice.
- *
- * There is one specific exception: the special case of deleting the
- * current key does not undermine the reliability of the traversal.
- *
- * On success, returns the number of keys iterated. On error returns
- * a negative enum NTDB_ERROR value.
- */
-#define ntdb_traverse(ntdb, fn, p) \
- ntdb_traverse_(ntdb, typesafe_cb_preargs(int, void *, (fn), (p), \
- struct ntdb_context *, \
- NTDB_DATA, NTDB_DATA), (p))
-
-int64_t ntdb_traverse_(struct ntdb_context *ntdb,
- int (*fn)(struct ntdb_context *,
- NTDB_DATA, NTDB_DATA, void *), void *p);
-
-/**
- * ntdb_parse_record - operate directly on data in the database.
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key whose record we should hand to @parse
- * @parse: the function to call for the data
- * @data: the private pointer to hand to @parse (types must match).
- *
- * This avoids a copy for many cases, by handing you a pointer into
- * the memory-mapped database. It also locks the record to prevent
- * other accesses at the same time, so it won't change.
- *
- * Within the @parse callback you can perform read operations on the
- * database, but no write operations: no ntdb_store() or
- * ntdb_delete(), for example. The exception is if you call
- * ntdb_lockall() before ntdb_parse_record().
- *
- * Never alter the data handed to parse()!
- */
-#define ntdb_parse_record(ntdb, key, parse, data) \
- ntdb_parse_record_((ntdb), (key), \
- typesafe_cb_preargs(enum NTDB_ERROR, void *, \
- (parse), (data), \
- NTDB_DATA, NTDB_DATA), (data))
-
-enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
- NTDB_DATA key,
- enum NTDB_ERROR (*parse)(NTDB_DATA k,
- NTDB_DATA d,
- void *data),
- void *data);
-
-/**
- * ntdb_get_seqnum - get a database sequence number
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This returns a sequence number: any change to the database from a
- * ntdb context opened with the NTDB_SEQNUM flag will cause that number
- * to increment. Note that the incrementing is unreliable (it is done
- * without locking), so this is only useful as an optimization.
- *
- * For example, you may have a regular database backup routine which
- * does not operate if the sequence number is unchanged. In the
- * unlikely event of a failed increment, it will be backed up next
- * time anyway.
- *
- * Returns an enum NTDB_ERROR (ie. negative) on error.
- */
-int64_t ntdb_get_seqnum(struct ntdb_context *ntdb);
-
-/**
- * ntdb_firstkey - get the "first" key in a NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: pointer to key.
- *
- * This returns an arbitrary key in the database; with ntdb_nextkey() it allows
- * open-coded traversal of the database, though it is slightly less efficient
- * than ntdb_traverse.
- *
- * It is your responsibility to free @key->dptr on success.
- *
- * Returns NTDB_ERR_NOEXIST if the database is empty.
- */
-enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key);
-
-/**
- * ntdb_nextkey - get the "next" key in a NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: a key returned by ntdb_firstkey() or ntdb_nextkey().
- *
- * This returns another key in the database; it will free @key.dptr for
- * your convenience.
- *
- * Returns NTDB_ERR_NOEXIST if there are no more keys.
- */
-enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key);
-
-/**
- * ntdb_chainlock - lock a record in the NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key to lock.
- *
- * This prevents any access occurring to a group of keys including @key,
- * even if @key does not exist. This allows primitive atomic updates of
- * records without using transactions.
- *
- * You cannot begin a transaction while holding a ntdb_chainlock(), nor can
- * you do any operations on any other keys in the database. This also means
- * that you cannot hold more than one ntdb_chainlock() at a time.
- *
- * See Also:
- * ntdb_chainunlock()
- */
-enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key);
-
-/**
- * ntdb_chainunlock - unlock a record in the NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key to unlock.
- *
- * The key must have previously been locked by ntdb_chainlock().
- */
-void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key);
-
-/**
- * ntdb_chainlock_read - lock a record in the NTDB, for reading
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key to lock.
- *
- * This prevents any changes from occurring to a group of keys including @key,
- * even if @key does not exist. This allows primitive atomic updates of
- * records without using transactions.
- *
- * You cannot begin a transaction while holding a ntdb_chainlock_read(), nor can
- * you do any operations on any other keys in the database. This also means
- * that you cannot hold more than one ntdb_chainlock()/read() at a time.
- *
- * See Also:
- * ntdb_chainlock()
- */
-enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key);
-
-/**
- * ntdb_chainunlock_read - unlock a record in the NTDB for reading
- * @ntdb: the ntdb context returned from ntdb_open()
- * @key: the key to unlock.
- *
- * The key must have previously been locked by ntdb_chainlock_read().
- */
-void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key);
-
-/**
- * ntdb_lockall - lock the entire NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * You cannot hold a ntdb_chainlock while calling this. It nests, so you
- * must call ntdb_unlockall as many times as you call ntdb_lockall.
- */
-enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb);
-
-/**
- * ntdb_unlockall - unlock the entire NTDB
- * @ntdb: the ntdb context returned from ntdb_open()
- */
-void ntdb_unlockall(struct ntdb_context *ntdb);
-
-/**
- * ntdb_lockall_read - lock the entire NTDB for reading
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This prevents others writing to the database, eg. ntdb_delete, ntdb_store,
- * ntdb_append, but not ntdb_fetch.
- *
- * You cannot hold a ntdb_chainlock while calling this. It nests, so you
- * must call ntdb_unlockall_read as many times as you call ntdb_lockall_read.
- */
-enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb);
-
-/**
- * ntdb_unlockall_read - unlock the entire NTDB for reading
- * @ntdb: the ntdb context returned from ntdb_open()
- */
-void ntdb_unlockall_read(struct ntdb_context *ntdb);
-
-/**
- * ntdb_wipe_all - wipe the database clean
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * Completely erase the database. This is faster than iterating through
- * each key and doing ntdb_delete.
- */
-enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb);
-
-/**
- * ntdb_repack - repack the database
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This repacks the database; if it is suffering from a great deal of
- * fragmentation this might help. However, it can take twice the
- * memory of the existing NTDB.
- */
-enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb);
-
-/**
- * ntdb_check - check a NTDB for consistency
- * @ntdb: the ntdb context returned from ntdb_open()
- * @check: function to check each key/data pair (or NULL)
- * @data: argument for @check, must match type.
- *
- * This performs a consistency check of the open database, optionally calling
- * a check() function on each record so you can do your own data consistency
- * checks as well. If check() returns an error, that is returned from
- * ntdb_check().
- *
- * Note that if the NTDB uses a feature which we don't understand and which
- * indicates we can't run ntdb_check(), this will log a warning to that
- * effect and return NTDB_SUCCESS. You can detect this condition by
- * looking for NTDB_CANT_CHECK in ntdb_get_flags().
- *
- * Returns NTDB_SUCCESS or an error.
- */
-#define ntdb_check(ntdb, check, data) \
- ntdb_check_((ntdb), typesafe_cb_preargs(enum NTDB_ERROR, void *, \
- (check), (data), \
- NTDB_DATA, \
- NTDB_DATA), \
- (data))
-
-enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb,
- enum NTDB_ERROR (*check)(NTDB_DATA k,
- NTDB_DATA d,
- void *data),
- void *data);
-
-/**
- * enum ntdb_summary_flags - flags for ntdb_summary.
- */
-enum ntdb_summary_flags {
- NTDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */
-};
-
-/**
- * ntdb_summary - return a string describing the NTDB state
- * @ntdb: the ntdb context returned from ntdb_open()
- * @flags: flags to control the summary output.
- * @summary: pointer to string to allocate.
- *
- * This returns a developer-readable string describing the overall
- * state of the ntdb, such as the percentage used and sizes of records.
- * It is designed to provide information about the ntdb at a glance
- * without displaying any keys or data in the database.
- *
- * On success, sets @summary to point to a malloc()'ed nul-terminated
- * multi-line string. It is your responsibility to free() it.
- */
-enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
- enum ntdb_summary_flags flags,
- char **summary);
-
-
-/**
- * ntdb_get_flags - return the flags for a ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This returns the flags on the current ntdb. Some of these are caused by
- * the flags argument to ntdb_open(), others (such as NTDB_CONVERT) are
- * intuited.
- */
-unsigned int ntdb_get_flags(struct ntdb_context *ntdb);
-
-/**
- * ntdb_add_flag - set a flag for a ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING.
- *
- * You can use this to set a flag on the NTDB. You cannot set these flags
- * on a NTDB_INTERNAL ntdb.
- */
-void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag);
-
-/**
- * ntdb_remove_flag - unset a flag for a ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING.
- *
- * You can use this to clear a flag on the NTDB. You cannot clear flags
- * on a NTDB_INTERNAL ntdb.
- */
-void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag);
-
-/**
- * enum ntdb_attribute_type - discriminator for union ntdb_attribute.
- */
-enum ntdb_attribute_type {
- NTDB_ATTRIBUTE_LOG = 0,
- NTDB_ATTRIBUTE_HASH = 1,
- NTDB_ATTRIBUTE_SEED = 2,
- NTDB_ATTRIBUTE_STATS = 3,
- NTDB_ATTRIBUTE_OPENHOOK = 4,
- NTDB_ATTRIBUTE_FLOCK = 5,
- NTDB_ATTRIBUTE_ALLOCATOR = 6,
- NTDB_ATTRIBUTE_HASHSIZE = 7
-};
-
-/**
- * ntdb_get_attribute - get an attribute for an existing ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- * @attr: the union ntdb_attribute to fill in.
- *
- * This gets an attribute from a NTDB which has previously been set (or
- * may return the default values). Set @attr.base.attr to the
- * attribute type you want to get.
- */
-enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
- union ntdb_attribute *attr);
-
-/**
- * ntdb_set_attribute - set an attribute for an existing ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- * @attr: the union ntdb_attribute to set.
- *
- * This sets an attribute on a NTDB, overriding any previous attribute
- * of the same type. It returns NTDB_ERR_EINVAL if the attribute is
- * unknown or invalid.
- *
- * Note that NTDB_ATTRIBUTE_HASH, NTDB_ATTRIBUTE_SEED,
- * NTDB_ATTRIBUTE_OPENHOOK and NTDB_ATTRIBUTE_HASHSIZE cannot currently be
- * set after ntdb_open.
- */
-enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
- const union ntdb_attribute *attr);
-
-/**
- * ntdb_unset_attribute - reset an attribute for an existing ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- * @type: the attribute type to unset.
- *
- * This unsets an attribute on a NTDB, returning it to the defaults
- * (where applicable).
- *
- * Note that it only makes sense for NTDB_ATTRIBUTE_LOG and NTDB_ATTRIBUTE_FLOCK
- * to be unset.
- */
-void ntdb_unset_attribute(struct ntdb_context *ntdb,
- enum ntdb_attribute_type type);
-
-/**
- * ntdb_name - get the name of a ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This returns a copy of the name string, made at ntdb_open() time.
- *
- * This is mostly useful for logging.
- */
-const char *ntdb_name(const struct ntdb_context *ntdb);
-
-/**
- * ntdb_fd - get the file descriptor of a ntdb
- * @ntdb: the ntdb context returned from ntdb_open()
- *
- * This returns the file descriptor for the underlying database file, or -1
- * for NTDB_INTERNAL.
- */
-int ntdb_fd(const struct ntdb_context *ntdb);
-
-/**
- * ntdb_foreach - iterate through every open NTDB.
- * @fn: the function to call for every NTDB
- * @p: the pointer to hand to @fn
- *
- * NTDB internally keeps track of all open TDBs; this function allows you to
- * iterate through them. If @fn returns non-zero, traversal stops.
- */
-#define ntdb_foreach(fn, p) \
- ntdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p), \
- struct ntdb_context *), (p))
-
-void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p);
-
-/**
- * struct ntdb_attribute_base - common fields for all ntdb attributes.
- */
-struct ntdb_attribute_base {
- enum ntdb_attribute_type attr;
- union ntdb_attribute *next;
-};
-
-/**
- * enum ntdb_log_level - log levels for ntdb_attribute_log
- * @NTDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors
- * or internal consistency failures.
- * @NTDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters
- * or writing to a read-only database.
- * @NTDB_LOG_WARNING: used for informational messages on issues which
- * are unusual but handled by NTDB internally, such
- * as a failure to mmap or failure to open /dev/urandom.
- * It's also used when ntdb_open() fails without O_CREAT
- * because a file does not exist.
- */
-enum ntdb_log_level {
- NTDB_LOG_ERROR,
- NTDB_LOG_USE_ERROR,
- NTDB_LOG_WARNING
-};
-
-/**
- * struct ntdb_attribute_log - log function attribute
- *
- * This attribute provides a hook for you to log errors.
- */
-struct ntdb_attribute_log {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_LOG */
- void (*fn)(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data);
- void *data;
-};
-
-/**
- * struct ntdb_attribute_hash - hash function attribute
- *
- * This attribute allows you to provide an alternative hash function.
- * This hash function will be handed keys from the database; it will also
- * be handed the 8-byte NTDB_HASH_MAGIC value for checking the header (the
- * ntdb_open() will fail if the hash value doesn't match the header).
- *
- * Note that if your hash function gives different results on
- * different machine endians, your ntdb will no longer work across
- * different architectures!
- */
-struct ntdb_attribute_hash {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */
- uint32_t (*fn)(const void *key, size_t len, uint32_t seed,
- void *data);
- void *data;
-};
-
-/**
- * struct ntdb_attribute_seed - hash function seed attribute
- *
- * The hash function seed is normally taken from /dev/urandom (or equivalent)
- * but can be set manually here. This is mainly for testing purposes.
- */
-struct ntdb_attribute_seed {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_SEED */
- uint64_t seed;
-};
-
-/**
- * struct ntdb_attribute_stats - ntdb operational statistics
- *
- * This attribute records statistics of various low-level NTDB operations.
- * This can be used to assist performance evaluation. This is only
- * useful for ntdb_get_attribute().
- *
- * New fields will be added at the end, hence the "size" argument which
- * indicates how large your structure is: it must be filled in before
- * calling ntdb_get_attribute(), which will overwrite it with the size
- * ntdb knows about.
- */
-struct ntdb_attribute_stats {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_STATS */
- size_t size; /* = sizeof(struct ntdb_attribute_stats) */
- uint64_t allocs;
- uint64_t alloc_subhash;
- uint64_t alloc_chain;
- uint64_t alloc_bucket_exact;
- uint64_t alloc_bucket_max;
- uint64_t alloc_leftover;
- uint64_t alloc_coalesce_tried;
- uint64_t alloc_coalesce_iterate_clash;
- uint64_t alloc_coalesce_lockfail;
- uint64_t alloc_coalesce_race;
- uint64_t alloc_coalesce_succeeded;
- uint64_t alloc_coalesce_num_merged;
- uint64_t compares;
- uint64_t compare_wrong_offsetbits;
- uint64_t compare_wrong_keylen;
- uint64_t compare_wrong_rechash;
- uint64_t compare_wrong_keycmp;
- uint64_t transactions;
- uint64_t transaction_cancel;
- uint64_t transaction_nest;
- uint64_t transaction_expand_file;
- uint64_t transaction_read_direct;
- uint64_t transaction_read_direct_fail;
- uint64_t transaction_write_direct;
- uint64_t transaction_write_direct_fail;
- uint64_t traverses;
- uint64_t traverse_val_vanished;
- uint64_t expands;
- uint64_t frees;
- uint64_t locks;
- uint64_t lock_lowlevel;
- uint64_t lock_nonblock;
- uint64_t lock_nonblock_fail;
-};
-
-/**
- * struct ntdb_attribute_openhook - ntdb special effects hook for open
- *
- * This attribute contains a function to call once we have the OPEN_LOCK
- * for the ntdb, but before we've examined its contents. If this succeeds,
- * the ntdb will be populated if it's then zero-length.
- *
- * This is a hack to allow support for TDB-style TDB_CLEAR_IF_FIRST
- * behaviour.
- */
-struct ntdb_attribute_openhook {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_OPENHOOK */
- enum NTDB_ERROR (*fn)(int fd, void *data);
- void *data;
-};
-
-/**
- * struct ntdb_attribute_flock - ntdb special effects hook for file locking
- *
- * This attribute contains the functions to call to place and remove locks
- * on a file; it can be used to support non-blocking operations or lock
- * proxying.
- *
- * They should return 0 on success, -1 on failure and set errno.
- *
- * An error will be logged on error if errno is neither EAGAIN nor EINTR
- * (normally it would only return EAGAIN if waitflag is false, and
- * loop internally on EINTR).
- */
-struct ntdb_attribute_flock {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_FLOCK */
- int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *);
- int (*unlock)(int fd, int rw, off_t off, off_t len, void *);
- void *data;
-};
-
-/**
- * struct ntdb_attribute_hashsize - ntdb hashsize setting.
- *
- * This attribute is only settable on ntdb_open; it indicates that we create
- * a hashtable of the given size, rather than the default.
- */
-struct ntdb_attribute_hashsize {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASHSIZE */
- uint32_t size;
-};
-
-/**
- * struct ntdb_attribute_allocator - allocator for ntdb to use.
- *
- * You can replace malloc/free with your own allocation functions.
- * The allocator takes an "owner" pointer, which is either NULL (for
- * the initial struct ntdb_context and struct ntdb_file), or a
- * previously allocated pointer. This is useful for relationship
- * tracking, such as the talloc library.
- *
- * The expand function is realloc, but only ever used to expand an
- * existing allocation.
- *
- * Be careful mixing allocators: two ntdb_contexts which have the same file
- * open will share the same struct ntdb_file. This may be allocated by one
- * ntdb's allocator, and freed by the other.
- */
-struct ntdb_attribute_allocator {
- struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_ALLOCATOR */
- void *(*alloc)(const void *owner, size_t len, void *priv_data);
- void *(*expand)(void *old, size_t newlen, void *priv_data);
- void (*free)(void *old, void *priv_data);
- void *priv_data;
-};
-
-/**
- * union ntdb_attribute - ntdb attributes.
- *
- * This represents all the known attributes.
- *
- * See also:
- * struct ntdb_attribute_log, struct ntdb_attribute_hash,
- * struct ntdb_attribute_seed, struct ntdb_attribute_stats,
- * struct ntdb_attribute_openhook, struct ntdb_attribute_flock,
- * struct ntdb_attribute_allocator, struct ntdb_attribute_hashsize.
- */
-union ntdb_attribute {
- struct ntdb_attribute_base base;
- struct ntdb_attribute_log log;
- struct ntdb_attribute_hash hash;
- struct ntdb_attribute_seed seed;
- struct ntdb_attribute_stats stats;
- struct ntdb_attribute_openhook openhook;
- struct ntdb_attribute_flock flock;
- struct ntdb_attribute_allocator alloc;
- struct ntdb_attribute_hashsize hashsize;
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ntdb.h */
+++ /dev/null
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: ntdb
-Description: A (not-so) trivial database
-Version: @PACKAGE_VERSION@
-Libs: @LIB_RPATH@ -L${libdir} -lntdb
-Cflags: -I${includedir}
-URL: http://tdb.samba.org/
+++ /dev/null
- /*
- Trivial Database 2: opening and closing TDBs
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/build_assert/build_assert.h>
-
-/* All open ntdbs, to detect double-opens (fcntl file locks don't nest!) */
-static struct ntdb_context *tdbs = NULL;
-
-/*
- * Look through every open ntdb for one whose file has the given device and
- * inode.  On a match the file's reference count is bumped and the shared
- * struct ntdb_file is returned; otherwise NULL.
- */
-static struct ntdb_file *find_file(dev_t device, ino_t ino)
-{
-	struct ntdb_context *ctx = tdbs;
-
-	while (ctx != NULL) {
-		struct ntdb_file *candidate = ctx->file;
-
-		if (candidate->device == device && candidate->inode == ino) {
-			candidate->refcnt++;
-			return candidate;
-		}
-		ctx = ctx->next;
-	}
-	return NULL;
-}
-
-/*
- * Read exactly @len bytes from @fd into @buf, looping over short reads.
- * Returns true once everything has been read.  Returns false if read()
- * fails (errno left as set by read()) or if end-of-file is hit first
- * (errno is then set to EWOULDBLOCK).
- */
-static bool read_all(int fd, void *buf, size_t len)
-{
-	char *cursor = buf;
-	size_t remaining;
-
-	for (remaining = len; remaining != 0; ) {
-		ssize_t got = read(fd, cursor, remaining);
-
-		if (got < 0)
-			return false;
-		if (got == 0) {
-			/* Premature EOF: there is no real "too short" errno. */
-			errno = EWOULDBLOCK;
-			return false;
-		}
-		cursor += got;
-		remaining -= got;
-	}
-	return true;
-}
-
-/*
- * Produce a 32-bit value for use as a hash seed.  Sources are tried in
- * order: /dev/urandom, then an EGD pool at /dev/egd-pool, and finally a mix
- * of the process id and the current time (which logs a warning).
- */
-static uint32_t random_number(struct ntdb_context *ntdb)
-{
-	uint32_t value = 0;
-	struct timeval tv;
-	int rfd;
-
-	rfd = open("/dev/urandom", O_RDONLY);
-	if (rfd >= 0) {
-		bool filled = read_all(rfd, &value, sizeof(value));
-
-		close(rfd);
-		if (filled)
-			return value;
-	}
-
-	/* FIXME: Untested! Based on Wikipedia protocol description! */
-	rfd = open("/dev/egd-pool", O_RDWR);
-	if (rfd >= 0) {
-		/* Command is 1, next byte is size we want to read. */
-		char request[2] = { 1, sizeof(uint32_t) };
-		bool complete = false;
-
-		if (write(rfd, request, sizeof(request)) == sizeof(request)) {
-			char answer[1 + sizeof(uint32_t)];
-			int got = read(rfd, answer, sizeof(answer));
-
-			if (got > 1) {
-				/* Keep whatever bytes arrived, even if short. */
-				memcpy(&value, answer + 1, got - 1);
-				complete = (answer[0] == sizeof(uint32_t)
-					    && got == sizeof(answer));
-			}
-		}
-		close(rfd);
-		if (complete)
-			return value;
-	}
-
-	/* Last resort: derive something from pid and time of day. */
-	gettimeofday(&tv, NULL);
-	value = getpid() * 100132289ULL + tv.tv_sec * 1000000ULL + tv.tv_usec;
-	ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
-		    "ntdb_open: random from getpid and time");
-	return value;
-}
-
-/*
- * Finish setting up a freshly opened context: runs ntdb_io_init() on it and
- * clears the transaction and access pointers.
- */
-static void ntdb_context_init(struct ntdb_context *ntdb)
-{
- /* Initialize the NTDB fields here */
- ntdb_io_init(ntdb);
- ntdb->transaction = NULL;
- ntdb->access = NULL;
-}
-
-/* initialise a new database:
- *
- * struct ntdb_header;
- * struct {
- * struct ntdb_used_record hash_header;
- * ntdb_off_t hash_buckets[1 << ntdb->hash_bits];
- * } hash;
- * struct ntdb_freetable ftable;
- * struct {
- * struct ntdb_free_record free_header;
- * char forty_three[...];
- * } remainder;
- */
-/* Size in bytes of everything up to and including the trailing free
- * record's header, for a hash table of (1 << hbits) buckets.  The argument
- * is parenthesised so that any expression may be passed safely. */
-#define NEW_DATABASE_HDR_SIZE(hbits) \
-	(sizeof(struct ntdb_header) \
-	 + sizeof(struct ntdb_used_record) + (sizeof(ntdb_off_t) << (hbits)) \
-	 + sizeof(struct ntdb_freetable) \
-	 + sizeof(struct ntdb_free_record))
-
-/*
- * Build a brand-new, empty database image in memory and, unless the ntdb is
- * NTDB_INTERNAL, write it to ntdb->file->fd (seeking to 0 and truncating
- * the file first).
- *
- * @seed: optional hash seed; when NULL the seed comes from random_number().
- * @rhdr: receives a copy of the header, already passed through
- *        ntdb_convert().
- *
- * For NTDB_INTERNAL the allocated image becomes ntdb->file->map_ptr and is
- * not freed here; in every other case it is released before returning.
- *
- * Returns NTDB_SUCCESS, NTDB_ERR_OOM, NTDB_ERR_IO, or the error reported by
- * set_header().
- */
-static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
-					  struct ntdb_attribute_seed *seed,
-					  struct ntdb_header *rhdr)
-{
-	/* We make it up in memory, then write it out if not internal */
-	struct ntdb_freetable *ftable;
-	struct ntdb_used_record *htable;
-	struct ntdb_header *hdr;
-	struct ntdb_free_record *remainder;
-	char *mem;
-	unsigned int magic_len;
-	ssize_t rlen;
-	size_t dbsize, hashsize, hdrsize, remaindersize;
-	enum NTDB_ERROR ecode;
-
-	hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits;
-
-	/* Always make db a multiple of NTDB_PGSIZE */
-	hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits);
-	dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
-
-	mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
-	if (!mem) {
-		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-				   "ntdb_new_database: failed to allocate");
-	}
-
-	/* Carve the buffer up in on-disk order. */
-	hdr = (void *)mem;
-	htable = (void *)(mem + sizeof(*hdr));
-	ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize);
-	remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize
-			     + sizeof(*ftable));
-
-	/* Fill in the header */
-	hdr->version = NTDB_VERSION;
-	if (seed)
-		hdr->hash_seed = seed->seed;
-	else
-		hdr->hash_seed = random_number(ntdb);
-	/* hash_test stores the hash of NTDB_HASH_MAGIC under this seed. */
-	hdr->hash_test = NTDB_HASH_MAGIC;
-	hdr->hash_test = ntdb->hash_fn(&hdr->hash_test,
-				       sizeof(hdr->hash_test),
-				       hdr->hash_seed,
-				       ntdb->hash_data);
-	hdr->hash_bits = ntdb->hash_bits;
-	hdr->recovery = 0;
-	hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK;
-	hdr->seqnum = 0;
-	hdr->capabilities = 0;
-	memset(hdr->reserved, 0, sizeof(hdr->reserved));
-
-	/* Hash is all zero after header. */
-	ecode = set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0,
-			   hashsize, hashsize);
-	if (ecode != NTDB_SUCCESS) {
-		/* Previously ignored; treat like the free table header. */
-		goto out;
-	}
-	memset(htable + 1, 0, hashsize);
-
-	/* Free is empty. */
-	hdr->free_table = (char *)ftable - (char *)hdr;
-	memset(ftable, 0, sizeof(*ftable));
-	ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
-			   sizeof(*ftable) - sizeof(ftable->hdr),
-			   sizeof(*ftable) - sizeof(ftable->hdr));
-	if (ecode != NTDB_SUCCESS) {
-		goto out;
-	}
-
-	/* Rest of database is a free record, containing junk. */
-	remaindersize = dbsize - hdrsize;
-	remainder->ftable_and_len
-		= (remaindersize + sizeof(*remainder)
-		   - sizeof(struct ntdb_used_record));
-	remainder->next = 0;
-	remainder->magic_and_prev
-		= (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
-		| ((char *)remainder - (char *)hdr);
-	memset(remainder + 1, 0x43, remaindersize);
-
-	/* Put in our single free entry. */
-	ftable->buckets[size_to_bucket(remaindersize)] =
-		(char *)remainder - (char *)hdr;
-
-	/* Magic food */
-	memset(hdr->magic_food, 0, sizeof(hdr->magic_food));
-	strcpy(hdr->magic_food, NTDB_MAGIC_FOOD);
-
-	/* This creates an endian-converted database, as if read from disk */
-	magic_len = sizeof(hdr->magic_food);
-	ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len);
-
-	/* Return copy of header. */
-	*rhdr = *hdr;
-
-	if (ntdb->flags & NTDB_INTERNAL) {
-		/* The image itself is the database: hand it over, don't free. */
-		ntdb->file->map_size = dbsize;
-		ntdb->file->map_ptr = hdr;
-		return NTDB_SUCCESS;
-	}
-	if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
-		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
-				    "ntdb_new_database:"
-				    " failed to seek: %s", strerror(errno));
-		goto out;
-	}
-
-	if (ftruncate(ntdb->file->fd, 0) == -1) {
-		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
-				    "ntdb_new_database:"
-				    " failed to truncate: %s", strerror(errno));
-		goto out;
-	}
-
-	rlen = write(ntdb->file->fd, hdr, dbsize);
-	if (rlen != dbsize) {
-		/* A short write without an error is reported as ENOSPC. */
-		if (rlen >= 0)
-			errno = ENOSPC;
-		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
-				    "ntdb_new_database: %zi writing header: %s",
-				    rlen, strerror(errno));
-		goto out;
-	}
-
-out:
-	/* On the success path ecode is NTDB_SUCCESS from set_header(). */
-	ntdb->free_fn(hdr, ntdb->alloc_data);
-	return ecode;
-}
-
-/*
- * Allocate ntdb->file with the context's allocator and give it its initial
- * state: no locks, one reference, nothing mapped.  Logs and returns
- * NTDB_ERR_OOM if the allocation fails, NTDB_SUCCESS otherwise.
- */
-static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
-{
-	struct ntdb_file *fresh;
-
-	fresh = ntdb->alloc_fn(NULL, sizeof(*ntdb->file), ntdb->alloc_data);
-	ntdb->file = fresh;
-	if (fresh == NULL) {
-		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
-				   "ntdb_open: cannot alloc ntdb_file structure");
-	}
-
-	fresh->refcnt = 1;
-	fresh->num_lockrecs = 0;
-	fresh->lockrecs = NULL;
-	fresh->allrecord_lock.count = 0;
-	fresh->map_ptr = NULL;
-	fresh->direct_count = 0;
-	fresh->old_mmaps = NULL;
-	return NTDB_SUCCESS;
-}
-
-/*
- * Apply one attribute to an open ntdb.  LOG, FLOCK and ALLOCATOR replace
- * the corresponding callbacks; HASH, SEED, OPENHOOK and HASHSIZE are
- * rejected because they are fixed at open time; STATS can never be set.
- * Every rejection is logged and returns NTDB_ERR_EINVAL.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
-					     const union ntdb_attribute *attr)
-{
-	const char *fixed_at_open = NULL;
-
-	switch (attr->base.attr) {
-	case NTDB_ATTRIBUTE_LOG:
-		ntdb->log_fn = attr->log.fn;
-		ntdb->log_data = attr->log.data;
-		return NTDB_SUCCESS;
-	case NTDB_ATTRIBUTE_FLOCK:
-		ntdb->lock_fn = attr->flock.lock;
-		ntdb->unlock_fn = attr->flock.unlock;
-		ntdb->lock_data = attr->flock.data;
-		return NTDB_SUCCESS;
-	case NTDB_ATTRIBUTE_ALLOCATOR:
-		ntdb->alloc_fn = attr->alloc.alloc;
-		ntdb->expand_fn = attr->alloc.expand;
-		ntdb->free_fn = attr->alloc.free;
-		ntdb->alloc_data = attr->alloc.priv_data;
-		return NTDB_SUCCESS;
-	case NTDB_ATTRIBUTE_STATS:
-		return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
-				   NTDB_LOG_USE_ERROR,
-				   "ntdb_set_attribute:"
-				   " cannot set NTDB_ATTRIBUTE_STATS");
-	case NTDB_ATTRIBUTE_HASH:
-		fixed_at_open = "NTDB_ATTRIBUTE_HASH";
-		break;
-	case NTDB_ATTRIBUTE_SEED:
-		fixed_at_open = "NTDB_ATTRIBUTE_SEED";
-		break;
-	case NTDB_ATTRIBUTE_OPENHOOK:
-		fixed_at_open = "NTDB_ATTRIBUTE_OPENHOOK";
-		break;
-	case NTDB_ATTRIBUTE_HASHSIZE:
-		fixed_at_open = "NTDB_ATTRIBUTE_HASHSIZE";
-		break;
-	default:
-		return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
-				   NTDB_LOG_USE_ERROR,
-				   "ntdb_set_attribute:"
-				   " unknown attribute type %u",
-				   attr->base.attr);
-	}
-
-	/* Only the open-time-only attributes fall out of the switch. */
-	return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
-			   NTDB_LOG_USE_ERROR,
-			   "ntdb_set_attribute:"
-			   " cannot set %s after opening",
-			   fixed_at_open);
-}
-
-/*
- * Fill in @attr with the current value of the attribute selected by
- * @attr->base.attr.
- *
- * Returns NTDB_ERR_NOEXIST for LOG or OPENHOOK when no function is set, and
- * logs and returns NTDB_ERR_EINVAL for an unknown attribute type.  On
- * success @attr->base.next is reset to NULL and NTDB_SUCCESS is returned.
- */
-_PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
-					     union ntdb_attribute *attr)
-{
-	switch (attr->base.attr) {
-	case NTDB_ATTRIBUTE_LOG:
-		if (!ntdb->log_fn)
-			return NTDB_ERR_NOEXIST;
-		attr->log.fn = ntdb->log_fn;
-		attr->log.data = ntdb->log_data;
-		break;
-	case NTDB_ATTRIBUTE_HASH:
-		attr->hash.fn = ntdb->hash_fn;
-		attr->hash.data = ntdb->hash_data;
-		break;
-	case NTDB_ATTRIBUTE_SEED:
-		attr->seed.seed = ntdb->hash_seed;
-		break;
-	case NTDB_ATTRIBUTE_OPENHOOK:
-		if (!ntdb->openhook)
-			return NTDB_ERR_NOEXIST;
-		attr->openhook.fn = ntdb->openhook;
-		attr->openhook.data = ntdb->openhook_data;
-		break;
-	case NTDB_ATTRIBUTE_STATS: {
-		/* Copy no more than the smaller of the caller's and our
-		 * structure sizes; the copy also overwrites the caller's
-		 * size field with ours when it is within that range. */
-		size_t size = attr->stats.size;
-		if (size > ntdb->stats.size)
-			size = ntdb->stats.size;
-		memcpy(&attr->stats, &ntdb->stats, size);
-		break;
-	}
-	case NTDB_ATTRIBUTE_FLOCK:
-		attr->flock.lock = ntdb->lock_fn;
-		attr->flock.unlock = ntdb->unlock_fn;
-		attr->flock.data = ntdb->lock_data;
-		break;
-	case NTDB_ATTRIBUTE_ALLOCATOR:
-		attr->alloc.alloc = ntdb->alloc_fn;
-		attr->alloc.expand = ntdb->expand_fn;
-		attr->alloc.free = ntdb->free_fn;
-		attr->alloc.priv_data = ntdb->alloc_data;
-		break;
-	case NTDB_ATTRIBUTE_HASHSIZE:
-		/* hash_bits may be as large as 31, so shift an unsigned
-		 * 32-bit one: a plain int "1 << 31" is undefined. */
-		attr->hashsize.size = (uint32_t)1 << ntdb->hash_bits;
-		break;
-	default:
-		return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
-				   NTDB_LOG_USE_ERROR,
-				   "ntdb_get_attribute:"
-				   " unknown attribute type %u",
-				   attr->base.attr);
-	}
-	attr->base.next = NULL;
-	return NTDB_SUCCESS;
-}
-
-/*
- * Return an attribute of an open ntdb to its default.  LOG and OPENHOOK
- * have their function pointers cleared and FLOCK goes back to the fcntl
- * lock/unlock functions.  HASH, SEED and STATS cannot be unset, and any
- * other type is treated as unknown; those cases only log a usage error,
- * since the function has no return value.
- */
-_PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
-				    enum ntdb_attribute_type type)
-{
-	switch (type) {
-	case NTDB_ATTRIBUTE_LOG:
-		ntdb->log_fn = NULL;
-		break;
-	case NTDB_ATTRIBUTE_OPENHOOK:
-		ntdb->openhook = NULL;
-		break;
-	case NTDB_ATTRIBUTE_HASH:
-	case NTDB_ATTRIBUTE_SEED:
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
-			    "ntdb_unset_attribute: cannot unset %s after opening",
-			    type == NTDB_ATTRIBUTE_HASH
-			    ? "NTDB_ATTRIBUTE_HASH"
-			    : "NTDB_ATTRIBUTE_SEED");
-		break;
-	case NTDB_ATTRIBUTE_STATS:
-		/* The continuation needs its leading space, otherwise the
-		 * message reads "ntdb_unset_attribute:cannot unset ...". */
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
-			    NTDB_LOG_USE_ERROR,
-			    "ntdb_unset_attribute:"
-			    " cannot unset NTDB_ATTRIBUTE_STATS");
-		break;
-	case NTDB_ATTRIBUTE_FLOCK:
-		ntdb->lock_fn = ntdb_fcntl_lock;
-		ntdb->unlock_fn = ntdb_fcntl_unlock;
-		break;
-	default:
-		ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
-			    NTDB_LOG_USE_ERROR,
-			    "ntdb_unset_attribute: unknown attribute type %u",
-			    type);
-	}
-}
-
-/*
- * Decide what to do about a capability record we do not understand.  The
- * top three bits of the capability tell us whether it matters:
- *
- *  NTDB_CAP_NOOPEN  - the file must not be opened at all: NTDB_ERR_IO.
- *  NTDB_CAP_NOWRITE - the file must not be written: NTDB_ERR_RDONLY unless
- *                     the ntdb is already NTDB_RDONLY.
- *  NTDB_CAP_NOCHECK - the file cannot be checked: NTDB_CANT_CHECK is added
- *                     to ntdb->flags.
- *
- * @caller is the function name used as the log message prefix.  The logged
- * number is the capability type (type & NTDB_CAP_TYPE_MASK) rather than the
- * flag bit, which carried no information (and was always 0 in the
- * cannot-write message).
- */
-enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
-				   ntdb_off_t type)
-{
-	/* Cast explicitly: ntdb_off_t need not be unsigned long long. */
-	unsigned long long cap_type = type & NTDB_CAP_TYPE_MASK;
-
-	if (type & NTDB_CAP_NOOPEN) {
-		return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
-				   "%s: file has unknown capability %llu",
-				   caller, cap_type);
-	}
-
-	if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
-		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
-				   "%s: file has unknown capability %llu"
-				   " (cannot write to it)",
-				   caller, cap_type);
-	}
-
-	if (type & NTDB_CAP_NOCHECK) {
-		ntdb->flags |= NTDB_CANT_CHECK;
-	}
-	return NTDB_SUCCESS;
-}
-
-/*
- * Walk the capability list starting at offset @capabilities and pass every
- * entry to unknown_capability().  Stops at the end of the list (offset 0)
- * or at the first error, which is returned; a failed read of an entry is
- * returned immediately.
- */
-static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
-				       ntdb_off_t capabilities)
-{
-	enum NTDB_ERROR result = NTDB_SUCCESS;
-	ntdb_off_t pos = capabilities;
-
-	while (pos != 0 && result == NTDB_SUCCESS) {
-		const struct ntdb_capability *entry;
-
-		entry = ntdb_access_read(ntdb, pos, sizeof(*entry), true);
-		if (NTDB_PTR_IS_ERR(entry))
-			return NTDB_PTR_ERR(entry);
-
-		/* We don't understand any capabilities (yet), so every
-		 * type is handled as unknown. */
-		result = unknown_capability(ntdb, "ntdb_open", entry->type);
-
-		/* Fetch the link before giving the record back. */
-		pos = entry->next;
-		ntdb_access_release(ntdb, entry);
-	}
-	return result;
-}
-
-/* Default allocation hook: plain malloc(); @owner and @priv_data are unused. */
-static void *default_alloc(const void *owner, size_t len, void *priv_data)
-{
- return malloc(len);
-}
-
-/* Default expand hook: plain realloc(); @priv_data is unused. */
-static void *default_expand(void *ptr, size_t len, void *priv_data)
-{
- return realloc(ptr, len);
-}
-
-/* Default free hook: plain free(); @priv_data is unused. */
-static void default_free(void *ptr, void *priv_data)
-{
- free(ptr);
-}
-
-/*
- * Allocate the ntdb_context itself, with room for a copy of @name right
- * after it.  The context does not exist yet, so the attribute list is
- * searched by hand: the first NTDB_ATTRIBUTE_ALLOCATOR found supplies the
- * allocation function, otherwise default_alloc() is used.
- */
-static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr,
-					const char *name)
-{
-	const union ntdb_attribute *cur;
-	size_t len = sizeof(struct ntdb_context) + strlen(name) + 1;
-
-	for (cur = attr; cur != NULL; cur = cur->base.next) {
-		if (cur->base.attr != NTDB_ATTRIBUTE_ALLOCATOR)
-			continue;
-		return cur->alloc.alloc(NULL, len, cur->alloc.priv_data);
-	}
-	return default_alloc(NULL, len, NULL);
-}
-
-/*
- * Return the smallest number of bits b, with b >= 1, such that
- * (1 << b) >= size.  Sizes 0, 1 and 2 therefore all give 1.
- */
-static unsigned int next_pow2(uint64_t size)
-{
-	unsigned int shift;
-
-	for (shift = 1; (1ULL << shift) < size; shift++)
-		;
-	return shift;
-}
-
-_PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
- int open_flags, mode_t mode,
- union ntdb_attribute *attr)
-{
- struct ntdb_context *ntdb;
- struct stat st;
- int saved_errno = 0;
- uint64_t hash_test;
- unsigned v;
- ssize_t rlen;
- struct ntdb_header hdr;
- struct ntdb_attribute_seed *seed = NULL;
- ntdb_bool_err berr;
- enum NTDB_ERROR ecode;
- int openlock;
-
- ntdb = alloc_ntdb(attr, name);
- if (!ntdb) {
- /* Can't log this */
- errno = ENOMEM;
- return NULL;
- }
- /* Set name immediately for logging functions. */
- ntdb->name = strcpy((char *)(ntdb + 1), name);
- ntdb->flags = ntdb_flags;
- ntdb->log_fn = NULL;
- ntdb->open_flags = open_flags;
- ntdb->file = NULL;
- ntdb->openhook = NULL;
- ntdb->lock_fn = ntdb_fcntl_lock;
- ntdb->unlock_fn = ntdb_fcntl_unlock;
- ntdb->hash_fn = ntdb_jenkins_hash;
- memset(&ntdb->stats, 0, sizeof(ntdb->stats));
- ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
- ntdb->stats.size = sizeof(ntdb->stats);
- ntdb->alloc_fn = default_alloc;
- ntdb->expand_fn = default_expand;
- ntdb->free_fn = default_free;
- ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */
-
- while (attr) {
- switch (attr->base.attr) {
- case NTDB_ATTRIBUTE_HASH:
- ntdb->hash_fn = attr->hash.fn;
- ntdb->hash_data = attr->hash.data;
- break;
- case NTDB_ATTRIBUTE_SEED:
- seed = &attr->seed;
- break;
- case NTDB_ATTRIBUTE_OPENHOOK:
- ntdb->openhook = attr->openhook.fn;
- ntdb->openhook_data = attr->openhook.data;
- break;
- case NTDB_ATTRIBUTE_HASHSIZE:
- ntdb->hash_bits = next_pow2(attr->hashsize.size);
- if (ntdb->hash_bits > 31) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
- NTDB_LOG_USE_ERROR,
- "ntdb_open: hash_size %u"
- " too large",
- attr->hashsize.size);
- goto fail;
- }
- break;
- default:
- /* These are set as normal. */
- ecode = ntdb_set_attribute(ntdb, attr);
- if (ecode != NTDB_SUCCESS)
- goto fail;
- }
- attr = attr->base.next;
- }
-
- if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
- | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
- | NTDB_RDONLY)) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_open: unknown flags %u", ntdb_flags);
- goto fail;
- }
-
- if (seed) {
- if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
- NTDB_LOG_USE_ERROR,
- "ntdb_open:"
- " cannot set NTDB_ATTRIBUTE_SEED"
- " without O_CREAT.");
- goto fail;
- }
- }
-
- if ((open_flags & O_ACCMODE) == O_WRONLY) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_open: can't open ntdb %s write-only",
- name);
- goto fail;
- }
-
- if ((open_flags & O_ACCMODE) == O_RDONLY) {
- openlock = F_RDLCK;
- ntdb->flags |= NTDB_RDONLY;
- } else {
- if (ntdb_flags & NTDB_RDONLY) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
- NTDB_LOG_USE_ERROR,
- "ntdb_open: can't use NTDB_RDONLY"
- " without O_RDONLY");
- goto fail;
- }
- openlock = F_WRLCK;
- }
-
- /* internal databases don't need any of the rest. */
- if (ntdb->flags & NTDB_INTERNAL) {
- ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
- ecode = ntdb_new_file(ntdb);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- ntdb->file->fd = -1;
- ecode = ntdb_new_database(ntdb, seed, &hdr);
- if (ecode == NTDB_SUCCESS) {
- ntdb_convert(ntdb, &hdr.hash_seed,
- sizeof(hdr.hash_seed));
- ntdb->hash_seed = hdr.hash_seed;
- ntdb_context_init(ntdb);
- ntdb_ftable_init(ntdb);
- }
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- return ntdb;
- }
-
- if (stat(name, &st) != -1)
- ntdb->file = find_file(st.st_dev, st.st_ino);
-
- if (!ntdb->file) {
- ecode = ntdb_new_file(ntdb);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
-
- /* Set this now, as ntdb_nest_lock examines it. */
- ntdb->file->map_size = 0;
-
- if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) {
- enum ntdb_log_level lvl;
- /* errno set by open(2) */
- saved_errno = errno;
-
- /* Probing for files like this is a common pattern. */
- if (!(open_flags & O_CREAT) && errno == ENOENT) {
- lvl = NTDB_LOG_WARNING;
- } else {
- lvl = NTDB_LOG_ERROR;
- }
- ntdb_logerr(ntdb, NTDB_ERR_IO, lvl,
- "ntdb_open: could not open file %s: %s",
- name, strerror(errno));
-
- goto fail_errno;
- }
-
- /* ensure there is only one process initialising at once:
- * do it immediately to reduce the create/openlock race. */
- ecode = ntdb_lock_open(ntdb, openlock,
- NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
- if (ecode != NTDB_SUCCESS) {
- saved_errno = errno;
- goto fail_errno;
- }
-
- /* on exec, don't inherit the fd */
- v = fcntl(ntdb->file->fd, F_GETFD, 0);
- fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC);
-
- if (fstat(ntdb->file->fd, &st) == -1) {
- saved_errno = errno;
- ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open: could not stat open %s: %s",
- name, strerror(errno));
- goto fail_errno;
- }
-
- ntdb->file->device = st.st_dev;
- ntdb->file->inode = st.st_ino;
-
- /* call their open hook if they gave us one. */
- if (ntdb->openhook) {
- ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
- if (ecode != NTDB_SUCCESS) {
- ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_open: open hook failed");
- goto fail;
- }
- open_flags |= O_CREAT;
- }
- } else {
- /* ensure there is only one process initialising at once */
- ecode = ntdb_lock_open(ntdb, openlock,
- NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
- if (ecode != NTDB_SUCCESS) {
- saved_errno = errno;
- goto fail_errno;
- }
- }
-
- /* If they used O_TRUNC, read will return 0. */
- rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
- if (rlen == 0 && (open_flags & O_CREAT)) {
- ecode = ntdb_new_database(ntdb, seed, &hdr);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- } else if (rlen < 0) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open: error %s reading %s",
- strerror(errno), name);
- goto fail;
- } else if (rlen < sizeof(hdr)
- || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open: %s is not a ntdb file", name);
- goto fail;
- }
-
- if (hdr.version != NTDB_VERSION) {
- if (hdr.version == bswap_64(NTDB_VERSION))
- ntdb->flags |= NTDB_CONVERT;
- else {
- /* wrong version */
- ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open:"
- " %s is unknown version 0x%llx",
- name, (long long)hdr.version);
- goto fail;
- }
- } else if (ntdb->flags & NTDB_CONVERT) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open:"
- " %s does not need NTDB_CONVERT",
- name);
- goto fail;
- }
-
- ntdb_context_init(ntdb);
-
- ntdb_convert(ntdb, &hdr, sizeof(hdr));
- ntdb->hash_bits = hdr.hash_bits;
- ntdb->hash_seed = hdr.hash_seed;
- hash_test = NTDB_HASH_MAGIC;
- hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
- if (hdr.hash_test != hash_test) {
- /* wrong hash variant */
- ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open:"
- " %s uses a different hash function",
- name);
- goto fail;
- }
-
- ecode = capabilities_ok(ntdb, hdr.capabilities);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
-
- /* Clear any features we don't understand. */
- if ((open_flags & O_ACCMODE) != O_RDONLY) {
- hdr.features_used &= NTDB_FEATURE_MASK;
- ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
- features_used),
- &hdr.features_used,
- sizeof(hdr.features_used));
- if (ecode != NTDB_SUCCESS)
- goto fail;
- }
-
- ntdb_unlock_open(ntdb, openlock);
-
- /* This makes sure we have current map_size and mmap. */
- ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
- if (unlikely(ecode != NTDB_SUCCESS))
- goto fail;
-
- if (ntdb->file->map_size % NTDB_PGSIZE != 0) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open:"
- " %s size %llu isn't a multiple of %u",
- name, (long long)ntdb->file->map_size,
- NTDB_PGSIZE);
- goto fail;
- }
-
- /* Now it's fully formed, recover if necessary. */
- berr = ntdb_needs_recovery(ntdb);
- if (unlikely(berr != false)) {
- if (berr < 0) {
- ecode = NTDB_OFF_TO_ERR(berr);
- goto fail;
- }
- ecode = ntdb_lock_and_recover(ntdb);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- }
-
- ecode = ntdb_ftable_init(ntdb);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
-
- ntdb->next = tdbs;
- tdbs = ntdb;
- return ntdb;
-
- fail:
- /* Map ecode to some logical errno. */
- switch (NTDB_ERR_TO_OFF(ecode)) {
- case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
- case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
- saved_errno = EIO;
- break;
- case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
- saved_errno = EWOULDBLOCK;
- break;
- case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
- saved_errno = ENOMEM;
- break;
- case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
- saved_errno = EINVAL;
- break;
- default:
- saved_errno = EINVAL;
- break;
- }
-
-fail_errno:
-#ifdef NTDB_TRACE
- close(ntdb->tracefd);
-#endif
- if (ntdb->file) {
- ntdb_lock_cleanup(ntdb);
- if (--ntdb->file->refcnt == 0) {
- assert(ntdb->file->num_lockrecs == 0);
- if (ntdb->file->map_ptr) {
- if (ntdb->flags & NTDB_INTERNAL) {
- ntdb->free_fn(ntdb->file->map_ptr,
- ntdb->alloc_data);
- } else
- ntdb_munmap(ntdb);
- }
- if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0)
- ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_open: failed to close ntdb fd"
- " on error: %s", strerror(errno));
- ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
- ntdb->free_fn(ntdb->file, ntdb->alloc_data);
- }
- }
-
- ntdb->free_fn(ntdb, ntdb->alloc_data);
- errno = saved_errno;
- return NULL;
-}
-
-_PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
-{
- int ret = 0;
- struct ntdb_context **i;
-
- ntdb_trace(ntdb, "ntdb_close");
-
- if (ntdb->transaction) {
- ntdb_transaction_cancel(ntdb);
- }
-
- ntdb_lock_cleanup(ntdb);
- if (--ntdb->file->refcnt == 0) {
- if (ntdb->file->map_ptr) {
- if (ntdb->flags & NTDB_INTERNAL) {
- ntdb->free_fn(ntdb->file->map_ptr,
- ntdb->alloc_data);
- } else {
- ntdb_munmap(ntdb);
- }
- }
- ret = close(ntdb->file->fd);
- ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
- ntdb->free_fn(ntdb->file, ntdb->alloc_data);
- }
-
- /* Remove from tdbs list */
- for (i = &tdbs; *i; i = &(*i)->next) {
- if (*i == ntdb) {
- *i = ntdb->next;
- break;
- }
- }
-
-#ifdef NTDB_TRACE
- close(ntdb->tracefd);
-#endif
- ntdb->free_fn(ntdb, ntdb->alloc_data);
-
- return ret;
-}
-
-_PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
-{
- struct ntdb_context *i;
-
- for (i = tdbs; i; i = i->next) {
- if (fn(i, p) != 0)
- break;
- }
-}
+++ /dev/null
-#ifndef NTDB_PRIVATE_H
-#define NTDB_PRIVATE_H
-/*
- Trivial Database 2: private types and prototypes
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "config.h"
-#ifndef HAVE_CCAN
-#error You need ccan to build ntdb!
-#endif
-#include "ntdb.h"
-#include <ccan/compiler/compiler.h>
-#include <ccan/likely/likely.h>
-#include <ccan/endian/endian.h>
-
-#ifdef HAVE_LIBREPLACE
-#include "replace.h"
-#include "system/filesys.h"
-#include "system/time.h"
-#include "system/shmem.h"
-#include "system/select.h"
-#include "system/wait.h"
-#else
-#include <stdarg.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <sys/time.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdio.h>
-#include <utime.h>
-#include <unistd.h>
-#include <ctype.h>
-#include <string.h>
-#include <sys/wait.h>
-#include <time.h>
-#endif
-#include <assert.h>
-
-#ifndef TEST_IT
-#define TEST_IT(cond)
-#endif
-
-/* #define NTDB_TRACE 1 */
-
-#ifndef __STRING
-#define __STRING(x) #x
-#endif
-
-#ifndef __STRINGSTRING
-#define __STRINGSTRING(x) __STRING(x)
-#endif
-
-#ifndef __location__
-#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
-#endif
-
-typedef uint64_t ntdb_len_t;
-typedef uint64_t ntdb_off_t;
-
-#define NTDB_MAGIC_FOOD "NTDB file\n"
-#define NTDB_VERSION ((uint64_t)(0x26011967 + 7))
-#define NTDB_USED_MAGIC ((uint64_t)0x1999)
-#define NTDB_HTABLE_MAGIC ((uint64_t)0x1888)
-#define NTDB_CHAIN_MAGIC ((uint64_t)0x1777)
-#define NTDB_FTABLE_MAGIC ((uint64_t)0x1666)
-#define NTDB_CAP_MAGIC ((uint64_t)0x1555)
-#define NTDB_FREE_MAGIC ((uint64_t)0xFE)
-#define NTDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
-#define NTDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
-#define NTDB_RECOVERY_INVALID_MAGIC (0x0ULL)
-
-/* Capability bits. */
-#define NTDB_CAP_TYPE_MASK 0x1FFFFFFFFFFFFFFFULL
-#define NTDB_CAP_NOCHECK 0x8000000000000000ULL
-#define NTDB_CAP_NOWRITE 0x4000000000000000ULL
-#define NTDB_CAP_NOOPEN 0x2000000000000000ULL
-
-#define NTDB_OFF_IS_ERR(off) unlikely(off >= (ntdb_off_t)(long)NTDB_ERR_LAST)
-#define NTDB_OFF_TO_ERR(off) ((enum NTDB_ERROR)(long)(off))
-#define NTDB_ERR_TO_OFF(ecode) ((ntdb_off_t)(long)(ecode))
-
-/* Packing errors into pointers and v.v. */
-#define NTDB_PTR_IS_ERR(ptr) \
- unlikely((unsigned long)(ptr) >= (unsigned long)NTDB_ERR_LAST)
-#define NTDB_PTR_ERR(p) ((enum NTDB_ERROR)(long)(p))
-#define NTDB_ERR_PTR(err) ((void *)(long)(err))
-
-/* This doesn't really need to be pagesize, but we use it for similar
- * reasons. */
-#define NTDB_PGSIZE 16384
-
-/* Common case of returning true, false or -ve error. */
-typedef int ntdb_bool_err;
-
-/* Prevent others from opening the file. */
-#define NTDB_OPEN_LOCK 0
-/* Expanding file. */
-#define NTDB_EXPANSION_LOCK 2
-/* Doing a transaction. */
-#define NTDB_TRANSACTION_LOCK 8
-/* Hash chain locks. */
-#define NTDB_HASH_LOCK_START 64
-
-/* Extend file by least 100 times larger than needed. */
-#define NTDB_EXTENSION_FACTOR 100
-
-/* We steal this many upper bits, giving a maximum offset of 64 exabytes. */
-#define NTDB_OFF_UPPER_STEAL 8
-
-/* And we use the lower bit, too. */
-#define NTDB_OFF_CHAIN_BIT 0
-
-/* Hash table sits just after the header. */
-#define NTDB_HASH_OFFSET (sizeof(struct ntdb_header))
-
-/* Additional features we understand. Currently: none. */
-#define NTDB_FEATURE_MASK ((uint64_t)0)
-
-/* The bit number where we store the extra hash bits. */
-/* Convenience mask to get actual offset. */
-#define NTDB_OFF_MASK \
- (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - (1<<NTDB_OFF_CHAIN_BIT))
-
-/* How many buckets in a free list: see size_to_bucket(). */
-#define NTDB_FREE_BUCKETS (64 - NTDB_OFF_UPPER_STEAL)
-
-/* We have to be able to fit a free record here. */
-#define NTDB_MIN_DATA_LEN \
- (sizeof(struct ntdb_free_record) - sizeof(struct ntdb_used_record))
-
-/* Indicates this entry is not on an flist (can happen during coalescing) */
-#define NTDB_FTABLE_NONE ((1ULL << NTDB_OFF_UPPER_STEAL) - 1)
-
-/* By default, hash is 64k bytes */
-#define NTDB_DEFAULT_HBITS 13
-
-struct ntdb_used_record {
- /* For on-disk compatibility, we avoid bitfields:
- magic: 16, (highest)
- key_len_bits: 5,
- extra_padding: 32
- */
- uint64_t magic_and_meta;
- /* The bottom key_len_bits*2 are key length, rest is data length. */
- uint64_t key_and_data_len;
-};
-
-static inline unsigned rec_key_bits(const struct ntdb_used_record *r)
-{
- return ((r->magic_and_meta >> 43) & ((1 << 5)-1)) * 2;
-}
-
-static inline uint64_t rec_key_length(const struct ntdb_used_record *r)
-{
- return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1);
-}
-
-static inline uint64_t rec_data_length(const struct ntdb_used_record *r)
-{
- return r->key_and_data_len >> rec_key_bits(r);
-}
-
-static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r)
-{
- return (r->magic_and_meta >> 11) & 0xFFFFFFFF;
-}
-
-static inline uint16_t rec_magic(const struct ntdb_used_record *r)
-{
- return (r->magic_and_meta >> 48);
-}
-
-struct ntdb_free_record {
- uint64_t magic_and_prev; /* NTDB_OFF_UPPER_STEAL bits magic, then prev */
- uint64_t ftable_and_len; /* Len not counting these two fields. */
- /* This is why the minimum record size is 8 bytes. */
- uint64_t next;
-};
-
-static inline uint64_t frec_prev(const struct ntdb_free_record *f)
-{
- return f->magic_and_prev & ((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1);
-}
-
-static inline uint64_t frec_magic(const struct ntdb_free_record *f)
-{
- return f->magic_and_prev >> (64 - NTDB_OFF_UPPER_STEAL);
-}
-
-static inline uint64_t frec_len(const struct ntdb_free_record *f)
-{
- return f->ftable_and_len & ((1ULL << (64 - NTDB_OFF_UPPER_STEAL))-1);
-}
-
-static inline unsigned frec_ftable(const struct ntdb_free_record *f)
-{
- return f->ftable_and_len >> (64 - NTDB_OFF_UPPER_STEAL);
-}
-
-struct ntdb_recovery_record {
- uint64_t magic;
- /* Length of record (add this header to get total length). */
- uint64_t max_len;
- /* Length used. */
- uint64_t len;
- /* Old length of file before transaction. */
- uint64_t eof;
-};
-
-/* this is stored at the front of every database */
-struct ntdb_header {
- char magic_food[64]; /* for /etc/magic */
- /* FIXME: Make me 32 bit? */
- uint64_t version; /* version of the code */
- uint64_t hash_bits; /* bits for toplevel hash table. */
- uint64_t hash_test; /* result of hashing HASH_MAGIC. */
- uint64_t hash_seed; /* "random" seed written at creation time. */
- ntdb_off_t free_table; /* (First) free table. */
- ntdb_off_t recovery; /* Transaction recovery area. */
-
- uint64_t features_used; /* Features all writers understand */
- uint64_t features_offered; /* Features offered */
-
- uint64_t seqnum; /* Sequence number for NTDB_SEQNUM */
-
- ntdb_off_t capabilities; /* Optional linked list of capabilities. */
- ntdb_off_t reserved[22];
-
- /*
- * Hash table is next:
- *
- * struct ntdb_used_record htable_hdr;
- * ntdb_off_t htable[1 << hash_bits];
- */
-};
-
-struct ntdb_freetable {
- struct ntdb_used_record hdr;
- ntdb_off_t next;
- ntdb_off_t buckets[NTDB_FREE_BUCKETS];
-};
-
-struct ntdb_capability {
- struct ntdb_used_record hdr;
- ntdb_off_t type;
- ntdb_off_t next;
- /* ... */
-};
-
-/* Information about a particular (locked) hash entry. */
-struct hash_info {
- /* Full hash value of entry. */
- uint32_t h;
- /* Start of hash table / chain. */
- ntdb_off_t table;
- /* Number of entries in this table/chain. */
- ntdb_off_t table_size;
- /* Bucket we (or an empty space) were found in. */
- ntdb_off_t bucket;
- /* Old value that was in that entry (if not found) */
- ntdb_off_t old_val;
-};
-
-enum ntdb_lock_flags {
- /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
- NTDB_LOCK_NOWAIT = 0,
- NTDB_LOCK_WAIT = 1,
- /* If set, don't log an error on failure. */
- NTDB_LOCK_PROBE = 2,
- /* If set, don't check for recovery (used by recovery code). */
- NTDB_LOCK_NOCHECK = 4,
-};
-
-struct ntdb_lock {
- struct ntdb_context *owner;
- off_t off;
- uint32_t count;
- uint32_t ltype;
-};
-
-/* This is only needed for ntdb_access_commit, but used everywhere to
- * simplify. */
-struct ntdb_access_hdr {
- struct ntdb_access_hdr *next;
- ntdb_off_t off;
- ntdb_len_t len;
- bool convert;
-};
-
-/* mmaps we are keeping around because they are still direct accessed */
-struct ntdb_old_mmap {
- struct ntdb_old_mmap *next;
-
- void *map_ptr;
- ntdb_len_t map_size;
-};
-
-struct ntdb_file {
- /* How many are sharing us? */
- unsigned int refcnt;
-
- /* Mmap (if any), or malloc (for NTDB_INTERNAL). */
- void *map_ptr;
-
- /* How much space has been mapped (<= current file size) */
- ntdb_len_t map_size;
-
- /* The file descriptor (-1 for NTDB_INTERNAL). */
- int fd;
-
- /* How many are accessing directly? */
- unsigned int direct_count;
-
- /* Old maps, still direct accessed. */
- struct ntdb_old_mmap *old_mmaps;
-
- /* Lock information */
- pid_t locker;
- struct ntdb_lock allrecord_lock;
- size_t num_lockrecs;
- struct ntdb_lock *lockrecs;
-
- /* Identity of this file. */
- dev_t device;
- ino_t inode;
-};
-
-struct ntdb_methods {
- enum NTDB_ERROR (*tread)(struct ntdb_context *, ntdb_off_t, void *,
- ntdb_len_t);
- enum NTDB_ERROR (*twrite)(struct ntdb_context *, ntdb_off_t, const void *,
- ntdb_len_t);
- enum NTDB_ERROR (*oob)(struct ntdb_context *, ntdb_off_t, ntdb_len_t, bool);
- enum NTDB_ERROR (*expand_file)(struct ntdb_context *, ntdb_len_t);
- void *(*direct)(struct ntdb_context *, ntdb_off_t, size_t, bool);
- ntdb_off_t (*read_off)(struct ntdb_context *ntdb, ntdb_off_t off);
- enum NTDB_ERROR (*write_off)(struct ntdb_context *ntdb, ntdb_off_t off,
- ntdb_off_t val);
-};
-
-/*
- internal prototypes
-*/
-/* Get bits from a value. */
-static inline uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
-{
- assert(num <= 32);
- return (val >> start) & ((1U << num) - 1);
-}
-
-
-/* hash.c: */
-uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
- void *unused);
-
-enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
- NTDB_DATA *kbuf, size_t *dlen);
-
-enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
- NTDB_DATA *kbuf, size_t *dlen);
-
-/* Hash random memory. */
-uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len);
-
-/* Find and lock a hash entry (or where it would be). */
-ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
- NTDB_DATA key,
- int ltype,
- struct hash_info *h,
- struct ntdb_used_record *rec,
- const char **rkey);
-
-enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
- const struct hash_info *h,
- ntdb_off_t new_off);
-
-enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
- const struct hash_info *h,
- ntdb_off_t new_off);
-
-enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
- const struct hash_info *h);
-
-/* For ntdb_check */
-bool is_subhash(ntdb_off_t val);
-enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
- ntdb_off_t type);
-
-/* free.c: */
-enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb);
-
-/* check.c needs these to iterate through free lists. */
-ntdb_off_t first_ftable(struct ntdb_context *ntdb);
-ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable);
-
-/* This returns space or -ve error number. */
-ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
- unsigned magic, bool growing);
-
-/* Put this record in a free list. */
-enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len_with_header,
- enum ntdb_lock_flags waitflag,
- bool coalesce_ok);
-
-/* Set up header for a used/ftable/htable/chain/capability record. */
-enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
- struct ntdb_used_record *rec,
- unsigned magic, uint64_t keylen, uint64_t datalen,
- uint64_t actuallen);
-
-/* Used by ntdb_check to verify. */
-unsigned int size_to_bucket(ntdb_len_t data_len);
-ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket);
-
-/* Used by ntdb_summary */
-ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off);
-
-/* Adjust expansion, used by create_recovery_area */
-ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size);
-
-/* io.c: */
-/* Initialize ntdb->methods. */
-void ntdb_io_init(struct ntdb_context *ntdb);
-
-/* Convert endian of the buffer if required. */
-void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size);
-
-/* Unmap and try to map the ntdb. */
-enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb);
-enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb);
-
-/* Either alloc a copy, or give direct access. Release frees or noop. */
-const void *ntdb_access_read(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len, bool convert);
-void *ntdb_access_write(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len, bool convert);
-
-/* Release result of ntdb_access_read/write. */
-void ntdb_access_release(struct ntdb_context *ntdb, const void *p);
-/* Commit result of ntdb_acces_write. */
-enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p);
-
-/* Clear an ondisk area. */
-enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len);
-
-/* Return a non-zero offset between >= start < end in this array (or end). */
-ntdb_off_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
- ntdb_off_t base,
- uint64_t start,
- uint64_t end);
-
-/* Return a zero offset in this array, or num. */
-ntdb_off_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
- uint64_t num);
-
-/* Allocate and make a copy of some offset. */
-void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len);
-
-/* Writes a converted copy of a record. */
-enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
- const void *rec, size_t len);
-
-/* Reads record and converts it */
-enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
- void *rec, size_t len);
-
-/* Bump the seqnum (caller checks for ntdb->flags & NTDB_SEQNUM) */
-void ntdb_inc_seqnum(struct ntdb_context *ntdb);
-
-/* lock.c: */
-/* Print message because another ntdb owns a lock we want. */
-enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call);
-
-/* If we fork, we no longer really own locks. */
-bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log);
-
-/* Lock/unlock a hash bucket. */
-enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
- unsigned int hbucket,
- int ltype);
-enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
- unsigned int hash, int ltype);
-
-/* For closing the file. */
-void ntdb_lock_cleanup(struct ntdb_context *ntdb);
-
-/* Lock/unlock a particular free bucket. */
-enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
- enum ntdb_lock_flags waitflag);
-void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off);
-
-/* Serialize transaction start. */
-enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype);
-void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype);
-
-/* Do we have any hash locks (ie. via ntdb_chainlock) ? */
-bool ntdb_has_hash_locks(struct ntdb_context *ntdb);
-
-/* Lock entire database. */
-enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
- enum ntdb_lock_flags flags, bool upgradable);
-void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype);
-enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start);
-
-/* Serialize db open. */
-enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
- int ltype, enum ntdb_lock_flags flags);
-void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype);
-bool ntdb_has_open_lock(struct ntdb_context *ntdb);
-
-/* Serialize db expand. */
-enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype);
-void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype);
-bool ntdb_has_expansion_lock(struct ntdb_context *ntdb);
-
-/* If it needs recovery, grab all the locks and do it. */
-enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb);
-
-/* Default lock and unlock functions. */
-int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *);
-int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *);
-
-/* transaction.c: */
-enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb);
-ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb);
-
-struct ntdb_context {
- /* Single list of all TDBs, to detect multiple opens. */
- struct ntdb_context *next;
-
- /* Filename of the database. */
- const char *name;
-
- /* Logging function */
- void (*log_fn)(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data);
- void *log_data;
-
- /* Open flags passed to ntdb_open. */
- int open_flags;
-
- /* low level (fnctl) lock functions. */
- int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *);
- int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *);
- void *lock_data;
-
- /* the ntdb flags passed to ntdb_open. */
- uint32_t flags;
-
- /* Our statistics. */
- struct ntdb_attribute_stats stats;
-
- /* The actual file information */
- struct ntdb_file *file;
-
- /* Hash function. */
- uint32_t (*hash_fn)(const void *key, size_t len, uint32_t seed, void *);
- void *hash_data;
- uint32_t hash_seed;
- /* Bits in toplevel hash table. */
- unsigned int hash_bits;
-
- /* Allocate and free functions. */
- void *(*alloc_fn)(const void *owner, size_t len, void *priv_data);
- void *(*expand_fn)(void *old, size_t newlen, void *priv_data);
- void (*free_fn)(void *old, void *priv_data);
- void *alloc_data;
-
- /* Our open hook, if any. */
- enum NTDB_ERROR (*openhook)(int fd, void *data);
- void *openhook_data;
-
- /* Set if we are in a transaction. */
- struct ntdb_transaction *transaction;
-
- /* What free table are we using? */
- ntdb_off_t ftable_off;
- unsigned int ftable;
-
- /* IO methods: changes for transactions. */
- const struct ntdb_methods *io;
-
- /* Direct access information */
- struct ntdb_access_hdr *access;
-};
-
-/* ntdb.c: */
-enum NTDB_ERROR COLD PRINTF_FMT(4, 5)
- ntdb_logerr(struct ntdb_context *ntdb,
- enum NTDB_ERROR ecode,
- enum ntdb_log_level level,
- const char *fmt, ...);
-
-static inline enum NTDB_ERROR ntdb_oob(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len,
- bool probe)
-{
- if (likely(off + len >= off)
- && likely(off + len <= ntdb->file->map_size)
- && likely(!probe)) {
- return NTDB_SUCCESS;
- }
- return ntdb->io->oob(ntdb, off, len, probe);
-}
-
-/* Convenience routine to get an offset. */
-static inline ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb,
- ntdb_off_t off)
-{
- return ntdb->io->read_off(ntdb, off);
-}
-
-/* Write an offset at an offset. */
-static inline enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb,
- ntdb_off_t off,
- ntdb_off_t val)
-{
- return ntdb->io->write_off(ntdb, off, val);
-}
-
-#ifdef NTDB_TRACE
-void ntdb_trace(struct ntdb_context *ntdb, const char *op);
-void ntdb_trace_seqnum(struct ntdb_context *ntdb, uint32_t seqnum, const char *op);
-void ntdb_trace_open(struct ntdb_context *ntdb, const char *op,
- unsigned hash_size, unsigned ntdb_flags, unsigned open_flags);
-void ntdb_trace_ret(struct ntdb_context *ntdb, const char *op, int ret);
-void ntdb_trace_retrec(struct ntdb_context *ntdb, const char *op, NTDB_DATA ret);
-void ntdb_trace_1rec(struct ntdb_context *ntdb, const char *op,
- NTDB_DATA rec);
-void ntdb_trace_1rec_ret(struct ntdb_context *ntdb, const char *op,
- NTDB_DATA rec, int ret);
-void ntdb_trace_1rec_retrec(struct ntdb_context *ntdb, const char *op,
- NTDB_DATA rec, NTDB_DATA ret);
-void ntdb_trace_2rec_flag_ret(struct ntdb_context *ntdb, const char *op,
- NTDB_DATA rec1, NTDB_DATA rec2, unsigned flag,
- int ret);
-void ntdb_trace_2rec_retrec(struct ntdb_context *ntdb, const char *op,
- NTDB_DATA rec1, NTDB_DATA rec2, NTDB_DATA ret);
-#else
-#define ntdb_trace(ntdb, op)
-#define ntdb_trace_seqnum(ntdb, seqnum, op)
-#define ntdb_trace_open(ntdb, op, hash_size, ntdb_flags, open_flags)
-#define ntdb_trace_ret(ntdb, op, ret)
-#define ntdb_trace_retrec(ntdb, op, ret)
-#define ntdb_trace_1rec(ntdb, op, rec)
-#define ntdb_trace_1rec_ret(ntdb, op, rec, ret)
-#define ntdb_trace_1rec_retrec(ntdb, op, rec, ret)
-#define ntdb_trace_2rec_flag_ret(ntdb, op, rec1, rec2, flag, ret)
-#define ntdb_trace_2rec_retrec(ntdb, op, rec1, rec2, ret)
-#endif /* !NTDB_TRACE */
-
-#endif
+++ /dev/null
-/*
- Unix SMB/CIFS implementation.
-
- Python interface to ntdb. Simply modified from tdb version.
-
- Copyright (C) 2004-2006 Tim Potter <tpot@samba.org>
- Copyright (C) 2007-2008 Jelmer Vernooij <jelmer@samba.org>
- Copyright (C) 2011 Rusty Russell <rusty@rustcorp.com.au>
-
- ** NOTE! The following LGPL license applies to the ntdb
- ** library. This does NOT imply that all of Samba is released
- ** under the LGPL
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-#include "replace.h"
-#include "system/filesys.h"
-
-/* Include ntdb headers */
-#include <ntdb.h>
-
-typedef struct {
- PyObject_HEAD
- struct ntdb_context *ctx;
- bool closed;
-} PyNtdbObject;
-
-static PyTypeObject PyNtdb;
-
-static void PyErr_SetTDBError(enum NTDB_ERROR e)
-{
- PyErr_SetObject(PyExc_RuntimeError,
- Py_BuildValue("(i,s)", e, ntdb_errorstr(e)));
-}
-
-static NTDB_DATA PyString_AsNtdb_Data(PyObject *data)
-{
- NTDB_DATA ret;
- ret.dptr = (unsigned char *)PyString_AsString(data);
- ret.dsize = PyString_Size(data);
- return ret;
-}
-
-static PyObject *PyString_FromNtdb_Data(NTDB_DATA data)
-{
- PyObject *ret = PyString_FromStringAndSize((const char *)data.dptr,
- data.dsize);
- free(data.dptr);
- return ret;
-}
-
-#define PyErr_NTDB_ERROR_IS_ERR_RAISE(ret) \
- if (ret != NTDB_SUCCESS) { \
- PyErr_SetTDBError(ret); \
- return NULL; \
- }
-
-#define PyNtdb_CHECK_CLOSED(pyobj) \
- if (pyobj->closed) {\
- PyErr_SetObject(PyExc_RuntimeError, \
- Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed")); \
- return NULL; \
- }
-
-static void stderr_log(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
-{
- fprintf(stderr, "%s:%s:%s\n",
- ntdb_name(ntdb), ntdb_errorstr(ecode), message);
-}
-
-static PyObject *py_ntdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs)
-{
- char *name = NULL;
- int ntdb_flags = NTDB_DEFAULT, flags = O_RDWR, mode = 0600;
- struct ntdb_context *ctx;
- PyNtdbObject *ret;
- union ntdb_attribute logattr;
- const char *kwnames[] = { "name", "ntdb_flags", "flags", "mode", NULL };
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii", cast_const2(char **, kwnames), &name, &ntdb_flags, &flags, &mode))
- return NULL;
-
- if (name == NULL) {
- ntdb_flags |= NTDB_INTERNAL;
- name = "<internal>";
- }
-
- logattr.log.base.attr = NTDB_ATTRIBUTE_LOG;
- logattr.log.base.next = NULL;
- logattr.log.fn = stderr_log;
- ctx = ntdb_open(name, ntdb_flags, flags, mode, &logattr);
- if (ctx == NULL) {
- PyErr_SetFromErrno(PyExc_IOError);
- return NULL;
- }
-
- ret = PyObject_New(PyNtdbObject, &PyNtdb);
- if (!ret) {
- ntdb_close(ctx);
- return NULL;
- }
-
- ret->ctx = ctx;
- ret->closed = false;
- return (PyObject *)ret;
-}
-
-static PyObject *obj_transaction_cancel(PyNtdbObject *self)
-{
- PyNtdb_CHECK_CLOSED(self);
- ntdb_transaction_cancel(self->ctx);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_transaction_commit(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- PyNtdb_CHECK_CLOSED(self);
- ret = ntdb_transaction_commit(self->ctx);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_transaction_prepare_commit(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- PyNtdb_CHECK_CLOSED(self);
- ret = ntdb_transaction_prepare_commit(self->ctx);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_transaction_start(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- PyNtdb_CHECK_CLOSED(self);
- ret = ntdb_transaction_start(self->ctx);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_lockall(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- PyNtdb_CHECK_CLOSED(self);
- ret = ntdb_lockall(self->ctx);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_unlockall(PyNtdbObject *self)
-{
- PyNtdb_CHECK_CLOSED(self);
- ntdb_unlockall(self->ctx);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_lockall_read(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- PyNtdb_CHECK_CLOSED(self);
- ret = ntdb_lockall_read(self->ctx);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_unlockall_read(PyNtdbObject *self)
-{
- PyNtdb_CHECK_CLOSED(self);
- ntdb_unlockall_read(self->ctx);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_close(PyNtdbObject *self)
-{
- int ret;
- if (self->closed)
- Py_RETURN_NONE;
- ret = ntdb_close(self->ctx);
- self->closed = true;
- if (ret != 0) {
- PyErr_SetTDBError(NTDB_ERR_IO);
- return NULL;
- }
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_get(PyNtdbObject *self, PyObject *args)
-{
- NTDB_DATA key, data;
- PyObject *py_key;
- enum NTDB_ERROR ret;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "O", &py_key))
- return NULL;
-
- key = PyString_AsNtdb_Data(py_key);
- ret = ntdb_fetch(self->ctx, key, &data);
- if (ret == NTDB_ERR_NOEXIST)
- Py_RETURN_NONE;
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- return PyString_FromNtdb_Data(data);
-}
-
-static PyObject *obj_append(PyNtdbObject *self, PyObject *args)
-{
- NTDB_DATA key, data;
- PyObject *py_key, *py_data;
- enum NTDB_ERROR ret;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "OO", &py_key, &py_data))
- return NULL;
-
- key = PyString_AsNtdb_Data(py_key);
- data = PyString_AsNtdb_Data(py_data);
-
- ret = ntdb_append(self->ctx, key, data);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_firstkey(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- NTDB_DATA key;
-
- PyNtdb_CHECK_CLOSED(self);
-
- ret = ntdb_firstkey(self->ctx, &key);
- if (ret == NTDB_ERR_NOEXIST)
- Py_RETURN_NONE;
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
-
- return PyString_FromNtdb_Data(key);
-}
-
-static PyObject *obj_nextkey(PyNtdbObject *self, PyObject *args)
-{
- NTDB_DATA key;
- PyObject *py_key;
- enum NTDB_ERROR ret;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "O", &py_key))
- return NULL;
-
- /* Malloc here, since ntdb_nextkey frees. */
- key.dsize = PyString_Size(py_key);
- key.dptr = malloc(key.dsize);
- memcpy(key.dptr, PyString_AsString(py_key), key.dsize);
-
- ret = ntdb_nextkey(self->ctx, &key);
- if (ret == NTDB_ERR_NOEXIST)
- Py_RETURN_NONE;
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
-
- return PyString_FromNtdb_Data(key);
-}
-
-static PyObject *obj_delete(PyNtdbObject *self, PyObject *args)
-{
- NTDB_DATA key;
- PyObject *py_key;
- enum NTDB_ERROR ret;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "O", &py_key))
- return NULL;
-
- key = PyString_AsNtdb_Data(py_key);
- ret = ntdb_delete(self->ctx, key);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_has_key(PyNtdbObject *self, PyObject *args)
-{
- NTDB_DATA key;
- PyObject *py_key;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "O", &py_key))
- return NULL;
-
- key = PyString_AsNtdb_Data(py_key);
- if (ntdb_exists(self->ctx, key))
- return Py_True;
- return Py_False;
-}
-
-static PyObject *obj_store(PyNtdbObject *self, PyObject *args)
-{
- NTDB_DATA key, value;
- enum NTDB_ERROR ret;
- int flag = NTDB_REPLACE;
- PyObject *py_key, *py_value;
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "OO|i", &py_key, &py_value, &flag))
- return NULL;
-
- key = PyString_AsNtdb_Data(py_key);
- value = PyString_AsNtdb_Data(py_value);
-
- ret = ntdb_store(self->ctx, key, value, flag);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_add_flag(PyNtdbObject *self, PyObject *args)
-{
- unsigned flag;
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "I", &flag))
- return NULL;
-
- ntdb_add_flag(self->ctx, flag);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_remove_flag(PyNtdbObject *self, PyObject *args)
-{
- unsigned flag;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyArg_ParseTuple(args, "I", &flag))
- return NULL;
-
- ntdb_remove_flag(self->ctx, flag);
- Py_RETURN_NONE;
-}
-
-typedef struct {
- PyObject_HEAD
- NTDB_DATA current;
- bool end;
- PyNtdbObject *iteratee;
-} PyNtdbIteratorObject;
-
-static PyObject *ntdb_iter_next(PyNtdbIteratorObject *self)
-{
- enum NTDB_ERROR e;
- PyObject *ret;
- if (self->end)
- return NULL;
- ret = PyString_FromStringAndSize((const char *)self->current.dptr,
- self->current.dsize);
- e = ntdb_nextkey(self->iteratee->ctx, &self->current);
- if (e == NTDB_ERR_NOEXIST)
- self->end = true;
- else
- PyErr_NTDB_ERROR_IS_ERR_RAISE(e);
- return ret;
-}
-
-static void ntdb_iter_dealloc(PyNtdbIteratorObject *self)
-{
- Py_DECREF(self->iteratee);
- PyObject_Del(self);
-}
-
-PyTypeObject PyNtdbIterator = {
- .tp_name = "Iterator",
- .tp_basicsize = sizeof(PyNtdbIteratorObject),
- .tp_iternext = (iternextfunc)ntdb_iter_next,
- .tp_dealloc = (destructor)ntdb_iter_dealloc,
- .tp_flags = Py_TPFLAGS_DEFAULT,
- .tp_iter = PyObject_SelfIter,
-};
-
-static PyObject *ntdb_object_iter(PyNtdbObject *self)
-{
- PyNtdbIteratorObject *ret;
- enum NTDB_ERROR e;
- PyNtdb_CHECK_CLOSED(self);
-
- ret = PyObject_New(PyNtdbIteratorObject, &PyNtdbIterator);
- if (!ret)
- return NULL;
- e = ntdb_firstkey(self->ctx, &ret->current);
- if (e == NTDB_ERR_NOEXIST) {
- ret->end = true;
- } else {
- PyErr_NTDB_ERROR_IS_ERR_RAISE(e);
- ret->end = false;
- }
- ret->iteratee = self;
- Py_INCREF(self);
- return (PyObject *)ret;
-}
-
-static PyObject *obj_clear(PyNtdbObject *self)
-{
- enum NTDB_ERROR ret;
- PyNtdb_CHECK_CLOSED(self);
- ret = ntdb_wipe_all(self->ctx);
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- Py_RETURN_NONE;
-}
-
-static PyObject *obj_enable_seqnum(PyNtdbObject *self)
-{
- PyNtdb_CHECK_CLOSED(self);
- ntdb_add_flag(self->ctx, NTDB_SEQNUM);
- Py_RETURN_NONE;
-}
-
-static PyMethodDef ntdb_object_methods[] = {
- { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS,
- "S.transaction_cancel() -> None\n"
- "Cancel the currently active transaction." },
- { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS,
- "S.transaction_commit() -> None\n"
- "Commit the currently active transaction." },
- { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS,
- "S.transaction_prepare_commit() -> None\n"
- "Prepare to commit the currently active transaction" },
- { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS,
- "S.transaction_start() -> None\n"
- "Start a new transaction." },
- { "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL },
- { "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL },
- { "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL },
- { "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL },
- { "close", (PyCFunction)obj_close, METH_NOARGS, NULL },
- { "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n"
- "Fetch a value." },
- { "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n"
- "Append data to an existing key." },
- { "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n"
- "Return the first key in this database." },
- { "nextkey", (PyCFunction)obj_nextkey, METH_NOARGS, "S.nextkey(key) -> data\n"
- "Return the next key in this database." },
- { "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n"
- "Delete an entry." },
- { "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> None\n"
- "Check whether key exists in this database." },
- { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None"
- "Store data." },
- { "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" },
- { "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" },
- { "iterkeys", (PyCFunction)ntdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" },
- { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n"
- "Wipe the entire database." },
- { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS,
- "S.enable_seqnum() -> None" },
- { NULL }
-};
-
-static PyObject *obj_get_flags(PyNtdbObject *self, void *closure)
-{
- PyNtdb_CHECK_CLOSED(self);
- return PyInt_FromLong(ntdb_get_flags(self->ctx));
-}
-
-static PyObject *obj_get_filename(PyNtdbObject *self, void *closure)
-{
- PyNtdb_CHECK_CLOSED(self);
- return PyString_FromString(ntdb_name(self->ctx));
-}
-
-static PyObject *obj_get_seqnum(PyNtdbObject *self, void *closure)
-{
- PyNtdb_CHECK_CLOSED(self);
- return PyInt_FromLong(ntdb_get_seqnum(self->ctx));
-}
-
-
-static PyGetSetDef ntdb_object_getsetters[] = {
- { cast_const(char *, "flags"), (getter)obj_get_flags, NULL, NULL },
- { cast_const(char *, "filename"), (getter)obj_get_filename, NULL,
- cast_const(char *, "The filename of this NTDB file.")},
- { cast_const(char *, "seqnum"), (getter)obj_get_seqnum, NULL, NULL },
- { NULL }
-};
-
-static PyObject *ntdb_object_repr(PyNtdbObject *self)
-{
- if (ntdb_get_flags(self->ctx) & NTDB_INTERNAL) {
- return PyString_FromString("Ntdb(<internal>)");
- } else {
- return PyString_FromFormat("Ntdb('%s')", ntdb_name(self->ctx));
- }
-}
-
-static void ntdb_object_dealloc(PyNtdbObject *self)
-{
- if (!self->closed)
- ntdb_close(self->ctx);
- self->ob_type->tp_free(self);
-}
-
-static PyObject *obj_getitem(PyNtdbObject *self, PyObject *key)
-{
- NTDB_DATA tkey, val;
- enum NTDB_ERROR ret;
-
- PyNtdb_CHECK_CLOSED(self);
-
- if (!PyString_Check(key)) {
- PyErr_SetString(PyExc_TypeError, "Expected string as key");
- return NULL;
- }
-
- tkey.dptr = (unsigned char *)PyString_AsString(key);
- tkey.dsize = PyString_Size(key);
-
- ret = ntdb_fetch(self->ctx, tkey, &val);
- if (ret == NTDB_ERR_NOEXIST) {
- PyErr_SetString(PyExc_KeyError, "No such NTDB entry");
- return NULL;
- } else {
- PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
- return PyString_FromNtdb_Data(val);
- }
-}
-
-static int obj_setitem(PyNtdbObject *self, PyObject *key, PyObject *value)
-{
- NTDB_DATA tkey, tval;
- enum NTDB_ERROR ret;
- if (self->closed) {
- PyErr_SetObject(PyExc_RuntimeError,
- Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed"));
- return -1;
- }
-
- if (!PyString_Check(key)) {
- PyErr_SetString(PyExc_TypeError, "Expected string as key");
- return -1;
- }
-
- tkey = PyString_AsNtdb_Data(key);
-
- if (value == NULL) {
- ret = ntdb_delete(self->ctx, tkey);
- } else {
- if (!PyString_Check(value)) {
- PyErr_SetString(PyExc_TypeError, "Expected string as value");
- return -1;
- }
-
- tval = PyString_AsNtdb_Data(value);
-
- ret = ntdb_store(self->ctx, tkey, tval, NTDB_REPLACE);
- }
-
- if (ret != NTDB_SUCCESS) {
- PyErr_SetTDBError(ret);
- return -1;
- }
-
- return ret;
-}
-
-static PyMappingMethods ntdb_object_mapping = {
- .mp_subscript = (binaryfunc)obj_getitem,
- .mp_ass_subscript = (objobjargproc)obj_setitem,
-};
-
-static PyTypeObject PyNtdb = {
- .tp_name = "ntdb.Ntdb",
- .tp_basicsize = sizeof(PyNtdbObject),
- .tp_methods = ntdb_object_methods,
- .tp_getset = ntdb_object_getsetters,
- .tp_new = py_ntdb_open,
- .tp_doc = "A NTDB file",
- .tp_repr = (reprfunc)ntdb_object_repr,
- .tp_dealloc = (destructor)ntdb_object_dealloc,
- .tp_as_mapping = &ntdb_object_mapping,
- .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER,
- .tp_iter = (getiterfunc)ntdb_object_iter,
-};
-
-static PyMethodDef ntdb_methods[] = {
- { "open", (PyCFunction)py_ntdb_open, METH_VARARGS|METH_KEYWORDS, "open(name, hash_size=0, ntdb_flags=NTDB_DEFAULT, flags=O_RDWR, mode=0600)\n"
- "Open a NTDB file." },
- { NULL }
-};
-
-void initntdb(void);
-void initntdb(void)
-{
- PyObject *m;
-
- if (PyType_Ready(&PyNtdb) < 0)
- return;
-
- if (PyType_Ready(&PyNtdbIterator) < 0)
- return;
-
- m = Py_InitModule3("ntdb", ntdb_methods, "NTDB is a simple key-value database similar to GDBM that supports multiple writers.");
- if (m == NULL)
- return;
-
- PyModule_AddObject(m, "REPLACE", PyInt_FromLong(NTDB_REPLACE));
- PyModule_AddObject(m, "INSERT", PyInt_FromLong(NTDB_INSERT));
- PyModule_AddObject(m, "MODIFY", PyInt_FromLong(NTDB_MODIFY));
-
- PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(NTDB_DEFAULT));
- PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(NTDB_INTERNAL));
- PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(NTDB_NOLOCK));
- PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(NTDB_NOMMAP));
- PyModule_AddObject(m, "CONVERT", PyInt_FromLong(NTDB_CONVERT));
- PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(NTDB_NOSYNC));
- PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(NTDB_SEQNUM));
- PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(NTDB_ALLOW_NESTING));
-
- PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText"));
-
- PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION));
-
- Py_INCREF(&PyNtdb);
- PyModule_AddObject(m, "Ntdb", (PyObject *)&PyNtdb);
-
- Py_INCREF(&PyNtdbIterator);
-}
+++ /dev/null
- /*
- Trivial Database 2: human-readable summary code
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/tally/tally.h>
-
-#define SUMMARY_FORMAT \
- "Size of file/data: %zu/%zu\n" \
- "Number of records: %zu\n" \
- "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
- "Smallest/average/largest data: %zu/%zu/%zu\n%s" \
- "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
- "Number of free records: %zu\n" \
- "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
- "Number of uncoalesced records: %zu\n" \
- "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
- "Toplevel hash used: %u of %u\n" \
- "Number of hashes: %zu\n" \
- "Smallest/average/largest hash chains: %zu/%zu/%zu\n%s" \
- "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
-
-#define BUCKET_SUMMARY_FORMAT_A \
- "Free bucket %zu: total entries %zu.\n" \
- "Smallest/average/largest length: %zu/%zu/%zu\n%s"
-#define BUCKET_SUMMARY_FORMAT_B \
- "Free bucket %zu-%zu: total entries %zu.\n" \
- "Smallest/average/largest length: %zu/%zu/%zu\n%s"
-#define CAPABILITY_FORMAT \
- "Capability %llu%s\n"
-
-#define HISTO_WIDTH 70
-#define HISTO_HEIGHT 20
-
-static ntdb_off_t count_hash(struct ntdb_context *ntdb,
- ntdb_off_t hash_off,
- ntdb_off_t num)
-{
- const ntdb_off_t *h;
- ntdb_off_t i, count = 0;
-
- h = ntdb_access_read(ntdb, hash_off, sizeof(*h) * num, true);
- if (NTDB_PTR_IS_ERR(h)) {
- return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(h));
- }
- for (i = 0; i < num; i++)
- count += (h[i] != 0);
-
- ntdb_access_release(ntdb, h);
- return count;
-}
-
-static enum NTDB_ERROR summarize(struct ntdb_context *ntdb,
- struct tally *ftables,
- struct tally *fr,
- struct tally *keys,
- struct tally *data,
- struct tally *extra,
- struct tally *uncoal,
- struct tally *hashes,
- size_t *num_caps)
-{
- ntdb_off_t off;
- ntdb_len_t len;
- ntdb_len_t unc = 0;
-
- for (off = sizeof(struct ntdb_header);
- off < ntdb->file->map_size;
- off += len) {
- const union {
- struct ntdb_used_record u;
- struct ntdb_free_record f;
- struct ntdb_recovery_record r;
- } *p;
- /* We might not be able to get the whole thing. */
- p = ntdb_access_read(ntdb, off, sizeof(p->f), true);
- if (NTDB_PTR_IS_ERR(p)) {
- return NTDB_PTR_ERR(p);
- }
- if (frec_magic(&p->f) != NTDB_FREE_MAGIC) {
- if (unc > 1) {
- tally_add(uncoal, unc);
- unc = 0;
- }
- }
-
- if (p->r.magic == NTDB_RECOVERY_INVALID_MAGIC
- || p->r.magic == NTDB_RECOVERY_MAGIC) {
- len = sizeof(p->r) + p->r.max_len;
- } else if (frec_magic(&p->f) == NTDB_FREE_MAGIC) {
- len = frec_len(&p->f);
- tally_add(fr, len);
- len += sizeof(p->u);
- unc++;
- } else if (rec_magic(&p->u) == NTDB_USED_MAGIC) {
- len = sizeof(p->u)
- + rec_key_length(&p->u)
- + rec_data_length(&p->u)
- + rec_extra_padding(&p->u);
-
- tally_add(keys, rec_key_length(&p->u));
- tally_add(data, rec_data_length(&p->u));
- tally_add(extra, rec_extra_padding(&p->u));
- } else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) {
- ntdb_off_t count = count_hash(ntdb,
- off + sizeof(p->u),
- 1 << ntdb->hash_bits);
- if (NTDB_OFF_IS_ERR(count)) {
- return NTDB_OFF_TO_ERR(count);
- }
- tally_add(hashes, count);
- tally_add(extra, rec_extra_padding(&p->u));
- len = sizeof(p->u)
- + rec_data_length(&p->u)
- + rec_extra_padding(&p->u);
- } else if (rec_magic(&p->u) == NTDB_FTABLE_MAGIC) {
- len = sizeof(p->u)
- + rec_data_length(&p->u)
- + rec_extra_padding(&p->u);
- tally_add(ftables, rec_data_length(&p->u));
- tally_add(extra, rec_extra_padding(&p->u));
- } else if (rec_magic(&p->u) == NTDB_CHAIN_MAGIC) {
- len = sizeof(p->u)
- + rec_data_length(&p->u)
- + rec_extra_padding(&p->u);
- tally_add(hashes,
- rec_data_length(&p->u)/sizeof(ntdb_off_t));
- tally_add(extra, rec_extra_padding(&p->u));
- } else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) {
- len = sizeof(p->u)
- + rec_data_length(&p->u)
- + rec_extra_padding(&p->u);
- (*num_caps)++;
- } else {
- len = dead_space(ntdb, off);
- if (NTDB_OFF_IS_ERR(len)) {
- return NTDB_OFF_TO_ERR(len);
- }
- }
- ntdb_access_release(ntdb, p);
- }
- if (unc)
- tally_add(uncoal, unc);
- return NTDB_SUCCESS;
-}
-
-static void add_capabilities(struct ntdb_context *ntdb, char *summary)
-{
- ntdb_off_t off, next;
- const struct ntdb_capability *cap;
- size_t count = 0;
-
- /* Append to summary. */
- summary += strlen(summary);
-
- off = ntdb_read_off(ntdb, offsetof(struct ntdb_header, capabilities));
- if (NTDB_OFF_IS_ERR(off))
- return;
-
- /* Walk capability list. */
- for (; off; off = next) {
- cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
- if (NTDB_PTR_IS_ERR(cap)) {
- break;
- }
- count++;
- sprintf(summary, CAPABILITY_FORMAT,
- cap->type & NTDB_CAP_TYPE_MASK,
- /* Noopen? How did we get here? */
- (cap->type & NTDB_CAP_NOOPEN) ? " (unopenable)"
- : ((cap->type & NTDB_CAP_NOWRITE)
- && (cap->type & NTDB_CAP_NOCHECK)) ? " (uncheckable,read-only)"
- : (cap->type & NTDB_CAP_NOWRITE) ? " (read-only)"
- : (cap->type & NTDB_CAP_NOCHECK) ? " (uncheckable)"
- : "");
- summary += strlen(summary);
- next = cap->next;
- ntdb_access_release(ntdb, cap);
- }
-}
-
-_PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
- enum ntdb_summary_flags flags,
- char **summary)
-{
- ntdb_len_t len;
- size_t num_caps = 0;
- struct tally *ftables, *freet, *keys, *data, *extra, *uncoal, *hashes;
- char *freeg, *keysg, *datag, *extrag, *uncoalg, *hashesg;
- enum NTDB_ERROR ecode;
-
- freeg = keysg = datag = extrag = uncoalg = hashesg = NULL;
-
- ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- ecode = ntdb_lock_expand(ntdb, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- ntdb_allrecord_unlock(ntdb, F_RDLCK);
- return ecode;
- }
-
- /* Start stats off empty. */
- ftables = tally_new(HISTO_HEIGHT);
- freet = tally_new(HISTO_HEIGHT);
- keys = tally_new(HISTO_HEIGHT);
- data = tally_new(HISTO_HEIGHT);
- extra = tally_new(HISTO_HEIGHT);
- uncoal = tally_new(HISTO_HEIGHT);
- hashes = tally_new(HISTO_HEIGHT);
- if (!ftables || !freet || !keys || !data || !extra
- || !uncoal || !hashes) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "ntdb_summary: failed to allocate"
- " tally structures");
- goto unlock;
- }
-
- ecode = summarize(ntdb, ftables, freet, keys, data, extra,
- uncoal, hashes, &num_caps);
- if (ecode != NTDB_SUCCESS) {
- goto unlock;
- }
-
- if (flags & NTDB_SUMMARY_HISTOGRAMS) {
- freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
- keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
- datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
- extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
- uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
- hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
- }
-
- /* 20 is max length of a %llu. */
- len = strlen(SUMMARY_FORMAT) + 33*20 + 1
- + (freeg ? strlen(freeg) : 0)
- + (keysg ? strlen(keysg) : 0)
- + (datag ? strlen(datag) : 0)
- + (extrag ? strlen(extrag) : 0)
- + (uncoalg ? strlen(uncoalg) : 0)
- + (hashesg ? strlen(hashesg) : 0)
- + num_caps * (strlen(CAPABILITY_FORMAT) + 20
- + strlen(" (uncheckable,read-only)"));
-
- *summary = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data);
- if (!*summary) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "ntdb_summary: failed to allocate string");
- goto unlock;
- }
-
- sprintf(*summary, SUMMARY_FORMAT,
- (size_t)ntdb->file->map_size,
- tally_total(keys, NULL) + tally_total(data, NULL),
- tally_num(keys),
- tally_min(keys), tally_mean(keys), tally_max(keys),
- keysg ? keysg : "",
- tally_min(data), tally_mean(data), tally_max(data),
- datag ? datag : "",
- tally_min(extra), tally_mean(extra), tally_max(extra),
- extrag ? extrag : "",
- tally_num(freet),
- tally_min(freet), tally_mean(freet), tally_max(freet),
- freeg ? freeg : "",
- tally_total(uncoal, NULL),
- tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
- uncoalg ? uncoalg : "",
- (unsigned)count_hash(ntdb, sizeof(struct ntdb_header),
- 1 << ntdb->hash_bits),
- 1 << ntdb->hash_bits,
- tally_num(hashes),
- tally_min(hashes), tally_mean(hashes), tally_max(hashes),
- hashesg ? hashesg : "",
- tally_total(keys, NULL) * 100.0 / ntdb->file->map_size,
- tally_total(data, NULL) * 100.0 / ntdb->file->map_size,
- tally_total(extra, NULL) * 100.0 / ntdb->file->map_size,
- tally_total(freet, NULL) * 100.0 / ntdb->file->map_size,
- (tally_num(keys) + tally_num(freet) + tally_num(hashes))
- * sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size,
- tally_num(ftables) * sizeof(struct ntdb_freetable)
- * 100.0 / ntdb->file->map_size,
- (tally_total(hashes, NULL) * sizeof(ntdb_off_t)
- + (sizeof(ntdb_off_t) << ntdb->hash_bits))
- * 100.0 / ntdb->file->map_size);
-
- add_capabilities(ntdb, *summary);
-
-unlock:
- ntdb->free_fn(freeg, ntdb->alloc_data);
- ntdb->free_fn(keysg, ntdb->alloc_data);
- ntdb->free_fn(datag, ntdb->alloc_data);
- ntdb->free_fn(extrag, ntdb->alloc_data);
- ntdb->free_fn(uncoalg, ntdb->alloc_data);
- ntdb->free_fn(hashesg, ntdb->alloc_data);
- ntdb->free_fn(freet, ntdb->alloc_data);
- ntdb->free_fn(keys, ntdb->alloc_data);
- ntdb->free_fn(data, ntdb->alloc_data);
- ntdb->free_fn(extra, ntdb->alloc_data);
- ntdb->free_fn(uncoal, ntdb->alloc_data);
- ntdb->free_fn(ftables, ntdb->alloc_data);
- ntdb->free_fn(hashes, ntdb->alloc_data);
-
- ntdb_allrecord_unlock(ntdb, F_RDLCK);
- ntdb_unlock_expand(ntdb, F_RDLCK);
- return ecode;
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include <ccan/hash/hash.h>
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-/* We use the same seed which we saw a failure on. */
-static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
-{
- return hash64_stable((const unsigned char *)key, len,
- *(uint64_t *)p);
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- uint64_t seed = 16014841315512641303ULL;
- union ntdb_attribute fixed_hattr
- = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = fixedhash,
- .data = &seed } };
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
- NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
-
- fixed_hattr.base.next = &tap_log_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-12-store.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* We seemed to lose some keys.
- * Insert and check they're in there! */
- for (j = 0; j < 500; j++) {
- NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(d, data));
- free(d.dptr);
- }
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h" // For NTDB_TOPLEVEL_HASH_BITS
-#include <ccan/hash/hash.h>
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-/* We rig the hash so adjacent-numbered records always clash. */
-static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv)
-{
- return *((const unsigned int *)key) / 2;
-}
-
-/* We use the same seed which we saw a failure on. */
-static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
-{
- return hash64_stable((const unsigned char *)key, len,
- *(uint64_t *)p);
-}
-
-static bool store_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA d, data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < 1000; i++) {
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- ntdb_fetch(ntdb, key, &d);
- if (!ntdb_deq(d, data))
- return false;
- free(d.dptr);
- }
- return true;
-}
-
-static void test_val(struct ntdb_context *ntdb, uint64_t val)
-{
- uint64_t v;
- NTDB_DATA key = { (unsigned char *)&v, sizeof(v) };
- NTDB_DATA d, data = { (unsigned char *)&v, sizeof(v) };
-
- /* Insert an entry, then delete it. */
- v = val;
- /* Delete should fail. */
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Insert should succeed. */
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Delete should succeed. */
- ok1(ntdb_delete(ntdb, key) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Re-add it, then add collision. */
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- v = val + 1;
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Can find both? */
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
- v = val;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
-
- /* Delete second one. */
- v = val + 1;
- ok1(ntdb_delete(ntdb, key) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Re-add */
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Now, try deleting first one. */
- v = val;
- ok1(ntdb_delete(ntdb, key) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Can still find second? */
- v = val + 1;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
-
- /* Now, this will be ideally placed. */
- v = val + 2;
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* This will collide with both. */
- v = val;
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
-
- /* We can still find them all, right? */
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
- v = val + 1;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
- v = val + 2;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
-
- /* And if we delete val + 1, that val + 2 should not move! */
- v = val + 1;
- ok1(ntdb_delete(ntdb, key) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- v = val;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
- v = val + 2;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == data.dsize);
- free(d.dptr);
-
- /* Delete those two, so we are empty. */
- ok1(ntdb_delete(ntdb, key) == 0);
- v = val;
- ok1(ntdb_delete(ntdb, key) == 0);
-
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- uint64_t seed = 16014841315512641303ULL;
- union ntdb_attribute clash_hattr
- = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = clash } };
- union ntdb_attribute fixed_hattr
- = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = fixedhash,
- .data = &seed } };
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- /* These two values gave trouble before. */
- int vals[] = { 755, 837 };
-
- clash_hattr.base.next = &tap_log_attr;
- fixed_hattr.base.next = &tap_log_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0])
- * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Check start of hash table. */
- test_val(ntdb, 0);
-
- /* Check end of hash table. */
- test_val(ntdb, -1ULL);
-
- /* Check mixed bitpattern. */
- test_val(ntdb, 0x123456789ABCDEF0ULL);
-
- ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0));
- ntdb_close(ntdb);
-
- /* Deleting these entries in the db gave problems. */
- ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(store_records(ntdb));
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
- NTDB_DATA key;
-
- key.dptr = (unsigned char *)&vals[j];
- key.dsize = sizeof(vals[j]);
- ok1(ntdb_delete(ntdb, key) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- }
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static bool test_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < 1000; i++) {
- if (ntdb_exists(ntdb, key))
- return false;
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- if (!ntdb_exists(ntdb, key))
- return false;
- }
-
- for (i = 0; i < 1000; i++) {
- if (!ntdb_exists(ntdb, key))
- return false;
- if (ntdb_delete(ntdb, key) != 0)
- return false;
- if (ntdb_exists(ntdb, key))
- return false;
- }
- return true;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-14-exists.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (ok1(ntdb))
- ok1(test_records(ntdb));
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static bool add_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < 1000; i++) {
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- }
- return true;
-}
-
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-16-wipe_all.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (ok1(ntdb)) {
- NTDB_DATA key;
- ok1(add_records(ntdb));
- ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
- ok1(ntdb_firstkey(ntdb, &key) == NTDB_ERR_NOEXIST);
- ntdb_close(ntdb);
- }
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include <ccan/hash/hash.h>
-#include <assert.h>
-
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static const struct ntdb_context *curr_ntdb;
-static const struct ntdb_file *curr_file;
-
-static int owner_null_count,
- owner_weird_count, alloc_count, free_count, expand_count;
-
-static void *test_alloc(const void *owner, size_t len, void *priv_data)
-{
- void *ret;
-
- if (!owner) {
- owner_null_count++;
- } else if (owner != curr_ntdb && owner != curr_file) {
- owner_weird_count++;
- }
-
- alloc_count++;
- ret = malloc(len);
-
- /* The first time, this is the current ntdb, next is
- * for the file struct. */
- if (!owner) {
- if (!curr_ntdb) {
- curr_ntdb = ret;
- } else if (!curr_file) {
- curr_file = ret;
- }
- }
- assert(priv_data == &owner_weird_count);
- return ret;
-}
-
-static void *test_expand(void *old, size_t newlen, void *priv_data)
-{
- expand_count++;
-
- assert(priv_data == &owner_weird_count);
- return realloc(old, newlen);
-}
-
-static void test_free(void *old, void *priv_data)
-{
- assert(priv_data == &owner_weird_count);
- if (old) {
- free_count++;
- }
- free(old);
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- union ntdb_attribute alloc_attr;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
- NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
-
- alloc_attr.base.next = &tap_log_attr;
- alloc_attr.base.attr = NTDB_ATTRIBUTE_ALLOCATOR;
-
- alloc_attr.alloc.alloc = test_alloc;
- alloc_attr.alloc.expand = test_expand;
- alloc_attr.alloc.free = test_free;
- alloc_attr.alloc.priv_data = &owner_weird_count;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 700 * 3 + 4) + 1);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- curr_ntdb = NULL;
- curr_file = NULL;
- ntdb = ntdb_open("run-20-alloc-attr.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &alloc_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- for (j = 0; j < 700; j++) {
- NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(d, data));
- test_free(d.dptr, &owner_weird_count);
- }
- ntdb_close(ntdb);
-
- ok1(owner_null_count == 2+i*2);
- ok1(owner_weird_count == 0);
- ok1(alloc_count == free_count);
- ok1(expand_count != 0);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expected)
-{
- if (!ntdb_deq(data, *expected))
- return NTDB_ERR_EINVAL;
- return NTDB_SUCCESS;
-}
-
-static enum NTDB_ERROR parse_err(NTDB_DATA key, NTDB_DATA data, void *unused)
-{
- return 100;
-}
-
-static bool test_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < 1000; i++) {
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- }
-
- for (i = 0; i < 1000; i++) {
- if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_SUCCESS)
- return false;
- }
-
- if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_ERR_NOEXIST)
- return false;
-
- /* Test error return from parse function. */
- i = 0;
- if (ntdb_parse_record(ntdb, key, parse_err, NULL) != 100)
- return false;
-
- return true;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-21-parse_record.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (ok1(ntdb))
- ok1(test_records(ntdb));
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h" // struct ntdb_context
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include <stdlib.h>
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- unsigned char *buffer;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data;
-
- buffer = malloc(1000);
- for (i = 0; i < 1000; i++)
- buffer[i] = i;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-55-transaction.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(ntdb_transaction_start(ntdb) == 0);
- data.dptr = buffer;
- data.dsize = 1000;
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
- ok1(data.dsize == 1000);
- ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
- free(data.dptr);
-
- /* Cancelling a transaction means no store */
- ntdb_transaction_cancel(ntdb);
- ok1(ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_ERR_NOEXIST);
-
- /* Commit the transaction. */
- ok1(ntdb_transaction_start(ntdb) == 0);
- data.dptr = buffer;
- data.dsize = 1000;
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
- ok1(data.dsize == 1000);
- ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
- free(data.dptr);
- ok1(ntdb_transaction_commit(ntdb) == 0);
- ok1(ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
- ok1(data.dsize == 1000);
- ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
- free(data.dptr);
-
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- free(buffer);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h" // struct ntdb_context
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include <stdlib.h>
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4), d;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 12 + 1);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-60-transaction.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
-
- ok1(ntdb_transaction_start(ntdb) == 0);
- /* Do an identical replace. */
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- ok1(ntdb_transaction_commit(ntdb) == 0);
-
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(data, d));
- free(d.dptr);
- ntdb_close(ntdb);
-
- /* Reopen, fetch. */
- ntdb = ntdb_open("api-60-transaction.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(data, d));
- free(d.dptr);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-80-ntdb_fd.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- continue;
-
- if (flags[i] & NTDB_INTERNAL)
- ok1(ntdb_fd(ntdb) == -1);
- else
- ok1(ntdb_fd(ntdb) > 2);
- ntdb_close(ntdb);
- ok1(tap_log_messages == 0);
- }
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include <stdlib.h>
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i, seq;
- struct ntdb_context *ntdb;
- NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-81-seqnum.ntdb",
- flags[i]|NTDB_SEQNUM|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- continue;
-
- seq = 0;
- ok1(ntdb_get_seqnum(ntdb) == seq);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
- /* Fetch doesn't change seqnum */
- if (ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS))
- free(d.dptr);
- ok1(ntdb_get_seqnum(ntdb) == seq);
- ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
-
- ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
- /* Empty append works */
- ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
-
- ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
-
- if (!(flags[i] & NTDB_INTERNAL)) {
- ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
- ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
- ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == ++seq);
- ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS);
- ok1(ntdb_get_seqnum(ntdb) == seq);
-
- ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_get_seqnum(ntdb) == seq + 1);
- ntdb_transaction_cancel(ntdb);
- ok1(ntdb_get_seqnum(ntdb) == seq);
- }
- ntdb_close(ntdb);
- ok1(tap_log_messages == 0);
- }
- return exit_status();
-}
+++ /dev/null
-#include "../private.h" // for ntdb_fcntl_unlock
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include <errno.h>
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
- void *_err)
-{
- int *lock_err = _err;
- struct flock fl;
- int ret;
-
- if (*lock_err) {
- errno = *lock_err;
- return -1;
- }
-
- do {
- fl.l_type = rw;
- fl.l_whence = SEEK_SET;
- fl.l_start = off;
- fl.l_len = len;
-
- if (waitflag)
- ret = fcntl(fd, F_SETLKW, &fl);
- else
- ret = fcntl(fd, F_SETLK, &fl);
- } while (ret != 0 && errno == EINTR);
-
- return ret;
-}
-
-static int trav_err;
-static int trav(struct ntdb_context *ntdb, NTDB_DATA k, NTDB_DATA d, int *terr)
-{
- *terr = trav_err;
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- union ntdb_attribute lock_attr;
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
- int lock_err;
-
- lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
- lock_attr.base.next = &tap_log_attr;
- lock_attr.flock.lock = mylock;
- lock_attr.flock.unlock = ntdb_fcntl_unlock;
- lock_attr.flock.data = &lock_err;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 81);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- NTDB_DATA d;
-
- /* Nonblocking open; expect no error message. */
- lock_err = EAGAIN;
- ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
- ok(errno == lock_err, "Errno is %u", errno);
- ok1(!ntdb);
- ok1(tap_log_messages == 0);
-
- lock_err = EINTR;
- ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
- ok(errno == lock_err, "Errno is %u", errno);
- ok1(!ntdb);
- ok1(tap_log_messages == 0);
-
- /* Forced fail open. */
- lock_err = ENOMEM;
- ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
- ok1(errno == lock_err);
- ok1(!ntdb);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- lock_err = 0;
- ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
- if (!ok1(ntdb))
- continue;
- ok1(tap_log_messages == 0);
-
- /* Nonblocking store. */
- lock_err = EAGAIN;
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- /* Nonblocking fetch. */
- lock_err = EAGAIN;
- ok1(!ntdb_exists(ntdb, key));
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(!ntdb_exists(ntdb, key));
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(!ntdb_exists(ntdb, key));
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- lock_err = EAGAIN;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- /* Nonblocking delete. */
- lock_err = EAGAIN;
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- /* Nonblocking locks. */
- lock_err = EAGAIN;
- ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- lock_err = EAGAIN;
- ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- lock_err = EAGAIN;
- ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
- /* This actually does divide and conquer. */
- ok1(tap_log_messages > 0);
- tap_log_messages = 0;
-
- lock_err = EAGAIN;
- ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages > 0);
- tap_log_messages = 0;
-
- /* Nonblocking traverse; go nonblock partway through. */
- lock_err = 0;
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- /* Need two entries to ensure two lock attempts! */
- ok1(ntdb_store(ntdb, ntdb_mkdata("key2", 4), data,
- NTDB_REPLACE) == 0);
- trav_err = EAGAIN;
- ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- trav_err = EINTR;
- lock_err = 0;
- ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- trav_err = ENOMEM;
- lock_err = 0;
- ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- /* Nonblocking transactions. */
- lock_err = EAGAIN;
- ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = EINTR;
- ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
- lock_err = ENOMEM;
- ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- tap_log_messages = 0;
-
- /* Nonblocking transaction prepare. */
- lock_err = 0;
- ok1(ntdb_transaction_start(ntdb) == 0);
- ok1(ntdb_delete(ntdb, key) == 0);
-
- lock_err = EAGAIN;
- ok1(ntdb_transaction_prepare_commit(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
-
- lock_err = 0;
- ok1(ntdb_transaction_prepare_commit(ntdb) == 0);
- ok1(ntdb_transaction_commit(ntdb) == 0);
-
- /* And the transaction was committed, right? */
- ok1(!ntdb_exists(ntdb, key));
- ntdb_close(ntdb);
- ok1(tap_log_messages == 0);
- }
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "external-agent.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-#define KEY_STR "key"
-
-static enum NTDB_ERROR clear_if_first(int fd, void *arg)
-{
-/* We hold a lock offset 4 always, so we can tell if anyone is holding it.
- * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag). */
- struct flock fl;
-
- if (arg != clear_if_first)
- return NTDB_ERR_CORRUPT;
-
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 4;
- fl.l_len = 1;
-
- if (fcntl(fd, F_SETLK, &fl) == 0) {
- /* We must be first ones to open it! */
- diag("truncating file!");
- if (ftruncate(fd, 0) != 0) {
- return NTDB_ERR_IO;
- }
- }
- fl.l_type = F_RDLCK;
- if (fcntl(fd, F_SETLKW, &fl) != 0) {
- return NTDB_ERR_IO;
- }
- return NTDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb, *ntdb2;
- struct agent *agent;
- union ntdb_attribute cif;
- NTDB_DATA key = ntdb_mkdata(KEY_STR, strlen(KEY_STR));
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
-
- cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
- cif.openhook.base.next = &tap_log_attr;
- cif.openhook.fn = clear_if_first;
- cif.openhook.data = clear_if_first;
-
- agent = prepare_external_agent();
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 16);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- /* Create it */
- ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
- ok1(ntdb);
- ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
- ntdb_close(ntdb);
-
- /* Now, open with CIF, should clear it. */
- ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR, 0, &cif);
- ok1(ntdb);
- ok1(!ntdb_exists(ntdb, key));
- ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
-
- /* Agent should not clear it, since it's still open. */
- ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
- "run-83-openhook.ntdb") == SUCCESS);
- ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
- == SUCCESS);
- ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
-
- /* Still exists for us too. */
- ok1(ntdb_exists(ntdb, key));
-
- /* Nested open should not erase db. */
- ntdb2 = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR, 0, &cif);
- ok1(ntdb_exists(ntdb2, key));
- ok1(ntdb_exists(ntdb, key));
- ntdb_close(ntdb2);
-
- ok1(ntdb_exists(ntdb, key));
-
- /* Close it, now agent should clear it. */
- ntdb_close(ntdb);
-
- ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
- "run-83-openhook.ntdb") == SUCCESS);
- ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
- == FAILED);
- ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
-
- ok1(tap_log_messages == 0);
- }
-
- free_external_agent(agent);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- union ntdb_attribute *attr;
- NTDB_DATA key = ntdb_mkdata("key", 3), data;
-
- ntdb = ntdb_open("run-91-get-stats.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- /* Force an expansion */
- data.dsize = 65536;
- data.dptr = calloc(data.dsize, 1);
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- free(data.dptr);
-
- /* Use malloc so valgrind will catch overruns. */
- attr = malloc(sizeof *attr);
- attr->stats.base.attr = NTDB_ATTRIBUTE_STATS;
- attr->stats.size = sizeof(*attr);
-
- ok1(ntdb_get_attribute(ntdb, attr) == 0);
- ok1(attr->stats.size == sizeof(*attr));
- ok1(attr->stats.allocs > 0);
- ok1(attr->stats.expands > 0);
- ok1(attr->stats.locks > 0);
- free(attr);
-
- /* Try short one. */
- attr = malloc(offsetof(struct ntdb_attribute_stats, allocs)
- + sizeof(attr->stats.allocs));
- attr->stats.base.attr = NTDB_ATTRIBUTE_STATS;
- attr->stats.size = offsetof(struct ntdb_attribute_stats, allocs)
- + sizeof(attr->stats.allocs);
- ok1(ntdb_get_attribute(ntdb, attr) == 0);
- ok1(attr->stats.size == sizeof(*attr));
- ok1(attr->stats.allocs > 0);
- free(attr);
- ok1(tap_log_messages == 0);
-
- ntdb_close(ntdb);
-
- }
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 48);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- /* RW -> R0 */
- ntdb = ntdb_open("run-92-get-set-readonly.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY));
-
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
-
- ntdb_add_flag(ntdb, NTDB_RDONLY);
- ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
-
- /* Can't store, append, delete. */
- ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 1);
- ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 2);
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 3);
-
- /* Can't start a transaction, or any write lock. */
- ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 4);
- ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 5);
- ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 6);
- ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 7);
-
- /* Back to RW. */
- ntdb_remove_flag(ntdb, NTDB_RDONLY);
- ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY));
-
- ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_SUCCESS);
- ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
- ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
-
- ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
- ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS);
-
- ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS);
- ntdb_chainunlock(ntdb, key);
- ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
- ntdb_unlockall(ntdb);
- ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
- ok1(tap_log_messages == 7);
-
- ntdb_close(ntdb);
-
- /* R0 -> RW */
- ntdb = ntdb_open("run-92-get-set-readonly.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDONLY, 0600, &tap_log_attr);
- ok1(ntdb);
- ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
-
- /* Can't store, append, delete. */
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 8);
- ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 9);
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 10);
-
- /* Can't start a transaction, or any write lock. */
- ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 11);
- ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 12);
- ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 13);
- ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY);
- ok1(tap_log_messages == 14);
-
- /* Can't remove NTDB_RDONLY since we opened with O_RDONLY */
- ntdb_remove_flag(ntdb, NTDB_RDONLY);
- ok1(tap_log_messages == 15);
- ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
- ntdb_close(ntdb);
-
- ok1(tap_log_messages == 15);
- tap_log_messages = 0;
- }
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-#define NUM_TESTS 1000
-
-static bool store_all(struct ntdb_context *ntdb)
-{
- unsigned int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA dbuf = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < NUM_TESTS; i++) {
- if (ntdb_store(ntdb, key, dbuf, NTDB_INSERT) != NTDB_SUCCESS)
- return false;
- }
- return true;
-}
-
-static int mark_entry(struct ntdb_context *ntdb,
- NTDB_DATA key, NTDB_DATA data, bool found[])
-{
- unsigned int num;
-
- if (key.dsize != sizeof(num))
- return -1;
- memcpy(&num, key.dptr, key.dsize);
- if (num >= NUM_TESTS)
- return -1;
- if (found[num])
- return -1;
- found[num] = true;
- return 0;
-}
-
-static bool is_all_set(bool found[], unsigned int num)
-{
- unsigned int i;
-
- for (i = 0; i < num; i++)
- if (!found[i])
- return false;
- return true;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- bool found[NUM_TESTS];
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT
- };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 6 + 1);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-93-repack.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- break;
-
- ok1(store_all(ntdb));
-
- ok1(ntdb_repack(ntdb) == NTDB_SUCCESS);
- memset(found, 0, sizeof(found));
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- ok1(ntdb_traverse(ntdb, mark_entry, found) == NUM_TESTS);
- ok1(is_all_set(found, NUM_TESTS));
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-/* We use direct access to hand to the parse function: what if db expands? */
-#include "config.h"
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "../private.h" /* To establish size, esp. for NTDB_INTERNAL dbs */
-#include "helpapi-external-agent.h"
-
-static struct ntdb_context *ntdb;
-
-static off_t ntdb_size(void)
-{
- return ntdb->file->map_size;
-}
-
-struct parse_info {
- unsigned int depth;
- NTDB_DATA expected;
-};
-
-static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data,
- struct parse_info *pinfo)
-{
- off_t flen;
- unsigned int i;
-
- if (!ntdb_deq(data, pinfo->expected))
- return NTDB_ERR_EINVAL;
-
- flen = ntdb_size();
-
- for (i = 0; ntdb_size() == flen; i++) {
- NTDB_DATA add = ntdb_mkdata(&i, sizeof(i));
-
- /* This is technically illegal parse(), which is why we
- * grabbed allrecord lock.*/
- ntdb_store(ntdb, add, add, NTDB_INSERT);
- }
-
- /* Access the record again. */
- if (!ntdb_deq(data, pinfo->expected))
- return NTDB_ERR_EINVAL;
-
- /* Recurse! Woot! */
- if (pinfo->depth != 0) {
- enum NTDB_ERROR ecode;
-
- pinfo->depth--;
- ecode = ntdb_parse_record(ntdb, key, parse, pinfo);
- if (ecode) {
- return ecode;
- }
- }
-
- /* Access the record one more time. */
- if (!ntdb_deq(data, pinfo->expected))
- return NTDB_ERR_EINVAL;
-
- return NTDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- struct parse_info pinfo;
- NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5);
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-94-expand-during-parse.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
- ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
- pinfo.expected = data;
- pinfo.depth = 3;
- ok1(ntdb_parse_record(ntdb, key, parse, &pinfo) == NTDB_SUCCESS);
- ntdb_unlockall(ntdb);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-/* Make sure write operations fail during ntdb_parse(). */
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static struct ntdb_context *ntdb;
-
-/* We could get either of these. */
-static bool xfail(enum NTDB_ERROR ecode)
-{
- return ecode == NTDB_ERR_RDONLY || ecode == NTDB_ERR_LOCK;
-}
-
-static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data,
- NTDB_DATA *expected)
-{
- NTDB_DATA add = ntdb_mkdata("another", strlen("another"));
-
- if (!ntdb_deq(data, *expected)) {
- return NTDB_ERR_EINVAL;
- }
-
- /* These should all fail.*/
- if (!xfail(ntdb_store(ntdb, add, add, NTDB_INSERT))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_append(ntdb, key, add))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_delete(ntdb, key))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_transaction_start(ntdb))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_chainlock(ntdb, key))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_lockall(ntdb))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_wipe_all(ntdb))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- if (!xfail(ntdb_repack(ntdb))) {
- return NTDB_ERR_EINVAL;
- }
- tap_log_messages--;
-
- /* Access the record one more time. */
- if (!ntdb_deq(data, *expected)) {
- return NTDB_ERR_EINVAL;
- }
-
- return NTDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5);
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-95-read-only-during-parse.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
- ok1(ntdb_parse_record(ntdb, key, parse, &data) == NTDB_SUCCESS);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h" // for ntdb_context
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(87);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-add-remove-flags.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(ntdb_get_flags(ntdb) == ntdb->flags);
- tap_log_messages = 0;
- ntdb_add_flag(ntdb, NTDB_NOLOCK);
- if (flags[i] & NTDB_INTERNAL)
- ok1(tap_log_messages == 1);
- else {
- ok1(tap_log_messages == 0);
- ok1(ntdb_get_flags(ntdb) & NTDB_NOLOCK);
- }
-
- tap_log_messages = 0;
- ntdb_add_flag(ntdb, NTDB_NOMMAP);
- if (flags[i] & NTDB_INTERNAL)
- ok1(tap_log_messages == 1);
- else {
- ok1(tap_log_messages == 0);
- ok1(ntdb_get_flags(ntdb) & NTDB_NOMMAP);
- ok1(ntdb->file->map_ptr == NULL);
- }
-
- tap_log_messages = 0;
- ntdb_add_flag(ntdb, NTDB_NOSYNC);
- if (flags[i] & NTDB_INTERNAL)
- ok1(tap_log_messages == 1);
- else {
- ok1(tap_log_messages == 0);
- ok1(ntdb_get_flags(ntdb) & NTDB_NOSYNC);
- }
-
- ok1(ntdb_get_flags(ntdb) == ntdb->flags);
-
- tap_log_messages = 0;
- ntdb_remove_flag(ntdb, NTDB_NOLOCK);
- if (flags[i] & NTDB_INTERNAL)
- ok1(tap_log_messages == 1);
- else {
- ok1(tap_log_messages == 0);
- ok1(!(ntdb_get_flags(ntdb) & NTDB_NOLOCK));
- }
-
- tap_log_messages = 0;
- ntdb_remove_flag(ntdb, NTDB_NOMMAP);
- if (flags[i] & NTDB_INTERNAL)
- ok1(tap_log_messages == 1);
- else {
- ok1(tap_log_messages == 0);
- ok1(!(ntdb_get_flags(ntdb) & NTDB_NOMMAP));
- ok1(ntdb->file->map_ptr != NULL);
- }
-
- tap_log_messages = 0;
- ntdb_remove_flag(ntdb, NTDB_NOSYNC);
- if (flags[i] & NTDB_INTERNAL)
- ok1(tap_log_messages == 1);
- else {
- ok1(tap_log_messages == 0);
- ok1(!(ntdb_get_flags(ntdb) & NTDB_NOSYNC));
- }
-
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-#define NUM_RECORDS 1000
-
-static bool store_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < NUM_RECORDS; i++)
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- return true;
-}
-
-static enum NTDB_ERROR check(NTDB_DATA key,
- NTDB_DATA data,
- bool *array)
-{
- int val;
-
- if (key.dsize != sizeof(val)) {
- diag("Wrong key size: %zu\n", key.dsize);
- return NTDB_ERR_CORRUPT;
- }
-
- if (key.dsize != data.dsize
- || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) {
- diag("Key and data differ\n");
- return NTDB_ERR_CORRUPT;
- }
-
- memcpy(&val, key.dptr, sizeof(val));
- if (val >= NUM_RECORDS || val < 0) {
- diag("check value %i\n", val);
- return NTDB_ERR_CORRUPT;
- }
-
- if (array[val]) {
- diag("Value %i already seen\n", val);
- return NTDB_ERR_CORRUPT;
- }
-
- array[val] = true;
- return NTDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- return 0;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- bool array[NUM_RECORDS];
-
- ntdb = ntdb_open("run-check-callback.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(store_records(ntdb));
- for (j = 0; j < NUM_RECORDS; j++)
- array[j] = false;
- ok1(ntdb_check(ntdb, check, array) == NTDB_SUCCESS);
- for (j = 0; j < NUM_RECORDS; j++)
- if (!array[j])
- break;
- ok1(j == NUM_RECORDS);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-#define NUM_RECORDS 1000
-
-static bool store_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < NUM_RECORDS; i++)
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- return true;
-}
-
-struct trav_data {
- unsigned int records[NUM_RECORDS];
- unsigned int calls;
-};
-
-static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p)
-{
- struct trav_data *td = p;
- int val;
-
- memcpy(&val, dbuf.dptr, dbuf.dsize);
- td->records[td->calls++] = val;
- return 0;
-}
-
-/* Since ntdb_nextkey frees dptr, we need to clone it. */
-static NTDB_DATA dup_key(NTDB_DATA key)
-{
- void *p = malloc(key.dsize);
- memcpy(p, key.dptr, key.dsize);
- key.dptr = p;
- return key;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- int num;
- struct trav_data td;
- NTDB_DATA k;
- struct ntdb_context *ntdb;
- union ntdb_attribute seed_attr;
- enum NTDB_ERROR ecode;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
- seed_attr.base.next = &tap_log_attr;
- seed_attr.seed.seed = 6334326220117065685ULL;
-
- plan_tests(sizeof(flags) / sizeof(flags[0])
- * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("api-firstkey-nextkey.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600,
- &seed_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(ntdb_firstkey(ntdb, &k) == NTDB_ERR_NOEXIST);
-
- /* One entry... */
- k.dptr = (unsigned char *)&num;
- k.dsize = sizeof(num);
- num = 0;
- ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0);
- ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS);
- ok1(k.dsize == sizeof(num));
- ok1(memcmp(k.dptr, &num, sizeof(num)) == 0);
- ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST);
-
- /* Two entries. */
- k.dptr = (unsigned char *)&num;
- k.dsize = sizeof(num);
- num = 1;
- ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0);
- ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS);
- ok1(k.dsize == sizeof(num));
- memcpy(&num, k.dptr, sizeof(num));
- ok1(num == 0 || num == 1);
- ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS);
- ok1(k.dsize == sizeof(j));
- memcpy(&j, k.dptr, sizeof(j));
- ok1(j == 0 || j == 1);
- ok1(j != num);
- ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST);
-
- /* Clean up. */
- k.dptr = (unsigned char *)&num;
- k.dsize = sizeof(num);
- num = 0;
- ok1(ntdb_delete(ntdb, k) == 0);
- num = 1;
- ok1(ntdb_delete(ntdb, k) == 0);
-
- /* Now lots of records. */
- ok1(store_records(ntdb));
- td.calls = 0;
-
- num = ntdb_traverse(ntdb, trav, &td);
- ok1(num == NUM_RECORDS);
- ok1(td.calls == NUM_RECORDS);
-
- /* Simple loop should match ntdb_traverse */
- for (j = 0, ecode = ntdb_firstkey(ntdb, &k); j < td.calls; j++) {
- int val;
-
- ok1(ecode == NTDB_SUCCESS);
- ok1(k.dsize == sizeof(val));
- memcpy(&val, k.dptr, k.dsize);
- ok1(td.records[j] == val);
- ecode = ntdb_nextkey(ntdb, &k);
- }
-
- /* But arbitrary orderings should work too. */
- for (j = td.calls-1; j > 0; j--) {
- k.dptr = (unsigned char *)&td.records[j-1];
- k.dsize = sizeof(td.records[j-1]);
- k = dup_key(k);
- ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS);
- ok1(k.dsize == sizeof(td.records[j]));
- ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0);
- free(k.dptr);
- }
-
- /* Even delete should work. */
- for (j = 0, ecode = ntdb_firstkey(ntdb, &k);
- ecode != NTDB_ERR_NOEXIST;
- j++) {
- ok1(ecode == NTDB_SUCCESS);
- ok1(k.dsize == 4);
- ok1(ntdb_delete(ntdb, k) == 0);
- ecode = ntdb_nextkey(ntdb, &k);
- }
-
- diag("delete using first/nextkey gave %u of %u records",
- j, NUM_RECORDS);
- ok1(j == NUM_RECORDS);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-/* Test forking while holding lock.
- *
- * There are only five ways to do this currently:
- * (1) grab a ntdb_chainlock, then fork.
- * (2) grab a ntdb_lockall, then fork.
- * (3) grab a ntdb_lockall_read, then fork.
- * (4) start a transaction, then fork.
- * (5) fork from inside a ntdb_parse() callback.
- *
- * Note that we don't hold a lock across ntdb_traverse callbacks, so
- * that doesn't matter.
- */
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-static bool am_child = false;
-
-static enum NTDB_ERROR fork_in_parse(NTDB_DATA key, NTDB_DATA data,
- struct ntdb_context *ntdb)
-{
- int status;
-
- if (fork() == 0) {
- am_child = true;
-
- /* We expect this to fail. */
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
- exit(1);
-
- if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
- exit(1);
-
- if (tap_log_messages != 2)
- exit(2);
-
- return NTDB_SUCCESS;
- }
- wait(&status);
- ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
- return NTDB_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- int status;
-
- tap_log_messages = 0;
-
- ntdb = ntdb_open("run-fork-test.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- continue;
-
- /* Put a record in here. */
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_SUCCESS);
-
- ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS);
- if (fork() == 0) {
- /* We expect this to fail. */
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
- return 1;
-
- if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
- return 1;
-
- if (tap_log_messages != 2)
- return 2;
-
- /* Child can do this without any complaints. */
- ntdb_chainunlock(ntdb, key);
- if (tap_log_messages != 2)
- return 3;
- ntdb_close(ntdb);
- if (tap_log_messages != 2)
- return 4;
- return 0;
- }
- wait(&status);
- ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
- ntdb_chainunlock(ntdb, key);
-
- ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
- if (fork() == 0) {
- /* We expect this to fail. */
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
- return 1;
-
- if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
- return 1;
-
- if (tap_log_messages != 2)
- return 2;
-
- /* Child can do this without any complaints. */
- ntdb_unlockall(ntdb);
- if (tap_log_messages != 2)
- return 3;
- ntdb_close(ntdb);
- if (tap_log_messages != 2)
- return 4;
- return 0;
- }
- wait(&status);
- ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
- ntdb_unlockall(ntdb);
-
- ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
- if (fork() == 0) {
- /* We expect this to fail. */
- /* This would always fail anyway... */
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
- return 1;
-
- if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
- return 1;
-
- if (tap_log_messages != 2)
- return 2;
-
- /* Child can do this without any complaints. */
- ntdb_unlockall_read(ntdb);
- if (tap_log_messages != 2)
- return 3;
- ntdb_close(ntdb);
- if (tap_log_messages != 2)
- return 4;
- return 0;
- }
- wait(&status);
- ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
- ntdb_unlockall_read(ntdb);
-
- ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
- /* If transactions is empty, noop "commit" succeeds. */
- ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
- if (fork() == 0) {
- int last_log_messages;
-
- /* We expect this to fail. */
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
- return 1;
-
- if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
- return 1;
-
- if (tap_log_messages != 2)
- return 2;
-
- if (ntdb_transaction_prepare_commit(ntdb)
- != NTDB_ERR_LOCK)
- return 3;
- if (tap_log_messages == 2)
- return 4;
-
- last_log_messages = tap_log_messages;
- /* Child can do this without any complaints. */
- ntdb_transaction_cancel(ntdb);
- if (tap_log_messages != last_log_messages)
- return 4;
- ntdb_close(ntdb);
- if (tap_log_messages != last_log_messages)
- return 4;
- return 0;
- }
- wait(&status);
- ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
- ntdb_transaction_cancel(ntdb);
-
- ok1(ntdb_parse_record(ntdb, key, fork_in_parse, ntdb)
- == NTDB_SUCCESS);
- ntdb_close(ntdb);
- if (am_child) {
- /* Child can return from parse without complaints. */
- if (tap_log_messages != 2)
- exit(3);
- exit(0);
- }
- ok1(tap_log_messages == 0);
- }
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include <limits.h>
-#include "logging.h"
-#include "external-agent.h"
-#include "helpapi-external-agent.h"
-
-#undef alarm
-#define alarm fast_alarm
-
-/* Speed things up by doing things in milliseconds. */
-static unsigned int fast_alarm(unsigned int milli_seconds)
-{
- struct itimerval it;
-
- it.it_interval.tv_sec = it.it_interval.tv_usec = 0;
- it.it_value.tv_sec = milli_seconds / 1000;
- it.it_value.tv_usec = milli_seconds * 1000;
- setitimer(ITIMER_REAL, &it, NULL);
- return 0;
-}
-
-#define CatchSignal(sig, handler) signal((sig), (handler))
-
-static void do_nothing(int signum)
-{
-}
-
-/* This example code is taken from SAMBA, so try not to change it. */
-static struct flock flock_struct;
-
-/* Return a value which is none of v1, v2 or v3. */
-static inline short int invalid_value(short int v1, short int v2, short int v3)
-{
- short int try = (v1+v2+v3)^((v1+v2+v3) << 16);
- while (try == v1 || try == v2 || try == v3)
- try++;
- return try;
-}
-
-/* We invalidate in as many ways as we can, so the OS rejects it */
-static void invalidate_flock_struct(int signum)
-{
- flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK);
- flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END);
- flock_struct.l_start = -1;
- /* A large negative. */
- flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1);
-}
-
-static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
- void *_timeout)
-{
- int ret, saved_errno = errno;
- unsigned int timeout = *(unsigned int *)_timeout;
-
- flock_struct.l_type = rw;
- flock_struct.l_whence = SEEK_SET;
- flock_struct.l_start = off;
- flock_struct.l_len = len;
-
- CatchSignal(SIGALRM, invalidate_flock_struct);
- alarm(timeout);
-
- for (;;) {
- if (waitflag)
- ret = fcntl(fd, F_SETLKW, &flock_struct);
- else
- ret = fcntl(fd, F_SETLK, &flock_struct);
-
- if (ret == 0)
- break;
-
- /* Not signalled? Something else went wrong. */
- if (flock_struct.l_len == len) {
- if (errno == EAGAIN || errno == EINTR)
- continue;
- saved_errno = errno;
- break;
- } else {
- saved_errno = EINTR;
- break;
- }
- }
-
- alarm(0);
- errno = saved_errno;
- return ret;
-}
-
-static int ntdb_chainlock_with_timeout_internal(struct ntdb_context *ntdb,
- NTDB_DATA key,
- unsigned int timeout,
- int rw_type)
-{
- union ntdb_attribute locking;
- enum NTDB_ERROR ecode;
-
- if (timeout) {
- locking.base.attr = NTDB_ATTRIBUTE_FLOCK;
- ecode = ntdb_get_attribute(ntdb, &locking);
- if (ecode != NTDB_SUCCESS)
- return ecode;
-
- /* Replace locking function with our own. */
- locking.flock.data = &timeout;
- locking.flock.lock = timeout_lock;
-
- ecode = ntdb_set_attribute(ntdb, &locking);
- if (ecode != NTDB_SUCCESS)
- return ecode;
- }
- if (rw_type == F_RDLCK)
- ecode = ntdb_chainlock_read(ntdb, key);
- else
- ecode = ntdb_chainlock(ntdb, key);
-
- if (timeout) {
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
- }
- return ecode;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- NTDB_DATA key = ntdb_mkdata("hello", 5);
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- struct agent *agent;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 15);
-
- agent = prepare_external_agent();
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- enum NTDB_ERROR ecode;
- ntdb = ntdb_open("run-locktimeout.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- break;
-
- /* Simple cases: should succeed. */
- ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
- F_RDLCK);
- ok1(ecode == NTDB_SUCCESS);
- ok1(tap_log_messages == 0);
-
- ntdb_chainunlock_read(ntdb, key);
- ok1(tap_log_messages == 0);
-
- ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
- F_WRLCK);
- ok1(ecode == NTDB_SUCCESS);
- ok1(tap_log_messages == 0);
-
- ntdb_chainunlock(ntdb, key);
- ok1(tap_log_messages == 0);
-
- /* OK, get agent to start transaction, then we should time out. */
- ok1(external_agent_operation(agent, OPEN, "run-locktimeout.ntdb")
- == SUCCESS);
- ok1(external_agent_operation(agent, TRANSACTION_START, "")
- == SUCCESS);
- ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
- F_WRLCK);
- ok1(ecode == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
-
- /* Even if we get a different signal, should be fine. */
- CatchSignal(SIGUSR1, do_nothing);
- external_agent_operation(agent, SEND_SIGNAL, "");
- ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
- F_WRLCK);
- ok1(ecode == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 0);
-
- ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "")
- == SUCCESS);
- ok1(external_agent_operation(agent, CLOSE, "")
- == SUCCESS);
- ntdb_close(ntdb);
- }
- free_external_agent(agent);
- return exit_status();
-}
+++ /dev/null
-/* Another test revealed that we lost an entry. This reproduces it. */
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include <ccan/hash/hash.h>
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-#define NUM_RECORDS 1189
-
-/* We use the same seed which we saw this failure on. */
-static uint32_t failhash(const void *key, size_t len, uint32_t seed, void *p)
-{
- return hash64_stable((const unsigned char *)key, len,
- 699537674708983027ULL);
-}
-
-int main(int argc, char *argv[])
-{
- int i;
- struct ntdb_context *ntdb;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
- union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = failhash } };
-
- hattr.base.next = &tap_log_attr;
- plan_tests(1 + NUM_RECORDS + 2);
-
- ntdb = ntdb_open("run-missing-entries.ntdb", NTDB_INTERNAL,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
- if (ok1(ntdb)) {
- for (i = 0; i < NUM_RECORDS; i++) {
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- }
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "tap-interface.h"
-#include <stdlib.h>
-#include "logging.h"
-#include "../private.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb, *ntdb2;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 30);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-open-multiple-times.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ntdb2 = ntdb_open("run-open-multiple-times.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT, 0600, &tap_log_attr);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_check(ntdb2, NULL, NULL) == 0);
- ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr);
-
- /* Store in one, fetch in the other. */
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(d, data));
- free(d.dptr);
-
- /* Vice versa, with delete. */
- ok1(ntdb_delete(ntdb2, key) == 0);
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST);
-
- /* OK, now close first one, check second still good. */
- ok1(ntdb_close(ntdb) == 0);
-
- ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr);
- ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == 0);
- ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(d, data));
- free(d.dptr);
-
- /* Reopen */
- ntdb = ntdb_open("run-open-multiple-times.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT, 0600, &tap_log_attr);
- ok1(ntdb);
-
- ok1(ntdb_transaction_start(ntdb2) == 0);
-
- /* Anything in the other one should fail. */
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 1);
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 2);
- ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 3);
- ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
- ok1(tap_log_messages == 4);
-
- /* Transaciton should work as normal. */
- ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == NTDB_SUCCESS);
-
- /* Now... try closing with locks held. */
- ok1(ntdb_close(ntdb2) == 0);
-
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(ntdb_deq(d, data));
- free(d.dptr);
- ok1(ntdb_close(ntdb) == 0);
- ok1(tap_log_messages == 4);
- tap_log_messages = 0;
- }
-
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-#define MAX_SIZE 10000
-#define SIZE_STEP 131
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data;
-
- data.dptr = malloc(MAX_SIZE);
- memset(data.dptr, 0x24, MAX_SIZE);
-
- plan_tests(sizeof(flags) / sizeof(flags[0])
- * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-record-expand.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- data.dsize = 0;
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- for (data.dsize = 0;
- data.dsize < MAX_SIZE;
- data.dsize += SIZE_STEP) {
- memset(data.dptr, data.dsize, data.dsize);
- ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- }
- ntdb_close(ntdb);
- }
- ok1(tap_log_messages == 0);
- free(data.dptr);
-
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-simple-delete.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (ntdb) {
- /* Delete should fail. */
- ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Insert should succeed. */
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Delete should now work. */
- ok1(ntdb_delete(ntdb, key) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- }
- }
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "config.h"
-#include "../ntdb.h"
-#include "../private.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helpapi-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
- NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
- char *summary;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-summary.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Put some stuff in there. */
- for (j = 0; j < 500; j++) {
- /* Make sure padding varies to we get some graphs! */
- data.dsize = j % (sizeof(j) + 1);
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- fail("Storing in ntdb");
- }
-
- for (j = 0;
- j <= NTDB_SUMMARY_HISTOGRAMS;
- j += NTDB_SUMMARY_HISTOGRAMS) {
- ok1(ntdb_summary(ntdb, j, &summary) == NTDB_SUCCESS);
- ok1(strstr(summary, "Number of records: 500\n"));
- ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
- ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
- if (j == NTDB_SUMMARY_HISTOGRAMS) {
- ok1(strstr(summary, "|")
- && strstr(summary, "*"));
- } else {
- ok1(!strstr(summary, "|")
- && !strstr(summary, "*"));
- }
- free(summary);
- }
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "external-agent.h"
-#include "logging.h"
-#include "lock-tracking.h"
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <ccan/err/err.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-#include <errno.h>
-#include "tap-interface.h"
-#include <stdio.h>
-#include <stdarg.h>
-
-static struct ntdb_context *ntdb;
-
-void (*external_agent_free)(void *) = free;
-
-static enum NTDB_ERROR clear_if_first(int fd, void *arg)
-{
-/* We hold a lock offset 4 always, so we can tell if anyone is holding it.
- * (This is compatible with tdb's TDB_CLEAR_IF_FIRST flag). */
- struct flock fl;
-
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 4;
- fl.l_len = 1;
-
- if (fcntl(fd, F_SETLK, &fl) == 0) {
- /* We must be first ones to open it! */
- diag("agent truncating file!");
- if (ftruncate(fd, 0) != 0) {
- return NTDB_ERR_IO;
- }
- }
- fl.l_type = F_RDLCK;
- if (fcntl(fd, F_SETLKW, &fl) != 0) {
- return NTDB_ERR_IO;
- }
- return NTDB_SUCCESS;
-}
-
-static enum agent_return do_operation(enum operation op, const char *name)
-{
- NTDB_DATA k, d;
- enum agent_return ret;
- NTDB_DATA data;
- enum NTDB_ERROR ecode;
- union ntdb_attribute cif;
- const char *eq;
-
- if (op != OPEN && op != OPEN_WITH_HOOK && !ntdb) {
- diag("external: No ntdb open!");
- return OTHER_FAILURE;
- }
-
- diag("external: %s", operation_name(op));
-
- eq = strchr(name, '=');
- if (eq) {
- k = ntdb_mkdata(name, eq - name);
- d = ntdb_mkdata(eq + 1, strlen(eq+1));
- } else {
- k = ntdb_mkdata(name, strlen(name));
- d.dsize = 0;
- d.dptr = NULL;
- }
-
- locking_would_block = 0;
- switch (op) {
- case OPEN:
- if (ntdb) {
- diag("Already have ntdb %s open", ntdb_name(ntdb));
- return OTHER_FAILURE;
- }
- ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &tap_log_attr);
- if (!ntdb) {
- if (!locking_would_block)
- diag("Opening ntdb gave %s", strerror(errno));
- forget_locking();
- ret = OTHER_FAILURE;
- } else
- ret = SUCCESS;
- break;
- case OPEN_WITH_HOOK:
- if (ntdb) {
- diag("Already have ntdb %s open", ntdb_name(ntdb));
- return OTHER_FAILURE;
- }
- cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
- cif.openhook.base.next = &tap_log_attr;
- cif.openhook.fn = clear_if_first;
- ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &cif);
- if (!ntdb) {
- if (!locking_would_block)
- diag("Opening ntdb gave %s", strerror(errno));
- forget_locking();
- ret = OTHER_FAILURE;
- } else
- ret = SUCCESS;
- break;
- case FETCH:
- ecode = ntdb_fetch(ntdb, k, &data);
- if (ecode == NTDB_ERR_NOEXIST) {
- ret = FAILED;
- } else if (ecode < 0) {
- ret = OTHER_FAILURE;
- } else if (!ntdb_deq(data, d)) {
- ret = OTHER_FAILURE;
- external_agent_free(data.dptr);
- } else {
- ret = SUCCESS;
- external_agent_free(data.dptr);
- }
- break;
- case STORE:
- ret = ntdb_store(ntdb, k, d, 0) == 0 ? SUCCESS : OTHER_FAILURE;
- break;
- case TRANSACTION_START:
- ret = ntdb_transaction_start(ntdb) == 0 ? SUCCESS : OTHER_FAILURE;
- break;
- case TRANSACTION_COMMIT:
- ret = ntdb_transaction_commit(ntdb)==0 ? SUCCESS : OTHER_FAILURE;
- break;
- case NEEDS_RECOVERY:
- ret = external_agent_needs_rec(ntdb);
- break;
- case CHECK:
- ret = ntdb_check(ntdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE;
- break;
- case CLOSE:
- ret = ntdb_close(ntdb) == 0 ? SUCCESS : OTHER_FAILURE;
- ntdb = NULL;
- break;
- case SEND_SIGNAL:
- /* We do this async */
- ret = SUCCESS;
- break;
- default:
- ret = OTHER_FAILURE;
- }
-
- if (locking_would_block)
- ret = WOULD_HAVE_BLOCKED;
-
- return ret;
-}
-
-struct agent {
- int cmdfd, responsefd;
-};
-
-/* Do this before doing any ntdb stuff. Return handle, or NULL. */
-struct agent *prepare_external_agent(void)
-{
- int pid, ret;
- int command[2], response[2];
- char name[1+PATH_MAX];
-
- if (pipe(command) != 0 || pipe(response) != 0)
- return NULL;
-
- pid = fork();
- if (pid < 0)
- return NULL;
-
- if (pid != 0) {
- struct agent *agent = malloc(sizeof(*agent));
-
- close(command[0]);
- close(response[1]);
- agent->cmdfd = command[1];
- agent->responsefd = response[0];
- return agent;
- }
-
- close(command[1]);
- close(response[0]);
-
- /* We want to fail, not block. */
- nonblocking_locks = true;
- log_prefix = "external: ";
- while ((ret = read(command[0], name, sizeof(name))) > 0) {
- enum agent_return result;
-
- result = do_operation(name[0], name+1);
- if (write(response[1], &result, sizeof(result))
- != sizeof(result))
- err(1, "Writing response");
- if (name[0] == SEND_SIGNAL) {
- struct timeval ten_ms;
- ten_ms.tv_sec = 0;
- ten_ms.tv_usec = 10000;
- select(0, NULL, NULL, NULL, &ten_ms);
- kill(getppid(), SIGUSR1);
- }
- }
- exit(0);
-}
-
-/* Ask the external agent to try to do an operation. */
-enum agent_return external_agent_operation(struct agent *agent,
- enum operation op,
- const char *name)
-{
- enum agent_return res;
- unsigned int len;
- char *string;
-
- if (!name)
- name = "";
- len = 1 + strlen(name) + 1;
- string = malloc(len);
-
- string[0] = op;
- strcpy(string+1, name);
-
- if (write(agent->cmdfd, string, len) != len
- || read(agent->responsefd, &res, sizeof(res)) != sizeof(res))
- res = AGENT_DIED;
-
- free(string);
- return res;
-}
-
-const char *agent_return_name(enum agent_return ret)
-{
- return ret == SUCCESS ? "SUCCESS"
- : ret == WOULD_HAVE_BLOCKED ? "WOULD_HAVE_BLOCKED"
- : ret == AGENT_DIED ? "AGENT_DIED"
- : ret == FAILED ? "FAILED"
- : ret == OTHER_FAILURE ? "OTHER_FAILURE"
- : "**INVALID**";
-}
-
-const char *operation_name(enum operation op)
-{
- switch (op) {
- case OPEN: return "OPEN";
- case OPEN_WITH_HOOK: return "OPEN_WITH_HOOK";
- case FETCH: return "FETCH";
- case STORE: return "STORE";
- case CHECK: return "CHECK";
- case TRANSACTION_START: return "TRANSACTION_START";
- case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT";
- case NEEDS_RECOVERY: return "NEEDS_RECOVERY";
- case SEND_SIGNAL: return "SEND_SIGNAL";
- case CLOSE: return "CLOSE";
- }
- return "**INVALID**";
-}
-
-void free_external_agent(struct agent *agent)
-{
- close(agent->cmdfd);
- close(agent->responsefd);
- free(agent);
-}
+++ /dev/null
-#ifndef NTDB_TEST_EXTERNAL_AGENT_H
-#define NTDB_TEST_EXTERNAL_AGENT_H
-
-/* For locking tests, we need a different process to try things at
- * various times. */
-enum operation {
- OPEN,
- OPEN_WITH_HOOK,
- FETCH,
- STORE,
- TRANSACTION_START,
- TRANSACTION_COMMIT,
- NEEDS_RECOVERY,
- CHECK,
- SEND_SIGNAL,
- CLOSE,
-};
-
-/* Do this before doing any ntdb stuff. Return handle, or -1. */
-struct agent *prepare_external_agent(void);
-
-enum agent_return {
- SUCCESS,
- WOULD_HAVE_BLOCKED,
- AGENT_DIED,
- FAILED, /* For fetch, or NEEDS_RECOVERY */
- OTHER_FAILURE,
-};
-
-/* Ask the external agent to try to do an operation.
- * name == ntdb name for OPEN/OPEN_WITH_CLEAR_IF_FIRST,
- * <key>=<data> for FETCH/STORE.
- */
-enum agent_return external_agent_operation(struct agent *handle,
- enum operation op,
- const char *name);
-
-/* Hook into free() on ntdb_data in external agent. */
-extern void (*external_agent_free)(void *);
-
-/* Mapping enum -> string. */
-const char *agent_return_name(enum agent_return ret);
-const char *operation_name(enum operation op);
-
-void free_external_agent(struct agent *agent);
-
-/* Internal use: */
-struct ntdb_context;
-enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb);
-
-#endif /* NTDB_TEST_EXTERNAL_AGENT_H */
+++ /dev/null
-#include "failtest_helper.h"
-#include "logging.h"
-#include <string.h>
-#include "tap-interface.h"
-
-bool failtest_suppress = false;
-
-bool failmatch(const struct failtest_call *call,
- const char *file, int line, enum failtest_call_type type)
-{
- return call->type == type
- && call->line == line
- && ((strcmp(call->file, file) == 0)
- || (strends(call->file, file)
- && (call->file[strlen(call->file) - strlen(file) - 1]
- == '/')));
-}
-
-static bool is_nonblocking_lock(const struct failtest_call *call)
-{
- return call->type == FAILTEST_FCNTL && call->u.fcntl.cmd == F_SETLK;
-}
-
-static bool is_unlock(const struct failtest_call *call)
-{
- return call->type == FAILTEST_FCNTL
- && call->u.fcntl.arg.fl.l_type == F_UNLCK;
-}
-
-bool exit_check_log(struct tlist_calls *history)
-{
- const struct failtest_call *i;
- unsigned int malloc_count = 0;
-
- tlist_for_each(history, i, list) {
- if (!i->fail)
- continue;
- /* Failing the /dev/urandom open doesn't count: we fall back. */
- if (failmatch(i, URANDOM_OPEN))
- continue;
-
- /* Similarly with read fail. */
- if (failmatch(i, URANDOM_READ))
- continue;
-
- /* Initial allocation of ntdb doesn't log. */
- if (i->type == FAILTEST_MALLOC) {
- if (malloc_count++ == 0) {
- continue;
- }
- }
-
- /* We don't block "failures" on non-blocking locks. */
- if (is_nonblocking_lock(i))
- continue;
-
- if (!tap_log_messages)
- diag("We didn't log for %s:%u", i->file, i->line);
- return tap_log_messages != 0;
- }
- return true;
-}
-
-/* Some places we soldier on despite errors: only fail them once. */
-enum failtest_result
-block_repeat_failures(struct tlist_calls *history)
-{
- const struct failtest_call *last;
-
- last = tlist_tail(history, list);
-
- if (failtest_suppress)
- return FAIL_DONT_FAIL;
-
- if (failmatch(last, URANDOM_OPEN)
- || failmatch(last, URANDOM_READ)) {
- return FAIL_PROBE;
- }
-
- /* We handle mmap failing, by falling back to read/write, so
- * don't try all possible paths. */
- if (last->type == FAILTEST_MMAP)
- return FAIL_PROBE;
-
- /* Unlock or non-blocking lock is fail-once. */
- if (is_unlock(last) || is_nonblocking_lock(last))
- return FAIL_PROBE;
-
- return FAIL_OK;
-}
+++ /dev/null
-#ifndef NTDB_TEST_FAILTEST_HELPER_H
-#define NTDB_TEST_FAILTEST_HELPER_H
-#include <ccan/failtest/failtest.h>
-#include <stdbool.h>
-
-/* FIXME: Check these! */
-#define URANDOM_OPEN "open.c", 62, FAILTEST_OPEN
-#define URANDOM_READ "open.c", 42, FAILTEST_READ
-
-bool exit_check_log(struct tlist_calls *history);
-bool failmatch(const struct failtest_call *call,
- const char *file, int line, enum failtest_call_type type);
-enum failtest_result block_repeat_failures(struct tlist_calls *history);
-
-/* Set this to suppress failure. */
-extern bool failtest_suppress;
-
-#endif /* NTDB_TEST_LOGGING_H */
+++ /dev/null
-#include "external-agent.h"
-
-/* This isn't possible with via the ntdb API, but this makes it link. */
-enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb)
-{
- return FAILED;
-}
+++ /dev/null
-#include "external-agent.h"
-#include "../private.h"
-
-enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb)
-{
- return ntdb_needs_recovery(ntdb) ? SUCCESS : FAILED;
-}
+++ /dev/null
-/* NTDB tools to create various canned database layouts. */
-#include "layout.h"
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <ccan/err/err.h>
-#include "logging.h"
-
-struct ntdb_layout *new_ntdb_layout(void)
-{
- struct ntdb_layout *layout = malloc(sizeof(*layout));
- layout->num_elems = 0;
- layout->elem = NULL;
- return layout;
-}
-
-static void add(struct ntdb_layout *layout, union ntdb_layout_elem elem)
-{
- layout->elem = realloc(layout->elem,
- sizeof(layout->elem[0])
- * (layout->num_elems+1));
- layout->elem[layout->num_elems++] = elem;
-}
-
-void ntdb_layout_add_freetable(struct ntdb_layout *layout)
-{
- union ntdb_layout_elem elem;
- elem.base.type = FREETABLE;
- add(layout, elem);
-}
-
-void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len,
- unsigned ftable)
-{
- union ntdb_layout_elem elem;
- elem.base.type = FREE;
- elem.free.len = len;
- elem.free.ftable_num = ftable;
- add(layout, elem);
-}
-
-void ntdb_layout_add_capability(struct ntdb_layout *layout,
- uint64_t type,
- bool write_breaks,
- bool check_breaks,
- bool open_breaks,
- ntdb_len_t extra)
-{
- union ntdb_layout_elem elem;
- elem.base.type = CAPABILITY;
- elem.capability.type = type;
- if (write_breaks)
- elem.capability.type |= NTDB_CAP_NOWRITE;
- if (open_breaks)
- elem.capability.type |= NTDB_CAP_NOOPEN;
- if (check_breaks)
- elem.capability.type |= NTDB_CAP_NOCHECK;
- elem.capability.extra = extra;
- add(layout, elem);
-}
-
-static NTDB_DATA dup_key(NTDB_DATA key)
-{
- NTDB_DATA ret;
- ret.dsize = key.dsize;
- ret.dptr = malloc(ret.dsize);
- memcpy(ret.dptr, key.dptr, ret.dsize);
- return ret;
-}
-
-void ntdb_layout_add_used(struct ntdb_layout *layout,
- NTDB_DATA key, NTDB_DATA data,
- ntdb_len_t extra)
-{
- union ntdb_layout_elem elem;
- elem.base.type = DATA;
- elem.used.key = dup_key(key);
- elem.used.data = dup_key(data);
- elem.used.extra = extra;
- add(layout, elem);
-}
-
-static ntdb_len_t free_record_len(ntdb_len_t len)
-{
- return sizeof(struct ntdb_used_record) + len;
-}
-
-static ntdb_len_t data_record_len(struct tle_used *used)
-{
- ntdb_len_t len;
- len = sizeof(struct ntdb_used_record)
- + used->key.dsize + used->data.dsize + used->extra;
- assert(len >= sizeof(struct ntdb_free_record));
- return len;
-}
-
-static ntdb_len_t capability_len(struct tle_capability *cap)
-{
- return sizeof(struct ntdb_capability) + cap->extra;
-}
-
-static ntdb_len_t freetable_len(struct tle_freetable *ftable)
-{
- return sizeof(struct ntdb_freetable);
-}
-
-static void set_free_record(void *mem, ntdb_len_t len)
-{
- /* We do all the work in add_to_freetable */
-}
-
-static void add_zero_pad(struct ntdb_used_record *u, size_t len, size_t extra)
-{
- if (extra)
- ((char *)(u + 1))[len] = '\0';
-}
-
-static void set_data_record(void *mem, struct ntdb_context *ntdb,
- struct tle_used *used)
-{
- struct ntdb_used_record *u = mem;
-
- set_header(ntdb, u, NTDB_USED_MAGIC, used->key.dsize, used->data.dsize,
- used->key.dsize + used->data.dsize + used->extra);
- memcpy(u + 1, used->key.dptr, used->key.dsize);
- memcpy((char *)(u + 1) + used->key.dsize,
- used->data.dptr, used->data.dsize);
- add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
-}
-
-static void set_capability(void *mem, struct ntdb_context *ntdb,
- struct tle_capability *cap, struct ntdb_header *hdr,
- ntdb_off_t last_cap)
-{
- struct ntdb_capability *c = mem;
- ntdb_len_t len = sizeof(*c) - sizeof(struct ntdb_used_record) + cap->extra;
-
- c->type = cap->type;
- c->next = 0;
- set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len);
-
- /* Append to capability list. */
- if (!last_cap) {
- hdr->capabilities = cap->base.off;
- } else {
- c = (struct ntdb_capability *)((char *)hdr + last_cap);
- c->next = cap->base.off;
- }
-}
-
-static void set_freetable(void *mem, struct ntdb_context *ntdb,
- struct tle_freetable *freetable, struct ntdb_header *hdr,
- ntdb_off_t last_ftable)
-{
- struct ntdb_freetable *ftable = mem;
- memset(ftable, 0, sizeof(*ftable));
- set_header(ntdb, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
- sizeof(*ftable) - sizeof(ftable->hdr),
- sizeof(*ftable) - sizeof(ftable->hdr));
-
- if (last_ftable) {
- ftable = (struct ntdb_freetable *)((char *)hdr + last_ftable);
- ftable->next = freetable->base.off;
- } else {
- hdr->free_table = freetable->base.off;
- }
-}
-
-static void add_to_freetable(struct ntdb_context *ntdb,
- ntdb_off_t eoff,
- ntdb_off_t elen,
- unsigned ftable,
- struct tle_freetable *freetable)
-{
- ntdb->ftable_off = freetable->base.off;
- ntdb->ftable = ftable;
- add_free_record(ntdb, eoff, sizeof(struct ntdb_used_record) + elen,
- NTDB_LOCK_WAIT, false);
-}
-
-static ntdb_off_t hbucket_offset(ntdb_len_t idx)
-{
- return sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record)
- + idx * sizeof(ntdb_off_t);
-}
-
-/* FIXME: Our hash table handling here is primitive: we don't expand! */
-static void add_to_hashtable(struct ntdb_context *ntdb,
- ntdb_off_t eoff,
- NTDB_DATA key)
-{
- ntdb_off_t b_off;
- uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
-
- b_off = hbucket_offset(h & ((1 << ntdb->hash_bits)-1));
- if (ntdb_read_off(ntdb, b_off) != 0)
- abort();
-
- ntdb_write_off(ntdb, b_off, encode_offset(ntdb, eoff, h));
-}
-
-static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num)
-{
- unsigned i;
-
- for (i = 0; i < layout->num_elems; i++) {
- if (layout->elem[i].base.type != FREETABLE)
- continue;
- if (num == 0)
- return &layout->elem[i].ftable;
- num--;
- }
- abort();
-}
-
-/* FIXME: Support NTDB_CONVERT */
-struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
- void (*freefn)(void *),
- union ntdb_attribute *attr)
-{
- unsigned int i;
- ntdb_off_t off, hdrlen, len, last_ftable, last_cap;
- char *mem;
- struct ntdb_context *ntdb;
-
- /* Now populate our header, cribbing from a real NTDB header. */
- ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr);
-
- off = sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record)
- + (sizeof(ntdb_off_t) << ntdb->hash_bits);
- hdrlen = off;
-
- /* First pass of layout: calc lengths */
- for (i = 0; i < layout->num_elems; i++) {
- union ntdb_layout_elem *e = &layout->elem[i];
- e->base.off = off;
- switch (e->base.type) {
- case FREETABLE:
- len = freetable_len(&e->ftable);
- break;
- case FREE:
- len = free_record_len(e->free.len);
- break;
- case DATA:
- len = data_record_len(&e->used);
- break;
- case CAPABILITY:
- len = capability_len(&e->capability);
- break;
- default:
- abort();
- }
- off += len;
- }
-
- mem = malloc(off);
- /* Fill with some weird pattern. */
- memset(mem, 0x99, off);
- memcpy(mem, ntdb->file->map_ptr, hdrlen);
-
- /* Mug the ntdb we have to make it use this. */
- freefn(ntdb->file->map_ptr);
- ntdb->file->map_ptr = mem;
- ntdb->file->map_size = off;
-
- last_ftable = 0;
- last_cap = 0;
- for (i = 0; i < layout->num_elems; i++) {
- union ntdb_layout_elem *e = &layout->elem[i];
- switch (e->base.type) {
- case FREETABLE:
- set_freetable(mem + e->base.off, ntdb, &e->ftable,
- (struct ntdb_header *)mem, last_ftable);
- last_ftable = e->base.off;
- break;
- case FREE:
- set_free_record(mem + e->base.off, e->free.len);
- break;
- case DATA:
- set_data_record(mem + e->base.off, ntdb, &e->used);
- break;
- case CAPABILITY:
- set_capability(mem + e->base.off, ntdb, &e->capability,
- (struct ntdb_header *)mem, last_cap);
- last_cap = e->base.off;
- break;
- }
- }
- /* Must have a free table! */
- assert(last_ftable);
-
- /* Now fill the free and hash tables. */
- for (i = 0; i < layout->num_elems; i++) {
- union ntdb_layout_elem *e = &layout->elem[i];
- switch (e->base.type) {
- case FREE:
- add_to_freetable(ntdb, e->base.off, e->free.len,
- e->free.ftable_num,
- find_ftable(layout, e->free.ftable_num));
- break;
- case DATA:
- add_to_hashtable(ntdb, e->base.off, e->used.key);
- break;
- default:
- break;
- }
- }
-
- ntdb->ftable_off = find_ftable(layout, 0)->base.off;
- return ntdb;
-}
-
-void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
- union ntdb_attribute *attr, const char *filename)
-{
- struct ntdb_context *ntdb = ntdb_layout_get(layout, freefn, attr);
- int fd;
-
- fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT, 0600);
- if (fd < 0)
- err(1, "opening %s for writing", filename);
- if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size)
- != ntdb->file->map_size)
- err(1, "writing %s", filename);
- close(fd);
- ntdb_close(ntdb);
-}
-
-void ntdb_layout_free(struct ntdb_layout *layout)
-{
- unsigned int i;
-
- for (i = 0; i < layout->num_elems; i++) {
- if (layout->elem[i].base.type == DATA) {
- free(layout->elem[i].used.key.dptr);
- free(layout->elem[i].used.data.dptr);
- }
- }
- free(layout->elem);
- free(layout);
-}
+++ /dev/null
-#ifndef NTDB_TEST_LAYOUT_H
-#define NTDB_TEST_LAYOUT_H
-#include "../private.h"
-
-struct ntdb_layout *new_ntdb_layout(void);
-void ntdb_layout_add_freetable(struct ntdb_layout *layout);
-void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len,
- unsigned ftable);
-void ntdb_layout_add_used(struct ntdb_layout *layout,
- NTDB_DATA key, NTDB_DATA data,
- ntdb_len_t extra);
-void ntdb_layout_add_capability(struct ntdb_layout *layout,
- uint64_t type,
- bool write_breaks,
- bool check_breaks,
- bool open_breaks,
- ntdb_len_t extra);
-
-#if 0 /* FIXME: Allow allocation of subtables */
-void ntdb_layout_add_hashtable(struct ntdb_layout *layout,
- int htable_parent, /* -1 == toplevel */
- unsigned int bucket,
- ntdb_len_t extra);
-#endif
-/* freefn is needed if we're using failtest_free. */
-struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
- void (*freefn)(void *),
- union ntdb_attribute *attr);
-void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
- union ntdb_attribute *attr, const char *filename);
-
-void ntdb_layout_free(struct ntdb_layout *layout);
-
-enum layout_type {
- FREETABLE, FREE, DATA, CAPABILITY
-};
-
-/* Shared by all union members. */
-struct tle_base {
- enum layout_type type;
- ntdb_off_t off;
-};
-
-struct tle_freetable {
- struct tle_base base;
-};
-
-struct tle_free {
- struct tle_base base;
- ntdb_len_t len;
- unsigned ftable_num;
-};
-
-struct tle_used {
- struct tle_base base;
- NTDB_DATA key;
- NTDB_DATA data;
- ntdb_len_t extra;
-};
-
-struct tle_capability {
- struct tle_base base;
- uint64_t type;
- ntdb_len_t extra;
-};
-
-union ntdb_layout_elem {
- struct tle_base base;
- struct tle_freetable ftable;
- struct tle_free free;
- struct tle_used used;
- struct tle_capability capability;
-};
-
-struct ntdb_layout {
- unsigned int num_elems;
- union ntdb_layout_elem *elem;
-};
-
-#include "helprun-layout.h"
-#endif /* NTDB_TEST_LAYOUT_H */
+++ /dev/null
-/* We save the locks so we can reaquire them. */
-#include "../private.h" /* For NTDB_HASH_LOCK_START, etc. */
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include "tap-interface.h"
-#include "lock-tracking.h"
-
-struct lock {
- struct lock *next;
- unsigned int off;
- unsigned int len;
- int type;
-};
-static struct lock *locks;
-int locking_errors = 0;
-bool suppress_lockcheck = false;
-bool nonblocking_locks;
-int locking_would_block = 0;
-void (*unlock_callback)(int fd);
-
-int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
-{
- va_list ap;
- int ret, arg3;
- struct flock *fl;
- bool may_block = false;
-
- if (cmd != F_SETLK && cmd != F_SETLKW) {
- /* This may be totally bogus, but we don't know in general. */
- va_start(ap, cmd);
- arg3 = va_arg(ap, int);
- va_end(ap);
-
- return fcntl(fd, cmd, arg3);
- }
-
- va_start(ap, cmd);
- fl = va_arg(ap, struct flock *);
- va_end(ap);
-
- if (cmd == F_SETLKW && nonblocking_locks) {
- cmd = F_SETLK;
- may_block = true;
- }
- ret = fcntl(fd, cmd, fl);
-
- /* Detect when we failed, but might have been OK if we waited. */
- if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) {
- locking_would_block++;
- }
-
- if (fl->l_type == F_UNLCK) {
- struct lock **l;
- struct lock *old = NULL;
-
- for (l = &locks; *l; l = &(*l)->next) {
- if ((*l)->off == fl->l_start
- && (*l)->len == fl->l_len) {
- if (ret == 0) {
- old = *l;
- *l = (*l)->next;
- free(old);
- }
- break;
- }
- }
- if (!old && !suppress_lockcheck) {
- diag("Unknown unlock %u@%u - %i",
- (int)fl->l_len, (int)fl->l_start, ret);
- locking_errors++;
- }
- } else {
- struct lock *new, *i;
- unsigned int fl_end = fl->l_start + fl->l_len;
- if (fl->l_len == 0)
- fl_end = (unsigned int)-1;
-
- /* Check for overlaps: we shouldn't do this. */
- for (i = locks; i; i = i->next) {
- unsigned int i_end = i->off + i->len;
- if (i->len == 0)
- i_end = (unsigned int)-1;
-
- if (fl->l_start >= i->off && fl->l_start < i_end)
- break;
- if (fl_end > i->off && fl_end < i_end)
- break;
-
- /* ntdb_allrecord_lock does this, handle adjacent: */
- if (fl->l_start > NTDB_HASH_LOCK_START
- && fl->l_start == i_end && fl->l_type == i->type) {
- if (ret == 0) {
- i->len = fl->l_len
- ? i->len + fl->l_len
- : 0;
- }
- goto done;
- }
- }
- if (i) {
- /* Special case: upgrade of allrecord lock. */
- if (i->type == F_RDLCK && fl->l_type == F_WRLCK
- && i->off == NTDB_HASH_LOCK_START
- && fl->l_start == NTDB_HASH_LOCK_START
- && i->len == 0
- && fl->l_len == 0) {
- if (ret == 0)
- i->type = F_WRLCK;
- goto done;
- }
- if (!suppress_lockcheck) {
- diag("%s lock %u@%u overlaps %u@%u",
- fl->l_type == F_WRLCK ? "write" : "read",
- (int)fl->l_len, (int)fl->l_start,
- i->len, (int)i->off);
- locking_errors++;
- }
- }
-
- if (ret == 0) {
- new = malloc(sizeof *new);
- new->off = fl->l_start;
- new->len = fl->l_len;
- new->type = fl->l_type;
- new->next = locks;
- locks = new;
- }
- }
-done:
- if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback)
- unlock_callback(fd);
- return ret;
-}
-
-unsigned int forget_locking(void)
-{
- unsigned int num = 0;
- while (locks) {
- struct lock *next = locks->next;
- free(locks);
- locks = next;
- num++;
- }
- return num;
-}
+++ /dev/null
-#ifndef LOCK_TRACKING_H
-#define LOCK_TRACKING_H
-#include <stdbool.h>
-
-/* Set this if you want a callback after fnctl unlock. */
-extern void (*unlock_callback)(int fd);
-
-/* Replacement fcntl. */
-int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ );
-
-/* Discard locking info: returns number of locks outstanding. */
-unsigned int forget_locking(void);
-
-/* Number of errors in locking. */
-extern int locking_errors;
-
-/* Suppress lock checking. */
-extern bool suppress_lockcheck;
-
-/* Make all locks non-blocking. */
-extern bool nonblocking_locks;
-
-/* Number of times we failed a lock because we made it non-blocking. */
-extern int locking_would_block;
-#endif /* LOCK_TRACKING_H */
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include "tap-interface.h"
-#include "logging.h"
-
-unsigned tap_log_messages;
-const char *log_prefix = "";
-char *log_last = NULL;
-bool suppress_logging;
-
-union ntdb_attribute tap_log_attr = {
- .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG },
- .fn = tap_log_fn }
-};
-
-void tap_log_fn(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message, void *priv)
-{
- if (suppress_logging)
- return;
-
- diag("ntdb log level %u: %s: %s%s",
- level, ntdb_errorstr(ecode), log_prefix, message);
- if (log_last)
- free(log_last);
- log_last = strdup(message);
- tap_log_messages++;
-}
+++ /dev/null
-#ifndef NTDB_TEST_LOGGING_H
-#define NTDB_TEST_LOGGING_H
-#include "../ntdb.h"
-#include <stdbool.h>
-#include <string.h>
-
-extern bool suppress_logging;
-extern const char *log_prefix;
-extern unsigned tap_log_messages;
-extern union ntdb_attribute tap_log_attr;
-extern char *log_last;
-
-void tap_log_fn(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message, void *priv);
-#endif /* NTDB_TEST_LOGGING_H */
+++ /dev/null
-#ifndef NTDB_NO_FSYNC_H
-#define NTDB_NO_FSYNC_H
-/* Obey $TDB_NO_FSYNC, a bit like tdb does (only note our NTDB_NOSYNC
- * does less) */
-#define MAYBE_NOSYNC (getenv("TDB_NO_FSYNC") ? NTDB_NOSYNC : 0)
-#endif
+++ /dev/null
-#include "config.h"
-#include "../check.c"
-#include "../free.c"
-#include "../hash.c"
-#include "../io.c"
-#include "../lock.c"
-#include "../open.c"
-#include "../summary.c"
-#include "../ntdb.c"
-#include "../transaction.c"
-#include "../traverse.c"
+++ /dev/null
-#!/usr/bin/env python
-# Some simple tests for the Python bindings for TDB
-# Note that this tests the interface of the Python bindings
-# It does not test tdb itself.
-#
-# Copyright (C) 2007-2013 Jelmer Vernooij <jelmer@samba.org>
-# Published under the GNU LGPLv3 or later
-
-import ntdb
-from unittest import TestCase
-import os, tempfile
-
-
-class OpenTdbTests(TestCase):
-
- def test_nonexistent_read(self):
- self.assertRaises(IOError, ntdb.Ntdb, "/some/nonexistent/file", 0,
- ntdb.DEFAULT, os.O_RDWR)
-
-class CloseTdbTests(TestCase):
-
- def test_double_close(self):
- self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT,
- os.O_CREAT|os.O_RDWR)
- self.assertNotEqual(None, self.ntdb)
-
- # ensure that double close does not crash python
- self.ntdb.close()
- self.ntdb.close()
-
- # Check that further operations do not crash python
- self.assertRaises(RuntimeError, lambda: self.ntdb.transaction_start())
-
- self.assertRaises(RuntimeError, lambda: self.ntdb["bar"])
-
-
-class InternalTdbTests(TestCase):
-
- def test_repr(self):
- self.ntdb = ntdb.Ntdb()
-
- # repr used to crash on internal db
- self.assertEquals(repr(self.ntdb), "Ntdb(<internal>)")
-
-
-class SimpleTdbTests(TestCase):
-
- def setUp(self):
- super(SimpleTdbTests, self).setUp()
- self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT,
- os.O_CREAT|os.O_RDWR)
- self.assertNotEqual(None, self.ntdb)
-
- def tearDown(self):
- del self.ntdb
-
- def test_repr(self):
- self.assertTrue(repr(self.ntdb).startswith("Ntdb('"))
-
- def test_lockall(self):
- self.ntdb.lock_all()
-
- def test_unlockall(self):
- self.ntdb.lock_all()
- self.ntdb.unlock_all()
-
- def test_lockall_read(self):
- self.ntdb.read_lock_all()
- self.ntdb.read_unlock_all()
-
- def test_store(self):
- self.ntdb.store("bar", "bla")
- self.assertEquals("bla", self.ntdb.get("bar"))
-
- def test_getitem(self):
- self.ntdb["bar"] = "foo"
- self.assertEquals("foo", self.ntdb["bar"])
-
- def test_delete(self):
- self.ntdb["bar"] = "foo"
- del self.ntdb["bar"]
- self.assertRaises(KeyError, lambda: self.ntdb["bar"])
-
- def test_contains(self):
- self.ntdb["bla"] = "bloe"
- self.assertTrue("bla" in self.ntdb)
-
- def test_keyerror(self):
- self.assertRaises(KeyError, lambda: self.ntdb["bla"])
-
- def test_name(self):
- self.ntdb.filename
-
- def test_iterator(self):
- self.ntdb["bla"] = "1"
- self.ntdb["brainslug"] = "2"
- l = list(self.ntdb)
- l.sort()
- self.assertEquals(["bla", "brainslug"], l)
-
- def test_transaction_cancel(self):
- self.ntdb["bloe"] = "2"
- self.ntdb.transaction_start()
- self.ntdb["bloe"] = "1"
- self.ntdb.transaction_cancel()
- self.assertEquals("2", self.ntdb["bloe"])
-
- def test_transaction_commit(self):
- self.ntdb["bloe"] = "2"
- self.ntdb.transaction_start()
- self.ntdb["bloe"] = "1"
- self.ntdb.transaction_commit()
- self.assertEquals("1", self.ntdb["bloe"])
-
- def test_transaction_prepare_commit(self):
- self.ntdb["bloe"] = "2"
- self.ntdb.transaction_start()
- self.ntdb["bloe"] = "1"
- self.ntdb.transaction_prepare_commit()
- self.ntdb.transaction_commit()
- self.assertEquals("1", self.ntdb["bloe"])
-
- def test_iterkeys(self):
- self.ntdb["bloe"] = "2"
- self.ntdb["bla"] = "25"
- i = self.ntdb.iterkeys()
- self.assertEquals(set(["bloe", "bla"]), set([i.next(), i.next()]))
-
- def test_clear(self):
- self.ntdb["bloe"] = "2"
- self.ntdb["bla"] = "25"
- self.assertEquals(2, len(list(self.ntdb)))
- self.ntdb.clear()
- self.assertEquals(0, len(list(self.ntdb)))
-
- def test_len(self):
- self.assertEquals(0, len(list(self.ntdb)))
- self.ntdb["entry"] = "value"
- self.assertEquals(1, len(list(self.ntdb)))
-
- def test_add_flags(self):
- self.ntdb.add_flag(ntdb.NOMMAP)
- self.ntdb.remove_flag(ntdb.NOMMAP)
-
-
-class VersionTests(TestCase):
-
- def test_present(self):
- self.assertTrue(isinstance(ntdb.__version__, str))
-
-
-if __name__ == '__main__':
- import unittest
- unittest.TestProgram()
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_used_record rec;
- struct ntdb_context ntdb = { .log_fn = tap_log_fn };
-
- plan_tests(64 + 32 + 48*5 + 1);
-
- /* We should be able to encode any data value. */
- for (i = 0; i < 64; i++)
- ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i,
- 1ULL << i) == 0);
-
- /* And any key and data with < 64 bits between them. */
- for (i = 0; i < 32; i++) {
- ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
- ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
- klen + dlen) == 0);
- }
-
- /* We should neatly encode all values. */
- for (i = 0; i < 48; i++) {
- uint64_t klen = 1ULL << (i < 16 ? i : 15);
- uint64_t dlen = 1ULL << i;
- uint64_t xlen = 1ULL << (i < 32 ? i : 31);
- ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
- klen+dlen+xlen) == 0);
- ok1(rec_key_length(&rec) == klen);
- ok1(rec_data_length(&rec) == dlen);
- ok1(rec_extra_padding(&rec) == xlen);
- ok1(rec_magic(&rec) == NTDB_USED_MAGIC);
- }
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "helprun-external-agent.h"
-
-static unsigned int dumb_fls(uint64_t num)
-{
- int i;
-
- for (i = 63; i >= 0; i--) {
- if (num & (1ULL << i))
- break;
- }
- return i + 1;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
-
- plan_tests(64 * 64 + 2);
-
- ok1(fls64(0) == 0);
- ok1(dumb_fls(0) == 0);
-
- for (i = 0; i < 64; i++) {
- for (j = 0; j < 64; j++) {
- uint64_t val = (1ULL << i) | (1ULL << j);
- ok(fls64(val) == dumb_fls(val),
- "%llu -> %u should be %u", (long long)val,
- fls64(val), dumb_fls(val));
- }
- }
- return exit_status();
-}
+++ /dev/null
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-new_database.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- failtest_exit(exit_status());
-
- failtest_suppress = true;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- failtest_suppress = false;
- ntdb_close(ntdb);
- if (!ok1(tap_log_messages == 0))
- break;
- }
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- uint64_t val;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- failtest_suppress = true;
- ntdb = ntdb_open("run-expand.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- break;
-
- val = ntdb->file->map_size;
- /* Need some hash lock for expand. */
- ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0);
- failtest_suppress = false;
- if (!ok1(ntdb_expand(ntdb, 1) == 0)) {
- failtest_suppress = true;
- ntdb_close(ntdb);
- break;
- }
- failtest_suppress = true;
-
- ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR);
- ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- val = ntdb->file->map_size;
- ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0);
- failtest_suppress = false;
- if (!ok1(ntdb_expand(ntdb, 1024) == 0)) {
- failtest_suppress = true;
- ntdb_close(ntdb);
- break;
- }
- failtest_suppress = true;
- ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0);
- ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "layout.h"
-#include "helprun-external-agent.h"
-
-static ntdb_len_t free_record_length(struct ntdb_context *ntdb, ntdb_off_t off)
-{
- struct ntdb_free_record f;
- enum NTDB_ERROR ecode;
-
- ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f));
- if (ecode != NTDB_SUCCESS)
- return ecode;
- if (frec_magic(&f) != NTDB_FREE_MAGIC)
- return NTDB_ERR_CORRUPT;
- return frec_len(&f);
-}
-
-int main(int argc, char *argv[])
-{
- ntdb_off_t b_off, test;
- struct ntdb_context *ntdb;
- struct ntdb_layout *layout;
- NTDB_DATA data, key;
- ntdb_len_t len;
-
- /* FIXME: Test NTDB_CONVERT */
- /* FIXME: Test lock order fail. */
-
- plan_tests(42);
- data = ntdb_mkdata("world", 5);
- key = ntdb_mkdata("hello", 5);
-
- /* No coalescing can be done due to EOF */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- len = 15560;
- ntdb_layout_add_free(layout, len, 0);
- ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
- /* NOMMAP is for lockcheck. */
- ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
- O_RDWR, 0, &tap_log_attr);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == len);
-
- /* Figure out which bucket free entry is. */
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len));
- /* Lock and fail to coalesce. */
- ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
- test = layout->elem[1].base.off;
- ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, len, &test)
- == 0);
- ntdb_unlock_free_bucket(ntdb, b_off);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == len);
- ok1(test == layout->elem[1].base.off);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-
- /* No coalescing can be done due to used record */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_free(layout, 15528, 0);
- ntdb_layout_add_used(layout, key, data, 6);
- ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
- /* NOMMAP is for lockcheck. */
- ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
- O_RDWR, 0, &tap_log_attr);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Figure out which bucket free entry is. */
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(15528));
- /* Lock and fail to coalesce. */
- ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
- test = layout->elem[1].base.off;
- ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 15528, &test)
- == 0);
- ntdb_unlock_free_bucket(ntdb, b_off);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528);
- ok1(test == layout->elem[1].base.off);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-
- /* Coalescing can be done due to two free records, then EOF */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_free(layout, 1024, 0);
- ntdb_layout_add_free(layout, 14520, 0);
- ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
- /* NOMMAP is for lockcheck. */
- ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
- O_RDWR, 0, &tap_log_attr);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
- ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14520);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Figure out which bucket (first) free entry is. */
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
- /* Lock and coalesce. */
- ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
- test = layout->elem[2].base.off;
- ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
- == 1024 + sizeof(struct ntdb_used_record) + 14520);
- /* Should tell us it's erased this one... */
- ok1(test == NTDB_ERR_NOEXIST);
- ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
- ok1(free_record_length(ntdb, layout->elem[1].base.off)
- == 1024 + sizeof(struct ntdb_used_record) + 14520);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-
- /* Coalescing can be done due to two free records, then data */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_free(layout, 1024, 0);
- ntdb_layout_add_free(layout, 14488, 0);
- ntdb_layout_add_used(layout, key, data, 6);
- ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
- /* NOMMAP is for lockcheck. */
- ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
- O_RDWR, 0, &tap_log_attr);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
- ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14488);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Figure out which bucket free entry is. */
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
- /* Lock and coalesce. */
- ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
- test = layout->elem[2].base.off;
- ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
- == 1024 + sizeof(struct ntdb_used_record) + 14488);
- ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
- ok1(free_record_length(ntdb, layout->elem[1].base.off)
- == 1024 + sizeof(struct ntdb_used_record) + 14488);
- ok1(test == NTDB_ERR_NOEXIST);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-
- /* Coalescing can be done due to three free records, then EOF */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_free(layout, 1024, 0);
- ntdb_layout_add_free(layout, 512, 0);
- ntdb_layout_add_free(layout, 13992, 0);
- ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
- /* NOMMAP is for lockcheck. */
- ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
- O_RDWR, 0, &tap_log_attr);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
- ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512);
- ok1(free_record_length(ntdb, layout->elem[3].base.off) == 13992);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Figure out which bucket free entry is. */
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
- /* Lock and coalesce. */
- ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
- test = layout->elem[2].base.off;
- ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
- == 1024 + sizeof(struct ntdb_used_record) + 512
- + sizeof(struct ntdb_used_record) + 13992);
- ok1(ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0);
- ok1(free_record_length(ntdb, layout->elem[1].base.off)
- == 1024 + sizeof(struct ntdb_used_record) + 512
- + sizeof(struct ntdb_used_record) + 13992);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-/* We rig the hash so all records clash. */
-static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv)
-{
- return *((const unsigned int *)key) << 20;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- unsigned int v;
- struct ntdb_used_record rec;
- NTDB_DATA key = { (unsigned char *)&v, sizeof(v) };
- NTDB_DATA dbuf = { (unsigned char *)&v, sizeof(v) };
- union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = clash } };
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT,
- };
-
- hattr.base.next = &tap_log_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 137 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- struct hash_info h;
- ntdb_off_t new_off, new_off2, off;
-
- ntdb = ntdb_open("run-04-basichash.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- v = 0;
- /* Should not find it. */
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in top table, bucket 0. */
- ok1(h.table == NTDB_HASH_OFFSET);
- ok1(h.table_size == (1 << ntdb->hash_bits));
- ok1(h.bucket == 0);
- ok1(h.old_val == 0);
-
- /* Should have lock on bucket 0 */
- ok1(h.h == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- /* FIXME: Check lock length */
-
- /* Allocate a new record. */
- new_off = alloc(ntdb, key.dsize, dbuf.dsize,
- NTDB_USED_MAGIC, false);
- ok1(!NTDB_OFF_IS_ERR(new_off));
-
- /* We should be able to add it now. */
- ok1(add_to_hash(ntdb, &h, new_off) == 0);
-
- /* Make sure we fill it in for later finding. */
- off = new_off + sizeof(struct ntdb_used_record);
- ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
- off += key.dsize;
- ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
-
- /* We should be able to unlock that OK. */
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* Database should be consistent. */
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Now, this should give a successful lookup. */
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located it in top table, bucket 0. */
- ok1(h.table == NTDB_HASH_OFFSET);
- ok1(h.table_size == (1 << ntdb->hash_bits));
- ok1(h.bucket == 0);
-
- /* Should have lock on bucket 0 */
- ok1(h.h == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- /* FIXME: Check lock length */
-
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* Database should be consistent. */
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Test expansion. */
- v = 1;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located clash in toplevel bucket 0. */
- ok1(h.table == NTDB_HASH_OFFSET);
- ok1(h.table_size == (1 << ntdb->hash_bits));
- ok1(h.bucket == 0);
- ok1((h.old_val & NTDB_OFF_MASK) == new_off);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- /* FIXME: Check lock length */
-
- new_off2 = alloc(ntdb, key.dsize, dbuf.dsize,
- NTDB_USED_MAGIC, false);
- ok1(!NTDB_OFF_IS_ERR(new_off2));
-
- off = new_off2 + sizeof(struct ntdb_used_record);
- ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
- off += key.dsize;
- ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
-
- /* We should be able to add it now. */
- ok1(add_to_hash(ntdb, &h, new_off2) == 0);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* Should be happy with expansion. */
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Should be able to find both. */
- v = 1;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in chain. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 2);
- ok1(h.bucket == 1);
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- v = 0;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in chain. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 2);
- ok1(h.bucket == 0);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- /* FIXME: Check lock length */
-
- /* Simple delete should work. */
- ok1(delete_from_hash(ntdb, &h) == 0);
- ok1(add_free_record(ntdb, new_off,
- sizeof(struct ntdb_used_record)
- + rec_key_length(&rec)
- + rec_data_length(&rec)
- + rec_extra_padding(&rec),
- NTDB_LOCK_NOWAIT, false) == 0);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Should still be able to find other record. */
- v = 1;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in chain. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 2);
- ok1(h.bucket == 1);
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* Now should find empty space. */
- v = 0;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in chain, bucket 0. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 2);
- ok1(h.bucket == 0);
- ok1(h.old_val == 0);
-
- /* Adding another record should work. */
- v = 2;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in chain, bucket 0. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 2);
- ok1(h.bucket == 0);
- ok1(h.old_val == 0);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
-
- new_off = alloc(ntdb, key.dsize, dbuf.dsize,
- NTDB_USED_MAGIC, false);
- ok1(!NTDB_OFF_IS_ERR(new_off2));
- ok1(add_to_hash(ntdb, &h, new_off) == 0);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- off = new_off + sizeof(struct ntdb_used_record);
- ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
- off += key.dsize;
- ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
-
- /* Adding another record should cause expansion. */
- v = 3;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should not have located space in chain. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 2);
- ok1(h.bucket == 2);
- ok1(h.old_val != 0);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
-
- new_off = alloc(ntdb, key.dsize, dbuf.dsize,
- NTDB_USED_MAGIC, false);
- ok1(!NTDB_OFF_IS_ERR(new_off2));
- off = new_off + sizeof(struct ntdb_used_record);
- ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
- off += key.dsize;
- ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
- ok1(add_to_hash(ntdb, &h, new_off) == 0);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* Retrieve it and check. */
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have appended to chain, bucket 2. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 3);
- ok1(h.bucket == 2);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* YA record: relocation. */
- v = 4;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should not have located space in chain. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 3);
- ok1(h.bucket == 3);
- ok1(h.old_val != 0);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
-
- new_off = alloc(ntdb, key.dsize, dbuf.dsize,
- NTDB_USED_MAGIC, false);
- ok1(!NTDB_OFF_IS_ERR(new_off2));
- off = new_off + sizeof(struct ntdb_used_record);
- ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
- off += key.dsize;
- ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
- ok1(add_to_hash(ntdb, &h, new_off) == 0);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- /* Retrieve it and check. */
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have appended to chain, bucket 2. */
- ok1(h.table > NTDB_HASH_OFFSET);
- ok1(h.table_size == 4);
- ok1(h.bucket == 3);
-
- /* Should have lock on bucket 0 */
- ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
- ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
- ok1((ntdb->flags & NTDB_NOLOCK)
- || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
-
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4), d;
- union ntdb_attribute seed_attr;
- unsigned int msgs = 0;
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
-
- seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
- seed_attr.base.next = &tap_log_attr;
- seed_attr.seed.seed = 0;
-
- failtest_suppress = true;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-05-readonly-open.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600,
- &seed_attr);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ntdb_close(ntdb);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-05-readonly-open.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDONLY, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- break;
- ok1(tap_log_messages == msgs);
- /* Fetch should succeed, stores should fail. */
- if (!ok1(ntdb_fetch(ntdb, key, &d) == 0))
- goto fail;
- ok1(ntdb_deq(d, data));
- free(d.dptr);
- if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY)
- == NTDB_ERR_RDONLY))
- goto fail;
- ok1(tap_log_messages == ++msgs);
- if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT)
- == NTDB_ERR_RDONLY))
- goto fail;
- ok1(tap_log_messages == ++msgs);
- failtest_suppress = true;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- ok1(tap_log_messages == msgs);
- /* SIGH: failtest bug, it doesn't save the ntdb file because
- * we have it read-only. If we go around again, it gets
- * changed underneath us and things get screwy. */
- if (failtest_has_failed())
- break;
- }
- failtest_exit(exit_status());
-
-fail:
- failtest_suppress = true;
- ntdb_close(ntdb);
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
-
- failtest_suppress = true;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-10-simple-store.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- break;
- /* Modify should fail. */
- failtest_suppress = false;
- if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY)
- == NTDB_ERR_NOEXIST))
- goto fail;
- failtest_suppress = true;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Insert should succeed. */
- failtest_suppress = false;
- if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0))
- goto fail;
- failtest_suppress = true;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Second insert should fail. */
- failtest_suppress = false;
- if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT)
- == NTDB_ERR_EXISTS))
- goto fail;
- failtest_suppress = true;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- }
- ok1(tap_log_messages == 0);
- failtest_exit(exit_status());
-
-fail:
- failtest_suppress = true;
- ntdb_close(ntdb);
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
-
- failtest_suppress = true;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-11-simple-fetch.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (ntdb) {
- NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
-
- /* fetch should fail. */
- failtest_suppress = false;
- if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST))
- goto fail;
- failtest_suppress = true;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Insert should succeed. */
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Fetch should now work. */
- failtest_suppress = false;
- if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS))
- goto fail;
- failtest_suppress = true;
- ok1(ntdb_deq(d, data));
- free(d.dptr);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- }
- }
- ok1(tap_log_messages == 0);
- failtest_exit(exit_status());
-
-fail:
- failtest_suppress = true;
- ntdb_close(ntdb);
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include "../private.h"
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL,
- NTDB_INTERNAL|NTDB_CONVERT,
- NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
-
- failtest_suppress = true;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-12-check.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
-
- /* This is what we really want to test: ntdb_check(). */
- failtest_suppress = false;
- if (!ok1(ntdb_check(ntdb, NULL, NULL) == 0))
- goto fail;
- failtest_suppress = true;
-
- ntdb_close(ntdb);
- }
- ok1(tap_log_messages == 0);
- failtest_exit(exit_status());
-
-fail:
- failtest_suppress = true;
- ntdb_close(ntdb);
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/ilog/ilog.h>
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-#define MAX_SIZE 13100
-#define SIZE_STEP 131
-
-static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key)
-{
- ntdb_off_t off;
- struct ntdb_used_record urec;
- struct hash_info h;
-
- off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec, NULL);
- if (NTDB_OFF_IS_ERR(off))
- return 0;
- ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
- return off;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j, moves;
- struct ntdb_context *ntdb;
- unsigned char *buffer;
- ntdb_off_t oldoff = 0, newoff;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data;
-
- buffer = malloc(MAX_SIZE);
- for (i = 0; i < MAX_SIZE; i++)
- buffer[i] = i;
-
- plan_tests(sizeof(flags) / sizeof(flags[0])
- * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7)
- + 1);
-
- /* Using ntdb_store. */
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- moves = 0;
- for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
- data.dptr = buffer;
- data.dsize = j;
- ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
- ok1(data.dsize == j);
- ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
- free(data.dptr);
- newoff = ntdb_offset(ntdb, key);
- if (newoff != oldoff)
- moves++;
- oldoff = newoff;
- }
- ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0));
- /* We should increase by 50% each time... */
- ok(moves <= ilog64(j / SIZE_STEP)*2,
- "Moved %u times", moves);
- ntdb_close(ntdb);
- }
-
- /* Using ntdb_append. */
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- size_t prev_len = 0;
- ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- moves = 0;
- for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
- data.dptr = buffer + prev_len;
- data.dsize = j - prev_len;
- ok1(ntdb_append(ntdb, key, data) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
- ok1(data.dsize == j);
- ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
- free(data.dptr);
- prev_len = data.dsize;
- newoff = ntdb_offset(ntdb, key);
- if (newoff != oldoff)
- moves++;
- oldoff = newoff;
- }
- ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0));
- /* We should increase by 50% each time... */
- ok(moves <= ilog64(j / SIZE_STEP)*2,
- "Moved %u times", moves);
- ntdb_close(ntdb);
- }
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Huge initial store. */
- data.dptr = buffer;
- data.dsize = MAX_SIZE;
- ok1(ntdb_append(ntdb, key, data) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
- ok1(data.dsize == MAX_SIZE);
- ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
- free(data.dptr);
- ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
- && ntdb->file->num_lockrecs == 0));
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- free(buffer);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-#define OVERLOAD 100
-
-static uint32_t badhash(const void *key, size_t len, uint32_t seed, void *priv)
-{
- return 0;
-}
-
-static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p)
-{
- if (p)
- return ntdb_delete(ntdb, key);
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
- NTDB_DATA dbuf = { (unsigned char *)&j, sizeof(j) };
- union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = badhash } };
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT,
- };
-
- hattr.base.next = &tap_log_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * (7 * OVERLOAD + 11) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
-
- ntdb = ntdb_open("run-25-hashoverload.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Overload a bucket. */
- for (j = 0; j < OVERLOAD; j++) {
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- }
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Check we can find them all. */
- for (j = 0; j < OVERLOAD; j++) {
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == sizeof(j));
- ok1(d.dptr != NULL);
- ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
- free(d.dptr);
- }
-
- /* Traverse through them. */
- ok1(ntdb_traverse(ntdb, trav, NULL) == OVERLOAD);
-
- /* Delete the first 99. */
- for (j = 0; j < OVERLOAD-1; j++)
- ok1(ntdb_delete(ntdb, key) == 0);
-
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == sizeof(j));
- ok1(d.dptr != NULL);
- ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
- free(d.dptr);
-
- /* Traverse through them. */
- ok1(ntdb_traverse(ntdb, trav, NULL) == 1);
-
- /* Re-add */
- for (j = 0; j < OVERLOAD-1; j++) {
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- }
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Now try deleting as we go. */
- ok1(ntdb_traverse(ntdb, trav, trav) == OVERLOAD);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb_traverse(ntdb, trav, NULL) == 0);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static bool empty_freetable(struct ntdb_context *ntdb)
-{
- struct ntdb_freetable ftab;
- unsigned int i;
-
- /* Now, free table should be completely exhausted in zone 0 */
- if (ntdb_read_convert(ntdb, ntdb->ftable_off, &ftab, sizeof(ftab)) != 0)
- abort();
-
- for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) {
- if (ftab.buckets[i])
- return false;
- }
- return true;
-}
-
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- NTDB_DATA k, d;
- uint64_t size;
- bool was_empty = false;
-
- k.dptr = (void *)&j;
- k.dsize = sizeof(j);
-
- ntdb = ntdb_open("run-30-exhaust-before-expand.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* There's one empty record in initial db. */
- ok1(!empty_freetable(ntdb));
-
- size = ntdb->file->map_size;
-
- /* Create one record to chew up most space. */
- d.dsize = size - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 32;
- d.dptr = calloc(d.dsize, 1);
- j = 0;
- ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
- ok1(ntdb->file->map_size == size);
- free(d.dptr);
-
- /* Now insert minimal-length records until we expand. */
- for (j = 1; ntdb->file->map_size == size; j++) {
- was_empty = empty_freetable(ntdb);
- if (ntdb_store(ntdb, k, k, NTDB_INSERT) != 0)
- err(1, "Failed to store record %i", j);
- }
-
- /* Would have been empty before expansion, but no longer. */
- ok1(was_empty);
- ok1(!empty_freetable(ntdb));
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h"
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <ccan/failtest/failtest.h>
-#include "logging.h"
-#include "failtest_helper.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i, messages = 0;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 4);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-35-convert.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- if (!ok1(ntdb))
- failtest_exit(exit_status());
-
- ntdb_close(ntdb);
- /* We can fail in log message formatting or open. That's OK */
- if (failtest_has_failed()) {
- failtest_exit(exit_status());
- }
- /* If we say NTDB_CONVERT, it must be converted */
- ntdb = ntdb_open("run-35-convert.ntdb",
- flags[i]|NTDB_CONVERT|MAYBE_NOSYNC,
- O_RDWR, 0600, &tap_log_attr);
- if (flags[i] & NTDB_CONVERT) {
- if (!ntdb)
- failtest_exit(exit_status());
- ok1(ntdb_get_flags(ntdb) & NTDB_CONVERT);
- ntdb_close(ntdb);
- } else {
- if (!ok1(!ntdb && errno == EIO))
- failtest_exit(exit_status());
- ok1(tap_log_messages == ++messages);
- if (!ok1(log_last && strstr(log_last, "NTDB_CONVERT")))
- failtest_exit(exit_status());
- }
-
- /* If don't say NTDB_CONVERT, it *may* be converted */
- ntdb = ntdb_open("run-35-convert.ntdb",
- (flags[i] & ~NTDB_CONVERT)|MAYBE_NOSYNC,
- O_RDWR, 0600, &tap_log_attr);
- if (!ntdb)
- failtest_exit(exit_status());
- ok1(ntdb_get_flags(ntdb) == (flags[i]|MAYBE_NOSYNC));
- ntdb_close(ntdb);
- }
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "layout.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- ntdb_off_t off;
- struct ntdb_context *ntdb;
- struct ntdb_layout *layout;
- NTDB_DATA key, data;
- union ntdb_attribute seed;
-
- /* This seed value previously tickled a layout.c bug. */
- seed.base.attr = NTDB_ATTRIBUTE_SEED;
- seed.seed.seed = 0xb1142bc054d035b4ULL;
- seed.base.next = &tap_log_attr;
-
- plan_tests(11);
- key = ntdb_mkdata("Hello", 5);
- data = ntdb_mkdata("world", 5);
-
- /* Create a NTDB with three free tables. */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_free(layout, 80, 0);
- /* Used record prevent coalescing. */
- ntdb_layout_add_used(layout, key, data, 6);
- ntdb_layout_add_free(layout, 160, 1);
- key.dsize--;
- ntdb_layout_add_used(layout, key, data, 7);
- ntdb_layout_add_free(layout, 320, 2);
- key.dsize--;
- ntdb_layout_add_used(layout, key, data, 8);
- ntdb_layout_add_free(layout, 40, 0);
- ntdb = ntdb_layout_get(layout, free, &seed);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC);
- ok1(off == layout->elem[3].base.off);
- ok1(ntdb->ftable_off == layout->elem[0].base.off);
-
- off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC);
- ok1(off == layout->elem[5].base.off);
- ok1(ntdb->ftable_off == layout->elem[1].base.off);
-
- off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC);
- ok1(off == layout->elem[7].base.off);
- ok1(ntdb->ftable_off == layout->elem[2].base.off);
-
- off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC);
- ok1(off == layout->elem[9].base.off);
- ok1(ntdb->ftable_off == layout->elem[0].base.off);
-
- /* Now we fail. */
- off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC);
- ok1(off == 0);
-
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h"
-#include <unistd.h>
-#include "lock-tracking.h"
-
-static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
-static ssize_t write_check(int fd, const void *buf, size_t count);
-static int ftruncate_check(int fd, off_t length);
-
-#define pwrite pwrite_check
-#define write write_check
-#define fcntl fcntl_with_lockcheck
-#define ftruncate ftruncate_check
-
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdarg.h>
-#include "external-agent.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static struct agent *agent;
-static bool opened;
-static int errors = 0;
-#define TEST_DBNAME "run-56-open-during-transaction.ntdb"
-
-#undef write
-#undef pwrite
-#undef fcntl
-#undef ftruncate
-
-static bool is_same(const char *snapshot, const char *latest, off_t len)
-{
- unsigned i;
-
- for (i = 0; i < len; i++) {
- if (snapshot[i] != latest[i])
- return false;
- }
- return true;
-}
-
-static bool compare_file(int fd, const char *snapshot, off_t snapshot_len)
-{
- char *contents;
- bool ret;
-
- /* over-length read serves as length check. */
- contents = malloc(snapshot_len+1);
- ret = pread(fd, contents, snapshot_len+1, 0) == snapshot_len
- && is_same(snapshot, contents, snapshot_len);
- free(contents);
- return ret;
-}
-
-static void check_file_intact(int fd)
-{
- enum agent_return ret;
- struct stat st;
- char *contents;
-
- fstat(fd, &st);
- contents = malloc(st.st_size);
- if (pread(fd, contents, st.st_size, 0) != st.st_size) {
- diag("Read fail");
- errors++;
- return;
- }
-
- /* Ask agent to open file. */
- ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
-
- /* It's OK to open it, but it must not have changed! */
- if (!compare_file(fd, contents, st.st_size)) {
- diag("Agent changed file after opening %s",
- agent_return_name(ret));
- errors++;
- }
-
- if (ret == SUCCESS) {
- ret = external_agent_operation(agent, CLOSE, NULL);
- if (ret != SUCCESS) {
- diag("Agent failed to close ntdb: %s",
- agent_return_name(ret));
- errors++;
- }
- } else if (ret != WOULD_HAVE_BLOCKED) {
- diag("Agent opening file gave %s",
- agent_return_name(ret));
- errors++;
- }
-
- free(contents);
-}
-
-static void after_unlock(int fd)
-{
- if (opened)
- check_file_intact(fd);
-}
-
-static ssize_t pwrite_check(int fd,
- const void *buf, size_t count, off_t offset)
-{
- if (opened)
- check_file_intact(fd);
-
- return pwrite(fd, buf, count, offset);
-}
-
-static ssize_t write_check(int fd, const void *buf, size_t count)
-{
- if (opened)
- check_file_intact(fd);
-
- return write(fd, buf, count);
-}
-
-static int ftruncate_check(int fd, off_t length)
-{
- if (opened)
- check_file_intact(fd);
-
- return ftruncate(fd, length);
-
-}
-
-int main(int argc, char *argv[])
-{
- const int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- int i;
- struct ntdb_context *ntdb;
- NTDB_DATA key, data;
-
- plan_tests(sizeof(flags)/sizeof(flags[0]) * 5);
- agent = prepare_external_agent();
- if (!agent)
- err(1, "preparing agent");
-
- unlock_callback = after_unlock;
- for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
- diag("Test with %s and %s\n",
- (flags[i] & NTDB_CONVERT) ? "CONVERT" : "DEFAULT",
- (flags[i] & NTDB_NOMMAP) ? "no mmap" : "mmap");
- unlink(TEST_DBNAME);
- ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
-
- opened = true;
- ok1(ntdb_transaction_start(ntdb) == 0);
- key = ntdb_mkdata("hi", strlen("hi"));
- data = ntdb_mkdata("world", strlen("world"));
-
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb_transaction_commit(ntdb) == 0);
- ok(!errors, "We had %u open errors", errors);
-
- opened = false;
- ntdb_close(ntdb);
- }
-
- return exit_status();
-}
+++ /dev/null
-#include "../private.h"
-#include <unistd.h>
-#include "lock-tracking.h"
-#include "tap-interface.h"
-#include <stdlib.h>
-#include <assert.h>
-static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
-static ssize_t write_check(int fd, const void *buf, size_t count);
-static int ftruncate_check(int fd, off_t length);
-
-#define pwrite pwrite_check
-#define write write_check
-#define fcntl fcntl_with_lockcheck
-#define ftruncate ftruncate_check
-
-/* There's a malloc inside transaction_setup_recovery, and valgrind complains
- * when we longjmp and leak it. */
-#define MAX_ALLOCATIONS 10
-static void *allocated[MAX_ALLOCATIONS];
-static unsigned max_alloc = 0;
-
-static void *malloc_noleak(size_t len)
-{
- unsigned int i;
-
- for (i = 0; i < MAX_ALLOCATIONS; i++)
- if (!allocated[i]) {
- allocated[i] = malloc(len);
- if (i > max_alloc) {
- max_alloc = i;
- diag("max_alloc: %i", max_alloc);
- }
- return allocated[i];
- }
- diag("Too many allocations!");
- abort();
-}
-
-static void *realloc_noleak(void *p, size_t size)
-{
- unsigned int i;
-
- for (i = 0; i < MAX_ALLOCATIONS; i++) {
- if (allocated[i] == p) {
- if (i > max_alloc) {
- max_alloc = i;
- diag("max_alloc: %i", max_alloc);
- }
- return allocated[i] = realloc(p, size);
- }
- }
- diag("Untracked realloc!");
- abort();
-}
-
-static void free_noleak(void *p)
-{
- unsigned int i;
-
- /* We don't catch asprintf, so don't complain if we miss one. */
- for (i = 0; i < MAX_ALLOCATIONS; i++) {
- if (allocated[i] == p) {
- allocated[i] = NULL;
- break;
- }
- }
- free(p);
-}
-
-static void free_all(void)
-{
- unsigned int i;
-
- for (i = 0; i < MAX_ALLOCATIONS; i++) {
- free(allocated[i]);
- allocated[i] = NULL;
- }
-}
-
-#define malloc malloc_noleak
-#define free(x) free_noleak(x)
-#define realloc realloc_noleak
-
-#include "ntdb-source.h"
-
-#undef malloc
-#undef free
-#undef realloc
-#undef write
-#undef pwrite
-#undef fcntl
-#undef ftruncate
-
-#include <stdbool.h>
-#include <stdarg.h>
-#include <ccan/err/err.h>
-#include <setjmp.h>
-#include "external-agent.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static bool in_transaction;
-static int target, current;
-static jmp_buf jmpbuf;
-#define TEST_DBNAME "run-57-die-during-transaction.ntdb"
-#define KEY_STRING "helloworld"
-#define DATA_STRING "Helloworld"
-
-static void maybe_die(int fd)
-{
- if (in_transaction && current++ == target) {
- longjmp(jmpbuf, 1);
- }
-}
-
-static ssize_t pwrite_check(int fd,
- const void *buf, size_t count, off_t offset)
-{
- ssize_t ret;
-
- maybe_die(fd);
-
- ret = pwrite(fd, buf, count, offset);
- if (ret != count)
- return ret;
-
- maybe_die(fd);
- return ret;
-}
-
-static ssize_t write_check(int fd, const void *buf, size_t count)
-{
- ssize_t ret;
-
- maybe_die(fd);
-
- ret = write(fd, buf, count);
- if (ret != count)
- return ret;
-
- maybe_die(fd);
- return ret;
-}
-
-static int ftruncate_check(int fd, off_t length)
-{
- int ret;
-
- maybe_die(fd);
-
- ret = ftruncate(fd, length);
-
- maybe_die(fd);
- return ret;
-}
-
-static bool test_death(enum operation op, struct agent *agent,
- bool pre_create_recovery)
-{
- struct ntdb_context *ntdb = NULL;
- NTDB_DATA key, data;
- enum agent_return ret;
- int needed_recovery = 0;
-
- current = target = 0;
- /* Big long data to force a change. */
- data = ntdb_mkdata(DATA_STRING, strlen(DATA_STRING));
-
-reset:
- unlink(TEST_DBNAME);
- ntdb = ntdb_open(TEST_DBNAME, NTDB_NOMMAP|MAYBE_NOSYNC,
- O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr);
- if (!ntdb) {
- diag("Failed opening NTDB: %s", strerror(errno));
- return false;
- }
-
- if (setjmp(jmpbuf) != 0) {
- /* We're partway through. Simulate our death. */
- close(ntdb->file->fd);
- forget_locking();
- in_transaction = false;
-
- ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
- if (ret == SUCCESS)
- needed_recovery++;
- else if (ret != FAILED) {
- diag("Step %u agent NEEDS_RECOVERY = %s", current,
- agent_return_name(ret));
- return false;
- }
-
- /* Could be key, or data. */
- ret = external_agent_operation(agent, op,
- KEY_STRING "=" KEY_STRING);
- if (ret != SUCCESS) {
- ret = external_agent_operation(agent, op,
- KEY_STRING
- "=" DATA_STRING);
- }
- if (ret != SUCCESS) {
- diag("Step %u op %s failed = %s", current,
- operation_name(op),
- agent_return_name(ret));
- return false;
- }
-
- ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
- if (ret != FAILED) {
- diag("Still needs recovery after step %u = %s",
- current, agent_return_name(ret));
- return false;
- }
-
- ret = external_agent_operation(agent, CHECK, "");
- if (ret != SUCCESS) {
- diag("Step %u check failed = %s", current,
- agent_return_name(ret));
- return false;
- }
-
- ret = external_agent_operation(agent, CLOSE, "");
- if (ret != SUCCESS) {
- diag("Step %u close failed = %s", current,
- agent_return_name(ret));
- return false;
- }
-
- /* Suppress logging as this tries to use closed fd. */
- suppress_logging = true;
- suppress_lockcheck = true;
- ntdb_close(ntdb);
- suppress_logging = false;
- suppress_lockcheck = false;
- target++;
- current = 0;
- free_all();
- goto reset;
- }
-
- /* Put key for agent to fetch. */
- key = ntdb_mkdata(KEY_STRING, strlen(KEY_STRING));
-
- if (pre_create_recovery) {
- /* Using a transaction now means we allocate the recovery
- * area immediately. That makes the later transaction smaller
- * and thus tickles a bug we had. */
- if (ntdb_transaction_start(ntdb) != 0)
- return false;
- }
- if (ntdb_store(ntdb, key, key, NTDB_INSERT) != 0)
- return false;
- if (pre_create_recovery) {
- if (ntdb_transaction_commit(ntdb) != 0)
- return false;
- }
-
- /* This is the key we insert in transaction. */
- key.dsize--;
-
- ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
- if (ret != SUCCESS)
- errx(1, "Agent failed to open: %s", agent_return_name(ret));
-
- ret = external_agent_operation(agent, FETCH, KEY_STRING "=" KEY_STRING);
- if (ret != SUCCESS)
- errx(1, "Agent failed find key: %s", agent_return_name(ret));
-
- in_transaction = true;
- if (ntdb_transaction_start(ntdb) != 0)
- return false;
-
- if (ntdb_store(ntdb, key, data, NTDB_INSERT) != 0)
- return false;
-
- if (ntdb_transaction_commit(ntdb) != 0)
- return false;
-
- in_transaction = false;
-
- /* We made it! */
- diag("Completed %u runs", current);
- ntdb_close(ntdb);
- ret = external_agent_operation(agent, CLOSE, "");
- if (ret != SUCCESS) {
- diag("Step %u close failed = %s", current,
- agent_return_name(ret));
- return false;
- }
-
- ok1(needed_recovery);
- ok1(locking_errors == 0);
- ok1(forget_locking() == 0);
- locking_errors = 0;
- return true;
-}
-
-int main(int argc, char *argv[])
-{
- enum operation ops[] = { FETCH, STORE, TRANSACTION_START };
- struct agent *agent;
- int i, j;
-
- plan_tests(24);
- unlock_callback = maybe_die;
-
- external_agent_free = free_noleak;
- agent = prepare_external_agent();
- if (!agent)
- err(1, "preparing agent");
-
- for (j = 0; j < 2; j++) {
- for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) {
- diag("Testing %s after death (%s recovery area)",
- operation_name(ops[i]), j ? "with" : "without");
- ok1(test_death(ops[i], agent, j));
- }
- }
-
- free_external_agent(agent);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-/* The largest 32-bit value which is still a multiple of NTDB_PGSIZE */
-#define ALMOST_4G ((uint32_t)-NTDB_PGSIZE)
-/* And this pushes it over 32 bits */
-#define A_LITTLE_BIT (NTDB_PGSIZE * 2)
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- if (sizeof(off_t) <= 4) {
- plan_tests(1);
- pass("No 64 bit off_t");
- return exit_status();
- }
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 16);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- off_t old_size;
- NTDB_DATA k, d;
- struct hash_info h;
- struct ntdb_used_record rec;
- ntdb_off_t off;
-
- ntdb = ntdb_open("run-64-bit-ntdb.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- old_size = ntdb->file->map_size;
-
- /* Add a fake record to chew up the existing free space. */
- k = ntdb_mkdata("fake", 4);
- d.dsize = ntdb->file->map_size
- - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8;
- d.dptr = malloc(d.dsize);
- memset(d.dptr, 0, d.dsize);
- ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
- ok1(ntdb->file->map_size == old_size);
- free(d.dptr);
-
- /* This makes a sparse file */
- ok1(ftruncate(ntdb->file->fd, ALMOST_4G) == 0);
- ok1(add_free_record(ntdb, old_size, ALMOST_4G - old_size,
- NTDB_LOCK_WAIT, false) == NTDB_SUCCESS);
-
- /* Now add a little record past the 4G barrier. */
- ok1(ntdb_expand_file(ntdb, A_LITTLE_BIT) == NTDB_SUCCESS);
- ok1(add_free_record(ntdb, ALMOST_4G, A_LITTLE_BIT,
- NTDB_LOCK_WAIT, false)
- == NTDB_SUCCESS);
-
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
-
- /* Test allocation path. */
- k = ntdb_mkdata("key", 4);
- d = ntdb_mkdata("data", 5);
- ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
-
- /* Make sure it put it at end as we expected. */
- off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL);
- ok1(off >= ALMOST_4G);
- ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
-
- ok1(ntdb_fetch(ntdb, k, &d) == 0);
- ok1(d.dsize == 5);
- ok1(strcmp((char *)d.dptr, "data") == 0);
- free(d.dptr);
-
- ok1(ntdb_delete(ntdb, k) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
-
- ntdb_close(ntdb);
- }
-
- /* We might get messages about mmap failing, so don't test
- * tap_log_messages */
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
- void *unused)
-{
- return 0;
-}
-
-static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
-{
- return 0;
-}
-
-static uint32_t hash_fn(const void *key, size_t len, uint32_t seed,
- void *priv)
-{
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- union ntdb_attribute seed_attr;
- union ntdb_attribute hash_attr;
- union ntdb_attribute lock_attr;
-
- seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
- seed_attr.base.next = &hash_attr;
- seed_attr.seed.seed = 100;
-
- hash_attr.base.attr = NTDB_ATTRIBUTE_HASH;
- hash_attr.base.next = &lock_attr;
- hash_attr.hash.fn = hash_fn;
- hash_attr.hash.data = &hash_attr;
-
- lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
- lock_attr.base.next = &tap_log_attr;
- lock_attr.flock.lock = mylock;
- lock_attr.flock.unlock = myunlock;
- lock_attr.flock.data = &lock_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 50);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- union ntdb_attribute attr;
-
- /* First open with no attributes. */
- ntdb = ntdb_open("run-90-get-set-attributes.ntdb",
- flags[i] |MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
- ok1(ntdb);
-
- /* Get log on no attributes will fail */
- attr.base.attr = NTDB_ATTRIBUTE_LOG;
- ok1(ntdb_get_attribute(ntdb, &attr) == NTDB_ERR_NOEXIST);
- /* These always work. */
- attr.base.attr = NTDB_ATTRIBUTE_HASH;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH);
- ok1(attr.hash.fn == ntdb_jenkins_hash);
- attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
- ok1(attr.flock.lock == ntdb_fcntl_lock);
- ok1(attr.flock.unlock == ntdb_fcntl_unlock);
- attr.base.attr = NTDB_ATTRIBUTE_SEED;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED);
- /* This is possible, just astronomically unlikely. */
- ok1(attr.seed.seed != 0);
-
- /* Unset attributes. */
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
-
- /* Set them. */
- ok1(ntdb_set_attribute(ntdb, &tap_log_attr) == 0);
- ok1(ntdb_set_attribute(ntdb, &lock_attr) == 0);
- /* These should fail. */
- ok1(ntdb_set_attribute(ntdb, &seed_attr) == NTDB_ERR_EINVAL);
- ok1(tap_log_messages == 1);
- ok1(ntdb_set_attribute(ntdb, &hash_attr) == NTDB_ERR_EINVAL);
- ok1(tap_log_messages == 2);
- tap_log_messages = 0;
-
- /* Getting them should work as expected. */
- attr.base.attr = NTDB_ATTRIBUTE_LOG;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG);
- ok1(attr.log.fn == tap_log_attr.log.fn);
- ok1(attr.log.data == tap_log_attr.log.data);
-
- attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
- ok1(attr.flock.lock == mylock);
- ok1(attr.flock.unlock == myunlock);
- ok1(attr.flock.data == &lock_attr);
-
- /* Unset them again. */
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
- ok1(tap_log_messages == 0);
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
- ok1(tap_log_messages == 0);
-
- ntdb_close(ntdb);
- ok1(tap_log_messages == 0);
-
- /* Now open with all attributes. */
- ntdb = ntdb_open("run-90-get-set-attributes.ntdb",
- flags[i] | MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600,
- &seed_attr);
-
- ok1(ntdb);
-
- /* Get will succeed */
- attr.base.attr = NTDB_ATTRIBUTE_LOG;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG);
- ok1(attr.log.fn == tap_log_attr.log.fn);
- ok1(attr.log.data == tap_log_attr.log.data);
-
- attr.base.attr = NTDB_ATTRIBUTE_HASH;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH);
- ok1(attr.hash.fn == hash_fn);
- ok1(attr.hash.data == &hash_attr);
-
- attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
- ok1(attr.flock.lock == mylock);
- ok1(attr.flock.unlock == myunlock);
- ok1(attr.flock.data == &lock_attr);
-
- attr.base.attr = NTDB_ATTRIBUTE_SEED;
- ok1(ntdb_get_attribute(ntdb, &attr) == 0);
- ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED);
- ok1(attr.seed.seed == seed_attr.seed.seed);
-
- /* Unset attributes. */
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_HASH);
- ok1(tap_log_messages == 1);
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_SEED);
- ok1(tap_log_messages == 2);
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
- ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
- ok1(tap_log_messages == 2);
- tap_log_messages = 0;
-
- ntdb_close(ntdb);
-
- }
- return exit_status();
-}
+++ /dev/null
-#include <ccan/failtest/failtest_override.h>
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "layout.h"
-#include "failtest_helper.h"
-#include <stdarg.h>
-#include "helprun-external-agent.h"
-
-static size_t len_of(bool breaks_check, bool breaks_write, bool breaks_open)
-{
- size_t len = 0;
- if (breaks_check)
- len += 8;
- if (breaks_write)
- len += 16;
- if (breaks_open)
- len += 32;
- return len;
-}
-
-/* Creates a NTDB with various capabilities. */
-static void create_ntdb(const char *name,
- unsigned int cap,
- bool breaks_check,
- bool breaks_write,
- bool breaks_open, ...)
-{
- NTDB_DATA key, data;
- va_list ap;
- struct ntdb_layout *layout;
- struct ntdb_context *ntdb;
- int fd, clen;
- union ntdb_attribute seed_attr;
-
- /* Force a seed which doesn't allow records to clash! */
- seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
- seed_attr.base.next = &tap_log_attr;
- seed_attr.seed.seed = 0;
-
- key = ntdb_mkdata("Hello", 5);
- data = ntdb_mkdata("world", 5);
-
- /* Create a NTDB with some data, and some capabilities */
- layout = new_ntdb_layout();
- ntdb_layout_add_freetable(layout);
- ntdb_layout_add_used(layout, key, data, 6);
- clen = len_of(breaks_check, breaks_write, breaks_open);
- ntdb_layout_add_free(layout, 15496 - clen, 0);
- ntdb_layout_add_capability(layout, cap,
- breaks_write, breaks_check, breaks_open,
- clen);
-
- va_start(ap, breaks_open);
- while ((cap = va_arg(ap, int)) != 0) {
- breaks_check = va_arg(ap, int);
- breaks_write = va_arg(ap, int);
- breaks_open = va_arg(ap, int);
-
- key.dsize--;
- ntdb_layout_add_used(layout, key, data, 11 - key.dsize);
- clen = len_of(breaks_check, breaks_write, breaks_open);
- ntdb_layout_add_free(layout, 16304 - clen, 0);
- ntdb_layout_add_capability(layout, cap,
- breaks_write, breaks_check,
- breaks_open, clen);
- }
- va_end(ap);
-
- /* We open-code this, because we need to use the failtest write. */
- ntdb = ntdb_layout_get(layout, failtest_free, &seed_attr);
-
- fd = open(name, O_RDWR|O_TRUNC|O_CREAT, 0600);
- if (fd < 0)
- err(1, "opening %s for writing", name);
- if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size)
- != ntdb->file->map_size)
- err(1, "writing %s", name);
- close(fd);
- ntdb_close(ntdb);
- ntdb_layout_free(layout);
-}
-
-/* Note all the "goto out" early exits: they're to shorten failtest time. */
-int main(int argc, char *argv[])
-{
- struct ntdb_context *ntdb;
- char *summary;
-
- failtest_init(argc, argv);
- failtest_hook = block_repeat_failures;
- failtest_exit_check = exit_check_log;
- plan_tests(60);
-
- failtest_suppress = true;
- /* Capability says you can ignore it? */
- create_ntdb("run-capabilities.ntdb", 1, false, false, false, 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- if (!ok1(ntdb))
- goto out;
- ok1(tap_log_messages == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- ok1(tap_log_messages == 0);
- ntdb_close(ntdb);
-
- /* Two capabilitues say you can ignore them? */
- create_ntdb("run-capabilities.ntdb",
- 1, false, false, false,
- 2, false, false, false, 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- if (!ok1(ntdb))
- goto out;
- ok1(tap_log_messages == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- ok1(tap_log_messages == 0);
- ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
- ok1(strstr(summary, "Capability 1\n"));
- free(summary);
- ntdb_close(ntdb);
-
- /* Capability says you can't check. */
- create_ntdb("run-capabilities.ntdb",
- 1, false, false, false,
- 2, true, false, false, 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- if (!ok1(ntdb))
- goto out;
- ok1(tap_log_messages == 0);
- ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- /* We expect a warning! */
- ok1(tap_log_messages == 1);
- ok1(strstr(log_last, "capabilit"));
- ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
- ok1(strstr(summary, "Capability 1\n"));
- ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
- free(summary);
- ntdb_close(ntdb);
-
- /* Capability says you can't write. */
- create_ntdb("run-capabilities.ntdb",
- 1, false, false, false,
- 2, false, true, false, 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- /* We expect a message. */
- ok1(!ntdb);
- if (!ok1(tap_log_messages == 2))
- goto out;
- if (!ok1(strstr(log_last, "unknown")))
- goto out;
- ok1(strstr(log_last, "write"));
-
- /* We can open it read-only though! */
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0,
- &tap_log_attr);
- failtest_suppress = true;
- if (!ok1(ntdb))
- goto out;
- ok1(tap_log_messages == 2);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- ok1(tap_log_messages == 2);
- ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
- ok1(strstr(summary, "Capability 1\n"));
- ok1(strstr(summary, "Capability 2 (read-only)\n"));
- free(summary);
- ntdb_close(ntdb);
-
- /* Capability says you can't open. */
- create_ntdb("run-capabilities.ntdb",
- 1, false, false, false,
- 2, false, false, true, 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- /* We expect a message. */
- ok1(!ntdb);
- if (!ok1(tap_log_messages == 3))
- goto out;
- if (!ok1(strstr(log_last, "unknown")))
- goto out;
-
- /* Combine capabilities correctly. */
- create_ntdb("run-capabilities.ntdb",
- 1, false, false, false,
- 2, true, false, false,
- 3, false, true, false, 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- /* We expect a message. */
- ok1(!ntdb);
- if (!ok1(tap_log_messages == 4))
- goto out;
- if (!ok1(strstr(log_last, "unknown")))
- goto out;
- ok1(strstr(log_last, "write"));
-
- /* We can open it read-only though! */
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0,
- &tap_log_attr);
- failtest_suppress = true;
- if (!ok1(ntdb))
- goto out;
- ok1(tap_log_messages == 4);
- ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- /* We expect a warning! */
- ok1(tap_log_messages == 5);
- ok1(strstr(log_last, "unknown"));
- ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
- ok1(strstr(summary, "Capability 1\n"));
- ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
- ok1(strstr(summary, "Capability 3 (read-only)\n"));
- free(summary);
- ntdb_close(ntdb);
-
- /* Two capability flags in one. */
- create_ntdb("run-capabilities.ntdb",
- 1, false, false, false,
- 2, true, true, false,
- 0);
-
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
- &tap_log_attr);
- failtest_suppress = true;
- /* We expect a message. */
- ok1(!ntdb);
- if (!ok1(tap_log_messages == 6))
- goto out;
- if (!ok1(strstr(log_last, "unknown")))
- goto out;
- ok1(strstr(log_last, "write"));
-
- /* We can open it read-only though! */
- failtest_suppress = false;
- ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0,
- &tap_log_attr);
- failtest_suppress = true;
- if (!ok1(ntdb))
- goto out;
- ok1(tap_log_messages == 6);
- ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
- ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
- /* We expect a warning! */
- ok1(tap_log_messages == 7);
- ok1(strstr(log_last, "unknown"));
- ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
- ok1(strstr(summary, "Capability 1\n"));
- ok1(strstr(summary, "Capability 2 (uncheckable,read-only)\n"));
- free(summary);
- ntdb_close(ntdb);
-
-out:
- failtest_exit(exit_status());
-
- /*
- * We will never reach this but the compiler complains if we do not
- * return in this function.
- */
- return EFAULT;
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = ntdb_mkdata("key", 3);
- NTDB_DATA data = ntdb_mkdata("data", 4);
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1);
-
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- size_t size;
- NTDB_DATA k, d;
- ntdb = ntdb_open("run-expand-in-transaction.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- size = ntdb->file->map_size;
- /* Add a fake record to chew up the existing free space. */
- k = ntdb_mkdata("fake", 4);
- d.dsize = ntdb->file->map_size
- - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8;
- d.dptr = malloc(d.dsize);
- memset(d.dptr, 0, d.dsize);
- ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
- ok1(ntdb->file->map_size == size);
- free(d.dptr);
- ok1(ntdb_transaction_start(ntdb) == 0);
- ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
- ok1(ntdb->file->map_size > size);
- ok1(ntdb_transaction_commit(ntdb) == 0);
- ok1(ntdb->file->map_size > size);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
- NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- uint64_t features;
- ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Put some stuff in there. */
- for (j = 0; j < 100; j++) {
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- fail("Storing in ntdb");
- }
-
- /* Mess with features fields in hdr. */
- features = (~NTDB_FEATURE_MASK ^ 1);
- ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
- features_used),
- &features, sizeof(features)) == 0);
- ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
- features_offered),
- &features, sizeof(features)) == 0);
- ntdb_close(ntdb);
-
- ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR, 0, &tap_log_attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Should not have changed features offered. */
- ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header,
- features_offered),
- &features, sizeof(features)) == 0);
- ok1(features == (~NTDB_FEATURE_MASK ^ 1));
-
- /* Should have cleared unknown bits in features_used. */
- ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header,
- features_used),
- &features, sizeof(features)) == 0);
- ok1(features == (1 & NTDB_FEATURE_MASK));
-
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "../private.h"
-#include <unistd.h>
-#include "lock-tracking.h"
-
-#define fcntl fcntl_with_lockcheck
-#include "ntdb-source.h"
-
-#include "tap-interface.h"
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdarg.h>
-#include "external-agent.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-#define TEST_DBNAME "run-lockall.ntdb"
-#define KEY_STR "key"
-
-#undef fcntl
-
-int main(int argc, char *argv[])
-{
- struct agent *agent;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
- int i;
-
- plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1);
- agent = prepare_external_agent();
- if (!agent)
- err(1, "preparing agent");
-
- for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
- enum agent_return ret;
- struct ntdb_context *ntdb;
-
- ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ok1(ntdb);
-
- ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
- ok1(ret == SUCCESS);
-
- ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
- ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
- == WOULD_HAVE_BLOCKED);
- ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
- == WOULD_HAVE_BLOCKED);
- /* Test nesting. */
- ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
- ntdb_unlockall(ntdb);
- ntdb_unlockall(ntdb);
-
- ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
- == SUCCESS);
-
- ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
- ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
- == WOULD_HAVE_BLOCKED);
- ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
- == SUCCESS);
- ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
- ntdb_unlockall_read(ntdb);
- ntdb_unlockall_read(ntdb);
-
- ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
- == SUCCESS);
- ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS);
- ntdb_close(ntdb);
- }
-
- free_external_agent(agent);
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-/* We had a bug where we marked the ntdb read-only for a ntdb_traverse_read.
- * If we then expanded the ntdb, we would remap read-only, and later SEGV. */
-#include "tap-interface.h"
-#include "external-agent.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static bool file_larger(int fd, ntdb_len_t size)
-{
- struct stat st;
-
- fstat(fd, &st);
- return st.st_size != size;
-}
-
-static unsigned add_records_to_grow(struct agent *agent, int fd, ntdb_len_t size)
-{
- unsigned int i;
-
- for (i = 0; !file_larger(fd, size); i++) {
- char data[50];
- sprintf(data, "%i=%i", i, i);
- if (external_agent_operation(agent, STORE, data) != SUCCESS)
- return 0;
- }
- diag("Added %u records to grow file", i);
- return i;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct agent *agent;
- struct ntdb_context *ntdb;
- NTDB_DATA d = ntdb_mkdata("hello", 5);
- const char filename[] = "run-remap-in-read_traverse.ntdb";
-
- plan_tests(4);
-
- agent = prepare_external_agent();
-
- ntdb = ntdb_open(filename, MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-
- ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS);
- i = add_records_to_grow(agent, ntdb->file->fd, ntdb->file->map_size);
-
- /* Do a traverse. */
- ok1(ntdb_traverse(ntdb, NULL, NULL) == i);
-
- /* Now store something! */
- ok1(ntdb_store(ntdb, d, d, NTDB_INSERT) == 0);
- ok1(tap_log_messages == 0);
- ntdb_close(ntdb);
- free_external_agent(agent);
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static int log_count = 0;
-
-/* Normally we get a log when setting random seed. */
-static void my_log_fn(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message, void *priv)
-{
- log_count++;
-}
-
-static union ntdb_attribute log_attr = {
- .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG },
- .fn = my_log_fn }
-};
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- struct ntdb_context *ntdb;
- union ntdb_attribute attr;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
-
- attr.seed.base.attr = NTDB_ATTRIBUTE_SEED;
- attr.seed.base.next = &log_attr;
- attr.seed.seed = 42;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- struct ntdb_header hdr;
- int fd;
- ntdb = ntdb_open("run-seed.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &attr);
- ok1(ntdb);
- if (!ntdb)
- continue;
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(ntdb->hash_seed == 42);
- ok1(log_count == 0);
- ntdb_close(ntdb);
-
- if (flags[i] & NTDB_INTERNAL)
- continue;
-
- fd = open("run-seed.ntdb", O_RDONLY);
- ok1(fd >= 0);
- ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
- if (flags[i] & NTDB_CONVERT)
- ok1(bswap_64(hdr.hash_seed) == 42);
- else
- ok1(hdr.hash_seed == 42);
- close(fd);
- }
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "helprun-external-agent.h"
-
-int main(int argc, char *argv[])
-{
- enum NTDB_ERROR e;
- plan_tests(NTDB_ERR_RDONLY*-1 + 2);
-
- for (e = NTDB_SUCCESS; e >= NTDB_ERR_RDONLY; e--) {
- switch (e) {
- case NTDB_SUCCESS:
- ok1(!strcmp(ntdb_errorstr(e),
- "Success"));
- break;
- case NTDB_ERR_IO:
- ok1(!strcmp(ntdb_errorstr(e),
- "IO Error"));
- break;
- case NTDB_ERR_LOCK:
- ok1(!strcmp(ntdb_errorstr(e),
- "Locking error"));
- break;
- case NTDB_ERR_OOM:
- ok1(!strcmp(ntdb_errorstr(e),
- "Out of memory"));
- break;
- case NTDB_ERR_EXISTS:
- ok1(!strcmp(ntdb_errorstr(e),
- "Record exists"));
- break;
- case NTDB_ERR_EINVAL:
- ok1(!strcmp(ntdb_errorstr(e),
- "Invalid parameter"));
- break;
- case NTDB_ERR_NOEXIST:
- ok1(!strcmp(ntdb_errorstr(e),
- "Record does not exist"));
- break;
- case NTDB_ERR_RDONLY:
- ok1(!strcmp(ntdb_errorstr(e),
- "write not permitted"));
- break;
- case NTDB_ERR_CORRUPT:
- ok1(!strcmp(ntdb_errorstr(e),
- "Corrupt database"));
- break;
- }
- }
- ok1(!strcmp(ntdb_errorstr(e), "Invalid error code"));
-
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-static int drop_count(struct ntdb_context *ntdb, unsigned int *count)
-{
- if (--(*count) == 0)
- return 1;
- return 0;
-}
-
-static int set_found(struct ntdb_context *ntdb, bool found[3])
-{
- unsigned int idx;
-
- if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach0.ntdb") == 0)
- idx = 0;
- else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach1.ntdb") == 0)
- idx = 1;
- else if (strcmp(ntdb_name(ntdb), "run-ntdb_foreach2.ntdb") == 0)
- idx = 2;
- else
- abort();
-
- if (found[idx])
- abort();
- found[idx] = true;
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, count;
- bool found[3];
- struct ntdb_context *ntdb0, *ntdb1, *ntdb;
- int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 8);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb0 = ntdb_open("run-ntdb_foreach0.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
- ntdb = ntdb_open("run-ntdb_foreach2.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-
- memset(found, 0, sizeof(found));
- ntdb_foreach(set_found, found);
- ok1(found[0] && found[1] && found[2]);
-
- /* Test premature iteration termination */
- count = 1;
- ntdb_foreach(drop_count, &count);
- ok1(count == 0);
-
- ntdb_close(ntdb1);
- memset(found, 0, sizeof(found));
- ntdb_foreach(set_found, found);
- ok1(found[0] && !found[1] && found[2]);
-
- ntdb_close(ntdb);
- memset(found, 0, sizeof(found));
- ntdb_foreach(set_found, found);
- ok1(found[0] && !found[1] && !found[2]);
-
- ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb",
- flags[i]|MAYBE_NOSYNC,
- O_RDWR, 0600, &tap_log_attr);
- memset(found, 0, sizeof(found));
- ntdb_foreach(set_found, found);
- ok1(found[0] && found[1] && !found[2]);
-
- ntdb_close(ntdb0);
- memset(found, 0, sizeof(found));
- ntdb_foreach(set_found, found);
- ok1(!found[0] && found[1] && !found[2]);
-
- ntdb_close(ntdb1);
- memset(found, 0, sizeof(found));
- ntdb_foreach(set_found, found);
- ok1(!found[0] && !found[1] && !found[2]);
- ok1(tap_log_messages == 0);
- }
-
- return exit_status();
-}
+++ /dev/null
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-#include "helprun-external-agent.h"
-
-#define NUM_RECORDS 1000
-
-/* We use the same seed which we saw a failure on. */
-static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
-{
- return hash64_stable((const unsigned char *)key, len,
- *(uint64_t *)p);
-}
-
-static bool store_records(struct ntdb_context *ntdb)
-{
- int i;
- NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
- NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
-
- for (i = 0; i < NUM_RECORDS; i++)
- if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
- return false;
- return true;
-}
-
-struct trav_data {
- unsigned int calls, call_limit;
- int low, high;
- bool mismatch;
- bool delete;
- enum NTDB_ERROR delete_error;
-};
-
-static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
- struct trav_data *td)
-{
- int val;
-
- td->calls++;
- if (key.dsize != sizeof(val) || dbuf.dsize != sizeof(val)
- || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
- td->mismatch = true;
- return -1;
- }
- memcpy(&val, dbuf.dptr, dbuf.dsize);
- if (val < td->low)
- td->low = val;
- if (val > td->high)
- td->high = val;
-
- if (td->delete) {
- td->delete_error = ntdb_delete(ntdb, key);
- if (td->delete_error != NTDB_SUCCESS) {
- return -1;
- }
- }
-
- if (td->calls == td->call_limit)
- return 1;
- return 0;
-}
-
-struct trav_grow_data {
- unsigned int calls;
- unsigned int num_large;
- bool mismatch;
- enum NTDB_ERROR error;
-};
-
-static int trav_grow(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
- struct trav_grow_data *tgd)
-{
- int val;
- unsigned char buffer[128] = { 0 };
-
- tgd->calls++;
- if (key.dsize != sizeof(val) || dbuf.dsize < sizeof(val)
- || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
- tgd->mismatch = true;
- return -1;
- }
-
- if (dbuf.dsize > sizeof(val))
- /* We must have seen this before! */
- tgd->num_large++;
-
- /* Make a big difference to the database. */
- dbuf.dptr = buffer;
- dbuf.dsize = sizeof(buffer);
- tgd->error = ntdb_append(ntdb, key, dbuf);
- if (tgd->error != NTDB_SUCCESS) {
- return -1;
- }
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i;
- int num;
- struct trav_data td;
- struct trav_grow_data tgd;
- struct ntdb_context *ntdb;
- uint64_t seed = 16014841315512641303ULL;
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT };
- union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = fixedhash,
- .data = &seed } };
-
- hattr.base.next = &tap_log_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- ntdb = ntdb_open("run-traverse.ntdb", flags[i]|MAYBE_NOSYNC,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- ok1(ntdb_traverse(ntdb, NULL, NULL) == 0);
-
- ok1(store_records(ntdb));
- num = ntdb_traverse(ntdb, NULL, NULL);
- ok1(num == NUM_RECORDS);
-
- /* Full traverse. */
- td.calls = 0;
- td.call_limit = UINT_MAX;
- td.low = INT_MAX;
- td.high = INT_MIN;
- td.mismatch = false;
- td.delete = false;
-
- num = ntdb_traverse(ntdb, trav, &td);
- ok1(num == NUM_RECORDS);
- ok1(!td.mismatch);
- ok1(td.calls == NUM_RECORDS);
- ok1(td.low == 0);
- ok1(td.high == NUM_RECORDS-1);
-
- /* Short traverse. */
- td.calls = 0;
- td.call_limit = NUM_RECORDS / 2;
- td.low = INT_MAX;
- td.high = INT_MIN;
- td.mismatch = false;
- td.delete = false;
-
- num = ntdb_traverse(ntdb, trav, &td);
- ok1(num == NUM_RECORDS / 2);
- ok1(!td.mismatch);
- ok1(td.calls == NUM_RECORDS / 2);
- ok1(td.low <= NUM_RECORDS / 2);
- ok1(td.high > NUM_RECORDS / 2);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(tap_log_messages == 0);
-
- /* Deleting traverse (delete everything). */
- td.calls = 0;
- td.call_limit = UINT_MAX;
- td.low = INT_MAX;
- td.high = INT_MIN;
- td.mismatch = false;
- td.delete = true;
- td.delete_error = NTDB_SUCCESS;
- num = ntdb_traverse(ntdb, trav, &td);
- ok1(num == NUM_RECORDS);
- ok1(td.delete_error == NTDB_SUCCESS);
- ok1(!td.mismatch);
- ok1(td.calls == NUM_RECORDS);
- ok1(td.low == 0);
- ok1(td.high == NUM_RECORDS - 1);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Now it's empty! */
- ok1(ntdb_traverse(ntdb, NULL, NULL) == 0);
-
- /* Re-add. */
- ok1(store_records(ntdb));
- ok1(ntdb_traverse(ntdb, NULL, NULL) == NUM_RECORDS);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Grow. This will cause us to be reshuffled. */
- tgd.calls = 0;
- tgd.num_large = 0;
- tgd.mismatch = false;
- tgd.error = NTDB_SUCCESS;
- ok1(ntdb_traverse(ntdb, trav_grow, &tgd) > 1);
- ok1(tgd.error == 0);
- ok1(!tgd.mismatch);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- ok1(tgd.num_large < tgd.calls);
- diag("growing db: %u calls, %u repeats",
- tgd.calls, tgd.num_large);
-
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
+++ /dev/null
-#include "tap-interface.h"
-
-unsigned tap_ok_count, tap_ok_target = -1U;
+++ /dev/null
-/*
- Unix SMB/CIFS implementation.
- Simplistic implementation of tap interface.
-
- Copyright (C) Rusty Russell 2012
-
- ** NOTE! The following LGPL license applies to the talloc
- ** library. This does NOT imply that all of Samba is released
- ** under the LGPL
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include <stdio.h>
-#include <ccan/err/err.h>
-#include "no-fsync.h"
-
-#ifndef __location__
-#define __TAP_STRING_LINE1__(s) #s
-#define __TAP_STRING_LINE2__(s) __TAP_STRING_LINE1__(s)
-#define __TAP_STRING_LINE3__ __TAP_STRING_LINE2__(__LINE__)
-#define __location__ __FILE__ ":" __TAP_STRING_LINE3__
-#endif
-
-extern unsigned tap_ok_count, tap_ok_target;
-#define plan_tests(num) do { tap_ok_target = (num); } while(0)
-#define ok(e, ...) ((e) ? (printf("."), tap_ok_count++, true) : (warnx(__VA_ARGS__), false))
-#define ok1(e) ok((e), "%s:%s", __location__, #e)
-#define pass(...) (printf("."), tap_ok_count++)
-#define fail(...) warnx(__VA_ARGS__)
-#define diag(...) do { printf(__VA_ARGS__); printf("\n"); } while(0)
-#define exit_status() (tap_ok_count == tap_ok_target ? 0 : 1)
+++ /dev/null
-OBJS:=../../ntdb.o ../../hash.o ../../tally.o
-CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
-LDFLAGS:=-L../../..
-
-default: ntdbtorture ntdbtool ntdbdump ntdbrestore mkntdb speed growtdb-bench
-
-ntdbdump: ntdbdump.c $(OBJS)
-ntdbrestore: ntdbrestore.c $(OBJS)
-ntdbtorture: ntdbtorture.c $(OBJS)
-ntdbtool: ntdbtool.c $(OBJS)
-mkntdb: mkntdb.c $(OBJS)
-speed: speed.c $(OBJS)
-growtdb-bench: growtdb-bench.c $(OBJS)
-
-clean:
- rm -f ntdbtorture ntdbdump ntdbrestore ntdbtool mkntdb speed growtdb-bench
+++ /dev/null
-#include "ntdb.h"
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <ccan/err/err.h>
-
-static void logfn(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
-{
- fprintf(stderr, "ntdb:%s:%s:%s\n",
- ntdb_name(ntdb), ntdb_errorstr(ecode), message);
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j, users, groups;
- NTDB_DATA idxkey, idxdata;
- NTDB_DATA k, d, gk;
- char cmd[100];
- struct ntdb_context *ntdb;
- enum NTDB_ERROR ecode;
- union ntdb_attribute log;
-
- if (argc != 3) {
- printf("Usage: growtdb-bench <users> <groups>\n");
- exit(1);
- }
- users = atoi(argv[1]);
- groups = atoi(argv[2]);
-
- sprintf(cmd, "cat /proc/%i/statm", getpid());
-
- log.base.attr = NTDB_ATTRIBUTE_LOG;
- log.base.next = NULL;
- log.log.fn = logfn;
-
- ntdb = ntdb_open("/tmp/growtdb.ntdb", NTDB_DEFAULT,
- O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
-
- idxkey.dptr = (unsigned char *)"User index";
- idxkey.dsize = strlen("User index");
- idxdata.dsize = 51;
- idxdata.dptr = calloc(idxdata.dsize, 1);
- if (idxdata.dptr == NULL) {
- fprintf(stderr, "Unable to allocate memory for idxdata.dptr\n");
- return -1;
- }
-
- /* Create users. */
- k.dsize = 48;
- k.dptr = calloc(k.dsize, 1);
- if (k.dptr == NULL) {
- fprintf(stderr, "Unable to allocate memory for k.dptr\n");
- return -1;
- }
- d.dsize = 64;
- d.dptr = calloc(d.dsize, 1);
- if (d.dptr == NULL) {
- fprintf(stderr, "Unable to allocate memory for d.dptr\n");
- return -1;
- }
-
- ntdb_transaction_start(ntdb);
- for (i = 0; i < users; i++) {
- memcpy(k.dptr, &i, sizeof(i));
- ecode = ntdb_store(ntdb, k, d, NTDB_INSERT);
- if (ecode != NTDB_SUCCESS)
- errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode));
-
- /* This simulates a growing index record. */
- ecode = ntdb_append(ntdb, idxkey, idxdata);
- if (ecode != NTDB_SUCCESS)
- errx(1, "ntdb append failed: %s", ntdb_errorstr(ecode));
- }
- if ((ecode = ntdb_transaction_commit(ntdb)) != 0)
- errx(1, "ntdb commit1 failed: %s", ntdb_errorstr(ecode));
-
- if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0)
- errx(1, "ntdb_check failed after initial insert!");
-
- system(cmd);
-
- /* Now put them all in groups: add 32 bytes to each record for
- * a group. */
- gk.dsize = 48;
- gk.dptr = calloc(k.dsize, 1);
- if (gk.dptr == NULL) {
- fprintf(stderr, "Unable to allocate memory for gk.dptr\n");
- return -1;
- }
- gk.dptr[gk.dsize-1] = 1;
-
- d.dsize = 32;
- for (i = 0; i < groups; i++) {
- ntdb_transaction_start(ntdb);
- /* Create the "group". */
- memcpy(gk.dptr, &i, sizeof(i));
- ecode = ntdb_store(ntdb, gk, d, NTDB_INSERT);
- if (ecode != NTDB_SUCCESS)
- errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode));
-
- /* Now populate it. */
- for (j = 0; j < users; j++) {
- /* Append to the user. */
- memcpy(k.dptr, &j, sizeof(j));
- if ((ecode = ntdb_append(ntdb, k, d)) != 0)
- errx(1, "ntdb append failed: %s",
- ntdb_errorstr(ecode));
-
- /* Append to the group. */
- if ((ecode = ntdb_append(ntdb, gk, d)) != 0)
- errx(1, "ntdb append failed: %s",
- ntdb_errorstr(ecode));
- }
- if ((ecode = ntdb_transaction_commit(ntdb)) != 0)
- errx(1, "ntdb commit2 failed: %s", ntdb_errorstr(ecode));
- if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0)
- errx(1, "ntdb_check failed after iteration %i!", i);
- system(cmd);
- }
-
- return 0;
-}
+++ /dev/null
-#include "ntdb.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <ccan/err/err.h>
-
-int main(int argc, char *argv[])
-{
- unsigned int i, num_recs;
- struct ntdb_context *ntdb;
-
- if (argc != 3 || (num_recs = atoi(argv[2])) == 0)
- errx(1, "Usage: mktdb <tdbfile> <numrecords>");
-
- ntdb = ntdb_open(argv[1], NTDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
- if (!ntdb)
- err(1, "Opening %s", argv[1]);
-
- for (i = 0; i < num_recs; i++) {
- NTDB_DATA d;
-
- d.dptr = (void *)&i;
- d.dsize = sizeof(i);
- if (ntdb_store(ntdb, d, d, NTDB_INSERT) != 0)
- err(1, "Failed to store record %i", i);
- }
- printf("Done\n");
- return 0;
-}
+++ /dev/null
-/*
- Unix SMB/CIFS implementation.
- low level ntdb backup and restore utility
- Copyright (C) Andrew Tridgell 2002
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/*
-
- This program is meant for backup/restore of ntdb databases. Typical usage would be:
- tdbbackup *.ntdb
- when Samba shuts down cleanly, which will make a backup of all the local databases
- to *.bak files. Then on Samba startup you would use:
- tdbbackup -v *.ntdb
- and this will check the databases for corruption and if corruption is detected then
- the backup will be restored.
-
- You may also like to do a backup on a regular basis while Samba is
- running, perhaps using cron.
-
- The reason this program is needed is to cope with power failures
- while Samba is running. A power failure could lead to database
- corruption and Samba will then not start correctly.
-
- Note that many of the databases in Samba are transient and thus
- don't need to be backed up, so you can optimise the above a little
- by only running the backup on the critical databases.
-
- */
-
-#include "config.h"
-#include "ntdb.h"
-#include "private.h"
-
-#ifdef HAVE_GETOPT_H
-#include <getopt.h>
-#endif
-
-static int failed;
-
-static void ntdb_log(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
-{
- fprintf(stderr, "%s:%s\n", ntdb_errorstr(ecode), message);
-}
-
-static char *add_suffix(const char *name, const char *suffix)
-{
- char *ret;
- int len = strlen(name) + strlen(suffix) + 1;
- ret = (char *)malloc(len);
- if (!ret) {
- fprintf(stderr,"Out of memory!\n");
- exit(1);
- }
- snprintf(ret, len, "%s%s", name, suffix);
- return ret;
-}
-
-static int copy_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- struct ntdb_context *ntdb_new = (struct ntdb_context *)state;
- enum NTDB_ERROR err;
-
- err = ntdb_store(ntdb_new, key, dbuf, NTDB_INSERT);
- if (err) {
- fprintf(stderr,"Failed to insert into %s: %s\n",
- ntdb_name(ntdb_new), ntdb_errorstr(err));
- failed = 1;
- return 1;
- }
- return 0;
-}
-
-
-static int test_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- return 0;
-}
-
-/*
- carefully backup a ntdb, validating the contents and
- only doing the backup if its OK
- this function is also used for restore
-*/
-static int backup_ntdb(const char *old_name, const char *new_name)
-{
- struct ntdb_context *ntdb;
- struct ntdb_context *ntdb_new;
- char *tmp_name;
- struct stat st;
- int count1, count2;
- enum NTDB_ERROR err;
- union ntdb_attribute log_attr;
-
- tmp_name = add_suffix(new_name, ".tmp");
-
- /* stat the old ntdb to find its permissions */
- if (stat(old_name, &st) != 0) {
- perror(old_name);
- free(tmp_name);
- return 1;
- }
-
- log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
- log_attr.base.next = NULL;
- log_attr.log.fn = ntdb_log;
-
- /* open the old ntdb */
- ntdb = ntdb_open(old_name, NTDB_DEFAULT, O_RDWR, 0, &log_attr);
- if (!ntdb) {
- printf("Failed to open %s\n", old_name);
- free(tmp_name);
- return 1;
- }
-
- unlink(tmp_name);
- ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT,
- O_RDWR|O_CREAT|O_EXCL, st.st_mode & 0777,
- &log_attr);
- if (!ntdb_new) {
- perror(tmp_name);
- free(tmp_name);
- return 1;
- }
-
- err = ntdb_transaction_start(ntdb);
- if (err) {
- fprintf(stderr, "Failed to start transaction on old ntdb: %s\n",
- ntdb_errorstr(err));
- ntdb_close(ntdb);
- ntdb_close(ntdb_new);
- unlink(tmp_name);
- free(tmp_name);
- return 1;
- }
-
- /* lock the backup ntdb so that nobody else can change it */
- err = ntdb_lockall(ntdb_new);
- if (err) {
- fprintf(stderr, "Failed to lock backup ntdb: %s\n",
- ntdb_errorstr(err));
- ntdb_close(ntdb);
- ntdb_close(ntdb_new);
- unlink(tmp_name);
- free(tmp_name);
- return 1;
- }
-
- failed = 0;
-
- /* traverse and copy */
- count1 = ntdb_traverse(ntdb, copy_fn, (void *)ntdb_new);
- if (count1 < 0 || failed) {
- fprintf(stderr,"failed to copy %s\n", old_name);
- ntdb_close(ntdb);
- ntdb_close(ntdb_new);
- unlink(tmp_name);
- free(tmp_name);
- return 1;
- }
-
- /* close the old ntdb */
- ntdb_close(ntdb);
-
- /* copy done, unlock the backup ntdb */
- ntdb_unlockall(ntdb_new);
-
-#ifdef HAVE_FDATASYNC
- if (fdatasync(ntdb_fd(ntdb_new)) != 0) {
-#else
- if (fsync(ntdb_fd(ntdb_new)) != 0) {
-#endif
- /* not fatal */
- fprintf(stderr, "failed to fsync backup file\n");
- }
-
- /* close the new ntdb and re-open read-only */
- ntdb_close(ntdb_new);
-
- /* we don't need the hash attr any more */
- log_attr.base.next = NULL;
-
- ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, O_RDONLY, 0, &log_attr);
- if (!ntdb_new) {
- fprintf(stderr,"failed to reopen %s\n", tmp_name);
- unlink(tmp_name);
- perror(tmp_name);
- free(tmp_name);
- return 1;
- }
-
- /* traverse the new ntdb to confirm */
- count2 = ntdb_traverse(ntdb_new, test_fn, NULL);
- if (count2 != count1) {
- fprintf(stderr,"failed to copy %s\n", old_name);
- ntdb_close(ntdb_new);
- unlink(tmp_name);
- free(tmp_name);
- return 1;
- }
-
- /* close the new ntdb and rename it to .bak */
- ntdb_close(ntdb_new);
- if (rename(tmp_name, new_name) != 0) {
- perror(new_name);
- free(tmp_name);
- return 1;
- }
-
- free(tmp_name);
-
- return 0;
-}
-
-/*
- verify a ntdb and if it is corrupt then restore from *.bak
-*/
-static int verify_ntdb(const char *fname, const char *bak_name)
-{
- struct ntdb_context *ntdb;
- int count = -1;
- union ntdb_attribute log_attr;
-
- log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
- log_attr.base.next = NULL;
- log_attr.log.fn = ntdb_log;
-
- /* open the ntdb */
- ntdb = ntdb_open(fname, NTDB_DEFAULT, O_RDONLY, 0, &log_attr);
-
- /* traverse the ntdb, then close it */
- if (ntdb) {
- count = ntdb_traverse(ntdb, test_fn, NULL);
- ntdb_close(ntdb);
- }
-
- /* count is < 0 means an error */
- if (count < 0) {
- printf("restoring %s\n", fname);
- return backup_ntdb(bak_name, fname);
- }
-
- printf("%s : %d records\n", fname, count);
-
- return 0;
-}
-
-/*
- see if one file is newer than another
-*/
-static int file_newer(const char *fname1, const char *fname2)
-{
- struct stat st1, st2;
- if (stat(fname1, &st1) != 0) {
- return 0;
- }
- if (stat(fname2, &st2) != 0) {
- return 1;
- }
- return (st1.st_mtime > st2.st_mtime);
-}
-
-static void usage(void)
-{
- printf("Usage: ntdbbackup [options] <fname...>\n\n");
- printf(" -h this help message\n");
- printf(" -v verify mode (restore if corrupt)\n");
- printf(" -s suffix set the backup suffix\n");
- printf(" -v verify mode (restore if corrupt)\n");
-}
-
-
- int main(int argc, char *argv[])
-{
- int i;
- int ret = 0;
- int c;
- int verify = 0;
- const char *suffix = ".bak";
-
- while ((c = getopt(argc, argv, "vhs:")) != -1) {
- switch (c) {
- case 'h':
- usage();
- exit(0);
- case 'v':
- verify = 1;
- break;
- case 's':
- suffix = optarg;
- break;
- }
- }
-
- argc -= optind;
- argv += optind;
-
- if (argc < 1) {
- usage();
- exit(1);
- }
-
- for (i=0; i<argc; i++) {
- const char *fname = argv[i];
- char *bak_name;
-
- bak_name = add_suffix(fname, suffix);
-
- if (verify) {
- if (verify_ntdb(fname, bak_name) != 0) {
- ret = 1;
- }
- } else {
- if (file_newer(fname, bak_name) &&
- backup_ntdb(fname, bak_name) != 0) {
- ret = 1;
- }
- }
-
- free(bak_name);
- }
-
- return ret;
-}
+++ /dev/null
-/*
- simple ntdb dump util
- Copyright (C) Andrew Tridgell 2001
- Copyright (C) Rusty Russell 2011
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-#include "config.h"
-#include "ntdb.h"
-#include "private.h"
-
-static void print_data(NTDB_DATA d)
-{
- unsigned char *p = (unsigned char *)d.dptr;
- int len = d.dsize;
- while (len--) {
- if (isprint(*p) && !strchr("\"\\", *p)) {
- fputc(*p, stdout);
- } else {
- printf("\\%02X", *p);
- }
- p++;
- }
-}
-
-static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- printf("{\n");
- printf("key(%d) = \"", (int)key.dsize);
- print_data(key);
- printf("\"\n");
- printf("data(%d) = \"", (int)dbuf.dsize);
- print_data(dbuf);
- printf("\"\n");
- printf("}\n");
- return 0;
-}
-
-static int dump_ntdb(const char *fname, const char *keyname)
-{
- struct ntdb_context *ntdb;
- NTDB_DATA key, value;
-
- ntdb = ntdb_open(fname, 0, O_RDONLY, 0, NULL);
- if (!ntdb) {
- printf("Failed to open %s\n", fname);
- return 1;
- }
-
- if (!keyname) {
- ntdb_traverse(ntdb, traverse_fn, NULL);
- } else {
- key = ntdb_mkdata(keyname, strlen(keyname));
- if (ntdb_fetch(ntdb, key, &value) != 0) {
- return 1;
- } else {
- print_data(value);
- free(value.dptr);
- }
- }
-
- return 0;
-}
-
-static void usage( void)
-{
- printf( "Usage: ntdbdump [options] <filename>\n\n");
- printf( " -h this help message\n");
- printf( " -k keyname dumps value of keyname\n");
-}
-
- int main(int argc, char *argv[])
-{
- char *fname, *keyname=NULL;
- int c;
-
- if (argc < 2) {
- printf("Usage: ntdbdump <fname>\n");
- exit(1);
- }
-
- while ((c = getopt( argc, argv, "hk:")) != -1) {
- switch (c) {
- case 'h':
- usage();
- exit( 0);
- case 'k':
- keyname = optarg;
- break;
- default:
- usage();
- exit( 1);
- }
- }
-
- fname = argv[optind];
-
- return dump_ntdb(fname, keyname);
-}
+++ /dev/null
-/*
- ntdbrestore -- construct a ntdb from (n)tdbdump output.
- Copyright (C) Rusty Russell 2012
- Copyright (C) Volker Lendecke 2010
- Copyright (C) Simon McVittie 2005
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "config.h"
-#include "ntdb.h"
-#include "private.h"
-#include <assert.h>
-
-static int read_linehead(FILE *f)
-{
- int i, c;
- int num_bytes;
- char prefix[128];
-
- while (1) {
- c = getc(f);
- if (c == EOF) {
- return -1;
- }
- if (c == '(') {
- break;
- }
- }
- for (i=0; i<sizeof(prefix); i++) {
- c = getc(f);
- if (c == EOF) {
- return -1;
- }
- prefix[i] = c;
- if (c == '"') {
- break;
- }
- }
- if (i == sizeof(prefix)) {
- return -1;
- }
- prefix[i] = '\0';
-
- if (sscanf(prefix, "%d) = ", &num_bytes) != 1) {
- return -1;
- }
- return num_bytes;
-}
-
-static int read_hex(void) {
- int c;
- c = getchar();
- if (c == EOF) {
- fprintf(stderr, "Unexpected EOF in data\n");
- return -1;
- } else if (c == '"') {
- fprintf(stderr, "Unexpected \\\" sequence\n");
- return -1;
- } else if ('0' <= c && c <= '9') {
- return c - '0';
- } else if ('A' <= c && c <= 'F') {
- return c - 'A' + 10;
- } else if ('a' <= c && c <= 'f') {
- return c - 'a' + 10;
- } else {
- fprintf(stderr, "Invalid hex: %c\n", c);
- return -1;
- }
-}
-
-static int read_data(FILE *f, NTDB_DATA *d, size_t size) {
- int c, low, high;
- int i;
-
- d->dptr = (unsigned char *)malloc(size);
- if (d->dptr == NULL) {
- return -1;
- }
- d->dsize = size;
-
- for (i=0; i<size; i++) {
- c = getc(f);
- if (c == EOF) {
- fprintf(stderr, "Unexpected EOF in data\n");
- return 1;
- } else if (c == '"') {
- return 0;
- } else if (c == '\\') {
- high = read_hex();
- if (high < 0) {
- return -1;
- }
- high = high << 4;
- assert(high == (high & 0xf0));
- low = read_hex();
- if (low < 0) {
- return -1;
- }
- assert(low == (low & 0x0f));
- d->dptr[i] = (low|high);
- } else {
- d->dptr[i] = c;
- }
- }
- return 0;
-}
-
-static int swallow(FILE *f, const char *s, int *eof)
-{
- char line[128];
-
- if (fgets(line, sizeof(line), f) == NULL) {
- if (eof != NULL) {
- *eof = 1;
- }
- return -1;
- }
- if (strcmp(line, s) != 0) {
- return -1;
- }
- return 0;
-}
-
-static bool read_rec(FILE *f, struct ntdb_context *ntdb, int *eof)
-{
- int length;
- NTDB_DATA key, data;
- bool ret = false;
- enum NTDB_ERROR e;
-
- key.dptr = NULL;
- data.dptr = NULL;
-
- if (swallow(f, "{\n", eof) == -1) {
- goto fail;
- }
- length = read_linehead(f);
- if (length == -1) {
- goto fail;
- }
- if (read_data(f, &key, length) == -1) {
- goto fail;
- }
- if (swallow(f, "\"\n", NULL) == -1) {
- goto fail;
- }
- length = read_linehead(f);
- if (length == -1) {
- goto fail;
- }
- if (read_data(f, &data, length) == -1) {
- goto fail;
- }
- if ((swallow(f, "\"\n", NULL) == -1)
- || (swallow(f, "}\n", NULL) == -1)) {
- goto fail;
- }
- e = ntdb_store(ntdb, key, data, NTDB_INSERT);
- if (e != NTDB_SUCCESS) {
- fprintf(stderr, "NTDB error: %s\n", ntdb_errorstr(e));
- goto fail;
- }
-
- ret = true;
-fail:
- free(key.dptr);
- free(data.dptr);
- return ret;
-}
-
-static int restore_ntdb(const char *fname, unsigned int hsize)
-{
- struct ntdb_context *ntdb;
- union ntdb_attribute hashsize;
-
- hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE;
- hashsize.base.next = NULL;
- hashsize.hashsize.size = hsize;
-
- ntdb = ntdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666,
- hsize ? &hashsize : NULL);
- if (!ntdb) {
- perror("ntdb_open");
- fprintf(stderr, "Failed to open %s\n", fname);
- return 1;
- }
-
- while (1) {
- int eof = 0;
- if (!read_rec(stdin, ntdb, &eof)) {
- if (eof) {
- break;
- }
- return 1;
- }
- }
- if (ntdb_close(ntdb)) {
- fprintf(stderr, "Error closing ntdb\n");
- return 1;
- }
- fprintf(stderr, "EOF\n");
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int hsize = 0;
- const char *execname = argv[0];
-
- if (argv[1] && strcmp(argv[1], "-h") == 0) {
- if (argv[2]) {
- hsize = atoi(argv[2]);
- }
- if (hsize == 0) {
- fprintf(stderr, "-h requires a integer value"
- " (eg. 128 or 131072)\n");
- exit(1);
- }
- argv += 2;
- argc -= 2;
- }
- if (argc != 2) {
- printf("Usage: %s [-h <hashsize>] dbname < tdbdump_output\n",
- execname);
- exit(1);
- }
-
-
- return restore_ntdb(argv[1], hsize);
-}
+++ /dev/null
-/*
- Unix SMB/CIFS implementation.
- Samba database functions
- Copyright (C) Andrew Tridgell 1999-2000
- Copyright (C) Paul `Rusty' Russell 2000
- Copyright (C) Jeremy Allison 2000
- Copyright (C) Andrew Esh 2001
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "config.h"
-#include "ntdb.h"
-#include "private.h"
-
-static int do_command(void);
-const char *cmdname;
-char *arg1, *arg2;
-size_t arg1len, arg2len;
-int bIterate = 0;
-char *line;
-NTDB_DATA iterate_kbuf;
-char cmdline[1024];
-static int disable_mmap;
-
-enum commands {
- CMD_CREATE_NTDB,
- CMD_OPEN_NTDB,
- CMD_TRANSACTION_START,
- CMD_TRANSACTION_COMMIT,
- CMD_TRANSACTION_CANCEL,
- CMD_ERASE,
- CMD_DUMP,
- CMD_INSERT,
- CMD_MOVE,
- CMD_STORE,
- CMD_SHOW,
- CMD_KEYS,
- CMD_HEXKEYS,
- CMD_DELETE,
-#if 0
- CMD_LIST_HASH_FREE,
- CMD_LIST_FREE,
-#endif
- CMD_INFO,
- CMD_MMAP,
- CMD_SPEED,
- CMD_FIRST,
- CMD_NEXT,
- CMD_SYSTEM,
- CMD_CHECK,
- CMD_QUIT,
- CMD_HELP
-};
-
-typedef struct {
- const char *name;
- enum commands cmd;
-} COMMAND_TABLE;
-
-COMMAND_TABLE cmd_table[] = {
- {"create", CMD_CREATE_NTDB},
- {"open", CMD_OPEN_NTDB},
-#if 0
- {"transaction_start", CMD_TRANSACTION_START},
- {"transaction_commit", CMD_TRANSACTION_COMMIT},
- {"transaction_cancel", CMD_TRANSACTION_CANCEL},
-#endif
- {"erase", CMD_ERASE},
- {"dump", CMD_DUMP},
- {"insert", CMD_INSERT},
- {"move", CMD_MOVE},
- {"store", CMD_STORE},
- {"show", CMD_SHOW},
- {"keys", CMD_KEYS},
- {"hexkeys", CMD_HEXKEYS},
- {"delete", CMD_DELETE},
-#if 0
- {"list", CMD_LIST_HASH_FREE},
- {"free", CMD_LIST_FREE},
-#endif
- {"info", CMD_INFO},
- {"speed", CMD_SPEED},
- {"mmap", CMD_MMAP},
- {"first", CMD_FIRST},
- {"1", CMD_FIRST},
- {"next", CMD_NEXT},
- {"n", CMD_NEXT},
- {"check", CMD_CHECK},
- {"quit", CMD_QUIT},
- {"q", CMD_QUIT},
- {"!", CMD_SYSTEM},
- {NULL, CMD_HELP}
-};
-
-struct timeval tp1,tp2;
-
-static void _start_timer(void)
-{
- gettimeofday(&tp1,NULL);
-}
-
-static double _end_timer(void)
-{
- gettimeofday(&tp2,NULL);
- return((tp2.tv_sec - tp1.tv_sec) +
- (tp2.tv_usec - tp1.tv_usec)*1.0e-6);
-}
-
-static void ntdb_log(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
-{
- fprintf(stderr, "ntdb:%s:%s:%s\n",
- ntdb_name(ntdb), ntdb_errorstr(ecode), message);
-}
-
-/* a ntdb tool for manipulating a ntdb database */
-
-static struct ntdb_context *ntdb;
-
-static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
-static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
-static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
-
-static void print_asc(const char *buf,int len)
-{
- int i;
-
- /* We're probably printing ASCII strings so don't try to display
- the trailing NULL character. */
-
- if (buf[len - 1] == 0)
- len--;
-
- for (i=0;i<len;i++)
- printf("%c",isprint(buf[i])?buf[i]:'.');
-}
-
-static void print_data(const char *buf,int len)
-{
- int i=0;
- if (len<=0) return;
- printf("[%03X] ",i);
- for (i=0;i<len;) {
- printf("%02X ",(int)((unsigned char)buf[i]));
- i++;
- if (i%8 == 0) printf(" ");
- if (i%16 == 0) {
- print_asc(&buf[i-16],8); printf(" ");
- print_asc(&buf[i-8],8); printf("\n");
- if (i<len) printf("[%03X] ",i);
- }
- }
- if (i%16) {
- int n;
-
- n = 16 - (i%16);
- printf(" ");
- if (n>8) printf(" ");
- while (n--) printf(" ");
-
- n = i%16;
- if (n > 8) n = 8;
- print_asc(&buf[i-(i%16)],n); printf(" ");
- n = (i%16) - n;
- if (n>0) print_asc(&buf[i-n],n);
- printf("\n");
- }
-}
-
-static void help(void)
-{
- printf("\n"
-"tdbtool: \n"
-" create dbname : create a database\n"
-" open dbname : open an existing database\n"
-" openjh dbname : open an existing database (jenkins hash)\n"
-" transaction_start : start a transaction\n"
-" transaction_commit : commit a transaction\n"
-" transaction_cancel : cancel a transaction\n"
-" erase : erase the database\n"
-" dump : dump the database as strings\n"
-" keys : dump the database keys as strings\n"
-" hexkeys : dump the database keys as hex values\n"
-" info : print summary info about the database\n"
-" insert key data : insert a record\n"
-" move key file : move a record to a destination ntdb\n"
-" store key data : store a record (replace)\n"
-" show key : show a record by key\n"
-" delete key : delete a record by key\n"
-#if 0
-" list : print the database hash table and freelist\n"
-" free : print the database freelist\n"
-#endif
-" check : check the integrity of an opened database\n"
-" speed : perform speed tests on the database\n"
-" ! command : execute system command\n"
-" 1 | first : print the first record\n"
-" n | next : print the next record\n"
-" q | quit : terminate\n"
-" \\n : repeat 'next' command\n"
-"\n");
-}
-
-static void terror(enum NTDB_ERROR err, const char *why)
-{
- if (err != NTDB_SUCCESS)
- printf("%s:%s\n", ntdb_errorstr(err), why);
- else
- printf("%s\n", why);
-}
-
-static void create_ntdb(const char *tdbname)
-{
- union ntdb_attribute log_attr;
- log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
- log_attr.base.next = NULL;
- log_attr.log.fn = ntdb_log;
-
- if (ntdb) ntdb_close(ntdb);
- ntdb = ntdb_open(tdbname, (disable_mmap?NTDB_NOMMAP:0),
- O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr);
- if (!ntdb) {
- printf("Could not create %s: %s\n", tdbname, strerror(errno));
- }
-}
-
-static void open_ntdb(const char *tdbname)
-{
- union ntdb_attribute log_attr;
- log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
- log_attr.base.next = NULL;
- log_attr.log.fn = ntdb_log;
-
- if (ntdb) ntdb_close(ntdb);
- ntdb = ntdb_open(tdbname, disable_mmap?NTDB_NOMMAP:0, O_RDWR, 0600,
- &log_attr);
- if (!ntdb) {
- printf("Could not open %s: %s\n", tdbname, strerror(errno));
- }
-}
-
-static void insert_ntdb(char *keyname, size_t keylen, char* data, size_t datalen)
-{
- NTDB_DATA key, dbuf;
- enum NTDB_ERROR ecode;
-
- if ((keyname == NULL) || (keylen == 0)) {
- terror(NTDB_SUCCESS, "need key");
- return;
- }
-
- key.dptr = (unsigned char *)keyname;
- key.dsize = keylen;
- dbuf.dptr = (unsigned char *)data;
- dbuf.dsize = datalen;
-
- ecode = ntdb_store(ntdb, key, dbuf, NTDB_INSERT);
- if (ecode) {
- terror(ecode, "insert failed");
- }
-}
-
-static void store_ntdb(char *keyname, size_t keylen, char* data, size_t datalen)
-{
- NTDB_DATA key, dbuf;
- enum NTDB_ERROR ecode;
-
- if ((keyname == NULL) || (keylen == 0)) {
- terror(NTDB_SUCCESS, "need key");
- return;
- }
-
- if ((data == NULL) || (datalen == 0)) {
- terror(NTDB_SUCCESS, "need data");
- return;
- }
-
- key.dptr = (unsigned char *)keyname;
- key.dsize = keylen;
- dbuf.dptr = (unsigned char *)data;
- dbuf.dsize = datalen;
-
- printf("Storing key:\n");
- print_rec(ntdb, key, dbuf, NULL);
-
- ecode = ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
- if (ecode) {
- terror(ecode, "store failed");
- }
-}
-
-static void show_ntdb(char *keyname, size_t keylen)
-{
- NTDB_DATA key, dbuf;
- enum NTDB_ERROR ecode;
-
- if ((keyname == NULL) || (keylen == 0)) {
- terror(NTDB_SUCCESS, "need key");
- return;
- }
-
- key.dptr = (unsigned char *)keyname;
- key.dsize = keylen;
-
- ecode = ntdb_fetch(ntdb, key, &dbuf);
- if (ecode) {
- terror(ecode, "fetch failed");
- return;
- }
-
- print_rec(ntdb, key, dbuf, NULL);
-
- free( dbuf.dptr );
-}
-
-static void delete_ntdb(char *keyname, size_t keylen)
-{
- NTDB_DATA key;
- enum NTDB_ERROR ecode;
-
- if ((keyname == NULL) || (keylen == 0)) {
- terror(NTDB_SUCCESS, "need key");
- return;
- }
-
- key.dptr = (unsigned char *)keyname;
- key.dsize = keylen;
-
- ecode = ntdb_delete(ntdb, key);
- if (ecode) {
- terror(ecode, "delete failed");
- }
-}
-
-static void move_rec(char *keyname, size_t keylen, char* tdbname)
-{
- NTDB_DATA key, dbuf;
- struct ntdb_context *dst_ntdb;
- enum NTDB_ERROR ecode;
-
- if ((keyname == NULL) || (keylen == 0)) {
- terror(NTDB_SUCCESS, "need key");
- return;
- }
-
- if ( !tdbname ) {
- terror(NTDB_SUCCESS, "need destination ntdb name");
- return;
- }
-
- key.dptr = (unsigned char *)keyname;
- key.dsize = keylen;
-
- ecode = ntdb_fetch(ntdb, key, &dbuf);
- if (ecode) {
- terror(ecode, "fetch failed");
- return;
- }
-
- print_rec(ntdb, key, dbuf, NULL);
-
- dst_ntdb = ntdb_open(tdbname, 0, O_RDWR, 0600, NULL);
- if ( !dst_ntdb ) {
- terror(NTDB_SUCCESS, "unable to open destination ntdb");
- return;
- }
-
- ecode = ntdb_store( dst_ntdb, key, dbuf, NTDB_REPLACE);
- if (ecode)
- terror(ecode, "failed to move record");
- else
- printf("record moved\n");
-
- ntdb_close( dst_ntdb );
-}
-
-static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- printf("\nkey %d bytes\n", (int)key.dsize);
- print_asc((const char *)key.dptr, key.dsize);
- printf("\ndata %d bytes\n", (int)dbuf.dsize);
- print_data((const char *)dbuf.dptr, dbuf.dsize);
- return 0;
-}
-
-static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- printf("key %d bytes: ", (int)key.dsize);
- print_asc((const char *)key.dptr, key.dsize);
- printf("\n");
- return 0;
-}
-
-static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- printf("key %d bytes\n", (int)key.dsize);
- print_data((const char *)key.dptr, key.dsize);
- printf("\n");
- return 0;
-}
-
-static int total_bytes;
-
-static int traverse_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
-{
- total_bytes += dbuf.dsize;
- return 0;
-}
-
-static void info_ntdb(void)
-{
- enum NTDB_ERROR ecode;
- char *summary;
-
- ecode = ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &summary);
-
- if (ecode) {
- terror(ecode, "Getting summary");
- } else {
- printf("%s", summary);
- free(summary);
- }
-}
-
-static void speed_ntdb(const char *tlimit)
-{
- unsigned timelimit = tlimit?atoi(tlimit):0;
- double t;
- int ops;
- if (timelimit == 0) timelimit = 5;
-
- ops = 0;
- printf("Testing store speed for %u seconds\n", timelimit);
- _start_timer();
- do {
- long int r = random();
- NTDB_DATA key, dbuf;
- key = ntdb_mkdata("store test", strlen("store test"));
- dbuf.dptr = (unsigned char *)&r;
- dbuf.dsize = sizeof(r);
- ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
- t = _end_timer();
- ops++;
- } while (t < timelimit);
- printf("%10.3f ops/sec\n", ops/t);
-
- ops = 0;
- printf("Testing fetch speed for %u seconds\n", timelimit);
- _start_timer();
- do {
- long int r = random();
- NTDB_DATA key, dbuf;
- key = ntdb_mkdata("store test", strlen("store test"));
- dbuf.dptr = (unsigned char *)&r;
- dbuf.dsize = sizeof(r);
- ntdb_fetch(ntdb, key, &dbuf);
- t = _end_timer();
- ops++;
- } while (t < timelimit);
- printf("%10.3f ops/sec\n", ops/t);
-
- ops = 0;
- printf("Testing transaction speed for %u seconds\n", timelimit);
- _start_timer();
- do {
- long int r = random();
- NTDB_DATA key, dbuf;
- key = ntdb_mkdata("transaction test", strlen("transaction test"));
- dbuf.dptr = (unsigned char *)&r;
- dbuf.dsize = sizeof(r);
- ntdb_transaction_start(ntdb);
- ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
- ntdb_transaction_commit(ntdb);
- t = _end_timer();
- ops++;
- } while (t < timelimit);
- printf("%10.3f ops/sec\n", ops/t);
-
- ops = 0;
- printf("Testing traverse speed for %u seconds\n", timelimit);
- _start_timer();
- do {
- ntdb_traverse(ntdb, traverse_fn, NULL);
- t = _end_timer();
- ops++;
- } while (t < timelimit);
- printf("%10.3f ops/sec\n", ops/t);
-}
-
-static void toggle_mmap(void)
-{
- disable_mmap = !disable_mmap;
- if (disable_mmap) {
- printf("mmap is disabled\n");
- } else {
- printf("mmap is enabled\n");
- }
-}
-
-static char *ntdb_getline(const char *prompt)
-{
- static char thisline[1024];
- char *p;
- fputs(prompt, stdout);
- thisline[0] = 0;
- p = fgets(thisline, sizeof(thisline)-1, stdin);
- if (p) p = strchr(p, '\n');
- if (p) *p = 0;
- return p?thisline:NULL;
-}
-
-static int do_delete_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf,
- void *state)
-{
- return ntdb_delete(the_ntdb, key);
-}
-
-static void first_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey)
-{
- NTDB_DATA dbuf;
- enum NTDB_ERROR ecode;
- ecode = ntdb_firstkey(the_ntdb, pkey);
- if (!ecode)
- ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf);
- if (ecode) terror(ecode, "fetch failed");
- else {
- print_rec(the_ntdb, *pkey, dbuf, NULL);
- }
-}
-
-static void next_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey)
-{
- NTDB_DATA dbuf;
- enum NTDB_ERROR ecode;
- ecode = ntdb_nextkey(the_ntdb, pkey);
-
- if (!ecode)
- ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf);
- if (ecode)
- terror(ecode, "fetch failed");
- else
- print_rec(the_ntdb, *pkey, dbuf, NULL);
-}
-
-static void check_db(struct ntdb_context *the_ntdb)
-{
- if (!the_ntdb) {
- printf("Error: No database opened!\n");
- } else {
- if (ntdb_check(the_ntdb, NULL, NULL) != 0)
- printf("Integrity check for the opened database failed.\n");
- else
- printf("Database integrity is OK.\n");
- }
-}
-
-static int do_command(void)
-{
- COMMAND_TABLE *ctp = cmd_table;
- enum commands mycmd = CMD_HELP;
- int cmd_len;
-
- if (cmdname && strlen(cmdname) == 0) {
- mycmd = CMD_NEXT;
- } else {
- while (ctp->name) {
- cmd_len = strlen(ctp->name);
- if (strncmp(ctp->name,cmdname,cmd_len) == 0) {
- mycmd = ctp->cmd;
- break;
- }
- ctp++;
- }
- }
-
- switch (mycmd) {
- case CMD_CREATE_NTDB:
- bIterate = 0;
- create_ntdb(arg1);
- return 0;
- case CMD_OPEN_NTDB:
- bIterate = 0;
- open_ntdb(arg1);
- return 0;
- case CMD_SYSTEM:
- /* Shell command */
- if (system(arg1) == -1) {
- terror(NTDB_SUCCESS, "system() call failed\n");
- }
- return 0;
- case CMD_QUIT:
- return 1;
- default:
- /* all the rest require a open database */
- if (!ntdb) {
- bIterate = 0;
- terror(NTDB_SUCCESS, "database not open");
- help();
- return 0;
- }
- switch (mycmd) {
- case CMD_TRANSACTION_START:
- bIterate = 0;
- ntdb_transaction_start(ntdb);
- return 0;
- case CMD_TRANSACTION_COMMIT:
- bIterate = 0;
- ntdb_transaction_commit(ntdb);
- return 0;
- case CMD_TRANSACTION_CANCEL:
- bIterate = 0;
- ntdb_transaction_cancel(ntdb);
- return 0;
- case CMD_ERASE:
- bIterate = 0;
- ntdb_traverse(ntdb, do_delete_fn, NULL);
- return 0;
- case CMD_DUMP:
- bIterate = 0;
- ntdb_traverse(ntdb, print_rec, NULL);
- return 0;
- case CMD_INSERT:
- bIterate = 0;
- insert_ntdb(arg1, arg1len,arg2,arg2len);
- return 0;
- case CMD_MOVE:
- bIterate = 0;
- move_rec(arg1,arg1len,arg2);
- return 0;
- case CMD_STORE:
- bIterate = 0;
- store_ntdb(arg1,arg1len,arg2,arg2len);
- return 0;
- case CMD_SHOW:
- bIterate = 0;
- show_ntdb(arg1, arg1len);
- return 0;
- case CMD_KEYS:
- ntdb_traverse(ntdb, print_key, NULL);
- return 0;
- case CMD_HEXKEYS:
- ntdb_traverse(ntdb, print_hexkey, NULL);
- return 0;
- case CMD_DELETE:
- bIterate = 0;
- delete_ntdb(arg1,arg1len);
- return 0;
-#if 0
- case CMD_LIST_HASH_FREE:
- ntdb_dump_all(ntdb);
- return 0;
- case CMD_LIST_FREE:
- ntdb_printfreelist(ntdb);
- return 0;
-#endif
- case CMD_INFO:
- info_ntdb();
- return 0;
- case CMD_SPEED:
- speed_ntdb(arg1);
- return 0;
- case CMD_MMAP:
- toggle_mmap();
- return 0;
- case CMD_FIRST:
- bIterate = 1;
- first_record(ntdb, &iterate_kbuf);
- return 0;
- case CMD_NEXT:
- if (bIterate)
- next_record(ntdb, &iterate_kbuf);
- return 0;
- case CMD_CHECK:
- check_db(ntdb);
- return 0;
- case CMD_HELP:
- help();
- return 0;
- case CMD_CREATE_NTDB:
- case CMD_OPEN_NTDB:
- case CMD_SYSTEM:
- case CMD_QUIT:
- /*
- * unhandled commands. cases included here to avoid compiler
- * warnings.
- */
- return 0;
- }
- }
-
- return 0;
-}
-
-static char *convert_string(char *instring, size_t *sizep)
-{
- size_t length = 0;
- char *outp, *inp;
- char temp[3];
-
- outp = inp = instring;
-
- while (*inp) {
- if (*inp == '\\') {
- inp++;
- if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) {
- temp[0] = *inp++;
- temp[1] = '\0';
- if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) {
- temp[1] = *inp++;
- temp[2] = '\0';
- }
- *outp++ = (char)strtol((const char *)temp,NULL,16);
- } else {
- *outp++ = *inp++;
- }
- } else {
- *outp++ = *inp++;
- }
- length++;
- }
- *sizep = length;
- return instring;
-}
-
-int main(int argc, char *argv[])
-{
- cmdname = "";
- arg1 = NULL;
- arg1len = 0;
- arg2 = NULL;
- arg2len = 0;
-
- if (argv[1]) {
- cmdname = "open";
- arg1 = argv[1];
- do_command();
- cmdname = "";
- arg1 = NULL;
- }
-
- switch (argc) {
- case 1:
- case 2:
- /* Interactive mode */
- while ((cmdname = ntdb_getline("ntdb> "))) {
- arg2 = arg1 = NULL;
- if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) {
- arg1++;
- arg2 = arg1;
- while (*arg2) {
- if (*arg2 == ' ') {
- *arg2++ = '\0';
- break;
- }
- if ((*arg2++ == '\\') && (*arg2 == ' ')) {
- arg2++;
- }
- }
- }
- if (arg1) arg1 = convert_string(arg1,&arg1len);
- if (arg2) arg2 = convert_string(arg2,&arg2len);
- if (do_command()) break;
- }
- break;
- case 5:
- arg2 = convert_string(argv[4],&arg2len);
- case 4:
- arg1 = convert_string(argv[3],&arg1len);
- case 3:
- cmdname = argv[2];
- default:
- do_command();
- break;
- }
-
- if (ntdb) ntdb_close(ntdb);
-
- return 0;
-}
+++ /dev/null
-/* this tests ntdb by doing lots of ops from several simultaneous
- writers - that stresses the locking code.
-*/
-
-#include "config.h"
-#include "ntdb.h"
-#include "private.h"
-#include <ccan/err/err.h>
-
-//#define REOPEN_PROB 30
-#define DELETE_PROB 8
-#define STORE_PROB 4
-#define APPEND_PROB 6
-#define TRANSACTION_PROB 10
-#define TRANSACTION_PREPARE_PROB 2
-#define LOCKSTORE_PROB 5
-#define TRAVERSE_PROB 20
-#define TRAVERSE_MOD_PROB 100
-#define TRAVERSE_ABORT_PROB 500
-#define CULL_PROB 100
-#define KEYLEN 3
-#define DATALEN 100
-
-static struct ntdb_context *db;
-static int in_transaction;
-static int in_traverse;
-static int error_count;
-#if TRANSACTION_PROB
-static int always_transaction = 0;
-#endif
-static int loopnum;
-static int count_pipe;
-static union ntdb_attribute log_attr;
-static union ntdb_attribute seed_attr;
-static union ntdb_attribute hsize_attr;
-
-static void ntdb_log(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
-{
- printf("ntdb:%s:%s:%s\n",
- ntdb_name(ntdb), ntdb_errorstr(ecode), message);
- fflush(stdout);
-#if 0
- {
- char str[200];
- signal(SIGUSR1, SIG_IGN);
- sprintf(str,"xterm -e gdb /proc/%u/exe %u", (unsigned int)getpid(), (unsigned int)getpid());
- system(str);
- }
-#endif
-}
-
-#include "../private.h"
-
-static void segv_handler(int sig, siginfo_t *info, void *p)
-{
- char string[100];
-
- sprintf(string, "%u: death at %p (map_ptr %p, map_size %zu)\n",
- (unsigned int)getpid(), info->si_addr, db->file->map_ptr,
- (size_t)db->file->map_size);
- if (write(2, string, strlen(string)) > 0)
- sleep(60);
- _exit(11);
-}
-
-static void warn_on_err(enum NTDB_ERROR e, struct ntdb_context *ntdb,
- const char *why)
-{
- if (e != NTDB_SUCCESS) {
- fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), why,
- ntdb ? ntdb_errorstr(e) : "(no ntdb)");
- error_count++;
- }
-}
-
-static char *randbuf(int len)
-{
- char *buf;
- int i;
- buf = (char *)malloc(len+1);
- if (buf == NULL) {
- perror("randbuf: unable to allocate memory for buffer.\n");
- exit(1);
- }
-
- for (i=0;i<len;i++) {
- buf[i] = 'a' + (rand() % 26);
- }
- buf[i] = 0;
- return buf;
-}
-
-static void addrec_db(void);
-static int modify_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
- void *state)
-{
-#if CULL_PROB
- if (random() % CULL_PROB == 0) {
- ntdb_delete(ntdb, key);
- }
-#endif
-
-#if TRAVERSE_MOD_PROB
- if (random() % TRAVERSE_MOD_PROB == 0) {
- addrec_db();
- }
-#endif
-
-#if TRAVERSE_ABORT_PROB
- if (random() % TRAVERSE_ABORT_PROB == 0)
- return 1;
-#endif
-
- return 0;
-}
-
-static void addrec_db(void)
-{
- int klen, dlen;
- char *k, *d;
- NTDB_DATA key, data;
- enum NTDB_ERROR e;
-
- klen = 1 + (rand() % KEYLEN);
- dlen = 1 + (rand() % DATALEN);
-
- k = randbuf(klen);
- d = randbuf(dlen);
-
- key.dptr = (unsigned char *)k;
- key.dsize = klen+1;
-
- data.dptr = (unsigned char *)d;
- data.dsize = dlen+1;
-
-#if REOPEN_PROB
- if (in_traverse == 0 && in_transaction == 0 && random() % REOPEN_PROB == 0) {
- ntdb_reopen_all(0);
- goto next;
- }
-#endif
-
-#if TRANSACTION_PROB
- if (in_traverse == 0 && in_transaction == 0 && (always_transaction || random() % TRANSACTION_PROB == 0)) {
- e = ntdb_transaction_start(db);
- warn_on_err(e, db, "ntdb_transaction_start failed");
- in_transaction++;
- goto next;
- }
- if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
- if (random() % TRANSACTION_PREPARE_PROB == 0) {
- e = ntdb_transaction_prepare_commit(db);
- warn_on_err(e, db, "ntdb_transaction_prepare_commit failed");
- }
- e = ntdb_transaction_commit(db);
- warn_on_err(e, db, "ntdb_transaction_commit failed");
- in_transaction--;
- goto next;
- }
-
- if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
- ntdb_transaction_cancel(db);
- in_transaction--;
- goto next;
- }
-#endif
-
-#if DELETE_PROB
- if (random() % DELETE_PROB == 0) {
- ntdb_delete(db, key);
- goto next;
- }
-#endif
-
-#if STORE_PROB
- if (random() % STORE_PROB == 0) {
- e = ntdb_store(db, key, data, NTDB_REPLACE);
- warn_on_err(e, db, "ntdb_store failed");
- goto next;
- }
-#endif
-
-#if APPEND_PROB
- if (random() % APPEND_PROB == 0) {
- e = ntdb_append(db, key, data);
- warn_on_err(e, db, "ntdb_append failed");
- goto next;
- }
-#endif
-
-#if LOCKSTORE_PROB
- if (random() % LOCKSTORE_PROB == 0) {
- ntdb_chainlock(db, key);
- if (ntdb_fetch(db, key, &data) != NTDB_SUCCESS) {
- data.dsize = 0;
- data.dptr = NULL;
- }
- e = ntdb_store(db, key, data, NTDB_REPLACE);
- warn_on_err(e, db, "ntdb_store failed");
- if (data.dptr) free(data.dptr);
- ntdb_chainunlock(db, key);
- goto next;
- }
-#endif
-
-#if TRAVERSE_PROB
- /* FIXME: recursive traverses break transactions? */
- if (in_traverse == 0 && random() % TRAVERSE_PROB == 0) {
- in_traverse++;
- ntdb_traverse(db, modify_traverse, NULL);
- in_traverse--;
- goto next;
- }
-#endif
-
- if (ntdb_fetch(db, key, &data) == NTDB_SUCCESS)
- free(data.dptr);
-
-next:
- free(k);
- free(d);
-}
-
-static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
- void *state)
-{
- ntdb_delete(ntdb, key);
- return 0;
-}
-
-static void usage(void)
-{
- printf("Usage: ntdbtorture"
-#if TRANSACTION_PROB
- " [-t]"
-#endif
- " [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-S] [-H HASH_SIZE]\n");
- exit(0);
-}
-
-static void send_count_and_suicide(int sig)
-{
- /* This ensures our successor can continue where we left off. */
- if (write(count_pipe, &loopnum, sizeof(loopnum)) != sizeof(loopnum))
- exit(2);
- /* This gives a unique signature. */
- kill(getpid(), SIGUSR2);
-}
-
-static int run_child(const char *filename, int i, int seed, unsigned num_loops,
- unsigned start, int ntdb_flags)
-{
- struct sigaction act = { .sa_sigaction = segv_handler,
- .sa_flags = SA_SIGINFO };
- sigaction(11, &act, NULL);
-
- db = ntdb_open(filename, ntdb_flags, O_RDWR | O_CREAT, 0600,
- &log_attr);
- if (!db) {
- fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), filename,
- "db open failed");
- exit(1);
- }
-
-#if 0
- if (i == 0) {
- printf("pid %u\n", (unsigned int)getpid());
- sleep(9);
- } else
- sleep(10);
-#endif
-
- srand(seed + i);
- srandom(seed + i);
-
- /* Set global, then we're ready to handle being killed. */
- loopnum = start;
- signal(SIGUSR1, send_count_and_suicide);
-
- for (;loopnum<num_loops && error_count == 0;loopnum++) {
- addrec_db();
- }
-
- if (error_count == 0) {
- enum NTDB_ERROR e;
-
- ntdb_traverse(db, NULL, NULL);
-#if TRANSACTION_PROB
- if (always_transaction) {
- while (in_transaction) {
- ntdb_transaction_cancel(db);
- in_transaction--;
- }
- e = ntdb_transaction_start(db);
- if (e) {
- warn_on_err(e, db,
- "ntdb_transaction_start failed");
- exit(1);
- }
- }
-#endif
- ntdb_traverse(db, traverse_fn, NULL);
- ntdb_traverse(db, traverse_fn, NULL);
-
-#if TRANSACTION_PROB
- if (always_transaction) {
- e = ntdb_transaction_commit(db);
- warn_on_err(e, db, "ntdb_transaction_commit failed");
- }
-#endif
- }
-
- ntdb_close(db);
-
- return (error_count < 100 ? error_count : 100);
-}
-
-static char *test_path(const char *filename)
-{
- const char *prefix = getenv("TEST_DATA_PREFIX");
-
- if (prefix) {
- char *path = NULL;
- int ret;
-
- ret = asprintf(&path, "%s/%s", prefix, filename);
- if (ret == -1) {
- return NULL;
- }
- return path;
- }
-
- return strdup(filename);
-}
-
-int main(int argc, char * const *argv)
-{
- int i, seed = -1;
- int num_loops = 5000;
- int num_procs = 3;
- int c, pfds[2];
- extern char *optarg;
- pid_t *pids;
- int kill_random = 0;
- int *done;
- int ntdb_flags = NTDB_DEFAULT;
- char *test_ntdb;
- enum NTDB_ERROR e;
-
- log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
- log_attr.base.next = &seed_attr;
- log_attr.log.fn = ntdb_log;
- seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
- seed_attr.base.next = &hsize_attr;
- hsize_attr.base.attr = NTDB_ATTRIBUTE_HASHSIZE;
- hsize_attr.base.next = NULL;
- hsize_attr.hashsize.size = 2; /* stress it by default. */
-
- while ((c = getopt(argc, argv, "n:l:s:thkSH:")) != -1) {
- switch (c) {
- case 'n':
- num_procs = strtol(optarg, NULL, 0);
- break;
- case 'l':
- num_loops = strtol(optarg, NULL, 0);
- break;
- case 's':
- seed = strtol(optarg, NULL, 0);
- break;
- case 'S':
- ntdb_flags = NTDB_NOSYNC;
- break;
- case 't':
-#if TRANSACTION_PROB
- always_transaction = 1;
-#else
- fprintf(stderr, "Transactions not supported\n");
- usage();
-#endif
- break;
- case 'k':
- kill_random = 1;
- break;
- case 'H':
- hsize_attr.hashsize.size = strtol(optarg, NULL, 0);
- break;
- default:
- usage();
- }
- }
-
- test_ntdb = test_path("torture.ntdb");
-
- unlink(test_ntdb);
-
- if (seed == -1) {
- seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
- }
- seed_attr.seed.seed = (((uint64_t)seed) << 32) | seed;
-
- if (num_procs == 1 && !kill_random) {
- /* Don't fork for this case, makes debugging easier. */
- error_count = run_child(test_ntdb, 0, seed, num_loops, 0,
- ntdb_flags);
- goto done;
- }
-
- pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
- done = (int *)calloc(sizeof(int), num_procs);
-
- if (pipe(pfds) != 0) {
- perror("Creating pipe");
- exit(1);
- }
- count_pipe = pfds[1];
-
- for (i=0;i<num_procs;i++) {
- if ((pids[i]=fork()) == 0) {
- close(pfds[0]);
- if (i == 0) {
- printf("testing with %d processes, %d loops, seed=%d%s\n",
- num_procs, num_loops, seed,
-#if TRANSACTION_PROB
- always_transaction ? " (all within transactions)" : ""
-#else
- ""
-#endif
- );
- }
- exit(run_child(test_ntdb, i, seed, num_loops, 0,
- ntdb_flags));
- }
- }
-
- while (num_procs) {
- int status, j;
- pid_t pid;
-
- if (error_count != 0) {
- /* try and stop the test on any failure */
- for (j=0;j<num_procs;j++) {
- if (pids[j] != 0) {
- kill(pids[j], SIGTERM);
- }
- }
- }
-
- pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
- if (pid == 0) {
- struct timespec ts;
-
- /* Sleep for 1/10 second. */
- ts.tv_sec = 0;
- ts.tv_nsec = 100000000;
- nanosleep(&ts, NULL);
-
- /* Kill someone. */
- kill(pids[random() % num_procs], SIGUSR1);
- continue;
- }
-
- if (pid == -1) {
- perror("failed to wait for child\n");
- exit(1);
- }
-
- for (j=0;j<num_procs;j++) {
- if (pids[j] == pid) break;
- }
- if (j == num_procs) {
- printf("unknown child %d exited!?\n", (int)pid);
- exit(1);
- }
- if (WIFSIGNALED(status)) {
- if (WTERMSIG(status) == SIGUSR2
- || WTERMSIG(status) == SIGUSR1) {
- /* SIGUSR2 means they wrote to pipe. */
- if (WTERMSIG(status) == SIGUSR2) {
- if (read(pfds[0], &done[j],
- sizeof(done[j]))
- != sizeof(done[j]))
- err(1,
- "Short read from child?");
- }
- pids[j] = fork();
- if (pids[j] == 0)
- exit(run_child(test_ntdb, j, seed,
- num_loops, done[j],
- ntdb_flags));
- printf("Restarting child %i for %u-%u\n",
- j, done[j], num_loops);
- continue;
- }
- printf("child %d exited with signal %d\n",
- (int)pid, WTERMSIG(status));
- error_count++;
- } else {
- if (WEXITSTATUS(status) != 0) {
- printf("child %d exited with status %d\n",
- (int)pid, WEXITSTATUS(status));
- error_count++;
- }
- }
- memmove(&pids[j], &pids[j+1],
- (num_procs - j - 1)*sizeof(pids[0]));
- num_procs--;
- }
-
- free(pids);
-
-done:
- if (error_count == 0) {
- db = ntdb_open(test_ntdb, NTDB_DEFAULT, O_RDWR | O_CREAT,
- 0600, &log_attr);
- if (!db) {
- fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), test_ntdb,
- "db open failed");
- exit(1);
- }
- e = ntdb_check(db, NULL, NULL);
- if (e) {
- warn_on_err(e, db, "db check failed");
- exit(1);
- }
- ntdb_close(db);
- printf("OK\n");
- }
-
- free(test_ntdb);
- return error_count;
-}
+++ /dev/null
-/* Simple speed test for NTDB */
-#include <ccan/err/err.h>
-#include <time.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-#include "ntdb.h"
-
-/* Nanoseconds per operation */
-static size_t normalize(const struct timeval *start,
- const struct timeval *stop,
- unsigned int num)
-{
- struct timeval diff;
-
- timersub(stop, start, &diff);
-
- /* Floating point is more accurate here. */
- return (double)(diff.tv_sec * 1000000 + diff.tv_usec)
- / num * 1000;
-}
-
-static size_t file_size(void)
-{
- struct stat st;
-
- if (stat("/tmp/speed.ntdb", &st) != 0)
- return -1;
- return st.st_size;
-}
-
-static int count_record(struct ntdb_context *ntdb,
- NTDB_DATA key, NTDB_DATA data, void *p)
-{
- int *total = p;
- *total += *(int *)data.dptr;
- return 0;
-}
-
-static void dump_and_clear_stats(struct ntdb_context **ntdb,
- int flags,
- union ntdb_attribute *attr)
-{
- union ntdb_attribute stats;
- enum NTDB_ERROR ecode;
-
- stats.base.attr = NTDB_ATTRIBUTE_STATS;
- stats.stats.size = sizeof(stats.stats);
- ecode = ntdb_get_attribute(*ntdb, &stats);
- if (ecode != NTDB_SUCCESS)
- errx(1, "Getting stats: %s", ntdb_errorstr(ecode));
-
- printf("allocs = %llu\n",
- (unsigned long long)stats.stats.allocs);
- printf(" alloc_subhash = %llu\n",
- (unsigned long long)stats.stats.alloc_subhash);
- printf(" alloc_chain = %llu\n",
- (unsigned long long)stats.stats.alloc_chain);
- printf(" alloc_bucket_exact = %llu\n",
- (unsigned long long)stats.stats.alloc_bucket_exact);
- printf(" alloc_bucket_max = %llu\n",
- (unsigned long long)stats.stats.alloc_bucket_max);
- printf(" alloc_leftover = %llu\n",
- (unsigned long long)stats.stats.alloc_leftover);
- printf(" alloc_coalesce_tried = %llu\n",
- (unsigned long long)stats.stats.alloc_coalesce_tried);
- printf(" alloc_coalesce_iterate_clash = %llu\n",
- (unsigned long long)stats.stats.alloc_coalesce_iterate_clash);
- printf(" alloc_coalesce_lockfail = %llu\n",
- (unsigned long long)stats.stats.alloc_coalesce_lockfail);
- printf(" alloc_coalesce_race = %llu\n",
- (unsigned long long)stats.stats.alloc_coalesce_race);
- printf(" alloc_coalesce_succeeded = %llu\n",
- (unsigned long long)stats.stats.alloc_coalesce_succeeded);
- printf(" alloc_coalesce_num_merged = %llu\n",
- (unsigned long long)stats.stats.alloc_coalesce_num_merged);
- printf("compares = %llu\n",
- (unsigned long long)stats.stats.compares);
- printf(" compare_wrong_offsetbits = %llu\n",
- (unsigned long long)stats.stats.compare_wrong_offsetbits);
- printf(" compare_wrong_keylen = %llu\n",
- (unsigned long long)stats.stats.compare_wrong_keylen);
- printf(" compare_wrong_rechash = %llu\n",
- (unsigned long long)stats.stats.compare_wrong_rechash);
- printf(" compare_wrong_keycmp = %llu\n",
- (unsigned long long)stats.stats.compare_wrong_keycmp);
- printf("transactions = %llu\n",
- (unsigned long long)stats.stats.transactions);
- printf(" transaction_cancel = %llu\n",
- (unsigned long long)stats.stats.transaction_cancel);
- printf(" transaction_nest = %llu\n",
- (unsigned long long)stats.stats.transaction_nest);
- printf(" transaction_expand_file = %llu\n",
- (unsigned long long)stats.stats.transaction_expand_file);
- printf(" transaction_read_direct = %llu\n",
- (unsigned long long)stats.stats.transaction_read_direct);
- printf(" transaction_read_direct_fail = %llu\n",
- (unsigned long long)stats.stats.transaction_read_direct_fail);
- printf(" transaction_write_direct = %llu\n",
- (unsigned long long)stats.stats.transaction_write_direct);
- printf(" transaction_write_direct_fail = %llu\n",
- (unsigned long long)stats.stats.transaction_write_direct_fail);
- printf("expands = %llu\n",
- (unsigned long long)stats.stats.expands);
- printf("frees = %llu\n",
- (unsigned long long)stats.stats.frees);
- printf("locks = %llu\n",
- (unsigned long long)stats.stats.locks);
- printf(" lock_lowlevel = %llu\n",
- (unsigned long long)stats.stats.lock_lowlevel);
- printf(" lock_nonblock = %llu\n",
- (unsigned long long)stats.stats.lock_nonblock);
- printf(" lock_nonblock_fail = %llu\n",
- (unsigned long long)stats.stats.lock_nonblock_fail);
-
- /* Now clear. */
- ntdb_close(*ntdb);
- *ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR, 0, attr);
-}
-
-static void ntdb_log(struct ntdb_context *ntdb,
- enum ntdb_log_level level,
- enum NTDB_ERROR ecode,
- const char *message,
- void *data)
-{
- fprintf(stderr, "ntdb:%s:%s:%s\n",
- ntdb_name(ntdb), ntdb_errorstr(ecode), message);
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j, num = 1000, stage = 0, stopat = -1;
- int flags = NTDB_DEFAULT;
- bool transaction = false, summary = false;
- NTDB_DATA key, data;
- struct ntdb_context *ntdb;
- struct timeval start, stop;
- union ntdb_attribute seed, log;
- bool do_stats = false;
- enum NTDB_ERROR ecode;
-
- /* Try to keep benchmarks even. */
- seed.base.attr = NTDB_ATTRIBUTE_SEED;
- seed.base.next = NULL;
- seed.seed.seed = 0;
-
- log.base.attr = NTDB_ATTRIBUTE_LOG;
- log.base.next = &seed;
- log.log.fn = ntdb_log;
-
- if (argv[1] && strcmp(argv[1], "--internal") == 0) {
- flags = NTDB_INTERNAL;
- argc--;
- argv++;
- }
- if (argv[1] && strcmp(argv[1], "--transaction") == 0) {
- transaction = true;
- argc--;
- argv++;
- }
- if (argv[1] && strcmp(argv[1], "--no-sync") == 0) {
- flags |= NTDB_NOSYNC;
- argc--;
- argv++;
- }
- if (argv[1] && strcmp(argv[1], "--summary") == 0) {
- summary = true;
- argc--;
- argv++;
- }
- if (argv[1] && strcmp(argv[1], "--stats") == 0) {
- do_stats = true;
- argc--;
- argv++;
- }
-
- ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR|O_CREAT|O_TRUNC,
- 0600, &log);
- if (!ntdb)
- err(1, "Opening /tmp/speed.ntdb");
-
- key.dptr = (void *)&i;
- key.dsize = sizeof(i);
- data = key;
-
- if (argv[1]) {
- num = atoi(argv[1]);
- argv++;
- argc--;
- }
-
- if (argv[1]) {
- stopat = atoi(argv[1]);
- argv++;
- argc--;
- }
-
- /* Add 1000 records. */
- printf("Adding %u records: ", num); fflush(stdout);
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- gettimeofday(&start, NULL);
- for (i = 0; i < num; i++)
- if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
- errx(1, "Inserting key %u in ntdb: %s",
- i, ntdb_errorstr(ecode));
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
-
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
-
- if (++stage == stopat)
- exit(0);
-
- /* Finding 1000 records. */
- printf("Finding %u records: ", num); fflush(stdout);
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- gettimeofday(&start, NULL);
- for (i = 0; i < num; i++) {
- NTDB_DATA dbuf;
- if ((ecode = ntdb_fetch(ntdb, key, &dbuf)) != NTDB_SUCCESS
- || *(int *)dbuf.dptr != i) {
- errx(1, "Fetching key %u in ntdb gave %u",
- i, ecode ? ecode : *(int *)dbuf.dptr);
- }
- }
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
- if (++stage == stopat)
- exit(0);
-
- /* Missing 1000 records. */
- printf("Missing %u records: ", num); fflush(stdout);
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- gettimeofday(&start, NULL);
- for (i = num; i < num*2; i++) {
- NTDB_DATA dbuf;
- ecode = ntdb_fetch(ntdb, key, &dbuf);
- if (ecode != NTDB_ERR_NOEXIST)
- errx(1, "Fetching key %u in ntdb gave %s",
- i, ntdb_errorstr(ecode));
- }
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
- if (++stage == stopat)
- exit(0);
-
- /* Traverse 1000 records. */
- printf("Traversing %u records: ", num); fflush(stdout);
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- i = 0;
- gettimeofday(&start, NULL);
- if (ntdb_traverse(ntdb, count_record, &i) != num)
- errx(1, "Traverse returned wrong number of records");
- if (i != (num - 1) * (num / 2))
- errx(1, "Traverse tallied to %u", i);
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
- if (++stage == stopat)
- exit(0);
-
- /* Delete 1000 records (not in order). */
- printf("Deleting %u records: ", num); fflush(stdout);
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- gettimeofday(&start, NULL);
- for (j = 0; j < num; j++) {
- i = (j + 100003) % num;
- if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS)
- errx(1, "Deleting key %u in ntdb: %s",
- i, ntdb_errorstr(ecode));
- }
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
- if (++stage == stopat)
- exit(0);
-
- /* Re-add 1000 records (not in order). */
- printf("Re-adding %u records: ", num); fflush(stdout);
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- gettimeofday(&start, NULL);
- for (j = 0; j < num; j++) {
- i = (j + 100003) % num;
- if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
- errx(1, "Inserting key %u in ntdb: %s",
- i, ntdb_errorstr(ecode));
- }
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
- if (++stage == stopat)
- exit(0);
-
- /* Append 1000 records. */
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- printf("Appending %u records: ", num); fflush(stdout);
- gettimeofday(&start, NULL);
- for (i = 0; i < num; i++)
- if ((ecode = ntdb_append(ntdb, key, data)) != NTDB_SUCCESS)
- errx(1, "Appending key %u in ntdb: %s",
- i, ntdb_errorstr(ecode));
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (++stage == stopat)
- exit(0);
-
- /* Churn 1000 records: not in order! */
- if (transaction && (ecode = ntdb_transaction_start(ntdb)))
- errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
- printf("Churning %u records: ", num); fflush(stdout);
- gettimeofday(&start, NULL);
- for (j = 0; j < num; j++) {
- i = (j + 1000019) % num;
- if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS)
- errx(1, "Deleting key %u in ntdb: %s",
- i, ntdb_errorstr(ecode));
- i += num;
- if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
- errx(1, "Inserting key %u in ntdb: %s",
- i, ntdb_errorstr(ecode));
- }
- gettimeofday(&stop, NULL);
- if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
- errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
- printf(" %zu ns (%zu bytes)\n",
- normalize(&start, &stop, num), file_size());
-
- if (ntdb_check(ntdb, NULL, NULL))
- errx(1, "ntdb_check failed!");
- if (summary) {
- char *sumstr = NULL;
- ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
- printf("%s\n", sumstr);
- free(sumstr);
- }
- if (do_stats)
- dump_and_clear_stats(&ntdb, flags, &log);
- if (++stage == stopat)
- exit(0);
-
- return 0;
-}
+++ /dev/null
- /*
- Unix SMB/CIFS implementation.
-
- trivial database library
-
- Copyright (C) Andrew Tridgell 2005
- Copyright (C) Rusty Russell 2010
-
- ** NOTE! The following LGPL license applies to the ntdb
- ** library. This does NOT imply that all of Samba is released
- ** under the LGPL
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "private.h"
-#include <assert.h>
-#define SAFE_FREE(ntdb, x) do { if ((x) != NULL) {ntdb->free_fn((void *)x, ntdb->alloc_data); (x)=NULL;} } while(0)
-
-/*
- transaction design:
-
- - only allow a single transaction at a time per database. This makes
- using the transaction API simpler, as otherwise the caller would
- have to cope with temporary failures in transactions that conflict
- with other current transactions
-
- - keep the transaction recovery information in the same file as the
- database, using a special 'transaction recovery' record pointed at
- by the header. This removes the need for extra journal files as
- used by some other databases
-
- - dynamically allocated the transaction recover record, re-using it
- for subsequent transactions. If a larger record is needed then
- ntdb_free() the old record to place it on the normal ntdb freelist
- before allocating the new record
-
- - during transactions, keep a linked list of writes all that have
- been performed by intercepting all ntdb_write() calls. The hooked
- transaction versions of ntdb_read() and ntdb_write() check this
- linked list and try to use the elements of the list in preference
- to the real database.
-
- - don't allow any locks to be held when a transaction starts,
- otherwise we can end up with deadlock (plus lack of lock nesting
- in POSIX locks would mean the lock is lost)
-
- - if the caller gains a lock during the transaction but doesn't
- release it then fail the commit
-
- - allow for nested calls to ntdb_transaction_start(), re-using the
- existing transaction record. If the inner transaction is canceled
- then a subsequent commit will fail
-
- - keep a mirrored copy of the ntdb hash chain heads to allow for the
- fast hash heads scan on traverse, updating the mirrored copy in
- the transaction version of ntdb_write
-
- - allow callers to mix transaction and non-transaction use of ntdb,
- although once a transaction is started then an exclusive lock is
- gained until the transaction is committed or canceled
-
- - the commit stategy involves first saving away all modified data
- into a linearised buffer in the transaction recovery area, then
- marking the transaction recovery area with a magic value to
- indicate a valid recovery record. In total 4 fsync/msync calls are
- needed per commit to prevent race conditions. It might be possible
- to reduce this to 3 or even 2 with some more work.
-
- - check for a valid recovery record on open of the ntdb, while the
- open lock is held. Automatically recover from the transaction
- recovery area if needed, then continue with the open as
- usual. This allows for smooth crash recovery with no administrator
- intervention.
-
- - if NTDB_NOSYNC is passed to flags in ntdb_open then transactions are
- still available, but fsync/msync calls are made. This means we
- still are safe against unexpected death during transaction commit,
- but not against machine reboots.
-*/
-
-/*
- hold the context of any current transaction
-*/
-struct ntdb_transaction {
- /* the original io methods - used to do IOs to the real db */
- const struct ntdb_methods *io_methods;
-
- /* the list of transaction blocks. When a block is first
- written to, it gets created in this list */
- uint8_t **blocks;
- size_t num_blocks;
-
- /* non-zero when an internal transaction error has
- occurred. All write operations will then fail until the
- transaction is ended */
- int transaction_error;
-
- /* when inside a transaction we need to keep track of any
- nested ntdb_transaction_start() calls, as these are allowed,
- but don't create a new transaction */
- unsigned int nesting;
-
- /* set when a prepare has already occurred */
- bool prepared;
- ntdb_off_t magic_offset;
-
- /* old file size before transaction */
- ntdb_len_t old_map_size;
-};
-
-/*
- read while in a transaction. We need to check first if the data is in our list
- of transaction elements, then if not do a real read
-*/
-static enum NTDB_ERROR transaction_read(struct ntdb_context *ntdb, ntdb_off_t off,
- void *buf, ntdb_len_t len)
-{
- size_t blk;
- enum NTDB_ERROR ecode;
-
- /* break it down into block sized ops */
- while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) {
- ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE);
- ecode = transaction_read(ntdb, off, buf, len2);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- len -= len2;
- off += len2;
- buf = (void *)(len2 + (char *)buf);
- }
-
- if (len == 0) {
- return NTDB_SUCCESS;
- }
-
- blk = off / NTDB_PGSIZE;
-
- /* see if we have it in the block list */
- if (ntdb->transaction->num_blocks <= blk ||
- ntdb->transaction->blocks[blk] == NULL) {
- /* nope, do a real read */
- ecode = ntdb->transaction->io_methods->tread(ntdb, off, buf, len);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- return 0;
- }
-
- /* now copy it out of this block */
- memcpy(buf, ntdb->transaction->blocks[blk] + (off % NTDB_PGSIZE), len);
- return NTDB_SUCCESS;
-
-fail:
- ntdb->transaction->transaction_error = 1;
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "transaction_read: failed at off=%zu len=%zu",
- (size_t)off, (size_t)len);
-}
-
-
-/*
- write while in a transaction
-*/
-static enum NTDB_ERROR transaction_write(struct ntdb_context *ntdb, ntdb_off_t off,
- const void *buf, ntdb_len_t len)
-{
- size_t blk;
- enum NTDB_ERROR ecode;
-
- /* Only a commit is allowed on a prepared transaction */
- if (ntdb->transaction->prepared) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR,
- "transaction_write: transaction already"
- " prepared, write not allowed");
- goto fail;
- }
-
- /* break it up into block sized chunks */
- while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) {
- ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE);
- ecode = transaction_write(ntdb, off, buf, len2);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- len -= len2;
- off += len2;
- if (buf != NULL) {
- buf = (const void *)(len2 + (const char *)buf);
- }
- }
-
- if (len == 0) {
- return NTDB_SUCCESS;
- }
-
- blk = off / NTDB_PGSIZE;
- off = off % NTDB_PGSIZE;
-
- if (ntdb->transaction->num_blocks <= blk) {
- uint8_t **new_blocks;
- /* expand the blocks array */
- if (ntdb->transaction->blocks == NULL) {
- new_blocks = (uint8_t **)ntdb->alloc_fn(ntdb,
- (blk+1)*sizeof(uint8_t *), ntdb->alloc_data);
- } else {
- new_blocks = (uint8_t **)ntdb->expand_fn(
- ntdb->transaction->blocks,
- (blk+1)*sizeof(uint8_t *), ntdb->alloc_data);
- }
- if (new_blocks == NULL) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "transaction_write:"
- " failed to allocate");
- goto fail;
- }
- memset(&new_blocks[ntdb->transaction->num_blocks], 0,
- (1+(blk - ntdb->transaction->num_blocks))*sizeof(uint8_t *));
- ntdb->transaction->blocks = new_blocks;
- ntdb->transaction->num_blocks = blk+1;
- }
-
- /* allocate and fill a block? */
- if (ntdb->transaction->blocks[blk] == NULL) {
- ntdb->transaction->blocks[blk] = (uint8_t *)
- ntdb->alloc_fn(ntdb->transaction->blocks, NTDB_PGSIZE,
- ntdb->alloc_data);
- if (ntdb->transaction->blocks[blk] == NULL) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "transaction_write:"
- " failed to allocate");
- goto fail;
- }
- memset(ntdb->transaction->blocks[blk], 0, NTDB_PGSIZE);
- if (ntdb->transaction->old_map_size > blk * NTDB_PGSIZE) {
- ntdb_len_t len2 = NTDB_PGSIZE;
- if (len2 + (blk * NTDB_PGSIZE) > ntdb->transaction->old_map_size) {
- len2 = ntdb->transaction->old_map_size - (blk * NTDB_PGSIZE);
- }
- ecode = ntdb->transaction->io_methods->tread(ntdb,
- blk * NTDB_PGSIZE,
- ntdb->transaction->blocks[blk],
- len2);
- if (ecode != NTDB_SUCCESS) {
- ecode = ntdb_logerr(ntdb, ecode,
- NTDB_LOG_ERROR,
- "transaction_write:"
- " failed to"
- " read old block: %s",
- strerror(errno));
- SAFE_FREE(ntdb, ntdb->transaction->blocks[blk]);
- goto fail;
- }
- }
- }
-
- /* overwrite part of an existing block */
- if (buf == NULL) {
- memset(ntdb->transaction->blocks[blk] + off, 0, len);
- } else {
- memcpy(ntdb->transaction->blocks[blk] + off, buf, len);
- }
- return NTDB_SUCCESS;
-
-fail:
- ntdb->transaction->transaction_error = 1;
- return ecode;
-}
-
-
-/*
- write while in a transaction - this variant never expands the transaction blocks, it only
- updates existing blocks. This means it cannot change the recovery size
-*/
-static void transaction_write_existing(struct ntdb_context *ntdb, ntdb_off_t off,
- const void *buf, ntdb_len_t len)
-{
- size_t blk;
-
- /* break it up into block sized chunks */
- while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) {
- ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE);
- transaction_write_existing(ntdb, off, buf, len2);
- len -= len2;
- off += len2;
- if (buf != NULL) {
- buf = (const void *)(len2 + (const char *)buf);
- }
- }
-
- if (len == 0) {
- return;
- }
-
- blk = off / NTDB_PGSIZE;
- off = off % NTDB_PGSIZE;
-
- if (ntdb->transaction->num_blocks <= blk ||
- ntdb->transaction->blocks[blk] == NULL) {
- return;
- }
-
- /* overwrite part of an existing block */
- memcpy(ntdb->transaction->blocks[blk] + off, buf, len);
-}
-
-
-/*
- out of bounds check during a transaction
-*/
-static enum NTDB_ERROR transaction_oob(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_len_t len, bool probe)
-{
- if ((off + len >= off && off + len <= ntdb->file->map_size) || probe) {
- return NTDB_SUCCESS;
- }
-
- ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_oob len %lld beyond transaction size %lld",
- (long long)(off + len),
- (long long)ntdb->file->map_size);
- return NTDB_ERR_IO;
-}
-
-/*
- transaction version of ntdb_expand().
-*/
-static enum NTDB_ERROR transaction_expand_file(struct ntdb_context *ntdb,
- ntdb_off_t addition)
-{
- enum NTDB_ERROR ecode;
-
- assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0);
-
- /* add a write to the transaction elements, so subsequent
- reads see the zero data */
- ecode = transaction_write(ntdb, ntdb->file->map_size, NULL, addition);
- if (ecode == NTDB_SUCCESS) {
- ntdb->file->map_size += addition;
- }
- return ecode;
-}
-
-static void *transaction_direct(struct ntdb_context *ntdb, ntdb_off_t off,
- size_t len, bool write_mode)
-{
- size_t blk = off / NTDB_PGSIZE, end_blk;
-
- /* This is wrong for zero-length blocks, but will fail gracefully */
- end_blk = (off + len - 1) / NTDB_PGSIZE;
-
- /* Can only do direct if in single block and we've already copied. */
- if (write_mode) {
- ntdb->stats.transaction_write_direct++;
- if (blk != end_blk
- || blk >= ntdb->transaction->num_blocks
- || ntdb->transaction->blocks[blk] == NULL) {
- ntdb->stats.transaction_write_direct_fail++;
- return NULL;
- }
- return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE;
- }
-
- ntdb->stats.transaction_read_direct++;
- /* Single which we have copied? */
- if (blk == end_blk
- && blk < ntdb->transaction->num_blocks
- && ntdb->transaction->blocks[blk])
- return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE;
-
- /* Otherwise must be all not copied. */
- while (blk <= end_blk) {
- if (blk >= ntdb->transaction->num_blocks)
- break;
- if (ntdb->transaction->blocks[blk]) {
- ntdb->stats.transaction_read_direct_fail++;
- return NULL;
- }
- blk++;
- }
- return ntdb->transaction->io_methods->direct(ntdb, off, len, false);
-}
-
-static ntdb_off_t transaction_read_off(struct ntdb_context *ntdb,
- ntdb_off_t off)
-{
- ntdb_off_t ret;
- enum NTDB_ERROR ecode;
-
- ecode = transaction_read(ntdb, off, &ret, sizeof(ret));
- ntdb_convert(ntdb, &ret, sizeof(ret));
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- return ret;
-}
-
-static enum NTDB_ERROR transaction_write_off(struct ntdb_context *ntdb,
- ntdb_off_t off, ntdb_off_t val)
-{
- ntdb_convert(ntdb, &val, sizeof(val));
- return transaction_write(ntdb, off, &val, sizeof(val));
-}
-
-static const struct ntdb_methods transaction_methods = {
- transaction_read,
- transaction_write,
- transaction_oob,
- transaction_expand_file,
- transaction_direct,
- transaction_read_off,
- transaction_write_off,
-};
-
-/*
- sync to disk
-*/
-static enum NTDB_ERROR transaction_sync(struct ntdb_context *ntdb,
- ntdb_off_t offset, ntdb_len_t length)
-{
- if (ntdb->flags & NTDB_NOSYNC) {
- return NTDB_SUCCESS;
- }
-
- if (fsync(ntdb->file->fd) != 0) {
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_transaction: fsync failed: %s",
- strerror(errno));
- }
-#ifdef MS_SYNC
- if (ntdb->file->map_ptr) {
- ntdb_off_t moffset = offset & ~(getpagesize()-1);
- if (msync(moffset + (char *)ntdb->file->map_ptr,
- length + (offset - moffset), MS_SYNC) != 0) {
- return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
- "ntdb_transaction: msync failed: %s",
- strerror(errno));
- }
- }
-#endif
- return NTDB_SUCCESS;
-}
-
-static void free_transaction_blocks(struct ntdb_context *ntdb)
-{
- int i;
-
- /* free all the transaction blocks */
- for (i=0;i<ntdb->transaction->num_blocks;i++) {
- if (ntdb->transaction->blocks[i] != NULL) {
- ntdb->free_fn(ntdb->transaction->blocks[i],
- ntdb->alloc_data);
- }
- }
- SAFE_FREE(ntdb, ntdb->transaction->blocks);
- ntdb->transaction->num_blocks = 0;
-}
-
-static void _ntdb_transaction_cancel(struct ntdb_context *ntdb)
-{
- enum NTDB_ERROR ecode;
-
- if (ntdb->transaction == NULL) {
- ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_transaction_cancel: no transaction");
- return;
- }
-
- if (ntdb->transaction->nesting != 0) {
- ntdb->transaction->transaction_error = 1;
- ntdb->transaction->nesting--;
- return;
- }
-
- ntdb->file->map_size = ntdb->transaction->old_map_size;
-
- free_transaction_blocks(ntdb);
-
- if (ntdb->transaction->magic_offset) {
- const struct ntdb_methods *methods = ntdb->transaction->io_methods;
- uint64_t invalid = NTDB_RECOVERY_INVALID_MAGIC;
-
- /* remove the recovery marker */
- ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset,
- &invalid, sizeof(invalid));
- if (ecode == NTDB_SUCCESS)
- ecode = transaction_sync(ntdb,
- ntdb->transaction->magic_offset,
- sizeof(invalid));
- if (ecode != NTDB_SUCCESS) {
- ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_cancel: failed to remove"
- " recovery magic");
- }
- }
-
- if (ntdb->file->allrecord_lock.count)
- ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
-
- /* restore the normal io methods */
- ntdb->io = ntdb->transaction->io_methods;
-
- ntdb_transaction_unlock(ntdb, F_WRLCK);
-
- if (ntdb_has_open_lock(ntdb))
- ntdb_unlock_open(ntdb, F_WRLCK);
-
- SAFE_FREE(ntdb, ntdb->transaction);
-}
-
-/*
- start a ntdb transaction. No token is returned, as only a single
- transaction is allowed to be pending per ntdb_context
-*/
-_PUBLIC_ enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb)
-{
- enum NTDB_ERROR ecode;
-
- ntdb->stats.transactions++;
- /* some sanity checks */
- if (ntdb->flags & NTDB_INTERNAL) {
- return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_transaction_start:"
- " cannot start a transaction on an"
- " internal ntdb");
- }
-
- if (ntdb->flags & NTDB_RDONLY) {
- return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
- "ntdb_transaction_start:"
- " cannot start a transaction on a"
- " read-only ntdb");
- }
-
- /* cope with nested ntdb_transaction_start() calls */
- if (ntdb->transaction != NULL) {
- if (!(ntdb->flags & NTDB_ALLOW_NESTING)) {
- return ntdb_logerr(ntdb, NTDB_ERR_IO,
- NTDB_LOG_USE_ERROR,
- "ntdb_transaction_start:"
- " already inside transaction");
- }
- ntdb->transaction->nesting++;
- ntdb->stats.transaction_nest++;
- return 0;
- }
-
- if (ntdb_has_hash_locks(ntdb)) {
- /* the caller must not have any locks when starting a
- transaction as otherwise we'll be screwed by lack
- of nested locks in POSIX */
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK,
- NTDB_LOG_USE_ERROR,
- "ntdb_transaction_start:"
- " cannot start a transaction with locks"
- " held");
- }
-
- ntdb->transaction = (struct ntdb_transaction *)
- ntdb->alloc_fn(ntdb, sizeof(struct ntdb_transaction),
- ntdb->alloc_data);
- if (ntdb->transaction == NULL) {
- return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "ntdb_transaction_start:"
- " cannot allocate");
- }
- memset(ntdb->transaction, 0, sizeof(*ntdb->transaction));
-
- /* get the transaction write lock. This is a blocking lock. As
- discussed with Volker, there are a number of ways we could
- make this async, which we will probably do in the future */
- ecode = ntdb_transaction_lock(ntdb, F_WRLCK);
- if (ecode != NTDB_SUCCESS) {
- SAFE_FREE(ntdb, ntdb->transaction->blocks);
- SAFE_FREE(ntdb, ntdb->transaction);
- return ecode;
- }
-
- /* get a read lock over entire file. This is upgraded to a write
- lock during the commit */
- ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, true);
- if (ecode != NTDB_SUCCESS) {
- goto fail_allrecord_lock;
- }
-
- /* make sure we know about any file expansions already done by
- anyone else */
- ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
- ntdb->transaction->old_map_size = ntdb->file->map_size;
-
- /* finally hook the io methods, replacing them with
- transaction specific methods */
- ntdb->transaction->io_methods = ntdb->io;
- ntdb->io = &transaction_methods;
- return NTDB_SUCCESS;
-
-fail_allrecord_lock:
- ntdb_transaction_unlock(ntdb, F_WRLCK);
- SAFE_FREE(ntdb, ntdb->transaction->blocks);
- SAFE_FREE(ntdb, ntdb->transaction);
- return ecode;
-}
-
-
-/*
- cancel the current transaction
-*/
-_PUBLIC_ void ntdb_transaction_cancel(struct ntdb_context *ntdb)
-{
- ntdb->stats.transaction_cancel++;
- _ntdb_transaction_cancel(ntdb);
-}
-
-/*
- work out how much space the linearised recovery data will consume (worst case)
-*/
-static ntdb_len_t ntdb_recovery_size(struct ntdb_context *ntdb)
-{
- ntdb_len_t recovery_size = 0;
- int i;
-
- recovery_size = 0;
- for (i=0;i<ntdb->transaction->num_blocks;i++) {
- if (i * NTDB_PGSIZE >= ntdb->transaction->old_map_size) {
- break;
- }
- if (ntdb->transaction->blocks[i] == NULL) {
- continue;
- }
- recovery_size += 2*sizeof(ntdb_off_t) + NTDB_PGSIZE;
- }
-
- return recovery_size;
-}
-
-static enum NTDB_ERROR ntdb_recovery_area(struct ntdb_context *ntdb,
- const struct ntdb_methods *methods,
- ntdb_off_t *recovery_offset,
- struct ntdb_recovery_record *rec)
-{
- enum NTDB_ERROR ecode;
-
- *recovery_offset = ntdb_read_off(ntdb,
- offsetof(struct ntdb_header, recovery));
- if (NTDB_OFF_IS_ERR(*recovery_offset)) {
- return NTDB_OFF_TO_ERR(*recovery_offset);
- }
-
- if (*recovery_offset == 0) {
- rec->max_len = 0;
- return NTDB_SUCCESS;
- }
-
- ecode = methods->tread(ntdb, *recovery_offset, rec, sizeof(*rec));
- if (ecode != NTDB_SUCCESS)
- return ecode;
-
- ntdb_convert(ntdb, rec, sizeof(*rec));
- /* ignore invalid recovery regions: can happen in crash */
- if (rec->magic != NTDB_RECOVERY_MAGIC &&
- rec->magic != NTDB_RECOVERY_INVALID_MAGIC) {
- *recovery_offset = 0;
- rec->max_len = 0;
- }
- return NTDB_SUCCESS;
-}
-
-static unsigned int same(const unsigned char *new,
- const unsigned char *old,
- unsigned int length)
-{
- unsigned int i;
-
- for (i = 0; i < length; i++) {
- if (new[i] != old[i])
- break;
- }
- return i;
-}
-
-static unsigned int different(const unsigned char *new,
- const unsigned char *old,
- unsigned int length,
- unsigned int min_same,
- unsigned int *samelen)
-{
- unsigned int i;
-
- *samelen = 0;
- for (i = 0; i < length; i++) {
- if (new[i] == old[i]) {
- (*samelen)++;
- } else {
- if (*samelen >= min_same) {
- return i - *samelen;
- }
- *samelen = 0;
- }
- }
-
- if (*samelen < min_same)
- *samelen = 0;
- return length - *samelen;
-}
-
-/* Allocates recovery blob, without ntdb_recovery_record at head set up. */
-static struct ntdb_recovery_record *alloc_recovery(struct ntdb_context *ntdb,
- ntdb_len_t *len)
-{
- struct ntdb_recovery_record *rec;
- size_t i;
- enum NTDB_ERROR ecode;
- unsigned char *p;
- const struct ntdb_methods *old_methods = ntdb->io;
-
- rec = ntdb->alloc_fn(ntdb, sizeof(*rec) + ntdb_recovery_size(ntdb),
- ntdb->alloc_data);
- if (!rec) {
- ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "transaction_setup_recovery:"
- " cannot allocate");
- return NTDB_ERR_PTR(NTDB_ERR_OOM);
- }
-
- /* We temporarily revert to the old I/O methods, so we can use
- * ntdb_access_read */
- ntdb->io = ntdb->transaction->io_methods;
-
- /* build the recovery data into a single blob to allow us to do a single
- large write, which should be more efficient */
- p = (unsigned char *)(rec + 1);
- for (i=0;i<ntdb->transaction->num_blocks;i++) {
- ntdb_off_t offset;
- ntdb_len_t length;
- unsigned int off;
- const unsigned char *buffer;
-
- if (ntdb->transaction->blocks[i] == NULL) {
- continue;
- }
-
- offset = i * NTDB_PGSIZE;
- length = NTDB_PGSIZE;
- if (offset >= ntdb->transaction->old_map_size) {
- continue;
- }
-
- if (offset + length > ntdb->file->map_size) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_transaction_setup_recovery:"
- " transaction data over new region"
- " boundary");
- goto fail;
- }
- buffer = ntdb_access_read(ntdb, offset, length, false);
- if (NTDB_PTR_IS_ERR(buffer)) {
- ecode = NTDB_PTR_ERR(buffer);
- goto fail;
- }
-
- /* Skip over anything the same at the start. */
- off = same(ntdb->transaction->blocks[i], buffer, length);
- offset += off;
-
- while (off < length) {
- ntdb_len_t len1;
- unsigned int samelen;
-
- len1 = different(ntdb->transaction->blocks[i] + off,
- buffer + off, length - off,
- sizeof(offset) + sizeof(len1) + 1,
- &samelen);
-
- memcpy(p, &offset, sizeof(offset));
- memcpy(p + sizeof(offset), &len1, sizeof(len1));
- ntdb_convert(ntdb, p, sizeof(offset) + sizeof(len1));
- p += sizeof(offset) + sizeof(len1);
- memcpy(p, buffer + off, len1);
- p += len1;
- off += len1 + samelen;
- offset += len1 + samelen;
- }
- ntdb_access_release(ntdb, buffer);
- }
-
- *len = p - (unsigned char *)(rec + 1);
- ntdb->io = old_methods;
- return rec;
-
-fail:
- ntdb->free_fn(rec, ntdb->alloc_data);
- ntdb->io = old_methods;
- return NTDB_ERR_PTR(ecode);
-}
-
-static ntdb_off_t create_recovery_area(struct ntdb_context *ntdb,
- ntdb_len_t rec_length,
- struct ntdb_recovery_record *rec)
-{
- ntdb_off_t off, recovery_off;
- ntdb_len_t addition;
- enum NTDB_ERROR ecode;
- const struct ntdb_methods *methods = ntdb->transaction->io_methods;
-
- /* round up to a multiple of page size. Overallocate, since each
- * such allocation forces us to expand the file. */
- rec->max_len = ntdb_expand_adjust(ntdb->file->map_size, rec_length);
-
- /* Round up to a page. */
- rec->max_len = ((sizeof(*rec) + rec->max_len + NTDB_PGSIZE-1)
- & ~(NTDB_PGSIZE-1))
- - sizeof(*rec);
-
- off = ntdb->file->map_size;
-
- /* Restore ->map_size before calling underlying expand_file.
- Also so that we don't try to expand the file again in the
- transaction commit, which would destroy the recovery
- area */
- addition = (ntdb->file->map_size - ntdb->transaction->old_map_size) +
- sizeof(*rec) + rec->max_len;
- ntdb->file->map_size = ntdb->transaction->old_map_size;
- ntdb->stats.transaction_expand_file++;
- ecode = methods->expand_file(ntdb, addition);
- if (ecode != NTDB_SUCCESS) {
- ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_recovery_allocate:"
- " failed to create recovery area");
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- /* we have to reset the old map size so that we don't try to
- expand the file again in the transaction commit, which
- would destroy the recovery area */
- ntdb->transaction->old_map_size = ntdb->file->map_size;
-
- /* write the recovery header offset and sync - we can sync without a race here
- as the magic ptr in the recovery record has not been set */
- recovery_off = off;
- ntdb_convert(ntdb, &recovery_off, sizeof(recovery_off));
- ecode = methods->twrite(ntdb, offsetof(struct ntdb_header, recovery),
- &recovery_off, sizeof(ntdb_off_t));
- if (ecode != NTDB_SUCCESS) {
- ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_recovery_allocate:"
- " failed to write recovery head");
- return NTDB_ERR_TO_OFF(ecode);
- }
- transaction_write_existing(ntdb, offsetof(struct ntdb_header, recovery),
- &recovery_off,
- sizeof(ntdb_off_t));
- return off;
-}
-
-/*
- setup the recovery data that will be used on a crash during commit
-*/
-static enum NTDB_ERROR transaction_setup_recovery(struct ntdb_context *ntdb)
-{
- ntdb_len_t recovery_size = 0;
- ntdb_off_t recovery_off = 0;
- ntdb_off_t old_map_size = ntdb->transaction->old_map_size;
- struct ntdb_recovery_record *recovery;
- const struct ntdb_methods *methods = ntdb->transaction->io_methods;
- uint64_t magic;
- enum NTDB_ERROR ecode;
-
- recovery = alloc_recovery(ntdb, &recovery_size);
- if (NTDB_PTR_IS_ERR(recovery))
- return NTDB_PTR_ERR(recovery);
-
- /* If we didn't actually change anything we overwrote? */
- if (recovery_size == 0) {
- /* In theory, we could have just appended data. */
- if (ntdb->transaction->num_blocks * NTDB_PGSIZE
- < ntdb->transaction->old_map_size) {
- free_transaction_blocks(ntdb);
- }
- ntdb->free_fn(recovery, ntdb->alloc_data);
- return NTDB_SUCCESS;
- }
-
- ecode = ntdb_recovery_area(ntdb, methods, &recovery_off, recovery);
- if (ecode) {
- ntdb->free_fn(recovery, ntdb->alloc_data);
- return ecode;
- }
-
- if (recovery->max_len < recovery_size) {
- /* Not large enough. Free up old recovery area. */
- if (recovery_off) {
- ntdb->stats.frees++;
- ecode = add_free_record(ntdb, recovery_off,
- sizeof(*recovery)
- + recovery->max_len,
- NTDB_LOCK_WAIT, true);
- ntdb->free_fn(recovery, ntdb->alloc_data);
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_recovery_allocate:"
- " failed to free previous"
- " recovery area");
- }
-
- /* Refresh recovery after add_free_record above. */
- recovery = alloc_recovery(ntdb, &recovery_size);
- if (NTDB_PTR_IS_ERR(recovery))
- return NTDB_PTR_ERR(recovery);
- }
-
- recovery_off = create_recovery_area(ntdb, recovery_size,
- recovery);
- if (NTDB_OFF_IS_ERR(recovery_off)) {
- ntdb->free_fn(recovery, ntdb->alloc_data);
- return NTDB_OFF_TO_ERR(recovery_off);
- }
- }
-
- /* Now we know size, convert rec header. */
- recovery->magic = NTDB_RECOVERY_INVALID_MAGIC;
- recovery->len = recovery_size;
- recovery->eof = old_map_size;
- ntdb_convert(ntdb, recovery, sizeof(*recovery));
-
- /* write the recovery data to the recovery area */
- ecode = methods->twrite(ntdb, recovery_off, recovery,
- sizeof(*recovery) + recovery_size);
- if (ecode != NTDB_SUCCESS) {
- ntdb->free_fn(recovery, ntdb->alloc_data);
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_setup_recovery:"
- " failed to write recovery data");
- }
- transaction_write_existing(ntdb, recovery_off, recovery, recovery_size);
-
- ntdb->free_fn(recovery, ntdb->alloc_data);
-
- /* as we don't have ordered writes, we have to sync the recovery
- data before we update the magic to indicate that the recovery
- data is present */
- ecode = transaction_sync(ntdb, recovery_off, recovery_size);
- if (ecode != NTDB_SUCCESS)
- return ecode;
-
- magic = NTDB_RECOVERY_MAGIC;
- ntdb_convert(ntdb, &magic, sizeof(magic));
-
- ntdb->transaction->magic_offset
- = recovery_off + offsetof(struct ntdb_recovery_record, magic);
-
- ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset,
- &magic, sizeof(magic));
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_setup_recovery:"
- " failed to write recovery magic");
- }
- transaction_write_existing(ntdb, ntdb->transaction->magic_offset,
- &magic, sizeof(magic));
-
- /* ensure the recovery magic marker is on disk */
- return transaction_sync(ntdb, ntdb->transaction->magic_offset,
- sizeof(magic));
-}
-
-static enum NTDB_ERROR _ntdb_transaction_prepare_commit(struct ntdb_context *ntdb)
-{
- const struct ntdb_methods *methods;
- enum NTDB_ERROR ecode;
-
- if (ntdb->transaction == NULL) {
- return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_transaction_prepare_commit:"
- " no transaction");
- }
-
- if (ntdb->transaction->prepared) {
- _ntdb_transaction_cancel(ntdb);
- return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_transaction_prepare_commit:"
- " transaction already prepared");
- }
-
- if (ntdb->transaction->transaction_error) {
- _ntdb_transaction_cancel(ntdb);
- return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR,
- "ntdb_transaction_prepare_commit:"
- " transaction error pending");
- }
-
-
- if (ntdb->transaction->nesting != 0) {
- return NTDB_SUCCESS;
- }
-
- /* check for a null transaction */
- if (ntdb->transaction->blocks == NULL) {
- return NTDB_SUCCESS;
- }
-
- methods = ntdb->transaction->io_methods;
-
- /* upgrade the main transaction lock region to a write lock */
- ecode = ntdb_allrecord_upgrade(ntdb, NTDB_HASH_LOCK_START);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* get the open lock - this prevents new users attaching to the database
- during the commit */
- ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /* Sets up ntdb->transaction->recovery and
- * ntdb->transaction->magic_offset. */
- ecode = transaction_setup_recovery(ntdb);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- ntdb->transaction->prepared = true;
-
- /* expand the file to the new size if needed */
- if (ntdb->file->map_size != ntdb->transaction->old_map_size) {
- ntdb_len_t add;
-
- add = ntdb->file->map_size - ntdb->transaction->old_map_size;
- /* Restore original map size for ntdb_expand_file */
- ntdb->file->map_size = ntdb->transaction->old_map_size;
- ecode = methods->expand_file(ntdb, add);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- }
-
- /* Keep the open lock until the actual commit */
- return NTDB_SUCCESS;
-}
-
-/*
- prepare to commit the current transaction
-*/
-_PUBLIC_ enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb)
-{
- return _ntdb_transaction_prepare_commit(ntdb);
-}
-
-/*
- commit the current transaction
-*/
-_PUBLIC_ enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb)
-{
- const struct ntdb_methods *methods;
- int i;
- enum NTDB_ERROR ecode;
-
- if (ntdb->transaction == NULL) {
- return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
- "ntdb_transaction_commit:"
- " no transaction");
- }
-
- ntdb_trace(ntdb, "ntdb_transaction_commit");
-
- if (ntdb->transaction->nesting != 0) {
- ntdb->transaction->nesting--;
- return NTDB_SUCCESS;
- }
-
- if (!ntdb->transaction->prepared) {
- ecode = _ntdb_transaction_prepare_commit(ntdb);
- if (ecode != NTDB_SUCCESS) {
- _ntdb_transaction_cancel(ntdb);
- return ecode;
- }
- }
-
- /* check for a null transaction (prepare_commit may do this!) */
- if (ntdb->transaction->blocks == NULL) {
- _ntdb_transaction_cancel(ntdb);
- return NTDB_SUCCESS;
- }
-
- methods = ntdb->transaction->io_methods;
-
- /* perform all the writes */
- for (i=0;i<ntdb->transaction->num_blocks;i++) {
- ntdb_off_t offset;
- ntdb_len_t length;
-
- if (ntdb->transaction->blocks[i] == NULL) {
- continue;
- }
-
- offset = i * NTDB_PGSIZE;
- length = NTDB_PGSIZE;
-
- ecode = methods->twrite(ntdb, offset,
- ntdb->transaction->blocks[i], length);
- if (ecode != NTDB_SUCCESS) {
- /* we've overwritten part of the data and
- possibly expanded the file, so we need to
- run the crash recovery code */
- ntdb->io = methods;
- ntdb_transaction_recover(ntdb);
-
- _ntdb_transaction_cancel(ntdb);
-
- return ecode;
- }
- SAFE_FREE(ntdb, ntdb->transaction->blocks[i]);
- }
-
- SAFE_FREE(ntdb, ntdb->transaction->blocks);
- ntdb->transaction->num_blocks = 0;
-
- /* ensure the new data is on disk */
- ecode = transaction_sync(ntdb, 0, ntdb->file->map_size);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- /*
- TODO: maybe write to some dummy hdr field, or write to magic
- offset without mmap, before the last sync, instead of the
- utime() call
- */
-
- /* on some systems (like Linux 2.6.x) changes via mmap/msync
- don't change the mtime of the file, this means the file may
- not be backed up (as ntdb rounding to block sizes means that
- file size changes are quite rare too). The following forces
- mtime changes when a transaction completes */
-#if HAVE_UTIME
- utime(ntdb->name, NULL);
-#endif
-
- /* use a transaction cancel to free memory and remove the
- transaction locks: it "restores" map_size, too. */
- ntdb->transaction->old_map_size = ntdb->file->map_size;
- _ntdb_transaction_cancel(ntdb);
-
- return NTDB_SUCCESS;
-}
-
-
-/*
- recover from an aborted transaction. Must be called with exclusive
- database write access already established (including the open
- lock to prevent new processes attaching)
-*/
-enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb)
-{
- ntdb_off_t recovery_head, recovery_eof;
- unsigned char *data, *p;
- struct ntdb_recovery_record rec;
- enum NTDB_ERROR ecode;
-
- /* find the recovery area */
- recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery));
- if (NTDB_OFF_IS_ERR(recovery_head)) {
- ecode = NTDB_OFF_TO_ERR(recovery_head);
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to read recovery head");
- }
-
- if (recovery_head == 0) {
- /* we have never allocated a recovery record */
- return NTDB_SUCCESS;
- }
-
- /* read the recovery record */
- ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to read recovery record");
- }
-
- if (rec.magic != NTDB_RECOVERY_MAGIC) {
- /* there is no valid recovery data */
- return NTDB_SUCCESS;
- }
-
- if (ntdb->flags & NTDB_RDONLY) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " attempt to recover read only database");
- }
-
- recovery_eof = rec.eof;
-
- data = (unsigned char *)ntdb->alloc_fn(ntdb, rec.len, ntdb->alloc_data);
- if (data == NULL) {
- return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to allocate recovery data");
- }
-
- /* read the full recovery data */
- ecode = ntdb->io->tread(ntdb, recovery_head + sizeof(rec), data,
- rec.len);
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to read recovery data");
- }
-
- /* recover the file data */
- p = data;
- while (p+sizeof(ntdb_off_t)+sizeof(ntdb_len_t) < data + rec.len) {
- ntdb_off_t ofs;
- ntdb_len_t len;
- ntdb_convert(ntdb, p, sizeof(ofs) + sizeof(len));
- memcpy(&ofs, p, sizeof(ofs));
- memcpy(&len, p + sizeof(ofs), sizeof(len));
- p += sizeof(ofs) + sizeof(len);
-
- ecode = ntdb->io->twrite(ntdb, ofs, p, len);
- if (ecode != NTDB_SUCCESS) {
- ntdb->free_fn(data, ntdb->alloc_data);
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to recover %zu bytes"
- " at offset %zu",
- (size_t)len, (size_t)ofs);
- }
- p += len;
- }
-
- ntdb->free_fn(data, ntdb->alloc_data);
-
- ecode = transaction_sync(ntdb, 0, ntdb->file->map_size);
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to sync recovery");
- }
-
- /* if the recovery area is after the recovered eof then remove it */
- if (recovery_eof <= recovery_head) {
- ecode = ntdb_write_off(ntdb, offsetof(struct ntdb_header,
- recovery),
- 0);
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to remove recovery head");
- }
- }
-
- /* remove the recovery magic */
- ecode = ntdb_write_off(ntdb,
- recovery_head
- + offsetof(struct ntdb_recovery_record, magic),
- NTDB_RECOVERY_INVALID_MAGIC);
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to remove recovery magic");
- }
-
- ecode = transaction_sync(ntdb, 0, recovery_eof);
- if (ecode != NTDB_SUCCESS) {
- return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
- "ntdb_transaction_recover:"
- " failed to sync2 recovery");
- }
-
- ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
- "ntdb_transaction_recover: recovered %zu byte database",
- (size_t)recovery_eof);
-
- /* all done */
- return NTDB_SUCCESS;
-}
-
-ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb)
-{
- ntdb_off_t recovery_head;
- struct ntdb_recovery_record rec;
- enum NTDB_ERROR ecode;
-
- /* find the recovery area */
- recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery));
- if (NTDB_OFF_IS_ERR(recovery_head)) {
- return recovery_head;
- }
-
- if (recovery_head == 0) {
- /* we have never allocated a recovery record */
- return false;
- }
-
- /* read the recovery record */
- ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- return (rec.magic == NTDB_RECOVERY_MAGIC);
-}
+++ /dev/null
- /*
- Trivial Database 2: traverse function.
- Copyright (C) Rusty Russell 2010
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
-*/
-#include "private.h"
-#include <ccan/likely/likely.h>
-
-_PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb,
- int (*fn)(struct ntdb_context *,
- NTDB_DATA, NTDB_DATA, void *),
- void *p)
-{
- enum NTDB_ERROR ecode;
- struct hash_info h;
- NTDB_DATA k, d;
- int64_t count = 0;
-
- k.dptr = NULL;
- for (ecode = first_in_hash(ntdb, &h, &k, &d.dsize);
- ecode == NTDB_SUCCESS;
- ecode = next_in_hash(ntdb, &h, &k, &d.dsize)) {
- d.dptr = k.dptr + k.dsize;
-
- count++;
- if (fn && fn(ntdb, k, d, p)) {
- ntdb->free_fn(k.dptr, ntdb->alloc_data);
- return count;
- }
- ntdb->free_fn(k.dptr, ntdb->alloc_data);
- }
-
- if (ecode != NTDB_ERR_NOEXIST) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- return count;
-}
-
-_PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key)
-{
- struct hash_info h;
-
- return first_in_hash(ntdb, &h, key, NULL);
-}
-
-/* We lock twice, not very efficient. We could keep last key & h cached. */
-_PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key)
-{
- struct hash_info h;
- struct ntdb_used_record rec;
- ntdb_off_t off;
-
- off = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, NULL);
- ntdb->free_fn(key->dptr, ntdb->alloc_data);
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- }
- ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
-
- /* If we found something, skip to next. */
- if (off)
- h.bucket++;
- return next_in_hash(ntdb, &h, key, NULL);
-}
-
-static int wipe_one(struct ntdb_context *ntdb,
- NTDB_DATA key, NTDB_DATA data, enum NTDB_ERROR *ecode)
-{
- *ecode = ntdb_delete(ntdb, key);
- return (*ecode != NTDB_SUCCESS);
-}
-
-_PUBLIC_ enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb)
-{
- enum NTDB_ERROR ecode;
- int64_t count;
-
- ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
- if (ecode != NTDB_SUCCESS)
- return ecode;
-
- /* FIXME: Be smarter. */
- count = ntdb_traverse(ntdb, wipe_one, &ecode);
- if (count < 0)
- ecode = NTDB_OFF_TO_ERR(count);
- ntdb_allrecord_unlock(ntdb, F_WRLCK);
- return ecode;
-}
+++ /dev/null
-#!/usr/bin/env python
-
-APPNAME = 'ntdb'
-VERSION = '1.0'
-
-blddir = 'bin'
-
-import sys, os
-
-# find the buildtools directory
-srcdir = '.'
-while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5:
- srcdir = srcdir + '/..'
-sys.path.insert(0, srcdir + '/buildtools/wafsamba')
-
-import wafsamba, samba_dist, Options, Logs, glob
-
-samba_dist.DIST_DIRS('lib/ntdb:. lib/replace:lib/replace lib/ccan:lib/ccan buildtools:buildtools')
-
-def set_options(opt):
- opt.BUILTIN_DEFAULT('replace,ccan')
- opt.PRIVATE_EXTENSION_DEFAULT('ntdb', noextension='ntdb')
- opt.RECURSE('lib/replace')
- opt.add_option('--valgrind',
- help=("use valgrind on tests programs"),
- action="store_true", dest='VALGRIND', default=False)
- opt.add_option('--valgrind-log',
- help=("where to put the valgrind log"),
- action="store", dest='VALGRINDLOG', default=None)
-
- if opt.IN_LAUNCH_DIR():
- opt.add_option('--disable-python',
- help=("disable the pyntdb module"),
- action="store_true", dest='disable_python', default=False)
-
-def configure(conf):
- conf.RECURSE('lib/replace')
- conf.RECURSE('lib/ccan')
-
- conf.env.NTDB_TEST_RUN_SRC=['test/run-001-encode.c',
- 'test/run-001-fls.c',
- 'test/run-01-new_database.c',
- 'test/run-02-expand.c',
- 'test/run-03-coalesce.c',
- 'test/run-04-basichash.c',
- 'test/run-05-readonly-open.c',
- 'test/run-10-simple-store.c',
- 'test/run-11-simple-fetch.c',
- 'test/run-12-check.c',
- 'test/run-15-append.c',
- 'test/run-25-hashoverload.c',
- 'test/run-30-exhaust-before-expand.c',
- 'test/run-35-convert.c',
- 'test/run-50-multiple-freelists.c',
- 'test/run-56-open-during-transaction.c',
- 'test/run-57-die-during-transaction.c',
- 'test/run-64-bit-tdb.c',
- 'test/run-90-get-set-attributes.c',
- 'test/run-capabilities.c',
- 'test/run-expand-in-transaction.c',
- 'test/run-features.c',
- 'test/run-lockall.c',
- 'test/run-remap-in-read_traverse.c',
- 'test/run-seed.c',
- 'test/run-tdb_errorstr.c',
- 'test/run-tdb_foreach.c',
- 'test/run-traverse.c']
- conf.env.NTDB_TEST_API_SRC=['test/api-12-store.c',
- 'test/api-13-delete.c',
- 'test/api-14-exists.c',
- 'test/api-16-wipe_all.c',
- 'test/api-20-alloc-attr.c',
- 'test/api-21-parse_record.c',
- 'test/api-55-transaction.c',
- 'test/api-60-noop-transaction.c',
- 'test/api-80-tdb_fd.c',
- 'test/api-81-seqnum.c',
- 'test/api-82-lockattr.c',
- 'test/api-83-openhook.c',
- 'test/api-91-get-stats.c',
- 'test/api-92-get-set-readonly.c',
- 'test/api-93-repack.c',
- 'test/api-94-expand-during-parse.c',
- 'test/api-95-read-only-during-parse.c',
- 'test/api-add-remove-flags.c',
- 'test/api-check-callback.c',
- 'test/api-firstkey-nextkey.c',
- 'test/api-fork-test.c',
- 'test/api-locktimeout.c',
- 'test/api-missing-entries.c',
- 'test/api-open-multiple-times.c',
- 'test/api-record-expand.c',
- 'test/api-simple-delete.c',
- 'test/api-summary.c']
- conf.env.NTDB_TEST_API_PY=['test/python-api.py']
- conf.env.NTDB_TEST_API_HELPER_SRC=['test/helpapi-external-agent.c']
- conf.env.NTDB_TEST_RUN_HELPER_SRC=['test/helprun-external-agent.c',
- 'test/helprun-layout.c']
- conf.env.NTDB_TEST_HELPER_SRC=['test/external-agent.c',
- 'test/failtest_helper.c',
- 'test/lock-tracking.c',
- 'test/logging.c',
- 'test/tap-interface.c']
-
- conf.env.standalone_ntdb = conf.IN_LAUNCH_DIR()
- conf.env.disable_python = getattr(Options.options, 'disable_python', False)
-
- if not conf.env.standalone_ntdb:
- if conf.CHECK_BUNDLED_SYSTEM('ntdb', minversion=VERSION,
- implied_deps='replace'):
- conf.define('USING_SYSTEM_NTDB', 1)
- if conf.CHECK_BUNDLED_SYSTEM_PYTHON('pyntdb', 'ntdb', minversion=VERSION):
- conf.define('USING_SYSTEM_PYNTDB', 1)
-
- if not conf.env.disable_python:
- # also disable if we don't have the python libs installed
- conf.find_program('python', var='PYTHON')
- conf.check_tool('python')
- conf.check_python_version((2,4,2))
- conf.SAMBA_CHECK_PYTHON_HEADERS(mandatory=False)
- if not conf.env.HAVE_PYTHON_H:
- Logs.warn('Disabling pyntdb as python devel libs not found')
- conf.env.disable_python = True
-
- conf.CHECK_XSLTPROC_MANPAGES()
-
- # This make #include <ccan/...> work.
- conf.ADD_EXTRA_INCLUDES('''#lib''')
-
- conf.SAMBA_CONFIG_H()
-
-def build(bld):
- bld.RECURSE('lib/replace')
- bld.RECURSE('lib/ccan')
-
- if bld.env.standalone_ntdb:
- bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig'
- private_library = False
- else:
- private_library = True
-
- SRC = '''check.c free.c hash.c io.c lock.c open.c
- summary.c ntdb.c transaction.c traverse.c'''
-
- if not bld.CONFIG_SET('USING_SYSTEM_NTDB'):
- NTDB_CCAN='ccan-likely ccan-ilog ccan-hash ccan-tally'
- bld.SAMBA_LIBRARY('ntdb',
- SRC,
- deps='replace ' + NTDB_CCAN ,
- includes='.',
- abi_directory='ABI',
- abi_match='ntdb_*',
- hide_symbols=True,
- vnum=VERSION,
- public_headers='ntdb.h',
- public_headers_install=not private_library,
- pc_files='ntdb.pc',
- private_library=private_library,
- manpages='man/ntdb.3')
-
- bld.SAMBA_BINARY('ntdbtorture',
- 'tools/ntdbtorture.c',
- deps='ntdb ccan-err',
- install=False)
-
- bld.SAMBA_BINARY('ntdbtool',
- 'tools/ntdbtool.c',
- deps='ntdb', manpages='man/ntdbtool.8')
-
- bld.SAMBA_BINARY('ntdbdump',
- 'tools/ntdbdump.c',
- deps='ntdb', manpages='man/ntdbdump.8')
-
- bld.SAMBA_BINARY('ntdbrestore',
- 'tools/ntdbrestore.c',
- deps='ntdb', manpages='man/ntdbrestore.8')
-
- bld.SAMBA_BINARY('ntdbbackup',
- 'tools/ntdbbackup.c',
- deps='ntdb', manpages='man/ntdbbackup.8')
-
- if bld.env.DEVELOPER_MODE:
- # FIXME: We need CCAN for some API tests, but waf thinks it's
- # already available via ntdb. It is, but not publicly.
- # Workaround is to build a private, non-hiding version.
- bld.SAMBA_SUBSYSTEM('ntdb-testing',
- SRC,
- deps='replace ' + NTDB_CCAN,
- includes='.')
-
- bld.SAMBA_SUBSYSTEM('ntdb-test-helpers',
- bld.env.NTDB_TEST_HELPER_SRC,
- deps='replace',
- allow_warnings=True)
- bld.SAMBA_SUBSYSTEM('ntdb-run-helpers',
- bld.env.NTDB_TEST_RUN_HELPER_SRC,
- deps='replace')
- bld.SAMBA_SUBSYSTEM('ntdb-api-helpers',
- bld.env.NTDB_TEST_API_HELPER_SRC,
- deps='replace')
-
- for f in bld.env.NTDB_TEST_RUN_SRC:
- base = os.path.splitext(os.path.basename(f))[0]
- bld.SAMBA_BINARY('ntdb-' + base, f,
- deps=NTDB_CCAN + ' ccan-failtest ntdb-test-helpers ntdb-run-helpers',
- install=False)
-
- for f in bld.env.NTDB_TEST_API_SRC:
- base = os.path.splitext(os.path.basename(f))[0]
- bld.SAMBA_BINARY('ntdb-' + base, f,
- deps='ntdb-test-helpers ntdb-api-helpers ntdb-testing',
- install=False)
-
- if not bld.CONFIG_SET('USING_SYSTEM_PYNTDB'):
- bld.SAMBA_PYTHON('pyntdb',
- source='pyntdb.c',
- deps='ntdb',
- enabled=not bld.env.disable_python,
- realname='ntdb.so',
- cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION)
-
-def testonly(ctx):
- '''run ntdb testsuite'''
- import Utils, samba_utils, shutil
- ecode = 0;
-
- env = samba_utils.LOAD_ENVIRONMENT()
-
- if env.standalone_ntdb:
- # FIXME: This is horrible :(
- test_prefix = "%s/st" % (Utils.g_module.blddir)
- shutil.rmtree(test_prefix, ignore_errors=True)
- os.makedirs(test_prefix)
-
- # Create scratch directory for tests.
- testdir = os.path.join(test_prefix, 'ntdb-tests')
- samba_utils.mkdir_p(testdir)
- # Symlink back to source dir so it can find tests in test/
- link = os.path.join(testdir, 'test')
- if not os.path.exists(link):
- os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link)
-
- if env.options['VALGRIND']:
- os.environ['VALGRIND'] = 'valgrind -q --num-callers=30 --error-exitcode=11'
- if env.options['VALGRINDLOG']:
- os.environ['VALGRIND'] += ' --log-file=%s' % Options.options.VALGRINDLOG
-
- for f in env.NTDB_TEST_RUN_SRC + env.NTDB_TEST_API_SRC:
- name = "ntdb-" + os.path.splitext(os.path.basename(f))[0]
- cmd = "cd " + testdir + " && $VALGRIND " + os.path.abspath(os.path.join(Utils.g_module.blddir, name)) + " > test-output 2>&1"
- print("..." + f)
- ret = samba_utils.RUN_COMMAND(cmd)
- if ret != 0:
- print("%s (%s) failed:" % (name, f))
- samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output'))
- ecode = ret;
- break;
- if not env.disable_python:
- for f in env.NTDB_TEST_API_PY:
- print("..." + f)
- cmd = "cd " + testdir + " && PYTHONPATH=%s %s %s > test-output 2>&1" % (
- os.path.abspath(os.path.join(Utils.g_module.blddir, "python")),
- env["PYTHON"], os.path.abspath(f))
- ret = samba_utils.RUN_COMMAND(cmd)
- if ret != 0:
- print("%s (%s) failed:" % (name, f))
- samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output'))
- ecode = ret
- break
-
- sys.exit(ecode)
-
-# WAF doesn't build the unit tests for this, maybe because they don't link with ntdb?
-# This forces it
-def test(ctx):
- import Scripting
- Scripting.commands.append('build')
- Scripting.commands.append('testonly')
-
-def dist():
- '''makes a tarball for distribution'''
- samba_dist.dist()
-
-def reconfigure(ctx):
- '''reconfigure if config scripts have changed'''
- import samba_utils
- samba_utils.reconfigure(ctx)
--- /dev/null
+ntdb_add_flag: void (struct ntdb_context *, unsigned int)
+ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA)
+ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA)
+ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA)
+ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_close: int (struct ntdb_context *)
+ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_errorstr: const char *(enum NTDB_ERROR)
+ntdb_exists: bool (struct ntdb_context *, NTDB_DATA)
+ntdb_fd: int (const struct ntdb_context *)
+ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *)
+ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
+ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *)
+ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *)
+ntdb_get_flags: unsigned int (struct ntdb_context *)
+ntdb_get_seqnum: int64_t (struct ntdb_context *)
+ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_name: const char *(const struct ntdb_context *)
+ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
+ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *)
+ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_remove_flag: void (struct ntdb_context *, unsigned int)
+ntdb_repack: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *)
+ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int)
+ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **)
+ntdb_transaction_cancel: void (struct ntdb_context *)
+ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_unlockall: void (struct ntdb_context *)
+ntdb_unlockall_read: void (struct ntdb_context *)
+ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type)
+ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *)
--- /dev/null
+ntdb_add_flag: void (struct ntdb_context *, unsigned int)
+ntdb_append: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA)
+ntdb_chainlock: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_chainlock_read: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_chainunlock: void (struct ntdb_context *, NTDB_DATA)
+ntdb_chainunlock_read: void (struct ntdb_context *, NTDB_DATA)
+ntdb_check_: enum NTDB_ERROR (struct ntdb_context *, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_close: int (struct ntdb_context *)
+ntdb_delete: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA)
+ntdb_errorstr: const char *(enum NTDB_ERROR)
+ntdb_exists: bool (struct ntdb_context *, NTDB_DATA)
+ntdb_fd: int (const struct ntdb_context *)
+ntdb_fetch: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA *)
+ntdb_firstkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
+ntdb_foreach_: void (int (*)(struct ntdb_context *, void *), void *)
+ntdb_get_attribute: enum NTDB_ERROR (struct ntdb_context *, union ntdb_attribute *)
+ntdb_get_flags: unsigned int (struct ntdb_context *)
+ntdb_get_seqnum: int64_t (struct ntdb_context *)
+ntdb_lockall: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_lockall_read: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_name: const char *(const struct ntdb_context *)
+ntdb_nextkey: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA *)
+ntdb_open: struct ntdb_context *(const char *, int, int, mode_t, union ntdb_attribute *)
+ntdb_parse_record_: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, enum NTDB_ERROR (*)(NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_remove_flag: void (struct ntdb_context *, unsigned int)
+ntdb_repack: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_set_attribute: enum NTDB_ERROR (struct ntdb_context *, const union ntdb_attribute *)
+ntdb_store: enum NTDB_ERROR (struct ntdb_context *, NTDB_DATA, NTDB_DATA, int)
+ntdb_summary: enum NTDB_ERROR (struct ntdb_context *, enum ntdb_summary_flags, char **)
+ntdb_transaction_cancel: void (struct ntdb_context *)
+ntdb_transaction_commit: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_transaction_prepare_commit: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_transaction_start: enum NTDB_ERROR (struct ntdb_context *)
+ntdb_traverse_: int64_t (struct ntdb_context *, int (*)(struct ntdb_context *, NTDB_DATA, NTDB_DATA, void *), void *)
+ntdb_unlockall: void (struct ntdb_context *)
+ntdb_unlockall_read: void (struct ntdb_context *)
+ntdb_unset_attribute: void (struct ntdb_context *, enum ntdb_attribute_type)
+ntdb_wipe_all: enum NTDB_ERROR (struct ntdb_context *)
--- /dev/null
+../../licenses/LGPL-3
\ No newline at end of file
--- /dev/null
+# Standalone build of the ntdb static library and its command-line tools.
+# CFLAGS adds ../../ and ./ to the include path; the two ccan_*.o objects
+# are compiled from the sibling ../hash and ../tally directories.
+CC=gcc
+CFLAGS=-g -O0 -Wall -W -I../../ -I./
+LIBS=
+
+# Objects archived into libntdb.a.
+LIBNTDB_OBJ = ccan_hash.o ccan_tally.o check.o free.o hash.o io.o lock.o open.o summary.o ntdb.o transaction.o traverse.o
+
+# Default target: build every tool.
+all: ntdbtorture ntdbtool ntdbdump ntdbrestore ntdbbackup
+
+# Each tool target writes its binary to tools/<name>, not to a file named
+# after the target itself.
+ntdbtorture: tools/ntdbtorture.c libntdb.a
+ $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS)
+
+ntdbtool: tools/ntdbtool.c libntdb.a
+ $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS)
+
+ntdbdump: tools/ntdbdump.c libntdb.a
+ $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS)
+
+ntdbrestore: tools/ntdbrestore.c libntdb.a
+ $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS)
+
+ntdbbackup: tools/ntdbbackup.c libntdb.a
+ $(CC) $(CFLAGS) -o tools/$@ tools/$@.c libntdb.a $(LIBS)
+
+# Static library assembled from all library objects.
+libntdb.a: $(LIBNTDB_OBJ)
+ @echo Creating library $@
+ ar r libntdb.a $(LIBNTDB_OBJ)
+ ranlib libntdb.a
+
+# One explicit compile rule per library source file.
+check.o: check.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c check.c -o $@
+
+free.o: free.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c free.c -o $@
+
+hash.o: hash.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c hash.c -o $@
+
+io.o: io.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c io.c -o $@
+
+lock.o: lock.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c lock.c -o $@
+
+open.o: open.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c open.c -o $@
+
+summary.o: summary.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c summary.c -o $@
+
+ntdb.o: ntdb.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c ntdb.c -o $@
+
+transaction.o: transaction.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c transaction.c -o $@
+
+traverse.o: traverse.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c traverse.c -o $@
+
+# Objects built from sources outside this directory.
+ccan_hash.o: ../hash/hash.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c ../hash/hash.c -o $@
+
+ccan_tally.o: ../tally/tally.c
+ @echo Compiling $@
+ $(CC) $(CFLAGS) -c ../tally/tally.c -o $@
+
+# Remove objects, archives and the tool binaries.
+clean:
+ rm -f *.o
+ rm -f *.a
+ rm -f tools/ntdbtorture tools/ntdbtool tools/ntdbdump tools/ntdbrestore tools/ntdbbackup
--- /dev/null
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * ntdb - Next Generation Trivial Database
+ *
+ * This package provides an experimental persistent keyword/data store.
+ * Its main advantage over tdb is that it's 64-bit.
+ *
+ * Example:
+ * #include <stdio.h>
+ * #include <err.h>
+ * #include <unistd.h>
+ * #include <ccan/ntdb/ntdb.h>
+ *
+ * int main(int argc, char *argv[])
+ * {
+ * NTDB_DATA key = ntdb_mkdata("key", 3);
+ * NTDB_DATA val = ntdb_mkdata("val", 3);
+ * struct ntdb_context *ntdb;
+ *
+ * ntdb = ntdb_open("example.ntdb", NTDB_DEFAULT,
+ * O_RDWR | O_CREAT | O_TRUNC, 0600, NULL);
+ * if (ntdb == NULL)
+ * errx(1, "failed to open database file");
+ *
+ * ntdb_store(ntdb, key, val, NTDB_INSERT);
+ *
+ * ntdb_close(ntdb);
+ *
+ * return 0;
+ * }
+ *
+ * License: LGPL (v3 or any later version)
+ * Authors: Rusty Russell
+ * Andrew Tridgell
+ * Jeremy Allison
+ * Jelmer Vernooij
+ * Volker Lendecke
+ * Andrew Esh
+ * Simon McVittie
+ * Tim Potter
+ * Maintainer: Rusty Russell <rusty@rustcorp.com.au>
+ */
+/*
+ * Module metadata query: prints one dependency per line for the
+ * "depends" or "testdepends" argument and returns 0; returns 1 when the
+ * argument count is not exactly one or the argument is not recognised.
+ *
+ * NOTE(review): CCAN tooling may scan this file textually as well as run
+ * it, so the literal printf() lines are presumably best kept in this
+ * exact form -- confirm before restructuring.
+ */
+int main(int argc, char *argv[])
+{
+ if (argc != 2)
+ return 1;
+
+ /* Modules needed to build the library itself. */
+ if (strcmp(argv[1], "depends") == 0) {
+ printf("ccan/asearch\n");
+ printf("ccan/build_assert\n");
+ printf("ccan/cast\n");
+ printf("ccan/compiler\n");
+ printf("ccan/endian\n");
+ printf("ccan/hash\n");
+ printf("ccan/ilog\n");
+ printf("ccan/likely\n");
+ printf("ccan/tally\n");
+ printf("ccan/typesafe_cb\n");
+ return 0;
+ }
+
+ /* Additional modules needed only by the tests. */
+ if (strcmp(argv[1], "testdepends") == 0) {
+ printf("ccan/failtest\n");
+ printf("ccan/err\n");
+ return 0;
+ }
+
+ return 1;
+}
--- /dev/null
+ /*
+ Trivial Database 2: free list/block handling
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+#include <ccan/asearch/asearch.h>
+
+/*
+ * We keep an ordered array of offsets.
+ *
+ * Grow *arr by one slot and store off in the new last position.  The
+ * very first element comes from the context's alloc_fn; afterwards the
+ * existing array is resized with expand_fn.  On allocator failure this
+ * returns false and leaves *arr and *num untouched; otherwise *arr and
+ * *num are updated and true is returned.
+ */
+static bool append(struct ntdb_context *ntdb,
+		   ntdb_off_t **arr, size_t *num, ntdb_off_t off)
+{
+	const size_t count = *num;
+	ntdb_off_t *grown;
+
+	if (count != 0) {
+		grown = ntdb->expand_fn(*arr,
+					(count + 1) * sizeof(ntdb_off_t),
+					ntdb->alloc_data);
+	} else {
+		grown = ntdb->alloc_fn(ntdb, sizeof(ntdb_off_t),
+				       ntdb->alloc_data);
+	}
+	if (grown == NULL)
+		return false;
+
+	grown[count] = off;
+	*num = count + 1;
+	*arr = grown;
+	return true;
+}
+
+/*
+ * Validate the database header stored at offset 0.
+ *
+ * Checks the hash self-test value, the magic string, that the features
+ * used are a subset of the features offered, and that a non-zero
+ * recovery offset lies between the end of the header and the mapped
+ * size.  It then walks the capability list.
+ *
+ * On success *recovery and *features receive the header's recovery
+ * offset and offered-features mask, and *num_capabilities is incremented
+ * once per capability entry that unknown_capability() accepts.
+ * Returns NTDB_SUCCESS or the first error encountered.
+ */
+static enum NTDB_ERROR check_header(struct ntdb_context *ntdb,
+ ntdb_off_t *recovery,
+ uint64_t *features,
+ size_t *num_capabilities)
+{
+ uint64_t hash_test;
+ struct ntdb_header hdr;
+ enum NTDB_ERROR ecode;
+ ntdb_off_t off, next;
+
+ ecode = ntdb_read_convert(ntdb, 0, &hdr, sizeof(hdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ /* magic food should not be converted, so convert back. */
+ ntdb_convert(ntdb, hdr.magic_food, sizeof(hdr.magic_food));
+
+ /* Recompute the hash of the known constant and compare with the
+ * value stored in the header. */
+ hash_test = NTDB_HASH_MAGIC;
+ hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
+ if (hdr.hash_test != hash_test) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "check: hash test %llu should be %llu",
+ (long long)hdr.hash_test,
+ (long long)hash_test);
+ }
+
+ if (strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "check: bad magic '%.*s'",
+ (unsigned)sizeof(hdr.magic_food),
+ hdr.magic_food);
+ }
+
+ /* Features which are used must be a subset of features offered. */
+ if (hdr.features_used & ~hdr.features_offered) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "check: features used (0x%llx) which"
+ " are not offered (0x%llx)",
+ (long long)hdr.features_used,
+ (long long)hdr.features_offered);
+ }
+
+ *features = hdr.features_offered;
+ *recovery = hdr.recovery;
+ /* A zero recovery offset means there is nothing to range-check. */
+ if (*recovery) {
+ if (*recovery < sizeof(hdr)
+ || *recovery > ntdb->file->map_size) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check:"
+ " invalid recovery offset %zu",
+ (size_t)*recovery);
+ }
+ }
+
+ /* Follow the linked list of capability records; a zero offset ends it. */
+ for (off = hdr.capabilities; off && ecode == NTDB_SUCCESS; off = next) {
+ const struct ntdb_capability *cap;
+ enum NTDB_ERROR e;
+
+ cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
+ if (NTDB_PTR_IS_ERR(cap)) {
+ return NTDB_PTR_ERR(cap);
+ }
+
+ /* All capabilities are unknown. */
+ e = unknown_capability(ntdb, "ntdb_check", cap->type);
+ /* Read the link before releasing the access to cap. */
+ next = cap->next;
+ ntdb_access_release(ntdb, cap);
+ if (e)
+ return e;
+ (*num_capabilities)++;
+ }
+
+ /* Don't check reserved: they *can* be used later. */
+ return NTDB_SUCCESS;
+}
+
+/*
+ * Three-way comparison of two offsets for asearch(): returns 1, -1 or 0
+ * when *a is greater than, less than or equal to *b.  ctx is unused.
+ */
+static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b, void *ctx)
+{
+	/* Compare explicitly: a subtraction can overflow an int. */
+	if (*a > *b)
+		return 1;
+	if (*a < *b)
+		return -1;
+	return 0;
+}
+
+/*
+ * Validate one hash-bucket entry that refers directly to a used record.
+ *
+ * off_and_hash is the raw bucket value; the record offset is extracted
+ * with NTDB_OFF_MASK.  The offset must be present in the used[] array
+ * (num_used entries, searched with asearch()).  The key is then hashed
+ * to confirm that it belongs in 'bucket' and that the extra hash bits
+ * stored in the top of off_and_hash match.  If 'check' is non-NULL it is
+ * called with the key and data and its result is returned.
+ *
+ * *num_found is incremented for every entry found in used[].
+ * An empty entry (zero) is accepted without further checks.
+ */
+static enum NTDB_ERROR check_entry(struct ntdb_context *ntdb,
+ ntdb_off_t off_and_hash,
+ ntdb_len_t bucket,
+ ntdb_off_t used[],
+ size_t num_used,
+ size_t *num_found,
+ enum NTDB_ERROR (*check)(NTDB_DATA,
+ NTDB_DATA,
+ void *),
+ void *data)
+{
+ enum NTDB_ERROR ecode;
+ const struct ntdb_used_record *r;
+ const unsigned char *kptr;
+ ntdb_len_t klen, dlen;
+ uint32_t hash;
+ ntdb_off_t off = off_and_hash & NTDB_OFF_MASK;
+ ntdb_off_t *p;
+
+ /* Empty bucket is fine. */
+ if (!off_and_hash) {
+ return NTDB_SUCCESS;
+ }
+
+ /* This can't point to a chain, we handled those at toplevel. */
+ if (off_and_hash & (1ULL << NTDB_OFF_CHAIN_BIT)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Invalid chain bit in offset "
+ " %llu", (long long)off_and_hash);
+ }
+
+ p = asearch(&off, used, num_used, off_cmp, NULL);
+ if (!p) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Invalid offset"
+ " %llu in hash", (long long)off);
+ }
+ /* Mark it invalid. */
+ /* (Flips the low bit of the stored offset; presumably so a second
+ * reference to the same record no longer matches -- confirm.) */
+ *p ^= 1;
+ (*num_found)++;
+
+ /* Read the record header only long enough to get the lengths. */
+ r = ntdb_access_read(ntdb, off, sizeof(*r), true);
+ if (NTDB_PTR_IS_ERR(r)) {
+ return NTDB_PTR_ERR(r);
+ }
+ klen = rec_key_length(r);
+ dlen = rec_data_length(r);
+ ntdb_access_release(ntdb, r);
+
+ /* Key and data are read as one span directly after the header. */
+ kptr = ntdb_access_read(ntdb, off + sizeof(*r), klen + dlen, false);
+ if (NTDB_PTR_IS_ERR(kptr)) {
+ return NTDB_PTR_ERR(kptr);
+ }
+
+ hash = ntdb_hash(ntdb, kptr, klen);
+
+ /* Are we in the right chain? */
+ if (bits_from(hash, 0, ntdb->hash_bits) != bucket) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: Bad bucket %u vs %llu",
+ bits_from(hash, 0, ntdb->hash_bits),
+ (long long)bucket);
+ /* Next 8 bits should be the same as top bits of bucket. */
+ } else if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
+ != bits_from(off_and_hash, 64-NTDB_OFF_UPPER_STEAL,
+ NTDB_OFF_UPPER_STEAL)) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash bits %llu vs %llu",
+ (long long)off_and_hash,
+ (long long)hash);
+ } else if (check) {
+ NTDB_DATA k, d;
+
+ /* Hand the caller's callback views into the mapped record. */
+ k = ntdb_mkdata(kptr, klen);
+ d = ntdb_mkdata(kptr + klen, dlen);
+ ecode = check(k, d, data);
+ } else {
+ ecode = NTDB_SUCCESS;
+ }
+ /* Every branch above falls through to here, so kptr is always released. */
+ ntdb_access_release(ntdb, kptr);
+
+ return ecode;
+}
+
+/*
+ * Validate a hash chain record at 'off' belonging to 'bucket'.
+ *
+ * The record must carry NTDB_CHAIN_MAGIC, have a zero key length and a
+ * data length that is a whole number of ntdb_off_t entries.  Each entry
+ * in the chain is then validated with check_entry().  The chain record
+ * itself is counted in *num_found.
+ *
+ * Returns NTDB_SUCCESS, or the first error found.
+ */
+static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb,
+ ntdb_off_t off,
+ ntdb_len_t bucket,
+ ntdb_off_t used[],
+ size_t num_used,
+ size_t *num_found,
+ enum NTDB_ERROR (*check)(NTDB_DATA,
+ NTDB_DATA,
+ void *),
+ void *data)
+{
+ struct ntdb_used_record rec;
+ enum NTDB_ERROR ecode;
+ const ntdb_off_t *entries;
+ ntdb_len_t i, num;
+
+ /* This is a used entry. */
+ (*num_found)++;
+
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash chain magic %llu",
+ (long long)rec_magic(&rec));
+ }
+
+ if (rec_data_length(&rec) % sizeof(ntdb_off_t)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash chain data length %llu",
+ (long long)rec_data_length(&rec));
+ }
+
+ if (rec_key_length(&rec) != 0) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash chain key length %llu",
+ (long long)rec_key_length(&rec));
+ }
+
+ /* Step past the record header to the array of entries. */
+ off += sizeof(rec);
+ num = rec_data_length(&rec) / sizeof(ntdb_off_t);
+ entries = ntdb_access_read(ntdb, off, rec_data_length(&rec), true);
+ if (NTDB_PTR_IS_ERR(entries)) {
+ return NTDB_PTR_ERR(entries);
+ }
+
+ /* Check each non-deleted entry in chain. */
+ /* If num is zero the loop is skipped and ecode keeps the success
+ * value from the read above. */
+ for (i = 0; i < num; i++) {
+ ecode = check_entry(ntdb, entries[i], bucket,
+ used, num_used, num_found, check, data);
+ if (ecode) {
+ break;
+ }
+ }
+
+ ntdb_access_release(ntdb, entries);
+ return ecode;
+}
+
+/*
+ * Validate the top-level hash table and everything reachable from it.
+ *
+ * The table record at NTDB_HASH_OFFSET must carry NTDB_HTABLE_MAGIC,
+ * have a zero key length and hold exactly one ntdb_off_t per bucket
+ * (2^hash_bits buckets).  Each bucket is either a chain (chain bit set),
+ * validated by check_hash_chain(), or a direct entry, validated by
+ * check_entry().
+ *
+ * used[]/num_used list every used record found by the linear scan;
+ * num_other_used is the number of used records that are not reachable
+ * through the hash (free tables and capabilities).  After the walk, the
+ * number of records accounted for must equal num_used.
+ *
+ * 'check' (may be NULL) and 'data' are passed through to the per-entry
+ * callback.  Returns NTDB_SUCCESS or the first error found.
+ */
+static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb,
+				  ntdb_off_t used[],
+				  size_t num_used,
+				  size_t num_other_used,
+				  enum NTDB_ERROR (*check)(NTDB_DATA,
+							   NTDB_DATA,
+							   void *),
+				  void *data)
+{
+	enum NTDB_ERROR ecode;
+	struct ntdb_used_record rec;
+	const ntdb_off_t *entries;
+	ntdb_len_t i, num_buckets;
+	/* Free tables and capabilities also show up as used, as do we. */
+	size_t num_found = num_other_used + 1;
+
+	ecode = ntdb_read_convert(ntdb, NTDB_HASH_OFFSET, &rec, sizeof(rec));
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+
+	if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: Bad hash table magic %llu",
+				   (long long)rec_magic(&rec));
+	}
+
+	if (rec_data_length(&rec) != (sizeof(ntdb_off_t) << ntdb->hash_bits)) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: Bad hash table data length %llu",
+				   (long long)rec_data_length(&rec));
+	}
+
+	if (rec_key_length(&rec) != 0) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: Bad hash table key length %llu",
+				   (long long)rec_key_length(&rec));
+	}
+
+	entries = ntdb_access_read(ntdb, NTDB_HASH_OFFSET + sizeof(rec),
+				   rec_data_length(&rec), true);
+	if (NTDB_PTR_IS_ERR(entries)) {
+		return NTDB_PTR_ERR(entries);
+	}
+
+	/*
+	 * Do the shift in the index type: a plain "1 << hash_bits" is a
+	 * signed int shift, which overflows once hash_bits reaches 31.
+	 */
+	num_buckets = (ntdb_len_t)1 << ntdb->hash_bits;
+	for (i = 0; i < num_buckets; i++) {
+		ntdb_off_t off = entries[i] & NTDB_OFF_MASK;
+		if (entries[i] & (1ULL << NTDB_OFF_CHAIN_BIT)) {
+			ecode = check_hash_chain(ntdb, off, i,
+						 used, num_used, &num_found,
+						 check, data);
+		} else {
+			ecode = check_entry(ntdb, entries[i], i,
+					    used, num_used, &num_found,
+					    check, data);
+		}
+		if (ecode) {
+			break;
+		}
+	}
+	ntdb_access_release(ntdb, entries);
+
+	/* Every used record must have been reached exactly once. */
+	if (ecode == NTDB_SUCCESS && num_found != num_used) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				    "ntdb_check: Not all entries are in hash");
+	}
+	return ecode;
+}
+
+/*
+ * Validate a single record found on a free list.
+ *
+ * off     - offset of the free record.
+ * frec    - the (already converted) free record read from off.
+ * prev    - offset of the previous record on the list, or 0 to skip the
+ *           back-pointer check.
+ * ftable  - index of the free table the record is expected to belong to.
+ * bucket  - free-list bucket the record was found in.
+ *
+ * Checks the free magic, the recorded free-table number, that the record
+ * lies within the file (ntdb_oob), that its length maps to 'bucket', and
+ * that its back pointer matches 'prev'.  Returns NTDB_SUCCESS or the
+ * first error.
+ */
+static enum NTDB_ERROR check_free(struct ntdb_context *ntdb,
+				  ntdb_off_t off,
+				  const struct ntdb_free_record *frec,
+				  ntdb_off_t prev, unsigned int ftable,
+				  unsigned int bucket)
+{
+	enum NTDB_ERROR ecode;
+
+	if (frec_magic(frec) != NTDB_FREE_MAGIC) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: offset %llu bad magic 0x%llx",
+				   (long long)off,
+				   (long long)frec->magic_and_prev);
+	}
+	if (frec_ftable(frec) != ftable) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: offset %llu bad freetable %u",
+				   (long long)off, frec_ftable(frec));
+
+	}
+
+	/* The whole record (header plus free length) must be in bounds. */
+	ecode = ntdb_oob(ntdb, off,
+			 frec_len(frec) + sizeof(struct ntdb_used_record),
+			 false);
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+	if (size_to_bucket(frec_len(frec)) != bucket) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: offset %llu in wrong bucket"
+				   " (%u vs %u)",
+				   (long long)off,
+				   bucket, size_to_bucket(frec_len(frec)));
+	}
+	if (prev && prev != frec_prev(frec)) {
+		/* Report the two values that were actually compared. */
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_check: offset %llu bad prev"
+				   " (%llu vs %llu)",
+				   (long long)off,
+				   (long long)prev, (long long)frec_prev(frec));
+	}
+	return NTDB_SUCCESS;
+}
+
+/*
+ * Validate one free table and every free list hanging off it.
+ *
+ * ftable_off - offset of the free table record.
+ * ftable_num - index of this table, which each free record must name.
+ * fr/num_free - array of free-record offsets collected by the linear
+ *               scan; every list member must be found in it.
+ * num_found  - incremented for each free record found on a list.
+ *
+ * Returns NTDB_SUCCESS or the first error.
+ */
+static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb,
+ ntdb_off_t ftable_off,
+ unsigned ftable_num,
+ ntdb_off_t fr[],
+ size_t num_free,
+ size_t *num_found)
+{
+ struct ntdb_freetable ft;
+ ntdb_off_t h;
+ unsigned int i;
+ enum NTDB_ERROR ecode;
+
+ ecode = ntdb_read_convert(ntdb, ftable_off, &ft, sizeof(ft));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* The table is stored as a used record with no key whose data is
+ * everything in the struct after the header. */
+ if (rec_magic(&ft.hdr) != NTDB_FTABLE_MAGIC
+ || rec_key_length(&ft.hdr) != 0
+ || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Invalid header on free table");
+ }
+
+ for (i = 0; i < NTDB_FREE_BUCKETS; i++) {
+ ntdb_off_t off, prev = 0, *p, first = 0;
+ struct ntdb_free_record f;
+
+ h = bucket_off(ftable_off, i);
+ for (off = ntdb_read_off(ntdb, h); off; off = f.next) {
+ if (NTDB_OFF_IS_ERR(off)) {
+ return NTDB_OFF_TO_ERR(off);
+ }
+ /* Only the list head value is masked; remember it so its
+ * back pointer can be checked after the walk. */
+ if (!first) {
+ off &= NTDB_OFF_MASK;
+ first = off;
+ }
+ ecode = ntdb_read_convert(ntdb, off, &f, sizeof(f));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ /* prev is 0 for the first record, which skips the
+ * back-pointer comparison in check_free(). */
+ ecode = check_free(ntdb, off, &f, prev, ftable_num, i);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* FIXME: Check hash bits */
+ p = asearch(&off, fr, num_free, off_cmp, NULL);
+ if (!p) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: Invalid offset"
+ " %llu in free table",
+ (long long)off);
+ }
+ /* Mark it invalid. */
+ *p ^= 1;
+ (*num_found)++;
+ prev = off;
+ }
+
+ if (first) {
+ /* Now we can check first back pointer. */
+ /* prev now holds the last record visited on this list. */
+ ecode = ntdb_read_convert(ntdb, first, &f, sizeof(f));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ ecode = check_free(ntdb, first, &f, prev, ftable_num, i);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ }
+ }
+ return NTDB_SUCCESS;
+}
+
+/*
+ * Measure the run of "dead" bytes starting at off.
+ *
+ * A byte is dead if it is 0 or 0x43.  Bytes are read one at a time from
+ * off onwards until a live byte or the end of the mapped file is
+ * reached.  Returns the length of the run, or an error encoded with
+ * NTDB_ERR_TO_OFF() if a read fails.
+ *
+ * Slow, but should be very rare.
+ */
+ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off)
+{
+	size_t len;
+	enum NTDB_ERROR ecode;
+
+	for (len = 0; off + len < ntdb->file->map_size; len++) {
+		char c;
+		/* Read the byte at the current position, not always the
+		 * first one, so the run ends where live data begins. */
+		ecode = ntdb->io->tread(ntdb, off + len, &c, 1);
+		if (ecode != NTDB_SUCCESS) {
+			return NTDB_ERR_TO_OFF(ecode);
+		}
+		if (c != 0 && c != 0x43)
+			break;
+	}
+	return len;
+}
+
+/*
+ * Walk the file record by record from just after the header to the end
+ * of the map, classifying each record by its magic.
+ *
+ * Offsets of used-type records (used, chain, hash table, free table,
+ * capability) are appended to *used / *num_used; offsets of free
+ * records that name a free table are appended to *fr / *num_free.
+ * Recovery records are only accepted at the offset 'recovery' taken
+ * from the header, and if 'recovery' is non-zero one must be found.
+ * 'features' gates the check that record padding starts with a zero
+ * byte: that check is only made when features is 0.
+ *
+ * Returns NTDB_SUCCESS, NTDB_ERR_OOM if tracking arrays cannot grow, or
+ * the first corruption/IO error.
+ */
+static enum NTDB_ERROR check_linear(struct ntdb_context *ntdb,
+ ntdb_off_t **used, size_t *num_used,
+ ntdb_off_t **fr, size_t *num_free,
+ uint64_t features, ntdb_off_t recovery)
+{
+ ntdb_off_t off;
+ ntdb_len_t len;
+ enum NTDB_ERROR ecode;
+ bool found_recovery = false;
+
+ /* Each branch below sets len to the size of the record at off. */
+ for (off = sizeof(struct ntdb_header);
+ off < ntdb->file->map_size;
+ off += len) {
+ union {
+ struct ntdb_used_record u;
+ struct ntdb_free_record f;
+ struct ntdb_recovery_record r;
+ } rec;
+ /* r is larger: only get that if we need to. */
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.f));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* If we crash after ftruncate, we can get zeroes or fill. */
+ if (rec.r.magic == NTDB_RECOVERY_INVALID_MAGIC
+ || rec.r.magic == 0x4343434343434343ULL) {
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ if (recovery == off) {
+ /* An invalidated recovery area at the expected place. */
+ found_recovery = true;
+ len = sizeof(rec.r) + rec.r.max_len;
+ } else {
+ /* Otherwise treat it as dead space and skip it. */
+ len = dead_space(ntdb, off);
+ if (NTDB_OFF_IS_ERR(len)) {
+ return NTDB_OFF_TO_ERR(len);
+ }
+ if (len < sizeof(rec.r)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: invalid"
+ " dead space at %zu",
+ (size_t)off);
+ }
+
+ /* Logged as a warning only; the scan continues. */
+ ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+ "Dead space at %zu-%zu (of %zu)",
+ (size_t)off, (size_t)(off + len),
+ (size_t)ntdb->file->map_size);
+ }
+ } else if (rec.r.magic == NTDB_RECOVERY_MAGIC) {
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec.r));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ if (recovery != off) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: unexpected"
+ " recovery record at offset"
+ " %zu",
+ (size_t)off);
+ }
+ if (rec.r.len > rec.r.max_len) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: invalid recovery"
+ " length %zu",
+ (size_t)rec.r.len);
+ }
+ if (rec.r.eof > ntdb->file->map_size) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: invalid old EOF"
+ " %zu", (size_t)rec.r.eof);
+ }
+ found_recovery = true;
+ len = sizeof(rec.r) + rec.r.max_len;
+ } else if (frec_magic(&rec.f) == NTDB_FREE_MAGIC) {
+ len = sizeof(rec.u) + frec_len(&rec.f);
+ if (off + len > ntdb->file->map_size) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: free overlength"
+ " %llu at offset %llu",
+ (long long)len,
+ (long long)off);
+ }
+ /* This record should be in free lists. */
+ /* Records marked NTDB_FTABLE_NONE are not tracked. */
+ if (frec_ftable(&rec.f) != NTDB_FTABLE_NONE
+ && !append(ntdb, fr, num_free, off)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_OOM,
+ NTDB_LOG_ERROR,
+ "ntdb_check: tracking %zu'th"
+ " free record.", *num_free);
+ }
+ } else if (rec_magic(&rec.u) == NTDB_USED_MAGIC
+ || rec_magic(&rec.u) == NTDB_CHAIN_MAGIC
+ || rec_magic(&rec.u) == NTDB_HTABLE_MAGIC
+ || rec_magic(&rec.u) == NTDB_FTABLE_MAGIC
+ || rec_magic(&rec.u) == NTDB_CAP_MAGIC) {
+ uint64_t klen, dlen, extra;
+
+ /* This record is used! */
+ if (!append(ntdb, used, num_used, off)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_OOM,
+ NTDB_LOG_ERROR,
+ "ntdb_check: tracking %zu'th"
+ " used record.", *num_used);
+ }
+
+ klen = rec_key_length(&rec.u);
+ dlen = rec_data_length(&rec.u);
+ extra = rec_extra_padding(&rec.u);
+
+ len = sizeof(rec.u) + klen + dlen + extra;
+ if (off + len > ntdb->file->map_size) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: used overlength"
+ " %llu at offset %llu",
+ (long long)len,
+ (long long)off);
+ }
+
+ /* A used record must be at least as large as a free record. */
+ if (len < sizeof(rec.f)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: too short record"
+ " %llu at %llu",
+ (long long)len,
+ (long long)off);
+ }
+
+ /* Check that records have correct 0 at end (but may
+ * not in future). */
+ /* Only the first padding byte is examined. */
+ if (extra && !features
+ && rec_magic(&rec.u) != NTDB_CAP_MAGIC) {
+ const char *p;
+ char c;
+ p = ntdb_access_read(ntdb, off + sizeof(rec.u)
+ + klen + dlen, 1, false);
+ if (NTDB_PTR_IS_ERR(p))
+ return NTDB_PTR_ERR(p);
+ c = *p;
+ ntdb_access_release(ntdb, p);
+
+ if (c != '\0') {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check:"
+ " non-zero extra"
+ " at %llu",
+ (long long)off);
+ }
+ }
+ } else {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: Bad magic 0x%llx"
+ " at offset %zu",
+ (long long)rec_magic(&rec.u),
+ (size_t)off);
+ }
+ }
+
+ /* We must have found recovery area if there was one. */
+ if (recovery != 0 && !found_recovery) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: expected a recovery area at %zu",
+ (size_t)recovery);
+ }
+
+ return NTDB_SUCCESS;
+}
+
+/*
+ * Check the consistency of an entire database.
+ *
+ * ntdb  - database to check.
+ * check - optional callback invoked with each record's key and data
+ *         (plus 'data'); a non-zero return aborts the check and is
+ *         returned to the caller.  May be NULL.
+ * data  - opaque pointer handed to 'check'.
+ *
+ * The check holds a read all-record lock and the expand lock for its
+ * duration.  It validates the header, scans all records linearly, then
+ * validates every free table and the hash table, and finally verifies
+ * that every free record seen by the scan was found on a free list.
+ *
+ * If the NTDB_CANT_CHECK flag is set, nothing is checked: a warning is
+ * logged and the result of that ntdb_logerr() call is returned.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb,
+ enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *),
+ void *data)
+{
+ ntdb_off_t *fr = NULL, *used = NULL;
+ ntdb_off_t ft = 0, recovery = 0;
+ size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0,
+ num_capabilities = 0;
+ uint64_t features = 0;
+ enum NTDB_ERROR ecode;
+
+ if (ntdb->flags & NTDB_CANT_CHECK) {
+ return ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+ "ntdb_check: database has unknown capability,"
+ " cannot check.");
+ }
+
+ ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ ecode = ntdb_lock_expand(ntdb, F_RDLCK);
+ if (ecode != NTDB_SUCCESS) {
+ /* Only the first lock is held here, so undo just that one. */
+ ntdb_allrecord_unlock(ntdb, F_RDLCK);
+ return ecode;
+ }
+
+ ecode = check_header(ntdb, &recovery, &features, &num_capabilities);
+ if (ecode != NTDB_SUCCESS)
+ goto out;
+
+ /* First we do a linear scan, checking all records. */
+ ecode = check_linear(ntdb, &used, &num_used, &fr, &num_free, features,
+ recovery);
+ if (ecode != NTDB_SUCCESS)
+ goto out;
+
+ /* num_ftables doubles as the index each table's records must name. */
+ for (ft = first_ftable(ntdb); ft; ft = next_ftable(ntdb, ft)) {
+ if (NTDB_OFF_IS_ERR(ft)) {
+ ecode = NTDB_OFF_TO_ERR(ft);
+ goto out;
+ }
+ ecode = check_free_table(ntdb, ft, num_ftables, fr, num_free,
+ &num_found);
+ if (ecode != NTDB_SUCCESS)
+ goto out;
+ num_ftables++;
+ }
+
+ /* FIXME: Check key uniqueness? */
+ ecode = check_hash(ntdb, used, num_used, num_ftables + num_capabilities,
+ check, data);
+ if (ecode != NTDB_SUCCESS)
+ goto out;
+
+ /* num_found counts free records reached through the free tables. */
+ if (num_found != num_free) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Not all entries are in"
+ " free table");
+ }
+
+out:
+ /* Common exit: drop both locks and free the tracking arrays (which
+ * are still NULL if the scan never allocated them). */
+ ntdb_allrecord_unlock(ntdb, F_RDLCK);
+ ntdb_unlock_expand(ntdb, F_RDLCK);
+ ntdb->free_fn(fr, ntdb->alloc_data);
+ ntdb->free_fn(used, ntdb->alloc_data);
+ return ecode;
+}
--- /dev/null
+Interface differences between TDB and NTDB.
+
+- ntdb shares 'struct TDB_DATA' with tdb, but TDB defines the TDB_DATA
+ typedef, whereas ntdb defines NTDB_DATA (ie. both are compatible).
+ If you include both ntdb.h and tdb.h, #include tdb.h first,
+ otherwise you'll get a compile error when tdb.h redefines struct
+ TDB_DATA.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+- ntdb functions return NTDB_SUCCESS (ie 0) on success, and a negative
+ error on failure, whereas tdb functions returned 0 on success, and
+ -1 on failure. tdb then used tdb_error() to determine the error;
+ this API is nasty if we ever want to support threads, so is not supported.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d)
+ {
+ if (tdb_store(tdb, key, d, TDB_REPLACE) == -1) {
+ printf("store failed: %s\n", tdb_errorstr(tdb));
+ }
+ }
+
+ void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d)
+ {
+ enum NTDB_ERROR e;
+
+ e = ntdb_store(ntdb, key, d, NTDB_REPLACE);
+ if (e) {
+ printf("store failed: %s\n", ntdb_errorstr(e));
+ }
+ }
+
+- ntdb's ntdb_fetch() returns an error, tdb's returned the data directly
+ (or tdb_null, and you were supposed to check tdb_error() to find out why).
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ void tdb_example(struct tdb_context *tdb, TDB_DATA key)
+ {
+ TDB_DATA data;
+
+ data = tdb_fetch(tdb, key);
+ if (!data.dptr) {
+ printf("fetch failed: %s\n", tdb_errorstr(tdb));
+ }
+ }
+
+ void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key)
+ {
+ NTDB_DATA data;
+ enum NTDB_ERROR e;
+
+ e = ntdb_fetch(ntdb, key, &data);
+ if (e) {
+ printf("fetch failed: %s\n", ntdb_errorstr(e));
+ }
+ }
+
+- ntdb's ntdb_nextkey() frees the old key's dptr, in tdb you needed to do
+ this manually.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ void tdb_example(struct tdb_context *tdb)
+ {
+ TDB_DATA key, next, data;
+
+ for (key = tdb_firstkey(tdb); key.dptr; key = next) {
+ printf("Got key!\n");
+ next = tdb_nextkey(tdb, key);
+ free(key.dptr);
+ }
+ }
+
+
+ void ntdb_example(struct ntdb_context *ntdb)
+ {
+ NTDB_DATA k, data;
+ enum NTDB_ERROR e;
+
+ for (e = ntdb_firstkey(ntdb,&k); !e; e = ntdb_nextkey(ntdb,&k))
+ printf("Got key!\n");
+ }
+
+- Unlike tdb_open/tdb_open_ex, ntdb_open does not allow NULL names,
+ even for NTDB_INTERNAL dbs, and thus ntdb_name() never returns NULL.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ struct tdb_context *tdb_example(void)
+ {
+ return tdb_open(NULL, 0, TDB_INTERNAL, O_RDWR, 0);
+ }
+
+ struct ntdb_context *ntdb_example(void)
+ {
+ return ntdb_open("example", NTDB_INTERNAL, O_RDWR, 0, NULL);
+ }
+
+- ntdb uses a linked list of attribute structures to implement logging and
+ alternate hashes. tdb used tdb_open_ex, which was not extensible.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ /* Custom hash function */
+ static unsigned int my_tdb_hash_func(TDB_DATA *key)
+ {
+ return key->dsize;
+ }
+
+ struct tdb_context *tdb_example(void)
+ {
+ return tdb_open_ex("example.tdb", 0, TDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600, NULL, my_tdb_hash_func);
+ }
+
+ /* Custom hash function */
+ static unsigned int my_ntdb_hash_func(const void *key, size_t len,
+ uint32_t seed, void *data)
+ {
+ return len;
+ }
+
+ struct ntdb_context *ntdb_example(void)
+ {
+ union ntdb_attribute hash;
+
+ hash.base.attr = NTDB_ATTRIBUTE_HASH;
+ hash.base.next = NULL;
+ hash.hash.fn = my_ntdb_hash_func;
+ return ntdb_open("example.ntdb", NTDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600, &hash);
+ }
+
+- tdb's tdb_open/tdb_open_ex took an explicit hash size, defaulting to
+ 131. ntdb uses an attribute for this, defaulting to 8192.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ struct tdb_context *tdb_example(void)
+ {
+ return tdb_open("example.tdb", 10007, TDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600);
+ }
+
+ struct ntdb_context *ntdb_example(void)
+ {
+ union ntdb_attribute hashsize;
+
+ hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE;
+ hashsize.base.next = NULL;
+ hashsize.hashsize.size = 16384;
+ return ntdb_open("example.ntdb", NTDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600, &hashsize);
+ }
+
+- ntdb's log function is simpler than tdb's log function. The string
+ is already formatted and is not terminated by a '\n'. It takes an enum
+ ntdb_log_level, not a tdb_debug_level; ntdb_log_level has only three
+ values: NTDB_LOG_ERROR, NTDB_LOG_USE_ERROR and NTDB_LOG_WARNING.
+
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ static void tdb_log(struct tdb_context *tdb,
+ enum tdb_debug_level level, const char *fmt, ...)
+ {
+ va_list ap;
+ const char *name;
+
+ switch (level) {
+ case TDB_DEBUG_FATAL:
+ fprintf(stderr, "FATAL: ");
+ break;
+ case TDB_DEBUG_ERROR:
+ fprintf(stderr, "ERROR: ");
+ break;
+ case TDB_DEBUG_WARNING:
+ fprintf(stderr, "WARNING: ");
+ break;
+ case TDB_DEBUG_TRACE:
+ /* Don't print out tracing. */
+ return;
+ }
+
+ name = tdb_name(tdb);
+ if (!name) {
+ name = "unnamed";
+ }
+
+ fprintf(stderr, "tdb(%s):", name);
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+
+ struct tdb_context *tdb_example(void)
+ {
+ struct tdb_logging_context lctx;
+
+ lctx.log_fn = tdb_log;
+ return tdb_open_ex("example.tdb", 0, TDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600, &lctx, NULL);
+ }
+
+ static void ntdb_log(struct ntdb_context *ntdb,
+ enum ntdb_log_level level,
+ enum NTDB_ERROR ecode,
+ const char *message,
+ void *data)
+ {
+ switch (level) {
+ case NTDB_LOG_ERROR:
+ fprintf(stderr, "ERROR: ");
+ break;
+ case NTDB_LOG_USE_ERROR:
+ /* We made a mistake, so abort. */
+ abort();
+ break;
+ case NTDB_LOG_WARNING:
+ fprintf(stderr, "WARNING: ");
+ break;
+ }
+
+ fprintf(stderr, "ntdb(%s):%s:%s\n",
+ ntdb_name(ntdb), ntdb_errorstr(ecode), message);
+ }
+
+ struct ntdb_context *ntdb_example(void)
+ {
+ union ntdb_attribute log;
+
+ log.base.attr = NTDB_ATTRIBUTE_LOG;
+ log.base.next = NULL;
+ log.log.fn = ntdb_log;
+ return ntdb_open("example.ntdb", NTDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600, &log);
+ }
+
+- ntdb provides ntdb_deq() for comparing two NTDB_DATA, and ntdb_mkdata() for
+ creating an NTDB_DATA.
+
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ void tdb_example(struct tdb_context *tdb)
+ {
+ TDB_DATA data, key;
+
+ key.dsize = strlen("hello");
+ key.dptr = "hello";
+ data = tdb_fetch(tdb, key);
+ if (data.dsize == key.dsize
+ && !memcmp(data.dptr, key.dptr, key.dsize)) {
+ printf("key is same as data\n");
+ }
+ free(data.dptr);
+ }
+
+ void ntdb_example(struct ntdb_context *ntdb)
+ {
+ NTDB_DATA data, key;
+
+ key = ntdb_mkdata("hello", strlen("hello"));
+ if (ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS) {
+ if (ntdb_deq(key, data)) {
+ printf("key is same as data\n");
+ }
+ free(data.dptr);
+ }
+ }
+
+- ntdb's ntdb_parse_record() takes a type-checked callback data
+ pointer, not a void * (though a void * pointer still works). The
+ callback function is allowed to do read operations on the database,
+ or write operations if you first call ntdb_lockall(). TDB's
+ tdb_parse_record() did not allow any database access within the
+ callback, and could crash if you tried.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ static int tdb_parser(TDB_DATA key, TDB_DATA data, void *private_data)
+ {
+ TDB_DATA *expect = private_data;
+
+ return data.dsize == expect->dsize
+ && !memcmp(data.dptr, expect->dptr, data.dsize);
+ }
+
+ void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d)
+ {
+ switch (tdb_parse_record(tdb, key, tdb_parser, &d)) {
+ case -1:
+ printf("parse failed: %s\n", tdb_errorstr(tdb));
+ break;
+ case 0:
+ printf("data was different!\n");
+ break;
+ case 1:
+ printf("data was same!\n");
+ break;
+ }
+ }
+
+ static int ntdb_parser(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expect)
+ {
+ return ntdb_deq(data, *expect);
+ }
+
+ void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d)
+ {
+ enum NTDB_ERROR e;
+
+ e = ntdb_parse_record(ntdb, key, ntdb_parser, &d);
+ switch (e) {
+ case 0:
+ printf("data was different!\n");
+ break;
+ case 1:
+ printf("data was same!\n");
+ break;
+ default:
+ printf("parse failed: %s\n", ntdb_errorstr(e));
+ break;
+ }
+ }
+
+- ntdb does locking on read-only databases (ie. O_RDONLY passed to ntdb_open).
+ tdb did not: use the NTDB_NOLOCK flag if you want to suppress locking.
+
+ Example:
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ struct tdb_context *tdb_example(void)
+ {
+ return tdb_open("example.tdb", 0, TDB_DEFAULT, O_RDONLY, 0);
+ }
+
+ struct ntdb_context *ntdb_example(void)
+ {
+ return ntdb_open("example.ntdb", NTDB_NOLOCK, O_RDONLY, 0, NULL);
+ }
+
+- Failure inside a transaction (such as a lock function failing) does
+ not implicitly cancel the transaction; you still need to call
+ ntdb_transaction_cancel().
+
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ void tdb_example(struct tdb_context *tdb, TDB_DATA key, TDB_DATA d)
+ {
+ if (tdb_transaction_start(tdb) == -1) {
+ printf("transaction failed: %s\n", tdb_errorstr(tdb));
+ return;
+ }
+
+ if (tdb_store(tdb, key, d, TDB_REPLACE) == -1) {
+ printf("store failed: %s\n", tdb_errorstr(tdb));
+ return;
+ }
+ if (tdb_transaction_commit(tdb) == -1) {
+ printf("commit failed: %s\n", tdb_errorstr(tdb));
+ }
+ }
+
+ void ntdb_example(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA d)
+ {
+ enum NTDB_ERROR e;
+
+ e = ntdb_transaction_start(ntdb);
+ if (e) {
+ printf("transaction failed: %s\n", ntdb_errorstr(e));
+ return;
+ }
+
+ e = ntdb_store(ntdb, key, d, NTDB_REPLACE);
+ if (e) {
+ printf("store failed: %s\n", ntdb_errorstr(e));
+ ntdb_transaction_cancel(ntdb);
+ return;
+ }
+
+ e = ntdb_transaction_commit(ntdb);
+ if (e) {
+ printf("commit failed: %s\n", ntdb_errorstr(e));
+ }
+ }
+
+- There is no NTDB_CLEAR_IF_FIRST flag; it has severe scalability and
+ API problems. If necessary, you can emulate this by using the open
+ hook and placing a 1-byte lock at offset 4. If your program forks
+ and exits, you will need to place this lock again in the child before
+ the parent exits.
+
+ Example:
+
+ #include <tdb.h>
+ #include <ntdb.h>
+
+ struct tdb_context *tdb_example(void)
+ {
+ return tdb_open("example.tdb", 0, TDB_CLEAR_IF_FIRST,
+ O_CREAT|O_RDWR, 0600);
+ }
+
+ static enum NTDB_ERROR clear_if_first(int fd, void *unused)
+ {
+ /* We hold a lock offset 4 always, so we can tell if
+ * anyone else is. */
+ struct flock fl;
+
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 4; /* ACTIVE_LOCK */
+ fl.l_len = 1;
+
+ if (fcntl(fd, F_SETLK, &fl) == 0) {
+ /* We must be first ones to open it! Clear it. */
+ if (ftruncate(fd, 0) != 0) {
+ return NTDB_ERR_IO;
+ }
+ }
+ fl.l_type = F_RDLCK;
+ if (fcntl(fd, F_SETLKW, &fl) != 0) {
+ return NTDB_ERR_IO;
+ }
+ return NTDB_SUCCESS;
+ }
+
+ struct ntdb_context *ntdb_example(void)
+ {
+ union ntdb_attribute open_attr;
+
+ open_attr.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
+ open_attr.openhook.base.next = NULL;
+ open_attr.openhook.fn = clear_if_first;
+
+ return ntdb_open("example.ntdb", NTDB_DEFAULT,
+ O_CREAT|O_RDWR, 0600, &open_attr);
+ }
+
+- ntdb traversals are not reliable if the database is changed during
+ the traversal, ie your traversal may not cover all elements, or may
+ cover elements multiple times. As a special exception, deleting the
+ current record within ntdb_traverse() is reliable.
+
+- There is no ntdb_traverse_read, since ntdb_traverse does not hold
+ a lock across the entire traversal anyway. If you want to make sure
+ that your traversal function does not write to the database, you can
+ set and clear the NTDB_RDONLY flag around the traversal.
+
+- ntdb does not need tdb_reopen() or tdb_reopen_all(). If you call
+ fork() during certain operations, the child should close the
+ ntdb, or complete the operations before continuing to use the ntdb:
+
+ ntdb_transaction_start(): child must ntdb_transaction_cancel()
+ ntdb_lockall(): child must call ntdb_unlockall()
+ ntdb_lockall_read(): child must call ntdb_unlockall_read()
+ ntdb_chainlock(): child must call ntdb_chainunlock()
+ ntdb_parse_record() callback: child must return from ntdb_parse_record()
+
+- ntdb will not open a non-ntdb file, even if O_CREAT is specified. tdb
+ will overwrite an unknown file in that case.
--- /dev/null
+#LyX 2.0 created this file. For more info see http://www.lyx.org/
+\lyxformat 413
+\begin_document
+\begin_header
+\textclass article
+\use_default_options true
+\maintain_unincluded_children false
+\language english
+\language_package default
+\inputencoding auto
+\fontencoding global
+\font_roman default
+\font_sans default
+\font_typewriter default
+\font_default_family default
+\use_non_tex_fonts false
+\font_sc false
+\font_osf false
+\font_sf_scale 100
+\font_tt_scale 100
+
+\graphics default
+\default_output_format default
+\output_sync 0
+\bibtex_command default
+\index_command default
+\paperfontsize default
+\use_hyperref false
+\papersize default
+\use_geometry false
+\use_amsmath 1
+\use_esint 1
+\use_mhchem 1
+\use_mathdots 1
+\cite_engine basic
+\use_bibtopic false
+\use_indices false
+\paperorientation portrait
+\suppress_date false
+\use_refstyle 0
+\index Index
+\shortcut idx
+\color #008000
+\end_index
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation indent
+\paragraph_indentation default
+\quotes_language english
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tracking_changes true
+\output_changes true
+\html_math_output 0
+\html_css_as_file 0
+\html_be_strict false
+\end_header
+
+\begin_body
+
+\begin_layout Title
+NTDB: Redesigning The Trivial DataBase
+\end_layout
+
+\begin_layout Author
+Rusty Russell, IBM Corporation
+\end_layout
+
+\begin_layout Date
+19 June 2012
+\end_layout
+
+\begin_layout Abstract
+The Trivial DataBase on-disk format is 32 bits; with usage cases heading
+ towards the 4G limit, that must change.
+ This required breakage provides an opportunity to revisit TDB's other design
+ decisions and reassess them.
+\end_layout
+
+\begin_layout Section
+Introduction
+\end_layout
+
+\begin_layout Standard
+The Trivial DataBase was originally written by Andrew Tridgell as a simple
+ key/data pair storage system with the same API as dbm, but allowing multiple
+ readers and writers while being small enough (< 1000 lines of C) to include
+ in SAMBA.
+ The simple design created in 1999 has proven surprisingly robust and performant
+, used in Samba versions 3 and 4 as well as numerous other projects.
+ Its useful life was greatly increased by the (backwards-compatible!) addition
+ of transaction support in 2005.
+\end_layout
+
+\begin_layout Standard
+The wider variety and greater demands of TDB-using code has led to some
+ organic growth of the API, as well as some compromises on the implementation.
+ None of these, by themselves, are seen as show-stoppers, but the cumulative
+ effect is a loss of elegance over the initial, simple TDB implementation.
+ Here is a table of the approximate number of lines of implementation code
+ and number of API functions at the end of each year:
+\end_layout
+
+\begin_layout Standard
+\begin_inset Tabular
+<lyxtabular version="3" rows="12" columns="3">
+<features tabularvalignment="middle">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Year End
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+API Functions
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Lines of C Code Implementation
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1999
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+13
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1195
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2000
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+24
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1725
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2001
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+32
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2228
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2002
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+35
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2481
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2003
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+35
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2552
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2004
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+40
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2584
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2005
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+38
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2647
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2006
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+52
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+3754
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2007
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+66
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+4398
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2008
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+71
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+4768
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2009
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+73
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+5715
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+This review is an attempt to catalog and address all the known issues with
+ TDB and create solutions which address the problems without significantly
+ increasing complexity; all involved are far too aware of the dangers of
+ second system syndrome in rewriting a successful project like this.
+\end_layout
+
+\begin_layout Standard
+Note: the final decision was to make ntdb a separate library, with a separate
+ 'ntdb' namespace so both can potentially be linked together.
+ This document still refers to
+\begin_inset Quotes eld
+\end_inset
+
+tdb
+\begin_inset Quotes erd
+\end_inset
+
+ everywhere, for simplicity.
+\end_layout
+
+\begin_layout Section
+API Issues
+\end_layout
+
+\begin_layout Subsection
+tdb_open_ex Is Not Expandable
+\end_layout
+
+\begin_layout Standard
+The tdb_open() call was expanded to tdb_open_ex(), which added an optional
+ hashing function and an optional logging function argument.
+ Additional arguments to open would require the introduction of a tdb_open_ex2
+ call etc.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\begin_inset CommandInset label
+LatexCommand label
+name "attributes"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+tdb_open() will take a linked-list of attributes:
+\end_layout
+
+\begin_layout LyX-Code
+enum tdb_attribute {
+\end_layout
+
+\begin_layout LyX-Code
+ TDB_ATTRIBUTE_LOG = 0,
+\end_layout
+
+\begin_layout LyX-Code
+ TDB_ATTRIBUTE_HASH = 1
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout LyX-Code
+struct tdb_attribute_base {
+\end_layout
+
+\begin_layout LyX-Code
+ enum tdb_attribute attr;
+\end_layout
+
+\begin_layout LyX-Code
+ union tdb_attribute *next;
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout LyX-Code
+struct tdb_attribute_log {
+\end_layout
+
+\begin_layout LyX-Code
+ struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
+\end_layout
+
+\begin_layout LyX-Code
+ tdb_log_func log_fn;
+\end_layout
+
+\begin_layout LyX-Code
+ void *log_private;
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout LyX-Code
+struct tdb_attribute_hash {
+\end_layout
+
+\begin_layout LyX-Code
+ struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
+\end_layout
+
+\begin_layout LyX-Code
+ tdb_hash_func hash_fn;
+\end_layout
+
+\begin_layout LyX-Code
+ void *hash_private;
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout LyX-Code
+union tdb_attribute {
+\end_layout
+
+\begin_layout LyX-Code
+ struct tdb_attribute_base base;
+\end_layout
+
+\begin_layout LyX-Code
+ struct tdb_attribute_log log;
+\end_layout
+
+\begin_layout LyX-Code
+ struct tdb_attribute_hash hash;
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout Standard
+This allows future attributes to be added, even if this expands the size
+ of the union.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+tdb_traverse Makes Impossible Guarantees
+\end_layout
+
+\begin_layout Standard
+tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it
+ was thought that it was important to guarantee that all records which exist
+ at the start and end of the traversal would be included, and no record
+ would be included twice.
+\end_layout
+
+\begin_layout Standard
+This adds complexity (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "Reliable-Traversal-Adds"
+
+\end_inset
+
+) and does not work anyway for records which are altered (in particular,
+ those which are expanded may be effectively deleted and re-added behind
+ the traversal).
+\end_layout
+
+\begin_layout Subsubsection
+\begin_inset CommandInset label
+LatexCommand label
+name "traverse-Proposed-Solution"
+
+\end_inset
+
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Abandon the guarantee.
+ You will see every record if no changes occur during your traversal, otherwise
+ you will see some subset.
+ You can prevent changes by using a transaction or the locking API.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+ Delete-during-traverse will still delete every record, too (assuming no
+ other changes).
+\end_layout
+
+\begin_layout Subsection
+Nesting of Transactions Is Fraught
+\end_layout
+
+\begin_layout Standard
+TDB has alternated between allowing nested transactions and not allowing
+ them.
+ Various paths in the Samba codebase assume that transactions will nest,
+ and in a sense they can: the operation is only committed to disk when the
+ outer transaction is committed.
+ There are two problems, however:
+\end_layout
+
+\begin_layout Enumerate
+Canceling the inner transaction will cause the outer transaction commit
+ to fail, and will not undo any operations since the inner transaction began.
+ This problem is soluble with some additional internal code.
+\end_layout
+
+\begin_layout Enumerate
+An inner transaction commit can be cancelled by the outer transaction.
+ This is desirable in the way which Samba's database initialization code
+ uses transactions, but could be a surprise to any users expecting a successful
+ transaction commit to expose changes to others.
+\end_layout
+
+\begin_layout Standard
+The current solution is to specify the behavior at tdb_open(), with the
+ default currently that nested transactions are allowed.
+ This flag can also be changed at runtime.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Given the usage patterns, it seems that the
+\begin_inset Quotes eld
+\end_inset
+
+least-surprise
+\begin_inset Quotes erd
+\end_inset
+
+ behavior of disallowing nested transactions should become the default.
+ Additionally, it seems the outer transaction is the only code which knows
+ whether inner transactions should be allowed, so a flag to indicate this
+ could be added to tdb_transaction_start.
+ However, this behavior can be simulated with a wrapper which uses tdb_add_flags
+() and tdb_remove_flags(), so the API should not be expanded for this relatively
+-obscure case.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete; the nesting flag has been removed.
+\end_layout
+
+\begin_layout Subsection
+Incorrect Hash Function is Not Detected
+\end_layout
+
+\begin_layout Standard
+tdb_open_ex() allows the calling code to specify a different hash function
+ to use, but does not check that all other processes accessing this tdb
+ are using the same hash function.
+ The result is that records are missing from tdb_fetch().
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The header should contain an example hash result (eg.
+ the hash of 0xdeadbeef), and tdb_open_ex() should check that the given
+ hash function produces the same answer, or fail the tdb_open call.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+tdb_set_max_dead/TDB_VOLATILE Expose Implementation
+\end_layout
+
+\begin_layout Standard
+In response to scalability issues with the free list (
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "TDB-Freelist-Is"
+
+\end_inset
+
+) two API workarounds have been incorporated in TDB: tdb_set_max_dead()
+ and the TDB_VOLATILE flag to tdb_open.
+ The latter actually calls the former with an argument of
+\begin_inset Quotes eld
+\end_inset
+
+5
+\begin_inset Quotes erd
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+This code allows deleted records to accumulate without putting them in the
+ free list.
+ On delete we iterate through each chain and free them in a batch if there
+ are more than max_dead entries.
+ These are never otherwise recycled except as a side-effect of a tdb_repack.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+With the scalability problems of the freelist solved, this API can be removed.
+ The TDB_VOLATILE flag may still be useful as a hint that store and delete
+ of records will be at least as common as fetch in order to allow some internal
+ tuning, but initially will become a no-op.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+ Unknown flags cause tdb_open() to fail as well, so they can be detected
+ at runtime.
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "TDB-Files-Cannot"
+
+\end_inset
+
+TDB Files Cannot Be Opened Multiple Times In The Same Process
+\end_layout
+
+\begin_layout Standard
+No process can open the same TDB twice; we check and disallow it.
+ This is an unfortunate side-effect of fcntl locks, which operate on a per-file
+ rather than per-file-descriptor basis, and do not nest.
+ Thus, closing any file descriptor on a file clears all the locks obtained
+ by this process, even if they were placed using a different file descriptor!
+\end_layout
+
+\begin_layout Standard
+Note that even if this were solved, deadlock could occur if operations were
+ nested: this is a more manageable programming error in most cases.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+We could lobby POSIX to fix the perverse rules, or at least lobby Linux
+ to violate them so that the most common implementation does not have this
+ restriction.
+ This would be a generally good idea for other fcntl lock users.
+\end_layout
+
+\begin_layout Standard
+Samba uses a wrapper which hands out the same tdb_context to multiple callers
+ if this happens, and does simple reference counting.
+ We should do this inside the tdb library, which already emulates lock nesting
+ internally; it would need to recognize when deadlock occurs within a single
+ process.
+ This would create a new failure mode for tdb operations (while we currently
+ handle locking failures, they are impossible in normal use and a process
+ encountering them can do little but give up).
+\end_layout
+
+\begin_layout Standard
+I do not see benefit in an additional tdb_open flag to indicate whether
+ re-opening is allowed, though there may be some benefit to adding a
+ call to detect when a tdb_context is shared, to allow others to create such
+ an API.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+TDB API Is Not POSIX Thread-safe
+\end_layout
+
+\begin_layout Standard
+The TDB API uses an error code which can be queried after an operation to
+ determine what went wrong.
+ This programming model does not work with threads, unless specific additional
+ guarantees are given by the implementation.
+ In addition, even otherwise-independent threads cannot open the same TDB
+ (as in
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "TDB-Files-Cannot"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Rearchitecting the API to include a tdb_errcode pointer would be a great
+ deal of churn, but fortunately most functions return 0 on success and -1
+ on error: we can change these to return 0 on success and a negative error
+ code on error, and the API remains similar to previous.
+ The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA
+ pointer and return an error code.
+ It is also simpler to have tdb_nextkey replace its key argument in place,
+ freeing up any old .dptr.
+\end_layout
+
+\begin_layout Standard
+Internal locking is required to make sure that fcntl locks do not overlap
+ between threads, and also that the global list of tdbs is maintained.
+\end_layout
+
+\begin_layout Standard
+The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe
+ version of the library, and otherwise no overhead will exist.
+ Alternatively, a hooking mechanism similar to that proposed for
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "Proposed-Solution-locking-hook"
+
+\end_inset
+
+ could be used to enable pthread locking at runtime.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Incomplete; API has been changed but thread safety has not been implemented.
+\end_layout
+
+\begin_layout Subsection
+*_nonblock Functions And *_mark Functions Expose Implementation
+\end_layout
+
+\begin_layout Standard
+CTDB
+\begin_inset Foot
+status collapsed
+
+\begin_layout Plain Layout
+Clustered TDB, see http://ctdb.samba.org
+\end_layout
+
+\end_inset
+
+ wishes to operate on TDB in a non-blocking manner.
+ This is currently done as follows:
+\end_layout
+
+\begin_layout Enumerate
+Call the _nonblock variant of an API function (eg.
+ tdb_lockall_nonblock).
+ If this fails:
+\end_layout
+
+\begin_layout Enumerate
+Fork a child process, and wait for it to call the normal variant (eg.
+ tdb_lockall).
+\end_layout
+
+\begin_layout Enumerate
+If the child succeeds, call the _mark variant to indicate we already have
+ the locks (eg.
+ tdb_lockall_mark).
+\end_layout
+
+\begin_layout Enumerate
+Upon completion, tell the child to release the locks (eg.
+ tdb_unlockall).
+\end_layout
+
+\begin_layout Enumerate
+Indicate to tdb that it should consider the locks removed (eg.
+ tdb_unlockall_mark).
+\end_layout
+
+\begin_layout Standard
+There are several issues with this approach.
+ Firstly, adding two new variants of each function clutters the API for
+ an obscure use, and so not all functions have three variants.
+ Secondly, it assumes that all paths of the functions ask for the same locks,
+ otherwise the parent process will have to get a lock which the child doesn't
+ have under some circumstances.
+ I don't believe this is currently the case, but it constrains the implementatio
+n.
+\end_layout
+
+\begin_layout Subsubsection
+\begin_inset CommandInset label
+LatexCommand label
+name "Proposed-Solution-locking-hook"
+
+\end_inset
+
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Implement a hook for locking methods, so that the caller can control the
+ calls to create and remove fcntl locks.
+ In this scenario, ctdbd would operate as follows:
+\end_layout
+
+\begin_layout Enumerate
+Call the normal API function, eg tdb_lockall().
+\end_layout
+
+\begin_layout Enumerate
+When the lock callback comes in, check if the child has the lock.
+ Initially, this is always false.
+ If so, return 0.
+ Otherwise, try to obtain it in non-blocking mode.
+ If that fails, return EWOULDBLOCK.
+\end_layout
+
+\begin_layout Enumerate
+Release locks in the unlock callback as normal.
+\end_layout
+
+\begin_layout Enumerate
+If tdb_lockall() fails, see if we recorded a lock failure; if so, call the
+ child to repeat the operation.
+\end_layout
+
+\begin_layout Enumerate
+The child records what locks it obtains, and returns that information to
+ the parent.
+\end_layout
+
+\begin_layout Enumerate
+When the child has succeeded, goto 1.
+\end_layout
+
+\begin_layout Standard
+This is flexible enough to handle any potential locking scenario, even when
+ lock requirements change.
+ It can be optimized so that the parent does not release locks, just tells
+ the child which locks it doesn't need to obtain.
+\end_layout
+
+\begin_layout Standard
+It also keeps the complexity out of the API, and in ctdbd where it is needed.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+tdb_chainlock Functions Expose Implementation
+\end_layout
+
+\begin_layout Standard
+tdb_chainlock locks some number of records, including the record indicated
+ by the given key.
+ This gives atomicity guarantees; no-one can start a transaction, alter,
+ read or delete that key while the lock is held.
+\end_layout
+
+\begin_layout Standard
+It also makes the same guarantee for any other key in the chain, which is
+ an internal implementation detail and potentially a cause for deadlock.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+None.
+ It would be nice to have an explicit single entry lock which affected no
+ other keys.
+ Unfortunately, this won't work for an entry which doesn't exist.
+ Thus while chainlock may be implemented more efficiently for the existing
+ case, it will still have overlap issues with the non-existing case.
+ So it is best to keep the current (lack of) guarantee about which records
+ will be affected to avoid constraining our implementation.
+\end_layout
+
+\begin_layout Subsection
+Signal Handling is Not Race-Free
+\end_layout
+
+\begin_layout Standard
+The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate
+ that the tdb locking code should return with a failure, rather than trying
+ again when a signal is received (and errno == EAGAIN).
+ This is usually used to implement timeouts.
+\end_layout
+
+\begin_layout Standard
+Unfortunately, this does not work in the case where the signal is received
+ before the tdb code enters the fcntl() call to place the lock: the code
+ will sleep within the fcntl() code, unaware that the signal wants it to
+ exit.
+ In the case of long timeouts, this does not happen in practice.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The locking hooks proposed in
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "Proposed-Solution-locking-hook"
+
+\end_inset
+
+ would allow the user to decide on whether to fail the lock acquisition
+ on a signal.
+ This allows the caller to choose their own compromise: they could narrow
+ the race by checking immediately before the fcntl call.
+\begin_inset Foot
+status collapsed
+
+\begin_layout Plain Layout
+It may be possible to make this race-free in some implementations by having
+ the signal handler alter the struct flock to make it invalid.
+ This will cause the fcntl() lock call to fail with EINVAL if the signal
+ occurs before the kernel is entered, otherwise EAGAIN.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+The API Uses Gratuitous Typedefs, Capitals
+\end_layout
+
+\begin_layout Standard
+typedefs are useful for providing source compatibility when types can differ
+ across implementations, or arguably in the case of function pointer definitions
+ which are hard for humans to parse.
+ Otherwise it is simply obfuscation and pollutes the namespace.
+\end_layout
+
+\begin_layout Standard
+Capitalization is usually reserved for compile-time constants and macros.
+\end_layout
+
+\begin_layout Description
+TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the
+ definition isn't visible to the API user anyway.
+\end_layout
+
+\begin_layout Description
+TDB_DATA There is no reason to use this over struct TDB_DATA; the struct
+ needs to be understood by the API user.
+\end_layout
+
+\begin_layout Description
+struct
+\begin_inset space ~
+\end_inset
+
+TDB_DATA This would normally be called 'struct tdb_data'.
+\end_layout
+
+\begin_layout Description
+enum
+\begin_inset space ~
+\end_inset
+
+TDB_ERROR Similarly, this would normally be enum tdb_error.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+None.
+ Introducing lower case variants would please pedants like myself, but if
+ it were done the existing ones should be kept.
+ There is little point forcing a purely cosmetic change upon tdb users.
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "tdb_log_func-Doesnt-Take"
+
+\end_inset
+
+tdb_log_func Doesn't Take The Private Pointer
+\end_layout
+
+\begin_layout Standard
+For API compatibility reasons, the logging function needs to call tdb_get_loggin
+g_private() to retrieve the pointer registered by the tdb_open_ex for logging.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+It should simply take an extra argument, since we are prepared to break
+ the API/ABI.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+Various Callback Functions Are Not Typesafe
+\end_layout
+
+\begin_layout Standard
+The callback functions in tdb_set_logging_function (after
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "tdb_log_func-Doesnt-Take"
+
+\end_inset
+
+ is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check
+ all take void * and must internally convert it to the argument type they
+ were expecting.
+\end_layout
+
+\begin_layout Standard
+If this type changes, the compiler will not produce warnings on the callers,
+ since it only sees void *.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+With careful use of macros, we can create callback functions which give
+ a warning when used on gcc and the types of the callback and its private
+ argument differ.
+ Unsupported compilers will not give a warning, which is no worse than now.
+ In addition, the callbacks become clearer, as they need not use void *
+ for their parameter.
+\end_layout
+
+\begin_layout Standard
+See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic
+\end_layout
+
+\begin_layout Standard
+The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should
+ be cleared if the caller discovers it is the only process with the TDB
+ open.
+ However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not
+ be detected, so will have the TDB erased underneath them (usually resulting
+ in a crash).
+\end_layout
+
+\begin_layout Standard
+There is a similar issue on fork(); if the parent exits (or otherwise closes
+ the tdb) before the child calls tdb_reopen_all() to establish the lock
+ used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener
+ at that moment will believe it alone has opened the TDB and will erase
+ it.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Remove TDB_CLEAR_IF_FIRST.
+ Other workarounds are possible, but see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "TDB_CLEAR_IF_FIRST-Imposes-Performance"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+ An open hook is provided to replicate this functionality if required.
+\end_layout
+
+\begin_layout Subsection
+Extending The Header Is Difficult
+\end_layout
+
+\begin_layout Standard
+We have reserved (zeroed) words in the TDB header, which can be used for
+ future features.
+ If the future features are compulsory, the version number must be updated
+ to prevent old code from accessing the database.
+ But if the future feature is optional, we have no way of telling if older
+ code is accessing the database or not.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The header should contain a
+\begin_inset Quotes eld
+\end_inset
+
+format variant
+\begin_inset Quotes erd
+\end_inset
+
+ value (64-bit).
+ This is divided into two 32-bit parts:
+\end_layout
+
+\begin_layout Enumerate
+The lower part reflects the format variant understood by code accessing
+ the database.
+\end_layout
+
+\begin_layout Enumerate
+The upper part reflects the format variant you must understand to write
+ to the database (otherwise you can only open for reading).
+\end_layout
+
+\begin_layout Standard
+The latter field can only be written at creation time, the former should
+ be written under the OPEN_LOCK when opening the database for writing, if
+ the variant of the code is lower than the current lowest variant.
+\end_layout
+
+\begin_layout Standard
+This should allow backwards-compatible features to be added, and detection
+ if older code (which doesn't understand the feature) writes to the database.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+Record Headers Are Not Expandable
+\end_layout
+
+\begin_layout Standard
+If we later want to add (say) checksums on keys and data, it would require
+ another format change, which we'd like to avoid.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+We often have extra padding at the tail of a record.
+ If we ensure that the first byte (if any) of this padding is zero, we will
+ have a way for future changes to detect code which doesn't understand a
+ new format: the new code would write (say) a 1 at the tail, and thus if
+ there is no tail or the first byte is 0, we would know the extension is
+ not present on that record.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+TDB Does Not Use Talloc
+\end_layout
+
+\begin_layout Standard
+Many users of TDB (particularly Samba) use the talloc allocator, and thus
+ have to wrap TDB in a talloc context to use it conveniently.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The allocation within TDB is not complicated enough to justify the use of
+ talloc, and I am reluctant to force another (excellent) library on TDB
+ users.
+ Nonetheless a compromise is possible.
+ An attribute (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "attributes"
+
+\end_inset
+
+) can be added later to tdb_open() to provide an alternate allocation mechanism,
+ specifically for talloc but usable by any other allocator (which would
+ ignore the
+\begin_inset Quotes eld
+\end_inset
+
+context
+\begin_inset Quotes erd
+\end_inset
+
+ argument).
+\end_layout
+
+\begin_layout Standard
+This would form a talloc hierarchy as expected, but the caller would still
+ have to attach a destructor to the tdb context returned from tdb_open to
+ close it.
+ All TDB_DATA fields would be children of the tdb_context, and the caller
+ would still have to manage them (using talloc_free() or talloc_steal()).
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute.
+\end_layout
+
+\begin_layout Section
+Performance And Scalability Issues
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "TDB_CLEAR_IF_FIRST-Imposes-Performance"
+
+\end_inset
+
+TDB_CLEAR_IF_FIRST Imposes Performance Penalty
+\end_layout
+
+\begin_layout Standard
+When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset
+ 4 (aka.
+ the ACTIVE_LOCK).
+ While these locks never conflict in normal tdb usage, they do add substantial
+ overhead for most fcntl lock implementations when the kernel scans to detect
+ if a lock conflict exists.
+ This is often a single linked list, making the time to acquire and release
+ a fcntl lock O(N) where N is the number of processes with the TDB open,
+ not the number actually doing work.
+\end_layout
+
+\begin_layout Standard
+In a Samba server it is common to have huge numbers of clients sitting idle,
+ and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag.
+\begin_inset Foot
+status collapsed
+
+\begin_layout Plain Layout
+There is a flag to tdb_reopen_all() which is used for this optimization:
+ if the parent process will outlive the child, the child does not need the
+ ACTIVE_LOCK.
+ This is a workaround for this very performance issue.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Remove the flag.
+ It was a neat idea, but even trivial servers tend to know when they are
+ initializing for the first time and can simply unlink the old tdb at that
+ point.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+TDB Files Have a 4G Limit
+\end_layout
+
+\begin_layout Standard
+This seems to be becoming an issue (so much for
+\begin_inset Quotes eld
+\end_inset
+
+trivial
+\begin_inset Quotes erd
+\end_inset
+
+!), particularly for ldb.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+A new, incompatible TDB format which uses 64 bit offsets internally rather
+ than 32 bit as now.
+ For simplicity of endian conversion (which TDB does on the fly if required),
+ all values will be 64 bit on disk.
+ In practice, some upper bits may be used for other purposes, but at least
+ 56 bits will be available for file offsets.
+\end_layout
+
+\begin_layout Standard
+tdb_open() will automatically detect the old version, and even create them
+ if TDB_VERSION6 is specified to tdb_open.
+\end_layout
+
+\begin_layout Standard
+32 bit processes will still be able to access TDBs larger than 4G (assuming
+ that their off_t allows them to seek to 64 bits), they will gracefully
+ fall back as they fail to mmap.
+ This can happen already with large TDBs.
+\end_layout
+
+\begin_layout Standard
+Old versions of tdb will fail to open the new TDB files (since 28 August
+ 2009, commit 398d0c29290: prior to that any unrecognized file format would
+ be erased and initialized as a fresh tdb!)
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+TDB Records Have a 4G Limit
+\end_layout
+
+\begin_layout Standard
+This has not been a reported problem, and the API uses size_t which can
+ be 64 bit on 64 bit platforms.
+ However, other limits may have made such an issue moot.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Record sizes will be 64 bit, with an error returned on 32 bit platforms
+ which try to access such records (the current implementation would return
+ TDB_ERR_OOM in a similar case).
+ It seems unlikely that 32 bit keys will be a limitation, so the implementation
+ may not support this (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:Records-Incur-A"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+Hash Size Is Determined At TDB Creation Time
+\end_layout
+
+\begin_layout Standard
+TDB contains a number of hash chains in the header; the number is specified
+ at creation time, and defaults to 131.
+ This is such a bottleneck on large databases (as each hash chain gets quite
+ long), that LDB uses 10,000 for this hash.
+ In general it is impossible to know what the 'right' answer is at database
+ creation time.
+\end_layout
+
+\begin_layout Subsubsection
+\begin_inset CommandInset label
+LatexCommand label
+name "sub:Hash-Size-Solution"
+
+\end_inset
+
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+After comprehensive performance testing on various scalable hash variants
+\begin_inset Foot
+status collapsed
+
+\begin_layout Plain Layout
+http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying
+ because I was previously convinced that an expanding tree of hashes would
+ be very close to optimal.
+\end_layout
+
+\end_inset
+
+, it became clear that it is hard to beat a straight linear hash table which
+ doubles in size when it reaches saturation.
+ Unfortunately, altering the hash table introduces serious locking complications
+: the entire hash table needs to be locked to enlarge the hash table, and
+ others might be holding locks.
+ Particularly insidious are insertions done under tdb_chainlock.
+\end_layout
+
+\begin_layout Standard
+Thus an expanding layered hash will be used: an array of hash groups, with
+ each hash group exploding into pointers to lower hash groups once it fills,
+ turning into a hash tree.
+ This has implications for locking: we must lock the entire group in case
+ we need to expand it, yet we don't know how deep the tree is at that point.
+\end_layout
+
+\begin_layout Standard
+Note that bits from the hash table entries should be stolen to hold more
+ hash bits to reduce the penalty of collisions.
+ We can use the otherwise-unused lower 3 bits.
+ If we limit the size of the database to 64 exabytes, we can use the top
+ 8 bits of the hash entry as well.
+ These 11 bits would reduce false positives down to 1 in 2000 which is more
+ than we need: we can use one of the bits to indicate that the extra hash
+ bits are valid.
+ This means we can choose not to re-hash all entries when we expand a hash
+ group; simply use the next bits we need and mark them invalid.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Ignore.
+ Scaling the hash automatically proved inefficient at small hash sizes;
+ we default to a 8192-element hash (changeable via NTDB_ATTRIBUTE_HASHSIZE),
+ and when buckets clash we expand to an array of hash entries.
+ This scales slightly better than the tdb chain (due to the 8 top bits containin
+g extra hash).
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "TDB-Freelist-Is"
+
+\end_inset
+
+TDB Freelist Is Highly Contended
+\end_layout
+
+\begin_layout Standard
+TDB uses a single linked list for the free list.
+ Allocation occurs as follows, using heuristics which have evolved over
+ time:
+\end_layout
+
+\begin_layout Enumerate
+Get the free list lock for this whole operation.
+\end_layout
+
+\begin_layout Enumerate
+Multiply length by 1.25, so we always over-allocate by 25%.
+\end_layout
+
+\begin_layout Enumerate
+Set the slack multiplier to 1.
+\end_layout
+
+\begin_layout Enumerate
+Examine the current freelist entry: if it is > length but < the current
+ best case, remember it as the best case.
+\end_layout
+
+\begin_layout Enumerate
+Multiply the slack multiplier by 1.05.
+\end_layout
+
+\begin_layout Enumerate
+If our best fit so far is less than length * slack multiplier, return it.
+ The slack will be turned into a new free record if it's large enough.
+\end_layout
+
+\begin_layout Enumerate
+Otherwise, go onto the next freelist entry.
+\end_layout
+
+\begin_layout Standard
+Deleting a record occurs as follows:
+\end_layout
+
+\begin_layout Enumerate
+Lock the hash chain for this whole operation.
+\end_layout
+
+\begin_layout Enumerate
+Walk the chain to find the record, keeping the prev pointer offset.
+\end_layout
+
+\begin_layout Enumerate
+If max_dead is non-zero:
+\end_layout
+
+\begin_deeper
+\begin_layout Enumerate
+Walk the hash chain again and count the dead records.
+\end_layout
+
+\begin_layout Enumerate
+If it's more than max_dead, bulk free all the dead ones (similar to steps
+ 4 and below, but the lock is only obtained once).
+\end_layout
+
+\begin_layout Enumerate
+Simply mark this record as dead and return.
+\end_layout
+
+\end_deeper
+\begin_layout Enumerate
+Get the free list lock for the remainder of this operation.
+\end_layout
+
+\begin_layout Enumerate
+\begin_inset CommandInset label
+LatexCommand label
+name "right-merging"
+
+\end_inset
+
+Examine the following block to see if it is free; if so, enlarge the current
+ block and remove that block from the free list.
+ This was disabled, as removal from the free list was O(entries-in-free-list).
+\end_layout
+
+\begin_layout Enumerate
+Examine the preceding block to see if it is free: for this reason, each
+ block has a 32-bit tailer which indicates its length.
+ If it is free, expand it to cover our new block and return.
+\end_layout
+
+\begin_layout Enumerate
+Otherwise, prepend ourselves to the free list.
+\end_layout
+
+\begin_layout Standard
+Disabling right-merging (step
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "right-merging"
+
+\end_inset
+
+) causes fragmentation; the other heuristics proved insufficient to address
+ this, so the final answer to this was that when we expand the TDB file
+ inside a transaction commit, we repack the entire tdb.
+\end_layout
+
+\begin_layout Standard
+The single list lock limits our allocation rate; due to the other issues
+ this is not currently seen as a bottleneck.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The first step is to remove all the current heuristics, as they obviously
+ interact, then examine them once the lock contention is addressed.
+\end_layout
+
+\begin_layout Standard
+The free list must be split to reduce contention.
+ Assuming perfect free merging, we can at most have 1 free list entry for
+ each entry.
+ This implies that the number of free lists is related to the size of the
+ hash table, but as it is rare to walk a large number of free list entries
+ we can use far fewer, say 1/32 of the number of hash buckets.
+\end_layout
+
+\begin_layout Standard
+It seems tempting to try to reuse the hash implementation which we use for
+ records here, but we have two ways of searching for free entries: for allocatio
+n we search by size (and possibly zone) which produces too many clashes
+ for our hash table to handle well, and for coalescing we search by address.
+ Thus an array of doubly-linked free lists seems preferable.
+\end_layout
+
+\begin_layout Standard
+There are various benefits in using per-size free lists (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:TDB-Becomes-Fragmented"
+
+\end_inset
+
+) but it's not clear this would reduce contention in the common case where
+ all processes are allocating/freeing the same size.
+ Thus we almost certainly need to divide in other ways: the most obvious
+ is to divide the file into zones, and using a free list (or table of free
+ lists) for each.
+ This approximates address ordering.
+\end_layout
+
+\begin_layout Standard
+Unfortunately it is difficult to know what heuristics should be used to
+ determine zone sizes, and our transaction code relies on being able to
+ create a
+\begin_inset Quotes eld
+\end_inset
+
+recovery area
+\begin_inset Quotes erd
+\end_inset
+
+ by simply appending to the file (difficult if it would need to create a
+ new zone header).
+ Thus we use a linked-list of free tables; currently we only ever create
+ one, but if there is more than one we choose one at random to use.
+ In future we may use heuristics to add new free tables on contention.
+ We only expand the file when all free tables are exhausted.
+\end_layout
+
+\begin_layout Standard
+The basic algorithm is as follows.
+ Freeing is simple:
+\end_layout
+
+\begin_layout Enumerate
+Identify the correct free list.
+\end_layout
+
+\begin_layout Enumerate
+Lock the corresponding list.
+\end_layout
+
+\begin_layout Enumerate
+Re-check the list (we didn't have a lock, sizes could have changed): relock
+ if necessary.
+\end_layout
+
+\begin_layout Enumerate
+Place the freed entry in the list.
+\end_layout
+
+\begin_layout Standard
+Allocation is a little more complicated, as we perform delayed coalescing
+ at this point:
+\end_layout
+
+\begin_layout Enumerate
+Pick a free table; usually the previous one.
+\end_layout
+
+\begin_layout Enumerate
+Lock the corresponding list.
+\end_layout
+
+\begin_layout Enumerate
+If the top entry is large enough, remove it from the list and return it.
+\end_layout
+
+\begin_layout Enumerate
+Otherwise, coalesce entries in the list.
+ If there was no entry large enough, unlock the list and try the next largest
+ list.
+\end_layout
+
+\begin_layout Enumerate
+If no list has an entry which meets our needs, try the next free table.
+\end_layout
+
+\begin_layout Enumerate
+If no zone satisfies, expand the file.
+\end_layout
+
+\begin_layout Standard
+This optimizes rapid insert/delete of free list entries by not coalescing
+ them all the time.
+ First-fit address ordering seems to be fairly good for keeping
+ fragmentation low (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:TDB-Becomes-Fragmented"
+
+\end_inset
+
+).
+ Note that address ordering does not need a tailer to coalesce, though if
+ we needed one we could have one cheaply: see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:Records-Incur-A"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+Each free entry has the free table number in the header: less than 255.
+ It also contains a doubly-linked list for easy deletion.
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "sub:TDB-Becomes-Fragmented"
+
+\end_inset
+
+TDB Becomes Fragmented
+\end_layout
+
+\begin_layout Standard
+Much of this is a result of allocation strategy
+\begin_inset Foot
+status collapsed
+
+\begin_layout Plain Layout
+The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute
+xas.edu/pub/garbage/malloc/ismm98.ps
+\end_layout
+
+\end_inset
+
+ and deliberate hobbling of coalescing; internal fragmentation (aka overallocati
+on) is deliberately set at 25%, and external fragmentation is only cured
+ by the decision to repack the entire db when a transaction commit needs
+ to enlarge the file.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The 25% overhead on allocation works in practice for ldb because indexes
+ tend to expand by one record at a time.
+ This internal fragmentation can be resolved by having an
+\begin_inset Quotes eld
+\end_inset
+
+expanded
+\begin_inset Quotes erd
+\end_inset
+
+ bit in the header to note entries that have previously expanded, and allocating
+ more space for them.
+\end_layout
+
+\begin_layout Standard
+There is a spectrum of possible solutions for external fragmentation:
+ one is to use a fragmentation-avoiding allocation strategy such as best-fit
+ address-order allocator.
+ The other end of the spectrum would be to use a bump allocator (very fast
+ and simple) and simply repack the file when we reach the end.
+\end_layout
+
+\begin_layout Standard
+There are three problems with efficient fragmentation-avoiding allocators:
+ they are non-trivial, they tend to use a single free list for each size,
+ and there's no evidence that tdb allocation patterns will match those recorded
+ for general allocators (though it seems likely).
+\end_layout
+
+\begin_layout Standard
+Thus we don't spend too much effort on external fragmentation; we will be
+ no worse than the current code if we need to repack on occasion.
+ More effort is spent on reducing freelist contention, and reducing overhead.
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "sub:Records-Incur-A"
+
+\end_inset
+
+Records Incur A 28-Byte Overhead
+\end_layout
+
+\begin_layout Standard
+Each TDB record has a header as follows:
+\end_layout
+
+\begin_layout LyX-Code
+struct tdb_record {
+\end_layout
+
+\begin_layout LyX-Code
+ tdb_off_t next; /* offset of the next record in the list */
+\end_layout
+
+\begin_layout LyX-Code
+ tdb_len_t rec_len; /* total byte length of record */
+\end_layout
+
+\begin_layout LyX-Code
+ tdb_len_t key_len; /* byte length of key */
+\end_layout
+
+\begin_layout LyX-Code
+ tdb_len_t data_len; /* byte length of data */
+\end_layout
+
+\begin_layout LyX-Code
+ uint32_t full_hash; /* the full 32 bit hash of the key */
+\end_layout
+
+\begin_layout LyX-Code
+ uint32_t magic; /* try to catch errors */
+\end_layout
+
+\begin_layout LyX-Code
+ /* the following union is implied:
+\end_layout
+
+\begin_layout LyX-Code
+ union {
+\end_layout
+
+\begin_layout LyX-Code
+ char record[rec_len];
+\end_layout
+
+\begin_layout LyX-Code
+ struct {
+\end_layout
+
+\begin_layout LyX-Code
+ char key[key_len];
+\end_layout
+
+\begin_layout LyX-Code
+ char data[data_len];
+\end_layout
+
+\begin_layout LyX-Code
+ }
+\end_layout
+
+\begin_layout LyX-Code
+ uint32_t totalsize; (tailer)
+\end_layout
+
+\begin_layout LyX-Code
+ }
+\end_layout
+
+\begin_layout LyX-Code
+ */
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout Standard
+Naively, this would double to a 56-byte overhead on a 64 bit implementation.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+We can use various techniques to reduce this for an allocated block:
+\end_layout
+
+\begin_layout Enumerate
+The 'next' pointer is not required, as we are using a flat hash table.
+\end_layout
+
+\begin_layout Enumerate
+'rec_len' can instead be expressed as an addition to key_len and data_len
+ (it accounts for wasted or overallocated length in the record).
+ Since the record length is always a multiple of 8, we can conveniently
+ fit it in 32 bits (representing up to 35 bits).
+\end_layout
+
+\begin_layout Enumerate
+'key_len' and 'data_len' can be reduced.
+ I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine
+ the two into one 64-bit field and use a 5 bit value which indicates at
+ what bit to divide the two.
+ Keys are unlikely to scale as fast as data, so I'm assuming a maximum key
+ size of 32 bits.
+\end_layout
+
+\begin_layout Enumerate
+'full_hash' is used to avoid a memcmp on the
+\begin_inset Quotes eld
+\end_inset
+
+miss
+\begin_inset Quotes erd
+\end_inset
+
+ case, but this is diminishing returns after a handful of bits (at 10 bits,
+ it reduces 99.9% of false memcmp).
+ As an aside, as the lower bits are already incorporated in the hash table
+ resolution, the upper bits should be used here.
+ Note that it's not clear that these bits will be a win, given the extra
+ bits in the hash table itself (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:Hash-Size-Solution"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Enumerate
+'magic' does not need to be enlarged: it currently reflects one of 5 values
+ (used, free, dead, recovery, and unused_recovery).
+ It is useful for quick sanity checking however, and should not be eliminated.
+\end_layout
+
+\begin_layout Enumerate
+'tailer' is only used to coalesce free blocks (so a block to the right can
+ find the header to check if this block is free).
+ This can be replaced by a single 'free' bit in the header of the following
+ block (and the tailer only exists in free blocks).
+\begin_inset Foot
+status collapsed
+
+\begin_layout Plain Layout
+This technique from Thomas Standish.
+ Data Structure Techniques.
+ Addison-Wesley, Reading, Massachusetts, 1980.
+\end_layout
+
+\end_inset
+
+ The current proposed coalescing algorithm doesn't need this, however.
+\end_layout
+
+\begin_layout Standard
+This produces a 16 byte used header like this:
+\end_layout
+
+\begin_layout LyX-Code
+struct tdb_used_record {
+\end_layout
+
+\begin_layout LyX-Code
+ uint32_t used_magic : 16,
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ key_data_divide: 5,
+\end_layout
+
+\begin_layout LyX-Code
+ top_hash: 11;
+\end_layout
+
+\begin_layout LyX-Code
+ uint32_t extra_octets;
+\end_layout
+
+\begin_layout LyX-Code
+ uint64_t key_and_data_len;
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout Standard
+And a free record like this:
+\end_layout
+
+\begin_layout LyX-Code
+struct tdb_free_record {
+\end_layout
+
+\begin_layout LyX-Code
+ uint64_t free_magic: 8,
+\end_layout
+
+\begin_layout LyX-Code
+ prev : 56;
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ uint64_t free_table: 8,
+\end_layout
+
+\begin_layout LyX-Code
+ total_length : 56;
+\end_layout
+
+\begin_layout LyX-Code
+ uint64_t next;
+\end_layout
+
+\begin_layout LyX-Code
+};
+\end_layout
+
+\begin_layout Standard
+Note that by limiting valid offsets to 56 bits, we can pack everything we
+ need into 3 64-bit words, meaning our minimum record size is 8 bytes.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+Transaction Commit Requires 4 fdatasync
+\end_layout
+
+\begin_layout Standard
+The current transaction algorithm is:
+\end_layout
+
+\begin_layout Enumerate
+write_recovery_data();
+\end_layout
+
+\begin_layout Enumerate
+sync();
+\end_layout
+
+\begin_layout Enumerate
+write_recovery_header();
+\end_layout
+
+\begin_layout Enumerate
+sync();
+\end_layout
+
+\begin_layout Enumerate
+overwrite_with_new_data();
+\end_layout
+
+\begin_layout Enumerate
+sync();
+\end_layout
+
+\begin_layout Enumerate
+remove_recovery_header();
+\end_layout
+
+\begin_layout Enumerate
+sync();
+\end_layout
+
+\begin_layout Standard
+On current ext3, each sync flushes all data to disk, so the next 3 syncs
+ are relatively inexpensive.
+ But this could become a performance bottleneck on other filesystems such
+ as ext4.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Neil Brown points out that this is overzealous, and only one sync is needed:
+\end_layout
+
+\begin_layout Enumerate
+Bundle the recovery data, a transaction counter and a strong checksum of
+ the new data.
+\end_layout
+
+\begin_layout Enumerate
+Strong checksum that whole bundle.
+\end_layout
+
+\begin_layout Enumerate
+Store the bundle in the database.
+\end_layout
+
+\begin_layout Enumerate
+Overwrite the oldest of the two recovery pointers in the header (identified
+ using the transaction counter) with the offset of this bundle.
+\end_layout
+
+\begin_layout Enumerate
+sync.
+\end_layout
+
+\begin_layout Enumerate
+Write the new data to the file.
+\end_layout
+
+\begin_layout Standard
+Checking for recovery means identifying the latest bundle with a valid checksum
+ and using the new data checksum to ensure that it has been applied.
+ This is more expensive than the current check, but need only be done at
+ open.
+ For running databases, a separate header field can be used to indicate
+ a transaction in progress; we need only check for recovery if this is set.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Deferred.
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "sub:TDB-Does-Not"
+
+\end_inset
+
+TDB Does Not Have Snapshot Support
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+None.
+ At some point you say
+\begin_inset Quotes eld
+\end_inset
+
+use a real database
+\begin_inset Quotes erd
+\end_inset
+
+ (but see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "replay-attribute"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Standard
+But as a thought experiment, if we implemented transactions to only overwrite
+ free entries (this is tricky: there must not be a header in each entry
+ which indicates whether it is free; instead we rely on presence in metadata
+ elsewhere),
+ and a pointer to the hash table, we could create an entirely new commit
+ without destroying existing data.
+ Then it would be easy to implement snapshots in a similar way.
+\end_layout
+
+\begin_layout Standard
+This would not allow arbitrary changes to the database, such as tdb_repack
+ does, and would require more space (since we have to preserve the current
+ and future entries at once).
+ If we used hash trees rather than one big hash table, we might only have
+ to rewrite some sections of the hash, too.
+\end_layout
+
+\begin_layout Standard
+We could then implement snapshots using a similar method, using multiple
+ different hash tables/free tables.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Deferred.
+\end_layout
+
+\begin_layout Subsection
+Transactions Cannot Operate in Parallel
+\end_layout
+
+\begin_layout Standard
+This would be useless for ldb, as it hits the index records with just about
+ every update.
+ It would add significant complexity in resolving clashes, and cause
+ all transaction callers to write their code to loop in the case where the
+ transactions spuriously failed.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+None (but see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "replay-attribute"
+
+\end_inset
+
+).
+ We could solve a small part of the problem by providing read-only transactions.
+ These would allow one write transaction to begin, but it could not commit
+ until all r/o transactions are done.
+ This would require a new RO_TRANSACTION_LOCK, which would be upgraded on
+ commit.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Deferred.
+\end_layout
+
+\begin_layout Subsection
+Default Hash Function Is Suboptimal
+\end_layout
+
+\begin_layout Standard
+The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially
+ if we expand it to 64 bits), and works best when the hash bucket size is
+ a prime number (which also means a slow modulus).
+ In addition, it is highly predictable which could potentially lead to a
+ Denial of Service attack in some TDB uses.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+The Jenkins lookup3 hash
+\begin_inset Foot
+status open
+
+\begin_layout Plain Layout
+http://burtleburtle.net/bob/c/lookup3.c
+\end_layout
+
+\end_inset
+
+ is a fast and superbly-mixing hash.
+ It's used by the Linux kernel and almost everything else.
+ This has the particular properties that it takes an initial seed, and produces
+ two 32 bit hash numbers, which we can combine into a 64-bit hash.
+\end_layout
+
+\begin_layout Standard
+The seed should be created at tdb-creation time from some random source,
+ and placed in the header.
+ This is far from foolproof, but adds a little bit of protection against
+ hash bombing.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+\begin_inset CommandInset label
+LatexCommand label
+name "Reliable-Traversal-Adds"
+
+\end_inset
+
+Reliable Traversal Adds Complexity
+\end_layout
+
+\begin_layout Standard
+We lock a record during traversal iteration, and try to grab that lock in
+ the delete code.
+ If that grab on delete fails, we simply mark it deleted and continue onwards;
+ traversal checks for this condition and does the delete when it moves off
+ the record.
+\end_layout
+
+\begin_layout Standard
+If traversal terminates, the dead record may be left indefinitely.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+Remove reliability guarantees; see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "traverse-Proposed-Solution"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Complete.
+\end_layout
+
+\begin_layout Subsection
+Fcntl Locking Adds Overhead
+\end_layout
+
+\begin_layout Standard
+Placing a fcntl lock means a system call, as does removing one.
+ This is actually one reason why transactions can be faster (everything
+ is locked once at transaction start).
+ In the uncontended case, this overhead can theoretically be eliminated.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+None.
+\end_layout
+
+\begin_layout Standard
+We tried this before with spinlock support, in the early days of TDB, and
+ it didn't make much difference except in manufactured benchmarks.
+\end_layout
+
+\begin_layout Standard
+We could use spinlocks (with futex kernel support under Linux), but it means
+ that we lose automatic cleanup when a process dies with a lock.
+ There is a method of auto-cleanup under Linux, but it's not supported by
+ other operating systems.
+ We could reintroduce a clear-if-first-style lock and sweep for dead futexes
+ on open, but that wouldn't help the normal case of one concurrent opener
+ dying.
+ Increasingly elaborate repair schemes could be considered, but they require
+ an ABI change (everyone must use them) anyway, so there's no need to do
+ this at the same time as everything else.
+\end_layout
+
+\begin_layout Subsection
+Some Transactions Don't Require Durability
+\end_layout
+
+\begin_layout Standard
+Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast)
+ usage, and occasionally empties the results into a transactional TDB.
+ This kind of usage prioritizes performance over durability: as long as
+ we are consistent, data can be lost.
+\end_layout
+
+\begin_layout Standard
+This would be more neatly implemented inside tdb: a
+\begin_inset Quotes eld
+\end_inset
+
+soft
+\begin_inset Quotes erd
+\end_inset
+
+ transaction commit (ie.
+ syncless) which meant that data may be reverted on a crash.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\end_layout
+
+\begin_layout Standard
+None.
+\end_layout
+
+\begin_layout Standard
+Unfortunately any transaction scheme which overwrites old data requires
+ a sync before that overwrite to avoid the possibility of corruption.
+\end_layout
+
+\begin_layout Standard
+It seems possible to use a scheme similar to that described in
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:TDB-Does-Not"
+
+\end_inset
+
+, where transactions are committed without overwriting existing data, and
+ an array of top-level pointers were available in the header.
+ If the transaction is
+\begin_inset Quotes eld
+\end_inset
+
+soft
+\begin_inset Quotes erd
+\end_inset
+
+ then we would not need a sync at all: existing processes would pick up
+ the new hash table and free list and work with that.
+\end_layout
+
+\begin_layout Standard
+At some later point, a sync would allow recovery of the old data into the
+ free lists (perhaps when the array of top-level pointers filled).
+ On crash, tdb_open() would examine the array of top levels, and apply the
+ transactions until it encountered an invalid checksum.
+\end_layout
+
+\begin_layout Subsection
+Tracing Is Fragile, Replay Is External
+\end_layout
+
+\begin_layout Standard
+The current TDB has compile-time-enabled tracing code, but it often breaks
+ as it is not enabled by default.
+ In a similar way, the ctdb code has an external wrapper which does replay
+ tracing so it can coordinate cluster-wide transactions.
+\end_layout
+
+\begin_layout Subsubsection
+Proposed Solution
+\begin_inset CommandInset label
+LatexCommand label
+name "replay-attribute"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Tridge points out that an attribute can be later added to tdb_open (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "attributes"
+
+\end_inset
+
+) to provide replay/trace hooks, which could become the basis for this and
+ future parallel transactions and snapshot support.
+\end_layout
+
+\begin_layout Subsubsection
+Status
+\end_layout
+
+\begin_layout Standard
+Deferred.
+\end_layout
+
+\end_body
+\end_document
--- /dev/null
+NTDB: Redesigning The Trivial DataBase
+
+Rusty Russell, IBM Corporation
+
+19 June 2012
+
+Abstract
+
+The Trivial DataBase on-disk format is 32 bits; with usage cases
+heading towards the 4G limit, that must change. This required
+breakage provides an opportunity to revisit TDB's other design
+decisions and reassess them.
+
+1 Introduction
+
+The Trivial DataBase was originally written by Andrew Tridgell as
+a simple key/data pair storage system with the same API as dbm,
+but allowing multiple readers and writers while being small
+enough (< 1000 lines of C) to include in SAMBA. The simple design
+created in 1999 has proven surprisingly robust and performant,
+used in Samba versions 3 and 4 as well as numerous other
+projects. Its useful life was greatly increased by the
+(backwards-compatible!) addition of transaction support in 2005.
+
+The wider variety and greater demands of TDB-using code has led
+to some organic growth of the API, as well as some compromises on
+the implementation. None of these, by themselves, are seen as
+show-stoppers, but the cumulative effect is a loss of elegance
+over the initial, simple TDB implementation. Here is a table of
+the approximate number of lines of implementation code and number
+of API functions at the end of each year:
+
+
++-----------+----------------+--------------------------------+
+| Year End | API Functions | Lines of C Code Implementation |
++-----------+----------------+--------------------------------+
++-----------+----------------+--------------------------------+
+| 1999 | 13 | 1195 |
++-----------+----------------+--------------------------------+
+| 2000 | 24 | 1725 |
++-----------+----------------+--------------------------------+
+| 2001 | 32 | 2228 |
++-----------+----------------+--------------------------------+
+| 2002 | 35 | 2481 |
++-----------+----------------+--------------------------------+
+| 2003 | 35 | 2552 |
++-----------+----------------+--------------------------------+
+| 2004 | 40 | 2584 |
++-----------+----------------+--------------------------------+
+| 2005 | 38 | 2647 |
++-----------+----------------+--------------------------------+
+| 2006 | 52 | 3754 |
++-----------+----------------+--------------------------------+
+| 2007 | 66 | 4398 |
++-----------+----------------+--------------------------------+
+| 2008 | 71 | 4768 |
++-----------+----------------+--------------------------------+
+| 2009 | 73 | 5715 |
++-----------+----------------+--------------------------------+
+
+
+This review is an attempt to catalog and address all the known
+issues with TDB and create solutions which address the problems
+without significantly increasing complexity; all involved are far
+too aware of the dangers of second system syndrome in rewriting a
+successful project like this.
+
+Note: the final decision was to make ntdb a separate library,
+with a separate 'ntdb' namespace so both can potentially be
+linked together. This document still refers to “tdb” everywhere,
+for simplicity.
+
+2 API Issues
+
+2.1 tdb_open_ex Is Not Expandable
+
+The tdb_open() call was expanded to tdb_open_ex(), which added an
+optional hashing function and an optional logging function
+argument. Additional arguments to open would require the
+introduction of a tdb_open_ex2 call etc.
+
+2.1.1 Proposed Solution<attributes>
+
+tdb_open() will take a linked-list of attributes:
+
+enum tdb_attribute {
+
+ TDB_ATTRIBUTE_LOG = 0,
+
+ TDB_ATTRIBUTE_HASH = 1
+
+};
+
+struct tdb_attribute_base {
+
+ enum tdb_attribute attr;
+
+ union tdb_attribute *next;
+
+};
+
+struct tdb_attribute_log {
+
+ struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG
+*/
+
+ tdb_log_func log_fn;
+
+ void *log_private;
+
+};
+
+struct tdb_attribute_hash {
+
+ struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH
+*/
+
+ tdb_hash_func hash_fn;
+
+ void *hash_private;
+
+};
+
+union tdb_attribute {
+
+ struct tdb_attribute_base base;
+
+ struct tdb_attribute_log log;
+
+ struct tdb_attribute_hash hash;
+
+};
+
+This allows future attributes to be added, even if this expands
+the size of the union.
+
+2.1.2 Status
+
+Complete.
+
+2.2 tdb_traverse Makes Impossible Guarantees
+
+tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions,
+and it was thought that it was important to guarantee that all
+records which exist at the start and end of the traversal would
+be included, and no record would be included twice.
+
+This adds complexity (see [Reliable-Traversal-Adds]) and does not
+work anyway for records which are altered (in particular, those
+which are expanded may be effectively deleted and re-added behind
+the traversal).
+
+2.2.1 <traverse-Proposed-Solution>Proposed Solution
+
+Abandon the guarantee. You will see every record if no changes
+occur during your traversal, otherwise you will see some subset.
+You can prevent changes by using a transaction or the locking
+API.
+
+2.2.2 Status
+
+Complete. Delete-during-traverse will still delete every record,
+too (assuming no other changes).
+
+2.3 Nesting of Transactions Is Fraught
+
+TDB has alternated between allowing nested transactions and not
+allowing them. Various paths in the Samba codebase assume that
+transactions will nest, and in a sense they can: the operation is
+only committed to disk when the outer transaction is committed.
+There are two problems, however:
+
+1. Canceling the inner transaction will cause the outer
+ transaction commit to fail, and will not undo any operations
+ since the inner transaction began. This problem is soluble with
+ some additional internal code.
+
+2. An inner transaction commit can be cancelled by the outer
+ transaction. This is desirable in the way which Samba's
+ database initialization code uses transactions, but could be a
+ surprise to any users expecting a successful transaction commit
+ to expose changes to others.
+
+The current solution is to specify the behavior at tdb_open(),
+with the default currently that nested transactions are allowed.
+This flag can also be changed at runtime.
+
+2.3.1 Proposed Solution
+
+Given the usage patterns, it seems that the “least-surprise”
+behavior of disallowing nested transactions should become the
+default. Additionally, it seems the outer transaction is the only
+code which knows whether inner transactions should be allowed, so
+a flag to indicate this could be added to tdb_transaction_start.
+However, this behavior can be simulated with a wrapper which uses
+tdb_add_flags() and tdb_remove_flags(), so the API should not be
+expanded for this relatively-obscure case.
+
+2.3.2 Status
+
+Complete; the nesting flag has been removed.
+
+2.4 Incorrect Hash Function is Not Detected
+
+tdb_open_ex() allows the calling code to specify a different hash
+function to use, but does not check that all other processes
+accessing this tdb are using the same hash function. The result
+is that records are missing from tdb_fetch().
+
+2.4.1 Proposed Solution
+
+The header should contain an example hash result (eg. the hash of
+0xdeadbeef), and tdb_open_ex() should check that the given hash
+function produces the same answer, or fail the tdb_open call.
+
+2.4.2 Status
+
+Complete.
+
+2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation
+
+In response to scalability issues with the free list ([TDB-Freelist-Is]
+) two API workarounds have been incorporated in TDB:
+tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The
+latter actually calls the former with an argument of “5”.
+
+This code allows deleted records to accumulate without putting
+them in the free list. On delete we iterate through each chain
+and free them in a batch if there are more than max_dead entries.
+These are never otherwise recycled except as a side-effect of a
+tdb_repack.
+
+2.5.1 Proposed Solution
+
+With the scalability problems of the freelist solved, this API
+can be removed. The TDB_VOLATILE flag may still be useful as a
+hint that store and delete of records will be at least as common
+as fetch in order to allow some internal tuning, but initially
+will become a no-op.
+
+2.5.2 Status
+
+Complete. Unknown flags cause tdb_open() to fail as well, so they
+can be detected at runtime.
+
+2.6 <TDB-Files-Cannot>TDB Files Cannot Be Opened Multiple Times
+ In The Same Process
+
+No process can open the same TDB twice; we check and disallow it.
+This is an unfortunate side-effect of fcntl locks, which operate
+on a per-file rather than per-file-descriptor basis, and do not
+nest. Thus, closing any file descriptor on a file clears all the
+locks obtained by this process, even if they were placed using a
+different file descriptor!
+
+Note that even if this were solved, deadlock could occur if
+operations were nested: this is a more manageable programming
+error in most cases.
+
+2.6.1 Proposed Solution
+
+We could lobby POSIX to fix the perverse rules, or at least lobby
+Linux to violate them so that the most common implementation does
+not have this restriction. This would be a generally good idea
+for other fcntl lock users.
+
+Samba uses a wrapper which hands out the same tdb_context to
+multiple callers if this happens, and does simple reference
+counting. We should do this inside the tdb library, which already
+emulates lock nesting internally; it would need to recognize when
+deadlock occurs within a single process. This would create a new
+failure mode for tdb operations (while we currently handle
+locking failures, they are impossible in normal use and a process
+encountering them can do little but give up).
+
+I do not see benefit in an additional tdb_open flag to indicate
+whether re-opening is allowed, though there may be some
+benefit to adding a call to detect when a tdb_context is shared,
+to allow others to create such an API.
+
+2.6.2 Status
+
+Complete.
+
+2.7 TDB API Is Not POSIX Thread-safe
+
+The TDB API uses an error code which can be queried after an
+operation to determine what went wrong. This programming model
+does not work with threads, unless specific additional guarantees
+are given by the implementation. In addition, even
+otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot]
+).
+
+2.7.1 Proposed Solution
+
+Rearchitecting the API to include a tdb_errcode pointer would be a
+great deal of churn, but fortunately most functions return 0 on
+success and -1 on error: we can change these to return 0 on
+success and a negative error code on error, and the API remains
+similar to previous. The tdb_fetch, tdb_firstkey and tdb_nextkey
+functions need to take a TDB_DATA pointer and return an error
+code. It is also simpler to have tdb_nextkey replace its key
+argument in place, freeing up any old .dptr.
+
+Internal locking is required to make sure that fcntl locks do not
+overlap between threads, and also that the global list of tdbs is
+maintained.
+
+The aim is that building tdb with -DTDB_PTHREAD will result in a
+pthread-safe version of the library, and otherwise no overhead
+will exist. Alternatively, a hooking mechanism similar to that
+proposed for [Proposed-Solution-locking-hook] could be used to
+enable pthread locking at runtime.
+
+2.7.2 Status
+
+Incomplete; API has been changed but thread safety has not been
+implemented.
+
+2.8 *_nonblock Functions And *_mark Functions Expose
+ Implementation
+
+CTDB[footnote:
+Clustered TDB, see http://ctdb.samba.org
+] wishes to operate on TDB in a non-blocking manner. This is
+currently done as follows:
+
+1. Call the _nonblock variant of an API function (eg.
+ tdb_lockall_nonblock). If this fails:
+
+2. Fork a child process, and wait for it to call the normal
+ variant (eg. tdb_lockall).
+
+3. If the child succeeds, call the _mark variant to indicate we
+ already have the locks (eg. tdb_lockall_mark).
+
+4. Upon completion, tell the child to release the locks (eg.
+ tdb_unlockall).
+
+5. Indicate to tdb that it should consider the locks removed (eg.
+ tdb_unlockall_mark).
+
+There are several issues with this approach. Firstly, adding two
+new variants of each function clutters the API for an obscure
+use, and so not all functions have three variants. Secondly, it
+assumes that all paths of the functions ask for the same locks,
+otherwise the parent process will have to get a lock which the
+child doesn't have under some circumstances. I don't believe this
+is currently the case, but it constrains the implementation.
+
+2.8.1 <Proposed-Solution-locking-hook>Proposed Solution
+
+Implement a hook for locking methods, so that the caller can
+control the calls to create and remove fcntl locks. In this
+scenario, ctdbd would operate as follows:
+
+1. Call the normal API function, eg tdb_lockall().
+
+2. When the lock callback comes in, check if the child has the
+ lock. Initially, this is always false. If it does, return 0.
+ Otherwise, try to obtain it in non-blocking mode. If that
+ fails, return EWOULDBLOCK.
+
+3. Release locks in the unlock callback as normal.
+
+4. If tdb_lockall() fails, see if we recorded a lock failure; if
+ so, call the child to repeat the operation.
+
+5. The child records what locks it obtains, and returns that
+ information to the parent.
+
+6. When the child has succeeded, goto 1.
+
+This is flexible enough to handle any potential locking scenario,
+even when lock requirements change. It can be optimized so that
+the parent does not release locks, just tells the child which
+locks it doesn't need to obtain.
+
+It also keeps the complexity out of the API, and in ctdbd where
+it is needed.
+
+2.8.2 Status
+
+Complete.
+
+2.9 tdb_chainlock Functions Expose Implementation
+
+tdb_chainlock locks some number of records, including the record
+indicated by the given key. This gave atomicity guarantees;
+no-one can start a transaction, alter, read or delete that key
+while the lock is held.
+
+It also makes the same guarantee for any other key in the chain,
+which is an internal implementation detail and potentially a
+cause for deadlock.
+
+2.9.1 Proposed Solution
+
+None. It would be nice to have an explicit single entry lock
+which affected no other keys. Unfortunately, this won't work for
+an entry which doesn't exist. Thus while chainlock may be
+implemented more efficiently for the existing case, it will still
+have overlap issues with the non-existing case. So it is best to
+keep the current (lack of) guarantee about which records will be
+affected to avoid constraining our implementation.
+
+2.10 Signal Handling is Not Race-Free
+
+The tdb_setalarm_sigptr() call allows the caller's signal handler
+to indicate that the tdb locking code should return with a
+failure, rather than trying again when a signal is received (and
+errno == EAGAIN). This is usually used to implement timeouts.
+
+Unfortunately, this does not work in the case where the signal is
+received before the tdb code enters the fcntl() call to place the
+lock: the code will sleep within the fcntl() code, unaware that
+the signal wants it to exit. In the case of long timeouts, this
+does not happen in practice.
+
+2.10.1 Proposed Solution
+
+The locking hooks proposed in [Proposed-Solution-locking-hook]
+would allow the user to decide on whether to fail the lock
+acquisition on a signal. This allows the caller to choose their
+own compromise: they could narrow the race by checking
+immediately before the fcntl call.[footnote:
+It may be possible to make this race-free in some implementations
+by having the signal handler alter the struct flock to make it
+invalid. This will cause the fcntl() lock call to fail with
+EINVAL if the signal occurs before the kernel is entered,
+otherwise EAGAIN.
+]
+
+2.10.2 Status
+
+Complete.
+
+2.11 The API Uses Gratuitous Typedefs, Capitals
+
+typedefs are useful for providing source compatibility when types
+can differ across implementations, or arguably in the case of
+function pointer definitions which are hard for humans to parse.
+Otherwise it is simply obfuscation and pollutes the namespace.
+
+Capitalization is usually reserved for compile-time constants and
+macros.
+
+ TDB_CONTEXT There is no reason to use this over 'struct
+ tdb_context'; the definition isn't visible to the API user
+ anyway.
+
+ TDB_DATA There is no reason to use this over struct TDB_DATA;
+ the struct needs to be understood by the API user.
+
+ struct TDB_DATA This would normally be called 'struct
+ tdb_data'.
+
+ enum TDB_ERROR Similarly, this would normally be enum
+ tdb_error.
+
+2.11.1 Proposed Solution
+
+None. Introducing lower case variants would please pedants like
+myself, but if it were done the existing ones should be kept.
+There is little point forcing a purely cosmetic change upon tdb
+users.
+
+2.12 <tdb_log_func-Doesnt-Take>tdb_log_func Doesn't Take The
+ Private Pointer
+
+For API compatibility reasons, the logging function needs to call
+tdb_get_logging_private() to retrieve the pointer registered by
+the tdb_open_ex for logging.
+
+2.12.1 Proposed Solution
+
+It should simply take an extra argument, since we are prepared to
+break the API/ABI.
+
+2.12.2 Status
+
+Complete.
+
+2.13 Various Callback Functions Are Not Typesafe
+
+The callback functions in tdb_set_logging_function (after[tdb_log_func-Doesnt-Take]
+ is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read
+and tdb_check all take void * and must internally convert it to
+the argument type they were expecting.
+
+If this type changes, the compiler will not produce warnings on
+the callers, since it only sees void *.
+
+2.13.1 Proposed Solution
+
+With careful use of macros, we can create callback functions
+which give a warning when used on gcc and the types of the
+callback and its private argument differ. Unsupported compilers
+will not give a warning, which is no worse than now. In addition,
+the callbacks become clearer, as they need not use void * for
+their parameter.
+
+See CCAN's typesafe_cb module at
+http://ccan.ozlabs.org/info/typesafe_cb.html
+
+2.13.2 Status
+
+Complete.
+
+2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens,
+ tdb_reopen_all Problematic
+
+The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB
+file should be cleared if the caller discovers it is the only
+process with the TDB open. However, if any caller does not
+specify TDB_CLEAR_IF_FIRST it will not be detected, so will have
+the TDB erased underneath them (usually resulting in a crash).
+
+There is a similar issue on fork(); if the parent exits (or
+otherwise closes the tdb) before the child calls tdb_reopen_all()
+to establish the lock used to indicate the TDB is opened by
+someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe
+it alone has opened the TDB and will erase it.
+
+2.14.1 Proposed Solution
+
+Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but
+see[TDB_CLEAR_IF_FIRST-Imposes-Performance].
+
+2.14.2 Status
+
+Complete. An open hook is provided to replicate this
+functionality if required.
+
+2.15 Extending The Header Is Difficult
+
+We have reserved (zeroed) words in the TDB header, which can be
+used for future features. If the future features are compulsory,
+the version number must be updated to prevent old code from
+accessing the database. But if the future feature is optional, we
+have no way of telling if older code is accessing the database or
+not.
+
+2.15.1 Proposed Solution
+
+The header should contain a “format variant” value (64-bit). This
+is divided into two 32-bit parts:
+
+1. The lower part reflects the format variant understood by code
+ accessing the database.
+
+2. The upper part reflects the format variant you must understand
+ to write to the database (otherwise you can only open for
+ reading).
+
+The latter field can only be written at creation time, the former
+should be written under the OPEN_LOCK when opening the database
+for writing, if the variant of the code is lower than the current
+lowest variant.
+
+This should allow backwards-compatible features to be added, and
+detection if older code (which doesn't understand the feature)
+writes to the database.
+
+2.15.2 Status
+
+Complete.
+
+2.16 Record Headers Are Not Expandable
+
+If we later want to add (say) checksums on keys and data, it
+would require another format change, which we'd like to avoid.
+
+2.16.1 Proposed Solution
+
+We often have extra padding at the tail of a record. If we ensure
+that the first byte (if any) of this padding is zero, we will
+have a way for future changes to detect code which doesn't
+understand a new format: the new code would write (say) a 1 at
+the tail, and thus if there is no tail or the first byte is 0, we
+would know the extension is not present on that record.
+
+2.16.2 Status
+
+Complete.
+
+2.17 TDB Does Not Use Talloc
+
+Many users of TDB (particularly Samba) use the talloc allocator,
+and thus have to wrap TDB in a talloc context to use it
+conveniently.
+
+2.17.1 Proposed Solution
+
+The allocation within TDB is not complicated enough to justify
+the use of talloc, and I am reluctant to force another
+(excellent) library on TDB users. Nonetheless a compromise is
+possible. An attribute (see[attributes]) can be added later to
+tdb_open() to provide an alternate allocation mechanism,
+specifically for talloc but usable by any other allocator (which
+would ignore the“context” argument).
+
+This would form a talloc hierarchy as expected, but the caller
+would still have to attach a destructor to the tdb context
+returned from tdb_open to close it. All TDB_DATA fields would be
+children of the tdb_context, and the caller would still have to
+manage them (using talloc_free() or talloc_steal()).
+
+2.17.2 Status
+
+Complete, using the NTDB_ATTRIBUTE_ALLOCATOR attribute.
+
+3 Performance And Scalability Issues
+
+3.1 <TDB_CLEAR_IF_FIRST-Imposes-Performance>TDB_CLEAR_IF_FIRST
+ Imposes Performance Penalty
+
+When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is
+placed at offset 4 (aka. the ACTIVE_LOCK). While these locks
+never conflict in normal tdb usage, they do add substantial
+overhead for most fcntl lock implementations when the kernel
+scans to detect if a lock conflict exists. This is often a single
+linked list, making the time to acquire and release a fcntl lock
+O(N) where N is the number of processes with the TDB open, not
+the number actually doing work.
+
+In a Samba server it is common to have huge numbers of clients
+sitting idle, and thus they have weaned themselves off the
+TDB_CLEAR_IF_FIRST flag.[footnote:
+There is a flag to tdb_reopen_all() which is used for this
+optimization: if the parent process will outlive the child, the
+child does not need the ACTIVE_LOCK. This is a workaround for
+this very performance issue.
+]
+
+3.1.1 Proposed Solution
+
+Remove the flag. It was a neat idea, but even trivial servers
+tend to know when they are initializing for the first time and
+can simply unlink the old tdb at that point.
+
+3.1.2 Status
+
+Complete.
+
+3.2 TDB Files Have a 4G Limit
+
+This seems to be becoming an issue (so much for“trivial”!),
+particularly for ldb.
+
+3.2.1 Proposed Solution
+
+A new, incompatible TDB format which uses 64 bit offsets
+internally rather than 32 bit as now. For simplicity of endian
+conversion (which TDB does on the fly if required), all values
+will be 64 bit on disk. In practice, some upper bits may be used
+for other purposes, but at least 56 bits will be available for
+file offsets.
+
+tdb_open() will automatically detect the old version, and even
+create them if TDB_VERSION6 is specified to tdb_open.
+
+32 bit processes will still be able to access TDBs larger than 4G
+(assuming that their off_t allows them to seek to 64 bits), they
+will gracefully fall back as they fail to mmap. This can happen
+already with large TDBs.
+
+Old versions of tdb will fail to open the new TDB files (since 28
+August 2009, commit 398d0c29290: prior to that any unrecognized
+file format would be erased and initialized as a fresh tdb!)
+
+3.2.2 Status
+
+Complete.
+
+3.3 TDB Records Have a 4G Limit
+
+This has not been a reported problem, and the API uses size_t
+which can be 64 bit on 64 bit platforms. However, other limits
+may have made such an issue moot.
+
+3.3.1 Proposed Solution
+
+Record sizes will be 64 bit, with an error returned on 32 bit
+platforms which try to access such records (the current
+implementation would return TDB_ERR_OOM in a similar case). It
+seems unlikely that 32 bit keys will be a limitation, so the
+implementation may not support this (see[sub:Records-Incur-A]).
+
+3.3.2 Status
+
+Complete.
+
+3.4 Hash Size Is Determined At TDB Creation Time
+
+TDB contains a number of hash chains in the header; the number is
+specified at creation time, and defaults to 131. This is such a
+bottleneck on large databases (as each hash chain gets quite
+long), that LDB uses 10,000 for this hash. In general it is
+impossible to know what the 'right' answer is at database
+creation time.
+
+3.4.1 <sub:Hash-Size-Solution>Proposed Solution
+
+After comprehensive performance testing on various scalable hash
+variants[footnote:
+http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94
+This was annoying because I was previously convinced that an
+expanding tree of hashes would be very close to optimal.
+], it became clear that it is hard to beat a straight linear hash
+table which doubles in size when it reaches saturation.
+Unfortunately, altering the hash table introduces serious locking
+complications: the entire hash table needs to be locked to
+enlarge the hash table, and others might be holding locks.
+Particularly insidious are insertions done under tdb_chainlock.
+
+Thus an expanding layered hash will be used: an array of hash
+groups, with each hash group exploding into pointers to lower
+hash groups once it fills, turning into a hash tree. This has
+implications for locking: we must lock the entire group in case
+we need to expand it, yet we don't know how deep the tree is at
+that point.
+
+Note that bits from the hash table entries should be stolen to
+hold more hash bits to reduce the penalty of collisions. We can
+use the otherwise-unused lower 3 bits. If we limit the size of
+the database to 64 petabytes, we can use the top 8 bits of the
+hash entry as well. These 11 bits would reduce false positives
+down to 1 in 2000 which is more than we need: we can use one of
+the bits to indicate that the extra hash bits are valid. This
+means we can choose not to re-hash all entries when we expand a
+hash group; simply use the next bits we need and mark them
+invalid.
+
+3.4.2 Status
+
+Ignore. Scaling the hash automatically proved inefficient at
+small hash sizes; we default to an 8192-element hash (changeable
+via NTDB_ATTRIBUTE_HASHSIZE), and when buckets clash we expand to
+an array of hash entries. This scales slightly better than the
+tdb chain (due to the 8 top bits containing extra hash).
+
+3.5 <TDB-Freelist-Is>TDB Freelist Is Highly Contended
+
+TDB uses a single linked list for the free list. Allocation
+occurs as follows, using heuristics which have evolved over time:
+
+1. Get the free list lock for this whole operation.
+
+2. Multiply length by 1.25, so we always over-allocate by 25%.
+
+3. Set the slack multiplier to 1.
+
+4. Examine the current freelist entry: if it is > length but <
+ the current best case, remember it as the best case.
+
+5. Multiply the slack multiplier by 1.05.
+
+6. If our best fit so far is less than length * slack multiplier,
+ return it. The slack will be turned into a new free record if
+ it's large enough.
+
+7. Otherwise, go onto the next freelist entry.
+
+Deleting a record occurs as follows:
+
+1. Lock the hash chain for this whole operation.
+
+2. Walk the chain to find the record, keeping the prev pointer
+ offset.
+
+3. If max_dead is non-zero:
+
+ (a) Walk the hash chain again and count the dead records.
+
+ (b) If it's more than max_dead, bulk free all the dead ones
+ (similar to steps 4 and below, but the lock is only obtained
+ once).
+
+ (c) Simply mark this record as dead and return.
+
+4. Get the free list lock for the remainder of this operation.
+
+5. <right-merging>Examine the following block to see if it is
+ free; if so, enlarge the current block and remove that block
+ from the free list. This was disabled, as removal from the free
+ list was O(entries-in-free-list).
+
+6. Examine the preceding block to see if it is free: for this
+ reason, each block has a 32-bit tailer which indicates its
+ length. If it is free, expand it to cover our new block and
+ return.
+
+7. Otherwise, prepend ourselves to the free list.
+
+Disabling right-merging (step[right-merging]) causes
+fragmentation; the other heuristics proved insufficient to
+address this, so the final answer to this was that when we expand
+the TDB file inside a transaction commit, we repack the entire
+tdb.
+
+The single list lock limits our allocation rate; due to the other
+issues this is not currently seen as a bottleneck.
+
+3.5.1 Proposed Solution
+
+The first step is to remove all the current heuristics, as they
+obviously interact, then examine them once the lock contention is
+addressed.
+
+The free list must be split to reduce contention. Assuming
+perfect free merging, we can at most have 1 free list entry for
+each entry. This implies that the number of free lists is related
+to the size of the hash table, but as it is rare to walk a large
+number of free list entries we can use far fewer, say 1/32 of the
+number of hash buckets.
+
+It seems tempting to try to reuse the hash implementation which
+we use for records here, but we have two ways of searching for
+free entries: for allocation we search by size (and possibly
+zone) which produces too many clashes for our hash table to
+handle well, and for coalescing we search by address. Thus an
+array of doubly-linked free lists seems preferable.
+
+There are various benefits in using per-size free lists (see[sub:TDB-Becomes-Fragmented]
+) but it's not clear this would reduce contention in the common
+case where all processes are allocating/freeing the same size.
+Thus we almost certainly need to divide in other ways: the most
+obvious is to divide the file into zones, and using a free list
+(or table of free lists) for each. This approximates address
+ordering.
+
+Unfortunately it is difficult to know what heuristics should be
+used to determine zone sizes, and our transaction code relies on
+being able to create a“recovery area” by simply appending to the
+file (difficult if it would need to create a new zone header).
+Thus we use a linked-list of free tables; currently we only ever
+create one, but if there is more than one we choose one at random
+to use. In future we may use heuristics to add new free tables on
+contention. We only expand the file when all free tables are
+exhausted.
+
+The basic algorithm is as follows. Freeing is simple:
+
+1. Identify the correct free list.
+
+2. Lock the corresponding list.
+
+3. Re-check the list (we didn't have a lock, sizes could have
+ changed): relock if necessary.
+
+4. Place the freed entry in the list.
+
+Allocation is a little more complicated, as we perform delayed
+coalescing at this point:
+
+1. Pick a free table; usually the previous one.
+
+2. Lock the corresponding list.
+
+3. If the top entry is large enough, remove it from the list and
+ return it.
+
+4. Otherwise, coalesce entries in the list. If there was no entry
+ large enough, unlock the list and try the next largest list.
+
+5. If no list has an entry which meets our needs, try the next
+ free table.
+
+6. If no zone satisfies, expand the file.
+
+This optimizes rapid insert/delete of free list entries by not
+coalescing them all the time. First-fit address ordering
+seems to be fairly good for keeping fragmentation low
+(see[sub:TDB-Becomes-Fragmented]). Note that address ordering
+does not need a tailer to coalesce, though if we needed one we
+could have one cheaply: see[sub:Records-Incur-A].
+
+Each free entry has the free table number in the header: less
+than 255. It also contains a doubly-linked list for easy
+deletion.
+
+3.6 <sub:TDB-Becomes-Fragmented>TDB Becomes Fragmented
+
+Much of this is a result of allocation strategy[footnote:
+The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995
+ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps
+] and deliberate hobbling of coalescing; internal fragmentation
+(aka overallocation) is deliberately set at 25%, and external
+fragmentation is only cured by the decision to repack the entire
+db when a transaction commit needs to enlarge the file.
+
+3.6.1 Proposed Solution
+
+The 25% overhead on allocation works in practice for ldb because
+indexes tend to expand by one record at a time. This internal
+fragmentation can be resolved by having an“expanded” bit in the
+header to note entries that have previously expanded, and
+allocating more space for them.
+
+There is a spectrum of possible solutions for external
+fragmentation: one is to use a fragmentation-avoiding allocation
+strategy such as best-fit address-order allocator. The other end
+of the spectrum would be to use a bump allocator (very fast and
+simple) and simply repack the file when we reach the end.
+
+There are three problems with efficient fragmentation-avoiding
+allocators: they are non-trivial, they tend to use a single free
+list for each size, and there's no evidence that tdb allocation
+patterns will match those recorded for general allocators (though
+it seems likely).
+
+Thus we don't spend too much effort on external fragmentation; we
+will be no worse than the current code if we need to repack on
+occasion. More effort is spent on reducing freelist contention,
+and reducing overhead.
+
+3.7 <sub:Records-Incur-A>Records Incur A 28-Byte Overhead
+
+Each TDB record has a header as follows:
+
+struct tdb_record {
+
+ tdb_off_t next; /* offset of the next record in the list
+*/
+
+ tdb_len_t rec_len; /* total byte length of record */
+
+ tdb_len_t key_len; /* byte length of key */
+
+ tdb_len_t data_len; /* byte length of data */
+
+ uint32_t full_hash; /* the full 32 bit hash of the key */
+
+ uint32_t magic; /* try to catch errors */
+
+ /* the following union is implied:
+
+ union {
+
+ char record[rec_len];
+
+ struct {
+
+ char key[key_len];
+
+ char data[data_len];
+
+ }
+
+ uint32_t totalsize; (tailer)
+
+ }
+
+ */
+
+};
+
+Naively, this would double to a 56-byte overhead on a 64 bit
+implementation.
+
+3.7.1 Proposed Solution
+
+We can use various techniques to reduce this for an allocated
+block:
+
+1. The 'next' pointer is not required, as we are using a flat
+ hash table.
+
+2. 'rec_len' can instead be expressed as an addition to key_len
+ and data_len (it accounts for wasted or overallocated length in
+ the record). Since the record length is always a multiple of 8,
+ we can conveniently fit it in 32 bits (representing up to 35
+ bits).
+
+3. 'key_len' and 'data_len' can be reduced. I'm unwilling to
+ restrict 'data_len' to 32 bits, but instead we can combine the
+ two into one 64-bit field and using a 5 bit value which
+ indicates at what bit to divide the two. Keys are unlikely to
+ scale as fast as data, so I'm assuming a maximum key size of 32
+ bits.
+
+4. 'full_hash' is used to avoid a memcmp on the“miss” case, but
+ this is diminishing returns after a handful of bits (at 10
+ bits, it reduces 99.9% of false memcmp). As an aside, as the
+ lower bits are already incorporated in the hash table
+ resolution, the upper bits should be used here. Note that it's
+ not clear that these bits will be a win, given the extra bits
+ in the hash table itself (see[sub:Hash-Size-Solution]).
+
+5. 'magic' does not need to be enlarged: it currently reflects
+ one of 5 values (used, free, dead, recovery, and
+ unused_recovery). It is useful for quick sanity checking
+ however, and should not be eliminated.
+
+6. 'tailer' is only used to coalesce free blocks (so a block to
+ the right can find the header to check if this block is free).
+ This can be replaced by a single 'free' bit in the header of
+ the following block (and the tailer only exists in free
+ blocks).[footnote:
+This technique from Thomas Standish. Data Structure Techniques.
+Addison-Wesley, Reading, Massachusetts, 1980.
+] The current proposed coalescing algorithm doesn't need this,
+ however.
+
+This produces a 16 byte used header like this:
+
+struct tdb_used_record {
+
+ uint32_t used_magic : 16,
+
+
+
+ key_data_divide: 5,
+
+ top_hash: 11;
+
+ uint32_t extra_octets;
+
+ uint64_t key_and_data_len;
+
+};
+
+And a free record like this:
+
+struct tdb_free_record {
+
+ uint64_t free_magic: 8,
+
+ prev : 56;
+
+
+
+ uint64_t free_table: 8,
+
+ total_length : 56;
+
+ uint64_t next;
+
+};
+
+Note that by limiting valid offsets to 56 bits, we can pack
+everything we need into 3 64-bit words, meaning our minimum
+record size is 8 bytes.
+
+3.7.2 Status
+
+Complete.
+
+3.8 Transaction Commit Requires 4 fdatasync
+
+The current transaction algorithm is:
+
+1. write_recovery_data();
+
+2. sync();
+
+3. write_recovery_header();
+
+4. sync();
+
+5. overwrite_with_new_data();
+
+6. sync();
+
+7. remove_recovery_header();
+
+8. sync();
+
+On current ext3, each sync flushes all data to disk, so the next
+3 syncs are relatively inexpensive. But this could become a
+performance bottleneck on other filesystems such as ext4.
+
+3.8.1 Proposed Solution
+
+Neil Brown points out that this is overzealous, and only one sync
+is needed:
+
+1. Bundle the recovery data, a transaction counter and a strong
+ checksum of the new data.
+
+2. Strong checksum that whole bundle.
+
+3. Store the bundle in the database.
+
+4. Overwrite the oldest of the two recovery pointers in the
+ header (identified using the transaction counter) with the
+ offset of this bundle.
+
+5. sync.
+
+6. Write the new data to the file.
+
+Checking for recovery means identifying the latest bundle with a
+valid checksum and using the new data checksum to ensure that it
+has been applied. This is more expensive than the current check,
+but need only be done at open. For running databases, a separate
+header field can be used to indicate a transaction in progress;
+we need only check for recovery if this is set.
+
+3.8.2 Status
+
+Deferred.
+
+3.9 <sub:TDB-Does-Not>TDB Does Not Have Snapshot Support
+
+3.9.1 Proposed Solution
+
+None. At some point you say“use a real database” (but see[replay-attribute]
+).
+
+But as a thought experiment, if we implemented transactions to
+only overwrite free entries (this is tricky: there must not be a
+header in each entry which indicates whether it is free, but use
+of presence in metadata elsewhere), and a pointer to the hash
+table, we could create an entirely new commit without destroying
+existing data. Then it would be easy to implement snapshots in a
+similar way.
+
+This would not allow arbitrary changes to the database, such as
+tdb_repack does, and would require more space (since we have to
+preserve the current and future entries at once). If we used hash
+trees rather than one big hash table, we might only have to
+rewrite some sections of the hash, too.
+
+We could then implement snapshots using a similar method, using
+multiple different hash tables/free tables.
+
+3.9.2 Status
+
+Deferred.
+
+3.10 Transactions Cannot Operate in Parallel
+
+This would be useless for ldb, as it hits the index records with
+just about every update. It would add significant complexity in
+resolving clashes, and cause all the transaction callers to write
+their code to loop in the case where the transactions spuriously
+failed.
+
+3.10.1 Proposed Solution
+
+None (but see[replay-attribute]). We could solve a small part of
+the problem by providing read-only transactions. These would
+allow one write transaction to begin, but it could not commit
+until all r/o transactions are done. This would require a new
+RO_TRANSACTION_LOCK, which would be upgraded on commit.
+
+3.10.2 Status
+
+Deferred.
+
+3.11 Default Hash Function Is Suboptimal
+
+The Knuth-inspired multiplicative hash used by tdb is fairly slow
+(especially if we expand it to 64 bits), and works best when the
+hash bucket size is a prime number (which also means a slow
+modulus). In addition, it is highly predictable which could
+potentially lead to a Denial of Service attack in some TDB uses.
+
+3.11.1 Proposed Solution
+
+The Jenkins lookup3 hash[footnote:
+http://burtleburtle.net/bob/c/lookup3.c
+] is a fast and superbly-mixing hash. It's used by the Linux
+kernel and almost everything else. This has the particular
+properties that it takes an initial seed, and produces two 32 bit
+hash numbers, which we can combine into a 64-bit hash.
+
+The seed should be created at tdb-creation time from some random
+source, and placed in the header. This is far from foolproof, but
+adds a little bit of protection against hash bombing.
+
+3.11.2 Status
+
+Complete.
+
+3.12 <Reliable-Traversal-Adds>Reliable Traversal Adds Complexity
+
+We lock a record during traversal iteration, and try to grab that
+lock in the delete code. If that grab on delete fails, we simply
+mark it deleted and continue onwards; traversal checks for this
+condition and does the delete when it moves off the record.
+
+If traversal terminates, the dead record may be left
+indefinitely.
+
+3.12.1 Proposed Solution
+
+Remove reliability guarantees; see[traverse-Proposed-Solution].
+
+3.12.2 Status
+
+Complete.
+
+3.13 Fcntl Locking Adds Overhead
+
+Placing a fcntl lock means a system call, as does removing one.
+This is actually one reason why transactions can be faster
+(everything is locked once at transaction start). In the
+uncontended case, this overhead can theoretically be eliminated.
+
+3.13.1 Proposed Solution
+
+None.
+
+We tried this before with spinlock support, in the early days of
+TDB, and it didn't make much difference except in manufactured
+benchmarks.
+
+We could use spinlocks (with futex kernel support under Linux),
+but it means that we lose automatic cleanup when a process dies
+with a lock. There is a method of auto-cleanup under Linux, but
+it's not supported by other operating systems. We could
+reintroduce a clear-if-first-style lock and sweep for dead
+futexes on open, but that wouldn't help the normal case of one
+concurrent opener dying. Increasingly elaborate repair schemes
+could be considered, but they require an ABI change (everyone
+must use them) anyway, so there's no need to do this at the same
+time as everything else.
+
+3.14 Some Transactions Don't Require Durability
+
+Volker points out that gencache uses a CLEAR_IF_FIRST tdb for
+normal (fast) usage, and occasionally empties the results into a
+transactional TDB. This kind of usage prioritizes performance
+over durability: as long as we are consistent, data can be lost.
+
+This would be more neatly implemented inside tdb: a“soft”
+transaction commit (ie. syncless) which meant that data may be
+reverted on a crash.
+
+3.14.1 Proposed Solution
+
+None.
+
+Unfortunately any transaction scheme which overwrites old data
+requires a sync before that overwrite to avoid the possibility of
+corruption.
+
+It seems possible to use a scheme similar to that described in [sub:TDB-Does-Not],
+where transactions are committed without overwriting existing
+data, and an array of top-level pointers were available in the
+header. If the transaction is“soft” then we would not need a sync
+at all: existing processes would pick up the new hash table and
+free list and work with that.
+
+At some later point, a sync would allow recovery of the old data
+into the free lists (perhaps when the array of top-level pointers
+filled). On crash, tdb_open() would examine the array of top
+levels, and apply the transactions until it encountered an
+invalid checksum.
+
+3.15 Tracing Is Fragile, Replay Is External
+
+The current TDB has compile-time-enabled tracing code, but it
+often breaks as it is not enabled by default. In a similar way,
+the ctdb code has an external wrapper which does replay tracing
+so it can coordinate cluster-wide transactions.
+
+3.15.1 Proposed Solution<replay-attribute>
+
+Tridge points out that an attribute can be later added to
+tdb_open (see[attributes]) to provide replay/trace hooks, which
+could become the basis for this and future parallel transactions
+and snapshot support.
+
+3.15.2 Status
+
+Deferred.
--- /dev/null
+ /*
+ Trivial Database 2: free list/block handling
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+#include <ccan/ilog/ilog.h>
+#include <time.h>
+#include <limits.h>
+
+static unsigned fls64(uint64_t val)
+{
+ return ilog64(val); /* pure pass-through to ilog64(), presumably the one from <ccan/ilog/ilog.h> -- confirm its result for val == 0 */
+}
+
+/* In which bucket would we find a particular record size? (ignoring header)
+ *
+ * data_len must be at least NTDB_MIN_DATA_LEN; this is only enforced by the
+ * assert() below, so it is not checked when assertions are compiled out.
+ *
+ * With excess = data_len - NTDB_MIN_DATA_LEN:
+ *  - excess <= 64: the bucket is excess / 8 (integer division);
+ *  - otherwise:    the bucket is fls64(excess) + 2.
+ * The result is finally clamped so that it never exceeds
+ * NTDB_FREE_BUCKETS - 1, i.e. every oversized length shares the last bucket.
+ */
+unsigned int size_to_bucket(ntdb_len_t data_len)
+{
+ unsigned int bucket;
+
+ /* We can't have records smaller than this. */
+ assert(data_len >= NTDB_MIN_DATA_LEN);
+
+ /* Ignoring the header... */
+ if (data_len - NTDB_MIN_DATA_LEN <= 64) {
+ /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */
+ bucket = (data_len - NTDB_MIN_DATA_LEN) / 8;
+ } else {
+ /* After that we go power of 2. */
+ bucket = fls64(data_len - NTDB_MIN_DATA_LEN) + 2;
+ }
+
+ if (unlikely(bucket >= NTDB_FREE_BUCKETS))
+ bucket = NTDB_FREE_BUCKETS - 1;
+ return bucket;
+}
+
+/* Read the offset of the first free table from the database header.
+ * The value comes straight from ntdb_read_off(), so it may be an encoded
+ * error; callers test it with NTDB_OFF_IS_ERR(). */
+ntdb_off_t first_ftable(struct ntdb_context *ntdb)
+{
+	const ntdb_off_t field = offsetof(struct ntdb_header, free_table);
+
+	return ntdb_read_off(ntdb, field);
+}
+
+/* Read the 'next' link of the free table located at 'ftable'.
+ * The value comes straight from ntdb_read_off(), so it may be an encoded
+ * error; callers treat 0 as the end of the chain. */
+ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable)
+{
+	const ntdb_off_t link = ftable + offsetof(struct ntdb_freetable, next);
+
+	return ntdb_read_off(ntdb, link);
+}
+
+/* Pick the free table this context will use, at random.
+ *
+ * Each table in the chain gets a random() draw and the table with the
+ * highest draw wins (ties go to the later table). ntdb->ftable_off and
+ * ntdb->ftable record the winner's offset and index.
+ *
+ * Returns NTDB_SUCCESS, or the error encoded in a table offset read. */
+enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb)
+{
+	unsigned int best = 0, idx = 0;
+	ntdb_off_t cur = first_ftable(ntdb);
+
+	/* Start with the first table (or the raw value read, if none). */
+	ntdb->ftable_off = cur;
+	ntdb->ftable = 0;
+
+	for (; cur != 0; cur = next_ftable(ntdb, cur), idx++) {
+		unsigned int draw;
+
+		if (NTDB_OFF_IS_ERR(cur)) {
+			return NTDB_OFF_TO_ERR(cur);
+		}
+
+		draw = random();
+		if (draw < best)
+			continue;
+
+		/* New highest draw: this table is the current choice. */
+		ntdb->ftable_off = cur;
+		ntdb->ftable = idx;
+		best = draw;
+	}
+	return NTDB_SUCCESS;
+}
+
+/* File offset of bucket number 'bucket' inside the free table that starts
+ * at 'ftable_off'. */
+ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket)
+{
+	const ntdb_off_t first_bucket
+		= ftable_off + offsetof(struct ntdb_freetable, buckets);
+
+	return first_bucket + bucket * sizeof(ntdb_off_t);
+}
+
+/* Find the first non-empty bucket at or after 'bucket' in the free table
+ * at 'ftable_off'. No lock is held, so the answer is only a hint.
+ *
+ * The result is whatever ntdb_find_nonzero_off() reports for the
+ * NTDB_FREE_BUCKETS-entry bucket array: a bucket number to search, a value
+ * of at least NTDB_FREE_BUCKETS when nothing was found, or an encoded
+ * error. */
+static ntdb_off_t find_free_head(struct ntdb_context *ntdb,
+				 ntdb_off_t ftable_off,
+				 ntdb_off_t bucket)
+{
+	const ntdb_off_t array_start = bucket_off(ftable_off, 0);
+
+	return ntdb_find_nonzero_off(ntdb, array_start,
+				     bucket, NTDB_FREE_BUCKETS);
+}
+
+/* Debug-only consistency check of the free list whose head is at b_off.
+ *
+ * Compiled to a no-op unless CCAN_NTDB_DEBUG is defined. When enabled it
+ * walks the 'next' chain and abort()s if a record lacks the free magic, if
+ * a non-head record's prev does not name its predecessor, or if the head's
+ * prev does not name the last record. Read errors are not checked here. */
+static void check_list(struct ntdb_context *ntdb, ntdb_off_t b_off)
+{
+#ifdef CCAN_NTDB_DEBUG
+	struct ntdb_free_record rec;
+	ntdb_off_t head, cur, last = 0;
+
+	head = (ntdb_read_off(ntdb, b_off) & NTDB_OFF_MASK);
+	for (cur = head; cur != 0; cur = rec.next) {
+		ntdb_read_convert(ntdb, cur, &rec, sizeof(rec));
+		if (frec_magic(&rec) != NTDB_FREE_MAGIC)
+			abort();
+		if (last && frec_prev(&rec) != last)
+			abort();
+		last = cur;
+	}
+
+	/* The head's prev must point at the tail we just reached. */
+	if (head) {
+		ntdb_read_convert(ntdb, head, &rec, sizeof(rec));
+		if (frec_prev(&rec) != last)
+			abort();
+	}
+#endif
+}
+
+/* Unlink the free record at r_off from the free list whose head word is
+ * stored at b_off.
+ *
+ * 'r' is the caller's in-memory copy of the record at r_off. As the
+ * branches below rely on, the 'next' chain is 0-terminated while the
+ * head's prev points at the tail (so a lone record's prev is itself).
+ *
+ * Returns NTDB_SUCCESS, an error from the offset read/write helpers, or
+ * the result of logging NTDB_ERR_CORRUPT when the list is inconsistent. */
+static enum NTDB_ERROR remove_from_list(struct ntdb_context *ntdb,
+ ntdb_off_t b_off, ntdb_off_t r_off,
+ const struct ntdb_free_record *r)
+{
+ ntdb_off_t off, prev_next, head;
+ enum NTDB_ERROR ecode;
+
+ /* Is this only element in list? Zero out bucket, and we're done. */
+ /* Note: this writes 0 to the whole bucket word, upper bits included. */
+ if (frec_prev(r) == r_off)
+ return ntdb_write_off(ntdb, b_off, 0);
+
+ /* off = &r->prev->next */
+ off = frec_prev(r) + offsetof(struct ntdb_free_record, next);
+
+ /* Get prev->next */
+ prev_next = ntdb_read_off(ntdb, off);
+ if (NTDB_OFF_IS_ERR(prev_next))
+ return NTDB_OFF_TO_ERR(prev_next);
+
+ /* If prev->next == 0, we were head: update bucket to point to next. */
+ if (prev_next == 0) {
+ /* We must preserve upper bits. */
+ head = ntdb_read_off(ntdb, b_off);
+ if (NTDB_OFF_IS_ERR(head))
+ return NTDB_OFF_TO_ERR(head);
+
+ /* Sanity check: the bucket really must point at us. */
+ if ((head & NTDB_OFF_MASK) != r_off) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "remove_from_list:"
+ " %llu head %llu on list %llu",
+ (long long)r_off,
+ (long long)head,
+ (long long)b_off);
+ }
+ head = ((head & ~NTDB_OFF_MASK) | r->next);
+ ecode = ntdb_write_off(ntdb, b_off, head);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+ } else {
+ /* r->prev->next = r->next */
+ ecode = ntdb_write_off(ntdb, off, r->next);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+ }
+
+ /* If we were the tail, off = &head->prev. */
+ if (r->next == 0) {
+ /* Re-read the bucket: it may have just been rewritten above. */
+ head = ntdb_read_off(ntdb, b_off);
+ if (NTDB_OFF_IS_ERR(head))
+ return NTDB_OFF_TO_ERR(head);
+ head &= NTDB_OFF_MASK;
+ off = head + offsetof(struct ntdb_free_record, magic_and_prev);
+ } else {
+ /* off = &r->next->prev */
+ off = r->next + offsetof(struct ntdb_free_record,
+ magic_and_prev);
+ }
+
+#ifdef CCAN_NTDB_DEBUG
+ /* *off == r */
+ /* NOTE(review): this debug read is not checked for an error return. */
+ if ((ntdb_read_off(ntdb, off) & NTDB_OFF_MASK) != r_off) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "remove_from_list:"
+ " %llu bad prev in list %llu",
+ (long long)r_off, (long long)b_off);
+ }
+#endif
+ /* r->next->prev = r->prev */
+ /* The whole magic_and_prev word is copied, so the magic travels too. */
+ return ntdb_write_off(ntdb, off, r->magic_and_prev);
+}
+
+/* Push the free record at 'off' (usable length 'len') onto the head of the
+ * free list whose bucket word is at 'b_off', and write the record header.
+ *
+ * The bucket word holds the head offset in its NTDB_OFF_MASK bits and an
+ * enqueue counter above them. If *coalesce is true on entry the counter
+ * is bumped, and *coalesce is set on return to whether the counter wrapped
+ * to zero (i.e. it is time to coalesce). If *coalesce is false on entry
+ * the counter bits are cleared and *coalesce is left untouched.
+ *
+ * Returns NTDB_SUCCESS, an I/O error from the read/write helpers, or the
+ * result of logging NTDB_ERR_CORRUPT if the current head is not a free
+ * record. */
+static enum NTDB_ERROR enqueue_in_free(struct ntdb_context *ntdb,
+				       ntdb_off_t b_off,
+				       ntdb_off_t off,
+				       ntdb_len_t len,
+				       bool *coalesce)
+{
+	struct ntdb_free_record new;
+	enum NTDB_ERROR ecode;
+	ntdb_off_t prev, head;
+	uint64_t magic = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL));
+
+	head = ntdb_read_off(ntdb, b_off);
+	if (NTDB_OFF_IS_ERR(head))
+		return NTDB_OFF_TO_ERR(head);
+
+	/* Tag the record with the free table it lives in, plus its length;
+	 * next and magic_and_prev are filled in below. */
+	new.ftable_and_len = ((uint64_t)ntdb->ftable
+			      << (64 - NTDB_OFF_UPPER_STEAL))
+		| len;
+
+	/* new->next = head. */
+	new.next = (head & NTDB_OFF_MASK);
+
+	/* First element? Prev points to ourselves. */
+	if (!new.next) {
+		new.magic_and_prev = (magic | off);
+	} else {
+		/* new->prev = next->prev */
+		prev = ntdb_read_off(ntdb,
+				     new.next
+				     + offsetof(struct ntdb_free_record,
+						magic_and_prev));
+		/* A failed read must surface as that error; previously it
+		 * fell through and was misreported as corruption. */
+		if (NTDB_OFF_IS_ERR(prev))
+			return NTDB_OFF_TO_ERR(prev);
+		new.magic_and_prev = prev;
+		if (frec_magic(&new) != NTDB_FREE_MAGIC) {
+			return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+					   NTDB_LOG_ERROR,
+					   "enqueue_in_free: %llu bad head"
+					   " prev %llu",
+					   (long long)new.next,
+					   (long long)prev);
+		}
+		/* next->prev = new. */
+		ecode = ntdb_write_off(ntdb, new.next
+				       + offsetof(struct ntdb_free_record,
+						  magic_and_prev),
+				       off | magic);
+		if (ecode != NTDB_SUCCESS) {
+			return ecode;
+		}
+
+#ifdef CCAN_NTDB_DEBUG
+		/* The tail (our prev) must terminate the next chain. */
+		prev = ntdb_read_off(ntdb, frec_prev(&new)
+				     + offsetof(struct ntdb_free_record, next));
+		if (prev != 0) {
+			return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+					   NTDB_LOG_ERROR,
+					   "enqueue_in_free:"
+					   " %llu bad tail next ptr %llu",
+					   (long long)frec_prev(&new)
+					   + offsetof(struct ntdb_free_record,
+						      next),
+					   (long long)prev);
+		}
+#endif
+	}
+
+	/* Update enqueue count, but don't set high bit: see NTDB_OFF_IS_ERR */
+	if (*coalesce)
+		head += (1ULL << (64 - NTDB_OFF_UPPER_STEAL));
+	head &= ~(NTDB_OFF_MASK | (1ULL << 63));
+	head |= off;
+
+	ecode = ntdb_write_off(ntdb, b_off, head);
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+
+	/* It's time to coalesce if counter wrapped. */
+	if (*coalesce)
+		*coalesce = ((head & ~NTDB_OFF_MASK) == 0);
+
+	/* Finally write the new record's own header. */
+	return ntdb_write_convert(ntdb, off, &new, sizeof(new));
+}
+
+/* Translate a free-table index into that table's file offset.
+ *
+ * The context's current table is answered from the cached ntdb->ftable_off;
+ * any other index is found by following the chain from the first table.
+ * An encoded read error stops the walk and is returned as-is. */
+static ntdb_off_t ftable_offset(struct ntdb_context *ntdb, unsigned int ftable)
+{
+	ntdb_off_t cur;
+	unsigned int hops;
+
+	if (likely(ntdb->ftable == ftable))
+		return ntdb->ftable_off;
+
+	cur = first_ftable(ntdb);
+	for (hops = ftable; hops > 0 && !NTDB_OFF_IS_ERR(cur); hops--)
+		cur = next_ftable(ntdb, cur);
+	return cur;
+}
+
+/* Try to merge the free records that directly follow the free record at
+ * 'off' (usable length 'data_len', sitting on the locked bucket 'b_off')
+ * into that record.
+ *
+ * Returns 0 if no neighbour was merged (bucket b_off is still locked), the
+ * new usable length if the record grew, or an error encoded with
+ * NTDB_ERR_TO_OFF() (bucket b_off has then been unlocked).
+ *
+ * '*protect' names a record the caller still wants to use. It is
+ * overwritten with an error value if that record was swallowed, if it is
+ * 'off' itself and 'off' was expanded, or if the b_off lock had to be
+ * dropped; whenever that happens on a successful merge, the b_off lock has
+ * been released before returning.
+ *
+ * Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and
+ * need to blatt the *protect record (which is set to an error). */
+static ntdb_len_t coalesce(struct ntdb_context *ntdb,
+ ntdb_off_t off, ntdb_off_t b_off,
+ ntdb_len_t data_len,
+ ntdb_off_t *protect)
+{
+ ntdb_off_t end;
+ struct ntdb_free_record rec;
+ enum NTDB_ERROR ecode;
+
+ ntdb->stats.alloc_coalesce_tried++;
+ /* 'end' is the offset just past our record: where a neighbour starts. */
+ end = off + sizeof(struct ntdb_used_record) + data_len;
+
+ while (end < ntdb->file->map_size) {
+ const struct ntdb_free_record *r;
+ ntdb_off_t nb_off;
+ unsigned ftable, bucket;
+
+ /* Unlocked peek at the neighbour to work out which bucket to lock. */
+ r = ntdb_access_read(ntdb, end, sizeof(*r), true);
+ if (NTDB_PTR_IS_ERR(r)) {
+ ecode = NTDB_PTR_ERR(r);
+ goto err;
+ }
+
+ /* Not free, or marked NTDB_FTABLE_NONE (see the lock-drop path below). */
+ if (frec_magic(r) != NTDB_FREE_MAGIC
+ || frec_ftable(r) == NTDB_FTABLE_NONE) {
+ ntdb_access_release(ntdb, r);
+ break;
+ }
+
+ ftable = frec_ftable(r);
+ bucket = size_to_bucket(frec_len(r));
+ nb_off = ftable_offset(ntdb, ftable);
+ if (NTDB_OFF_IS_ERR(nb_off)) {
+ ntdb_access_release(ntdb, r);
+ ecode = NTDB_OFF_TO_ERR(nb_off);
+ goto err;
+ }
+ nb_off = bucket_off(nb_off, bucket);
+ ntdb_access_release(ntdb, r);
+
+ /* We may be violating lock order here, so best effort. */
+ if (ntdb_lock_free_bucket(ntdb, nb_off, NTDB_LOCK_NOWAIT)
+ != NTDB_SUCCESS) {
+ ntdb->stats.alloc_coalesce_lockfail++;
+ break;
+ }
+
+ /* Now we have lock, re-check. */
+ ecode = ntdb_read_convert(ntdb, end, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_unlock_free_bucket(ntdb, nb_off);
+ goto err;
+ }
+
+ if (unlikely(frec_magic(&rec) != NTDB_FREE_MAGIC)) {
+ ntdb->stats.alloc_coalesce_race++;
+ ntdb_unlock_free_bucket(ntdb, nb_off);
+ break;
+ }
+
+ /* It changed table or bucket since the peek: we hold the wrong lock. */
+ if (unlikely(frec_ftable(&rec) != ftable)
+ || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
+ ntdb->stats.alloc_coalesce_race++;
+ ntdb_unlock_free_bucket(ntdb, nb_off);
+ break;
+ }
+
+ /* Did we just mess up a record you were hoping to use? */
+ if (end == *protect) {
+ ntdb->stats.alloc_coalesce_iterate_clash++;
+ *protect = NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST);
+ }
+
+ ecode = remove_from_list(ntdb, nb_off, end, &rec);
+ check_list(ntdb, nb_off);
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_unlock_free_bucket(ntdb, nb_off);
+ goto err;
+ }
+
+ /* Swallow the neighbour (header plus data) and look at what follows. */
+ end += sizeof(struct ntdb_used_record) + frec_len(&rec);
+ ntdb_unlock_free_bucket(ntdb, nb_off);
+ ntdb->stats.alloc_coalesce_num_merged++;
+ }
+
+ /* Didn't find any adjacent free? */
+ if (end == off + sizeof(struct ntdb_used_record) + data_len)
+ return 0;
+
+ /* Before we expand, check this isn't one you wanted protected? */
+ if (off == *protect) {
+ *protect = NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS);
+ ntdb->stats.alloc_coalesce_iterate_clash++;
+ }
+
+ /* OK, expand initial record */
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ goto err;
+ }
+
+ if (frec_len(&rec) != data_len) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "coalesce: expected data len %zu not %zu",
+ (size_t)data_len, (size_t)frec_len(&rec));
+ goto err;
+ }
+
+ /* Take the original off its list; it is re-added with the merged size. */
+ ecode = remove_from_list(ntdb, b_off, off, &rec);
+ check_list(ntdb, b_off);
+ if (ecode != NTDB_SUCCESS) {
+ goto err;
+ }
+
+ /* Try locking violation first. We don't allow coalesce recursion! */
+ ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_NOWAIT, false);
+ if (ecode != NTDB_SUCCESS) {
+ /* Need to drop lock. Can't rely on anything stable. */
+ ntdb->stats.alloc_coalesce_lockfail++;
+ *protect = NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT);
+
+ /* We have to drop this to avoid deadlocks, so make sure record
+ * doesn't get coalesced by someone else! */
+ rec.ftable_and_len = (NTDB_FTABLE_NONE
+ << (64 - NTDB_OFF_UPPER_STEAL))
+ | (end - off - sizeof(struct ntdb_used_record));
+ ecode = ntdb_write_off(ntdb,
+ off + offsetof(struct ntdb_free_record,
+ ftable_and_len),
+ rec.ftable_and_len);
+ if (ecode != NTDB_SUCCESS) {
+ goto err;
+ }
+
+ ntdb_unlock_free_bucket(ntdb, b_off);
+
+ /* b_off is no longer held, so a blocking lock attempt is now made. */
+ ecode = add_free_record(ntdb, off, end - off, NTDB_LOCK_WAIT,
+ false);
+ if (ecode != NTDB_SUCCESS) {
+ /* b_off was already unlocked above, so don't go via err. */
+ return NTDB_ERR_TO_OFF(ecode);
+ }
+ } else if (NTDB_OFF_IS_ERR(*protect)) {
+ /* For simplicity, we always drop lock if they can't continue */
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ }
+ ntdb->stats.alloc_coalesce_succeeded++;
+
+ /* Return usable length. */
+ return end - off - sizeof(struct ntdb_used_record);
+
+err:
+ /* To unify error paths, we *always* unlock bucket on error. */
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* Run coalesce() over the first records of the free list at b_off, then
+ * rotate the list so the first record not visited becomes the new head.
+ *
+ * List is locked: we unlock it (on every return path, either here or
+ * inside coalesce()).
+ *
+ * 'limit' is the number of records to try; it is extended as merges
+ * succeed. 'ftable_off' is not referenced in the body.
+ *
+ * Returns NTDB_SUCCESS or the first error encountered. */
+static enum NTDB_ERROR coalesce_list(struct ntdb_context *ntdb,
+ ntdb_off_t ftable_off,
+ ntdb_off_t b_off,
+ unsigned int limit)
+{
+ enum NTDB_ERROR ecode;
+ ntdb_off_t off;
+
+ off = ntdb_read_off(ntdb, b_off);
+ if (NTDB_OFF_IS_ERR(off)) {
+ ecode = NTDB_OFF_TO_ERR(off);
+ goto unlock_err;
+ }
+ /* A little bit of paranoia: counter should be 0. */
+ off &= NTDB_OFF_MASK;
+
+ while (off && limit--) {
+ struct ntdb_free_record rec;
+ ntdb_len_t coal;
+ ntdb_off_t next;
+
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ /* Pass 'next' as the protected record: coalesce() turns it into an
+ * error value if it cannot be used to continue the walk. */
+ next = rec.next;
+ coal = coalesce(ntdb, off, b_off, frec_len(&rec), &next);
+ if (NTDB_OFF_IS_ERR(coal)) {
+ /* This has already unlocked on error. */
+ return NTDB_OFF_TO_ERR(coal);
+ }
+ if (NTDB_OFF_IS_ERR(next)) {
+ /* Coalescing had to unlock, so stop. */
+ return NTDB_SUCCESS;
+ }
+ /* Keep going if we're doing well... */
+ limit += size_to_bucket(coal / 16 + NTDB_MIN_DATA_LEN);
+ off = next;
+ }
+
+ /* Now, move those elements to the tail of the list so we get something
+ * else next time. */
+ if (off) {
+ struct ntdb_free_record oldhrec, newhrec, oldtrec, newtrec;
+ ntdb_off_t oldhoff, oldtoff, newtoff;
+
+ /* The record we were up to is the new head. */
+ ecode = ntdb_read_convert(ntdb, off, &newhrec, sizeof(newhrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ /* Get the new tail. */
+ newtoff = frec_prev(&newhrec);
+ ecode = ntdb_read_convert(ntdb, newtoff, &newtrec,
+ sizeof(newtrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ /* Get the old head. */
+ oldhoff = ntdb_read_off(ntdb, b_off);
+ if (NTDB_OFF_IS_ERR(oldhoff)) {
+ ecode = NTDB_OFF_TO_ERR(oldhoff);
+ goto unlock_err;
+ }
+
+ /* This could happen if they all coalesced away. */
+ /* NOTE(review): oldhoff is the raw bucket word and is not masked
+ * with NTDB_OFF_MASK here, unlike 'off' -- this appears to rely on
+ * the counter bits being 0; confirm. */
+ if (oldhoff == off)
+ goto out;
+
+ ecode = ntdb_read_convert(ntdb, oldhoff, &oldhrec,
+ sizeof(oldhrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ /* Get the old tail. */
+ oldtoff = frec_prev(&oldhrec);
+ ecode = ntdb_read_convert(ntdb, oldtoff, &oldtrec,
+ sizeof(oldtrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ /* Old tail's next points to old head. */
+ oldtrec.next = oldhoff;
+
+ /* Old head's prev points to old tail. */
+ oldhrec.magic_and_prev
+ = (NTDB_FREE_MAGIC << (64 - NTDB_OFF_UPPER_STEAL))
+ | oldtoff;
+
+ /* New tail's next is 0. */
+ newtrec.next = 0;
+
+ /* Write out the modified versions. */
+ ecode = ntdb_write_convert(ntdb, oldtoff, &oldtrec,
+ sizeof(oldtrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ ecode = ntdb_write_convert(ntdb, oldhoff, &oldhrec,
+ sizeof(oldhrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ ecode = ntdb_write_convert(ntdb, newtoff, &newtrec,
+ sizeof(newtrec));
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+
+ /* And finally link in new head. */
+ /* The new head's prev already names the new tail, so the record
+ * itself needs no rewrite. */
+ ecode = ntdb_write_off(ntdb, b_off, off);
+ if (ecode != NTDB_SUCCESS)
+ goto unlock_err;
+ }
+out:
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ return NTDB_SUCCESS;
+
+unlock_err:
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ return ecode;
+}
+
+/* Put the region [off, off + len_with_header) on the current free table.
+ *
+ * The bucket is chosen from the usable length (total minus the used-record
+ * header) and locked according to 'waitflag' for the duration of the
+ * insert. If 'coalesce_ok' is set and the insert says it is time, the
+ * bucket is coalesced; coalesce_list() then releases the lock.
+ *
+ * List must not be locked if coalesce_ok is set.
+ *
+ * Returns NTDB_SUCCESS or the first lock, enqueue or coalesce error. */
+enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
+				ntdb_off_t off, ntdb_len_t len_with_header,
+				enum ntdb_lock_flags waitflag,
+				bool coalesce_ok)
+{
+	enum NTDB_ERROR ecode;
+	ntdb_len_t usable;
+	ntdb_off_t b_off;
+
+	assert(len_with_header >= sizeof(struct ntdb_free_record));
+
+	usable = len_with_header - sizeof(struct ntdb_used_record);
+	b_off = bucket_off(ntdb->ftable_off, size_to_bucket(usable));
+
+	ecode = ntdb_lock_free_bucket(ntdb, b_off, waitflag);
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+
+	/* coalesce_ok comes back true only if the enqueue counter wrapped. */
+	ecode = enqueue_in_free(ntdb, b_off, off, usable, &coalesce_ok);
+	check_list(ntdb, b_off);
+
+	if (ecode || !coalesce_ok) {
+		ntdb_unlock_free_bucket(ntdb, b_off);
+		return ecode;
+	}
+
+	/* Coalescing unlocks free list. */
+	return coalesce_list(ntdb, ntdb->ftable_off, b_off, 2);
+}
+
+/* Space to reserve for a record's key plus data: at least
+ * NTDB_MIN_DATA_LEN, rounded up to a multiple of sizeof(uint64_t). */
+static size_t adjust_size(size_t keylen, size_t datalen)
+{
+	size_t need = keylen + datalen;
+
+	if (need < NTDB_MIN_DATA_LEN)
+		need = NTDB_MIN_DATA_LEN;
+
+	/* Round to next uint64_t boundary. */
+	need += sizeof(uint64_t) - 1ULL;
+	return need & ~(sizeof(uint64_t) - 1ULL);
+}
+
+/* How many bytes of a 'total_len'-byte free region can be split off as a
+ * separate free record after storing this key and data?
+ *
+ * With 'want_extra' the data is budgeted at 1.5x its length. Returns 0
+ * when the remainder is too small to hold a struct ntdb_free_record. */
+static size_t record_leftover(size_t keylen, size_t datalen,
+			      bool want_extra, size_t total_len)
+{
+	ssize_t spare;
+
+	if (want_extra)
+		datalen += datalen / 2;
+
+	spare = total_len - adjust_size(keylen, datalen);
+	if (spare >= (ssize_t)sizeof(struct ntdb_free_record))
+		return spare;
+
+	return 0;
+}
+
+/* Try to allocate room for a keylen + datalen record from one bucket.
+ *
+ * Locks bucket number 'bucket' of the free table at 'ftable_off', walks
+ * its list looking for the smallest record that fits (accepting a looser
+ * fit the further it walks), removes it, writes a used-record header with
+ * 'magic' over it and returns any worthwhile remainder to the free lists.
+ * The bucket lock is released before returning on every path.
+ *
+ * Returns the offset of the new record, 0 if nothing in this bucket fits,
+ * or an error encoded with NTDB_ERR_TO_OFF(). */
+static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb,
+ ntdb_off_t ftable_off,
+ ntdb_off_t bucket,
+ size_t keylen, size_t datalen,
+ bool want_extra,
+ unsigned magic)
+{
+ ntdb_off_t off, b_off,best_off;
+ struct ntdb_free_record best = { 0 };
+ double multiplier;
+ size_t size = adjust_size(keylen, datalen);
+ enum NTDB_ERROR ecode;
+
+ ntdb->stats.allocs++;
+ b_off = bucket_off(ftable_off, bucket);
+
+ /* FIXME: Try non-blocking wait first, to measure contention. */
+ /* Lock this bucket. */
+ ecode = ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT);
+ if (ecode != NTDB_SUCCESS) {
+ return NTDB_ERR_TO_OFF(ecode);
+ }
+
+ /* All-ones sentinel so the first record that fits compares smaller. */
+ best.ftable_and_len = -1ULL;
+ best_off = 0;
+
+ /* Get slack if we're after extra. */
+ if (want_extra)
+ multiplier = 1.5;
+ else
+ multiplier = 1.0;
+
+ /* Walk the list to see if any are large enough, getting less fussy
+ * as we go. */
+ off = ntdb_read_off(ntdb, b_off);
+ if (NTDB_OFF_IS_ERR(off)) {
+ ecode = NTDB_OFF_TO_ERR(off);
+ goto unlock_err;
+ }
+ /* Strip the upper bits: only the head offset is wanted. */
+ off &= NTDB_OFF_MASK;
+
+ while (off) {
+ const struct ntdb_free_record *r;
+ ntdb_off_t next;
+
+ r = ntdb_access_read(ntdb, off, sizeof(*r), true);
+ if (NTDB_PTR_IS_ERR(r)) {
+ ecode = NTDB_PTR_ERR(r);
+ goto unlock_err;
+ }
+
+ if (frec_magic(r) != NTDB_FREE_MAGIC) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "lock_and_alloc:"
+ " %llu non-free 0x%llx",
+ (long long)off,
+ (long long)r->magic_and_prev);
+ ntdb_access_release(ntdb, r);
+ goto unlock_err;
+ }
+
+ /* Big enough, and tighter than the best so far: remember a copy. */
+ if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) {
+ best_off = off;
+ best = *r;
+ }
+
+ /* Stop as soon as the best candidate is within the current slack. */
+ if (frec_len(&best) <= size * multiplier && best_off) {
+ ntdb_access_release(ntdb, r);
+ break;
+ }
+
+ /* Each step widens the acceptable slack by 1%. */
+ multiplier *= 1.01;
+
+ next = r->next;
+ ntdb_access_release(ntdb, r);
+ off = next;
+ }
+
+ /* If we found anything at all, use it. */
+ if (best_off) {
+ struct ntdb_used_record rec;
+ size_t leftover;
+
+ /* We're happy with this size: take it. */
+ ecode = remove_from_list(ntdb, b_off, best_off, &best);
+ check_list(ntdb, b_off);
+ if (ecode != NTDB_SUCCESS) {
+ goto unlock_err;
+ }
+
+ leftover = record_leftover(keylen, datalen, want_extra,
+ frec_len(&best));
+
+ assert(keylen + datalen + leftover <= frec_len(&best));
+ /* We need to mark non-free before we drop lock, otherwise
+ * coalesce() could try to merge it! */
+ ecode = set_header(ntdb, &rec, magic, keylen, datalen,
+ frec_len(&best) - leftover);
+ if (ecode != NTDB_SUCCESS) {
+ goto unlock_err;
+ }
+
+ ecode = ntdb_write_convert(ntdb, best_off, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ goto unlock_err;
+ }
+
+ /* For futureproofing, we put a 0 in any unused space. */
+ /* Only the first padding byte, just after key + data, is written. */
+ if (rec_extra_padding(&rec)) {
+ ecode = ntdb->io->twrite(ntdb, best_off + sizeof(rec)
+ + keylen + datalen, "", 1);
+ if (ecode != NTDB_SUCCESS) {
+ goto unlock_err;
+ }
+ }
+
+ /* Bucket of leftover will be <= current bucket, so nested
+ * locking is allowed. */
+ if (leftover) {
+ ntdb->stats.alloc_leftover++;
+ /* The leftover region is the tail end of the chosen record. */
+ ecode = add_free_record(ntdb,
+ best_off + sizeof(rec)
+ + frec_len(&best) - leftover,
+ leftover, NTDB_LOCK_WAIT, false);
+ if (ecode != NTDB_SUCCESS) {
+ best_off = NTDB_ERR_TO_OFF(ecode);
+ }
+ }
+ ntdb_unlock_free_bucket(ntdb, b_off);
+
+ return best_off;
+ }
+
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ return 0;
+
+unlock_err:
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* Get a free block from current free list, or 0 if none, -ve on error.
+ *
+ * The search starts in the context's current free table at the bucket
+ * matching the request size and works up through larger buckets; if that
+ * table has nothing it moves on through the other tables, wrapping round
+ * to the first, until it is back at the starting table. The table that
+ * satisfies the request becomes the context's current table.
+ *
+ * 'want_extra' sizes the starting bucket for datalen * 1.5; 'magic' is
+ * passed through to lock_and_alloc() for the new record's header. */
+static ntdb_off_t get_free(struct ntdb_context *ntdb,
+ size_t keylen, size_t datalen, bool want_extra,
+ unsigned magic)
+{
+ ntdb_off_t off, ftable_off;
+ ntdb_off_t start_b, b, ftable;
+ bool wrapped = false;
+
+ /* If they are growing, add 50% to get to higher bucket. */
+ if (want_extra)
+ start_b = size_to_bucket(adjust_size(keylen,
+ datalen + datalen / 2));
+ else
+ start_b = size_to_bucket(adjust_size(keylen, datalen));
+
+ ftable_off = ntdb->ftable_off;
+ ftable = ntdb->ftable;
+ /* Loop until we have wrapped and come back to the starting table. */
+ while (!wrapped || ftable_off != ntdb->ftable_off) {
+ /* Start at exact size bucket, and search up... */
+ /* An encoded error from find_free_head() also ends this loop, since
+ * it does not compare below NTDB_FREE_BUCKETS; it is checked after. */
+ for (b = find_free_head(ntdb, ftable_off, start_b);
+ b < NTDB_FREE_BUCKETS;
+ b = find_free_head(ntdb, ftable_off, b + 1)) {
+ /* Try getting one from list. */
+ off = lock_and_alloc(ntdb, ftable_off,
+ b, keylen, datalen, want_extra,
+ magic);
+ if (NTDB_OFF_IS_ERR(off))
+ return off;
+ if (off != 0) {
+ if (b == start_b)
+ ntdb->stats.alloc_bucket_exact++;
+ if (b == NTDB_FREE_BUCKETS - 1)
+ ntdb->stats.alloc_bucket_max++;
+ /* Worked? Stay using this list. */
+ ntdb->ftable_off = ftable_off;
+ ntdb->ftable = ftable;
+ return off;
+ }
+ /* Didn't work. Try next bucket. */
+ }
+
+ if (NTDB_OFF_IS_ERR(b)) {
+ return b;
+ }
+
+ /* Hmm, try next table. */
+ ftable_off = next_ftable(ntdb, ftable_off);
+ if (NTDB_OFF_IS_ERR(ftable_off)) {
+ return ftable_off;
+ }
+ ftable++;
+
+ /* End of the table chain: go round again from the first table. */
+ if (ftable_off == 0) {
+ wrapped = true;
+ ftable_off = first_ftable(ntdb);
+ if (NTDB_OFF_IS_ERR(ftable_off)) {
+ return ftable_off;
+ }
+ ftable = 0;
+ }
+ }
+
+ /* Every table was tried without success. */
+ return 0;
+}
+
+/* Fill in a used-record header for a record with the given key length,
+ * data length and total usable length ('actuallen', which includes any
+ * padding after key + data).
+ *
+ * The fields are packed into rec->magic_and_meta and
+ * rec->key_and_data_len, then decoded again to make sure nothing was
+ * truncated. Returns NTDB_SUCCESS, or the result of logging NTDB_ERR_IO
+ * if the values do not survive the round trip. */
+enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
+			   struct ntdb_used_record *rec,
+			   unsigned magic, uint64_t keylen, uint64_t datalen,
+			   uint64_t actuallen)
+{
+	const uint64_t padding = actuallen - (keylen + datalen);
+	/* Half the key's bit width, rounded up; doubled again below. */
+	const uint64_t keybits = (fls64(keylen) + 1) / 2;
+
+	rec->magic_and_meta = (padding << 11)
+		| (keybits << 43)
+		| ((uint64_t)magic << 48);
+	rec->key_and_data_len = (keylen | (datalen << (keybits*2)));
+
+	/* Encoding can fail on big values. */
+	if (rec_key_length(rec) == keylen
+	    && rec_data_length(rec) == datalen
+	    && rec_extra_padding(rec) == padding) {
+		return NTDB_SUCCESS;
+	}
+
+	return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+			   "Could not encode k=%llu,d=%llu,a=%llu",
+			   (long long)keylen, (long long)datalen,
+			   (long long)actuallen);
+}
+
+/* Given the current file size and the number of bytes needed, return how
+ * many bytes the file should grow by.
+ *
+ * The new size is the larger of:
+ *  - room for 100 more records of 'size' bytes (only 2 if 'size' exceeds
+ *    100KiB, so one oddball huge record does not balloon the file), and
+ *  - 25% growth (10% once the file is over 100MiB),
+ * rounded up to a multiple of NTDB_PGSIZE. */
+ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size)
+{
+	ntdb_off_t grown, floor_size;
+
+	/* Minimum: space for several more records like this one. */
+	if (size > 100 * 1024)
+		floor_size = map_size + size * 2;
+	else
+		floor_size = map_size + size * 100;
+
+	/* Proportional growth, gentler for big databases. */
+	if (map_size > 100 * 1024 * 1024)
+		grown = map_size * 1.10;
+	else
+		grown = map_size * 1.25;
+
+	if (grown < floor_size)
+		grown = floor_size;
+
+	/* We always make the file a multiple of transaction page
+	 * size. This guarantees that the transaction recovery area
+	 * is always aligned, otherwise the transaction code can overwrite
+	 * itself. */
+	grown = (grown + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
+
+	return grown - map_size;
+}
+
+/* Expand the database so that at least 'size' more bytes of record space
+ * (plus a record header) become available, and add the new space to the
+ * free list.
+ *
+ * Unless NTDB_NOLOCK is set, the caller must already hold the allrecord
+ * lock or a hash lock. If the file is found to have changed size once the
+ * expand lock is held, nothing is done and NTDB_SUCCESS is returned so the
+ * caller can retry its allocation.
+ *
+ * Returns NTDB_SUCCESS or the first error encountered. */
+static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size)
+{
+ uint64_t old_size;
+ ntdb_len_t wanted;
+ enum NTDB_ERROR ecode;
+
+ /* Need to hold a hash lock to expand DB: transactions rely on it. */
+ if (!(ntdb->flags & NTDB_NOLOCK)
+ && !ntdb->file->allrecord_lock.count && !ntdb_has_hash_locks(ntdb)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_expand: must hold lock during expand");
+ }
+
+ /* Only one person can expand file at a time. */
+ ecode = ntdb_lock_expand(ntdb, F_WRLCK);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* Someone else may have expanded the file, so retry. */
+ old_size = ntdb->file->map_size;
+ /* The return value is deliberately ignored: the call is made only so
+ * that map_size can be compared before and after. */
+ ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
+ if (ntdb->file->map_size != old_size) {
+ ntdb_unlock_expand(ntdb, F_WRLCK);
+ return NTDB_SUCCESS;
+ }
+
+ /* We need room for the record header too. */
+ size = adjust_size(0, sizeof(struct ntdb_used_record) + size);
+ /* Overallocate. */
+ wanted = ntdb_expand_adjust(old_size, size);
+
+ ecode = ntdb->io->expand_file(ntdb, wanted);
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_unlock_expand(ntdb, F_WRLCK);
+ return ecode;
+ }
+
+ /* We need to drop this lock before adding free record. */
+ ntdb_unlock_expand(ntdb, F_WRLCK);
+
+ ntdb->stats.expands++;
+ /* The new space starts at the old end of file and is 'wanted' long. */
+ return add_free_record(ntdb, old_size, wanted, NTDB_LOCK_WAIT, true);
+}
+
+/* Allocate space for a record with the given key and data lengths.
+ *
+ * Running out of free space is not a failure: the database is expanded
+ * and the search retried until get_free() returns something. 'growing'
+ * asks for extra slack; 'magic' is stored in the new record's header.
+ *
+ * Returns the record offset, or an error encoded with NTDB_ERR_TO_OFF()
+ * if the search or the expansion itself failed. */
+ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
+		 unsigned magic, bool growing)
+{
+	ntdb_off_t found;
+	enum NTDB_ERROR ecode;
+
+	/* A zero result means "no space"; errors are non-zero and fall out. */
+	while (unlikely((found = get_free(ntdb, keylen, datalen,
+					  growing, magic)) == 0)) {
+		ecode = ntdb_expand(ntdb, adjust_size(keylen, datalen));
+		if (ecode != NTDB_SUCCESS) {
+			return NTDB_ERR_TO_OFF(ecode);
+		}
+	}
+
+	return found;
+}
--- /dev/null
+ /*
+ Trivial Database 2: hash handling
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/hash/hash.h>
+
+/* Default hash function: hash_stable() over the key's bytes with the given
+ * seed. The final argument is ignored. */
+uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
+			   void *unused)
+{
+	const unsigned char *bytes = (const unsigned char *)key;
+
+	return hash_stable(bytes, length, seed);
+}
+
+/* Hash 'len' bytes at 'ptr' using the hash function, seed and private data
+ * configured on this context. */
+uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len)
+{
+	uint32_t digest;
+
+	digest = ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data);
+	return digest;
+}
+
+/* Compare 'key' with the key stored in the used record at 'off', whose
+ * header is 'rec'.
+ *
+ * Returns true on a match, false on a mismatch (length or bytes), or a
+ * negative error if the record could not be accessed. On a match with a
+ * non-NULL 'rptr', *rptr is set to the mapped key+data and is NOT
+ * released here; otherwise the mapping is released before returning. */
+static ntdb_bool_err key_matches(struct ntdb_context *ntdb,
+				 const struct ntdb_used_record *rec,
+				 ntdb_off_t off,
+				 const NTDB_DATA *key,
+				 const char **rptr)
+{
+	const char *stored;
+
+	/* Cheap rejection first: lengths must agree. */
+	if (rec_key_length(rec) != key->dsize) {
+		ntdb->stats.compare_wrong_keylen++;
+		return false;
+	}
+
+	/* Map key and data together so a match can hand both to the caller. */
+	stored = ntdb_access_read(ntdb, off + sizeof(*rec),
+				  key->dsize + rec_data_length(rec), false);
+	if (NTDB_PTR_IS_ERR(stored)) {
+		return (ntdb_bool_err)NTDB_PTR_ERR(stored);
+	}
+
+	if (memcmp(stored, key->dptr, key->dsize) != 0) {
+		ntdb->stats.compare_wrong_keycmp++;
+		ntdb_access_release(ntdb, stored);
+		return false;
+	}
+
+	if (rptr) {
+		*rptr = stored;
+	} else {
+		ntdb_access_release(ntdb, stored);
+	}
+	return true;
+}
+
+/* Does the hash-table entry 'val' refer to a record holding 'key'?
+ *
+ * The extra hash bits stored in the top of 'val' are compared first, so
+ * most non-matching entries are rejected without reading the record.
+ * Otherwise the record header is read into *rec and the key compared.
+ *
+ * Returns true, false, or a negative error; 'rptr' is passed through to
+ * key_matches(). */
+static ntdb_bool_err match(struct ntdb_context *ntdb,
+			   uint32_t hash,
+			   const NTDB_DATA *key,
+			   ntdb_off_t val,
+			   struct ntdb_used_record *rec,
+			   const char **rptr)
+{
+	ntdb_off_t rec_off;
+	enum NTDB_ERROR ecode;
+
+	ntdb->stats.compares++;
+
+	/* Top bits of offset == next bits of hash. */
+	if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
+	    == bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) {
+		rec_off = val & NTDB_OFF_MASK;
+		ecode = ntdb_read_convert(ntdb, rec_off, rec, sizeof(*rec));
+		if (ecode == NTDB_SUCCESS) {
+			return key_matches(ntdb, rec, rec_off, key, rptr);
+		}
+		return (ntdb_bool_err)ecode;
+	}
+
+	ntdb->stats.compare_wrong_offsetbits++;
+	return false;
+}
+
+/* True if the hash-table entry has the NTDB_OFF_CHAIN_BIT flag set, i.e.
+ * it points at a chain rather than directly at a record. */
+static bool is_chain(ntdb_off_t val)
+{
+	return (val & (1ULL << NTDB_OFF_CHAIN_BIT)) != 0;
+}
+
+/* File offset of slot 'idx' in the bucket array that follows the used-record
+ * header at 'base'. */
+static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx)
+{
+	const ntdb_off_t first_slot = base + sizeof(struct ntdb_used_record);
+
+	return first_slot + idx * sizeof(ntdb_off_t);
+}
+
+/* This is the core routine which searches the hashtable for an entry.
+ *
+ * The hash bucket for 'key' is locked with 'ltype' and then searched,
+ * either directly in the top-level table or through the chain it points
+ * to.
+ *
+ * On error, no locks are held and -ve is returned.
+ * Otherwise, hinfo is filled in:
+ *  - h->table / h->bucket name the slot that holds the entry, or where a
+ *    new entry should go (first empty chain slot, or one past the end);
+ *  - h->old_val is non-zero when that slot is occupied, so an insert has
+ *    to create or grow a chain.
+ * If not found, the return value is 0.
+ * If found, the return value is the offset, and *rec is the record.
+ * 'rptr' is passed to match(); see key_matches() for its meaning. */
+ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
+			 NTDB_DATA key,
+			 int ltype,
+			 struct hash_info *h,
+			 struct ntdb_used_record *rec,
+			 const char **rptr)
+{
+	ntdb_off_t off, val, locked_bucket;
+	const ntdb_off_t *arr = NULL;
+	ntdb_len_t i;
+	bool found_empty;
+	enum NTDB_ERROR ecode;
+	struct ntdb_used_record chdr;
+	ntdb_bool_err berr;
+
+	h->h = ntdb_hash(ntdb, key.dptr, key.dsize);
+
+	h->table = NTDB_HASH_OFFSET;
+	h->table_size = 1 << ntdb->hash_bits;
+	h->bucket = bits_from(h->h, 0, ntdb->hash_bits);
+	h->old_val = 0;
+
+	/* h->bucket is reused as a chain slot index further down, so keep
+	 * the bucket we actually lock for the error path. */
+	locked_bucket = h->bucket;
+
+	ecode = ntdb_lock_hash(ntdb, h->bucket, ltype);
+	if (ecode != NTDB_SUCCESS) {
+		return NTDB_ERR_TO_OFF(ecode);
+	}
+
+	off = hbucket_off(h->table, h->bucket);
+	val = ntdb_read_off(ntdb, off);
+	if (NTDB_OFF_IS_ERR(val)) {
+		ecode = NTDB_OFF_TO_ERR(val);
+		goto fail;
+	}
+
+	/* Directly in hash table? */
+	if (!likely(is_chain(val))) {
+		if (val) {
+			berr = match(ntdb, h->h, &key, val, rec, rptr);
+			if (berr < 0) {
+				ecode = NTDB_OFF_TO_ERR(berr);
+				goto fail;
+			}
+			if (berr) {
+				return val & NTDB_OFF_MASK;
+			}
+			/* If you want to insert here, make a chain. */
+			h->old_val = val;
+		}
+		return 0;
+	}
+
+	/* Nope? Iterate through chain. */
+	h->table = val & NTDB_OFF_MASK;
+
+	ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
+	if (ecode != NTDB_SUCCESS) {
+		goto fail;
+	}
+
+	if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+				    NTDB_LOG_ERROR,
+				    "find_and_lock:"
+				    " corrupt record %#x at %llu",
+				    rec_magic(&chdr), (long long)off);
+		goto fail;
+	}
+
+	/* The chain's data is an array of entries. */
+	h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
+
+	arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
+			       rec_data_length(&chdr), true);
+	if (NTDB_PTR_IS_ERR(arr)) {
+		ecode = NTDB_PTR_ERR(arr);
+		goto fail;
+	}
+
+	found_empty = false;
+	for (i = 0; i < h->table_size; i++) {
+		if (arr[i] == 0) {
+			/* Remember the first hole as the insertion point. */
+			if (!found_empty) {
+				h->bucket = i;
+				found_empty = true;
+			}
+		} else {
+			berr = match(ntdb, h->h, &key, arr[i], rec, rptr);
+			if (berr < 0) {
+				ecode = NTDB_OFF_TO_ERR(berr);
+				ntdb_access_release(ntdb, arr);
+				goto fail;
+			}
+			if (berr) {
+				/* We found it! */
+				h->bucket = i;
+				off = arr[i] & NTDB_OFF_MASK;
+				ntdb_access_release(ntdb, arr);
+				return off;
+			}
+		}
+	}
+	if (!found_empty) {
+		/* Set to any non-zero value */
+		h->old_val = 1;
+		h->bucket = i;
+	}
+
+	ntdb_access_release(ntdb, arr);
+	return 0;
+
+fail:
+	/* Unlock what we locked: h->bucket may by now be a chain index. */
+	ntdb_unlock_hash(ntdb, locked_bucket, ltype);
+	return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* Build the value stored in a hash slot for the record at 'new_off': the
+ * offset itself, with NTDB_OFF_UPPER_STEAL further bits of the hash packed
+ * into its otherwise-unused top bits.
+ *
+ * 'new_off' must have neither the chain bit nor any of the top bits set. */
+static ntdb_off_t encode_offset(const struct ntdb_context *ntdb,
+				ntdb_off_t new_off, uint32_t hash)
+{
+	const unsigned int shift = 64 - NTDB_OFF_UPPER_STEAL;
+	ntdb_off_t stolen;
+
+	assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0);
+	assert((new_off >> shift) == 0);
+
+	/* We pack extra hash bits into the upper bits of the offset. */
+	stolen = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL);
+
+	return new_off | (stolen << shift);
+}
+
+/* Overwrite the hash slot described by 'h' (as found by an earlier search)
+ * so that it refers to the record at 'new_off'. */
+enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
+				const struct hash_info *h,
+				ntdb_off_t new_off)
+{
+	const ntdb_off_t slot = hbucket_off(h->table, h->bucket);
+	const ntdb_off_t entry = encode_offset(ntdb, new_off, h->h);
+
+	return ntdb_write_off(ntdb, slot, entry);
+}
+
+/* Clear the hash slot described by 'h' by writing 0 to it. */
+enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
+				 const struct hash_info *h)
+{
+	const ntdb_off_t slot = hbucket_off(h->table, h->bucket);
+
+	return ntdb_write_off(ntdb, slot, 0);
+}
+
+/* Insert new_off into the hash at the position described by h.
+ *
+ * Three cases are handled, in order:
+ *  - h->old_val is zero: the bucket is empty, so it is simply overwritten;
+ *  - h->table is the top-level table: a new two-entry chain holding the old
+ *    value and the new one is allocated and the bucket is pointed at it;
+ *  - otherwise h->table is an existing chain, which is either grown in
+ *    place (when its header reports enough padding) or reallocated with
+ *    one more slot.
+ *
+ * Returns NTDB_SUCCESS or the error code of the first failing step.
+ */
+enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h,
+ ntdb_off_t new_off)
+{
+ enum NTDB_ERROR ecode;
+ ntdb_off_t chain;
+ struct ntdb_used_record chdr;
+ const ntdb_off_t *old;
+ ntdb_off_t *new;
+
+ /* We hit an empty bucket during search? That's where it goes. */
+ if (!h->old_val) {
+ return replace_in_hash(ntdb, h, new_off);
+ }
+
+ /* Full at top-level? Create a 2-element chain.
+ * The bucket is then rewritten to hold the chain's offset with
+ * NTDB_OFF_CHAIN_BIT set. */
+ if (h->table == NTDB_HASH_OFFSET) {
+ ntdb_off_t pair[2];
+
+ /* One element is old value, the other is the new value. */
+ pair[0] = h->old_val;
+ pair[1] = encode_offset(ntdb, new_off, h->h);
+
+ chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true);
+ if (NTDB_OFF_IS_ERR(chain)) {
+ return NTDB_OFF_TO_ERR(chain);
+ }
+ ecode = ntdb_write_convert(ntdb,
+ chain
+ + sizeof(struct ntdb_used_record),
+ pair, sizeof(pair));
+ if (ecode == NTDB_SUCCESS) {
+ ecode = ntdb_write_off(ntdb,
+ hbucket_off(h->table, h->bucket),
+ chain
+ | (1ULL << NTDB_OFF_CHAIN_BIT));
+ }
+ return ecode;
+ }
+
+ /* Full bucket. Expand.
+ * Read the existing chain's record header to see whether it has
+ * padding to spare. */
+ ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ if (rec_extra_padding(&chdr) >= sizeof(new_off)) {
+ /* Expand in place: the header is rebuilt with a data length one
+ * offset larger (presumably consuming padding; the exact
+ * set_header() argument meaning is not visible here). */
+ uint64_t dlen = rec_data_length(&chdr);
+
+ ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0,
+ dlen + sizeof(new_off),
+ dlen + rec_extra_padding(&chdr));
+
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ /* find_and_lock set up h to point to last bucket. */
+ ecode = replace_in_hash(ntdb, h, new_off);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ /* For futureproofing, we always make the first byte of padding
+ * a zero. */
+ if (rec_extra_padding(&chdr)) {
+ ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr)
+ + dlen + sizeof(new_off),
+ "", 1);
+ }
+ return ecode;
+ }
+
+ /* We need to reallocate the chain, with room for one more entry. */
+ chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t),
+ NTDB_CHAIN_MAGIC, true);
+ if (NTDB_OFF_IS_ERR(chain)) {
+ return NTDB_OFF_TO_ERR(chain);
+ }
+
+ /* Map both and copy across old buckets.
+ * NOTE(review): the error returns below do not release the chain
+ * allocated just above - confirm whether that is acceptable. */
+ old = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
+ h->table_size*sizeof(ntdb_off_t), true);
+ if (NTDB_PTR_IS_ERR(old)) {
+ return NTDB_PTR_ERR(old);
+ }
+ new = ntdb_access_write(ntdb, hbucket_off(chain, 0),
+ (h->table_size + 1)*sizeof(ntdb_off_t), true);
+ if (NTDB_PTR_IS_ERR(new)) {
+ ntdb_access_release(ntdb, old);
+ return NTDB_PTR_ERR(new);
+ }
+
+ memcpy(new, old, h->bucket * sizeof(ntdb_off_t));
+ new[h->bucket] = encode_offset(ntdb, new_off, h->h);
+ ntdb_access_release(ntdb, old);
+
+ ecode = ntdb_access_commit(ntdb, new);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* Free the old chain.
+ * NOTE(review): the result stored in ecode here is never examined;
+ * the function returns the status of the write below instead. */
+ ecode = add_free_record(ntdb, h->table,
+ sizeof(struct ntdb_used_record)
+ + rec_data_length(&chdr)
+ + rec_extra_padding(&chdr),
+ NTDB_LOCK_WAIT, true);
+
+ /* Replace top-level to point to new chain */
+ return ntdb_write_off(ntdb,
+ hbucket_off(NTDB_HASH_OFFSET,
+ bits_from(h->h, 0, ntdb->hash_bits)),
+ chain | (1ULL << NTDB_OFF_CHAIN_BIT));
+}
+
+/* Traverse support: returns offset of record, or 0 or -ve error. */
+static ntdb_off_t iterate_chain(struct ntdb_context *ntdb,
+ ntdb_off_t val,
+ struct hash_info *h)
+{
+ ntdb_off_t i;
+ enum NTDB_ERROR ecode;
+ struct ntdb_used_record chdr;
+
+ /* First load up chain header. */
+ h->table = val & NTDB_OFF_MASK;
+ ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "get_table:"
+ " corrupt record %#x at %llu",
+ rec_magic(&chdr),
+ (long long)h->table);
+ }
+
+ /* Chain length is implied by data length. */
+ h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
+
+ i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket,
+ h->table_size);
+ if (NTDB_OFF_IS_ERR(i)) {
+ return i;
+ }
+
+ if (i != h->table_size) {
+ /* Return to next bucket. */
+ h->bucket = i + 1;
+ val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
+ if (NTDB_OFF_IS_ERR(val)) {
+ return val;
+ }
+ return val & NTDB_OFF_MASK;
+ }
+
+ /* Go back up to hash table. */
+ h->table = NTDB_HASH_OFFSET;
+ h->table_size = 1 << ntdb->hash_bits;
+ h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1;
+ return 0;
+}
+
+/* Find the next record for a traversal and return with its hash lock held.
+ *
+ * h holds the traversal position (table, table_size, bucket and, while
+ * inside a chain, the hash value h->h that selects the top-level bucket).
+ *
+ * Returns the record offset (masked with NTDB_OFF_MASK) with the F_RDLCK
+ * hash lock for its top-level bucket still held; 0 when nothing is left;
+ * or an error offset.  Keeps hash locked unless returns 0 or error.
+ */
+static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb,
+					struct hash_info *h)
+{
+	ntdb_off_t val, i;
+	enum NTDB_ERROR ecode;
+
+	if (h->table != NTDB_HASH_OFFSET) {
+		/* We're in a chain. */
+		i = bits_from(h->h, 0, ntdb->hash_bits);
+		ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
+		if (ecode != NTDB_SUCCESS) {
+			return NTDB_ERR_TO_OFF(ecode);
+		}
+
+		/* We dropped lock, bucket might have moved! */
+		val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i));
+		if (NTDB_OFF_IS_ERR(val)) {
+			goto unlock;
+		}
+
+		/* We don't remove chains: there should still be one there! */
+		if (!val || !is_chain(val)) {
+			ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+					    NTDB_LOG_ERROR,
+					    "iterate_hash:"
+					    " vanished hchain %llu at %llu",
+					    (long long)val,
+					    (long long)i);
+			val = NTDB_ERR_TO_OFF(ecode);
+			goto unlock;
+		}
+
+		/* Find next bucket in the chain. */
+		val = iterate_chain(ntdb, val, h);
+		if (NTDB_OFF_IS_ERR(val)) {
+			goto unlock;
+		}
+		if (val != 0) {
+			return val;
+		}
+		ntdb_unlock_hash(ntdb, i, F_RDLCK);
+
+		/* OK, we've reset h back to top level. */
+	}
+
+	/* We do this unlocked, then re-check. */
+	for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
+				       h->bucket, h->table_size);
+	     i != h->table_size;
+	     i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
+				       i+1, h->table_size)) {
+		/* The scan itself can fail; an error value is never equal
+		 * to table_size, so it must be caught here before it is
+		 * used as a bucket index.  No lock is held at this point. */
+		if (NTDB_OFF_IS_ERR(i)) {
+			return i;
+		}
+
+		ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
+		if (ecode != NTDB_SUCCESS) {
+			return NTDB_ERR_TO_OFF(ecode);
+		}
+
+		val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
+		if (NTDB_OFF_IS_ERR(val)) {
+			goto unlock;
+		}
+
+		/* Lost race, and it's empty? */
+		if (!val) {
+			ntdb->stats.traverse_val_vanished++;
+			ntdb_unlock_hash(ntdb, i, F_RDLCK);
+			continue;
+		}
+
+		if (!is_chain(val)) {
+			/* So caller knows what lock to free. */
+			h->h = i;
+			/* Return to next bucket. */
+			h->bucket = i + 1;
+			val &= NTDB_OFF_MASK;
+			return val;
+		}
+
+		/* Start at beginning of chain */
+		h->bucket = 0;
+		h->h = i;
+
+		val = iterate_chain(ntdb, val, h);
+		if (NTDB_OFF_IS_ERR(val)) {
+			goto unlock;
+		}
+		if (val != 0) {
+			return val;
+		}
+
+		/* Otherwise, bucket has been set to i+1 */
+		ntdb_unlock_hash(ntdb, i, F_RDLCK);
+	}
+	return 0;
+
+unlock:
+	ntdb_unlock_hash(ntdb, i, F_RDLCK);
+	return val;
+}
+
+/* Step the traversal in h to the next record.
+ *
+ * On success kbuf->dsize holds the key length and kbuf->dptr a newly
+ * allocated buffer read from just past the record header.  When dlen is
+ * non-NULL it receives the data length and the buffer covers key length
+ * plus data length bytes; otherwise only the key length is read.
+ *
+ * Returns NTDB_SUCCESS if a record was found, NTDB_ERR_NOEXIST if there is
+ * none, or another error code.  The hash lock taken by
+ * lock_and_iterate_hash() is released before returning.
+ */
+enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
+			     struct hash_info *h,
+			     NTDB_DATA *kbuf, size_t *dlen)
+{
+	struct ntdb_used_record hdr;
+	enum NTDB_ERROR status;
+	ntdb_off_t rec_off;
+
+	rec_off = lock_and_iterate_hash(ntdb, h);
+	if (NTDB_OFF_IS_ERR(rec_off)) {
+		return NTDB_OFF_TO_ERR(rec_off);
+	}
+	if (rec_off == 0) {
+		return NTDB_ERR_NOEXIST;
+	}
+
+	/* From here on the hash for this key is locked. */
+	status = ntdb_read_convert(ntdb, rec_off, &hdr, sizeof(hdr));
+	if (status != NTDB_SUCCESS) {
+		goto drop_lock;
+	}
+	if (rec_magic(&hdr) != NTDB_USED_MAGIC) {
+		status = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+				     NTDB_LOG_ERROR,
+				     "next_in_hash:"
+				     " corrupt record at %llu",
+				     (long long)rec_off);
+		goto drop_lock;
+	}
+
+	kbuf->dsize = rec_key_length(&hdr);
+
+	if (!dlen) {
+		/* Key only. */
+		kbuf->dptr = ntdb_alloc_read(ntdb, rec_off + sizeof(hdr),
+					     kbuf->dsize);
+	} else {
+		/* Key and data in one read. */
+		*dlen = rec_data_length(&hdr);
+		kbuf->dptr = ntdb_alloc_read(ntdb, rec_off + sizeof(hdr),
+					     kbuf->dsize + *dlen);
+	}
+
+	if (NTDB_PTR_IS_ERR(kbuf->dptr)) {
+		status = NTDB_PTR_ERR(kbuf->dptr);
+	} else {
+		status = NTDB_SUCCESS;
+	}
+
+drop_lock:
+	ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK);
+	return status;
+}
+
+/* Begin a traversal: position h on bucket 0 of the top-level hash table
+ * and fetch the first record exactly as next_in_hash() would. */
+enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
+			      struct hash_info *h,
+			      NTDB_DATA *kbuf, size_t *dlen)
+{
+	h->bucket = 0;
+	h->table_size = 1 << ntdb->hash_bits;
+	h->table = NTDB_HASH_OFFSET;
+
+	return next_in_hash(ntdb, h, kbuf, dlen);
+}
+
+/* Lock the top-level bucket that key hashes to, with lock type ltype.
+ * Even if the entry isn't in this hash bucket, you'd have to lock this
+ * bucket to find it. */
+static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb,
+				 const NTDB_DATA *key, int ltype)
+{
+	const uint32_t hash = ntdb_hash(ntdb, key->dptr, key->dsize);
+
+	return ntdb_lock_hash(ntdb, bits_from(hash, 0, ntdb->hash_bits),
+			      ltype);
+}
+
+/* Take a write lock on the hash chain that key belongs to.  This is meant
+   to be used to reduce contention - it cannot guarantee how many records
+   will be locked */
+_PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+	const int ltype = F_WRLCK;
+
+	return chainlock(ntdb, &key, ltype);
+}
+
+/* Drop the write lock on the hash chain that key belongs to. */
+_PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+	const uint32_t hash = ntdb_hash(ntdb, key.dptr, key.dsize);
+
+	/* Same bucket computation as chainlock() uses when locking. */
+	ntdb_unlock_hash(ntdb, bits_from(hash, 0, ntdb->hash_bits), F_WRLCK);
+}
+
+/* Take a read lock on the hash chain that key belongs to. */
+_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb,
+					      NTDB_DATA key)
+{
+	const int ltype = F_RDLCK;
+
+	return chainlock(ntdb, &key, ltype);
+}
+
+/* Drop the read lock on the hash chain that key belongs to. */
+_PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+	const uint32_t hash = ntdb_hash(ntdb, key.dptr, key.dsize);
+
+	/* Same bucket computation as chainlock() uses when locking. */
+	ntdb_unlock_hash(ntdb, bits_from(hash, 0, ntdb->hash_bits), F_RDLCK);
+}
--- /dev/null
+ /*
+ Unix SMB/CIFS implementation.
+
+ trivial database library
+
+ Copyright (C) Andrew Tridgell 1999-2005
+ Copyright (C) Paul `Rusty' Russell 2000
+ Copyright (C) Jeremy Allison 2000-2003
+ Copyright (C) Rusty Russell 2010
+
+ ** NOTE! The following LGPL license applies to the ntdb
+ ** library. This does NOT imply that all of Samba is released
+ ** under the LGPL
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+
+/* Dispose of every mapping parked on ntdb->file->old_mmaps.  May only be
+ * called once no direct accessors remain (asserted). */
+static void free_old_mmaps(struct ntdb_context *ntdb)
+{
+	assert(ntdb->file->direct_count == 0);
+
+	for (;;) {
+		struct ntdb_old_mmap *stale = ntdb->file->old_mmaps;
+
+		if (stale == NULL) {
+			break;
+		}
+
+		/* Unlink first, then dispose of the mapping and the node. */
+		ntdb->file->old_mmaps = stale->next;
+		if (!(ntdb->flags & NTDB_INTERNAL)) {
+			munmap(stale->map_ptr, stale->map_size);
+		} else {
+			/* Internal databases hold allocated memory instead. */
+			ntdb->free_fn(stale->map_ptr, ntdb->alloc_data);
+		}
+		ntdb->free_fn(stale, ntdb->alloc_data);
+	}
+}
+
+/* Remember the current mapping (pointer and size) on the old_mmaps list so
+ * it can be disposed of later by free_old_mmaps().  Only valid while direct
+ * accessors exist (asserted).  Returns NTDB_SUCCESS, or the result of
+ * logging NTDB_ERR_OOM if the list node cannot be allocated. */
+static enum NTDB_ERROR save_old_map(struct ntdb_context *ntdb)
+{
+	struct ntdb_old_mmap *node;
+
+	assert(ntdb->file->direct_count);
+
+	node = ntdb->alloc_fn(ntdb->file, sizeof(*node), ntdb->alloc_data);
+	if (node == NULL) {
+		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+				   "save_old_map alloc failed");
+	}
+
+	/* Snapshot the live mapping and push it on the front of the list. */
+	node->map_ptr = ntdb->file->map_ptr;
+	node->map_size = ntdb->file->map_size;
+	node->next = ntdb->file->old_mmaps;
+	ntdb->file->old_mmaps = node;
+
+	return NTDB_SUCCESS;
+}
+
+/* Drop the current memory mapping, if there is one.
+ *
+ * Nothing happens when the file descriptor is -1 or nothing is mapped.
+ * While direct accessors exist the mapping is not unmapped but handed to
+ * save_old_map() (whose result is returned); otherwise it is unmapped and
+ * map_ptr is cleared. */
+enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb)
+{
+	if (ntdb->file->fd == -1 || !ntdb->file->map_ptr) {
+		return NTDB_SUCCESS;
+	}
+
+	/* We can't unmap now if there are accessors. */
+	if (ntdb->file->direct_count) {
+		return save_old_map(ntdb);
+	}
+
+	munmap(ntdb->file->map_ptr, ntdb->file->map_size);
+	ntdb->file->map_ptr = NULL;
+	return NTDB_SUCCESS;
+}
+
+/* Map the first ntdb->file->map_size bytes of the database file.
+ *
+ * Does nothing for NTDB_INTERNAL databases, nor (unless
+ * HAVE_INCOHERENT_MMAP is defined) when NTDB_NOMMAP is set.  The mapping
+ * is read-only if the file was opened O_RDONLY, read-write otherwise.
+ *
+ * On failure map_ptr is left NULL.  With HAVE_INCOHERENT_MMAP that is an
+ * error (the result of logging NTDB_ERR_IO); otherwise a warning is
+ * logged and NTDB_SUCCESS is still returned. */
+enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb)
+{
+	int prot;
+	void *map = MAP_FAILED;
+
+	if (ntdb->flags & NTDB_INTERNAL)
+		return NTDB_SUCCESS;
+
+#ifndef HAVE_INCOHERENT_MMAP
+	if (ntdb->flags & NTDB_NOMMAP)
+		return NTDB_SUCCESS;
+#endif
+
+	prot = PROT_READ;
+	if ((ntdb->open_flags & O_ACCMODE) != O_RDONLY)
+		prot |= PROT_WRITE;
+
+	/* size_t can be smaller than off_t: only map if the size fits. */
+	if ((size_t)ntdb->file->map_size == ntdb->file->map_size) {
+		map = mmap(NULL, ntdb->file->map_size, prot,
+			   MAP_SHARED, ntdb->file->fd, 0);
+	}
+
+	/*
+	 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
+	 */
+	if (map != MAP_FAILED) {
+		ntdb->file->map_ptr = map;
+		return NTDB_SUCCESS;
+	}
+
+	ntdb->file->map_ptr = NULL;
+#ifdef HAVE_INCOHERENT_MMAP
+	/* Incoherent mmap means everyone must mmap! */
+	return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+			   "ntdb_mmap failed for size %lld (%s)",
+			   (long long)ntdb->file->map_size,
+			   strerror(errno));
+#else
+	ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+		    "ntdb_mmap failed for size %lld (%s)",
+		    (long long)ntdb->file->map_size, strerror(errno));
+	return NTDB_SUCCESS;
+#endif
+}
+
+/* Out-of-bounds handler: decide whether an access needing the database to
+   be at least off + len bytes long can be satisfied.
+
+   If the file has been grown (as reported by fstat, taken under the expand
+   read lock) far enough, the mapping is dropped, map_size is updated to
+   the file size and the file is mapped again.
+
+   If probe is true, len being too large isn't a failure: NTDB_SUCCESS is
+   returned without logging.  Otherwise NTDB_ERR_IO (or the result of
+   logging it) is returned.
+*/
+static enum NTDB_ERROR ntdb_normal_oob(struct ntdb_context *ntdb,
+				       ntdb_off_t off, ntdb_len_t len,
+				       bool probe)
+{
+	struct stat sb;
+	enum NTDB_ERROR rc;
+
+	/* off + len wrapped around. */
+	if (len + off < len) {
+		if (!probe) {
+			return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+					   "ntdb_oob off %llu len %llu wrap\n",
+					   (long long)off, (long long)len);
+		}
+		return NTDB_SUCCESS;
+	}
+
+	/* Internal databases have no file that could have grown. */
+	if (ntdb->flags & NTDB_INTERNAL) {
+		if (!probe) {
+			ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_oob len %lld beyond internal"
+				    " alloc size %lld",
+				    (long long)(off + len),
+				    (long long)ntdb->file->map_size);
+			return NTDB_ERR_IO;
+		}
+		return NTDB_SUCCESS;
+	}
+
+	rc = ntdb_lock_expand(ntdb, F_RDLCK);
+	if (rc != NTDB_SUCCESS) {
+		return rc;
+	}
+
+	if (fstat(ntdb->file->fd, &sb) != 0) {
+		/* Log before unlocking so errno still belongs to fstat. */
+		ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+			    "Failed to fstat file: %s", strerror(errno));
+		ntdb_unlock_expand(ntdb, F_RDLCK);
+		return NTDB_ERR_IO;
+	}
+
+	ntdb_unlock_expand(ntdb, F_RDLCK);
+
+	if (sb.st_size < off + len) {
+		if (!probe) {
+			ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_oob len %llu beyond eof at %llu",
+				    (long long)(off + len),
+				    (long long)sb.st_size);
+			return NTDB_ERR_IO;
+		}
+		return NTDB_SUCCESS;
+	}
+
+	/* Unmap, update size, remap */
+	rc = ntdb_munmap(ntdb);
+	if (rc) {
+		return rc;
+	}
+
+	ntdb->file->map_size = sb.st_size;
+	return ntdb_mmap(ntdb);
+}
+
+/* Endian conversion: byte-swap size bytes at buf in place, treated as an
+ * array of 8-byte words, when the database has NTDB_CONVERT set.  size
+ * must be a multiple of 8 (asserted).  A NULL buf is left alone.
+ * Always returns buf. */
+void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size)
+{
+	assert(size % 8 == 0);
+
+	if (buf && unlikely((ntdb->flags & NTDB_CONVERT))) {
+		uint64_t *word = (uint64_t *)buf;
+		uint64_t *const stop = word + size / 8;
+
+		while (word != stop) {
+			*word = bswap_64(*word);
+			word++;
+		}
+	}
+	return buf;
+}
+
+/* Scan entries [start, end) of the offset array at base and return the
+ * index of the first non-zero one, or end if all are zero, or -ve error. */
+/* FIXME: Return the off? */
+uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
+			       ntdb_off_t base, uint64_t start, uint64_t end)
+{
+	const uint64_t count = end - start;
+	const uint64_t *slots;
+	uint64_t idx = 0;
+
+	/* Zero vs non-zero is the same unconverted: minor optimization. */
+	slots = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t),
+				 count * sizeof(ntdb_off_t), false);
+	if (NTDB_PTR_IS_ERR(slots)) {
+		return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(slots));
+	}
+
+	while (idx < count && !slots[idx]) {
+		idx++;
+	}
+
+	ntdb_access_release(ntdb, slots);
+	return start + idx;
+}
+
+/* Scan the num-entry offset array at off and return the index of the
+ * first zero entry, or num if there is none, or -ve error. */
+uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
+			    uint64_t num)
+{
+	const uint64_t *slots;
+	uint64_t idx = 0;
+
+	/* Zero vs non-zero is the same unconverted: minor optimization. */
+	slots = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false);
+	if (NTDB_PTR_IS_ERR(slots)) {
+		return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(slots));
+	}
+
+	while (idx < num && slots[idx]) {
+		idx++;
+	}
+
+	ntdb_access_release(ntdb, slots);
+	return idx;
+}
+
+/* Write len zero bytes at offset off.  Uses a direct pointer and memset
+ * when the io layer provides one; otherwise writes from a zeroed buffer
+ * in pieces of at most 8192 bytes through io->twrite.  The database must
+ * not be read-only (asserted).  Returns the first error encountered. */
+enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len)
+{
+	char zeros[8192] = { 0 };
+	void *direct = ntdb->io->direct(ntdb, off, len, true);
+	enum NTDB_ERROR ecode = NTDB_SUCCESS;
+	unsigned chunk;
+
+	assert(!(ntdb->flags & NTDB_RDONLY));
+	if (NTDB_PTR_IS_ERR(direct)) {
+		return NTDB_PTR_ERR(direct);
+	}
+	if (direct) {
+		memset(direct, 0, len);
+		return ecode;
+	}
+
+	/* No direct access: fall back to chunked writes. */
+	for (; len; len -= chunk, off += chunk) {
+		chunk = len < sizeof(zeros) ? len : sizeof(zeros);
+		ecode = ntdb->io->twrite(ntdb, off, zeros, chunk);
+		if (ecode != NTDB_SUCCESS) {
+			break;
+		}
+	}
+	return ecode;
+}
+
+/* Write len bytes from buf at offset off.
+ *
+ * Fails (after logging NTDB_ERR_RDONLY) on a read-only database and when
+ * ntdb_oob() rejects the range.  The data is copied into the mapping when
+ * one exists; without a mapping it is written with pwrite(), unless
+ * HAVE_INCOHERENT_MMAP is defined, in which case NTDB_ERR_IO is returned. */
+static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off,
+				  const void *buf, ntdb_len_t len)
+{
+	enum NTDB_ERROR ecode;
+
+	if (ntdb->flags & NTDB_RDONLY) {
+		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+				   "Write to read-only database");
+	}
+
+	ecode = ntdb_oob(ntdb, off, len, false);
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+
+	if (ntdb->file->map_ptr) {
+		memcpy(off + (char *)ntdb->file->map_ptr, buf, len);
+		return NTDB_SUCCESS;
+	}
+
+#ifdef HAVE_INCOHERENT_MMAP
+	return NTDB_ERR_IO;
+#else
+	{
+		ssize_t written = pwrite(ntdb->file->fd, buf, len, off);
+
+		if (written != len) {
+			/* This shouldn't happen: we avoid sparse files. */
+			if (written >= 0)
+				errno = ENOSPC;
+
+			return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+					   "ntdb_write: %zi at %zu len=%zu (%s)",
+					   written, (size_t)off, (size_t)len,
+					   strerror(errno));
+		}
+	}
+	return NTDB_SUCCESS;
+#endif
+}
+
+/* Read len bytes at offset off into buf.
+ *
+ * Fails when ntdb_oob() rejects the range.  The data is copied out of the
+ * mapping when one exists; without a mapping it is read with pread(),
+ * unless HAVE_INCOHERENT_MMAP is defined, in which case NTDB_ERR_IO is
+ * returned.  A pread() result different from len is logged as
+ * NTDB_ERR_IO. */
+static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off,
+				 void *buf, ntdb_len_t len)
+{
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_oob(ntdb, off, len, false);
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+
+	if (ntdb->file->map_ptr) {
+		memcpy(buf, off + (char *)ntdb->file->map_ptr, len);
+		return NTDB_SUCCESS;
+	}
+
+#ifdef HAVE_INCOHERENT_MMAP
+	return NTDB_ERR_IO;
+#else
+	{
+		ssize_t got = pread(ntdb->file->fd, buf, len, off);
+
+		if (got != len) {
+			return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+					   "ntdb_read failed with %zi at %zu "
+					   "len=%zu (%s) map_size=%zu",
+					   got, (size_t)off, (size_t)len,
+					   strerror(errno),
+					   (size_t)ntdb->file->map_size);
+		}
+	}
+	return NTDB_SUCCESS;
+#endif
+}
+
+/* Write len bytes of rec at off through io->twrite.  When NTDB_CONVERT is
+ * set, a temporary copy is byte-swapped with ntdb_convert() and written
+ * instead, so rec itself is never modified.  Returns the twrite result,
+ * or the result of logging NTDB_ERR_OOM if the copy cannot be allocated. */
+enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+				   const void *rec, size_t len)
+{
+	enum NTDB_ERROR ecode;
+	void *swapped;
+
+	/* Common case: no conversion, write the caller's buffer as is. */
+	if (!unlikely((ntdb->flags & NTDB_CONVERT))) {
+		return ntdb->io->twrite(ntdb, off, rec, len);
+	}
+
+	swapped = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data);
+	if (!swapped) {
+		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+				   "ntdb_write: no memory converting"
+				   " %zu bytes", len);
+	}
+
+	memcpy(swapped, rec, len);
+	ntdb_convert(ntdb, swapped, len);
+	ecode = ntdb->io->twrite(ntdb, off, swapped, len);
+	ntdb->free_fn(swapped, ntdb->alloc_data);
+
+	return ecode;
+}
+
+/* Read len bytes at off into rec through io->tread, then run
+ * ntdb_convert() over rec (done whether or not the read succeeded).
+ * Returns the tread result. */
+enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+				  void *rec, size_t len)
+{
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb->io->tread(ntdb, off, rec, len);
+	(void)ntdb_convert(ntdb, rec, len);
+
+	return ecode;
+}
+
+/* Allocate prefix + len bytes (at least one) and read len bytes from
+ * offset into the buffer starting prefix bytes in.  The first prefix
+ * bytes are left for the caller to fill.  Returns the buffer, or an error
+ * pointer (NTDB_ERR_OOM, or the tread error after freeing the buffer). */
+static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset,
+			      ntdb_len_t len, unsigned int prefix)
+{
+	enum NTDB_ERROR ecode;
+	unsigned char *mem;
+
+	/* some systems don't like zero length malloc */
+	mem = ntdb->alloc_fn(ntdb, prefix + len ? prefix + len : 1,
+			     ntdb->alloc_data);
+	if (!mem) {
+		ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+			    "ntdb_alloc_read alloc failed len=%zu",
+			    (size_t)(prefix + len));
+		return NTDB_ERR_PTR(NTDB_ERR_OOM);
+	}
+
+	ecode = ntdb->io->tread(ntdb, offset, mem + prefix, len);
+	if (unlikely(ecode != NTDB_SUCCESS)) {
+		ntdb->free_fn(mem, ntdb->alloc_data);
+		return NTDB_ERR_PTR(ecode);
+	}
+
+	return mem;
+}
+
+/* Read len bytes at offset into a newly allocated buffer (no prefix
+ * space).  Returns the buffer or an error pointer. */
+void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len)
+{
+	const unsigned int no_prefix = 0;
+
+	return _ntdb_alloc_read(ntdb, offset, len, no_prefix);
+}
+
+/* Write len bytes to the file starting at off by repeating the size-byte
+ * pattern in buf, using pwrite() directly on the file descriptor.  A
+ * short write is reported as ENOSPC.  Returns NTDB_SUCCESS or the result
+ * of logging NTDB_ERR_IO. */
+static enum NTDB_ERROR fill(struct ntdb_context *ntdb,
+			    const void *buf, size_t size,
+			    ntdb_off_t off, ntdb_len_t len)
+{
+	size_t chunk;
+	ssize_t done;
+
+	for (; len; len -= chunk, off += chunk) {
+		chunk = len > size ? size : len;
+		done = pwrite(ntdb->file->fd, buf, chunk, off);
+		if (done == chunk) {
+			continue;
+		}
+
+		if (done >= 0)
+			errno = ENOSPC;
+
+		return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				   "fill failed:"
+				   " %zi at %zu len=%zu (%s)",
+				   done, (size_t)off, (size_t)len,
+				   strerror(errno));
+	}
+	return NTDB_SUCCESS;
+}
+
+/* Grow the database by addition bytes.
+ *
+ * The new total size must be a multiple of NTDB_PGSIZE (asserted), and the
+ * database must not be read-only.
+ *
+ * NTDB_INTERNAL databases get a larger memory buffer: via expand_fn when
+ * nobody holds a direct pointer, otherwise via a fresh allocation plus
+ * copy, with the old buffer parked on the old_mmaps list until the direct
+ * accessors are gone.
+ *
+ * File-backed databases are unmapped, extended (we prefer to use
+ * ftruncate, as that is what posix says to use for mmap expansion), filled
+ * with real data so the file is not sparse, and mapped again.
+ *
+ * Returns NTDB_SUCCESS or the error of the first failing step.
+ */
+static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb,
+					ntdb_len_t addition)
+{
+	char buf[8192];
+	enum NTDB_ERROR ecode;
+
+	assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0);
+	if (ntdb->flags & NTDB_RDONLY) {
+		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+				   "Expand on read-only database");
+	}
+
+	if (ntdb->flags & NTDB_INTERNAL) {
+		char *new;
+
+		/* Can't free it if we have direct accesses. */
+		if (ntdb->file->direct_count) {
+			/* Allocate BEFORE parking the current buffer on the
+			 * old_mmaps list.  If the current buffer were parked
+			 * first and the allocation then failed, it would stay
+			 * on that list while still being the live map_ptr, and
+			 * free_old_mmaps() would later free memory that is
+			 * still in use. */
+			new = ntdb->alloc_fn(ntdb->file,
+					     ntdb->file->map_size + addition,
+					     ntdb->alloc_data);
+			if (new) {
+				ecode = save_old_map(ntdb);
+				if (ecode != NTDB_SUCCESS) {
+					ntdb->free_fn(new, ntdb->alloc_data);
+					return ecode;
+				}
+				memcpy(new, ntdb->file->map_ptr,
+				       ntdb->file->map_size);
+			}
+		} else {
+			new = ntdb->expand_fn(ntdb->file->map_ptr,
+					      ntdb->file->map_size + addition,
+					      ntdb->alloc_data);
+		}
+		if (!new) {
+			return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+					   "No memory to expand database");
+		}
+		ntdb->file->map_ptr = new;
+		ntdb->file->map_size += addition;
+		return NTDB_SUCCESS;
+	} else {
+		/* Unmap before trying to write; old NTDB claimed OpenBSD had
+		 * problem with this otherwise. */
+		ecode = ntdb_munmap(ntdb);
+		if (ecode) {
+			return ecode;
+		}
+
+		/* If this fails, we try to fill anyway. */
+		if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition))
+			;
+
+		/* now fill the file with something. This ensures that the
+		   file isn't sparse, which would be very bad if we ran out of
+		   disk. This must be done with write, not via mmap */
+		memset(buf, 0x43, sizeof(buf));
+		ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size,
+			     addition);
+		if (ecode != NTDB_SUCCESS)
+			return ecode;
+		ntdb->file->map_size += addition;
+		return ntdb_mmap(ntdb);
+	}
+}
+
+/* Obtain read access to len bytes at off.
+ *
+ * Without NTDB_CONVERT, a direct pointer from io->direct is preferred;
+ * handing one out increments file->direct_count.  Otherwise the bytes are
+ * read into an allocated buffer headed by a struct ntdb_access_hdr that is
+ * linked onto ntdb->access, and byte-swapped through ntdb_convert() when
+ * convert is true.
+ *
+ * Returns the data pointer or an error pointer; release it with
+ * ntdb_access_release(). */
+const void *ntdb_access_read(struct ntdb_context *ntdb,
+			     ntdb_off_t off, ntdb_len_t len, bool convert)
+{
+	struct ntdb_access_hdr *hdr;
+	void *data = NULL;
+
+	if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+		data = ntdb->io->direct(ntdb, off, len, false);
+		if (NTDB_PTR_IS_ERR(data)) {
+			return data;
+		}
+	}
+
+	if (data) {
+		/* Direct pointer into the map: just count the accessor. */
+		ntdb->file->direct_count++;
+		return data;
+	}
+
+	hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
+	if (NTDB_PTR_IS_ERR(hdr)) {
+		return hdr;
+	}
+	hdr->next = ntdb->access;
+	ntdb->access = hdr;
+
+	/* The payload sits immediately after the header. */
+	data = hdr + 1;
+	if (convert) {
+		ntdb_convert(ntdb, (void *)data, len);
+	}
+	return data;
+}
+
+/* Obtain write access to len bytes at off.
+ *
+ * Read-only databases are rejected with an NTDB_ERR_RDONLY error pointer.
+ * Without NTDB_CONVERT, a direct pointer from io->direct is preferred;
+ * handing one out increments file->direct_count.  Otherwise the current
+ * bytes are read into an allocated buffer whose header records off, len
+ * and convert so that ntdb_access_commit() can write it back.
+ *
+ * Returns the data pointer or an error pointer. */
+void *ntdb_access_write(struct ntdb_context *ntdb,
+			ntdb_off_t off, ntdb_len_t len, bool convert)
+{
+	struct ntdb_access_hdr *hdr;
+	void *data = NULL;
+
+	if (ntdb->flags & NTDB_RDONLY) {
+		ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+			    "Write to read-only database");
+		return NTDB_ERR_PTR(NTDB_ERR_RDONLY);
+	}
+
+	if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+		data = ntdb->io->direct(ntdb, off, len, true);
+		if (NTDB_PTR_IS_ERR(data)) {
+			return data;
+		}
+	}
+
+	if (data) {
+		/* Direct pointer into the map: just count the accessor. */
+		ntdb->file->direct_count++;
+		return data;
+	}
+
+	hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
+	if (NTDB_PTR_IS_ERR(hdr)) {
+		return hdr;
+	}
+
+	/* Remember where and how to write the buffer back on commit. */
+	hdr->off = off;
+	hdr->len = len;
+	hdr->convert = convert;
+	hdr->next = ntdb->access;
+	ntdb->access = hdr;
+
+	data = hdr + 1;
+	if (convert)
+		ntdb_convert(ntdb, (void *)data, len);
+	return data;
+}
+
+/* Look through ntdb->access for the header whose payload (the address
+ * just past the header) is p.  Returns the address of the link pointing
+ * at that header, so the caller can unlink it, or NULL if p is not an
+ * allocated access buffer. */
+static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p)
+{
+	struct ntdb_access_hdr **link = &ntdb->access;
+
+	while (*link) {
+		if (*link + 1 == p)
+			return link;
+		link = &(*link)->next;
+	}
+	return NULL;
+}
+
+/* Finish with a pointer obtained from ntdb_access_read()/_write() without
+ * writing anything back.  Allocated buffers are unlinked and freed; any
+ * other pointer is taken to be a direct one, so direct_count is
+ * decremented and, when it reaches zero, parked old mappings are freed. */
+void ntdb_access_release(struct ntdb_context *ntdb, const void *p)
+{
+	struct ntdb_access_hdr **link = find_hdr(ntdb, p);
+	struct ntdb_access_hdr *hdr;
+
+	if (!link) {
+		if (--ntdb->file->direct_count == 0) {
+			free_old_mmaps(ntdb);
+		}
+		return;
+	}
+
+	hdr = *link;
+	*link = hdr->next;
+	ntdb->free_fn(hdr, ntdb->alloc_data);
+}
+
+/* Finish with a pointer obtained from ntdb_access_write(), making the
+ * changes permanent.  An allocated buffer is written back at the offset
+ * and length recorded in its header (through ntdb_write_convert() if it
+ * was converted, ntdb_write() otherwise), then unlinked and freed.  Any
+ * other pointer is taken to be a direct one: direct_count is decremented
+ * (freeing parked old mappings at zero) and NTDB_SUCCESS is returned. */
+enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p)
+{
+	struct ntdb_access_hdr **link = find_hdr(ntdb, p);
+	struct ntdb_access_hdr *hdr;
+	enum NTDB_ERROR ecode;
+
+	if (!link) {
+		if (--ntdb->file->direct_count == 0) {
+			free_old_mmaps(ntdb);
+		}
+		return NTDB_SUCCESS;
+	}
+
+	hdr = *link;
+	if (!hdr->convert) {
+		ecode = ntdb_write(ntdb, hdr->off, p, hdr->len);
+	} else {
+		ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len);
+	}
+
+	/* The buffer is released whether or not the write succeeded. */
+	*link = hdr->next;
+	ntdb->free_fn(hdr, ntdb->alloc_data);
+
+	return ecode;
+}
+
+/* Return a pointer straight into the mapping for len bytes at off.
+ * Returns NULL when nothing is mapped, or an error pointer when
+ * ntdb_oob() rejects the range.  write_mode is not consulted here. */
+static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len,
+			 bool write_mode)
+{
+	enum NTDB_ERROR ecode;
+
+	if (unlikely(!ntdb->file->map_ptr)) {
+		return NULL;
+	}
+
+	ecode = ntdb_oob(ntdb, off, len, false);
+	if (unlikely(ecode != NTDB_SUCCESS)) {
+		return NTDB_ERR_PTR(ecode);
+	}
+
+	return (char *)ntdb->file->map_ptr + off;
+}
+
+/* Read one offset stored at off, without byte-order conversion.  Reads
+ * through a direct pointer when available, else through ntdb_read().
+ * Returns the value, or an error offset. */
+static ntdb_off_t ntdb_read_normal_off(struct ntdb_context *ntdb,
+				       ntdb_off_t off)
+{
+	ntdb_off_t value;
+	enum NTDB_ERROR ecode;
+	ntdb_off_t *direct = ntdb_direct(ntdb, off, sizeof(*direct), false);
+
+	if (NTDB_PTR_IS_ERR(direct)) {
+		return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(direct));
+	}
+	if (likely(direct)) {
+		return *direct;
+	}
+
+	/* Nothing mapped: fall back to an explicit read. */
+	ecode = ntdb_read(ntdb, off, &value, sizeof(value));
+	if (ecode != NTDB_SUCCESS) {
+		return NTDB_ERR_TO_OFF(ecode);
+	}
+	return value;
+}
+
+/* Read one offset stored at off via ntdb_read_convert(), i.e. with
+ * byte-order conversion applied.  Returns the value or an error offset. */
+static ntdb_off_t ntdb_read_convert_off(struct ntdb_context *ntdb,
+					ntdb_off_t off)
+{
+	ntdb_off_t value;
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_read_convert(ntdb, off, &value, sizeof(value));
+	if (ecode == NTDB_SUCCESS) {
+		return value;
+	}
+	return NTDB_ERR_TO_OFF(ecode);
+}
+
+/* Store the offset val at off, without byte-order conversion.  Writes
+ * through a direct pointer when available, else through ntdb_write(). */
+static enum NTDB_ERROR ntdb_write_normal_off(struct ntdb_context *ntdb,
+					     ntdb_off_t off, ntdb_off_t val)
+{
+	ntdb_off_t *direct = ntdb_direct(ntdb, off, sizeof(*direct), true);
+
+	if (NTDB_PTR_IS_ERR(direct)) {
+		return NTDB_PTR_ERR(direct);
+	}
+	if (!likely(direct)) {
+		/* Nothing mapped: fall back to an explicit write. */
+		return ntdb_write(ntdb, off, &val, sizeof(val));
+	}
+
+	*direct = val;
+	return NTDB_SUCCESS;
+}
+
+/* Store the offset val at off via ntdb_write_convert(), i.e. with
+ * byte-order conversion applied. */
+static enum NTDB_ERROR ntdb_write_convert_off(struct ntdb_context *ntdb,
+					      ntdb_off_t off, ntdb_off_t val)
+{
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_write_convert(ntdb, off, &val, sizeof(val));
+	return ecode;
+}
+
+/* Increment the sequence number stored in the database header.
+ *
+ * The value is kept non-negative: when the increment would produce a
+ * negative number it is reset instead.  Without NTDB_CONVERT the update
+ * is done in place through a direct pointer when one is available;
+ * otherwise it goes through ntdb_read_off()/ntdb_write_off().  Failures
+ * are not reported to the caller.
+ */
+void ntdb_inc_seqnum(struct ntdb_context *ntdb)
+{
+	ntdb_off_t seq;
+
+	if (likely(!(ntdb->flags & NTDB_CONVERT))) {
+		int64_t *direct;
+
+		direct = ntdb->io->direct(ntdb,
+					  offsetof(struct ntdb_header, seqnum),
+					  sizeof(*direct), true);
+		/* An error pointer is non-NULL, so it must be excluded
+		 * explicitly before dereferencing; in that case fall through
+		 * to the read/write path, which checks for errors. */
+		if (likely(direct) && !NTDB_PTR_IS_ERR(direct)) {
+			/* Don't let it go negative, even briefly.  The test
+			 * is done in unsigned arithmetic so the addition
+			 * cannot overflow a signed integer. */
+			if (unlikely((int64_t)((uint64_t)*direct + 1) < 0))
+				*direct = 0;
+			(*direct)++;
+			return;
+		}
+	}
+
+	seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
+	if (!NTDB_OFF_IS_ERR(seq)) {
+		seq++;
+		if (unlikely((int64_t)seq < 0))
+			seq = 0;
+		ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq);
+	}
+}
+/* Method table installed by ntdb_io_init() when NTDB_CONVERT is not set.
+ * The initializer is positional, so the entries must follow the member
+ * order of struct ntdb_methods (declared outside this file).  It differs
+ * from io_convert_methods only in its last two entries, which read and
+ * write single offsets without byte-order conversion.
+ */
+static const struct ntdb_methods io_methods = {
+ ntdb_read,
+ ntdb_write,
+ ntdb_normal_oob,
+ ntdb_expand_file,
+ ntdb_direct,
+ ntdb_read_normal_off,
+ ntdb_write_normal_off,
+};
+/* Method table installed by ntdb_io_init() when NTDB_CONVERT is set.
+ * The initializer is positional, so the entries must follow the member
+ * order of struct ntdb_methods (declared outside this file).  It differs
+ * from io_methods only in its last two entries, which read and write
+ * single offsets through the converting helpers.
+ */
+static const struct ntdb_methods io_convert_methods = {
+ ntdb_read,
+ ntdb_write,
+ ntdb_normal_oob,
+ ntdb_expand_file,
+ ntdb_direct,
+ ntdb_read_convert_off,
+ ntdb_write_convert_off,
+};
+
+/*
+  initialise the default methods table: the converting table when the
+  database has NTDB_CONVERT set, the plain one otherwise
+*/
+void ntdb_io_init(struct ntdb_context *ntdb)
+{
+	ntdb->io = (ntdb->flags & NTDB_CONVERT) ? &io_convert_methods
+						: &io_methods;
+}
--- /dev/null
+ /*
+ Unix SMB/CIFS implementation.
+
+ trivial database library
+
+ Copyright (C) Andrew Tridgell 1999-2005
+ Copyright (C) Paul `Rusty' Russell 2000
+ Copyright (C) Jeremy Allison 2000-2003
+
+ ** NOTE! The following LGPL license applies to the ntdb
+ ** library. This does NOT imply that all of Samba is released
+ ** under the LGPL
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "private.h"
+#include <ccan/build_assert/build_assert.h>
+
+/* Log and return NTDB_ERR_LOCK for a lock that is already owned by
+ * another ntdb context in this process; call names the operation for the
+ * message.  If we were threaded, we could wait for unlock, but we're not,
+ * so fail. */
+enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call)
+{
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+			    "%s: lock owned by another ntdb in this process.",
+			    call);
+	return ecode;
+}
+
+/* Check that locks recorded for this file were taken by the current
+ * process: if we fork, we no longer really own locks.
+ *
+ * Returns true when no locks are recorded or when file->locker matches
+ * getpid().  Otherwise returns false, logging an NTDB_ERR_LOCK message
+ * prefixed with call when log is true. */
+bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log)
+{
+	const bool holds_locks = ntdb->file->allrecord_lock.count != 0
+		|| ntdb->file->num_lockrecs != 0;
+
+	/* No locks? No problem! */
+	if (!holds_locks) {
+		return true;
+	}
+
+	/* No fork? No problem! */
+	if (ntdb->file->locker == getpid()) {
+		return true;
+	}
+
+	if (log) {
+		ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+			    "%s: fork() detected after lock acquisition!"
+			    " (%u vs %u)", call,
+			    (unsigned int)ntdb->file->locker,
+			    (unsigned int)getpid());
+	}
+	return false;
+}
+
+/* Apply an fcntl() record lock of type rw to len bytes of fd starting at
+ * absolute offset off.  Uses F_SETLKW when waitflag is true and F_SETLK
+ * otherwise, retrying for as long as the call fails with EINTR.  unused
+ * is ignored.  Returns the final fcntl() result. */
+int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
+		    void *unused)
+{
+	const int cmd = waitflag ? F_SETLKW : F_SETLK;
+	struct flock fl;
+	int ret;
+
+	for (;;) {
+		/* Refill the request on every attempt. */
+		fl.l_type = rw;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = off;
+		fl.l_len = len;
+
+		ret = fcntl(fd, cmd, &fl);
+		if (ret == 0 || errno != EINTR) {
+			break;
+		}
+	}
+	return ret;
+}
+
+/* Remove an fcntl() record lock from len bytes of fd starting at absolute
+ * offset off, retrying for as long as the call fails with EINTR.  rw and
+ * unused are ignored.  Returns the final fcntl() result. */
+int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
+{
+	struct flock fl;
+	int ret;
+
+	for (;;) {
+		/* Refill the request on every attempt. */
+		fl.l_type = F_UNLCK;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = off;
+		fl.l_len = len;
+
+		ret = fcntl(fd, F_SETLKW, &fl);
+		if (ret == 0 || errno != EINTR) {
+			break;
+		}
+	}
+	return ret;
+}
+
+/* Take a low-level lock through ntdb->lock_fn and keep the statistics.
+ * If no locks are recorded yet, the current pid is stored as the locker
+ * first.  Returns whatever lock_fn returns. */
+static int lock(struct ntdb_context *ntdb,
+		int rw, off_t off, off_t len, bool waitflag)
+{
+	const bool first_lock = ntdb->file->allrecord_lock.count == 0
+		&& ntdb->file->num_lockrecs == 0;
+	int rc;
+
+	if (first_lock) {
+		ntdb->file->locker = getpid();
+	}
+
+	ntdb->stats.lock_lowlevel++;
+	rc = ntdb->lock_fn(ntdb->file->fd, rw, off, len, waitflag,
+			   ntdb->lock_data);
+	if (waitflag) {
+		return rc;
+	}
+
+	/* Non-blocking attempts are counted separately, as are failures. */
+	ntdb->stats.lock_nonblock++;
+	if (rc != 0)
+		ntdb->stats.lock_nonblock_fail++;
+	return rc;
+}
+/* Release a low-level lock by calling ntdb->unlock_fn with the file
+ * descriptor, rw, off, len and ntdb->lock_data, returning its result.
+ *
+ * The "#if 0" section is disabled debugging code: it scans /proc/locks
+ * for a lock held by this process starting at off and aborts if none is
+ * found or if its length or type does not match the arguments.
+ */
+static int unlock(struct ntdb_context *ntdb, int rw, off_t off, off_t len)
+{
+#if 0 /* Check they matched up locks and unlocks correctly. */
+ char line[80];
+ FILE *locks;
+ bool found = false;
+
+ locks = fopen("/proc/locks", "r");
+
+ while (fgets(line, 80, locks)) {
+ char *p;
+ int type, start, l;
+
+ /* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
+ p = strchr(line, ':') + 1;
+ if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY ")))
+ continue;
+ p += strlen(" FLOCK ADVISORY ");
+ if (strncmp(p, "READ ", strlen("READ ")) == 0)
+ type = F_RDLCK;
+ else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
+ type = F_WRLCK;
+ else
+ abort();
+ p += 6;
+ if (atoi(p) != getpid())
+ continue;
+ p = strchr(strchr(p, ' ') + 1, ' ') + 1;
+ start = atoi(p);
+ p = strchr(p, ' ') + 1;
+ if (strncmp(p, "EOF", 3) == 0)
+ l = 0;
+ else
+ l = atoi(p) - start + 1;
+
+ if (off == start) {
+ if (len != l) {
+ fprintf(stderr, "Len %u should be %u: %s",
+ (int)len, l, line);
+ abort();
+ }
+ if (type != rw) {
+ fprintf(stderr, "Type %s wrong: %s",
+ rw == F_RDLCK ? "READ" : "WRITE", line);
+ abort();
+ }
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ fprintf(stderr, "Unlock on %u@%u not found!",
+ (int)off, (int)len);
+ abort();
+ }
+
+ fclose(locks);
+#endif
+
+ return ntdb->unlock_fn(ntdb->file->fd, rw, off, len, ntdb->lock_data);
+}
+
+/* a byte range locking function - return NTDB_SUCCESS on success
+   this function locks len bytes at the specified offset.
+
+   note that a len of zero means lock to end of file
+
+   Write locks on a read-only database are rejected (NTDB_ERR_RDONLY is
+   logged).  With NTDB_NOLOCK set nothing is locked and NTDB_SUCCESS is
+   returned.  A failed lock returns NTDB_ERR_LOCK; it is logged unless
+   NTDB_LOCK_PROBE is set or errno is EAGAIN or EINTR.
+*/
+static enum NTDB_ERROR ntdb_brlock(struct ntdb_context *ntdb,
+				   int rw_type, ntdb_off_t offset, ntdb_off_t len,
+				   enum ntdb_lock_flags flags)
+{
+	if (rw_type == F_WRLCK && (ntdb->flags & NTDB_RDONLY)) {
+		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+				   "Write lock attempted on read-only database");
+	}
+
+	if (ntdb->flags & NTDB_NOLOCK) {
+		return NTDB_SUCCESS;
+	}
+
+	/* A 32 bit system cannot open a 64-bit file, but it could have
+	 * expanded since then: check here. */
+	if ((size_t)(offset + len) != offset + len) {
+		return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				   "ntdb_brlock: lock on giant offset %llu",
+				   (long long)(offset + len));
+	}
+
+	if (lock(ntdb, rw_type, offset, len, flags & NTDB_LOCK_WAIT) == 0) {
+		return NTDB_SUCCESS;
+	}
+
+	/* Generic lock error. errno set by fcntl.
+	 * EAGAIN is an expected return from non-blocking
+	 * locks. */
+	if (!(flags & NTDB_LOCK_PROBE)
+	    && (errno != EAGAIN && errno != EINTR)) {
+		ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+			    "ntdb_brlock failed (fd=%d) at"
+			    " offset %zu rw_type=%d flags=%d len=%zu:"
+			    " %s",
+			    ntdb->file->fd, (size_t)offset, rw_type,
+			    flags, (size_t)len, strerror(errno));
+	}
+	return NTDB_ERR_LOCK;
+}
+
+/* Release a byte-range lock of len bytes at offset.
+ *
+ * Succeeds without doing anything when NTDB_NOLOCK is set.  Returns
+ * NTDB_ERR_LOCK if check_lock_pid() fails; if unlock() returns -1 the
+ * failure is logged with strerror(errno) and the result of ntdb_logerr()
+ * is returned. */
+static enum NTDB_ERROR ntdb_brunlock(struct ntdb_context *ntdb,
+ int rw_type, ntdb_off_t offset, size_t len)
+{
+ if (ntdb->flags & NTDB_NOLOCK) {
+ return NTDB_SUCCESS;
+ }
+
+ /* Third argument is false here; presumably that suppresses the
+ * warning on failure - confirm against check_lock_pid(). */
+ if (!check_lock_pid(ntdb, "ntdb_brunlock", false))
+ return NTDB_ERR_LOCK;
+
+ if (unlock(ntdb, rw_type, offset, len) == -1) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_brunlock failed (fd=%d) at offset %zu"
+ " rw_type=%d len=%zu: %s",
+ ntdb->file->fd, (size_t)offset, rw_type,
+ (size_t)len, strerror(errno));
+ }
+ return NTDB_SUCCESS;
+}
+
+/*
+ upgrade a read lock to a write lock. This needs to be handled in a
+ special way as some OSes (such as solaris) have too conservative
+ deadlock detection and claim a deadlock when progress can be
+ made. For those OSes we may loop for a while.
+
+ Preconditions checked here: the allrecord lock is held exactly once
+ (count == 1), its off field is 1 (the value stored for an upgradable
+ lock), and it is owned by this context.  A write lock from start to
+ end-of-file (len 0) is then attempted up to 1000 times, retrying only
+ while errno is EDEADLK.  On success the allrecord lock is recorded as
+ F_WRLCK and its off field is cleared.  Otherwise NTDB_ERR_LOCK is
+ returned, logged unless errno is EAGAIN or EINTR.
+*/
+enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start)
+{
+ int count = 1000;
+
+ /* NOTE(review): the operation name passed here is
+ * "ntdb_transaction_prepare_commit", not this function's name -
+ * confirm whether that is intentional. */
+ if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true))
+ return NTDB_ERR_LOCK;
+
+ /* NOTE(review): this also fires for count == 0, where "too high"
+ * is misleading. */
+ if (ntdb->file->allrecord_lock.count != 1) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_allrecord_upgrade failed:"
+ " count %u too high",
+ ntdb->file->allrecord_lock.count);
+ }
+
+ if (ntdb->file->allrecord_lock.off != 1) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_allrecord_upgrade failed:"
+ " already upgraded?");
+ }
+
+ if (ntdb->file->allrecord_lock.owner != ntdb) {
+ return owner_conflict(ntdb, "ntdb_allrecord_upgrade");
+ }
+
+ while (count--) {
+ struct timeval tv;
+ /* NTDB_LOCK_PROBE keeps ntdb_brlock() from logging each
+ * failed attempt; we log once below instead. */
+ if (ntdb_brlock(ntdb, F_WRLCK, start, 0,
+ NTDB_LOCK_WAIT|NTDB_LOCK_PROBE) == NTDB_SUCCESS) {
+ ntdb->file->allrecord_lock.ltype = F_WRLCK;
+ ntdb->file->allrecord_lock.off = 0;
+ return NTDB_SUCCESS;
+ }
+ /* Only a (possibly spurious) deadlock report is worth retrying. */
+ if (errno != EDEADLK) {
+ break;
+ }
+ /* sleep for as short a time as we can - more portable than usleep() */
+ tv.tv_sec = 0;
+ tv.tv_usec = 1;
+ select(0, NULL, NULL, NULL, &tv);
+ }
+
+ if (errno != EAGAIN && errno != EINTR)
+ ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_allrecord_upgrade failed");
+ return NTDB_ERR_LOCK;
+}
+
+/* Look up the nested-lock record for offset in ntdb->file->lockrecs.
+ *
+ * The first record whose off matches decides the result: if owner is
+ * non-NULL and that record belongs to a different context, NULL is
+ * returned; otherwise a pointer to the record is returned.  NULL is also
+ * returned when no record matches. */
+static struct ntdb_lock *find_nestlock(struct ntdb_context *ntdb, ntdb_off_t offset,
+				       const struct ntdb_context *owner)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ntdb->file->num_lockrecs; idx++) {
+		struct ntdb_lock *rec = &ntdb->file->lockrecs[idx];
+
+		if (rec->off != offset)
+			continue;
+
+		/* Caller asked for a specific owner and this isn't it. */
+		if (owner != NULL && rec->owner != owner)
+			return NULL;
+
+		return rec;
+	}
+
+	return NULL;
+}
+
+/* Take the allrecord write lock and the open lock (both blocking, and
+ * both with NTDB_LOCK_NOCHECK so that taking them does not itself
+ * trigger a recovery check), run ntdb_transaction_recover(), then
+ * release both locks.
+ *
+ * Returns the first lock failure, otherwise the result of
+ * ntdb_transaction_recover(). */
+enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb)
+{
+	enum NTDB_ERROR ecode;
+
+	/* Report this function's own name if a fork() is detected; the
+	 * name previously passed here was copied from another caller. */
+	if (!check_lock_pid(ntdb, "ntdb_lock_and_recover", true))
+		return NTDB_ERR_LOCK;
+
+	ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK,
+				    false);
+	if (ecode != NTDB_SUCCESS) {
+		return ecode;
+	}
+
+	ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+	if (ecode != NTDB_SUCCESS) {
+		/* Don't leave the allrecord lock held on failure. */
+		ntdb_allrecord_unlock(ntdb, F_WRLCK);
+		return ecode;
+	}
+	ecode = ntdb_transaction_recover(ntdb);
+	/* Release in reverse order of acquisition. */
+	ntdb_unlock_open(ntdb, F_WRLCK);
+	ntdb_allrecord_unlock(ntdb, F_WRLCK);
+
+	return ecode;
+}
+
+/* lock an offset in the database.
+ *
+ * Takes a one-byte lock of type ltype at offset and records it in
+ * ntdb->file->lockrecs.  If a record for offset already exists and is
+ * owned by this context, only its count is bumped (a write lock may not
+ * be nested on top of a read lock).  Unless NTDB_LOCK_NOCHECK is given,
+ * the first lock taken (num_lockrecs == 0) also checks whether the
+ * database needs recovery and, if so, performs it before re-taking the
+ * lock.
+ *
+ * Succeeds without doing anything when NTDB_NOLOCK is set. */
+static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb,
+ ntdb_off_t offset, int ltype,
+ enum ntdb_lock_flags flags)
+{
+ struct ntdb_lock *new_lck;
+ enum NTDB_ERROR ecode;
+
+ /* Sanity bound: offset must not exceed the end of the hash-lock
+ * range plus one slot per 8 bytes of the mapped file. */
+ assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
+ + ntdb->file->map_size / 8));
+
+ if (ntdb->flags & NTDB_NOLOCK)
+ return NTDB_SUCCESS;
+
+ if (!check_lock_pid(ntdb, "ntdb_nest_lock", true)) {
+ return NTDB_ERR_LOCK;
+ }
+
+ /* Counted even when this turns out to be a nested re-lock. */
+ ntdb->stats.locks++;
+
+ /* owner == NULL: find the record whoever owns it, so that a
+ * conflicting owner can be reported below. */
+ new_lck = find_nestlock(ntdb, offset, NULL);
+ if (new_lck) {
+ if (new_lck->owner != ntdb) {
+ return owner_conflict(ntdb, "ntdb_nest_lock");
+ }
+
+ if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_nest_lock:"
+ " offset %zu has read lock",
+ (size_t)offset);
+ }
+ /* Just increment the struct, posix locks don't stack. */
+ new_lck->count++;
+ return NTDB_SUCCESS;
+ }
+
+#if 0
+ if (ntdb->file->num_lockrecs
+ && offset >= NTDB_HASH_LOCK_START
+ && offset < NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_nest_lock: already have a hash lock?");
+ }
+#endif
+ /* Make room for one more record before taking the lock, so an
+ * allocation failure cannot leave a lock held but unrecorded. */
+ if (ntdb->file->lockrecs == NULL) {
+ new_lck = ntdb->alloc_fn(ntdb->file, sizeof(*ntdb->file->lockrecs),
+ ntdb->alloc_data);
+ } else {
+ new_lck = (struct ntdb_lock *)ntdb->expand_fn(
+ ntdb->file->lockrecs,
+ sizeof(*ntdb->file->lockrecs)
+ * (ntdb->file->num_lockrecs+1),
+ ntdb->alloc_data);
+ }
+ if (new_lck == NULL) {
+ return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "ntdb_nest_lock:"
+ " unable to allocate %zu lock struct",
+ ntdb->file->num_lockrecs + 1);
+ }
+ ntdb->file->lockrecs = new_lck;
+
+ /* Since fcntl locks don't nest, we do a lock for the first one,
+ and simply bump the count for future ones */
+ ecode = ntdb_brlock(ntdb, ltype, offset, 1, flags);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* First time we grab a lock, perhaps someone died in commit? */
+ if (!(flags & NTDB_LOCK_NOCHECK)
+ && ntdb->file->num_lockrecs == 0) {
+ ntdb_bool_err berr = ntdb_needs_recovery(ntdb);
+ if (berr != false) {
+ /* Drop our lock before recovering (or failing);
+ * it is re-taken below after a successful recovery. */
+ ntdb_brunlock(ntdb, ltype, offset, 1);
+
+ /* Negative values are errors from the check itself. */
+ if (berr < 0)
+ return NTDB_OFF_TO_ERR(berr);
+ ecode = ntdb_lock_and_recover(ntdb);
+ if (ecode == NTDB_SUCCESS) {
+ ecode = ntdb_brlock(ntdb, ltype, offset, 1,
+ flags);
+ }
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ }
+ }
+
+ /* Record the new lock in the slot reserved above. */
+ ntdb->file->lockrecs[ntdb->file->num_lockrecs].owner = ntdb;
+ ntdb->file->lockrecs[ntdb->file->num_lockrecs].off = offset;
+ ntdb->file->lockrecs[ntdb->file->num_lockrecs].count = 1;
+ ntdb->file->lockrecs[ntdb->file->num_lockrecs].ltype = ltype;
+ ntdb->file->num_lockrecs++;
+
+ return NTDB_SUCCESS;
+}
+
+/* Drop one nesting level of the lock this context holds at off.
+ *
+ * If the record's count is above one it is simply decremented.  For the
+ * last level the byte-range lock is released and the record is removed
+ * from ntdb->file->lockrecs; the result of ntdb_brunlock() is returned.
+ * Logs and returns an error when this context has no lock recorded at
+ * off.  Succeeds without doing anything when NTDB_NOLOCK is set. */
+static enum NTDB_ERROR ntdb_nest_unlock(struct ntdb_context *ntdb,
+ ntdb_off_t off, int ltype)
+{
+ struct ntdb_lock *lck;
+ enum NTDB_ERROR ecode;
+
+ if (ntdb->flags & NTDB_NOLOCK)
+ return NTDB_SUCCESS;
+
+ /* Passing ntdb as owner: a record held by another context counts
+ * as "no lock". */
+ lck = find_nestlock(ntdb, off, ntdb);
+ if ((lck == NULL) || (lck->count == 0)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_nest_unlock: no lock for %zu",
+ (size_t)off);
+ }
+
+ if (lck->count > 1) {
+ lck->count--;
+ return NTDB_SUCCESS;
+ }
+
+ /*
+ * This lock has count==1 left, so we need to unlock it in the
+ * kernel. We don't bother with decrementing the in-memory array
+ * element, we're about to overwrite it with the last array element
+ * anyway.
+ */
+ ecode = ntdb_brunlock(ntdb, ltype, off, 1);
+
+ /*
+ * Shrink the array by overwriting the element just unlocked with the
+ * last array element.  The record is removed even if the unlock
+ * above failed.
+ */
+ *lck = ntdb->file->lockrecs[--ntdb->file->num_lockrecs];
+
+ return ecode;
+}
+
+/*
+ get the transaction lock: a blocking (NTDB_LOCK_WAIT) nested lock of
+ type ltype at NTDB_TRANSACTION_LOCK
+ */
+enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype)
+{
+ return ntdb_nest_lock(ntdb, NTDB_TRANSACTION_LOCK, ltype, NTDB_LOCK_WAIT);
+}
+
+/*
+ release the transaction lock (one nesting level at
+ NTDB_TRANSACTION_LOCK); any error from the unlock is discarded
+ */
+void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype)
+{
+ ntdb_nest_unlock(ntdb, NTDB_TRANSACTION_LOCK, ltype);
+}
+
+/* We only need to lock individual bytes, but Linux merges consecutive locks
+ * so we lock in contiguous ranges.
+ *
+ * Locks len bytes starting at off.  The whole range is first tried
+ * without NTDB_LOCK_WAIT; if that fails with NTDB_ERR_LOCK the range is
+ * split in two and each half is locked recursively, down to single bytes
+ * which are locked with the caller's original flags.  len must not be 0. */
+static enum NTDB_ERROR ntdb_lock_gradual(struct ntdb_context *ntdb,
+ int ltype, enum ntdb_lock_flags flags,
+ ntdb_off_t off, ntdb_off_t len)
+{
+ enum NTDB_ERROR ecode;
+ /* Caller's flags with the blocking bit stripped. */
+ enum ntdb_lock_flags nb_flags = (flags & ~NTDB_LOCK_WAIT);
+
+ if (len <= 1) {
+ /* 0 would mean to end-of-file... */
+ assert(len != 0);
+ /* Single hash. Just do blocking lock. */
+ return ntdb_brlock(ntdb, ltype, off, len, flags);
+ }
+
+ /* First we try non-blocking. */
+ ecode = ntdb_brlock(ntdb, ltype, off, len, nb_flags);
+ /* Success, or any error other than a lock failure, ends here. */
+ if (ecode != NTDB_ERR_LOCK) {
+ return ecode;
+ }
+
+ /* Try locking first half, then second. */
+ ecode = ntdb_lock_gradual(ntdb, ltype, flags, off, len / 2);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+
+ ecode = ntdb_lock_gradual(ntdb, ltype, flags,
+ off + len / 2, len - len / 2);
+ if (ecode != NTDB_SUCCESS) {
+ /* Second half failed: give back the first half. */
+ ntdb_brunlock(ntdb, ltype, off, len / 2);
+ }
+ return ecode;
+}
+
+/* lock/unlock entire database. It can only be upgradable if you have some
+ * other way of guaranteeing exclusivity (ie. transaction write lock).
+ *
+ * If this context already holds the allrecord lock, the call nests
+ * (count is bumped) when ltype is F_RDLCK or the held lock is F_WRLCK;
+ * otherwise it is an error.  It is also an error to call this while
+ * holding hash chain locks, or to ask for an upgradable lock with an
+ * ltype other than F_RDLCK.
+ *
+ * A fresh lock covers the hash-lock range (taken gradually) and then
+ * everything from the end of that range to end-of-file.  Unless
+ * NTDB_LOCK_NOCHECK is given, the database is then checked for a pending
+ * recovery; if one is needed the lock is dropped, recovery is run, and
+ * the lock is taken again.
+ *
+ * Succeeds without doing anything when NTDB_NOLOCK is set. */
+enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
+ enum ntdb_lock_flags flags, bool upgradable)
+{
+ enum NTDB_ERROR ecode;
+ ntdb_bool_err berr;
+
+ if (ntdb->flags & NTDB_NOLOCK) {
+ return NTDB_SUCCESS;
+ }
+
+ if (!check_lock_pid(ntdb, "ntdb_allrecord_lock", true)) {
+ return NTDB_ERR_LOCK;
+ }
+
+ if (ntdb->file->allrecord_lock.count) {
+ if (ntdb->file->allrecord_lock.owner != ntdb) {
+ return owner_conflict(ntdb, "ntdb_allrecord_lock");
+ }
+
+ /* A read request nests under any held lock; a write
+ * request nests only under a held write lock. */
+ if (ltype == F_RDLCK
+ || ntdb->file->allrecord_lock.ltype == F_WRLCK) {
+ ntdb->file->allrecord_lock.count++;
+ return NTDB_SUCCESS;
+ }
+
+ /* a global lock of a different type exists */
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+ "ntdb_allrecord_lock: already have %s lock",
+ ntdb->file->allrecord_lock.ltype == F_RDLCK
+ ? "read" : "write");
+ }
+
+ if (ntdb_has_hash_locks(ntdb)) {
+ /* can't combine global and chain locks */
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+ "ntdb_allrecord_lock:"
+ " already have chain lock");
+ }
+
+ if (upgradable && ltype != F_RDLCK) {
+ /* ntdb error: you can't upgrade a write lock! */
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_allrecord_lock:"
+ " can't upgrade a write lock");
+ }
+
+ /* Counted once per call, not once per retry after recovery. */
+ ntdb->stats.locks++;
+again:
+ /* Lock hashes, gradually. */
+ ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START,
+ 1 << ntdb->hash_bits);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+
+ /* Lock free tables: there to end of file. */
+ ecode = ntdb_brlock(ntdb, ltype,
+ NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits),
+ 0, flags);
+ if (ecode != NTDB_SUCCESS) {
+ /* Undo the hash-range lock taken above. */
+ ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START,
+ 1 << ntdb->hash_bits);
+ return ecode;
+ }
+
+ ntdb->file->allrecord_lock.owner = ntdb;
+ ntdb->file->allrecord_lock.count = 1;
+ /* If it's upgradable, it's actually exclusive so we can treat
+ * it as a write lock. */
+ ntdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
+ /* off doubles as the "upgradable" marker for the allrecord lock. */
+ ntdb->file->allrecord_lock.off = upgradable;
+
+ /* Now check for needing recovery. */
+ if (flags & NTDB_LOCK_NOCHECK)
+ return NTDB_SUCCESS;
+
+ berr = ntdb_needs_recovery(ntdb);
+ if (likely(berr == false))
+ return NTDB_SUCCESS;
+
+ /* Recovery needed, or the check failed: release before going on. */
+ ntdb_allrecord_unlock(ntdb, ltype);
+ if (berr < 0)
+ return NTDB_OFF_TO_ERR(berr);
+ ecode = ntdb_lock_and_recover(ntdb);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ goto again;
+}
+
+/* Take the open lock: a nested lock of type ltype at NTDB_OPEN_LOCK,
+ * with the caller's flags passed through unchanged. */
+enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
+ int ltype, enum ntdb_lock_flags flags)
+{
+ return ntdb_nest_lock(ntdb, NTDB_OPEN_LOCK, ltype, flags);
+}
+
+/* Release one nesting level of the open lock (NTDB_OPEN_LOCK); any error
+ * from the unlock is discarded. */
+void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype)
+{
+ ntdb_nest_unlock(ntdb, NTDB_OPEN_LOCK, ltype);
+}
+
+/* Return true if this context has a lock recorded at NTDB_OPEN_LOCK.
+ * Always false when NTDB_NOLOCK is set. */
+bool ntdb_has_open_lock(struct ntdb_context *ntdb)
+{
+ return !(ntdb->flags & NTDB_NOLOCK)
+ && find_nestlock(ntdb, NTDB_OPEN_LOCK, ntdb) != NULL;
+}
+
+/* Take the expansion lock: a blocking nested lock of type ltype at
+ * NTDB_EXPANSION_LOCK, without the recovery check. */
+enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype)
+{
+ /* Lock doesn't protect data, so don't check (we recurse if we do!) */
+ return ntdb_nest_lock(ntdb, NTDB_EXPANSION_LOCK, ltype,
+ NTDB_LOCK_WAIT | NTDB_LOCK_NOCHECK);
+}
+
+/* Release one nesting level of the expansion lock (NTDB_EXPANSION_LOCK);
+ * any error from the unlock is discarded. */
+void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype)
+{
+ ntdb_nest_unlock(ntdb, NTDB_EXPANSION_LOCK, ltype);
+}
+
+/* unlock entire db
+ *
+ * Drops one nesting level of the allrecord lock.  Logs and returns
+ * without unlocking if the lock is not held, is held by another context,
+ * or is of a different type than ltype (an upgradable lock, which is
+ * stored as F_WRLCK, may be released with F_RDLCK).  When the last level
+ * is dropped the byte range from NTDB_HASH_LOCK_START to end-of-file is
+ * unlocked.  Does nothing when NTDB_NOLOCK is set. */
+void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype)
+{
+ if (ntdb->flags & NTDB_NOLOCK)
+ return;
+
+ if (ntdb->file->allrecord_lock.count == 0) {
+ ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+ "ntdb_allrecord_unlock: not locked!");
+ return;
+ }
+
+ if (ntdb->file->allrecord_lock.owner != ntdb) {
+ ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+ "ntdb_allrecord_unlock: not locked by us!");
+ return;
+ }
+
+ /* Upgradable locks are marked as write locks. */
+ if (ntdb->file->allrecord_lock.ltype != ltype
+ && (!ntdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
+ ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_allrecord_unlock: have %s lock",
+ ntdb->file->allrecord_lock.ltype == F_RDLCK
+ ? "read" : "write");
+ return;
+ }
+
+ if (ntdb->file->allrecord_lock.count > 1) {
+ ntdb->file->allrecord_lock.count--;
+ return;
+ }
+
+ /* Last level: clear the bookkeeping, then release the range. */
+ ntdb->file->allrecord_lock.count = 0;
+ ntdb->file->allrecord_lock.ltype = 0;
+
+ /* len 0 means "to end of file"; the result is not checked. */
+ ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, 0);
+}
+
+/* Return true if this context has a lock recorded at
+ * NTDB_EXPANSION_LOCK. */
+bool ntdb_has_expansion_lock(struct ntdb_context *ntdb)
+{
+ return find_nestlock(ntdb, NTDB_EXPANSION_LOCK, ntdb) != NULL;
+}
+
+/* Return true if any recorded lock (whatever its owner) lies in the hash
+ * chain range [NTDB_HASH_LOCK_START,
+ * NTDB_HASH_LOCK_START + (1 << hash_bits)). */
+bool ntdb_has_hash_locks(struct ntdb_context *ntdb)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ntdb->file->num_lockrecs; idx++) {
+		/* Below the first hash lock offset? */
+		if (!(ntdb->file->lockrecs[idx].off >= NTDB_HASH_LOCK_START))
+			continue;
+
+		/* At or past the end of the hash lock range? */
+		if (!(ntdb->file->lockrecs[idx].off < (NTDB_HASH_LOCK_START
+						       + (1 << ntdb->hash_bits))))
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
+/* Return true if any recorded lock lies strictly above the end of the
+ * hash chain range, NTDB_HASH_LOCK_START + (1 << hash_bits).  Always
+ * false when NTDB_NOLOCK is set. */
+static bool ntdb_has_free_lock(struct ntdb_context *ntdb)
+{
+	unsigned int idx = 0;
+
+	if (ntdb->flags & NTDB_NOLOCK)
+		return false;
+
+	while (idx < ntdb->file->num_lockrecs) {
+		if (ntdb->file->lockrecs[idx].off
+		    > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits))
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/* Lock hash chain h (which must be below 1 << hash_bits) with a blocking
+ * nested lock of type ltype at NTDB_HASH_LOCK_START + h.
+ *
+ * If an allrecord lock is held by this context, no chain lock is taken:
+ * the call succeeds when ltype matches the allrecord lock type or is
+ * F_RDLCK, and fails otherwise.  It is an error to call this while
+ * holding a free-bucket lock or the expansion lock. */
+enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
+ unsigned int h,
+ int ltype)
+{
+ unsigned l = NTDB_HASH_LOCK_START + h;
+
+ assert(h < (1 << ntdb->hash_bits));
+
+ /* An allrecord lock allows us to avoid per chain locks */
+ if (ntdb->file->allrecord_lock.count) {
+ if (!check_lock_pid(ntdb, "ntdb_lock_hashes", true))
+ return NTDB_ERR_LOCK;
+
+ if (ntdb->file->allrecord_lock.owner != ntdb)
+ return owner_conflict(ntdb, "ntdb_lock_hashes");
+ if (ltype == ntdb->file->allrecord_lock.ltype
+ || ltype == F_RDLCK) {
+ return NTDB_SUCCESS;
+ }
+
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+ "ntdb_lock_hashes:"
+ " already have %s allrecordlock",
+ ntdb->file->allrecord_lock.ltype == F_RDLCK
+ ? "read" : "write");
+ }
+
+ /* Refuse to take a chain lock while a free-bucket or expansion
+ * lock is held. */
+ if (ntdb_has_free_lock(ntdb)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_lock_hashes: already have free lock");
+ }
+
+ if (ntdb_has_expansion_lock(ntdb)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+ "ntdb_lock_hashes:"
+ " already have expansion lock");
+ }
+
+ return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT);
+}
+
+/* Release the lock on hash chain h.  h is masked to hash_bits bits
+ * before being added to NTDB_HASH_LOCK_START.
+ *
+ * If an allrecord lock is held, no chain lock is released: the call
+ * succeeds unless a write unlock is requested under a read-only
+ * allrecord lock, or the allrecord lock belongs to another context.
+ * Succeeds without doing anything when NTDB_NOLOCK is set. */
+enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
+				 unsigned int h, int ltype)
+{
+	unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1));
+
+	/* Return the named success code, like the other lock functions
+	 * in this file, rather than a bare 0. */
+	if (ntdb->flags & NTDB_NOLOCK)
+		return NTDB_SUCCESS;
+
+	/* An allrecord lock allows us to avoid per chain locks */
+	if (ntdb->file->allrecord_lock.count) {
+		if (ntdb->file->allrecord_lock.ltype == F_RDLCK
+		    && ltype == F_WRLCK) {
+			return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+					   "ntdb_unlock_hashes RO allrecord!");
+		}
+		if (ntdb->file->allrecord_lock.owner != ntdb) {
+			return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
+					   "ntdb_unlock_hashes:"
+					   " not locked by us!");
+		}
+		return NTDB_SUCCESS;
+	}
+
+	return ntdb_nest_unlock(ntdb, l, ltype);
+}
+
+/* Map a free-bucket file offset b_off to the lock offset used for it.
+ *
+ * Hash locks use the 1 << hash_bits offsets starting at
+ * NTDB_HASH_LOCK_START; free-bucket locks begin right after them.  The
+ * bucket offset is divided by sizeof(ntdb_off_t) to get its slot.
+ * NOTE(review): the original comment said bucket offsets are
+ * sizeof(ntdb_len_t) apart - confirm the two types have the same size.
+ * The result is that on 32 bit systems we don't use lock values > 2^31 on
+ * files that are less than 4GB.
+ */
+static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb,
+ ntdb_off_t b_off)
+{
+ return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
+ + b_off / sizeof(ntdb_off_t);
+}
+
+/* Take a write lock on the free bucket at file offset b_off (which must
+ * lie at or beyond the end of the header).  waitflag is passed through
+ * as the lock flags.
+ *
+ * If an allrecord lock is held by this context, no bucket lock is taken:
+ * the call succeeds when that lock is a write lock and fails when it is
+ * read-only.  Succeeds without doing anything when NTDB_NOLOCK is set. */
+enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
+				      enum ntdb_lock_flags waitflag)
+{
+	assert(b_off >= sizeof(struct ntdb_header));
+
+	/* Return the named success code, like the other lock functions
+	 * in this file, rather than a bare 0. */
+	if (ntdb->flags & NTDB_NOLOCK)
+		return NTDB_SUCCESS;
+
+	/* An allrecord lock allows us to avoid per chain locks */
+	if (ntdb->file->allrecord_lock.count) {
+		if (!check_lock_pid(ntdb, "ntdb_lock_free_bucket", true))
+			return NTDB_ERR_LOCK;
+
+		if (ntdb->file->allrecord_lock.owner != ntdb) {
+			return owner_conflict(ntdb, "ntdb_lock_free_bucket");
+		}
+
+		if (ntdb->file->allrecord_lock.ltype == F_WRLCK)
+			return NTDB_SUCCESS;
+		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+				   "ntdb_lock_free_bucket with"
+				   " read-only allrecordlock!");
+	}
+
+#if 0 /* FIXME */
+	if (ntdb_has_expansion_lock(ntdb)) {
+		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
+				   "ntdb_lock_free_bucket:"
+				   " already have expansion lock");
+	}
+#endif
+
+	return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK,
+			      waitflag);
+}
+
+/* Release the write lock on the free bucket at file offset b_off.
+ * Does nothing while an allrecord lock is held; any error from the
+ * unlock is discarded. */
+void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off)
+{
+ if (ntdb->file->allrecord_lock.count)
+ return;
+
+ ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK);
+}
+
+/* Public API: take a blocking, non-upgradable allrecord write lock. */
+_PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb)
+{
+ return ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
+}
+
+/* Public API: drop one level of the allrecord write lock. */
+_PUBLIC_ void ntdb_unlockall(struct ntdb_context *ntdb)
+{
+ ntdb_allrecord_unlock(ntdb, F_WRLCK);
+}
+
+/* Public API: take a blocking, non-upgradable allrecord read lock. */
+_PUBLIC_ enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb)
+{
+ return ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
+}
+
+/* Public API: drop one level of the allrecord read lock. */
+_PUBLIC_ void ntdb_unlockall_read(struct ntdb_context *ntdb)
+{
+ ntdb_allrecord_unlock(ntdb, F_RDLCK);
+}
+
+/* Release every lock this context still holds on the shared file: first
+ * all nesting levels of an allrecord lock it owns, then each nested lock
+ * record it owns.  Returns without touching anything if
+ * check_lock_pid() fails. */
+void ntdb_lock_cleanup(struct ntdb_context *ntdb)
+{
+	unsigned int idx = 0;
+
+	/* We don't want to warn: they're allowed to close ntdb after fork. */
+	if (!check_lock_pid(ntdb, "ntdb_close", false))
+		return;
+
+	/* Unwind the allrecord lock one level at a time while we own it. */
+	for (;;) {
+		if (!ntdb->file->allrecord_lock.count)
+			break;
+		if (ntdb->file->allrecord_lock.owner != ntdb)
+			break;
+		ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
+	}
+
+	while (idx < ntdb->file->num_lockrecs) {
+		if (ntdb->file->lockrecs[idx].owner != ntdb) {
+			/* Someone else's record: leave it and move on. */
+			idx++;
+			continue;
+		}
+
+		/* Stay on the same index: the unlock either drops one
+		 * nesting level or replaces this slot with the last
+		 * record, so it must be examined again. */
+		ntdb_nest_unlock(ntdb,
+				 ntdb->file->lockrecs[idx].off,
+				 ntdb->file->lockrecs[idx].ltype);
+	}
+}
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<refentry>
+ <refmeta>
+ <refentrytitle>ntdb</refentrytitle>
+ <manvolnum>3</manvolnum>
+ <refmiscinfo class="source">Samba</refmiscinfo>
+ <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+ <refmiscinfo class="version">4.1</refmiscinfo>
+ </refmeta>
+ <refnamediv>
+ <refname>ntdb</refname>
+<refpurpose>A not-so trivial keyword/data database system</refpurpose>
+ </refnamediv>
+ <refsynopsisdiv>
+<synopsis>#include &lt;ntdb.h&gt;</synopsis>
+ </refsynopsisdiv>
+ <refsect1><title>DESCRIPTION</title>
+ <para>
+ If you have previously used the tdb library from Samba, much of
+ this will seem familiar, but there are some API changes which a
+ compiler will warn you about if you simply replace 'tdb' with
+ 'ntdb' in your code! The on-disk format for ntdb is
+ incompatible with tdb.
+ </para>
+ <para>
+ tdb's API was based on gdbm, and ntdb continues this tradition,
+ with enhancements. A differences guide is available in the text
+ file <filename>lib/ntdb/doc/TDB_porting.txt</filename> in the
+ SAMBA source tree.
+ </para>
+ </refsect1>
+ <refsect1><title>NTDB API OVERVIEW</title>
+ <para>
+ The complete API is documented in the ntdb.h header, which is
+ kept up-to-date and recommended reading.
+ </para>
+ <para>
+ Normal usage is to call ntdb_open() to create or open an ntdb
+ file. ntdb_store() is used to add records, ntdb_fetch() is used
+ to fetch them. Traversals are supported via callback
+ (ntdb_traverse()) or iteration (ntdb_firstkey() and
+ ntdb_nextkey()). Transactions are supported for batching
+ updates or reads atomically, using ntdb_transaction_start() and
+ ntdb_transaction_commit().
+ </para>
+ <refsect2><title>Use With Talloc</title>
+ <para>
+ ntdb_open() takes an optional linked list of attributes:
+ in particular you can specify an alternate allocator (such as
+ talloc):
+ </para>
+ <programlisting>
+#include &lt;talloc.h&gt;
+#include &lt;ntdb.h&gt;
+
+static void *my_alloc(const void *owner, size_t len, void *priv)
+{
+ return talloc_size(owner, len);
+}
+
+static void *my_expand(void *old, size_t newlen, void *priv)
+{
+ return talloc_realloc_size(NULL, old, newlen);
+}
+
+static void my_free(void *old, void *priv)
+{
+ talloc_free(old);
+}
+
+/* This opens an ntdb file as a talloc object with given parent. */
+struct ntdb_context *ntdb_open_talloc(const void *parent,
+ const char *filename)
+{
+ struct ntdb_context *ntdb;
+ union ntdb_attribute alloc;
+
+ alloc.base.attr = NTDB_ATTRIBUTE_ALLOCATOR;
+ alloc.base.next = NULL;
+ alloc.alloc.alloc = my_alloc;
+ alloc.alloc.expand = my_expand;
+ alloc.alloc.free = my_free;
+
+ ntdb = ntdb_open(filename, NTDB_DEFAULT, O_RDWR|O_CREAT, 0600,
+ &amp;alloc);
+ if (ntdb) {
+ talloc_steal(parent, ntdb);
+ talloc_set_name(ntdb, "%s", filename);
+ }
+ return ntdb;
+}
+</programlisting>
+ </refsect2>
+ </refsect1>
+ <refsect1><title>SEE ALSO</title>
+ <para>
+ <ulink url="http://tdb.samba.org/"/>
+ </para>
+ </refsect1>
+
+ <refsect1><title>AUTHOR</title>
+ <para> The original tdb software was created by Andrew Tridgell, and
+ is now developed by the
+ Samba Team as an Open Source project similar to the way the
+ Linux kernel is developed. ntdb was derived from tdb, but mostly
+ rewritten by Rusty Russell.
+ </para>
+ </refsect1>
+
+ <refsect1><title>COPYRIGHT/LICENSE</title>
+ <para>
+ Copyright (C) Rusty Russell 2013, IBM Corporation
+ </para>
+ <para>
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 3 of the
+ License, or (at your option) any later version.
+ </para>
+ <para>
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see http://www.gnu.org/licenses/.
+ </para>
+ </refsect1>
+</refentry>
--- /dev/null
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<refentry id="ntdbbackup.8">
+
+<refmeta>
+ <refentrytitle>ntdbbackup</refentrytitle>
+ <manvolnum>8</manvolnum>
+ <refmiscinfo class="source">Samba</refmiscinfo>
+ <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+ <refmiscinfo class="version">4.1</refmiscinfo>
+</refmeta>
+
+
+<refnamediv>
+ <refname>ntdbbackup</refname>
+ <refpurpose>tool for backing up and for validating the integrity of samba .ntdb files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ntdbbackup</command>
+ <arg choice="opt">-s suffix</arg>
+ <arg choice="opt">-v</arg>
+ <arg choice="opt">-h</arg>
+ </cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>This tool is part of the <citerefentry><refentrytitle>samba</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> suite.</para>
+
+ <para><command>ntdbbackup</command> is a tool that may be used to back up samba .ntdb
+ files. This tool may also be used to verify the integrity of the .ntdb files prior
+ to samba startup or during normal operation. If it finds file damage and it finds
+ a prior backup the backup file will be restored.
+ </para>
+</refsect1>
+
+
+<refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>-h</term>
+ <listitem><para>
+ Get help information.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-s suffix</term>
+ <listitem><para>
+ The <command>-s</command> option allows the administrator to specify a file
+ backup extension. This way it is possible to keep a history of ntdb backup
+ files by using a new suffix for each backup.
+ </para> </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-v</term>
+ <listitem><para>
+ The <command>-v</command> option checks the database for damage (corrupt data),
+ which, if detected, causes the backup to be restored.
+ </para></listitem>
+ </varlistentry>
+
+ </variablelist>
+</refsect1>
+
+
+<refsect1>
+ <title>COMMANDS</title>
+
+ <para><emphasis>GENERAL INFORMATION</emphasis></para>
+
+ <para>
+ The <command>ntdbbackup</command> utility can safely be run at any time. It was designed so
+ that it can be used at any time to validate the integrity of ntdb files, even during Samba
+ operation. Typical usage for the command will be:
+ </para>
+
+ <para>ntdbbackup [-s suffix] *.ntdb</para>
+
+ <para>
+ Before restarting samba the following command may be run to validate .ntdb files:
+ </para>
+
+ <para>ntdbbackup -v [-s suffix] *.ntdb</para>
+
+ <para>
+ Note that Samba 4 can use .tdb files instead, so you should
+ use <command>tdbbackup</command> on those files.
+ </para>
+
+ <para>
+ Samba .tdb and .ntdb files are stored in various locations, so be sure to back up all
+ .(n)tdb files on the system. Important files include:
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ <command>secrets.(n)tdb</command> - usual location is in the /usr/local/samba/private
+ directory, or on some systems in /etc/samba.
+ </para></listitem>
+
+ <listitem><para>
+ <command>passdb.(n)tdb</command> - usual location is in the /usr/local/samba/private
+ directory, or on some systems in /etc/samba.
+ </para></listitem>
+
+ <listitem><para>
+ <command>*.tdb</command> and <command>*.ntdb</command> located in the /usr/local/samba/var directory or on some
+ systems in the /var/cache or /var/lib/samba directories.
+ </para></listitem>
+ </itemizedlist>
+
+</refsect1>
+
+<refsect1>
+ <title>VERSION</title>
+
+ <para>This man page is correct for version 4 of the Samba suite.</para>
+</refsect1>
+
+<refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ tdbbackup(8), ntdbrestore(8)
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>AUTHOR</title>
+
+ <para>
+ The original Samba software and related utilities were created by Andrew Tridgell.
+ Samba is now developed by the Samba Team as an Open Source project similar to the way
+ the Linux kernel is developed.
+ </para>
+
+ <para>The ntdbbackup man page was written by Rusty Russell,
+ based on the tdbbackup man page by John H Terpstra.</para>
+</refsect1>
+
+</refentry>
--- /dev/null
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<refentry id="ntdbdump.8">
+
+<refmeta>
+ <refentrytitle>ntdbdump</refentrytitle>
+ <manvolnum>8</manvolnum>
+ <refmiscinfo class="source">Samba</refmiscinfo>
+ <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+ <refmiscinfo class="version">4.1</refmiscinfo>
+</refmeta>
+
+
+<refnamediv>
+ <refname>ntdbdump</refname>
+ <refpurpose>tool for printing the contents of an NTDB file</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ntdbdump</command>
+ <arg choice="opt">-k <replaceable>keyname</replaceable></arg>
+ <arg choice="opt">-e</arg>
+ <arg choice="opt">-h</arg>
+ <arg choice="req">filename</arg>
+ </cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>This tool is part of the <citerefentry><refentrytitle>samba</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> suite.</para>
+
+ <para><command>ntdbdump</command> is a very simple utility that 'dumps' the
+ contents of an NTDB (New Trivial DataBase) file to standard output in a
+ human-readable format.
+ </para>
+
+ <para>This tool can be used when debugging problems with NTDB files. It is
+ intended for those who are somewhat familiar with Samba internals.
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>-h</term>
+ <listitem><para>
+ Get help information.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>-k <replaceable>keyname</replaceable></term>
+ <listitem><para>
+ The <command>-k</command> option restricts dumping to a single key, if found.
+ </para> </listitem>
+ </varlistentry>
+
+ </variablelist>
+</refsect1>
+
+<refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ tdbdump(8), ntdbtool(8)
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>VERSION</title>
+
+ <para>This man page is correct for version 4 of the Samba suite.</para>
+</refsect1>
+
+<refsect1>
+ <title>AUTHOR</title>
+
+ <para>
+ The original Samba software and related utilities were created by Andrew Tridgell.
+ Samba is now developed by the Samba Team as an Open Source project similar to the way
+ the Linux kernel is developed.
+ </para>
+
+ <para>The ntdbdump man page was written by Rusty Russell, based on the tdbdump man page by Jelmer Vernooij.</para>
+</refsect1>
+
+</refentry>
--- /dev/null
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<refentry id="ntdbrestore.8">
+
+<refmeta>
+ <refentrytitle>ntdbrestore</refentrytitle>
+ <manvolnum>8</manvolnum>
+ <refmiscinfo class="source">Samba</refmiscinfo>
+ <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+ <refmiscinfo class="version">4.1</refmiscinfo>
+</refmeta>
+
+
+<refnamediv>
+ <refname>ntdbrestore</refname>
+ <refpurpose>tool for creating an NTDB file from ntdbdump output</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+ <cmdsynopsis>
+ <command>ntdbrestore</command>
+ <arg choice="req">ntdbfilename</arg>
+ </cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>This tool is part of the <citerefentry><refentrytitle>samba</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> suite.</para>
+
+ <para><command>ntdbrestore</command> is a very simple utility that 'restores' the
+ contents of a dump file into an NTDB (New Trivial DataBase) file. The dump file is obtained from the ntdbdump or tdbdump
+ commands.
+ </para>
+
+ <para>This tool reads the content of the dump from standard input and writes the ntdb to the file named by the ntdbfilename
+ parameter.
+ </para>
+ <para>This tool can be used to translate between ntdb and tdb files by dumping and restoring.
+ </para>
+</refsect1>
+
+
+<refsect1>
+ <title>VERSION</title>
+
+ <para>This man page is correct for version 4 of the Samba suite.</para>
+</refsect1>
+
+<refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ ntdbdump(8), tdbrestore(8)
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>AUTHOR</title>
+
+ <para>
+ The original Samba software and related utilities were created by Andrew Tridgell.
+ Samba is now developed by the Samba Team as an Open Source project similar to the way
+ the Linux kernel is developed.
+
+ ntdbrestore was written by Rusty Russell based on tdbrestore, which was initially written by Volker Lendecke based on an
+ idea by Simon McVittie.
+ </para>
+
+ <para>The ntdbrestore man page was written by Rusty Russell, based on the tdbrestore man page by Matthieu Patou.</para>
+</refsect1>
+
+</refentry>
--- /dev/null
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<refentry id="ntdbtool.8">
+
+<refmeta>
+ <refentrytitle>ntdbtool</refentrytitle>
+ <manvolnum>8</manvolnum>
+ <refmiscinfo class="source">Samba</refmiscinfo>
+ <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+ <refmiscinfo class="version">4.1</refmiscinfo>
+</refmeta>
+
+
+<refnamediv>
+ <refname>ntdbtool</refname>
+ <refpurpose>manipulate the contents of NTDB files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+
+ <cmdsynopsis>
+ <command>ntdbtool</command>
+ <arg choice="plain">
+ <replaceable>NTDBFILE</replaceable>
+ </arg>
+ <arg rep="repeat" choice="opt">
+ <replaceable>COMMANDS</replaceable>
+ </arg>
+ </cmdsynopsis>
+
+</refsynopsisdiv>
+
+<refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>This tool is part of the
+ <citerefentry><refentrytitle>samba</refentrytitle>
+ <manvolnum>1</manvolnum></citerefentry> suite.</para>
+
+ <para><command>ntdbtool</command> is a tool for displaying and
+ altering the contents of Samba NTDB (New Trivial DataBase) files. Each
+ of the commands listed below can be entered interactively or
+ provided on the command line.</para>
+
+</refsect1>
+
+
+<refsect1>
+ <title>COMMANDS</title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term><option>create</option>
+ <replaceable>NTDBFILE</replaceable></term>
+ <listitem><para>Create a new database named
+ <replaceable>NTDBFILE</replaceable>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>open</option>
+ <replaceable>NTDBFILE</replaceable></term>
+ <listitem><para>Open an existing database named
+ <replaceable>NTDBFILE</replaceable>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>erase</option></term>
+ <listitem><para>Erase the current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>dump</option></term>
+ <listitem><para>Dump the current database as strings.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>cdump</option></term>
+ <listitem><para>Dump the current database as connection records.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>keys</option></term>
+ <listitem><para>Dump the current database keys as strings.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>hexkeys</option></term>
+ <listitem><para>Dump the current database keys as hex values.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>info</option></term>
+ <listitem><para>Print summary information about the
+ current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>insert</option>
+ <replaceable>KEY</replaceable>
+ <replaceable>DATA</replaceable>
+ </term>
+ <listitem><para>Insert a record into the
+ current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>move</option>
+ <replaceable>KEY</replaceable>
+ <replaceable>NTDBFILE</replaceable>
+ </term>
+ <listitem><para>Move a record from the
+ current database into <replaceable>NTDBFILE</replaceable>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>store</option>
+ <replaceable>KEY</replaceable>
+ <replaceable>DATA</replaceable>
+ </term>
+ <listitem><para>Store (replace) a record in the
+ current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>show</option>
+ <replaceable>KEY</replaceable>
+ </term>
+ <listitem><para>Show a record by key.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>delete</option>
+ <replaceable>KEY</replaceable>
+ </term>
+ <listitem><para>Delete a record by key.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>list</option>
+ </term>
+ <listitem><para>Print the current database hash table and free list.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>free</option>
+ </term>
+ <listitem><para>Print the current database and free list.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>!</option>
+ <replaceable>COMMAND</replaceable>
+ </term>
+ <listitem><para>Execute the given system command.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <option>first</option>
+ </term>
+ <listitem><para>Print the first record in the current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <option>next</option>
+ </term>
+ <listitem><para>Print the next record in the current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <option>check</option>
+ </term>
+ <listitem><para>Check the integrity of the current database.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <option>repack</option>
+ </term>
+ <listitem><para>Repack a database using a temporary file to remove fragmentation.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <option>quit</option>
+ </term>
+ <listitem><para>Exit <command>ntdbtool</command>.
+ </para></listitem>
+ </varlistentry>
+
+ </variablelist>
+</refsect1>
+
+<refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ tdbtool(8)
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>CAVEATS</title>
+ <para>The contents of the Samba NTDB files are private
+ to the implementation and should not be altered with
+ <command>ntdbtool</command>.
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>VERSION</title>
+ <para>This man page is correct for version 4.0 of the Samba suite.</para>
+</refsect1>
+
+<refsect1>
+ <title>AUTHOR</title>
+
+ <para> The original Samba software and related utilities were
+ created by Andrew Tridgell. Samba is now developed by the
+ Samba Team as an Open Source project similar to the way the
+ Linux kernel is developed.</para>
+</refsect1>
+
+</refentry>
--- /dev/null
+ /*
+ Trivial Database 2: fetch, store and misc routines.
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#ifndef HAVE_LIBREPLACE
+#include <stdarg.h>
+#endif
+
+/*
+ * Re-issue the header of an in-use record with new key and data lengths.
+ *
+ * The total room handed to set_header() is the key length plus the record's
+ * current data length and padding, so only the data/padding split changes.
+ * If set_header() succeeds, @rec is written out at @off.
+ *
+ * Returns NTDB_SUCCESS, or the error from set_header() or
+ * ntdb_write_convert().
+ */
+static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
+				      ntdb_off_t off,
+				      ntdb_len_t keylen,
+				      ntdb_len_t datalen,
+				      struct ntdb_used_record *rec)
+{
+	/* Measure the data room before the header is touched. */
+	const uint64_t room = rec_data_length(rec) + rec_extra_padding(rec);
+	enum NTDB_ERROR err;
+
+	err = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
+			 keylen + room);
+	if (err != NTDB_SUCCESS) {
+		return err;
+	}
+	return ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
+}
+
+/*
+ * Store @key/@dbuf in a freshly allocated record and point the hash at it.
+ *
+ * @h:        hash position for @key, as filled in by the caller's lookup.
+ * @old_off:  offset of the record being replaced, or 0 if the key is new.
+ * @old_room: data length plus padding of the old record (used to size the
+ *            region returned to the free list).
+ * @growing:  passed through to alloc() as its last argument.
+ *
+ * Returns NTDB_SUCCESS or the first error encountered.  Note that the hash
+ * is updated before the key and data are written; a write failure returns
+ * the error without any rollback being attempted here.
+ */
+static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
+ struct hash_info *h,
+ NTDB_DATA key, NTDB_DATA dbuf,
+ ntdb_off_t old_off, ntdb_len_t old_room,
+ bool growing)
+{
+ ntdb_off_t new_off;
+ enum NTDB_ERROR ecode;
+
+ /* Allocate a new record. */
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing);
+ if (NTDB_OFF_IS_ERR(new_off)) {
+ return NTDB_OFF_TO_ERR(new_off);
+ }
+
+ /* We didn't like the existing one: remove it. */
+ if (old_off) {
+ ntdb->stats.frees++;
+ /* Free the whole old record: header + key + data room. */
+ ecode = add_free_record(ntdb, old_off,
+ sizeof(struct ntdb_used_record)
+ + key.dsize + old_room,
+ NTDB_LOCK_WAIT, true);
+ if (ecode == NTDB_SUCCESS)
+ ecode = replace_in_hash(ntdb, h, new_off);
+ } else {
+ /* Brand new key: just add the new record to the hash. */
+ ecode = add_to_hash(ntdb, h, new_off);
+ }
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* The key is written immediately after the record header... */
+ new_off += sizeof(struct ntdb_used_record);
+ ecode = ntdb->io->twrite(ntdb, new_off, key.dptr, key.dsize);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* ...and the data immediately after the key. */
+ new_off += key.dsize;
+ ecode = ntdb->io->twrite(ntdb, new_off, dbuf.dptr, dbuf.dsize);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* Only a fully successful replace bumps the sequence number. */
+ if (ntdb->flags & NTDB_SEQNUM)
+ ntdb_inc_seqnum(ntdb);
+
+ return NTDB_SUCCESS;
+}
+
+/*
+ * Overwrite record data in place at @off with @dbuf.
+ *
+ * When @extra (the room left over after the data) is non-zero and the data
+ * write succeeded, a single zero byte is written right after the data.
+ * The sequence number is bumped if NTDB_SEQNUM is set, regardless of
+ * whether the writes succeeded.
+ *
+ * Returns the result of the last write attempted.
+ */
+static enum NTDB_ERROR update_data(struct ntdb_context *ntdb,
+				   ntdb_off_t off,
+				   NTDB_DATA dbuf,
+				   ntdb_len_t extra)
+{
+	enum NTDB_ERROR err;
+
+	err = ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize);
+	if (extra != 0 && err == NTDB_SUCCESS) {
+		/* Put a zero in; future versions may append other data. */
+		err = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1);
+	}
+
+	if ((ntdb->flags & NTDB_SEQNUM) != 0) {
+		ntdb_inc_seqnum(ntdb);
+	}
+	return err;
+}
+
+/*
+ * Store @dbuf under @key.
+ *
+ * @flag selects the policy: NTDB_INSERT fails with NTDB_ERR_EXISTS if the
+ * key is already present, NTDB_MODIFY fails with NTDB_ERR_NOEXIST if it is
+ * absent, and any other value creates or overwrites as needed.
+ *
+ * An existing record whose data room (data length plus padding) can hold
+ * @dbuf is rewritten in place; otherwise a new record is allocated via
+ * replace_data().  The hash lock taken by find_and_lock() is dropped on
+ * every path once the lookup has succeeded.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
+				    NTDB_DATA key, NTDB_DATA dbuf, int flag)
+{
+	struct hash_info h;
+	struct ntdb_used_record rec;
+	ntdb_len_t old_room = 0;
+	ntdb_off_t off;
+	enum NTDB_ERROR ecode;
+
+	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+	if (NTDB_OFF_IS_ERR(off)) {
+		return NTDB_OFF_TO_ERR(off);
+	}
+
+	/* Now we have lock on this hash bucket. */
+	if (off && flag == NTDB_INSERT) {
+		/* Insert must not clobber an existing record. */
+		ecode = NTDB_ERR_EXISTS;
+		goto out;
+	}
+	if (!off && flag == NTDB_MODIFY) {
+		/* Modify requires the record to exist already. */
+		ecode = NTDB_ERR_NOEXIST;
+		goto out;
+	}
+
+	if (off) {
+		old_room = rec_data_length(&rec) + rec_extra_padding(&rec);
+		if (old_room >= dbuf.dsize) {
+			/* Can modify in-place.  Easy! */
+			ecode = update_rec_hdr(ntdb, off,
+					       key.dsize, dbuf.dsize, &rec);
+			if (ecode == NTDB_SUCCESS) {
+				ecode = update_data(ntdb,
+						    off + sizeof(rec)
+						    + key.dsize, dbuf,
+						    old_room - dbuf.dsize);
+			}
+			goto out;
+		}
+	}
+
+	/* If we didn't use the old record, this implies we're growing. */
+	ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off != 0);
+out:
+	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
+	return ecode;
+}
+
+/*
+ * Append @dbuf to the data stored under @key.
+ *
+ * Three cases, all under the write lock taken by find_and_lock():
+ *  - key exists and its padding can hold @dbuf: the header is updated and
+ *    the new bytes are written straight after the old data (fast path);
+ *  - key exists but there is not enough padding: old data and @dbuf are
+ *    combined in a temporary buffer and stored via replace_data();
+ *  - key does not exist: @dbuf is stored as a new record via replace_data().
+ *
+ * Returns NTDB_SUCCESS or the first error encountered.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
+ NTDB_DATA key, NTDB_DATA dbuf)
+{
+ struct hash_info h;
+ ntdb_off_t off;
+ struct ntdb_used_record rec;
+ ntdb_len_t old_room = 0, old_dlen;
+ unsigned char *newdata;
+ NTDB_DATA new_dbuf;
+ enum NTDB_ERROR ecode;
+
+ off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+ if (NTDB_OFF_IS_ERR(off)) {
+ return NTDB_OFF_TO_ERR(off);
+ }
+
+ if (off) {
+ old_dlen = rec_data_length(&rec);
+ old_room = old_dlen + rec_extra_padding(&rec);
+
+ /* Fast path: can append in place. */
+ if (rec_extra_padding(&rec) >= dbuf.dsize) {
+ ecode = update_rec_hdr(ntdb, off, key.dsize,
+ old_dlen + dbuf.dsize, &rec);
+ if (ecode != NTDB_SUCCESS) {
+ goto out;
+ }
+
+ /* Skip header, key and old data to reach the append point. */
+ off += sizeof(rec) + key.dsize + old_dlen;
+ /* rec was passed to update_rec_hdr() above, so this is the
+ * padding reported by the header after that call. */
+ ecode = update_data(ntdb, off, dbuf,
+ rec_extra_padding(&rec));
+ /* newdata was never allocated on this path: skip the free. */
+ goto out;
+ }
+
+ /* Slow path. */
+ /* The buffer is sized key.dsize + old_dlen + dbuf.dsize, although
+ * only old_dlen + dbuf.dsize bytes are filled in below. */
+ newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize,
+ ntdb->alloc_data);
+ if (!newdata) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "ntdb_append:"
+ " failed to allocate %zu bytes",
+ (size_t)(key.dsize + old_dlen
+ + dbuf.dsize));
+ goto out;
+ }
+ /* Read the existing data (which follows the header and key). */
+ ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize,
+ newdata, old_dlen);
+ if (ecode != NTDB_SUCCESS) {
+ goto out_free_newdata;
+ }
+ memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
+ new_dbuf.dptr = newdata;
+ new_dbuf.dsize = old_dlen + dbuf.dsize;
+ } else {
+ /* No existing record: store the caller's buffer as-is. */
+ newdata = NULL;
+ new_dbuf = dbuf;
+ }
+
+ /* If they're using ntdb_append(), it implies they're growing record. */
+ ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true);
+
+out_free_newdata:
+ /* newdata is NULL here when the key did not previously exist. */
+ ntdb->free_fn(newdata, ntdb->alloc_data);
+out:
+ ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
+ return ecode;
+}
+
+/*
+ * Look up @key and return a freshly allocated copy of its data in @data.
+ *
+ * On NTDB_SUCCESS, @data->dptr was obtained from ntdb->alloc_fn and
+ * @data->dsize is the record's data length.  If the key is not present,
+ * NTDB_ERR_NOEXIST is returned; if the allocation fails the error is
+ * logged and NTDB_ERR_OOM is returned.  @data is not meaningful on error.
+ *
+ * At least one byte is always requested from the allocator: a zero-byte
+ * request may legitimately return NULL, which would otherwise make
+ * fetching an empty record look like an out-of-memory failure.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
+				    NTDB_DATA *data)
+{
+	ntdb_off_t off;
+	struct ntdb_used_record rec;
+	struct hash_info h;
+	enum NTDB_ERROR ecode;
+	const char *keyp;
+
+	off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
+	if (NTDB_OFF_IS_ERR(off)) {
+		return NTDB_OFF_TO_ERR(off);
+	}
+
+	if (!off) {
+		ecode = NTDB_ERR_NOEXIST;
+	} else {
+		data->dsize = rec_data_length(&rec);
+		/* Never ask for 0 bytes: NULL must only mean failure. */
+		data->dptr = ntdb->alloc_fn(ntdb,
+					    data->dsize ? data->dsize : 1,
+					    ntdb->alloc_data);
+		if (unlikely(!data->dptr)) {
+			ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+					    "ntdb_fetch:"
+					    " failed to allocate %zu bytes",
+					    (size_t)data->dsize);
+		} else {
+			/* The data sits directly after the key bytes at keyp. */
+			memcpy(data->dptr, keyp + key.dsize, data->dsize);
+			ecode = NTDB_SUCCESS;
+		}
+		ntdb_access_release(ntdb, keyp);
+	}
+
+	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
+	return ecode;
+}
+
+/*
+ * Report whether @key is present in the database.
+ *
+ * Returns false both when the key is absent and when the lookup itself
+ * fails; the read lock is released whenever the lookup succeeded.
+ */
+_PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+	struct hash_info h;
+	struct ntdb_used_record rec;
+	ntdb_off_t found;
+
+	found = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+	if (!NTDB_OFF_IS_ERR(found)) {
+		ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
+		return found != 0;
+	}
+	return false;
+}
+
+/*
+ * Remove @key and its data from the database.
+ *
+ * Returns NTDB_ERR_NOEXIST if the key is absent.  Otherwise the entry is
+ * removed from the hash and, if that worked, the whole record (header,
+ * key, data and padding) is handed to the free list and the sequence
+ * number is bumped when NTDB_SEQNUM is set.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+	struct hash_info h;
+	struct ntdb_used_record rec;
+	ntdb_off_t off;
+	enum NTDB_ERROR ecode;
+
+	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+	if (NTDB_OFF_IS_ERR(off)) {
+		return NTDB_OFF_TO_ERR(off);
+	}
+
+	if (!off) {
+		ecode = NTDB_ERR_NOEXIST;
+	} else {
+		ecode = delete_from_hash(ntdb, &h);
+		if (ecode == NTDB_SUCCESS) {
+			/* Free the deleted entry. */
+			ntdb->stats.frees++;
+			ecode = add_free_record(ntdb, off,
+						sizeof(struct ntdb_used_record)
+						+ rec_key_length(&rec)
+						+ rec_data_length(&rec)
+						+ rec_extra_padding(&rec),
+						NTDB_LOCK_WAIT, true);
+
+			if ((ntdb->flags & NTDB_SEQNUM) != 0) {
+				ntdb_inc_seqnum(ntdb);
+			}
+		}
+	}
+
+	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
+	return ecode;
+}
+
+/* Return the context's current flag word (ntdb->flags) unchanged. */
+_PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb)
+{
+ return ntdb->flags;
+}
+
+/* True when the context has a transaction attached (non-NULL pointer). */
+static bool inside_transaction(const struct ntdb_context *ntdb)
+{
+ return ntdb->transaction != NULL;
+}
+
+/*
+ * Decide whether NTDB_RDONLY may be toggled right now.
+ *
+ * Changing it is refused while a transaction is active; in that case a
+ * usage error naming @caller is logged and false is returned.
+ */
+static bool readonly_changable(struct ntdb_context *ntdb, const char *caller)
+{
+	if (!inside_transaction(ntdb)) {
+		return true;
+	}
+
+	ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+		    "%s: can't change"
+		    " NTDB_RDONLY inside transaction",
+		    caller);
+	return false;
+}
+
+/*
+ * Turn on a single flag for an open database.
+ *
+ * Not permitted on NTDB_INTERNAL databases.  NTDB_NOMMAP is refused while
+ * file->direct_count is non-zero, and NTDB_RDONLY is refused inside a
+ * transaction.  Unknown flags are logged as usage errors.  Failures are
+ * only reported through the log function; nothing is returned.
+ */
+_PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag)
+{
+	if (ntdb->flags & NTDB_INTERNAL) {
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+			    "ntdb_add_flag: internal db");
+		return;
+	}
+
+	switch (flag) {
+	case NTDB_NOLOCK:
+	case NTDB_NOSYNC:
+	case NTDB_SEQNUM:
+	case NTDB_ALLOW_NESTING:
+		/* Plain flags: setting the bit is all that is needed. */
+		ntdb->flags |= flag;
+		break;
+	case NTDB_RDONLY:
+		if (readonly_changable(ntdb, "ntdb_add_flag")) {
+			ntdb->flags |= NTDB_RDONLY;
+		}
+		break;
+	case NTDB_NOMMAP:
+		if (ntdb->file->direct_count) {
+			ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+				    "ntdb_add_flag: Can't get NTDB_NOMMAP from"
+				    " ntdb_parse_record!");
+			return;
+		}
+		ntdb->flags |= NTDB_NOMMAP;
+#ifndef HAVE_INCOHERENT_MMAP
+		ntdb_munmap(ntdb);
+#endif
+		break;
+	default:
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+			    "ntdb_add_flag: Unknown flag %u", flag);
+	}
+}
+
+/*
+ * Turn off a single flag for an open database.
+ *
+ * Not permitted on NTDB_INTERNAL databases.  NTDB_RDONLY cannot be removed
+ * from a database opened with O_RDONLY, nor inside a transaction.  Unknown
+ * flags are logged as usage errors.  Failures are only reported through
+ * the log function; nothing is returned.
+ */
+_PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag)
+{
+	if (ntdb->flags & NTDB_INTERNAL) {
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+			    "ntdb_remove_flag: internal db");
+		return;
+	}
+
+	switch (flag) {
+	case NTDB_RDONLY:
+		if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) {
+			ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+				    "ntdb_remove_flag: can't"
+				    " remove NTDB_RDONLY on ntdb"
+				    " opened with O_RDONLY");
+		} else if (readonly_changable(ntdb, "ntdb_remove_flag")) {
+			ntdb->flags &= ~NTDB_RDONLY;
+		}
+		break;
+	case NTDB_NOMMAP:
+		ntdb->flags &= ~NTDB_NOMMAP;
+#ifndef HAVE_INCOHERENT_MMAP
+		/* If mmap incoherent, we were mmaping anyway. */
+		ntdb_mmap(ntdb);
+#endif
+		break;
+	case NTDB_ALLOW_NESTING:
+		ntdb->flags &= ~NTDB_ALLOW_NESTING;
+		break;
+	case NTDB_SEQNUM:
+		ntdb->flags &= ~NTDB_SEQNUM;
+		break;
+	case NTDB_NOSYNC:
+		ntdb->flags &= ~NTDB_NOSYNC;
+		break;
+	case NTDB_NOLOCK:
+		ntdb->flags &= ~NTDB_NOLOCK;
+		break;
+	default:
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+			    "ntdb_remove_flag: Unknown flag %u",
+			    flag);
+	}
+}
+
+/*
+ * Map an enum NTDB_ERROR onto a constant, human-readable string.
+ *
+ * Any value that matches none of the cases below yields
+ * "Invalid error code".  The returned strings are literals and must not
+ * be freed.
+ */
+_PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode)
+{
+ /* Gcc warns if you miss a case in the switch, so use that. */
+ switch (NTDB_ERR_TO_OFF(ecode)) {
+ case NTDB_ERR_TO_OFF(NTDB_SUCCESS): return "Success";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): return "Corrupt database";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_IO): return "IO Error";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): return "Locking error";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): return "Out of memory";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS): return "Record exists";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): return "Invalid parameter";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST): return "Record does not exist";
+ case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY): return "write not permitted";
+ }
+ /* Not one of the known codes. */
+ return "Invalid error code";
+}
+
+/*
+ * Format a message and hand it to the database's log function.
+ *
+ * @ecode: the error being reported; it is also the return value, so
+ *         callers can write "return ntdb_logerr(...)".
+ * @level: severity passed through to the log function.
+ * @fmt:   printf-style format, followed by its arguments.
+ *
+ * Nothing is done when no log function is installed.  errno is preserved
+ * across the call.  If the message cannot be formatted (vsnprintf() fails
+ * or the buffer cannot be allocated), the raw format string is logged
+ * instead so that the event is not lost; an allocation failure is
+ * additionally reported as NTDB_ERR_OOM.
+ */
+enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb,
+				 enum NTDB_ERROR ecode,
+				 enum ntdb_log_level level,
+				 const char *fmt, ...)
+{
+	char *message;
+	va_list ap;
+	int len;
+	/* ntdb_open paths care about errno, so save it. */
+	int saved_errno = errno;
+
+	if (!ntdb->log_fn)
+		return ecode;
+
+	/* First pass: measure how long the formatted message will be. */
+	va_start(ap, fmt);
+	len = vsnprintf(NULL, 0, fmt, ap);
+	va_end(ap);
+
+	if (len < 0) {
+		/*
+		 * vsnprintf() reported an error.  Treating the result as a
+		 * size would wrap to a zero-byte buffer and log garbage, so
+		 * fall back to the unformatted string.
+		 */
+		ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
+		errno = saved_errno;
+		return ecode;
+	}
+
+	message = ntdb->alloc_fn(ntdb, (size_t)len + 1, ntdb->alloc_data);
+	if (!message) {
+		ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM,
+			     "out of memory formatting message:", ntdb->log_data);
+		ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
+	} else {
+		/* Second pass: actually format into the buffer. */
+		va_start(ap, fmt);
+		vsnprintf(message, (size_t)len + 1, fmt, ap);
+		va_end(ap);
+		ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data);
+		ntdb->free_fn(message, ntdb->alloc_data);
+	}
+	errno = saved_errno;
+	return ecode;
+}
+
+/*
+ * Look up @key and pass its key and data to @parse without copying.
+ *
+ * @parse receives the caller's @key, an NTDB_DATA built over the record's
+ * data as found through find_and_lock()'s key pointer, and the opaque
+ * @data pointer.  Its return value becomes this function's return value.
+ * Returns NTDB_ERR_NOEXIST if the key is absent.
+ *
+ * While @parse runs, NTDB_RDONLY is temporarily forced on unless the
+ * caller holds an allrecord lock or NTDB_NOLOCK is set; the previous
+ * flags are restored afterwards.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
+ NTDB_DATA key,
+ enum NTDB_ERROR (*parse)(NTDB_DATA k,
+ NTDB_DATA d,
+ void *data),
+ void *data)
+{
+ ntdb_off_t off;
+ struct ntdb_used_record rec;
+ struct hash_info h;
+ enum NTDB_ERROR ecode;
+ const char *keyp;
+
+ off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
+ if (NTDB_OFF_IS_ERR(off)) {
+ return NTDB_OFF_TO_ERR(off);
+ }
+
+ if (!off) {
+ ecode = NTDB_ERR_NOEXIST;
+ } else {
+ unsigned int old_flags;
+ /* The data starts key.dsize bytes past keyp. */
+ NTDB_DATA d = ntdb_mkdata(keyp + key.dsize,
+ rec_data_length(&rec));
+
+ /*
+ * Make sure they don't try to write db, since they
+ * have read lock! They can if they've done
+ * ntdb_lockall(): if it was ntdb_lockall_read, that'll
+ * stop them doing a write operation anyway.
+ */
+ old_flags = ntdb->flags;
+ if (!ntdb->file->allrecord_lock.count &&
+ !(ntdb->flags & NTDB_NOLOCK)) {
+ ntdb->flags |= NTDB_RDONLY;
+ }
+ ecode = parse(key, d, data);
+ /* Restore the whole flag word as it was before the callback. */
+ ntdb->flags = old_flags;
+ ntdb_access_release(ntdb, keyp);
+ }
+
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
+ return ecode;
+}
+
+/* Return the name stored in the context; the pointer is not copied. */
+_PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb)
+{
+ return ntdb->name;
+}
+
+/*
+ * Return the sequence number read from the seqnum field of the header.
+ *
+ * The result of ntdb_read_off() is returned as-is.
+ * NOTE(review): presumably a read failure comes back as a negative
+ * (error-encoded) value -- confirm against ntdb_read_off().
+ */
+_PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb)
+{
+ return ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
+}
+
+
+/* Return the file descriptor recorded in the context's file structure. */
+_PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb)
+{
+ return ntdb->file->fd;
+}
+
+/* State shared between ntdb_repack() and its traverse callback. */
+struct traverse_state {
+ /* Most recent result recorded by ntdb_repack() or by the callback's
+ * ntdb_store(); this is what ntdb_repack() returns on failure. */
+ enum NTDB_ERROR error;
+ /* Database the callback stores each traversed record into. */
+ struct ntdb_context *dest_db;
+};
+
+/*
+  Traverse callback for repacking: insert the record into state->dest_db.
+
+  The ntdb_store() result is kept in state->error; a non-zero return
+  (-1) is used when the store failed, 0 otherwise.
+ */
+static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data,
+			   struct traverse_state *state)
+{
+	state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT);
+	return (state->error == NTDB_SUCCESS) ? 0 : -1;
+}
+
+/*
+ * Repack a database to remove fragmentation.
+ *
+ * Inside a transaction on @ntdb, every record is copied into a temporary
+ * NTDB_INTERNAL database, @ntdb is wiped, and the records are copied
+ * back.  The transaction is committed only if every step succeeded;
+ * otherwise it is cancelled and the first error is returned.
+ *
+ * A copy pass is treated as failed either when ntdb_traverse() itself
+ * reports an error (negative return) or when the callback recorded a
+ * store failure in state.error.  Checking both matters: relying on the
+ * traverse return value alone could let a failed store go unnoticed, in
+ * which case the database would be wiped and only partially refilled.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb)
+{
+	struct ntdb_context *tmp_db;
+	struct traverse_state state;
+	int64_t ret;
+
+	state.error = ntdb_transaction_start(ntdb);
+	if (state.error != NTDB_SUCCESS) {
+		return state.error;
+	}
+
+	tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
+	if (tmp_db == NULL) {
+		state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+					  __location__
+					  " Failed to create tmp_db");
+		ntdb_transaction_cancel(ntdb);
+		return state.error;
+	}
+
+	/* Pass 1: copy everything into the temporary database. */
+	state.dest_db = tmp_db;
+	ret = ntdb_traverse(ntdb, repack_traverse, &state);
+	if (ret < 0 && state.error == NTDB_SUCCESS) {
+		/* The traverse itself failed: don't report success. */
+		state.error = (enum NTDB_ERROR)ret;
+	}
+	if (state.error != NTDB_SUCCESS) {
+		goto fail;
+	}
+
+	state.error = ntdb_wipe_all(ntdb);
+	if (state.error != NTDB_SUCCESS) {
+		goto fail;
+	}
+
+	/* Pass 2: copy everything back into the (now empty) database. */
+	state.dest_db = ntdb;
+	ret = ntdb_traverse(tmp_db, repack_traverse, &state);
+	if (ret < 0 && state.error == NTDB_SUCCESS) {
+		state.error = (enum NTDB_ERROR)ret;
+	}
+	if (state.error != NTDB_SUCCESS) {
+		goto fail;
+	}
+
+	ntdb_close(tmp_db);
+	return ntdb_transaction_commit(ntdb);
+
+fail:
+	ntdb_transaction_cancel(ntdb);
+	ntdb_close(tmp_db);
+	return state.error;
+}
--- /dev/null
+#ifndef CCAN_NTDB_H
+#define CCAN_NTDB_H
+
+/*
+ NTDB: trivial database library version 2
+
+ Copyright (C) Andrew Tridgell 1999-2004
+ Copyright (C) Rusty Russell 2010-2012
+
+ ** NOTE! The following LGPL license applies to the ntdb
+ ** library. This does NOT imply that all of Samba is released
+ ** under the LGPL
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_LIBREPLACE
+#include <replace.h>
+#include <system/filesys.h>
+#else
+#include "config.h"
+#if HAVE_FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#ifndef _PUBLIC_
+#ifdef HAVE_VISIBILITY_ATTR
+#define _PUBLIC_ __attribute__((visibility("default")))
+#else
+#define _PUBLIC_
+#endif
+#endif
+
+/* For mode_t */
+#include <sys/types.h>
+/* For O_* flags. */
+#include <sys/stat.h>
+/* For sig_atomic_t. */
+#include <signal.h>
+/* For uint64_t */
+#include <stdint.h>
+/* For bool */
+#include <stdbool.h>
+/* For memcmp */
+#include <string.h>
+#endif
+
+#include <ccan/compiler/compiler.h>
+#include <ccan/typesafe_cb/typesafe_cb.h>
+#include <ccan/cast/cast.h>
+
+union ntdb_attribute;
+struct ntdb_context;
+
+/**
+ * struct TDB_DATA - (n)tdb data blob
+ *
+ * To ease compatibility, we use 'struct TDB_DATA' from tdb.h, so if
+ * you want to include both tdb.h and ntdb.h, you need to #include
+ * tdb.h first.
+ */
+#ifndef __TDB_H__
+struct TDB_DATA {
+ unsigned char *dptr;
+ size_t dsize;
+};
+#endif
+
+typedef struct TDB_DATA NTDB_DATA;
+
+/**
+ * ntdb_open - open a database file
+ * @name: the file name (or database name if flags contains NTDB_INTERNAL)
+ * @ntdb_flags: options for this database
+ * @open_flags: flags argument for ntdb's open() call.
+ * @mode: mode argument for ntdb's open() call.
+ * @attributes: linked list of extra attributes for this ntdb.
+ *
+ * This call opens (and potentially creates) a database file.
+ * Multiple processes can have the NTDB file open at once.
+ *
+ * On failure it will return NULL, and set errno: it may also call
+ * any log attribute found in @attributes.
+ *
+ * See also:
+ * union ntdb_attribute
+ */
+struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
+ int open_flags, mode_t mode,
+ union ntdb_attribute *attributes);
+
+
+/* flags for ntdb_open() */
+#define NTDB_DEFAULT 0 /* just a readability place holder */
+#define NTDB_INTERNAL 2 /* don't store on disk */
+#define NTDB_NOLOCK 4 /* don't do any locking */
+#define NTDB_NOMMAP 8 /* don't use mmap */
+#define NTDB_CONVERT 16 /* convert endian */
+#define NTDB_NOSYNC 64 /* don't use synchronous transactions */
+#define NTDB_SEQNUM 128 /* maintain a sequence number */
+#define NTDB_ALLOW_NESTING 256 /* fake nested transactions */
+#define NTDB_RDONLY 512 /* implied by O_RDONLY */
+#define NTDB_CANT_CHECK 2048 /* has a feature which we don't understand */
+
+/**
+ * ntdb_close - close and free a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This always succeeds, in that @ntdb is unusable after this call. But if
+ * some unexpected error occurred while closing, it will return non-zero
+ * (the only clue as to cause will be via the log attribute).
+ */
+int ntdb_close(struct ntdb_context *ntdb);
+
+/**
+ * enum NTDB_ERROR - error returns for NTDB
+ *
+ * See Also:
+ * ntdb_errorstr()
+ */
+enum NTDB_ERROR {
+ NTDB_SUCCESS = 0, /* No error. */
+ NTDB_ERR_CORRUPT = -1, /* We read the db, and it was bogus. */
+ NTDB_ERR_IO = -2, /* We couldn't read/write the db. */
+ NTDB_ERR_LOCK = -3, /* Locking failed. */
+ NTDB_ERR_OOM = -4, /* Out of Memory. */
+ NTDB_ERR_EXISTS = -5, /* The key already exists. */
+ NTDB_ERR_NOEXIST = -6, /* The key does not exist. */
+ NTDB_ERR_EINVAL = -7, /* You're using it wrong. */
+ NTDB_ERR_RDONLY = -8, /* The database is read-only. */
+ NTDB_ERR_LAST = NTDB_ERR_RDONLY
+};
+
+/**
+ * ntdb_store - store a key/value pair in a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key
+ * @dbuf: the data to associate with the key.
+ * @flag: NTDB_REPLACE, NTDB_INSERT or NTDB_MODIFY.
+ *
+ * This inserts (or overwrites) a key/value pair in the NTDB. If flag
+ * is NTDB_REPLACE, it doesn't matter whether the key exists or not;
+ * NTDB_INSERT means it must not exist (returns NTDB_ERR_EXISTS otherwise),
+ * and NTDB_MODIFY means it must exist (returns NTDB_ERR_NOEXIST otherwise).
+ *
+ * On success, this returns NTDB_SUCCESS.
+ *
+ * See also:
+ * ntdb_fetch, ntdb_transaction_start, ntdb_append, ntdb_delete.
+ */
+enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
+ NTDB_DATA key,
+ NTDB_DATA dbuf,
+ int flag);
+
+/* flags to ntdb_store() */
+#define NTDB_REPLACE 1 /* A readability place holder */
+#define NTDB_INSERT 2 /* Don't overwrite an existing entry */
+#define NTDB_MODIFY 3 /* Don't create a new entry: the key must already exist */
+
+/**
+ * ntdb_fetch - fetch a value from a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key
+ * @data: pointer to data.
+ *
+ * This looks up a key in the database and sets it in @data.
+ *
+ * If it returns NTDB_SUCCESS, the key was found: it is your
+ * responsibility to call free() on @data->dptr.
+ *
+ * Otherwise, it returns an error (usually, NTDB_ERR_NOEXIST) and @data is
+ * undefined.
+ */
+enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
+ NTDB_DATA *data);
+
+/**
+ * ntdb_errorstr - map the ntdb error onto a constant readable string
+ * @ecode: the enum NTDB_ERROR to map.
+ *
+ * This is useful for displaying errors to users.
+ */
+const char *ntdb_errorstr(enum NTDB_ERROR ecode);
+
+/**
+ * ntdb_append - append a value to a key/value pair in a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key
+ * @dbuf: the data to append.
+ *
+ * This is equivalent to fetching a record, reallocating .dptr to add the
+ * data, and writing it back, only it's much more efficient. If the key
+ * doesn't exist, it's equivalent to ntdb_store (with an additional hint that
+ * you expect to expand the record in future).
+ *
+ * See Also:
+ * ntdb_fetch(), ntdb_store()
+ */
+enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
+ NTDB_DATA key, NTDB_DATA dbuf);
+
+/**
+ * ntdb_delete - delete a key from a ntdb.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to delete.
+ *
+ * Returns NTDB_SUCCESS on success, or an error (usually NTDB_ERR_NOEXIST).
+ *
+ * See Also:
+ * ntdb_fetch(), ntdb_store()
+ */
+enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_exists - does a key exist in the database?
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to search for.
+ *
+ * Returns true if it exists, or false if it doesn't or any other error.
+ */
+bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_deq - are NTDB_DATA equal?
+ * @a: one NTDB_DATA
+ * @b: another NTDB_DATA
+ *
+ * Two blobs are equal when they have the same length and the same bytes.
+ * Zero-length blobs compare equal without looking at dptr, so a NULL dptr
+ * is never handed to memcmp().
+ */
+static inline bool ntdb_deq(NTDB_DATA a, NTDB_DATA b)
+{
+	if (a.dsize != b.dsize)
+		return false;
+	return a.dsize == 0 || memcmp(a.dptr, b.dptr, a.dsize) == 0;
+}
+
+/**
+ * ntdb_mkdata - make a NTDB_DATA from const data
+ * @p: the constant pointer
+ * @len: the length
+ *
+ * As the dptr member of NTDB_DATA is not constant, you need to
+ * cast it. This function keeps those casts in one place, as well as
+ * suppressing the warning some compilers give when casting away a
+ * qualifier (eg. gcc with -Wcast-qual)
+ *
+ * The returned NTDB_DATA points at @p itself; nothing is copied.
+ */
+static inline NTDB_DATA ntdb_mkdata(const void *p, size_t len)
+{
+ NTDB_DATA d;
+ d.dptr = cast_const(void *, p);
+ d.dsize = len;
+ return d;
+}
+
+/**
+ * ntdb_transaction_start - start a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This begins a series of atomic operations. Other processes will be able
+ * to read the ntdb, but not alter it (they will block), nor will they see
+ * any changes until ntdb_transaction_commit() is called.
+ *
+ * Note that if the NTDB_ALLOW_NESTING flag is set, a ntdb_transaction_start()
+ * within a transaction will succeed, but it's not a real transaction:
+ * (1) An inner transaction which is committed is not actually committed until
+ * the outer transaction is; if the outer transaction is cancelled, the
+ * inner ones are discarded.
+ * (2) ntdb_transaction_cancel() marks the outer transaction as having an error,
+ * so the final ntdb_transaction_commit() will fail.
+ * (3) the outer transaction will see the results of the inner transaction.
+ *
+ * See Also:
+ * ntdb_transaction_cancel, ntdb_transaction_commit.
+ */
+enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_transaction_cancel - abandon a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This aborts a transaction, discarding any changes which were made.
+ * ntdb_close() does this implicitly.
+ */
+void ntdb_transaction_cancel(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_transaction_commit - commit a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This completes a transaction, writing any changes which were made.
+ *
+ * fsync() is used to commit the transaction (unless NTDB_NOSYNC is set),
+ * making it robust against machine crashes, but very slow compared to
+ * other NTDB operations.
+ *
+ * A failure can only be caused by unexpected errors (eg. I/O or
+ * memory); there is no point looping on transaction failure.
+ *
+ * See Also:
+ * ntdb_transaction_prepare_commit()
+ */
+enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_transaction_prepare_commit - prepare to commit a transaction
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This ensures we have the resources to commit a transaction (using
+ * ntdb_transaction_commit): if this succeeds then a transaction will only
+ * fail if the write() or fsync() calls fail.
+ *
+ * If this fails you must still call ntdb_transaction_cancel() to cancel
+ * the transaction.
+ *
+ * See Also:
+ * ntdb_transaction_commit()
+ */
+enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_traverse - traverse a NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @fn: the function to call for every key/value pair (or NULL)
+ * @p: the pointer to hand to @fn
+ *
+ * This walks the NTDB until all the keys have been traversed, or @fn
+ * returns non-zero. If the traverse function or other processes are
+ * changing data or adding or deleting keys, the traverse may be
+ * unreliable: keys may be skipped or (rarely) visited twice.
+ *
+ * There is one specific exception: the special case of deleting the
+ * current key does not undermine the reliability of the traversal.
+ *
+ * On success, returns the number of keys iterated. On error returns
+ * a negative enum NTDB_ERROR value.
+ */
+#define ntdb_traverse(ntdb, fn, p) \
+ ntdb_traverse_(ntdb, typesafe_cb_preargs(int, void *, (fn), (p), \
+ struct ntdb_context *, \
+ NTDB_DATA, NTDB_DATA), (p))
+
+int64_t ntdb_traverse_(struct ntdb_context *ntdb,
+ int (*fn)(struct ntdb_context *,
+ NTDB_DATA, NTDB_DATA, void *), void *p);
+
+/**
+ * ntdb_parse_record - operate directly on data in the database.
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key whose record we should hand to @parse
+ * @parse: the function to call for the data
+ * @data: the private pointer to hand to @parse (types must match).
+ *
+ * This avoids a copy for many cases, by handing you a pointer into
+ * the memory-mapped database. It also locks the record to prevent
+ * other accesses at the same time, so it won't change.
+ *
+ * Within the @parse callback you can perform read operations on the
+ * database, but no write operations: no ntdb_store() or
+ * ntdb_delete(), for example. The exception is if you call
+ * ntdb_lockall() before ntdb_parse_record().
+ *
+ * Never alter the data handed to parse()!
+ */
+#define ntdb_parse_record(ntdb, key, parse, data) \
+ ntdb_parse_record_((ntdb), (key), \
+ typesafe_cb_preargs(enum NTDB_ERROR, void *, \
+ (parse), (data), \
+ NTDB_DATA, NTDB_DATA), (data))
+
+enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
+ NTDB_DATA key,
+ enum NTDB_ERROR (*parse)(NTDB_DATA k,
+ NTDB_DATA d,
+ void *data),
+ void *data);
+
+/**
+ * ntdb_get_seqnum - get a database sequence number
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns a sequence number: any change to the database from a
+ * ntdb context opened with the NTDB_SEQNUM flag will cause that number
+ * to increment. Note that the incrementing is unreliable (it is done
+ * without locking), so this is only useful as an optimization.
+ *
+ * For example, you may have a regular database backup routine which
+ * does not operate if the sequence number is unchanged. In the
+ * unlikely event of a failed increment, it will be backed up next
+ * time anyway.
+ *
+ * Returns an enum NTDB_ERROR (ie. negative) on error.
+ */
+int64_t ntdb_get_seqnum(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_firstkey - get the "first" key in a NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: pointer to key.
+ *
+ * This returns an arbitrary key in the database; with ntdb_nextkey() it allows
+ * open-coded traversal of the database, though it is slightly less efficient
+ * than ntdb_traverse.
+ *
+ * It is your responsibility to free @key->dptr on success.
+ *
+ * Returns NTDB_ERR_NOEXIST if the database is empty.
+ */
+enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key);
+
+/**
+ * ntdb_nextkey - get the "next" key in a NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: a key returned by ntdb_firstkey() or ntdb_nextkey().
+ *
+ * This returns another key in the database; it will free @key->dptr for
+ * your convenience.
+ *
+ * Returns NTDB_ERR_NOEXIST if there are no more keys.
+ */
+enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key);
+
+/**
+ * ntdb_chainlock - lock a record in the NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to lock.
+ *
+ * This prevents any access occurring to a group of keys including @key,
+ * even if @key does not exist. This allows primitive atomic updates of
+ * records without using transactions.
+ *
+ * You cannot begin a transaction while holding a ntdb_chainlock(), nor can
+ * you do any operations on any other keys in the database. This also means
+ * that you cannot hold more than one ntdb_chainlock() at a time.
+ *
+ * See Also:
+ * ntdb_chainunlock()
+ */
+enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_chainunlock - unlock a record in the NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to unlock.
+ *
+ * The key must have previously been locked by ntdb_chainlock().
+ */
+void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_chainlock_read - lock a record in the NTDB, for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to lock.
+ *
+ * This prevents any changes from occurring to a group of keys including @key,
+ * even if @key does not exist. This allows primitive atomic updates of
+ * records without using transactions.
+ *
+ * You cannot begin a transaction while holding a ntdb_chainlock_read(), nor can
+ * you do any operations on any other keys in the database. This also means
+ * that you cannot hold more than one ntdb_chainlock()/read() at a time.
+ *
+ * See Also:
+ * ntdb_chainlock()
+ */
+enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_chainunlock_read - unlock a record in the NTDB for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @key: the key to unlock.
+ *
+ * The key must have previously been locked by ntdb_chainlock_read().
+ */
+void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key);
+
+/**
+ * ntdb_lockall - lock the entire NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * You cannot hold a ntdb_chainlock while calling this. It nests, so you
+ * must call ntdb_unlockall as many times as you call ntdb_lockall.
+ */
+enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_unlockall - unlock the entire NTDB
+ * @ntdb: the ntdb context returned from ntdb_open()
+ */
+void ntdb_unlockall(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_lockall_read - lock the entire NTDB for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This prevents others writing to the database, eg. ntdb_delete, ntdb_store,
+ * ntdb_append, but not ntdb_fetch.
+ *
+ * You cannot hold a ntdb_chainlock while calling this. It nests, so you
+ * must call ntdb_unlockall_read as many times as you call ntdb_lockall_read.
+ */
+enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_unlockall_read - unlock the entire NTDB for reading
+ * @ntdb: the ntdb context returned from ntdb_open()
+ */
+void ntdb_unlockall_read(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_wipe_all - wipe the database clean
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * Completely erase the database. This is faster than iterating through
+ * each key and doing ntdb_delete.
+ */
+enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_repack - repack the database
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This repacks the database; if it is suffering from a great deal of
+ * fragmentation this might help. However, it can take twice the
+ * memory of the existing NTDB.
+ */
+enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_check - check a NTDB for consistency
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @check: function to check each key/data pair (or NULL)
+ * @data: argument for @check, must match type.
+ *
+ * This performs a consistency check of the open database, optionally calling
+ * a check() function on each record so you can do your own data consistency
+ * checks as well. If check() returns an error, that is returned from
+ * ntdb_check().
+ *
+ * Note that if the NTDB uses a feature which we don't understand and which
+ * indicates we can't run ntdb_check(), this will log a warning to that
+ * effect and return NTDB_SUCCESS. You can detect this condition by
+ * looking for NTDB_CANT_CHECK in ntdb_get_flags().
+ *
+ * Returns NTDB_SUCCESS or an error.
+ */
+#define ntdb_check(ntdb, check, data) \
+ ntdb_check_((ntdb), typesafe_cb_preargs(enum NTDB_ERROR, void *, \
+ (check), (data), \
+ NTDB_DATA, \
+ NTDB_DATA), \
+ (data))
+
+enum NTDB_ERROR ntdb_check_(struct ntdb_context *ntdb,
+ enum NTDB_ERROR (*check)(NTDB_DATA k,
+ NTDB_DATA d,
+ void *data),
+ void *data);
+
+/**
+ * enum ntdb_summary_flags - flags for ntdb_summary.
+ */
+enum ntdb_summary_flags {
+ NTDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */
+};
+
+/**
+ * ntdb_summary - return a string describing the NTDB state
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @flags: flags to control the summary output.
+ * @summary: pointer to string to allocate.
+ *
+ * This returns a developer-readable string describing the overall
+ * state of the ntdb, such as the percentage used and sizes of records.
+ * It is designed to provide information about the ntdb at a glance
+ * without displaying any keys or data in the database.
+ *
+ * On success, sets @summary to point to a malloc()'ed nul-terminated
+ * multi-line string. It is your responsibility to free() it.
+ */
+enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
+ enum ntdb_summary_flags flags,
+ char **summary);
+
+
+/**
+ * ntdb_get_flags - return the flags for a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns the flags on the current ntdb. Some of these are caused by
+ * the flags argument to ntdb_open(), others (such as NTDB_CONVERT) are
+ * intuited.
+ */
+unsigned int ntdb_get_flags(struct ntdb_context *ntdb);
+
+/**
+ * ntdb_add_flag - set a flag for a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING.
+ *
+ * You can use this to set a flag on the NTDB. You cannot set these flags
+ * on a NTDB_INTERNAL ntdb.
+ */
+void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag);
+
+/**
+ * ntdb_remove_flag - unset a flag for a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @flag: one of NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC or NTDB_ALLOW_NESTING.
+ *
+ * You can use this to clear a flag on the NTDB. You cannot clear flags
+ * on a NTDB_INTERNAL ntdb.
+ */
+void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag);
+
+/**
+ * enum ntdb_attribute_type - discriminator for union ntdb_attribute.
+ */
+/* Each value selects the matching member of union ntdb_attribute. */
+enum ntdb_attribute_type {
+ NTDB_ATTRIBUTE_LOG = 0, /* struct ntdb_attribute_log */
+ NTDB_ATTRIBUTE_HASH = 1, /* struct ntdb_attribute_hash */
+ NTDB_ATTRIBUTE_SEED = 2, /* struct ntdb_attribute_seed */
+ NTDB_ATTRIBUTE_STATS = 3, /* struct ntdb_attribute_stats */
+ NTDB_ATTRIBUTE_OPENHOOK = 4, /* struct ntdb_attribute_openhook */
+ NTDB_ATTRIBUTE_FLOCK = 5, /* struct ntdb_attribute_flock */
+ NTDB_ATTRIBUTE_ALLOCATOR = 6, /* struct ntdb_attribute_allocator */
+ NTDB_ATTRIBUTE_HASHSIZE = 7 /* struct ntdb_attribute_hashsize */
+};
+
+/**
+ * ntdb_get_attribute - get an attribute for an existing ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @attr: the union ntdb_attribute to fill in.
+ *
+ * This gets an attribute from a NTDB which has previously been set (or
+ * may return the default values). Set @attr.base.attr to the
+ * attribute type you want to get.
+ */
+enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
+ union ntdb_attribute *attr);
+
+/**
+ * ntdb_set_attribute - set an attribute for an existing ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @attr: the union ntdb_attribute to set.
+ *
+ * This sets an attribute on a NTDB, overriding any previous attribute
+ * of the same type. It returns NTDB_ERR_EINVAL if the attribute is
+ * unknown or invalid.
+ *
+ * Note that NTDB_ATTRIBUTE_HASH, NTDB_ATTRIBUTE_SEED,
+ * NTDB_ATTRIBUTE_OPENHOOK and NTDB_ATTRIBUTE_HASHSIZE cannot currently be
+ * set after ntdb_open, and NTDB_ATTRIBUTE_STATS cannot be set at all.
+ */
+enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
+ const union ntdb_attribute *attr);
+
+/**
+ * ntdb_unset_attribute - reset an attribute for an existing ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ * @type: the attribute type to unset.
+ *
+ * This unsets an attribute on a NTDB, returning it to the defaults
+ * (where applicable).
+ *
+ * Note that it only makes sense for NTDB_ATTRIBUTE_LOG,
+ * NTDB_ATTRIBUTE_OPENHOOK and NTDB_ATTRIBUTE_FLOCK to be unset.
+ */
+void ntdb_unset_attribute(struct ntdb_context *ntdb,
+ enum ntdb_attribute_type type);
+
+/**
+ * ntdb_name - get the name of a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns a copy of the name string, made at ntdb_open() time.
+ *
+ * This is mostly useful for logging.
+ */
+const char *ntdb_name(const struct ntdb_context *ntdb);
+
+/**
+ * ntdb_fd - get the file descriptor of a ntdb
+ * @ntdb: the ntdb context returned from ntdb_open()
+ *
+ * This returns the file descriptor for the underlying database file, or -1
+ * for NTDB_INTERNAL.
+ */
+int ntdb_fd(const struct ntdb_context *ntdb);
+
+/**
+ * ntdb_foreach - iterate through every open NTDB.
+ * @fn: the function to call for every NTDB
+ * @p: the pointer to hand to @fn
+ *
+ * NTDB internally keeps track of all open TDBs; this function allows you to
+ * iterate through them. If @fn returns non-zero, traversal stops.
+ */
+#define ntdb_foreach(fn, p) \
+ ntdb_foreach_(typesafe_cb_preargs(int, void *, (fn), (p), \
+ struct ntdb_context *), (p))
+
+void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p);
+
+/**
+ * struct ntdb_attribute_base - common fields for all ntdb attributes.
+ */
+struct ntdb_attribute_base {
+ /* Says which member of union ntdb_attribute this attribute is. */
+ enum ntdb_attribute_type attr;
+ /* Next attribute in the list, or NULL to end it. */
+ union ntdb_attribute *next;
+};
+
+/**
+ * enum ntdb_log_level - log levels for ntdb_attribute_log
+ * @NTDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors
+ * or internal consistency failures.
+ * @NTDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters
+ * or writing to a read-only database.
+ * @NTDB_LOG_WARNING: used for informational messages on issues which
+ * are unusual but handled by NTDB internally, such
+ * as a failure to mmap or failure to open /dev/urandom.
+ * It's also used when ntdb_open() fails without O_CREAT
+ * because a file does not exist.
+ */
+enum ntdb_log_level {
+ NTDB_LOG_ERROR,
+ NTDB_LOG_USE_ERROR,
+ NTDB_LOG_WARNING
+};
+
+/**
+ * struct ntdb_attribute_log - log function attribute
+ *
+ * This attribute provides a hook for you to log errors.
+ */
+struct ntdb_attribute_log {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_LOG */
+ void (*fn)(struct ntdb_context *ntdb,
+ enum ntdb_log_level level,
+ enum NTDB_ERROR ecode,
+ const char *message,
+ void *data);
+ void *data;
+};
+
+/**
+ * struct ntdb_attribute_hash - hash function attribute
+ *
+ * This attribute allows you to provide an alternative hash function.
+ * This hash function will be handed keys from the database; it will also
+ * be handed the 8-byte NTDB_HASH_MAGIC value for checking the header (the
+ * ntdb_open() will fail if the hash value doesn't match the header).
+ *
+ * Note that if your hash function gives different results on
+ * different machine endians, your ntdb will no longer work across
+ * different architectures!
+ */
+struct ntdb_attribute_hash {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */
+ uint32_t (*fn)(const void *key, size_t len, uint32_t seed,
+ void *data);
+ void *data;
+};
+
+/**
+ * struct ntdb_attribute_seed - hash function seed attribute
+ *
+ * The hash function seed is normally taken from /dev/urandom (or equivalent)
+ * but can be set manually here. This is mainly for testing purposes.
+ */
+struct ntdb_attribute_seed {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_SEED */
+ uint64_t seed;
+};
+
+/**
+ * struct ntdb_attribute_stats - ntdb operational statistics
+ *
+ * This attribute records statistics of various low-level NTDB operations.
+ * This can be used to assist performance evaluation. This is only
+ * useful for ntdb_get_attribute().
+ *
+ * New fields will be added at the end, hence the "size" argument which
+ * indicates how large your structure is: it must be filled in before
+ * calling ntdb_get_attribute(), which will overwrite it with the size
+ * ntdb knows about.
+ */
+struct ntdb_attribute_stats {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_STATS */
+ size_t size; /* = sizeof(struct ntdb_attribute_stats) */
+ uint64_t allocs;
+ uint64_t alloc_subhash;
+ uint64_t alloc_chain;
+ uint64_t alloc_bucket_exact;
+ uint64_t alloc_bucket_max;
+ uint64_t alloc_leftover;
+ uint64_t alloc_coalesce_tried;
+ uint64_t alloc_coalesce_iterate_clash;
+ uint64_t alloc_coalesce_lockfail;
+ uint64_t alloc_coalesce_race;
+ uint64_t alloc_coalesce_succeeded;
+ uint64_t alloc_coalesce_num_merged;
+ uint64_t compares;
+ uint64_t compare_wrong_offsetbits;
+ uint64_t compare_wrong_keylen;
+ uint64_t compare_wrong_rechash;
+ uint64_t compare_wrong_keycmp;
+ uint64_t transactions;
+ uint64_t transaction_cancel;
+ uint64_t transaction_nest;
+ uint64_t transaction_expand_file;
+ uint64_t transaction_read_direct;
+ uint64_t transaction_read_direct_fail;
+ uint64_t transaction_write_direct;
+ uint64_t transaction_write_direct_fail;
+ uint64_t traverses;
+ uint64_t traverse_val_vanished;
+ uint64_t expands;
+ uint64_t frees;
+ uint64_t locks;
+ uint64_t lock_lowlevel;
+ uint64_t lock_nonblock;
+ uint64_t lock_nonblock_fail;
+};
+
+/**
+ * struct ntdb_attribute_openhook - ntdb special effects hook for open
+ *
+ * This attribute contains a function to call once we have the OPEN_LOCK
+ * for the ntdb, but before we've examined its contents. If this succeeds,
+ * the ntdb will be populated if it's then zero-length.
+ *
+ * This is a hack to allow support for TDB-style TDB_CLEAR_IF_FIRST
+ * behaviour.
+ */
+struct ntdb_attribute_openhook {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_OPENHOOK */
+ enum NTDB_ERROR (*fn)(int fd, void *data);
+ void *data;
+};
+
+/**
+ * struct ntdb_attribute_flock - ntdb special effects hook for file locking
+ *
+ * This attribute contains functions to call to place locks on a file; it can
+ * be used to support non-blocking operations or lock proxying.
+ *
+ * They should return 0 on success, -1 on failure and set errno.
+ *
+ * An error will be logged on error if errno is neither EAGAIN nor EINTR
+ * (normally it would only return EAGAIN if waitflag is false, and
+ * loop internally on EINTR).
+ */
+struct ntdb_attribute_flock {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_FLOCK */
+ /* Called to take a lock on @fd.
+ * NOTE(review): presumably @rw is the lock type, @off/@len the byte
+ * range and the trailing void * receives @data below -- confirm. */
+ int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *);
+ /* Called to drop a lock on @fd; presumably mirrors @lock's arguments. */
+ int (*unlock)(int fd, int rw, off_t off, off_t len, void *);
+ /* Private pointer kept alongside the two callbacks. */
+ void *data;
+};
+
+/**
+ * struct ntdb_attribute_hashsize - ntdb hashsize setting.
+ *
+ * This attribute is only settable on ntdb_open; it indicates that we create
+ * a hashtable of the given size, rather than the default.
+ */
+struct ntdb_attribute_hashsize {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASHSIZE */
+ uint32_t size;
+};
+
+/**
+ * struct ntdb_attribute_allocator - allocator for ntdb to use.
+ *
+ * You can replace malloc/free with your own allocation functions.
+ * The allocator takes an "owner" pointer, which is either NULL (for
+ * the initial struct ntdb_context and struct ntdb_file), or a
+ * previously allocated pointer. This is useful for relationship
+ * tracking, such as the talloc library.
+ *
+ * The expand function is realloc, but only ever used to expand an
+ * existing allocation.
+ *
+ * Be careful mixing allocators: two ntdb_contexts which have the same file
+ * open will share the same struct ntdb_file. This may be allocated by one
+ * ntdb's allocator, and freed by the other.
+ */
+struct ntdb_attribute_allocator {
+ struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_ALLOCATOR */
+ /* malloc replacement; @owner is NULL or a previously allocated pointer. */
+ void *(*alloc)(const void *owner, size_t len, void *priv_data);
+ /* realloc replacement, only ever used to grow an existing allocation. */
+ void *(*expand)(void *old, size_t newlen, void *priv_data);
+ /* free replacement. */
+ void (*free)(void *old, void *priv_data);
+ /* Handed back as the last argument of each callback above. */
+ void *priv_data;
+};
+
+/**
+ * union ntdb_attribute - ntdb attributes.
+ *
+ * This represents all the known attributes.
+ *
+ * See also:
+ * struct ntdb_attribute_log, struct ntdb_attribute_hash,
+ * struct ntdb_attribute_seed, struct ntdb_attribute_stats,
+ * struct ntdb_attribute_openhook, struct ntdb_attribute_flock,
+ * struct ntdb_attribute_allocator, struct ntdb_attribute_hashsize.
+ */
+union ntdb_attribute {
+ struct ntdb_attribute_base base;
+ struct ntdb_attribute_log log;
+ struct ntdb_attribute_hash hash;
+ struct ntdb_attribute_seed seed;
+ struct ntdb_attribute_stats stats;
+ struct ntdb_attribute_openhook openhook;
+ struct ntdb_attribute_flock flock;
+ struct ntdb_attribute_allocator alloc;
+ struct ntdb_attribute_hashsize hashsize;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ntdb.h */
--- /dev/null
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: ntdb
+Description: A (not-so) trivial database
+Version: @PACKAGE_VERSION@
+Libs: @LIB_RPATH@ -L${libdir} -lntdb
+Cflags: -I${includedir}
+URL: http://tdb.samba.org/
--- /dev/null
+ /*
+ Trivial Database 2: opening and closing TDBs
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/build_assert/build_assert.h>
+
+/* all tdbs, to detect double-opens (fcntl file locks don't nest!) */
+static struct ntdb_context *tdbs = NULL;
+
+/* Search every open ntdb for one whose file has the given device/inode
+ * pair.  On a match, bump that file's refcount and return it; otherwise
+ * return NULL. */
+static struct ntdb_file *find_file(dev_t device, ino_t ino)
+{
+	struct ntdb_context *ctx = tdbs;
+
+	while (ctx != NULL) {
+		struct ntdb_file *file = ctx->file;
+
+		if (file->device == device && file->inode == ino) {
+			/* Caller now shares this file. */
+			file->refcnt++;
+			return file;
+		}
+		ctx = ctx->next;
+	}
+	return NULL;
+}
+
+/* Read exactly @len bytes from @fd into @buf, looping over short reads.
+ *
+ * Returns true once all @len bytes have arrived.  Returns false with errno
+ * set otherwise: to the read() error, or to EWOULDBLOCK if end-of-file is
+ * reached first.  A read() interrupted by a signal (EINTR) is retried. */
+static bool read_all(int fd, void *buf, size_t len)
+{
+	char *pos = buf;
+
+	while (len) {
+		ssize_t ret = read(fd, pos, len);
+
+		if (ret < 0) {
+			/* Interrupted before any data arrived: just retry. */
+			if (errno == EINTR)
+				continue;
+			return false;
+		}
+		if (ret == 0) {
+			/* ETOOSHORT? */
+			errno = EWOULDBLOCK;
+			return false;
+		}
+		pos += ret;
+		len -= ret;
+	}
+	return true;
+}
+
+/* Produce a 32-bit value for seeding the hash.
+ *
+ * Sources are tried in order: /dev/urandom, then /dev/egd-pool, and finally
+ * a mix of pid and current time (which is reported as a warning). */
+static uint32_t random_number(struct ntdb_context *ntdb)
+{
+	uint32_t value = 0;
+	struct timeval tv;
+	int fd;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd >= 0) {
+		bool ok = read_all(fd, &value, sizeof(value));
+
+		close(fd);
+		if (ok)
+			return value;
+	}
+
+	/* FIXME: Untested! Based on Wikipedia protocol description! */
+	fd = open("/dev/egd-pool", O_RDWR);
+	if (fd >= 0) {
+		/* Command is 1, next byte is size we want to read. */
+		char request[2] = { 1, sizeof(uint32_t) };
+		bool complete = false;
+
+		if (write(fd, request, sizeof(request)) == sizeof(request)) {
+			char answer[1 + sizeof(uint32_t)];
+			int got = read(fd, answer, sizeof(answer));
+
+			if (got > 1) {
+				/* Copy at least some bytes. */
+				memcpy(&value, answer + 1, got - 1);
+				/* Only trust it if the full amount came back. */
+				complete = (answer[0] == sizeof(uint32_t)
+					    && got == sizeof(answer));
+			}
+		}
+		close(fd);
+		if (complete)
+			return value;
+	}
+
+	/* Fallback: pid and time. */
+	gettimeofday(&tv, NULL);
+	value = getpid() * 100132289ULL + tv.tv_sec * 1000000ULL + tv.tv_usec;
+	ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+		    "ntdb_open: random from getpid and time");
+	return value;
+}
+
+/* Put a freshly allocated ntdb_context into its initial state: run
+ * ntdb_io_init() on it, then clear the transaction and access pointers. */
+static void ntdb_context_init(struct ntdb_context *ntdb)
+{
+ /* Initialize the NTDB fields here */
+ ntdb_io_init(ntdb);
+ /* No transaction in progress yet. */
+ ntdb->transaction = NULL;
+ ntdb->access = NULL;
+}
+
+/* initialise a new database:
+ *
+ *	struct ntdb_header;
+ *	struct {
+ *		struct ntdb_used_record hash_header;
+ *		ntdb_off_t hash_buckets[1 << ntdb->hash_bits];
+ *	} hash;
+ *	struct ntdb_freetable ftable;
+ *	struct {
+ *		struct ntdb_free_record free_header;
+ *		char forty_three[...];
+ *	} remainder;
+ *
+ * NEW_DATABASE_HDR_SIZE(hbits) is the size of everything above up to and
+ * including free_header, for a hash table of (1 << hbits) buckets.  The
+ * argument is parenthesized so any expression can be passed safely.
+ */
+#define NEW_DATABASE_HDR_SIZE(hbits)					\
+	(sizeof(struct ntdb_header)					\
+	 + sizeof(struct ntdb_used_record)				\
+	 + (sizeof(ntdb_off_t) << (hbits))				\
+	 + sizeof(struct ntdb_freetable)				\
+	 + sizeof(struct ntdb_free_record))
+
+/* Build a fresh, empty database image in memory and, unless the ntdb is
+ * NTDB_INTERNAL, write it to ntdb->file->fd after truncating the file.
+ *
+ * @ntdb: context; hash_bits, hash_fn and the allocator must already be set.
+ * @seed: optional explicit hash seed; if NULL, random_number() supplies one.
+ * @rhdr: receives a copy of the header as it appears after ntdb_convert().
+ *
+ * For NTDB_INTERNAL the image is kept and becomes ntdb->file->map_ptr;
+ * in every other case the temporary image is freed before returning.
+ *
+ * Returns NTDB_SUCCESS, or the error code from the failing step. */
+static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
+					 struct ntdb_attribute_seed *seed,
+					 struct ntdb_header *rhdr)
+{
+	/* We make it up in memory, then write it out if not internal */
+	struct ntdb_freetable *ftable;
+	struct ntdb_used_record *htable;
+	struct ntdb_header *hdr;
+	struct ntdb_free_record *remainder;
+	char *mem;
+	unsigned int magic_len;
+	ssize_t rlen;
+	size_t dbsize, hashsize, hdrsize, remaindersize;
+	enum NTDB_ERROR ecode;
+
+	hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits;
+
+	/* Always make db a multiple of NTDB_PGSIZE */
+	hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits);
+	dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
+
+	mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
+	if (!mem) {
+		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+				   "ntdb_new_database: failed to allocate");
+	}
+
+	/* Carve the allocation up in on-disk order. */
+	hdr = (void *)mem;
+	htable = (void *)(mem + sizeof(*hdr));
+	ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize);
+	remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize
+			     + sizeof(*ftable));
+
+	/* Fill in the header */
+	hdr->version = NTDB_VERSION;
+	if (seed)
+		hdr->hash_seed = seed->seed;
+	else
+		hdr->hash_seed = random_number(ntdb);
+	/* Store the hash of the magic value, so a later open can check it. */
+	hdr->hash_test = NTDB_HASH_MAGIC;
+	hdr->hash_test = ntdb->hash_fn(&hdr->hash_test,
+				       sizeof(hdr->hash_test),
+				       hdr->hash_seed,
+				       ntdb->hash_data);
+	hdr->hash_bits = ntdb->hash_bits;
+	hdr->recovery = 0;
+	hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK;
+	hdr->seqnum = 0;
+	hdr->capabilities = 0;
+	memset(hdr->reserved, 0, sizeof(hdr->reserved));
+
+	/* Hash is all zero after header. */
+	ecode = set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0,
+			   hashsize, hashsize);
+	if (ecode != NTDB_SUCCESS) {
+		goto out;
+	}
+	memset(htable + 1, 0, hashsize);
+
+	/* Free is empty. */
+	hdr->free_table = (char *)ftable - (char *)hdr;
+	memset(ftable, 0, sizeof(*ftable));
+	ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
+			   sizeof(*ftable) - sizeof(ftable->hdr),
+			   sizeof(*ftable) - sizeof(ftable->hdr));
+	if (ecode != NTDB_SUCCESS) {
+		goto out;
+	}
+
+	/* Rest of database is a free record, containing junk. */
+	remaindersize = dbsize - hdrsize;
+	remainder->ftable_and_len
+		= (remaindersize + sizeof(*remainder)
+		   - sizeof(struct ntdb_used_record));
+	remainder->next = 0;
+	remainder->magic_and_prev
+		= (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
+		| ((char *)remainder - (char *)hdr);
+	memset(remainder + 1, 0x43, remaindersize);
+
+	/* Put in our single free entry. */
+	ftable->buckets[size_to_bucket(remaindersize)] =
+		(char *)remainder - (char *)hdr;
+
+	/* Magic food */
+	memset(hdr->magic_food, 0, sizeof(hdr->magic_food));
+	strcpy(hdr->magic_food, NTDB_MAGIC_FOOD);
+
+	/* This creates an endian-converted database, as if read from disk */
+	magic_len = sizeof(hdr->magic_food);
+	ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len);
+
+	/* Return copy of header. */
+	*rhdr = *hdr;
+
+	if (ntdb->flags & NTDB_INTERNAL) {
+		/* The in-memory image is the database: keep it. */
+		ntdb->file->map_size = dbsize;
+		ntdb->file->map_ptr = hdr;
+		return NTDB_SUCCESS;
+	}
+	if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_new_database:"
+				    " failed to seek: %s", strerror(errno));
+		goto out;
+	}
+
+	if (ftruncate(ntdb->file->fd, 0) == -1) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_new_database:"
+				    " failed to truncate: %s", strerror(errno));
+		goto out;
+	}
+
+	rlen = write(ntdb->file->fd, hdr, dbsize);
+	if (rlen < 0 || (size_t)rlen != dbsize) {
+		/* A short write sets no errno of its own: call it ENOSPC. */
+		if (rlen >= 0)
+			errno = ENOSPC;
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_new_database: %zi writing header: %s",
+				    rlen, strerror(errno));
+		goto out;
+	}
+
+out:
+	/* Reached on success too: the file now holds the data. */
+	ntdb->free_fn(hdr, ntdb->alloc_data);
+	return ecode;
+}
+
+/* Allocate ntdb->file and give it a clean starting state: no lock records,
+ * no all-record lock, a reference count of one, nothing mapped, no direct
+ * accesses and no old mmaps.
+ *
+ * Returns NTDB_SUCCESS, or logs and returns NTDB_ERR_OOM if the allocation
+ * fails (ntdb->file is NULL in that case). */
+static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
+{
+	struct ntdb_file *file;
+
+	file = ntdb->alloc_fn(NULL, sizeof(*file), ntdb->alloc_data);
+	ntdb->file = file;
+	if (file == NULL) {
+		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+				   "ntdb_open: cannot alloc ntdb_file structure");
+	}
+
+	file->refcnt = 1;
+	file->map_ptr = NULL;
+	file->old_mmaps = NULL;
+	file->direct_count = 0;
+	file->lockrecs = NULL;
+	file->num_lockrecs = 0;
+	file->allrecord_lock.count = 0;
+	return NTDB_SUCCESS;
+}
+
+/* Apply a single attribute to an already-open ntdb.
+ *
+ * LOG, FLOCK and ALLOCATOR replace the corresponding callbacks and private
+ * data.  HASH, SEED, OPENHOOK, HASHSIZE and STATS are rejected, as is any
+ * unknown type: those log a usage error and return NTDB_ERR_EINVAL.
+ * Returns NTDB_SUCCESS when the attribute was applied. */
+_PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
+					    const union ntdb_attribute *attr)
+{
+	/* Name of an attribute which may only be given at open time. */
+	const char *open_only;
+
+	switch (attr->base.attr) {
+	case NTDB_ATTRIBUTE_LOG:
+		ntdb->log_fn = attr->log.fn;
+		ntdb->log_data = attr->log.data;
+		return NTDB_SUCCESS;
+	case NTDB_ATTRIBUTE_FLOCK:
+		ntdb->lock_fn = attr->flock.lock;
+		ntdb->unlock_fn = attr->flock.unlock;
+		ntdb->lock_data = attr->flock.data;
+		return NTDB_SUCCESS;
+	case NTDB_ATTRIBUTE_ALLOCATOR:
+		ntdb->alloc_fn = attr->alloc.alloc;
+		ntdb->expand_fn = attr->alloc.expand;
+		ntdb->free_fn = attr->alloc.free;
+		ntdb->alloc_data = attr->alloc.priv_data;
+		return NTDB_SUCCESS;
+	case NTDB_ATTRIBUTE_STATS:
+		return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+				   NTDB_LOG_USE_ERROR,
+				   "ntdb_set_attribute:"
+				   " cannot set NTDB_ATTRIBUTE_STATS");
+	case NTDB_ATTRIBUTE_HASH:
+		open_only = "NTDB_ATTRIBUTE_HASH";
+		break;
+	case NTDB_ATTRIBUTE_SEED:
+		open_only = "NTDB_ATTRIBUTE_SEED";
+		break;
+	case NTDB_ATTRIBUTE_OPENHOOK:
+		open_only = "NTDB_ATTRIBUTE_OPENHOOK";
+		break;
+	case NTDB_ATTRIBUTE_HASHSIZE:
+		open_only = "NTDB_ATTRIBUTE_HASHSIZE";
+		break;
+	default:
+		return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+				   NTDB_LOG_USE_ERROR,
+				   "ntdb_set_attribute:"
+				   " unknown attribute type %u",
+				   attr->base.attr);
+	}
+
+	return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+			   NTDB_LOG_USE_ERROR,
+			   "ntdb_set_attribute:"
+			   " cannot set %s after opening",
+			   open_only);
+}
+
+/* Fill in @attr with the current value of the attribute selected by
+ * @attr->base.attr.
+ *
+ * Returns NTDB_ERR_NOEXIST for NTDB_ATTRIBUTE_LOG or NTDB_ATTRIBUTE_OPENHOOK
+ * when no such hook is installed, NTDB_ERR_EINVAL (logged as a usage error)
+ * for an unknown type, and NTDB_SUCCESS otherwise.  On success
+ * @attr->base.next is set to NULL. */
+_PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
+					    union ntdb_attribute *attr)
+{
+	switch (attr->base.attr) {
+	case NTDB_ATTRIBUTE_LOG:
+		if (!ntdb->log_fn)
+			return NTDB_ERR_NOEXIST;
+		attr->log.fn = ntdb->log_fn;
+		attr->log.data = ntdb->log_data;
+		break;
+	case NTDB_ATTRIBUTE_HASH:
+		attr->hash.fn = ntdb->hash_fn;
+		attr->hash.data = ntdb->hash_data;
+		break;
+	case NTDB_ATTRIBUTE_SEED:
+		attr->seed.seed = ntdb->hash_seed;
+		break;
+	case NTDB_ATTRIBUTE_OPENHOOK:
+		if (!ntdb->openhook)
+			return NTDB_ERR_NOEXIST;
+		attr->openhook.fn = ntdb->openhook;
+		attr->openhook.data = ntdb->openhook_data;
+		break;
+	case NTDB_ATTRIBUTE_STATS: {
+		/* Copy no more than the caller's structure can hold. */
+		size_t size = attr->stats.size;
+		if (size > ntdb->stats.size)
+			size = ntdb->stats.size;
+		memcpy(&attr->stats, &ntdb->stats, size);
+		break;
+	}
+	case NTDB_ATTRIBUTE_FLOCK:
+		attr->flock.lock = ntdb->lock_fn;
+		attr->flock.unlock = ntdb->unlock_fn;
+		attr->flock.data = ntdb->lock_data;
+		break;
+	case NTDB_ATTRIBUTE_ALLOCATOR:
+		attr->alloc.alloc = ntdb->alloc_fn;
+		attr->alloc.expand = ntdb->expand_fn;
+		attr->alloc.free = ntdb->free_fn;
+		attr->alloc.priv_data = ntdb->alloc_data;
+		break;
+	case NTDB_ATTRIBUTE_HASHSIZE:
+		/* Shift an unsigned one: 1 << 31 on a signed int is undefined. */
+		attr->hashsize.size = (uint32_t)1 << ntdb->hash_bits;
+		break;
+	default:
+		return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+				   NTDB_LOG_USE_ERROR,
+				   "ntdb_get_attribute:"
+				   " unknown attribute type %u",
+				   attr->base.attr);
+	}
+	attr->base.next = NULL;
+	return NTDB_SUCCESS;
+}
+
+/* Return one attribute of an open ntdb to its default.
+ *
+ * LOG and OPENHOOK clear their callback; FLOCK goes back to the fcntl-based
+ * lock functions.  HASH, SEED and STATS cannot be unset, and any other type
+ * is treated as unknown: those cases only log a usage error, since there is
+ * no return value. */
+_PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
+				   enum ntdb_attribute_type type)
+{
+	switch (type) {
+	case NTDB_ATTRIBUTE_LOG:
+		ntdb->log_fn = NULL;
+		break;
+	case NTDB_ATTRIBUTE_OPENHOOK:
+		ntdb->openhook = NULL;
+		break;
+	case NTDB_ATTRIBUTE_HASH:
+	case NTDB_ATTRIBUTE_SEED:
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+			    "ntdb_unset_attribute: cannot unset %s after opening",
+			    type == NTDB_ATTRIBUTE_HASH
+			    ? "NTDB_ATTRIBUTE_HASH"
+			    : "NTDB_ATTRIBUTE_SEED");
+		break;
+	case NTDB_ATTRIBUTE_STATS:
+		/* Leading space keeps "…attribute: cannot…" readable. */
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+			    NTDB_LOG_USE_ERROR,
+			    "ntdb_unset_attribute:"
+			    " cannot unset NTDB_ATTRIBUTE_STATS");
+		break;
+	case NTDB_ATTRIBUTE_FLOCK:
+		/* Back to the built-in fcntl locking. */
+		ntdb->lock_fn = ntdb_fcntl_lock;
+		ntdb->unlock_fn = ntdb_fcntl_unlock;
+		break;
+	default:
+		ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+			    NTDB_LOG_USE_ERROR,
+			    "ntdb_unset_attribute: unknown attribute type %u",
+			    type);
+	}
+}
+
+/* Decide what to do about a capability whose type we do not recognise.
+ * The top three bits of the capability tell us whether it matters:
+ *
+ *  - NTDB_CAP_NOOPEN: refuse outright (NTDB_ERR_IO).
+ *  - NTDB_CAP_NOWRITE: refuse unless the ntdb is NTDB_RDONLY
+ *    (NTDB_ERR_RDONLY).
+ *  - NTDB_CAP_NOCHECK: allow, but flag the ntdb NTDB_CANT_CHECK.
+ *
+ * @caller is the function name used as the log message prefix.
+ * Returns NTDB_SUCCESS if the capability can be ignored. */
+enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
+				   ntdb_off_t type)
+{
+	/* Report the capability's type rather than the flag bit that tripped
+	 * us; the cast matches the %llu conversion. */
+	unsigned long long captype = type & NTDB_CAP_TYPE_MASK;
+
+	if (type & NTDB_CAP_NOOPEN) {
+		return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				   "%s: file has unknown capability %llu",
+				   caller, captype);
+	}
+
+	if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
+		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
+				   "%s: file has unknown capability %llu"
+				   " (cannot write to it)",
+				   caller, captype);
+	}
+
+	if (type & NTDB_CAP_NOCHECK) {
+		ntdb->flags |= NTDB_CANT_CHECK;
+	}
+	return NTDB_SUCCESS;
+}
+
+/*
+ * Walk the capability list starting at offset `capabilities` and make
+ * sure none of the entries prevents us from using the file.
+ *
+ * Stops at the first entry that yields an error, or when the `next`
+ * offset is 0.  Returns NTDB_SUCCESS or the first error encountered.
+ */
+static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
+				       ntdb_off_t capabilities)
+{
+	ntdb_off_t cur = capabilities;
+	enum NTDB_ERROR result = NTDB_SUCCESS;
+
+	while (cur != 0 && result == NTDB_SUCCESS) {
+		const struct ntdb_capability *entry;
+
+		entry = ntdb_access_read(ntdb, cur, sizeof(*entry), true);
+		if (NTDB_PTR_IS_ERR(entry)) {
+			return NTDB_PTR_ERR(entry);
+		}
+
+		switch (entry->type & NTDB_CAP_TYPE_MASK) {
+		/* We don't understand any capabilities (yet). */
+		default:
+			result = unknown_capability(ntdb, "ntdb_open",
+						    entry->type);
+		}
+
+		/* Fetch the link before the record is released. */
+		cur = entry->next;
+		ntdb_access_release(ntdb, entry);
+	}
+	return result;
+}
+
+/* Default allocation hook: plain malloc(); owner and priv_data are unused. */
+static void *default_alloc(const void *owner, size_t len, void *priv_data)
+{
+	void *mem = malloc(len);
+
+	return mem;
+}
+
+/* Default resize hook: plain realloc(); priv_data is unused. */
+static void *default_expand(void *ptr, size_t len, void *priv_data)
+{
+	void *resized = realloc(ptr, len);
+
+	return resized;
+}
+
+/* Default release hook: plain free(); priv_data is unused. */
+static void default_free(void *ptr, void *priv_data)
+{
+	(void)priv_data;
+	free(ptr);
+}
+
+/*
+ * Allocate the context itself.  No context exists yet, so the attribute
+ * list is searched by hand: the first NTDB_ATTRIBUTE_ALLOCATOR entry
+ * supplies the allocation function, otherwise default_alloc() is used.
+ * The block has room for the context plus a NUL-terminated copy of name.
+ */
+static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr,
+				       const char *name)
+{
+	const union ntdb_attribute *a;
+	size_t total = sizeof(struct ntdb_context) + strlen(name) + 1;
+
+	for (a = attr; a != NULL; a = a->base.next) {
+		if (a->base.attr != NTDB_ATTRIBUTE_ALLOCATOR) {
+			continue;
+		}
+		return a->alloc.alloc(NULL, total, a->alloc.priv_data);
+	}
+	return default_alloc(NULL, total, NULL);
+}
+
+/*
+ * Return the smallest bit count b (at least 1) with (1 << b) >= size.
+ *
+ * Sizes above 2^63 cannot be represented as a 64-bit power of two; for
+ * those 64 is returned, which callers treat as "too large".
+ */
+static unsigned int next_pow2(uint64_t size)
+{
+	unsigned int bits = 1;
+
+	/* Stop at 64: shifting a 64-bit value by its width is undefined. */
+	while (bits < 64 && (1ULL << bits) < size)
+		bits++;
+	return bits;
+}
+
+/*
+ * Open (and, with O_CREAT on an empty file, initialise) an ntdb database.
+ *
+ * name:       path of the database; a copy is stored behind the context
+ *             so that logging can use it.
+ * ntdb_flags: NTDB_* flags; any flag outside the set tested below is
+ *             rejected.
+ * open_flags: open(2) flags.  O_WRONLY is rejected; O_RDONLY implies
+ *             NTDB_RDONLY, and NTDB_RDONLY without O_RDONLY is rejected.
+ * mode:       passed to open(2).
+ * attr:       optional linked list of attributes.
+ *
+ * Returns the new context, or NULL with errno set on failure.
+ */
+_PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
+					int open_flags, mode_t mode,
+					union ntdb_attribute *attr)
+{
+	struct ntdb_context *ntdb;
+	struct stat st;
+	int saved_errno = 0;
+	uint64_t hash_test;
+	unsigned v;
+	ssize_t rlen;
+	struct ntdb_header hdr;
+	struct ntdb_attribute_seed *seed = NULL;
+	ntdb_bool_err berr;
+	enum NTDB_ERROR ecode;
+	int openlock;
+
+	ntdb = alloc_ntdb(attr, name);
+	if (!ntdb) {
+		/* Can't log this */
+		errno = ENOMEM;
+		return NULL;
+	}
+	/* Set name immediately for logging functions. */
+	ntdb->name = strcpy((char *)(ntdb + 1), name);
+	ntdb->flags = ntdb_flags;
+	ntdb->log_fn = NULL;
+	ntdb->open_flags = open_flags;
+	ntdb->file = NULL;
+	ntdb->openhook = NULL;
+	ntdb->lock_fn = ntdb_fcntl_lock;
+	ntdb->unlock_fn = ntdb_fcntl_unlock;
+	ntdb->hash_fn = ntdb_jenkins_hash;
+	memset(&ntdb->stats, 0, sizeof(ntdb->stats));
+	ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
+	ntdb->stats.size = sizeof(ntdb->stats);
+	ntdb->alloc_fn = default_alloc;
+	ntdb->expand_fn = default_expand;
+	ntdb->free_fn = default_free;
+	ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */
+
+	/* Attributes which only make sense at open time are handled here;
+	 * everything else goes through ntdb_set_attribute(). */
+	while (attr) {
+		switch (attr->base.attr) {
+		case NTDB_ATTRIBUTE_HASH:
+			ntdb->hash_fn = attr->hash.fn;
+			ntdb->hash_data = attr->hash.data;
+			break;
+		case NTDB_ATTRIBUTE_SEED:
+			seed = &attr->seed;
+			break;
+		case NTDB_ATTRIBUTE_OPENHOOK:
+			ntdb->openhook = attr->openhook.fn;
+			ntdb->openhook_data = attr->openhook.data;
+			break;
+		case NTDB_ATTRIBUTE_HASHSIZE:
+			ntdb->hash_bits = next_pow2(attr->hashsize.size);
+			if (ntdb->hash_bits > 31) {
+				ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+						    NTDB_LOG_USE_ERROR,
+						    "ntdb_open: hash_size %u"
+						    " too large",
+						    attr->hashsize.size);
+				goto fail;
+			}
+			break;
+		default:
+			/* These are set as normal. */
+			ecode = ntdb_set_attribute(ntdb, attr);
+			if (ecode != NTDB_SUCCESS)
+				goto fail;
+		}
+		attr = attr->base.next;
+	}
+
+	if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
+			   | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
+			   | NTDB_RDONLY)) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+				    "ntdb_open: unknown flags %u", ntdb_flags);
+		goto fail;
+	}
+
+	if (seed) {
+		if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
+			ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+					    NTDB_LOG_USE_ERROR,
+					    "ntdb_open:"
+					    " cannot set NTDB_ATTRIBUTE_SEED"
+					    " without O_CREAT.");
+			goto fail;
+		}
+	}
+
+	if ((open_flags & O_ACCMODE) == O_WRONLY) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+				    "ntdb_open: can't open ntdb %s write-only",
+				    name);
+		goto fail;
+	}
+
+	if ((open_flags & O_ACCMODE) == O_RDONLY) {
+		openlock = F_RDLCK;
+		ntdb->flags |= NTDB_RDONLY;
+	} else {
+		if (ntdb_flags & NTDB_RDONLY) {
+			ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
+					    NTDB_LOG_USE_ERROR,
+					    "ntdb_open: can't use NTDB_RDONLY"
+					    " without O_RDONLY");
+			goto fail;
+		}
+		openlock = F_WRLCK;
+	}
+
+	/* internal databases don't need any of the rest. */
+	if (ntdb->flags & NTDB_INTERNAL) {
+		ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
+		ecode = ntdb_new_file(ntdb);
+		if (ecode != NTDB_SUCCESS) {
+			goto fail;
+		}
+		ntdb->file->fd = -1;
+		ecode = ntdb_new_database(ntdb, seed, &hdr);
+		if (ecode == NTDB_SUCCESS) {
+			ntdb_convert(ntdb, &hdr.hash_seed,
+				     sizeof(hdr.hash_seed));
+			ntdb->hash_seed = hdr.hash_seed;
+			ntdb_context_init(ntdb);
+			/* Check the result here too, as the file-backed
+			 * path below does. */
+			ecode = ntdb_ftable_init(ntdb);
+		}
+		if (ecode != NTDB_SUCCESS) {
+			goto fail;
+		}
+		return ntdb;
+	}
+
+	/* If this file is already open in this process, share it. */
+	if (stat(name, &st) != -1)
+		ntdb->file = find_file(st.st_dev, st.st_ino);
+
+	if (!ntdb->file) {
+		ecode = ntdb_new_file(ntdb);
+		if (ecode != NTDB_SUCCESS) {
+			goto fail;
+		}
+
+		/* Set this now, as ntdb_nest_lock examines it. */
+		ntdb->file->map_size = 0;
+
+		if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) {
+			enum ntdb_log_level lvl;
+			/* errno set by open(2) */
+			saved_errno = errno;
+
+			/* Probing for files like this is a common pattern. */
+			if (!(open_flags & O_CREAT) && errno == ENOENT) {
+				lvl = NTDB_LOG_WARNING;
+			} else {
+				lvl = NTDB_LOG_ERROR;
+			}
+			ntdb_logerr(ntdb, NTDB_ERR_IO, lvl,
+				    "ntdb_open: could not open file %s: %s",
+				    name, strerror(errno));
+
+			goto fail_errno;
+		}
+
+		/* ensure there is only one process initialising at once:
+		 * do it immediately to reduce the create/openlock race. */
+		ecode = ntdb_lock_open(ntdb, openlock,
+				       NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+		if (ecode != NTDB_SUCCESS) {
+			saved_errno = errno;
+			goto fail_errno;
+		}
+
+		/* on exec, don't inherit the fd */
+		v = fcntl(ntdb->file->fd, F_GETFD, 0);
+		fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC);
+
+		if (fstat(ntdb->file->fd, &st) == -1) {
+			saved_errno = errno;
+			ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_open: could not stat open %s: %s",
+				    name, strerror(errno));
+			goto fail_errno;
+		}
+
+		ntdb->file->device = st.st_dev;
+		ntdb->file->inode = st.st_ino;
+
+		/* call their open hook if they gave us one. */
+		if (ntdb->openhook) {
+			ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
+			if (ecode != NTDB_SUCCESS) {
+				ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+					    "ntdb_open: open hook failed");
+				goto fail;
+			}
+			open_flags |= O_CREAT;
+		}
+	} else {
+		/* ensure there is only one process initialising at once */
+		ecode = ntdb_lock_open(ntdb, openlock,
+				       NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+		if (ecode != NTDB_SUCCESS) {
+			saved_errno = errno;
+			goto fail_errno;
+		}
+	}
+
+	/* If they used O_TRUNC, read will return 0. */
+	rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
+	if (rlen == 0 && (open_flags & O_CREAT)) {
+		ecode = ntdb_new_database(ntdb, seed, &hdr);
+		if (ecode != NTDB_SUCCESS) {
+			goto fail;
+		}
+	} else if (rlen < 0) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_open: error %s reading %s",
+				    strerror(errno), name);
+		goto fail;
+	} else if (rlen < sizeof(hdr)
+		   || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_open: %s is not a ntdb file", name);
+		goto fail;
+	}
+
+	/* A byte-swapped version number means the file has the opposite
+	 * endianness: turn on conversion. */
+	if (hdr.version != NTDB_VERSION) {
+		if (hdr.version == bswap_64(NTDB_VERSION))
+			ntdb->flags |= NTDB_CONVERT;
+		else {
+			/* wrong version */
+			ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+					    "ntdb_open:"
+					    " %s is unknown version 0x%llx",
+					    name, (long long)hdr.version);
+			goto fail;
+		}
+	} else if (ntdb->flags & NTDB_CONVERT) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_open:"
+				    " %s does not need NTDB_CONVERT",
+				    name);
+		goto fail;
+	}
+
+	ntdb_context_init(ntdb);
+
+	ntdb_convert(ntdb, &hdr, sizeof(hdr));
+	ntdb->hash_bits = hdr.hash_bits;
+	ntdb->hash_seed = hdr.hash_seed;
+	/* Hash a known constant to verify we use the file's hash function. */
+	hash_test = NTDB_HASH_MAGIC;
+	hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
+	if (hdr.hash_test != hash_test) {
+		/* wrong hash variant */
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_open:"
+				    " %s uses a different hash function",
+				    name);
+		goto fail;
+	}
+
+	ecode = capabilities_ok(ntdb, hdr.capabilities);
+	if (ecode != NTDB_SUCCESS) {
+		goto fail;
+	}
+
+	/* Clear any features we don't understand. */
+	if ((open_flags & O_ACCMODE) != O_RDONLY) {
+		hdr.features_used &= NTDB_FEATURE_MASK;
+		ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
+							  features_used),
+					   &hdr.features_used,
+					   sizeof(hdr.features_used));
+		if (ecode != NTDB_SUCCESS)
+			goto fail;
+	}
+
+	ntdb_unlock_open(ntdb, openlock);
+
+	/* This makes sure we have current map_size and mmap. */
+	ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
+	if (unlikely(ecode != NTDB_SUCCESS))
+		goto fail;
+
+	if (ntdb->file->map_size % NTDB_PGSIZE != 0) {
+		ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+				    "ntdb_open:"
+				    " %s size %llu isn't a multiple of %u",
+				    name, (long long)ntdb->file->map_size,
+				    NTDB_PGSIZE);
+		goto fail;
+	}
+
+	/* Now it's fully formed, recover if necessary. */
+	berr = ntdb_needs_recovery(ntdb);
+	if (unlikely(berr != false)) {
+		if (berr < 0) {
+			ecode = NTDB_OFF_TO_ERR(berr);
+			goto fail;
+		}
+		ecode = ntdb_lock_and_recover(ntdb);
+		if (ecode != NTDB_SUCCESS) {
+			goto fail;
+		}
+	}
+
+	ecode = ntdb_ftable_init(ntdb);
+	if (ecode != NTDB_SUCCESS) {
+		goto fail;
+	}
+
+	ntdb->next = tdbs;
+	tdbs = ntdb;
+	return ntdb;
+
+ fail:
+	/* Map ecode to some logical errno. */
+	switch (NTDB_ERR_TO_OFF(ecode)) {
+	case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
+	case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
+		saved_errno = EIO;
+		break;
+	case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
+		saved_errno = EWOULDBLOCK;
+		break;
+	case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
+		saved_errno = ENOMEM;
+		break;
+	case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
+		saved_errno = EINVAL;
+		break;
+	default:
+		saved_errno = EINVAL;
+		break;
+	}
+
+fail_errno:
+#ifdef NTDB_TRACE
+	close(ntdb->tracefd);
+#endif
+	if (ntdb->file) {
+		ntdb_lock_cleanup(ntdb);
+		if (--ntdb->file->refcnt == 0) {
+			assert(ntdb->file->num_lockrecs == 0);
+			if (ntdb->file->map_ptr) {
+				if (ntdb->flags & NTDB_INTERNAL) {
+					ntdb->free_fn(ntdb->file->map_ptr,
+						      ntdb->alloc_data);
+				} else
+					ntdb_munmap(ntdb);
+			}
+			if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0)
+				ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+					    "ntdb_open: failed to close ntdb fd"
+					    " on error: %s", strerror(errno));
+			ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
+			ntdb->free_fn(ntdb->file, ntdb->alloc_data);
+		}
+	}
+
+	ntdb->free_fn(ntdb, ntdb->alloc_data);
+	errno = saved_errno;
+	return NULL;
+}
+
+/*
+ * Close a database and free its context.
+ *
+ * Cancels any transaction still open, drops this context's locks and,
+ * when this was the last user of the underlying file, releases the
+ * mapping (or the in-memory buffer of an NTDB_INTERNAL database), closes
+ * the descriptor and frees the file structure.  The context is unlinked
+ * from the global list and freed.
+ *
+ * Returns the result of close(2) when a descriptor was closed, else 0.
+ */
+_PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
+{
+	int ret = 0;
+	struct ntdb_context **i;
+
+	ntdb_trace(ntdb, "ntdb_close");
+
+	if (ntdb->transaction) {
+		ntdb_transaction_cancel(ntdb);
+	}
+
+	ntdb_lock_cleanup(ntdb);
+	if (--ntdb->file->refcnt == 0) {
+		if (ntdb->file->map_ptr) {
+			if (ntdb->flags & NTDB_INTERNAL) {
+				ntdb->free_fn(ntdb->file->map_ptr,
+					      ntdb->alloc_data);
+			} else {
+				ntdb_munmap(ntdb);
+			}
+		}
+		/* NTDB_INTERNAL databases have fd == -1; close(-1) would
+		 * fail and make every such close report an error. */
+		if (ntdb->file->fd != -1) {
+			ret = close(ntdb->file->fd);
+		}
+		ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
+		ntdb->free_fn(ntdb->file, ntdb->alloc_data);
+	}
+
+	/* Remove from tdbs list */
+	for (i = &tdbs; *i; i = &(*i)->next) {
+		if (*i == ntdb) {
+			*i = ntdb->next;
+			break;
+		}
+	}
+
+#ifdef NTDB_TRACE
+	close(ntdb->tracefd);
+#endif
+	ntdb->free_fn(ntdb, ntdb->alloc_data);
+
+	return ret;
+}
+
+/*
+ * Call fn(ntdb, p) for each context on the global list, in list order,
+ * stopping early as soon as fn returns non-zero.
+ */
+_PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
+{
+	struct ntdb_context *cur = tdbs;
+
+	while (cur != NULL && fn(cur, p) == 0) {
+		cur = cur->next;
+	}
+}
--- /dev/null
+#ifndef NTDB_PRIVATE_H
+#define NTDB_PRIVATE_H
+/*
+ Trivial Database 2: private types and prototypes
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "config.h"
+#ifndef HAVE_CCAN
+#error You need ccan to build ntdb!
+#endif
+#include "ntdb.h"
+#include <ccan/compiler/compiler.h>
+#include <ccan/likely/likely.h>
+#include <ccan/endian/endian.h>
+
+#ifdef HAVE_LIBREPLACE
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+#include "system/shmem.h"
+#include "system/select.h"
+#include "system/wait.h"
+#else
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <utime.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <time.h>
+#endif
+#include <assert.h>
+
+#ifndef TEST_IT
+#define TEST_IT(cond)
+#endif
+
+/* #define NTDB_TRACE 1 */
+
+#ifndef __STRING
+#define __STRING(x) #x
+#endif
+
+#ifndef __STRINGSTRING
+#define __STRINGSTRING(x) __STRING(x)
+#endif
+
+#ifndef __location__
+#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
+#endif
+
+typedef uint64_t ntdb_len_t;
+typedef uint64_t ntdb_off_t;
+
+#define NTDB_MAGIC_FOOD "NTDB file\n"
+#define NTDB_VERSION ((uint64_t)(0x26011967 + 7))
+#define NTDB_USED_MAGIC ((uint64_t)0x1999)
+#define NTDB_HTABLE_MAGIC ((uint64_t)0x1888)
+#define NTDB_CHAIN_MAGIC ((uint64_t)0x1777)
+#define NTDB_FTABLE_MAGIC ((uint64_t)0x1666)
+#define NTDB_CAP_MAGIC ((uint64_t)0x1555)
+#define NTDB_FREE_MAGIC ((uint64_t)0xFE)
+#define NTDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
+#define NTDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
+#define NTDB_RECOVERY_INVALID_MAGIC (0x0ULL)
+
+/* Capability bits. */
+#define NTDB_CAP_TYPE_MASK 0x1FFFFFFFFFFFFFFFULL
+#define NTDB_CAP_NOCHECK 0x8000000000000000ULL
+#define NTDB_CAP_NOWRITE 0x4000000000000000ULL
+#define NTDB_CAP_NOOPEN 0x2000000000000000ULL
+
+#define NTDB_OFF_IS_ERR(off) unlikely(off >= (ntdb_off_t)(long)NTDB_ERR_LAST)
+#define NTDB_OFF_TO_ERR(off) ((enum NTDB_ERROR)(long)(off))
+#define NTDB_ERR_TO_OFF(ecode) ((ntdb_off_t)(long)(ecode))
+
+/* Packing errors into pointers and v.v. */
+#define NTDB_PTR_IS_ERR(ptr) \
+ unlikely((unsigned long)(ptr) >= (unsigned long)NTDB_ERR_LAST)
+#define NTDB_PTR_ERR(p) ((enum NTDB_ERROR)(long)(p))
+#define NTDB_ERR_PTR(err) ((void *)(long)(err))
+
+/* This doesn't really need to be pagesize, but we use it for similar
+ * reasons. */
+#define NTDB_PGSIZE 16384
+
+/* Common case of returning true, false or -ve error. */
+typedef int ntdb_bool_err;
+
+/* Prevent others from opening the file. */
+#define NTDB_OPEN_LOCK 0
+/* Expanding file. */
+#define NTDB_EXPANSION_LOCK 2
+/* Doing a transaction. */
+#define NTDB_TRANSACTION_LOCK 8
+/* Hash chain locks. */
+#define NTDB_HASH_LOCK_START 64
+
+/* Extend the file by at least 100 times more than needed. */
+#define NTDB_EXTENSION_FACTOR 100
+
+/* We steal this many upper bits, leaving 56 bits: a maximum offset of 64 petabytes. */
+#define NTDB_OFF_UPPER_STEAL 8
+
+/* And we use the lower bit, too. */
+#define NTDB_OFF_CHAIN_BIT 0
+
+/* Hash table sits just after the header. */
+#define NTDB_HASH_OFFSET (sizeof(struct ntdb_header))
+
+/* Additional features we understand. Currently: none. */
+#define NTDB_FEATURE_MASK ((uint64_t)0)
+
+/* The bit number where we store the extra hash bits. */
+/* Convenience mask to get actual offset. */
+#define NTDB_OFF_MASK \
+ (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - (1<<NTDB_OFF_CHAIN_BIT))
+
+/* How many buckets in a free list: see size_to_bucket(). */
+#define NTDB_FREE_BUCKETS (64 - NTDB_OFF_UPPER_STEAL)
+
+/* We have to be able to fit a free record here. */
+#define NTDB_MIN_DATA_LEN \
+ (sizeof(struct ntdb_free_record) - sizeof(struct ntdb_used_record))
+
+/* Indicates this entry is not on an flist (can happen during coalescing) */
+#define NTDB_FTABLE_NONE ((1ULL << NTDB_OFF_UPPER_STEAL) - 1)
+
+/* By default, hash is 64k bytes */
+#define NTDB_DEFAULT_HBITS 13
+
+/* Header of a used record: two 64-bit words, unpacked by the rec_*()
+ * accessors below. */
+struct ntdb_used_record {
+ /* For on-disk compatibility, we avoid bitfields:
+ magic: 16, (highest)
+ key_len_bits: 5,
+ extra_padding: 32
+ (key_len_bits is read from bit 43 and extra_padding from bit 11)
+ */
+ uint64_t magic_and_meta;
+ /* The bottom key_len_bits*2 are key length, rest is data length. */
+ uint64_t key_and_data_len;
+};
+
+/* Number of low bits of key_and_data_len that hold the key length:
+ * twice the 5-bit field stored at bit 43 of magic_and_meta. */
+static inline unsigned rec_key_bits(const struct ntdb_used_record *r)
+{
+	const uint64_t half_bits = (r->magic_and_meta >> 43) & 0x1F;
+
+	return half_bits * 2;
+}
+
+/* Key length: the bottom rec_key_bits() bits of key_and_data_len. */
+static inline uint64_t rec_key_length(const struct ntdb_used_record *r)
+{
+	const uint64_t key_mask = (1ULL << rec_key_bits(r)) - 1;
+
+	return r->key_and_data_len & key_mask;
+}
+
+/* Data length: what remains of key_and_data_len above the key bits. */
+static inline uint64_t rec_data_length(const struct ntdb_used_record *r)
+{
+	const unsigned key_bits = rec_key_bits(r);
+
+	return r->key_and_data_len >> key_bits;
+}
+
+/* Extra padding: the 32-bit field starting at bit 11 of magic_and_meta. */
+static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r)
+{
+	const uint64_t shifted = r->magic_and_meta >> 11;
+
+	return shifted & 0xFFFFFFFFULL;
+}
+
+/* Record magic: the top 16 bits of magic_and_meta. */
+static inline uint16_t rec_magic(const struct ntdb_used_record *r)
+{
+	const uint64_t top = r->magic_and_meta >> 48;
+
+	return (uint16_t)top;
+}
+
+/* Header of a free record; the packed fields are unpacked by the
+ * frec_*() accessors below. */
+struct ntdb_free_record {
+ uint64_t magic_and_prev; /* NTDB_OFF_UPPER_STEAL bits magic (top), then prev */
+ uint64_t ftable_and_len; /* Top NTDB_OFF_UPPER_STEAL bits ftable, then len. Len not counting these two fields. */
+ /* This is why the minimum record size is 8 bytes. */
+ uint64_t next;
+};
+
+/* The "prev" value: magic_and_prev with the stolen upper bits masked off. */
+static inline uint64_t frec_prev(const struct ntdb_free_record *f)
+{
+	const uint64_t low_mask = (1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1;
+
+	return f->magic_and_prev & low_mask;
+}
+
+/* The magic: the top NTDB_OFF_UPPER_STEAL bits of magic_and_prev. */
+static inline uint64_t frec_magic(const struct ntdb_free_record *f)
+{
+	const unsigned shift = 64 - NTDB_OFF_UPPER_STEAL;
+
+	return f->magic_and_prev >> shift;
+}
+
+/* The length: ftable_and_len with the stolen upper bits masked off. */
+static inline uint64_t frec_len(const struct ntdb_free_record *f)
+{
+	const uint64_t low_mask = (1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1;
+
+	return f->ftable_and_len & low_mask;
+}
+
+/* The free-table number: the top NTDB_OFF_UPPER_STEAL bits of
+ * ftable_and_len. */
+static inline unsigned frec_ftable(const struct ntdb_free_record *f)
+{
+	const unsigned shift = 64 - NTDB_OFF_UPPER_STEAL;
+
+	return f->ftable_and_len >> shift;
+}
+
+/* Header of the transaction recovery area (the ntdb_header "recovery"
+ * field is described as pointing at that area). */
+struct ntdb_recovery_record {
+ /* NOTE(review): presumably NTDB_RECOVERY_MAGIC or
+  * NTDB_RECOVERY_INVALID_MAGIC - confirm in transaction.c. */
+ uint64_t magic;
+ /* Length of record (add this header to get total length). */
+ uint64_t max_len;
+ /* Length used. */
+ uint64_t len;
+ /* Old length of file before transaction. */
+ uint64_t eof;
+};
+
+/* This is stored at the front of every database: ntdb_open() reads it
+ * from offset 0 and validates magic_food, version, hash_test and the
+ * capability list before using the file. */
+struct ntdb_header {
+ char magic_food[64]; /* for /etc/magic */
+ /* FIXME: Make me 32 bit? */
+ uint64_t version; /* version of the code */
+ uint64_t hash_bits; /* bits for toplevel hash table. */
+ uint64_t hash_test; /* result of hashing HASH_MAGIC. */
+ uint64_t hash_seed; /* "random" seed written at creation time. */
+ ntdb_off_t free_table; /* (First) free table. */
+ ntdb_off_t recovery; /* Transaction recovery area. */
+
+ uint64_t features_used; /* Features all writers understand */
+ uint64_t features_offered; /* Features offered */
+
+ uint64_t seqnum; /* Sequence number for NTDB_SEQNUM */
+
+ ntdb_off_t capabilities; /* Optional linked list of capabilities. */
+ ntdb_off_t reserved[22];
+
+ /*
+ * Hash table is next:
+ *
+ * struct ntdb_used_record htable_hdr;
+ * ntdb_off_t htable[1 << hash_bits];
+ */
+};
+
+/* A free table: a used-record header followed by NTDB_FREE_BUCKETS
+ * bucket entries. */
+struct ntdb_freetable {
+ struct ntdb_used_record hdr;
+ /* NOTE(review): presumably the offset of the next free table (see
+  * next_ftable()) - confirm in free.c. */
+ ntdb_off_t next;
+ /* One entry per bucket; see size_to_bucket() for the size mapping. */
+ ntdb_off_t buckets[NTDB_FREE_BUCKETS];
+};
+
+/* One entry in the header's linked list of capabilities. */
+struct ntdb_capability {
+ struct ntdb_used_record hdr;
+ /* Capability type; the NTDB_CAP_NOCHECK/NOWRITE/NOOPEN bits occupy
+  * the top three bits, NTDB_CAP_TYPE_MASK selects the rest. */
+ ntdb_off_t type;
+ /* Offset of the next capability record; 0 ends the list. */
+ ntdb_off_t next;
+ /* ... */
+};
+
+/* Information about a particular (locked) hash entry; passed between
+ * find_and_lock() and the *_in_hash()/*_hash() helpers declared below. */
+struct hash_info {
+ /* Full hash value of entry. */
+ uint32_t h;
+ /* Start of hash table / chain. */
+ ntdb_off_t table;
+ /* Number of entries in this table/chain. */
+ ntdb_off_t table_size;
+ /* Bucket we (or an empty space) were found in. */
+ ntdb_off_t bucket;
+ /* Old value that was in that entry (if not found) */
+ ntdb_off_t old_val;
+};
+
+/* Flags for lock requests.  The non-zero values are distinct bits and
+ * are OR-ed together by callers (e.g. NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK). */
+enum ntdb_lock_flags {
+ /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
+ NTDB_LOCK_NOWAIT = 0,
+ NTDB_LOCK_WAIT = 1,
+ /* If set, don't log an error on failure. */
+ NTDB_LOCK_PROBE = 2,
+ /* If set, don't check for recovery (used by recovery code). */
+ NTDB_LOCK_NOCHECK = 4,
+};
+
+/* Record of one lock; used for ntdb_file's allrecord_lock and for the
+ * entries of its lockrecs array. */
+struct ntdb_lock {
+ /* The context this lock is recorded for. */
+ struct ntdb_context *owner;
+ /* NOTE(review): presumably the file offset that is locked - confirm
+  * in lock.c. */
+ off_t off;
+ /* NOTE(review): presumably a nesting count - confirm in lock.c. */
+ uint32_t count;
+ /* NOTE(review): presumably F_RDLCK or F_WRLCK - confirm in lock.c. */
+ uint32_t ltype;
+};
+
+/* This is only needed for ntdb_access_commit, but used everywhere to
+ * simplify.  Entries are chained from ntdb_context.access via next. */
+struct ntdb_access_hdr {
+ struct ntdb_access_hdr *next;
+ /* NOTE(review): off/len/convert presumably mirror the arguments given
+  * to ntdb_access_read()/ntdb_access_write() - confirm in io.c. */
+ ntdb_off_t off;
+ ntdb_len_t len;
+ bool convert;
+};
+
+/* mmaps we are keeping around because they are still direct accessed;
+ * chained from ntdb_file.old_mmaps via next. */
+struct ntdb_old_mmap {
+ struct ntdb_old_mmap *next;
+
+ /* Address and size of the retained mapping. */
+ void *map_ptr;
+ ntdb_len_t map_size;
+};
+
+/* State of one underlying file, shared between the contexts that have
+ * it open: ntdb_open() looks up an existing entry by device/inode, and
+ * ntdb_close() frees it when refcnt drops to zero. */
+struct ntdb_file {
+ /* How many are sharing us? */
+ unsigned int refcnt;
+
+ /* Mmap (if any), or malloc (for NTDB_INTERNAL). */
+ void *map_ptr;
+
+ /* How much space has been mapped (<= current file size) */
+ ntdb_len_t map_size;
+
+ /* The file descriptor (-1 for NTDB_INTERNAL). */
+ int fd;
+
+ /* How many are accessing directly? */
+ unsigned int direct_count;
+
+ /* Old maps, still direct accessed. */
+ struct ntdb_old_mmap *old_mmaps;
+
+ /* Lock information */
+ pid_t locker;
+ struct ntdb_lock allrecord_lock;
+ size_t num_lockrecs;
+ struct ntdb_lock *lockrecs;
+
+ /* Identity of this file (st_dev / st_ino, as set by ntdb_open). */
+ dev_t device;
+ ino_t inode;
+};
+
+/* Table of I/O operations, reached through ntdb_context.io (which,
+ * according to the comment on that field, changes for transactions).
+ * ntdb_oob(), ntdb_read_off() and ntdb_write_off() below dispatch
+ * through it. */
+struct ntdb_methods {
+ /* NOTE(review): tread/twrite presumably read/write len bytes at an
+  * offset - confirm in io.c. */
+ enum NTDB_ERROR (*tread)(struct ntdb_context *, ntdb_off_t, void *,
+ ntdb_len_t);
+ enum NTDB_ERROR (*twrite)(struct ntdb_context *, ntdb_off_t, const void *,
+ ntdb_len_t);
+ /* Slow path of ntdb_oob(): called when the range is not known to lie
+  * within map_size, or when probing. */
+ enum NTDB_ERROR (*oob)(struct ntdb_context *, ntdb_off_t, ntdb_len_t, bool);
+ enum NTDB_ERROR (*expand_file)(struct ntdb_context *, ntdb_len_t);
+ void *(*direct)(struct ntdb_context *, ntdb_off_t, size_t, bool);
+ /* Back ends of ntdb_read_off() / ntdb_write_off(). */
+ ntdb_off_t (*read_off)(struct ntdb_context *ntdb, ntdb_off_t off);
+ enum NTDB_ERROR (*write_off)(struct ntdb_context *ntdb, ntdb_off_t off,
+ ntdb_off_t val);
+};
+
+/*
+ internal prototypes
+*/
+/*
+ * Get bits from a value: return the num bits of val starting at bit
+ * start (bit 0 is the least significant).  num must be at most 32.
+ */
+static inline uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
+{
+	assert(num <= 32);
+	/* Build the mask in 64 bits: 1U << 32 is undefined, yet num == 32
+	 * is allowed by the assertion above. */
+	return (val >> start) & ((1ULL << num) - 1);
+}
+
+
+/* hash.c: */
+uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
+ void *unused);
+
+enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
+ struct hash_info *h,
+ NTDB_DATA *kbuf, size_t *dlen);
+
+enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
+ struct hash_info *h,
+ NTDB_DATA *kbuf, size_t *dlen);
+
+/* Hash random memory. */
+uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len);
+
+/* Find and lock a hash entry (or where it would be). */
+ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
+ NTDB_DATA key,
+ int ltype,
+ struct hash_info *h,
+ struct ntdb_used_record *rec,
+ const char **rkey);
+
+enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h,
+ ntdb_off_t new_off);
+
+enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h,
+ ntdb_off_t new_off);
+
+enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h);
+
+/* For ntdb_check */
+bool is_subhash(ntdb_off_t val);
+enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
+ ntdb_off_t type);
+
+/* free.c: */
+enum NTDB_ERROR ntdb_ftable_init(struct ntdb_context *ntdb);
+
+/* check.c needs these to iterate through free lists. */
+ntdb_off_t first_ftable(struct ntdb_context *ntdb);
+ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable);
+
+/* This returns space or -ve error number. */
+ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
+ unsigned magic, bool growing);
+
+/* Put this record in a free list. */
+enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
+ ntdb_off_t off, ntdb_len_t len_with_header,
+ enum ntdb_lock_flags waitflag,
+ bool coalesce_ok);
+
+/* Set up header for a used/ftable/htable/chain/capability record. */
+enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
+ struct ntdb_used_record *rec,
+ unsigned magic, uint64_t keylen, uint64_t datalen,
+ uint64_t actuallen);
+
+/* Used by ntdb_check to verify. */
+unsigned int size_to_bucket(ntdb_len_t data_len);
+ntdb_off_t bucket_off(ntdb_off_t ftable_off, unsigned bucket);
+
+/* Used by ntdb_summary */
+ntdb_off_t dead_space(struct ntdb_context *ntdb, ntdb_off_t off);
+
+/* Adjust expansion, used by create_recovery_area */
+ntdb_off_t ntdb_expand_adjust(ntdb_off_t map_size, ntdb_off_t size);
+
+/* io.c: */
+/* Initialize ntdb->methods. */
+void ntdb_io_init(struct ntdb_context *ntdb);
+
+/* Convert endian of the buffer if required. */
+void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size);
+
+/* Unmap and try to map the ntdb. */
+enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb);
+enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb);
+
+/* Either alloc a copy, or give direct access. Release frees or noop. */
+const void *ntdb_access_read(struct ntdb_context *ntdb,
+ ntdb_off_t off, ntdb_len_t len, bool convert);
+void *ntdb_access_write(struct ntdb_context *ntdb,
+ ntdb_off_t off, ntdb_len_t len, bool convert);
+
+/* Release result of ntdb_access_read/write. */
+void ntdb_access_release(struct ntdb_context *ntdb, const void *p);
+/* Commit result of ntdb_access_write. */
+enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p);
+
+/* Clear an ondisk area. */
+enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len);
+
+/* Return the index (>= start and < end) of a non-zero offset in this array, or end if there is none. */
+ntdb_off_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
+ ntdb_off_t base,
+ uint64_t start,
+ uint64_t end);
+
+/* Return a zero offset in this array, or num. */
+ntdb_off_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
+ uint64_t num);
+
+/* Allocate and make a copy of some offset. */
+void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len);
+
+/* Writes a converted copy of a record. */
+enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+ const void *rec, size_t len);
+
+/* Reads record and converts it */
+enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
+ void *rec, size_t len);
+
+/* Bump the seqnum (caller checks for ntdb->flags & NTDB_SEQNUM) */
+void ntdb_inc_seqnum(struct ntdb_context *ntdb);
+
+/* lock.c: */
+/* Print message because another ntdb owns a lock we want. */
+enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call);
+
+/* If we fork, we no longer really own locks. */
+bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log);
+
+/* Lock/unlock a hash bucket. */
+enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
+ unsigned int hbucket,
+ int ltype);
+enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
+ unsigned int hash, int ltype);
+
+/* For closing the file. */
+void ntdb_lock_cleanup(struct ntdb_context *ntdb);
+
+/* Lock/unlock a particular free bucket. */
+enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
+ enum ntdb_lock_flags waitflag);
+void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off);
+
+/* Serialize transaction start. */
+enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype);
+void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype);
+
+/* Do we have any hash locks (ie. via ntdb_chainlock) ? */
+bool ntdb_has_hash_locks(struct ntdb_context *ntdb);
+
+/* Lock entire database. */
+enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
+ enum ntdb_lock_flags flags, bool upgradable);
+void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype);
+enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start);
+
+/* Serialize db open. */
+enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
+ int ltype, enum ntdb_lock_flags flags);
+void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype);
+bool ntdb_has_open_lock(struct ntdb_context *ntdb);
+
+/* Serialize db expand. */
+enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype);
+void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype);
+bool ntdb_has_expansion_lock(struct ntdb_context *ntdb);
+
+/* If it needs recovery, grab all the locks and do it. */
+enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb);
+
+/* Default lock and unlock functions. */
+int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *);
+int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *);
+
+/* transaction.c: */
+enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb);
+ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb);
+
+/* Per-open state of a database.  ntdb_open() allocates it (with room
+ * for a copy of the name directly behind it) and ntdb_close() frees it. */
+struct ntdb_context {
+ /* Single list of all TDBs, to detect multiple opens. */
+ struct ntdb_context *next;
+
+ /* Filename of the database. */
+ const char *name;
+
+ /* Logging function */
+ void (*log_fn)(struct ntdb_context *ntdb,
+ enum ntdb_log_level level,
+ enum NTDB_ERROR ecode,
+ const char *message,
+ void *data);
+ void *log_data;
+
+ /* Open flags passed to ntdb_open. */
+ int open_flags;
+
+ /* Low-level (fcntl) lock functions. */
+ int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *);
+ int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *);
+ void *lock_data;
+
+ /* The ntdb flags passed to ntdb_open (which may OR in further flags
+  * such as NTDB_RDONLY, NTDB_CONVERT, NTDB_NOLOCK and NTDB_NOMMAP). */
+ uint32_t flags;
+
+ /* Our statistics. */
+ struct ntdb_attribute_stats stats;
+
+ /* The actual file information */
+ struct ntdb_file *file;
+
+ /* Hash function. */
+ uint32_t (*hash_fn)(const void *key, size_t len, uint32_t seed, void *);
+ void *hash_data;
+ uint32_t hash_seed;
+ /* Bits in toplevel hash table. */
+ unsigned int hash_bits;
+
+ /* Allocate and free functions. */
+ void *(*alloc_fn)(const void *owner, size_t len, void *priv_data);
+ void *(*expand_fn)(void *old, size_t newlen, void *priv_data);
+ void (*free_fn)(void *old, void *priv_data);
+ void *alloc_data;
+
+ /* Our open hook, if any. */
+ enum NTDB_ERROR (*openhook)(int fd, void *data);
+ void *openhook_data;
+
+ /* Set if we are in a transaction. */
+ struct ntdb_transaction *transaction;
+
+ /* What free table are we using? */
+ ntdb_off_t ftable_off;
+ unsigned int ftable;
+
+ /* IO methods: changes for transactions. */
+ const struct ntdb_methods *io;
+
+ /* Direct access information */
+ struct ntdb_access_hdr *access;
+};
+
+/* ntdb.c: */
+enum NTDB_ERROR COLD PRINTF_FMT(4, 5)
+ ntdb_logerr(struct ntdb_context *ntdb,
+ enum NTDB_ERROR ecode,
+ enum ntdb_log_level level,
+ const char *fmt, ...);
+
+/*
+ * Check that [off, off + len) lies inside the mapped size.
+ *
+ * The common case (no wrap-around, range within map_size, not probing)
+ * is answered inline; everything else is handed to the io->oob method.
+ */
+static inline enum NTDB_ERROR ntdb_oob(struct ntdb_context *ntdb,
+				       ntdb_off_t off, ntdb_len_t len,
+				       bool probe)
+{
+	const ntdb_off_t end = off + len;
+
+	if (unlikely(end < off)
+	    || unlikely(end > ntdb->file->map_size)
+	    || unlikely(probe)) {
+		return ntdb->io->oob(ntdb, off, len, probe);
+	}
+	return NTDB_SUCCESS;
+}
+
+/* Convenience routine: read the offset stored at off through the
+ * context's current io methods. */
+static inline ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb,
+				       ntdb_off_t off)
+{
+	const struct ntdb_methods *methods = ntdb->io;
+
+	return methods->read_off(ntdb, off);
+}
+
+/* Write the offset val at off through the context's current io methods. */
+static inline enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb,
+					     ntdb_off_t off,
+					     ntdb_off_t val)
+{
+	const struct ntdb_methods *methods = ntdb->io;
+
+	return methods->write_off(ntdb, off, val);
+}
+
+/*
+ * Tracing hooks.  With NTDB_TRACE defined these are real functions
+ * (declared here, defined elsewhere); otherwise every hook is a macro
+ * that expands to nothing, so call sites compile away.
+ */
+#ifdef NTDB_TRACE
+void ntdb_trace(struct ntdb_context *ntdb, const char *op);
+void ntdb_trace_seqnum(struct ntdb_context *ntdb, uint32_t seqnum, const char *op);
+void ntdb_trace_open(struct ntdb_context *ntdb, const char *op,
+ unsigned hash_size, unsigned ntdb_flags, unsigned open_flags);
+void ntdb_trace_ret(struct ntdb_context *ntdb, const char *op, int ret);
+void ntdb_trace_retrec(struct ntdb_context *ntdb, const char *op, NTDB_DATA ret);
+void ntdb_trace_1rec(struct ntdb_context *ntdb, const char *op,
+ NTDB_DATA rec);
+void ntdb_trace_1rec_ret(struct ntdb_context *ntdb, const char *op,
+ NTDB_DATA rec, int ret);
+void ntdb_trace_1rec_retrec(struct ntdb_context *ntdb, const char *op,
+ NTDB_DATA rec, NTDB_DATA ret);
+void ntdb_trace_2rec_flag_ret(struct ntdb_context *ntdb, const char *op,
+ NTDB_DATA rec1, NTDB_DATA rec2, unsigned flag,
+ int ret);
+void ntdb_trace_2rec_retrec(struct ntdb_context *ntdb, const char *op,
+ NTDB_DATA rec1, NTDB_DATA rec2, NTDB_DATA ret);
+#else
+/* Tracing disabled: all hooks are empty macros. */
+#define ntdb_trace(ntdb, op)
+#define ntdb_trace_seqnum(ntdb, seqnum, op)
+#define ntdb_trace_open(ntdb, op, hash_size, ntdb_flags, open_flags)
+#define ntdb_trace_ret(ntdb, op, ret)
+#define ntdb_trace_retrec(ntdb, op, ret)
+#define ntdb_trace_1rec(ntdb, op, rec)
+#define ntdb_trace_1rec_ret(ntdb, op, rec, ret)
+#define ntdb_trace_1rec_retrec(ntdb, op, rec, ret)
+#define ntdb_trace_2rec_flag_ret(ntdb, op, rec1, rec2, flag, ret)
+#define ntdb_trace_2rec_retrec(ntdb, op, rec1, rec2, ret)
+#endif /* !NTDB_TRACE */
+
+#endif
--- /dev/null
+/*
+ Unix SMB/CIFS implementation.
+
+ Python interface to ntdb. Simply modified from tdb version.
+
+ Copyright (C) 2004-2006 Tim Potter <tpot@samba.org>
+ Copyright (C) 2007-2008 Jelmer Vernooij <jelmer@samba.org>
+ Copyright (C) 2011 Rusty Russell <rusty@rustcorp.com.au>
+
+ ** NOTE! The following LGPL license applies to the ntdb
+ ** library. This does NOT imply that all of Samba is released
+ ** under the LGPL
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <Python.h>
+#include "replace.h"
+#include "system/filesys.h"
+
+/* Include ntdb headers */
+#include <ntdb.h>
+
+/* Python object wrapping one open ntdb database. */
+typedef struct {
+ PyObject_HEAD
+ /* Handle returned by ntdb_open(); must not be used once closed is set. */
+ struct ntdb_context *ctx;
+ /* Set by close(); methods check it before touching ctx. */
+ bool closed;
+} PyNtdbObject;
+
+static PyTypeObject PyNtdb;
+
+/*
+ * Raise RuntimeError((code, message)) for an ntdb error code.
+ *
+ * PyErr_SetObject() takes its own reference to the value, so the tuple
+ * built here has to be released afterwards or it leaks on every error.
+ */
+static void PyErr_SetTDBError(enum NTDB_ERROR e)
+{
+	PyObject *val = Py_BuildValue("(i,s)", e, ntdb_errorstr(e));
+
+	if (val == NULL) {
+		/* Py_BuildValue has already set an exception. */
+		return;
+	}
+	PyErr_SetObject(PyExc_RuntimeError, val);
+	Py_DECREF(val);
+}
+
+/*
+ * Build an NTDB_DATA that points into a Python string's buffer.
+ * Nothing is copied: the result borrows the string's storage.
+ */
+static NTDB_DATA PyString_AsNtdb_Data(PyObject *data)
+{
+	NTDB_DATA view;
+	unsigned char *bytes = (unsigned char *)PyString_AsString(data);
+
+	view.dptr = bytes;
+	view.dsize = PyString_Size(data);
+	return view;
+}
+
+/*
+ * Copy an NTDB_DATA into a new Python string, then free() the ntdb
+ * buffer.  The buffer is released even when string creation fails.
+ */
+static PyObject *PyString_FromNtdb_Data(NTDB_DATA data)
+{
+	const char *bytes = (const char *)data.dptr;
+	PyObject *str = PyString_FromStringAndSize(bytes, data.dsize);
+
+	free(data.dptr);
+	return str;
+}
+
+/*
+ * If ret is not NTDB_SUCCESS, raise the matching Python exception and
+ * return NULL from the enclosing function.  Only usable in functions
+ * returning a pointer; expands to a bare if-block (no do/while wrapper).
+ */
+#define PyErr_NTDB_ERROR_IS_ERR_RAISE(ret) \
+ if (ret != NTDB_SUCCESS) { \
+ PyErr_SetTDBError(ret); \
+ return NULL; \
+ }
+
+/*
+ * Raise RuntimeError((NTDB_ERR_EINVAL, "database is closed")) and return
+ * NULL from the enclosing function when pyobj has been closed.
+ * The value tuple is released after PyErr_SetObject(), which keeps its
+ * own reference; previously it leaked on every call on a closed database.
+ */
+#define PyNtdb_CHECK_CLOSED(pyobj) \
+	if (pyobj->closed) {\
+		PyObject *closed_err_ = Py_BuildValue("(i,s)", NTDB_ERR_EINVAL, "database is closed"); \
+		if (closed_err_ != NULL) { \
+			PyErr_SetObject(PyExc_RuntimeError, closed_err_); \
+			Py_DECREF(closed_err_); \
+		} \
+		return NULL; \
+	}
+
+/*
+ * ntdb log callback: print "<db name>:<error string>:<message>" to stderr.
+ * The level and data arguments are not used.
+ */
+static void stderr_log(struct ntdb_context *ntdb,
+		       enum ntdb_log_level level,
+		       enum NTDB_ERROR ecode,
+		       const char *message,
+		       void *data)
+{
+	const char *dbname = ntdb_name(ntdb);
+	const char *errstr = ntdb_errorstr(ecode);
+
+	fprintf(stderr, "%s:%s:%s\n", dbname, errstr, message);
+}
+
+/*
+ * Open (or create) a database and wrap it in a new Ntdb object.
+ *
+ * Keyword arguments: name, ntdb_flags, flags, mode -- all optional.
+ * With no name the database is opened with NTDB_INTERNAL added to the
+ * flags.  Errors from ntdb are logged to stderr via stderr_log; a failed
+ * open raises IOError from errno.
+ */
+static PyObject *py_ntdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+	const char *kwnames[] = { "name", "ntdb_flags", "flags", "mode", NULL };
+	int ntdb_flags = NTDB_DEFAULT, flags = O_RDWR, mode = 0600;
+	char *name = NULL;
+	union ntdb_attribute logattr;
+	struct ntdb_context *ctx;
+	PyNtdbObject *obj;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii",
+					 cast_const2(char **, kwnames),
+					 &name, &ntdb_flags, &flags, &mode)) {
+		return NULL;
+	}
+
+	if (!name) {
+		name = "<internal>";
+		ntdb_flags |= NTDB_INTERNAL;
+	}
+
+	/* Single log attribute, nothing chained after it. */
+	logattr.log.fn = stderr_log;
+	logattr.log.base.next = NULL;
+	logattr.log.base.attr = NTDB_ATTRIBUTE_LOG;
+
+	ctx = ntdb_open(name, ntdb_flags, flags, mode, &logattr);
+	if (!ctx) {
+		PyErr_SetFromErrno(PyExc_IOError);
+		return NULL;
+	}
+
+	obj = PyObject_New(PyNtdbObject, &PyNtdb);
+	if (obj == NULL) {
+		/* Do not leak the database handle if allocation fails. */
+		ntdb_close(ctx);
+		return NULL;
+	}
+
+	obj->closed = false;
+	obj->ctx = ctx;
+	return (PyObject *)obj;
+}
+
+/* S.transaction_cancel(): abandon the active transaction; returns None. */
+static PyObject *obj_transaction_cancel(PyNtdbObject *self)
+{
+	PyNtdb_CHECK_CLOSED(self);
+
+	ntdb_transaction_cancel(self->ctx);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/* S.transaction_commit(): commit; raises RuntimeError on ntdb failure. */
+static PyObject *obj_transaction_commit(PyNtdbObject *self)
+{
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_transaction_commit(self->ctx);
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	Py_RETURN_NONE;
+}
+
+/* S.transaction_prepare_commit(): raises RuntimeError on ntdb failure. */
+static PyObject *obj_transaction_prepare_commit(PyNtdbObject *self)
+{
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_transaction_prepare_commit(self->ctx);
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	Py_RETURN_NONE;
+}
+
+/* S.transaction_start(): begin a transaction; RuntimeError on failure. */
+static PyObject *obj_transaction_start(PyNtdbObject *self)
+{
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_transaction_start(self->ctx);
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	Py_RETURN_NONE;
+}
+
+/* S.lock_all(): calls ntdb_lockall(); RuntimeError on failure. */
+static PyObject *obj_lockall(PyNtdbObject *self)
+{
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_lockall(self->ctx);
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	Py_RETURN_NONE;
+}
+
+/* S.unlock_all(): calls ntdb_unlockall(); returns None. */
+static PyObject *obj_unlockall(PyNtdbObject *self)
+{
+	PyNtdb_CHECK_CLOSED(self);
+
+	ntdb_unlockall(self->ctx);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/* S.read_lock_all(): calls ntdb_lockall_read(); RuntimeError on failure. */
+static PyObject *obj_lockall_read(PyNtdbObject *self)
+{
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_lockall_read(self->ctx);
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	Py_RETURN_NONE;
+}
+
+/* S.read_unlock_all(): calls ntdb_unlockall_read(); returns None. */
+static PyObject *obj_unlockall_read(PyNtdbObject *self)
+{
+	PyNtdb_CHECK_CLOSED(self);
+
+	ntdb_unlockall_read(self->ctx);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*
+ * S.close(): close the database.  Calling it again is a no-op.
+ * The object is marked closed even when ntdb_close() reports failure,
+ * in which case RuntimeError with NTDB_ERR_IO is raised.
+ */
+static PyObject *obj_close(PyNtdbObject *self)
+{
+	if (!self->closed) {
+		int rc = ntdb_close(self->ctx);
+
+		self->closed = true;
+		if (rc != 0) {
+			PyErr_SetTDBError(NTDB_ERR_IO);
+			return NULL;
+		}
+	}
+	Py_RETURN_NONE;
+}
+
+/*
+ * S.get(key) -> value, or None when the key does not exist.
+ *
+ * The key must be a str: the "S" format unit rejects anything else with
+ * TypeError.  Previously any object was accepted, and a non-string was
+ * handed to ntdb as a NULL pointer with size -1.
+ * Other ntdb failures raise RuntimeError.
+ */
+static PyObject *obj_get(PyNtdbObject *self, PyObject *args)
+{
+	NTDB_DATA key, data;
+	PyObject *py_key;
+	enum NTDB_ERROR ret;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "S", &py_key))
+		return NULL;
+
+	key = PyString_AsNtdb_Data(py_key);
+	ret = ntdb_fetch(self->ctx, key, &data);
+	if (ret == NTDB_ERR_NOEXIST)
+		Py_RETURN_NONE;
+	PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+	/* Copies the value and frees the ntdb-allocated buffer. */
+	return PyString_FromNtdb_Data(data);
+}
+
+/*
+ * S.append(key, value) -> None.
+ *
+ * Both arguments must be str ("S" format unit); other types now raise
+ * TypeError instead of reaching ntdb as a NULL pointer with size -1.
+ * ntdb failures raise RuntimeError.
+ */
+static PyObject *obj_append(PyNtdbObject *self, PyObject *args)
+{
+	NTDB_DATA key, data;
+	PyObject *py_key, *py_data;
+	enum NTDB_ERROR ret;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "SS", &py_key, &py_data))
+		return NULL;
+
+	key = PyString_AsNtdb_Data(py_key);
+	data = PyString_AsNtdb_Data(py_data);
+
+	ret = ntdb_append(self->ctx, key, data);
+	PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+	Py_RETURN_NONE;
+}
+
+/*
+ * S.firstkey() -> key, or None when ntdb_firstkey() reports NOEXIST.
+ * Other ntdb failures raise RuntimeError.
+ */
+static PyObject *obj_firstkey(PyNtdbObject *self)
+{
+	NTDB_DATA first;
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_firstkey(self->ctx, &first);
+	if (ecode == NTDB_ERR_NOEXIST) {
+		Py_RETURN_NONE;
+	}
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	return PyString_FromNtdb_Data(first);
+}
+
+/*
+ * S.nextkey(key) -> following key, or None at the end of the database.
+ *
+ * Fixes over the previous version:
+ *  - args may be NULL when the method is registered METH_NOARGS; that is
+ *    now a TypeError instead of a crash in PyArg_ParseTuple();
+ *  - the key must be a str ("S" format unit);
+ *  - the malloc() result is checked before it is written to.
+ */
+static PyObject *obj_nextkey(PyNtdbObject *self, PyObject *args)
+{
+	NTDB_DATA key;
+	PyObject *py_key;
+	enum NTDB_ERROR ret;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (args == NULL) {
+		PyErr_SetString(PyExc_TypeError, "nextkey() requires a key");
+		return NULL;
+	}
+	if (!PyArg_ParseTuple(args, "S", &py_key))
+		return NULL;
+
+	/* Malloc here, since ntdb_nextkey frees. */
+	key.dsize = PyString_Size(py_key);
+	/* Never ask for 0 bytes: malloc(0) may legitimately return NULL. */
+	key.dptr = malloc(key.dsize ? key.dsize : 1);
+	if (key.dptr == NULL)
+		return PyErr_NoMemory();
+	memcpy(key.dptr, PyString_AsString(py_key), key.dsize);
+
+	ret = ntdb_nextkey(self->ctx, &key);
+	if (ret == NTDB_ERR_NOEXIST)
+		Py_RETURN_NONE;
+	PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+
+	return PyString_FromNtdb_Data(key);
+}
+
+/*
+ * S.delete(key) -> None.
+ *
+ * The key must be a str ("S" format unit); other types now raise
+ * TypeError instead of reaching ntdb as a NULL pointer with size -1.
+ * ntdb failures (including a missing key) raise RuntimeError.
+ */
+static PyObject *obj_delete(PyNtdbObject *self, PyObject *args)
+{
+	NTDB_DATA key;
+	PyObject *py_key;
+	enum NTDB_ERROR ret;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "S", &py_key))
+		return NULL;
+
+	key = PyString_AsNtdb_Data(py_key);
+	ret = ntdb_delete(self->ctx, key);
+	PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+	Py_RETURN_NONE;
+}
+
+/*
+ * S.has_key(key) -> bool.
+ *
+ * Returns a new reference: the previous code returned Py_True/Py_False
+ * without Py_INCREF, so every call dropped a reference on the singleton.
+ * The key must be a str ("S" format unit).
+ */
+static PyObject *obj_has_key(PyNtdbObject *self, PyObject *args)
+{
+	NTDB_DATA key;
+	PyObject *py_key;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "S", &py_key))
+		return NULL;
+
+	key = PyString_AsNtdb_Data(py_key);
+	return PyBool_FromLong(ntdb_exists(self->ctx, key));
+}
+
+/*
+ * S.store(key, data, flag=REPLACE) -> None.
+ *
+ * key and data must be str ("S" format unit); other types now raise
+ * TypeError instead of reaching ntdb as a NULL pointer with size -1.
+ * flag is passed through to ntdb_store() unchanged.
+ * ntdb failures raise RuntimeError.
+ */
+static PyObject *obj_store(PyNtdbObject *self, PyObject *args)
+{
+	NTDB_DATA key, value;
+	enum NTDB_ERROR ret;
+	int flag = NTDB_REPLACE;
+	PyObject *py_key, *py_value;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "SS|i", &py_key, &py_value, &flag))
+		return NULL;
+
+	key = PyString_AsNtdb_Data(py_key);
+	value = PyString_AsNtdb_Data(py_value);
+
+	ret = ntdb_store(self->ctx, key, value, flag);
+	PyErr_NTDB_ERROR_IS_ERR_RAISE(ret);
+	Py_RETURN_NONE;
+}
+
+/* S.add_flag(flag) -> None: pass an unsigned flag to ntdb_add_flag(). */
+static PyObject *obj_add_flag(PyNtdbObject *self, PyObject *args)
+{
+	unsigned bits;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "I", &bits)) {
+		return NULL;
+	}
+
+	ntdb_add_flag(self->ctx, bits);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/* S.remove_flag(flag) -> None: pass an unsigned flag to ntdb_remove_flag(). */
+static PyObject *obj_remove_flag(PyNtdbObject *self, PyObject *args)
+{
+	unsigned bits;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyArg_ParseTuple(args, "I", &bits)) {
+		return NULL;
+	}
+
+	ntdb_remove_flag(self->ctx, bits);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/* Iterator over the keys of an Ntdb object. */
+typedef struct {
+ PyObject_HEAD
+ /* Key that the next call to next() will return (valid while !end). */
+ NTDB_DATA current;
+ /* Set once ntdb reports there are no further keys. */
+ bool end;
+ /* Database being iterated; the iterator holds a reference to it. */
+ PyNtdbObject *iteratee;
+} PyNtdbIteratorObject;
+
+/*
+ * tp_iternext: return the pending key and advance to the following one.
+ *
+ * Returning NULL without an exception signals the end of iteration.
+ *
+ * Fixes over the previous version:
+ *  - iterating a closed database raises instead of using a dead context;
+ *  - a failed string allocation is reported before the cursor moves;
+ *  - on an ntdb error the result string is released (it used to leak) and
+ *    the iterator is marked finished, so the stale key is never reused.
+ */
+static PyObject *ntdb_iter_next(PyNtdbIteratorObject *self)
+{
+	enum NTDB_ERROR e;
+	PyObject *ret;
+
+	if (self->end)
+		return NULL;
+	PyNtdb_CHECK_CLOSED(self->iteratee);
+
+	ret = PyString_FromStringAndSize((const char *)self->current.dptr,
+					 self->current.dsize);
+	if (ret == NULL)
+		return NULL;
+
+	e = ntdb_nextkey(self->iteratee->ctx, &self->current);
+	if (e != NTDB_SUCCESS) {
+		/*
+		 * ntdb_nextkey frees the key it was given (see obj_nextkey),
+		 * so there is nothing left to iterate over or to free.
+		 */
+		self->end = true;
+		self->current.dptr = NULL;
+		self->current.dsize = 0;
+		if (e != NTDB_ERR_NOEXIST) {
+			Py_DECREF(ret);
+			PyErr_SetTDBError(e);
+			return NULL;
+		}
+	}
+	return ret;
+}
+
+/*
+ * tp_dealloc: release the pending key (if iteration was abandoned before
+ * the end), drop the reference on the database and free the iterator.
+ * Assumes keys are malloc()ed, as PyString_FromNtdb_Data() already does.
+ */
+static void ntdb_iter_dealloc(PyNtdbIteratorObject *self)
+{
+	if (!self->end)
+		free(self->current.dptr);
+	Py_DECREF(self->iteratee);
+	PyObject_Del(self);
+}
+
+/* Type object for the key iterator; iter(it) returns the iterator itself. */
+PyTypeObject PyNtdbIterator = {
+ .tp_name = "Iterator",
+ .tp_basicsize = sizeof(PyNtdbIteratorObject),
+ .tp_iternext = (iternextfunc)ntdb_iter_next,
+ .tp_dealloc = (destructor)ntdb_iter_dealloc,
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_iter = PyObject_SelfIter,
+};
+
+/*
+ * iter(S) / S.iterkeys(): create a key iterator positioned on the first
+ * key (or already finished when the database is empty).
+ *
+ * On an ntdb error the half-built iterator is freed with PyObject_Del():
+ * its iteratee field is not set yet, so going through the normal dealloc
+ * would touch garbage.  Previously the iterator simply leaked.
+ */
+static PyObject *ntdb_object_iter(PyNtdbObject *self)
+{
+	PyNtdbIteratorObject *ret;
+	enum NTDB_ERROR e;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ret = PyObject_New(PyNtdbIteratorObject, &PyNtdbIterator);
+	if (!ret)
+		return NULL;
+
+	e = ntdb_firstkey(self->ctx, &ret->current);
+	if (e != NTDB_SUCCESS && e != NTDB_ERR_NOEXIST) {
+		PyObject_Del(ret);
+		PyErr_SetTDBError(e);
+		return NULL;
+	}
+	ret->end = (e == NTDB_ERR_NOEXIST);
+
+	/* The iterator keeps the database alive. */
+	Py_INCREF(self);
+	ret->iteratee = self;
+	return (PyObject *)ret;
+}
+
+/* S.clear(): calls ntdb_wipe_all(); RuntimeError on failure. */
+static PyObject *obj_clear(PyNtdbObject *self)
+{
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	ecode = ntdb_wipe_all(self->ctx);
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	Py_RETURN_NONE;
+}
+
+/* S.enable_seqnum(): add the NTDB_SEQNUM flag; returns None. */
+static PyObject *obj_enable_seqnum(PyNtdbObject *self)
+{
+	PyNtdb_CHECK_CLOSED(self);
+
+	ntdb_add_flag(self->ctx, NTDB_SEQNUM);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*
+ * Method table of the Ntdb type.
+ *
+ * Every entry's calling convention must match its C signature:
+ * obj_nextkey parses an argument tuple, so it is METH_VARARGS (it was
+ * METH_NOARGS, which made nextkey(key) uncallable and passed a NULL
+ * tuple to the parser).
+ */
+static PyMethodDef ntdb_object_methods[] = {
+	{ "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS,
+		"S.transaction_cancel() -> None\n"
+		"Cancel the currently active transaction." },
+	{ "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS,
+		"S.transaction_commit() -> None\n"
+		"Commit the currently active transaction." },
+	{ "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS,
+		"S.transaction_prepare_commit() -> None\n"
+		"Prepare to commit the currently active transaction" },
+	{ "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS,
+		"S.transaction_start() -> None\n"
+		"Start a new transaction." },
+	{ "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL },
+	{ "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL },
+	{ "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL },
+	{ "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL },
+	{ "close", (PyCFunction)obj_close, METH_NOARGS, NULL },
+	{ "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n"
+		"Fetch a value." },
+	{ "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n"
+		"Append data to an existing key." },
+	{ "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n"
+		"Return the first key in this database." },
+	{ "nextkey", (PyCFunction)obj_nextkey, METH_VARARGS, "S.nextkey(key) -> data\n"
+		"Return the next key in this database." },
+	{ "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n"
+		"Delete an entry." },
+	{ "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> bool\n"
+		"Check whether key exists in this database." },
+	{ "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None\n"
+		"Store data." },
+	{ "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" },
+	{ "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" },
+	{ "iterkeys", (PyCFunction)ntdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" },
+	{ "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n"
+		"Wipe the entire database." },
+	{ "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS,
+		"S.enable_seqnum() -> None" },
+	{ NULL }
+};
+
+/* Getter for S.flags: the value of ntdb_get_flags() as a Python int. */
+static PyObject *obj_get_flags(PyNtdbObject *self, void *closure)
+{
+	unsigned int current;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	current = ntdb_get_flags(self->ctx);
+	return PyInt_FromLong(current);
+}
+
+/* Getter for S.filename: the value of ntdb_name() as a Python string. */
+static PyObject *obj_get_filename(PyNtdbObject *self, void *closure)
+{
+	const char *dbname;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	dbname = ntdb_name(self->ctx);
+	return PyString_FromString(dbname);
+}
+
+/* Getter for S.seqnum: the value of ntdb_get_seqnum() as a Python int. */
+static PyObject *obj_get_seqnum(PyNtdbObject *self, void *closure)
+{
+	int64_t seq;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	seq = ntdb_get_seqnum(self->ctx);
+	return PyInt_FromLong(seq);
+}
+
+
+/* Read-only attributes of the Ntdb type (no setters are provided). */
+static PyGetSetDef ntdb_object_getsetters[] = {
+ { cast_const(char *, "flags"), (getter)obj_get_flags, NULL, NULL },
+ { cast_const(char *, "filename"), (getter)obj_get_filename, NULL,
+ cast_const(char *, "The filename of this NTDB file.")},
+ { cast_const(char *, "seqnum"), (getter)obj_get_seqnum, NULL, NULL },
+ { NULL }
+};
+
+/*
+ * repr(S): "Ntdb(<internal>)" for internal databases, "Ntdb('<name>')"
+ * otherwise.
+ *
+ * After close() the context must not be dereferenced, so a closed object
+ * reports "Ntdb(<closed>)" rather than reading through a dead handle.
+ */
+static PyObject *ntdb_object_repr(PyNtdbObject *self)
+{
+	if (self->closed) {
+		return PyString_FromString("Ntdb(<closed>)");
+	}
+	if (ntdb_get_flags(self->ctx) & NTDB_INTERNAL) {
+		return PyString_FromString("Ntdb(<internal>)");
+	} else {
+		return PyString_FromFormat("Ntdb('%s')", ntdb_name(self->ctx));
+	}
+}
+
+/* tp_dealloc: close the database if still open, then free the object. */
+static void ntdb_object_dealloc(PyNtdbObject *self)
+{
+	const bool still_open = !self->closed;
+
+	if (still_open) {
+		ntdb_close(self->ctx);
+	}
+	self->ob_type->tp_free(self);
+}
+
+/*
+ * S[key]: fetch a value.
+ * Raises TypeError for a non-string key, KeyError when the key does not
+ * exist and RuntimeError for any other ntdb failure.
+ */
+static PyObject *obj_getitem(PyNtdbObject *self, PyObject *key)
+{
+	NTDB_DATA lookup, found;
+	enum NTDB_ERROR ecode;
+
+	PyNtdb_CHECK_CLOSED(self);
+
+	if (!PyString_Check(key)) {
+		PyErr_SetString(PyExc_TypeError, "Expected string as key");
+		return NULL;
+	}
+
+	lookup.dsize = PyString_Size(key);
+	lookup.dptr = (unsigned char *)PyString_AsString(key);
+
+	ecode = ntdb_fetch(self->ctx, lookup, &found);
+	if (ecode == NTDB_ERR_NOEXIST) {
+		PyErr_SetString(PyExc_KeyError, "No such NTDB entry");
+		return NULL;
+	}
+	if (ecode != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ecode);
+		return NULL;
+	}
+	return PyString_FromNtdb_Data(found);
+}
+
+/*
+ * S[key] = value / del S[key].
+ *
+ * value == NULL means deletion.  Returns 0 on success, -1 with an
+ * exception set otherwise (TypeError for non-string key/value,
+ * RuntimeError for a closed database or an ntdb failure).
+ *
+ * The closed-database path now releases the tuple passed to
+ * PyErr_SetObject(), which keeps its own reference; it used to leak.
+ */
+static int obj_setitem(PyNtdbObject *self, PyObject *key, PyObject *value)
+{
+	NTDB_DATA tkey, tval;
+	enum NTDB_ERROR ret;
+
+	if (self->closed) {
+		PyObject *err = Py_BuildValue("(i,s)", NTDB_ERR_EINVAL,
+					      "database is closed");
+		if (err != NULL) {
+			PyErr_SetObject(PyExc_RuntimeError, err);
+			Py_DECREF(err);
+		}
+		return -1;
+	}
+
+	if (!PyString_Check(key)) {
+		PyErr_SetString(PyExc_TypeError, "Expected string as key");
+		return -1;
+	}
+
+	tkey = PyString_AsNtdb_Data(key);
+
+	if (value == NULL) {
+		ret = ntdb_delete(self->ctx, tkey);
+	} else {
+		if (!PyString_Check(value)) {
+			PyErr_SetString(PyExc_TypeError, "Expected string as value");
+			return -1;
+		}
+
+		tval = PyString_AsNtdb_Data(value);
+
+		ret = ntdb_store(self->ctx, tkey, tval, NTDB_REPLACE);
+	}
+
+	if (ret != NTDB_SUCCESS) {
+		PyErr_SetTDBError(ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Mapping protocol: S[key], S[key] = value and del S[key]. */
+static PyMappingMethods ntdb_object_mapping = {
+ .mp_subscript = (binaryfunc)obj_getitem,
+ .mp_ass_subscript = (objobjargproc)obj_setitem,
+};
+
+/*
+ * The ntdb.Ntdb type.  Construction goes through py_ntdb_open, so
+ * Ntdb(...) accepts the same arguments as the module-level open().
+ */
+static PyTypeObject PyNtdb = {
+ .tp_name = "ntdb.Ntdb",
+ .tp_basicsize = sizeof(PyNtdbObject),
+ .tp_methods = ntdb_object_methods,
+ .tp_getset = ntdb_object_getsetters,
+ .tp_new = py_ntdb_open,
+ .tp_doc = "A NTDB file",
+ .tp_repr = (reprfunc)ntdb_object_repr,
+ .tp_dealloc = (destructor)ntdb_object_dealloc,
+ .tp_as_mapping = &ntdb_object_mapping,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER,
+ .tp_iter = (getiterfunc)ntdb_object_iter,
+};
+
+/*
+ * Module-level functions.  The docstring lists exactly the keywords that
+ * py_ntdb_open parses (name, ntdb_flags, flags, mode); it used to
+ * advertise a hash_size parameter that does not exist.
+ */
+static PyMethodDef ntdb_methods[] = {
+	{ "open", (PyCFunction)py_ntdb_open, METH_VARARGS|METH_KEYWORDS, "open(name=None, ntdb_flags=NTDB_DEFAULT, flags=O_RDWR, mode=0600)\n"
+		"Open a NTDB file." },
+	{ NULL }
+};
+
+void initntdb(void);
+void initntdb(void)
+{
+ PyObject *m;
+
+ if (PyType_Ready(&PyNtdb) < 0)
+ return;
+
+ if (PyType_Ready(&PyNtdbIterator) < 0)
+ return;
+
+ m = Py_InitModule3("ntdb", ntdb_methods, "NTDB is a simple key-value database similar to GDBM that supports multiple writers.");
+ if (m == NULL)
+ return;
+
+ PyModule_AddObject(m, "REPLACE", PyInt_FromLong(NTDB_REPLACE));
+ PyModule_AddObject(m, "INSERT", PyInt_FromLong(NTDB_INSERT));
+ PyModule_AddObject(m, "MODIFY", PyInt_FromLong(NTDB_MODIFY));
+
+ PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(NTDB_DEFAULT));
+ PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(NTDB_INTERNAL));
+ PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(NTDB_NOLOCK));
+ PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(NTDB_NOMMAP));
+ PyModule_AddObject(m, "CONVERT", PyInt_FromLong(NTDB_CONVERT));
+ PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(NTDB_NOSYNC));
+ PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(NTDB_SEQNUM));
+ PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(NTDB_ALLOW_NESTING));
+
+ PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText"));
+
+ PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION));
+
+ Py_INCREF(&PyNtdb);
+ PyModule_AddObject(m, "Ntdb", (PyObject *)&PyNtdb);
+
+ Py_INCREF(&PyNtdbIterator);
+}
--- /dev/null
+ /*
+ Trivial Database 2: human-readable summary code
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/tally/tally.h>
+
+/* printf format of the main report produced by ntdb_summary(). */
+#define SUMMARY_FORMAT \
+ "Size of file/data: %zu/%zu\n" \
+ "Number of records: %zu\n" \
+ "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
+ "Smallest/average/largest data: %zu/%zu/%zu\n%s" \
+ "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
+ "Number of free records: %zu\n" \
+ "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
+ "Number of uncoalesced records: %zu\n" \
+ "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
+ "Toplevel hash used: %u of %u\n" \
+ "Number of hashes: %zu\n" \
+ "Smallest/average/largest hash chains: %zu/%zu/%zu\n%s" \
+ "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
+
+/* Per-free-bucket report formats (single bucket / bucket range). */
+#define BUCKET_SUMMARY_FORMAT_A \
+ "Free bucket %zu: total entries %zu.\n" \
+ "Smallest/average/largest length: %zu/%zu/%zu\n%s"
+#define BUCKET_SUMMARY_FORMAT_B \
+ "Free bucket %zu-%zu: total entries %zu.\n" \
+ "Smallest/average/largest length: %zu/%zu/%zu\n%s"
+/* One line per capability record, appended by add_capabilities(). */
+#define CAPABILITY_FORMAT \
+ "Capability %llu%s\n"
+
+/* Dimensions passed to tally_new() / tally_histogram(). */
+#define HISTO_WIDTH 70
+#define HISTO_HEIGHT 20
+
+/*
+ * Count the non-zero entries among num offsets stored at hash_off.
+ * Returns the count, or an error encoded as an offset when the table
+ * cannot be read.
+ */
+static ntdb_off_t count_hash(struct ntdb_context *ntdb,
+			     ntdb_off_t hash_off,
+			     ntdb_off_t num)
+{
+	const ntdb_off_t *table;
+	ntdb_off_t used = 0, idx;
+
+	table = ntdb_access_read(ntdb, hash_off, sizeof(*table) * num, true);
+	if (NTDB_PTR_IS_ERR(table)) {
+		return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(table));
+	}
+
+	for (idx = 0; idx < num; idx++) {
+		if (table[idx] != 0) {
+			used++;
+		}
+	}
+
+	ntdb_access_release(ntdb, table);
+	return used;
+}
+
+/*
+ * Walk every record from the end of the header to the end of the map and
+ * feed the tallies:
+ *   ftables - data length of free-table records
+ *   fr      - length of free records
+ *   keys / data / extra - key, data and padding lengths
+ *   uncoal  - runs of free records
+ *   hashes  - used entries per hash table / chain
+ *   num_caps - incremented once per capability record
+ *
+ * Returns NTDB_SUCCESS or the first error encountered.  The record header
+ * obtained with ntdb_access_read() is now released on the error returns
+ * inside the loop as well; those paths used to skip the release.
+ */
+static enum NTDB_ERROR summarize(struct ntdb_context *ntdb,
+				 struct tally *ftables,
+				 struct tally *fr,
+				 struct tally *keys,
+				 struct tally *data,
+				 struct tally *extra,
+				 struct tally *uncoal,
+				 struct tally *hashes,
+				 size_t *num_caps)
+{
+	ntdb_off_t off;
+	ntdb_len_t len;
+	ntdb_len_t unc = 0;
+
+	for (off = sizeof(struct ntdb_header);
+	     off < ntdb->file->map_size;
+	     off += len) {
+		const union {
+			struct ntdb_used_record u;
+			struct ntdb_free_record f;
+			struct ntdb_recovery_record r;
+		} *p;
+		/* We might not be able to get the whole thing. */
+		p = ntdb_access_read(ntdb, off, sizeof(p->f), true);
+		if (NTDB_PTR_IS_ERR(p)) {
+			return NTDB_PTR_ERR(p);
+		}
+		/* A non-free record ends any run of free records. */
+		if (frec_magic(&p->f) != NTDB_FREE_MAGIC) {
+			if (unc > 1) {
+				tally_add(uncoal, unc);
+				unc = 0;
+			}
+		}
+
+		if (p->r.magic == NTDB_RECOVERY_INVALID_MAGIC
+		    || p->r.magic == NTDB_RECOVERY_MAGIC) {
+			len = sizeof(p->r) + p->r.max_len;
+		} else if (frec_magic(&p->f) == NTDB_FREE_MAGIC) {
+			len = frec_len(&p->f);
+			tally_add(fr, len);
+			len += sizeof(p->u);
+			unc++;
+		} else if (rec_magic(&p->u) == NTDB_USED_MAGIC) {
+			len = sizeof(p->u)
+				+ rec_key_length(&p->u)
+				+ rec_data_length(&p->u)
+				+ rec_extra_padding(&p->u);
+
+			tally_add(keys, rec_key_length(&p->u));
+			tally_add(data, rec_data_length(&p->u));
+			tally_add(extra, rec_extra_padding(&p->u));
+		} else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) {
+			ntdb_off_t count = count_hash(ntdb,
+						      off + sizeof(p->u),
+						      1 << ntdb->hash_bits);
+			if (NTDB_OFF_IS_ERR(count)) {
+				ntdb_access_release(ntdb, p);
+				return NTDB_OFF_TO_ERR(count);
+			}
+			tally_add(hashes, count);
+			tally_add(extra, rec_extra_padding(&p->u));
+			len = sizeof(p->u)
+				+ rec_data_length(&p->u)
+				+ rec_extra_padding(&p->u);
+		} else if (rec_magic(&p->u) == NTDB_FTABLE_MAGIC) {
+			len = sizeof(p->u)
+				+ rec_data_length(&p->u)
+				+ rec_extra_padding(&p->u);
+			tally_add(ftables, rec_data_length(&p->u));
+			tally_add(extra, rec_extra_padding(&p->u));
+		} else if (rec_magic(&p->u) == NTDB_CHAIN_MAGIC) {
+			len = sizeof(p->u)
+				+ rec_data_length(&p->u)
+				+ rec_extra_padding(&p->u);
+			tally_add(hashes,
+				  rec_data_length(&p->u)/sizeof(ntdb_off_t));
+			tally_add(extra, rec_extra_padding(&p->u));
+		} else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) {
+			len = sizeof(p->u)
+				+ rec_data_length(&p->u)
+				+ rec_extra_padding(&p->u);
+			(*num_caps)++;
+		} else {
+			/* Unrecognised magic: skip over it as dead space. */
+			len = dead_space(ntdb, off);
+			if (NTDB_OFF_IS_ERR(len)) {
+				ntdb_access_release(ntdb, p);
+				return NTDB_OFF_TO_ERR(len);
+			}
+		}
+		ntdb_access_release(ntdb, p);
+	}
+	/* Account for a run of free records still open at end of file. */
+	if (unc)
+		tally_add(uncoal, unc);
+	return NTDB_SUCCESS;
+}
+
+/*
+ * Append one CAPABILITY_FORMAT line per capability record to summary.
+ *
+ * The caller must have sized summary for these lines.  Read errors end
+ * the walk silently, leaving whatever was appended so far.
+ * The capability type is cast to unsigned long long so that it matches
+ * the %llu conversion regardless of how its integer type is defined.
+ */
+static void add_capabilities(struct ntdb_context *ntdb, char *summary)
+{
+	ntdb_off_t off, next;
+	const struct ntdb_capability *cap;
+
+	/* Append to summary. */
+	summary += strlen(summary);
+
+	off = ntdb_read_off(ntdb, offsetof(struct ntdb_header, capabilities));
+	if (NTDB_OFF_IS_ERR(off))
+		return;
+
+	/* Walk capability list. */
+	for (; off; off = next) {
+		cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
+		if (NTDB_PTR_IS_ERR(cap)) {
+			break;
+		}
+		sprintf(summary, CAPABILITY_FORMAT,
+			(unsigned long long)(cap->type & NTDB_CAP_TYPE_MASK),
+			/* Noopen?  How did we get here? */
+			(cap->type & NTDB_CAP_NOOPEN) ? " (unopenable)"
+			: ((cap->type & NTDB_CAP_NOWRITE)
+			   && (cap->type & NTDB_CAP_NOCHECK)) ? " (uncheckable,read-only)"
+			: (cap->type & NTDB_CAP_NOWRITE) ? " (read-only)"
+			: (cap->type & NTDB_CAP_NOCHECK) ? " (uncheckable)"
+			: "");
+		summary += strlen(summary);
+		/* Read the link before the record is released. */
+		next = cap->next;
+		ntdb_access_release(ntdb, cap);
+	}
+}
+
+/*
+ * Produce a human-readable report about the database in *summary.
+ *
+ * Takes the all-record read lock and the expand lock, scans the file with
+ * summarize(), optionally renders histograms (NTDB_SUMMARY_HISTOGRAMS),
+ * formats the report into a buffer from ntdb->alloc_fn and appends the
+ * capability lines.  Returns NTDB_SUCCESS or an error code; *summary is
+ * only assigned once the scan has succeeded.
+ */
+_PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
+ enum ntdb_summary_flags flags,
+ char **summary)
+{
+ ntdb_len_t len;
+ size_t num_caps = 0;
+ struct tally *ftables, *freet, *keys, *data, *extra, *uncoal, *hashes;
+ char *freeg, *keysg, *datag, *extrag, *uncoalg, *hashesg;
+ enum NTDB_ERROR ecode;
+
+ /* Histogram strings stay NULL unless histograms are requested. */
+ freeg = keysg = datag = extrag = uncoalg = hashesg = NULL;
+
+ ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ ecode = ntdb_lock_expand(ntdb, F_RDLCK);
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_allrecord_unlock(ntdb, F_RDLCK);
+ return ecode;
+ }
+
+ /* Start stats off empty. */
+ ftables = tally_new(HISTO_HEIGHT);
+ freet = tally_new(HISTO_HEIGHT);
+ keys = tally_new(HISTO_HEIGHT);
+ data = tally_new(HISTO_HEIGHT);
+ extra = tally_new(HISTO_HEIGHT);
+ uncoal = tally_new(HISTO_HEIGHT);
+ hashes = tally_new(HISTO_HEIGHT);
+ if (!ftables || !freet || !keys || !data || !extra
+ || !uncoal || !hashes) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "ntdb_summary: failed to allocate"
+ " tally structures");
+ goto unlock;
+ }
+
+ ecode = summarize(ntdb, ftables, freet, keys, data, extra,
+ uncoal, hashes, &num_caps);
+ if (ecode != NTDB_SUCCESS) {
+ goto unlock;
+ }
+
+ if (flags & NTDB_SUMMARY_HISTOGRAMS) {
+ freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
+ keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
+ datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
+ extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
+ uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
+ hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
+ }
+
+ /*
+ * Upper bound for the report: the format itself, a fixed allowance
+ * for the numeric fields, every histogram, and one worst-case
+ * capability line per capability counted by summarize().
+ */
+ /* 20 is max length of a %llu. */
+ len = strlen(SUMMARY_FORMAT) + 33*20 + 1
+ + (freeg ? strlen(freeg) : 0)
+ + (keysg ? strlen(keysg) : 0)
+ + (datag ? strlen(datag) : 0)
+ + (extrag ? strlen(extrag) : 0)
+ + (uncoalg ? strlen(uncoalg) : 0)
+ + (hashesg ? strlen(hashesg) : 0)
+ + num_caps * (strlen(CAPABILITY_FORMAT) + 20
+ + strlen(" (uncheckable,read-only)"));
+
+ *summary = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data);
+ if (!*summary) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "ntdb_summary: failed to allocate string");
+ goto unlock;
+ }
+
+ /* Argument order must track SUMMARY_FORMAT exactly. */
+ sprintf(*summary, SUMMARY_FORMAT,
+ (size_t)ntdb->file->map_size,
+ tally_total(keys, NULL) + tally_total(data, NULL),
+ tally_num(keys),
+ tally_min(keys), tally_mean(keys), tally_max(keys),
+ keysg ? keysg : "",
+ tally_min(data), tally_mean(data), tally_max(data),
+ datag ? datag : "",
+ tally_min(extra), tally_mean(extra), tally_max(extra),
+ extrag ? extrag : "",
+ tally_num(freet),
+ tally_min(freet), tally_mean(freet), tally_max(freet),
+ freeg ? freeg : "",
+ tally_total(uncoal, NULL),
+ tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
+ uncoalg ? uncoalg : "",
+ (unsigned)count_hash(ntdb, sizeof(struct ntdb_header),
+ 1 << ntdb->hash_bits),
+ 1 << ntdb->hash_bits,
+ tally_num(hashes),
+ tally_min(hashes), tally_mean(hashes), tally_max(hashes),
+ hashesg ? hashesg : "",
+ tally_total(keys, NULL) * 100.0 / ntdb->file->map_size,
+ tally_total(data, NULL) * 100.0 / ntdb->file->map_size,
+ tally_total(extra, NULL) * 100.0 / ntdb->file->map_size,
+ tally_total(freet, NULL) * 100.0 / ntdb->file->map_size,
+ (tally_num(keys) + tally_num(freet) + tally_num(hashes))
+ * sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size,
+ tally_num(ftables) * sizeof(struct ntdb_freetable)
+ * 100.0 / ntdb->file->map_size,
+ (tally_total(hashes, NULL) * sizeof(ntdb_off_t)
+ + (sizeof(ntdb_off_t) << ntdb->hash_bits))
+ * 100.0 / ntdb->file->map_size);
+
+ add_capabilities(ntdb, *summary);
+
+unlock:
+ /*
+ * Common exit: release histograms and tallies, then both locks.
+ * NOTE(review): the tally objects and histogram strings come from
+ * tally_new()/tally_histogram() but are released through
+ * ntdb->free_fn -- confirm the allocators match when a custom
+ * allocator is installed.
+ */
+ ntdb->free_fn(freeg, ntdb->alloc_data);
+ ntdb->free_fn(keysg, ntdb->alloc_data);
+ ntdb->free_fn(datag, ntdb->alloc_data);
+ ntdb->free_fn(extrag, ntdb->alloc_data);
+ ntdb->free_fn(uncoalg, ntdb->alloc_data);
+ ntdb->free_fn(hashesg, ntdb->alloc_data);
+ ntdb->free_fn(freet, ntdb->alloc_data);
+ ntdb->free_fn(keys, ntdb->alloc_data);
+ ntdb->free_fn(data, ntdb->alloc_data);
+ ntdb->free_fn(extra, ntdb->alloc_data);
+ ntdb->free_fn(uncoal, ntdb->alloc_data);
+ ntdb->free_fn(ftables, ntdb->alloc_data);
+ ntdb->free_fn(hashes, ntdb->alloc_data);
+
+ ntdb_allrecord_unlock(ntdb, F_RDLCK);
+ ntdb_unlock_expand(ntdb, F_RDLCK);
+ return ecode;
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include <ccan/hash/hash.h>
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/*
+ * Hash callback that ignores ntdb's seed and uses the fixed 64-bit seed
+ * passed via p: the same seed which we saw a failure on.
+ */
+static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
+{
+	const uint64_t *fixed_seed = (uint64_t *)p;
+
+	return hash64_stable((const unsigned char *)key, len, *fixed_seed);
+}
+
+/*
+ * For each flag combination: open a fresh database with the fixed hash,
+ * store 500 records keyed by the loop counter and check that each one can
+ * be fetched back unchanged.  No log messages are expected.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ struct ntdb_context *ntdb;
+ uint64_t seed = 16014841315512641303ULL;
+ union ntdb_attribute fixed_hattr
+ = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = fixedhash,
+ .data = &seed } };
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ /* Key and data both alias j, so each record is j -> j. */
+ NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+ NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
+
+ /* Chain the test log attribute after the hash attribute. */
+ fixed_hattr.base.next = &tap_log_attr;
+
+ /* Per flag set: 1 open check + 500 * 3 checks; plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-12-store.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* We seemed to lose some keys.
+ * Insert and check they're in there! */
+ for (j = 0; j < 500; j++) {
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(d, data));
+ free(d.dptr);
+ }
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "../private.h" // For NTDB_TOPLEVEL_HASH_BITS
+#include <ccan/hash/hash.h>
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/*
+ * Rigged hash: the key is read as an unsigned int and halved, so
+ * adjacent-numbered records always clash.
+ */
+static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv)
+{
+	const unsigned int *num = (const unsigned int *)key;
+
+	return *num / 2;
+}
+
+/*
+ * Hash callback that ignores ntdb's seed and uses the fixed 64-bit seed
+ * passed via p: the same seed which we saw a failure on.
+ */
+static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
+{
+	const uint64_t *fixed_seed = (uint64_t *)p;
+
+	return hash64_stable((const unsigned char *)key, len, *fixed_seed);
+}
+
+/*
+ * Store 1000 records (i -> i) and read each one straight back.
+ * Returns false as soon as a store or fetch fails or the fetched value
+ * differs from what was stored.
+ *
+ * The fetch result is now checked: on failure d was left uninitialised
+ * and then compared and freed.  The fetched buffer is also freed on the
+ * mismatch path.
+ */
+static bool store_records(struct ntdb_context *ntdb)
+{
+	int i;
+	/* Key and data both alias i. */
+	NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+	NTDB_DATA d, data = { (unsigned char *)&i, sizeof(i) };
+
+	for (i = 0; i < 1000; i++) {
+		bool same;
+
+		if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+			return false;
+		if (ntdb_fetch(ntdb, key, &d) != NTDB_SUCCESS)
+			return false;
+		same = ntdb_deq(d, data);
+		free(d.dptr);
+		if (!same)
+			return false;
+	}
+	return true;
+}
+
+/* Exercise insert/delete/fetch on the three keys val, val + 1 and val + 2,
+ * running ntdb_check() after most mutations.  Makes 39 ok1() assertions and,
+ * when they all pass, leaves all three keys deleted again. */
+static void test_val(struct ntdb_context *ntdb, uint64_t val)
+{
+ uint64_t v;
+ /* key and data both point at v: assigning v retargets both of them. */
+ NTDB_DATA key = { (unsigned char *)&v, sizeof(v) };
+ /* NOTE(review): d is not initialised; if a fetch below fails, the
+  * free(d.dptr) that follows acts on whatever the failed call left
+  * there -- confirm ntdb_fetch() behaviour on error. */
+ NTDB_DATA d, data = { (unsigned char *)&v, sizeof(v) };
+
+ /* Insert an entry, then delete it. */
+ v = val;
+ /* Delete should fail. */
+ ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Insert should succeed. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Delete should succeed. */
+ ok1(ntdb_delete(ntdb, key) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Re-add it, then add collision. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ v = val + 1;
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Can find both? */
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+ v = val;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+
+ /* Delete second one. */
+ v = val + 1;
+ ok1(ntdb_delete(ntdb, key) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Re-add */
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Now, try deleting first one. */
+ v = val;
+ ok1(ntdb_delete(ntdb, key) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Can still find second? */
+ v = val + 1;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+
+ /* Now, this will be ideally placed. */
+ v = val + 2;
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* This will collide with both. */
+ v = val;
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+
+ /* We can still find them all, right? */
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+ v = val + 1;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+ v = val + 2;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+
+ /* And if we delete val + 1, that val + 2 should not move! */
+ v = val + 1;
+ ok1(ntdb_delete(ntdb, key) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ v = val;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+ v = val + 2;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == data.dsize);
+ free(d.dptr);
+
+ /* Delete those two, so we are empty. */
+ /* (v is still val + 2 here, so that key goes first.) */
+ ok1(ntdb_delete(ntdb, key) == 0);
+ v = val;
+ ok1(ntdb_delete(ntdb, key) == 0);
+
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+}
+
+/* Delete test driver.  For every flag combination it (1) runs test_val()
+ * three times on a database using the clash() hash, then (2) reopens the
+ * file with fixedhash(), stores 1000 records and deletes the entries listed
+ * in vals[], checking the database after each delete. */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ struct ntdb_context *ntdb;
+ /* Seed handed to fixedhash() through fixed_hattr.hash.data. */
+ uint64_t seed = 16014841315512641303ULL;
+ union ntdb_attribute clash_hattr
+ = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = clash } };
+ union ntdb_attribute fixed_hattr
+ = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = fixedhash,
+ .data = &seed } };
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ /* These two values gave trouble before. */
+ /* They are ints, matching the int keys written by store_records(). */
+ int vals[] = { 755, 837 };
+
+ /* Chain the logging attribute behind each hash attribute. */
+ clash_hattr.base.next = &tap_log_attr;
+ fixed_hattr.base.next = &tap_log_attr;
+
+ /* Per flag set: 39 checks for each of the 3 test_val() calls, 5 fixed
+  * checks (two opens, lock state, store_records, ntdb_check) and 2 per
+  * vals[] entry; plus the final log-message check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0])
+ * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Check start of hash table. */
+ test_val(ntdb, 0);
+
+ /* Check end of hash table. */
+ test_val(ntdb, -1ULL);
+
+ /* Check mixed bitpattern. */
+ test_val(ntdb, 0x123456789ABCDEF0ULL);
+
+ /* No locks may be left held once the tests are done. */
+ ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0));
+ ntdb_close(ntdb);
+
+ /* Deleting these entries in the db gave problems. */
+ ntdb = ntdb_open("run-13-delete.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ ok1(store_records(ntdb));
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
+ NTDB_DATA key;
+
+ key.dptr = (unsigned char *)&vals[j];
+ key.dsize = sizeof(vals[j]);
+ ok1(ntdb_delete(ntdb, key) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ }
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Check that ntdb_exists() tracks store and delete for the int keys 0..999:
+ * each key must be absent before its store and present after it, then
+ * present before its delete and absent after it. */
+static bool test_records(struct ntdb_context *ntdb)
+{
+	int n;
+	/* Key and value both alias the loop counter. */
+	NTDB_DATA key = { (unsigned char *)&n, sizeof(n) };
+	NTDB_DATA data = { (unsigned char *)&n, sizeof(n) };
+
+	/* Phase 1: populate. */
+	for (n = 0; n < 1000; n++) {
+		if (ntdb_exists(ntdb, key)
+		    || ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0
+		    || !ntdb_exists(ntdb, key))
+			return false;
+	}
+
+	/* Phase 2: empty it again. */
+	for (n = 0; n < 1000; n++) {
+		if (!ntdb_exists(ntdb, key)
+		    || ntdb_delete(ntdb, key) != 0
+		    || ntdb_exists(ntdb, key))
+			return false;
+	}
+	return true;
+}
+
+/* Run test_records() against a freshly created database for every
+ * supported open-flag combination. */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct ntdb_context *ntdb;
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+
+	/* Two checks per flag set (open, records) plus the final log check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		ntdb = ntdb_open("run-14-exists.ntdb", flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		/* A failed open leaves nothing to test or close: never hand
+		 * a NULL context to ntdb_close(). */
+		if (!ok1(ntdb))
+			continue;
+		ok1(test_records(ntdb));
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Fill the database with 1000 records, each keyed and valued by the native
+ * int 0..999.  Returns false as soon as one store fails. */
+static bool add_records(struct ntdb_context *ntdb)
+{
+	int n = 0;
+	/* Both descriptors alias n, so they follow it as it counts up. */
+	NTDB_DATA key = { (unsigned char *)&n, sizeof(n) };
+	NTDB_DATA data = { (unsigned char *)&n, sizeof(n) };
+
+	while (n < 1000) {
+		if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+			return false;
+		n++;
+	}
+	return true;
+}
+
+
+/* For each flag combination: fill a fresh database, wipe it, and confirm
+ * that ntdb_firstkey() no longer finds anything. */
+int main(int argc, char *argv[])
+{
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+	const unsigned int num_flags = sizeof(flags) / sizeof(flags[0]);
+	unsigned int f;
+
+	plan_tests(num_flags * 4 + 1);
+	for (f = 0; f < num_flags; f++) {
+		struct ntdb_context *ntdb;
+		NTDB_DATA key;
+
+		ntdb = ntdb_open("run-16-wipe_all.ntdb",
+				 flags[f]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		/* Nothing more to do for this flag set if the open failed. */
+		if (!ok1(ntdb))
+			continue;
+
+		ok1(add_records(ntdb));
+		ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
+		ok1(ntdb_firstkey(ntdb, &key) == NTDB_ERR_NOEXIST);
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include <ccan/hash/hash.h>
+#include <assert.h>
+
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+static const struct ntdb_context *curr_ntdb;
+static const struct ntdb_file *curr_file;
+
+static int owner_null_count,
+ owner_weird_count, alloc_count, free_count, expand_count;
+
+/* Counting allocator hook.  Every call bumps alloc_count; owner-less calls
+ * bump owner_null_count, and calls whose owner is neither of the two
+ * remembered owner-less allocations bump owner_weird_count. */
+static void *test_alloc(const void *owner, size_t len, void *priv_data)
+{
+	void *mem = malloc(len);
+
+	alloc_count++;
+
+	if (owner == NULL) {
+		owner_null_count++;
+		/* The first owner-less allocation is remembered as the
+		 * current ntdb, the next one as the file struct. */
+		if (curr_ntdb == NULL)
+			curr_ntdb = mem;
+		else if (curr_file == NULL)
+			curr_file = mem;
+	} else if (owner != curr_ntdb && owner != curr_file) {
+		owner_weird_count++;
+	}
+
+	assert(priv_data == &owner_weird_count);
+	return mem;
+}
+
+/* Counting realloc hook: records the call in expand_count, checks the
+ * private pointer, then defers to realloc(). */
+static void *test_expand(void *old, size_t newlen, void *priv_data)
+{
+	void *grown;
+
+	expand_count += 1;
+	assert(priv_data == &owner_weird_count);
+
+	grown = realloc(old, newlen);
+	return grown;
+}
+
+/* Counting free hook: only non-NULL pointers are counted in free_count, but
+ * every pointer is passed on to free(). */
+static void test_free(void *old, void *priv_data)
+{
+	assert(priv_data == &owner_weird_count);
+
+	if (old != NULL)
+		free_count += 1;
+	free(old);
+}
+
+/* Allocator-attribute test: open each database with the counting hooks
+ * test_alloc/test_expand/test_free installed, do 700 store/fetch round
+ * trips, and check the hook counters after the close. */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ union ntdb_attribute alloc_attr;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ /* key and data both alias the inner loop counter j. */
+ NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+ NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
+
+ alloc_attr.base.next = &tap_log_attr;
+ alloc_attr.base.attr = NTDB_ATTRIBUTE_ALLOCATOR;
+
+ alloc_attr.alloc.alloc = test_alloc;
+ alloc_attr.alloc.expand = test_expand;
+ alloc_attr.alloc.free = test_free;
+ /* The hooks assert that they get exactly this pointer back. */
+ alloc_attr.alloc.priv_data = &owner_weird_count;
+
+ /* Per flag set: 1 open check, 3 checks for each of the 700 records,
+  * 4 counter checks; plus the final log-message check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 700 * 3 + 4) + 1);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ /* Let test_alloc() pick up this open's owner-less allocations. */
+ curr_ntdb = NULL;
+ curr_file = NULL;
+ ntdb = ntdb_open("run-20-alloc-attr.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &alloc_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ for (j = 0; j < 700; j++) {
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(d, data));
+ /* Release through the hook so free_count stays in step
+  * with alloc_count. */
+ test_free(d.dptr, &owner_weird_count);
+ }
+ ntdb_close(ntdb);
+
+ /* The counters are never reset, so this expectation is cumulative:
+  * two owner-less allocations per open so far. */
+ ok1(owner_null_count == 2+i*2);
+ ok1(owner_weird_count == 0);
+ ok1(alloc_count == free_count);
+ ok1(expand_count != 0);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Parse callback: succeeds only when the record's data equals *expected;
+ * the key is not inspected. */
+static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data, NTDB_DATA *expected)
+{
+	return ntdb_deq(data, *expected) ? NTDB_SUCCESS : NTDB_ERR_EINVAL;
+}
+
+/* Parse callback that ignores its arguments and always returns the
+ * arbitrary value 100, so the caller can check the value is passed back
+ * unchanged. */
+static enum NTDB_ERROR parse_err(NTDB_DATA key, NTDB_DATA data, void *unused)
+{
+ return 100;
+}
+
+/* Store the int keys 0..999, then check ntdb_parse_record() on each of
+ * them, on a key that was never stored, and with a callback that returns
+ * an error value. */
+static bool test_records(struct ntdb_context *ntdb)
+{
+	int n;
+	/* Key and value both alias n. */
+	NTDB_DATA key = { (unsigned char *)&n, sizeof(n) };
+	NTDB_DATA data = { (unsigned char *)&n, sizeof(n) };
+
+	n = 0;
+	while (n < 1000) {
+		if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+			return false;
+		n++;
+	}
+
+	n = 0;
+	while (n < 1000) {
+		if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_SUCCESS)
+			return false;
+		n++;
+	}
+
+	/* n is 1000 here: a key that was never stored. */
+	if (ntdb_parse_record(ntdb, key, parse, &data) != NTDB_ERR_NOEXIST)
+		return false;
+
+	/* Test error return from parse function. */
+	n = 0;
+	return ntdb_parse_record(ntdb, key, parse_err, NULL) == 100;
+}
+
+/* Run test_records() against a freshly created database for every
+ * supported open-flag combination. */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct ntdb_context *ntdb;
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+
+	/* Two checks per flag set (open, records) plus the final log check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		ntdb = ntdb_open("api-21-parse_record.ntdb",
+				 flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		/* A failed open leaves nothing to test or close: never hand
+		 * a NULL context to ntdb_close(). */
+		if (!ok1(ntdb))
+			continue;
+		ok1(test_records(ntdb));
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "../private.h" // struct ntdb_context
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Transaction test: a store made inside a cancelled transaction must
+ * vanish, one made inside a committed transaction must persist, and no
+ * locks may be left held after either. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ unsigned char *buffer;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data;
+
+ /* 1000-byte payload; each byte is its index truncated to a char. */
+ /* NOTE(review): the malloc() result is not checked before use. */
+ buffer = malloc(1000);
+ for (i = 0; i < 1000; i++)
+ buffer[i] = i;
+
+ /* 20 checks per flag set, plus the final log-message check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-55-transaction.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ ok1(ntdb_transaction_start(ntdb) == 0);
+ data.dptr = buffer;
+ data.dsize = 1000;
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ /* From here data holds the fetched copy, which is freed below. */
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+ ok1(data.dsize == 1000);
+ ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+ free(data.dptr);
+
+ /* Cancelling a transaction means no store */
+ ntdb_transaction_cancel(ntdb);
+ ok1(ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_ERR_NOEXIST);
+
+ /* Commit the transaction. */
+ ok1(ntdb_transaction_start(ntdb) == 0);
+ /* Point data back at the source buffer before storing again. */
+ data.dptr = buffer;
+ data.dsize = 1000;
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+ ok1(data.dsize == 1000);
+ ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+ free(data.dptr);
+ ok1(ntdb_transaction_commit(ntdb) == 0);
+ ok1(ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* The record must still be there after the commit. */
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+ ok1(data.dsize == 1000);
+ ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+ free(data.dptr);
+
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ free(buffer);
+ return exit_status();
+}
--- /dev/null
+#include "../private.h" // struct ntdb_context
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Transaction test: replace a record with identical contents inside a
+ * transaction, commit, and check that the record is intact both before and
+ * after closing and reopening the file. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ /* d receives fetched copies; each one is freed after comparison. */
+ NTDB_DATA data = ntdb_mkdata("data", 4), d;
+
+ /* 12 checks per flag set, plus the final log-message check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 12 + 1);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("api-60-transaction.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+
+ ok1(ntdb_transaction_start(ntdb) == 0);
+ /* Do an identical replace. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ ok1(ntdb_transaction_commit(ntdb) == 0);
+
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(data, d));
+ free(d.dptr);
+ ntdb_close(ntdb);
+
+ /* Reopen, fetch. */
+ /* No O_CREAT|O_TRUNC this time: the existing file is reused. */
+ ntdb = ntdb_open("api-60-transaction.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(data, d));
+ free(d.dptr);
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Check ntdb_fd() for every flag combination: -1 is expected when
+ * NTDB_INTERNAL is set, otherwise a descriptor above the standard three. */
+int main(int argc, char *argv[])
+{
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+	const unsigned int num_flags = sizeof(flags) / sizeof(flags[0]);
+	unsigned int i;
+
+	plan_tests(num_flags * 3);
+	for (i = 0; i < num_flags; i++) {
+		struct ntdb_context *ntdb;
+
+		ntdb = ntdb_open("api-80-ntdb_fd.ntdb", flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		if (ok1(ntdb)) {
+			if (!(flags[i] & NTDB_INTERNAL))
+				ok1(ntdb_fd(ntdb) > 2);
+			else
+				ok1(ntdb_fd(ntdb) == -1);
+			ntdb_close(ntdb);
+			ok1(tap_log_messages == 0);
+		}
+	}
+	return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Sequence-number test: with NTDB_SEQNUM set, each store, append, delete
+ * and wipe is expected to advance ntdb_get_seqnum() by one, while a fetch
+ * leaves it alone.  File-backed databases repeat the checks inside
+ * transactions. */
+int main(int argc, char *argv[])
+{
+ unsigned int i, seq;
+ struct ntdb_context *ntdb;
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* 15 checks for every flag set, plus 13 transaction checks for each
+  * of the 4 flag sets without NTDB_INTERNAL. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("api-81-seqnum.ntdb",
+ flags[i]|NTDB_SEQNUM|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ continue;
+
+ /* seq mirrors the sequence number we expect the db to report. */
+ seq = 0;
+ ok1(ntdb_get_seqnum(ntdb) == seq);
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+ /* Fetch doesn't change seqnum */
+ if (ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS))
+ free(d.dptr);
+ ok1(ntdb_get_seqnum(ntdb) == seq);
+ ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+
+ ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+ /* Empty append works */
+ /* (the key was deleted just above, so nothing exists to append to) */
+ ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+
+ ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+
+ /* Transactions are only exercised on file-backed databases. */
+ if (!(flags[i] & NTDB_INTERNAL)) {
+ ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+ ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+ ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == ++seq);
+ /* The commit itself must not bump the number again. */
+ ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS);
+ ok1(ntdb_get_seqnum(ntdb) == seq);
+
+ ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ /* seq is not advanced here: the cancel below is expected
+  * to roll the number back. */
+ ok1(ntdb_get_seqnum(ntdb) == seq + 1);
+ ntdb_transaction_cancel(ntdb);
+ ok1(ntdb_get_seqnum(ntdb) == seq);
+ }
+ ntdb_close(ntdb);
+ ok1(tap_log_messages == 0);
+ }
+ return exit_status();
+}
--- /dev/null
+#include "../private.h" // for ntdb_fcntl_unlock
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include <errno.h>
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Lock hook with fault injection: when the int that _err points at is
+ * non-zero, fail with errno set to it; otherwise take the fcntl()
+ * byte-range lock, retrying for as long as the call is interrupted. */
+static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
+		  void *_err)
+{
+	const int *forced_errno = _err;
+	const int cmd = waitflag ? F_SETLKW : F_SETLK;
+	struct flock fl;
+	int ret;
+
+	if (*forced_errno != 0) {
+		errno = *forced_errno;
+		return -1;
+	}
+
+	for (;;) {
+		/* Refill the request on every attempt. */
+		fl.l_type = rw;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = off;
+		fl.l_len = len;
+
+		ret = fcntl(fd, cmd, &fl);
+		if (ret == 0 || errno != EINTR)
+			break;
+	}
+
+	return ret;
+}
+
+/* Error code that trav() plants into the int its last argument points at. */
+static int trav_err;
+/* Traverse callback: copy trav_err into *terr and return 0.  The record's
+ * key and data are not looked at. */
+static int trav(struct ntdb_context *ntdb, NTDB_DATA k, NTDB_DATA d, int *terr)
+{
+ *terr = trav_err;
+ return 0;
+}
+
+/* Lock-attribute test.  mylock() is installed as the lock hook and made to
+ * fail on demand through lock_err; each API is then expected to return
+ * NTDB_ERR_LOCK (or fail to open).  Throughout, EAGAIN and EINTR failures
+ * are expected to log nothing, while ENOMEM is expected to log. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ union ntdb_attribute lock_attr;
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+ /* Errno that mylock() fails with; 0 lets locking go ahead. */
+ int lock_err;
+
+ lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+ lock_attr.base.next = &tap_log_attr;
+ lock_attr.flock.lock = mylock;
+ lock_attr.flock.unlock = ntdb_fcntl_unlock;
+ lock_attr.flock.data = &lock_err;
+
+ /* 81 checks per flag set. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 81);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ NTDB_DATA d;
+
+ /* Nonblocking open; expect no error message. */
+ lock_err = EAGAIN;
+ ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
+ ok(errno == lock_err, "Errno is %u", errno);
+ ok1(!ntdb);
+ ok1(tap_log_messages == 0);
+
+ lock_err = EINTR;
+ ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
+ ok(errno == lock_err, "Errno is %u", errno);
+ ok1(!ntdb);
+ ok1(tap_log_messages == 0);
+
+ /* Forced fail open. */
+ lock_err = ENOMEM;
+ ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
+ ok1(errno == lock_err);
+ ok1(!ntdb);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ /* With no injected error the open must finally succeed. */
+ lock_err = 0;
+ ntdb = ntdb_open("run-82-lockattr.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
+ if (!ok1(ntdb))
+ continue;
+ ok1(tap_log_messages == 0);
+
+ /* Nonblocking store. */
+ lock_err = EAGAIN;
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ /* Nonblocking fetch. */
+ lock_err = EAGAIN;
+ ok1(!ntdb_exists(ntdb, key));
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(!ntdb_exists(ntdb, key));
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(!ntdb_exists(ntdb, key));
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ lock_err = EAGAIN;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ /* Nonblocking delete. */
+ lock_err = EAGAIN;
+ ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_delete(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ /* Nonblocking locks. */
+ lock_err = EAGAIN;
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ lock_err = EAGAIN;
+ ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_chainlock_read(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ lock_err = EAGAIN;
+ ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_lockall(ntdb) == NTDB_ERR_LOCK);
+ /* This actually does divide and conquer. */
+ /* Hence "> 0" here instead of "== 1". */
+ ok1(tap_log_messages > 0);
+ tap_log_messages = 0;
+
+ lock_err = EAGAIN;
+ ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_lockall_read(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages > 0);
+ tap_log_messages = 0;
+
+ /* Nonblocking traverse; go nonblock partway through. */
+ lock_err = 0;
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ /* Need two entries to ensure two lock attempts! */
+ ok1(ntdb_store(ntdb, ntdb_mkdata("key2", 4), data,
+ NTDB_REPLACE) == 0);
+ /* trav() copies trav_err into the int it is handed, which here is
+  * lock_err; presumably ntdb_traverse() passes the pointer through,
+  * so the first callback arms the failure for the next lock. */
+ trav_err = EAGAIN;
+ ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ trav_err = EINTR;
+ lock_err = 0;
+ ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ trav_err = ENOMEM;
+ lock_err = 0;
+ ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ /* Nonblocking transactions. */
+ lock_err = EAGAIN;
+ ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = EINTR;
+ ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+ lock_err = ENOMEM;
+ ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ tap_log_messages = 0;
+
+ /* Nonblocking transaction prepare. */
+ lock_err = 0;
+ ok1(ntdb_transaction_start(ntdb) == 0);
+ ok1(ntdb_delete(ntdb, key) == 0);
+
+ lock_err = EAGAIN;
+ ok1(ntdb_transaction_prepare_commit(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+
+ /* A failed prepare must be retryable once locking works again. */
+ lock_err = 0;
+ ok1(ntdb_transaction_prepare_commit(ntdb) == 0);
+ ok1(ntdb_transaction_commit(ntdb) == 0);
+
+ /* And the transaction was committed, right? */
+ ok1(!ntdb_exists(ntdb, key));
+ ntdb_close(ntdb);
+ ok1(tap_log_messages == 0);
+ }
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "external-agent.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+#define KEY_STR "key"
+
+/* Open hook emulating tdb's TDB_CLEAR_IF_FIRST: a lock is always held on
+ * the byte at offset 4, so being able to write-lock it means nobody else
+ * has the file open and it can be truncated.  arg must be this function's
+ * own address. */
+static enum NTDB_ERROR clear_if_first(int fd, void *arg)
+{
+	struct flock fl;
+
+	if (arg != clear_if_first)
+		return NTDB_ERR_CORRUPT;
+
+	fl.l_whence = SEEK_SET;
+	fl.l_start = 4;
+	fl.l_len = 1;
+
+	/* Try for the exclusive lock without waiting. */
+	fl.l_type = F_WRLCK;
+	if (fcntl(fd, F_SETLK, &fl) == 0) {
+		/* We must be first ones to open it! */
+		diag("truncating file!");
+		if (ftruncate(fd, 0) != 0)
+			return NTDB_ERR_IO;
+	}
+
+	/* Either way, end up holding a read lock, waiting if need be. */
+	fl.l_type = F_RDLCK;
+	return fcntl(fd, F_SETLKW, &fl) != 0 ? NTDB_ERR_IO : NTDB_SUCCESS;
+}
+
+/* Open-hook test: clear_if_first() is installed as the open hook and must
+ * wipe the file only when no other opener exists.  An external agent
+ * process plays the part of the second opener. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb, *ntdb2;
+ struct agent *agent;
+ union ntdb_attribute cif;
+ NTDB_DATA key = ntdb_mkdata(KEY_STR, strlen(KEY_STR));
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+ cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
+ cif.openhook.base.next = &tap_log_attr;
+ cif.openhook.fn = clear_if_first;
+ /* clear_if_first() checks that its arg is its own address. */
+ cif.openhook.data = clear_if_first;
+
+ agent = prepare_external_agent();
+ /* 16 checks per flag set. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 16);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ /* Create it */
+ /* NOTE(review): ntdb is used below even if this open failed. */
+ ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
+ ok1(ntdb);
+ ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
+ ntdb_close(ntdb);
+
+ /* Now, open with CIF, should clear it. */
+ ntdb = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR, 0, &cif);
+ ok1(ntdb);
+ ok1(!ntdb_exists(ntdb, key));
+ ok1(ntdb_store(ntdb, key, key, NTDB_REPLACE) == 0);
+
+ /* Agent should not clear it, since it's still open. */
+ ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
+ "run-83-openhook.ntdb") == SUCCESS);
+ ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
+ == SUCCESS);
+ ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
+
+ /* Still exists for us too. */
+ ok1(ntdb_exists(ntdb, key));
+
+ /* Nested open should not erase db. */
+ /* NOTE(review): ntdb2 is not checked for NULL before use. */
+ ntdb2 = ntdb_open("run-83-openhook.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR, 0, &cif);
+ ok1(ntdb_exists(ntdb2, key));
+ ok1(ntdb_exists(ntdb, key));
+ ntdb_close(ntdb2);
+
+ ok1(ntdb_exists(ntdb, key));
+
+ /* Close it, now agent should clear it. */
+ ntdb_close(ntdb);
+
+ ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
+ "run-83-openhook.ntdb") == SUCCESS);
+ /* The fetch is expected to fail: the record should be gone. */
+ ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
+ == FAILED);
+ ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);
+
+ ok1(tap_log_messages == 0);
+ }
+
+ free_external_agent(agent);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Stats-attribute test: after a large store, ntdb_get_attribute() must fill
+ * in NTDB_ATTRIBUTE_STATS counters, both for a full-sized buffer and for a
+ * buffer truncated just after the allocs field. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* 11 checks per flag set. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ union ntdb_attribute *attr;
+ NTDB_DATA key = ntdb_mkdata("key", 3), data;
+
+ /* NOTE(review): ntdb is used below even if this open failed. */
+ ntdb = ntdb_open("run-91-get-stats.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ /* Force an expansion */
+ /* NOTE(review): the calloc() result is not checked. */
+ data.dsize = 65536;
+ data.dptr = calloc(data.dsize, 1);
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ free(data.dptr);
+
+ /* Use malloc so valgrind will catch overruns. */
+ attr = malloc(sizeof *attr);
+ attr->stats.base.attr = NTDB_ATTRIBUTE_STATS;
+ attr->stats.size = sizeof(*attr);
+
+ ok1(ntdb_get_attribute(ntdb, attr) == 0);
+ ok1(attr->stats.size == sizeof(*attr));
+ ok1(attr->stats.allocs > 0);
+ ok1(attr->stats.expands > 0);
+ ok1(attr->stats.locks > 0);
+ free(attr);
+
+ /* Try short one. */
+ /* The buffer ends right after the allocs field, so only the
+  * fields up to and including allocs may be touched. */
+ attr = malloc(offsetof(struct ntdb_attribute_stats, allocs)
+ + sizeof(attr->stats.allocs));
+ attr->stats.base.attr = NTDB_ATTRIBUTE_STATS;
+ attr->stats.size = offsetof(struct ntdb_attribute_stats, allocs)
+ + sizeof(attr->stats.allocs);
+ ok1(ntdb_get_attribute(ntdb, attr) == 0);
+ /* size is expected to come back as the full size, not the short
+  * one that was passed in. */
+ ok1(attr->stats.size == sizeof(*attr));
+ ok1(attr->stats.allocs > 0);
+ free(attr);
+ ok1(tap_log_messages == 0);
+
+ ntdb_close(ntdb);
+
+ }
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/* Read-only flag test.  First a read-write database is switched to
+ * NTDB_RDONLY and back; then the file is reopened with O_RDONLY, where the
+ * flag must stay set.  Every refused operation is expected to log exactly
+ * one message, so tap_log_messages is tracked as a running total. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* 48 checks per flag set. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 48);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ /* RW -> RO */
+ ntdb = ntdb_open("run-92-get-set-readonly.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY));
+
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
+
+ ntdb_add_flag(ntdb, NTDB_RDONLY);
+ ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
+
+ /* Can't store, append, delete. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 1);
+ ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 2);
+ ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 3);
+
+ /* Can't start a transaction, or any write lock. */
+ ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 4);
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 5);
+ ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 6);
+ ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 7);
+
+ /* Back to RW. */
+ ntdb_remove_flag(ntdb, NTDB_RDONLY);
+ ok1(!(ntdb_get_flags(ntdb) & NTDB_RDONLY));
+
+ ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == NTDB_SUCCESS);
+ ok1(ntdb_append(ntdb, key, data) == NTDB_SUCCESS);
+ ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+
+ ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
+ ok1(ntdb_transaction_commit(ntdb) == NTDB_SUCCESS);
+
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS);
+ ntdb_chainunlock(ntdb, key);
+ ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+ ntdb_unlockall(ntdb);
+ ok1(ntdb_wipe_all(ntdb) == NTDB_SUCCESS);
+ /* None of the read-write operations may have logged anything. */
+ ok1(tap_log_messages == 7);
+
+ ntdb_close(ntdb);
+
+ /* RO -> RW */
+ ntdb = ntdb_open("run-92-get-set-readonly.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDONLY, 0600, &tap_log_attr);
+ ok1(ntdb);
+ ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
+
+ /* Can't store, append, delete. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 8);
+ ok1(ntdb_append(ntdb, key, data) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 9);
+ ok1(ntdb_delete(ntdb, key) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 10);
+
+ /* Can't start a transaction, or any write lock. */
+ ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 11);
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 12);
+ ok1(ntdb_lockall(ntdb) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 13);
+ ok1(ntdb_wipe_all(ntdb) == NTDB_ERR_RDONLY);
+ ok1(tap_log_messages == 14);
+
+ /* Can't remove NTDB_RDONLY since we opened with O_RDONLY */
+ /* The refused removal is itself expected to log one message. */
+ ntdb_remove_flag(ntdb, NTDB_RDONLY);
+ ok1(tap_log_messages == 15);
+ ok1(ntdb_get_flags(ntdb) & NTDB_RDONLY);
+ ntdb_close(ntdb);
+
+ ok1(tap_log_messages == 15);
+ /* Start the next flag set with a clean message count. */
+ tap_log_messages = 0;
+ }
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+#define NUM_TESTS 1000
+
+/*
+ * Insert NUM_TESTS records whose key and value are both the raw bytes of
+ * the running counter.  Returns false as soon as one insert fails.
+ */
+static bool store_all(struct ntdb_context *ntdb)
+{
+	unsigned int n = 0;
+	NTDB_DATA rec;
+
+	/* One descriptor serves as key and value: both alias n. */
+	rec.dptr = (unsigned char *)&n;
+	rec.dsize = sizeof(n);
+
+	while (n < NUM_TESTS) {
+		if (ntdb_store(ntdb, rec, rec, NTDB_INSERT) != NTDB_SUCCESS)
+			return false;
+		n++;
+	}
+	return true;
+}
+
+/*
+ * Traverse callback: decode the key as an unsigned int and tick it off in
+ * found[].  Returns -1 for a key of the wrong size, an out-of-range value
+ * or a value already seen; 0 otherwise.
+ */
+static int mark_entry(struct ntdb_context *ntdb,
+		      NTDB_DATA key, NTDB_DATA data, bool found[])
+{
+	unsigned int idx;
+
+	if (key.dsize != sizeof(idx))
+		return -1;
+	memcpy(&idx, key.dptr, sizeof(idx));
+
+	/* Range test first so found[] is never indexed out of bounds. */
+	if (idx >= NUM_TESTS || found[idx])
+		return -1;
+
+	found[idx] = true;
+	return 0;
+}
+
+/* Return true when the first num entries of found[] are all true. */
+static bool is_all_set(bool found[], unsigned int num)
+{
+	unsigned int idx = 0;
+
+	/* Walk forward until the first unset entry or the end. */
+	while (idx < num && found[idx])
+		idx++;
+
+	return idx == num;
+}
+
+/*
+ * Repack test: for each flag combination, fill a fresh database with
+ * NUM_TESTS records, repack it, then verify that it passes ntdb_check()
+ * and that a traverse reports NUM_TESTS records and marks every slot of
+ * found[].
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ bool found[NUM_TESTS];
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT
+ };
+
+ /* Six ok1() checks per flag set, plus the final log-message check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 6 + 1);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-93-repack.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ break;
+
+ ok1(store_all(ntdb));
+
+ ok1(ntdb_repack(ntdb) == NTDB_SUCCESS);
+ /* Clear the seen-markers before mark_entry() fills them in. */
+ memset(found, 0, sizeof(found));
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ ok1(ntdb_traverse(ntdb, mark_entry, found) == NUM_TESTS);
+ ok1(is_all_set(found, NUM_TESTS));
+ ntdb_close(ntdb);
+ }
+
+ /* Nothing in this test is expected to log. */
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+/* We use direct access to hand to the parse function: what if db expands? */
+#include "config.h"
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "../private.h" /* To establish size, esp. for NTDB_INTERNAL dbs */
+#include "helpapi-external-agent.h"
+
+static struct ntdb_context *ntdb;
+
+/* Report the map_size recorded in the global ntdb's file structure. */
+static off_t ntdb_size(void)
+{
+	const off_t mapped = ntdb->file->map_size;
+
+	return mapped;
+}
+
+/* State handed to parse() through ntdb_parse_record(). */
+struct parse_info {
+ /* How many more nested ntdb_parse_record() calls parse() should make. */
+ unsigned int depth;
+ /* Data the record must compare equal to on every access. */
+ NTDB_DATA expected;
+};
+
+/*
+ * ntdb_parse_record() callback.  Checks the record data against
+ * pinfo->expected, stores new records until the size reported by
+ * ntdb_size() changes, re-checks the data, and then re-enters
+ * ntdb_parse_record() on the same key while pinfo->depth is non-zero.
+ *
+ * Returns NTDB_ERR_EINVAL if the data ever differs from the expected
+ * value, the error from a nested parse, or NTDB_SUCCESS.
+ */
+static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data,
+ struct parse_info *pinfo)
+{
+ off_t flen;
+ unsigned int i;
+
+ if (!ntdb_deq(data, pinfo->expected))
+ return NTDB_ERR_EINVAL;
+
+ flen = ntdb_size();
+
+ /* Keep inserting until the size changes.  The store result is not
+ * checked; only the size change ends the loop. */
+ for (i = 0; ntdb_size() == flen; i++) {
+ NTDB_DATA add = ntdb_mkdata(&i, sizeof(i));
+
+ /* Storing from inside a parse callback is technically illegal,
+ * which is why the caller grabbed the allrecord lock. */
+ ntdb_store(ntdb, add, add, NTDB_INSERT);
+ }
+
+ /* Access the record again. */
+ if (!ntdb_deq(data, pinfo->expected))
+ return NTDB_ERR_EINVAL;
+
+ /* Recurse! Woot! */
+ if (pinfo->depth != 0) {
+ enum NTDB_ERROR ecode;
+
+ pinfo->depth--;
+ ecode = ntdb_parse_record(ntdb, key, parse, pinfo);
+ if (ecode) {
+ return ecode;
+ }
+ }
+
+ /* Access the record one more time. */
+ if (!ntdb_deq(data, pinfo->expected))
+ return NTDB_ERR_EINVAL;
+
+ return NTDB_SUCCESS;
+}
+
+/*
+ * For each flag combination: store one record, take the allrecord lock,
+ * then parse that record with a callback that grows the database and
+ * recurses (depth 3) while still reading the record data.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+	struct parse_info pinfo;
+	NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5);
+
+	/* Three ok1() checks per flag set, plus the final log check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		ntdb = ntdb_open("api-94-expand-during-parse.ntdb",
+				 flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		if (!ntdb) {
+			/* Report the failure instead of handing a NULL
+			 * handle to the calls below. */
+			fail("ntdb_open failed");
+			continue;
+		}
+		ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
+		ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+		pinfo.expected = data;
+		pinfo.depth = 3;
+		ok1(ntdb_parse_record(ntdb, key, parse, &pinfo) == NTDB_SUCCESS);
+		ntdb_unlockall(ntdb);
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+/* Make sure write operations fail during ntdb_parse(). */
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+static struct ntdb_context *ntdb;
+
+/* A rejected write may surface as either of these two errors. */
+static bool xfail(enum NTDB_ERROR ecode)
+{
+	switch (ecode) {
+	case NTDB_ERR_RDONLY:
+	case NTDB_ERR_LOCK:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * ntdb_parse_record() callback.  Verifies the record data, then tries
+ * every write-style operation on the global ntdb and requires each to
+ * fail with an error accepted by xfail().  Returns NTDB_ERR_EINVAL if
+ * the data is wrong or any operation does not fail that way.
+ *
+ * tap_log_messages is decremented after each expected failure,
+ * presumably to cancel the one message the failing call logs, so that
+ * main() can still require a final count of zero.
+ */
+static enum NTDB_ERROR parse(NTDB_DATA key, NTDB_DATA data,
+ NTDB_DATA *expected)
+{
+ NTDB_DATA add = ntdb_mkdata("another", strlen("another"));
+
+ if (!ntdb_deq(data, *expected)) {
+ return NTDB_ERR_EINVAL;
+ }
+
+ /* These should all fail.*/
+ if (!xfail(ntdb_store(ntdb, add, add, NTDB_INSERT))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_append(ntdb, key, add))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_delete(ntdb, key))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_transaction_start(ntdb))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_chainlock(ntdb, key))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_lockall(ntdb))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_wipe_all(ntdb))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ if (!xfail(ntdb_repack(ntdb))) {
+ return NTDB_ERR_EINVAL;
+ }
+ tap_log_messages--;
+
+ /* Access the record one more time. */
+ if (!ntdb_deq(data, *expected)) {
+ return NTDB_ERR_EINVAL;
+ }
+
+ return NTDB_SUCCESS;
+}
+
+/*
+ * For each flag combination: store one record, then parse it with a
+ * callback that checks every write operation is refused while the
+ * parse is in progress.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP, NTDB_CONVERT };
+	NTDB_DATA key = ntdb_mkdata("hello", 5), data = ntdb_mkdata("world", 5);
+
+	/* Two ok1() checks per flag set, plus the final log check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		ntdb = ntdb_open("api-95-read-only-during-parse.ntdb",
+				 flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		if (!ntdb) {
+			/* Report the failure instead of handing a NULL
+			 * handle to the calls below. */
+			fail("ntdb_open failed");
+			continue;
+		}
+		ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == NTDB_SUCCESS);
+		ok1(ntdb_parse_record(ntdb, key, parse, &data) == NTDB_SUCCESS);
+		ntdb_close(ntdb);
+	}
+
+	/* parse() cancels the messages logged by its expected failures. */
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "../private.h" // for ntdb_context
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/*
+ * Exercise ntdb_add_flag()/ntdb_remove_flag() with NTDB_NOLOCK,
+ * NTDB_NOMMAP and NTDB_NOSYNC.  For databases opened with NTDB_INTERNAL
+ * every add/remove is expected to log exactly one message; for the
+ * others no message is expected and the flag must be visible (or gone)
+ * in ntdb_get_flags().
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* 2 internal flag sets x 9 checks + 4 others x 17 checks + 1 final. */
+ plan_tests(87);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-add-remove-flags.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ ok1(ntdb_get_flags(ntdb) == ntdb->flags);
+ tap_log_messages = 0;
+ ntdb_add_flag(ntdb, NTDB_NOLOCK);
+ if (flags[i] & NTDB_INTERNAL)
+ ok1(tap_log_messages == 1);
+ else {
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_get_flags(ntdb) & NTDB_NOLOCK);
+ }
+
+ tap_log_messages = 0;
+ ntdb_add_flag(ntdb, NTDB_NOMMAP);
+ if (flags[i] & NTDB_INTERNAL)
+ ok1(tap_log_messages == 1);
+ else {
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_get_flags(ntdb) & NTDB_NOMMAP);
+ /* With NTDB_NOMMAP set there must be no mapping. */
+ ok1(ntdb->file->map_ptr == NULL);
+ }
+
+ tap_log_messages = 0;
+ ntdb_add_flag(ntdb, NTDB_NOSYNC);
+ if (flags[i] & NTDB_INTERNAL)
+ ok1(tap_log_messages == 1);
+ else {
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_get_flags(ntdb) & NTDB_NOSYNC);
+ }
+
+ ok1(ntdb_get_flags(ntdb) == ntdb->flags);
+
+ tap_log_messages = 0;
+ ntdb_remove_flag(ntdb, NTDB_NOLOCK);
+ if (flags[i] & NTDB_INTERNAL)
+ ok1(tap_log_messages == 1);
+ else {
+ ok1(tap_log_messages == 0);
+ ok1(!(ntdb_get_flags(ntdb) & NTDB_NOLOCK));
+ }
+
+ tap_log_messages = 0;
+ ntdb_remove_flag(ntdb, NTDB_NOMMAP);
+ if (flags[i] & NTDB_INTERNAL)
+ ok1(tap_log_messages == 1);
+ else {
+ ok1(tap_log_messages == 0);
+ ok1(!(ntdb_get_flags(ntdb) & NTDB_NOMMAP));
+ /* Removing NTDB_NOMMAP must bring the mapping back. */
+ ok1(ntdb->file->map_ptr != NULL);
+ }
+
+ tap_log_messages = 0;
+ ntdb_remove_flag(ntdb, NTDB_NOSYNC);
+ if (flags[i] & NTDB_INTERNAL)
+ ok1(tap_log_messages == 1);
+ else {
+ ok1(tap_log_messages == 0);
+ ok1(!(ntdb_get_flags(ntdb) & NTDB_NOSYNC));
+ }
+
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+#define NUM_RECORDS 1000
+
+/*
+ * Store NUM_RECORDS records, each keyed by and containing the raw bytes
+ * of its index.  Returns false on the first store that fails.
+ */
+static bool store_records(struct ntdb_context *ntdb)
+{
+	int n = 0;
+	NTDB_DATA rec;
+
+	/* One descriptor serves as key and value: both alias n. */
+	rec.dptr = (unsigned char *)&n;
+	rec.dsize = sizeof(n);
+
+	while (n < NUM_RECORDS) {
+		if (ntdb_store(ntdb, rec, rec, NTDB_REPLACE) != 0)
+			return false;
+		n++;
+	}
+	return true;
+}
+
+/*
+ * ntdb_check() callback: require an int-sized key whose data holds the
+ * same bytes, decode it, and mark that index in array[].  Any malformed,
+ * out-of-range or repeated record yields NTDB_ERR_CORRUPT.
+ */
+static enum NTDB_ERROR check(NTDB_DATA key,
+			     NTDB_DATA data,
+			     bool *array)
+{
+	int idx;
+
+	if (key.dsize != sizeof(idx)) {
+		diag("Wrong key size: %zu\n", key.dsize);
+		return NTDB_ERR_CORRUPT;
+	}
+
+	if (data.dsize != key.dsize
+	    || memcmp(key.dptr, data.dptr, sizeof(idx)) != 0) {
+		diag("Key and data differ\n");
+		return NTDB_ERR_CORRUPT;
+	}
+
+	memcpy(&idx, key.dptr, sizeof(idx));
+	if (idx < 0 || idx >= NUM_RECORDS) {
+		diag("check value %i\n", idx);
+		return NTDB_ERR_CORRUPT;
+	}
+
+	/* Each index may be reported only once. */
+	if (array[idx]) {
+		diag("Value %i already seen\n", idx);
+		return NTDB_ERR_CORRUPT;
+	}
+
+	array[idx] = true;
+	return NTDB_SUCCESS;
+}
+
+/*
+ * Intended test: for each flag combination, store NUM_RECORDS records
+ * and run ntdb_check() with the check() callback, then verify every
+ * index was reported.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ /* NOTE(review): this unconditional return makes everything below
+ * unreachable, so the test currently checks nothing and always
+ * exits 0.  Confirm whether it was disabled on purpose. */
+ return 0;
+
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ bool array[NUM_RECORDS];
+
+ ntdb = ntdb_open("run-check-callback.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ ok1(store_records(ntdb));
+ for (j = 0; j < NUM_RECORDS; j++)
+ array[j] = false;
+ ok1(ntdb_check(ntdb, check, array) == NTDB_SUCCESS);
+ /* j stops at the first index the callback never saw. */
+ for (j = 0; j < NUM_RECORDS; j++)
+ if (!array[j])
+ break;
+ ok1(j == NUM_RECORDS);
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+#define NUM_RECORDS 1000
+
+/*
+ * Store NUM_RECORDS records, each keyed by and containing the raw bytes
+ * of its index.  Returns false on the first store that fails.
+ */
+static bool store_records(struct ntdb_context *ntdb)
+{
+	int n = 0;
+	NTDB_DATA rec;
+
+	/* One descriptor serves as key and value: both alias n. */
+	rec.dptr = (unsigned char *)&n;
+	rec.dsize = sizeof(n);
+
+	while (n < NUM_RECORDS) {
+		if (ntdb_store(ntdb, rec, rec, NTDB_REPLACE) != 0)
+			return false;
+		n++;
+	}
+	return true;
+}
+
+/* Accumulator filled in by trav() during ntdb_traverse(). */
+struct trav_data {
+ /* Record values in the order the traverse delivered them. */
+ unsigned int records[NUM_RECORDS];
+ /* Number of entries of records[] filled so far. */
+ unsigned int calls;
+};
+
+/*
+ * Traverse callback: append the int stored in dbuf to td->records.
+ *
+ * Returns 0 on success.  Returns -1 without touching td if the data is
+ * not exactly int-sized or records[] is already full, so a malformed or
+ * surplus record cannot overrun val or the array (a non-zero return is
+ * presumably what makes ntdb_traverse() stop early).
+ */
+static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p)
+{
+	struct trav_data *td = p;
+	int val;
+
+	if (dbuf.dsize != sizeof(val) || td->calls >= NUM_RECORDS)
+		return -1;
+
+	memcpy(&val, dbuf.dptr, sizeof(val));
+	td->records[td->calls++] = val;
+	return 0;
+}
+
+/*
+ * Since ntdb_nextkey frees dptr, we need to clone it.
+ *
+ * Returns a copy of key whose dptr is a fresh malloc()ed buffer with the
+ * same contents.  Aborts if the allocation fails, since the test cannot
+ * continue without the copy.
+ */
+static NTDB_DATA dup_key(NTDB_DATA key)
+{
+	void *p = malloc(key.dsize);
+
+	/* malloc(0) may legitimately return NULL; only a failed non-empty
+	 * allocation is an error. */
+	if (!p && key.dsize != 0)
+		abort();
+
+	if (key.dsize != 0)
+		memcpy(p, key.dptr, key.dsize);
+	key.dptr = p;
+	return key;
+}
+
+/*
+ * Exercise ntdb_firstkey()/ntdb_nextkey() on an empty database, with one
+ * and two records, and then with NUM_RECORDS records: the iteration
+ * order must match ntdb_traverse(), nextkey must work from an arbitrary
+ * starting key, and deleting while iterating must visit every record.
+ * A fixed hash seed is installed through the attribute chain.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j;
+	int num;
+	struct trav_data td;
+	NTDB_DATA k;
+	struct ntdb_context *ntdb;
+	union ntdb_attribute seed_attr;
+	enum NTDB_ERROR ecode;
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+
+	seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
+	seed_attr.base.next = &tap_log_attr;
+	seed_attr.seed.seed = 6334326220117065685ULL;
+
+	/* Per flag set: 22 fixed checks, 3 per record in the ordered loop,
+	 * 3 per record in the delete loop, 3 per adjacent pair. */
+	plan_tests(sizeof(flags) / sizeof(flags[0])
+		   * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		ntdb = ntdb_open("api-firstkey-nextkey.ntdb",
+				 flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600,
+				 &seed_attr);
+		ok1(ntdb);
+		if (!ntdb)
+			continue;
+
+		ok1(ntdb_firstkey(ntdb, &k) == NTDB_ERR_NOEXIST);
+
+		/* One entry... */
+		k.dptr = (unsigned char *)&num;
+		k.dsize = sizeof(num);
+		num = 0;
+		ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0);
+		ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS);
+		ok1(k.dsize == sizeof(num));
+		ok1(memcmp(k.dptr, &num, sizeof(num)) == 0);
+		ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST);
+
+		/* Two entries. */
+		k.dptr = (unsigned char *)&num;
+		k.dsize = sizeof(num);
+		num = 1;
+		ok1(ntdb_store(ntdb, k, k, NTDB_INSERT) == 0);
+		ok1(ntdb_firstkey(ntdb, &k) == NTDB_SUCCESS);
+		ok1(k.dsize == sizeof(num));
+		memcpy(&num, k.dptr, sizeof(num));
+		ok1(num == 0 || num == 1);
+		ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS);
+		ok1(k.dsize == sizeof(j));
+		memcpy(&j, k.dptr, sizeof(j));
+		ok1(j == 0 || j == 1);
+		ok1(j != num);
+		ok1(ntdb_nextkey(ntdb, &k) == NTDB_ERR_NOEXIST);
+
+		/* Clean up. */
+		k.dptr = (unsigned char *)&num;
+		k.dsize = sizeof(num);
+		num = 0;
+		ok1(ntdb_delete(ntdb, k) == 0);
+		num = 1;
+		ok1(ntdb_delete(ntdb, k) == 0);
+
+		/* Now lots of records. */
+		ok1(store_records(ntdb));
+		td.calls = 0;
+
+		num = ntdb_traverse(ntdb, trav, &td);
+		ok1(num == NUM_RECORDS);
+		ok1(td.calls == NUM_RECORDS);
+
+		/* Simple loop should match ntdb_traverse */
+		for (j = 0, ecode = ntdb_firstkey(ntdb, &k); j < td.calls; j++) {
+			int val;
+
+			ok1(ecode == NTDB_SUCCESS);
+			ok1(k.dsize == sizeof(val));
+			memcpy(&val, k.dptr, k.dsize);
+			ok1(td.records[j] == val);
+			ecode = ntdb_nextkey(ntdb, &k);
+		}
+
+		/* But arbitrary orderings should work too. */
+		for (j = td.calls-1; j > 0; j--) {
+			k.dptr = (unsigned char *)&td.records[j-1];
+			k.dsize = sizeof(td.records[j-1]);
+			/* nextkey takes ownership of dptr, so hand it a
+			 * heap copy rather than a pointer into td. */
+			k = dup_key(k);
+			ok1(ntdb_nextkey(ntdb, &k) == NTDB_SUCCESS);
+			ok1(k.dsize == sizeof(td.records[j]));
+			ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0);
+			free(k.dptr);
+		}
+
+		/* Even delete should work. */
+		for (j = 0, ecode = ntdb_firstkey(ntdb, &k);
+		     ecode != NTDB_ERR_NOEXIST;
+		     j++) {
+			ok1(ecode == NTDB_SUCCESS);
+			ok1(k.dsize == 4);
+			ok1(ntdb_delete(ntdb, k) == 0);
+			ecode = ntdb_nextkey(ntdb, &k);
+		}
+
+		diag("delete using first/nextkey gave %u of %u records",
+		     j, NUM_RECORDS);
+		ok1(j == NUM_RECORDS);
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+/* Test forking while holding lock.
+ *
+ * There are only five ways to do this currently:
+ * (1) grab a ntdb_chainlock, then fork.
+ * (2) grab a ntdb_lockall, then fork.
+ * (3) grab a ntdb_lockall_read, then fork.
+ * (4) start a transaction, then fork.
+ * (5) fork from inside a ntdb_parse() callback.
+ *
+ * Note that we don't hold a lock across ntdb_traverse callbacks, so
+ * that doesn't matter.
+ */
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+static bool am_child = false;
+
+/*
+ * ntdb_parse_record() callback that forks in the middle of the parse.
+ *
+ * Child: sets am_child, requires store and fetch to fail with
+ * NTDB_ERR_LOCK and exactly two log messages, then returns so that
+ * main() can finish the parse and exit on the child's behalf.
+ * Parent: waits for the child and checks that it exited with status 0.
+ */
+static enum NTDB_ERROR fork_in_parse(NTDB_DATA key, NTDB_DATA data,
+ struct ntdb_context *ntdb)
+{
+ int status;
+
+ if (fork() == 0) {
+ am_child = true;
+
+ /* We expect this to fail. */
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
+ exit(1);
+
+ if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
+ exit(1);
+
+ if (tap_log_messages != 2)
+ exit(2);
+
+ return NTDB_SUCCESS;
+ }
+ wait(&status);
+ ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ return NTDB_SUCCESS;
+}
+
+/*
+ * For each flag combination, fork while holding each kind of lock
+ * (chainlock, lockall, lockall_read, transaction, inside a parse
+ * callback).  In every case the child must see store and fetch fail
+ * with NTDB_ERR_LOCK and log two messages, and must be able to release
+ * the lock and close the database without further messages.  The child
+ * reports through its exit status: 0 means success, non-zero identifies
+ * the failed step.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+
+ /* 14 checks per flag set, one of them made inside fork_in_parse(). */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ int status;
+
+ tap_log_messages = 0;
+
+ ntdb = ntdb_open("run-fork-test.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ continue;
+
+ /* Put a record in here. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_SUCCESS);
+
+ /* Case 1: fork while holding a chainlock. */
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_SUCCESS);
+ if (fork() == 0) {
+ /* We expect this to fail. */
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (tap_log_messages != 2)
+ return 2;
+
+ /* Child can do this without any complaints. */
+ ntdb_chainunlock(ntdb, key);
+ if (tap_log_messages != 2)
+ return 3;
+ ntdb_close(ntdb);
+ if (tap_log_messages != 2)
+ return 4;
+ return 0;
+ }
+ wait(&status);
+ ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ ntdb_chainunlock(ntdb, key);
+
+ /* Case 2: fork while holding the allrecord write lock. */
+ ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+ if (fork() == 0) {
+ /* We expect this to fail. */
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (tap_log_messages != 2)
+ return 2;
+
+ /* Child can do this without any complaints. */
+ ntdb_unlockall(ntdb);
+ if (tap_log_messages != 2)
+ return 3;
+ ntdb_close(ntdb);
+ if (tap_log_messages != 2)
+ return 4;
+ return 0;
+ }
+ wait(&status);
+ ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ ntdb_unlockall(ntdb);
+
+ /* Case 3: fork while holding the allrecord read lock. */
+ ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
+ if (fork() == 0) {
+ /* We expect this to fail. */
+ /* This would always fail anyway... */
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (tap_log_messages != 2)
+ return 2;
+
+ /* Child can do this without any complaints. */
+ ntdb_unlockall_read(ntdb);
+ if (tap_log_messages != 2)
+ return 3;
+ ntdb_close(ntdb);
+ if (tap_log_messages != 2)
+ return 4;
+ return 0;
+ }
+ wait(&status);
+ ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ ntdb_unlockall_read(ntdb);
+
+ /* Case 4: fork inside a transaction. */
+ ok1(ntdb_transaction_start(ntdb) == NTDB_SUCCESS);
+ /* If the transaction is empty, a no-op "commit" succeeds, so
+ * make a change first. */
+ ok1(ntdb_delete(ntdb, key) == NTDB_SUCCESS);
+ if (fork() == 0) {
+ int last_log_messages;
+
+ /* We expect this to fail. */
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (ntdb_fetch(ntdb, key, &data) != NTDB_ERR_LOCK)
+ return 1;
+
+ if (tap_log_messages != 2)
+ return 2;
+
+ if (ntdb_transaction_prepare_commit(ntdb)
+ != NTDB_ERR_LOCK)
+ return 3;
+ /* The failed prepare must have logged something. */
+ if (tap_log_messages == 2)
+ return 4;
+
+ last_log_messages = tap_log_messages;
+ /* Child can do this without any complaints. */
+ ntdb_transaction_cancel(ntdb);
+ if (tap_log_messages != last_log_messages)
+ return 4;
+ ntdb_close(ntdb);
+ if (tap_log_messages != last_log_messages)
+ return 4;
+ return 0;
+ }
+ wait(&status);
+ ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ ntdb_transaction_cancel(ntdb);
+
+ /* Case 5: fork from inside a parse callback.  Both parent and
+ * child return from ntdb_parse_record() here. */
+ ok1(ntdb_parse_record(ntdb, key, fork_in_parse, ntdb)
+ == NTDB_SUCCESS);
+ ntdb_close(ntdb);
+ if (am_child) {
+ /* Child can return from parse without complaints. */
+ if (tap_log_messages != 2)
+ exit(3);
+ exit(0);
+ }
+ ok1(tap_log_messages == 0);
+ }
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include <limits.h>
+#include "logging.h"
+#include "external-agent.h"
+#include "helpapi-external-agent.h"
+
+#undef alarm
+#define alarm fast_alarm
+
+/*
+ * Speed things up by doing things in milliseconds.
+ *
+ * Stand-in for alarm(): arms a one-shot ITIMER_REAL that fires after
+ * milli_seconds milliseconds (0 disarms it).  Always returns 0; the
+ * setitimer() result is not reported.
+ */
+static unsigned int fast_alarm(unsigned int milli_seconds)
+{
+	struct itimerval it;
+
+	it.it_interval.tv_sec = it.it_interval.tv_usec = 0;
+	it.it_value.tv_sec = milli_seconds / 1000;
+	/* Only the sub-second remainder goes here: tv_usec must stay
+	 * below one million or setitimer() rejects the value. */
+	it.it_value.tv_usec = (milli_seconds % 1000) * 1000;
+	setitimer(ITIMER_REAL, &it, NULL);
+	return 0;
+}
+
+#define CatchSignal(sig, handler) signal((sig), (handler))
+
+/* Signal handler that deliberately ignores the signal it receives. */
+static void do_nothing(int signum)
+{
+}
+
+/* This example code is taken from SAMBA, so try not to change it. */
+static struct flock flock_struct;
+
+/* Return a value which is none of v1, v2 or v3.
+ * Starts from a mix of the three inputs and counts upwards until the
+ * candidate differs from all of them. */
+static inline short int invalid_value(short int v1, short int v2, short int v3)
+{
+ short int try = (v1+v2+v3)^((v1+v2+v3) << 16);
+ while (try == v1 || try == v2 || try == v3)
+ try++;
+ return try;
+}
+
+/* We invalidate in as many ways as we can, so the OS rejects it.
+ * Installed as the SIGALRM handler by timeout_lock(): it overwrites
+ * every field of the shared flock_struct with a bogus value. */
+static void invalidate_flock_struct(int signum)
+{
+ flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK);
+ flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END);
+ flock_struct.l_start = -1;
+ /* A large negative. */
+ flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1);
+}
+
+/*
+ * Locking callback installed through NTDB_ATTRIBUTE_FLOCK: take an
+ * fcntl() lock of type rw on [off, off+len) but give up after a timeout.
+ *
+ * _timeout points to an unsigned int that is passed to alarm(); this
+ * file redefines alarm to fast_alarm(), so the value is in milliseconds.
+ * When the alarm fires, invalidate_flock_struct() corrupts flock_struct
+ * so the pending/next fcntl() fails, and the changed l_len tells this
+ * function the failure was a timeout.
+ *
+ * Returns fcntl()'s result.  errno is EINTR on timeout, the fcntl()
+ * errno on other failures, and the value it had on entry on success.
+ */
+static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
+ void *_timeout)
+{
+ int ret, saved_errno = errno;
+ unsigned int timeout = *(unsigned int *)_timeout;
+
+ flock_struct.l_type = rw;
+ flock_struct.l_whence = SEEK_SET;
+ flock_struct.l_start = off;
+ flock_struct.l_len = len;
+
+ CatchSignal(SIGALRM, invalidate_flock_struct);
+ alarm(timeout);
+
+ for (;;) {
+ if (waitflag)
+ ret = fcntl(fd, F_SETLKW, &flock_struct);
+ else
+ ret = fcntl(fd, F_SETLK, &flock_struct);
+
+ if (ret == 0)
+ break;
+
+ /* Not signalled? Something else went wrong. */
+ if (flock_struct.l_len == len) {
+ /* Interrupted by some other signal, or contended: retry. */
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ saved_errno = errno;
+ break;
+ } else {
+ /* The SIGALRM handler ran: report a timeout. */
+ saved_errno = EINTR;
+ break;
+ }
+ }
+
+ /* Disarm the timer before returning. */
+ alarm(0);
+ errno = saved_errno;
+ return ret;
+}
+
+/*
+ * Take a chain lock on key (read lock when rw_type is F_RDLCK, write
+ * lock otherwise).  With a non-zero timeout the database's flock
+ * attribute is temporarily redirected to timeout_lock() for the duration
+ * of the call and unset again afterwards.
+ *
+ * Returns the enum NTDB_ERROR code of the first failing step, or of the
+ * chainlock call itself.
+ */
+static int ntdb_chainlock_with_timeout_internal(struct ntdb_context *ntdb,
+ NTDB_DATA key,
+ unsigned int timeout,
+ int rw_type)
+{
+ union ntdb_attribute locking;
+ enum NTDB_ERROR ecode;
+
+ if (timeout) {
+ /* Fetch the current flock attribute so that only the lock
+ * function and its data pointer are replaced. */
+ locking.base.attr = NTDB_ATTRIBUTE_FLOCK;
+ ecode = ntdb_get_attribute(ntdb, &locking);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+
+ /* Replace locking function with our own. */
+ locking.flock.data = &timeout;
+ locking.flock.lock = timeout_lock;
+
+ ecode = ntdb_set_attribute(ntdb, &locking);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+ }
+ if (rw_type == F_RDLCK)
+ ecode = ntdb_chainlock_read(ntdb, key);
+ else
+ ecode = ntdb_chainlock(ntdb, key);
+
+ if (timeout) {
+ /* &timeout is a local, so the attribute must not outlive us. */
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+ }
+ return ecode;
+}
+
+/*
+ * Lock-timeout test.  For each flag combination: uncontended read and
+ * write chain locks with a timeout must succeed silently; once the
+ * external agent holds a transaction on the same file, a write chain
+ * lock with a timeout must fail with NTDB_ERR_LOCK, also when another
+ * signal (SIGUSR1) is requested from the agent first.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ NTDB_DATA key = ntdb_mkdata("hello", 5);
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ struct agent *agent;
+
+ /* 15 ok1() checks per flag set. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 15);
+
+ agent = prepare_external_agent();
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ enum NTDB_ERROR ecode;
+ ntdb = ntdb_open("run-locktimeout.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ break;
+
+ /* Simple cases: should succeed. */
+ ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
+ F_RDLCK);
+ ok1(ecode == NTDB_SUCCESS);
+ ok1(tap_log_messages == 0);
+
+ ntdb_chainunlock_read(ntdb, key);
+ ok1(tap_log_messages == 0);
+
+ ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
+ F_WRLCK);
+ ok1(ecode == NTDB_SUCCESS);
+ ok1(tap_log_messages == 0);
+
+ ntdb_chainunlock(ntdb, key);
+ ok1(tap_log_messages == 0);
+
+ /* OK, get agent to start transaction, then we should time out. */
+ ok1(external_agent_operation(agent, OPEN, "run-locktimeout.ntdb")
+ == SUCCESS);
+ ok1(external_agent_operation(agent, TRANSACTION_START, "")
+ == SUCCESS);
+ ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
+ F_WRLCK);
+ ok1(ecode == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+
+ /* Even if we get a different signal, should be fine. */
+ CatchSignal(SIGUSR1, do_nothing);
+ external_agent_operation(agent, SEND_SIGNAL, "");
+ ecode = ntdb_chainlock_with_timeout_internal(ntdb, key, 20,
+ F_WRLCK);
+ ok1(ecode == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 0);
+
+ /* Let the agent finish so the next flag set starts clean. */
+ ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "")
+ == SUCCESS);
+ ok1(external_agent_operation(agent, CLOSE, "")
+ == SUCCESS);
+ ntdb_close(ntdb);
+ }
+ free_external_agent(agent);
+ return exit_status();
+}
--- /dev/null
+/* Another test revealed that we lost an entry. This reproduces it. */
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include <ccan/hash/hash.h>
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+#define NUM_RECORDS 1189
+
+/*
+ * Hash callback that ignores the seed it is given and always hashes with
+ * the seed the original failure was observed on.
+ */
+static uint32_t failhash(const void *key, size_t len, uint32_t seed, void *p)
+{
+	const uint64_t failing_seed = 699537674708983027ULL;
+	const unsigned char *bytes = (const unsigned char *)key;
+
+	return hash64_stable(bytes, len, failing_seed);
+}
+
+/*
+ * Reproduce a lost-entry failure: with the fixed failhash() function,
+ * store NUM_RECORDS int-keyed records into an NTDB_INTERNAL database and
+ * require that ntdb_check() still passes.
+ */
+int main(int argc, char *argv[])
+{
+ int i;
+ struct ntdb_context *ntdb;
+ /* Key and data both alias the loop counter i. */
+ NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+ NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+ union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = failhash } };
+
+ /* Chain the logging attribute behind the hash attribute. */
+ hattr.base.next = &tap_log_attr;
+ /* Open check, one check per store, then the check and log checks. */
+ plan_tests(1 + NUM_RECORDS + 2);
+
+ ntdb = ntdb_open("run-missing-entries.ntdb", NTDB_INTERNAL,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+ if (ok1(ntdb)) {
+ for (i = 0; i < NUM_RECORDS; i++) {
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ }
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include "logging.h"
+#include "../private.h"
+#include "helpapi-external-agent.h"
+
+/*
+ * Open the same database file twice in one process and check that the
+ * two handles see each other's changes, that closing one leaves the
+ * other usable, and that a transaction on one handle makes operations on
+ * the other fail with NTDB_ERR_LOCK (one log message each).
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb, *ntdb2;
+ /* Key and data both alias the loop counter i. */
+ NTDB_DATA key = { (unsigned char *)&i, sizeof(i) };
+ NTDB_DATA data = { (unsigned char *)&i, sizeof(i) };
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* 30 ok1() checks per flag set. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 30);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-open-multiple-times.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Second handle on the same file; note: no O_TRUNC here. */
+ ntdb2 = ntdb_open("run-open-multiple-times.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT, 0600, &tap_log_attr);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_check(ntdb2, NULL, NULL) == 0);
+ ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr);
+
+ /* Store in one, fetch in the other. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(d, data));
+ free(d.dptr);
+
+ /* Vice versa, with delete. */
+ ok1(ntdb_delete(ntdb2, key) == 0);
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST);
+
+ /* OK, now close first one, check second still good. */
+ ok1(ntdb_close(ntdb) == 0);
+
+ ok1((flags[i] & NTDB_NOMMAP) || ntdb2->file->map_ptr);
+ ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == 0);
+ ok1(ntdb_fetch(ntdb2, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(d, data));
+ free(d.dptr);
+
+ /* Reopen */
+ ntdb = ntdb_open("run-open-multiple-times.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT, 0600, &tap_log_attr);
+ ok1(ntdb);
+
+ ok1(ntdb_transaction_start(ntdb2) == 0);
+
+ /* Anything in the other one should fail. */
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 1);
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 2);
+ ok1(ntdb_transaction_start(ntdb) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 3);
+ ok1(ntdb_chainlock(ntdb, key) == NTDB_ERR_LOCK);
+ ok1(tap_log_messages == 4);
+
+ /* Transaction should work as normal. */
+ ok1(ntdb_store(ntdb2, key, data, NTDB_REPLACE) == NTDB_SUCCESS);
+
+ /* Now... try closing with locks held. */
+ ok1(ntdb_close(ntdb2) == 0);
+
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(ntdb_deq(d, data));
+ free(d.dptr);
+ ok1(ntdb_close(ntdb) == 0);
+ /* Only the four expected lock failures may have logged. */
+ ok1(tap_log_messages == 4);
+ tap_log_messages = 0;
+ }
+
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+#define MAX_SIZE 10000
+#define SIZE_STEP 131
+
+/*
+ * Grow a single record step by step: insert it empty, then overwrite it
+ * with data sizes 0, SIZE_STEP, 2*SIZE_STEP, ... below MAX_SIZE, running
+ * ntdb_check() after every store.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data;
+
+ /* One buffer of the maximum size is reused for every store. */
+ data.dptr = malloc(MAX_SIZE);
+ memset(data.dptr, 0x24, MAX_SIZE);
+
+ /* Per flag set: 3 fixed checks plus 2 per size step. */
+ plan_tests(sizeof(flags) / sizeof(flags[0])
+ * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-record-expand.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ data.dsize = 0;
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ for (data.dsize = 0;
+ data.dsize < MAX_SIZE;
+ data.dsize += SIZE_STEP) {
+ /* Fill byte is derived from the current size. */
+ memset(data.dptr, data.dsize, data.dsize);
+ ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ }
+ ntdb_close(ntdb);
+ }
+ ok1(tap_log_messages == 0);
+ free(data.dptr);
+
+ return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/*
+ * Simple delete test: on a fresh database a delete must report
+ * NTDB_ERR_NOEXIST, an insert must succeed, and a second delete must
+ * then succeed, with ntdb_check() passing after every step.
+ */
+int main(int argc, char *argv[])
+{
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+	const unsigned int num_flags = sizeof(flags) / sizeof(flags[0]);
+	NTDB_DATA key = ntdb_mkdata("key", 3);
+	NTDB_DATA data = ntdb_mkdata("data", 4);
+	struct ntdb_context *ntdb;
+	unsigned int idx;
+
+	/* Seven checks per flag set, plus the final log check. */
+	plan_tests(num_flags * 7 + 1);
+	for (idx = 0; idx < num_flags; idx++) {
+		ntdb = ntdb_open("run-simple-delete.ntdb",
+				 flags[idx]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(ntdb);
+		if (!ntdb)
+			continue;
+
+		/* Delete should fail. */
+		ok1(ntdb_delete(ntdb, key) == NTDB_ERR_NOEXIST);
+		ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+		/* Insert should succeed. */
+		ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+		ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+		/* Delete should now work. */
+		ok1(ntdb_delete(ntdb, key) == 0);
+		ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+		ntdb_close(ntdb);
+	}
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "config.h"
+#include "../ntdb.h"
+#include "../private.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helpapi-external-agent.h"
+
+/*
+ * Summary test: store 500 records with 4-byte keys and data sizes
+ * cycling through 0..4, then check the text produced by ntdb_summary()
+ * both without flags and with NTDB_SUMMARY_HISTOGRAMS.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ /* Key and data both alias j, which also drives the loops below. */
+ NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+ NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
+ char *summary;
+
+ /* Per flag set: the open check plus 5 checks for each summary. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-summary.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Put some stuff in there. */
+ for (j = 0; j < 500; j++) {
+ /* Make sure padding varies so we get some graphs! */
+ data.dsize = j % (sizeof(j) + 1);
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+ fail("Storing in ntdb");
+ }
+
+ /* Two passes: j == 0, then j == NTDB_SUMMARY_HISTOGRAMS. */
+ for (j = 0;
+ j <= NTDB_SUMMARY_HISTOGRAMS;
+ j += NTDB_SUMMARY_HISTOGRAMS) {
+ ok1(ntdb_summary(ntdb, j, &summary) == NTDB_SUCCESS);
+ ok1(strstr(summary, "Number of records: 500\n"));
+ ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
+ ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
+ /* Graph characters must appear only when histograms
+ * were requested. */
+ if (j == NTDB_SUMMARY_HISTOGRAMS) {
+ ok1(strstr(summary, "|")
+ && strstr(summary, "*"));
+ } else {
+ ok1(!strstr(summary, "|")
+ && !strstr(summary, "*"));
+ }
+ free(summary);
+ }
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "external-agent.h"
+#include "logging.h"
+#include "lock-tracking.h"
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <ccan/err/err.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include "tap-interface.h"
+#include <stdio.h>
+#include <stdarg.h>
+
+static struct ntdb_context *ntdb;
+
+void (*external_agent_free)(void *) = free;
+
+/* Open hook: we always hold a lock at offset 4, so we can tell whether
+ * anyone else has the file open.  (This is compatible with tdb's
+ * TDB_CLEAR_IF_FIRST flag.)  Returns NTDB_ERR_IO if truncating or taking
+ * the read lock fails, NTDB_SUCCESS otherwise. */
+static enum NTDB_ERROR clear_if_first(int fd, void *arg)
+{
+	struct flock marker = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = 4,
+		.l_len = 1,
+	};
+
+	/* Getting the write lock means nobody else holds the marker. */
+	if (fcntl(fd, F_SETLK, &marker) == 0) {
+		diag("agent truncating file!");
+		if (ftruncate(fd, 0) != 0)
+			return NTDB_ERR_IO;
+	}
+
+	/* Whether or not we truncated, wait for a read lock on the marker. */
+	marker.l_type = F_RDLCK;
+	return fcntl(fd, F_SETLKW, &marker) != 0 ? NTDB_ERR_IO : NTDB_SUCCESS;
+}
+
+/* Carry out one operation requested of the agent.
+ *
+ * op:   which operation to run.
+ * name: database name for the open operations; otherwise "<key>" or
+ *       "<key>=<data>", split at the first '='.
+ *
+ * Returns the outcome; if any lock attempt during the operation would
+ * have blocked, the result is replaced by WOULD_HAVE_BLOCKED. */
+static enum agent_return do_operation(enum operation op, const char *name)
+{
+ NTDB_DATA k, d;
+ enum agent_return ret;
+ NTDB_DATA data;
+ enum NTDB_ERROR ecode;
+ union ntdb_attribute cif;
+ const char *eq;
+
+ /* Everything except the two open operations needs an open database. */
+ if (op != OPEN && op != OPEN_WITH_HOOK && !ntdb) {
+ diag("external: No ntdb open!");
+ return OTHER_FAILURE;
+ }
+
+ diag("external: %s", operation_name(op));
+
+ /* Split "<key>=<data>"; with no '=', the data is empty. */
+ eq = strchr(name, '=');
+ if (eq) {
+ k = ntdb_mkdata(name, eq - name);
+ d = ntdb_mkdata(eq + 1, strlen(eq+1));
+ } else {
+ k = ntdb_mkdata(name, strlen(name));
+ d.dsize = 0;
+ d.dptr = NULL;
+ }
+
+ /* Reset so the check after the switch only sees this operation. */
+ locking_would_block = 0;
+ switch (op) {
+ case OPEN:
+ if (ntdb) {
+ diag("Already have ntdb %s open", ntdb_name(ntdb));
+ return OTHER_FAILURE;
+ }
+ ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &tap_log_attr);
+ if (!ntdb) {
+ if (!locking_would_block)
+ diag("Opening ntdb gave %s", strerror(errno));
+ forget_locking();
+ ret = OTHER_FAILURE;
+ } else
+ ret = SUCCESS;
+ break;
+ case OPEN_WITH_HOOK:
+ if (ntdb) {
+ diag("Already have ntdb %s open", ntdb_name(ntdb));
+ return OTHER_FAILURE;
+ }
+ /* Chain the open hook in front of the logging attribute. */
+ cif.openhook.base.attr = NTDB_ATTRIBUTE_OPENHOOK;
+ cif.openhook.base.next = &tap_log_attr;
+ cif.openhook.fn = clear_if_first;
+ ntdb = ntdb_open(name, MAYBE_NOSYNC, O_RDWR, 0, &cif);
+ if (!ntdb) {
+ if (!locking_would_block)
+ diag("Opening ntdb gave %s", strerror(errno));
+ forget_locking();
+ ret = OTHER_FAILURE;
+ } else
+ ret = SUCCESS;
+ break;
+ case FETCH:
+ /* FAILED means the key is absent; a value that differs from the
+ * expected data counts as OTHER_FAILURE. */
+ ecode = ntdb_fetch(ntdb, k, &data);
+ if (ecode == NTDB_ERR_NOEXIST) {
+ ret = FAILED;
+ } else if (ecode < 0) {
+ ret = OTHER_FAILURE;
+ } else if (!ntdb_deq(data, d)) {
+ ret = OTHER_FAILURE;
+ external_agent_free(data.dptr);
+ } else {
+ ret = SUCCESS;
+ external_agent_free(data.dptr);
+ }
+ break;
+ case STORE:
+ ret = ntdb_store(ntdb, k, d, 0) == 0 ? SUCCESS : OTHER_FAILURE;
+ break;
+ case TRANSACTION_START:
+ ret = ntdb_transaction_start(ntdb) == 0 ? SUCCESS : OTHER_FAILURE;
+ break;
+ case TRANSACTION_COMMIT:
+ ret = ntdb_transaction_commit(ntdb)==0 ? SUCCESS : OTHER_FAILURE;
+ break;
+ case NEEDS_RECOVERY:
+ ret = external_agent_needs_rec(ntdb);
+ break;
+ case CHECK:
+ ret = ntdb_check(ntdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE;
+ break;
+ case CLOSE:
+ /* The handle is dropped whether or not the close succeeded. */
+ ret = ntdb_close(ntdb) == 0 ? SUCCESS : OTHER_FAILURE;
+ ntdb = NULL;
+ break;
+ case SEND_SIGNAL:
+ /* We do this async */
+ ret = SUCCESS;
+ break;
+ default:
+ ret = OTHER_FAILURE;
+ }
+
+ /* A lock that would have blocked overrides whatever the op returned. */
+ if (locking_would_block)
+ ret = WOULD_HAVE_BLOCKED;
+
+ return ret;
+}
+
+/* Parent-side handle for the agent process: cmdfd is the write end of
+ * the command pipe, responsefd the read end of the response pipe. */
+struct agent {
+ int cmdfd, responsefd;
+};
+
+/* Close both ends of a pipe without disturbing errno. */
+static void close_pipe_pair(int fds[2])
+{
+	int saved_errno = errno;
+
+	close(fds[0]);
+	close(fds[1]);
+	errno = saved_errno;
+}
+
+/* Do this before doing any ntdb stuff.  Return handle, or NULL.
+ *
+ * Forks a child which reads commands (one operation byte followed by a
+ * NUL-terminated name) from a pipe, runs them via do_operation(), and
+ * writes back an enum agent_return.  The child never returns: it exits
+ * once the command pipe is closed.  On failure no descriptors are left
+ * open in the caller. */
+struct agent *prepare_external_agent(void)
+{
+	pid_t pid;
+	int ret;
+	int command[2], response[2];
+	/* Operation byte + name, plus one spare byte so we can always
+	 * NUL-terminate what we read. */
+	char name[1+PATH_MAX+1];
+
+	if (pipe(command) != 0)
+		return NULL;
+	if (pipe(response) != 0) {
+		close_pipe_pair(command);
+		return NULL;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		close_pipe_pair(command);
+		close_pipe_pair(response);
+		return NULL;
+	}
+
+	if (pid != 0) {
+		/* Parent: keep the write end of command, read end of response. */
+		struct agent *agent = malloc(sizeof(*agent));
+
+		close(command[0]);
+		close(response[1]);
+		if (!agent) {
+			/* Closing the command pipe makes the child's read()
+			 * return 0, so it exits rather than lingering. */
+			close(command[1]);
+			close(response[0]);
+			return NULL;
+		}
+		agent->cmdfd = command[1];
+		agent->responsefd = response[0];
+		return agent;
+	}
+
+	/* Child: keep the opposite ends. */
+	close(command[1]);
+	close(response[0]);
+
+	/* We want to fail, not block. */
+	nonblocking_locks = true;
+	log_prefix = "external: ";
+	while ((ret = read(command[0], name, sizeof(name) - 1)) > 0) {
+		enum agent_return result;
+
+		/* do_operation() treats name+1 as a C string. */
+		name[ret] = '\0';
+		result = do_operation(name[0], name+1);
+		if (write(response[1], &result, sizeof(result))
+		    != sizeof(result))
+			err(1, "Writing response");
+		if (name[0] == SEND_SIGNAL) {
+			/* Reply first, then signal the parent 10ms later. */
+			struct timeval ten_ms;
+			ten_ms.tv_sec = 0;
+			ten_ms.tv_usec = 10000;
+			select(0, NULL, NULL, NULL, &ten_ms);
+			kill(getppid(), SIGUSR1);
+		}
+	}
+	exit(0);
+}
+
+/* Ask the external agent to try to do an operation.
+ *
+ * Sends the operation byte followed by the NUL-terminated name (a NULL
+ * name is sent as "") and waits for the reply.  Returns the agent's
+ * result, AGENT_DIED if the command could not be written or the reply
+ * could not be read in full, or OTHER_FAILURE if the message buffer
+ * could not be allocated. */
+enum agent_return external_agent_operation(struct agent *agent,
+					   enum operation op,
+					   const char *name)
+{
+	enum agent_return res;
+	size_t len;
+	char *string;
+
+	if (!name)
+		name = "";
+	/* One byte for the operation, then the name and its terminator. */
+	len = 1 + strlen(name) + 1;
+	string = malloc(len);
+	if (!string)
+		return OTHER_FAILURE;
+
+	string[0] = op;
+	memcpy(string + 1, name, len - 1);
+
+	if (write(agent->cmdfd, string, len) != (ssize_t)len
+	    || read(agent->responsefd, &res, sizeof(res))
+	       != (ssize_t)sizeof(res))
+		res = AGENT_DIED;
+
+	free(string);
+	return res;
+}
+
+/* Map an agent_return value to its name, or "**INVALID**" if unknown. */
+const char *agent_return_name(enum agent_return ret)
+{
+	switch (ret) {
+	case SUCCESS:
+		return "SUCCESS";
+	case WOULD_HAVE_BLOCKED:
+		return "WOULD_HAVE_BLOCKED";
+	case AGENT_DIED:
+		return "AGENT_DIED";
+	case FAILED:
+		return "FAILED";
+	case OTHER_FAILURE:
+		return "OTHER_FAILURE";
+	}
+	return "**INVALID**";
+}
+
+/* Map an operation value to its name, or "**INVALID**" if unknown. */
+const char *operation_name(enum operation op)
+{
+	static const char *const names[] = {
+		[OPEN] = "OPEN",
+		[OPEN_WITH_HOOK] = "OPEN_WITH_HOOK",
+		[FETCH] = "FETCH",
+		[STORE] = "STORE",
+		[CHECK] = "CHECK",
+		[TRANSACTION_START] = "TRANSACTION_START",
+		[TRANSACTION_COMMIT] = "TRANSACTION_COMMIT",
+		[NEEDS_RECOVERY] = "NEEDS_RECOVERY",
+		[SEND_SIGNAL] = "SEND_SIGNAL",
+		[CLOSE] = "CLOSE",
+	};
+	const unsigned int idx = (unsigned int)op;
+
+	/* Out-of-range values (including negative ones) have no name. */
+	if (idx < sizeof(names) / sizeof(names[0]) && names[idx])
+		return names[idx];
+	return "**INVALID**";
+}
+
+/* Close both pipe ends held by the handle, then release the handle. */
+void free_external_agent(struct agent *agent)
+{
+	const int cmd = agent->cmdfd;
+	const int resp = agent->responsefd;
+
+	close(cmd);
+	close(resp);
+	free(agent);
+}
--- /dev/null
+#ifndef NTDB_TEST_EXTERNAL_AGENT_H
+#define NTDB_TEST_EXTERNAL_AGENT_H
+
+/* For locking tests, we need a different process to try things at
+ * various times. */
+enum operation {
+ OPEN,
+ OPEN_WITH_HOOK,
+ FETCH,
+ STORE,
+ TRANSACTION_START,
+ TRANSACTION_COMMIT,
+ NEEDS_RECOVERY,
+ CHECK,
+ SEND_SIGNAL,
+ CLOSE,
+};
+
+/* Do this before doing any ntdb stuff. Return handle, or NULL. */
+struct agent *prepare_external_agent(void);
+
+/* Outcome of an operation requested from the external agent. */
+enum agent_return {
+ SUCCESS,
+ WOULD_HAVE_BLOCKED,
+ AGENT_DIED,
+ FAILED, /* For fetch, or NEEDS_RECOVERY */
+ OTHER_FAILURE,
+};
+
+/* Ask the external agent to try to do an operation.
+ * name == ntdb name for OPEN/OPEN_WITH_HOOK,
+ * <key>=<data> for FETCH/STORE.
+ */
+enum agent_return external_agent_operation(struct agent *handle,
+ enum operation op,
+ const char *name);
+
+/* Hook into free() on ntdb_data in external agent. */
+extern void (*external_agent_free)(void *);
+
+/* Mapping enum -> string. */
+const char *agent_return_name(enum agent_return ret);
+const char *operation_name(enum operation op);
+
+/* Close the agent's pipes and free the handle. */
+void free_external_agent(struct agent *agent);
+
+/* Internal use: */
+struct ntdb_context;
+enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb);
+
+#endif /* NTDB_TEST_EXTERNAL_AGENT_H */
--- /dev/null
+#include "failtest_helper.h"
+#include "logging.h"
+#include <string.h>
+#include "tap-interface.h"
+
+bool failtest_suppress = false;
+
+/* Does this failtest call have the given type and line, and a file name
+ * that is either exactly `file` or a path whose final component(s)
+ * equal `file` (i.e. preceded by a '/')? */
+bool failmatch(const struct failtest_call *call,
+	       const char *file, int line, enum failtest_call_type type)
+{
+	size_t call_len, file_len;
+
+	if (!(call->type == type && call->line == line))
+		return false;
+
+	if (strcmp(call->file, file) == 0)
+		return true;
+
+	if (!strends(call->file, file))
+		return false;
+
+	/* Suffix match: only accept it on a path-component boundary. */
+	call_len = strlen(call->file);
+	file_len = strlen(file);
+	return call->file[call_len - file_len - 1] == '/';
+}
+
+/* True for an fcntl() call whose command is F_SETLK. */
+static bool is_nonblocking_lock(const struct failtest_call *call)
+{
+	if (call->type != FAILTEST_FCNTL)
+		return false;
+	return call->u.fcntl.cmd == F_SETLK;
+}
+
+/* True for an fcntl() call whose flock argument has type F_UNLCK. */
+static bool is_unlock(const struct failtest_call *call)
+{
+	if (call->type != FAILTEST_FCNTL)
+		return false;
+	return call->u.fcntl.arg.fl.l_type == F_UNLCK;
+}
+
+/* Exit check for failtest: find the first injected failure in history
+ * that is expected to produce a log message, and return whether any
+ * message was logged.  Returns true if there is no such failure. */
+bool exit_check_log(struct tlist_calls *history)
+{
+ const struct failtest_call *i;
+ unsigned int malloc_count = 0;
+
+ tlist_for_each(history, i, list) {
+ if (!i->fail)
+ continue;
+ /* Failing the /dev/urandom open doesn't count: we fall back. */
+ if (failmatch(i, URANDOM_OPEN))
+ continue;
+
+ /* Similarly with read fail. */
+ if (failmatch(i, URANDOM_READ))
+ continue;
+
+ /* Initial allocation of ntdb doesn't log. */
+ /* Only the first failed malloc is excused; later ones fall through. */
+ if (i->type == FAILTEST_MALLOC) {
+ if (malloc_count++ == 0) {
+ continue;
+ }
+ }
+
+ /* We don't block "failures" on non-blocking locks. */
+ if (is_nonblocking_lock(i))
+ continue;
+
+ /* Decided by the first relevant failure; later ones are not examined. */
+ if (!tap_log_messages)
+ diag("We didn't log for %s:%u", i->file, i->line);
+ return tap_log_messages != 0;
+ }
+ return true;
+}
+
+/* Some places we soldier on despite errors: only fail them once.
+ *
+ * Failtest hook: looks at the most recent call in history and returns
+ * FAIL_DONT_FAIL while failtest_suppress is set, FAIL_PROBE for calls
+ * we recover from, and FAIL_OK for everything else. */
+enum failtest_result
+block_repeat_failures(struct tlist_calls *history)
+{
+	const struct failtest_call *latest = tlist_tail(history, list);
+
+	if (failtest_suppress)
+		return FAIL_DONT_FAIL;
+
+	/* /dev/urandom open or read failing: we fall back. */
+	if (failmatch(latest, URANDOM_OPEN) || failmatch(latest, URANDOM_READ))
+		return FAIL_PROBE;
+
+	/* mmap failing is handled by falling back to read/write, and
+	 * unlock or non-blocking lock is fail-once: don't try all
+	 * possible paths for any of these. */
+	if (latest->type == FAILTEST_MMAP
+	    || is_unlock(latest)
+	    || is_nonblocking_lock(latest))
+		return FAIL_PROBE;
+
+	return FAIL_OK;
+}
--- /dev/null
+#ifndef NTDB_TEST_FAILTEST_HELPER_H
+#define NTDB_TEST_FAILTEST_HELPER_H
+#include <ccan/failtest/failtest.h>
+#include <stdbool.h>
+
+/* FIXME: Check these! */
+/* Each expands to the (file, line, type) arguments of failmatch(). */
+#define URANDOM_OPEN "open.c", 62, FAILTEST_OPEN
+#define URANDOM_READ "open.c", 42, FAILTEST_READ
+
+/* Exit check: did we log for the first injected failure that should log? */
+bool exit_check_log(struct tlist_calls *history);
+/* Does call match the given file (exact or as a path suffix), line and type? */
+bool failmatch(const struct failtest_call *call,
+ const char *file, int line, enum failtest_call_type type);
+/* Failtest hook deciding how to treat the most recent call in history. */
+enum failtest_result block_repeat_failures(struct tlist_calls *history);
+
+/* Set this to suppress failure. */
+extern bool failtest_suppress;
+
+#endif /* NTDB_TEST_FAILTEST_HELPER_H */
--- /dev/null
+#include "external-agent.h"
+
+/* This isn't possible via the ntdb API, but this makes it link.
+ * Always reports FAILED. */
+enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb)
+{
+ return FAILED;
+}
--- /dev/null
+#include "external-agent.h"
+#include "../private.h"
+
+/* SUCCESS if ntdb_needs_recovery() says the database needs recovery,
+ * FAILED otherwise. */
+enum agent_return external_agent_needs_rec(struct ntdb_context *ntdb)
+{
+	if (ntdb_needs_recovery(ntdb))
+		return SUCCESS;
+	return FAILED;
+}
--- /dev/null
+/* NTDB tools to create various canned database layouts. */
+#include "layout.h"
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <ccan/err/err.h>
+#include "logging.h"
+
+/* Allocate an empty layout description.  Never returns NULL: an
+ * allocation failure terminates the test via err(). */
+struct ntdb_layout *new_ntdb_layout(void)
+{
+	struct ntdb_layout *layout = malloc(sizeof(*layout));
+
+	if (!layout)
+		err(1, "allocating ntdb layout");
+	layout->num_elems = 0;
+	layout->elem = NULL;
+	return layout;
+}
+
+/* Append elem to the layout's element array, growing it by one slot.
+ * An allocation failure terminates the test via err(), leaving the old
+ * array untouched rather than overwriting the pointer with NULL. */
+static void add(struct ntdb_layout *layout, union ntdb_layout_elem elem)
+{
+	union ntdb_layout_elem *grown;
+
+	grown = realloc(layout->elem,
+			sizeof(layout->elem[0]) * (layout->num_elems+1));
+	if (!grown)
+		err(1, "growing ntdb layout to %u elements",
+		    layout->num_elems + 1);
+	layout->elem = grown;
+	layout->elem[layout->num_elems++] = elem;
+}
+
+/* Append a free table element to the layout. */
+void ntdb_layout_add_freetable(struct ntdb_layout *layout)
+{
+	union ntdb_layout_elem ftable_elem;
+
+	/* A free table carries no payload beyond its type. */
+	ftable_elem.base.type = FREETABLE;
+	add(layout, ftable_elem);
+}
+
+/* Append a free record of the given length, to be entered into free
+ * table number `ftable` (counted among the layout's free tables). */
+void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len,
+			  unsigned ftable)
+{
+	union ntdb_layout_elem free_elem;
+
+	free_elem.base.type = FREE;
+	free_elem.free.ftable_num = ftable;
+	free_elem.free.len = len;
+	add(layout, free_elem);
+}
+
+/* Append a capability record.  The stored type is `type` with the
+ * NTDB_CAP_NOWRITE / NTDB_CAP_NOCHECK / NTDB_CAP_NOOPEN bits OR-ed in
+ * for each of write_breaks / check_breaks / open_breaks that is set;
+ * `extra` is additional record length beyond the capability struct. */
+void ntdb_layout_add_capability(struct ntdb_layout *layout,
+				uint64_t type,
+				bool write_breaks,
+				bool check_breaks,
+				bool open_breaks,
+				ntdb_len_t extra)
+{
+	union ntdb_layout_elem cap_elem;
+	uint64_t cap_type = type;
+
+	cap_type |= write_breaks ? NTDB_CAP_NOWRITE : 0;
+	cap_type |= open_breaks ? NTDB_CAP_NOOPEN : 0;
+	cap_type |= check_breaks ? NTDB_CAP_NOCHECK : 0;
+
+	cap_elem.base.type = CAPABILITY;
+	cap_elem.capability.type = cap_type;
+	cap_elem.capability.extra = extra;
+	add(layout, cap_elem);
+}
+
+/* Return a heap copy of key (used for both keys and data).
+ *
+ * Zero-length input yields { NULL, 0 } without touching key.dptr, which
+ * may itself be NULL.  An allocation failure terminates the test via
+ * err().  The copy is released with free() in ntdb_layout_free(). */
+static NTDB_DATA dup_key(NTDB_DATA key)
+{
+	NTDB_DATA ret;
+
+	ret.dsize = key.dsize;
+	if (ret.dsize == 0) {
+		/* malloc(0) may return NULL, and memcpy() from a NULL
+		 * source is undefined even for zero bytes. */
+		ret.dptr = NULL;
+		return ret;
+	}
+
+	ret.dptr = malloc(ret.dsize);
+	if (!ret.dptr)
+		err(1, "duplicating %zu bytes for layout", (size_t)ret.dsize);
+	memcpy(ret.dptr, key.dptr, ret.dsize);
+	return ret;
+}
+
+/* Append a used (data) record holding private copies of key and data,
+ * with `extra` bytes of padding after them. */
+void ntdb_layout_add_used(struct ntdb_layout *layout,
+			  NTDB_DATA key, NTDB_DATA data,
+			  ntdb_len_t extra)
+{
+	union ntdb_layout_elem used_elem;
+	NTDB_DATA key_copy = dup_key(key);
+	NTDB_DATA data_copy = dup_key(data);
+
+	used_elem.base.type = DATA;
+	used_elem.used.key = key_copy;
+	used_elem.used.data = data_copy;
+	used_elem.used.extra = extra;
+	add(layout, used_elem);
+}
+
+/* Space taken by a free record: a used-record header plus len. */
+static ntdb_len_t free_record_len(ntdb_len_t len)
+{
+	const ntdb_len_t header = sizeof(struct ntdb_used_record);
+
+	return header + len;
+}
+
+/* Space taken by a used record: header, key, data and extra padding.
+ * Asserts the result is at least the size of a free record. */
+static ntdb_len_t data_record_len(struct tle_used *used)
+{
+	ntdb_len_t len = sizeof(struct ntdb_used_record);
+
+	len += used->key.dsize;
+	len += used->data.dsize;
+	len += used->extra;
+	assert(len >= sizeof(struct ntdb_free_record));
+	return len;
+}
+
+/* Space taken by a capability record: the struct plus its extra bytes. */
+static ntdb_len_t capability_len(struct tle_capability *cap)
+{
+	const ntdb_len_t fixed = sizeof(struct ntdb_capability);
+
+	return fixed + cap->extra;
+}
+
+/* Space taken by a free table; the same for every table, so ftable is
+ * not consulted. */
+static ntdb_len_t freetable_len(struct tle_freetable *ftable)
+{
+	const ntdb_len_t table_size = sizeof(struct ntdb_freetable);
+
+	return table_size;
+}
+
+/* Placeholder for the second layout pass: intentionally does nothing
+ * with mem or len. */
+static void set_free_record(void *mem, ntdb_len_t len)
+{
+ /* We do all the work in add_to_freetable */
+}
+
+/* If the record has any extra padding, write a single NUL byte at
+ * offset len into the payload that follows the header u. */
+static void add_zero_pad(struct ntdb_used_record *u, size_t len, size_t extra)
+{
+	char *payload = (char *)(u + 1);
+
+	if (!extra)
+		return;
+	payload[len] = '\0';
+}
+
+/* Write a used record at mem: header, then key bytes, then data bytes,
+ * then (if there is padding) a NUL immediately after the data. */
+static void set_data_record(void *mem, struct ntdb_context *ntdb,
+			    struct tle_used *used)
+{
+	struct ntdb_used_record *rec = mem;
+	char *payload = (char *)(rec + 1);
+
+	set_header(ntdb, rec, NTDB_USED_MAGIC,
+		   used->key.dsize, used->data.dsize,
+		   used->key.dsize + used->data.dsize + used->extra);
+
+	memcpy(payload, used->key.dptr, used->key.dsize);
+	memcpy(payload + used->key.dsize, used->data.dptr, used->data.dsize);
+	add_zero_pad(rec, used->key.dsize + used->data.dsize, used->extra);
+}
+
+/* Write a capability record at mem and link it onto the end of the
+ * capability list: from the header if it is the first one (last_cap is
+ * 0), otherwise from the capability at offset last_cap. */
+static void set_capability(void *mem, struct ntdb_context *ntdb,
+			   struct tle_capability *cap, struct ntdb_header *hdr,
+			   ntdb_off_t last_cap)
+{
+	struct ntdb_capability *this_cap = mem;
+	/* Record length excludes the used-record header itself. */
+	ntdb_len_t len = sizeof(*this_cap) - sizeof(struct ntdb_used_record)
+		+ cap->extra;
+
+	this_cap->type = cap->type;
+	this_cap->next = 0;
+	set_header(ntdb, &this_cap->hdr, NTDB_CAP_MAGIC, 0, len, len);
+
+	/* Append to capability list. */
+	if (last_cap) {
+		struct ntdb_capability *prev;
+
+		prev = (struct ntdb_capability *)((char *)hdr + last_cap);
+		prev->next = cap->base.off;
+	} else {
+		hdr->capabilities = cap->base.off;
+	}
+}
+
+/* Write a zeroed free table at mem and link it in: from the header if
+ * it is the first one (last_ftable is 0), otherwise from the free table
+ * at offset last_ftable. */
+static void set_freetable(void *mem, struct ntdb_context *ntdb,
+			  struct tle_freetable *freetable, struct ntdb_header *hdr,
+			  ntdb_off_t last_ftable)
+{
+	struct ntdb_freetable *table = mem;
+	/* Record length excludes the table's own header. */
+	const ntdb_len_t body_len = sizeof(*table) - sizeof(table->hdr);
+
+	memset(table, 0, sizeof(*table));
+	set_header(ntdb, &table->hdr, NTDB_FTABLE_MAGIC, 0,
+		   body_len, body_len);
+
+	if (!last_ftable) {
+		hdr->free_table = freetable->base.off;
+	} else {
+		struct ntdb_freetable *prev;
+
+		prev = (struct ntdb_freetable *)((char *)hdr + last_ftable);
+		prev->next = freetable->base.off;
+	}
+}
+
+/* Enter the free record at eoff (payload length elen) into the given
+ * free table, after pointing the context at that table. */
+static void add_to_freetable(struct ntdb_context *ntdb,
+			     ntdb_off_t eoff,
+			     ntdb_off_t elen,
+			     unsigned ftable,
+			     struct tle_freetable *freetable)
+{
+	/* Length handed to add_free_record() includes the record header. */
+	const ntdb_len_t total_len = sizeof(struct ntdb_used_record) + elen;
+
+	ntdb->ftable_off = freetable->base.off;
+	ntdb->ftable = ftable;
+	add_free_record(ntdb, eoff, total_len, NTDB_LOCK_WAIT, false);
+}
+
+/* Offset of hash bucket idx: buckets follow the file header and one
+ * used-record header, each bucket being one ntdb_off_t wide. */
+static ntdb_off_t hbucket_offset(ntdb_len_t idx)
+{
+	const ntdb_off_t first_bucket = sizeof(struct ntdb_header)
+		+ sizeof(struct ntdb_used_record);
+
+	return first_bucket + idx * sizeof(ntdb_off_t);
+}
+
+/* FIXME: Our hash table handling here is primitive: we don't expand! */
+/* Put the record at eoff into the top-level hash bucket selected by the
+ * low hash_bits bits of the key's hash.  Aborts if that bucket is
+ * already occupied. */
+static void add_to_hashtable(struct ntdb_context *ntdb,
+			     ntdb_off_t eoff,
+			     NTDB_DATA key)
+{
+	const uint32_t hash = ntdb_hash(ntdb, key.dptr, key.dsize);
+	const ntdb_off_t bucket
+		= hbucket_offset(hash & ((1 << ntdb->hash_bits)-1));
+
+	/* No chaining or expansion here: a collision is fatal. */
+	if (ntdb_read_off(ntdb, bucket) != 0)
+		abort();
+
+	ntdb_write_off(ntdb, bucket, encode_offset(ntdb, eoff, hash));
+}
+
+/* Return the num'th (0-based) free table element of the layout, in
+ * element order.  Aborts if the layout has fewer free tables. */
+static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num)
+{
+	unsigned idx;
+
+	for (idx = 0; idx < layout->num_elems; idx++) {
+		union ntdb_layout_elem *e = &layout->elem[idx];
+
+		/* Count down over free tables only. */
+		if (e->base.type == FREETABLE && num-- == 0)
+			return &e->ftable;
+	}
+	abort();
+}
+
+/* FIXME: Support NTDB_CONVERT */
+/* Build an internal ntdb whose contents follow the layout description.
+ *
+ * layout: elements to place, in order, after the header and top-level
+ *         hash table.  Each element's base.off is set as a side effect.
+ * freefn: used to release the map originally allocated by ntdb_open().
+ * attr:   attributes passed through to ntdb_open().
+ *
+ * Returns the populated context.  Failure to open the internal ntdb or
+ * to allocate the image terminates the test via err(); an unknown
+ * element type or a missing free table aborts. */
+struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
+				     void (*freefn)(void *),
+				     union ntdb_attribute *attr)
+{
+	unsigned int i;
+	ntdb_off_t off, hdrlen, len, last_ftable, last_cap;
+	char *mem;
+	struct ntdb_context *ntdb;
+
+	/* Now populate our header, cribbing from a real NTDB header. */
+	ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr);
+	if (!ntdb)
+		err(1, "opening internal ntdb for layout");
+
+	off = sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record)
+		+ (sizeof(ntdb_off_t) << ntdb->hash_bits);
+	hdrlen = off;
+
+	/* First pass of layout: calc lengths */
+	for (i = 0; i < layout->num_elems; i++) {
+		union ntdb_layout_elem *e = &layout->elem[i];
+		e->base.off = off;
+		switch (e->base.type) {
+		case FREETABLE:
+			len = freetable_len(&e->ftable);
+			break;
+		case FREE:
+			len = free_record_len(e->free.len);
+			break;
+		case DATA:
+			len = data_record_len(&e->used);
+			break;
+		case CAPABILITY:
+			len = capability_len(&e->capability);
+			break;
+		default:
+			abort();
+		}
+		off += len;
+	}
+
+	mem = malloc(off);
+	if (!mem)
+		err(1, "allocating layout image");
+	/* Fill with some weird pattern. */
+	memset(mem, 0x99, off);
+	memcpy(mem, ntdb->file->map_ptr, hdrlen);
+
+	/* Mug the ntdb we have to make it use this. */
+	freefn(ntdb->file->map_ptr);
+	ntdb->file->map_ptr = mem;
+	ntdb->file->map_size = off;
+
+	/* Second pass: write each record, chaining free tables and
+	 * capabilities as we go. */
+	last_ftable = 0;
+	last_cap = 0;
+	for (i = 0; i < layout->num_elems; i++) {
+		union ntdb_layout_elem *e = &layout->elem[i];
+		switch (e->base.type) {
+		case FREETABLE:
+			set_freetable(mem + e->base.off, ntdb, &e->ftable,
+				      (struct ntdb_header *)mem, last_ftable);
+			last_ftable = e->base.off;
+			break;
+		case FREE:
+			set_free_record(mem + e->base.off, e->free.len);
+			break;
+		case DATA:
+			set_data_record(mem + e->base.off, ntdb, &e->used);
+			break;
+		case CAPABILITY:
+			set_capability(mem + e->base.off, ntdb, &e->capability,
+				       (struct ntdb_header *)mem, last_cap);
+			last_cap = e->base.off;
+			break;
+		}
+	}
+	/* Must have a free table! */
+	assert(last_ftable);
+
+	/* Now fill the free and hash tables. */
+	for (i = 0; i < layout->num_elems; i++) {
+		union ntdb_layout_elem *e = &layout->elem[i];
+		switch (e->base.type) {
+		case FREE:
+			add_to_freetable(ntdb, e->base.off, e->free.len,
+					 e->free.ftable_num,
+					 find_ftable(layout, e->free.ftable_num));
+			break;
+		case DATA:
+			add_to_hashtable(ntdb, e->base.off, e->used.key);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* add_to_freetable() moved ftable_off around; finish on table 0. */
+	ntdb->ftable_off = find_ftable(layout, 0)->base.off;
+	return ntdb;
+}
+
+/* Build the layout in memory, dump the resulting image to filename
+ * (created or truncated, mode 0600), then close the temporary context.
+ * Any failure to open or fully write the file terminates via err(). */
+void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
+		       union ntdb_attribute *attr, const char *filename)
+{
+	struct ntdb_context *ntdb = ntdb_layout_get(layout, freefn, attr);
+	const int out = open(filename, O_WRONLY|O_TRUNC|O_CREAT, 0600);
+
+	if (out < 0)
+		err(1, "opening %s for writing", filename);
+
+	/* A short write counts as failure: there is no retry. */
+	if (write(out, ntdb->file->map_ptr, ntdb->file->map_size)
+	    != ntdb->file->map_size)
+		err(1, "writing %s", filename);
+
+	close(out);
+	ntdb_close(ntdb);
+}
+
+/* Release a layout: the key/data copies held by DATA elements, the
+ * element array, and the layout itself. */
+void ntdb_layout_free(struct ntdb_layout *layout)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < layout->num_elems; idx++) {
+		union ntdb_layout_elem *e = &layout->elem[idx];
+
+		/* Only DATA elements own heap copies. */
+		if (e->base.type != DATA)
+			continue;
+		free(e->used.key.dptr);
+		free(e->used.data.dptr);
+	}
+	free(layout->elem);
+	free(layout);
+}
--- /dev/null
+#ifndef NTDB_TEST_LAYOUT_H
+#define NTDB_TEST_LAYOUT_H
+#include "../private.h"
+
+/* Create an empty layout description. */
+struct ntdb_layout *new_ntdb_layout(void);
+/* Append a free table. */
+void ntdb_layout_add_freetable(struct ntdb_layout *layout);
+/* Append a free record of len bytes, entered into free table number ftable. */
+void ntdb_layout_add_free(struct ntdb_layout *layout, ntdb_len_t len,
+ unsigned ftable);
+/* Append a used record; key and data are copied. extra is padding. */
+void ntdb_layout_add_used(struct ntdb_layout *layout,
+ NTDB_DATA key, NTDB_DATA data,
+ ntdb_len_t extra);
+/* Append a capability record; each *_breaks flag ORs the corresponding
+ * NTDB_CAP_NO* bit into type. */
+void ntdb_layout_add_capability(struct ntdb_layout *layout,
+ uint64_t type,
+ bool write_breaks,
+ bool check_breaks,
+ bool open_breaks,
+ ntdb_len_t extra);
+
+#if 0 /* FIXME: Allow allocation of subtables */
+void ntdb_layout_add_hashtable(struct ntdb_layout *layout,
+ int htable_parent, /* -1 == toplevel */
+ unsigned int bucket,
+ ntdb_len_t extra);
+#endif
+/* freefn is needed if we're using failtest_free. */
+struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
+ void (*freefn)(void *),
+ union ntdb_attribute *attr);
+/* Build the layout and write the resulting image to filename. */
+void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
+ union ntdb_attribute *attr, const char *filename);
+
+/* Free the layout description and the key/data copies it holds. */
+void ntdb_layout_free(struct ntdb_layout *layout);
+
+/* Kind of element stored in a layout. */
+enum layout_type {
+ FREETABLE, FREE, DATA, CAPABILITY
+};
+
+/* Shared by all union members. */
+struct tle_base {
+ enum layout_type type;
+ /* Offset of the element in the image; assigned by ntdb_layout_get(). */
+ ntdb_off_t off;
+};
+
+struct tle_freetable {
+ struct tle_base base;
+};
+
+struct tle_free {
+ struct tle_base base;
+ ntdb_len_t len;
+ /* Index among the layout's free tables, 0-based. */
+ unsigned ftable_num;
+};
+
+struct tle_used {
+ struct tle_base base;
+ NTDB_DATA key;
+ NTDB_DATA data;
+ ntdb_len_t extra;
+};
+
+struct tle_capability {
+ struct tle_base base;
+ uint64_t type;
+ ntdb_len_t extra;
+};
+
+/* One layout element; base.type says which member is valid. */
+union ntdb_layout_elem {
+ struct tle_base base;
+ struct tle_freetable ftable;
+ struct tle_free free;
+ struct tle_used used;
+ struct tle_capability capability;
+};
+
+/* Ordered list of elements making up a canned database. */
+struct ntdb_layout {
+ unsigned int num_elems;
+ union ntdb_layout_elem *elem;
+};
+
+#include "helprun-layout.h"
+#endif /* NTDB_TEST_LAYOUT_H */
--- /dev/null
+/* We save the locks so we can reacquire them. */
+#include "../private.h" /* For NTDB_HASH_LOCK_START, etc. */
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "tap-interface.h"
+#include "lock-tracking.h"
+
+/* One lock we currently believe we hold, kept in a singly linked list. */
+struct lock {
+ struct lock *next;
+ unsigned int off;
+ unsigned int len;
+ int type;
+};
+/* Head of the list of tracked locks. */
+static struct lock *locks;
+/* Incremented for each unknown unlock or unexpected overlap. */
+int locking_errors = 0;
+/* When set, the checks above neither print nor count. */
+bool suppress_lockcheck = false;
+/* When set, F_SETLKW requests are issued as F_SETLK instead. */
+bool nonblocking_locks;
+/* Incremented when a converted F_SETLKW failed with EAGAIN/EACCES. */
+int locking_would_block = 0;
+/* If non-NULL, called with the fd after each successful unlock. */
+void (*unlock_callback)(int fd);
+
+/* Replacement for fcntl() which tracks F_SETLK/F_SETLKW requests in the
+ * locks list and reports unknown unlocks and unexpected overlaps.
+ * Other commands are passed straight through.  Returns what the
+ * underlying fcntl() call returned. */
+int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
+{
+ va_list ap;
+ int ret, arg3;
+ struct flock *fl;
+ bool may_block = false;
+
+ if (cmd != F_SETLK && cmd != F_SETLKW) {
+ /* This may be totally bogus, but we don't know in general. */
+ va_start(ap, cmd);
+ arg3 = va_arg(ap, int);
+ va_end(ap);
+
+ return fcntl(fd, cmd, arg3);
+ }
+
+ va_start(ap, cmd);
+ fl = va_arg(ap, struct flock *);
+ va_end(ap);
+
+ /* In non-blocking mode a waiting lock becomes a try-lock. */
+ if (cmd == F_SETLKW && nonblocking_locks) {
+ cmd = F_SETLK;
+ may_block = true;
+ }
+ ret = fcntl(fd, cmd, fl);
+
+ /* Detect when we failed, but might have been OK if we waited. */
+ if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) {
+ locking_would_block++;
+ }
+
+ if (fl->l_type == F_UNLCK) {
+ struct lock **l;
+ struct lock *old = NULL;
+
+ /* Look for a tracked lock with exactly this offset and length. */
+ for (l = &locks; *l; l = &(*l)->next) {
+ if ((*l)->off == fl->l_start
+ && (*l)->len == fl->l_len) {
+ if (ret == 0) {
+ old = *l;
+ *l = (*l)->next;
+ free(old);
+ }
+ break;
+ }
+ }
+ /* NOTE(review): old also stays NULL when the lock was found but
+ * the unlock itself failed, so that case is reported too. */
+ if (!old && !suppress_lockcheck) {
+ diag("Unknown unlock %u@%u - %i",
+ (int)fl->l_len, (int)fl->l_start, ret);
+ locking_errors++;
+ }
+ } else {
+ struct lock *new, *i;
+ /* A length of 0 is treated as extending to the maximum offset. */
+ unsigned int fl_end = fl->l_start + fl->l_len;
+ if (fl->l_len == 0)
+ fl_end = (unsigned int)-1;
+
+ /* Check for overlaps: we shouldn't do this. */
+ for (i = locks; i; i = i->next) {
+ unsigned int i_end = i->off + i->len;
+ if (i->len == 0)
+ i_end = (unsigned int)-1;
+
+ if (fl->l_start >= i->off && fl->l_start < i_end)
+ break;
+ if (fl_end > i->off && fl_end < i_end)
+ break;
+
+ /* ntdb_allrecord_lock does this, handle adjacent: */
+ /* Same-type lock starting where i ends: extend i instead of
+ * adding a new entry. */
+ if (fl->l_start > NTDB_HASH_LOCK_START
+ && fl->l_start == i_end && fl->l_type == i->type) {
+ if (ret == 0) {
+ i->len = fl->l_len
+ ? i->len + fl->l_len
+ : 0;
+ }
+ goto done;
+ }
+ }
+ /* i is non-NULL only if the loop stopped on an overlap. */
+ if (i) {
+ /* Special case: upgrade of allrecord lock. */
+ if (i->type == F_RDLCK && fl->l_type == F_WRLCK
+ && i->off == NTDB_HASH_LOCK_START
+ && fl->l_start == NTDB_HASH_LOCK_START
+ && i->len == 0
+ && fl->l_len == 0) {
+ if (ret == 0)
+ i->type = F_WRLCK;
+ goto done;
+ }
+ if (!suppress_lockcheck) {
+ diag("%s lock %u@%u overlaps %u@%u",
+ fl->l_type == F_WRLCK ? "write" : "read",
+ (int)fl->l_len, (int)fl->l_start,
+ i->len, (int)i->off);
+ locking_errors++;
+ }
+ }
+
+ /* Record the new lock (even if it overlapped) once it is granted. */
+ if (ret == 0) {
+ new = malloc(sizeof *new);
+ new->off = fl->l_start;
+ new->len = fl->l_len;
+ new->type = fl->l_type;
+ new->next = locks;
+ locks = new;
+ }
+ }
+done:
+ if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback)
+ unlock_callback(fd);
+ return ret;
+}
+
+/* Discard every tracked lock and return how many there were. */
+unsigned int forget_locking(void)
+{
+	unsigned int dropped;
+	struct lock *victim;
+
+	for (dropped = 0; (victim = locks) != NULL; dropped++) {
+		locks = victim->next;
+		free(victim);
+	}
+	return dropped;
+}
--- /dev/null
+#ifndef LOCK_TRACKING_H
+#define LOCK_TRACKING_H
+#include <stdbool.h>
+
+/* Set this if you want a callback after fcntl unlock. */
+extern void (*unlock_callback)(int fd);
+
+/* Replacement fcntl. */
+int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ );
+
+/* Discard locking info: returns number of locks outstanding. */
+unsigned int forget_locking(void);
+
+/* Number of errors in locking. */
+extern int locking_errors;
+
+/* Suppress lock checking. */
+extern bool suppress_lockcheck;
+
+/* Make all locks non-blocking. */
+extern bool nonblocking_locks;
+
+/* Number of times we failed a lock because we made it non-blocking. */
+extern int locking_would_block;
+#endif /* LOCK_TRACKING_H */
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "tap-interface.h"
+#include "logging.h"
+
+/* Number of messages seen by tap_log_fn() while logging was enabled. */
+unsigned tap_log_messages;
+/* Printed in front of each logged message. */
+const char *log_prefix = "";
+/* Heap copy of the most recent message, or NULL if none yet. */
+char *log_last = NULL;
+/* When set, tap_log_fn() ignores messages entirely. */
+bool suppress_logging;
+
+/* Log attribute routing ntdb messages to tap_log_fn(). */
+union ntdb_attribute tap_log_attr = {
+ .log = { .base = { .attr = NTDB_ATTRIBUTE_LOG },
+ .fn = tap_log_fn }
+};
+
+/* ntdb log callback for tests: unless logging is suppressed, print the
+ * message as a diagnostic, remember a copy of it in log_last, and bump
+ * tap_log_messages. */
+void tap_log_fn(struct ntdb_context *ntdb,
+		enum ntdb_log_level level,
+		enum NTDB_ERROR ecode,
+		const char *message, void *priv)
+{
+	if (!suppress_logging) {
+		diag("ntdb log level %u: %s: %s%s",
+		     level, ntdb_errorstr(ecode), log_prefix, message);
+
+		/* Replace the previously remembered message; free(NULL)
+		 * is a no-op, so no guard is needed. */
+		free(log_last);
+		log_last = strdup(message);
+		tap_log_messages++;
+	}
+}
--- /dev/null
+#ifndef NTDB_TEST_LOGGING_H
+#define NTDB_TEST_LOGGING_H
+#include "../ntdb.h"
+#include <stdbool.h>
+#include <string.h>
+
+/* When set, tap_log_fn() drops messages without counting them. */
+extern bool suppress_logging;
+/* Printed in front of each logged message. */
+extern const char *log_prefix;
+/* Number of messages logged so far. */
+extern unsigned tap_log_messages;
+/* Log attribute that routes ntdb messages to tap_log_fn(). */
+extern union ntdb_attribute tap_log_attr;
+/* Copy of the most recently logged message, or NULL. */
+extern char *log_last;
+
+/* ntdb log callback which prints via diag() and counts messages. */
+void tap_log_fn(struct ntdb_context *ntdb,
+ enum ntdb_log_level level,
+ enum NTDB_ERROR ecode,
+ const char *message, void *priv);
+#endif /* NTDB_TEST_LOGGING_H */
--- /dev/null
+#ifndef NTDB_NO_FSYNC_H
+#define NTDB_NO_FSYNC_H
+/* Obey $TDB_NO_FSYNC, a bit like tdb does (only note our NTDB_NOSYNC
+ * does less) */
+/* Evaluates to NTDB_NOSYNC if TDB_NO_FSYNC is set in the environment,
+ * otherwise 0; meant to be OR-ed into ntdb_open() flags. */
+#define MAYBE_NOSYNC (getenv("TDB_NO_FSYNC") ? NTDB_NOSYNC : 0)
+#endif
--- /dev/null
+#include "config.h"
+#include "../check.c"
+#include "../free.c"
+#include "../hash.c"
+#include "../io.c"
+#include "../lock.c"
+#include "../open.c"
+#include "../summary.c"
+#include "../ntdb.c"
+#include "../transaction.c"
+#include "../traverse.c"
--- /dev/null
+#!/usr/bin/env python
+# Some simple tests for the Python bindings for TDB
+# Note that this tests the interface of the Python bindings
+# It does not test tdb itself.
+#
+# Copyright (C) 2007-2013 Jelmer Vernooij <jelmer@samba.org>
+# Published under the GNU LGPLv3 or later
+
+import ntdb
+from unittest import TestCase
+import os, tempfile
+
+
+class OpenTdbTests(TestCase):
+
+ def test_nonexistent_read(self):
+ self.assertRaises(IOError, ntdb.Ntdb, "/some/nonexistent/file", 0,
+ ntdb.DEFAULT, os.O_RDWR)
+
+class CloseTdbTests(TestCase):
+
+ def test_double_close(self):
+ self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT,
+ os.O_CREAT|os.O_RDWR)
+ self.assertNotEqual(None, self.ntdb)
+
+ # ensure that double close does not crash python
+ self.ntdb.close()
+ self.ntdb.close()
+
+ # Check that further operations do not crash python
+ self.assertRaises(RuntimeError, lambda: self.ntdb.transaction_start())
+
+ self.assertRaises(RuntimeError, lambda: self.ntdb["bar"])
+
+
+class InternalTdbTests(TestCase):
+
+ def test_repr(self):
+ self.ntdb = ntdb.Ntdb()
+
+ # repr used to crash on internal db
+ self.assertEquals(repr(self.ntdb), "Ntdb(<internal>)")
+
+
+class SimpleTdbTests(TestCase):
+
+ def setUp(self):
+ super(SimpleTdbTests, self).setUp()
+ self.ntdb = ntdb.Ntdb(tempfile.mkstemp()[1], ntdb.DEFAULT,
+ os.O_CREAT|os.O_RDWR)
+ self.assertNotEqual(None, self.ntdb)
+
+ def tearDown(self):
+ del self.ntdb
+
+ def test_repr(self):
+ self.assertTrue(repr(self.ntdb).startswith("Ntdb('"))
+
+ def test_lockall(self):
+ self.ntdb.lock_all()
+
+ def test_unlockall(self):
+ self.ntdb.lock_all()
+ self.ntdb.unlock_all()
+
+ def test_lockall_read(self):
+ self.ntdb.read_lock_all()
+ self.ntdb.read_unlock_all()
+
+ def test_store(self):
+ self.ntdb.store("bar", "bla")
+ self.assertEquals("bla", self.ntdb.get("bar"))
+
+ def test_getitem(self):
+ self.ntdb["bar"] = "foo"
+ self.assertEquals("foo", self.ntdb["bar"])
+
+ def test_delete(self):
+ self.ntdb["bar"] = "foo"
+ del self.ntdb["bar"]
+ self.assertRaises(KeyError, lambda: self.ntdb["bar"])
+
+ def test_contains(self):
+ self.ntdb["bla"] = "bloe"
+ self.assertTrue("bla" in self.ntdb)
+
+ def test_keyerror(self):
+ self.assertRaises(KeyError, lambda: self.ntdb["bla"])
+
+ def test_name(self):
+ self.ntdb.filename
+
+ def test_iterator(self):
+ self.ntdb["bla"] = "1"
+ self.ntdb["brainslug"] = "2"
+ l = list(self.ntdb)
+ l.sort()
+ self.assertEquals(["bla", "brainslug"], l)
+
+ def test_transaction_cancel(self):
+ self.ntdb["bloe"] = "2"
+ self.ntdb.transaction_start()
+ self.ntdb["bloe"] = "1"
+ self.ntdb.transaction_cancel()
+ self.assertEquals("2", self.ntdb["bloe"])
+
+ def test_transaction_commit(self):
+ self.ntdb["bloe"] = "2"
+ self.ntdb.transaction_start()
+ self.ntdb["bloe"] = "1"
+ self.ntdb.transaction_commit()
+ self.assertEquals("1", self.ntdb["bloe"])
+
+ def test_transaction_prepare_commit(self):
+ self.ntdb["bloe"] = "2"
+ self.ntdb.transaction_start()
+ self.ntdb["bloe"] = "1"
+ self.ntdb.transaction_prepare_commit()
+ self.ntdb.transaction_commit()
+ self.assertEquals("1", self.ntdb["bloe"])
+
+ def test_iterkeys(self):
+ self.ntdb["bloe"] = "2"
+ self.ntdb["bla"] = "25"
+ i = self.ntdb.iterkeys()
+ self.assertEquals(set(["bloe", "bla"]), set([i.next(), i.next()]))
+
+ def test_clear(self):
+ self.ntdb["bloe"] = "2"
+ self.ntdb["bla"] = "25"
+ self.assertEquals(2, len(list(self.ntdb)))
+ self.ntdb.clear()
+ self.assertEquals(0, len(list(self.ntdb)))
+
+ def test_len(self):
+ self.assertEquals(0, len(list(self.ntdb)))
+ self.ntdb["entry"] = "value"
+ self.assertEquals(1, len(list(self.ntdb)))
+
+ def test_add_flags(self):
+ self.ntdb.add_flag(ntdb.NOMMAP)
+ self.ntdb.remove_flag(ntdb.NOMMAP)
+
+
+class VersionTests(TestCase):
+
+ def test_present(self):
+ self.assertTrue(isinstance(ntdb.__version__, str))
+
+
+if __name__ == '__main__':
+ import unittest
+ unittest.TestProgram()
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* Check that set_header() accepts a range of key/data/padding lengths
+ * and that the rec_*() accessors read back what was encoded. */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_used_record rec;
+ struct ntdb_context ntdb = { .log_fn = tap_log_fn };
+
+ /* 64 data-only cases, 32 key+data cases, 48 round-trips of five
+ * checks each, plus the final log check. */
+ plan_tests(64 + 32 + 48*5 + 1);
+
+ /* We should be able to encode any data value. */
+ for (i = 0; i < 64; i++)
+ ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i,
+ 1ULL << i) == 0);
+
+ /* And any key and data with < 64 bits between them. */
+ for (i = 0; i < 32; i++) {
+ /* NOTE(review): 1ULL >> (63 - i) is 0 for every i in this loop, so
+ * dlen is always 0 here - confirm whether a left shift was meant. */
+ ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
+ ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
+ klen + dlen) == 0);
+ }
+
+ /* We should neatly encode all values. */
+ for (i = 0; i < 48; i++) {
+ /* Key length is capped at 1 << 15 and padding at 1 << 31. */
+ uint64_t klen = 1ULL << (i < 16 ? i : 15);
+ uint64_t dlen = 1ULL << i;
+ uint64_t xlen = 1ULL << (i < 32 ? i : 31);
+ ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
+ klen+dlen+xlen) == 0);
+ ok1(rec_key_length(&rec) == klen);
+ ok1(rec_data_length(&rec) == dlen);
+ ok1(rec_extra_padding(&rec) == xlen);
+ ok1(rec_magic(&rec) == NTDB_USED_MAGIC);
+ }
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Reference "find last set": returns the 1-based position of the highest
+ * set bit in num, or 0 when num is 0.
+ */
+static unsigned int dumb_fls(uint64_t num)
+{
+	unsigned int pos = 64;
+
+	/* Walk down from the top bit until a set bit is found. */
+	while (pos > 0 && !(num & (1ULL << (pos - 1))))
+		pos--;
+	return pos;
+}
+
+/*
+ * Compares fls64() against the simple dumb_fls() reference for zero and
+ * for every value with one or two bits set.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j;
+
+	/* One check per (i, j) pair plus the two zero-input checks. */
+	plan_tests(64 * 64 + 2);
+
+	ok1(fls64(0) == 0);
+	ok1(dumb_fls(0) == 0);
+
+	for (i = 0; i < 64; i++) {
+		for (j = 0; j < 64; j++) {
+			uint64_t val = (1ULL << i) | (1ULL << j);
+			/* %llu requires an unsigned long long argument. */
+			ok(fls64(val) == dumb_fls(val),
+			   "%llu -> %u should be %u", (unsigned long long)val,
+			   fls64(val), dumb_fls(val));
+		}
+	}
+	return exit_status();
+}
--- /dev/null
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Creates a fresh database under each flag combination and checks that
+ * it opens, passes ntdb_check() and logs nothing.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+ /* Three checks per flag combination: open, check, no log messages. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-new_database.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ failtest_exit(exit_status());
+
+ /* failtest_suppress is raised only around ntdb_check(), so the
+ * open above is the call left exposed to failtest. */
+ failtest_suppress = true;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ failtest_suppress = false;
+ ntdb_close(ntdb);
+ if (!ok1(tap_log_messages == 0))
+ break;
+ }
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Calls ntdb_expand() twice (by 1 and by 1024) on a fresh database under
+ * each flag combination and checks that the mapped size grew by at least
+ * the requested amount times NTDB_EXTENSION_FACTOR.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ uint64_t val;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* Eleven checks per flag combination plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ /* failtest_suppress is lowered only around the two
+ * ntdb_expand() calls below. */
+ failtest_suppress = true;
+ ntdb = ntdb_open("run-expand.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ break;
+
+ /* Remember the size before expanding. */
+ val = ntdb->file->map_size;
+ /* Need some hash lock for expand. */
+ ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0);
+ failtest_suppress = false;
+ if (!ok1(ntdb_expand(ntdb, 1) == 0)) {
+ failtest_suppress = true;
+ ntdb_close(ntdb);
+ break;
+ }
+ failtest_suppress = true;
+
+ ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR);
+ ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Second, larger expansion from the new size. */
+ val = ntdb->file->map_size;
+ ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0);
+ failtest_suppress = false;
+ if (!ok1(ntdb_expand(ntdb, 1024) == 0)) {
+ failtest_suppress = true;
+ ntdb_close(ntdb);
+ break;
+ }
+ failtest_suppress = true;
+ ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0);
+ ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "layout.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Reads the free record at off and returns its length.
+ *
+ * Failures are reported through the same ntdb_len_t return value: the
+ * read error code if ntdb_read_convert() fails, or NTDB_ERR_CORRUPT if
+ * the record does not carry NTDB_FREE_MAGIC.
+ */
+static ntdb_len_t free_record_length(struct ntdb_context *ntdb, ntdb_off_t off)
+{
+	struct ntdb_free_record frec;
+	enum NTDB_ERROR err = ntdb_read_convert(ntdb, off, &frec, sizeof(frec));
+
+	if (err != NTDB_SUCCESS) {
+		return err;
+	}
+	if (frec_magic(&frec) == NTDB_FREE_MAGIC) {
+		return frec_len(&frec);
+	}
+	return NTDB_ERR_CORRUPT;
+}
+
+/*
+ * Exercises coalesce() against five hand-built layouts: a single free
+ * record at EOF, a free record followed by a used record, two free
+ * records (then EOF / then data) and three free records.
+ */
+int main(int argc, char *argv[])
+{
+ ntdb_off_t b_off, test;
+ struct ntdb_context *ntdb;
+ struct ntdb_layout *layout;
+ NTDB_DATA data, key;
+ ntdb_len_t len;
+
+ /* FIXME: Test NTDB_CONVERT */
+ /* FIXME: Test lock order fail. */
+
+ /* 7 + 7 + 9 + 9 + 9 checks for the five scenarios, plus the final
+ * log-message check. */
+ plan_tests(42);
+ data = ntdb_mkdata("world", 5);
+ key = ntdb_mkdata("hello", 5);
+
+ /* No coalescing can be done due to EOF */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ len = 15560;
+ ntdb_layout_add_free(layout, len, 0);
+ ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+ /* NOMMAP is for lockcheck. */
+ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
+ O_RDWR, 0, &tap_log_attr);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == len);
+
+ /* Figure out which bucket free entry is. */
+ b_off = bucket_off(ntdb->ftable_off, size_to_bucket(len));
+ /* Lock and fail to coalesce. */
+ ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+ test = layout->elem[1].base.off;
+ ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, len, &test)
+ == 0);
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ /* Record length and the offset passed via &test must be untouched. */
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == len);
+ ok1(test == layout->elem[1].base.off);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+
+ /* No coalescing can be done due to used record */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_free(layout, 15528, 0);
+ ntdb_layout_add_used(layout, key, data, 6);
+ ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+ /* NOMMAP is for lockcheck. */
+ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
+ O_RDWR, 0, &tap_log_attr);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Figure out which bucket free entry is. */
+ b_off = bucket_off(ntdb->ftable_off, size_to_bucket(15528));
+ /* Lock and fail to coalesce. */
+ ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+ test = layout->elem[1].base.off;
+ ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 15528, &test)
+ == 0);
+ ntdb_unlock_free_bucket(ntdb, b_off);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528);
+ ok1(test == layout->elem[1].base.off);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+
+ /* Coalescing can be done due to two free records, then EOF */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_free(layout, 1024, 0);
+ ntdb_layout_add_free(layout, 14520, 0);
+ ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+ /* NOMMAP is for lockcheck. */
+ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
+ O_RDWR, 0, &tap_log_attr);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+ ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14520);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Figure out which bucket (first) free entry is. */
+ b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+ /* Lock and coalesce. */
+ ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+ test = layout->elem[2].base.off;
+ /* Expected result: both payloads plus the header of the second
+ * record that got absorbed. */
+ ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+ == 1024 + sizeof(struct ntdb_used_record) + 14520);
+ /* Should tell us it's erased this one... */
+ ok1(test == NTDB_ERR_NOEXIST);
+ /* No locks may be left held; note there is no explicit
+ * ntdb_unlock_free_bucket() call in this scenario. */
+ ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off)
+ == 1024 + sizeof(struct ntdb_used_record) + 14520);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+
+ /* Coalescing can be done due to two free records, then data */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_free(layout, 1024, 0);
+ ntdb_layout_add_free(layout, 14488, 0);
+ ntdb_layout_add_used(layout, key, data, 6);
+ ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+ /* NOMMAP is for lockcheck. */
+ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
+ O_RDWR, 0, &tap_log_attr);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+ ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14488);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Figure out which bucket free entry is. */
+ b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+ /* Lock and coalesce. */
+ ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+ test = layout->elem[2].base.off;
+ ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+ == 1024 + sizeof(struct ntdb_used_record) + 14488);
+ ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off)
+ == 1024 + sizeof(struct ntdb_used_record) + 14488);
+ ok1(test == NTDB_ERR_NOEXIST);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+
+ /* Coalescing can be done due to three free records, then EOF */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_free(layout, 1024, 0);
+ ntdb_layout_add_free(layout, 512, 0);
+ ntdb_layout_add_free(layout, 13992, 0);
+ ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
+ /* NOMMAP is for lockcheck. */
+ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP|MAYBE_NOSYNC,
+ O_RDWR, 0, &tap_log_attr);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
+ ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512);
+ ok1(free_record_length(ntdb, layout->elem[3].base.off) == 13992);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Figure out which bucket free entry is. */
+ b_off = bucket_off(ntdb->ftable_off, size_to_bucket(1024));
+ /* Lock and coalesce. */
+ ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
+ test = layout->elem[2].base.off;
+ /* All three payloads plus the two absorbed record headers. */
+ ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
+ == 1024 + sizeof(struct ntdb_used_record) + 512
+ + sizeof(struct ntdb_used_record) + 13992);
+ ok1(ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off)
+ == 1024 + sizeof(struct ntdb_used_record) + 512
+ + sizeof(struct ntdb_used_record) + 13992);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* We rig the hash so all records clash. */
+/*
+ * Hash override: reads the first unsigned int of the key and shifts it
+ * left by 20 bits. len, seed and priv are ignored.
+ */
+static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv)
+{
+	const unsigned int *word = (const unsigned int *)key;
+
+	return *word << 20;
+}
+
+/*
+ * Walks find_and_lock(), add_to_hash() and delete_from_hash() through a
+ * database whose hash function is replaced by clash(), so that the keys
+ * 0..4 used here all land in top-level bucket 0 and end up in a chain.
+ * Repeated for every flag combination in flags[].
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ unsigned int v;
+ struct ntdb_used_record rec;
+ /* Key and data both alias v, so assigning v selects the record. */
+ NTDB_DATA key = { (unsigned char *)&v, sizeof(v) };
+ NTDB_DATA dbuf = { (unsigned char *)&v, sizeof(v) };
+ union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = clash } };
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT,
+ };
+
+ /* Chain the logging attribute behind the hash attribute. */
+ hattr.base.next = &tap_log_attr;
+
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 137 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ struct hash_info h;
+ ntdb_off_t new_off, new_off2, off;
+
+ ntdb = ntdb_open("run-04-basichash.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ v = 0;
+ /* Should not find it. */
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in top table, bucket 0. */
+ ok1(h.table == NTDB_HASH_OFFSET);
+ ok1(h.table_size == (1 << ntdb->hash_bits));
+ ok1(h.bucket == 0);
+ ok1(h.old_val == 0);
+
+ /* Should have lock on bucket 0 */
+ ok1(h.h == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ /* FIXME: Check lock length */
+
+ /* Allocate a new record. */
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ ok1(!NTDB_OFF_IS_ERR(new_off));
+
+ /* We should be able to add it now. */
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+
+ /* Make sure we fill it in for later finding. */
+ off = new_off + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+
+ /* We should be able to unlock that OK. */
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Database should be consistent. */
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Now, this should give a successful lookup. */
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located it in top table, bucket 0. */
+ ok1(h.table == NTDB_HASH_OFFSET);
+ ok1(h.table_size == (1 << ntdb->hash_bits));
+ ok1(h.bucket == 0);
+
+ /* Should have lock on bucket 0 */
+ ok1(h.h == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ /* FIXME: Check lock length */
+
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Database should be consistent. */
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Test expansion. */
+ v = 1;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located clash in toplevel bucket 0. */
+ ok1(h.table == NTDB_HASH_OFFSET);
+ ok1(h.table_size == (1 << ntdb->hash_bits));
+ ok1(h.bucket == 0);
+ ok1((h.old_val & NTDB_OFF_MASK) == new_off);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ /* FIXME: Check lock length */
+
+ new_off2 = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ ok1(!NTDB_OFF_IS_ERR(new_off2));
+
+ off = new_off2 + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+
+ /* We should be able to add it now. */
+ ok1(add_to_hash(ntdb, &h, new_off2) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Should be happy with expansion. */
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Should be able to find both. */
+ v = 1;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 1);
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ v = 0;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 0);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ /* FIXME: Check lock length */
+
+ /* Simple delete should work. */
+ ok1(delete_from_hash(ntdb, &h) == 0);
+ ok1(add_free_record(ntdb, new_off,
+ sizeof(struct ntdb_used_record)
+ + rec_key_length(&rec)
+ + rec_data_length(&rec)
+ + rec_extra_padding(&rec),
+ NTDB_LOCK_NOWAIT, false) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Should still be able to find other record. */
+ v = 1;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off2);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 1);
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Now should find empty space. */
+ v = 0;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain, bucket 0. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 0);
+ ok1(h.old_val == 0);
+
+ /* Adding another record should work. */
+ v = 2;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain, bucket 0. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 0);
+ ok1(h.old_val == 0);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ /* Check the offset just allocated, not the earlier new_off2. */
+ ok1(!NTDB_OFF_IS_ERR(new_off));
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ off = new_off + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+
+ /* Adding another record should cause expansion. */
+ v = 3;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should not have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 2);
+ ok1(h.old_val != 0);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ /* Check the offset just allocated, not the earlier new_off2. */
+ ok1(!NTDB_OFF_IS_ERR(new_off));
+ off = new_off + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Retrieve it and check. */
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have appended to chain, bucket 2. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 3);
+ ok1(h.bucket == 2);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* YA record: relocation. */
+ v = 4;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should not have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 3);
+ ok1(h.bucket == 3);
+ ok1(h.old_val != 0);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ /* Check the offset just allocated, not the earlier new_off2. */
+ ok1(!NTDB_OFF_IS_ERR(new_off));
+ off = new_off + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Retrieve it and check. */
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == new_off);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have appended to chain, bucket 3. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 4);
+ ok1(h.bucket == 3);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Creates a database holding one record, reopens it O_RDONLY and checks
+ * that fetch succeeds while both store variants fail with
+ * NTDB_ERR_RDONLY and each adds exactly one log message.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4), d;
+ union ntdb_attribute seed_attr;
+ /* Running count of log messages expected so far. */
+ unsigned int msgs = 0;
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+
+ /* Fixed seed attribute, chained in front of the logging attribute. */
+ seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
+ seed_attr.base.next = &tap_log_attr;
+ seed_attr.seed.seed = 0;
+
+ failtest_suppress = true;
+ /* Eleven checks per flag combination. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ /* NOTE(review): the result of this first open is not checked
+ * before it is passed to ntdb_store(). */
+ ntdb = ntdb_open("run-05-readonly-open.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600,
+ &seed_attr);
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ntdb_close(ntdb);
+
+ /* The read-only open and the operations on it run with
+ * failtest_suppress lowered. */
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-05-readonly-open.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDONLY, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ break;
+ ok1(tap_log_messages == msgs);
+ /* Fetch should succeed, stores should fail. */
+ if (!ok1(ntdb_fetch(ntdb, key, &d) == 0))
+ goto fail;
+ ok1(ntdb_deq(d, data));
+ free(d.dptr);
+ if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY)
+ == NTDB_ERR_RDONLY))
+ goto fail;
+ /* Each rejected store is expected to log exactly one message. */
+ ok1(tap_log_messages == ++msgs);
+ if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT)
+ == NTDB_ERR_RDONLY))
+ goto fail;
+ ok1(tap_log_messages == ++msgs);
+ failtest_suppress = true;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ ok1(tap_log_messages == msgs);
+ /* SIGH: failtest bug, it doesn't save the ntdb file because
+ * we have it read-only. If we go around again, it gets
+ * changed underneath us and things get screwy. */
+ if (failtest_has_failed())
+ break;
+ }
+ failtest_exit(exit_status());
+
+fail:
+ /* Shared error exit: close the database with failtest_suppress
+ * raised again. */
+ failtest_suppress = true;
+ ntdb_close(ntdb);
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Checks ntdb_store() flag semantics on an empty database: NTDB_MODIFY
+ * fails with NTDB_ERR_NOEXIST, the first NTDB_INSERT succeeds and a
+ * second NTDB_INSERT fails with NTDB_ERR_EXISTS.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+
+ /* failtest_suppress is lowered only around the ntdb_store() calls. */
+ failtest_suppress = true;
+ /* Seven checks per flag combination plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-10-simple-store.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ break;
+ /* Modify should fail. */
+ failtest_suppress = false;
+ if (!ok1(ntdb_store(ntdb, key, data, NTDB_MODIFY)
+ == NTDB_ERR_NOEXIST))
+ goto fail;
+ failtest_suppress = true;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* Insert should succeed. */
+ failtest_suppress = false;
+ if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0))
+ goto fail;
+ failtest_suppress = true;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* Second insert should fail. */
+ failtest_suppress = false;
+ if (!ok1(ntdb_store(ntdb, key, data, NTDB_INSERT)
+ == NTDB_ERR_EXISTS))
+ goto fail;
+ failtest_suppress = true;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ }
+ ok1(tap_log_messages == 0);
+ failtest_exit(exit_status());
+
+fail:
+ /* Shared error exit: close the database with failtest_suppress
+ * raised again. */
+ failtest_suppress = true;
+ ntdb_close(ntdb);
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Checks ntdb_fetch(): NTDB_ERR_NOEXIST on an empty database, then
+ * NTDB_SUCCESS with matching data once the record has been inserted.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+
+ /* failtest_suppress is lowered only around the ntdb_fetch() calls. */
+ failtest_suppress = true;
+ /* Eight checks per flag combination plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-11-simple-fetch.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (ntdb) {
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+
+ /* fetch should fail. */
+ failtest_suppress = false;
+ if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_ERR_NOEXIST))
+ goto fail;
+ failtest_suppress = true;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* Insert should succeed. */
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* Fetch should now work. */
+ failtest_suppress = false;
+ if (!ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS))
+ goto fail;
+ failtest_suppress = true;
+ ok1(ntdb_deq(d, data));
+ /* The fetched buffer is released by the caller. */
+ free(d.dptr);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ntdb_close(ntdb);
+ }
+ }
+ ok1(tap_log_messages == 0);
+ failtest_exit(exit_status());
+
+fail:
+ /* Shared error exit: close the database with failtest_suppress
+ * raised again. */
+ failtest_suppress = true;
+ ntdb_close(ntdb);
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include "../private.h"
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Stores one record under each flag combination and then runs
+ * ntdb_check() with failtest_suppress lowered.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_INTERNAL,
+ NTDB_INTERNAL|NTDB_CONVERT,
+ NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data = ntdb_mkdata("data", 4);
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+
+ failtest_suppress = true;
+ /* Three checks per flag combination plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-12-check.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ /* NOTE(review): ntdb is used below even if this check fails. */
+ ok1(ntdb);
+ ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+
+ /* This is what we really want to test: ntdb_check(). */
+ failtest_suppress = false;
+ if (!ok1(ntdb_check(ntdb, NULL, NULL) == 0))
+ goto fail;
+ failtest_suppress = true;
+
+ ntdb_close(ntdb);
+ }
+ ok1(tap_log_messages == 0);
+ failtest_exit(exit_status());
+
+fail:
+ /* Shared error exit: close the database with failtest_suppress
+ * raised again. */
+ failtest_suppress = true;
+ ntdb_close(ntdb);
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/ilog/ilog.h>
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+#define MAX_SIZE 13100
+#define SIZE_STEP 131
+
+/*
+ * Returns the offset find_and_lock() reports for key, releasing the read
+ * lock on the hash before returning. Returns 0 when find_and_lock()
+ * yields an error offset.
+ */
+static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key)
+{
+	struct hash_info hinfo;
+	struct ntdb_used_record rec;
+	ntdb_off_t where = find_and_lock(ntdb, key, F_RDLCK, &hinfo, &rec, NULL);
+
+	if (!NTDB_OFF_IS_ERR(where)) {
+		ntdb_unlock_hash(ntdb, hinfo.h, F_RDLCK);
+		return where;
+	}
+	return 0;
+}
+
+/*
+ * Grows a single record in SIZE_STEP increments, first by replacing it
+ * with ntdb_store() and then with ntdb_append(), counting how often the
+ * record's offset changes. A third pass appends MAX_SIZE bytes at once.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j, moves;
+ struct ntdb_context *ntdb;
+ unsigned char *buffer;
+ /* oldoff is initialised once here and never reset between databases. */
+ ntdb_off_t oldoff = 0, newoff;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ NTDB_DATA key = ntdb_mkdata("key", 3);
+ NTDB_DATA data;
+
+ /* Fill the source buffer with a byte pattern derived from the index.
+ * NOTE(review): the malloc() result is not checked. */
+ buffer = malloc(MAX_SIZE);
+ for (i = 0; i < MAX_SIZE; i++)
+ buffer[i] = i;
+
+ /* Per flag combination: the store and append passes each run 3 checks
+ * plus 5 per size step; the huge-append pass runs 7. One final log
+ * check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0])
+ * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7)
+ + 1);
+
+ /* Using ntdb_store. */
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ moves = 0;
+ for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
+ /* Replace the record with the first j bytes of buffer. */
+ data.dptr = buffer;
+ data.dsize = j;
+ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+ ok1(data.dsize == j);
+ ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+ free(data.dptr);
+ /* Count every change of the record's offset. */
+ newoff = ntdb_offset(ntdb, key);
+ if (newoff != oldoff)
+ moves++;
+ oldoff = newoff;
+ }
+ /* No locks may be left behind (skipped when there is no file). */
+ ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0));
+ /* We should increase by 50% each time... */
+ ok(moves <= ilog64(j / SIZE_STEP)*2,
+ "Moved %u times", moves);
+ ntdb_close(ntdb);
+ }
+
+ /* Using ntdb_append. */
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ size_t prev_len = 0;
+ ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ moves = 0;
+ for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
+ /* Append only the bytes beyond what is already stored. */
+ data.dptr = buffer + prev_len;
+ data.dsize = j - prev_len;
+ ok1(ntdb_append(ntdb, key, data) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+ ok1(data.dsize == j);
+ ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+ free(data.dptr);
+ /* Only the size field is read after the free() above. */
+ prev_len = data.dsize;
+ newoff = ntdb_offset(ntdb, key);
+ if (newoff != oldoff)
+ moves++;
+ oldoff = newoff;
+ }
+ ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0));
+ /* We should increase by 50% each time... */
+ ok(moves <= ilog64(j / SIZE_STEP)*2,
+ "Moved %u times", moves);
+ ntdb_close(ntdb);
+ }
+
+ /* Single large append into a database that has no record yet. */
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-append.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Huge initial store. */
+ data.dptr = buffer;
+ data.dsize = MAX_SIZE;
+ ok1(ntdb_append(ntdb, key, data) == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_fetch(ntdb, key, &data) == NTDB_SUCCESS);
+ ok1(data.dsize == MAX_SIZE);
+ ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
+ free(data.dptr);
+ ok1(!ntdb->file || (ntdb->file->allrecord_lock.count == 0
+ && ntdb->file->num_lockrecs == 0));
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ free(buffer);
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* Number of records stored; with badhash() they all share one hash value. */
+#define OVERLOAD 100
+
+/* Degenerate hash function: every key gets hash value 0. */
+static uint32_t badhash(const void *key, size_t len, uint32_t seed, void *priv)
+{
+	/* All inputs are deliberately ignored. */
+	(void)key;
+	(void)len;
+	(void)seed;
+	(void)priv;
+
+	return (uint32_t)0;
+}
+
+/*
+ * Traverse callback: when p is non-NULL, delete the record being visited
+ * and return ntdb_delete()'s result; otherwise return 0.
+ */
+static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *p)
+{
+	return p ? ntdb_delete(ntdb, key) : 0;
+}
+
+/*
+ * Give every key the same hash (via badhash) and check that store, fetch,
+ * traverse and delete still work, for each flag combination.
+ * Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ struct ntdb_context *ntdb;
+ /* key and dbuf both point at j: changing j changes the key and the data. */
+ NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+ NTDB_DATA dbuf = { (unsigned char *)&j, sizeof(j) };
+ union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = badhash } };
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT,
+ };
+
+ /* Chain the logging attribute behind the hash attribute. */
+ hattr.base.next = &tap_log_attr;
+
+ /* Per flag set: 7 * OVERLOAD + 11 ok1() calls; plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * (7 * OVERLOAD + 11) + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
+
+ ntdb = ntdb_open("run-25-hashoverload.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Overload a bucket. */
+ for (j = 0; j < OVERLOAD; j++) {
+ ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+ }
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Check we can find them all. */
+ for (j = 0; j < OVERLOAD; j++) {
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == sizeof(j));
+ ok1(d.dptr != NULL);
+ ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
+ free(d.dptr);
+ }
+
+ /* Traverse through them. */
+ ok1(ntdb_traverse(ntdb, trav, NULL) == OVERLOAD);
+
+ /* Delete the first 99. */
+ for (j = 0; j < OVERLOAD-1; j++)
+ ok1(ntdb_delete(ntdb, key) == 0);
+
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* j is now OVERLOAD-1, so this fetches the one record left. */
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == sizeof(j));
+ ok1(d.dptr != NULL);
+ ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
+ free(d.dptr);
+
+ /* Traverse through them. */
+ ok1(ntdb_traverse(ntdb, trav, NULL) == 1);
+
+ /* Re-add */
+ for (j = 0; j < OVERLOAD-1; j++) {
+ ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+ }
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Now try deleting as we go. */
+ /* Any non-NULL private pointer makes trav() delete; trav itself is used. */
+ ok1(ntdb_traverse(ntdb, trav, trav) == OVERLOAD);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb_traverse(ntdb, trav, NULL) == 0);
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Report whether every bucket of the free table at ntdb->ftable_off is
+ * zero.  Aborts the test program if the free table cannot be read.
+ */
+static bool empty_freetable(struct ntdb_context *ntdb)
+{
+	struct ntdb_freetable table;
+	const unsigned int nbuckets
+		= sizeof(table.buckets) / sizeof(table.buckets[0]);
+	unsigned int b = 0;
+
+	if (ntdb_read_convert(ntdb, ntdb->ftable_off, &table, sizeof(table)) != 0)
+		abort();
+
+	/* A single non-zero bucket means there is still a free record. */
+	while (b < nbuckets) {
+		if (table.buckets[b] != 0)
+			return false;
+		b++;
+	}
+	return true;
+}
+
+
+/*
+ * Check that the free table is completely used up before the database
+ * file is expanded.
+ *
+ * For each flag combination: fill most of a fresh database with one large
+ * record, then insert minimal records until the mapped size changes.  The
+ * free table must have been empty just before the store that triggered
+ * the expansion, and non-empty afterwards.
+ *
+ * Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j;
+	struct ntdb_context *ntdb;
+	int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+
+	/* Seven ok1() calls per flag set, plus the final log-message check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
+
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		NTDB_DATA k, d;
+		uint64_t size;
+		bool was_empty = false;
+
+		/* The key aliases j, so each value of j is a distinct key. */
+		k.dptr = (void *)&j;
+		k.dsize = sizeof(j);
+
+		ntdb = ntdb_open("run-30-exhaust-before-expand.ntdb",
+				 flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(ntdb);
+		if (!ntdb)
+			continue;
+
+		ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+		/* There's one empty record in initial db. */
+		ok1(!empty_freetable(ntdb));
+
+		size = ntdb->file->map_size;
+
+		/* Create one record to chew up most space. */
+		d.dsize = size - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 32;
+		d.dptr = calloc(d.dsize, 1);
+		if (!d.dptr)
+			err(1, "Failed to allocate initial record");
+		j = 0;
+		ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
+		ok1(ntdb->file->map_size == size);
+		free(d.dptr);
+
+		/* Now insert minimal-length records until we expand. */
+		for (j = 1; ntdb->file->map_size == size; j++) {
+			/* Remember the state just before each store. */
+			was_empty = empty_freetable(ntdb);
+			if (ntdb_store(ntdb, k, k, NTDB_INSERT) != 0)
+				err(1, "Failed to store record %u", j);
+		}
+
+		/* Would have been empty before expansion, but no longer. */
+		ok1(was_empty);
+		ok1(!empty_freetable(ntdb));
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "../private.h"
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <ccan/failtest/failtest.h>
+#include "logging.h"
+#include "failtest_helper.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Check how NTDB_CONVERT interacts with reopening an existing database,
+ * under failtest failure injection.  Each early failtest_exit() ends the
+ * run as soon as an injected failure has been observed.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, messages = 0;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+ /* CONVERT flag sets run 3 ok1() calls, the others 5: 4 on average. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 4);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-35-convert.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ if (!ok1(ntdb))
+ failtest_exit(exit_status());
+
+ ntdb_close(ntdb);
+ /* We can fail in log message formatting or open. That's OK */
+ if (failtest_has_failed()) {
+ failtest_exit(exit_status());
+ }
+ /* If we say NTDB_CONVERT, it must be converted */
+ ntdb = ntdb_open("run-35-convert.ntdb",
+ flags[i]|NTDB_CONVERT|MAYBE_NOSYNC,
+ O_RDWR, 0600, &tap_log_attr);
+ if (flags[i] & NTDB_CONVERT) {
+ if (!ntdb)
+ failtest_exit(exit_status());
+ ok1(ntdb_get_flags(ntdb) & NTDB_CONVERT);
+ ntdb_close(ntdb);
+ } else {
+ /* File was created without NTDB_CONVERT: open must fail with EIO
+ * and log exactly one new message mentioning NTDB_CONVERT. */
+ if (!ok1(!ntdb && errno == EIO))
+ failtest_exit(exit_status());
+ ok1(tap_log_messages == ++messages);
+ if (!ok1(log_last && strstr(log_last, "NTDB_CONVERT")))
+ failtest_exit(exit_status());
+ }
+
+ /* If we don't say NTDB_CONVERT, it *may* be converted */
+ ntdb = ntdb_open("run-35-convert.ntdb",
+ (flags[i] & ~NTDB_CONVERT)|MAYBE_NOSYNC,
+ O_RDWR, 0600, &tap_log_attr);
+ if (!ntdb)
+ failtest_exit(exit_status());
+ ok1(ntdb_get_flags(ntdb) == (flags[i]|MAYBE_NOSYNC));
+ ntdb_close(ntdb);
+ }
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "layout.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Build a database layout with three free tables and check that
+ * successive get_free() calls return the expected free records and leave
+ * ntdb->ftable_off pointing at the expected free table.
+ * Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+ ntdb_off_t off;
+ struct ntdb_context *ntdb;
+ struct ntdb_layout *layout;
+ NTDB_DATA key, data;
+ union ntdb_attribute seed;
+
+ /* This seed value previously tickled a layout.c bug. */
+ seed.base.attr = NTDB_ATTRIBUTE_SEED;
+ seed.seed.seed = 0xb1142bc054d035b4ULL;
+ seed.base.next = &tap_log_attr;
+
+ plan_tests(11);
+ key = ntdb_mkdata("Hello", 5);
+ data = ntdb_mkdata("world", 5);
+
+ /* Create a NTDB with three free tables. */
+ /* Layout elements are added in order: 0-2 are the free tables, 3, 5, 7
+ * and 9 the free records, 4, 6 and 8 the used records between them. */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_free(layout, 80, 0);
+ /* Used records prevent coalescing. */
+ ntdb_layout_add_used(layout, key, data, 6);
+ ntdb_layout_add_free(layout, 160, 1);
+ /* Shorten the key each time so every used record has a different key. */
+ key.dsize--;
+ ntdb_layout_add_used(layout, key, data, 7);
+ ntdb_layout_add_free(layout, 320, 2);
+ key.dsize--;
+ ntdb_layout_add_used(layout, key, data, 8);
+ ntdb_layout_add_free(layout, 40, 0);
+ ntdb = ntdb_layout_get(layout, free, &seed);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0,
+ NTDB_USED_MAGIC);
+ ok1(off == layout->elem[3].base.off);
+ ok1(ntdb->ftable_off == layout->elem[0].base.off);
+
+ off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0,
+ NTDB_USED_MAGIC);
+ ok1(off == layout->elem[5].base.off);
+ ok1(ntdb->ftable_off == layout->elem[1].base.off);
+
+ off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0,
+ NTDB_USED_MAGIC);
+ ok1(off == layout->elem[7].base.off);
+ ok1(ntdb->ftable_off == layout->elem[2].base.off);
+
+ off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0,
+ NTDB_USED_MAGIC);
+ ok1(off == layout->elem[9].base.off);
+ ok1(ntdb->ftable_off == layout->elem[0].base.off);
+
+ /* Now we fail. */
+ off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC);
+ ok1(off == 0);
+
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "../private.h"
+#include <unistd.h>
+#include "lock-tracking.h"
+
+/* Checking wrappers; their definitions follow the ntdb-source.h include. */
+static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
+static ssize_t write_check(int fd, const void *buf, size_t count);
+static int ftruncate_check(int fd, off_t length);
+
+/* Redirect these calls in the ntdb sources included right after this. */
+#define pwrite pwrite_check
+#define write write_check
+#define fcntl fcntl_with_lockcheck
+#define ftruncate ftruncate_check
+
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include "external-agent.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* External agent handle, set up by prepare_external_agent() in main(). */
+static struct agent *agent;
+/* While true, the I/O and unlock hooks run check_file_intact(). */
+static bool opened;
+/* Number of problems recorded by check_file_intact(). */
+static int errors = 0;
+#define TEST_DBNAME "run-56-open-during-transaction.ntdb"
+
+/* Remove the redirections so the wrappers below call the real functions. */
+#undef write
+#undef pwrite
+#undef fcntl
+#undef ftruncate
+
+/*
+ * Return true if the first len bytes of snapshot and latest are identical.
+ * A len of zero or less always compares equal.
+ */
+static bool is_same(const char *snapshot, const char *latest, off_t len)
+{
+	/* Nothing to compare; this also keeps a negative off_t from being
+	 * converted into a huge size_t for memcmp(). */
+	if (len <= 0)
+		return true;
+
+	/* memcmp() takes the full length, unlike an unsigned int counter
+	 * which can never reach a len above UINT_MAX. */
+	return memcmp(snapshot, latest, (size_t)len) == 0;
+}
+
+/*
+ * Return true if the file behind fd is exactly snapshot_len bytes long
+ * and those bytes equal snapshot.
+ */
+static bool compare_file(int fd, const char *snapshot, off_t snapshot_len)
+{
+	bool matches = false;
+	/* Ask for one byte more than expected: a longer file then shows up
+	 * as a read count that differs from snapshot_len. */
+	char *current = malloc(snapshot_len+1);
+
+	if (pread(fd, current, snapshot_len+1, 0) == snapshot_len)
+		matches = is_same(snapshot, current, snapshot_len);
+
+	free(current);
+	return matches;
+}
+
+/*
+ * Snapshot the file behind fd, ask the external agent to open
+ * TEST_DBNAME, and verify that the file contents did not change.
+ *
+ * The agent may either open the database (it is then asked to close it
+ * again) or report WOULD_HAVE_BLOCKED; any other result, and any change
+ * to the file, is reported with diag() and counted in errors.
+ */
+static void check_file_intact(int fd)
+{
+	enum agent_return ret;
+	struct stat st;
+	char *contents;
+
+	/* Without a valid size the snapshot below would be meaningless. */
+	if (fstat(fd, &st) != 0) {
+		diag("Stat fail");
+		errors++;
+		return;
+	}
+
+	contents = malloc(st.st_size);
+	/* malloc(0) may legitimately return NULL, so only treat NULL as a
+	 * failure when there is something to read. */
+	if ((!contents && st.st_size != 0)
+	    || pread(fd, contents, st.st_size, 0) != st.st_size) {
+		diag("Read fail");
+		errors++;
+		/* Do not leak the snapshot buffer on the error path. */
+		free(contents);
+		return;
+	}
+
+	/* Ask agent to open file. */
+	ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
+
+	/* It's OK to open it, but it must not have changed! */
+	if (!compare_file(fd, contents, st.st_size)) {
+		diag("Agent changed file after opening %s",
+		     agent_return_name(ret));
+		errors++;
+	}
+
+	if (ret == SUCCESS) {
+		ret = external_agent_operation(agent, CLOSE, NULL);
+		if (ret != SUCCESS) {
+			diag("Agent failed to close ntdb: %s",
+			     agent_return_name(ret));
+			errors++;
+		}
+	} else if (ret != WOULD_HAVE_BLOCKED) {
+		diag("Agent opening file gave %s",
+		     agent_return_name(ret));
+		errors++;
+	}
+
+	free(contents);
+}
+
+/* Unlock callback: while `opened` is set, re-verify the file behind fd. */
+static void after_unlock(int fd)
+{
+	if (!opened)
+		return;
+
+	check_file_intact(fd);
+}
+
+/*
+ * Replacement for pwrite() inside the ntdb sources: while `opened` is set
+ * the file-intact check runs first, then the call is forwarded to the real
+ * pwrite() and its result returned.
+ */
+static ssize_t pwrite_check(int fd,
+			    const void *buf, size_t count, off_t offset)
+{
+	if (!opened)
+		return pwrite(fd, buf, count, offset);
+
+	check_file_intact(fd);
+	return pwrite(fd, buf, count, offset);
+}
+
+/*
+ * Replacement for write() inside the ntdb sources: while `opened` is set
+ * the file-intact check runs first, then the call is forwarded to the real
+ * write() and its result returned.
+ */
+static ssize_t write_check(int fd, const void *buf, size_t count)
+{
+	if (!opened)
+		return write(fd, buf, count);
+
+	check_file_intact(fd);
+	return write(fd, buf, count);
+}
+
+/*
+ * Replacement for ftruncate() inside the ntdb sources: while `opened` is
+ * set the file-intact check runs first, then the call is forwarded to the
+ * real ftruncate() and its result returned.
+ */
+static int ftruncate_check(int fd, off_t length)
+{
+	if (!opened)
+		return ftruncate(fd, length);
+
+	check_file_intact(fd);
+	return ftruncate(fd, length);
+}
+
+/*
+ * Check that the external agent opening the database while a transaction
+ * is in progress never changes the file.
+ *
+ * For each flag combination a fresh database is created and one record is
+ * stored inside a transaction.  While `opened` is set, every write,
+ * pwrite, ftruncate and unlock made by ntdb runs check_file_intact(),
+ * which counts problems in `errors`.
+ *
+ * Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+	const int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+			      NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+	/* Unsigned, to match the sizeof-based loop bound. */
+	unsigned int i;
+	struct ntdb_context *ntdb;
+	NTDB_DATA key, data;
+
+	plan_tests(sizeof(flags)/sizeof(flags[0]) * 5);
+	agent = prepare_external_agent();
+	if (!agent)
+		err(1, "preparing agent");
+
+	unlock_callback = after_unlock;
+	for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
+		diag("Test with %s and %s\n",
+		     (flags[i] & NTDB_CONVERT) ? "CONVERT" : "DEFAULT",
+		     (flags[i] & NTDB_NOMMAP) ? "no mmap" : "mmap");
+		unlink(TEST_DBNAME);
+		ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(ntdb);
+		/* Nothing below can run on a NULL handle; the failed ok1()
+		 * and the resulting plan mismatch report the problem. */
+		if (!ntdb)
+			continue;
+
+		/* Arm the hooks only for the transaction itself. */
+		opened = true;
+		ok1(ntdb_transaction_start(ntdb) == 0);
+		key = ntdb_mkdata("hi", strlen("hi"));
+		data = ntdb_mkdata("world", strlen("world"));
+
+		ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+		ok1(ntdb_transaction_commit(ntdb) == 0);
+		/* errors is a signed int, so print it as one. */
+		ok(!errors, "We had %i open errors", errors);
+
+		opened = false;
+		ntdb_close(ntdb);
+	}
+
+	return exit_status();
+}
--- /dev/null
+#include "../private.h"
+#include <unistd.h>
+#include "lock-tracking.h"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include <assert.h>
+/* Death-injecting wrappers; their definitions follow the ntdb-source.h include. */
+static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
+static ssize_t write_check(int fd, const void *buf, size_t count);
+static int ftruncate_check(int fd, off_t length);
+
+/* Redirect these calls in the ntdb sources included further down. */
+#define pwrite pwrite_check
+#define write write_check
+#define fcntl fcntl_with_lockcheck
+#define ftruncate ftruncate_check
+
+/* There's a malloc inside transaction_setup_recovery, and valgrind complains
+ * when we longjmp and leak it. */
+/* Number of slots in the allocation tracking table. */
+#define MAX_ALLOCATIONS 10
+/* Tracked live allocations; a NULL entry marks a free slot. */
+static void *allocated[MAX_ALLOCATIONS];
+/* Highest slot index used so far; each new maximum is reported via diag(). */
+static unsigned max_alloc = 0;
+
+/*
+ * malloc() replacement that records the returned pointer in the first
+ * free slot of allocated[], so free_all() can release it later.
+ * Aborts the test program when all MAX_ALLOCATIONS slots are in use.
+ */
+static void *malloc_noleak(size_t len)
+{
+	unsigned int slot = 0;
+
+	/* Find the first unused tracking slot. */
+	while (slot < MAX_ALLOCATIONS && allocated[slot] != NULL)
+		slot++;
+
+	if (slot == MAX_ALLOCATIONS) {
+		diag("Too many allocations!");
+		abort();
+	}
+
+	allocated[slot] = malloc(len);
+	if (slot > max_alloc) {
+		/* Report each new high-water mark of slots in use. */
+		max_alloc = slot;
+		diag("max_alloc: %i", max_alloc);
+	}
+	return allocated[slot];
+}
+
+/*
+ * realloc() replacement that keeps allocated[] in sync.
+ *
+ * p must be a pointer currently tracked in allocated[]; a NULL p matches
+ * the first free slot, so the result is tracked like a fresh allocation.
+ * Aborts the test program if p is not found in the table.
+ *
+ * Returns whatever realloc() returned.
+ */
+static void *realloc_noleak(void *p, size_t size)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_ALLOCATIONS; i++) {
+		void *resized;
+
+		if (allocated[i] != p)
+			continue;
+
+		if (i > max_alloc) {
+			max_alloc = i;
+			diag("max_alloc: %i", max_alloc);
+		}
+
+		resized = realloc(p, size);
+		/* When realloc() fails the old block is still allocated, so
+		 * keep tracking it rather than overwriting the slot with
+		 * NULL; otherwise free_all() could never release it.  A
+		 * zero-size request keeps the previous behaviour of storing
+		 * realloc()'s result. */
+		if (resized != NULL || size == 0)
+			allocated[i] = resized;
+		return resized;
+	}
+	diag("Untracked realloc!");
+	abort();
+}
+
+/*
+ * free() replacement: forget p in allocated[] if it is tracked there,
+ * then free it.
+ */
+static void free_noleak(void *p)
+{
+	unsigned int slot = 0;
+
+	/* We don't catch asprintf, so an untracked pointer is not an error. */
+	while (slot < MAX_ALLOCATIONS) {
+		if (allocated[slot] == p) {
+			allocated[slot] = NULL;
+			break;
+		}
+		slot++;
+	}
+	free(p);
+}
+
+/* Free every allocation still tracked in allocated[] and clear the table. */
+static void free_all(void)
+{
+	void **slot;
+
+	for (slot = allocated; slot < allocated + MAX_ALLOCATIONS; slot++) {
+		free(*slot);
+		*slot = NULL;
+	}
+}
+
+#define malloc malloc_noleak
+#define free(x) free_noleak(x)
+#define realloc realloc_noleak
+
+#include "ntdb-source.h"
+
+#undef malloc
+#undef free
+#undef realloc
+#undef write
+#undef pwrite
+#undef fcntl
+#undef ftruncate
+
+#include <stdbool.h>
+#include <stdarg.h>
+#include <ccan/err/err.h>
+#include <setjmp.h>
+#include "external-agent.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* Set by test_death() around the transaction under test; arms maybe_die(). */
+static bool in_transaction;
+/* maybe_die() jumps on the armed call where current equals target. */
+static int target, current;
+/* Jump buffer filled by setjmp() in test_death(). */
+static jmp_buf jmpbuf;
+#define TEST_DBNAME "run-57-die-during-transaction.ntdb"
+#define KEY_STRING "helloworld"
+#define DATA_STRING "Helloworld"
+
+/*
+ * Possible death point.  Inside the transaction under test, each call
+ * advances `current`; the call on which it equalled `target` jumps back
+ * to the setjmp() in test_death().  fd is unused.
+ */
+static void maybe_die(int fd)
+{
+	if (!in_transaction)
+		return;
+
+	/* The counter only advances while the transaction is armed. */
+	if (current++ != target)
+		return;
+
+	longjmp(jmpbuf, 1);
+}
+
+/*
+ * Replacement for pwrite() inside the ntdb sources: offers a death point
+ * before the real pwrite(), and another one after it when the write was
+ * complete.  Returns the real pwrite()'s result.
+ */
+static ssize_t pwrite_check(int fd,
+			    const void *buf, size_t count, off_t offset)
+{
+	ssize_t written;
+
+	maybe_die(fd);
+
+	written = pwrite(fd, buf, count, offset);
+	/* A short or failed write is handed back without a second chance. */
+	if (written == count)
+		maybe_die(fd);
+
+	return written;
+}
+
+/*
+ * Replacement for write() inside the ntdb sources: offers a death point
+ * before the real write(), and another one after it when the write was
+ * complete.  Returns the real write()'s result.
+ */
+static ssize_t write_check(int fd, const void *buf, size_t count)
+{
+	ssize_t written;
+
+	maybe_die(fd);
+
+	written = write(fd, buf, count);
+	/* A short or failed write is handed back without a second chance. */
+	if (written == count)
+		maybe_die(fd);
+
+	return written;
+}
+
+/*
+ * Replacement for ftruncate() inside the ntdb sources: offers a death
+ * point both before and after the real ftruncate(), whose result is
+ * returned.
+ */
+static int ftruncate_check(int fd, off_t length)
+{
+	int rc;
+
+	/* First death point: before the file is touched. */
+	maybe_die(fd);
+	rc = ftruncate(fd, length);
+	/* Second death point: after the call, whatever its outcome. */
+	maybe_die(fd);
+
+	return rc;
+}
+
+/*
+ * Simulate dying at every death point of a transaction and check that the
+ * external agent can carry on with the database afterwards.
+ *
+ * Each round recreates TEST_DBNAME, stores a key, and runs a store inside
+ * a transaction with maybe_die() armed.  When maybe_die() jumps back here,
+ * the agent is asked whether recovery is needed, to run `op`, to confirm
+ * no recovery is pending any more, and to check and close the database;
+ * then `target` is advanced and the next round starts.  The loop ends with
+ * the first round in which the transaction commits without jumping.
+ *
+ * op:                  agent operation to run after each simulated death.
+ * agent:               external agent handle.
+ * pre_create_recovery: if true, the initial store is itself wrapped in a
+ *                      transaction.
+ *
+ * Returns true when a round completes (three ok1() checks are then
+ * emitted), false on any failure along the way.
+ */
+static bool test_death(enum operation op, struct agent *agent,
+ bool pre_create_recovery)
+{
+ struct ntdb_context *ntdb = NULL;
+ NTDB_DATA key, data;
+ enum agent_return ret;
+ int needed_recovery = 0;
+
+ current = target = 0;
+ /* Big long data to force a change. */
+ data = ntdb_mkdata(DATA_STRING, strlen(DATA_STRING));
+
+reset:
+ unlink(TEST_DBNAME);
+ ntdb = ntdb_open(TEST_DBNAME, NTDB_NOMMAP|MAYBE_NOSYNC,
+ O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr);
+ if (!ntdb) {
+ diag("Failed opening NTDB: %s", strerror(errno));
+ return false;
+ }
+
+ /* Non-zero means maybe_die() jumped back here from inside the transaction. */
+ if (setjmp(jmpbuf) != 0) {
+ /* We're partway through. Simulate our death. */
+ close(ntdb->file->fd);
+ forget_locking();
+ in_transaction = false;
+
+ /* FAILED here just means no recovery was needed at this step. */
+ ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
+ if (ret == SUCCESS)
+ needed_recovery++;
+ else if (ret != FAILED) {
+ diag("Step %u agent NEEDS_RECOVERY = %s", current,
+ agent_return_name(ret));
+ return false;
+ }
+
+ /* Could be key, or data. */
+ ret = external_agent_operation(agent, op,
+ KEY_STRING "=" KEY_STRING);
+ if (ret != SUCCESS) {
+ ret = external_agent_operation(agent, op,
+ KEY_STRING
+ "=" DATA_STRING);
+ }
+ if (ret != SUCCESS) {
+ diag("Step %u op %s failed = %s", current,
+ operation_name(op),
+ agent_return_name(ret));
+ return false;
+ }
+
+ /* After the agent's operation no recovery may be pending any more. */
+ ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
+ if (ret != FAILED) {
+ diag("Still needs recovery after step %u = %s",
+ current, agent_return_name(ret));
+ return false;
+ }
+
+ ret = external_agent_operation(agent, CHECK, "");
+ if (ret != SUCCESS) {
+ diag("Step %u check failed = %s", current,
+ agent_return_name(ret));
+ return false;
+ }
+
+ ret = external_agent_operation(agent, CLOSE, "");
+ if (ret != SUCCESS) {
+ diag("Step %u close failed = %s", current,
+ agent_return_name(ret));
+ return false;
+ }
+
+ /* Suppress logging as this tries to use closed fd. */
+ suppress_logging = true;
+ suppress_lockcheck = true;
+ ntdb_close(ntdb);
+ suppress_logging = false;
+ suppress_lockcheck = false;
+ /* Next round dies one death point later. */
+ target++;
+ current = 0;
+ /* Release allocations that were still tracked when we jumped. */
+ free_all();
+ goto reset;
+ }
+
+ /* Put key for agent to fetch. */
+ key = ntdb_mkdata(KEY_STRING, strlen(KEY_STRING));
+
+ if (pre_create_recovery) {
+ /* Using a transaction now means we allocate the recovery
+ * area immediately. That makes the later transaction smaller
+ * and thus tickles a bug we had. */
+ if (ntdb_transaction_start(ntdb) != 0)
+ return false;
+ }
+ if (ntdb_store(ntdb, key, key, NTDB_INSERT) != 0)
+ return false;
+ if (pre_create_recovery) {
+ if (ntdb_transaction_commit(ntdb) != 0)
+ return false;
+ }
+
+ /* This is the key we insert in transaction. */
+ key.dsize--;
+
+ ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
+ if (ret != SUCCESS)
+ errx(1, "Agent failed to open: %s", agent_return_name(ret));
+
+ ret = external_agent_operation(agent, FETCH, KEY_STRING "=" KEY_STRING);
+ if (ret != SUCCESS)
+ errx(1, "Agent failed find key: %s", agent_return_name(ret));
+
+ /* From here until commit returns, maybe_die() is armed. */
+ in_transaction = true;
+ if (ntdb_transaction_start(ntdb) != 0)
+ return false;
+
+ if (ntdb_store(ntdb, key, data, NTDB_INSERT) != 0)
+ return false;
+
+ if (ntdb_transaction_commit(ntdb) != 0)
+ return false;
+
+ in_transaction = false;
+
+ /* We made it! */
+ diag("Completed %u runs", current);
+ ntdb_close(ntdb);
+ ret = external_agent_operation(agent, CLOSE, "");
+ if (ret != SUCCESS) {
+ diag("Step %u close failed = %s", current,
+ agent_return_name(ret));
+ return false;
+ }
+
+ /* At least one of the simulated deaths must have required recovery. */
+ ok1(needed_recovery);
+ ok1(locking_errors == 0);
+ ok1(forget_locking() == 0);
+ locking_errors = 0;
+ return true;
+}
+
+/*
+ * Run test_death() for each agent operation, first without and then with
+ * the pre-created recovery area.  Six runs of four checks each (one here,
+ * three inside test_death()) give the planned 24 tests.
+ * Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+	enum operation ops[] = { FETCH, STORE, TRANSACTION_START };
+	const size_t num_ops = sizeof(ops)/sizeof(ops[0]);
+	struct agent *agent;
+	int i, j;
+
+	plan_tests(24);
+
+	/* Hooks: every unlock is a death point, and the agent frees through
+	 * the tracking allocator. */
+	unlock_callback = maybe_die;
+	external_agent_free = free_noleak;
+
+	agent = prepare_external_agent();
+	if (agent == NULL)
+		err(1, "preparing agent");
+
+	for (j = 0; j < 2; j++) {
+		const char *recovery = j ? "with" : "without";
+
+		for (i = 0; i < num_ops; i++) {
+			diag("Testing %s after death (%s recovery area)",
+			     operation_name(ops[i]), recovery);
+			ok1(test_death(ops[i], agent, j));
+		}
+	}
+
+	free_external_agent(agent);
+	return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* The largest 32-bit value which is still a multiple of NTDB_PGSIZE */
+#define ALMOST_4G ((uint32_t)-NTDB_PGSIZE)
+/* And this pushes it over 32 bits */
+#define A_LITTLE_BIT (NTDB_PGSIZE * 2)
+
+/*
+ * Check that records can live beyond the 4G offset boundary.
+ *
+ * For each flag combination: use up the initial free space with one fake
+ * record, extend the file (sparsely) to ALMOST_4G, add A_LITTLE_BIT more
+ * as a free record, then store, locate, fetch and delete a record that
+ * must end up at or beyond ALMOST_4G.
+ *
+ * When off_t is no wider than 32 bits, a single pass is reported and the
+ * test is skipped.  Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct ntdb_context *ntdb;
+	int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_CONVERT,
+			NTDB_NOMMAP|NTDB_CONVERT };
+
+	if (sizeof(off_t) <= 4) {
+		plan_tests(1);
+		pass("No 64 bit off_t");
+		return exit_status();
+	}
+
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 16);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		off_t old_size;
+		NTDB_DATA k, d;
+		struct hash_info h;
+		struct ntdb_used_record rec;
+		ntdb_off_t off;
+
+		ntdb = ntdb_open("run-64-bit-ntdb.ntdb", flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(ntdb);
+		if (!ntdb)
+			continue;
+
+		old_size = ntdb->file->map_size;
+
+		/* Add a fake record to chew up the existing free space. */
+		k = ntdb_mkdata("fake", 4);
+		d.dsize = ntdb->file->map_size
+			- NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8;
+		/* Zero-filled payload; fail loudly rather than writing
+		 * through a NULL pointer. */
+		d.dptr = calloc(d.dsize, 1);
+		if (!d.dptr) {
+			diag("Failed to allocate fake record");
+			abort();
+		}
+		ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
+		ok1(ntdb->file->map_size == old_size);
+		free(d.dptr);
+
+		/* This makes a sparse file */
+		ok1(ftruncate(ntdb->file->fd, ALMOST_4G) == 0);
+		ok1(add_free_record(ntdb, old_size, ALMOST_4G - old_size,
+				    NTDB_LOCK_WAIT, false) == NTDB_SUCCESS);
+
+		/* Now add a little record past the 4G barrier. */
+		ok1(ntdb_expand_file(ntdb, A_LITTLE_BIT) == NTDB_SUCCESS);
+		ok1(add_free_record(ntdb, ALMOST_4G, A_LITTLE_BIT,
+				    NTDB_LOCK_WAIT, false)
+		    == NTDB_SUCCESS);
+
+		ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+
+		/* Test allocation path. */
+		/* The lengths include the terminating NUL of each literal. */
+		k = ntdb_mkdata("key", 4);
+		d = ntdb_mkdata("data", 5);
+		ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
+		ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+
+		/* Make sure it put it at end as we expected. */
+		off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL);
+		/* An error return must not be mistaken for a high offset,
+		 * and only a successful lookup is unlocked. */
+		ok1(!NTDB_OFF_IS_ERR(off) && off >= ALMOST_4G);
+		if (!NTDB_OFF_IS_ERR(off))
+			ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
+
+		ok1(ntdb_fetch(ntdb, k, &d) == 0);
+		ok1(d.dsize == 5);
+		ok1(strcmp((char *)d.dptr, "data") == 0);
+		free(d.dptr);
+
+		ok1(ntdb_delete(ntdb, k) == 0);
+		ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+
+		ntdb_close(ntdb);
+	}
+
+	/* We might get messages about mmap failing, so don't test
+	 * tap_log_messages */
+	return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* Dummy lock callback: ignores every argument and reports success (0). */
+static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
+		  void *unused)
+{
+	(void)fd;
+	(void)rw;
+	(void)off;
+	(void)len;
+	(void)waitflag;
+	(void)unused;
+
+	return 0;
+}
+
+/* Dummy unlock callback: ignores every argument and reports success (0). */
+static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
+{
+	(void)fd;
+	(void)rw;
+	(void)off;
+	(void)len;
+	(void)unused;
+
+	return 0;
+}
+
+/* Dummy hash callback: ignores every argument and always returns 0. */
+static uint32_t hash_fn(const void *key, size_t len, uint32_t seed,
+			void *priv)
+{
+	(void)key;
+	(void)len;
+	(void)seed;
+	(void)priv;
+
+	return (uint32_t)0;
+}
+
+/*
+ * Exercise ntdb_get_attribute(), ntdb_set_attribute() and
+ * ntdb_unset_attribute() for each flag combination: first on a database
+ * opened without attributes, then on one opened with the whole attribute
+ * chain.  Fifty ok1() checks run per flag set.
+ * Returns the TAP exit status.
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ union ntdb_attribute seed_attr;
+ union ntdb_attribute hash_attr;
+ union ntdb_attribute lock_attr;
+
+ /* Attribute chain: seed_attr -> hash_attr -> lock_attr -> tap_log_attr. */
+ seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
+ seed_attr.base.next = &hash_attr;
+ seed_attr.seed.seed = 100;
+
+ hash_attr.base.attr = NTDB_ATTRIBUTE_HASH;
+ hash_attr.base.next = &lock_attr;
+ hash_attr.hash.fn = hash_fn;
+ hash_attr.hash.data = &hash_attr;
+
+ lock_attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+ lock_attr.base.next = &tap_log_attr;
+ lock_attr.flock.lock = mylock;
+ lock_attr.flock.unlock = myunlock;
+ lock_attr.flock.data = &lock_attr;
+
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 50);
+
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ union ntdb_attribute attr;
+
+ /* First open with no attributes. */
+ ntdb = ntdb_open("run-90-get-set-attributes.ntdb",
+ flags[i] |MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
+ ok1(ntdb);
+
+ /* Get log on no attributes will fail */
+ attr.base.attr = NTDB_ATTRIBUTE_LOG;
+ ok1(ntdb_get_attribute(ntdb, &attr) == NTDB_ERR_NOEXIST);
+ /* These always work. */
+ attr.base.attr = NTDB_ATTRIBUTE_HASH;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH);
+ ok1(attr.hash.fn == ntdb_jenkins_hash);
+ attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
+ ok1(attr.flock.lock == ntdb_fcntl_lock);
+ ok1(attr.flock.unlock == ntdb_fcntl_unlock);
+ attr.base.attr = NTDB_ATTRIBUTE_SEED;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED);
+ /* This is possible, just astronomically unlikely. */
+ ok1(attr.seed.seed != 0);
+
+ /* Unset attributes. */
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+
+ /* Set them. */
+ ok1(ntdb_set_attribute(ntdb, &tap_log_attr) == 0);
+ ok1(ntdb_set_attribute(ntdb, &lock_attr) == 0);
+ /* These should fail. */
+ /* Each rejected set is expected to log exactly one message. */
+ ok1(ntdb_set_attribute(ntdb, &seed_attr) == NTDB_ERR_EINVAL);
+ ok1(tap_log_messages == 1);
+ ok1(ntdb_set_attribute(ntdb, &hash_attr) == NTDB_ERR_EINVAL);
+ ok1(tap_log_messages == 2);
+ tap_log_messages = 0;
+
+ /* Getting them should work as expected. */
+ attr.base.attr = NTDB_ATTRIBUTE_LOG;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG);
+ ok1(attr.log.fn == tap_log_attr.log.fn);
+ ok1(attr.log.data == tap_log_attr.log.data);
+
+ attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
+ ok1(attr.flock.lock == mylock);
+ ok1(attr.flock.unlock == myunlock);
+ ok1(attr.flock.data == &lock_attr);
+
+ /* Unset them again. */
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+ ok1(tap_log_messages == 0);
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
+ ok1(tap_log_messages == 0);
+
+ ntdb_close(ntdb);
+ ok1(tap_log_messages == 0);
+
+ /* Now open with all attributes. */
+ ntdb = ntdb_open("run-90-get-set-attributes.ntdb",
+ flags[i] | MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600,
+ &seed_attr);
+
+ ok1(ntdb);
+
+ /* Get will succeed */
+ attr.base.attr = NTDB_ATTRIBUTE_LOG;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_LOG);
+ ok1(attr.log.fn == tap_log_attr.log.fn);
+ ok1(attr.log.data == tap_log_attr.log.data);
+
+ attr.base.attr = NTDB_ATTRIBUTE_HASH;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_HASH);
+ ok1(attr.hash.fn == hash_fn);
+ ok1(attr.hash.data == &hash_attr);
+
+ attr.base.attr = NTDB_ATTRIBUTE_FLOCK;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_FLOCK);
+ ok1(attr.flock.lock == mylock);
+ ok1(attr.flock.unlock == myunlock);
+ ok1(attr.flock.data == &lock_attr);
+
+ attr.base.attr = NTDB_ATTRIBUTE_SEED;
+ ok1(ntdb_get_attribute(ntdb, &attr) == 0);
+ ok1(attr.base.attr == NTDB_ATTRIBUTE_SEED);
+ ok1(attr.seed.seed == seed_attr.seed.seed);
+
+ /* Unset attributes. */
+ /* Unsetting HASH and SEED is expected to log one message each; */
+ /* unsetting FLOCK and LOG is expected to log nothing further. */
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_HASH);
+ ok1(tap_log_messages == 1);
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_SEED);
+ ok1(tap_log_messages == 2);
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_FLOCK);
+ ntdb_unset_attribute(ntdb, NTDB_ATTRIBUTE_LOG);
+ ok1(tap_log_messages == 2);
+ tap_log_messages = 0;
+
+ ntdb_close(ntdb);
+
+ }
+ return exit_status();
+}
--- /dev/null
+#include <ccan/failtest/failtest_override.h>
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "layout.h"
+#include "failtest_helper.h"
+#include <stdarg.h>
+#include "helprun-external-agent.h"
+
+/*
+ * Length used for a capability record: 8 if it breaks check, plus 16 if
+ * it breaks write, plus 32 if it breaks open.
+ */
+static size_t len_of(bool breaks_check, bool breaks_write, bool breaks_open)
+{
+	return (breaks_check ? 8 : 0)
+		+ (breaks_write ? 16 : 0)
+		+ (breaks_open ? 32 : 0);
+}
+
+/* Creates a NTDB with various capabilities. */
+/*
+ * Build a database image containing used records, free records and one or
+ * more capability records, and write it to the file `name`.
+ *
+ * The named parameters describe the first capability.  Further
+ * capabilities follow as groups of four int arguments (cap, breaks_check,
+ * breaks_write, breaks_open); a single 0 in the cap position ends the
+ * list, so every caller must pass that terminator.
+ *
+ * Exits via err() if the file cannot be opened or fully written.
+ */
+static void create_ntdb(const char *name,
+ unsigned int cap,
+ bool breaks_check,
+ bool breaks_write,
+ bool breaks_open, ...)
+{
+ NTDB_DATA key, data;
+ va_list ap;
+ struct ntdb_layout *layout;
+ struct ntdb_context *ntdb;
+ int fd, clen;
+ union ntdb_attribute seed_attr;
+
+ /* Force a seed which doesn't allow records to clash! */
+ seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
+ seed_attr.base.next = &tap_log_attr;
+ seed_attr.seed.seed = 0;
+
+ key = ntdb_mkdata("Hello", 5);
+ data = ntdb_mkdata("world", 5);
+
+ /* Create a NTDB with some data, and some capabilities */
+ layout = new_ntdb_layout();
+ ntdb_layout_add_freetable(layout);
+ ntdb_layout_add_used(layout, key, data, 6);
+ clen = len_of(breaks_check, breaks_write, breaks_open);
+ ntdb_layout_add_free(layout, 15496 - clen, 0);
+ /* NOTE(review): the flags are passed as (breaks_write, breaks_check,
+ * breaks_open) - confirm against ntdb_layout_add_capability()'s
+ * declaration. */
+ ntdb_layout_add_capability(layout, cap,
+ breaks_write, breaks_check, breaks_open,
+ clen);
+
+ va_start(ap, breaks_open);
+ while ((cap = va_arg(ap, int)) != 0) {
+ breaks_check = va_arg(ap, int);
+ breaks_write = va_arg(ap, int);
+ breaks_open = va_arg(ap, int);
+
+ /* Each extra capability is preceded by a used record with a
+ * shorter key, followed by a free record. */
+ key.dsize--;
+ ntdb_layout_add_used(layout, key, data, 11 - key.dsize);
+ clen = len_of(breaks_check, breaks_write, breaks_open);
+ ntdb_layout_add_free(layout, 16304 - clen, 0);
+ ntdb_layout_add_capability(layout, cap,
+ breaks_write, breaks_check,
+ breaks_open, clen);
+ }
+ va_end(ap);
+
+ /* We open-code this, because we need to use the failtest write. */
+ ntdb = ntdb_layout_get(layout, failtest_free, &seed_attr);
+
+ fd = open(name, O_RDWR|O_TRUNC|O_CREAT, 0600);
+ if (fd < 0)
+ err(1, "opening %s for writing", name);
+ /* Dump the whole in-memory image; a short write counts as failure. */
+ if (write(fd, ntdb->file->map_ptr, ntdb->file->map_size)
+ != ntdb->file->map_size)
+ err(1, "writing %s", name);
+ close(fd);
+ ntdb_close(ntdb);
+ ntdb_layout_free(layout);
+}
+
+/*
+ * Check how opening, checking and summarizing a database behaves when it
+ * contains capability records flagged as harmless, as breaking check, as
+ * breaking write or as breaking open.
+ *
+ * Every scenario recreates run-capabilities.ntdb via create_ntdb(); the
+ * numbers and booleans passed to it are (capability, breaks_check,
+ * breaks_write, breaks_open) groups, ended by a 0 capability.
+ *
+ * Note all the "goto out" early exits: they're to shorten failtest time.
+ */
+int main(int argc, char *argv[])
+{
+ struct ntdb_context *ntdb;
+ char *summary;
+
+ failtest_init(argc, argv);
+ failtest_hook = block_repeat_failures;
+ failtest_exit_check = exit_check_log;
+ /* 60 is the number of ok1() checks below when no early exit is taken. */
+ plan_tests(60);
+
+ /*
+  * failtest_suppress is only cleared around the ntdb_open() calls under
+  * test; database creation and all other checks run with it set.
+  */
+ failtest_suppress = true;
+ /* Capability says you can ignore it? */
+ create_ntdb("run-capabilities.ntdb", 1, false, false, false, 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ if (!ok1(ntdb))
+ goto out;
+ /*
+  * tap_log_messages is never reset in this function: the expected
+  * values further down are running totals across all scenarios.
+  */
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ ok1(tap_log_messages == 0);
+ ntdb_close(ntdb);
+
+ /* Two capabilities say you can ignore them? */
+ create_ntdb("run-capabilities.ntdb",
+ 1, false, false, false,
+ 2, false, false, false, 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ if (!ok1(ntdb))
+ goto out;
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
+ ok1(strstr(summary, "Capability 1\n"));
+ free(summary);
+ ntdb_close(ntdb);
+
+ /* Capability says you can't check. */
+ create_ntdb("run-capabilities.ntdb",
+ 1, false, false, false,
+ 2, true, false, false, 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ if (!ok1(ntdb))
+ goto out;
+ ok1(tap_log_messages == 0);
+ ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
+ /* The check is still expected to succeed, but to log one message. */
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ /* We expect a warning! */
+ ok1(tap_log_messages == 1);
+ ok1(strstr(log_last, "capabilit"));
+ ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
+ ok1(strstr(summary, "Capability 1\n"));
+ ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
+ free(summary);
+ ntdb_close(ntdb);
+
+ /* Capability says you can't write. */
+ create_ntdb("run-capabilities.ntdb",
+ 1, false, false, false,
+ 2, false, true, false, 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ /* We expect a message. */
+ ok1(!ntdb);
+ if (!ok1(tap_log_messages == 2))
+ goto out;
+ if (!ok1(strstr(log_last, "unknown")))
+ goto out;
+ ok1(strstr(log_last, "write"));
+
+ /* We can open it read-only though! */
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ if (!ok1(ntdb))
+ goto out;
+ ok1(tap_log_messages == 2);
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ ok1(tap_log_messages == 2);
+ ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
+ ok1(strstr(summary, "Capability 1\n"));
+ ok1(strstr(summary, "Capability 2 (read-only)\n"));
+ free(summary);
+ ntdb_close(ntdb);
+
+ /* Capability says you can't open. */
+ create_ntdb("run-capabilities.ntdb",
+ 1, false, false, false,
+ 2, false, false, true, 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ /* We expect a message. */
+ ok1(!ntdb);
+ if (!ok1(tap_log_messages == 3))
+ goto out;
+ if (!ok1(strstr(log_last, "unknown")))
+ goto out;
+
+ /* Combine capabilities correctly. */
+ create_ntdb("run-capabilities.ntdb",
+ 1, false, false, false,
+ 2, true, false, false,
+ 3, false, true, false, 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ /* We expect a message. */
+ ok1(!ntdb);
+ if (!ok1(tap_log_messages == 4))
+ goto out;
+ if (!ok1(strstr(log_last, "unknown")))
+ goto out;
+ ok1(strstr(log_last, "write"));
+
+ /* We can open it read-only though! */
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ if (!ok1(ntdb))
+ goto out;
+ ok1(tap_log_messages == 4);
+ ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ /* We expect a warning! */
+ ok1(tap_log_messages == 5);
+ ok1(strstr(log_last, "unknown"));
+ ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
+ ok1(strstr(summary, "Capability 1\n"));
+ ok1(strstr(summary, "Capability 2 (uncheckable)\n"));
+ ok1(strstr(summary, "Capability 3 (read-only)\n"));
+ free(summary);
+ ntdb_close(ntdb);
+
+ /* Two capability flags in one. */
+ create_ntdb("run-capabilities.ntdb",
+ 1, false, false, false,
+ 2, true, true, false,
+ 0);
+
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDWR, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ /* We expect a message. */
+ ok1(!ntdb);
+ if (!ok1(tap_log_messages == 6))
+ goto out;
+ if (!ok1(strstr(log_last, "unknown")))
+ goto out;
+ ok1(strstr(log_last, "write"));
+
+ /* We can open it read-only though! */
+ failtest_suppress = false;
+ ntdb = ntdb_open("run-capabilities.ntdb", MAYBE_NOSYNC, O_RDONLY, 0,
+ &tap_log_attr);
+ failtest_suppress = true;
+ if (!ok1(ntdb))
+ goto out;
+ ok1(tap_log_messages == 6);
+ ok1(ntdb_get_flags(ntdb) & NTDB_CANT_CHECK);
+ ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
+ /* We expect a warning! */
+ ok1(tap_log_messages == 7);
+ ok1(strstr(log_last, "unknown"));
+ ok1(ntdb_summary(ntdb, 0, &summary) == NTDB_SUCCESS);
+ ok1(strstr(summary, "Capability 1\n"));
+ ok1(strstr(summary, "Capability 2 (uncheckable,read-only)\n"));
+ free(summary);
+ ntdb_close(ntdb);
+
+out:
+ /* Both the normal path and every early exit end up here. */
+ failtest_exit(exit_status());
+
+ /*
+ * We will never reach this but the compiler complains if we do not
+ * return in this function.
+ */
+ return EFAULT;
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Check that a store made inside a transaction can grow the database.
+ *
+ * For each flag combination the database is first filled with one large
+ * "fake" record sized so the mapping does not grow, then a small record is
+ * inserted inside a transaction and the mapping is expected to be larger
+ * both before and after the commit.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct ntdb_context *ntdb;
+	int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+	NTDB_DATA key = ntdb_mkdata("key", 3);
+	NTDB_DATA data = ntdb_mkdata("data", 4);
+
+	/* Nine checks per flag combination plus the final log check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1);
+
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		size_t size;
+		NTDB_DATA k, d;
+		ntdb = ntdb_open("run-expand-in-transaction.ntdb",
+				 flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(ntdb);
+		if (!ntdb)
+			continue;
+
+		size = ntdb->file->map_size;
+		/* Add a fake record to chew up the existing free space. */
+		k = ntdb_mkdata("fake", 4);
+		d.dsize = ntdb->file->map_size
+			- NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8;
+		/*
+		 * calloc() hands back the zero-filled buffer directly; bail
+		 * out instead of writing through a NULL pointer if the
+		 * allocation fails.
+		 */
+		d.dptr = calloc(d.dsize, 1);
+		if (d.dptr == NULL && d.dsize != 0)
+			err(1, "allocating fake record");
+		ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
+		ok1(ntdb->file->map_size == size);
+		free(d.dptr);
+		ok1(ntdb_transaction_start(ntdb) == 0);
+		ok1(ntdb_store(ntdb, key, data, NTDB_INSERT) == 0);
+		ok1(ntdb->file->map_size > size);
+		ok1(ntdb_transaction_commit(ntdb) == 0);
+		ok1(ntdb->file->map_size > size);
+		ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+		ntdb_close(ntdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Check how the features_used and features_offered header fields are
+ * treated when a database carrying modified values is reopened.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, j;
+ struct ntdb_context *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+ /* Key and data both alias j, so each store writes j's current value. */
+ NTDB_DATA key = { (unsigned char *)&j, sizeof(j) };
+ NTDB_DATA data = { (unsigned char *)&j, sizeof(j) };
+
+ /* Eight checks per flag combination plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ uint64_t features;
+ ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Put some stuff in there. */
+ for (j = 0; j < 100; j++) {
+ if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+ fail("Storing in ntdb");
+ }
+
+ /* Mess with features fields in hdr. */
+ /* Complement of NTDB_FEATURE_MASK with bit 0 flipped. */
+ features = (~NTDB_FEATURE_MASK ^ 1);
+ ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
+ features_used),
+ &features, sizeof(features)) == 0);
+ ok1(ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
+ features_offered),
+ &features, sizeof(features)) == 0);
+ ntdb_close(ntdb);
+
+ /* Reopen without O_CREAT|O_TRUNC so the modified header is used. */
+ ntdb = ntdb_open("run-features.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR, 0, &tap_log_attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* Should not have changed features offered. */
+ ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header,
+ features_offered),
+ &features, sizeof(features)) == 0);
+ ok1(features == (~NTDB_FEATURE_MASK ^ 1));
+
+ /* Should have cleared unknown bits in features_used. */
+ ok1(ntdb_read_convert(ntdb, offsetof(struct ntdb_header,
+ features_used),
+ &features, sizeof(features)) == 0);
+ ok1(features == (1 & NTDB_FEATURE_MASK));
+
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "../private.h"
+#include <unistd.h>
+#include "lock-tracking.h"
+
+#define fcntl fcntl_with_lockcheck
+#include "ntdb-source.h"
+
+#include "tap-interface.h"
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include "external-agent.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+#define TEST_DBNAME "run-lockall.ntdb"
+#define KEY_STR "key"
+
+#undef fcntl
+
+/*
+ * Check ntdb_lockall() and ntdb_lockall_read() against an external agent.
+ *
+ * While the write lock is held the agent's STORE and FETCH are both
+ * expected to report WOULD_HAVE_BLOCKED; while the read lock is held only
+ * STORE is.  Each lock is taken twice and released twice to test nesting.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+	struct agent *agent;
+	int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+			NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+	/* Unsigned, because it is compared against a sizeof expression. */
+	unsigned int i;
+
+	/* Thirteen checks per flag combination plus the final log check. */
+	plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1);
+	agent = prepare_external_agent();
+	if (!agent)
+		err(1, "preparing agent");
+
+	for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
+		enum agent_return ret;
+		struct ntdb_context *ntdb;
+
+		ntdb = ntdb_open(TEST_DBNAME, flags[i]|MAYBE_NOSYNC,
+				 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(ntdb);
+		/* Everything below needs a valid handle. */
+		if (!ntdb)
+			continue;
+
+		ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
+		ok1(ret == SUCCESS);
+
+		ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+		ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
+		    == WOULD_HAVE_BLOCKED);
+		ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
+		    == WOULD_HAVE_BLOCKED);
+		/* Test nesting. */
+		ok1(ntdb_lockall(ntdb) == NTDB_SUCCESS);
+		ntdb_unlockall(ntdb);
+		ntdb_unlockall(ntdb);
+
+		ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
+		    == SUCCESS);
+
+		ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
+		ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
+		    == WOULD_HAVE_BLOCKED);
+		ok1(external_agent_operation(agent, FETCH, KEY_STR "=" KEY_STR)
+		    == SUCCESS);
+		ok1(ntdb_lockall_read(ntdb) == NTDB_SUCCESS);
+		ntdb_unlockall_read(ntdb);
+		ntdb_unlockall_read(ntdb);
+
+		ok1(external_agent_operation(agent, STORE, KEY_STR "=" KEY_STR)
+		    == SUCCESS);
+		ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS);
+		ntdb_close(ntdb);
+	}
+
+	free_external_agent(agent);
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+/* We had a bug where we marked the ntdb read-only for a ntdb_traverse_read.
+ * If we then expanded the ntdb, we would remap read-only, and later SEGV. */
+#include "tap-interface.h"
+#include "external-agent.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Report whether the file open on fd has a size different from size.
+ *
+ * Despite the name, any size difference counts, not just growth.
+ * Exits via err() if the file cannot be stat'ed, rather than comparing
+ * against an uninitialized struct stat.
+ */
+static bool file_larger(int fd, ntdb_len_t size)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) != 0)
+		err(1, "fstat");
+	return st.st_size != size;
+}
+
+/*
+ * Have the external agent store "<i>=<i>" records until file_larger()
+ * reports that the file behind fd no longer has the given size.
+ *
+ * Returns the number of records stored, or 0 as soon as one of the
+ * agent's STORE operations does not return SUCCESS.
+ */
+static unsigned add_records_to_grow(struct agent *agent, int fd, ntdb_len_t size)
+{
+	unsigned int i;
+
+	for (i = 0; !file_larger(fd, size); i++) {
+		char data[50];
+		/* i is unsigned, so %u; snprintf bounds the write to data. */
+		snprintf(data, sizeof(data), "%u=%u", i, i);
+		if (external_agent_operation(agent, STORE, data) != SUCCESS)
+			return 0;
+	}
+	diag("Added %u records to grow file", i);
+	return i;
+}
+
+/*
+ * Let an external agent grow the database file behind our back, then
+ * traverse it and store a record through our own handle.
+ *
+ * Exits via err() if the agent or the database cannot be set up, since
+ * every later step dereferences them.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct agent *agent;
+	struct ntdb_context *ntdb;
+	NTDB_DATA d = ntdb_mkdata("hello", 5);
+	const char filename[] = "run-remap-in-read_traverse.ntdb";
+
+	plan_tests(4);
+
+	agent = prepare_external_agent();
+	if (!agent)
+		err(1, "preparing agent");
+
+	ntdb = ntdb_open(filename, MAYBE_NOSYNC,
+			 O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+	if (!ntdb)
+		err(1, "opening %s", filename);
+
+	ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS);
+	i = add_records_to_grow(agent, ntdb->file->fd, ntdb->file->map_size);
+
+	/* Do a traverse. */
+	ok1(ntdb_traverse(ntdb, NULL, NULL) == i);
+
+	/* Now store something! */
+	ok1(ntdb_store(ntdb, d, d, NTDB_INSERT) == 0);
+	ok1(tap_log_messages == 0);
+	ntdb_close(ntdb);
+	free_external_agent(agent);
+	return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/* How many times my_log_fn() has been called. */
+static int log_count;
+
+/*
+ * Log callback that only counts its invocations; all arguments are ignored.
+ * Normally we get a log when setting random seed.
+ */
+static void my_log_fn(struct ntdb_context *ntdb,
+		      enum ntdb_log_level level,
+		      enum NTDB_ERROR ecode,
+		      const char *message, void *priv)
+{
+	log_count += 1;
+}
+
+/* NTDB_ATTRIBUTE_LOG attribute whose callback is my_log_fn(). */
+static union ntdb_attribute log_attr = {
+	.log.base.attr = NTDB_ATTRIBUTE_LOG,
+	.log.fn = my_log_fn,
+};
+
+/*
+ * Open databases with an explicit seed attribute of 42 and check that the
+ * context, and for on-disk databases the header read back from the file,
+ * carry that seed, and that my_log_fn() was never called.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ struct ntdb_context *ntdb;
+ union ntdb_attribute attr;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* Seed attribute, chained to the counting log attribute. */
+ attr.seed.base.attr = NTDB_ATTRIBUTE_SEED;
+ attr.seed.base.next = &log_attr;
+ attr.seed.seed = 42;
+
+ /*
+  * Four checks for every flag combination, plus three more for each of
+  * the four combinations without NTDB_INTERNAL.
+  */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ struct ntdb_header hdr;
+ int fd;
+ ntdb = ntdb_open("run-seed.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &attr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(ntdb->hash_seed == 42);
+ ok1(log_count == 0);
+ ntdb_close(ntdb);
+
+ /*
+  * The remaining checks read run-seed.ntdb directly and are skipped
+  * for NTDB_INTERNAL (presumably nothing is written to disk in that
+  * mode -- TODO confirm).
+  */
+ if (flags[i] & NTDB_INTERNAL)
+ continue;
+
+ fd = open("run-seed.ntdb", O_RDONLY);
+ ok1(fd >= 0);
+ ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
+ /* With NTDB_CONVERT the stored seed is compared after a byte swap. */
+ if (flags[i] & NTDB_CONVERT)
+ ok1(bswap_64(hdr.hash_seed) == 42);
+ else
+ ok1(hdr.hash_seed == 42);
+ close(fd);
+ }
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "helprun-external-agent.h"
+
+/*
+ * Check the string ntdb_errorstr() returns for every error code from
+ * NTDB_SUCCESS down to NTDB_ERR_RDONLY, and for the first value below that
+ * range.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+ enum NTDB_ERROR e;
+ /*
+  * One check per value visited by the loop below, plus the
+  * out-of-range check after it.
+  */
+ plan_tests(NTDB_ERR_RDONLY*-1 + 2);
+
+ /* Walk downwards: the loop ends once e drops below NTDB_ERR_RDONLY. */
+ for (e = NTDB_SUCCESS; e >= NTDB_ERR_RDONLY; e--) {
+ switch (e) {
+ case NTDB_SUCCESS:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Success"));
+ break;
+ case NTDB_ERR_IO:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "IO Error"));
+ break;
+ case NTDB_ERR_LOCK:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Locking error"));
+ break;
+ case NTDB_ERR_OOM:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Out of memory"));
+ break;
+ case NTDB_ERR_EXISTS:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Record exists"));
+ break;
+ case NTDB_ERR_EINVAL:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Invalid parameter"));
+ break;
+ case NTDB_ERR_NOEXIST:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Record does not exist"));
+ break;
+ case NTDB_ERR_RDONLY:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "write not permitted"));
+ break;
+ case NTDB_ERR_CORRUPT:
+ ok1(!strcmp(ntdb_errorstr(e),
+ "Corrupt database"));
+ break;
+ }
+ }
+ /* Here e is one below NTDB_ERR_RDONLY, i.e. outside the loop's range. */
+ ok1(!strcmp(ntdb_errorstr(e), "Invalid error code"));
+
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+/*
+ * ntdb_foreach() callback: decrement *count and return 1 once it reaches
+ * zero, 0 otherwise.  The ntdb argument is not used.
+ */
+static int drop_count(struct ntdb_context *ntdb, unsigned int *count)
+{
+	*count -= 1;
+	return (*count == 0) ? 1 : 0;
+}
+
+/*
+ * ntdb_foreach() callback: mark the slot of found[] that matches the
+ * database's name.
+ *
+ * Aborts if the name is none of the three expected ones, or if the same
+ * database is reported a second time.  Always returns 0.
+ */
+static int set_found(struct ntdb_context *ntdb, bool found[3])
+{
+	static const char *const names[] = {
+		"run-ntdb_foreach0.ntdb",
+		"run-ntdb_foreach1.ntdb",
+		"run-ntdb_foreach2.ntdb",
+	};
+	const unsigned int num_names = sizeof(names) / sizeof(names[0]);
+	unsigned int slot = 0;
+
+	/* Find the first expected name equal to this database's name. */
+	while (slot < num_names
+	       && strcmp(ntdb_name(ntdb), names[slot]) != 0)
+		slot++;
+
+	if (slot == num_names)
+		abort();
+
+	if (found[slot])
+		abort();
+	found[slot] = true;
+	return 0;
+}
+
+/*
+ * Check that ntdb_foreach() visits exactly the databases that are open at
+ * the time of the call, and that a non-zero callback return stops it.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i, count;
+ bool found[3];
+ struct ntdb_context *ntdb0, *ntdb1, *ntdb;
+ int flags[] = { NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT };
+
+ /* Eight checks per flag combination. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 8);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ /* ntdb0, ntdb1 and ntdb hold databases 0, 1 and 2 respectively. */
+ ntdb0 = ntdb_open("run-ntdb_foreach0.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+ ntdb = ntdb_open("run-ntdb_foreach2.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+
+ /* All three open: all three must be reported. */
+ memset(found, 0, sizeof(found));
+ ntdb_foreach(set_found, found);
+ ok1(found[0] && found[1] && found[2]);
+
+ /* Test premature iteration termination */
+ /*
+  * drop_count() returns 1 on its first call here; were the iteration
+  * to continue, the unsigned count would wrap and no longer be 0.
+  */
+ count = 1;
+ ntdb_foreach(drop_count, &count);
+ ok1(count == 0);
+
+ /* Close database 1: only 0 and 2 remain. */
+ ntdb_close(ntdb1);
+ memset(found, 0, sizeof(found));
+ ntdb_foreach(set_found, found);
+ ok1(found[0] && !found[1] && found[2]);
+
+ /* Close database 2: only 0 remains. */
+ ntdb_close(ntdb);
+ memset(found, 0, sizeof(found));
+ ntdb_foreach(set_found, found);
+ ok1(found[0] && !found[1] && !found[2]);
+
+ /* Reopen database 1 (without O_CREAT|O_TRUNC): 0 and 1 are open. */
+ ntdb1 = ntdb_open("run-ntdb_foreach1.ntdb",
+ flags[i]|MAYBE_NOSYNC,
+ O_RDWR, 0600, &tap_log_attr);
+ memset(found, 0, sizeof(found));
+ ntdb_foreach(set_found, found);
+ ok1(found[0] && found[1] && !found[2]);
+
+ /* Close database 0: only 1 remains. */
+ ntdb_close(ntdb0);
+ memset(found, 0, sizeof(found));
+ ntdb_foreach(set_found, found);
+ ok1(!found[0] && found[1] && !found[2]);
+
+ /* Close the last one: nothing may be reported. */
+ ntdb_close(ntdb1);
+ memset(found, 0, sizeof(found));
+ ntdb_foreach(set_found, found);
+ ok1(!found[0] && !found[1] && !found[2]);
+ ok1(tap_log_messages == 0);
+ }
+
+ return exit_status();
+}
--- /dev/null
+#include "ntdb-source.h"
+#include "tap-interface.h"
+#include "logging.h"
+#include "helprun-external-agent.h"
+
+#define NUM_RECORDS 1000
+
+/*
+ * Hash callback that ignores the seed argument and instead hashes with the
+ * 64-bit seed that p points to.
+ * We use the same seed which we saw a failure on.
+ */
+static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
+{
+	const unsigned char *bytes = (const unsigned char *)key;
+	uint64_t fixed_seed = *(uint64_t *)p;
+
+	return hash64_stable(bytes, len, fixed_seed);
+}
+
+/*
+ * Store NUM_RECORDS records whose key and data are both the int values
+ * 0 .. NUM_RECORDS-1, using NTDB_REPLACE.
+ *
+ * Returns false as soon as one store fails, true otherwise.
+ */
+static bool store_records(struct ntdb_context *ntdb)
+{
+	int n;
+	/* Key and data alias the loop counter, so they follow its value. */
+	NTDB_DATA key = { (unsigned char *)&n, sizeof(n) };
+	NTDB_DATA data = { (unsigned char *)&n, sizeof(n) };
+
+	n = 0;
+	while (n < NUM_RECORDS) {
+		if (ntdb_store(ntdb, key, data, NTDB_REPLACE) != 0)
+			return false;
+		n++;
+	}
+	return true;
+}
+
+/* State shared between main() and the trav() traverse callback. */
+struct trav_data {
+	/* Number of times trav() has been called. */
+	unsigned int calls;
+	/* trav() returns 1 once calls reaches this value. */
+	unsigned int call_limit;
+	/* Smallest and largest record value seen so far. */
+	int low;
+	int high;
+	/* Set when a record's key and data sizes or contents disagree. */
+	bool mismatch;
+	/* When true, trav() deletes every record it visits. */
+	bool delete;
+	/* Result of the most recent ntdb_delete() issued by trav(). */
+	enum NTDB_ERROR delete_error;
+};
+
+/*
+ * Traverse callback: validate one record, track the lowest and highest
+ * value seen and optionally delete the record.
+ *
+ * Returns -1 on a key/data mismatch or a failed delete, 1 once
+ * td->call_limit calls have been made, and 0 otherwise.
+ */
+static int trav(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
+		struct trav_data *td)
+{
+	int val;
+	bool sizes_ok;
+
+	td->calls++;
+
+	/* Key and data must both be exactly one int holding the same bytes. */
+	sizes_ok = (key.dsize == sizeof(val)) && (dbuf.dsize == sizeof(val));
+	if (!sizes_ok || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
+		td->mismatch = true;
+		return -1;
+	}
+
+	memcpy(&val, dbuf.dptr, dbuf.dsize);
+	if (td->low > val)
+		td->low = val;
+	if (td->high < val)
+		td->high = val;
+
+	if (td->delete) {
+		enum NTDB_ERROR ecode = ntdb_delete(ntdb, key);
+
+		td->delete_error = ecode;
+		if (ecode != NTDB_SUCCESS)
+			return -1;
+	}
+
+	return (td->calls == td->call_limit) ? 1 : 0;
+}
+
+/* State shared between main() and the trav_grow() traverse callback. */
+struct trav_grow_data {
+	/* Number of times trav_grow() has been called. */
+	unsigned int calls;
+	/* Number of visited records whose data was larger than an int. */
+	unsigned int num_large;
+	/* Set when a record's key does not match the start of its data. */
+	bool mismatch;
+	/* Result of the most recent ntdb_append() issued by trav_grow(). */
+	enum NTDB_ERROR error;
+};
+
+/*
+ * Traverse callback: validate one record, then append 128 zero bytes to it.
+ *
+ * Records whose data is already larger than an int are counted in
+ * tgd->num_large.  Returns -1 on a mismatch or a failed append, else 0.
+ */
+static int trav_grow(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
+		     struct trav_grow_data *tgd)
+{
+	int val;
+	unsigned char padding[128] = { 0 };
+	NTDB_DATA extra;
+
+	tgd->calls++;
+
+	/* The key is one int and the data must start with the same bytes. */
+	if (!(key.dsize == sizeof(val) && dbuf.dsize >= sizeof(val))
+	    || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
+		tgd->mismatch = true;
+		return -1;
+	}
+
+	/* We must have seen this before! */
+	if (dbuf.dsize > sizeof(val))
+		tgd->num_large++;
+
+	/* Make a big difference to the database. */
+	extra = dbuf;
+	extra.dptr = padding;
+	extra.dsize = sizeof(padding);
+
+	tgd->error = ntdb_append(ntdb, key, extra);
+	return (tgd->error != NTDB_SUCCESS) ? -1 : 0;
+}
+
+/*
+ * Exercise ntdb_traverse(): counting only, a full traverse, a traverse cut
+ * short by the callback, a traverse that deletes every record, and a
+ * traverse that grows every record it visits.
+ *
+ * Returns the TAP exit status (0 when all planned checks passed).
+ */
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ int num;
+ struct trav_data td;
+ struct trav_grow_data tgd;
+ struct ntdb_context *ntdb;
+ uint64_t seed = 16014841315512641303ULL;
+ int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
+ NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
+ NTDB_NOMMAP|NTDB_CONVERT };
+ /* Hash attribute: fixedhash() with a pointer to the fixed seed above. */
+ union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
+ .fn = fixedhash,
+ .data = &seed } };
+
+ /* Chain the shared log attribute behind the hash attribute. */
+ hattr.base.next = &tap_log_attr;
+
+ /* 32 checks per flag combination plus the final log check. */
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1);
+ for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+ ntdb = ntdb_open("run-traverse.ntdb", flags[i]|MAYBE_NOSYNC,
+ O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+ ok1(ntdb);
+ if (!ntdb)
+ continue;
+
+ /* A traverse without a callback: 0 is expected while empty. */
+ ok1(ntdb_traverse(ntdb, NULL, NULL) == 0);
+
+ ok1(store_records(ntdb));
+ num = ntdb_traverse(ntdb, NULL, NULL);
+ ok1(num == NUM_RECORDS);
+
+ /* Full traverse. */
+ td.calls = 0;
+ td.call_limit = UINT_MAX;
+ td.low = INT_MAX;
+ td.high = INT_MIN;
+ td.mismatch = false;
+ td.delete = false;
+
+ num = ntdb_traverse(ntdb, trav, &td);
+ ok1(num == NUM_RECORDS);
+ ok1(!td.mismatch);
+ ok1(td.calls == NUM_RECORDS);
+ ok1(td.low == 0);
+ ok1(td.high == NUM_RECORDS-1);
+
+ /* Short traverse. */
+ /* trav() returns 1 after call_limit calls, ending the traverse. */
+ td.calls = 0;
+ td.call_limit = NUM_RECORDS / 2;
+ td.low = INT_MAX;
+ td.high = INT_MIN;
+ td.mismatch = false;
+ td.delete = false;
+
+ num = ntdb_traverse(ntdb, trav, &td);
+ ok1(num == NUM_RECORDS / 2);
+ ok1(!td.mismatch);
+ ok1(td.calls == NUM_RECORDS / 2);
+ ok1(td.low <= NUM_RECORDS / 2);
+ ok1(td.high > NUM_RECORDS / 2);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ ok1(tap_log_messages == 0);
+
+ /* Deleting traverse (delete everything). */
+ td.calls = 0;
+ td.call_limit = UINT_MAX;
+ td.low = INT_MAX;
+ td.high = INT_MIN;
+ td.mismatch = false;
+ td.delete = true;
+ td.delete_error = NTDB_SUCCESS;
+ num = ntdb_traverse(ntdb, trav, &td);
+ ok1(num == NUM_RECORDS);
+ ok1(td.delete_error == NTDB_SUCCESS);
+ ok1(!td.mismatch);
+ ok1(td.calls == NUM_RECORDS);
+ ok1(td.low == 0);
+ ok1(td.high == NUM_RECORDS - 1);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Now it's empty! */
+ ok1(ntdb_traverse(ntdb, NULL, NULL) == 0);
+
+ /* Re-add. */
+ ok1(store_records(ntdb));
+ ok1(ntdb_traverse(ntdb, NULL, NULL) == NUM_RECORDS);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+
+ /* Grow. This will cause us to be reshuffled. */
+ tgd.calls = 0;
+ tgd.num_large = 0;
+ tgd.mismatch = false;
+ tgd.error = NTDB_SUCCESS;
+ ok1(ntdb_traverse(ntdb, trav_grow, &tgd) > 1);
+ ok1(tgd.error == 0);
+ ok1(!tgd.mismatch);
+ ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* Records seen after they had already been grown count as repeats. */
+ ok1(tgd.num_large < tgd.calls);
+ diag("growing db: %u calls, %u repeats",
+ tgd.calls, tgd.num_large);
+
+ ntdb_close(ntdb);
+ }
+
+ ok1(tap_log_messages == 0);
+ return exit_status();
+}
--- /dev/null
+#include "tap-interface.h"
+
+/* Counter of checks recorded so far; starts at zero. */
+unsigned tap_ok_count = 0;
+/* Expected number of checks; starts at the largest unsigned value. */
+unsigned tap_ok_target = -1U;
--- /dev/null
+/*
+ Unix SMB/CIFS implementation.
+ Simplistic implementation of tap interface.
+
+ Copyright (C) Rusty Russell 2012
+
+ ** NOTE! The following LGPL license applies to the talloc
+ ** library. This does NOT imply that all of Samba is released
+ ** under the LGPL
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include <stdio.h>
+#include <ccan/err/err.h>
+#include "no-fsync.h"
+
+#ifndef __location__
+#define __TAP_STRING_LINE1__(s) #s
+#define __TAP_STRING_LINE2__(s) __TAP_STRING_LINE1__(s)
+#define __TAP_STRING_LINE3__ __TAP_STRING_LINE2__(__LINE__)
+#define __location__ __FILE__ ":" __TAP_STRING_LINE3__
+#endif
+
+/* Running count of passed checks and the count announced by plan_tests(). */
+extern unsigned tap_ok_count, tap_ok_target;
+/* Record how many checks are expected to pass. */
+#define plan_tests(num) do { tap_ok_target = (num); } while(0)
+/*
+ * If e is true, print "." and count a pass, yielding true; otherwise
+ * report the printf-style arguments via warnx() and yield false.
+ */
+#define ok(e, ...) ((e) ? (printf("."), tap_ok_count++, true) : (warnx(__VA_ARGS__), false))
+/* ok() whose failure message is the source location and expression text. */
+#define ok1(e) ok((e), "%s:%s", __location__, #e)
+/* Count an unconditional pass; the arguments are ignored. */
+#define pass(...) (printf("."), tap_ok_count++)
+/* Report a failure message via warnx(); the pass counter is not touched. */
+#define fail(...) warnx(__VA_ARGS__)
+/* Print a printf-style diagnostic followed by a newline on stdout. */
+#define diag(...) do { printf(__VA_ARGS__); printf("\n"); } while(0)
+/* 0 when exactly the planned number of checks passed, 1 otherwise. */
+#define exit_status() (tap_ok_count == tap_ok_target ? 0 : 1)
--- /dev/null
+# Objects linked into every tool below.
+OBJS:=../../ntdb.o ../../hash.o ../../tally.o
+# Alternative profiling flags are kept in the trailing comment.
+CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
+LDFLAGS:=-L../../..
+
+# Neither target produces a file of that name, so a stray file called
+# "default" or "clean" must not make them look up to date.
+.PHONY: default clean
+
+default: ntdbtorture ntdbtool ntdbdump ntdbrestore mkntdb speed growtdb-bench
+
+# The tools have no recipes of their own: they rely on make's built-in
+# rule for building a program from a .c file and its prerequisites.
+ntdbdump: ntdbdump.c $(OBJS)
+ntdbrestore: ntdbrestore.c $(OBJS)
+ntdbtorture: ntdbtorture.c $(OBJS)
+ntdbtool: ntdbtool.c $(OBJS)
+mkntdb: mkntdb.c $(OBJS)
+speed: speed.c $(OBJS)
+growtdb-bench: growtdb-bench.c $(OBJS)
+
+clean:
+	rm -f ntdbtorture ntdbdump ntdbrestore ntdbtool mkntdb speed growtdb-bench
--- /dev/null
+#include "ntdb.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ccan/err/err.h>
+
+/*
+ * Log callback: print the database name, the error string for ecode and
+ * the message to stderr.  The level and data arguments are not used.
+ */
+static void logfn(struct ntdb_context *ntdb,
+		  enum ntdb_log_level level,
+		  enum NTDB_ERROR ecode,
+		  const char *message,
+		  void *data)
+{
+	const char *dbname = ntdb_name(ntdb);
+	const char *errstr = ntdb_errorstr(ecode);
+
+	fprintf(stderr, "ntdb:%s:%s:%s\n", dbname, errstr, message);
+}
+
+/*
+ * Benchmark for a growing database.
+ *
+ * Usage: growtdb-bench <users> <groups>
+ *
+ * Creates <users> records in one transaction, appending to a shared index
+ * record after each insert.  Then, for each of <groups> groups, creates a
+ * group record and appends 32 bytes to every user record and to the group
+ * record, one transaction per group.  The database is checked after each
+ * transaction and "cat /proc/<pid>/statm" is run to show memory usage.
+ *
+ * Exits with status 1 on usage or database errors, returns -1 on
+ * allocation failure and 0 on success.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j, users, groups;
+	NTDB_DATA idxkey, idxdata;
+	NTDB_DATA k, d, gk;
+	char cmd[100];
+	struct ntdb_context *ntdb;
+	enum NTDB_ERROR ecode;
+	union ntdb_attribute log;
+
+	if (argc != 3) {
+		printf("Usage: growtdb-bench <users> <groups>\n");
+		exit(1);
+	}
+	users = atoi(argv[1]);
+	groups = atoi(argv[2]);
+
+	snprintf(cmd, sizeof(cmd), "cat /proc/%i/statm", getpid());
+
+	log.base.attr = NTDB_ATTRIBUTE_LOG;
+	log.base.next = NULL;
+	log.log.fn = logfn;
+
+	ntdb = ntdb_open("/tmp/growtdb.ntdb", NTDB_DEFAULT,
+			 O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
+	/* Every call below needs a valid handle. */
+	if (!ntdb)
+		err(1, "Opening /tmp/growtdb.ntdb");
+
+	idxkey.dptr = (unsigned char *)"User index";
+	idxkey.dsize = strlen("User index");
+	idxdata.dsize = 51;
+	idxdata.dptr = calloc(idxdata.dsize, 1);
+	if (idxdata.dptr == NULL) {
+		fprintf(stderr, "Unable to allocate memory for idxdata.dptr\n");
+		return -1;
+	}
+
+	/* Create users. */
+	k.dsize = 48;
+	k.dptr = calloc(k.dsize, 1);
+	if (k.dptr == NULL) {
+		fprintf(stderr, "Unable to allocate memory for k.dptr\n");
+		return -1;
+	}
+	d.dsize = 64;
+	d.dptr = calloc(d.dsize, 1);
+	if (d.dptr == NULL) {
+		fprintf(stderr, "Unable to allocate memory for d.dptr\n");
+		return -1;
+	}
+
+	if ((ecode = ntdb_transaction_start(ntdb)) != 0)
+		errx(1, "ntdb transaction start failed: %s",
+		     ntdb_errorstr(ecode));
+	for (i = 0; i < users; i++) {
+		/* The user number forms the first bytes of the 48-byte key. */
+		memcpy(k.dptr, &i, sizeof(i));
+		ecode = ntdb_store(ntdb, k, d, NTDB_INSERT);
+		if (ecode != NTDB_SUCCESS)
+			errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode));
+
+		/* This simulates a growing index record. */
+		ecode = ntdb_append(ntdb, idxkey, idxdata);
+		if (ecode != NTDB_SUCCESS)
+			errx(1, "ntdb append failed: %s", ntdb_errorstr(ecode));
+	}
+	if ((ecode = ntdb_transaction_commit(ntdb)) != 0)
+		errx(1, "ntdb commit1 failed: %s", ntdb_errorstr(ecode));
+
+	if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0)
+		errx(1, "ntdb_check failed after initial insert!");
+
+	system(cmd);
+
+	/* Now put them all in groups: add 32 bytes to each record for
+	 * a group. */
+	gk.dsize = 48;
+	/* Size the buffer from gk's own length, not from k's. */
+	gk.dptr = calloc(gk.dsize, 1);
+	if (gk.dptr == NULL) {
+		fprintf(stderr, "Unable to allocate memory for gk.dptr\n");
+		return -1;
+	}
+	/* The last key byte tells group keys apart from user keys. */
+	gk.dptr[gk.dsize-1] = 1;
+
+	d.dsize = 32;
+	for (i = 0; i < groups; i++) {
+		if ((ecode = ntdb_transaction_start(ntdb)) != 0)
+			errx(1, "ntdb transaction start failed: %s",
+			     ntdb_errorstr(ecode));
+		/* Create the "group". */
+		memcpy(gk.dptr, &i, sizeof(i));
+		ecode = ntdb_store(ntdb, gk, d, NTDB_INSERT);
+		if (ecode != NTDB_SUCCESS)
+			errx(1, "ntdb insert failed: %s", ntdb_errorstr(ecode));
+
+		/* Now populate it. */
+		for (j = 0; j < users; j++) {
+			/* Append to the user. */
+			memcpy(k.dptr, &j, sizeof(j));
+			if ((ecode = ntdb_append(ntdb, k, d)) != 0)
+				errx(1, "ntdb append failed: %s",
+				     ntdb_errorstr(ecode));
+
+			/* Append to the group. */
+			if ((ecode = ntdb_append(ntdb, gk, d)) != 0)
+				errx(1, "ntdb append failed: %s",
+				     ntdb_errorstr(ecode));
+		}
+		if ((ecode = ntdb_transaction_commit(ntdb)) != 0)
+			errx(1, "ntdb commit2 failed: %s", ntdb_errorstr(ecode));
+		if ((ecode = ntdb_check(ntdb, NULL, NULL)) != 0)
+			errx(1, "ntdb_check failed after iteration %u!", i);
+		system(cmd);
+	}
+
+	/* Release the database and the buffers instead of leaking them. */
+	if (ntdb_close(ntdb) != 0)
+		errx(1, "ntdb close failed");
+	free(gk.dptr);
+	free(d.dptr);
+	free(k.dptr);
+	free(idxdata.dptr);
+
+	return 0;
+}
--- /dev/null
+#include "ntdb.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <ccan/err/err.h>
+
+/*
+ * Create (or truncate) a database and fill it with records.
+ *
+ * Usage: mkntdb <tdbfile> <numrecords>
+ *
+ * Record i uses the bytes of the unsigned int i as both key and data.
+ * Exits with status 1 on a usage error or any database failure; prints
+ * "Done" and returns 0 on success.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, num_recs;
+	struct ntdb_context *ntdb;
+	enum NTDB_ERROR ecode;
+
+	if (argc != 3 || (num_recs = atoi(argv[2])) == 0)
+		errx(1, "Usage: mkntdb <tdbfile> <numrecords>");
+
+	ntdb = ntdb_open(argv[1], NTDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
+	if (!ntdb)
+		err(1, "Opening %s", argv[1]);
+
+	for (i = 0; i < num_recs; i++) {
+		NTDB_DATA d;
+
+		d.dptr = (void *)&i;
+		d.dsize = sizeof(i);
+		/*
+		 * ntdb_store() reports failure through its return code, so
+		 * print that rather than whatever errno happens to hold.
+		 */
+		ecode = ntdb_store(ntdb, d, d, NTDB_INSERT);
+		if (ecode != NTDB_SUCCESS)
+			errx(1, "Failed to store record %u: %s",
+			     i, ntdb_errorstr(ecode));
+	}
+
+	/* Close the database explicitly before reporting success. */
+	if (ntdb_close(ntdb) != 0)
+		errx(1, "Closing %s failed", argv[1]);
+
+	printf("Done\n");
+	return 0;
+}
--- /dev/null
+/*
+ Unix SMB/CIFS implementation.
+ low level ntdb backup and restore utility
+ Copyright (C) Andrew Tridgell 2002
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+
+ This program is meant for backup/restore of ntdb databases. Typical usage would be:
+ tdbbackup *.ntdb
+ when Samba shuts down cleanly, which will make a backup of all the local databases
+ to *.bak files. Then on Samba startup you would use:
+ tdbbackup -v *.ntdb
+ and this will check the databases for corruption and if corruption is detected then
+ the backup will be restored.
+
+ You may also like to do a backup on a regular basis while Samba is
+ running, perhaps using cron.
+
+ The reason this program is needed is to cope with power failures
+ while Samba is running. A power failure could lead to database
+ corruption and Samba will then not start correctly.
+
+ Note that many of the databases in Samba are transient and thus
+ don't need to be backed up, so you can optimise the above a little
+ by only running the backup on the critical databases.
+
+ */
+
+#include "config.h"
+#include "ntdb.h"
+#include "private.h"
+
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+/* Set to 1 by copy_fn() when inserting a record into the copy fails. */
+static int failed;
+
+/*
+ * Log callback: print the error string for ecode and the message to
+ * stderr.  The ntdb, level and data arguments are not used.
+ */
+static void ntdb_log(struct ntdb_context *ntdb,
+		     enum ntdb_log_level level,
+		     enum NTDB_ERROR ecode,
+		     const char *message,
+		     void *data)
+{
+	const char *errstr = ntdb_errorstr(ecode);
+
+	fprintf(stderr, "%s:%s\n", errstr, message);
+}
+
+/*
+ * Return a newly allocated string holding name followed by suffix.
+ *
+ * The caller owns the result and must free() it.  Prints a message and
+ * exits with status 1 if the allocation fails.
+ */
+static char *add_suffix(const char *name, const char *suffix)
+{
+	char *ret;
+	/* size_t, so long inputs are not narrowed to int; +1 for the NUL. */
+	size_t len = strlen(name) + strlen(suffix) + 1;
+
+	ret = (char *)malloc(len);
+	if (!ret) {
+		fprintf(stderr,"Out of memory!\n");
+		exit(1);
+	}
+	snprintf(ret, len, "%s%s", name, suffix);
+	return ret;
+}
+
+/*
+ * Traverse callback: insert the visited record into the database passed
+ * as state.
+ *
+ * Returns 0 on success.  On failure it prints a message, sets the
+ * file-level failed flag and returns 1.
+ */
+static int copy_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+	struct ntdb_context *dest = (struct ntdb_context *)state;
+	enum NTDB_ERROR err = ntdb_store(dest, key, dbuf, NTDB_INSERT);
+
+	if (!err)
+		return 0;
+
+	fprintf(stderr,"Failed to insert into %s: %s\n",
+		ntdb_name(dest), ntdb_errorstr(err));
+	failed = 1;
+	return 1;
+}
+
+
+/* Traverse callback that ignores the record and always returns 0. */
+static int test_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+	(void)ntdb;
+	(void)key;
+	(void)dbuf;
+	(void)state;
+
+	return 0;
+}
+
+/*
+ carefully back up an ntdb, validating the contents and
+ only doing the backup if it's OK
+ this function is also used for restore
+*/
+static int backup_ntdb(const char *old_name, const char *new_name)
+{
+ struct ntdb_context *ntdb;
+ struct ntdb_context *ntdb_new;
+ char *tmp_name;
+ struct stat st;
+ int count1, count2;
+ enum NTDB_ERROR err;
+ union ntdb_attribute log_attr;
+
+ tmp_name = add_suffix(new_name, ".tmp");
+
+ /* stat the old ntdb to find its permissions */
+ if (stat(old_name, &st) != 0) {
+ perror(old_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
+ log_attr.base.next = NULL;
+ log_attr.log.fn = ntdb_log;
+
+ /* open the old ntdb */
+ ntdb = ntdb_open(old_name, NTDB_DEFAULT, O_RDWR, 0, &log_attr);
+ if (!ntdb) {
+ printf("Failed to open %s\n", old_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ unlink(tmp_name);
+ ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT,
+ O_RDWR|O_CREAT|O_EXCL, st.st_mode & 0777,
+ &log_attr);
+ if (!ntdb_new) {
+ perror(tmp_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ err = ntdb_transaction_start(ntdb);
+ if (err) {
+ fprintf(stderr, "Failed to start transaction on old ntdb: %s\n",
+ ntdb_errorstr(err));
+ ntdb_close(ntdb);
+ ntdb_close(ntdb_new);
+ unlink(tmp_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ /* lock the backup ntdb so that nobody else can change it */
+ err = ntdb_lockall(ntdb_new);
+ if (err) {
+ fprintf(stderr, "Failed to lock backup ntdb: %s\n",
+ ntdb_errorstr(err));
+ ntdb_close(ntdb);
+ ntdb_close(ntdb_new);
+ unlink(tmp_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ failed = 0;
+
+ /* traverse and copy */
+ count1 = ntdb_traverse(ntdb, copy_fn, (void *)ntdb_new);
+ if (count1 < 0 || failed) {
+ fprintf(stderr,"failed to copy %s\n", old_name);
+ ntdb_close(ntdb);
+ ntdb_close(ntdb_new);
+ unlink(tmp_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ /* close the old ntdb */
+ ntdb_close(ntdb);
+
+ /* copy done, unlock the backup ntdb */
+ ntdb_unlockall(ntdb_new);
+
+#ifdef HAVE_FDATASYNC
+ if (fdatasync(ntdb_fd(ntdb_new)) != 0) {
+#else
+ if (fsync(ntdb_fd(ntdb_new)) != 0) {
+#endif
+ /* not fatal */
+ fprintf(stderr, "failed to fsync backup file\n");
+ }
+
+ /* close the new ntdb and re-open read-only */
+ ntdb_close(ntdb_new);
+
+ /* we don't need the hash attr any more */
+ log_attr.base.next = NULL;
+
+ ntdb_new = ntdb_open(tmp_name, NTDB_DEFAULT, O_RDONLY, 0, &log_attr);
+ if (!ntdb_new) {
+ fprintf(stderr,"failed to reopen %s\n", tmp_name);
+ unlink(tmp_name);
+ perror(tmp_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ /* traverse the new ntdb to confirm */
+ count2 = ntdb_traverse(ntdb_new, test_fn, NULL);
+ if (count2 != count1) {
+ fprintf(stderr,"failed to copy %s\n", old_name);
+ ntdb_close(ntdb_new);
+ unlink(tmp_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ /* close the new ntdb and rename it to .bak */
+ ntdb_close(ntdb_new);
+ if (rename(tmp_name, new_name) != 0) {
+ perror(new_name);
+ free(tmp_name);
+ return 1;
+ }
+
+ free(tmp_name);
+
+ return 0;
+}
+
+/*
+  verify a ntdb and if it is corrupt then restore from *.bak
+
+  "corrupt" here means that fname cannot be opened read-only or that
+  traversing it returns a negative count.  Returns 0 when the database
+  is fine, otherwise the result of restoring it from bak_name.
+*/
+static int verify_ntdb(const char *fname, const char *bak_name)
+{
+	struct ntdb_context *ntdb;
+	union ntdb_attribute log_attr;
+	int count;
+
+	log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
+	log_attr.base.next = NULL;
+	log_attr.log.fn = ntdb_log;
+
+	ntdb = ntdb_open(fname, NTDB_DEFAULT, O_RDONLY, 0, &log_attr);
+	if (ntdb == NULL) {
+		/* an unopenable database is treated like a failed traverse */
+		count = -1;
+	} else {
+		count = ntdb_traverse(ntdb, test_fn, NULL);
+		ntdb_close(ntdb);
+	}
+
+	if (count >= 0) {
+		printf("%s : %d records\n", fname, count);
+		return 0;
+	}
+
+	/* a negative count means an error: fall back to the backup copy */
+	printf("restoring %s\n", fname);
+	return backup_ntdb(bak_name, fname);
+}
+
+/*
+  see if one file is newer than another
+
+  Returns 0 if fname1 cannot be stat()ed, 1 if fname1 can but fname2
+  cannot, and otherwise whether fname1's mtime is strictly later.
+*/
+static int file_newer(const char *fname1, const char *fname2)
+{
+	struct stat first, second;
+
+	if (stat(fname1, &first) != 0) {
+		return 0;
+	}
+	if (stat(fname2, &second) != 0) {
+		return 1;
+	}
+	return first.st_mtime > second.st_mtime ? 1 : 0;
+}
+
+/* print the command line help to stdout; each option is listed once */
+static void usage(void)
+{
+	printf("Usage: ntdbbackup [options] <fname...>\n\n");
+	printf(" -h this help message\n");
+	printf(" -v verify mode (restore if corrupt)\n");
+	printf(" -s suffix set the backup suffix\n");
+}
+
+
+ int main(int argc, char *argv[])
+{
+	/*
+	  For every file named on the command line either back it up to
+	  "<file><suffix>" (default mode, only when the file is newer than
+	  its backup) or, with -v, verify it and restore it from that
+	  backup if it is corrupt.  Exit status is 1 if any file failed.
+	*/
+	int i;
+	int ret = 0;
+	int c;
+	int verify = 0;
+	const char *suffix = ".bak";
+
+	while ((c = getopt(argc, argv, "vhs:")) != -1) {
+		switch (c) {
+		case 'h':
+			usage();
+			exit(0);
+		case 'v':
+			verify = 1;
+			break;
+		case 's':
+			suffix = optarg;
+			break;
+		default:
+			/* unknown option, or -s without its argument */
+			usage();
+			exit(1);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		usage();
+		exit(1);
+	}
+
+	for (i=0; i<argc; i++) {
+		const char *fname = argv[i];
+		char *bak_name;
+
+		bak_name = add_suffix(fname, suffix);
+
+		if (verify) {
+			if (verify_ntdb(fname, bak_name) != 0) {
+				ret = 1;
+			}
+		} else {
+			/* skip the backup when the existing one is up to date */
+			if (file_newer(fname, bak_name) &&
+			    backup_ntdb(fname, bak_name) != 0) {
+				ret = 1;
+			}
+		}
+
+		free(bak_name);
+	}
+
+	return ret;
+}
--- /dev/null
+/*
+ simple ntdb dump util
+ Copyright (C) Andrew Tridgell 2001
+ Copyright (C) Rusty Russell 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "config.h"
+#include "ntdb.h"
+#include "private.h"
+
+/*
+  write a record buffer to stdout: printable bytes other than '"' and
+  '\' are written as they are, every other byte as "\" followed by two
+  upper-case hex digits
+*/
+static void print_data(NTDB_DATA d)
+{
+	const unsigned char *cur = (const unsigned char *)d.dptr;
+	int remaining;
+
+	for (remaining = d.dsize; remaining != 0; remaining--, cur++) {
+		if (!isprint(*cur) || strchr("\"\\", *cur)) {
+			printf("\\%02X", *cur);
+		} else {
+			fputc(*cur, stdout);
+		}
+	}
+}
+
+/*
+  traverse callback: print one record to stdout as a "{ ... }" block
+  holding a key(<size>) line and a data(<size>) line, with the bytes
+  escaped by print_data().  Always returns 0.
+*/
+static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+ printf("{\n");
+ printf("key(%d) = \"", (int)key.dsize);
+ print_data(key);
+ printf("\"\n");
+ printf("data(%d) = \"", (int)dbuf.dsize);
+ print_data(dbuf);
+ printf("\"\n");
+ printf("}\n");
+ return 0;
+}
+
+/*
+  Dump fname to stdout.  With keyname == NULL every record is printed
+  through traverse_fn(); otherwise only the value stored under keyname
+  is printed.
+
+  Returns 0 on success, 1 if the database cannot be opened or the key
+  cannot be fetched.  The database is closed again on every path once
+  it has been opened.
+*/
+static int dump_ntdb(const char *fname, const char *keyname)
+{
+	struct ntdb_context *ntdb;
+	NTDB_DATA key, value;
+	int ret = 0;
+
+	ntdb = ntdb_open(fname, 0, O_RDONLY, 0, NULL);
+	if (!ntdb) {
+		printf("Failed to open %s\n", fname);
+		return 1;
+	}
+
+	if (!keyname) {
+		ntdb_traverse(ntdb, traverse_fn, NULL);
+	} else {
+		key = ntdb_mkdata(keyname, strlen(keyname));
+		if (ntdb_fetch(ntdb, key, &value) != 0) {
+			ret = 1;
+		} else {
+			print_data(value);
+			free(value.dptr);
+		}
+	}
+
+	ntdb_close(ntdb);
+	return ret;
+}
+
+/* print the ntdbdump command line help to stdout */
+static void usage( void)
+{
+ printf( "Usage: ntdbdump [options] <filename>\n\n");
+ printf( " -h this help message\n");
+ printf( " -k keyname dumps value of keyname\n");
+}
+
+ int main(int argc, char *argv[])
+{
+	/*
+	  Parse -h / -k keyname, then dump the database named by the first
+	  non-option argument.  The exit status is dump_ntdb()'s result.
+	*/
+	char *fname, *keyname=NULL;
+	int c;
+
+	if (argc < 2) {
+		printf("Usage: ntdbdump <fname>\n");
+		exit(1);
+	}
+
+	while ((c = getopt( argc, argv, "hk:")) != -1) {
+		switch (c) {
+		case 'h':
+			usage();
+			exit( 0);
+		case 'k':
+			keyname = optarg;
+			break;
+		default:
+			usage();
+			exit( 1);
+		}
+	}
+
+	/* options alone (e.g. "-k key") leave no filename: argv[optind]
+	   would be the terminating NULL */
+	if (optind >= argc) {
+		usage();
+		exit( 1);
+	}
+
+	fname = argv[optind];
+
+	return dump_ntdb(fname, keyname);
+}
--- /dev/null
+/*
+ ntdbrestore -- construct a ntdb from (n)tdbdump output.
+ Copyright (C) Rusty Russell 2012
+ Copyright (C) Volker Lendecke 2010
+ Copyright (C) Simon McVittie 2005
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "config.h"
+#include "ntdb.h"
+#include "private.h"
+#include <assert.h>
+
+/*
+  Read the head of a 'key(N) = "' or 'data(N) = "' line from f and
+  return N.
+
+  Everything up to and including the first '(' is skipped, then the
+  text up to the opening '"' is collected and parsed as "N) = ".
+  Returns -1 on EOF, if no '"' is found within sizeof(prefix) bytes,
+  or if the collected text does not parse.
+*/
+static int read_linehead(FILE *f)
+{
+ int i, c;
+ int num_bytes;
+ char prefix[128];
+
+ /* skip ahead to just past the '(' */
+ while (1) {
+ c = getc(f);
+ if (c == EOF) {
+ return -1;
+ }
+ if (c == '(') {
+ break;
+ }
+ }
+ /* collect the text between '(' and the opening quote */
+ for (i=0; i<sizeof(prefix); i++) {
+ c = getc(f);
+ if (c == EOF) {
+ return -1;
+ }
+ prefix[i] = c;
+ if (c == '"') {
+ break;
+ }
+ }
+ /* the loop ran to completion: no quote was found in the buffer */
+ if (i == sizeof(prefix)) {
+ return -1;
+ }
+ /* overwrite the quote so that prefix is a C string */
+ prefix[i] = '\0';
+
+ if (sscanf(prefix, "%d) = ", &num_bytes) != 1) {
+ return -1;
+ }
+ return num_bytes;
+}
+
+/*
+  Read one hex digit from f and return its value (0-15).
+  Returns -1, after printing a message to stderr, on EOF, on a '"', or
+  on any other non-hex character.
+
+  The digit is read from the stream being parsed; this used to call
+  getchar(), which only worked because the sole caller passes stdin.
+*/
+static int read_hex(FILE *f) {
+	int c;
+	c = getc(f);
+	if (c == EOF) {
+		fprintf(stderr, "Unexpected EOF in data\n");
+		return -1;
+	} else if (c == '"') {
+		fprintf(stderr, "Unexpected \\\" sequence\n");
+		return -1;
+	} else if ('0' <= c && c <= '9') {
+		return c - '0';
+	} else if ('A' <= c && c <= 'F') {
+		return c - 'A' + 10;
+	} else if ('a' <= c && c <= 'f') {
+		return c - 'a' + 10;
+	} else {
+		fprintf(stderr, "Invalid hex: %c\n", c);
+		return -1;
+	}
+}
+
+/*
+  Read size bytes of dump-format data from f into a freshly malloc()ed
+  buffer stored in d (d->dsize is set to size).  A byte is either a
+  literal character or "\" followed by two hex digits.
+
+  Returns 0 on success and -1 on allocation failure, unexpected EOF or
+  a bad escape.  d->dptr may be non-NULL after a failure; the caller
+  frees it.
+*/
+static int read_data(FILE *f, NTDB_DATA *d, size_t size) {
+	int c, low, high;
+	size_t i;
+
+	/* malloc(0) is allowed to return NULL, which must not be mistaken
+	   for out-of-memory when restoring a zero-length key or value */
+	d->dptr = (unsigned char *)malloc(size ? size : 1);
+	if (d->dptr == NULL) {
+		return -1;
+	}
+	d->dsize = size;
+
+	for (i=0; i<size; i++) {
+		c = getc(f);
+		if (c == EOF) {
+			fprintf(stderr, "Unexpected EOF in data\n");
+			/* was "return 1", which read_rec()'s "== -1" test
+			   accepted as success */
+			return -1;
+		} else if (c == '"') {
+			/* data ended before size bytes were read; the closing
+			   quote has been consumed here, so the swallow() of
+			   that quote in read_rec() will not match */
+			return 0;
+		} else if (c == '\\') {
+			high = read_hex(f);
+			if (high < 0) {
+				return -1;
+			}
+			high = high << 4;
+			assert(high == (high & 0xf0));
+			low = read_hex(f);
+			if (low < 0) {
+				return -1;
+			}
+			assert(low == (low & 0x0f));
+			d->dptr[i] = (low|high);
+		} else {
+			d->dptr[i] = c;
+		}
+	}
+	return 0;
+}
+
+/*
+  Read one line from f and require it to be exactly s (including the
+  newline).  Returns 0 on a match and -1 otherwise.  If nothing could
+  be read at all and eof is not NULL, *eof is set to 1.
+*/
+static int swallow(FILE *f, const char *s, int *eof)
+{
+	char buf[128];
+
+	if (!fgets(buf, sizeof(buf), f)) {
+		if (eof) {
+			*eof = 1;
+		}
+		return -1;
+	}
+
+	return (strcmp(buf, s) == 0) ? 0 : -1;
+}
+
+/*
+  Read one "{ key(N) = "..." / data(N) = "..." }" record from f and
+  insert it into ntdb with NTDB_INSERT.
+
+  Returns true if the record was parsed and stored.  Returns false on
+  any parse or store error; *eof is set to 1 (by swallow()) only when
+  the failure was that nothing could be read where the opening "{" line
+  was expected, which the caller uses to tell end of input from an
+  error.
+*/
+static bool read_rec(FILE *f, struct ntdb_context *ntdb, int *eof)
+{
+ int length;
+ NTDB_DATA key, data;
+ bool ret = false;
+ enum NTDB_ERROR e;
+
+ /* so that the free() calls at "fail" are safe on every path */
+ key.dptr = NULL;
+ data.dptr = NULL;
+
+ if (swallow(f, "{\n", eof) == -1) {
+ goto fail;
+ }
+ length = read_linehead(f);
+ if (length == -1) {
+ goto fail;
+ }
+ if (read_data(f, &key, length) == -1) {
+ goto fail;
+ }
+ if (swallow(f, "\"\n", NULL) == -1) {
+ goto fail;
+ }
+ length = read_linehead(f);
+ if (length == -1) {
+ goto fail;
+ }
+ if (read_data(f, &data, length) == -1) {
+ goto fail;
+ }
+ if ((swallow(f, "\"\n", NULL) == -1)
+ || (swallow(f, "}\n", NULL) == -1)) {
+ goto fail;
+ }
+ e = ntdb_store(ntdb, key, data, NTDB_INSERT);
+ if (e != NTDB_SUCCESS) {
+ fprintf(stderr, "NTDB error: %s\n", ntdb_errorstr(e));
+ goto fail;
+ }
+
+ ret = true;
+ /* success falls through: the buffers are released either way */
+fail:
+ free(key.dptr);
+ free(data.dptr);
+ return ret;
+}
+
+/*
+  Create the database fname (it must not exist yet: O_EXCL) and fill it
+  with the records read from stdin.  A non-zero hsize is passed on as
+  the NTDB_ATTRIBUTE_HASHSIZE attribute.
+
+  Returns 0 on success and 1 if the database cannot be created, a
+  record cannot be read or stored, or the close fails.
+*/
+static int restore_ntdb(const char *fname, unsigned int hsize)
+{
+	struct ntdb_context *ntdb;
+	union ntdb_attribute hashsize;
+
+	hashsize.base.attr = NTDB_ATTRIBUTE_HASHSIZE;
+	hashsize.base.next = NULL;
+	hashsize.hashsize.size = hsize;
+
+	ntdb = ntdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666,
+			 hsize ? &hashsize : NULL);
+	if (!ntdb) {
+		perror("ntdb_open");
+		fprintf(stderr, "Failed to open %s\n", fname);
+		return 1;
+	}
+
+	while (1) {
+		int eof = 0;
+		if (!read_rec(stdin, ntdb, &eof)) {
+			if (eof) {
+				break;
+			}
+			/* a real error, not end of input: close the handle
+			   instead of leaving it open on return */
+			ntdb_close(ntdb);
+			return 1;
+		}
+	}
+	if (ntdb_close(ntdb)) {
+		fprintf(stderr, "Error closing ntdb\n");
+		return 1;
+	}
+	fprintf(stderr, "EOF\n");
+	return 0;
+}
+
+/*
+  ntdbrestore [-h <hashsize>] dbname < dump
+
+  Note that -h here takes the hash size; it is not a help option.
+*/
+int main(int argc, char *argv[])
+{
+ unsigned int hsize = 0;
+ const char *execname = argv[0];
+
+ if (argv[1] && strcmp(argv[1], "-h") == 0) {
+ if (argv[2]) {
+ hsize = atoi(argv[2]);
+ }
+ /* a missing value leaves hsize at 0 and is rejected here too */
+ if (hsize == 0) {
+ fprintf(stderr, "-h requires a integer value"
+ " (eg. 128 or 131072)\n");
+ exit(1);
+ }
+ /* drop "-h <n>" so that the database name is argv[1] again */
+ argv += 2;
+ argc -= 2;
+ }
+ if (argc != 2) {
+ printf("Usage: %s [-h <hashsize>] dbname < tdbdump_output\n",
+ execname);
+ exit(1);
+ }
+
+
+ return restore_ntdb(argv[1], hsize);
+}
--- /dev/null
+/*
+ Unix SMB/CIFS implementation.
+ Samba database functions
+ Copyright (C) Andrew Tridgell 1999-2000
+ Copyright (C) Paul `Rusty' Russell 2000
+ Copyright (C) Jeremy Allison 2000
+ Copyright (C) Andrew Esh 2001
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "config.h"
+#include "ntdb.h"
+#include "private.h"
+
+/* state shared between main() and do_command() */
+static int do_command(void);
+/* command text for do_command(); in interactive mode this is the input line */
+const char *cmdname;
+/* first and second command argument, or NULL when absent */
+char *arg1, *arg2;
+/* lengths of arg1/arg2 as reported by convert_string() */
+size_t arg1len, arg2len;
+/* set to 1 by "first"; "next" only advances while it is non-zero */
+int bIterate = 0;
+/* not referenced in the code visible in this file section */
+char *line;
+/* key position for the "first"/"next" commands */
+NTDB_DATA iterate_kbuf;
+/* not referenced in the code visible in this file section */
+char cmdline[1024];
+/* toggled by the "mmap" command; when set, databases are opened with NTDB_NOMMAP */
+static int disable_mmap;
+
+/*
+  command identifiers dispatched by do_command().  CMD_HELP is also
+  what do_command() starts from, so a command name that matches no
+  table entry prints the help.
+*/
+enum commands {
+ CMD_CREATE_NTDB,
+ CMD_OPEN_NTDB,
+ CMD_TRANSACTION_START,
+ CMD_TRANSACTION_COMMIT,
+ CMD_TRANSACTION_CANCEL,
+ CMD_ERASE,
+ CMD_DUMP,
+ CMD_INSERT,
+ CMD_MOVE,
+ CMD_STORE,
+ CMD_SHOW,
+ CMD_KEYS,
+ CMD_HEXKEYS,
+ CMD_DELETE,
+#if 0
+ CMD_LIST_HASH_FREE,
+ CMD_LIST_FREE,
+#endif
+ CMD_INFO,
+ CMD_MMAP,
+ CMD_SPEED,
+ CMD_FIRST,
+ CMD_NEXT,
+ CMD_SYSTEM,
+ CMD_CHECK,
+ CMD_QUIT,
+ CMD_HELP
+};
+
+/* one entry of the command name lookup table */
+typedef struct {
+ const char *name;
+ enum commands cmd;
+} COMMAND_TABLE;
+
+/*
+  Command names in lookup order.  do_command() takes the first entry
+  whose name is a prefix of the command text, so the order of entries
+  matters.  The entry with a NULL name ends the table.
+*/
+COMMAND_TABLE cmd_table[] = {
+ {"create", CMD_CREATE_NTDB},
+ {"open", CMD_OPEN_NTDB},
+#if 0
+ {"transaction_start", CMD_TRANSACTION_START},
+ {"transaction_commit", CMD_TRANSACTION_COMMIT},
+ {"transaction_cancel", CMD_TRANSACTION_CANCEL},
+#endif
+ {"erase", CMD_ERASE},
+ {"dump", CMD_DUMP},
+ {"insert", CMD_INSERT},
+ {"move", CMD_MOVE},
+ {"store", CMD_STORE},
+ {"show", CMD_SHOW},
+ {"keys", CMD_KEYS},
+ {"hexkeys", CMD_HEXKEYS},
+ {"delete", CMD_DELETE},
+#if 0
+ {"list", CMD_LIST_HASH_FREE},
+ {"free", CMD_LIST_FREE},
+#endif
+ {"info", CMD_INFO},
+ {"speed", CMD_SPEED},
+ {"mmap", CMD_MMAP},
+ {"first", CMD_FIRST},
+ {"1", CMD_FIRST},
+ {"next", CMD_NEXT},
+ {"n", CMD_NEXT},
+ {"check", CMD_CHECK},
+ {"quit", CMD_QUIT},
+ {"q", CMD_QUIT},
+ {"!", CMD_SYSTEM},
+ {NULL, CMD_HELP}
+};
+
+/* start (tp1) and end (tp2) timestamps of the interval being timed */
+struct timeval tp1,tp2;
+
+/* record the current time as the start of the interval */
+static void _start_timer(void)
+{
+ gettimeofday(&tp1,NULL);
+}
+
+/*
+  record the current time as the end of the interval and return the
+  seconds elapsed since _start_timer(), as a double
+*/
+static double _end_timer(void)
+{
+ gettimeofday(&tp2,NULL);
+ return((tp2.tv_sec - tp1.tv_sec) +
+ (tp2.tv_usec - tp1.tv_usec)*1.0e-6);
+}
+
+/*
+  log callback installed through NTDB_ATTRIBUTE_LOG: writes
+  "ntdb:<database name>:<error string>:<message>" to stderr.
+  level and data are unused.
+*/
+static void ntdb_log(struct ntdb_context *ntdb,
+ enum ntdb_log_level level,
+ enum NTDB_ERROR ecode,
+ const char *message,
+ void *data)
+{
+ fprintf(stderr, "ntdb:%s:%s:%s\n",
+ ntdb_name(ntdb), ntdb_errorstr(ecode), message);
+}
+
+/* a ntdb tool for manipulating a ntdb database */
+
+static struct ntdb_context *ntdb;
+
+static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
+static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
+static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state);
+
+/*
+  print len bytes of buf to stdout, replacing every non-printable byte
+  by '.'.  A single trailing NUL byte is not printed.  Nothing is
+  printed when len is 0 or negative.
+*/
+static void print_asc(const char *buf,int len)
+{
+	int i;
+
+	/* We're probably printing ASCII strings so don't try to display
+	   the trailing NUL character.  The len check keeps an empty
+	   buffer from being indexed at buf[-1]. */
+
+	if (len > 0 && buf[len - 1] == 0)
+		len--;
+
+	for (i=0;i<len;i++) {
+		/* isprint() needs an unsigned char value: a plain char may
+		   be negative for bytes >= 0x80 */
+		printf("%c",isprint((unsigned char)buf[i])?buf[i]:'.');
+	}
+}
+
+/*
+  hex dump of len bytes of buf to stdout, 16 bytes per row: each row
+  starts with its offset as "[XXX]", followed by the bytes in hex and
+  then by the same bytes as text (through print_asc()) in two groups
+  of 8.  Nothing is printed when len <= 0.
+*/
+static void print_data(const char *buf,int len)
+{
+ int i=0;
+ if (len<=0) return;
+ printf("[%03X] ",i);
+ for (i=0;i<len;) {
+ printf("%02X ",(int)((unsigned char)buf[i]));
+ i++;
+ if (i%8 == 0) printf(" ");
+ if (i%16 == 0) {
+ /* a full row is complete: add its text columns */
+ print_asc(&buf[i-16],8); printf(" ");
+ print_asc(&buf[i-8],8); printf("\n");
+ if (i<len) printf("[%03X] ",i);
+ }
+ }
+ /* a final partial row: pad the hex columns, then print its text */
+ if (i%16) {
+ int n;
+
+ n = 16 - (i%16);
+ printf(" ");
+ if (n>8) printf(" ");
+ while (n--) printf(" ");
+
+ n = i%16;
+ if (n > 8) n = 8;
+ print_asc(&buf[i-(i%16)],n); printf(" ");
+ n = (i%16) - n;
+ if (n>0) print_asc(&buf[i-n],n);
+ printf("\n");
+ }
+}
+
+/*
+  print the command summary to stdout
+
+  NOTE(review): the text lists "openjh" and the transaction_* commands,
+  but cmd_table has no "openjh" entry and its transaction entries are
+  inside "#if 0", so do_command() answers those names with this help.
+  The banner also still says "tdbtool".  Confirm whether the text or
+  the table should change.
+*/
+static void help(void)
+{
+ printf("\n"
+"tdbtool: \n"
+" create dbname : create a database\n"
+" open dbname : open an existing database\n"
+" openjh dbname : open an existing database (jenkins hash)\n"
+" transaction_start : start a transaction\n"
+" transaction_commit : commit a transaction\n"
+" transaction_cancel : cancel a transaction\n"
+" erase : erase the database\n"
+" dump : dump the database as strings\n"
+" keys : dump the database keys as strings\n"
+" hexkeys : dump the database keys as hex values\n"
+" info : print summary info about the database\n"
+" insert key data : insert a record\n"
+" move key file : move a record to a destination ntdb\n"
+" store key data : store a record (replace)\n"
+" show key : show a record by key\n"
+" delete key : delete a record by key\n"
+#if 0
+" list : print the database hash table and freelist\n"
+" free : print the database freelist\n"
+#endif
+" check : check the integrity of an opened database\n"
+" speed : perform speed tests on the database\n"
+" ! command : execute system command\n"
+" 1 | first : print the first record\n"
+" n | next : print the next record\n"
+" q | quit : terminate\n"
+" \\n : repeat 'next' command\n"
+"\n");
+}
+
+/*
+  print an error line to stdout: just "why" when err is NTDB_SUCCESS,
+  otherwise "<error string>:why"
+*/
+static void terror(enum NTDB_ERROR err, const char *why)
+{
+	if (err == NTDB_SUCCESS) {
+		printf("%s\n", why);
+		return;
+	}
+
+	printf("%s:%s\n", ntdb_errorstr(err), why);
+}
+
+/*
+  Close the currently open database (if any) and create tdbname,
+  truncating an existing file, as the new current database.  A failure
+  is reported on stdout and leaves no database open.
+
+  A NULL tdbname (the command was given without an argument) is
+  rejected up front and leaves the current database untouched.
+*/
+static void create_ntdb(const char *tdbname)
+{
+	union ntdb_attribute log_attr;
+
+	if (tdbname == NULL) {
+		terror(NTDB_SUCCESS, "need database name");
+		return;
+	}
+
+	log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
+	log_attr.base.next = NULL;
+	log_attr.log.fn = ntdb_log;
+
+	if (ntdb) ntdb_close(ntdb);
+	ntdb = ntdb_open(tdbname, (disable_mmap?NTDB_NOMMAP:0),
+			 O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr);
+	if (!ntdb) {
+		printf("Could not create %s: %s\n", tdbname, strerror(errno));
+	}
+}
+
+/*
+  Close the currently open database (if any) and open the existing
+  database tdbname read-write as the new current database.  A failure
+  is reported on stdout and leaves no database open.
+
+  A NULL tdbname (the command was given without an argument) is
+  rejected up front and leaves the current database untouched.
+*/
+static void open_ntdb(const char *tdbname)
+{
+	union ntdb_attribute log_attr;
+
+	if (tdbname == NULL) {
+		terror(NTDB_SUCCESS, "need database name");
+		return;
+	}
+
+	log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
+	log_attr.base.next = NULL;
+	log_attr.log.fn = ntdb_log;
+
+	if (ntdb) ntdb_close(ntdb);
+	ntdb = ntdb_open(tdbname, disable_mmap?NTDB_NOMMAP:0, O_RDWR, 0600,
+			 &log_attr);
+	if (!ntdb) {
+		printf("Could not open %s: %s\n", tdbname, strerror(errno));
+	}
+}
+
+/*
+  insert a record with NTDB_INSERT into the current database; a missing
+  or empty key and a failed store are both reported through terror()
+*/
+static void insert_ntdb(char *keyname, size_t keylen, char* data, size_t datalen)
+{
+	NTDB_DATA key, dbuf;
+	enum NTDB_ERROR ecode;
+
+	if (!(keyname != NULL && keylen != 0)) {
+		terror(NTDB_SUCCESS, "need key");
+		return;
+	}
+
+	key.dsize = keylen;
+	key.dptr = (unsigned char *)keyname;
+	dbuf.dsize = datalen;
+	dbuf.dptr = (unsigned char *)data;
+
+	ecode = ntdb_store(ntdb, key, dbuf, NTDB_INSERT);
+	if (ecode != 0) {
+		terror(ecode, "insert failed");
+	}
+}
+
+/*
+  store a record with NTDB_REPLACE into the current database, printing
+  it first.  Both the key and the data must be present and non-empty;
+  problems are reported through terror().
+*/
+static void store_ntdb(char *keyname, size_t keylen, char* data, size_t datalen)
+{
+	NTDB_DATA key, dbuf;
+	enum NTDB_ERROR ecode;
+
+	if (!(keyname != NULL && keylen != 0)) {
+		terror(NTDB_SUCCESS, "need key");
+		return;
+	}
+	if (!(data != NULL && datalen != 0)) {
+		terror(NTDB_SUCCESS, "need data");
+		return;
+	}
+
+	key.dsize = keylen;
+	key.dptr = (unsigned char *)keyname;
+	dbuf.dsize = datalen;
+	dbuf.dptr = (unsigned char *)data;
+
+	printf("Storing key:\n");
+	print_rec(ntdb, key, dbuf, NULL);
+
+	ecode = ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
+	if (ecode != 0) {
+		terror(ecode, "store failed");
+	}
+}
+
+/*
+  fetch the record stored under keyname from the current database and
+  print it; a missing or empty key and a failed fetch are reported
+  through terror()
+*/
+static void show_ntdb(char *keyname, size_t keylen)
+{
+	NTDB_DATA key, dbuf;
+	enum NTDB_ERROR ecode;
+
+	if (!(keyname != NULL && keylen != 0)) {
+		terror(NTDB_SUCCESS, "need key");
+		return;
+	}
+
+	key.dsize = keylen;
+	key.dptr = (unsigned char *)keyname;
+
+	ecode = ntdb_fetch(ntdb, key, &dbuf);
+	if (ecode != 0) {
+		terror(ecode, "fetch failed");
+		return;
+	}
+
+	print_rec(ntdb, key, dbuf, NULL);
+
+	/* the fetched buffer belongs to us */
+	free(dbuf.dptr);
+}
+
+/*
+  delete the record stored under keyname from the current database; a
+  missing or empty key and a failed delete are reported through terror()
+*/
+static void delete_ntdb(char *keyname, size_t keylen)
+{
+	NTDB_DATA key;
+	enum NTDB_ERROR ecode;
+
+	if (!(keyname != NULL && keylen != 0)) {
+		terror(NTDB_SUCCESS, "need key");
+		return;
+	}
+
+	key.dsize = keylen;
+	key.dptr = (unsigned char *)keyname;
+
+	ecode = ntdb_delete(ntdb, key);
+	if (ecode != 0) {
+		terror(ecode, "delete failed");
+	}
+}
+
+/*
+  Fetch the record stored under keyname from the current database,
+  print it, and store it with NTDB_REPLACE into the existing database
+  tdbname.  The record is not deleted from the current database.
+
+  Problems are reported through terror().  The fetched buffer is freed
+  on every path after the fetch.
+*/
+static void move_rec(char *keyname, size_t keylen, char* tdbname)
+{
+	NTDB_DATA key, dbuf;
+	struct ntdb_context *dst_ntdb;
+	enum NTDB_ERROR ecode;
+
+	if ((keyname == NULL) || (keylen == 0)) {
+		terror(NTDB_SUCCESS, "need key");
+		return;
+	}
+
+	if ( !tdbname ) {
+		terror(NTDB_SUCCESS, "need destination ntdb name");
+		return;
+	}
+
+	key.dptr = (unsigned char *)keyname;
+	key.dsize = keylen;
+
+	ecode = ntdb_fetch(ntdb, key, &dbuf);
+	if (ecode) {
+		terror(ecode, "fetch failed");
+		return;
+	}
+
+	print_rec(ntdb, key, dbuf, NULL);
+
+	dst_ntdb = ntdb_open(tdbname, 0, O_RDWR, 0600, NULL);
+	if ( !dst_ntdb ) {
+		terror(NTDB_SUCCESS, "unable to open destination ntdb");
+		free(dbuf.dptr);
+		return;
+	}
+
+	ecode = ntdb_store( dst_ntdb, key, dbuf, NTDB_REPLACE);
+	if (ecode)
+		terror(ecode, "failed to move record");
+	else
+		printf("record moved\n");
+
+	ntdb_close( dst_ntdb );
+	/* as in show_ntdb(), the fetched buffer is ours to release */
+	free(dbuf.dptr);
+}
+
+/*
+  print one record to stdout: the key as text, the data as a hex dump.
+  Has the traverse callback signature; the_ntdb and state are unused.
+  Always returns 0.
+*/
+static int print_rec(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+ printf("\nkey %d bytes\n", (int)key.dsize);
+ print_asc((const char *)key.dptr, key.dsize);
+ printf("\ndata %d bytes\n", (int)dbuf.dsize);
+ print_data((const char *)dbuf.dptr, dbuf.dsize);
+ return 0;
+}
+
+/*
+  traverse callback: print the size of the key and the key as text on
+  one line; the data is ignored.  Always returns 0.
+*/
+static int print_key(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+ printf("key %d bytes: ", (int)key.dsize);
+ print_asc((const char *)key.dptr, key.dsize);
+ printf("\n");
+ return 0;
+}
+
+/*
+  traverse callback: print the size of the key and the key as a hex
+  dump; the data is ignored.  Always returns 0.
+*/
+static int print_hexkey(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+ printf("key %d bytes\n", (int)key.dsize);
+ print_data((const char *)key.dptr, key.dsize);
+ printf("\n");
+ return 0;
+}
+
+/* running sum of the data sizes seen by traverse_fn(); never reset in the code visible here */
+static int total_bytes;
+
+/*
+  traverse callback used by the traverse speed test: adds the size of
+  the record's data to total_bytes.  Always returns 0.
+*/
+static int traverse_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf, void *state)
+{
+ total_bytes += dbuf.dsize;
+ return 0;
+}
+
+/*
+  print the summary (with histograms) of the current database to
+  stdout, or report through terror() why it could not be produced
+*/
+static void info_ntdb(void)
+{
+	char *summary;
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &summary);
+	if (!ecode) {
+		printf("%s", summary);
+		free(summary);
+		return;
+	}
+
+	terror(ecode, "Getting summary");
+}
+
+/*
+  Run four timed benchmarks against the current database -- store,
+  fetch, store inside a transaction, and traverse -- and print the
+  operations per second of each.
+
+  tlimit is the duration of each benchmark in seconds as a decimal
+  string; NULL or a value that parses to 0 selects 5 seconds.
+  Return values of the ntdb calls are not checked.
+*/
+static void speed_ntdb(const char *tlimit)
+{
+	unsigned timelimit = tlimit?atoi(tlimit):0;
+	double t;
+	int ops;
+	if (timelimit == 0) timelimit = 5;
+
+	ops = 0;
+	printf("Testing store speed for %u seconds\n", timelimit);
+	_start_timer();
+	do {
+		long int r = random();
+		NTDB_DATA key, dbuf;
+		key = ntdb_mkdata("store test", strlen("store test"));
+		dbuf.dptr = (unsigned char *)&r;
+		dbuf.dsize = sizeof(r);
+		ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
+		t = _end_timer();
+		ops++;
+	} while (t < timelimit);
+	printf("%10.3f ops/sec\n", ops/t);
+
+	ops = 0;
+	printf("Testing fetch speed for %u seconds\n", timelimit);
+	_start_timer();
+	do {
+		NTDB_DATA key, dbuf;
+		key = ntdb_mkdata("store test", strlen("store test"));
+		/* a successful fetch hands back a buffer we own; free it
+		   (as show_ntdb() does) so the loop does not leak one
+		   buffer per iteration */
+		if (ntdb_fetch(ntdb, key, &dbuf) == NTDB_SUCCESS) {
+			free(dbuf.dptr);
+		}
+		t = _end_timer();
+		ops++;
+	} while (t < timelimit);
+	printf("%10.3f ops/sec\n", ops/t);
+
+	ops = 0;
+	printf("Testing transaction speed for %u seconds\n", timelimit);
+	_start_timer();
+	do {
+		long int r = random();
+		NTDB_DATA key, dbuf;
+		key = ntdb_mkdata("transaction test", strlen("transaction test"));
+		dbuf.dptr = (unsigned char *)&r;
+		dbuf.dsize = sizeof(r);
+		ntdb_transaction_start(ntdb);
+		ntdb_store(ntdb, key, dbuf, NTDB_REPLACE);
+		ntdb_transaction_commit(ntdb);
+		t = _end_timer();
+		ops++;
+	} while (t < timelimit);
+	printf("%10.3f ops/sec\n", ops/t);
+
+	ops = 0;
+	printf("Testing traverse speed for %u seconds\n", timelimit);
+	_start_timer();
+	do {
+		ntdb_traverse(ntdb, traverse_fn, NULL);
+		t = _end_timer();
+		ops++;
+	} while (t < timelimit);
+	printf("%10.3f ops/sec\n", ops/t);
+}
+
+/*
+  flip disable_mmap and print the new state; the flag is read the next
+  time a database is created or opened
+*/
+static void toggle_mmap(void)
+{
+	const char *msg;
+
+	disable_mmap = !disable_mmap;
+	msg = disable_mmap ? "mmap is disabled\n" : "mmap is enabled\n";
+	fputs(msg, stdout);
+}
+
+/*
+  Print prompt and read one line from stdin into a static buffer,
+  stripping the newline.  Returns the buffer, which is overwritten by
+  the next call.
+
+  Returns NULL at end of input, and also when the text read contains
+  no newline (input too long for the buffer, or a last line without a
+  newline).
+*/
+static char *ntdb_getline(const char *prompt)
+{
+ static char thisline[1024];
+ char *p;
+ fputs(prompt, stdout);
+ thisline[0] = 0;
+ p = fgets(thisline, sizeof(thisline)-1, stdin);
+ /* from here on p is the position of the newline, or NULL */
+ if (p) p = strchr(p, '\n');
+ if (p) *p = 0;
+ return p?thisline:NULL;
+}
+
+/*
+  traverse callback behind the "erase" command: delete the record being
+  visited and return ntdb_delete()'s result as the callback result
+*/
+static int do_delete_fn(struct ntdb_context *the_ntdb, NTDB_DATA key, NTDB_DATA dbuf,
+ void *state)
+{
+ return ntdb_delete(the_ntdb, key);
+}
+
+/*
+  Store the first key of the_ntdb in *pkey, then fetch and print that
+  record.  A failure of either step is reported through terror() as
+  "fetch failed".  The fetched data is freed after printing; *pkey is
+  left in place as the iteration position for next_record().
+*/
+static void first_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey)
+{
+	NTDB_DATA dbuf;
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_firstkey(the_ntdb, pkey);
+	if (!ecode)
+		ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf);
+	if (ecode) {
+		terror(ecode, "fetch failed");
+	} else {
+		print_rec(the_ntdb, *pkey, dbuf, NULL);
+		/* as in show_ntdb(), the fetched buffer is ours to release */
+		free(dbuf.dptr);
+	}
+}
+
+/*
+  Advance *pkey to the key following it in the_ntdb, then fetch and
+  print that record.  A failure of either step is reported through
+  terror() as "fetch failed".  The fetched data is freed after
+  printing; *pkey stays in place as the iteration position.
+*/
+static void next_record(struct ntdb_context *the_ntdb, NTDB_DATA *pkey)
+{
+	NTDB_DATA dbuf;
+	enum NTDB_ERROR ecode;
+
+	ecode = ntdb_nextkey(the_ntdb, pkey);
+	if (!ecode)
+		ecode = ntdb_fetch(the_ntdb, *pkey, &dbuf);
+	if (ecode) {
+		terror(ecode, "fetch failed");
+	} else {
+		print_rec(the_ntdb, *pkey, dbuf, NULL);
+		/* as in show_ntdb(), the fetched buffer is ours to release */
+		free(dbuf.dptr);
+	}
+}
+
+/*
+  run ntdb_check() on the_ntdb and print the verdict; a NULL handle is
+  reported as "no database opened"
+*/
+static void check_db(struct ntdb_context *the_ntdb)
+{
+	if (!the_ntdb) {
+		printf("Error: No database opened!\n");
+		return;
+	}
+
+	if (ntdb_check(the_ntdb, NULL, NULL) == 0) {
+		printf("Database integrity is OK.\n");
+	} else {
+		printf("Integrity check for the opened database failed.\n");
+	}
+}
+
+/*
+  Look up the command in cmdname and run it with the arguments held in
+  arg1/arg2 (and arg1len/arg2len).
+
+  An empty cmdname repeats "next".  Otherwise the first cmd_table entry
+  whose name is a prefix of cmdname is taken; no match means help.
+
+  Returns 1 for the quit command and 0 for everything else.
+*/
+static int do_command(void)
+{
+ COMMAND_TABLE *ctp = cmd_table;
+ enum commands mycmd = CMD_HELP;
+ int cmd_len;
+
+ if (cmdname && strlen(cmdname) == 0) {
+ mycmd = CMD_NEXT;
+ } else {
+ while (ctp->name) {
+ cmd_len = strlen(ctp->name);
+ /* only the table name's length is compared: in interactive
+ mode cmdname still has the first argument attached */
+ if (strncmp(ctp->name,cmdname,cmd_len) == 0) {
+ mycmd = ctp->cmd;
+ break;
+ }
+ ctp++;
+ }
+ }
+
+ /* every command except "first"/"next" and the read-only listing
+ commands ends a first/next iteration by clearing bIterate */
+ switch (mycmd) {
+ case CMD_CREATE_NTDB:
+ bIterate = 0;
+ create_ntdb(arg1);
+ return 0;
+ case CMD_OPEN_NTDB:
+ bIterate = 0;
+ open_ntdb(arg1);
+ return 0;
+ case CMD_SYSTEM:
+ /* Shell command */
+ if (system(arg1) == -1) {
+ terror(NTDB_SUCCESS, "system() call failed\n");
+ }
+ return 0;
+ case CMD_QUIT:
+ return 1;
+ default:
+ /* all the rest require a open database */
+ if (!ntdb) {
+ bIterate = 0;
+ terror(NTDB_SUCCESS, "database not open");
+ help();
+ return 0;
+ }
+ switch (mycmd) {
+ case CMD_TRANSACTION_START:
+ bIterate = 0;
+ ntdb_transaction_start(ntdb);
+ return 0;
+ case CMD_TRANSACTION_COMMIT:
+ bIterate = 0;
+ ntdb_transaction_commit(ntdb);
+ return 0;
+ case CMD_TRANSACTION_CANCEL:
+ bIterate = 0;
+ ntdb_transaction_cancel(ntdb);
+ return 0;
+ case CMD_ERASE:
+ bIterate = 0;
+ ntdb_traverse(ntdb, do_delete_fn, NULL);
+ return 0;
+ case CMD_DUMP:
+ bIterate = 0;
+ ntdb_traverse(ntdb, print_rec, NULL);
+ return 0;
+ case CMD_INSERT:
+ bIterate = 0;
+ insert_ntdb(arg1, arg1len,arg2,arg2len);
+ return 0;
+ case CMD_MOVE:
+ bIterate = 0;
+ move_rec(arg1,arg1len,arg2);
+ return 0;
+ case CMD_STORE:
+ bIterate = 0;
+ store_ntdb(arg1,arg1len,arg2,arg2len);
+ return 0;
+ case CMD_SHOW:
+ bIterate = 0;
+ show_ntdb(arg1, arg1len);
+ return 0;
+ case CMD_KEYS:
+ ntdb_traverse(ntdb, print_key, NULL);
+ return 0;
+ case CMD_HEXKEYS:
+ ntdb_traverse(ntdb, print_hexkey, NULL);
+ return 0;
+ case CMD_DELETE:
+ bIterate = 0;
+ delete_ntdb(arg1,arg1len);
+ return 0;
+#if 0
+ case CMD_LIST_HASH_FREE:
+ ntdb_dump_all(ntdb);
+ return 0;
+ case CMD_LIST_FREE:
+ ntdb_printfreelist(ntdb);
+ return 0;
+#endif
+ case CMD_INFO:
+ info_ntdb();
+ return 0;
+ case CMD_SPEED:
+ speed_ntdb(arg1);
+ return 0;
+ case CMD_MMAP:
+ toggle_mmap();
+ return 0;
+ case CMD_FIRST:
+ bIterate = 1;
+ first_record(ntdb, &iterate_kbuf);
+ return 0;
+ case CMD_NEXT:
+ /* silently does nothing unless "first" started an iteration */
+ if (bIterate)
+ next_record(ntdb, &iterate_kbuf);
+ return 0;
+ case CMD_CHECK:
+ check_db(ntdb);
+ return 0;
+ case CMD_HELP:
+ help();
+ return 0;
+ case CMD_CREATE_NTDB:
+ case CMD_OPEN_NTDB:
+ case CMD_SYSTEM:
+ case CMD_QUIT:
+ /*
+ * unhandled commands. cases included here to avoid compiler
+ * warnings.
+ */
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+  Decode backslash escapes in instring, in place.
+
+  "\" followed by one or two hex digits becomes the byte with that
+  value; "\" followed by any other character becomes that character.
+  The decoded length is stored in *sizep and instring is returned.
+
+  The result is NUL-terminated at its new, possibly shorter, length so
+  that callers which use it as a C string (database names, shell
+  commands) do not see leftover bytes of the undecoded text.  A decoded
+  "\00" still yields an embedded NUL, which is why the length is
+  reported separately.
+*/
+static char *convert_string(char *instring, size_t *sizep)
+{
+	size_t length = 0;
+	char *outp, *inp;
+	char temp[3];
+
+	outp = inp = instring;
+
+	while (*inp) {
+		if (*inp == '\\') {
+			inp++;
+			if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) {
+				temp[0] = *inp++;
+				temp[1] = '\0';
+				if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) {
+					temp[1] = *inp++;
+					temp[2] = '\0';
+				}
+				*outp++ = (char)strtol((const char *)temp,NULL,16);
+			} else {
+				/* includes a trailing lone backslash: the NUL
+				   is copied and inp steps past it, exactly as
+				   the original did */
+				*outp++ = *inp++;
+			}
+		} else {
+			*outp++ = *inp++;
+		}
+		length++;
+	}
+	/* outp never runs ahead of inp, so this stays inside the buffer */
+	*outp = '\0';
+	*sizep = length;
+	return instring;
+}
+
+/*
+  ntdbtool [dbname [command [arg1 [arg2]]]]
+
+  With a database name it is opened first.  With no further arguments
+  commands are then read interactively; otherwise the single command
+  given on the command line is run.
+*/
+int main(int argc, char *argv[])
+{
+ cmdname = "";
+ arg1 = NULL;
+ arg1len = 0;
+ arg2 = NULL;
+ arg2len = 0;
+
+ if (argv[1]) {
+ cmdname = "open";
+ arg1 = argv[1];
+ do_command();
+ cmdname = "";
+ arg1 = NULL;
+ }
+
+ switch (argc) {
+ case 1:
+ case 2:
+ /* Interactive mode */
+ while ((cmdname = ntdb_getline("ntdb> "))) {
+ arg2 = arg1 = NULL;
+ /* arg1 starts after the first space; cmdname itself is not
+ cut there, do_command() matches it by prefix */
+ if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) {
+ arg1++;
+ arg2 = arg1;
+ /* find the unescaped space that ends arg1; "\ " is
+ skipped here and decoded later by convert_string() */
+ while (*arg2) {
+ if (*arg2 == ' ') {
+ *arg2++ = '\0';
+ break;
+ }
+ if ((*arg2++ == '\\') && (*arg2 == ' ')) {
+ arg2++;
+ }
+ }
+ }
+ if (arg1) arg1 = convert_string(arg1,&arg1len);
+ if (arg2) arg2 = convert_string(arg2,&arg2len);
+ if (do_command()) break;
+ }
+ break;
+ /* the cases below fall through on purpose: each extra
+ command line argument adds to the ones handled after it */
+ case 5:
+ arg2 = convert_string(argv[4],&arg2len);
+ case 4:
+ arg1 = convert_string(argv[3],&arg1len);
+ case 3:
+ cmdname = argv[2];
+ default:
+ do_command();
+ break;
+ }
+
+ if (ntdb) ntdb_close(ntdb);
+
+ return 0;
+}
--- /dev/null
+/* this tests ntdb by doing lots of ops from several simultaneous
+ writers - that stresses the locking code.
+*/
+
+#include "config.h"
+#include "ntdb.h"
+#include "private.h"
+#include <ccan/err/err.h>
+
+//#define REOPEN_PROB 30
+#define DELETE_PROB 8
+#define STORE_PROB 4
+#define APPEND_PROB 6
+#define TRANSACTION_PROB 10
+#define TRANSACTION_PREPARE_PROB 2
+#define LOCKSTORE_PROB 5
+#define TRAVERSE_PROB 20
+#define TRAVERSE_MOD_PROB 100
+#define TRAVERSE_ABORT_PROB 500
+#define CULL_PROB 100
+#define KEYLEN 3
+#define DATALEN 100
+
+static struct ntdb_context *db;
+static int in_transaction;
+static int in_traverse;
+static int error_count;
+#if TRANSACTION_PROB
+static int always_transaction = 0;
+#endif
+static int loopnum;
+static int count_pipe;
+static union ntdb_attribute log_attr;
+static union ntdb_attribute seed_attr;
+static union ntdb_attribute hsize_attr;
+
+/*
+  log callback: prints "ntdb:<database name>:<error string>:<message>"
+  to stdout and flushes it.  level and data are unused.  The disabled
+  section would start gdb on this process in an xterm.
+*/
+static void ntdb_log(struct ntdb_context *ntdb,
+ enum ntdb_log_level level,
+ enum NTDB_ERROR ecode,
+ const char *message,
+ void *data)
+{
+ printf("ntdb:%s:%s:%s\n",
+ ntdb_name(ntdb), ntdb_errorstr(ecode), message);
+ fflush(stdout);
+#if 0
+ {
+ char str[200];
+ signal(SIGUSR1, SIG_IGN);
+ sprintf(str,"xterm -e gdb /proc/%u/exe %u", (unsigned int)getpid(), (unsigned int)getpid());
+ system(str);
+ }
+#endif
+}
+
+#include "../private.h"
+
+/*
+  signal handler (three-argument sigaction form): writes the pid, the
+  faulting address and the database's map pointer and size to file
+  descriptor 2, sleeps for 60 seconds if that write succeeded, then
+  exits with status 11.
+*/
+static void segv_handler(int sig, siginfo_t *info, void *p)
+{
+ char string[100];
+
+ sprintf(string, "%u: death at %p (map_ptr %p, map_size %zu)\n",
+ (unsigned int)getpid(), info->si_addr, db->file->map_ptr,
+ (size_t)db->file->map_size);
+ /* presumably the sleep leaves time to attach a debugger -- TODO confirm */
+ if (write(2, string, strlen(string)) > 0)
+ sleep(60);
+ _exit(11);
+}
+
+/*
+  if e is an error, print "<pid>:<why>:<error string>" to stderr (with
+  "(no ntdb)" in place of the error string when ntdb is NULL) and bump
+  error_count; NTDB_SUCCESS is ignored
+*/
+static void warn_on_err(enum NTDB_ERROR e, struct ntdb_context *ntdb,
+			const char *why)
+{
+	const char *detail;
+
+	if (e == NTDB_SUCCESS) {
+		return;
+	}
+
+	detail = ntdb ? ntdb_errorstr(e) : "(no ntdb)";
+	fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), why, detail);
+	error_count++;
+}
+
+/*
+  return a malloc()ed, NUL-terminated string of len characters drawn
+  from 'a'..'z' with rand(); exits with status 1 if the allocation
+  fails.  The caller owns the buffer.
+*/
+static char *randbuf(int len)
+{
+	char *buf = (char *)malloc(len+1);
+	int pos = 0;
+
+	if (buf == NULL) {
+		perror("randbuf: unable to allocate memory for buffer.\n");
+		exit(1);
+	}
+
+	while (pos < len) {
+		buf[pos] = 'a' + (rand() % 26);
+		pos++;
+	}
+	buf[pos] = 0;
+	return buf;
+}
+
+static void addrec_db(void);
+/*
+  traverse callback that modifies the database while it is being
+  traversed.  Each step is taken with probability 1 in N, where N is
+  the named *_PROB constant, and is compiled out when that constant is
+  0: delete the current record (CULL_PROB), run one random operation
+  through addrec_db() (TRAVERSE_MOD_PROB), and return 1 instead of 0
+  (TRAVERSE_ABORT_PROB).
+*/
+static int modify_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
+ void *state)
+{
+#if CULL_PROB
+ if (random() % CULL_PROB == 0) {
+ ntdb_delete(ntdb, key);
+ }
+#endif
+
+#if TRAVERSE_MOD_PROB
+ if (random() % TRAVERSE_MOD_PROB == 0) {
+ addrec_db();
+ }
+#endif
+
+#if TRAVERSE_ABORT_PROB
+ if (random() % TRAVERSE_ABORT_PROB == 0)
+ return 1;
+#endif
+
+ return 0;
+}
+
+/*
+ * Perform one randomly chosen operation on the global db.
+ *
+ * A random key and value are generated first.  Then every section whose
+ * probability macro is non-zero is tried in order, and the first one whose
+ * random draw hits is executed: reopen, transaction start / commit / cancel,
+ * delete, store, append, fetch-and-store under the chain lock, or traverse.
+ * If none hits, the key is simply fetched.  Failures of the store, append
+ * and transaction start/commit calls are recorded through warn_on_err();
+ * the results of delete, cancel, chainlock and traverse are ignored.
+ */
+static void addrec_db(void)
+{
+ int klen, dlen;
+ char *k, *d;
+ NTDB_DATA key, data;
+ enum NTDB_ERROR e;
+
+ /* Lengths in 1..KEYLEN and 1..DATALEN. */
+ klen = 1 + (rand() % KEYLEN);
+ dlen = 1 + (rand() % DATALEN);
+
+ k = randbuf(klen);
+ d = randbuf(dlen);
+
+ /* dsize counts the terminating NUL that randbuf() appends. */
+ key.dptr = (unsigned char *)k;
+ key.dsize = klen+1;
+
+ data.dptr = (unsigned char *)d;
+ data.dsize = dlen+1;
+
+#if REOPEN_PROB
+ /* Only reopen when neither traversing nor inside a transaction. */
+ if (in_traverse == 0 && in_transaction == 0 && random() % REOPEN_PROB == 0) {
+ ntdb_reopen_all(0);
+ goto next;
+ }
+#endif
+
+#if TRANSACTION_PROB
+ /* No transaction active: maybe start one (always, when -t was given). */
+ if (in_traverse == 0 && in_transaction == 0 && (always_transaction || random() % TRANSACTION_PROB == 0)) {
+ e = ntdb_transaction_start(db);
+ warn_on_err(e, db, "ntdb_transaction_start failed");
+ in_transaction++;
+ goto next;
+ }
+ /* Transaction active: maybe commit it, sometimes with an explicit
+ * prepare step first. */
+ if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
+ if (random() % TRANSACTION_PREPARE_PROB == 0) {
+ e = ntdb_transaction_prepare_commit(db);
+ warn_on_err(e, db, "ntdb_transaction_prepare_commit failed");
+ }
+ e = ntdb_transaction_commit(db);
+ warn_on_err(e, db, "ntdb_transaction_commit failed");
+ in_transaction--;
+ goto next;
+ }
+
+ /* Transaction active and not committed above: maybe cancel it. */
+ if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
+ ntdb_transaction_cancel(db);
+ in_transaction--;
+ goto next;
+ }
+#endif
+
+#if DELETE_PROB
+ if (random() % DELETE_PROB == 0) {
+ ntdb_delete(db, key);
+ goto next;
+ }
+#endif
+
+#if STORE_PROB
+ if (random() % STORE_PROB == 0) {
+ e = ntdb_store(db, key, data, NTDB_REPLACE);
+ warn_on_err(e, db, "ntdb_store failed");
+ goto next;
+ }
+#endif
+
+#if APPEND_PROB
+ if (random() % APPEND_PROB == 0) {
+ e = ntdb_append(db, key, data);
+ warn_on_err(e, db, "ntdb_append failed");
+ goto next;
+ }
+#endif
+
+#if LOCKSTORE_PROB
+ if (random() % LOCKSTORE_PROB == 0) {
+ ntdb_chainlock(db, key);
+ /* The fetch overwrites data (the random value is still reachable
+ * through d); on failure an empty value is stored instead. */
+ if (ntdb_fetch(db, key, &data) != NTDB_SUCCESS) {
+ data.dsize = 0;
+ data.dptr = NULL;
+ }
+ e = ntdb_store(db, key, data, NTDB_REPLACE);
+ warn_on_err(e, db, "ntdb_store failed");
+ if (data.dptr) free(data.dptr);
+ ntdb_chainunlock(db, key);
+ goto next;
+ }
+#endif
+
+#if TRAVERSE_PROB
+ /* FIXME: recursive traverses break transactions? */
+ /* in_traverse keeps the modify_traverse() -> addrec_db() path from
+ * starting a nested traverse, reopen or transaction operation. */
+ if (in_traverse == 0 && random() % TRAVERSE_PROB == 0) {
+ in_traverse++;
+ ntdb_traverse(db, modify_traverse, NULL);
+ in_traverse--;
+ goto next;
+ }
+#endif
+
+ /* Nothing else was chosen: plain fetch, releasing the returned buffer. */
+ if (ntdb_fetch(db, key, &data) == NTDB_SUCCESS)
+ free(data.dptr);
+
+next:
+ free(k);
+ free(d);
+}
+
+/*
+ * Traverse callback used by run_child() for its final passes: deletes the
+ * record being visited (the result is ignored) and returns 0.  dbuf and
+ * state are not used.
+ */
+static int traverse_fn(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA dbuf,
+		       void *state)
+{
+	(void)ntdb_delete(ntdb, key);
+
+	return 0;
+}
+
+/*
+ * Print the command-line synopsis on stdout and exit with status 0.  The
+ * -t option is listed only when transactions are compiled in.
+ */
+static void usage(void)
+{
+	fputs("Usage: ntdbtorture", stdout);
+#if TRANSACTION_PROB
+	fputs(" [-t]", stdout);
+#endif
+	fputs(" [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-S] [-H HASH_SIZE]\n",
+	      stdout);
+	exit(0);
+}
+
+/*
+ * SIGUSR1 handler installed by run_child(): writes the current loop counter
+ * to count_pipe, then kills this process with SIGUSR2.  Exits with status 2
+ * if the counter could not be written in full.
+ */
+static void send_count_and_suicide(int sig)
+{
+	ssize_t written;
+
+	/* This ensures our successor can continue where we left off. */
+	written = write(count_pipe, &loopnum, sizeof(loopnum));
+	if (written != sizeof(loopnum))
+		exit(2);
+
+	/* This gives a unique signature. */
+	kill(getpid(), SIGUSR2);
+}
+
+/*
+ * Body of one torture process.
+ *
+ * Installs segv_handler for signal 11, opens filename into the global db,
+ * seeds both rand() and random() with seed + i, and calls addrec_db() for
+ * iterations start .. num_loops-1, stopping early once error_count becomes
+ * non-zero.  If no error occurred it then traverses the database once with
+ * no callback and twice with traverse_fn, which deletes every record visited
+ * (inside a single transaction when always_transaction is set).
+ *
+ * Returns the error count capped at 100.  Exits with status 1 if the
+ * database cannot be opened or the final transaction cannot be started.
+ */
+static int run_child(const char *filename, int i, int seed, unsigned num_loops,
+ unsigned start, int ntdb_flags)
+{
+ struct sigaction act = { .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO };
+ sigaction(11, &act, NULL);
+
+ db = ntdb_open(filename, ntdb_flags, O_RDWR | O_CREAT, 0600,
+ &log_attr);
+ if (!db) {
+ fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), filename,
+ "db open failed");
+ exit(1);
+ }
+
+#if 0
+ if (i == 0) {
+ printf("pid %u\n", (unsigned int)getpid());
+ sleep(9);
+ } else
+ sleep(10);
+#endif
+
+ /* addrec_db() and randbuf() draw from both generators. */
+ srand(seed + i);
+ srandom(seed + i);
+
+ /* Set global, then we're ready to handle being killed. */
+ loopnum = start;
+ signal(SIGUSR1, send_count_and_suicide);
+
+ for (;loopnum<num_loops && error_count == 0;loopnum++) {
+ addrec_db();
+ }
+
+ if (error_count == 0) {
+ enum NTDB_ERROR e;
+
+ ntdb_traverse(db, NULL, NULL);
+#if TRANSACTION_PROB
+ if (always_transaction) {
+ /* Drop whatever transaction the loop left open, then do
+ * the clean-up traverses inside a fresh one. */
+ while (in_transaction) {
+ ntdb_transaction_cancel(db);
+ in_transaction--;
+ }
+ e = ntdb_transaction_start(db);
+ if (e) {
+ warn_on_err(e, db,
+ "ntdb_transaction_start failed");
+ exit(1);
+ }
+ }
+#endif
+ ntdb_traverse(db, traverse_fn, NULL);
+ ntdb_traverse(db, traverse_fn, NULL);
+
+#if TRANSACTION_PROB
+ if (always_transaction) {
+ e = ntdb_transaction_commit(db);
+ warn_on_err(e, db, "ntdb_transaction_commit failed");
+ }
+#endif
+ }
+
+ ntdb_close(db);
+
+ /* Capped at 100; main() passes this value to exit() in the child. */
+ return (error_count < 100 ? error_count : 100);
+}
+
+/*
+ * Build the path of a test file.  When the TEST_DATA_PREFIX environment
+ * variable is set the result is "<prefix>/<filename>", otherwise a copy of
+ * filename.  The result is heap-allocated and owned by the caller; NULL is
+ * returned if the allocation fails.
+ */
+static char *test_path(const char *filename)
+{
+	const char *prefix = getenv("TEST_DATA_PREFIX");
+	char *path = NULL;
+
+	if (prefix == NULL)
+		return strdup(filename);
+
+	if (asprintf(&path, "%s/%s", prefix, filename) == -1)
+		return NULL;
+
+	return path;
+}
+
+/*
+ * Send SIGTERM to every live child recorded in pids[0..num-1].  Entries
+ * that are 0 (not started) or -1 (failed fork) are skipped: kill(-1, ...)
+ * would signal every process this user is allowed to signal.
+ */
+static void terminate_children(const pid_t *pids, int num)
+{
+	int j;
+
+	for (j = 0; j < num; j++) {
+		if (pids[j] > 0) {
+			kill(pids[j], SIGTERM);
+		}
+	}
+}
+
+/*
+ * ntdbtorture entry point.
+ *
+ * Options: -n NUM_PROCS, -l NUM_LOOPS, -s SEED, -S (open with NTDB_NOSYNC),
+ * -t (always use transactions, if compiled in), -k (randomly kill children
+ * with SIGUSR1 and restart them where they stopped), -H HASH_SIZE.
+ *
+ * Forks NUM_PROCS children that each execute run_child() on the same
+ * database file (or calls run_child() directly for a single process without
+ * -k), waits for them, and finally runs ntdb_check() on the result if no
+ * error was counted.  Returns the error count.
+ */
+int main(int argc, char * const *argv)
+{
+	int i, seed = -1;
+	int num_loops = 5000;
+	int num_procs = 3;
+	int c, pfds[2];
+	extern char *optarg;
+	pid_t *pids;
+	int kill_random = 0;
+	int *done;
+	int ntdb_flags = NTDB_DEFAULT;
+	char *test_ntdb;
+	enum NTDB_ERROR e;
+
+	/* Attribute chain handed to ntdb_open(): log -> seed -> hashsize. */
+	log_attr.base.attr = NTDB_ATTRIBUTE_LOG;
+	log_attr.base.next = &seed_attr;
+	log_attr.log.fn = ntdb_log;
+	seed_attr.base.attr = NTDB_ATTRIBUTE_SEED;
+	seed_attr.base.next = &hsize_attr;
+	hsize_attr.base.attr = NTDB_ATTRIBUTE_HASHSIZE;
+	hsize_attr.base.next = NULL;
+	hsize_attr.hashsize.size = 2; /* stress it by default. */
+
+	while ((c = getopt(argc, argv, "n:l:s:thkSH:")) != -1) {
+		switch (c) {
+		case 'n':
+			num_procs = strtol(optarg, NULL, 0);
+			break;
+		case 'l':
+			num_loops = strtol(optarg, NULL, 0);
+			break;
+		case 's':
+			seed = strtol(optarg, NULL, 0);
+			break;
+		case 'S':
+			ntdb_flags = NTDB_NOSYNC;
+			break;
+		case 't':
+#if TRANSACTION_PROB
+			always_transaction = 1;
+#else
+			fprintf(stderr, "Transactions not supported\n");
+			usage();
+#endif
+			break;
+		case 'k':
+			kill_random = 1;
+			break;
+		case 'H':
+			hsize_attr.hashsize.size = strtol(optarg, NULL, 0);
+			break;
+		default:
+			usage();
+		}
+	}
+
+	test_ntdb = test_path("torture.ntdb");
+	if (test_ntdb == NULL) {
+		perror("test_path");
+		exit(1);
+	}
+
+	unlink(test_ntdb);
+
+	if (seed == -1) {
+		seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
+	}
+	seed_attr.seed.seed = (((uint64_t)seed) << 32) | seed;
+
+	if (num_procs == 1 && !kill_random) {
+		/* Don't fork for this case, makes debugging easier. */
+		error_count = run_child(test_ntdb, 0, seed, num_loops, 0,
+					ntdb_flags);
+		goto done;
+	}
+
+	/* pids[j] is the pid of child j; done[j] is the loop count child j
+	 * last reported through the pipe (0 until it has been killed once). */
+	pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
+	done = (int *)calloc(sizeof(int), num_procs);
+	if (num_procs > 0 && (pids == NULL || done == NULL)) {
+		perror("Allocating child tables");
+		exit(1);
+	}
+
+	if (pipe(pfds) != 0) {
+		perror("Creating pipe");
+		exit(1);
+	}
+	count_pipe = pfds[1];
+
+	for (i=0;i<num_procs;i++) {
+		pids[i] = fork();
+		if (pids[i] == -1) {
+			/* Never leave -1 in the table: it would later be
+			 * handed to kill(). */
+			perror("fork");
+			terminate_children(pids, num_procs);
+			exit(1);
+		}
+		if (pids[i] == 0) {
+			close(pfds[0]);
+			if (i == 0) {
+				printf("testing with %d processes, %d loops, seed=%d%s\n",
+				       num_procs, num_loops, seed,
+#if TRANSACTION_PROB
+				       always_transaction ? " (all within transactions)" : ""
+#else
+				       ""
+#endif
+					);
+			}
+			exit(run_child(test_ntdb, i, seed, num_loops, 0,
+				       ntdb_flags));
+		}
+	}
+
+	while (num_procs) {
+		int status, j;
+		pid_t pid;
+
+		if (error_count != 0) {
+			/* try and stop the test on any failure */
+			terminate_children(pids, num_procs);
+		}
+
+		/* With -k we poll, so that we can kill a child between
+		 * checks. */
+		pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
+		if (pid == 0) {
+			struct timespec ts;
+
+			/* Sleep for 1/10 second. */
+			ts.tv_sec = 0;
+			ts.tv_nsec = 100000000;
+			nanosleep(&ts, NULL);
+
+			/* Kill someone. */
+			kill(pids[random() % num_procs], SIGUSR1);
+			continue;
+		}
+
+		if (pid == -1) {
+			perror("failed to wait for child");
+			exit(1);
+		}
+
+		for (j=0;j<num_procs;j++) {
+			if (pids[j] == pid) break;
+		}
+		if (j == num_procs) {
+			printf("unknown child %d exited!?\n", (int)pid);
+			exit(1);
+		}
+		if (WIFSIGNALED(status)) {
+			if (WTERMSIG(status) == SIGUSR2
+			    || WTERMSIG(status) == SIGUSR1) {
+				/* SIGUSR2 means they wrote to pipe. */
+				if (WTERMSIG(status) == SIGUSR2) {
+					if (read(pfds[0], &done[j],
+						 sizeof(done[j]))
+					    != sizeof(done[j]))
+						err(1,
+						    "Short read from child?");
+				}
+				/* Restart the child from where it stopped. */
+				pids[j] = fork();
+				if (pids[j] == -1) {
+					perror("fork");
+					terminate_children(pids, num_procs);
+					exit(1);
+				}
+				if (pids[j] == 0)
+					exit(run_child(test_ntdb, j, seed,
+						       num_loops, done[j],
+						       ntdb_flags));
+				printf("Restarting child %i for %u-%u\n",
+				       j, done[j], num_loops);
+				continue;
+			}
+			printf("child %d exited with signal %d\n",
+			       (int)pid, WTERMSIG(status));
+			error_count++;
+		} else {
+			if (WEXITSTATUS(status) != 0) {
+				printf("child %d exited with status %d\n",
+				       (int)pid, WEXITSTATUS(status));
+				error_count++;
+			}
+		}
+		/* Child j is finished: compact both tables together so that
+		 * done[] keeps matching pids[]. */
+		memmove(&pids[j], &pids[j+1],
+			(num_procs - j - 1)*sizeof(pids[0]));
+		memmove(&done[j], &done[j+1],
+			(num_procs - j - 1)*sizeof(done[0]));
+		num_procs--;
+	}
+
+	free(pids);
+	free(done);
+
+done:
+	if (error_count == 0) {
+		db = ntdb_open(test_ntdb, NTDB_DEFAULT, O_RDWR | O_CREAT,
+			       0600, &log_attr);
+		if (!db) {
+			fprintf(stderr, "%u:%s:%s\n", (unsigned int)getpid(), test_ntdb,
+				"db open failed");
+			exit(1);
+		}
+		e = ntdb_check(db, NULL, NULL);
+		if (e) {
+			warn_on_err(e, db, "db check failed");
+			exit(1);
+		}
+		ntdb_close(db);
+		printf("OK\n");
+	}
+
+	free(test_ntdb);
+	return error_count;
+}
--- /dev/null
+/* Simple speed test for NTDB */
+#include <ccan/err/err.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include "ntdb.h"
+
+/*
+ * Nanoseconds per operation: the time elapsed between start and stop,
+ * divided by the num operations performed.  Returns 0 when num is 0.
+ */
+static size_t normalize(const struct timeval *start,
+			const struct timeval *stop,
+			unsigned int num)
+{
+	struct timeval diff;
+	double usecs;
+
+	/* A record count of 0 (it comes from the command line) would
+	 * otherwise divide by zero. */
+	if (num == 0)
+		return 0;
+
+	timersub(stop, start, &diff);
+
+	/* Floating point is more accurate here.  Convert before multiplying
+	 * so the product is never computed in integer arithmetic. */
+	usecs = (double)diff.tv_sec * 1000000 + diff.tv_usec;
+	return usecs / num * 1000;
+}
+
+/*
+ * Size in bytes of /tmp/speed.ntdb as reported by stat(), or (size_t)-1 if
+ * the file cannot be examined.
+ */
+static size_t file_size(void)
+{
+	struct stat st;
+	int rc = stat("/tmp/speed.ntdb", &st);
+
+	return rc == 0 ? (size_t)st.st_size : (size_t)-1;
+}
+
+/*
+ * Traverse callback: adds the int stored at the start of the record's data
+ * to the running total that p points to, and returns 0.  ntdb and key are
+ * not used.
+ */
+static int count_record(struct ntdb_context *ntdb,
+			NTDB_DATA key, NTDB_DATA data, void *p)
+{
+	int *total = p;
+	const int value = *(int *)data.dptr;
+
+	*total = *total + value;
+	return 0;
+}
+
+/*
+ * Print every counter of the NTDB_ATTRIBUTE_STATS attribute of *ntdb on
+ * stdout, then close the database and reopen /tmp/speed.ntdb with the given
+ * flags and attributes, storing the new handle back in *ntdb.
+ * NOTE(review): the reopen is presumably what resets the counters -- confirm.
+ *
+ * Exits with status 1 if the stats cannot be read or the reopen fails.
+ */
+static void dump_and_clear_stats(struct ntdb_context **ntdb,
+				 int flags,
+				 union ntdb_attribute *attr)
+{
+	union ntdb_attribute stats;
+	enum NTDB_ERROR ecode;
+
+	stats.base.attr = NTDB_ATTRIBUTE_STATS;
+	stats.stats.size = sizeof(stats.stats);
+	ecode = ntdb_get_attribute(*ntdb, &stats);
+	if (ecode != NTDB_SUCCESS)
+		errx(1, "Getting stats: %s", ntdb_errorstr(ecode));
+
+	printf("allocs = %llu\n",
+	       (unsigned long long)stats.stats.allocs);
+	printf(" alloc_subhash = %llu\n",
+	       (unsigned long long)stats.stats.alloc_subhash);
+	printf(" alloc_chain = %llu\n",
+	       (unsigned long long)stats.stats.alloc_chain);
+	printf(" alloc_bucket_exact = %llu\n",
+	       (unsigned long long)stats.stats.alloc_bucket_exact);
+	printf(" alloc_bucket_max = %llu\n",
+	       (unsigned long long)stats.stats.alloc_bucket_max);
+	printf(" alloc_leftover = %llu\n",
+	       (unsigned long long)stats.stats.alloc_leftover);
+	printf(" alloc_coalesce_tried = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_tried);
+	printf(" alloc_coalesce_iterate_clash = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_iterate_clash);
+	printf(" alloc_coalesce_lockfail = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_lockfail);
+	printf(" alloc_coalesce_race = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_race);
+	printf(" alloc_coalesce_succeeded = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_succeeded);
+	printf(" alloc_coalesce_num_merged = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_num_merged);
+	printf("compares = %llu\n",
+	       (unsigned long long)stats.stats.compares);
+	printf(" compare_wrong_offsetbits = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_offsetbits);
+	printf(" compare_wrong_keylen = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_keylen);
+	printf(" compare_wrong_rechash = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_rechash);
+	printf(" compare_wrong_keycmp = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_keycmp);
+	printf("transactions = %llu\n",
+	       (unsigned long long)stats.stats.transactions);
+	printf(" transaction_cancel = %llu\n",
+	       (unsigned long long)stats.stats.transaction_cancel);
+	printf(" transaction_nest = %llu\n",
+	       (unsigned long long)stats.stats.transaction_nest);
+	printf(" transaction_expand_file = %llu\n",
+	       (unsigned long long)stats.stats.transaction_expand_file);
+	printf(" transaction_read_direct = %llu\n",
+	       (unsigned long long)stats.stats.transaction_read_direct);
+	printf(" transaction_read_direct_fail = %llu\n",
+	       (unsigned long long)stats.stats.transaction_read_direct_fail);
+	printf(" transaction_write_direct = %llu\n",
+	       (unsigned long long)stats.stats.transaction_write_direct);
+	printf(" transaction_write_direct_fail = %llu\n",
+	       (unsigned long long)stats.stats.transaction_write_direct_fail);
+	printf("expands = %llu\n",
+	       (unsigned long long)stats.stats.expands);
+	printf("frees = %llu\n",
+	       (unsigned long long)stats.stats.frees);
+	printf("locks = %llu\n",
+	       (unsigned long long)stats.stats.locks);
+	printf(" lock_lowlevel = %llu\n",
+	       (unsigned long long)stats.stats.lock_lowlevel);
+	printf(" lock_nonblock = %llu\n",
+	       (unsigned long long)stats.stats.lock_nonblock);
+	printf(" lock_nonblock_fail = %llu\n",
+	       (unsigned long long)stats.stats.lock_nonblock_fail);
+
+	/* Now clear. */
+	ntdb_close(*ntdb);
+	*ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR, 0, attr);
+	/* The caller keeps using *ntdb, so a failed reopen must not hand
+	 * back a NULL handle. */
+	if (!*ntdb)
+		err(1, "Reopening /tmp/speed.ntdb");
+}
+
+/*
+ * Log callback passed to ntdb_open() through the log attribute: prints the
+ * database name, the error string for ecode and the message on stderr.
+ * level and data are not used.
+ */
+static void ntdb_log(struct ntdb_context *ntdb,
+		     enum ntdb_log_level level,
+		     enum NTDB_ERROR ecode,
+		     const char *message,
+		     void *data)
+{
+	const char *dbname = ntdb_name(ntdb);
+	const char *errstr = ntdb_errorstr(ecode);
+
+	fprintf(stderr, "ntdb:%s:%s:%s\n", dbname, errstr, message);
+}
+
+/*
+ * Speed test driver.
+ *
+ * Usage: [--internal] [--transaction] [--no-sync] [--summary] [--stats]
+ *        [NUM [STOPAT]]
+ * The flags are only recognised in the order shown.  NUM is the number of
+ * records (default 1000); the run exits after stage number STOPAT.
+ *
+ * Runs the stages add, find, miss, traverse, delete, re-add, append and
+ * churn on /tmp/speed.ntdb.  After each stage it prints the time per
+ * operation and the file size, runs ntdb_check(), and optionally prints the
+ * summary and the statistics.  Any ntdb failure terminates with status 1.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j, num = 1000, stage = 0, stopat = -1;
+	unsigned int expected;
+	int flags = NTDB_DEFAULT;
+	bool transaction = false, summary = false;
+	NTDB_DATA key, data;
+	struct ntdb_context *ntdb;
+	struct timeval start, stop;
+	union ntdb_attribute seed, log;
+	bool do_stats = false;
+	enum NTDB_ERROR ecode;
+
+	/* Try to keep benchmarks even. */
+	seed.base.attr = NTDB_ATTRIBUTE_SEED;
+	seed.base.next = NULL;
+	seed.seed.seed = 0;
+
+	log.base.attr = NTDB_ATTRIBUTE_LOG;
+	log.base.next = &seed;
+	log.log.fn = ntdb_log;
+
+	if (argv[1] && strcmp(argv[1], "--internal") == 0) {
+		flags = NTDB_INTERNAL;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--transaction") == 0) {
+		transaction = true;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--no-sync") == 0) {
+		flags |= NTDB_NOSYNC;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--summary") == 0) {
+		summary = true;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--stats") == 0) {
+		do_stats = true;
+		argc--;
+		argv++;
+	}
+
+	ntdb = ntdb_open("/tmp/speed.ntdb", flags, O_RDWR|O_CREAT|O_TRUNC,
+			 0600, &log);
+	if (!ntdb)
+		err(1, "Opening /tmp/speed.ntdb");
+
+	/* Key and data both alias the loop variable i: every record maps
+	 * the value of i to itself. */
+	key.dptr = (void *)&i;
+	key.dsize = sizeof(i);
+	data = key;
+
+	if (argv[1]) {
+		num = atoi(argv[1]);
+		argv++;
+		argc--;
+	}
+
+	if (argv[1]) {
+		stopat = atoi(argv[1]);
+		argv++;
+		argc--;
+	}
+
+	/* Add num records. */
+	printf("Adding %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (i = 0; i < num; i++)
+		if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
+			errx(1, "Inserting key %u in ntdb: %s",
+			     i, ntdb_errorstr(ecode));
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+
+	if (++stage == stopat)
+		exit(0);
+
+	/* Find num records. */
+	printf("Finding %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (i = 0; i < num; i++) {
+		NTDB_DATA dbuf;
+		if ((ecode = ntdb_fetch(ntdb, key, &dbuf)) != NTDB_SUCCESS
+		    || *(int *)dbuf.dptr != i) {
+			errx(1, "Fetching key %u in ntdb gave %u",
+			     i, ecode ? ecode : *(int *)dbuf.dptr);
+		}
+		/* A successful fetch returns an allocated buffer; release
+		 * it so the loop does not leak one buffer per record. */
+		free(dbuf.dptr);
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Look up num records that do not exist. */
+	printf("Missing %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (i = num; i < num*2; i++) {
+		NTDB_DATA dbuf;
+		ecode = ntdb_fetch(ntdb, key, &dbuf);
+		if (ecode != NTDB_ERR_NOEXIST)
+			errx(1, "Fetching key %u in ntdb gave %s",
+			     i, ntdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Traverse num records. */
+	printf("Traversing %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	i = 0;
+	gettimeofday(&start, NULL);
+	if (ntdb_traverse(ntdb, count_record, &i) != num)
+		errx(1, "Traverse returned wrong number of records");
+	/* Record k holds the value k, so the tally must be
+	 * 0 + 1 + ... + (num - 1) = num * (num - 1) / 2.  Computed in 64 bits
+	 * and truncated to match the wrap-around of the tally; the former
+	 * (num - 1) * (num / 2) was only right for even num. */
+	expected = (unsigned int)((unsigned long long)num * (num - 1) / 2);
+	if (i != expected)
+		errx(1, "Traverse tallied to %u", i);
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Delete num records (not in order). */
+	printf("Deleting %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (j = 0; j < num; j++) {
+		i = (j + 100003) % num;
+		if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS)
+			errx(1, "Deleting key %u in ntdb: %s",
+			     i, ntdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Re-add num records (not in order). */
+	printf("Re-adding %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (j = 0; j < num; j++) {
+		i = (j + 100003) % num;
+		if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
+			errx(1, "Inserting key %u in ntdb: %s",
+			     i, ntdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Append to num records. */
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	printf("Appending %u records: ", num); fflush(stdout);
+	gettimeofday(&start, NULL);
+	for (i = 0; i < num; i++)
+		if ((ecode = ntdb_append(ntdb, key, data)) != NTDB_SUCCESS)
+			errx(1, "Appending key %u in ntdb: %s",
+			     i, ntdb_errorstr(ecode));
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	/* NOTE(review): unlike the other stages, statistics are not dumped
+	 * after this one -- confirm whether that is intentional. */
+	if (++stage == stopat)
+		exit(0);
+
+	/* Churn num records: not in order! */
+	if (transaction && (ecode = ntdb_transaction_start(ntdb)))
+		errx(1, "starting transaction: %s", ntdb_errorstr(ecode));
+	printf("Churning %u records: ", num); fflush(stdout);
+	gettimeofday(&start, NULL);
+	for (j = 0; j < num; j++) {
+		i = (j + 1000019) % num;
+		if ((ecode = ntdb_delete(ntdb, key)) != NTDB_SUCCESS)
+			errx(1, "Deleting key %u in ntdb: %s",
+			     i, ntdb_errorstr(ecode));
+		/* Replace it with a key from the range num .. 2*num-1. */
+		i += num;
+		if ((ecode = ntdb_store(ntdb, key, data, NTDB_INSERT)) != 0)
+			errx(1, "Inserting key %u in ntdb: %s",
+			     i, ntdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = ntdb_transaction_commit(ntdb)))
+		errx(1, "committing transaction: %s", ntdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+
+	if (ntdb_check(ntdb, NULL, NULL))
+		errx(1, "ntdb_check failed!");
+	if (summary) {
+		char *sumstr = NULL;
+		ntdb_summary(ntdb, NTDB_SUMMARY_HISTOGRAMS, &sumstr);
+		printf("%s\n", sumstr);
+		free(sumstr);
+	}
+	if (do_stats)
+		dump_and_clear_stats(&ntdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	return 0;
+}
--- /dev/null
+ /*
+ Unix SMB/CIFS implementation.
+
+ trivial database library
+
+ Copyright (C) Andrew Tridgell 2005
+ Copyright (C) Rusty Russell 2010
+
+ ** NOTE! The following LGPL license applies to the ntdb
+ ** library. This does NOT imply that all of Samba is released
+ ** under the LGPL
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "private.h"
+#include <assert.h>
+/* If x is non-NULL, release it with the context's free_fn (passing
+ * alloc_data) and reset x to NULL.  x is evaluated more than once. */
+#define SAFE_FREE(ntdb, x) do { if ((x) != NULL) {ntdb->free_fn((void *)x, ntdb->alloc_data); (x)=NULL;} } while(0)
+
+/*
+ transaction design:
+
+ - only allow a single transaction at a time per database. This makes
+ using the transaction API simpler, as otherwise the caller would
+ have to cope with temporary failures in transactions that conflict
+ with other current transactions
+
+ - keep the transaction recovery information in the same file as the
+ database, using a special 'transaction recovery' record pointed at
+ by the header. This removes the need for extra journal files as
+ used by some other databases
+
+ - dynamically allocate the transaction recovery record, re-using it
+ for subsequent transactions. If a larger record is needed then
+ ntdb_free() the old record to place it on the normal ntdb freelist
+ before allocating the new record
+
+ - during transactions, keep a linked list of all writes that have
+ been performed by intercepting all ntdb_write() calls. The hooked
+ transaction versions of ntdb_read() and ntdb_write() check this
+ linked list and try to use the elements of the list in preference
+ to the real database.
+
+ - don't allow any locks to be held when a transaction starts,
+ otherwise we can end up with deadlock (plus lack of lock nesting
+ in POSIX locks would mean the lock is lost)
+
+ - if the caller gains a lock during the transaction but doesn't
+ release it then fail the commit
+
+ - allow for nested calls to ntdb_transaction_start(), re-using the
+ existing transaction record. If the inner transaction is canceled
+ then a subsequent commit will fail
+
+ - keep a mirrored copy of the ntdb hash chain heads to allow for the
+ fast hash heads scan on traverse, updating the mirrored copy in
+ the transaction version of ntdb_write
+
+ - allow callers to mix transaction and non-transaction use of ntdb,
+ although once a transaction is started then an exclusive lock is
+ gained until the transaction is committed or canceled
+
+ - the commit strategy involves first saving away all modified data
+ into a linearised buffer in the transaction recovery area, then
+ marking the transaction recovery area with a magic value to
+ indicate a valid recovery record. In total 4 fsync/msync calls are
+ needed per commit to prevent race conditions. It might be possible
+ to reduce this to 3 or even 2 with some more work.
+
+ - check for a valid recovery record on open of the ntdb, while the
+ open lock is held. Automatically recover from the transaction
+ recovery area if needed, then continue with the open as
+ usual. This allows for smooth crash recovery with no administrator
+ intervention.
+
+ - if NTDB_NOSYNC is passed to flags in ntdb_open then transactions are
+ still available, but no fsync/msync calls are made. This means we
+ still are safe against unexpected death during transaction commit,
+ but not against machine reboots.
+*/
+
+/*
+ hold the context of any current transaction
+
+ Data written during the transaction is kept in memory in NTDB_PGSIZE-sized
+ blocks; blocks[i] shadows file offsets i*NTDB_PGSIZE up to
+ (i+1)*NTDB_PGSIZE, and is NULL for a block that has not been written.
+*/
+struct ntdb_transaction {
+ /* the original io methods - used to do IOs to the real db */
+ const struct ntdb_methods *io_methods;
+
+ /* the list of transaction blocks. When a block is first
+ written to, it gets created in this list */
+ uint8_t **blocks;
+ /* number of entries in blocks[] */
+ size_t num_blocks;
+
+ /* non-zero when an internal transaction error has
+ occurred. All write operations will then fail until the
+ transaction is ended */
+ int transaction_error;
+
+ /* when inside a transaction we need to keep track of any
+ nested ntdb_transaction_start() calls, as these are allowed,
+ but don't create a new transaction */
+ unsigned int nesting;
+
+ /* set when a prepare has already occurred */
+ bool prepared;
+ /* NOTE(review): not used in the code visible here; presumably the
+ file offset of the recovery magic written by prepare -- confirm */
+ ntdb_off_t magic_offset;
+
+ /* old file size before transaction */
+ ntdb_len_t old_map_size;
+};
+
+/*
+  read while in a transaction. We need to check first if the data is in our list
+  of transaction elements, then if not do a real read
+
+  The request is split so that each piece lies within one NTDB_PGSIZE block.
+  A piece whose block exists in the transaction is copied from that block;
+  otherwise it is read through the original io methods.
+
+  Returns NTDB_SUCCESS, or the error of the failed real read, in which case
+  the transaction is marked as failed and the error is logged.
+*/
+static enum NTDB_ERROR transaction_read(struct ntdb_context *ntdb, ntdb_off_t off,
+					void *buf, ntdb_len_t len)
+{
+	size_t blk;
+	enum NTDB_ERROR ecode;
+
+	/* break it down into block sized ops */
+	while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) {
+		ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE);
+		ecode = transaction_read(ntdb, off, buf, len2);
+		if (ecode != NTDB_SUCCESS) {
+			/* the nested call has already flagged and logged it */
+			return ecode;
+		}
+		len -= len2;
+		off += len2;
+		buf = (void *)(len2 + (char *)buf);
+	}
+
+	if (len == 0) {
+		return NTDB_SUCCESS;
+	}
+
+	blk = off / NTDB_PGSIZE;
+
+	/* see if we have it in the block list */
+	if (blk < ntdb->transaction->num_blocks &&
+	    ntdb->transaction->blocks[blk] != NULL) {
+		/* yes, copy it out of this block */
+		memcpy(buf, ntdb->transaction->blocks[blk] + (off % NTDB_PGSIZE),
+		       len);
+		return NTDB_SUCCESS;
+	}
+
+	/* nope, do a real read */
+	ecode = ntdb->transaction->io_methods->tread(ntdb, off, buf, len);
+	if (ecode == NTDB_SUCCESS) {
+		return NTDB_SUCCESS;
+	}
+
+	ntdb->transaction->transaction_error = 1;
+	return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+			   "transaction_read: failed at off=%zu len=%zu",
+			   (size_t)off, (size_t)len);
+}
+
+
+/*
+ write while in a transaction
+
+ The write is split so that each piece lies within one NTDB_PGSIZE block.
+ The block array is grown as needed (new slots start out NULL). A block
+ that is touched for the first time is allocated, zeroed and, if it starts
+ below the pre-transaction file size, filled with the current file contents
+ through the original io methods. A NULL buf writes zeros.
+
+ Writing is refused once the transaction has been prepared. Every failure
+ marks the transaction as failed (transaction_error).
+*/
+static enum NTDB_ERROR transaction_write(struct ntdb_context *ntdb, ntdb_off_t off,
+ const void *buf, ntdb_len_t len)
+{
+ size_t blk;
+ enum NTDB_ERROR ecode;
+
+ /* Only a commit is allowed on a prepared transaction */
+ if (ntdb->transaction->prepared) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR,
+ "transaction_write: transaction already"
+ " prepared, write not allowed");
+ goto fail;
+ }
+
+ /* break it up into block sized chunks */
+ while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) {
+ ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE);
+ ecode = transaction_write(ntdb, off, buf, len2);
+ if (ecode != NTDB_SUCCESS) {
+ /* the nested call has already set transaction_error */
+ return ecode;
+ }
+ len -= len2;
+ off += len2;
+ /* a NULL buf (zero fill) stays NULL for every chunk */
+ if (buf != NULL) {
+ buf = (const void *)(len2 + (const char *)buf);
+ }
+ }
+
+ if (len == 0) {
+ return NTDB_SUCCESS;
+ }
+
+ /* from here on, off is the offset within block blk */
+ blk = off / NTDB_PGSIZE;
+ off = off % NTDB_PGSIZE;
+
+ if (ntdb->transaction->num_blocks <= blk) {
+ uint8_t **new_blocks;
+ /* expand the blocks array */
+ if (ntdb->transaction->blocks == NULL) {
+ new_blocks = (uint8_t **)ntdb->alloc_fn(ntdb,
+ (blk+1)*sizeof(uint8_t *), ntdb->alloc_data);
+ } else {
+ new_blocks = (uint8_t **)ntdb->expand_fn(
+ ntdb->transaction->blocks,
+ (blk+1)*sizeof(uint8_t *), ntdb->alloc_data);
+ }
+ if (new_blocks == NULL) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "transaction_write:"
+ " failed to allocate");
+ goto fail;
+ }
+ /* clear the slots num_blocks..blk that were just added */
+ memset(&new_blocks[ntdb->transaction->num_blocks], 0,
+ (1+(blk - ntdb->transaction->num_blocks))*sizeof(uint8_t *));
+ ntdb->transaction->blocks = new_blocks;
+ ntdb->transaction->num_blocks = blk+1;
+ }
+
+ /* allocate and fill a block? */
+ if (ntdb->transaction->blocks[blk] == NULL) {
+ ntdb->transaction->blocks[blk] = (uint8_t *)
+ ntdb->alloc_fn(ntdb->transaction->blocks, NTDB_PGSIZE,
+ ntdb->alloc_data);
+ if (ntdb->transaction->blocks[blk] == NULL) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "transaction_write:"
+ " failed to allocate");
+ goto fail;
+ }
+ memset(ntdb->transaction->blocks[blk], 0, NTDB_PGSIZE);
+ /* only the part of the block that existed before the
+ transaction is read from the file; the rest stays zero */
+ if (ntdb->transaction->old_map_size > blk * NTDB_PGSIZE) {
+ ntdb_len_t len2 = NTDB_PGSIZE;
+ if (len2 + (blk * NTDB_PGSIZE) > ntdb->transaction->old_map_size) {
+ len2 = ntdb->transaction->old_map_size - (blk * NTDB_PGSIZE);
+ }
+ ecode = ntdb->transaction->io_methods->tread(ntdb,
+ blk * NTDB_PGSIZE,
+ ntdb->transaction->blocks[blk],
+ len2);
+ if (ecode != NTDB_SUCCESS) {
+ ecode = ntdb_logerr(ntdb, ecode,
+ NTDB_LOG_ERROR,
+ "transaction_write:"
+ " failed to"
+ " read old block: %s",
+ strerror(errno));
+ SAFE_FREE(ntdb, ntdb->transaction->blocks[blk]);
+ goto fail;
+ }
+ }
+ }
+
+ /* overwrite part of an existing block */
+ if (buf == NULL) {
+ memset(ntdb->transaction->blocks[blk] + off, 0, len);
+ } else {
+ memcpy(ntdb->transaction->blocks[blk] + off, buf, len);
+ }
+ return NTDB_SUCCESS;
+
+fail:
+ ntdb->transaction->transaction_error = 1;
+ return ecode;
+}
+
+
+/*
+  write while in a transaction - this variant never expands the transaction blocks, it only
+  updates existing blocks. This means it cannot change the recovery size
+
+  Pieces that fall into a block the transaction does not hold are silently
+  skipped. As in transaction_write(), a NULL buf writes zeros.
+*/
+static void transaction_write_existing(struct ntdb_context *ntdb, ntdb_off_t off,
+				       const void *buf, ntdb_len_t len)
+{
+	size_t blk;
+
+	/* break it up into block sized chunks */
+	while (len + (off % NTDB_PGSIZE) > NTDB_PGSIZE) {
+		ntdb_len_t len2 = NTDB_PGSIZE - (off % NTDB_PGSIZE);
+		transaction_write_existing(ntdb, off, buf, len2);
+		len -= len2;
+		off += len2;
+		if (buf != NULL) {
+			buf = (const void *)(len2 + (const char *)buf);
+		}
+	}
+
+	if (len == 0) {
+		return;
+	}
+
+	/* from here on, off is the offset within block blk */
+	blk = off / NTDB_PGSIZE;
+	off = off % NTDB_PGSIZE;
+
+	if (ntdb->transaction->num_blocks <= blk ||
+	    ntdb->transaction->blocks[blk] == NULL) {
+		return;
+	}
+
+	/* overwrite part of an existing block; the chunk loop above already
+	   tolerates a NULL buf, so do not hand NULL to memcpy() here */
+	if (buf == NULL) {
+		memset(ntdb->transaction->blocks[blk] + off, 0, len);
+	} else {
+		memcpy(ntdb->transaction->blocks[blk] + off, buf, len);
+	}
+}
+
+
+/*
+ out of bounds check during a transaction: passes when off+len does not wrap and stays within ntdb->file->map_size, or unconditionally when probe is set
+*/
+static enum NTDB_ERROR transaction_oob(struct ntdb_context *ntdb,
+ ntdb_off_t off, ntdb_len_t len, bool probe)
+{
+ if ((off + len >= off && off + len <= ntdb->file->map_size) || probe) { /* "off + len >= off" rejects arithmetic wrap-around */
+ return NTDB_SUCCESS;
+ }
+
+ ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+ "ntdb_oob len %lld beyond transaction size %lld",
+ (long long)(off + len),
+ (long long)ntdb->file->map_size);
+ return NTDB_ERR_IO;
+}
+
+/*
+ transaction version of ntdb_expand(): records the new range as zero bytes via transaction_write() and, on success, grows ntdb->file->map_size by addition.
+*/
+static enum NTDB_ERROR transaction_expand_file(struct ntdb_context *ntdb,
+ ntdb_off_t addition)
+{
+ enum NTDB_ERROR ecode;
+
+ assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0); /* the expanded size must be a whole number of pages */
+
+ /* add a write to the transaction elements, so subsequent
+ reads see the zero data */
+ ecode = transaction_write(ntdb, ntdb->file->map_size, NULL, addition); /* a NULL buf makes transaction_write zero-fill */
+ if (ecode == NTDB_SUCCESS) {
+ ntdb->file->map_size += addition;
+ }
+ return ecode;
+}
+
+static void *transaction_direct(struct ntdb_context *ntdb, ntdb_off_t off,
+ size_t len, bool write_mode)
+{ /* Direct-pointer access to [off, off+len) during a transaction; returns NULL when the transaction blocks cannot serve the range. */
+ size_t blk = off / NTDB_PGSIZE, end_blk; /* first and last block touched by the range */
+
+ /* This is wrong for zero-length blocks, but will fail gracefully */
+ end_blk = (off + len - 1) / NTDB_PGSIZE;
+
+ /* Can only do direct if in single block and we've already copied. */
+ if (write_mode) {
+ ntdb->stats.transaction_write_direct++;
+ if (blk != end_blk
+ || blk >= ntdb->transaction->num_blocks
+ || ntdb->transaction->blocks[blk] == NULL) {
+ ntdb->stats.transaction_write_direct_fail++;
+ return NULL;
+ }
+ return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE; /* pointer into the in-memory copy of the block */
+ }
+
+ ntdb->stats.transaction_read_direct++;
+ /* Single which we have copied? */
+ if (blk == end_blk
+ && blk < ntdb->transaction->num_blocks
+ && ntdb->transaction->blocks[blk])
+ return ntdb->transaction->blocks[blk] + off % NTDB_PGSIZE;
+
+ /* Otherwise must be all not copied. */
+ while (blk <= end_blk) {
+ if (blk >= ntdb->transaction->num_blocks)
+ break;
+ if (ntdb->transaction->blocks[blk]) {
+ ntdb->stats.transaction_read_direct_fail++; /* range touches a copied block but is not confined to it */
+ return NULL;
+ }
+ blk++;
+ }
+ return ntdb->transaction->io_methods->direct(ntdb, off, len, false); /* nothing copied: defer to the io methods saved at transaction start */
+}
+
+static ntdb_off_t transaction_read_off(struct ntdb_context *ntdb,
+				       ntdb_off_t off)
+{
+	ntdb_off_t ret;
+	enum NTDB_ERROR ecode;
+	/* Read one offset through the transaction; a failure is returned encoded with NTDB_ERR_TO_OFF(). */
+	ecode = transaction_read(ntdb, off, &ret, sizeof(ret));
+	if (ecode != NTDB_SUCCESS) {
+		return NTDB_ERR_TO_OFF(ecode);
+	}
+	ntdb_convert(ntdb, &ret, sizeof(ret)); /* convert only after the read succeeded, so uninitialised memory is never touched */
+	return ret;
+}
+
+static enum NTDB_ERROR transaction_write_off(struct ntdb_context *ntdb,
+ ntdb_off_t off, ntdb_off_t val)
+{ /* Write one offset value at off through transaction_write(). */
+ ntdb_convert(ntdb, &val, sizeof(val)); /* val is a by-value copy, so converting in place does not affect the caller */
+ return transaction_write(ntdb, off, &val, sizeof(val));
+}
+
+static const struct ntdb_methods transaction_methods = { /* installed as ntdb->io by ntdb_transaction_start(); positional initialisers, so the order must match struct ntdb_methods (declared elsewhere) */
+ transaction_read,
+ transaction_write,
+ transaction_oob,
+ transaction_expand_file,
+ transaction_direct,
+ transaction_read_off,
+ transaction_write_off,
+};
+
+/*
+ sync to disk: fsync the file and, when MS_SYNC is defined and the file is mapped, msync the given range. Does nothing when NTDB_NOSYNC is set.
+*/
+static enum NTDB_ERROR transaction_sync(struct ntdb_context *ntdb,
+ ntdb_off_t offset, ntdb_len_t length)
+{
+ if (ntdb->flags & NTDB_NOSYNC) {
+ return NTDB_SUCCESS;
+ }
+
+ if (fsync(ntdb->file->fd) != 0) {
+ return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+ "ntdb_transaction: fsync failed: %s",
+ strerror(errno));
+ }
+#ifdef MS_SYNC
+ if (ntdb->file->map_ptr) {
+ ntdb_off_t moffset = offset & ~(getpagesize()-1); /* round offset down to a page boundary for msync */
+ if (msync(moffset + (char *)ntdb->file->map_ptr,
+ length + (offset - moffset), MS_SYNC) != 0) { /* length grows by the amount offset was rounded down */
+ return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
+ "ntdb_transaction: msync failed: %s",
+ strerror(errno));
+ }
+ }
+#endif
+ return NTDB_SUCCESS;
+}
+
+static void free_transaction_blocks(struct ntdb_context *ntdb)
+{ /* Release every in-memory transaction block and then the block pointer array itself. */
+ int i;
+
+ /* free all the transaction blocks */
+ for (i=0;i<ntdb->transaction->num_blocks;i++) {
+ if (ntdb->transaction->blocks[i] != NULL) { /* entries are NULL for blocks never written in this transaction */
+ ntdb->free_fn(ntdb->transaction->blocks[i],
+ ntdb->alloc_data);
+ }
+ }
+ SAFE_FREE(ntdb, ntdb->transaction->blocks);
+ ntdb->transaction->num_blocks = 0;
+}
+
+static void _ntdb_transaction_cancel(struct ntdb_context *ntdb)
+{ /* Tear down the current transaction: drop its blocks, invalidate any recovery marker, release locks, restore the io methods and free the transaction. */
+ enum NTDB_ERROR ecode;
+
+ if (ntdb->transaction == NULL) {
+ ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_cancel: no transaction");
+ return;
+ }
+
+ if (ntdb->transaction->nesting != 0) {
+ ntdb->transaction->transaction_error = 1; /* cancelling a nested level only flags the error and unwinds one level */
+ ntdb->transaction->nesting--;
+ return;
+ }
+
+ ntdb->file->map_size = ntdb->transaction->old_map_size;
+
+ free_transaction_blocks(ntdb);
+
+ if (ntdb->transaction->magic_offset) { /* non-zero once transaction_setup_recovery() has written the recovery magic */
+ const struct ntdb_methods *methods = ntdb->transaction->io_methods;
+ uint64_t invalid = NTDB_RECOVERY_INVALID_MAGIC;
+
+ /* remove the recovery marker */
+ ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset,
+ &invalid, sizeof(invalid));
+ if (ecode == NTDB_SUCCESS)
+ ecode = transaction_sync(ntdb,
+ ntdb->transaction->magic_offset,
+ sizeof(invalid));
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+ "ntdb_transaction_cancel: failed to remove"
+ " recovery magic"); /* logged only: the cancel still proceeds */
+ }
+ }
+
+ if (ntdb->file->allrecord_lock.count)
+ ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
+
+ /* restore the normal io methods */
+ ntdb->io = ntdb->transaction->io_methods;
+
+ ntdb_transaction_unlock(ntdb, F_WRLCK);
+
+ if (ntdb_has_open_lock(ntdb))
+ ntdb_unlock_open(ntdb, F_WRLCK);
+
+ SAFE_FREE(ntdb, ntdb->transaction);
+}
+
+/*
+ start a ntdb transaction. No token is returned, as only a single
+ transaction is allowed to be pending per ntdb_context (nested starts only bump a counter when NTDB_ALLOW_NESTING is set)
+*/
+_PUBLIC_ enum NTDB_ERROR ntdb_transaction_start(struct ntdb_context *ntdb)
+{
+ enum NTDB_ERROR ecode;
+
+ ntdb->stats.transactions++;
+ /* some sanity checks */
+ if (ntdb->flags & NTDB_INTERNAL) {
+ return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_start:"
+ " cannot start a transaction on an"
+ " internal ntdb");
+ }
+
+ if (ntdb->flags & NTDB_RDONLY) {
+ return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_start:"
+ " cannot start a transaction on a"
+ " read-only ntdb");
+ }
+
+ /* cope with nested ntdb_transaction_start() calls */
+ if (ntdb->transaction != NULL) {
+ if (!(ntdb->flags & NTDB_ALLOW_NESTING)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_IO,
+ NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_start:"
+ " already inside transaction");
+ }
+ ntdb->transaction->nesting++;
+ ntdb->stats.transaction_nest++;
+ return 0; /* NOTE(review): presumably equal to NTDB_SUCCESS, which every other success path returns by name */
+ }
+
+ if (ntdb_has_hash_locks(ntdb)) {
+ /* the caller must not have any locks when starting a
+ transaction as otherwise we'll be screwed by lack
+ of nested locks in POSIX */
+ return ntdb_logerr(ntdb, NTDB_ERR_LOCK,
+ NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_start:"
+ " cannot start a transaction with locks"
+ " held");
+ }
+
+ ntdb->transaction = (struct ntdb_transaction *)
+ ntdb->alloc_fn(ntdb, sizeof(struct ntdb_transaction),
+ ntdb->alloc_data);
+ if (ntdb->transaction == NULL) {
+ return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "ntdb_transaction_start:"
+ " cannot allocate");
+ }
+ memset(ntdb->transaction, 0, sizeof(*ntdb->transaction)); /* zeroes nesting, blocks, num_blocks, magic_offset, prepared, ... */
+
+ /* get the transaction write lock. This is a blocking lock. As
+ discussed with Volker, there are a number of ways we could
+ make this async, which we will probably do in the future */
+ ecode = ntdb_transaction_lock(ntdb, F_WRLCK);
+ if (ecode != NTDB_SUCCESS) {
+ SAFE_FREE(ntdb, ntdb->transaction->blocks); /* blocks is still NULL here because of the memset above */
+ SAFE_FREE(ntdb, ntdb->transaction);
+ return ecode;
+ }
+
+ /* get a read lock over entire file. This is upgraded to a write
+ lock during the commit */
+ ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, true);
+ if (ecode != NTDB_SUCCESS) {
+ goto fail_allrecord_lock;
+ }
+
+ /* make sure we know about any file expansions already done by
+ anyone else */
+ ntdb_oob(ntdb, ntdb->file->map_size, 1, true); /* probe mode; the return value is deliberately ignored */
+ ntdb->transaction->old_map_size = ntdb->file->map_size;
+
+ /* finally hook the io methods, replacing them with
+ transaction specific methods */
+ ntdb->transaction->io_methods = ntdb->io; /* saved so cancel/commit can restore and use the real io methods */
+ ntdb->io = &transaction_methods;
+ return NTDB_SUCCESS;
+
+fail_allrecord_lock:
+ ntdb_transaction_unlock(ntdb, F_WRLCK);
+ SAFE_FREE(ntdb, ntdb->transaction->blocks);
+ SAFE_FREE(ntdb, ntdb->transaction);
+ return ecode;
+}
+
+
+/*
+ cancel the current transaction: public entry point that counts the call in stats and delegates to _ntdb_transaction_cancel()
+*/
+_PUBLIC_ void ntdb_transaction_cancel(struct ntdb_context *ntdb)
+{
+ ntdb->stats.transaction_cancel++;
+ _ntdb_transaction_cancel(ntdb);
+}
+
+/*
+  Worst-case size of the linearised recovery data: each in-memory block that
+  starts below old_map_size is budgeted 2*sizeof(ntdb_off_t) plus a full page.
+*/
+static ntdb_len_t ntdb_recovery_size(struct ntdb_context *ntdb)
+{
+	ntdb_len_t total = 0;
+	int blk;
+
+	/* stop at the first block that starts at or beyond the old end of file */
+	for (blk = 0;
+	     blk < ntdb->transaction->num_blocks
+	     && blk * NTDB_PGSIZE < ntdb->transaction->old_map_size;
+	     blk++) {
+		if (ntdb->transaction->blocks[blk] != NULL) {
+			total += 2*sizeof(ntdb_off_t) + NTDB_PGSIZE;
+		}
+	}
+
+	return total;
+}
+
+static enum NTDB_ERROR ntdb_recovery_area(struct ntdb_context *ntdb,
+ const struct ntdb_methods *methods,
+ ntdb_off_t *recovery_offset,
+ struct ntdb_recovery_record *rec)
+{ /* Look up the existing recovery area: sets *recovery_offset (0 if none or unusable) and fills *rec, with rec->max_len forced to 0 when there is none. */
+ enum NTDB_ERROR ecode;
+
+ *recovery_offset = ntdb_read_off(ntdb,
+ offsetof(struct ntdb_header, recovery));
+ if (NTDB_OFF_IS_ERR(*recovery_offset)) {
+ return NTDB_OFF_TO_ERR(*recovery_offset);
+ }
+
+ if (*recovery_offset == 0) { /* header records no recovery area */
+ rec->max_len = 0;
+ return NTDB_SUCCESS;
+ }
+
+ ecode = methods->tread(ntdb, *recovery_offset, rec, sizeof(*rec)); /* read through the caller-supplied methods, not ntdb->io */
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+
+ ntdb_convert(ntdb, rec, sizeof(*rec));
+ /* ignore invalid recovery regions: can happen in crash */
+ if (rec->magic != NTDB_RECOVERY_MAGIC &&
+ rec->magic != NTDB_RECOVERY_INVALID_MAGIC) {
+ *recovery_offset = 0;
+ rec->max_len = 0;
+ }
+ return NTDB_SUCCESS;
+}
+
+static unsigned int same(const unsigned char *new,
+			 const unsigned char *old,
+			 unsigned int length)
+{
+	unsigned int matched = 0;
+
+	/* Length of the common prefix of new and old, at most length bytes. */
+	while (matched < length && new[matched] == old[matched]) {
+		matched++;
+	}
+	return matched;
+}
+
+static unsigned int different(const unsigned char *new,
+			      const unsigned char *old,
+			      unsigned int length,
+			      unsigned int min_same,
+			      unsigned int *samelen)
+{
+	unsigned int pos, run = 0;
+
+	/* Returns bytes before an identical run of >= min_same bytes (or the end); *samelen gets that run's length, else 0. */
+	for (pos = 0; pos < length; pos++) {
+		if (new[pos] == old[pos]) {
+			run++;
+			continue;
+		}
+		if (run >= min_same) {
+			*samelen = run;
+			return pos - run;
+		}
+		run = 0;
+	}
+
+	*samelen = (run < min_same) ? 0 : run;
+	return length - *samelen;
+}
+
+/* Allocates recovery blob, without ntdb_recovery_record at head set up. On success *len is the payload length (header excluded); on failure returns an NTDB_ERR_PTR(). */
+static struct ntdb_recovery_record *alloc_recovery(struct ntdb_context *ntdb,
+ ntdb_len_t *len)
+{
+ struct ntdb_recovery_record *rec;
+ size_t i;
+ enum NTDB_ERROR ecode;
+ unsigned char *p; /* write cursor into the payload that follows the header */
+ const struct ntdb_methods *old_methods = ntdb->io;
+
+ rec = ntdb->alloc_fn(ntdb, sizeof(*rec) + ntdb_recovery_size(ntdb),
+ ntdb->alloc_data);
+ if (!rec) {
+ ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+ "transaction_setup_recovery:"
+ " cannot allocate");
+ return NTDB_ERR_PTR(NTDB_ERR_OOM);
+ }
+
+ /* We temporarily revert to the old I/O methods, so we can use
+ * ntdb_access_read */
+ ntdb->io = ntdb->transaction->io_methods;
+
+ /* build the recovery data into a single blob to allow us to do a single
+ large write, which should be more efficient */
+ p = (unsigned char *)(rec + 1);
+ for (i=0;i<ntdb->transaction->num_blocks;i++) {
+ ntdb_off_t offset;
+ ntdb_len_t length;
+ unsigned int off; /* position within the current block */
+ const unsigned char *buffer; /* current on-disk contents of the block */
+
+ if (ntdb->transaction->blocks[i] == NULL) {
+ continue;
+ }
+
+ offset = i * NTDB_PGSIZE;
+ length = NTDB_PGSIZE;
+ if (offset >= ntdb->transaction->old_map_size) {
+ continue; /* block lies beyond the old end of file: no old data to save */
+ }
+
+ if (offset + length > ntdb->file->map_size) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_transaction_setup_recovery:"
+ " transaction data over new region"
+ " boundary");
+ goto fail;
+ }
+ buffer = ntdb_access_read(ntdb, offset, length, false);
+ if (NTDB_PTR_IS_ERR(buffer)) {
+ ecode = NTDB_PTR_ERR(buffer);
+ goto fail;
+ }
+
+ /* Skip over anything the same at the start. */
+ off = same(ntdb->transaction->blocks[i], buffer, length);
+ offset += off;
+
+ while (off < length) {
+ ntdb_len_t len1; /* length of the differing run to record */
+ unsigned int samelen; /* identical bytes following that run, which are skipped */
+
+ len1 = different(ntdb->transaction->blocks[i] + off,
+ buffer + off, length - off,
+ sizeof(offset) + sizeof(len1) + 1, /* min_same: an identical run is only worth skipping if longer than a record header */
+ &samelen);
+
+ memcpy(p, &offset, sizeof(offset));
+ memcpy(p + sizeof(offset), &len1, sizeof(len1));
+ ntdb_convert(ntdb, p, sizeof(offset) + sizeof(len1));
+ p += sizeof(offset) + sizeof(len1);
+ memcpy(p, buffer + off, len1); /* save the OLD bytes: this is what recovery writes back */
+ p += len1;
+ off += len1 + samelen;
+ offset += len1 + samelen;
+ }
+ ntdb_access_release(ntdb, buffer);
+ }
+
+ *len = p - (unsigned char *)(rec + 1);
+ ntdb->io = old_methods;
+ return rec;
+
+fail:
+ ntdb->free_fn(rec, ntdb->alloc_data);
+ ntdb->io = old_methods; /* always restore the methods that were active on entry */
+ return NTDB_ERR_PTR(ecode);
+}
+
+static ntdb_off_t create_recovery_area(struct ntdb_context *ntdb,
+ ntdb_len_t rec_length,
+ struct ntdb_recovery_record *rec)
+{ /* Expand the file to hold a new recovery area, store its offset in the header and return it (or an NTDB_ERR_TO_OFF() value); sets rec->max_len. */
+ ntdb_off_t off, recovery_off;
+ ntdb_len_t addition;
+ enum NTDB_ERROR ecode;
+ const struct ntdb_methods *methods = ntdb->transaction->io_methods; /* the saved non-transaction io methods */
+
+ /* round up to a multiple of page size. Overallocate, since each
+ * such allocation forces us to expand the file. */
+ rec->max_len = ntdb_expand_adjust(ntdb->file->map_size, rec_length);
+
+ /* Round up to a page. */
+ rec->max_len = ((sizeof(*rec) + rec->max_len + NTDB_PGSIZE-1)
+ & ~(NTDB_PGSIZE-1))
+ - sizeof(*rec);
+
+ off = ntdb->file->map_size; /* the recovery area starts at the current (in-transaction) end of file */
+
+ /* Restore ->map_size before calling underlying expand_file.
+ Also so that we don't try to expand the file again in the
+ transaction commit, which would destroy the recovery
+ area */
+ addition = (ntdb->file->map_size - ntdb->transaction->old_map_size) +
+ sizeof(*rec) + rec->max_len;
+ ntdb->file->map_size = ntdb->transaction->old_map_size;
+ ntdb->stats.transaction_expand_file++;
+ ecode = methods->expand_file(ntdb, addition);
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+ "ntdb_recovery_allocate:"
+ " failed to create recovery area");
+ return NTDB_ERR_TO_OFF(ecode);
+ }
+
+ /* we have to reset the old map size so that we don't try to
+ expand the file again in the transaction commit, which
+ would destroy the recovery area */
+ ntdb->transaction->old_map_size = ntdb->file->map_size;
+
+ /* write the recovery header offset and sync - we can sync without a race here
+ as the magic ptr in the recovery record has not been set */
+ recovery_off = off;
+ ntdb_convert(ntdb, &recovery_off, sizeof(recovery_off));
+ ecode = methods->twrite(ntdb, offsetof(struct ntdb_header, recovery),
+ &recovery_off, sizeof(ntdb_off_t));
+ if (ecode != NTDB_SUCCESS) {
+ ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+ "ntdb_recovery_allocate:"
+ " failed to write recovery head");
+ return NTDB_ERR_TO_OFF(ecode);
+ }
+ transaction_write_existing(ntdb, offsetof(struct ntdb_header, recovery),
+ &recovery_off,
+ sizeof(ntdb_off_t)); /* keep any in-memory copy of the header block in step with what was just written */
+ return off;
+}
+
+/*
+  setup the recovery data that will be used on a crash during commit; recovery_size counts payload only, the on-disk blob is sizeof(*recovery) + recovery_size
+*/
+static enum NTDB_ERROR transaction_setup_recovery(struct ntdb_context *ntdb)
+{
+	ntdb_len_t recovery_size = 0;
+	ntdb_off_t recovery_off = 0;
+	ntdb_off_t old_map_size = ntdb->transaction->old_map_size;
+	struct ntdb_recovery_record *recovery;
+	const struct ntdb_methods *methods = ntdb->transaction->io_methods;
+	uint64_t magic;
+	enum NTDB_ERROR ecode;
+
+	recovery = alloc_recovery(ntdb, &recovery_size);
+	if (NTDB_PTR_IS_ERR(recovery))
+		return NTDB_PTR_ERR(recovery);
+
+	/* If we didn't actually change anything we overwrote? */
+	if (recovery_size == 0) {
+		/* In theory, we could have just appended data. */
+		if (ntdb->transaction->num_blocks * NTDB_PGSIZE
+		    < ntdb->transaction->old_map_size) {
+			free_transaction_blocks(ntdb);
+		}
+		ntdb->free_fn(recovery, ntdb->alloc_data);
+		return NTDB_SUCCESS;
+	}
+
+	ecode = ntdb_recovery_area(ntdb, methods, &recovery_off, recovery);
+	if (ecode != NTDB_SUCCESS) {
+		ntdb->free_fn(recovery, ntdb->alloc_data);
+		return ecode;
+	}
+
+	if (recovery->max_len < recovery_size) {
+		/* Not large enough.  Free up old recovery area. */
+		if (recovery_off) {
+			ntdb->stats.frees++;
+			ecode = add_free_record(ntdb, recovery_off,
+						sizeof(*recovery)
+						+ recovery->max_len,
+						NTDB_LOCK_WAIT, true);
+			ntdb->free_fn(recovery, ntdb->alloc_data);
+			if (ecode != NTDB_SUCCESS) {
+				return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+						   "ntdb_recovery_allocate:"
+						   " failed to free previous"
+						   " recovery area");
+			}
+
+			/* Refresh recovery after add_free_record above. */
+			recovery = alloc_recovery(ntdb, &recovery_size);
+			if (NTDB_PTR_IS_ERR(recovery))
+				return NTDB_PTR_ERR(recovery);
+		}
+
+		recovery_off = create_recovery_area(ntdb, recovery_size,
+						    recovery);
+		if (NTDB_OFF_IS_ERR(recovery_off)) {
+			ntdb->free_fn(recovery, ntdb->alloc_data);
+			return NTDB_OFF_TO_ERR(recovery_off);
+		}
+	}
+
+	/* Now we know size, convert rec header. */
+	recovery->magic = NTDB_RECOVERY_INVALID_MAGIC;
+	recovery->len = recovery_size;
+	recovery->eof = old_map_size;
+	ntdb_convert(ntdb, recovery, sizeof(*recovery));
+
+	/* write the recovery data (header + payload) to the recovery area */
+	ecode = methods->twrite(ntdb, recovery_off, recovery,
+				sizeof(*recovery) + recovery_size);
+	if (ecode != NTDB_SUCCESS) {
+		ntdb->free_fn(recovery, ntdb->alloc_data);
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_setup_recovery:"
+				   " failed to write recovery data");
+	}
+	transaction_write_existing(ntdb, recovery_off, recovery, /* mirror the same length that twrite stored */
+				   sizeof(*recovery) + recovery_size);
+	ntdb->free_fn(recovery, ntdb->alloc_data);
+	/* as we don't have ordered writes, we have to sync the recovery
+	   data before we update the magic to indicate that the recovery
+	   data is present; the range must cover header and payload */
+	ecode = transaction_sync(ntdb, recovery_off,
+				 sizeof(*recovery) + recovery_size);
+	if (ecode != NTDB_SUCCESS)
+		return ecode;
+
+	magic = NTDB_RECOVERY_MAGIC;
+	ntdb_convert(ntdb, &magic, sizeof(magic));
+
+	ntdb->transaction->magic_offset
+		= recovery_off + offsetof(struct ntdb_recovery_record, magic);
+
+	ecode = methods->twrite(ntdb, ntdb->transaction->magic_offset,
+				&magic, sizeof(magic));
+	if (ecode != NTDB_SUCCESS) {
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_setup_recovery:"
+				   " failed to write recovery magic");
+	}
+	transaction_write_existing(ntdb, ntdb->transaction->magic_offset,
+				   &magic, sizeof(magic));
+
+	/* ensure the recovery magic marker is on disk */
+	return transaction_sync(ntdb, ntdb->transaction->magic_offset,
+				sizeof(magic));
+}
+
+static enum NTDB_ERROR _ntdb_transaction_prepare_commit(struct ntdb_context *ntdb)
+{ /* First commit phase: upgrade locks, write the recovery area and expand the file; the data blocks themselves are written by ntdb_transaction_commit(). */
+ const struct ntdb_methods *methods;
+ enum NTDB_ERROR ecode;
+
+ if (ntdb->transaction == NULL) {
+ return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_prepare_commit:"
+ " no transaction");
+ }
+
+ if (ntdb->transaction->prepared) {
+ _ntdb_transaction_cancel(ntdb); /* preparing twice is a usage error and cancels the transaction */
+ return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_prepare_commit:"
+ " transaction already prepared");
+ }
+
+ if (ntdb->transaction->transaction_error) {
+ _ntdb_transaction_cancel(ntdb);
+ return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_ERROR,
+ "ntdb_transaction_prepare_commit:"
+ " transaction error pending");
+ }
+
+
+ if (ntdb->transaction->nesting != 0) {
+ return NTDB_SUCCESS; /* nested level: only the outermost commit does real work */
+ }
+
+ /* check for a null transaction */
+ if (ntdb->transaction->blocks == NULL) {
+ return NTDB_SUCCESS;
+ }
+
+ methods = ntdb->transaction->io_methods;
+
+ /* upgrade the main transaction lock region to a write lock */
+ ecode = ntdb_allrecord_upgrade(ntdb, NTDB_HASH_LOCK_START);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode; /* the transaction is left in place on every error return below; ntdb_transaction_commit() cancels it when it is the caller */
+ }
+
+ /* get the open lock - this prevents new users attaching to the database
+ during the commit */
+ ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ /* Sets up ntdb->transaction->recovery and
+ * ntdb->transaction->magic_offset. */
+ ecode = transaction_setup_recovery(ntdb);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+
+ ntdb->transaction->prepared = true;
+
+ /* expand the file to the new size if needed */
+ if (ntdb->file->map_size != ntdb->transaction->old_map_size) {
+ ntdb_len_t add;
+
+ add = ntdb->file->map_size - ntdb->transaction->old_map_size;
+ /* Restore original map size for ntdb_expand_file */
+ ntdb->file->map_size = ntdb->transaction->old_map_size;
+ ecode = methods->expand_file(ntdb, add);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ }
+
+ /* Keep the open lock until the actual commit */
+ return NTDB_SUCCESS;
+}
+
+/*
+ prepare to commit the current transaction: public wrapper around _ntdb_transaction_prepare_commit()
+*/
+_PUBLIC_ enum NTDB_ERROR ntdb_transaction_prepare_commit(struct ntdb_context *ntdb)
+{
+ return _ntdb_transaction_prepare_commit(ntdb);
+}
+
+/*
+ commit the current transaction: prepares it if needed, writes every in-memory block to the file, syncs, then tears the transaction down
+*/
+_PUBLIC_ enum NTDB_ERROR ntdb_transaction_commit(struct ntdb_context *ntdb)
+{
+ const struct ntdb_methods *methods;
+ int i;
+ enum NTDB_ERROR ecode;
+
+ if (ntdb->transaction == NULL) {
+ return ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
+ "ntdb_transaction_commit:"
+ " no transaction");
+ }
+
+ ntdb_trace(ntdb, "ntdb_transaction_commit");
+
+ if (ntdb->transaction->nesting != 0) {
+ ntdb->transaction->nesting--; /* nested commit just unwinds one level */
+ return NTDB_SUCCESS;
+ }
+
+ if (!ntdb->transaction->prepared) {
+ ecode = _ntdb_transaction_prepare_commit(ntdb);
+ if (ecode != NTDB_SUCCESS) {
+ _ntdb_transaction_cancel(ntdb);
+ return ecode;
+ }
+ }
+
+ /* check for a null transaction (prepare_commit may do this!) */
+ if (ntdb->transaction->blocks == NULL) {
+ _ntdb_transaction_cancel(ntdb);
+ return NTDB_SUCCESS;
+ }
+
+ methods = ntdb->transaction->io_methods;
+
+ /* perform all the writes */
+ for (i=0;i<ntdb->transaction->num_blocks;i++) {
+ ntdb_off_t offset;
+ ntdb_len_t length;
+
+ if (ntdb->transaction->blocks[i] == NULL) {
+ continue;
+ }
+
+ offset = i * NTDB_PGSIZE;
+ length = NTDB_PGSIZE;
+
+ ecode = methods->twrite(ntdb, offset,
+ ntdb->transaction->blocks[i], length);
+ if (ecode != NTDB_SUCCESS) {
+ /* we've overwritten part of the data and
+ possibly expanded the file, so we need to
+ run the crash recovery code */
+ ntdb->io = methods; /* recovery must use the real io methods, not the transaction ones */
+ ntdb_transaction_recover(ntdb);
+
+ _ntdb_transaction_cancel(ntdb);
+
+ return ecode;
+ }
+ SAFE_FREE(ntdb, ntdb->transaction->blocks[i]);
+ }
+
+ SAFE_FREE(ntdb, ntdb->transaction->blocks);
+ ntdb->transaction->num_blocks = 0;
+
+ /* ensure the new data is on disk */
+ ecode = transaction_sync(ntdb, 0, ntdb->file->map_size);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode; /* NOTE(review): returns without _ntdb_transaction_cancel(), so the transaction and its locks stay in place — confirm this is intended */
+ }
+
+ /*
+ TODO: maybe write to some dummy hdr field, or write to magic
+ offset without mmap, before the last sync, instead of the
+ utime() call
+ */
+
+ /* on some systems (like Linux 2.6.x) changes via mmap/msync
+ don't change the mtime of the file, this means the file may
+ not be backed up (as ntdb rounding to block sizes means that
+ file size changes are quite rare too). The following forces
+ mtime changes when a transaction completes */
+#if HAVE_UTIME
+ utime(ntdb->name, NULL);
+#endif
+
+ /* use a transaction cancel to free memory and remove the
+ transaction locks: it "restores" map_size, too. */
+ ntdb->transaction->old_map_size = ntdb->file->map_size; /* so the "restore" in cancel keeps the committed size */
+ _ntdb_transaction_cancel(ntdb);
+
+ return NTDB_SUCCESS;
+}
+
+
+/*
+  recover from an aborted transaction.  Must be called with exclusive
+  database write access already established (including the open
+  lock to prevent new processes attaching).  Replays the saved (offset,
+  length, old bytes) records from the recovery area and invalidates its magic.
+*/
+enum NTDB_ERROR ntdb_transaction_recover(struct ntdb_context *ntdb)
+{
+	ntdb_off_t recovery_head, recovery_eof;
+	unsigned char *data, *p;
+	struct ntdb_recovery_record rec;
+	enum NTDB_ERROR ecode;
+
+	/* find the recovery area */
+	recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery));
+	if (NTDB_OFF_IS_ERR(recovery_head)) {
+		ecode = NTDB_OFF_TO_ERR(recovery_head);
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to read recovery head");
+	}
+
+	if (recovery_head == 0) {
+		/* we have never allocated a recovery record */
+		return NTDB_SUCCESS;
+	}
+
+	/* read the recovery record */
+	ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec));
+	if (ecode != NTDB_SUCCESS) {
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to read recovery record");
+	}
+
+	if (rec.magic != NTDB_RECOVERY_MAGIC) {
+		/* there is no valid recovery data */
+		return NTDB_SUCCESS;
+	}
+
+	if (ntdb->flags & NTDB_RDONLY) {
+		return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " attempt to recover read only database");
+	}
+
+	recovery_eof = rec.eof;
+	data = (unsigned char *)ntdb->alloc_fn(ntdb, rec.len, ntdb->alloc_data);
+	if (data == NULL) {
+		return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to allocate recovery data");
+	}
+
+	/* read the full recovery data */
+	ecode = ntdb->io->tread(ntdb, recovery_head + sizeof(rec), data,
+				rec.len);
+	if (ecode != NTDB_SUCCESS) {
+		ntdb->free_fn(data, ntdb->alloc_data); /* do not leak the buffer on a failed read */
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to read recovery data");
+	}
+
+	/* recover the file data */
+	p = data;
+	while (p+sizeof(ntdb_off_t)+sizeof(ntdb_len_t) < data + rec.len) {
+		ntdb_off_t ofs;
+		ntdb_len_t len;
+		ntdb_convert(ntdb, p, sizeof(ofs) + sizeof(len));
+		memcpy(&ofs, p, sizeof(ofs));
+		memcpy(&len, p + sizeof(ofs), sizeof(len));
+		p += sizeof(ofs) + sizeof(len);
+
+		ecode = ntdb->io->twrite(ntdb, ofs, p, len); /* NOTE(review): len comes from the file and is not checked against the bytes left in data */
+		if (ecode != NTDB_SUCCESS) {
+			ntdb->free_fn(data, ntdb->alloc_data);
+			return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+					   "ntdb_transaction_recover:"
+					   " failed to recover %zu bytes"
+					   " at offset %zu",
+					   (size_t)len, (size_t)ofs);
+		}
+		p += len;
+	}
+
+	ntdb->free_fn(data, ntdb->alloc_data);
+
+	ecode = transaction_sync(ntdb, 0, ntdb->file->map_size);
+	if (ecode != NTDB_SUCCESS) {
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to sync recovery");
+	}
+
+	/* if the recovery area is after the recovered eof then remove it */
+	if (recovery_eof <= recovery_head) {
+		ecode = ntdb_write_off(ntdb, offsetof(struct ntdb_header,
+						      recovery),
+				       0);
+		if (ecode != NTDB_SUCCESS) {
+			return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+					   "ntdb_transaction_recover:"
+					   " failed to remove recovery head");
+		}
+	}
+
+	/* remove the recovery magic */
+	ecode = ntdb_write_off(ntdb,
+			       recovery_head
+			       + offsetof(struct ntdb_recovery_record, magic),
+			       NTDB_RECOVERY_INVALID_MAGIC);
+	if (ecode != NTDB_SUCCESS) {
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to remove recovery magic");
+	}
+
+	ecode = transaction_sync(ntdb, 0, recovery_eof);
+	if (ecode != NTDB_SUCCESS) {
+		return ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
+				   "ntdb_transaction_recover:"
+				   " failed to sync2 recovery");
+	}
+
+	ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
+		    "ntdb_transaction_recover: recovered %zu byte database",
+		    (size_t)recovery_eof);
+
+	/* all done */
+	return NTDB_SUCCESS;
+}
+
+ntdb_bool_err ntdb_needs_recovery(struct ntdb_context *ntdb)
+{ /* True when the header points at a recovery record whose magic is NTDB_RECOVERY_MAGIC; false otherwise; an error-encoded value if a read fails. */
+ ntdb_off_t recovery_head;
+ struct ntdb_recovery_record rec;
+ enum NTDB_ERROR ecode;
+
+ /* find the recovery area */
+ recovery_head = ntdb_read_off(ntdb, offsetof(struct ntdb_header,recovery));
+ if (NTDB_OFF_IS_ERR(recovery_head)) {
+ return recovery_head; /* already an error-encoded offset; passed through unchanged */
+ }
+
+ if (recovery_head == 0) {
+ /* we have never allocated a recovery record */
+ return false;
+ }
+
+ /* read the recovery record */
+ ecode = ntdb_read_convert(ntdb, recovery_head, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ return NTDB_ERR_TO_OFF(ecode);
+ }
+
+ return (rec.magic == NTDB_RECOVERY_MAGIC);
+}
--- /dev/null
+ /*
+ Trivial Database 2: traverse function.
+ Copyright (C) Rusty Russell 2010
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <ccan/likely/likely.h>
+
+_PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb,
+ int (*fn)(struct ntdb_context *,
+ NTDB_DATA, NTDB_DATA, void *),
+ void *p)
+{ /* Visit every record, calling fn (if non-NULL) with key, data and p; returns the number of records visited, or an error-encoded negative value. */
+ enum NTDB_ERROR ecode;
+ struct hash_info h;
+ NTDB_DATA k, d;
+ int64_t count = 0;
+
+ k.dptr = NULL;
+ for (ecode = first_in_hash(ntdb, &h, &k, &d.dsize);
+ ecode == NTDB_SUCCESS;
+ ecode = next_in_hash(ntdb, &h, &k, &d.dsize)) {
+ d.dptr = k.dptr + k.dsize; /* data is taken to start right after the key bytes in the same buffer */
+
+ count++;
+ if (fn && fn(ntdb, k, d, p)) { /* a non-zero return from fn stops the traversal early */
+ ntdb->free_fn(k.dptr, ntdb->alloc_data);
+ return count;
+ }
+ ntdb->free_fn(k.dptr, ntdb->alloc_data);
+ }
+
+ if (ecode != NTDB_ERR_NOEXIST) { /* NTDB_ERR_NOEXIST is the normal end-of-iteration result */
+ return NTDB_ERR_TO_OFF(ecode);
+ }
+ return count;
+}
+
+_PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key)
+{ /* Fetch the first key of an iteration into *key via first_in_hash(). */
+ struct hash_info h; /* iteration state is local and discarded; ntdb_nextkey() re-finds the position from the key */
+
+ return first_in_hash(ntdb, &h, key, NULL); /* NULL: the data length is not requested */
+}
+
+/* We lock twice, not very efficient. We could keep last key & h cached. On entry *key is the previous key (its buffer is freed here); on success it is replaced by the next key. */
+_PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key)
+{
+ struct hash_info h;
+ struct ntdb_used_record rec;
+ ntdb_off_t off;
+
+ off = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, NULL);
+ ntdb->free_fn(key->dptr, ntdb->alloc_data); /* the old key buffer is released whether or not the lookup succeeded */
+ if (NTDB_OFF_IS_ERR(off)) {
+ return NTDB_OFF_TO_ERR(off);
+ }
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
+
+ /* If we found something, skip to next. */
+ if (off)
+ h.bucket++;
+ return next_in_hash(ntdb, &h, key, NULL);
+}
+
+static int wipe_one(struct ntdb_context *ntdb,
+ NTDB_DATA key, NTDB_DATA data, enum NTDB_ERROR *ecode)
+{ /* Traverse callback used by ntdb_wipe_all(): delete one record; data is unused. */
+ *ecode = ntdb_delete(ntdb, key); /* result is reported to the caller through ecode */
+ return (*ecode != NTDB_SUCCESS); /* non-zero on failure, which stops the traversal */
+}
+
+_PUBLIC_ enum NTDB_ERROR ntdb_wipe_all(struct ntdb_context *ntdb)
+{ /* Delete every record while holding the all-record write lock. */
+ enum NTDB_ERROR ecode;
+ int64_t count;
+
+ ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
+ if (ecode != NTDB_SUCCESS)
+ return ecode;
+
+ /* FIXME: Be smarter. */
+ count = ntdb_traverse(ntdb, wipe_one, &ecode); /* wipe_one stores the last ntdb_delete() result in ecode */
+ if (count < 0)
+ ecode = NTDB_OFF_TO_ERR(count); /* the traversal itself failed: report that error instead */
+ ntdb_allrecord_unlock(ntdb, F_WRLCK);
+ return ecode;
+}
--- /dev/null
+#!/usr/bin/env python
+
+APPNAME = 'ntdb'
+VERSION = '1.0'
+
+blddir = 'bin'
+
+import sys, os
+
+# find the buildtools directory by walking up from the current directory
+srcdir = '.'
+while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5: # gives up once the path has 5 components
+ srcdir = srcdir + '/..'
+sys.path.insert(0, srcdir + '/buildtools/wafsamba') # must precede the wafsamba import below
+
+import wafsamba, samba_dist, Options, Logs, glob
+
+samba_dist.DIST_DIRS('lib/ntdb:. lib/replace:lib/replace lib/ccan:lib/ccan buildtools:buildtools')
+
+def set_options(opt): # registers this project's command-line options on opt
+ opt.BUILTIN_DEFAULT('replace,ccan')
+ opt.PRIVATE_EXTENSION_DEFAULT('ntdb', noextension='ntdb')
+ opt.RECURSE('lib/replace') # also pull in the options declared by lib/replace
+ opt.add_option('--valgrind',
+ help=("use valgrind on tests programs"),
+ action="store_true", dest='VALGRIND', default=False)
+ opt.add_option('--valgrind-log',
+ help=("where to put the valgrind log"),
+ action="store", dest='VALGRINDLOG', default=None)
+
+ if opt.IN_LAUNCH_DIR(): # --disable-python is only offered when IN_LAUNCH_DIR() is true
+ opt.add_option('--disable-python',
+ help=("disable the pyntdb module"),
+ action="store_true", dest='disable_python', default=False)
+
+# Configure step: recurse into lib/replace and lib/ccan, record the test
+# source lists in conf.env, look for a system ntdb/pyntdb when not standalone,
+# and probe for the Python development headers unless Python is disabled.
+def configure(conf):
+ conf.RECURSE('lib/replace')
+ conf.RECURSE('lib/ccan')
+
+ # Test sources.  build() creates one binary per RUN/API entry and
+ # testonly() executes them.
+ conf.env.NTDB_TEST_RUN_SRC=['test/run-001-encode.c',
+ 'test/run-001-fls.c',
+ 'test/run-01-new_database.c',
+ 'test/run-02-expand.c',
+ 'test/run-03-coalesce.c',
+ 'test/run-04-basichash.c',
+ 'test/run-05-readonly-open.c',
+ 'test/run-10-simple-store.c',
+ 'test/run-11-simple-fetch.c',
+ 'test/run-12-check.c',
+ 'test/run-15-append.c',
+ 'test/run-25-hashoverload.c',
+ 'test/run-30-exhaust-before-expand.c',
+ 'test/run-35-convert.c',
+ 'test/run-50-multiple-freelists.c',
+ 'test/run-56-open-during-transaction.c',
+ 'test/run-57-die-during-transaction.c',
+ 'test/run-64-bit-tdb.c',
+ 'test/run-90-get-set-attributes.c',
+ 'test/run-capabilities.c',
+ 'test/run-expand-in-transaction.c',
+ 'test/run-features.c',
+ 'test/run-lockall.c',
+ 'test/run-remap-in-read_traverse.c',
+ 'test/run-seed.c',
+ 'test/run-tdb_errorstr.c',
+ 'test/run-tdb_foreach.c',
+ 'test/run-traverse.c']
+ conf.env.NTDB_TEST_API_SRC=['test/api-12-store.c',
+ 'test/api-13-delete.c',
+ 'test/api-14-exists.c',
+ 'test/api-16-wipe_all.c',
+ 'test/api-20-alloc-attr.c',
+ 'test/api-21-parse_record.c',
+ 'test/api-55-transaction.c',
+ 'test/api-60-noop-transaction.c',
+ 'test/api-80-tdb_fd.c',
+ 'test/api-81-seqnum.c',
+ 'test/api-82-lockattr.c',
+ 'test/api-83-openhook.c',
+ 'test/api-91-get-stats.c',
+ 'test/api-92-get-set-readonly.c',
+ 'test/api-93-repack.c',
+ 'test/api-94-expand-during-parse.c',
+ 'test/api-95-read-only-during-parse.c',
+ 'test/api-add-remove-flags.c',
+ 'test/api-check-callback.c',
+ 'test/api-firstkey-nextkey.c',
+ 'test/api-fork-test.c',
+ 'test/api-locktimeout.c',
+ 'test/api-missing-entries.c',
+ 'test/api-open-multiple-times.c',
+ 'test/api-record-expand.c',
+ 'test/api-simple-delete.c',
+ 'test/api-summary.c']
+ # Python API test scripts, run by testonly() unless Python is disabled.
+ conf.env.NTDB_TEST_API_PY=['test/python-api.py']
+ # Helper sources; build() compiles them into the ntdb-api-helpers,
+ # ntdb-run-helpers and ntdb-test-helpers subsystems respectively.
+ conf.env.NTDB_TEST_API_HELPER_SRC=['test/helpapi-external-agent.c']
+ conf.env.NTDB_TEST_RUN_HELPER_SRC=['test/helprun-external-agent.c',
+ 'test/helprun-layout.c']
+ conf.env.NTDB_TEST_HELPER_SRC=['test/external-agent.c',
+ 'test/failtest_helper.c',
+ 'test/lock-tracking.c',
+ 'test/logging.c',
+ 'test/tap-interface.c']
+
+ # "Standalone" is whatever IN_LAUNCH_DIR() reports (presumably: waf was
+ # started from this directory rather than from a parent tree).
+ conf.env.standalone_ntdb = conf.IN_LAUNCH_DIR()
+ # --disable-python is only registered by set_options() when
+ # IN_LAUNCH_DIR() is true, hence getattr() with a False fallback.
+ conf.env.disable_python = getattr(Options.options, 'disable_python', False)
+
+ # When not standalone, look for system copies; the resulting defines are
+ # tested in build() through CONFIG_SET().
+ if not conf.env.standalone_ntdb:
+ if conf.CHECK_BUNDLED_SYSTEM('ntdb', minversion=VERSION,
+ implied_deps='replace'):
+ conf.define('USING_SYSTEM_NTDB', 1)
+ if conf.CHECK_BUNDLED_SYSTEM_PYTHON('pyntdb', 'ntdb', minversion=VERSION):
+ conf.define('USING_SYSTEM_PYNTDB', 1)
+
+ if not conf.env.disable_python:
+ # also disable if we don't have the python libs installed
+ conf.find_program('python', var='PYTHON')
+ conf.check_tool('python')
+ # (2,4,2) is presumably the minimum accepted Python version.
+ conf.check_python_version((2,4,2))
+ # mandatory=False: missing headers are handled below instead of here.
+ conf.SAMBA_CHECK_PYTHON_HEADERS(mandatory=False)
+ if not conf.env.HAVE_PYTHON_H:
+ Logs.warn('Disabling pyntdb as python devel libs not found')
+ conf.env.disable_python = True
+
+ conf.CHECK_XSLTPROC_MANPAGES()
+
+ # This makes #include <ccan/...> work.
+ conf.ADD_EXTRA_INCLUDES('''#lib''')
+
+ conf.SAMBA_CONFIG_H()
+
+# Build step: declares the ntdb library, its command-line tools, the test
+# binaries (in developer mode) and the pyntdb Python module.
+def build(bld):
+ bld.RECURSE('lib/replace')
+ bld.RECURSE('lib/ccan')
+
+ # Standalone builds produce a public library; otherwise it is private.
+ # private_library also controls whether ntdb.h is installed.
+ if bld.env.standalone_ntdb:
+ bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig'
+ private_library = False
+ else:
+ private_library = True
+
+ # Library sources, used for both 'ntdb' and the 'ntdb-testing' subsystem.
+ SRC = '''check.c free.c hash.c io.c lock.c open.c
+ summary.c ntdb.c transaction.c traverse.c'''
+
+ # Build our own ntdb only if configure() did not define USING_SYSTEM_NTDB.
+ # NOTE(review): NTDB_CCAN is assigned under this check but is also used by
+ # the DEVELOPER_MODE targets further down -- confirm those targets are
+ # nested inside this check, otherwise the name would be undefined there.
+ if not bld.CONFIG_SET('USING_SYSTEM_NTDB'):
+ NTDB_CCAN='ccan-likely ccan-ilog ccan-hash ccan-tally'
+ bld.SAMBA_LIBRARY('ntdb',
+ SRC,
+ deps='replace ' + NTDB_CCAN ,
+ includes='.',
+ abi_directory='ABI',
+ abi_match='ntdb_*',
+ hide_symbols=True,
+ vnum=VERSION,
+ public_headers='ntdb.h',
+ public_headers_install=not private_library,
+ pc_files='ntdb.pc',
+ private_library=private_library,
+ manpages='man/ntdb.3')
+
+ # Command-line tools; ntdbtorture is built but not installed.
+ bld.SAMBA_BINARY('ntdbtorture',
+ 'tools/ntdbtorture.c',
+ deps='ntdb ccan-err',
+ install=False)
+
+ bld.SAMBA_BINARY('ntdbtool',
+ 'tools/ntdbtool.c',
+ deps='ntdb', manpages='man/ntdbtool.8')
+
+ bld.SAMBA_BINARY('ntdbdump',
+ 'tools/ntdbdump.c',
+ deps='ntdb', manpages='man/ntdbdump.8')
+
+ bld.SAMBA_BINARY('ntdbrestore',
+ 'tools/ntdbrestore.c',
+ deps='ntdb', manpages='man/ntdbrestore.8')
+
+ bld.SAMBA_BINARY('ntdbbackup',
+ 'tools/ntdbbackup.c',
+ deps='ntdb', manpages='man/ntdbbackup.8')
+
+ # Test targets are only declared in developer mode.
+ if bld.env.DEVELOPER_MODE:
+ # FIXME: We need CCAN for some API tests, but waf thinks it's
+ # already available via ntdb. It is, but not publicly.
+ # Workaround is to build a private, non-hiding version.
+ bld.SAMBA_SUBSYSTEM('ntdb-testing',
+ SRC,
+ deps='replace ' + NTDB_CCAN,
+ includes='.')
+
+ # Helper subsystems built from the source lists set in configure().
+ bld.SAMBA_SUBSYSTEM('ntdb-test-helpers',
+ bld.env.NTDB_TEST_HELPER_SRC,
+ deps='replace',
+ allow_warnings=True)
+ bld.SAMBA_SUBSYSTEM('ntdb-run-helpers',
+ bld.env.NTDB_TEST_RUN_HELPER_SRC,
+ deps='replace')
+ bld.SAMBA_SUBSYSTEM('ntdb-api-helpers',
+ bld.env.NTDB_TEST_API_HELPER_SRC,
+ deps='replace')
+
+ # One uninstalled binary per test source, named 'ntdb-' plus the source
+ # file's basename without extension.  The run tests take the CCAN modules
+ # and run helpers as deps; the API tests take 'ntdb-testing' and the API
+ # helpers.
+ for f in bld.env.NTDB_TEST_RUN_SRC:
+ base = os.path.splitext(os.path.basename(f))[0]
+ bld.SAMBA_BINARY('ntdb-' + base, f,
+ deps=NTDB_CCAN + ' ccan-failtest ntdb-test-helpers ntdb-run-helpers',
+ install=False)
+
+ for f in bld.env.NTDB_TEST_API_SRC:
+ base = os.path.splitext(os.path.basename(f))[0]
+ bld.SAMBA_BINARY('ntdb-' + base, f,
+ deps='ntdb-test-helpers ntdb-api-helpers ntdb-testing',
+ install=False)
+
+ # Python module, declared unless configure() defined USING_SYSTEM_PYNTDB;
+ # 'enabled' follows the disable_python setting from configure().
+ if not bld.CONFIG_SET('USING_SYSTEM_PYNTDB'):
+ bld.SAMBA_PYTHON('pyntdb',
+ source='pyntdb.c',
+ deps='ntdb',
+ enabled=not bld.env.disable_python,
+ realname='ntdb.so',
+ cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION)
+
+def testonly(ctx):
+ '''run ntdb testsuite'''
+ import Utils, samba_utils, shutil
+ ecode = 0;
+
+ env = samba_utils.LOAD_ENVIRONMENT()
+
+ if env.standalone_ntdb:
+ # FIXME: This is horrible :(
+ test_prefix = "%s/st" % (Utils.g_module.blddir)
+ shutil.rmtree(test_prefix, ignore_errors=True)
+ os.makedirs(test_prefix)
+
+ # Create scratch directory for tests.
+ testdir = os.path.join(test_prefix, 'ntdb-tests')
+ samba_utils.mkdir_p(testdir)
+ # Symlink back to source dir so it can find tests in test/
+ link = os.path.join(testdir, 'test')
+ if not os.path.exists(link):
+ os.symlink(os.path.abspath(os.path.join(env.cwd, 'test')), link)
+
+ if env.options['VALGRIND']:
+ os.environ['VALGRIND'] = 'valgrind -q --num-callers=30 --error-exitcode=11'
+ if env.options['VALGRINDLOG']:
+ os.environ['VALGRIND'] += ' --log-file=%s' % Options.options.VALGRINDLOG
+
+ for f in env.NTDB_TEST_RUN_SRC + env.NTDB_TEST_API_SRC:
+ name = "ntdb-" + os.path.splitext(os.path.basename(f))[0]
+ cmd = "cd " + testdir + " && $VALGRIND " + os.path.abspath(os.path.join(Utils.g_module.blddir, name)) + " > test-output 2>&1"
+ print("..." + f)
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print("%s (%s) failed:" % (name, f))
+ samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output'))
+ ecode = ret;
+ break;
+ if not env.disable_python:
+ for f in env.NTDB_TEST_API_PY:
+ print("..." + f)
+ cmd = "cd " + testdir + " && PYTHONPATH=%s %s %s > test-output 2>&1" % (
+ os.path.abspath(os.path.join(Utils.g_module.blddir, "python")),
+ env["PYTHON"], os.path.abspath(f))
+ ret = samba_utils.RUN_COMMAND(cmd)
+ if ret != 0:
+ print("%s (%s) failed:" % (name, f))
+ samba_utils.RUN_COMMAND("cat " + os.path.join(testdir, 'test-output'))
+ ecode = ret
+ break
+
+ sys.exit(ecode)
+
+# WAF doesn't build the unit tests for this, maybe because they don't link with ntdb?
+# This forces it
+def test(ctx):
+ import Scripting
+ Scripting.commands.append('build')
+ Scripting.commands.append('testonly')
+
+# Takes no arguments and simply delegates to samba_dist.dist(); the
+# directories to package are registered near the top of this file with
+# samba_dist.DIST_DIRS().
+def dist():
+ '''makes a tarball for distribution'''
+ samba_dist.dist()
+
+# Thin wrapper: passes the waf context straight to samba_utils.reconfigure().
+def reconfigure(ctx):
+ '''reconfigure if config scripts have changed'''
+ # Imported here rather than at module level, as in testonly().
+ import samba_utils
+ samba_utils.reconfigure(ctx)