2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include <ccan/likely/likely.h>
/*
 * tdb_munmap - drop the current memory mapping of the database.
 *
 * Only part of the body is visible in this listing: a test of the
 * TDB_INTERNAL flag followed by munmap() of tdb->map_size bytes at
 * tdb->map_ptr.
 * NOTE(review): what happens when TDB_INTERNAL is set, and whether
 * tdb->map_ptr is reset afterwards, is on lines not shown here -- confirm
 * against the full source.
 */
31 void tdb_munmap(struct tdb_context *tdb)
33 if (tdb->flags & TDB_INTERNAL)
/* Unmap map_size bytes starting at map_ptr; the result is not checked. */
37 munmap(tdb->map_ptr, tdb->map_size);
/*
 * tdb_mmap - try to map the database file into memory.
 *
 * Visible steps: the TDB_INTERNAL and TDB_NOMMAP flags are tested (the
 * action taken for each is on lines not shown here), then map_size bytes
 * of tdb->fd are mapped from offset 0 with MAP_SHARED.  The mapping is
 * readable, and also writable unless tdb->read_only is set.
 * A failed mmap() is reported through tdb->log at TDB_DEBUG_WARNING level
 * together with strerror(errno); the function itself returns void.
 * NOTE(review): the value left in tdb->map_ptr after a failure is not
 * visible in this excerpt; tdb_direct() tests map_ptr against NULL, so
 * presumably it is reset to NULL -- confirm.
 */
42 void tdb_mmap(struct tdb_context *tdb)
44 if (tdb->flags & TDB_INTERNAL)
47 if (tdb->flags & TDB_NOMMAP)
/* Shared mapping of the whole file; PROT_WRITE only for writable opens. */
50 tdb->map_ptr = mmap(NULL, tdb->map_size,
51 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
52 MAP_SHARED, tdb->fd, 0);
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
57 if (tdb->map_ptr == MAP_FAILED) {
/* Failure is only logged as a warning here. */
59 tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
60 "tdb_mmap failed for size %lld (%s)\n",
61 (long long)tdb->map_size, strerror(errno));
/*
 * tdb_oob - bounds check for an access that needs the database to be at
 * least "len" bytes long.
 *
 * Parameters:
 *   tdb   - database context.
 *   len   - minimum length needed.
 *   probe - not used on any visible line; presumably suppresses error
 *           reporting when the caller is only probing -- TODO confirm.
 *
 * Visible flow: nothing more is needed when len <= tdb->map_size.  For a
 * TDB_INTERNAL database an overrun sets ecode to TDB_ERR_IO and is logged.
 * Otherwise the expand lock is taken as F_RDLCK around an fstat() of
 * tdb->fd; a file shorter than len sets TDB_ERR_IO and is logged, and in
 * the remaining case map_size is updated to st.st_size.
 * Return values are on lines not shown here; callers in this file compare
 * the result against -1 (tdb_direct) or against 0 (tdb_read/tdb_write via
 * methods->oob).
 */
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and expand
68 note that "len" is the minimum length needed for the db
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
75 if (len <= tdb->map_size)
77 if (tdb->flags & TDB_INTERNAL) {
79 /* Ensure ecode is set for log fn. */
80 tdb->ecode = TDB_ERR_IO;
81 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
82 "tdb_oob len %lld beyond internal"
83 " malloc size %lld\n",
85 (long long)tdb->map_size);
/* File-backed database: look at the current file size under the expand lock. */
90 if (tdb_lock_expand(tdb, F_RDLCK) != 0)
93 ret = fstat(tdb->fd, &st);
95 tdb_unlock_expand(tdb, F_RDLCK);
98 tdb->ecode = TDB_ERR_IO;
/*
 * NOTE(review): len is cast to size_t before the comparison.  If tdb_off_t
 * is wider than size_t on some build (e.g. 64-bit offsets with a 32-bit
 * size_t) the cast would truncate len -- confirm the intended types.
 */
102 if (st.st_size < (size_t)len) {
104 /* Ensure ecode is set for log fn. */
105 tdb->ecode = TDB_ERR_IO;
106 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
107 "tdb_oob len %lld beyond eof at %lld\n",
108 (long long)len, (long long)st.st_size);
113 /* Unmap, update size, remap */
/* Only the size update is visible; the unmap/remap calls are elided. */
116 tdb->map_size = st.st_size;
/*
 * tdb_direct - return a pointer straight into the mapping for the range
 * [off, off + len).
 *
 * Visible checks, in order: tdb->map_ptr must be non-NULL, no transaction
 * may be active (tdb->transaction), and tdb_oob() must accept off + len
 * (called with probe = true).  On success the result is map_ptr + off.
 * The value returned when a check fails is on lines not shown here;
 * presumably NULL -- confirm.
 * NOTE(review): off + len is computed without a visible overflow check, so
 * a wrapped sum could pass the bounds test -- confirm callers bound len.
 */
121 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
123 if (unlikely(!tdb->map_ptr))
126 /* FIXME: We can do a subset of this! */
127 if (tdb->transaction)
130 if (unlikely(tdb_oob(tdb, off + len, true) == -1))
132 return (char *)tdb->map_ptr + off;
135 /* Either make a copy into pad and return that, or return ptr into mmap. */
136 /* Note: pad has to be a real object, so we can't get here if len
137 * overflows size_t */
/*
 * Parameters:
 *   tdb - database context.
 *   off - offset of the data in the database.
 *   pad - caller-provided buffer of at least len bytes, used for the copy.
 *   len - number of bytes wanted.
 *
 * When TDB_CONVERT is not set the direct pointer from tdb_direct() is
 * tried first; what is done with that result is on lines not shown here.
 * The final visible statement reads and converts into pad and yields pad,
 * or NULL when tdb_read_convert() returns -1.
 */
138 void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
140 if (likely(!(tdb->flags & TDB_CONVERT))) {
141 void *ret = tdb_direct(tdb, off, len);
145 return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
148 /* Endian conversion: we only ever deal with 8 byte quantities */
/*
 * tdb_convert - byte-swap a buffer in place when the database needs it.
 *
 * When TDB_CONVERT is set and buf is non-NULL, each of the size / 8
 * 64-bit words in buf is passed through bswap_64().  Any trailing bytes
 * beyond a multiple of 8 are left untouched by the visible loop.
 * The return statement is on a line not shown here; presumably buf is
 * returned, since tdb_write_convert() passes the result on as the data
 * pointer -- confirm.
 * NOTE(review): buf is accessed as uint64_t, so callers must supply
 * storage suitably aligned for that type.
 */
149 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
151 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
152 uint64_t i, *p = (uint64_t *)buf;
153 for (i = 0; i < size / 8; i++)
154 p[i] = bswap_64(p[i]);
159 /* Return first non-zero offset in num offset array, or num. */
160 /* FIXME: Return the off? */
/*
 * The parameter list continues on a line not shown here.  Visible body:
 * the num offsets starting at off are obtained with tdb_access_read()
 * without conversion, scanned with an index loop over [0, num), and then
 * handed back with tdb_access_release().  The test inside the loop and
 * the handling of a failed read are elided from this listing.
 * NOTE(review): num * sizeof(tdb_off_t) is not visibly checked for
 * overflow -- confirm callers bound num.
 */
161 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
167 /* Zero vs non-zero is the same unconverted: minor optimization. */
168 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
172 for (i = 0; i < num; i++) {
176 tdb_access_release(tdb, val);
180 /* Return first zero offset in num offset array, or num. */
/*
 * Counterpart of tdb_find_nonzero_off with the same visible structure:
 * read num offsets at off via tdb_access_read() without conversion, scan
 * indexes [0, num), then release the access.  The parameter list
 * continues on a line not shown here, and the loop test and the error
 * path are also elided.
 * NOTE(review): num * sizeof(tdb_off_t) is not visibly checked for
 * overflow -- confirm callers bound num.
 */
181 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
187 /* Zero vs non-zero is the same unconverted: minor optimization. */
188 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
192 for (i = 0; i < num; i++) {
196 tdb_access_release(tdb, val);
/*
 * zero_out - overwrite len bytes of the database starting at off with
 * zeroes.
 *
 * Visible pieces: a direct pointer for the range is requested from
 * tdb_direct(), and there is a chunked path that pushes at most
 * sizeof(buf) (8192) zero bytes per call through tdb->methods->write().
 * How the direct pointer is used, the loop around the chunked write and
 * the return values are on lines not shown here; presumably the chunked
 * path is the fallback when no direct pointer is available -- confirm.
 */
200 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
202 char buf[8192] = { 0 };
203 void *p = tdb_direct(tdb, off, len);
/* Size of this chunk: the remaining length, capped at the buffer size. */
209 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
210 if (tdb->methods->write(tdb, off, buf, todo) == -1)
/*
 * tdb_read_off - fetch a single tdb_off_t stored at offset off.
 *
 * The value is obtained through tdb_get() using a local pad of
 * sizeof(pad) bytes as the copy target.  The declarations, the handling
 * of a NULL result from tdb_get() and the return statement are on lines
 * not shown here.
 */
218 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
222 ret = tdb_get(tdb, off, &pad, sizeof(pad));
229 /* Even on files, we can get partial writes due to signals. */
/*
 * tdb_pwrite_all - write len bytes from buf to fd at offset off.
 *
 * Visible pieces: a pwrite() call and the advance of buf by the number of
 * bytes it reported.  The surrounding loop, the error test and the
 * matching updates of len and off are on lines not shown here; presumably
 * the call is repeated until everything is written and the bool result
 * reports success -- confirm.
 */
230 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
234 ret = pwrite(fd, buf, len, off);
241 buf = (char *)buf + ret;
248 /* Even on files, we can get partial reads due to signals. */
/*
 * tdb_pread_all - read len bytes from fd at offset off into buf.
 *
 * Visible pieces: a pread() call and the advance of buf by the number of
 * bytes it reported.  The surrounding loop, the error and end-of-file
 * handling and the updates of len and off are on lines not shown here;
 * presumably the bool result reports whether all len bytes were read --
 * confirm.
 */
249 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
253 ret = pread(fd, buf, len, off);
261 buf = (char *)buf + ret;
/*
 * tdb_read_all - read len bytes from the current position of fd into buf.
 *
 * Same visible shape as tdb_pread_all but using read(), so no explicit
 * offset is involved.  The loop, the error and end-of-file handling and
 * the return statements are on lines not shown here.
 */
268 bool tdb_read_all(int fd, void *buf, size_t len)
272 ret = read(fd, buf, len);
280 buf = (char *)buf + ret;
286 /* write a lump of data at a specified offset */
/*
 * Parameters:
 *   tdb - database context.
 *   off - destination offset in the database.
 *   buf - source data.
 *   len - number of bytes to write.
 *
 * Visible flow: a read-only database sets ecode to TDB_ERR_RDONLY; the
 * range end off + len must pass tdb->methods->oob(); the data is then
 * either copied into the mapping with memcpy() or written to the file
 * with tdb_pwrite_all().  The condition choosing between those two paths
 * and all return statements are on lines not shown here.
 */
287 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
288 const void *buf, tdb_len_t len)
294 if (tdb->read_only) {
295 tdb->ecode = TDB_ERR_RDONLY;
299 if (tdb->methods->oob(tdb, off + len, 0) != 0)
303 memcpy(off + (char *)tdb->map_ptr, buf, len);
305 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
306 tdb->ecode = TDB_ERR_IO;
/*
 * NOTE(review): off and len are passed to %llu without a cast, while the
 * other log calls in this file cast to (long long).  If tdb_off_t or
 * tdb_len_t is not exactly unsigned long long on a platform, this is a
 * format/argument mismatch -- consider casting for consistency.
 */
307 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
308 "tdb_write failed at %llu len=%llu (%s)\n",
309 off, len, strerror(errno));
/*
 * tdb_read - copy len bytes found at offset off of the database into buf.
 *
 * The parameter list continues on a line not shown here.  Visible flow:
 * the range end off + len must pass tdb->methods->oob(); the data is then
 * either copied out of the mapping with memcpy() or read from the file
 * with tdb_pread_all().  A failed file read sets ecode to TDB_ERR_IO and
 * is logged with the offset, length, an error string and the current
 * map_size.  The condition choosing between the two paths and all return
 * statements are elided from this listing.
 */
316 /* read a lump of data at a specified offset */
317 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
320 if (tdb->methods->oob(tdb, off + len, 0) != 0) {
325 memcpy(buf, off + (char *)tdb->map_ptr, len);
327 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
328 /* Ensure ecode is set for log fn. */
329 tdb->ecode = TDB_ERR_IO;
330 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
331 "tdb_read failed at %lld "
332 "len=%lld (%s) map_size=%lld\n",
333 (long long)off, (long long)len,
335 (long long)tdb->map_size);
/*
 * tdb_write_convert - write len bytes of rec at offset off, byte-swapping
 * first when the database has TDB_CONVERT set.
 *
 * With TDB_CONVERT the record is duplicated into a malloc()ed buffer so
 * the caller's copy stays unmodified, the duplicate is converted with
 * tdb_convert() and written through tdb->methods->write().  Without the
 * flag rec is written as-is.  An allocation failure sets ecode to
 * TDB_ERR_OOM and is logged.
 * NOTE(review): the release of conv and the return statements are on
 * lines not shown here -- confirm the temporary buffer is freed on every
 * path.
 */
342 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
343 const void *rec, size_t len)
346 if (unlikely((tdb->flags & TDB_CONVERT))) {
347 void *conv = malloc(len);
349 tdb->ecode = TDB_ERR_OOM;
350 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
351 "tdb_write: no memory converting %zu bytes\n",
/* Work on the private copy so the caller's record is not byte-swapped. */
355 memcpy(conv, rec, len);
356 ret = tdb->methods->write(tdb, off,
357 tdb_convert(tdb, conv, len), len);
360 ret = tdb->methods->write(tdb, off, rec, len);
/*
 * tdb_read_convert - read len bytes at offset off into rec and convert
 * them in place with tdb_convert().
 *
 * NOTE(review): the conversion call directly follows the read, with no
 * visible test of ret in between, so rec is converted even when the read
 * reported failure.  That looks harmless if callers discard rec on error
 * -- confirm.
 * The return statement is on a line not shown here; presumably ret.
 */
365 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
366 void *rec, size_t len)
368 int ret = tdb->methods->read(tdb, off, rec, len);
369 tdb_convert(tdb, rec, len);
/*
 * tdb_write_off - store the single offset value val at offset off.
 *
 * Thin wrapper: passes the address and size of the by-value parameter to
 * tdb_write_convert() and returns its result unchanged.
 */
373 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
375 return tdb_write_convert(tdb, off, &val, sizeof(val));
378 /* read a lump of data, allocating the space for it */
/*
 * Parameters:
 *   tdb    - database context.
 *   offset - offset of the data in the database.
 *   len    - number of bytes to read.
 *
 * A buffer of len bytes (at least 1) is allocated with malloc() and
 * filled through tdb->methods->read().  An allocation failure sets ecode
 * to TDB_ERR_OOM and is logged at TDB_DEBUG_ERROR level.
 * The handling of a failed read and the return statement are on lines not
 * shown here; presumably the buffer is freed and NULL returned on
 * failure, and the caller owns the buffer on success -- confirm.
 */
379 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
383 /* some systems don't like zero length malloc */
384 buf = malloc(len ? len : 1);
385 if (unlikely(!buf)) {
386 tdb->ecode = TDB_ERR_OOM;
387 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
388 "tdb_alloc_read malloc failed len=%lld\n",
/* Any non-zero result from the read method is treated as failure. */
390 } else if (unlikely(tdb->methods->read(tdb, offset, buf, len))) {
/*
 * hash_record - compute the hash of the key of the record stored at off.
 *
 * Visible steps: the record header (struct tdb_used_record) is fetched
 * with tdb_get(), the key length is taken from it with rec_key_length(),
 * the key bytes that start right after the header (off + sizeof(pad)) are
 * accessed without conversion, hashed with tdb_hash(), and the access is
 * released again.
 * Error handling for a failed tdb_get() or tdb_access_read() and the
 * return statement are on lines not shown here.
 */
397 uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
399 struct tdb_used_record pad, *r;
403 r = tdb_get(tdb, off, &pad, sizeof(pad));
408 klen = rec_key_length(r);
409 key = tdb_access_read(tdb, off + sizeof(pad), klen, false);
413 hash = tdb_hash(tdb, key, klen);
414 tdb_access_release(tdb, key);
/*
 * fill - write the pattern held in buf (size bytes) over len bytes of the
 * file starting at off.
 *
 * Visible pieces: each write covers n = min(len, size) bytes and goes
 * through tdb_pwrite_all() on tdb->fd; a failed write sets ecode to
 * TDB_ERR_IO and is logged as fatal.
 * The loop that advances off and reduces len, and the return statements,
 * are on lines not shown here; tdb_expand_file() compares the result
 * against -1.
 */
418 static int fill(struct tdb_context *tdb,
419 const void *buf, size_t size,
420 tdb_off_t off, tdb_len_t len)
/* Never write more than the pattern buffer holds in one call. */
423 size_t n = len > size ? size : len;
425 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
426 tdb->ecode = TDB_ERR_IO;
427 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
428 "fill write failed: giving up!\n");
/*
 * tdb_expand_file - grow the database by "addition" bytes.
 *
 * Visible flow: a read-only database sets ecode to TDB_ERR_RDONLY.  A
 * TDB_INTERNAL database is grown with realloc() of tdb->map_ptr, with
 * TDB_ERR_OOM on failure.  A file-backed database is extended with
 * ftruncate() to map_size + addition and the new region is then written
 * with the byte 0x43 through fill().  In both cases map_size is increased
 * by addition on the visible success path.
 * The unmap/remap calls, the assignment of the realloc() result and all
 * return statements are on lines not shown here.
 */
437 /* expand a file. we prefer to use ftruncate, as that is what posix
438 says to use for mmap expansion */
439 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
443 if (tdb->read_only) {
444 tdb->ecode = TDB_ERR_RDONLY;
448 if (tdb->flags & TDB_INTERNAL) {
/* The result goes to a temporary, so map_ptr survives a failed realloc. */
449 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
451 tdb->ecode = TDB_ERR_OOM;
455 tdb->map_size += addition;
457 /* Unmap before trying to write; old TDB claimed OpenBSD had
458 * problem with this otherwise. */
461 /* If this fails, we try to fill anyway. */
462 if (ftruncate(tdb->fd, tdb->map_size + addition))
465 /* now fill the file with something. This ensures that the
466 file isn't sparse, which would be very bad if we ran out of
467 disk. This must be done with write, not via mmap */
468 memset(buf, 0x43, sizeof(buf));
469 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
471 tdb->map_size += addition;
/*
 * tdb_access_read - obtain read access to len bytes at offset off.
 *
 * Parameters:
 *   tdb     - database context.
 *   off     - offset of the data in the database.
 *   len     - number of bytes wanted.
 *   convert - not used on any visible line; presumably selects whether
 *             the allocated copy is byte-swapped -- TODO confirm.
 *
 * Visible flow: without TDB_CONVERT a direct pointer from tdb_direct() is
 * tried; there is also a path that reads into a fresh buffer with
 * tdb_alloc_read() and one that runs tdb_convert() over the result.  The
 * conditions joining these steps and the return statement are on lines
 * not shown here.  Callers in this file pair every use with
 * tdb_access_release().
 */
477 const void *tdb_access_read(struct tdb_context *tdb,
478 tdb_off_t off, tdb_len_t len, bool convert)
480 const void *ret = NULL;
482 if (likely(!(tdb->flags & TDB_CONVERT)))
483 ret = tdb_direct(tdb, off, len);
486 ret = tdb_alloc_read(tdb, off, len);
488 tdb_convert(tdb, (void *)ret, len);
/*
 * tdb_access_release - give back a pointer obtained from
 * tdb_access_read().
 *
 * The visible part of the condition tests whether p lies outside the
 * range [map_ptr, map_ptr + map_size), i.e. is not a pointer into the
 * mapping.  The first operand of the condition and the action taken are
 * on lines not shown here; presumably a pointer outside the mapping is an
 * allocated copy that gets freed -- confirm.
 */
493 void tdb_access_release(struct tdb_context *tdb, const void *p)
496 || (char *)p < (char *)tdb->map_ptr
497 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
/*
 * NOTE(review): this is a second "static int tdb_write" in the same
 * listing.  It uses a different method table layout (methods->tdb_oob)
 * and the TDB_LOG macro instead of tdb->log, so the listing appears to
 * combine two distinct implementations; two static definitions with the
 * same name cannot coexist in one translation unit -- confirm which one
 * is meant to be built.
 *
 * Visible flow: writing is refused with TDB_ERR_RDONLY when the database
 * is read-only or tdb->traverse_read is set; the range end off + len must
 * pass methods->tdb_oob(); the data is then either copied into the
 * mapping with memcpy() or written with pwrite().  A short (but not
 * failed) pwrite() is logged and retried once for the remainder; a
 * pwrite() error, or a second short write, sets TDB_ERR_IO and is logged.
 * The condition choosing between mapping and file, the arguments of the
 * retry call and all return statements are on lines not shown here.
 */
502 /* write a lump of data at a specified offset */
503 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
504 const void *buf, tdb_len_t len)
510 if (tdb->read_only || tdb->traverse_read) {
511 tdb->ecode = TDB_ERR_RDONLY;
515 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
519 memcpy(off + (char *)tdb->map_ptr, buf, len);
521 ssize_t written = pwrite(tdb->fd, buf, len, off);
/* Partial write: some bytes went out, but fewer than requested. */
522 if ((written != (ssize_t)len) && (written != -1)) {
524 tdb->ecode = TDB_ERR_IO;
/*
 * NOTE(review): len and off are passed to %d without a cast here and in
 * the log calls below; this is only correct if tdb_len_t and tdb_off_t
 * promote to int-sized arguments -- confirm their definitions.
 */
525 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
526 "%d of %d bytes at %d, trying once more\n",
527 (int)written, len, off));
528 written = pwrite(tdb->fd, (const char *)buf+written,
533 /* Ensure ecode is set for log fn. */
534 tdb->ecode = TDB_ERR_IO;
535 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
536 "len=%d (%s)\n", off, len, strerror(errno)));
538 } else if (written != (ssize_t)len) {
539 tdb->ecode = TDB_ERR_IO;
540 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
541 "write %d bytes at %d in two attempts\n",
552 do an unlocked scan of the hash table heads to find the next non-zero head. The value
553 will then be confirmed with the lock held
/*
 * Parameters:
 *   tdb   - database context.
 *   chain - in/out hash chain index; its use is on lines not shown here,
 *           presumably it seeds h and receives the chain found -- TODO
 *           confirm.
 *
 * Two scans over h < tdb->header.hash_size are visible.  The first reads
 * each chain head straight out of the mapping as a uint32_t; the second
 * goes through tdb_ofs_read() and also stops when that read fails.  The
 * condition selecting between them (presumably whether a mapping exists)
 * and the loop exits are elided from this listing.
 */
555 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
559 for (;h < tdb->header.hash_size;h++) {
560 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
566 for (;h < tdb->header.hash_size;h++) {
567 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
575 /* read/write a tdb_off_t */
/*
 * tdb_ofs_read - read one tdb_off_t stored at offset into *d.
 *
 * Delegates to tdb->methods->tdb_read() for sizeof(*d) bytes and returns
 * its result; the last argument comes from the DOCONV() macro, which is
 * defined outside this excerpt (presumably a "convert byte order" flag --
 * confirm).
 */
576 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
578 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/*
 * tdb_ofs_write - write one tdb_off_t at offset.
 *
 * The visible statement writes sizeof(*d) bytes of CONVERT(off) through
 * tdb->methods->tdb_write() and returns its result.  The declaration of
 * the local "off" is on a line not shown here; presumably it is a copy of
 * *d so that the CONVERT macro does not modify the caller's value --
 * confirm.
 */
581 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
584 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
588 /* read/write a record */
/*
 * tdb_rec_read - read the record header stored at offset into *rec and
 * sanity-check it.
 *
 * Visible flow: the header is read through tdb->methods->tdb_read(); a
 * header failing the TDB_BAD_MAGIC() test sets ecode to TDB_ERR_CORRUPT
 * and is logged; otherwise the result of methods->tdb_oob() on
 * rec->next + sizeof(*rec) is returned, which checks that the next record
 * header would lie inside the database.
 * The return statements of the two failure branches are on lines not
 * shown here.
 */
589 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
591 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
593 if (TDB_BAD_MAGIC(rec)) {
594 /* Ensure ecode is set for log fn. */
595 tdb->ecode = TDB_ERR_CORRUPT;
596 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
599 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/*
 * tdb_rec_write - write the record header *rec at offset.
 *
 * The header is first copied into the local r, and CONVERT(r) is what
 * gets written through tdb->methods->tdb_write(), so the caller's *rec is
 * not touched by the CONVERT macro.  Returns the result of the write
 * method.
 */
602 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
604 struct tdb_record r = *rec;
605 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/*
 * Default I/O method table installed by tdb_io_init().  The initializer
 * list is on lines not shown in this excerpt.
 */
609 static const struct tdb_methods io_methods = {
617 initialise the default methods table
/*
 * tdb_io_init - point tdb->methods at the file-scope io_methods table.
 * Nothing else is done on the visible lines.
 */
619 void tdb_io_init(struct tdb_context *tdb)
621 tdb->methods = &io_methods;