2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include <ccan/likely/likely.h>
/*
 * Release the memory mapping of the database.
 *
 * The visible code tests TDB_INTERNAL in tdb->flags and calls munmap() on
 * tdb->map_ptr for tdb->map_size bytes.
 *
 * NOTE(review): lines of this body are not visible in this listing,
 * including the statement guarded by the TDB_INTERNAL test (presumably an
 * early return) - confirm against the complete source.
 */
31 void tdb_munmap(struct tdb_context *tdb)
33 if (tdb->flags & TDB_INTERNAL)
37 munmap(tdb->map_ptr, tdb->map_size);
/*
 * Map the database file into memory and store the result in tdb->map_ptr.
 *
 * The mapping covers tdb->map_size bytes of tdb->fd starting at offset 0,
 * is MAP_SHARED, always readable, and writable unless tdb->read_only is
 * set.  A failed mmap() is reported through tdb->log at TDB_DEBUG_WARNING
 * level together with strerror(errno).
 *
 * NOTE(review): the statements guarded by the TDB_INTERNAL and TDB_NOMMAP
 * tests are not visible in this listing (presumably early returns), nor is
 * anything else done inside the MAP_FAILED branch - confirm.
 */
42 void tdb_mmap(struct tdb_context *tdb)
44 if (tdb->flags & TDB_INTERNAL)
47 if (tdb->flags & TDB_NOMMAP)
50 tdb->map_ptr = mmap(NULL, tdb->map_size,
51 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
52 MAP_SHARED, tdb->fd, 0);
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
57 if (tdb->map_ptr == MAP_FAILED) {
59 tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
60 "tdb_mmap failed for size %lld (%s)\n",
61 (long long)tdb->map_size, strerror(errno));
/*
 * Bounds check: is the database at least "len" bytes long?
 *
 * Visible behaviour:
 *  - len is first compared against the cached tdb->map_size;
 *  - for TDB_INTERNAL databases a fatal "beyond internal malloc size"
 *    message is logged with ecode TDB_ERR_IO;
 *  - otherwise the file is fstat()ed; ecode becomes TDB_ERR_IO if fstat()
 *    fails or if the file is shorter than len (the latter is also logged
 *    as fatal);
 *  - finally tdb->map_size is updated to st.st_size.
 *
 * NOTE(review): the return statements, any use of "probe", and the
 * unmap/remap calls around the map_size update are not visible in this
 * listing - confirm against the complete source.
 */
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and expand
68 note that "len" is the minimum length needed for the db
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
73 if (len <= tdb->map_size)
75 if (tdb->flags & TDB_INTERNAL) {
77 /* Ensure ecode is set for log fn. */
78 tdb->ecode = TDB_ERR_IO;
79 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
80 "tdb_oob len %lld beyond internal"
81 " malloc size %lld\n",
83 (long long)tdb->map_size);
88 if (fstat(tdb->fd, &st) == -1) {
89 tdb->ecode = TDB_ERR_IO;
/* NOTE(review): len is narrowed to size_t before this comparison; if
 * tdb_off_t is wider than size_t on some platform the value could be
 * truncated - confirm the type widths. */
93 if (st.st_size < (size_t)len) {
95 /* Ensure ecode is set for log fn. */
96 tdb->ecode = TDB_ERR_IO;
97 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
98 "tdb_oob len %lld beyond eof at %lld\n",
99 (long long)len, (long long)st.st_size);
104 /* Unmap, update size, remap */
106 tdb->map_size = st.st_size;
/*
 * Return a pointer straight into the mapping for the region that starts at
 * "off", computed as (char *)tdb->map_ptr + off.
 *
 * Three situations are checked before that: there is no mapping
 * (tdb->map_ptr is NULL), a transaction is active (tdb->transaction is
 * set), or tdb_oob() returns -1 for off + len.
 *
 * NOTE(review): what is returned in those three cases is not visible in
 * this listing (presumably NULL) - confirm.  Also, off + len is computed
 * without a visible overflow check.
 */
111 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
113 if (unlikely(!tdb->map_ptr))
116 /* FIXME: We can do a subset of this! */
117 if (tdb->transaction)
120 if (unlikely(tdb_oob(tdb, off + len, true) == -1))
122 return (char *)tdb->map_ptr + off;
/*
 * Fetch "len" bytes located at "off".
 *
 * When TDB_CONVERT is not set, tdb_direct() is tried first.  The last
 * visible statement reads into the caller-supplied "pad" with
 * tdb_read_convert() and returns pad, or NULL if that read returned -1.
 *
 * NOTE(review): how the tdb_direct() result is used is not visible in this
 * listing - confirm against the complete source.
 */
125 /* Either make a copy into pad and return that, or return ptr into mmap. */
126 /* Note: pad has to be a real object, so we can't get here if len
127 * overflows size_t */
128 void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
130 if (likely(!(tdb->flags & TDB_CONVERT))) {
131 void *ret = tdb_direct(tdb, off, len);
135 return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
/*
 * Byte-swap a buffer in place when the database needs endian conversion.
 *
 * If TDB_CONVERT is set in tdb->flags and buf is non-NULL, each of the
 * size / 8 64-bit words in buf is replaced by its bswap_64() value.  Bytes
 * past the last whole 8-byte word are not visited by this loop.
 *
 * NOTE(review): the return statement is not visible in this listing
 * (presumably buf, given the function is used as an argument elsewhere in
 * this file) - confirm.
 */
138 /* Endian conversion: we only ever deal with 8 byte quantities */
139 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
141 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
142 uint64_t i, *p = (uint64_t *)buf;
143 for (i = 0; i < size / 8; i++)
144 p[i] = bswap_64(p[i]);
/*
 * Scan an array of "num" offsets stored in the database at "off".
 *
 * The whole array (num * sizeof(tdb_off_t) bytes) is obtained with one
 * tdb_access_read() call, requested without conversion, iterated over, and
 * handed back with tdb_access_release().
 *
 * NOTE(review): the remainder of the parameter list, the loop body and the
 * return statement are not visible in this listing; the existing comment
 * below states the intended result.
 */
149 /* Return first non-zero offset in num offset array, or num. */
150 /* FIXME: Return the off? */
151 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
157 /* Zero vs non-zero is the same unconverted: minor optimization. */
158 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
162 for (i = 0; i < num; i++) {
166 tdb_access_release(tdb, val);
/*
 * Scan an array of "num" offsets stored in the database at "off".
 *
 * The whole array (num * sizeof(tdb_off_t) bytes) is obtained with one
 * tdb_access_read() call, requested without conversion, iterated over, and
 * handed back with tdb_access_release().
 *
 * NOTE(review): the remainder of the parameter list, the loop body and the
 * return statement are not visible in this listing; the existing comment
 * below states the intended result.
 */
170 /* Return first zero offset in num offset array, or num. */
171 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
177 /* Zero vs non-zero is the same unconverted: minor optimization. */
178 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
182 for (i = 0; i < num; i++) {
186 tdb_access_release(tdb, val);
/*
 * Zero "len" bytes of the database starting at "off".
 *
 * A direct pointer to the region is requested with tdb_direct().  The
 * visible fallback writes from an 8192-byte zero-initialised stack buffer
 * through tdb->methods->write, at most sizeof(buf) bytes per call.
 *
 * NOTE(review): the branch on "p", the enclosing loop and the return
 * values are not visible in this listing - confirm.
 */
190 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
192 char buf[8192] = { 0 };
193 void *p = tdb_direct(tdb, off, len);
/* Chunk size for one write call: the remaining length, capped at the
 * size of the zero buffer. */
199 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
200 if (tdb->methods->write(tdb, off, buf, todo) == -1)
/*
 * Read the offset value stored at "off".
 *
 * The value is fetched with tdb_get(), using the local "pad" (sizeof(pad)
 * bytes) as the bounce buffer.
 *
 * NOTE(review): the declarations, the handling of a failed tdb_get() and
 * the return statement are not visible in this listing.
 */
208 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
212 ret = tdb_get(tdb, off, &pad, sizeof(pad));
/*
 * pwrite() wrapper: write "len" bytes from "buf" to "fd" at file offset
 * "off".
 *
 * The visible code advances buf by the byte count pwrite() reported, which
 * matches the short-write handling described in the comment below.
 *
 * NOTE(review): the loop, the error checks, the updates of off/len and the
 * return values are not visible in this listing.
 */
219 /* Even on files, we can get partial writes due to signals. */
220 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
224 ret = pwrite(fd, buf, len, off);
231 buf = (char *)buf + ret;
/*
 * pread() wrapper: read "len" bytes into "buf" from "fd" at file offset
 * "off".
 *
 * The visible code advances buf by the byte count pread() reported, which
 * matches the short-read handling described in the comment below.
 *
 * NOTE(review): the loop, the error checks, the updates of off/len and the
 * return values are not visible in this listing.
 */
238 /* Even on files, we can get partial reads due to signals. */
239 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
243 ret = pread(fd, buf, len, off);
251 buf = (char *)buf + ret;
/*
 * read() wrapper: read "len" bytes into "buf" from the current position of
 * "fd".
 *
 * The visible code advances buf by the byte count read() reported.
 *
 * NOTE(review): the loop, the error checks and the return values are not
 * visible in this listing.
 */
258 bool tdb_read_all(int fd, void *buf, size_t len)
262 ret = read(fd, buf, len);
270 buf = (char *)buf + ret;
/*
 * Store "len" bytes from "buf" at database offset "off".
 *
 * Visible steps: a read-only database sets ecode TDB_ERR_RDONLY; off + len
 * is bounds-checked through tdb->methods->oob; the data is then either
 * copied into the mapping with memcpy() or written with tdb_pwrite_all(),
 * whose failure sets ecode TDB_ERR_IO and is logged as fatal.
 *
 * NOTE(review): the condition that selects memcpy() versus pwrite, and all
 * return statements, are not visible in this listing.
 */
276 /* write a lump of data at a specified offset */
277 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
278 const void *buf, tdb_len_t len)
284 if (tdb->read_only) {
285 tdb->ecode = TDB_ERR_RDONLY;
289 if (tdb->methods->oob(tdb, off + len, 0) != 0)
293 memcpy(off + (char *)tdb->map_ptr, buf, len);
295 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
296 tdb->ecode = TDB_ERR_IO;
/* NOTE(review): off and len are passed to %llu without a cast here, while
 * other log calls in this file cast to (long long); this relies on the
 * typedefs matching unsigned long long - confirm. */
297 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
298 "tdb_write failed at %llu len=%llu (%s)\n",
299 off, len, strerror(errno));
/*
 * Fetch "len" bytes at database offset "off" into "buf".
 *
 * Visible steps: off + len is bounds-checked through tdb->methods->oob;
 * the data is then either copied out of the mapping with memcpy() or read
 * with tdb_pread_all(), whose failure sets ecode TDB_ERR_IO and is logged
 * as fatal together with the current map_size.
 *
 * NOTE(review): the rest of the parameter list, the condition that selects
 * memcpy() versus pread, and all return statements are not visible in this
 * listing.
 */
306 /* read a lump of data at a specified offset */
307 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
310 if (tdb->methods->oob(tdb, off + len, 0) != 0) {
315 memcpy(buf, off + (char *)tdb->map_ptr, len);
317 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
318 /* Ensure ecode is set for log fn. */
319 tdb->ecode = TDB_ERR_IO;
320 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
321 "tdb_read failed at %lld "
322 "len=%lld (%s) map_size=%lld\n",
323 (long long)off, (long long)len,
325 (long long)tdb->map_size);
/*
 * Write "len" bytes from "rec" at "off", byte-swapping first if needed.
 *
 * With TDB_CONVERT set, rec is copied into a malloc()ed scratch buffer,
 * passed through tdb_convert() and written from there, so the caller's
 * const data is not modified.  Without TDB_CONVERT rec is written as is.
 * A failed allocation sets ecode TDB_ERR_OOM and is logged as fatal.
 *
 * NOTE(review): the release of the scratch buffer and the return
 * statements are not visible in this listing - confirm.
 */
332 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
333 const void *rec, size_t len)
336 if (unlikely((tdb->flags & TDB_CONVERT))) {
337 void *conv = malloc(len);
339 tdb->ecode = TDB_ERR_OOM;
340 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
341 "tdb_write: no memory converting %zu bytes\n",
345 memcpy(conv, rec, len);
346 ret = tdb->methods->write(tdb, off,
347 tdb_convert(tdb, conv, len), len);
350 ret = tdb->methods->write(tdb, off, rec, len);
/*
 * Read "len" bytes at "off" into "rec" through tdb->methods->read, then
 * run tdb_convert() over rec.
 *
 * NOTE(review): in the visible lines the conversion is applied without
 * looking at "ret" first, i.e. also when the read reported failure.  The
 * return statement is not visible in this listing (presumably ret).
 */
355 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
356 void *rec, size_t len)
358 int ret = tdb->methods->read(tdb, off, rec, len);
359 tdb_convert(tdb, rec, len);
/*
 * Store the single offset value "val" at database offset "off".
 *
 * Delegates to tdb_write_convert() with the address and size of val and
 * returns its result.
 */
363 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
365 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * Allocate a buffer and fill it with "len" bytes read from "offset".
 *
 * At least one byte is always requested from malloc().  A failed
 * allocation sets ecode TDB_ERR_OOM and is logged at TDB_DEBUG_ERROR
 * level.  Any non-zero result from tdb->methods->read is treated as the
 * failure case.
 *
 * NOTE(review): what happens to buf when the read fails, and the return
 * statement, are not visible in this listing - confirm.
 */
368 /* read a lump of data, allocating the space for it */
369 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
373 /* some systems don't like zero length malloc */
374 buf = malloc(len ? len : 1);
375 if (unlikely(!buf)) {
376 tdb->ecode = TDB_ERR_OOM;
377 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
378 "tdb_alloc_read malloc failed len=%lld\n",
380 } else if (unlikely(tdb->methods->read(tdb, offset, buf, len))) {
/*
 * Compute the hash of the key belonging to the record stored at "off".
 *
 * The record header is fetched with tdb_get() (using "pad" as bounce
 * buffer), the key length is taken from rec_key_length(), and the klen key
 * bytes that immediately follow the header (off + sizeof(pad)) are read
 * with tdb_access_read(), hashed with tdb_hash() and released again.
 *
 * NOTE(review): the error handling after tdb_get() and tdb_access_read(),
 * and the return statement, are not visible in this listing.
 */
387 uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
389 struct tdb_used_record pad, *r;
393 r = tdb_get(tdb, off, &pad, sizeof(pad));
398 klen = rec_key_length(r);
399 key = tdb_access_read(tdb, off + sizeof(pad), klen, false);
403 hash = tdb_hash(tdb, key, klen);
404 tdb_access_release(tdb, key);
/*
 * Write the pattern buffer "buf" (of "size" bytes) to the file at "off" to
 * cover "len" bytes.
 *
 * Each write uses n = min(len, size) bytes via tdb_pwrite_all(); a failed
 * write sets ecode TDB_ERR_IO and is logged as fatal.
 *
 * NOTE(review): the enclosing loop, the updates of off/len and the return
 * values are not visible in this listing.
 */
408 static int fill(struct tdb_context *tdb,
409 const void *buf, size_t size,
410 tdb_off_t off, tdb_len_t len)
413 size_t n = len > size ? size : len;
415 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
416 tdb->ecode = TDB_ERR_IO;
417 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
418 "fill write failed: giving up!\n");
/*
 * Grow the database by "addition" bytes.
 *
 * Visible behaviour:
 *  - a read-only database sets ecode TDB_ERR_RDONLY;
 *  - a TDB_INTERNAL database is grown with realloc() of tdb->map_ptr into
 *    a temporary pointer, with ecode TDB_ERR_OOM on the failure path, and
 *    map_size is increased by addition;
 *  - a file-backed database is extended with ftruncate() to
 *    map_size + addition, then the new region is overwritten with 0x43
 *    bytes via fill(), and map_size is increased by addition.
 *
 * NOTE(review): the unmap/remap calls, the declaration of "buf", the
 * handling of a failed ftruncate() and the return statements are not
 * visible in this listing - confirm against the complete source.
 */
427 /* expand a file. we prefer to use ftruncate, as that is what posix
428 says to use for mmap expansion */
429 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
433 if (tdb->read_only) {
434 tdb->ecode = TDB_ERR_RDONLY;
438 if (tdb->flags & TDB_INTERNAL) {
439 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
441 tdb->ecode = TDB_ERR_OOM;
445 tdb->map_size += addition;
447 /* Unmap before trying to write; old TDB claimed OpenBSD had
448 * problem with this otherwise. */
451 /* If this fails, we try to fill anyway. */
452 if (ftruncate(tdb->fd, tdb->map_size + addition))
455 /* now fill the file with something. This ensures that the
456 file isn't sparse, which would be very bad if we ran out of
457 disk. This must be done with write, not via mmap */
458 memset(buf, 0x43, sizeof(buf));
459 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
461 tdb->map_size += addition;
/*
 * Obtain read access to "len" bytes at "off".
 *
 * Without TDB_CONVERT a direct pointer is requested from tdb_direct().
 * The other visible path reads the region into a freshly allocated buffer
 * with tdb_alloc_read() and runs tdb_convert() over it.
 *
 * NOTE(review): the conditions guarding the tdb_alloc_read() and
 * tdb_convert() calls (including how "convert" is used) and the return
 * statement are not visible in this listing.
 */
467 const void *tdb_access_read(struct tdb_context *tdb,
468 tdb_off_t off, tdb_len_t len, bool convert)
470 const void *ret = NULL;
472 if (likely(!(tdb->flags & TDB_CONVERT)))
473 ret = tdb_direct(tdb, off, len);
476 ret = tdb_alloc_read(tdb, off, len);
478 tdb_convert(tdb, (void *)ret, len);
/*
 * Finish an access started on pointer "p".
 *
 * The visible part of the condition is true when p lies outside the range
 * [tdb->map_ptr, tdb->map_ptr + tdb->map_size), i.e. when p does not point
 * into the mapping.
 *
 * NOTE(review): the first term of the condition and the guarded statement
 * are not visible in this listing (presumably the buffer is freed) -
 * confirm.
 */
483 void tdb_access_release(struct tdb_context *tdb, const void *p)
486 || (char *)p < (char *)tdb->map_ptr
487 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
/*
 * Store "len" bytes from "buf" at database offset "off".
 *
 * Visible steps: a read-only database or an active read traverse sets
 * ecode TDB_ERR_RDONLY; off + len is bounds-checked through
 * tdb->methods->tdb_oob; the data is then either copied into the mapping
 * with memcpy() or written with pwrite().  A short (but not failed) write
 * is logged and retried once for the remaining bytes; a failed write and a
 * second short write are each logged as fatal with ecode TDB_ERR_IO.
 *
 * NOTE(review): this is the second static definition named tdb_write in
 * this listing.  The condition selecting memcpy() versus pwrite(), the
 * remaining arguments of the retry call, the condition of the "failed at"
 * branch and all return statements are not visible here.
 */
492 /* write a lump of data at a specified offset */
493 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
494 const void *buf, tdb_len_t len)
500 if (tdb->read_only || tdb->traverse_read) {
501 tdb->ecode = TDB_ERR_RDONLY;
505 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
509 memcpy(off + (char *)tdb->map_ptr, buf, len);
511 ssize_t written = pwrite(tdb->fd, buf, len, off);
/* Partial write: some bytes were written, but fewer than requested. */
512 if ((written != (ssize_t)len) && (written != -1)) {
514 tdb->ecode = TDB_ERR_IO;
515 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
516 "%d of %d bytes at %d, trying once more\n",
517 (int)written, len, off));
518 written = pwrite(tdb->fd, (const char *)buf+written,
523 /* Ensure ecode is set for log fn. */
524 tdb->ecode = TDB_ERR_IO;
525 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
526 "len=%d (%s)\n", off, len, strerror(errno)));
528 } else if (written != (ssize_t)len) {
529 tdb->ecode = TDB_ERR_IO;
530 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
531 "write %d bytes at %d in two attempts\n",
/*
 * Advance *chain to the next hash chain whose head is non-zero.
 *
 * Two scans over h < tdb->header.hash_size are visible: one reads each
 * head directly from the mapping as a uint32_t at TDB_HASH_TOP(h), the
 * other reads it with tdb_ofs_read() and also stops when that read fails.
 *
 * NOTE(review): the initialisation of h, the condition choosing between
 * the two scans (presumably whether a mapping exists), the loop exits and
 * the final store to *chain are not visible in this listing.
 */
542 do an unlocked scan of the hash table heads to find the next non-zero head. The value
543 will then be confirmed with the lock held
545 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
549 for (;h < tdb->header.hash_size;h++) {
550 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
556 for (;h < tdb->header.hash_size;h++) {
557 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
/*
 * Read one tdb_off_t from database offset "offset" into *d.
 *
 * Delegates to tdb->methods->tdb_read for sizeof(*d) bytes, passing
 * DOCONV() as the conversion argument, and returns its result.
 */
565 /* read/write a tdb_off_t */
566 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
568 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/*
 * Write one tdb_off_t at database offset "offset".
 *
 * The value written is CONVERT(off), sizeof(*d) bytes long, passed to
 * tdb->methods->tdb_write whose result is returned.
 *
 * NOTE(review): the declaration of the local "off" is not visible in this
 * listing (presumably a copy of *d, so the caller's value is not
 * converted in place) - confirm.
 */
571 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
574 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
/*
 * Read the record header stored at "offset" into *rec and sanity-check it.
 *
 * The header is read with tdb->methods->tdb_read (with DOCONV()).  If
 * TDB_BAD_MAGIC(rec) is true, ecode becomes TDB_ERR_CORRUPT and a fatal
 * message is logged.  On the final visible path the function returns the
 * result of bounds-checking rec->next + sizeof(*rec) with
 * tdb->methods->tdb_oob.
 *
 * NOTE(review): the return statements of the read-failure and bad-magic
 * branches are not visible in this listing.
 */
578 /* read/write a record */
579 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
581 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
583 if (TDB_BAD_MAGIC(rec)) {
584 /* Ensure ecode is set for log fn. */
585 tdb->ecode = TDB_ERR_CORRUPT;
586 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
589 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/*
 * Write the record header *rec at database offset "offset".
 *
 * The header is first copied into the local "r", so CONVERT() operates on
 * the copy rather than on the caller's structure; the copy is then written
 * with tdb->methods->tdb_write and that result is returned.
 */
592 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
594 struct tdb_record r = *rec;
595 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/*
 * Default table of I/O methods, installed by tdb_io_init().
 *
 * NOTE(review): the initializer entries are not visible in this listing.
 */
599 static const struct tdb_methods io_methods = {
/*
 * Point tdb->methods at the file-local io_methods table.
 */
607 initialise the default methods table
609 void tdb_io_init(struct tdb_context *tdb)
611 tdb->methods = &io_methods;