2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/* Drop the memory mapping of the database: munmap()s map_size bytes at
 * map_ptr.
 * NOTE(review): this listing omits lines (the embedded numbers skip), so
 * the statement guarded by the TDB_INTERNAL test is not visible here;
 * presumably internal databases return without unmapping - confirm. */
32 void tdb_munmap(struct tdb_context *tdb)
34 if (tdb->flags & TDB_INTERNAL)
38 munmap(tdb->map_ptr, tdb->map_size);
/* Map the database file into memory.
 * mmap()s map_size bytes of tdb->fd, starting at file offset 0, as
 * MAP_SHARED with protection tdb->mmap_flags, and stores the result in
 * tdb->map_ptr.  The TDB_INTERNAL and TDB_NOMMAP flags are tested first.
 * NOTE(review): the statements guarded by those two tests and the rest of
 * the MAP_FAILED branch are not visible in this listing - confirm that
 * they return early and what map_ptr is left as on failure. */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52 MAP_SHARED, tdb->fd, 0);
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
/* A failed mmap() is reported through the log callback at
 * TDB_DEBUG_WARNING level together with strerror(errno). */
57 if (tdb->map_ptr == MAP_FAILED) {
59 tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
60 "tdb_mmap failed for size %lld (%s)\n",
61 (long long)tdb->map_size, strerror(errno));
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and, if so, expand
68 our view of it. note that "len" is the minimum length needed for the db */
/* Bounds check: make sure the database is at least len bytes long.
 * tdb:   database context.
 * len:   minimum length required of the database.
 * probe: NOTE(review) its use is not visible in this listing; tdb_direct
 *        calls this with true, the methods->oob calls in tdb_write and
 *        tdb_read pass 0 - presumably it controls error logging, confirm.
 * Callers in this file treat -1 (tdb_direct) or any non-zero value
 * (methods->oob users) as failure; the return statements themselves are
 * not visible here. */
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
75 /* We can't hold pointers during this: we could unmap! */
76 assert(!tdb->direct_access
77 || (tdb->flags & TDB_NOLOCK)
78 || tdb_has_expansion_lock(tdb));
/* Fast path: the request is already covered by the current map_size. */
80 if (len <= tdb->map_size)
/* Internal databases: the overrun is reported against map_size as a
 * fatal error with ecode TDB_ERR_IO. */
82 if (tdb->flags & TDB_INTERNAL) {
84 /* Ensure ecode is set for log fn. */
85 tdb->ecode = TDB_ERR_IO;
86 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
87 "tdb_oob len %lld beyond internal"
88 " malloc size %lld\n",
90 (long long)tdb->map_size);
/* File-backed databases: fstat() the file while holding the expansion
 * lock for reading (F_RDLCK), then drop the lock again. */
95 if (tdb_lock_expand(tdb, F_RDLCK) != 0)
98 ret = fstat(tdb->fd, &st);
100 tdb_unlock_expand(tdb, F_RDLCK);
103 tdb->ecode = TDB_ERR_IO;
/* The file itself is still shorter than the requested length.
 * NOTE(review): st.st_size is signed (off_t) and is compared against a
 * size_t cast of len here. */
107 if (st.st_size < (size_t)len) {
109 /* Ensure ecode is set for log fn. */
110 tdb->ecode = TDB_ERR_IO;
111 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
112 "tdb_oob len %lld beyond eof at %lld\n",
113 (long long)len, (long long)st.st_size);
118 /* Unmap, update size, remap */
121 tdb->map_size = st.st_size;
/* Try to give direct access to len bytes at offset off inside the mapping.
 * Returns map_ptr + off once the range has passed tdb_oob(off + len) with
 * probe set to true.
 * NOTE(review): what is returned when there is no mapping, when a
 * transaction is active, or when the bounds check yields -1 is not
 * visible in this listing; presumably NULL so that callers fall back to
 * copying - confirm. */
126 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
128 if (unlikely(!tdb->map_ptr))
131 /* FIXME: We can do a subset of this! */
132 if (tdb->transaction)
135 if (unlikely(tdb_oob(tdb, off + len, true) == -1))
137 return (char *)tdb->map_ptr + off;
140 /* Either make a copy into pad and return that, or return ptr into mmap. */
141 /* Note: pad has to be a real object, so we can't get here if len
142 * overflows size_t */
/* tdb: database context.
 * off: offset of the data in the database.
 * pad: caller-supplied buffer that receives len bytes when a copy is made.
 * len: number of bytes wanted.
 * Without TDB_CONVERT a direct pointer from tdb_direct is tried first.
 * The final fallback reads into pad through tdb_read_convert and yields
 * NULL when that returns -1, otherwise pad.
 * NOTE(review): the test on the tdb_direct result is not visible in this
 * listing. */
143 void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
145 if (likely(!(tdb->flags & TDB_CONVERT))) {
146 void *ret = tdb_direct(tdb, off, len);
150 return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
153 /* Endian conversion: we only ever deal with 8 byte quantities */
/* When TDB_CONVERT is set and buf is non-NULL, byte-swaps buf in place as
 * an array of size / 8 64-bit words.  size is in bytes; a remainder of
 * size % 8 bytes is not touched by the loop.
 * NOTE(review): the return statement is not visible in this listing;
 * tdb_write_convert hands the result on as the buffer to write, so
 * presumably buf is returned - confirm. */
154 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
156 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
157 uint64_t i, *p = (uint64_t *)buf;
158 for (i = 0; i < size / 8; i++)
159 p[i] = bswap_64(p[i]);
164 /* FIXME: Return the off? */
/* Examine entries start .. end - 1 of the tdb_off_t array located at
 * offset base.  The (end - start) entries are fetched with a single
 * tdb_access_read without endian conversion and handed back with
 * tdb_access_release.
 * NOTE(review): the loop body, the error path and the return statement
 * are not visible in this listing; going by the name, presumably the
 * position of the first non-zero entry is returned - confirm. */
165 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
166 tdb_off_t base, uint64_t start, uint64_t end)
171 /* Zero vs non-zero is the same unconverted: minor optimization. */
172 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
173 (end - start) * sizeof(tdb_off_t), false);
177 for (i = 0; i < (end - start); i++) {
181 tdb_access_release(tdb, val);
185 /* Return first zero offset in num offset array, or num. */
/* The num tdb_off_t entries starting at off are fetched with a single
 * tdb_access_read without endian conversion, scanned from index 0, and
 * handed back with tdb_access_release.
 * NOTE(review): the rest of the parameter list, the loop body and the
 * return statement are not visible in this listing. */
186 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
192 /* Zero vs non-zero is the same unconverted: minor optimization. */
193 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
197 for (i = 0; i < num; i++) {
201 tdb_access_release(tdb, val);
/* Zero len bytes of the database starting at offset off.
 * First asks tdb_direct for a pointer into the mapping (p).  The visible
 * fallback writes from a zero-filled 8192-byte stack buffer through
 * methods->write, at most sizeof(buf) bytes per call.
 * NOTE(review): the use of p, the loop around the chunked write and the
 * return values are not visible in this listing. */
205 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
207 char buf[8192] = { 0 };
208 void *p = tdb_direct(tdb, off, len);
/* Size of the next piece: the remaining length capped at the buffer size. */
214 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
215 if (tdb->methods->write(tdb, off, buf, todo) == -1)
/* Read one tdb_off_t stored at offset off, using tdb_get with a local
 * pad of sizeof(pad) bytes as the copy target.
 * NOTE(review): the declarations, the handling of a failed tdb_get and
 * the return statement are not visible in this listing. */
223 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
227 ret = tdb_get(tdb, off, &pad, sizeof(pad));
234 /* Even on files, we can get partial writes due to signals. */
/* Write len bytes from buf to fd at file offset off with pwrite(),
 * advancing buf by the number of bytes written after each call.
 * NOTE(review): the loop, the error checks, the updates of len and off
 * and the return statements are not visible in this listing; callers in
 * this file treat a false result as an I/O failure. */
235 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
239 ret = pwrite(fd, buf, len, off);
246 buf = (char *)buf + ret;
253 /* Even on files, we can get partial reads due to signals. */
/* Read len bytes from fd at file offset off into buf with pread(),
 * advancing buf by the number of bytes read after each call.
 * NOTE(review): the loop, the error and end-of-file checks, the updates
 * of len and off and the return statements are not visible in this
 * listing; tdb_read treats a false result as an I/O failure. */
254 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
258 ret = pread(fd, buf, len, off);
266 buf = (char *)buf + ret;
/* Read len bytes from fd into buf with read(), i.e. from the current
 * file position, advancing buf by the number of bytes read.
 * NOTE(review): the loop, the error and end-of-file checks and the
 * return statements are not visible in this listing. */
273 bool tdb_read_all(int fd, void *buf, size_t len)
277 ret = read(fd, buf, len);
285 buf = (char *)buf + ret;
291 /* write a lump of data at a specified offset */
/* tdb: database context; off: destination offset; buf: source bytes;
 * len: number of bytes.
 * A read-only database is refused with ecode TDB_ERR_RDONLY.  The range
 * end off + len is checked through methods->oob, then the data is either
 * memcpy()d into the mapping or written to the fd with tdb_pwrite_all; a
 * failed tdb_pwrite_all sets ecode TDB_ERR_IO and is logged as fatal.
 * NOTE(review): the condition choosing between memcpy and pwrite and the
 * return statements are not visible in this listing. */
292 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
293 const void *buf, tdb_len_t len)
299 if (tdb->read_only) {
300 tdb->ecode = TDB_ERR_RDONLY;
304 if (tdb->methods->oob(tdb, off + len, 0) != 0)
308 memcpy(off + (char *)tdb->map_ptr, buf, len);
310 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
311 tdb->ecode = TDB_ERR_IO;
/* NOTE(review): the format uses %llu while the arguments are cast to
 * (long long), which is signed. */
312 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
313 "tdb_write failed at %llu len=%llu (%s)\n",
314 (long long)off, (long long)len,
322 /* read a lump of data at a specified offset */
/* tdb: database context; off: source offset; buf: destination buffer.
 * The range end off + len is checked through methods->oob, then the data
 * is either memcpy()d out of the mapping or read from the fd with
 * tdb_pread_all; a failed tdb_pread_all sets ecode TDB_ERR_IO and is
 * logged as fatal together with the current map_size.
 * NOTE(review): the rest of the parameter list, the condition choosing
 * between memcpy and pread and the return statements are not visible in
 * this listing. */
323 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
326 if (tdb->methods->oob(tdb, off + len, 0) != 0) {
331 memcpy(buf, off + (char *)tdb->map_ptr, len);
333 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
334 /* Ensure ecode is set for log fn. */
335 tdb->ecode = TDB_ERR_IO;
336 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
337 "tdb_read failed at %lld "
338 "len=%lld (%s) map_size=%lld\n",
339 (long long)off, (long long)len,
341 (long long)tdb->map_size);
/* Write len bytes from rec at offset off, byte-swapping when the database
 * has TDB_CONVERT set.
 * With TDB_CONVERT: a scratch copy of rec is malloc()ed, converted with
 * tdb_convert and written through methods->write, so rec itself is left
 * unmodified.  The allocation-failure branch sets ecode TDB_ERR_OOM and
 * logs a fatal message.
 * Without TDB_CONVERT: rec is written directly through methods->write.
 * NOTE(review): the release of the scratch copy and the return statement
 * are not visible in this listing. */
348 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
349 const void *rec, size_t len)
352 if (unlikely((tdb->flags & TDB_CONVERT))) {
353 void *conv = malloc(len);
355 tdb->ecode = TDB_ERR_OOM;
356 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
357 "tdb_write: no memory converting %zu bytes\n",
361 memcpy(conv, rec, len);
362 ret = tdb->methods->write(tdb, off,
363 tdb_convert(tdb, conv, len), len);
366 ret = tdb->methods->write(tdb, off, rec, len);
/* Read len bytes at offset off into rec through methods->read, then run
 * tdb_convert over rec in place (a byte swap only when TDB_CONVERT is
 * set).  In the visible lines the conversion call follows the read
 * directly, without a test of ret.
 * NOTE(review): the return statement is not visible in this listing;
 * tdb_get compares the result against -1. */
371 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
372 void *rec, size_t len)
374 int ret = tdb->methods->read(tdb, off, rec, len);
375 tdb_convert(tdb, rec, len);
/* Store the single offset value val at offset off.  Delegates to
 * tdb_write_convert with the address and size of the by-value parameter
 * and returns its result. */
379 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
381 return tdb_write_convert(tdb, off, &val, sizeof(val));
/* Allocate prefix + len bytes and read len bytes from offset into the
 * buffer, starting just after the first prefix bytes.
 * prefix: number of bytes reserved in front of the data; tdb_access_read
 *         and tdb_access_write pass sizeof(struct tdb_access_hdr),
 *         tdb_alloc_read passes 0.
 * A failed malloc() sets ecode TDB_ERR_OOM and is logged as an error.
 * NOTE(review): the handling of a failing methods->read and the return
 * statement are not visible in this listing. */
384 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
385 tdb_len_t len, unsigned int prefix)
389 /* some systems don't like zero length malloc */
/* The sum prefix + len is evaluated first; the conditional then asks for
 * one byte when that sum is zero. */
390 buf = malloc(prefix + len ? prefix + len : 1);
391 if (unlikely(!buf)) {
392 tdb->ecode = TDB_ERR_OOM;
393 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
394 "tdb_alloc_read malloc failed len=%lld\n",
395 (long long)prefix + len);
396 } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix, len))) {
403 /* read a lump of data, allocating the space for it */
/* Thin wrapper around _tdb_alloc_read with no prefix bytes: returns
 * whatever that helper returns for len bytes at offset.
 * NOTE(review): presumably the caller owns and frees the result -
 * confirm against the callers. */
404 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
406 return _tdb_alloc_read(tdb, offset, len, 0);
/* Write the size-byte pattern in buf to the file at offset off, covering
 * len bytes, at most size bytes per tdb_pwrite_all call.
 * A failed write sets ecode TDB_ERR_IO and is logged as fatal.
 * NOTE(review): the loop advancing off and len and the return statements
 * are not visible in this listing; tdb_expand_file compares the result
 * against -1. */
409 static int fill(struct tdb_context *tdb,
410 const void *buf, size_t size,
411 tdb_off_t off, tdb_len_t len)
/* Size of the next piece: the remaining length capped at the pattern size. */
414 size_t n = len > size ? size : len;
416 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
417 tdb->ecode = TDB_ERR_IO;
418 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
419 "fill write failed: giving up!\n");
428 /* expand a file. we prefer to use ftruncate, as that is what posix
429 says to use for mmap expansion */
/* Grow the database by addition bytes.
 * A read-only database is refused with ecode TDB_ERR_RDONLY.
 * TDB_INTERNAL: map_ptr is realloc()ed to map_size + addition into a
 * temporary (ecode TDB_ERR_OOM is set in the failure branch) and map_size
 * is increased.
 * Otherwise: the fd is ftruncate()d to map_size + addition, the new
 * region is filled with 0x43 bytes through fill(), and map_size is
 * increased.
 * NOTE(review): the unmap and remap calls, the declaration of buf, the
 * tests on the realloc and ftruncate results and the return statements
 * are not visible in this listing. */
430 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
434 if (tdb->read_only) {
435 tdb->ecode = TDB_ERR_RDONLY;
439 if (tdb->flags & TDB_INTERNAL) {
440 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
442 tdb->ecode = TDB_ERR_OOM;
446 tdb->map_size += addition;
448 /* Unmap before trying to write; old TDB claimed OpenBSD had
449 * problem with this otherwise. */
452 /* If this fails, we try to fill anyway. */
453 if (ftruncate(tdb->fd, tdb->map_size + addition))
456 /* now fill the file with something. This ensures that the
457 file isn't sparse, which would be very bad if we ran out of
458 disk. This must be done with write, not via mmap */
459 memset(buf, 0x43, sizeof(buf));
460 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
462 tdb->map_size += addition;
468 /* This is only needed for tdb_access_commit, but used everywhere to simplify. */
/* Header kept directly in front of heap copies handed out by
 * tdb_access_read and tdb_access_write; tdb_access_release and
 * tdb_access_commit step back one header from the user pointer to find it.
 * NOTE(review): the member declarations are not visible in this listing;
 * code in this file uses hdr->off, hdr->len and hdr->convert. */
469 struct tdb_access_hdr {
/* Obtain read access to len bytes at offset off.
 * Without TDB_CONVERT a direct pointer from tdb_direct is tried first.
 * The other visible path allocates a copy with _tdb_alloc_read, reserving
 * room for a struct tdb_access_hdr in front of the data; tdb_convert is
 * applied to the data and tdb->direct_access is incremented on paths
 * whose conditions are not visible here.
 * NOTE(review): presumably convert selects whether the copy is
 * byte-swapped, and direct_access counts outstanding direct pointers -
 * confirm.  Pair every successful call with tdb_access_release. */
475 const void *tdb_access_read(struct tdb_context *tdb,
476 tdb_off_t off, tdb_len_t len, bool convert)
478 const void *ret = NULL;
480 if (likely(!(tdb->flags & TDB_CONVERT)))
481 ret = tdb_direct(tdb, off, len);
484 struct tdb_access_hdr *hdr;
485 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
489 tdb_convert(tdb, (void *)ret, len);
492 tdb->direct_access++;
/* Obtain write access to len bytes at offset off.
 * Without TDB_CONVERT a direct pointer from tdb_direct is tried first.
 * The other visible path allocates a copy with _tdb_alloc_read, reserving
 * room for a struct tdb_access_hdr in front of the data, and records the
 * convert argument in hdr->convert for a later tdb_access_commit.
 * tdb_convert is applied to the data and tdb->direct_access is
 * incremented on paths whose conditions are not visible here.
 * NOTE(review): the remaining header assignments and the return
 * statement are not visible in this listing. */
497 void *tdb_access_write(struct tdb_context *tdb,
498 tdb_off_t off, tdb_len_t len, bool convert)
502 if (likely(!(tdb->flags & TDB_CONVERT)))
503 ret = tdb_direct(tdb, off, len);
506 struct tdb_access_hdr *hdr;
507 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
511 hdr->convert = convert;
514 tdb_convert(tdb, (void *)ret, len);
517 tdb->direct_access++;
/* Give back a pointer p obtained from tdb_access_read or tdb_access_write.
 * A pointer lying outside the range map_ptr .. map_ptr + map_size is
 * taken to be a heap copy: the struct tdb_access_hdr in front of it is
 * the start of the allocation and is free()d.  tdb->direct_access is
 * decremented on the other visible path.
 * NOTE(review): the first operand of the || chain and the else keyword
 * are not visible in this listing. */
522 void tdb_access_release(struct tdb_context *tdb, const void *p)
525 || (char *)p < (char *)tdb->map_ptr
526 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
527 free((struct tdb_access_hdr *)p - 1);
529 tdb->direct_access--;
/* Finish a write access started with tdb_access_write.
 * For a pointer lying outside the range map_ptr .. map_ptr + map_size
 * the struct tdb_access_hdr in front of p supplies the offset and length,
 * and the data is written back with either tdb_write_convert or
 * tdb_write.  tdb->direct_access is decremented on the other visible
 * path.
 * NOTE(review): the first operand of the || chain, the test choosing
 * between the two write calls (presumably hdr->convert), the release of
 * the copy and the return statement are not visible in this listing. */
532 int tdb_access_commit(struct tdb_context *tdb, void *p)
537 || (char *)p < (char *)tdb->map_ptr
538 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size) {
539 struct tdb_access_hdr *hdr;
541 hdr = (struct tdb_access_hdr *)p - 1;
543 ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
545 ret = tdb_write(tdb, hdr->off, p, hdr->len);
548 tdb->direct_access--;
554 /* write a lump of data at a specified offset */
/* NOTE(review): this is a second static tdb_write in the same listing.
 * It uses methods->tdb_oob and the TDB_LOG macro where the earlier
 * definition uses methods->oob and tdb->log, so the two look like
 * different variants of the library - confirm which one belongs here.
 *
 * Writes are refused with ecode TDB_ERR_RDONLY when the database is
 * read-only or a read traverse is active.  The range end off + len is
 * checked through methods->tdb_oob, then the data is either memcpy()d
 * into the mapping or written with pwrite().  A short (but not failed)
 * pwrite() is logged and retried once for the remainder; failure paths
 * set ecode TDB_ERR_IO.
 * NOTE(review): the condition choosing between memcpy and pwrite, the
 * remaining arguments of the retry and the return statements are not
 * visible in this listing.  The %d conversions are used for off and len,
 * whose types are not visible here. */
555 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
556 const void *buf, tdb_len_t len)
562 if (tdb->read_only || tdb->traverse_read) {
563 tdb->ecode = TDB_ERR_RDONLY;
567 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
571 memcpy(off + (char *)tdb->map_ptr, buf, len);
573 ssize_t written = pwrite(tdb->fd, buf, len, off);
/* Partial write: something was written, but less than len. */
574 if ((written != (ssize_t)len) && (written != -1)) {
576 tdb->ecode = TDB_ERR_IO;
577 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
578 "%d of %d bytes at %d, trying once more\n",
579 (int)written, len, off));
580 written = pwrite(tdb->fd, (const char *)buf+written,
585 /* Ensure ecode is set for log fn. */
586 tdb->ecode = TDB_ERR_IO;
587 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
588 "len=%d (%s)\n", off, len, strerror(errno)));
590 } else if (written != (ssize_t)len) {
591 tdb->ecode = TDB_ERR_IO;
592 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
593 "write %d bytes at %d in two attempts\n",
604 /* do an unlocked scan of the hash table heads to find the next non-zero head. The value
605 will then be confirmed with the lock held */
/* Advance over hash chains up to tdb->header.hash_size looking for a
 * chain whose head is non-zero.
 * Two scan loops are visible: one reads each head as a uint32_t straight
 * out of the mapping at TDB_HASH_TOP(h), the other reads it with
 * tdb_ofs_read and also stops when that read fails.
 * NOTE(review): the initialisation of h from chain, the condition
 * choosing between the two loops (presumably whether map_ptr is set),
 * the loop bodies and the store back into chain are not visible in this
 * listing. */
607 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
611 for (;h < tdb->header.hash_size;h++) {
612 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
618 for (;h < tdb->header.hash_size;h++) {
619 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
627 /* read/write a tdb_off_t */
/* Read one tdb_off_t stored at offset into *d through methods->tdb_read,
 * passing DOCONV() as the conversion argument, and return that result.
 * NOTE(review): DOCONV is defined elsewhere; presumably it requests
 * byte-order conversion when the database needs it - confirm. */
628 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
630 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/* Write one tdb_off_t at offset through methods->tdb_write and return
 * that result.  The value written is CONVERT(off), sizeof(*d) bytes long.
 * NOTE(review): the declaration of the local off (presumably a copy of
 * *d, so the caller value is not byte-swapped) is not visible in this
 * listing, and CONVERT is defined elsewhere. */
633 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
636 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
640 /* read/write a record */
/* Read the record header stored at offset into *rec through
 * methods->tdb_read, then validate it.
 * A header failing TDB_BAD_MAGIC sets ecode TDB_ERR_CORRUPT and is
 * logged as fatal.  Otherwise the result of the bounds check
 * methods->tdb_oob on rec->next + sizeof(*rec) is returned.
 * NOTE(review): the return statements of the two failure branches are
 * not visible in this listing. */
641 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
643 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
645 if (TDB_BAD_MAGIC(rec)) {
646 /* Ensure ecode is set for log fn. */
647 tdb->ecode = TDB_ERR_CORRUPT;
648 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
651 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/* Write the record header *rec at offset through methods->tdb_write and
 * return that result.  The header is first copied into the local r and
 * CONVERT is applied to the copy rather than to *rec.
 * NOTE(review): CONVERT is defined elsewhere; presumably it byte-swaps
 * its argument when the database needs it - confirm. */
654 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
656 struct tdb_record r = *rec;
657 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/* Method table that tdb_io_init installs as tdb->methods.
 * NOTE(review): the initialiser entries are not visible in this listing. */
661 static const struct tdb_methods io_methods = {
669 /* initialise the default methods table */
/* Point tdb->methods at the file-local io_methods table. */
671 void tdb_io_init(struct tdb_context *tdb)
673 tdb->methods = &io_methods;