2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/*
 * Drop the current mapping of the database.
 *
 * Two statements of the body are visible in this view: a test of the
 * TDB_INTERNAL flag (its branch is not shown) and the munmap() of
 * map_ptr over map_size bytes.
 */
32 void tdb_munmap(struct tdb_context *tdb)
34 if (tdb->flags & TDB_INTERNAL)
38 munmap(tdb->map_ptr, tdb->map_size);
/*
 * Map the database file into memory.
 *
 * TDB_INTERNAL and TDB_NOMMAP are tested first (their branches are not
 * visible in this view).  Otherwise map_size bytes of tdb->fd are mapped
 * MAP_SHARED from file offset 0, with tdb->mmap_flags passed as the
 * protection argument of mmap().  Failure is detected by comparing the
 * result with MAP_FAILED and is logged at TDB_DEBUG_WARNING together
 * with strerror(errno).
 */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52 MAP_SHARED, tdb->fd, 0);
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
57 if (tdb->map_ptr == MAP_FAILED) {
59 tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
60 "tdb_mmap failed for size %lld (%s)\n",
61 (long long)tdb->map_size, strerror(errno));
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and expand
68 note that "len" is the minimum length needed for the db
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
/*
 * Visible flow of the bounds check:
 *   1. len is first compared with the cached map_size.
 *   2. For TDB_INTERNAL databases an overrun is logged as fatal with
 *      ecode TDB_ERR_IO.
 *   3. Otherwise the file is fstat()ed between tdb_lock_expand(F_RDLCK)
 *      and tdb_unlock_expand(F_RDLCK).
 *   4. If st_size is still below len the overrun is logged as fatal;
 *      otherwise map_size is refreshed from st_size.
 *
 * The use of probe and the return statements are not visible in this
 * view; callers in this file treat -1 (tdb_direct) or any non-zero
 * value (tdb_read, tdb_write) as failure.
 *
 * NOTE(review): the (size_t) cast applied to len in the st_size
 * comparison would truncate if size_t is narrower than tdb_off_t;
 * confirm the supported targets.
 */
75 /* We can't hold pointers during this: we could unmap! */
76 assert(!tdb->direct_access
77 || (tdb->flags & TDB_NOLOCK)
78 || tdb_has_expansion_lock(tdb));
80 if (len <= tdb->map_size)
82 if (tdb->flags & TDB_INTERNAL) {
84 /* Ensure ecode is set for log fn. */
85 tdb->ecode = TDB_ERR_IO;
86 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
87 "tdb_oob len %lld beyond internal"
88 " malloc size %lld\n",
90 (long long)tdb->map_size);
95 if (tdb_lock_expand(tdb, F_RDLCK) != 0)
98 ret = fstat(tdb->fd, &st);
100 tdb_unlock_expand(tdb, F_RDLCK);
103 tdb->ecode = TDB_ERR_IO;
107 if (st.st_size < (size_t)len) {
109 /* Ensure ecode is set for log fn. */
110 tdb->ecode = TDB_ERR_IO;
111 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
112 "tdb_oob len %lld beyond eof at %lld\n",
113 (long long)len, (long long)st.st_size);
118 /* Unmap, update size, remap */
121 tdb->map_size = st.st_size;
/*
 * Return a pointer straight into the mapping for the range
 * [off, off + len).
 *
 * Three preconditions are tested in order: a mapping exists (map_ptr is
 * non-NULL), no transaction is active, and tdb_oob() with probe set to
 * true does not return -1 for off + len.  On success the result is
 * map_ptr + off.  What is returned when a precondition fails is not
 * visible in this view (presumably NULL; confirm).
 *
 * NOTE(review): off + len is computed without a wraparound check.
 */
126 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
128 if (unlikely(!tdb->map_ptr))
131 /* FIXME: We can do a subset of this! */
132 if (tdb->transaction)
135 if (unlikely(tdb_oob(tdb, off + len, true) == -1))
137 return (char *)tdb->map_ptr + off;
140 /* Either make a copy into pad and return that, or return ptr into mmap. */
141 /* Note: pad has to be a real object, so we can't get here if len
142 * overflows size_t */
/*
 * Fetch len bytes located at off.
 *
 * When TDB_CONVERT is not set, tdb_direct() is tried first; how its
 * result is used is not visible in this view.  The final statement
 * reads through tdb_read_convert() into the caller-supplied pad and
 * returns pad, or NULL when that read returns -1.
 */
143 void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
145 if (likely(!(tdb->flags & TDB_CONVERT))) {
146 void *ret = tdb_direct(tdb, off, len);
150 return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
153 /* Endian conversion: we only ever deal with 8 byte quantities */
/*
 * Byte-swap a buffer in place when the database needs endian conversion.
 *
 * Only acts when TDB_CONVERT is set and buf is non-NULL.  The buffer is
 * treated as an array of uint64_t and each of the size / 8 whole words
 * is passed through bswap_64(); any trailing size % 8 bytes are left
 * untouched.
 *
 * The return statement is not visible in this view; tdb_write_convert
 * uses the result as the buffer to write (presumably buf; confirm).
 */
154 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
156 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
157 uint64_t i, *p = (uint64_t *)buf;
158 for (i = 0; i < size / 8; i++)
159 p[i] = bswap_64(p[i]);
164 /* FIXME: Return the off? */
/*
 * Scan entries [start, end) of the tdb_off_t array that begins at base.
 *
 * The slice is obtained with tdb_access_read() with convert set to
 * false, iterated over (end - start) entries, and then handed back
 * with tdb_access_release().  The loop body, the handling of a failed
 * tdb_access_read() and the return value are not visible in this view
 * (presumably the index of the first non-zero entry; confirm).
 */
165 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
166 tdb_off_t base, uint64_t start, uint64_t end)
171 /* Zero vs non-zero is the same unconverted: minor optimization. */
172 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
173 (end - start) * sizeof(tdb_off_t), false);
177 for (i = 0; i < (end - start); i++) {
181 tdb_access_release(tdb, val);
185 /* Return first zero offset in num offset array, or num. */
/*
 * Scan num entries of the tdb_off_t array that begins at off.
 *
 * The array is obtained with tdb_access_read() with convert set to
 * false, iterated over num entries, and then handed back with
 * tdb_access_release().  The remaining parameters, the loop body and
 * the return statement are not visible in this view.
 */
186 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
192 /* Zero vs non-zero is the same unconverted: minor optimization. */
193 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
197 for (i = 0; i < num; i++) {
201 tdb_access_release(tdb, val);
/*
 * Zero len bytes of the database starting at off.
 *
 * A direct pointer is requested from tdb_direct() first (what is done
 * with p is not visible in this view).  The visible fallback writes
 * from an 8192-byte zero-initialised stack buffer through
 * tdb->methods->write, at most sizeof(buf) bytes per call; a write
 * returning -1 is treated as failure.
 */
205 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
207 char buf[8192] = { 0 };
208 void *p = tdb_direct(tdb, off, len);
214 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
215 if (tdb->methods->write(tdb, off, buf, todo) == -1)
/*
 * Read a single tdb_off_t stored at off.
 *
 * The value is fetched with tdb_get(), with the local pad supplied as
 * the bounce buffer.  The declarations of pad and ret, the error path
 * and the return statement are not visible in this view.
 */
223 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
227 ret = tdb_get(tdb, off, &pad, sizeof(pad));
234 /* Even on files, we can get partial writes due to signals. */
/*
 * pwrite() wrapper meant to cope with short writes.
 *
 * Visible steps: one pwrite(fd, buf, len, off) call, after which buf is
 * advanced by the number of bytes reported in ret.  The enclosing loop,
 * the matching adjustment of len and off, the error handling and the
 * bool result are not visible in this view.
 */
235 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
239 ret = pwrite(fd, buf, len, off);
246 buf = (char *)buf + ret;
253 /* Even on files, we can get partial reads due to signals. */
/*
 * pread() wrapper meant to cope with short reads.
 *
 * Visible steps: one pread(fd, buf, len, off) call, after which buf is
 * advanced by the number of bytes reported in ret.  The enclosing loop,
 * the matching adjustment of len and off, the end-of-file and error
 * handling and the bool result are not visible in this view.
 */
254 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
258 ret = pread(fd, buf, len, off);
266 buf = (char *)buf + ret;
/*
 * read() counterpart of tdb_pread_all: reads from the current file
 * position of fd instead of an explicit offset.
 *
 * Visible steps: one read(fd, buf, len) call, after which buf is
 * advanced by ret.  The enclosing loop, the adjustment of len, the
 * error handling and the bool result are not visible in this view.
 */
273 bool tdb_read_all(int fd, void *buf, size_t len)
277 ret = read(fd, buf, len);
285 buf = (char *)buf + ret;
291 /* write a lump of data at a specified offset */
/*
 * Write len bytes from buf into the database at off.
 *
 * Visible flow:
 *   - a read_only database is refused with ecode TDB_ERR_RDONLY;
 *   - off + len is bounds-checked through tdb->methods->oob;
 *   - the data is then either memcpy()ed into the mapping or written
 *     with tdb_pwrite_all() (the condition choosing between the two is
 *     not visible in this view);
 *   - a failed tdb_pwrite_all() sets TDB_ERR_IO and logs at
 *     TDB_DEBUG_FATAL with strerror(errno).
 *
 * NOTE(review): the log call passes off and len to %llu without casts,
 * whereas tdb_read casts its arguments to (long long).  Verify that
 * tdb_off_t and tdb_len_t are unsigned long long on every target, or
 * add casts.
 */
292 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
293 const void *buf, tdb_len_t len)
299 if (tdb->read_only) {
300 tdb->ecode = TDB_ERR_RDONLY;
304 if (tdb->methods->oob(tdb, off + len, 0) != 0)
308 memcpy(off + (char *)tdb->map_ptr, buf, len);
310 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
311 tdb->ecode = TDB_ERR_IO;
312 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
313 "tdb_write failed at %llu len=%llu (%s)\n",
314 off, len, strerror(errno));
321 /* read a lump of data at a specified offset */
/*
 * Read len bytes at off from the database into buf.
 *
 * Visible flow:
 *   - off + len is bounds-checked through tdb->methods->oob;
 *   - the data is then either memcpy()ed out of the mapping or read
 *     with tdb_pread_all() (the condition choosing between the two is
 *     not visible in this view);
 *   - a failed tdb_pread_all() sets TDB_ERR_IO and logs the offset,
 *     length and current map_size at TDB_DEBUG_FATAL.
 *
 * The rest of the parameter list and the return statements are not
 * visible in this view.
 */
322 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
325 if (tdb->methods->oob(tdb, off + len, 0) != 0) {
330 memcpy(buf, off + (char *)tdb->map_ptr, len);
332 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
333 /* Ensure ecode is set for log fn. */
334 tdb->ecode = TDB_ERR_IO;
335 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
336 "tdb_read failed at %lld "
337 "len=%lld (%s) map_size=%lld\n",
338 (long long)off, (long long)len,
340 (long long)tdb->map_size);
/*
 * Write len bytes of rec at off, applying endian conversion when the
 * database has TDB_CONVERT set.
 *
 * rec is const, so the converting path works on a malloc()ed copy: the
 * copy is filled with memcpy(), passed through tdb_convert() and the
 * result is handed to tdb->methods->write.  Allocation failure sets
 * TDB_ERR_OOM and logs at TDB_DEBUG_FATAL.  Without TDB_CONVERT, rec is
 * written directly.
 *
 * The release of conv and the return statement are not visible in this
 * view; confirm the copy is freed on every path.
 */
347 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
348 const void *rec, size_t len)
351 if (unlikely((tdb->flags & TDB_CONVERT))) {
352 void *conv = malloc(len);
354 tdb->ecode = TDB_ERR_OOM;
355 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
356 "tdb_write: no memory converting %zu bytes\n",
360 memcpy(conv, rec, len);
361 ret = tdb->methods->write(tdb, off,
362 tdb_convert(tdb, conv, len), len);
365 ret = tdb->methods->write(tdb, off, rec, len);
/*
 * Read len bytes at off into rec and apply endian conversion in place.
 *
 * The read goes through tdb->methods->read; tdb_convert() is then
 * called on rec on the very next line, without looking at ret first.
 * tdb_convert() itself only acts when TDB_CONVERT is set.  The return
 * statement is not visible in this view (presumably ret; confirm).
 */
370 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
371 void *rec, size_t len)
373 int ret = tdb->methods->read(tdb, off, rec, len);
374 tdb_convert(tdb, rec, len);
/*
 * Store the single offset value val at position off.
 *
 * Thin wrapper: writes sizeof(val) bytes through tdb_write_convert(),
 * so any endian conversion is handled there, and returns its result.
 */
378 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
380 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * Allocate a buffer of prefix + len bytes and read len bytes from
 * offset into it, starting prefix bytes into the buffer.
 *
 * At least one byte is always requested from malloc().  Allocation
 * failure sets TDB_ERR_OOM and logs at TDB_DEBUG_ERROR.  A non-zero
 * result from tdb->methods->read enters an error branch whose body is
 * not visible in this view, as is the return statement.
 *
 * NOTE(review): prefix + len is not checked for overflow before being
 * passed to malloc().
 */
383 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
384 tdb_len_t len, unsigned int prefix)
388 /* some systems don't like zero length malloc */
389 buf = malloc(prefix + len ? prefix + len : 1);
390 if (unlikely(!buf)) {
391 tdb->ecode = TDB_ERR_OOM;
392 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
393 "tdb_alloc_read malloc failed len=%lld\n",
394 (long long)prefix + len);
395 } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix, len))) {
402 /* read a lump of data, allocating the space for it */
/*
 * Public entry point for an allocating read: forwards to
 * _tdb_alloc_read() with a prefix of 0 and returns its result.
 */
403 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
405 return _tdb_alloc_read(tdb, offset, len, 0);
/*
 * Write the pattern held in buf (size bytes long) into the file
 * starting at off.
 *
 * Each chunk is the smaller of len and size and is written with
 * tdb_pwrite_all().  A failed write sets TDB_ERR_IO and logs at
 * TDB_DEBUG_FATAL.  The enclosing loop, the updates of off and len and
 * the return statements are not visible in this view.
 */
408 static int fill(struct tdb_context *tdb,
409 const void *buf, size_t size,
410 tdb_off_t off, tdb_len_t len)
413 size_t n = len > size ? size : len;
415 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
416 tdb->ecode = TDB_ERR_IO;
417 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
418 "fill write failed: giving up!\n");
427 /* expand a file. we prefer to use ftruncate, as that is what posix
428 says to use for mmap expansion */
/*
 * Grow the database by addition bytes.
 *
 * Visible flow:
 *   - a read_only database is refused with ecode TDB_ERR_RDONLY;
 *   - TDB_INTERNAL databases are grown with realloc() into a temporary
 *     pointer (so map_ptr is not overwritten by a failed call), with
 *     TDB_ERR_OOM on failure, and map_size is then increased;
 *   - file-backed databases are extended with ftruncate() to
 *     map_size + addition; per the existing comment a failure there is
 *     tolerated because the fill below is attempted anyway;
 *   - the new region starting at the old map_size is written with 0x43
 *     bytes through fill(), and only after that succeeds is map_size
 *     increased.
 *
 * The declaration of buf, the unmap/remap calls and the return
 * statements are not visible in this view.
 */
429 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
433 if (tdb->read_only) {
434 tdb->ecode = TDB_ERR_RDONLY;
438 if (tdb->flags & TDB_INTERNAL) {
439 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
441 tdb->ecode = TDB_ERR_OOM;
445 tdb->map_size += addition;
447 /* Unmap before trying to write; old TDB claimed OpenBSD had
448 * problem with this otherwise. */
451 /* If this fails, we try to fill anyway. */
452 if (ftruncate(tdb->fd, tdb->map_size + addition))
455 /* now fill the file with something. This ensures that the
456 file isn't sparse, which would be very bad if we ran out of
457 disk. This must be done with write, not via mmap */
458 memset(buf, 0x43, sizeof(buf));
459 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
461 tdb->map_size += addition;
467 /* This is only needed for tdb_access_commit, but used everywhere to simplify. */
/*
 * Header placed immediately before heap-allocated access buffers: the
 * access functions recover it from a data pointer p with
 * (struct tdb_access_hdr *)p - 1.  The member list is not visible in
 * this view; code in this file reads or writes the members off, len
 * and convert.
 */
468 struct tdb_access_hdr {
/*
 * Obtain read access to len bytes of the database at off.
 *
 * Without TDB_CONVERT a direct pointer is requested from tdb_direct().
 * The other visible path allocates a copy with _tdb_alloc_read(),
 * reserving sizeof(struct tdb_access_hdr) bytes in front of the data,
 * and may run tdb_convert() on the data.  direct_access is incremented
 * on one path.  The conditions selecting between these steps and the
 * return statement are not visible in this view.
 *
 * The returned pointer is expected to be handed back through
 * tdb_access_release().
 */
474 const void *tdb_access_read(struct tdb_context *tdb,
475 tdb_off_t off, tdb_len_t len, bool convert)
477 const void *ret = NULL;
479 if (likely(!(tdb->flags & TDB_CONVERT)))
480 ret = tdb_direct(tdb, off, len);
483 struct tdb_access_hdr *hdr;
484 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
488 tdb_convert(tdb, (void *)ret, len);
491 tdb->direct_access++;
/*
 * Obtain writable access to len bytes of the database at off.
 *
 * Without TDB_CONVERT a direct pointer is requested from tdb_direct().
 * The other visible path allocates a copy with _tdb_alloc_read(),
 * reserving sizeof(struct tdb_access_hdr) bytes in front of the data,
 * records the convert argument in hdr->convert, and may run
 * tdb_convert() on the data.  direct_access is incremented on one
 * path.  The conditions selecting between these steps and the return
 * statement are not visible in this view.
 *
 * tdb_access_commit() reads the header back from the same position,
 * so changes made through the returned pointer are presumably meant to
 * be finished with that function; confirm against callers.
 */
496 void *tdb_access_write(struct tdb_context *tdb,
497 tdb_off_t off, tdb_len_t len, bool convert)
501 if (likely(!(tdb->flags & TDB_CONVERT)))
502 ret = tdb_direct(tdb, off, len);
505 struct tdb_access_hdr *hdr;
506 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
510 hdr->convert = convert;
513 tdb_convert(tdb, (void *)ret, len);
516 tdb->direct_access++;
/*
 * Give back a pointer obtained from tdb_access_read().
 *
 * A pointer lying outside [map_ptr, map_ptr + map_size) is treated as
 * a heap copy: the allocation is freed starting from the
 * tdb_access_hdr that sits directly before p.  The other visible
 * statement decrements direct_access (presumably the else branch for
 * pointers into the mapping; confirm).  The first term of the
 * condition is not visible in this view.
 */
521 void tdb_access_release(struct tdb_context *tdb, const void *p)
524 || (char *)p < (char *)tdb->map_ptr
525 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
526 free((struct tdb_access_hdr *)p - 1);
528 tdb->direct_access--;
/*
 * Finish a write access started on pointer p.
 *
 * For a pointer lying outside [map_ptr, map_ptr + map_size) the
 * tdb_access_hdr directly before p is recovered, and hdr->len bytes
 * are written back at hdr->off using either tdb_write_convert() or
 * tdb_write() (the selecting condition is not visible; presumably
 * hdr->convert).  The other visible statement decrements
 * direct_access.  The first term of the condition, the release of the
 * heap copy and the return statement are not visible in this view.
 */
531 int tdb_access_commit(struct tdb_context *tdb, void *p)
536 || (char *)p < (char *)tdb->map_ptr
537 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size) {
538 struct tdb_access_hdr *hdr;
540 hdr = (struct tdb_access_hdr *)p - 1;
542 ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
544 ret = tdb_write(tdb, hdr->off, p, hdr->len);
547 tdb->direct_access--;
553 /* write a lump of data at a specified offset */
/*
 * Write len bytes from buf into the database at off, retrying a short
 * pwrite() once.
 *
 * Visible flow:
 *   - refused with TDB_ERR_RDONLY when read_only or traverse_read is
 *     set;
 *   - off + len is bounds-checked through tdb->methods->tdb_oob;
 *   - the data is either memcpy()ed into the mapping or written with
 *     pwrite() (the selecting condition is not visible);
 *   - a short write that is not -1 is logged and followed by a second
 *     pwrite() starting at buf + written;
 *   - a failure is logged with strerror(errno), and a write that is
 *     still short is logged as failed in two attempts.
 *
 * NOTE(review): this is the second static function named tdb_write in
 * this view and it uses a different method-table and logging interface
 * (methods->tdb_oob, TDB_LOG) from the earlier one.  Both cannot live
 * in one translation unit; confirm which file or version this section
 * belongs to.
 * NOTE(review): the log calls pass off and len to %d; confirm those
 * types are int-sized here.
 */
554 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
555 const void *buf, tdb_len_t len)
561 if (tdb->read_only || tdb->traverse_read) {
562 tdb->ecode = TDB_ERR_RDONLY;
566 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
570 memcpy(off + (char *)tdb->map_ptr, buf, len);
572 ssize_t written = pwrite(tdb->fd, buf, len, off);
573 if ((written != (ssize_t)len) && (written != -1)) {
575 tdb->ecode = TDB_ERR_IO;
576 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
577 "%d of %d bytes at %d, trying once more\n",
578 (int)written, len, off));
579 written = pwrite(tdb->fd, (const char *)buf+written,
584 /* Ensure ecode is set for log fn. */
585 tdb->ecode = TDB_ERR_IO;
586 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
587 "len=%d (%s)\n", off, len, strerror(errno)));
589 } else if (written != (ssize_t)len) {
590 tdb->ecode = TDB_ERR_IO;
591 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
592 "write %d bytes at %d in two attempts\n",
603 do an unlocked scan of the hash table heads to find the next non-zero head. The value
604 will then be confirmed with the lock held
/*
 * Advance over hash buckets looking for one whose head is non-zero.
 *
 * Two scan loops over h < tdb->header.hash_size are visible:
 *   - one reads the bucket head straight out of the mapping as a
 *     uint32_t at TDB_HASH_TOP(h);
 *   - the other reads it with tdb_ofs_read() and matches when the read
 *     fails or the offset is non-zero.
 * Which loop runs (presumably depending on whether a mapping exists),
 * the loop bodies and the update of *chain are not visible in this
 * view.
 */
606 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
610 for (;h < tdb->header.hash_size;h++) {
611 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
617 for (;h < tdb->header.hash_size;h++) {
618 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
626 /* read/write a tdb_off_t */
/*
 * Read one tdb_off_t located at offset into *d.
 *
 * Forwards to tdb->methods->tdb_read for sizeof(*d) bytes, passing
 * DOCONV() as the final argument, and returns its result.
 */
627 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
629 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/*
 * Write one tdb_off_t at offset.
 *
 * Forwards sizeof(*d) bytes to tdb->methods->tdb_write.  The buffer
 * passed is CONVERT(off), where off is a local whose declaration is
 * not visible in this view (presumably a copy of *d, so the value
 * owned by the caller is not modified; confirm).
 */
632 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
635 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
639 /* read/write a record */
/*
 * Read and sanity-check the record header stored at offset.
 *
 * Steps visible here:
 *   1. sizeof(*rec) bytes are read into rec through
 *      tdb->methods->tdb_read, with DOCONV() as the final argument; a
 *      result of -1 is treated as failure.
 *   2. TDB_BAD_MAGIC(rec) marks the record as corrupt: ecode becomes
 *      TDB_ERR_CORRUPT and the magic and offset are logged.
 *   3. Otherwise the result of bounds-checking
 *      rec->next + sizeof(*rec) through tdb->methods->tdb_oob is
 *      returned.
 */
640 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
642 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
644 if (TDB_BAD_MAGIC(rec)) {
645 /* Ensure ecode is set for log fn. */
646 tdb->ecode = TDB_ERR_CORRUPT;
647 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
650 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/*
 * Write the record header *rec at offset.
 *
 * The header is first copied into the local r and CONVERT(r) is what
 * gets passed to tdb->methods->tdb_write, so any conversion is applied
 * to the copy and not to the struct owned by the caller (assuming
 * CONVERT operates on its argument in place; confirm).
 */
653 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
655 struct tdb_record r = *rec;
656 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/*
 * File-scope method table that tdb_io_init() installs into
 * tdb->methods.  The initialisers are not visible in this view.
 */
660 static const struct tdb_methods io_methods = {
668 initialise the default methods table
/*
 * Install the default I/O method table: tdb->methods is pointed at the
 * file-scope constant io_methods.
 */
670 void tdb_io_init(struct tdb_context *tdb)
672 tdb->methods = &io_methods;