2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/*
 * tdb_munmap: drop the current mapping of the database.
 *
 * The visible code first tests the TDB_INTERNAL flag and later calls
 * munmap() on tdb->map_ptr for tdb->map_size bytes.
 * NOTE(review): the statement guarded by the TDB_INTERNAL test and any
 * reset of map_ptr are not visible in this listing - confirm.
 */
32 void tdb_munmap(struct tdb_context *tdb)
34 if (tdb->flags & TDB_INTERNAL)
38 munmap(tdb->map_ptr, tdb->map_size);
/*
 * tdb_mmap: map tdb->fd into memory and store the result in tdb->map_ptr.
 *
 * The mapping covers tdb->map_size bytes from file offset 0, is MAP_SHARED,
 * always readable, and writable unless tdb->read_only is set.  The
 * TDB_INTERNAL and TDB_NOMMAP flags are tested before mapping.
 * If mmap() returns MAP_FAILED a TDB_DEBUG_WARNING message containing the
 * size and strerror(errno) is sent to tdb->log.
 * NOTE(review): what happens to map_ptr after a failure is not visible here.
 */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 tdb->map_ptr = mmap(NULL, tdb->map_size,
52 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
53 MAP_SHARED, tdb->fd, 0);
56 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
58 if (tdb->map_ptr == MAP_FAILED) {
60 tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
61 "tdb_mmap failed for size %lld (%s)\n",
62 (long long)tdb->map_size, strerror(errno));
/*
 * tdb_oob: bounds check for an access that needs at least "len" bytes.
 *
 * Visible steps: assert that no direct-access pointers are outstanding
 * (unless the expansion lock is held), compare len against the cached
 * map_size, reject oversize requests on TDB_INTERNAL databases with
 * TDB_ERR_IO, otherwise fstat() the file under the expansion read lock,
 * fail with TDB_ERR_IO if the file is still shorter than len, and finally
 * adopt st.st_size as the new map_size.
 * NOTE(review): the return statements and the use of "probe" are not
 * visible in this listing.
 */
66 /* check for an out of bounds access - if it is out of bounds then
67 see if the database has been expanded by someone else and expand
69 note that "len" is the minimum length needed for the db
71 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
76 /* We can't hold pointers during this: we could unmap! */
77 assert(!tdb->direct_access || tdb_has_expansion_lock(tdb));
/* Does the request already fit inside the size we know about? */
79 if (len <= tdb->map_size)
81 if (tdb->flags & TDB_INTERNAL) {
83 /* Ensure ecode is set for log fn. */
84 tdb->ecode = TDB_ERR_IO;
85 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
86 "tdb_oob len %lld beyond internal"
87 " malloc size %lld\n",
89 (long long)tdb->map_size);
/* The file size is sampled while holding the expansion lock for reading. */
94 if (tdb_lock_expand(tdb, F_RDLCK) != 0)
97 ret = fstat(tdb->fd, &st);
99 tdb_unlock_expand(tdb, F_RDLCK);
102 tdb->ecode = TDB_ERR_IO;
/* NOTE(review): len is narrowed to size_t for this comparison - confirm
 * this is safe where size_t is narrower than tdb_off_t. */
106 if (st.st_size < (size_t)len) {
108 /* Ensure ecode is set for log fn. */
109 tdb->ecode = TDB_ERR_IO;
110 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
111 "tdb_oob len %lld beyond eof at %lld\n",
112 (long long)len, (long long)st.st_size);
117 /* Unmap, update size, remap */
120 tdb->map_size = st.st_size;
/*
 * tdb_direct: obtain a pointer straight into the mapping for the byte range
 * [off, off + len).
 *
 * The visible code tests for a missing map_ptr and for an active
 * transaction, runs tdb_oob() on off + len, and on success returns
 * map_ptr + off.
 * NOTE(review): the results of the early tests are not visible here, and
 * off + len is computed without a visible wrap-around check - confirm.
 */
125 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
127 if (unlikely(!tdb->map_ptr))
130 /* FIXME: We can do a subset of this! */
131 if (tdb->transaction)
134 if (unlikely(tdb_oob(tdb, off + len, true) == -1))
136 return (char *)tdb->map_ptr + off;
139 /* Either make a copy into pad and return that, or return ptr into mmap. */
140 /* Note: pad has to be a real object, so we can't get here if len
141 * overflows size_t */
/*
 * tdb:  database context
 * off:  offset of the data
 * pad:  caller-supplied buffer of at least len bytes used for the copy path
 * len:  number of bytes wanted
 *
 * When TDB_CONVERT is not set, tdb_direct() is tried first.  The final
 * statement reads through tdb_read_convert() into pad and yields NULL if
 * that returns -1, otherwise pad.
 */
142 void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
144 if (likely(!(tdb->flags & TDB_CONVERT))) {
145 void *ret = tdb_direct(tdb, off, len);
149 return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
152 /* Endian conversion: we only ever deal with 8 byte quantities */
/*
 * When TDB_CONVERT is set and buf is non-NULL, byte-swap buf in place as an
 * array of size / 8 64-bit words; any trailing size % 8 bytes are not
 * touched by the loop.
 * NOTE(review): the return statement is not visible in this listing.
 */
153 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
155 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
156 uint64_t i, *p = (uint64_t *)buf;
157 for (i = 0; i < size / 8; i++)
158 p[i] = bswap_64(p[i]);
163 /* FIXME: Return the off? */
/*
 * Scan an on-disk array of tdb_off_t entries.  The entries with indices
 * [start, end) relative to "base" are fetched with tdb_access_read()
 * (no conversion requested), visited in order, and then released with
 * tdb_access_release().
 * NOTE(review): the loop body and the returned value are not visible in
 * this listing; the name suggests the index of the first non-zero entry.
 */
164 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
165 tdb_off_t base, uint64_t start, uint64_t end)
170 /* Zero vs non-zero is the same unconverted: minor optimization. */
171 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
172 (end - start) * sizeof(tdb_off_t), false);
176 for (i = 0; i < (end - start); i++) {
180 tdb_access_release(tdb, val);
184 /* Return first zero offset in num offset array, or num. */
/*
 * The array of num tdb_off_t entries starting at "off" is fetched with
 * tdb_access_read() (no conversion requested), scanned from index 0, and
 * released with tdb_access_release().
 * NOTE(review): the remainder of the parameter list and the loop body are
 * not visible in this listing.
 */
185 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
191 /* Zero vs non-zero is the same unconverted: minor optimization. */
192 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
196 for (i = 0; i < num; i++) {
200 tdb_access_release(tdb, val);
/*
 * zero_out: overwrite len bytes starting at off with zeroes.
 *
 * A direct pointer is requested with tdb_direct(); the visible fallback
 * writes from an 8192-byte zero-filled stack buffer through
 * tdb->methods->write, at most sizeof(buf) bytes per call.
 * NOTE(review): the handling of a non-NULL direct pointer, the loop that
 * advances off/len, and the return values are not visible here.
 */
204 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
206 char buf[8192] = { 0 };
207 void *p = tdb_direct(tdb, off, len);
/* Chunk size: whichever is smaller, the remaining length or the buffer. */
213 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
214 if (tdb->methods->write(tdb, off, buf, todo) == -1)
/*
 * tdb_read_off: fetch one tdb_off_t stored at offset "off".
 *
 * The value is obtained through tdb_get(), with the local "pad" serving as
 * the copy buffer.
 * NOTE(review): error handling and the return statement are not visible.
 */
222 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
226 ret = tdb_get(tdb, off, &pad, sizeof(pad));
233 /* Even on files, we can get partial writes due to signals. */
/*
 * pwrite() wrapper for writing len bytes from buf to fd at offset off.
 * After a pwrite() call the buffer pointer is advanced by the number of
 * bytes reported written.
 * NOTE(review): the retry loop, the updates of len/off and the returned
 * bool are not visible in this listing.
 */
234 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
238 ret = pwrite(fd, buf, len, off);
245 buf = (char *)buf + ret;
252 /* Even on files, we can get partial reads due to signals. */
/*
 * pread() wrapper for reading len bytes from fd at offset off into buf.
 * After a pread() call the buffer pointer is advanced by the number of
 * bytes reported read.
 * NOTE(review): the retry loop, end-of-file handling and the returned bool
 * are not visible in this listing.
 */
253 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
257 ret = pread(fd, buf, len, off);
265 buf = (char *)buf + ret;
/*
 * tdb_read_all: read() wrapper that reads from the current file position of
 * fd into buf, advancing buf by the number of bytes each read() returns.
 * NOTE(review): the retry loop and the returned bool are not visible in
 * this listing.
 */
272 bool tdb_read_all(int fd, void *buf, size_t len)
276 ret = read(fd, buf, len);
284 buf = (char *)buf + ret;
290 /* write a lump of data at a specified offset */
/*
 * tdb:  database context
 * off:  destination offset inside the database
 * buf:  source bytes
 * len:  number of bytes to write
 *
 * Visible steps: refuse with TDB_ERR_RDONLY when tdb->read_only is set,
 * bounds-check off + len through tdb->methods->oob, then either memcpy()
 * into the mapping or fall back to tdb_pwrite_all() on tdb->fd.  A failed
 * pwrite sets TDB_ERR_IO and logs the offset, length and strerror(errno).
 * NOTE(review): the return statements and the condition selecting memcpy
 * versus pwrite are not visible in this listing.
 */
291 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
292 const void *buf, tdb_len_t len)
298 if (tdb->read_only) {
299 tdb->ecode = TDB_ERR_RDONLY;
303 if (tdb->methods->oob(tdb, off + len, 0) != 0)
307 memcpy(off + (char *)tdb->map_ptr, buf, len);
309 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
310 tdb->ecode = TDB_ERR_IO;
311 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
312 "tdb_write failed at %llu len=%llu (%s)\n",
/* %llu needs unsigned long long arguments; cast explicitly, as the other
 * log calls in this file do, so the call is well-defined whatever the
 * underlying types of tdb_off_t and tdb_len_t are. */
313 (unsigned long long)off, (unsigned long long)len, strerror(errno));
320 /* read a lump of data at a specified offset */
/*
 * Visible steps: bounds-check off + len through tdb->methods->oob, then
 * either memcpy() out of the mapping or fall back to tdb_pread_all() on
 * tdb->fd.  A failed pread sets TDB_ERR_IO and logs the offset, length and
 * current map_size.
 * NOTE(review): the tail of the parameter list, the return statements and
 * the condition selecting memcpy versus pread are not visible here.
 */
321 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
324 if (tdb->methods->oob(tdb, off + len, 0) != 0) {
329 memcpy(buf, off + (char *)tdb->map_ptr, len);
331 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
332 /* Ensure ecode is set for log fn. */
333 tdb->ecode = TDB_ERR_IO;
334 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
335 "tdb_read failed at %lld "
336 "len=%lld (%s) map_size=%lld\n",
337 (long long)off, (long long)len,
339 (long long)tdb->map_size);
/*
 * tdb_write_convert: write len bytes of rec at off, converting endianness
 * first when the database carries TDB_CONVERT.
 *
 * In the converting case the caller's buffer is left unmodified: a
 * malloc()ed copy is made, passed through tdb_convert(), and handed to
 * tdb->methods->write.  Allocation failure sets TDB_ERR_OOM and is logged.
 * Without TDB_CONVERT, rec is written directly.
 * NOTE(review): the free() of the temporary copy and the return statement
 * are not visible in this listing - confirm.
 */
346 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
347 const void *rec, size_t len)
350 if (unlikely((tdb->flags & TDB_CONVERT))) {
351 void *conv = malloc(len);
353 tdb->ecode = TDB_ERR_OOM;
354 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
355 "tdb_write: no memory converting %zu bytes\n",
359 memcpy(conv, rec, len);
360 ret = tdb->methods->write(tdb, off,
361 tdb_convert(tdb, conv, len), len);
364 ret = tdb->methods->write(tdb, off, rec, len);
/*
 * tdb_read_convert: read len bytes at off into rec through
 * tdb->methods->read, then pass rec through tdb_convert().
 * NOTE(review): in the visible lines tdb_convert() is applied without
 * first testing ret; the return statement is not visible.
 */
369 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
370 void *rec, size_t len)
372 int ret = tdb->methods->read(tdb, off, rec, len);
373 tdb_convert(tdb, rec, len);
/*
 * tdb_write_off: store the single offset value "val" at offset "off".
 * Returns whatever tdb_write_convert() returns for the sizeof(val)-byte
 * write.
 */
377 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
379 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * _tdb_alloc_read: allocate prefix + len bytes and read len bytes from
 * "offset" into the allocation, starting "prefix" bytes in.
 *
 * malloc() failure sets TDB_ERR_OOM and logs at TDB_DEBUG_ERROR.  A
 * non-zero result from tdb->methods->read selects the branch that begins
 * on the last visible line.
 * NOTE(review): the body of that branch and the return statement are not
 * visible in this listing.
 */
382 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
383 tdb_len_t len, unsigned int prefix)
387 /* some systems don't like zero length malloc */
388 buf = malloc(prefix + len ? prefix + len : 1);
389 if (unlikely(!buf)) {
390 tdb->ecode = TDB_ERR_OOM;
391 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
392 "tdb_alloc_read malloc failed len=%lld\n",
393 (long long)prefix + len);
394 } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix, len))) {
401 /* read a lump of data, allocating the space for it */
/* Thin wrapper: calls _tdb_alloc_read() with a prefix of 0 bytes and
 * returns its result unchanged. */
402 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
404 return _tdb_alloc_read(tdb, offset, len, 0);
/*
 * fill: write the pattern held in buf (size bytes) to tdb->fd at offset
 * off, covering len bytes.
 *
 * Each visible write is limited to min(len, size) bytes and goes through
 * tdb_pwrite_all(); a failed write sets TDB_ERR_IO and is logged.
 * NOTE(review): the loop that advances off/len and the return values are
 * not visible in this listing.
 */
407 static int fill(struct tdb_context *tdb,
408 const void *buf, size_t size,
409 tdb_off_t off, tdb_len_t len)
412 size_t n = len > size ? size : len;
414 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
415 tdb->ecode = TDB_ERR_IO;
416 tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
417 "fill write failed: giving up!\n");
426 /* expand a file. we prefer to use ftruncate, as that is what posix
427 says to use for mmap expansion */
/*
 * Grow the database by "addition" bytes.
 *
 * Read-only databases are refused with TDB_ERR_RDONLY.  TDB_INTERNAL
 * databases are grown with realloc() of map_ptr (TDB_ERR_OOM on failure).
 * Otherwise the file is extended with ftruncate() and the new region is
 * filled with 0x43 bytes through fill(); map_size is increased by
 * "addition" on both paths.
 * NOTE(review): the return statements, the assignment of the realloc()
 * result, and the unmap/remap calls are not visible in this listing.
 */
428 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
432 if (tdb->read_only) {
433 tdb->ecode = TDB_ERR_RDONLY;
437 if (tdb->flags & TDB_INTERNAL) {
/* The result goes into a separate variable rather than over map_ptr. */
438 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
440 tdb->ecode = TDB_ERR_OOM;
444 tdb->map_size += addition;
446 /* Unmap before trying to write; old TDB claimed OpenBSD had
447 * problem with this otherwise. */
450 /* If this fails, we try to fill anyway. */
451 if (ftruncate(tdb->fd, tdb->map_size + addition))
454 /* now fill the file with something. This ensures that the
455 file isn't sparse, which would be very bad if we ran out of
456 disk. This must be done with write, not via mmap */
457 memset(buf, 0x43, sizeof(buf));
458 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
460 tdb->map_size += addition;
466 /* This is only needed for tdb_access_commit, but used everywhere to simplify. */
/* Header placed immediately before buffers handed out by the tdb_access_*
 * functions; other code in this file reads its off, len and convert
 * members.  NOTE(review): the member declarations are not visible here. */
467 struct tdb_access_hdr {
/*
 * tdb_access_read: obtain read access to len bytes at off.
 *
 * Without TDB_CONVERT a direct pointer into the mapping is requested via
 * tdb_direct().  The other visible path allocates a copy with
 * _tdb_alloc_read(), reserving room for a struct tdb_access_hdr in front
 * of the data, and runs tdb_convert() on it.  tdb->direct_access is
 * incremented on the direct path.
 * NOTE(review): the conditions linking these steps (including how the
 * "convert" argument is used) and the return statement are not visible.
 */
473 const void *tdb_access_read(struct tdb_context *tdb,
474 tdb_off_t off, tdb_len_t len, bool convert)
476 const void *ret = NULL;
478 if (likely(!(tdb->flags & TDB_CONVERT)))
479 ret = tdb_direct(tdb, off, len);
482 struct tdb_access_hdr *hdr;
483 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
487 tdb_convert(tdb, (void *)ret, len);
490 tdb->direct_access++;
/*
 * tdb_access_write: obtain writable access to len bytes at off.
 *
 * Without TDB_CONVERT a direct pointer into the mapping is requested via
 * tdb_direct().  The other visible path allocates a copy with
 * _tdb_alloc_read() behind a struct tdb_access_hdr, records the "convert"
 * argument in the header, and runs tdb_convert() on the data.
 * tdb->direct_access is incremented on the direct path.
 * NOTE(review): the conditions linking these steps, the remaining header
 * assignments and the return statement are not visible in this listing.
 */
495 void *tdb_access_write(struct tdb_context *tdb,
496 tdb_off_t off, tdb_len_t len, bool convert)
500 if (likely(!(tdb->flags & TDB_CONVERT)))
501 ret = tdb_direct(tdb, off, len);
504 struct tdb_access_hdr *hdr;
505 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
509 hdr->convert = convert;
512 tdb_convert(tdb, (void *)ret, len);
515 tdb->direct_access++;
/*
 * tdb_access_release: give back a pointer obtained from tdb_access_read()
 * or tdb_access_write().
 *
 * A pointer lying outside [map_ptr, map_ptr + map_size) is treated as a
 * heap copy: the struct tdb_access_hdr located just before it is freed.
 * Otherwise tdb->direct_access is decremented.
 * NOTE(review): the first operand of the condition is not visible here.
 */
520 void tdb_access_release(struct tdb_context *tdb, const void *p)
523 || (char *)p < (char *)tdb->map_ptr
524 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
525 free((struct tdb_access_hdr *)p - 1);
527 tdb->direct_access--;
/*
 * tdb_access_commit: finish a write access started with tdb_access_write().
 *
 * A pointer lying outside [map_ptr, map_ptr + map_size) is treated as a
 * heap copy: its struct tdb_access_hdr (just before p) supplies the offset
 * and length, and the data is written back with tdb_write_convert() or
 * tdb_write().  Otherwise tdb->direct_access is decremented.
 * NOTE(review): the first operand of the condition, the test choosing
 * between the two write calls, the release of the copy and the return
 * statement are not visible in this listing.
 */
530 int tdb_access_commit(struct tdb_context *tdb, void *p)
535 || (char *)p < (char *)tdb->map_ptr
536 || (char *)p >= (char *)tdb->map_ptr + tdb->map_size) {
537 struct tdb_access_hdr *hdr;
539 hdr = (struct tdb_access_hdr *)p - 1;
541 ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
543 ret = tdb_write(tdb, hdr->off, p, hdr->len);
546 tdb->direct_access--;
552 /* write a lump of data at a specified offset */
/*
 * Variant of tdb_write that logs through TDB_LOG.
 *
 * Visible steps: refuse with TDB_ERR_RDONLY when read_only or
 * traverse_read is set, bounds-check off + len through
 * tdb->methods->tdb_oob, then either memcpy() into the mapping or
 * pwrite() to tdb->fd.  A short (but not failed) pwrite is logged and
 * retried once for the remaining bytes; a failed write or a second short
 * write sets TDB_ERR_IO and is logged.
 * NOTE(review): len and off are passed to %d conversions without casts -
 * confirm this matches the width and signedness of tdb_len_t/tdb_off_t.
 * NOTE(review): the return statements and several intermediate lines are
 * not visible in this listing.
 */
553 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
554 const void *buf, tdb_len_t len)
560 if (tdb->read_only || tdb->traverse_read) {
561 tdb->ecode = TDB_ERR_RDONLY;
565 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
569 memcpy(off + (char *)tdb->map_ptr, buf, len);
571 ssize_t written = pwrite(tdb->fd, buf, len, off);
572 if ((written != (ssize_t)len) && (written != -1)) {
574 tdb->ecode = TDB_ERR_IO;
575 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
576 "%d of %d bytes at %d, trying once more\n",
577 (int)written, len, off));
578 written = pwrite(tdb->fd, (const char *)buf+written,
583 /* Ensure ecode is set for log fn. */
584 tdb->ecode = TDB_ERR_IO;
585 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
586 "len=%d (%s)\n", off, len, strerror(errno)));
588 } else if (written != (ssize_t)len) {
589 tdb->ecode = TDB_ERR_IO;
590 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
591 "write %d bytes at %d in two attempts\n",
/*
 * tdb_next_hash_chain: advance over hash chain heads, bounded by
 * tdb->header.hash_size.
 *
 * Two scanning loops are visible: one reads each head as a uint32_t
 * directly from the mapping at TDB_HASH_TOP(h), the other reads it with
 * tdb_ofs_read() and also stops when that read fails.
 * NOTE(review): the choice between the two loops, the loop bodies and the
 * update of *chain are not visible in this listing.
 */
602 do an unlocked scan of the hash table heads to find the next non-zero head. The value
603 will then be confirmed with the lock held
605 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
609 for (;h < tdb->header.hash_size;h++) {
610 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
616 for (;h < tdb->header.hash_size;h++) {
617 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
625 /* read/write a tdb_off_t */
/* Read sizeof(*d) bytes at "offset" into *d through
 * tdb->methods->tdb_read, passing DOCONV() as the conversion argument, and
 * return that call's result. */
626 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
628 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/*
 * tdb_ofs_write: write one tdb_off_t at "offset" through
 * tdb->methods->tdb_write and return that call's result.  The bytes
 * written come from CONVERT(off), where "off" is a local.
 * NOTE(review): the declaration of "off" is not visible here; presumably
 * it is a copy of *d so the caller's value is not modified - confirm.
 */
631 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
634 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
638 /* read/write a record */
/*
 * Read a struct tdb_record at "offset" into *rec through
 * tdb->methods->tdb_read.  A record failing the TDB_BAD_MAGIC check sets
 * TDB_ERR_CORRUPT and is logged.  The final result is the bounds check of
 * rec->next + sizeof(*rec) performed by tdb->methods->tdb_oob.
 * NOTE(review): the return statements on the failure paths are not
 * visible in this listing.
 */
639 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
641 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
643 if (TDB_BAD_MAGIC(rec)) {
644 /* Ensure ecode is set for log fn. */
645 tdb->ecode = TDB_ERR_CORRUPT;
646 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
649 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/*
 * tdb_rec_write: write a record at "offset".
 * The record is copied into the local "r" first, so CONVERT() operates on
 * the copy rather than on the caller's *rec; the result of
 * tdb->methods->tdb_write is returned.
 */
652 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
654 struct tdb_record r = *rec;
655 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/* Default method table installed by tdb_io_init().
 * NOTE(review): the initialiser entries are not visible in this listing. */
659 static const struct tdb_methods io_methods = {
/* tdb_io_init: point tdb->methods at the file-scope io_methods table. */
667 initialise the default methods table
669 void tdb_io_init(struct tdb_context *tdb)
671 tdb->methods = &io_methods;