2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/* Drop the memory mapping of the database file.
 *
 * Visible here: the TDB_INTERNAL flag is tested first, and the mapping is
 * released with munmap() over tdb->map_ptr / tdb->map_size.
 * NOTE(review): the statement controlled by the TDB_INTERNAL test and any
 * guard around the munmap() call are not visible in this excerpt --
 * presumably an early return for in-memory databases; confirm. */
32 void tdb_munmap(struct tdb_context *tdb)
34 if (tdb->flags & TDB_INTERNAL)
38 munmap(tdb->map_ptr, tdb->map_size);
/* Map the database file into memory and store the result in tdb->map_ptr.
 *
 * Visible here: the TDB_INTERNAL and TDB_NOMMAP flags are tested before the
 * mmap() call (the statements they control are not visible in this excerpt).
 * The file is mapped MAP_SHARED from offset 0 for tdb->map_size bytes using
 * the protection bits in tdb->mmap_flags.  A MAP_FAILED result is reported
 * through tdb_logerr() at TDB_LOG_WARNING level with TDB_SUCCESS as the code.
 * NOTE(review): presumably the failure is non-fatal because tdb_read() and
 * tdb_write() have pread()/pwrite() paths -- confirm what map_ptr is set to
 * on failure. */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52 MAP_SHARED, tdb->fd, 0);
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
57 if (tdb->map_ptr == MAP_FAILED) {
59 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60 "tdb_mmap failed for size %lld (%s)",
61 (long long)tdb->map_size, strerror(errno));
/* tdb_oob: bounds check used before touching the database at a given length.
 *
 * Steps visible in this excerpt:
 *  - asserts that no direct-access pointers are outstanding unless
 *    TDB_NOLOCK is set or the expansion lock is held;
 *  - compares len against tdb->map_size;
 *  - for TDB_INTERNAL databases, logs TDB_ERR_IO ("beyond internal ...");
 *  - otherwise takes the expansion lock with F_RDLCK, fstat()s tdb->fd and
 *    releases the lock again (also on the fstat() failure path);
 *  - logs TDB_ERR_IO when the file is still shorter than len;
 *  - finally stores st.st_size into tdb->map_size.
 * The remaining parameter(s), the return statements and the unmap/remap
 * calls are not visible here.
 * NOTE(review): st.st_size is an off_t (POSIX struct stat) but is compared
 * against a size_t and passed uncast to a %zu conversion in the "beyond eof"
 * message; confirm this is safe where off_t and size_t differ in width. */
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and expand
68 note that "len" is the minimum length needed for the db
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
76 /* We can't hold pointers during this: we could unmap! */
77 assert(!tdb->direct_access
78 || (tdb->flags & TDB_NOLOCK)
79 || tdb_has_expansion_lock(tdb));
81 if (len <= tdb->map_size)
83 if (tdb->flags & TDB_INTERNAL) {
85 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86 "tdb_oob len %lld beyond internal"
89 (long long)tdb->map_size);
94 ecode = tdb_lock_expand(tdb, F_RDLCK);
95 if (ecode != TDB_SUCCESS) {
99 if (fstat(tdb->fd, &st) != 0) {
100 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101 "Failed to fstat file: %s", strerror(errno));
102 tdb_unlock_expand(tdb, F_RDLCK);
106 tdb_unlock_expand(tdb, F_RDLCK);
108 if (st.st_size < (size_t)len) {
110 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111 "tdb_oob len %zu beyond eof at %zu",
112 (size_t)len, st.st_size);
117 /* Unmap, update size, remap */
120 tdb->map_size = st.st_size;
/* Byte-swap a buffer in place when the database needs endian conversion.
 *
 * When TDB_CONVERT is set in tdb->flags and buf is non-NULL, buf is treated
 * as an array of uint64_t and each of the first size / 8 words is passed
 * through bswap_64(); any trailing size % 8 bytes are not touched by the
 * loop.  Otherwise the buffer is left alone.
 * NOTE(review): the return statement is not visible in this excerpt --
 * presumably buf is returned so calls can be nested; confirm. */
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
128 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129 uint64_t i, *p = (uint64_t *)buf;
130 for (i = 0; i < size / 8; i++)
131 p[i] = bswap_64(p[i]);
/* Scan the offsets with indices [start, end) of the tdb_off_t array located
 * at file offset base.
 *
 * The range is obtained with tdb_access_read() using convert == false (see
 * the existing comment: zero-ness does not depend on byte order).  If that
 * returns an error pointer, the error code is stored in tdb->ecode.  The
 * access is released with tdb_access_release() after the scan.
 * NOTE(review): the loop body and the return statements are not visible in
 * this excerpt; going by the function name the result is the index of the
 * first non-zero entry -- confirm the value returned on error / not found. */
136 /* FIXME: Return the off? */
137 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
138 tdb_off_t base, uint64_t start, uint64_t end)
143 /* Zero vs non-zero is the same unconverted: minor optimization. */
144 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
145 (end - start) * sizeof(tdb_off_t), false);
146 if (TDB_PTR_IS_ERR(val)) {
147 tdb->ecode = TDB_PTR_ERR(val);
151 for (i = 0; i < (end - start); i++) {
155 tdb_access_release(tdb, val);
/* Scan num consecutive tdb_off_t values starting at file offset off.
 *
 * The array is obtained with tdb_access_read() using convert == false; an
 * error pointer from that call has its code stored in tdb->ecode.  The
 * access is released with tdb_access_release() once the scan is finished.
 * Per the existing comment the result is the index of the first zero entry,
 * or num when there is none.
 * NOTE(review): the loop body, the remaining parameter(s) and the return
 * statements (including the value returned on error) are not visible in this
 * excerpt. */
159 /* Return first zero offset in num offset array, or num. */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
166 /* Zero vs non-zero is the same unconverted: minor optimization. */
167 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168 if (TDB_PTR_IS_ERR(val)) {
169 tdb->ecode = TDB_PTR_ERR(val);
173 for (i = 0; i < num; i++) {
177 tdb_access_release(tdb, val);
/* Zero len bytes of the database starting at offset off.
 *
 * A writable direct pointer is requested first via tdb->methods->direct();
 * an error pointer from that call is returned to the caller as its error
 * code.  The fallback visible here writes from a zero-filled 8192-byte stack
 * buffer through tdb->methods->twrite(), at most sizeof(buf) bytes per call.
 * The database must not be read-only (asserted, after the direct() call).
 * NOTE(review): the branch that uses the direct pointer and the loop around
 * the twrite() calls are not visible in this excerpt. */
181 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
183 char buf[8192] = { 0 };
184 void *p = tdb->methods->direct(tdb, off, len, true);
185 enum TDB_ERROR ecode = TDB_SUCCESS;
187 assert(!tdb->read_only);
188 if (TDB_PTR_IS_ERR(p)) {
189 return TDB_PTR_ERR(p);
/* Never hand more than one buffer's worth to a single twrite() call. */
196 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
197 ecode = tdb->methods->twrite(tdb, off, buf, todo);
198 if (ecode != TDB_SUCCESS) {
/* Read one tdb_off_t stored at file offset off.
 *
 * Without TDB_CONVERT (marked as the likely case) a direct pointer of
 * sizeof(tdb_off_t) bytes is requested from tdb->methods->direct(); an error
 * pointer has its code stored in tdb->ecode.  The other visible path reads
 * into a local through tdb_read_convert(), which applies endian conversion.
 * NOTE(review): the return statements are not visible in this excerpt, so
 * the value returned on failure cannot be stated from here. */
207 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
210 enum TDB_ERROR ecode;
212 if (likely(!(tdb->flags & TDB_CONVERT))) {
213 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
215 if (TDB_PTR_IS_ERR(p)) {
216 tdb->ecode = TDB_PTR_ERR(p);
223 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
224 if (ecode != TDB_SUCCESS) {
/* Write len bytes from buf to the database at offset off.
 *
 * Visible behaviour:
 *  - a read-only database is rejected with TDB_ERR_RDONLY (logged as a
 *    usage error);
 *  - the end of the range (off + len) is checked through tdb->methods->oob();
 *  - the data is copied either with memcpy() into the mapping at
 *    tdb->map_ptr + off or with pwrite() on tdb->fd;
 *  - a failed write is logged and returned as TDB_ERR_IO.
 * NOTE(review): the conditions selecting memcpy() vs pwrite() and the test
 * on the pwrite() result are not visible in this excerpt. */
231 /* write a lump of data at a specified offset */
232 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
233 const void *buf, tdb_len_t len)
235 enum TDB_ERROR ecode;
237 if (tdb->read_only) {
238 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
239 "Write to read-only database");
242 /* FIXME: Bogus optimization? */
247 ecode = tdb->methods->oob(tdb, off + len, 0);
248 if (ecode != TDB_SUCCESS) {
253 memcpy(off + (char *)tdb->map_ptr, buf, len);
256 ret = pwrite(tdb->fd, buf, len, off);
258 /* This shouldn't happen: we avoid sparse files. */
262 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
263 "tdb_write: %zi at %zu len=%zu (%s)",
264 ret, (size_t)off, (size_t)len,
/* Read len bytes from the database at offset off into buf.
 *
 * The end of the range (off + len) is checked through tdb->methods->oob()
 * first.  The data then comes either from the mapping (memcpy() from
 * tdb->map_ptr + off) or from pread() on tdb->fd; a failed read is logged,
 * including the current map size, and returned as TDB_ERR_IO.
 * NOTE(review): the condition selecting memcpy() vs pread() and the test on
 * the pread() result are not visible in this excerpt. */
271 /* read a lump of data at a specified offset */
272 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
273 void *buf, tdb_len_t len)
275 enum TDB_ERROR ecode;
277 ecode = tdb->methods->oob(tdb, off + len, 0);
278 if (ecode != TDB_SUCCESS) {
283 memcpy(buf, off + (char *)tdb->map_ptr, len);
285 ssize_t r = pread(tdb->fd, buf, len, off);
287 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
288 "tdb_read failed with %zi at %zu "
289 "len=%zu (%s) map_size=%zu",
290 r, (size_t)off, (size_t)len,
292 (size_t)tdb->map_size);
/* Write len bytes of rec at offset off, endian-converting when required.
 *
 * With TDB_CONVERT set (marked as the unlikely case) the record is copied
 * into a malloc()ed scratch buffer, converted there with tdb_convert() and
 * written through tdb->methods->twrite(), so the caller's rec is never
 * modified.  An allocation failure is logged and returned as TDB_ERR_OOM.
 * Without TDB_CONVERT, rec is written as is.
 * NOTE(review): the release of the scratch buffer and the return statement
 * are not visible in this excerpt; confirm conv is freed on every path. */
298 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
299 const void *rec, size_t len)
301 enum TDB_ERROR ecode;
303 if (unlikely((tdb->flags & TDB_CONVERT))) {
304 void *conv = malloc(len);
306 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
307 "tdb_write: no memory converting"
/* Convert a private copy: rec is const and must stay untouched. */
310 memcpy(conv, rec, len);
311 ecode = tdb->methods->twrite(tdb, off,
312 tdb_convert(tdb, conv, len), len);
315 ecode = tdb->methods->twrite(tdb, off, rec, len);
/* Read len bytes at offset off into rec, then endian-convert rec in place.
 *
 * The data is fetched through tdb->methods->tread() and the buffer is handed
 * to tdb_convert(), which only swaps when TDB_CONVERT is set.
 * NOTE(review): as far as this excerpt shows, tdb_convert() is called
 * directly after the read without looking at ecode first; the return
 * statement is not visible -- presumably ecode is returned; confirm. */
320 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
321 void *rec, size_t len)
323 enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
324 tdb_convert(tdb, rec, len);
/* Store the single offset value val at file offset off.
 *
 * A read-only database is rejected with TDB_ERR_RDONLY (logged as a usage
 * error).  Without TDB_CONVERT (marked as the likely case) a writable direct
 * pointer of sizeof(tdb_off_t) bytes is requested from
 * tdb->methods->direct(), and an error pointer is returned as its error
 * code.  The last visible statement writes val through tdb_write_convert().
 * NOTE(review): the store through the direct pointer and the handling of a
 * NULL direct pointer are not visible in this excerpt. */
328 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
329 tdb_off_t off, tdb_off_t val)
331 if (tdb->read_only) {
332 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
333 "Write to read-only database");
336 if (likely(!(tdb->flags & TDB_CONVERT))) {
337 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
339 if (TDB_PTR_IS_ERR(p)) {
340 return TDB_PTR_ERR(p);
347 return tdb_write_convert(tdb, off, &val, sizeof(val));
/* Allocate a buffer of prefix + len bytes and read len bytes from the
 * database at offset into it, leaving the first prefix bytes for the caller.
 *
 * At least one byte is always requested from malloc().  Errors are reported
 * as error pointers: TDB_ERR_PTR(TDB_ERR_OOM) after logging when the
 * allocation fails, or TDB_ERR_PTR(ecode) when tdb->methods->tread() fails.
 * NOTE(review): the success return and the release of buf on the read
 * failure path are not visible in this excerpt; confirm buf is freed there. */
350 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
351 tdb_len_t len, unsigned int prefix)
354 enum TDB_ERROR ecode;
356 /* some systems don't like zero length malloc */
357 buf = malloc(prefix + len ? prefix + len : 1);
359 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
360 "tdb_alloc_read malloc failed len=%zu",
361 (size_t)(prefix + len));
362 return TDB_ERR_PTR(TDB_ERR_OOM);
/* The payload goes after the caller-reserved prefix area. */
364 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
365 if (unlikely(ecode != TDB_SUCCESS)) {
367 return TDB_ERR_PTR(ecode);
/* Public wrapper around _tdb_alloc_read() with no reserved prefix: returns
 * whatever _tdb_alloc_read(tdb, offset, len, 0) returns, i.e. a malloc()ed
 * buffer or an error pointer (test with TDB_PTR_IS_ERR). */
373 /* read a lump of data, allocating the space for it */
374 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
376 return _tdb_alloc_read(tdb, offset, len, 0);
/* Write the contents of buf (size bytes) to the file at offset off to cover
 * a region of len bytes.
 *
 * Each pwrite() on tdb->fd is limited to min(len, size) bytes.  A failed
 * write is logged and returned as TDB_ERR_IO.
 * NOTE(review): the enclosing loop, the test on the pwrite() result and the
 * advancing of off / len are not visible in this excerpt. */
379 static enum TDB_ERROR fill(struct tdb_context *tdb,
380 const void *buf, size_t size,
381 tdb_off_t off, tdb_len_t len)
384 size_t n = len > size ? size : len;
385 ssize_t ret = pwrite(tdb->fd, buf, n, off);
390 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
392 " %zi at %zu len=%zu (%s)",
393 ret, (size_t)off, (size_t)len,
/* Grow the database by addition bytes and update tdb->map_size.
 *
 * Visible behaviour:
 *  - a read-only database is rejected with TDB_ERR_RDONLY;
 *  - TDB_INTERNAL databases grow their buffer with realloc() into a
 *    temporary (so the old tdb->map_ptr stays valid on failure), returning
 *    TDB_ERR_OOM when that fails;
 *  - otherwise the file is extended with ftruncate() -- a failure there is
 *    tolerated per the existing comment -- and the new region is then
 *    written out with fill() using a buffer of 0x43 bytes so that the file
 *    is not sparse;
 *  - tdb->map_size is increased by addition on both paths.
 * NOTE(review): the remaining parameter(s), the declaration of buf and the
 * unmap/remap calls are not visible in this excerpt. */
402 /* expand a file. we prefer to use ftruncate, as that is what posix
403 says to use for mmap expansion */
404 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
408 enum TDB_ERROR ecode;
410 if (tdb->read_only) {
411 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
412 "Expand on read-only database");
415 if (tdb->flags & TDB_INTERNAL) {
416 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
418 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
419 "No memory to expand database");
422 tdb->map_size += addition;
424 /* Unmap before trying to write; old TDB claimed OpenBSD had
425 * problem with this otherwise. */
428 /* If this fails, we try to fill anyway. */
429 if (ftruncate(tdb->fd, tdb->map_size + addition))
432 /* now fill the file with something. This ensures that the
433 file isn't sparse, which would be very bad if we ran out of
434 disk. This must be done with write, not via mmap */
435 memset(buf, 0x43, sizeof(buf));
436 ecode = fill(tdb, buf, sizeof(buf), tdb->map_size, addition);
437 if (ecode != TDB_SUCCESS)
439 tdb->map_size += addition;
/* Obtain read access to len bytes of the database at offset off.
 *
 * Without TDB_CONVERT (marked as the likely case) a direct pointer is
 * requested from tdb->methods->direct() with its last argument false.  The
 * other visible path allocates a copy through _tdb_alloc_read() with room
 * for a struct tdb_access_hdr in front of the data, links that header onto
 * the tdb->access list and runs tdb_convert() over the data.  The direct
 * path is counted in tdb->direct_access.
 * NOTE(review): the conditions choosing between the two paths, the use of
 * the convert parameter and the return statements are not visible in this
 * excerpt.  The result is presumably handed back with tdb_access_release(). */
445 const void *tdb_access_read(struct tdb_context *tdb,
446 tdb_off_t off, tdb_len_t len, bool convert)
448 const void *ret = NULL;
450 if (likely(!(tdb->flags & TDB_CONVERT))) {
451 ret = tdb->methods->direct(tdb, off, len, false);
453 if (TDB_PTR_IS_ERR(ret)) {
458 struct tdb_access_hdr *hdr;
459 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
460 if (TDB_PTR_IS_ERR(hdr)) {
463 hdr->next = tdb->access;
467 tdb_convert(tdb, (void *)ret, len);
470 tdb->direct_access++;
/* Obtain write access to len bytes of the database at offset off.
 *
 * A read-only database is rejected: the error is logged and
 * TDB_ERR_PTR(TDB_ERR_RDONLY) is returned.  Without TDB_CONVERT (marked as
 * the likely case) a writable direct pointer is requested from
 * tdb->methods->direct().  The other visible path allocates a copy through
 * _tdb_alloc_read() with room for a struct tdb_access_hdr in front of the
 * data, links the header onto tdb->access, records the convert flag in it
 * and runs tdb_convert() over the data.  The direct path is counted in
 * tdb->direct_access.
 * NOTE(review): the conditions choosing between the two paths, the other
 * header fields and the return statements are not visible in this excerpt.
 * Changes are presumably written back with tdb_access_commit(). */
475 void *tdb_access_write(struct tdb_context *tdb,
476 tdb_off_t off, tdb_len_t len, bool convert)
480 if (tdb->read_only) {
481 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
482 "Write to read-only database");
483 return TDB_ERR_PTR(TDB_ERR_RDONLY);
486 if (likely(!(tdb->flags & TDB_CONVERT))) {
487 ret = tdb->methods->direct(tdb, off, len, true);
489 if (TDB_PTR_IS_ERR(ret)) {
495 struct tdb_access_hdr *hdr;
496 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
497 if (TDB_PTR_IS_ERR(hdr)) {
500 hdr->next = tdb->access;
504 hdr->convert = convert;
507 tdb_convert(tdb, (void *)ret, len);
509 tdb->direct_access++;
/* Walk the singly linked tdb->access list looking for the entry that
 * belongs to p.
 *
 * The walk uses a pointer to the link field (starting at &tdb->access), so
 * the result can be used to unlink the entry in place.
 * NOTE(review): the match test and the return statements are not visible in
 * this excerpt -- presumably NULL is returned when p has no header, i.e.
 * when it is a direct pointer; confirm. */
514 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
516 struct tdb_access_hdr **hp;
518 for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
/* Give back an access pointer p; its header, if any, is looked up with
 * find_hdr().
 *
 * Visible here: the header lookup and a decrement of tdb->direct_access.
 * NOTE(review): the branch between the two cases and the unlinking/freeing
 * of an allocated copy are not visible in this excerpt. */
525 void tdb_access_release(struct tdb_context *tdb, const void *p)
527 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
534 tdb->direct_access--;
/* Finish a write access on pointer p.
 *
 * The header is looked up with find_hdr().  Two write-back calls are visible,
 * each writing hdr->len bytes from p to hdr->off: one through
 * tdb_write_convert() and one through tdb_write().  A decrement of
 * tdb->direct_access is also visible.
 * NOTE(review): the conditions selecting between these statements
 * (presumably hdr->convert for the two writes), the freeing of the header
 * and the return statement are not visible in this excerpt. */
537 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
539 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
540 enum TDB_ERROR ecode;
545 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
547 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
551 tdb->direct_access--;
/* Return a pointer into the mapping for len bytes at offset off.
 *
 * When there is no mapping (tdb->map_ptr is NULL) a separate path is taken
 * (not visible in this excerpt -- presumably NULL is returned so callers
 * fall back to read/write; confirm).  Otherwise the end of the range
 * (off + len) is validated with tdb_oob(); a failure is returned as
 * TDB_ERR_PTR(ecode), success yields tdb->map_ptr + off.
 * NOTE(review): the remaining parameter(s) are not visible here. */
558 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
561 enum TDB_ERROR ecode;
563 if (unlikely(!tdb->map_ptr))
566 ecode = tdb_oob(tdb, off + len, true);
567 if (unlikely(ecode != TDB_SUCCESS))
568 return TDB_ERR_PTR(ecode);
569 return (char *)tdb->map_ptr + off;
/* Statistics helper taking a counter pointer s and an amount val.
 *
 * The visible test checks that the address of s lies below the end of the
 * statistics block, computed as tdb->stats plus tdb->stats->size bytes.
 * NOTE(review): the guarded statement is not visible in this excerpt --
 * presumably *s += val; also confirm tdb->stats is non-NULL at call sites. */
572 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
574 if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
/* Default I/O method table; tdb_io_init() installs its address in
 * tdb->methods.
 * NOTE(review): the initializer entries are not visible in this excerpt. */
578 static const struct tdb_methods io_methods = {
/* Point tdb->methods at the file-local io_methods table, so later calls
 * made through tdb->methods use the default implementations. */
587 initialise the default methods table
589 void tdb_io_init(struct tdb_context *tdb)
591 tdb->methods = &io_methods;