2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/*
 * tdb_munmap - release the memory mapping recorded in a tdb_file.
 *
 * The visible statement hands file->map_ptr and file->map_size to munmap().
 *
 * NOTE(review): listing lines 33-37 and everything after 38 are absent from
 * this view, so any guard conditions in front of the call and any reset of
 * map_ptr afterwards cannot be confirmed here.
 */
32 void tdb_munmap(struct tdb_file *file)
38 munmap(file->map_ptr, file->map_size);
/*
 * tdb_mmap - attempt to map the database file into memory.
 *
 * TDB_INTERNAL and TDB_NOMMAP are tested first; the statements those tests
 * guard are missing from this listing (presumably early returns - TODO
 * confirm).  The mapping is MAP_SHARED, starts at file offset 0, covers
 * tdb->file->map_size bytes and uses tdb->mmap_flags as the protection
 * argument.  The function returns void, so a failure is visible to callers
 * only through map_ptr and the log.
 */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size, tdb->mmap_flags,
52 MAP_SHARED, tdb->file->fd, 0);
/* NOTE(review): the next line reads like the interior of a block comment
 * whose opening and closing lines are absent from this listing. */
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
/* Normalise the failure sentinel to NULL so that the map_ptr truth tests
 * in tdb_read and tdb_write select their pread/pwrite branch.  The failure
 * is logged at warning level with TDB_SUCCESS as the error code. */
57 if (tdb->file->map_ptr == MAP_FAILED) {
58 tdb->file->map_ptr = NULL;
59 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60 "tdb_mmap failed for size %lld (%s)",
61 (long long)tdb->file->map_size, strerror(errno));
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and expand
67 if necessary.
68 note that "len" is the minimum length needed for the db
69 */
/*
 * tdb_oob - make sure the first len bytes of the database are addressable.
 *
 * Visible steps: nothing more to check when len already fits in map_size;
 * a TDB_INTERNAL database that is too small is logged as TDB_ERR_IO;
 * otherwise the expansion lock is taken for reading, the file is fstat-ed,
 * the lock is dropped, and if the file now holds at least len bytes the
 * old mapping is removed and st.st_size becomes the new map_size.
 *
 * NOTE(review): the tail of the parameter list, the local declarations and
 * every return statement sit on listing lines that are not shown.  The call
 * in tdb_direct passes true as a third argument, and tdb_read and tdb_write
 * pass 0 through methods->oob; the meaning of that argument is not visible
 * here.
 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
76 /* We can't hold pointers during this: we could unmap! */
77 assert(!tdb->direct_access
78 || (tdb->flags & TDB_NOLOCK)
79 || tdb_has_expansion_lock(tdb));
/* Fast path: the request lies inside what is already known to exist. */
81 if (len <= tdb->file->map_size)
/* An internal database has no backing file to re-examine. */
83 if (tdb->flags & TDB_INTERNAL) {
85 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86 "tdb_oob len %lld beyond internal"
89 (long long)tdb->file->map_size);
/* The file size is sampled while holding the expansion lock for reading;
 * both the fstat failure path and the normal path release it. */
94 ecode = tdb_lock_expand(tdb, F_RDLCK);
95 if (ecode != TDB_SUCCESS) {
99 if (fstat(tdb->file->fd, &st) != 0) {
100 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101 "Failed to fstat file: %s", strerror(errno));
102 tdb_unlock_expand(tdb, F_RDLCK);
106 tdb_unlock_expand(tdb, F_RDLCK);
/* NOTE(review): st.st_size is compared with a size_t value and is passed
 * uncast to a %zu conversion below - confirm the types agree on every
 * supported target. */
108 if (st.st_size < (size_t)len) {
110 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111 "tdb_oob len %zu beyond eof at %zu",
112 (size_t)len, st.st_size);
117 /* Unmap, update size, remap */
118 tdb_munmap(tdb->file);
120 tdb->file->map_size = st.st_size;
/* NOTE(review): the remap step named in the comment above and the final
 * return are not visible in this listing. */
125 /* Endian conversion: we only ever deal with 8 byte quantities */
/*
 * tdb_convert - byte-swap a buffer in place for a TDB_CONVERT database.
 *
 * The swap happens only when the TDB_CONVERT flag is set and buf is not
 * NULL.  size is a byte count: size / 8 whole 64-bit words are swapped with
 * bswap_64, so a trailing remainder shorter than 8 bytes is left untouched.
 *
 * NOTE(review): the return statement is not visible in this listing.
 * tdb_write_convert passes the result straight to twrite as the data
 * pointer, so the function presumably returns buf - confirm.
 */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
128 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129 uint64_t i, *p = (uint64_t *)buf;
130 for (i = 0; i < size / 8; i++)
131 p[i] = bswap_64(p[i]);
136 /* Return first non-zero offset in offset array, or end, or -ve error. */
137 /* FIXME: Return the off? */
/*
 * tdb_find_nonzero_off - scan part of an on-disk array of tdb_off_t.
 *
 * The (end - start) entries beginning at base + start * sizeof(tdb_off_t)
 * are obtained through tdb_access_read with conversion switched off, walked
 * by index, and the access is released afterwards.  When the read yields an
 * error pointer its error code is returned through the uint64_t return
 * type.
 *
 * NOTE(review): the local declarations, the loop body and the final return
 * are missing from this listing, so the exact value produced for the found
 * and not-found cases cannot be confirmed here.
 */
138 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
139 tdb_off_t base, uint64_t start, uint64_t end)
144 /* Zero vs non-zero is the same unconverted: minor optimization. */
145 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
146 (end - start) * sizeof(tdb_off_t), false);
147 if (TDB_PTR_IS_ERR(val)) {
148 return TDB_PTR_ERR(val);
151 for (i = 0; i < (end - start); i++) {
/* Every tdb_access_read is paired with a release once scanning is done. */
155 tdb_access_release(tdb, val);
159 /* Return first zero offset in num offset array, or num, or -ve error. */
/*
 * tdb_find_zero_off - scan an on-disk array of tdb_off_t starting at off.
 *
 * num * sizeof(tdb_off_t) bytes are obtained through tdb_access_read with
 * conversion switched off, the entries are walked by index, and the access
 * is released afterwards.  When the read yields an error pointer its error
 * code is returned through the uint64_t return type.
 *
 * NOTE(review): the remainder of the parameter list (where num is
 * presumably declared), the loop body and the final return are missing
 * from this listing.
 */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
166 /* Zero vs non-zero is the same unconverted: minor optimization. */
167 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168 if (TDB_PTR_IS_ERR(val)) {
169 return TDB_PTR_ERR(val);
172 for (i = 0; i < num; i++) {
/* Every tdb_access_read is paired with a release once scanning is done. */
176 tdb_access_release(tdb, val);
/*
 * zero_out - overwrite len bytes at offset off with zeroes.
 *
 * A writable direct pointer is requested first; an error pointer is
 * converted to its error code and returned.  The visible fallback sends
 * pieces of a zero-filled 8192-byte stack buffer through methods->twrite,
 * each piece being at most sizeof(buf) bytes.
 *
 * NOTE(review): listing lines 189-194 and 198 onward are absent, so the
 * handling of a usable direct pointer, the loop around the chunked write
 * and the final return cannot be confirmed here.
 */
180 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
182 char buf[8192] = { 0 };
/* This initialiser runs before the read_only assertion further down. */
183 void *p = tdb->methods->direct(tdb, off, len, true);
184 enum TDB_ERROR ecode = TDB_SUCCESS;
186 assert(!tdb->read_only);
187 if (TDB_PTR_IS_ERR(p)) {
188 return TDB_PTR_ERR(p);
/* Chunk size: whatever remains, capped at the size of the zero buffer. */
195 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
196 ecode = tdb->methods->twrite(tdb, off, buf, todo);
197 if (ecode != TDB_SUCCESS) {
/*
 * tdb_read_off - fetch one tdb_off_t stored at offset off.
 *
 * Without TDB_CONVERT a read-side direct pointer of sizeof(tdb_off_t) bytes
 * is requested; an error pointer is converted to its error code and
 * returned through the tdb_off_t return type.  The other visible path
 * reads into a local through tdb_read_convert and tests the result.
 *
 * NOTE(review): the declaration of ret, the final argument of the direct
 * call, the use of a valid direct pointer and the closing returns are
 * missing from this listing.
 */
206 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
209 enum TDB_ERROR ecode;
211 if (likely(!(tdb->flags & TDB_CONVERT))) {
212 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
214 if (TDB_PTR_IS_ERR(p)) {
215 return TDB_PTR_ERR(p);
/* Copying read with endian conversion applied by tdb_read_convert. */
221 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
222 if (ecode != TDB_SUCCESS) {
228 /* write a lump of data at a specified offset */
/*
 * tdb_write - store len bytes from buf at file offset off.
 *
 * A read-only database is rejected with TDB_ERR_RDONLY.  The end of the
 * range (off + len) is validated through methods->oob before any data
 * moves.  With a mapping present the bytes are copied into it with memcpy;
 * the other visible path uses pwrite on the file descriptor and logs a
 * TDB_ERR_IO error.
 *
 * NOTE(review): no wrap-around check on off + len is visible, and the
 * declaration of ret, the condition guarding the error log and the final
 * return are missing from this listing.
 */
229 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
230 const void *buf, tdb_len_t len)
232 enum TDB_ERROR ecode;
234 if (tdb->read_only) {
235 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
236 "Write to read-only database");
/* Bounds check on the last byte to be written. */
239 ecode = tdb->methods->oob(tdb, off + len, 0);
240 if (ecode != TDB_SUCCESS) {
244 if (tdb->file->map_ptr) {
245 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
248 ret = pwrite(tdb->file->fd, buf, len, off);
250 /* This shouldn't happen: we avoid sparse files. */
254 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
255 "tdb_write: %zi at %zu len=%zu (%s)",
256 ret, (size_t)off, (size_t)len,
263 /* read a lump of data at a specified offset */
/*
 * tdb_read - load len bytes from file offset off into buf.
 *
 * The end of the range (off + len) is validated through methods->oob first.
 * With a mapping present the bytes are copied out of it with memcpy; the
 * other visible path uses pread on the file descriptor and logs a
 * TDB_ERR_IO error that includes the current map_size.
 *
 * NOTE(review): no wrap-around check on off + len is visible, and the
 * condition guarding the error log plus the final return are missing from
 * this listing.
 */
264 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
265 void *buf, tdb_len_t len)
267 enum TDB_ERROR ecode;
/* Bounds check on the last byte to be read. */
269 ecode = tdb->methods->oob(tdb, off + len, 0);
270 if (ecode != TDB_SUCCESS) {
274 if (tdb->file->map_ptr) {
275 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
277 ssize_t r = pread(tdb->file->fd, buf, len, off);
279 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
280 "tdb_read failed with %zi at %zu "
281 "len=%zu (%s) map_size=%zu",
282 r, (size_t)off, (size_t)len,
284 (size_t)tdb->file->map_size);
/*
 * tdb_write_convert - write a record, byte-swapping it first when the
 * database is flagged TDB_CONVERT.
 *
 * In the converting case rec is duplicated into a malloc-ed buffer of len
 * bytes, the duplicate is swapped by tdb_convert and handed to
 * methods->twrite; the caller-supplied rec is const and is only read.
 * Allocation failure is logged as TDB_ERR_OOM.  Without TDB_CONVERT rec
 * goes to methods->twrite unchanged.
 *
 * NOTE(review): the release of conv and the final return fall on listing
 * lines that are not shown (305-306 and 308 onward) - confirm the
 * temporary is freed on every path.
 */
290 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
291 const void *rec, size_t len)
293 enum TDB_ERROR ecode;
295 if (unlikely((tdb->flags & TDB_CONVERT))) {
296 void *conv = malloc(len);
298 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
299 "tdb_write: no memory converting"
302 memcpy(conv, rec, len);
303 ecode = tdb->methods->twrite(tdb, off,
304 tdb_convert(tdb, conv, len), len);
307 ecode = tdb->methods->twrite(tdb, off, rec, len);
/*
 * tdb_read_convert - read len bytes at off into rec, then apply
 * tdb_convert to the buffer.
 *
 * The result of methods->tread is captured in ecode.
 *
 * NOTE(review): tdb_convert is invoked on the very next listing line with
 * no test of ecode in between, so the buffer is processed even after a
 * failed read - confirm callers discard rec on error.  The return
 * statement itself is not visible in this listing.
 */
312 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
313 void *rec, size_t len)
315 enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
316 tdb_convert(tdb, rec, len);
/*
 * tdb_write_off - store a single tdb_off_t value at offset off.
 *
 * A read-only database is rejected with TDB_ERR_RDONLY.  Without
 * TDB_CONVERT a direct pointer of sizeof(tdb_off_t) bytes is requested and
 * an error pointer is converted to its error code.  The last visible
 * statement writes the value through tdb_write_convert.
 *
 * NOTE(review): the final argument of the direct call and the store
 * through a valid direct pointer sit on listing lines that are not shown.
 */
320 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
321 tdb_off_t off, tdb_off_t val)
323 if (tdb->read_only) {
324 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
325 "Write to read-only database");
328 if (likely(!(tdb->flags & TDB_CONVERT))) {
329 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
331 if (TDB_PTR_IS_ERR(p)) {
332 return TDB_PTR_ERR(p);
/* Copying write, with endian conversion applied when required. */
339 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * _tdb_alloc_read - allocate prefix + len bytes and read len bytes from
 * offset into the area that follows the prefix.
 *
 * The first prefix bytes are not written by the visible code; the two
 * access functions use them for a struct tdb_access_hdr.  Failures are
 * reported as error pointers built with TDB_ERR_PTR: TDB_ERR_OOM when the
 * allocation fails, otherwise the code returned by methods->tread.
 *
 * NOTE(review): the declaration of buf, the allocation test, whatever
 * happens to buf on the read-failure path (listing line 358) and the
 * success return are not visible here - confirm the buffer is freed when
 * tread fails.  No overflow check on prefix + len is visible either.
 */
342 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
343 tdb_len_t len, unsigned int prefix)
346 enum TDB_ERROR ecode;
348 /* some systems don't like zero length malloc */
349 buf = malloc(prefix + len ? prefix + len : 1);
351 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
352 "tdb_alloc_read malloc failed len=%zu",
353 (size_t)(prefix + len));
354 return TDB_ERR_PTR(TDB_ERR_OOM);
/* Payload lands after the caller-reserved prefix. */
356 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
357 if (unlikely(ecode != TDB_SUCCESS)) {
359 return TDB_ERR_PTR(ecode);
365 /* read a lump of data, allocating the space for it */
/*
 * tdb_alloc_read - public wrapper that forwards to _tdb_alloc_read with a
 * prefix of 0, so the returned buffer starts directly with the len bytes
 * read from offset.  Errors come back as the error pointers produced by
 * _tdb_alloc_read.
 */
366 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
368 return _tdb_alloc_read(tdb, offset, len, 0);
/*
 * fill - write the pattern held in buf (size bytes) into the file starting
 * at off, for a total of len bytes.
 *
 * Each pwrite covers the smaller of len and size.  A failure is logged as
 * TDB_ERR_IO.
 *
 * NOTE(review): the loop around the write, the test applied to ret, the
 * advance of off and len, and the final return are on listing lines that
 * are not shown, so short-write handling cannot be confirmed here.
 */
371 static enum TDB_ERROR fill(struct tdb_context *tdb,
372 const void *buf, size_t size,
373 tdb_off_t off, tdb_len_t len)
/* Never write more than the pattern buffer holds in one call. */
376 size_t n = len > size ? size : len;
377 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
382 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
384 " %zi at %zu len=%zu (%s)",
385 ret, (size_t)off, (size_t)len,
394 /* expand a file. we prefer to use ftruncate, as that is what posix
395 says to use for mmap expansion */
/*
 * tdb_expand_file - grow the database by addition bytes.
 *
 * A read-only database is rejected with TDB_ERR_RDONLY.
 * TDB_INTERNAL: the in-memory image is enlarged with realloc; the result
 * goes into a temporary first, so map_ptr is only replaced after the call
 * has succeeded, and failure is logged as TDB_ERR_OOM.
 * File-backed: the mapping is dropped, ftruncate is attempted, the new
 * region is written through fill with a buffer of 0x43 bytes, and map_size
 * is advanced once fill reports success.
 *
 * NOTE(review): the declaration of addition and buf, the statements guarded
 * by the ftruncate and fill tests, any remap, and the final return are
 * missing from this listing.
 */
396 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
400 enum TDB_ERROR ecode;
402 if (tdb->read_only) {
403 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
404 "Expand on read-only database");
407 if (tdb->flags & TDB_INTERNAL) {
408 char *new = realloc(tdb->file->map_ptr,
409 tdb->file->map_size + addition);
411 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
412 "No memory to expand database");
413 /* Commit the new image only after realloc has succeeded. */
414 tdb->file->map_ptr = new;
415 tdb->file->map_size += addition;
417 /* Unmap before trying to write; old TDB claimed OpenBSD had
418 * problem with this otherwise. */
419 tdb_munmap(tdb->file);
421 /* If this fails, we try to fill anyway. */
422 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
425 /* now fill the file with something. This ensures that the
426 file isn't sparse, which would be very bad if we ran out of
427 disk. This must be done with write, not via mmap */
428 memset(buf, 0x43, sizeof(buf));
429 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
431 if (ecode != TDB_SUCCESS)
433 tdb->file->map_size += addition;
/*
 * tdb_access_read - obtain read access to len bytes at off.
 *
 * Without TDB_CONVERT a read-side direct pointer is requested from
 * methods->direct.  The other visible path allocates a copy through
 * _tdb_alloc_read with room for a struct tdb_access_hdr in front and
 * links that header onto the tdb->access list.  tdb_convert is applied to
 * the data on a visible line, and tdb->direct_access is incremented on
 * another.
 *
 * NOTE(review): the conditions that choose between these paths (including
 * how the convert parameter is consulted), the assignment of ret on the
 * copy path and the returns are on listing lines that are not shown.
 * tdb_find_nonzero_off and tdb_find_zero_off pair this call with
 * tdb_access_release.
 */
439 const void *tdb_access_read(struct tdb_context *tdb,
440 tdb_off_t off, tdb_len_t len, bool convert)
442 const void *ret = NULL;
444 if (likely(!(tdb->flags & TDB_CONVERT))) {
445 ret = tdb->methods->direct(tdb, off, len, false);
447 if (TDB_PTR_IS_ERR(ret)) {
452 struct tdb_access_hdr *hdr;
/* The header is carved out of the same allocation as the data. */
453 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
454 if (TDB_PTR_IS_ERR(hdr)) {
457 hdr->next = tdb->access;
461 tdb_convert(tdb, (void *)ret, len);
464 tdb->direct_access++;
/*
 * tdb_access_write - obtain write access to len bytes at off.
 *
 * A read-only database is logged and reported as an error pointer carrying
 * TDB_ERR_RDONLY.  Without TDB_CONVERT a write-side direct pointer is
 * requested from methods->direct.  The other visible path allocates a copy
 * through _tdb_alloc_read with room for a struct tdb_access_hdr in front,
 * links the header onto tdb->access and records the convert flag in it.
 * tdb_convert is applied to the data on a visible line, and
 * tdb->direct_access is incremented on another.
 *
 * NOTE(review): the declaration of ret, the conditions that choose between
 * the paths, the remaining header fields (tdb_access_commit reads hdr->off
 * and hdr->len) and the returns are on listing lines that are not shown.
 */
469 void *tdb_access_write(struct tdb_context *tdb,
470 tdb_off_t off, tdb_len_t len, bool convert)
474 if (tdb->read_only) {
475 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
476 "Write to read-only database");
477 return TDB_ERR_PTR(TDB_ERR_RDONLY);
480 if (likely(!(tdb->flags & TDB_CONVERT))) {
481 ret = tdb->methods->direct(tdb, off, len, true);
483 if (TDB_PTR_IS_ERR(ret)) {
489 struct tdb_access_hdr *hdr;
/* The header is carved out of the same allocation as the data. */
490 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
491 if (TDB_PTR_IS_ERR(hdr)) {
494 hdr->next = tdb->access;
498 hdr->convert = convert;
501 tdb_convert(tdb, (void *)ret, len);
503 tdb->direct_access++;
/*
 * find_hdr - walk the tdb->access list looking for the entry that belongs
 * to p.
 *
 * The iteration keeps a pointer to the link field (starting at
 * &tdb->access and advancing to &(*hp)->next), which lets a caller unlink
 * the entry it receives.
 *
 * NOTE(review): the match test inside the loop and the value returned when
 * nothing matches are on listing lines that are not shown.
 */
508 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
510 struct tdb_access_hdr **hp;
512 for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
/*
 * tdb_access_release - end an access started with tdb_access_read or
 * tdb_access_write without writing anything back.
 *
 * The entry for p is looked up with find_hdr.  The one other visible
 * statement decrements tdb->direct_access.
 *
 * NOTE(review): listing lines 522-527 are absent, so the handling of a
 * found header (presumably unlink and free - TODO confirm) and the
 * condition under which the counter is decremented cannot be confirmed
 * here.
 */
519 void tdb_access_release(struct tdb_context *tdb, const void *p)
521 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
528 tdb->direct_access--;
/*
 * tdb_access_commit - end a write access and push the data back to the
 * database.
 *
 * The entry for p is looked up with find_hdr.  Two write-back calls are
 * visible, both using hdr->off and hdr->len: one through tdb_write_convert
 * and one through tdb_write.  The one other visible statement decrements
 * tdb->direct_access.
 *
 * NOTE(review): the condition that selects between the two writes
 * (presumably hdr->convert, which tdb_access_write records - TODO
 * confirm), the release of the header, the direct-pointer case and the
 * return are on listing lines that are not shown.
 */
531 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
533 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
534 enum TDB_ERROR ecode;
539 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
541 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
545 tdb->direct_access--;
/*
 * tdb_direct - hand out a pointer straight into the mapping.
 *
 * The end of the requested range (off + len) is checked with tdb_oob,
 * passing true as its third argument.  A failed check is returned as an
 * error pointer built with TDB_ERR_PTR; on success the result is map_ptr
 * advanced by off bytes.
 *
 * NOTE(review): the last parameter of the signature and the statement
 * guarded by the missing-mapping test are on listing lines that are not
 * shown; zero_out tests the result with TDB_PTR_IS_ERR before falling back
 * to twrite, so the unmapped case presumably yields NULL - confirm.  No
 * wrap-around check on off + len is visible.
 */
552 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
555 enum TDB_ERROR ecode;
557 if (unlikely(!tdb->file->map_ptr))
560 ecode = tdb_oob(tdb, off + len, true);
561 if (unlikely(ecode != TDB_SUCCESS))
562 return TDB_ERR_PTR(ecode);
563 return (char *)tdb->file->map_ptr + off;
/*
 * add_stat_ - statistics helper taking a counter address s and an amount
 * val.
 *
 * The visible test checks that s lies below the address tdb->stats plus
 * tdb->stats->size bytes, comparing the addresses as uintptr_t values.
 *
 * NOTE(review): the statement guarded by the test is not visible in this
 * listing (presumably it adds val to the counter - TODO confirm), and
 * tdb->stats is dereferenced with no visible NULL check.
 */
566 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
568 if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
/*
 * io_methods - file-local constant method table; tdb_io_init stores its
 * address in tdb->methods.
 *
 * NOTE(review): the initialiser entries are on listing lines that are not
 * shown.  Other code in this file calls the members tread, twrite, oob and
 * direct through tdb->methods.
 */
572 static const struct tdb_methods io_methods = {
580 /*
581 initialise the default methods table
582 */
/*
 * tdb_io_init - point tdb->methods at the file-local io_methods table so
 * that later calls made through tdb->methods reach the implementations
 * registered there.
 */
583 void tdb_io_init(struct tdb_context *tdb)
585 tdb->methods = &io_methods;