2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/*
 * tdb_munmap - release the memory mapping of the database file.
 *
 * tdb: database context.
 *
 * The TDB_INTERNAL flag is tested first (the statement it guards is not
 * visible in this listing), and munmap() is called on tdb->map_ptr for
 * tdb->map_size bytes.
 * NOTE(review): presumably internal databases are skipped and map_ptr is
 * reset after unmapping - confirm against the complete source.
 */
32 void tdb_munmap(struct tdb_context *tdb)
34 if (tdb->flags & TDB_INTERNAL)
38 munmap(tdb->map_ptr, tdb->map_size);
/*
 * tdb_mmap - map the database file into memory.
 *
 * tdb: database context.
 *
 * The TDB_INTERNAL and TDB_NOMMAP flags are each tested before mapping
 * (the guarded statements are not visible in this listing; presumably
 * both cases skip the mapping - confirm).  The mapping covers
 * tdb->map_size bytes of tdb->fd from offset 0, is MAP_SHARED, uses
 * tdb->mmap_flags as the protection argument, and its address is stored
 * in tdb->map_ptr.  The function returns void, so a failed mmap() is
 * only visible to the caller through the log and tdb->map_ptr.
 */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52 MAP_SHARED, tdb->fd, 0);
55 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
57 if (tdb->map_ptr == MAP_FAILED) {
/* Reported at warning level with TDB_SUCCESS as the error code. */
59 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60 "tdb_mmap failed for size %lld (%s)",
61 (long long)tdb->map_size, strerror(errno));
/*
 * tdb_oob - make sure the database is at least len bytes long.
 *
 * tdb: database context.
 * len: minimum database length required.
 * A further parameter follows len in the full signature (callers in this
 * file pass true or 0 for it); its declaration is not visible in this
 * listing.
 *
 * Returns an enum TDB_ERROR.  Visible steps, in order:
 *  - assert that no direct-access pointers are outstanding unless
 *    TDB_NOLOCK is set or the expansion lock is held;
 *  - compare len with tdb->map_size;
 *  - for TDB_INTERNAL databases, log a TDB_ERR_IO error (any condition
 *    guarding the log call is not visible);
 *  - otherwise take the expansion lock with F_RDLCK, fstat() tdb->fd,
 *    and drop the lock again on both the failure and success paths;
 *  - log TDB_ERR_IO if the file is shorter than len;
 *  - adopt st.st_size as the new tdb->map_size.
 */
65 /* check for an out of bounds access - if it is out of bounds then
66 see if the database has been expanded by someone else and expand
68 note that "len" is the minimum length needed for the db
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
76 /* We can't hold pointers during this: we could unmap! */
77 assert(!tdb->direct_access
78 || (tdb->flags & TDB_NOLOCK)
79 || tdb_has_expansion_lock(tdb));
81 if (len <= tdb->map_size)
83 if (tdb->flags & TDB_INTERNAL) {
85 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86 "tdb_oob len %lld beyond internal"
89 (long long)tdb->map_size);
94 ecode = tdb_lock_expand(tdb, F_RDLCK);
95 if (ecode != TDB_SUCCESS) {
99 if (fstat(tdb->fd, &st) != 0) {
100 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101 "Failed to fstat file: %s", strerror(errno));
102 tdb_unlock_expand(tdb, F_RDLCK);
106 tdb_unlock_expand(tdb, F_RDLCK);
108 if (st.st_size < (size_t)len) {
/*
 * NOTE(review): st is filled in by fstat(), so st.st_size is an off_t,
 * yet it is formatted with %zu below - verify the types agree on all
 * targets or add a cast.
 */
110 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111 "tdb_oob len %zu beyond eof at %zu",
112 (size_t)len, st.st_size);
117 /* Unmap, update size, remap */
120 tdb->map_size = st.st_size;
/*
 * tdb_convert - byte-swap a buffer in place when the database requires it.
 *
 * tdb:  database context; only tdb->flags is read here.
 * buf:  buffer to convert; a NULL buf is left alone.
 * size: buffer length in bytes.  size / 8 words are swapped, so bytes
 *       beyond the last multiple of 8 are not touched.
 *
 * Nothing is swapped unless TDB_CONVERT is set in tdb->flags.  The return
 * statement is not visible in this listing.
 */
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
128 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129 uint64_t i, *p = (uint64_t *)buf;
130 for (i = 0; i < size / 8; i++)
131 p[i] = bswap_64(p[i]);
/*
 * tdb_find_nonzero_off - scan part of an on-disk array of tdb_off_t.
 *
 * tdb:   database context.
 * base:  file offset of element 0 of the array.
 * start: index of the first element examined.
 * end:   index one past the last element examined.
 *
 * Read access to elements [start, end) is obtained with
 * tdb_access_read() (convert == false), the elements are looped over,
 * and the access is dropped with tdb_access_release().  A failed access
 * is returned as TDB_PTR_ERR(val).  The loop body and the final return
 * are not visible in this listing.
 */
136 /* Return first non-zero offset in offset array, or end, or -ve error. */
137 /* FIXME: Return the off? */
138 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
139 tdb_off_t base, uint64_t start, uint64_t end)
144 /* Zero vs non-zero is the same unconverted: minor optimization. */
145 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
146 (end - start) * sizeof(tdb_off_t), false);
147 if (TDB_PTR_IS_ERR(val)) {
148 return TDB_PTR_ERR(val);
151 for (i = 0; i < (end - start); i++) {
155 tdb_access_release(tdb, val);
/*
 * tdb_find_zero_off - scan an on-disk array of tdb_off_t starting at off.
 *
 * tdb: database context.
 * off: file offset of the first array element.
 * The remainder of the parameter list is not visible in this listing;
 * the body uses num as the number of elements.
 *
 * Read access to num elements is obtained with tdb_access_read()
 * (convert == false), the elements are looped over, and the access is
 * dropped with tdb_access_release().  A failed access is returned as
 * TDB_PTR_ERR(val).  The loop body and the final return are not visible.
 */
159 /* Return first zero offset in num offset array, or num, or -ve error. */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
166 /* Zero vs non-zero is the same unconverted: minor optimization. */
167 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168 if (TDB_PTR_IS_ERR(val)) {
169 return TDB_PTR_ERR(val);
172 for (i = 0; i < num; i++) {
176 tdb_access_release(tdb, val);
/*
 * zero_out - write zero bytes over a range of the database.
 *
 * tdb: database context; asserted not to be read-only.
 * off: file offset of the start of the range.
 * len: number of bytes in the range.
 *
 * Returns an enum TDB_ERROR (TDB_SUCCESS is the initial value of ecode).
 * The direct() method is asked for a pointer to the range with true as
 * its last argument; an error pointer is returned as its error code.
 * The visible fallback writes from a zero-initialised 8192 byte stack
 * buffer through the twrite() method, at most sizeof(buf) bytes per
 * call.  The statements that use a successful direct pointer and the
 * loop around the twrite() call are not visible in this listing.
 */
180 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
182 char buf[8192] = { 0 };
183 void *p = tdb->methods->direct(tdb, off, len, true);
184 enum TDB_ERROR ecode = TDB_SUCCESS;
186 assert(!tdb->read_only);
187 if (TDB_PTR_IS_ERR(p)) {
188 return TDB_PTR_ERR(p);
/* Each chunk is capped at the size of the zero buffer. */
195 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
196 ecode = tdb->methods->twrite(tdb, off, buf, todo);
197 if (ecode != TDB_SUCCESS) {
/*
 * tdb_read_off - fetch the tdb_off_t stored at file offset off.
 *
 * tdb: database context.
 * off: file offset of the value.
 *
 * When TDB_CONVERT is not set, the value is reached through the direct()
 * method; an error pointer from direct() is returned as TDB_PTR_ERR(p).
 * The later visible path reads the value into ret with
 * tdb_read_convert() and checks the resulting error code.  The return
 * statements for the success cases are not visible in this listing.
 * NOTE(review): errors travel in the same tdb_off_t return value as
 * valid offsets - confirm how callers tell the two apart.
 */
206 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
209 enum TDB_ERROR ecode;
211 if (likely(!(tdb->flags & TDB_CONVERT))) {
212 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
214 if (TDB_PTR_IS_ERR(p)) {
215 return TDB_PTR_ERR(p);
221 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
222 if (ecode != TDB_SUCCESS) {
/*
 * tdb_write - store len bytes from buf at file offset off.
 *
 * tdb: database context.
 * off: destination file offset.
 * buf: source bytes.
 * len: number of bytes to store.
 *
 * Returns TDB_ERR_RDONLY (after logging) when tdb->read_only is set.
 * Otherwise the oob() method is asked to validate off + len, and the
 * data is either copied into the mapping at tdb->map_ptr + off with
 * memcpy() or written to tdb->fd with pwrite().  The conditions that
 * choose between those two paths, and the test on the pwrite() result,
 * are not visible in this listing.  The visible error path logs the
 * pwrite() result together with strerror text and returns TDB_ERR_IO.
 */
228 /* write a lump of data at a specified offset */
229 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
230 const void *buf, tdb_len_t len)
232 enum TDB_ERROR ecode;
234 if (tdb->read_only) {
235 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
236 "Write to read-only database");
239 /* FIXME: Bogus optimization? */
244 ecode = tdb->methods->oob(tdb, off + len, 0);
245 if (ecode != TDB_SUCCESS) {
250 memcpy(off + (char *)tdb->map_ptr, buf, len);
253 ret = pwrite(tdb->fd, buf, len, off);
255 /* This shouldn't happen: we avoid sparse files. */
259 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
260 "tdb_write: %zi at %zu len=%zu (%s)",
261 ret, (size_t)off, (size_t)len,
/*
 * tdb_read - load len bytes from file offset off into buf.
 *
 * tdb: database context.
 * off: source file offset.
 * buf: destination buffer, at least len bytes.
 * len: number of bytes to load.
 *
 * The oob() method is asked to validate off + len, and the data is
 * either copied out of the mapping at tdb->map_ptr + off with memcpy()
 * or read from tdb->fd with pread().  The conditions that choose between
 * those two paths, and the test on the pread() result, are not visible
 * in this listing.  The visible error path logs the pread() result, the
 * request and tdb->map_size, and returns TDB_ERR_IO.
 */
268 /* read a lump of data at a specified offset */
269 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
270 void *buf, tdb_len_t len)
272 enum TDB_ERROR ecode;
274 ecode = tdb->methods->oob(tdb, off + len, 0);
275 if (ecode != TDB_SUCCESS) {
280 memcpy(buf, off + (char *)tdb->map_ptr, len);
282 ssize_t r = pread(tdb->fd, buf, len, off);
284 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
285 "tdb_read failed with %zi at %zu "
286 "len=%zu (%s) map_size=%zu",
287 r, (size_t)off, (size_t)len,
289 (size_t)tdb->map_size);
/*
 * tdb_write_convert - write a record, byte-swapping it first if needed.
 *
 * tdb: database context.
 * off: destination file offset.
 * rec: record bytes; never modified (it is const).
 * len: record length in bytes.
 *
 * With TDB_CONVERT set, the record is copied into a malloc()ed scratch
 * buffer, converted there with tdb_convert(), and the scratch copy is
 * handed to the twrite() method.  Allocation failure is logged and
 * returned as TDB_ERR_OOM.  Without TDB_CONVERT, rec is handed to
 * twrite() directly.
 * NOTE(review): the release of conv and the final return are not visible
 * in this listing - confirm the scratch buffer is freed after the write.
 */
295 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
296 const void *rec, size_t len)
298 enum TDB_ERROR ecode;
300 if (unlikely((tdb->flags & TDB_CONVERT))) {
301 void *conv = malloc(len);
303 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
304 "tdb_write: no memory converting"
/* Convert a private copy so the caller data stays untouched. */
307 memcpy(conv, rec, len);
308 ecode = tdb->methods->twrite(tdb, off,
309 tdb_convert(tdb, conv, len), len);
312 ecode = tdb->methods->twrite(tdb, off, rec, len);
/*
 * tdb_read_convert - read a record and byte-swap it in place if needed.
 *
 * tdb: database context.
 * off: source file offset.
 * rec: destination buffer, at least len bytes.
 * len: record length in bytes.
 *
 * The record is read with the tread() method and then passed to
 * tdb_convert().  In the visible lines the conversion call directly
 * follows the read, without a test of ecode in between.  The return
 * statement is not visible in this listing.
 */
317 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
318 void *rec, size_t len)
320 enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
321 tdb_convert(tdb, rec, len);
/*
 * tdb_write_off - store the offset value val at file offset off.
 *
 * tdb: database context.
 * off: destination file offset.
 * val: value to store.
 *
 * Returns TDB_ERR_RDONLY (after logging) when tdb->read_only is set.
 * When TDB_CONVERT is not set, a pointer is requested from the direct()
 * method and an error pointer is returned as TDB_PTR_ERR(p); the
 * statements that store through a valid pointer are not visible in this
 * listing.  The final visible path writes val with tdb_write_convert().
 */
325 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
326 tdb_off_t off, tdb_off_t val)
328 if (tdb->read_only) {
329 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
330 "Write to read-only database");
333 if (likely(!(tdb->flags & TDB_CONVERT))) {
334 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
336 if (TDB_PTR_IS_ERR(p)) {
337 return TDB_PTR_ERR(p);
344 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * _tdb_alloc_read - allocate a buffer and read file data into it.
 *
 * tdb:    database context.
 * offset: file offset to read from.
 * len:    number of bytes to read.
 * prefix: number of extra bytes reserved in front of the data; the read
 *         lands at buf + prefix.
 *
 * prefix + len bytes are allocated with malloc(), or 1 byte when that
 * sum is zero.  Allocation failure is logged and reported as
 * TDB_ERR_PTR(TDB_ERR_OOM); a failed tread() is reported as
 * TDB_ERR_PTR(ecode).  The cleanup on read failure and the successful
 * return are not visible in this listing.
 */
347 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
348 tdb_len_t len, unsigned int prefix)
351 enum TDB_ERROR ecode;
353 /* some systems don't like zero length malloc */
354 buf = malloc(prefix + len ? prefix + len : 1);
356 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
357 "tdb_alloc_read malloc failed len=%zu",
358 (size_t)(prefix + len));
359 return TDB_ERR_PTR(TDB_ERR_OOM);
361 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
362 if (unlikely(ecode != TDB_SUCCESS)) {
364 return TDB_ERR_PTR(ecode);
/*
 * tdb_alloc_read - read len bytes at offset into a newly allocated buffer.
 *
 * Thin wrapper that calls _tdb_alloc_read() with a prefix of 0 and
 * returns its result unchanged, including any error pointer.
 * NOTE(review): the caller presumably owns and frees the returned
 * buffer - confirm against the callers.
 */
370 /* read a lump of data, allocating the space for it */
371 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
373 return _tdb_alloc_read(tdb, offset, len, 0);
/*
 * fill - write a repeating pattern into the file with pwrite().
 *
 * tdb:  database context; tdb->fd is the file written to.
 * buf:  pattern bytes.
 * size: number of bytes available in buf.
 * off:  file offset to write at.
 * len:  number of bytes still to be written.
 *
 * Each pwrite() covers the smaller of len and size bytes.  The visible
 * error path logs the pwrite() result and returns TDB_ERR_IO.  The loop
 * that advances off and len, and the test on the pwrite() result, are
 * not visible in this listing.
 */
376 static enum TDB_ERROR fill(struct tdb_context *tdb,
377 const void *buf, size_t size,
378 tdb_off_t off, tdb_len_t len)
381 size_t n = len > size ? size : len;
382 ssize_t ret = pwrite(tdb->fd, buf, n, off);
387 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
389 " %zi at %zu len=%zu (%s)",
390 ret, (size_t)off, (size_t)len,
/*
 * tdb_expand_file - grow the database by addition bytes.
 *
 * tdb: database context.
 * The remainder of the parameter list is not visible in this listing;
 * the body uses addition as the number of bytes to add.
 *
 * Returns TDB_ERR_RDONLY (after logging) when tdb->read_only is set.
 * For TDB_INTERNAL databases the in-memory image at tdb->map_ptr is
 * grown with realloc(); failure is logged and returned as TDB_ERR_OOM.
 * For file databases the file is extended with ftruncate() and the new
 * region is then written with fill() using a buffer of 0x43 bytes.
 * tdb->map_size is increased by addition on both paths.  The
 * declaration of buf, the unmap and remap steps, and the final return
 * are not visible in this listing.
 */
399 /* expand a file. we prefer to use ftruncate, as that is what posix
400 says to use for mmap expansion */
401 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
405 enum TDB_ERROR ecode;
407 if (tdb->read_only) {
408 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
409 "Expand on read-only database");
412 if (tdb->flags & TDB_INTERNAL) {
/* The result goes into a temporary, so map_ptr is not overwritten here. */
413 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
415 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
416 "No memory to expand database");
419 tdb->map_size += addition;
421 /* Unmap before trying to write; old TDB claimed OpenBSD had
422 * problem with this otherwise. */
425 /* If this fails, we try to fill anyway. */
426 if (ftruncate(tdb->fd, tdb->map_size + addition))
429 /* now fill the file with something. This ensures that the
430 file isn't sparse, which would be very bad if we ran out of
431 disk. This must be done with write, not via mmap */
432 memset(buf, 0x43, sizeof(buf));
433 ecode = fill(tdb, buf, sizeof(buf), tdb->map_size, addition);
434 if (ecode != TDB_SUCCESS)
436 tdb->map_size += addition;
/*
 * tdb_access_read - obtain a read-only view of len bytes at off.
 *
 * tdb:     database context.
 * off:     file offset of the region.
 * len:     region length in bytes.
 * convert: conversion request from the caller; the test on it is not
 *          visible in this listing.
 *
 * Visible pieces:
 *  - without TDB_CONVERT, a pointer is requested from the direct()
 *    method with false as its last argument, and checked for an error
 *    pointer;
 *  - a private copy is made with _tdb_alloc_read(), reserving room for
 *    a struct tdb_access_hdr in front of the data, and the header is
 *    linked onto the tdb->access list;
 *  - tdb_convert() is applied to the data of that copy;
 *  - tdb->direct_access is incremented.
 * The conditions connecting these pieces and the return statements are
 * not visible.  tdb_find_nonzero_off() and tdb_find_zero_off() in this
 * file pair this call with tdb_access_release().
 */
442 const void *tdb_access_read(struct tdb_context *tdb,
443 tdb_off_t off, tdb_len_t len, bool convert)
445 const void *ret = NULL;
447 if (likely(!(tdb->flags & TDB_CONVERT))) {
448 ret = tdb->methods->direct(tdb, off, len, false);
450 if (TDB_PTR_IS_ERR(ret)) {
455 struct tdb_access_hdr *hdr;
456 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
457 if (TDB_PTR_IS_ERR(hdr)) {
460 hdr->next = tdb->access;
464 tdb_convert(tdb, (void *)ret, len);
467 tdb->direct_access++;
/*
 * tdb_access_write - obtain a writable view of len bytes at off.
 *
 * tdb:     database context.
 * off:     file offset of the region.
 * len:     region length in bytes.
 * convert: stored in the header of a private copy as hdr->convert.
 *
 * Returns TDB_ERR_PTR(TDB_ERR_RDONLY) after logging when tdb->read_only
 * is set.  Other visible pieces:
 *  - without TDB_CONVERT, a pointer is requested from the direct()
 *    method with true as its last argument, and checked for an error
 *    pointer;
 *  - a private copy is made with _tdb_alloc_read(), reserving room for
 *    a struct tdb_access_hdr in front of the data, and the header is
 *    linked onto the tdb->access list;
 *  - tdb_convert() is applied to the data of that copy;
 *  - tdb->direct_access is incremented.
 * The conditions connecting these pieces and the return statements are
 * not visible in this listing.
 */
472 void *tdb_access_write(struct tdb_context *tdb,
473 tdb_off_t off, tdb_len_t len, bool convert)
477 if (tdb->read_only) {
478 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
479 "Write to read-only database");
480 return TDB_ERR_PTR(TDB_ERR_RDONLY);
483 if (likely(!(tdb->flags & TDB_CONVERT))) {
484 ret = tdb->methods->direct(tdb, off, len, true);
486 if (TDB_PTR_IS_ERR(ret)) {
492 struct tdb_access_hdr *hdr;
493 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
494 if (TDB_PTR_IS_ERR(hdr)) {
497 hdr->next = tdb->access;
501 hdr->convert = convert;
504 tdb_convert(tdb, (void *)ret, len);
506 tdb->direct_access++;
/*
 * find_hdr - search the tdb->access list for the entry belonging to p.
 *
 * tdb: database context that owns the list.
 * p:   pointer being looked up.
 *
 * The list is walked through a pointer to each link (first &tdb->access,
 * then the address of each next field), and the function returns a
 * pointer of that same type.  The comparison against p and the return
 * statements are not visible in this listing.
 */
511 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
513 struct tdb_access_hdr **hp;
515 for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
/*
 * tdb_access_release - finish with a pointer handed out by the access API.
 *
 * tdb: database context.
 * p:   pointer being released.
 *
 * The pointer is looked up with find_hdr(), and tdb->direct_access is
 * decremented in a visible line.  The handling of a found header and the
 * condition on the decrement are not visible in this listing.
 */
522 void tdb_access_release(struct tdb_context *tdb, const void *p)
524 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
531 tdb->direct_access--;
/*
 * tdb_access_commit - write back and finish with a writable access pointer.
 *
 * tdb: database context.
 * p:   pointer being committed.
 *
 * The pointer is looked up with find_hdr().  The data is written back to
 * hdr->off for hdr->len bytes, through tdb_write_convert() on one path
 * and tdb_write() on the other; the test choosing between them is not
 * visible in this listing.  tdb->direct_access is decremented in a
 * visible line.  The return statement is not visible.
 */
534 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
536 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
537 enum TDB_ERROR ecode;
542 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
544 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
548 tdb->direct_access--;
/*
 * tdb_direct - return a pointer straight into the mapped database.
 *
 * tdb: database context.
 * off: file offset of the region.
 * len: region length in bytes.
 * The remainder of the parameter list is not visible in this listing.
 *
 * The case of a missing mapping (tdb->map_ptr is NULL) is tested first;
 * the statement it guards is not visible.  off + len is then validated
 * with tdb_oob(), with true as the last argument, and a failure is
 * returned as TDB_ERR_PTR(ecode).  On success the result is
 * tdb->map_ptr + off.
 */
555 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
558 enum TDB_ERROR ecode;
560 if (unlikely(!tdb->map_ptr))
563 ecode = tdb_oob(tdb, off + len, true);
564 if (unlikely(ecode != TDB_SUCCESS))
565 return TDB_ERR_PTR(ecode);
566 return (char *)tdb->map_ptr + off;
/*
 * add_stat_ - update one statistics counter.
 *
 * tdb: database context; tdb->stats is the statistics block.
 * s:   address of the counter.
 * val: amount passed by the caller.
 *
 * The counter address is compared, as an integer, against the address of
 * tdb->stats plus tdb->stats->size.  The statement guarded by that test
 * is not visible in this listing; presumably it adds val to the counter
 * pointed to by s - confirm.
 */
569 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
571 if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
/*
 * Method table that tdb_io_init() stores in tdb->methods.  Its
 * initialisers are not visible in this listing; code in this file calls
 * the tread, twrite, oob and direct members through tdb->methods.
 */
575 static const struct tdb_methods io_methods = {
/*
 * tdb_io_init - point tdb->methods at the file-local io_methods table.
 *
 * tdb: database context to initialise.
 */
584 initialise the default methods table
586 void tdb_io_init(struct tdb_context *tdb)
588 tdb->methods = &io_methods;