2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/*
 * tdb_munmap - drop the memory mapping recorded in a tdb_file.
 * @file: file whose map_ptr / map_size describe the mapped region.
 *
 * Hands file->map_ptr and file->map_size to munmap().
 * NOTE(review): any guard conditions around the call, and whether map_ptr
 * is reset afterwards, are not visible in this excerpt - confirm against
 * the full source.
 */
32 void tdb_munmap(struct tdb_file *file)
38 munmap(file->map_ptr, file->map_size);
/*
 * tdb_mmap - (re)establish the shared memory mapping of the database file.
 * @tdb: database context; tdb->file->map_size gives the length to map and
 *       tdb->file->map_ptr receives the result.
 *
 * The protection is PROT_READ for databases opened O_RDONLY and
 * PROT_READ | PROT_WRITE otherwise.  On failure map_ptr is left NULL and a
 * warning is logged; nothing is returned to the caller.
 */
43 void tdb_mmap(struct tdb_context *tdb)
/* TDB_INTERNAL and TDB_NOMMAP are tested first.
 * NOTE(review): the statements they guard are not visible here -
 * presumably both skip the mapping; confirm. */
47 if (tdb->flags & TDB_INTERNAL)
50 if (tdb->flags & TDB_NOMMAP)
53 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
54 mmap_flags = PROT_READ;
56 mmap_flags = PROT_READ | PROT_WRITE;
58 /* size_t can be smaller than off_t. */
/* Only attempt the mapping when map_size survives a round trip through
 * size_t; otherwise the result is forced to MAP_FAILED. */
59 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
60 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
62 MAP_SHARED, tdb->file->fd, 0);
64 tdb->file->map_ptr = MAP_FAILED;
67 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
/* Normalise failure to NULL: the rest of this file tests map_ptr for
 * NULL to choose between the mapping and pread()/pwrite(). */
69 if (tdb->file->map_ptr == MAP_FAILED) {
70 tdb->file->map_ptr = NULL;
/* Logged at warning level with TDB_SUCCESS as the error code. */
71 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
72 "tdb_mmap failed for size %lld (%s)",
73 (long long)tdb->file->map_size, strerror(errno));
/* check for an out of bounds access - if it is out of bounds then
   see if the database has been expanded by someone else and, if so,
   expand our mapping to match.
   note that "len" is the minimum length needed for the db.

   If probe is true, len being too large isn't a failure.
*/
/*
 * tdb_oob - bounds check for an access of @len bytes starting at @off.
 * @tdb:   database context.
 * @off:   start offset of the access.
 * @len:   number of bytes needed from @off.
 * @probe: see the individual checks below; the statements that consult it
 *         are not visible in this excerpt.
 *
 * Visible flow: reject off + len wrapping, compare against the current
 * map_size, report an error for TDB_INTERNAL databases, otherwise fstat()
 * the file under the expansion read lock and, if the file is large enough,
 * unmap and record the new size.
 */
84 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb,
85 tdb_off_t off, tdb_len_t len, bool probe)
90 /* We can't hold pointers during this: we could unmap! */
91 assert(!tdb->tdb2.direct_access
92 || (tdb->flags & TDB_NOLOCK)
93 || tdb_has_expansion_lock(tdb));
/* off + len wrapped around: logged as TDB_ERR_IO.
 * NOTE(review): "%llu" is paired with (long long) arguments here; the
 * casts should be (unsigned long long) to match the format. */
95 if (len + off < len) {
99 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
100 "tdb_oob off %llu len %llu wrap\n",
101 (long long)off, (long long)len);
/* Compare the end of the access with the size we currently know about. */
104 if (len + off <= tdb->file->map_size)
/* Internal databases are checked against map_size only; no fstat(). */
106 if (tdb->flags & TDB_INTERNAL) {
110 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111 "tdb_oob len %lld beyond internal"
113 (long long)(off + len),
114 (long long)tdb->file->map_size);
/* The file size is sampled while holding the expansion lock for reading;
 * the lock is dropped on both the error and the success path of fstat(). */
118 ecode = tdb_lock_expand(tdb, F_RDLCK);
119 if (ecode != TDB_SUCCESS) {
123 if (fstat(tdb->file->fd, &st) != 0) {
124 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
125 "Failed to fstat file: %s", strerror(errno));
126 tdb_unlock_expand(tdb, F_RDLCK);
130 tdb_unlock_expand(tdb, F_RDLCK);
/* NOTE(review): st.st_size is an off_t but is printed with "%zu", and
 * "%llu" is again fed a (long long); both should be fixed in the full
 * source. */
132 if (st.st_size < off + len) {
136 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
137 "tdb_oob len %llu beyond eof at %zu",
138 (long long)(off + len), st.st_size);
142 /* Unmap, update size, remap */
143 tdb_munmap(tdb->file);
145 tdb->file->map_size = st.st_size;
150 /* Endian conversion: we only ever deal with 8 byte quantities */
/*
 * tdb_convert - byte-swap a buffer of 64-bit words in place.
 * @tdb:  context whose flags are consulted; nothing is swapped unless
 *        TDB_CONVERT is set.
 * @buf:  buffer to convert; a NULL buffer is left alone.
 * @size: length of @buf in bytes, asserted to be a multiple of 8.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * tdb_write_convert() passes the result straight to the write method, so
 * it presumably returns @buf - confirm.
 */
151 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
153 assert(size % 8 == 0);
154 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
155 uint64_t i, *p = (uint64_t *)buf;
156 for (i = 0; i < size / 8; i++)
157 p[i] = bswap_64(p[i]);
162 /* Return first non-zero offset in offset array, or end, or -ve error. */
163 /* FIXME: Return the off? */
/*
 * @tdb:   database context.
 * @base:  file offset of element 0 of the tdb_off_t array.
 * @start: index of the first element to examine.
 * @end:   index one past the last element to examine.
 *
 * Elements [start, end) are fetched with a single tdb_access_read() and
 * the access is released again before returning.  A failed access is
 * returned as an error encoded with TDB_ERR_TO_OFF().
 */
164 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
165 tdb_off_t base, uint64_t start, uint64_t end)
170 /* Zero vs non-zero is the same unconverted: minor optimization. */
171 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
172 (end - start) * sizeof(tdb_off_t), false);
173 if (TDB_PTR_IS_ERR(val)) {
174 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
/* i counts from 0, i.e. it is relative to @start, not to @base.
 * NOTE(review): the loop body is not visible in this excerpt. */
177 for (i = 0; i < (end - start); i++) {
181 tdb_access_release(tdb, val);
185 /* Return first zero offset in num offset array, or num, or -ve error. */
/*
 * @tdb: database context.
 * @off: file offset of the first tdb_off_t in the array.
 *
 * The visible code reads num * sizeof(tdb_off_t) bytes with one
 * tdb_access_read(), scans indices 0..num-1 and releases the access.  A
 * failed access is returned as an error encoded with TDB_ERR_TO_OFF().
 * NOTE(review): the remaining parameter line and the loop body are not
 * visible in this excerpt.
 */
186 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
192 /* Zero vs non-zero is the same unconverted: minor optimization. */
193 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
194 if (TDB_PTR_IS_ERR(val)) {
195 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
198 for (i = 0; i < num; i++) {
202 tdb_access_release(tdb, val);
/*
 * zero_out - clear @len bytes of the database starting at @off.
 * @tdb: database context; asserted not to be TDB_RDONLY.
 * @off: first byte to clear.
 * @len: number of bytes to clear.
 *
 * A writable direct pointer is requested first; an error pointer is
 * returned to the caller as its error code.  The visible fallback writes
 * from an 8192-byte zero-filled stack buffer through the twrite method, at
 * most sizeof(buf) bytes per call.
 * NOTE(review): what is done with a usable direct pointer, and the loop
 * around the twrite call, are not visible in this excerpt.
 */
206 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
208 char buf[8192] = { 0 };
209 void *p = tdb->tdb2.io->direct(tdb, off, len, true);
210 enum TDB_ERROR ecode = TDB_SUCCESS;
212 assert(!(tdb->flags & TDB_RDONLY));
213 if (TDB_PTR_IS_ERR(p)) {
214 return TDB_PTR_ERR(p);
/* Chunk size: whatever is left, capped at the size of the zero buffer. */
221 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
222 ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo);
223 if (ecode != TDB_SUCCESS) {
/*
 * tdb_read_off - read one tdb_off_t stored at file offset @off.
 * @tdb: database context.
 * @off: file offset of the value.
 *
 * Without TDB_CONVERT the value is reached through the direct-access
 * method; otherwise it is read and byte-swapped by tdb_read_convert().
 * Failures on either path are returned encoded with TDB_ERR_TO_OFF(), so
 * callers must test the result with TDB_OFF_IS_ERR().
 */
232 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
235 enum TDB_ERROR ecode;
237 if (likely(!(tdb->flags & TDB_CONVERT))) {
238 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
240 if (TDB_PTR_IS_ERR(p)) {
241 return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
/* Conversion needed, or (presumably) no direct pointer was available -
 * NOTE(review): the handling of a non-error p is not visible here. */
247 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
248 if (ecode != TDB_SUCCESS) {
249 return TDB_ERR_TO_OFF(ecode);
254 /* write a lump of data at a specified offset */
/*
 * @tdb: database context.
 * @off: destination offset in the database.
 * @buf: source bytes.
 * @len: number of bytes to write.
 *
 * Refuses read-only databases with TDB_ERR_RDONLY, bounds-checks the range
 * through the oob method, then copies into the mapping when one exists and
 * uses pwrite() on the file descriptor otherwise.
 */
255 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
256 const void *buf, tdb_len_t len)
258 enum TDB_ERROR ecode;
260 if (tdb->flags & TDB_RDONLY) {
261 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
262 "Write to read-only database");
/* probe is false: per the oob contract a too-large length is a failure. */
265 ecode = tdb->tdb2.io->oob(tdb, off, len, false);
266 if (ecode != TDB_SUCCESS) {
270 if (tdb->file->map_ptr) {
271 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
/* No mapping: write through the descriptor instead.
 * NOTE(review): the test applied to ret is not visible in this excerpt. */
274 ret = pwrite(tdb->file->fd, buf, len, off);
276 /* This shouldn't happen: we avoid sparse files. */
280 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
281 "tdb_write: %zi at %zu len=%zu (%s)",
282 ret, (size_t)off, (size_t)len,
289 /* read a lump of data at a specified offset */
/*
 * @tdb: database context.
 * @off: source offset in the database.
 * @buf: destination buffer, at least @len bytes.
 * @len: number of bytes to read.
 *
 * Bounds-checks the range through the oob method, then copies out of the
 * mapping when one exists and uses pread() on the file descriptor
 * otherwise.  A failed pread() is logged and reported as TDB_ERR_IO.
 */
290 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
291 void *buf, tdb_len_t len)
293 enum TDB_ERROR ecode;
/* probe is false: per the oob contract a too-large length is a failure. */
295 ecode = tdb->tdb2.io->oob(tdb, off, len, false);
296 if (ecode != TDB_SUCCESS) {
300 if (tdb->file->map_ptr) {
301 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
/* NOTE(review): the test applied to r is not visible in this excerpt. */
303 ssize_t r = pread(tdb->file->fd, buf, len, off);
305 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
306 "tdb_read failed with %zi at %zu "
307 "len=%zu (%s) map_size=%zu",
308 r, (size_t)off, (size_t)len,
310 (size_t)tdb->file->map_size);
/*
 * tdb_write_convert - write @len bytes from @rec at @off, byte-swapping
 * them first when the database has TDB_CONVERT set.
 * @tdb: database context.
 * @off: destination offset in the database.
 * @rec: source record in host byte order; it is const and never modified.
 * @len: record length in bytes.
 *
 * With TDB_CONVERT a heap copy of the record is made, converted with
 * tdb_convert() and handed to the twrite method; allocation failure is
 * logged and returned as TDB_ERR_OOM.  Without it @rec is written as is.
 * NOTE(review): the release of the temporary copy is not visible in this
 * excerpt - confirm it is freed on every path.
 */
316 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
317 const void *rec, size_t len)
319 enum TDB_ERROR ecode;
321 if (unlikely((tdb->flags & TDB_CONVERT))) {
322 void *conv = malloc(len);
324 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
325 "tdb_write: no memory converting"
/* Swap the private copy so the caller's record stays in host order. */
328 memcpy(conv, rec, len);
329 ecode = tdb->tdb2.io->twrite(tdb, off,
330 tdb_convert(tdb, conv, len), len);
333 ecode = tdb->tdb2.io->twrite(tdb, off, rec, len);
338 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
339 void *rec, size_t len)
341 enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len);
342 tdb_convert(tdb, rec, len);
/*
 * tdb_write_off - store one tdb_off_t value at file offset @off.
 * @tdb: database context.
 * @off: destination offset in the database.
 * @val: value to store, in host byte order.
 *
 * Read-only databases are refused with TDB_ERR_RDONLY.  Without
 * TDB_CONVERT a writable direct pointer is requested (an error pointer is
 * returned as its error code); the final fallback goes through
 * tdb_write_convert().
 * NOTE(review): the store through a usable direct pointer is not visible
 * in this excerpt.
 */
346 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
347 tdb_off_t off, tdb_off_t val)
349 if (tdb->flags & TDB_RDONLY) {
350 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
351 "Write to read-only database");
354 if (likely(!(tdb->flags & TDB_CONVERT))) {
355 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
357 if (TDB_PTR_IS_ERR(p)) {
358 return TDB_PTR_ERR(p);
365 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * _tdb_alloc_read - allocate a buffer and read database bytes into it,
 * leaving @prefix bytes of room in front of the data.
 * @tdb:    database context.
 * @offset: source offset in the database.
 * @len:    number of bytes to read.
 * @prefix: number of bytes reserved at the start of the buffer; the data
 *          is read to buf + prefix.
 *
 * Failures are returned as error pointers (TDB_ERR_PTR), so callers must
 * test the result with TDB_PTR_IS_ERR() rather than against NULL.
 * NOTE(review): prefix + len is not checked for overflow before being
 * passed to malloc().
 */
368 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
369 tdb_len_t len, unsigned int prefix)
372 enum TDB_ERROR ecode;
374 /* some systems don't like zero length malloc */
375 buf = malloc(prefix + len ? prefix + len : 1);
377 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
378 "tdb_alloc_read malloc failed len=%zu",
379 (size_t)(prefix + len));
380 return TDB_ERR_PTR(TDB_ERR_OOM);
382 ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len);
/* NOTE(review): the cleanup of buf on this error path is not visible in
 * this excerpt - confirm it is freed before returning. */
383 if (unlikely(ecode != TDB_SUCCESS)) {
385 return TDB_ERR_PTR(ecode);
391 /* read a lump of data, allocating the space for it */
/*
 * @tdb:    database context.
 * @offset: source offset in the database.
 * @len:    number of bytes to read.
 *
 * Thin wrapper around _tdb_alloc_read() with no prefix space.  The result
 * is either the allocated buffer or an error pointer; test it with
 * TDB_PTR_IS_ERR().
 */
392 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
394 return _tdb_alloc_read(tdb, offset, len, 0);
/*
 * fill - write a repeating pattern buffer into the file with pwrite().
 * @tdb:  database context; writes go to tdb->file->fd.
 * @buf:  pattern buffer.
 * @size: size of @buf in bytes; no single pwrite() exceeds it.
 * @off:  file offset to write at.
 * @len:  number of bytes still to be filled.
 *
 * A failed write is logged and reported as TDB_ERR_IO.
 * NOTE(review): the loop advancing @off / @len and the test applied to
 * ret are not visible in this excerpt.
 */
397 static enum TDB_ERROR fill(struct tdb_context *tdb,
398 const void *buf, size_t size,
399 tdb_off_t off, tdb_len_t len)
/* Per-call amount: the remaining length, capped at the pattern size. */
402 size_t n = len > size ? size : len;
403 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
408 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
410 " %zi at %zu len=%zu (%s)",
411 ret, (size_t)off, (size_t)len,
420 /* expand a file. we prefer to use ftruncate, as that is what posix
421 says to use for mmap expansion */
/*
 * @tdb: database context.
 * The amount to grow by is named "addition" in the body; its declaration
 * is on a parameter line not visible in this excerpt.
 *
 * Read-only databases are refused with TDB_ERR_RDONLY.  TDB_INTERNAL
 * databases grow their heap buffer with realloc(); file-backed databases
 * are unmapped, extended with ftruncate() and then explicitly filled so
 * the new region is not sparse.  map_size is increased by the addition on
 * both paths.
 */
422 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
426 enum TDB_ERROR ecode;
428 if (tdb->flags & TDB_RDONLY) {
429 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
430 "Expand on read-only database");
433 if (tdb->flags & TDB_INTERNAL) {
/* The realloc() result goes into a temporary first, so the old buffer is
 * still referenced by map_ptr if the allocation fails. */
434 char *new = realloc(tdb->file->map_ptr,
435 tdb->file->map_size + addition);
437 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
438 "No memory to expand database");
440 tdb->file->map_ptr = new;
441 tdb->file->map_size += addition;
443 /* Unmap before trying to write; old TDB claimed OpenBSD had
444 * problem with this otherwise. */
445 tdb_munmap(tdb->file);
447 /* If this fails, we try to fill anyway. */
448 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
451 /* now fill the file with something. This ensures that the
452 file isn't sparse, which would be very bad if we ran out of
453 disk. This must be done with write, not via mmap */
/* The fill pattern is the byte 0x43; filling starts at the old end of the
 * file (the current map_size). */
454 memset(buf, 0x43, sizeof(buf));
455 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
457 if (ecode != TDB_SUCCESS)
459 tdb->file->map_size += addition;
/*
 * tdb_access_read - obtain read access to @len bytes at @off.
 * @tdb:     database context.
 * @off:     start offset in the database.
 * @len:     number of bytes wanted.
 * @convert: NOTE(review): the test on this flag is not visible in this
 *           excerpt; presumably it gates the tdb_convert() call below.
 *
 * Without TDB_CONVERT a direct pointer is requested.  The visible fallback
 * reads the data into a heap buffer that starts with a struct
 * tdb_access_hdr and pushes that header onto the tdb->tdb2.access list.
 * tdb->tdb2.direct_access is incremented on a path whose guarding
 * condition is not visible here.  Failures come back as error pointers.
 */
465 const void *tdb_access_read(struct tdb_context *tdb,
466 tdb_off_t off, tdb_len_t len, bool convert)
470 if (likely(!(tdb->flags & TDB_CONVERT))) {
471 ret = tdb->tdb2.io->direct(tdb, off, len, false);
473 if (TDB_PTR_IS_ERR(ret)) {
478 struct tdb_access_hdr *hdr;
/* The header lives in the prefix space in front of the data read. */
479 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
480 if (TDB_PTR_IS_ERR(hdr)) {
/* Push onto the head of the singly linked list of copied accesses. */
483 hdr->next = tdb->tdb2.access;
484 tdb->tdb2.access = hdr;
487 tdb_convert(tdb, (void *)ret, len);
490 tdb->tdb2.direct_access++;
/*
 * tdb_access_write - obtain write access to @len bytes at @off.
 * @tdb:     database context; read-only databases are refused.
 * @off:     start offset in the database.
 * @len:     number of bytes wanted.
 * @convert: recorded in the access header of a copied buffer.
 *
 * Without TDB_CONVERT a writable direct pointer is requested.  The visible
 * fallback reads the current contents into a heap buffer that starts with
 * a struct tdb_access_hdr, links it onto tdb->tdb2.access and stores
 * @convert in it.  tdb->tdb2.direct_access is incremented on a path whose
 * guarding condition is not visible here.  Failures, including
 * TDB_ERR_RDONLY, come back as error pointers.
 */
495 void *tdb_access_write(struct tdb_context *tdb,
496 tdb_off_t off, tdb_len_t len, bool convert)
500 if (tdb->flags & TDB_RDONLY) {
501 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
502 "Write to read-only database");
503 return TDB_ERR_PTR(TDB_ERR_RDONLY);
506 if (likely(!(tdb->flags & TDB_CONVERT))) {
507 ret = tdb->tdb2.io->direct(tdb, off, len, true);
509 if (TDB_PTR_IS_ERR(ret)) {
515 struct tdb_access_hdr *hdr;
/* The header lives in the prefix space in front of the data read. */
516 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
517 if (TDB_PTR_IS_ERR(hdr)) {
/* Push onto the head of the singly linked list of copied accesses. */
520 hdr->next = tdb->tdb2.access;
521 tdb->tdb2.access = hdr;
524 hdr->convert = convert;
527 tdb_convert(tdb, (void *)ret, len);
529 tdb->tdb2.direct_access++;
/*
 * find_hdr - look for an access header on the tdb->tdb2.access list.
 * @tdb: database context owning the list.
 * @p:   pointer to match against.
 *
 * Walks the list through a pointer-to-link (hp), so a hit can be unlinked
 * by the caller by assigning through the returned pointer.
 * NOTE(review): the match condition and the return statements are not
 * visible in this excerpt.
 */
534 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
536 struct tdb_access_hdr **hp;
538 for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) {
/*
 * tdb_access_release - end an access started with tdb_access_read().
 * @tdb: database context.
 * @p:   pointer that was handed out for the access.
 *
 * Looks @p up with find_hdr(); tdb->tdb2.direct_access is decremented on a
 * path whose condition is not visible in this excerpt (presumably when @p
 * is not a copied buffer on the list - confirm).
 */
545 void tdb_access_release(struct tdb_context *tdb, const void *p)
547 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
554 tdb->tdb2.direct_access--;
/*
 * tdb_access_commit - end a write access, writing a copied buffer back.
 * @tdb: database context.
 * @p:   pointer that was handed out for the access.
 *
 * Looks @p up with find_hdr().  For a copied buffer the bytes are written
 * back to hdr->off for hdr->len bytes, either through tdb_write_convert()
 * or through tdb_write(); tdb->tdb2.direct_access is decremented on the
 * other path.
 * NOTE(review): the conditions selecting between these calls are not
 * visible in this excerpt (presumably hdr->convert picks the converting
 * write - confirm).
 */
557 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
559 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
560 enum TDB_ERROR ecode;
565 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
567 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
571 tdb->tdb2.direct_access--;
/*
 * tdb_direct - return a pointer into the mapping for [off, off + len).
 * @tdb: database context.
 * @off: start offset in the database.
 * @len: number of bytes that must be addressable from @off.
 * (A further parameter line is not visible in this excerpt.)
 *
 * The range is bounds-checked with tdb_oob(); a failure is returned as an
 * error pointer, otherwise the result is map_ptr + off.
 * NOTE(review): the statement taken when there is no mapping is not
 * visible here - presumably it returns NULL; confirm.
 */
578 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
581 enum TDB_ERROR ecode;
583 if (unlikely(!tdb->file->map_ptr))
586 ecode = tdb_oob(tdb, off, len, false);
587 if (unlikely(ecode != TDB_SUCCESS))
588 return TDB_ERR_PTR(ecode);
589 return (char *)tdb->file->map_ptr + off;
592 void tdb_inc_seqnum(struct tdb_context *tdb)
596 if (tdb->flags & TDB_VERSION1) {
597 tdb1_increment_seqnum_nonblock(tdb);
601 if (likely(!(tdb->flags & TDB_CONVERT))) {
604 direct = tdb->tdb2.io->direct(tdb,
605 offsetof(struct tdb_header,
607 sizeof(*direct), true);
608 if (likely(direct)) {
609 /* Don't let it go negative, even briefly */
610 if (unlikely((*direct) + 1) < 0)
617 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
618 if (!TDB_OFF_IS_ERR(seq)) {
620 if (unlikely((int64_t)seq < 0))
622 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
/*
 * Default I/O methods table, installed on a context by tdb_io_init().
 * Code in this file calls through it as tdb->tdb2.io->tread, ->twrite,
 * ->oob and ->direct.
 * NOTE(review): the initialiser entries are not visible in this excerpt.
 */
626 static const struct tdb_methods io_methods = {
/*
  initialise the default methods table
*/
/*
 * tdb_io_init - point a context at this file's I/O implementation.
 * @tdb: database context whose tdb2.io pointer is set to &io_methods.
 */
637 void tdb_io_init(struct tdb_context *tdb)
639 tdb->tdb2.io = &io_methods;