2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/*
 * tdb_munmap - release the memory mapping recorded in a tdb_file.
 *
 * The one statement visible in this excerpt calls munmap() on
 * file->map_size bytes starting at file->map_ptr.
 *
 * NOTE(review): the other lines of the body are not shown here, so any
 * guard around the munmap() call and any reset of file->map_ptr cannot be
 * confirmed from this view.
 */
32 void tdb_munmap(struct tdb_file *file)
38 munmap(file->map_ptr, file->map_size);
/*
 * tdb_mmap - try to memory-map the file behind a tdb context.
 *
 * What the visible lines establish:
 *  - TDB_INTERNAL and TDB_NOMMAP in tdb->flags are each tested first.
 *    NOTE(review): the guarded statements are not shown in this excerpt;
 *    presumably both return without mapping - confirm.
 *  - mmap() is called with MAP_SHARED on tdb->file->fd at offset 0 for
 *    map_size bytes, but only when map_size survives a round trip through
 *    size_t.  map_ptr is also assigned MAP_FAILED directly (presumably in
 *    the else branch of that test - confirm).
 *  - A MAP_FAILED result is replaced by a NULL map_ptr and reported through
 *    tdb_logerr() at TDB_LOG_WARNING level with TDB_SUCCESS as the code,
 *    together with the size and strerror(errno).
 */
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 /* size_t can be smaller than off_t. */
52 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
53 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
55 MAP_SHARED, tdb->file->fd, 0);
57 tdb->file->map_ptr = MAP_FAILED;
60 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
62 if (tdb->file->map_ptr == MAP_FAILED) {
/* Callers test map_ptr against NULL (see the read/write paths), so normalise. */
63 tdb->file->map_ptr = NULL;
64 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
65 "tdb_mmap failed for size %lld (%s)",
66 (long long)tdb->file->map_size, strerror(errno));
/*
 * tdb_oob - make sure the first "len" bytes of the database are reachable.
 *
 * Behaviour established by the visible lines:
 *  - asserts that no direct-access pointers are outstanding unless
 *    TDB_NOLOCK is set or the expansion lock is held, because the mapping
 *    may be torn down below;
 *  - compares len against tdb->file->map_size first;
 *  - for TDB_INTERNAL databases logs a TDB_ERR_IO error that includes the
 *    current map_size;
 *  - otherwise takes the expansion lock with F_RDLCK, fstat()s the file
 *    and drops the lock again on both the failure and the success path;
 *  - logs a TDB_ERR_IO error when the file is still smaller than len;
 *  - finally unmaps the file and stores st.st_size as the new map_size.
 *
 * NOTE(review): the rest of the parameter list, the local declarations,
 * the return statements and the remap call are not visible in this
 * excerpt; the summary above covers only what is shown.
 * NOTE(review): "st.st_size < (size_t)len" narrows len to size_t before
 * comparing - confirm this is intended where size_t is narrower than
 * tdb_off_t.
 */
70 /* check for an out of bounds access - if it is out of bounds then
71 see if the database has been expanded by someone else and expand
73 note that "len" is the minimum length needed for the db
75 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
81 /* We can't hold pointers during this: we could unmap! */
82 assert(!tdb->direct_access
83 || (tdb->flags & TDB_NOLOCK)
84 || tdb_has_expansion_lock(tdb));
86 if (len <= tdb->file->map_size)
88 if (tdb->flags & TDB_INTERNAL) {
90 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
91 "tdb_oob len %lld beyond internal"
94 (long long)tdb->file->map_size);
99 ecode = tdb_lock_expand(tdb, F_RDLCK);
100 if (ecode != TDB_SUCCESS) {
104 if (fstat(tdb->file->fd, &st) != 0) {
105 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
106 "Failed to fstat file: %s", strerror(errno));
107 tdb_unlock_expand(tdb, F_RDLCK);
111 tdb_unlock_expand(tdb, F_RDLCK);
113 if (st.st_size < (size_t)len) {
/*
 * st.st_size is an off_t, which need not have the width of size_t; cast
 * it so the argument really matches the "%zu" conversion, as every other
 * size argument in this file already does.
 */
115 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
116 "tdb_oob len %zu beyond eof at %zu",
117 (size_t)len, (size_t)st.st_size);
122 /* Unmap, update size, remap */
123 tdb_munmap(tdb->file);
125 tdb->file->map_size = st.st_size;
130 /* Endian conversion: we only ever deal with 8 byte quantities */
/*
 * tdb_convert - byte-swap a buffer in place when the database needs it.
 *
 * When TDB_CONVERT is set in tdb->flags and buf is non-NULL, each of the
 * size / 8 whole 64-bit words in buf is replaced by bswap_64() of itself.
 * The loop bound is size / 8, so any trailing size % 8 bytes are not
 * visited by it.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * callers pass the result on as a buffer, so it presumably returns buf -
 * confirm.
 */
131 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
133 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
134 uint64_t i, *p = (uint64_t *)buf;
135 for (i = 0; i < size / 8; i++)
136 p[i] = bswap_64(p[i]);
141 /* Return first non-zero offset in offset array, or end, or -ve error. */
142 /* FIXME: Return the off? */
/*
 * The visible lines obtain read access to (end - start) offsets beginning
 * at base + start * sizeof(tdb_off_t), passing false for the convert
 * argument of tdb_access_read().  An error pointer is turned into its error
 * code and returned; otherwise indices 0 .. (end - start) - 1 are walked
 * and the access is released afterwards.
 *
 * NOTE(review): the loop body and the final return are not visible in this
 * excerpt, so how the scan result is mapped back onto the start..end range
 * cannot be confirmed here.
 */
143 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
144 tdb_off_t base, uint64_t start, uint64_t end)
149 /* Zero vs non-zero is the same unconverted: minor optimization. */
150 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
151 (end - start) * sizeof(tdb_off_t), false);
152 if (TDB_PTR_IS_ERR(val)) {
153 return TDB_PTR_ERR(val);
156 for (i = 0; i < (end - start); i++) {
160 tdb_access_release(tdb, val);
164 /* Return first zero offset in num offset array, or num, or -ve error. */
/*
 * The visible lines obtain read access to num offsets starting at off,
 * passing false for the convert argument of tdb_access_read().  An error
 * pointer is turned into its error code and returned; otherwise indices
 * 0 .. num - 1 are walked and the access is released afterwards.
 *
 * NOTE(review): the end of the parameter list, the loop body and the final
 * return are not visible in this excerpt.
 */
165 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
171 /* Zero vs non-zero is the same unconverted: minor optimization. */
172 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
173 if (TDB_PTR_IS_ERR(val)) {
174 return TDB_PTR_ERR(val);
177 for (i = 0; i < num; i++) {
181 tdb_access_release(tdb, val);
/*
 * zero_out - clear len bytes of the database starting at off.
 *
 * Visible steps: ask methods->direct for a writable pointer covering the
 * range, assert that the database is not read-only, hand back the error
 * code if an error pointer was returned, and write from a zero-filled
 * 8192-byte stack buffer through methods->twrite, with each write capped at
 * sizeof(buf) bytes.
 *
 * NOTE(review): what is done with a usable direct pointer, and the loop
 * that presumably surrounds the twrite call, are not visible in this
 * excerpt.
 */
185 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
187 char buf[8192] = { 0 };
188 void *p = tdb->methods->direct(tdb, off, len, true);
189 enum TDB_ERROR ecode = TDB_SUCCESS;
191 assert(!tdb->read_only);
192 if (TDB_PTR_IS_ERR(p)) {
193 return TDB_PTR_ERR(p);
/* Never hand twrite more than the zero buffer actually holds. */
200 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
201 ecode = tdb->methods->twrite(tdb, off, buf, todo);
202 if (ecode != TDB_SUCCESS) {
/*
 * tdb_read_off - read one tdb_off_t stored at offset off.
 *
 * When TDB_CONVERT is clear, the visible code asks methods->direct for a
 * pointer to sizeof(tdb_off_t) bytes at off and returns the error code if
 * that yields an error pointer.  A second visible path reads into a local
 * through tdb_read_convert() and checks its result.
 *
 * NOTE(review): the remaining arguments of the direct call, the handling of
 * a usable direct pointer and the return statements are not visible in this
 * excerpt.
 */
211 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
214 enum TDB_ERROR ecode;
216 if (likely(!(tdb->flags & TDB_CONVERT))) {
217 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
219 if (TDB_PTR_IS_ERR(p)) {
220 return TDB_PTR_ERR(p);
226 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
227 if (ecode != TDB_SUCCESS) {
233 /* write a lump of data at a specified offset */
/*
 * tdb_write - copy len bytes from buf into the database at offset off.
 *
 * Visible steps: reject the call with TDB_ERR_RDONLY when tdb->read_only is
 * set; run methods->oob on off + len; then, if the file is mapped, memcpy()
 * into the mapping at map_ptr + off.  A pwrite() on tdb->file->fd at the
 * same offset is also visible, and a TDB_ERR_IO error is logged with the
 * pwrite result, offset and length.
 *
 * NOTE(review): the branch structure between the memcpy and pwrite paths,
 * the conditions leading to the error log and the final return are not
 * visible in this excerpt.
 */
234 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
235 const void *buf, tdb_len_t len)
237 enum TDB_ERROR ecode;
239 if (tdb->read_only) {
240 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
241 "Write to read-only database");
/* The whole range [off, off + len) must be inside the database first. */
244 ecode = tdb->methods->oob(tdb, off + len, 0);
245 if (ecode != TDB_SUCCESS) {
249 if (tdb->file->map_ptr) {
250 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
253 ret = pwrite(tdb->file->fd, buf, len, off);
255 /* This shouldn't happen: we avoid sparse files. */
259 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
260 "tdb_write: %zi at %zu len=%zu (%s)",
261 ret, (size_t)off, (size_t)len,
268 /* read a lump of data at a specified offset */
/*
 * tdb_read - copy len bytes from the database at offset off into buf.
 *
 * Visible steps: run methods->oob on off + len; if the file is mapped,
 * memcpy() out of the mapping at map_ptr + off.  A pread() on
 * tdb->file->fd at the same offset is also visible, and a TDB_ERR_IO error
 * is logged with the pread result, offset, length and current map_size.
 *
 * NOTE(review): the branch structure between the memcpy and pread paths,
 * the condition that triggers the error log and the final return are not
 * visible in this excerpt.
 */
269 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
270 void *buf, tdb_len_t len)
272 enum TDB_ERROR ecode;
/* The whole range [off, off + len) must be inside the database first. */
274 ecode = tdb->methods->oob(tdb, off + len, 0);
275 if (ecode != TDB_SUCCESS) {
279 if (tdb->file->map_ptr) {
280 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
282 ssize_t r = pread(tdb->file->fd, buf, len, off);
284 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
285 "tdb_read failed with %zi at %zu "
286 "len=%zu (%s) map_size=%zu",
287 r, (size_t)off, (size_t)len,
289 (size_t)tdb->file->map_size);
/*
 * tdb_write_convert - write a record, converting it first when the
 * database has TDB_CONVERT set.
 *
 * With TDB_CONVERT set, the visible code allocates a scratch copy of rec
 * (the caller's buffer is const and is not modified), runs tdb_convert()
 * over the copy and passes the result to methods->twrite.  An allocation
 * failure is reported as TDB_ERR_OOM.  A second visible twrite call writes
 * rec unchanged.
 *
 * NOTE(review): the NULL test on the allocation, the release of the scratch
 * copy, the else keyword and the final return are not visible in this
 * excerpt.
 */
295 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
296 const void *rec, size_t len)
298 enum TDB_ERROR ecode;
300 if (unlikely((tdb->flags & TDB_CONVERT))) {
301 void *conv = malloc(len);
303 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
304 "tdb_write: no memory converting"
307 memcpy(conv, rec, len);
308 ecode = tdb->methods->twrite(tdb, off,
309 tdb_convert(tdb, conv, len), len);
312 ecode = tdb->methods->twrite(tdb, off, rec, len);
/*
 * tdb_read_convert - read len bytes at off into rec and pass the buffer
 * through tdb_convert().
 *
 * In the visible lines the tdb_convert() call follows the methods->tread
 * call directly, with no visible test of the read result in between.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the tread result is returned - confirm.
 */
317 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
318 void *rec, size_t len)
320 enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
321 tdb_convert(tdb, rec, len);
/*
 * tdb_write_off - store one tdb_off_t value at offset off.
 *
 * Visible steps: reject the call with TDB_ERR_RDONLY when tdb->read_only is
 * set; when TDB_CONVERT is clear, ask methods->direct for a pointer to
 * sizeof(tdb_off_t) bytes at off and return the error code if that yields
 * an error pointer; otherwise the value is written through
 * tdb_write_convert().
 *
 * NOTE(review): the remaining arguments of the direct call and the handling
 * of a usable direct pointer are not visible in this excerpt.
 */
325 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
326 tdb_off_t off, tdb_off_t val)
328 if (tdb->read_only) {
329 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
330 "Write to read-only database");
333 if (likely(!(tdb->flags & TDB_CONVERT))) {
334 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
336 if (TDB_PTR_IS_ERR(p)) {
337 return TDB_PTR_ERR(p);
344 return tdb_write_convert(tdb, off, &val, sizeof(val));
/*
 * _tdb_alloc_read - allocate prefix + len bytes and read len bytes from
 * the database at offset into the buffer, starting prefix bytes in.
 *
 * The allocation size is bumped to 1 when prefix + len is zero.  An
 * allocation failure is logged and reported as TDB_ERR_PTR(TDB_ERR_OOM); a
 * failed methods->tread is reported as TDB_ERR_PTR(ecode).
 *
 * NOTE(review): the declaration of buf, the NULL test on the allocation,
 * the cleanup on read failure and the success return are not visible in
 * this excerpt.
 * NOTE(review): this OOM report uses TDB_LOG_USE_ERROR while the other OOM
 * reports in this file use TDB_LOG_ERROR - confirm which is intended.
 */
347 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
348 tdb_len_t len, unsigned int prefix)
351 enum TDB_ERROR ecode;
353 /* some systems don't like zero length malloc */
354 buf = malloc(prefix + len ? prefix + len : 1);
356 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
357 "tdb_alloc_read malloc failed len=%zu",
358 (size_t)(prefix + len));
359 return TDB_ERR_PTR(TDB_ERR_OOM);
/* The data lands after the caller-requested prefix area. */
361 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
362 if (unlikely(ecode != TDB_SUCCESS)) {
364 return TDB_ERR_PTR(ecode);
370 /* read a lump of data, allocating the space for it */
/*
 * Thin wrapper: forwards to _tdb_alloc_read() with a prefix of 0 and
 * returns whatever that call returns, which in the visible code includes
 * TDB_ERR_PTR() values on failure.
 */
371 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
373 return _tdb_alloc_read(tdb, offset, len, 0);
/*
 * fill - write the contents of buf (size bytes) to the file with pwrite(),
 * starting at offset off, to cover len bytes.
 *
 * Each visible pwrite() is limited to the smaller of len and size.  A
 * TDB_ERR_IO error is logged with the pwrite result, offset and length.
 *
 * NOTE(review): the loop that presumably repeats the write until len is
 * used up, the condition that triggers the error log and the success
 * return are not visible in this excerpt.
 */
376 static enum TDB_ERROR fill(struct tdb_context *tdb,
377 const void *buf, size_t size,
378 tdb_off_t off, tdb_len_t len)
381 size_t n = len > size ? size : len;
382 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
387 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
389 " %zi at %zu len=%zu (%s)",
390 ret, (size_t)off, (size_t)len,
/*
 * tdb_expand_file - grow the database by "addition" bytes.
 *
 * Visible behaviour:
 *  - read-only databases are rejected with TDB_ERR_RDONLY;
 *  - TDB_INTERNAL databases are grown with realloc() into a temporary
 *    pointer, so the old buffer is not lost if realloc() fails; failure is
 *    reported as TDB_ERR_OOM, success updates map_ptr and map_size;
 *  - otherwise the file is unmapped, ftruncate()d to map_size + addition,
 *    filled through fill() from a buffer set to the byte 0x43, and
 *    map_size is increased by addition.
 *
 * NOTE(review): the rest of the parameter list, the declaration of buf,
 * the statement guarded by the ftruncate() test, the remaining arguments
 * of the fill() call and the return statements are not visible in this
 * excerpt.
 */
399 /* expand a file. we prefer to use ftruncate, as that is what posix
400 says to use for mmap expansion */
401 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
405 enum TDB_ERROR ecode;
407 if (tdb->read_only) {
408 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
409 "Expand on read-only database");
412 if (tdb->flags & TDB_INTERNAL) {
413 char *new = realloc(tdb->file->map_ptr,
414 tdb->file->map_size + addition);
416 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
417 "No memory to expand database");
419 tdb->file->map_ptr = new;
420 tdb->file->map_size += addition;
422 /* Unmap before trying to write; old TDB claimed OpenBSD had
423 * problem with this otherwise. */
424 tdb_munmap(tdb->file);
426 /* If this fails, we try to fill anyway. */
427 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
430 /* now fill the file with something. This ensures that the
431 file isn't sparse, which would be very bad if we ran out of
432 disk. This must be done with write, not via mmap */
433 memset(buf, 0x43, sizeof(buf));
434 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
436 if (ecode != TDB_SUCCESS)
438 tdb->file->map_size += addition;
/*
 * tdb_access_read - obtain read access to len bytes of the database at off.
 *
 * Visible behaviour: when TDB_CONVERT is clear, methods->direct is asked
 * for a read-only pointer and an error pointer is checked for.  A second
 * path allocates a copy through _tdb_alloc_read() with room for a struct
 * tdb_access_hdr in front of the data and links the header to tdb->access.
 * tdb_convert() is applied to the data, and tdb->direct_access is
 * incremented.
 *
 * NOTE(review): the conditions selecting between the two paths, the use of
 * the convert argument, the remaining header fields and the return
 * statements are not visible in this excerpt.
 */
444 const void *tdb_access_read(struct tdb_context *tdb,
445 tdb_off_t off, tdb_len_t len, bool convert)
449 if (likely(!(tdb->flags & TDB_CONVERT))) {
450 ret = tdb->methods->direct(tdb, off, len, false);
452 if (TDB_PTR_IS_ERR(ret)) {
457 struct tdb_access_hdr *hdr;
458 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
459 if (TDB_PTR_IS_ERR(hdr)) {
462 hdr->next = tdb->access;
466 tdb_convert(tdb, (void *)ret, len);
469 tdb->direct_access++;
/*
 * tdb_access_write - obtain write access to len bytes of the database at
 * off.
 *
 * Visible behaviour: a read-only database is rejected with
 * TDB_ERR_PTR(TDB_ERR_RDONLY) after logging.  When TDB_CONVERT is clear,
 * methods->direct is asked for a writable pointer and an error pointer is
 * checked for.  A second path allocates a copy through _tdb_alloc_read()
 * with room for a struct tdb_access_hdr in front of the data, links the
 * header to tdb->access and records the convert argument in it.
 * tdb_convert() is applied to the data, and tdb->direct_access is
 * incremented.
 *
 * NOTE(review): the conditions selecting between the two paths, the
 * remaining header fields and the return statements are not visible in
 * this excerpt.
 */
474 void *tdb_access_write(struct tdb_context *tdb,
475 tdb_off_t off, tdb_len_t len, bool convert)
479 if (tdb->read_only) {
480 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
481 "Write to read-only database");
482 return TDB_ERR_PTR(TDB_ERR_RDONLY);
485 if (likely(!(tdb->flags & TDB_CONVERT))) {
486 ret = tdb->methods->direct(tdb, off, len, true);
488 if (TDB_PTR_IS_ERR(ret)) {
494 struct tdb_access_hdr *hdr;
495 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
496 if (TDB_PTR_IS_ERR(hdr)) {
499 hdr->next = tdb->access;
503 hdr->convert = convert;
506 tdb_convert(tdb, (void *)ret, len);
508 tdb->direct_access++;
/*
 * find_hdr - walk the list of access headers hanging off tdb->access.
 *
 * The loop advances a pointer-to-link (hp), starting at &tdb->access and
 * following ->next until a NULL link is reached, so a caller can unlink an
 * entry through the returned slot.
 *
 * NOTE(review): the comparison against p inside the loop and the return
 * statements are not visible in this excerpt.
 */
513 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
515 struct tdb_access_hdr **hp;
517 for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
/*
 * tdb_access_release - give up an access previously handed out for p.
 *
 * The visible lines look p up with find_hdr() and decrement
 * tdb->direct_access.
 *
 * NOTE(review): the branch that handles a found header, and the condition
 * under which the counter is decremented, are not visible in this excerpt.
 */
524 void tdb_access_release(struct tdb_context *tdb, const void *p)
526 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
533 tdb->direct_access--;
/*
 * tdb_access_commit - finish a write access previously handed out for p.
 *
 * The visible lines look p up with find_hdr(), write hdr->len bytes from p
 * back to hdr->off through either tdb_write_convert() or tdb_write(), and
 * decrement tdb->direct_access.
 *
 * NOTE(review): the test that chooses between the two write calls
 * (presumably the header's convert field - confirm), the unlinking and
 * release of the header, and the return statement are not visible in this
 * excerpt.
 */
536 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
538 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
539 enum TDB_ERROR ecode;
544 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
546 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
550 tdb->direct_access--;
/*
 * tdb_direct - return a pointer straight into the memory map for the range
 * [off, off + len).
 *
 * Visible steps: test whether the file is mapped at all, run tdb_oob() on
 * off + len with true as its last argument, return TDB_ERR_PTR(ecode) if
 * that fails, and otherwise return map_ptr + off.
 *
 * NOTE(review): the last parameter of this function and the statement taken
 * when the file is not mapped are not visible in this excerpt.
 */
557 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
560 enum TDB_ERROR ecode;
562 if (unlikely(!tdb->file->map_ptr))
565 ecode = tdb_oob(tdb, off + len, true);
566 if (unlikely(ecode != TDB_SUCCESS))
567 return TDB_ERR_PTR(ecode);
568 return (char *)tdb->file->map_ptr + off;
/*
 * tdb_inc_seqnum - advance the seqnum field of the database header.
 *
 * When TDB_CONVERT is clear, the visible code asks methods->direct for a
 * writable pointer to the header's seqnum and works on it in place.  The
 * other visible path reads the value with tdb_read_off(), and when that
 * succeeds writes it back with tdb_write_off().  Both paths test whether
 * the counter, viewed as a signed 64-bit value, would be negative.
 *
 * NOTE(review): the local declarations, the statements guarded by the two
 * "negative" tests and the increments themselves are not visible in this
 * excerpt.
 */
571 void tdb_inc_seqnum(struct tdb_context *tdb)
575 if (likely(!(tdb->flags & TDB_CONVERT))) {
578 direct = tdb->methods->direct(tdb,
579 offsetof(struct tdb_header,
581 sizeof(*direct), true);
582 if (likely(direct)) {
583 /* Don't let it go negative, even briefly */
/*
 * The comparison must sit inside unlikely(): the hint evaluates to the
 * truth value of its argument, so testing the hint's result against "< 0"
 * could never be true and the guard was dead.  The increment is done in
 * uint64_t and then viewed as int64_t, matching the "(int64_t)seq < 0"
 * test on the fallback path, so the check itself cannot overflow a signed
 * type.
 */
584 if (unlikely((int64_t)((uint64_t)(*direct) + 1) < 0))
591 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
592 if (!TDB_OFF_IS_ERR(seq)) {
594 if (unlikely((int64_t)seq < 0))
596 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
/*
 * Default method table; tdb_io_init() installs its address in
 * tdb->methods.
 * NOTE(review): the initializer entries are not visible in this excerpt.
 */
600 static const struct tdb_methods io_methods = {
/*
 * tdb_io_init - point tdb->methods at the file-local io_methods table, so
 * later calls made through tdb->methods use the default implementations.
 */
609 initialise the default methods table
611 void tdb_io_init(struct tdb_context *tdb)
613 tdb->methods = &io_methods;