2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
/* Drop the memory mapping described by @file: the visible call hands
 * file->map_ptr and file->map_size to munmap().
 * NOTE(review): the statements between the signature and the munmap() call
 * are not part of this extract, so whatever guards surround the call cannot
 * be confirmed from here. */
32 void tdb_munmap(struct tdb_file *file)
38 munmap(file->map_ptr, file->map_size);
/* Try to memory-map the database file, leaving the result in
 * tdb->file->map_ptr.  The TDB_INTERNAL and TDB_NOMMAP flags are tested
 * before any mapping is attempted (presumably to return early; the
 * statements they guard are not visible in this extract).  A failed mapping
 * is not turned into an error code here: map_ptr is reset to NULL and a
 * warning is logged. */
43 void tdb_mmap(struct tdb_context *tdb)
47 if (tdb->flags & TDB_INTERNAL)
50 if (tdb->flags & TDB_NOMMAP)
/* Choose the page protection from the mode the file was opened with:
 * read-only opens get PROT_READ, the other assignment adds PROT_WRITE. */
53 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
54 mmap_flags = PROT_READ;
56 mmap_flags = PROT_READ | PROT_WRITE;
58 /* size_t can be smaller than off_t. */
/* mmap() is only called when map_size survives the round trip through
 * size_t.  The MAP_FAILED assignment further down is presumably the else
 * branch of this test, so an unrepresentable size is handled exactly like
 * a failed mmap(). */
59 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
60 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
62 MAP_SHARED, tdb->file->fd, 0);
64 tdb->file->map_ptr = MAP_FAILED;
67 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
/* Normalise the failure value to NULL; code elsewhere in this file tests
 * map_ptr for NULL to decide between mapped and pread/pwrite access.  The
 * log call uses TDB_SUCCESS with TDB_LOG_WARNING. */
69 if (tdb->file->map_ptr == MAP_FAILED) {
70 tdb->file->map_ptr = NULL;
71 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
72 "tdb_mmap failed for size %lld (%s)",
73 (long long)tdb->file->map_size, strerror(errno));
/* tdb_oob(): make sure the first @len bytes of the database are covered by
 * tdb->file->map_size; for file-backed databases the file size is re-read
 * with fstat() when they are not.  Returns an enum TDB_ERROR. */
77 /* check for an out of bounds access - if it is out of bounds then
78 see if the database has been expanded by someone else and expand
80 note that "len" is the minimum length needed for the db.
82 If probe is true, len being too large isn't a failure.
84 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
90 /* We can't hold pointers during this: we could unmap! */
91 assert(!tdb->tdb2.direct_access
92 || (tdb->flags & TDB_NOLOCK)
93 || tdb_has_expansion_lock(tdb));
/* Requests that already fit inside the current size need no further work. */
95 if (len <= tdb->file->map_size)
/* TDB_INTERNAL databases: the visible lines log the oversized request as
 * TDB_ERR_IO.  (How @probe alters this is not visible in this extract.) */
97 if (tdb->flags & TDB_INTERNAL) {
101 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
102 "tdb_oob len %lld beyond internal"
105 (long long)tdb->file->map_size);
/* The expansion lock is taken in read mode (F_RDLCK) around the fstat()
 * that samples the file size, and released on both the failure and the
 * success path. */
109 ecode = tdb_lock_expand(tdb, F_RDLCK);
110 if (ecode != TDB_SUCCESS) {
114 if (fstat(tdb->file->fd, &st) != 0) {
115 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
116 "Failed to fstat file: %s", strerror(errno));
117 tdb_unlock_expand(tdb, F_RDLCK);
121 tdb_unlock_expand(tdb, F_RDLCK);
/* The file really is shorter than @len.
 * NOTE(review): st.st_size has type off_t but is handed to a %zu conversion
 * without a cast (len is cast to size_t); off_t and size_t can differ in
 * width, so this probably wants an explicit cast - confirm. */
123 if (st.st_size < (size_t)len) {
127 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
128 "tdb_oob len %zu beyond eof at %zu",
129 (size_t)len, st.st_size);
133 /* Unmap, update size, remap */
134 tdb_munmap(tdb->file);
136 tdb->file->map_size = st.st_size;
141 /* Endian conversion: we only ever deal with 8 byte quantities */
/* When the database has TDB_CONVERT set and @buf is non-NULL, byte-swap in
 * place every 64-bit word of the @size-byte buffer @buf.  @size must be a
 * multiple of 8 (asserted).
 * NOTE(review): the return statement is not visible in this extract; a
 * caller in this file passes the result straight on as the data pointer to
 * write, so it presumably returns @buf. */
142 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
144 assert(size % 8 == 0);
/* Conversion is the rare case, hence the unlikely() hint. */
145 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
146 uint64_t i, *p = (uint64_t *)buf;
147 for (i = 0; i < size / 8; i++)
148 p[i] = bswap_64(p[i]);
153 /* Return first non-zero offset in offset array, or end, or -ve error. */
154 /* FIXME: Return the off? */
/* @base is the file offset of an array of tdb_off_t; the entries with index
 * @start up to (not including) @end are examined.  The entries are fetched
 * with tdb_access_read() without conversion, and a failed fetch is returned
 * as an error offset built with TDB_ERR_TO_OFF(). */
155 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
156 tdb_off_t base, uint64_t start, uint64_t end)
161 /* Zero vs non-zero is the same unconverted: minor optimization. */
162 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
163 (end - start) * sizeof(tdb_off_t), false);
164 if (TDB_PTR_IS_ERR(val)) {
165 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
/* i counts from 0 relative to @start (the loop body is not visible in this
 * extract). */
168 for (i = 0; i < (end - start); i++) {
/* Pairs with the tdb_access_read() above. */
172 tdb_access_release(tdb, val);
176 /* Return first zero offset in num offset array, or num, or -ve error. */
/* @off is the file offset of the array.  The num entries are fetched with
 * tdb_access_read() without conversion; a failed fetch is returned as an
 * error offset built with TDB_ERR_TO_OFF(). */
177 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
183 /* Zero vs non-zero is the same unconverted: minor optimization. */
184 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
185 if (TDB_PTR_IS_ERR(val)) {
186 return TDB_ERR_TO_OFF(TDB_PTR_ERR(val));
/* Scan the entries in index order (the loop body is not visible in this
 * extract). */
189 for (i = 0; i < num; i++) {
/* Pairs with the tdb_access_read() above. */
193 tdb_access_release(tdb, val);
/* Zero @len bytes of the database starting at @off.  The database must not
 * be read-only (asserted).  Returns an enum TDB_ERROR; ecode starts out as
 * TDB_SUCCESS. */
197 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
/* Zero-filled scratch buffer used as the data source for the twrite calls
 * below. */
199 char buf[8192] = { 0 };
/* Ask for a direct pointer to the region, with the last argument true as in
 * the other write-side callers in this file. */
200 void *p = tdb->tdb2.io->direct(tdb, off, len, true);
201 enum TDB_ERROR ecode = TDB_SUCCESS;
203 assert(!(tdb->flags & TDB_RDONLY));
/* An error pointer from direct() is returned as its error code. */
204 if (TDB_PTR_IS_ERR(p)) {
205 return TDB_PTR_ERR(p);
/* Write at most sizeof(buf) zero bytes per twrite call.  (The surrounding
 * loop and the use made of a valid direct pointer are not visible in this
 * extract.) */
212 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
213 ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo);
214 if (ecode != TDB_SUCCESS) {
/* Read a single tdb_off_t stored at file offset @off.  Errors are reported
 * in-band as an error offset built with TDB_ERR_TO_OFF(). */
223 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
226 enum TDB_ERROR ecode;
/* Common case, no endian conversion: try to get a direct pointer to the
 * value. */
228 if (likely(!(tdb->flags & TDB_CONVERT))) {
229 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
231 if (TDB_PTR_IS_ERR(p)) {
232 return TDB_ERR_TO_OFF(TDB_PTR_ERR(p));
/* Otherwise read the value into ret through tdb_read_convert(), which
 * applies tdb_convert() to the buffer. */
238 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
239 if (ecode != TDB_SUCCESS) {
240 return TDB_ERR_TO_OFF(ecode);
245 /* write a lump of data at a specified offset */
/* Copies @len bytes from @buf to file offset @off.  Read-only databases are
 * refused with TDB_ERR_RDONLY.  The range end off + len is validated through
 * io->oob first; the data then goes through the memory map when one exists,
 * or through pwrite() on the file descriptor otherwise. */
246 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
247 const void *buf, tdb_len_t len)
249 enum TDB_ERROR ecode;
251 if (tdb->flags & TDB_RDONLY) {
252 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
253 "Write to read-only database");
/* The last argument is false here, unlike nothing else visible in this
 * file; per the tdb_oob comment a true probe makes an oversized len a
 * non-failure. */
256 ecode = tdb->tdb2.io->oob(tdb, off + len, false);
257 if (ecode != TDB_SUCCESS) {
261 if (tdb->file->map_ptr) {
262 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
/* No mapping available: fall back to a positional write. */
265 ret = pwrite(tdb->file->fd, buf, len, off);
267 /* This shouldn't happen: we avoid sparse files. */
271 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
272 "tdb_write: %zi at %zu len=%zu (%s)",
273 ret, (size_t)off, (size_t)len,
280 /* read a lump of data at a specified offset */
/* Copies @len bytes from file offset @off into @buf.  The range end
 * off + len is validated through io->oob first; the data then comes from the
 * memory map when one exists, or from pread() on the file descriptor
 * otherwise.  A pread() problem is logged and returned as TDB_ERR_IO (the
 * exact condition tested is not visible in this extract). */
281 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
282 void *buf, tdb_len_t len)
284 enum TDB_ERROR ecode;
286 ecode = tdb->tdb2.io->oob(tdb, off + len, false);
287 if (ecode != TDB_SUCCESS) {
291 if (tdb->file->map_ptr) {
292 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
/* No mapping available: fall back to a positional read. */
294 ssize_t r = pread(tdb->file->fd, buf, len, off);
296 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
297 "tdb_read failed with %zi at %zu "
298 "len=%zu (%s) map_size=%zu",
299 r, (size_t)off, (size_t)len,
301 (size_t)tdb->file->map_size);
/* Write the @len-byte record @rec at file offset @off through io->twrite.
 * When TDB_CONVERT is set the caller's buffer is left untouched: the record
 * is copied into a malloc()ed scratch buffer, that copy is run through
 * tdb_convert(), and the converted copy is what gets written.  Allocation
 * failure is logged and returned as TDB_ERR_OOM. */
307 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
308 const void *rec, size_t len)
310 enum TDB_ERROR ecode;
312 if (unlikely((tdb->flags & TDB_CONVERT))) {
313 void *conv = malloc(len);
315 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
316 "tdb_write: no memory converting"
319 memcpy(conv, rec, len);
/* tdb_convert() result is used directly as the data pointer. */
320 ecode = tdb->tdb2.io->twrite(tdb, off,
321 tdb_convert(tdb, conv, len), len);
/* No conversion needed: write the caller's buffer as it is. */
324 ecode = tdb->tdb2.io->twrite(tdb, off, rec, len);
/* Read @len bytes at file offset @off into @rec through io->tread, then run
 * @rec through tdb_convert() in place.
 * NOTE(review): in the visible lines tdb_convert() is called without looking
 * at ecode, i.e. also after a failed read - confirm that this is intended. */
329 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
330 void *rec, size_t len)
332 enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len);
333 tdb_convert(tdb, rec, len);
/* Store the single offset value @val at file offset @off.  Read-only
 * databases are refused with TDB_ERR_RDONLY. */
337 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
338 tdb_off_t off, tdb_off_t val)
340 if (tdb->flags & TDB_RDONLY) {
341 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
342 "Write to read-only database");
/* Common case, no endian conversion: try to get a direct pointer to the
 * slot; an error pointer is returned as its error code. */
345 if (likely(!(tdb->flags & TDB_CONVERT))) {
346 tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p),
348 if (TDB_PTR_IS_ERR(p)) {
349 return TDB_PTR_ERR(p);
/* Otherwise go through the converting write path. */
356 return tdb_write_convert(tdb, off, &val, sizeof(val));
/* Allocate a buffer of @prefix + @len bytes and read @len bytes from file
 * offset @offset into it, starting @prefix bytes into the buffer.  Failures
 * are reported as error pointers built with TDB_ERR_PTR(): TDB_ERR_OOM when
 * malloc() fails, or the tread error code when the read fails. */
359 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
360 tdb_len_t len, unsigned int prefix)
363 enum TDB_ERROR ecode;
365 /* some systems don't like zero length malloc */
366 buf = malloc(prefix + len ? prefix + len : 1);
/* NOTE(review): this out-of-memory report uses TDB_LOG_USE_ERROR, while the
 * other TDB_ERR_OOM reports in this file use TDB_LOG_ERROR - confirm which
 * level is intended. */
368 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
369 "tdb_alloc_read malloc failed len=%zu",
370 (size_t)(prefix + len));
371 return TDB_ERR_PTR(TDB_ERR_OOM);
/* The first @prefix bytes are left for the caller to fill in. */
373 ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len);
374 if (unlikely(ecode != TDB_SUCCESS)) {
376 return TDB_ERR_PTR(ecode);
382 /* read a lump of data, allocating the space for it */
/* Thin wrapper around _tdb_alloc_read() with a prefix of 0, so the data
 * starts at the beginning of the returned buffer.  The result is whatever
 * _tdb_alloc_read() returns, including its error pointers. */
383 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
385 return _tdb_alloc_read(tdb, offset, len, 0);
/* Write the pattern buffer @buf (of @size bytes) to the file with pwrite(),
 * starting at file offset @off, to cover @len bytes.  A pwrite() problem is
 * logged and returned as TDB_ERR_IO.  (The enclosing loop and the exact
 * failure test are not visible in this extract.) */
388 static enum TDB_ERROR fill(struct tdb_context *tdb,
389 const void *buf, size_t size,
390 tdb_off_t off, tdb_len_t len)
/* Each write is capped at the smaller of the remaining length and the
 * pattern buffer size. */
393 size_t n = len > size ? size : len;
394 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
399 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
401 " %zi at %zu len=%zu (%s)",
402 ret, (size_t)off, (size_t)len,
411 /* expand a file. we prefer to use ftruncate, as that is what posix
412 says to use for mmap expansion */
/* Grow the database by addition bytes and add that amount to
 * tdb->file->map_size.  Read-only databases are refused with
 * TDB_ERR_RDONLY.  TDB_INTERNAL databases grow their buffer with realloc();
 * the other visible path unmaps the file, extends it with ftruncate() and
 * then writes fill bytes over the new region. */
413 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
417 enum TDB_ERROR ecode;
419 if (tdb->flags & TDB_RDONLY) {
420 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
421 "Expand on read-only database");
/* The realloc() result goes into a temporary first, so map_ptr is only
 * replaced once the new pointer has been obtained. */
424 if (tdb->flags & TDB_INTERNAL) {
425 char *new = realloc(tdb->file->map_ptr,
426 tdb->file->map_size + addition);
428 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
429 "No memory to expand database");
431 tdb->file->map_ptr = new;
432 tdb->file->map_size += addition;
434 /* Unmap before trying to write; old TDB claimed OpenBSD had
435 * problem with this otherwise. */
436 tdb_munmap(tdb->file);
438 /* If this fails, we try to fill anyway. */
439 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
442 /* now fill the file with something. This ensures that the
443 file isn't sparse, which would be very bad if we ran out of
444 disk. This must be done with write, not via mmap */
/* The fill pattern is the byte 0x43, written starting at the old end of the
 * file (the current map_size). */
445 memset(buf, 0x43, sizeof(buf));
446 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
448 if (ecode != TDB_SUCCESS)
/* The size is only updated after the fill status has been checked. */
450 tdb->file->map_size += addition;
/* Obtain read access to @len bytes at file offset @off.  Two mechanisms are
 * visible: a direct pointer from io->direct when TDB_CONVERT is clear, and
 * a heap copy made by _tdb_alloc_read() with room for a struct
 * tdb_access_hdr in front of the data; that header is pushed onto the
 * tdb->tdb2.access list.  Errors come back as error pointers (tested with
 * TDB_PTR_IS_ERR).  A caller in this file pairs the call with
 * tdb_access_release(). */
456 const void *tdb_access_read(struct tdb_context *tdb,
457 tdb_off_t off, tdb_len_t len, bool convert)
461 if (likely(!(tdb->flags & TDB_CONVERT))) {
462 ret = tdb->tdb2.io->direct(tdb, off, len, false);
464 if (TDB_PTR_IS_ERR(ret)) {
/* Copy path: read the data behind a freshly allocated access header and
 * link the header at the front of the access list. */
469 struct tdb_access_hdr *hdr;
470 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
471 if (TDB_PTR_IS_ERR(hdr)) {
474 hdr->next = tdb->tdb2.access;
475 tdb->tdb2.access = hdr;
/* Endian-convert the copy (presumably only when @convert is true; the
 * guarding line is not visible in this extract). */
478 tdb_convert(tdb, (void *)ret, len);
/* Count an outstanding direct access; tdb_oob() asserts on this counter.
 * Presumably this belongs to the direct-pointer case - confirm. */
481 tdb->tdb2.direct_access++;
/* Obtain write access to @len bytes at file offset @off.  Read-only
 * databases are refused: the error is logged and TDB_ERR_PTR(TDB_ERR_RDONLY)
 * is returned.  As on the read side, two mechanisms are visible: a direct
 * pointer from io->direct (last argument true) when TDB_CONVERT is clear,
 * and a heap copy behind a struct tdb_access_hdr that is pushed onto the
 * tdb->tdb2.access list.  Errors come back as error pointers. */
486 void *tdb_access_write(struct tdb_context *tdb,
487 tdb_off_t off, tdb_len_t len, bool convert)
491 if (tdb->flags & TDB_RDONLY) {
492 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
493 "Write to read-only database");
494 return TDB_ERR_PTR(TDB_ERR_RDONLY);
497 if (likely(!(tdb->flags & TDB_CONVERT))) {
498 ret = tdb->tdb2.io->direct(tdb, off, len, true);
500 if (TDB_PTR_IS_ERR(ret)) {
/* Copy path: read the current contents behind a freshly allocated access
 * header and link the header at the front of the access list. */
506 struct tdb_access_hdr *hdr;
507 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
508 if (TDB_PTR_IS_ERR(hdr)) {
511 hdr->next = tdb->tdb2.access;
512 tdb->tdb2.access = hdr;
/* Remember the conversion request in the header; tdb_access_commit() works
 * from the header when writing the data back. */
515 hdr->convert = convert;
518 tdb_convert(tdb, (void *)ret, len);
/* Count an outstanding direct access; tdb_oob() asserts on this counter.
 * Presumably this belongs to the direct-pointer case - confirm. */
520 tdb->tdb2.direct_access++;
/* Walk the tdb->tdb2.access list looking for the header that belongs to the
 * access pointer @p.  The iteration keeps a pointer to the link field
 * (struct tdb_access_hdr **) rather than to the node, which lets callers
 * unlink the node they find.  (The match test and the return statements are
 * not visible in this extract.) */
525 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
527 struct tdb_access_hdr **hp;
529 for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) {
/* End an access started with tdb_access_read() or tdb_access_write()
 * without writing anything back.  The pointer @p is looked up with
 * find_hdr(); the visible lines also show the direct_access counter being
 * decremented (presumably when @p has no header, i.e. was a direct
 * pointer - the branch structure is not visible in this extract). */
536 void tdb_access_release(struct tdb_context *tdb, const void *p)
538 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
545 tdb->tdb2.direct_access--;
/* End a write access started with tdb_access_write(), writing the data back
 * where needed.  The pointer @p is looked up with find_hdr(); for a copied
 * access the data is written to hdr->off with length hdr->len, either
 * through tdb_write_convert() or through tdb_write() (presumably selected
 * by hdr->convert; the test is not visible in this extract).  The visible
 * lines also show the direct_access counter being decremented. */
548 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
550 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
551 enum TDB_ERROR ecode;
556 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
558 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
562 tdb->tdb2.direct_access--;
/* Return a pointer into the memory map for the @len bytes at file offset
 * @off.  The range end off + len is checked with tdb_oob() first, and an
 * oob failure is returned as an error pointer built with TDB_ERR_PTR().
 * When the file is not mapped the function takes a separate early path
 * (its return value is not visible in this extract; callers in this file
 * test the result both for NULL and with TDB_PTR_IS_ERR). */
569 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
572 enum TDB_ERROR ecode;
574 if (unlikely(!tdb->file->map_ptr))
577 ecode = tdb_oob(tdb, off + len, false);
578 if (unlikely(ecode != TDB_SUCCESS))
579 return TDB_ERR_PTR(ecode);
580 return (char *)tdb->file->map_ptr + off;
583 void tdb_inc_seqnum(struct tdb_context *tdb)
587 if (tdb->flags & TDB_VERSION1) {
588 tdb1_increment_seqnum_nonblock(tdb);
592 if (likely(!(tdb->flags & TDB_CONVERT))) {
595 direct = tdb->tdb2.io->direct(tdb,
596 offsetof(struct tdb_header,
598 sizeof(*direct), true);
599 if (likely(direct)) {
600 /* Don't let it go negative, even briefly */
601 if (unlikely((*direct) + 1) < 0)
608 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
609 if (!TDB_OFF_IS_ERR(seq)) {
611 if (unlikely((int64_t)seq < 0))
613 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
/* Method table for the default I/O implementation; tdb_io_init() stores its
 * address in tdb->tdb2.io.  (The initializer entries are not visible in this
 * extract.) */
617 static const struct tdb_methods io_methods = {
/* Point tdb->tdb2.io at the file-local io_methods table, so that the
 * tdb->tdb2.io->... calls made throughout this file go to the
 * implementations listed there. */
626 initialise the default methods table
628 void tdb_io_init(struct tdb_context *tdb)
630 tdb->tdb2.io = &io_methods;