2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the tdb
12 ** library. This does NOT imply that all of Samba is released under the LGPL
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
30 #include <ccan/likely/likely.h>
32 void tdb_munmap(struct tdb_file *file)
38 munmap(file->map_ptr, file->map_size);
43 void tdb_mmap(struct tdb_context *tdb)
45 if (tdb->flags & TDB_INTERNAL)
48 if (tdb->flags & TDB_NOMMAP)
51 /* size_t can be smaller than off_t. */
52 if ((size_t)tdb->file->map_size == tdb->file->map_size) {
53 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
55 MAP_SHARED, tdb->file->fd, 0);
57 tdb->file->map_ptr = MAP_FAILED;
60 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
62 if (tdb->file->map_ptr == MAP_FAILED) {
63 tdb->file->map_ptr = NULL;
64 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
65 "tdb_mmap failed for size %lld (%s)",
66 (long long)tdb->file->map_size, strerror(errno));
70 /* check for an out of bounds access - if it is out of bounds then
71 see if the database has been expanded by someone else and expand
73 note that "len" is the minimum length needed for the db.
75 If probe is true, len being too large isn't a failure.
77 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
83 /* We can't hold pointers during this: we could unmap! */
84 assert(!tdb->direct_access
85 || (tdb->flags & TDB_NOLOCK)
86 || tdb_has_expansion_lock(tdb));
88 if (len <= tdb->file->map_size)
90 if (tdb->flags & TDB_INTERNAL) {
94 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
95 "tdb_oob len %lld beyond internal"
98 (long long)tdb->file->map_size);
102 ecode = tdb_lock_expand(tdb, F_RDLCK);
103 if (ecode != TDB_SUCCESS) {
107 if (fstat(tdb->file->fd, &st) != 0) {
108 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
109 "Failed to fstat file: %s", strerror(errno));
110 tdb_unlock_expand(tdb, F_RDLCK);
114 tdb_unlock_expand(tdb, F_RDLCK);
116 if (st.st_size < (size_t)len) {
120 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
121 "tdb_oob len %zu beyond eof at %zu",
122 (size_t)len, st.st_size);
126 /* Unmap, update size, remap */
127 tdb_munmap(tdb->file);
129 tdb->file->map_size = st.st_size;
134 /* Endian conversion: we only ever deal with 8 byte quantities */
135 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
137 assert(size % 8 == 0);
138 if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
139 uint64_t i, *p = (uint64_t *)buf;
140 for (i = 0; i < size / 8; i++)
141 p[i] = bswap_64(p[i]);
146 /* Return first non-zero offset in offset array, or end, or -ve error. */
147 /* FIXME: Return the off? */
148 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
149 tdb_off_t base, uint64_t start, uint64_t end)
154 /* Zero vs non-zero is the same unconverted: minor optimization. */
155 val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
156 (end - start) * sizeof(tdb_off_t), false);
157 if (TDB_PTR_IS_ERR(val)) {
158 return TDB_PTR_ERR(val);
161 for (i = 0; i < (end - start); i++) {
165 tdb_access_release(tdb, val);
169 /* Return first zero offset in num offset array, or num, or -ve error. */
170 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
176 /* Zero vs non-zero is the same unconverted: minor optimization. */
177 val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
178 if (TDB_PTR_IS_ERR(val)) {
179 return TDB_PTR_ERR(val);
182 for (i = 0; i < num; i++) {
186 tdb_access_release(tdb, val);
190 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
192 char buf[8192] = { 0 };
193 void *p = tdb->methods->direct(tdb, off, len, true);
194 enum TDB_ERROR ecode = TDB_SUCCESS;
196 assert(!tdb->read_only);
197 if (TDB_PTR_IS_ERR(p)) {
198 return TDB_PTR_ERR(p);
205 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
206 ecode = tdb->methods->twrite(tdb, off, buf, todo);
207 if (ecode != TDB_SUCCESS) {
216 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
219 enum TDB_ERROR ecode;
221 if (likely(!(tdb->flags & TDB_CONVERT))) {
222 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
224 if (TDB_PTR_IS_ERR(p)) {
225 return TDB_PTR_ERR(p);
231 ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
232 if (ecode != TDB_SUCCESS) {
238 /* write a lump of data at a specified offset */
239 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
240 const void *buf, tdb_len_t len)
242 enum TDB_ERROR ecode;
244 if (tdb->read_only) {
245 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
246 "Write to read-only database");
249 ecode = tdb->methods->oob(tdb, off + len, false);
250 if (ecode != TDB_SUCCESS) {
254 if (tdb->file->map_ptr) {
255 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
258 ret = pwrite(tdb->file->fd, buf, len, off);
260 /* This shouldn't happen: we avoid sparse files. */
264 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
265 "tdb_write: %zi at %zu len=%zu (%s)",
266 ret, (size_t)off, (size_t)len,
273 /* read a lump of data at a specified offset */
274 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
275 void *buf, tdb_len_t len)
277 enum TDB_ERROR ecode;
279 ecode = tdb->methods->oob(tdb, off + len, false);
280 if (ecode != TDB_SUCCESS) {
284 if (tdb->file->map_ptr) {
285 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
287 ssize_t r = pread(tdb->file->fd, buf, len, off);
289 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
290 "tdb_read failed with %zi at %zu "
291 "len=%zu (%s) map_size=%zu",
292 r, (size_t)off, (size_t)len,
294 (size_t)tdb->file->map_size);
300 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
301 const void *rec, size_t len)
303 enum TDB_ERROR ecode;
305 if (unlikely((tdb->flags & TDB_CONVERT))) {
306 void *conv = malloc(len);
308 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
309 "tdb_write: no memory converting"
312 memcpy(conv, rec, len);
313 ecode = tdb->methods->twrite(tdb, off,
314 tdb_convert(tdb, conv, len), len);
317 ecode = tdb->methods->twrite(tdb, off, rec, len);
322 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
323 void *rec, size_t len)
325 enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
326 tdb_convert(tdb, rec, len);
330 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
331 tdb_off_t off, tdb_off_t val)
333 if (tdb->read_only) {
334 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
335 "Write to read-only database");
338 if (likely(!(tdb->flags & TDB_CONVERT))) {
339 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
341 if (TDB_PTR_IS_ERR(p)) {
342 return TDB_PTR_ERR(p);
349 return tdb_write_convert(tdb, off, &val, sizeof(val));
352 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
353 tdb_len_t len, unsigned int prefix)
356 enum TDB_ERROR ecode;
358 /* some systems don't like zero length malloc */
359 buf = malloc(prefix + len ? prefix + len : 1);
361 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
362 "tdb_alloc_read malloc failed len=%zu",
363 (size_t)(prefix + len));
364 return TDB_ERR_PTR(TDB_ERR_OOM);
366 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
367 if (unlikely(ecode != TDB_SUCCESS)) {
369 return TDB_ERR_PTR(ecode);
375 /* read a lump of data, allocating the space for it */
376 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
378 return _tdb_alloc_read(tdb, offset, len, 0);
381 static enum TDB_ERROR fill(struct tdb_context *tdb,
382 const void *buf, size_t size,
383 tdb_off_t off, tdb_len_t len)
386 size_t n = len > size ? size : len;
387 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
392 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
394 " %zi at %zu len=%zu (%s)",
395 ret, (size_t)off, (size_t)len,
404 /* expand a file. we prefer to use ftruncate, as that is what posix
405 says to use for mmap expansion */
406 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
410 enum TDB_ERROR ecode;
412 if (tdb->read_only) {
413 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
414 "Expand on read-only database");
417 if (tdb->flags & TDB_INTERNAL) {
418 char *new = realloc(tdb->file->map_ptr,
419 tdb->file->map_size + addition);
421 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
422 "No memory to expand database");
424 tdb->file->map_ptr = new;
425 tdb->file->map_size += addition;
427 /* Unmap before trying to write; old TDB claimed OpenBSD had
428 * problem with this otherwise. */
429 tdb_munmap(tdb->file);
431 /* If this fails, we try to fill anyway. */
432 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
435 /* now fill the file with something. This ensures that the
436 file isn't sparse, which would be very bad if we ran out of
437 disk. This must be done with write, not via mmap */
438 memset(buf, 0x43, sizeof(buf));
439 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
441 if (ecode != TDB_SUCCESS)
443 tdb->file->map_size += addition;
449 const void *tdb_access_read(struct tdb_context *tdb,
450 tdb_off_t off, tdb_len_t len, bool convert)
454 if (likely(!(tdb->flags & TDB_CONVERT))) {
455 ret = tdb->methods->direct(tdb, off, len, false);
457 if (TDB_PTR_IS_ERR(ret)) {
462 struct tdb_access_hdr *hdr;
463 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
464 if (TDB_PTR_IS_ERR(hdr)) {
467 hdr->next = tdb->access;
471 tdb_convert(tdb, (void *)ret, len);
474 tdb->direct_access++;
479 void *tdb_access_write(struct tdb_context *tdb,
480 tdb_off_t off, tdb_len_t len, bool convert)
484 if (tdb->read_only) {
485 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
486 "Write to read-only database");
487 return TDB_ERR_PTR(TDB_ERR_RDONLY);
490 if (likely(!(tdb->flags & TDB_CONVERT))) {
491 ret = tdb->methods->direct(tdb, off, len, true);
493 if (TDB_PTR_IS_ERR(ret)) {
499 struct tdb_access_hdr *hdr;
500 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
501 if (TDB_PTR_IS_ERR(hdr)) {
504 hdr->next = tdb->access;
508 hdr->convert = convert;
511 tdb_convert(tdb, (void *)ret, len);
513 tdb->direct_access++;
518 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
520 struct tdb_access_hdr **hp;
522 for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
529 void tdb_access_release(struct tdb_context *tdb, const void *p)
531 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
538 tdb->direct_access--;
541 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
543 struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
544 enum TDB_ERROR ecode;
549 ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
551 ecode = tdb_write(tdb, hdr->off, p, hdr->len);
555 tdb->direct_access--;
562 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
565 enum TDB_ERROR ecode;
567 if (unlikely(!tdb->file->map_ptr))
570 ecode = tdb_oob(tdb, off + len, false);
571 if (unlikely(ecode != TDB_SUCCESS))
572 return TDB_ERR_PTR(ecode);
573 return (char *)tdb->file->map_ptr + off;
576 void tdb_inc_seqnum(struct tdb_context *tdb)
580 if (likely(!(tdb->flags & TDB_CONVERT))) {
583 direct = tdb->methods->direct(tdb,
584 offsetof(struct tdb_header,
586 sizeof(*direct), true);
587 if (likely(direct)) {
588 /* Don't let it go negative, even briefly */
589 if (unlikely((*direct) + 1) < 0)
596 seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
597 if (!TDB_OFF_IS_ERR(seq)) {
599 if (unlikely((int64_t)seq < 0))
601 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
/* Default I/O method table; tdb_io_init() stores its address in
 * tdb->methods.
 * NOTE(review): the initializer entries are not visible here - presumably
 * the static I/O routines of this file (tdb_read, tdb_write, tdb_oob,
 * tdb_expand_file, tdb_direct); confirm their order against the
 * declaration of struct tdb_methods. */
605 static const struct tdb_methods io_methods = {
614 initialise the default methods table
616 void tdb_io_init(struct tdb_context *tdb)
618 tdb->methods = &io_methods;