2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "tdb1_private.h"
31 #define MAX(a,b) ((a) > (b) ? (a) : (b))
34 /* check for an out of bounds access - if it is out of bounds then
35 see if the database has been expanded by someone else and expand
37 note that "len" is the minimum length needed for the db
39 static int tdb1_oob(struct tdb_context *tdb, tdb1_off_t off, tdb1_len_t len,
43 if (len + off < len) {
45 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
46 "tdb1_oob off %d len %d wrap\n",
52 if (off + len <= tdb->file->map_size)
54 if (tdb->flags & TDB_INTERNAL) {
56 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
57 "tdb1_oob len %d beyond internal malloc size %u",
58 (int)(off + len), (int)tdb->file->map_size);
63 if (fstat(tdb->file->fd, &st) == -1) {
64 tdb->last_error = TDB_ERR_IO;
68 if (st.st_size < (size_t)off + len) {
70 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
71 "tdb1_oob len %u beyond eof at %u",
72 (int)(off + len), (int)st.st_size);
77 /* Beware >4G files! */
78 if ((tdb1_off_t)st.st_size != st.st_size) {
79 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
80 "tdb1_oob len %llu too large!\n",
81 (long long)st.st_size);
85 /* Unmap, update size, remap */
86 if (tdb1_munmap(tdb) == -1) {
87 tdb->last_error = TDB_ERR_IO;
90 tdb->file->map_size = st.st_size;
95 /* write a lump of data at a specified offset */
96 static int tdb1_write(struct tdb_context *tdb, tdb1_off_t off,
97 const void *buf, tdb1_len_t len)
103 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
104 tdb->last_error = TDB_ERR_RDONLY;
108 if (tdb->tdb1.io->tdb1_oob(tdb, off, len, 0) != 0)
111 if (tdb->file->map_ptr) {
112 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
114 ssize_t written = pwrite(tdb->file->fd, buf, len, off);
115 if ((written != (ssize_t)len) && (written != -1)) {
116 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
117 "tdb1_write: wrote only "
118 "%d of %d bytes at %d, trying once more",
119 (int)written, len, off);
120 written = pwrite(tdb->file->fd,
121 (const char *)buf+written,
126 /* Ensure ecode is set for log fn. */
127 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
128 "tdb1_write failed at %d "
130 off, len, strerror(errno));
132 } else if (written != (ssize_t)len) {
133 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
134 "tdb1_write: failed to "
135 "write %d bytes at %d in two attempts",
143 /* Endian conversion: we only ever deal with 4 byte quantities */
144 void *tdb1_convert(void *buf, uint32_t size)
146 uint32_t i, *p = (uint32_t *)buf;
147 for (i = 0; i < size / 4; i++)
148 p[i] = TDB1_BYTEREV(p[i]);
153 /* read a lump of data at a specified offset, maybe convert */
154 static int tdb1_read(struct tdb_context *tdb, tdb1_off_t off, void *buf,
155 tdb1_len_t len, int cv)
157 if (tdb->tdb1.io->tdb1_oob(tdb, off, len, 0) != 0) {
161 if (tdb->file->map_ptr) {
162 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
164 ssize_t ret = pread(tdb->file->fd, buf, len, off);
165 if (ret != (ssize_t)len) {
166 /* Ensure ecode is set for log fn. */
167 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
168 "tdb1_read failed at %d "
169 "len=%d ret=%d (%s) map_size=%d",
170 (int)off, (int)len, (int)ret,
172 (int)tdb->file->map_size);
177 tdb1_convert(buf, len);
185 do an unlocked scan of the hash table heads to find the next non-zero head. The value
186 will then be confirmed with the lock held
188 static void tdb1_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
191 if (tdb->file->map_ptr) {
192 for (;h < tdb->tdb1.header.hash_size;h++) {
193 if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->file->map_ptr)) {
199 for (;h < tdb->tdb1.header.hash_size;h++) {
200 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
209 int tdb1_munmap(struct tdb_context *tdb)
211 if (tdb->flags & TDB_INTERNAL)
215 if (tdb->file->map_ptr) {
218 ret = munmap(tdb->file->map_ptr, tdb->file->map_size);
223 tdb->file->map_ptr = NULL;
227 void tdb1_mmap(struct tdb_context *tdb)
229 if (tdb->flags & TDB_INTERNAL)
233 if (!(tdb->flags & TDB_NOMMAP)) {
235 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
236 mmap_flags = PROT_READ;
238 mmap_flags = PROT_READ | PROT_WRITE;
240 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
242 MAP_SHARED|MAP_FILE, tdb->file->fd, 0);
245 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
248 if (tdb->file->map_ptr == MAP_FAILED) {
249 tdb->file->map_ptr = NULL;
250 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
251 "tdb1_mmap failed for size %d (%s)",
252 tdb->file->map_size, strerror(errno));
255 tdb->file->map_ptr = NULL;
258 tdb->file->map_ptr = NULL;
262 /* expand a file. we prefer to use ftruncate, as that is what posix
263 says to use for mmap expansion */
264 static int tdb1_expand_file(struct tdb_context *tdb, tdb1_off_t size, tdb1_off_t addition)
268 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
269 tdb->last_error = TDB_ERR_RDONLY;
273 if (ftruncate(tdb->file->fd, size+addition) == -1) {
275 ssize_t written = pwrite(tdb->file->fd, &b, 1,
276 (size+addition) - 1);
278 /* try once more, potentially revealing errno */
279 written = pwrite(tdb->file->fd, &b, 1,
280 (size+addition) - 1);
283 /* again - give up, guessing errno */
287 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
288 "expand_file to %d failed (%s)",
295 /* now fill the file with something. This ensures that the
296 file isn't sparse, which would be very bad if we ran out of
297 disk. This must be done with write, not via mmap */
298 memset(buf, TDB1_PAD_BYTE, sizeof(buf));
300 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
301 ssize_t written = pwrite(tdb->file->fd, buf, n, size);
303 /* prevent infinite loops: try _once_ more */
304 written = pwrite(tdb->file->fd, buf, n, size);
307 /* give up, trying to provide a useful errno */
308 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
310 "returned 0 twice: giving up!");
313 } else if (written == -1) {
314 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
315 "expand_file write of "
316 "%d bytes failed (%s)", (int)n,
319 } else if (written != n) {
320 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
321 "expand_file: wrote "
322 "only %d of %d bytes - retrying",
323 (int)written, (int)n);
328 tdb->stats.expands++;
333 /* You need 'size', this tells you how much you should expand by. */
334 tdb1_off_t tdb1_expand_adjust(tdb1_off_t map_size, tdb1_off_t size, int page_size)
336 tdb1_off_t new_size, top_size;
338 /* limit size in order to avoid using up huge amounts of memory for
339 * in memory tdbs if an oddball huge record creeps in */
340 if (size > 100 * 1024) {
341 top_size = map_size + size * 2;
343 top_size = map_size + size * 100;
346 /* always make room for at least top_size more records, and at
347 least 25% more space. if the DB is smaller than 100MiB,
348 otherwise grow it by 10% only. */
349 if (map_size > 100 * 1024 * 1024) {
350 new_size = map_size * 1.10;
352 new_size = map_size * 1.25;
355 /* Round the database up to a multiple of the page size */
356 new_size = MAX(top_size, new_size);
357 return TDB1_ALIGN(new_size, page_size) - map_size;
360 /* expand the database at least size bytes by expanding the underlying
361 file and doing the mmap again if necessary */
362 int tdb1_expand(struct tdb_context *tdb, tdb1_off_t size)
364 struct tdb1_record rec;
367 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
368 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
369 "lock failed in tdb1_expand");
373 /* must know about any previous expansions by another process */
374 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
376 size = tdb1_expand_adjust(tdb->file->map_size, size,
377 tdb->tdb1.page_size);
379 if (!(tdb->flags & TDB_INTERNAL))
383 * We must ensure the file is unmapped before doing this
384 * to ensure consistency with systems like OpenBSD where
385 * writes and mmaps are not consistent.
388 /* expand the file itself */
389 if (!(tdb->flags & TDB_INTERNAL)) {
390 if (tdb->tdb1.io->tdb1_expand_file(tdb, tdb->file->map_size, size) != 0)
394 tdb->file->map_size += size;
396 if (tdb->flags & TDB_INTERNAL) {
397 char *new_map_ptr = (char *)realloc(tdb->file->map_ptr,
398 tdb->file->map_size);
400 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM,
402 "tdb1_expand: no memory");
403 tdb->file->map_size -= size;
406 tdb->file->map_ptr = new_map_ptr;
409 * We must ensure the file is remapped before adding the space
410 * to ensure consistency with systems like OpenBSD where
411 * writes and mmaps are not consistent.
414 /* We're ok if the mmap fails as we'll fallback to read/write */
418 /* form a new freelist record */
419 memset(&rec,'\0',sizeof(rec));
420 rec.rec_len = size - sizeof(rec);
422 /* link it into the free list */
423 offset = tdb->file->map_size - size;
424 if (tdb1_free(tdb, offset, &rec) == -1)
427 tdb1_unlock(tdb, -1, F_WRLCK);
430 tdb1_unlock(tdb, -1, F_WRLCK);
434 /* read/write a tdb1_off_t */
435 int tdb1_ofs_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
437 return tdb->tdb1.io->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
440 int tdb1_ofs_write(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
443 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
447 /* read a lump of data, allocating the space for it */
448 unsigned char *tdb1_alloc_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t len)
452 /* some systems don't like zero length malloc */
454 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
455 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
456 "tdb1_alloc_read malloc failed"
458 len, strerror(errno));
461 if (tdb->tdb1.io->tdb1_read(tdb, offset, buf, len, 0) == -1) {
468 /* Give a piece of tdb data to a parser */
469 enum TDB_ERROR tdb1_parse_data(struct tdb_context *tdb, TDB_DATA key,
470 tdb1_off_t offset, tdb1_len_t len,
471 enum TDB_ERROR (*parser)(TDB_DATA key,
477 enum TDB_ERROR result;
481 if ((tdb->tdb1.transaction == NULL) && (tdb->file->map_ptr != NULL)) {
483 * Optimize by avoiding the malloc/memcpy/free, point the
484 * parser directly at the mmap area.
486 if (tdb->tdb1.io->tdb1_oob(tdb, offset, len, 0) != 0) {
487 return tdb->last_error;
489 data.dptr = offset + (unsigned char *)tdb->file->map_ptr;
490 return parser(key, data, private_data);
493 if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
494 return tdb->last_error;
497 result = parser(key, data, private_data);
502 /* read/write a record */
503 int tdb1_rec_read(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
505 if (tdb->tdb1.io->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
507 if (TDB1_BAD_MAGIC(rec)) {
508 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
509 "tdb1_rec_read bad magic 0x%x at offset=%d",
513 return tdb->tdb1.io->tdb1_oob(tdb, rec->next, sizeof(*rec), 0);
516 int tdb1_rec_write(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
518 struct tdb1_record r = *rec;
519 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
522 static const struct tdb1_methods io1_methods = {
525 tdb1_next_hash_chain,
531 initialise the default methods table
533 void tdb1_io_init(struct tdb_context *tdb)
535 tdb->tdb1.io = &io1_methods;
538 enum TDB_ERROR tdb1_probe_length(struct tdb_context *tdb)
540 tdb->last_error = TDB_SUCCESS;
541 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, true);
542 return tdb->last_error;