2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "tdb1_private.h"
/* Evaluates to the larger of a and b. Each argument appears twice in the
 * expansion, so an argument with side effects may be evaluated twice. */
31 #define MAX(a,b) ((a) > (b) ? (a) : (b))
34 /* check for an out of bounds access - if it is out of bounds then
35 see if the database has been expanded by someone else and remap it if so.
37 Note that "len" is the minimum length needed for the db. */
/* Bounds check for an access that needs the database to be at least "len"
 * bytes long, refreshing the recorded map size when the file has grown.
 *
 * tdb:   database context; reads flags, file->fd and file->map_size.
 * len:   minimum database length required by the caller.
 * probe: not referenced on any visible line - presumably it suppresses the
 *        error logging shown below; TODO confirm.
 *
 * NOTE(review): the braces, return statements and local declarations of
 * this function are not visible in this view; the notes below describe
 * only the statements that are shown. */
39 static int tdb1_oob(struct tdb_context *tdb, tdb1_off_t len, int probe)
/* Case 1: the requested length already fits inside the current map size. */
42 if (len <= tdb->file->map_size)
/* Case 2: TDB_INTERNAL database - an oversized request is logged as being
 * beyond the internal malloc size.
 * NOTE(review): both values are cast to int for the %d format, so large
 * values would be printed truncated. */
44 if (tdb->flags & TDB_INTERNAL) {
46 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
47 "tdb1_oob len %d beyond internal malloc size %d",
48 (int)len, (int)tdb->file->map_size);
/* Case 3: file-backed database - ask the kernel for the current file size;
 * a failing fstat is recorded as TDB_ERR_IO. */
53 if (fstat(tdb->file->fd, &st) == -1) {
54 tdb->last_error = TDB_ERR_IO;
/* The file is still shorter than the requested length: log it as beyond eof.
 * NOTE(review): st.st_size is compared against len cast to size_t, which
 * mixes a signed and an unsigned operand - confirm this is intended. */
58 if (st.st_size < (size_t)len) {
60 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
61 "tdb1_oob len %d beyond eof at %d",
62 (int)len, (int)st.st_size);
67 /* Unmap, update size, remap */
68 if (tdb1_munmap(tdb) == -1) {
69 tdb->last_error = TDB_ERR_IO;
/* Adopt the size reported by fstat as the new map size. The remap step named
 * in the comment above is not visible in this view. */
72 tdb->file->map_size = st.st_size;
77 /* write a lump of data at a specified offset */
/* Write len bytes from buf into the database at byte offset off.
 *
 * tdb: database context; reads flags, tdb1.traverse_read, tdb1.io,
 *      file->map_ptr and file->fd.
 * off: byte offset of the first byte to write.
 * buf: source bytes.
 * len: number of bytes to write.
 *
 * Errors recorded in tdb->last_error on visible lines: TDB_ERR_RDONLY and
 * TDB_ERR_IO. The return statements are not visible in this view.
 *
 * NOTE(review): off + len is evaluated before the bounds check; if
 * tdb1_off_t is a fixed-width unsigned type the sum can wrap around and
 * pass the check - confirm against the type definition. */
78 static int tdb1_write(struct tdb_context *tdb, tdb1_off_t off,
79 const void *buf, tdb1_len_t len)
/* Refuse to write when the context is read-only or marked traverse_read. */
85 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
86 tdb->last_error = TDB_ERR_RDONLY;
/* Bounds check through the method table, with probe set to 0. */
90 if (tdb->tdb1.io->tdb1_oob(tdb, off + len, 0) != 0)
/* With a mapping present the bytes are copied straight into it. */
93 if (tdb->file->map_ptr) {
94 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
/* pwrite path - presumably the alternative taken when there is no mapping;
 * the line separating the two paths is not visible in this view. */
96 ssize_t written = pwrite(tdb->file->fd, buf, len, off);
/* Short write that is not an outright failure: warn, then issue a second
 * pwrite starting at buf + written (its remaining arguments are not
 * visible here).
 * NOTE(review): len and off are passed to %d without a cast. */
97 if ((written != (ssize_t)len) && (written != -1)) {
98 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
99 "tdb1_write: wrote only "
100 "%d of %d bytes at %d, trying once more",
101 (int)written, len, off);
102 written = pwrite(tdb->file->fd,
103 (const char *)buf+written,
108 /* Ensure ecode is set for log fn. */
109 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
110 "tdb1_write failed at %d "
112 off, len, strerror(errno));
/* The byte count still differs from len: report the failure of both
 * attempts. */
114 } else if (written != (ssize_t)len) {
115 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
116 "tdb1_write: failed to "
117 "write %d bytes at %d in two attempts",
125 /* Endian conversion: we only ever deal with 4 byte quantities */
/* Apply TDB1_BYTEREV in place to each 32-bit word of buf.
 *
 * buf:  buffer accessed as an array of uint32_t.
 * size: buffer length in bytes; size / 4 words are processed, so up to
 *       three trailing bytes are left untouched.
 *
 * The return statement is not visible in this view. */
126 void *tdb1_convert(void *buf, uint32_t size)
128 uint32_t i, *p = (uint32_t *)buf;
129 for (i = 0; i < size / 4; i++)
130 p[i] = TDB1_BYTEREV(p[i]);
135 /* read a lump of data at a specified offset, maybe convert */
/* Read len bytes at byte offset off into buf.
 *
 * tdb: database context; reads tdb1.io, file->map_ptr, file->fd and
 *      file->map_size.
 * off: byte offset of the first byte to read.
 * buf: destination; must have room for len bytes.
 * len: number of bytes to read.
 * cv:  not referenced on any visible line - presumably it selects whether
 *      tdb1_convert is applied to the result; TODO confirm.
 *
 * A read that does not return exactly len bytes is logged and recorded as
 * TDB_ERR_IO. The return statements are not visible in this view.
 *
 * NOTE(review): off + len is evaluated before the bounds check and may wrap
 * if tdb1_off_t is a fixed-width unsigned type - confirm. */
136 static int tdb1_read(struct tdb_context *tdb, tdb1_off_t off, void *buf,
137 tdb1_len_t len, int cv)
/* Bounds check through the method table, with probe set to 0. */
139 if (tdb->tdb1.io->tdb1_oob(tdb, off + len, 0) != 0) {
/* With a mapping present the bytes are copied straight out of it. */
143 if (tdb->file->map_ptr) {
144 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
/* pread path - presumably the alternative taken when there is no mapping;
 * the line separating the two paths is not visible in this view. */
146 ssize_t ret = pread(tdb->file->fd, buf, len, off);
147 if (ret != (ssize_t)len) {
148 /* Ensure ecode is set for log fn. */
149 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
150 "tdb1_read failed at %d "
151 "len=%d ret=%d (%s) map_size=%d",
152 (int)off, (int)len, (int)ret,
154 (int)tdb->file->map_size);
/* Byte-reverse the words that were read; the guarding condition is not
 * visible in this view. */
159 tdb1_convert(buf, len);
167 /* do an unlocked scan of the hash table heads to find the next non-zero head. The value
168 will then be confirmed with the lock held */
/* Walk hash chain heads upward, testing each head for a non-zero value.
 *
 * tdb:   database context; reads file->map_ptr and tdb1.header.hash_size.
 * chain: not referenced on any visible line - presumably the starting index
 *        on entry and the index found on exit; TODO confirm.
 *
 * The declarations of h and off and both loop bodies are not visible in
 * this view. */
170 static void tdb1_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
/* Mapped case: read each head word directly from the mapping at
 * TDB1_HASH_TOP(h). */
173 if (tdb->file->map_ptr) {
174 for (;h < tdb->tdb1.header.hash_size;h++) {
175 if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->file->map_ptr)) {
/* Second scan: read each head through tdb1_ofs_read. The condition is true
 * both for a non-zero head and for a failed read. */
181 for (;h < tdb->tdb1.header.hash_size;h++) {
182 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
/* Release the current file mapping, if any, and clear file->map_ptr.
 *
 * tdb: database context; reads flags, file->map_ptr and file->map_size.
 *
 * The return statements and the declaration of ret are not visible in this
 * view. */
191 int tdb1_munmap(struct tdb_context *tdb)
/* TDB_INTERNAL databases are special-cased first; the statement taken for
 * them is not visible here. */
193 if (tdb->flags & TDB_INTERNAL)
/* Only call munmap when a mapping exists, using the recorded map size. */
197 if (tdb->file->map_ptr) {
200 ret = munmap(tdb->file->map_ptr, tdb->file->map_size);
205 tdb->file->map_ptr = NULL;
/* Try to map the database file and store the result in file->map_ptr.
 *
 * tdb: database context; reads flags, open_flags, file->fd and
 *      file->map_size.
 *
 * On every visible failure or fallback path file->map_ptr ends up NULL.
 * Several lines, including the declaration of mmap_flags, are not visible
 * in this view. */
209 void tdb1_mmap(struct tdb_context *tdb)
/* TDB_INTERNAL databases are special-cased first; the statement taken for
 * them is not visible here. */
211 if (tdb->flags & TDB_INTERNAL)
/* Mapping is attempted only when TDB_NOMMAP is not set. */
215 if (!(tdb->flags & TDB_NOMMAP)) {
/* The protection follows the access mode the file was opened with. */
217 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
218 mmap_flags = PROT_READ;
220 mmap_flags = PROT_READ | PROT_WRITE;
/* Shared mapping of map_size bytes starting at file offset 0. */
222 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
224 MAP_SHARED|MAP_FILE, tdb->file->fd, 0);
227 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
/* Normalise MAP_FAILED to NULL and log a warning.
 * NOTE(review): map_size is passed to %d without a cast here, while other
 * call sites in this file cast it to int - confirm the argument type. */
230 if (tdb->file->map_ptr == MAP_FAILED) {
231 tdb->file->map_ptr = NULL;
232 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
233 "tdb1_mmap failed for size %d (%s)",
234 tdb->file->map_size, strerror(errno));
/* Remaining branches also leave map_ptr as NULL; their conditions are not
 * visible in this view. */
237 tdb->file->map_ptr = NULL;
240 tdb->file->map_ptr = NULL;
244 /* expand a file. we prefer to use ftruncate, as that is what posix
245 says to use for mmap expansion */
/* Grow the underlying file from size bytes to size + addition bytes and fill
 * the added region with TDB1_PAD_BYTE.
 *
 * tdb:      database context; reads flags, tdb1.traverse_read and file->fd.
 * size:     current file length; also the offset used for the fill writes.
 * addition: number of bytes to add.
 *
 * Errors recorded in tdb->last_error on visible lines: TDB_ERR_RDONLY and
 * TDB_ERR_IO. The declarations of b and buf, the loop header, the counter
 * updates and the return statements are not visible in this view. */
246 static int tdb1_expand_file(struct tdb_context *tdb, tdb1_off_t size, tdb1_off_t addition)
/* Refuse to expand when the context is read-only or marked traverse_read. */
250 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
251 tdb->last_error = TDB_ERR_RDONLY;
/* Preferred path is ftruncate. When it fails, a single byte is written at
 * the last offset of the new length instead. */
255 if (ftruncate(tdb->file->fd, size+addition) == -1) {
257 ssize_t written = pwrite(tdb->file->fd, &b, 1,
258 (size+addition) - 1);
260 /* try once more, potentially revealing errno */
261 written = pwrite(tdb->file->fd, &b, 1,
262 (size+addition) - 1);
265 /* again - give up, guessing errno */
269 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
270 "expand_file to %d failed (%s)",
277 /* now fill the file with something. This ensures that the
278 file isn't sparse, which would be very bad if we ran out of
279 disk. This must be done with write, not via mmap */
280 memset(buf, TDB1_PAD_BYTE, sizeof(buf));
/* Each fill write covers at most sizeof(buf) bytes at offset size. */
282 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
283 ssize_t written = pwrite(tdb->file->fd, buf, n, size);
285 /* prevent infinite loops: try _once_ more */
286 written = pwrite(tdb->file->fd, buf, n, size);
289 /* give up, trying to provide a useful errno */
290 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
292 "returned 0 twice: giving up!");
/* A hard pwrite failure is logged together with the errno text. */
295 } else if (written == -1) {
296 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
297 "expand_file write of "
298 "%d bytes failed (%s)", (int)n,
/* A partial write only produces a warning; last_error is not assigned on
 * this branch. */
301 } else if (written != n) {
302 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
303 "expand_file: wrote "
304 "only %d of %d bytes - retrying",
305 (int)written, (int)n);
314 /* expand the database at least size bytes by expanding the underlying
315 file and doing the mmap again if necessary */
/* Grow the database by at least size bytes and hand the added space to the
 * free list as a single record.
 *
 * tdb:  database context; reads and updates file->map_size and
 *       file->map_ptr, reads flags, tdb1.io and tdb1.page_size.
 * size: minimum number of bytes wanted; reused below to hold the actual
 *       page-aligned growth.
 *
 * The whole operation runs between tdb1_lock(tdb, -1, F_WRLCK) and
 * tdb1_unlock(tdb, -1, F_WRLCK). The return statements, the unmap and remap
 * calls and several braces are not visible in this view. */
316 int tdb1_expand(struct tdb_context *tdb, tdb1_off_t size)
318 struct tdb1_record rec;
319 tdb1_off_t offset, new_size, top_size, map_size;
321 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
322 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
323 "lock failed in tdb1_expand");
327 /* must know about any previous expansions by another process */
/* The result of this probing call is deliberately ignored. */
328 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size + 1, 1);
330 /* limit size in order to avoid using up huge amounts of memory for
331 * in memory tdbs if an oddball huge record creeps in */
/* Requests above 100 KiB reserve twice the request; the other visible
 * assignment reserves one hundred times the request. */
332 if (size > 100 * 1024) {
333 top_size = tdb->file->map_size + size * 2;
335 top_size = tdb->file->map_size + size * 100;
338 /* always make room for at least top_size more records, and at
339 least 25% more space. if the DB is smaller than 100MiB,
340 otherwise grow it by 10% only. */
/* NOTE(review): the growth factor is applied in floating point and the
 * result is converted back to tdb1_off_t on assignment. */
341 if (tdb->file->map_size > 100 * 1024 * 1024) {
342 map_size = tdb->file->map_size * 1.10;
344 map_size = tdb->file->map_size * 1.25;
347 /* Round the database up to a multiple of the page size */
/* From here on size is the number of bytes actually being added. */
348 new_size = MAX(top_size, map_size);
349 size = TDB1_ALIGN(new_size, tdb->tdb1.page_size) - tdb->file->map_size;
351 if (!(tdb->flags & TDB_INTERNAL))
355 * We must ensure the file is unmapped before doing this
356 * to ensure consistency with systems like OpenBSD where
357 * writes and mmaps are not consistent.
360 /* expand the file itself */
361 if (!(tdb->flags & TDB_INTERNAL)) {
362 if (tdb->tdb1.io->tdb1_expand_file(tdb, tdb->file->map_size, size) != 0)
/* Record the new size before the buffer or mapping is refreshed. */
366 tdb->file->map_size += size;
/* TDB_INTERNAL databases grow their buffer with realloc. The map_size
 * decrement below is presumably the roll-back for a failed realloc; the
 * guarding condition is not visible in this view. */
368 if (tdb->flags & TDB_INTERNAL) {
369 char *new_map_ptr = (char *)realloc(tdb->file->map_ptr,
370 tdb->file->map_size);
372 tdb->file->map_size -= size;
375 tdb->file->map_ptr = new_map_ptr;
378 * We must ensure the file is remapped before adding the space
379 * to ensure consistency with systems like OpenBSD where
380 * writes and mmaps are not consistent.
383 /* We're ok if the mmap fails as we'll fallback to read/write */
387 /* form a new freelist record */
/* The record header itself is carved out of the added space, so rec_len
 * excludes sizeof(rec). */
388 memset(&rec,'\0',sizeof(rec));
389 rec.rec_len = size - sizeof(rec);
391 /* link it into the free list */
/* The new record starts where the old end of the database was. */
392 offset = tdb->file->map_size - size;
393 if (tdb1_free(tdb, offset, &rec) == -1)
/* Two unlock calls follow - presumably one per exit path; the labels and
 * return statements are not visible in this view. */
396 tdb1_unlock(tdb, -1, F_WRLCK);
399 tdb1_unlock(tdb, -1, F_WRLCK);
403 /* read/write a tdb1_off_t */
/* Read one tdb1_off_t stored at byte offset "offset" into *d.
 *
 * The read goes through the io method table; TDB1_DOCONV() supplies the
 * conversion argument. Returns whatever tdb1_read returns. */
404 int tdb1_ofs_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
406 return tdb->tdb1.io->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
/* Write one tdb1_off_t at byte offset "offset" through the io method table.
 *
 * The value passed on is TDB1_CONV(off); the declaration of off is not
 * visible in this view - presumably a local copy of *d; TODO confirm.
 * Returns whatever tdb1_write returns. */
409 int tdb1_ofs_write(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
412 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
416 /* read a lump of data, allocating the space for it */
/* Allocate a buffer with malloc and read len bytes at "offset" into it.
 *
 * tdb:    database context; reads tdb1.io.
 * offset: byte offset of the data to read.
 * len:    number of bytes to read; a single byte is allocated when len is 0.
 *
 * An allocation failure is logged and recorded as TDB_ERR_OOM. The read is
 * issued with conversion disabled (last argument 0). The declaration of
 * buf, the clean-up after a failed read and the return statements are not
 * visible in this view; the caller presumably owns the returned buffer -
 * TODO confirm. */
417 unsigned char *tdb1_alloc_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t len)
421 /* some systems don't like zero length malloc */
423 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
424 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
425 "tdb1_alloc_read malloc failed"
427 len, strerror(errno));
430 if (tdb->tdb1.io->tdb1_read(tdb, offset, buf, len, 0) == -1) {
437 /* Give a piece of tdb data to a parser */
/* Pass len bytes of database content at "offset" to a parser callback.
 *
 * tdb:    database context; reads tdb1.transaction, file->map_ptr, tdb1.io.
 * key:    forwarded unchanged to the callback.
 * offset: byte offset of the data.
 * len:    length of the data in bytes.
 * parser: callback invoked as parser(key, data, private_data).
 *
 * The tail of the signature, the declarations of data and result, the
 * assignment of the data length and the final return are not visible in
 * this view. */
439 int tdb1_parse_data(struct tdb_context *tdb, TDB_DATA key,
440 tdb1_off_t offset, tdb1_len_t len,
441 int (*parser)(TDB_DATA key, TDB_DATA data,
/* Zero-copy path: no transaction is active and a mapping exists. */
450 if ((tdb->tdb1.transaction == NULL) && (tdb->file->map_ptr != NULL)) {
452 * Optimize by avoiding the malloc/memcpy/free, point the
453 * parser directly at the mmap area.
455 if (tdb->tdb1.io->tdb1_oob(tdb, offset+len, 0) != 0) {
458 data.dptr = offset + (unsigned char *)tdb->file->map_ptr;
459 return parser(key, data, private_data);
/* Copying path: read the bytes into a freshly allocated buffer first. */
462 if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
466 result = parser(key, data, private_data);
471 /* read/write a record */
/* Read the record header stored at "offset" into *rec and sanity-check it.
 *
 * tdb:    database context; reads tdb1.io.
 * offset: byte offset of the record header.
 * rec:    destination for the header.
 *
 * A header failing TDB1_BAD_MAGIC is logged and recorded as
 * TDB_ERR_CORRUPT. The final result is the bounds check of
 * rec->next + sizeof(*rec). The early return statements are not visible in
 * this view. */
472 int tdb1_rec_read(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
474 if (tdb->tdb1.io->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
476 if (TDB1_BAD_MAGIC(rec)) {
477 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
478 "tdb1_rec_read bad magic 0x%x at offset=%d",
/* Make sure the next pointer, plus one header, lies inside the database. */
482 return tdb->tdb1.io->tdb1_oob(tdb, rec->next+sizeof(*rec), 0);
/* Write the record header *rec at byte offset "offset".
 *
 * TDB1_CONV is applied to a local copy r rather than to *rec itself.
 * Returns whatever tdb1_write returns. */
485 int tdb1_rec_write(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
487 struct tdb1_record r = *rec;
488 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
/* Method table installed by tdb1_io_init. Only the tdb1_next_hash_chain
 * initializer is visible in this view; the other entries are not shown. */
491 static const struct tdb1_methods io1_methods = {
494 tdb1_next_hash_chain,
500 /* initialise the default methods table */
/* Point tdb->tdb1.io at io1_methods, the method table defined in this
 * file. */
502 void tdb1_io_init(struct tdb_context *tdb)
504 tdb->tdb1.io = &io1_methods;
/* Probe for growth of the database beyond the current map size.
 *
 * Clears tdb->last_error, asks the oob method whether one byte past
 * file->map_size is available (probe argument set to true) and returns
 * whatever last_error holds afterwards. The return value of the oob call
 * itself is ignored. */
507 enum TDB_ERROR tdb1_probe_length(struct tdb_context *tdb)
509 tdb->last_error = TDB_SUCCESS;
510 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size + 1, true);
511 return tdb->last_error;