2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "tdb1_private.h"
/* MAX(a,b): yields the larger of a and b.
 * Both operands are parenthesised, but the operand that is selected appears
 * twice in the expansion and is therefore evaluated twice - do not pass
 * expressions with side effects. */
31 #define MAX(a,b) ((a) > (b) ? (a) : (b))
34 /* Check for an out-of-bounds access: if it is out of bounds, then
35 see if the database has been expanded by someone else and, if necessary, expand our view of it.
37 Note that "len" is the minimum length needed for the db. */
/* tdb1_oob(tdb, len, probe): bounds check against the current size of the db.
 *
 * NOTE(review): this listing omits some of the function's lines (braces,
 * return statements and local declarations are not visible), so the notes
 * below describe only what can be read here.
 *
 * tdb:   context; map_size, flags, fd and ecode are read or written below.
 * len:   minimum size needed, compared against tdb->map_size.
 * probe: not referenced in the visible lines; tdb1_expand passes 1 and every
 *        other caller in this file passes 0 - presumably it silences the
 *        log messages, TODO confirm.
 *
 * Callers in this file treat a non-zero result as failure. */
39 static int tdb1_oob(struct tdb1_context *tdb, tdb1_off_t len, int probe)
/* the request already fits inside the size we know about */
42 if (len <= tdb->map_size)
/* TDB1_INTERNAL databases: reported below as exceeding the internal malloc
 * size, with ecode set to TDB1_ERR_IO */
44 if (tdb->flags & TDB1_INTERNAL) {
46 /* Ensure ecode is set for log fn. */
47 tdb->ecode = TDB1_ERR_IO;
48 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_oob len %d beyond internal malloc size %d\n",
49 (int)len, (int)tdb->map_size));
/* query the file behind tdb->fd; a failing fstat is recorded as an I/O error */
54 if (fstat(tdb->fd, &st) == -1) {
55 tdb->ecode = TDB1_ERR_IO;
/* the file itself is still shorter than the requested length.
 * NOTE(review): st is presumably a struct stat, so st_size is signed and is
 * compared with a size_t here - confirm the conversion is intended. */
59 if (st.st_size < (size_t)len) {
61 /* Ensure ecode is set for log fn. */
62 tdb->ecode = TDB1_ERR_IO;
63 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_oob len %d beyond eof at %d\n",
64 (int)len, (int)st.st_size));
69 /* Unmap, update size, remap */
70 if (tdb1_munmap(tdb) == -1) {
71 tdb->ecode = TDB1_ERR_IO;
/* adopt the size reported by fstat as the new map size */
74 tdb->map_size = st.st_size;
79 /* write a lump of data at a specified offset
 *
 * Copies len bytes from buf to offset off of the database, either with a
 * memcpy into tdb->map_ptr or with pwrite on tdb->fd.
 *
 * NOTE(review): this listing omits some lines (braces, returns and the
 * condition that chooses between the memcpy and pwrite paths), so only the
 * visible statements are described. */
80 static int tdb1_write(struct tdb1_context *tdb, tdb1_off_t off,
81 const void *buf, tdb1_len_t len)
/* writes are refused on a read-only db and during a read traverse */
87 if (tdb->read_only || tdb->traverse_read) {
88 tdb->ecode = TDB1_ERR_RDONLY;
/* the end of the write (off + len) must lie inside the db; probe is 0.
 * NOTE(review): if tdb1_off_t/tdb1_len_t are fixed-width unsigned types
 * (not shown here) this sum can wrap around - confirm callers bound it. */
92 if (tdb->methods->tdb1_oob(tdb, off + len, 0) != 0)
/* in-memory copy straight into the mapped area */
96 memcpy(off + (char *)tdb->map_ptr, buf, len);
/* file path: positioned write of the whole buffer */
98 ssize_t written = pwrite(tdb->fd, buf, len, off);
/* short write that is not an error: log it and retry the remainder once */
99 if ((written != (ssize_t)len) && (written != -1)) {
101 tdb->ecode = TDB1_ERR_IO;
/* NOTE(review): len and off are passed to %d without the (int) cast that
 * the other log calls in this file use. */
102 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_write: wrote only "
103 "%d of %d bytes at %d, trying once more\n",
104 (int)written, len, off));
/* second attempt starts at the first unwritten byte of buf */
105 written = pwrite(tdb->fd, (const char *)buf+written,
110 /* Ensure ecode is set for log fn. */
111 tdb->ecode = TDB1_ERR_IO;
112 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_write failed at %d "
113 "len=%d (%s)\n", off, len, strerror(errno)));
/* still short after the retry: reported as a failure in two attempts */
115 } else if (written != (ssize_t)len) {
116 tdb->ecode = TDB1_ERR_IO;
117 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_write: failed to "
118 "write %d bytes at %d in two attempts\n",
126 /* Endian conversion: we only ever deal with 4 byte quantities
 *
 * Applies TDB1_BYTEREV in place to each of the size / 4 32-bit words that
 * start at buf; any trailing size % 4 bytes are left untouched. buf is
 * accessed through a uint32_t pointer, so it has to be suitably aligned.
 * NOTE(review): the return statement is not visible in this listing -
 * presumably buf is returned, TODO confirm. */
127 void *tdb1_convert(void *buf, uint32_t size)
129 uint32_t i, *p = (uint32_t *)buf;
130 for (i = 0; i < size / 4; i++)
131 p[i] = TDB1_BYTEREV(p[i]);
136 /* read a lump of data at a specified offset, maybe convert
 *
 * Copies len bytes from offset off of the database into buf, either with a
 * memcpy out of tdb->map_ptr or with pread on tdb->fd, and may then pass
 * the buffer through tdb1_convert.
 *
 * cv: not referenced in the visible lines; callers pass TDB1_DOCONV() or 0,
 *     so it presumably gates the tdb1_convert call - TODO confirm.
 *
 * NOTE(review): this listing omits some lines (braces, returns and the
 * condition that chooses between the memcpy and pread paths). */
137 static int tdb1_read(struct tdb1_context *tdb, tdb1_off_t off, void *buf,
138 tdb1_len_t len, int cv)
/* the end of the read (off + len) must lie inside the db; probe is 0 */
140 if (tdb->methods->tdb1_oob(tdb, off + len, 0) != 0) {
/* in-memory copy straight out of the mapped area */
145 memcpy(buf, off + (char *)tdb->map_ptr, len);
/* file path: positioned read; anything other than a full read is logged as
 * an I/O error.
 * NOTE(review): unlike tdb1_write there is no retry after a short read, and
 * errno need not be meaningful when pread returns a short count. */
147 ssize_t ret = pread(tdb->fd, buf, len, off);
148 if (ret != (ssize_t)len) {
149 /* Ensure ecode is set for log fn. */
150 tdb->ecode = TDB1_ERR_IO;
151 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_read failed at %d "
152 "len=%d ret=%d (%s) map_size=%d\n",
153 (int)off, (int)len, (int)ret, strerror(errno),
154 (int)tdb->map_size));
/* byte-swap the data that was just read, in place */
159 tdb1_convert(buf, len);
167 do an unlocked scan of the hash table heads to find the next non-zero head. The value
168 will then be confirmed with the lock held
/* tdb1_next_hash_chain(tdb, chain): scan the hash heads for a non-zero entry.
 *
 * Two scans over h < tdb->header.hash_size are visible: one reads the head
 * at TDB1_HASH_TOP(h) directly out of tdb->map_ptr, the other goes through
 * tdb1_ofs_read.
 *
 * NOTE(review): the initialisation of h, the bodies of both tests, the
 * condition selecting between the two loops and the update of *chain are
 * not visible in this listing; presumably h starts at *chain and the
 * result is stored back there - TODO confirm. */
170 static void tdb1_next_hash_chain(struct tdb1_context *tdb, uint32_t *chain)
/* direct scan: read each head as a uint32_t from the mapped area */
174 for (;h < tdb->header.hash_size;h++) {
175 if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
/* scan through tdb1_ofs_read: the test also fires when the read fails */
181 for (;h < tdb->header.hash_size;h++) {
182 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
/* tdb1_munmap(tdb): release the mapping of the database.
 *
 * tdb1_oob compares the result with -1 to detect failure.
 * NOTE(review): the statements guarded by the tests below, and the returns,
 * are not visible in this listing. */
191 int tdb1_munmap(struct tdb1_context *tdb)
/* TDB1_INTERNAL databases are special-cased before any munmap call */
193 if (tdb->flags & TDB1_INTERNAL)
/* drop the whole mapping: tdb->map_size bytes starting at tdb->map_ptr */
200 ret = munmap(tdb->map_ptr, tdb->map_size);
/* tdb1_mmap(tdb): map tdb->map_size bytes of tdb->fd into tdb->map_ptr.
 *
 * A failed mmap is logged at warning level only.
 * NOTE(review): the statements guarded by the tests below (including what
 * map_ptr is set to after a failure or when mmap is disabled) are not
 * visible in this listing. */
209 void tdb1_mmap(struct tdb1_context *tdb)
/* TDB1_INTERNAL databases are special-cased before any mmap call */
211 if (tdb->flags & TDB1_INTERNAL)
/* only map when the TDB1_NOMMAP flag is clear */
215 if (!(tdb->flags & TDB1_NOMMAP)) {
/* shared mapping from offset 0; writable unless the db is read-only */
216 tdb->map_ptr = mmap(NULL, tdb->map_size,
217 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
218 MAP_SHARED|MAP_FILE, tdb->fd, 0);
221 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
224 if (tdb->map_ptr == MAP_FAILED) {
226 TDB1_LOG((tdb, TDB1_DEBUG_WARNING, "tdb1_mmap failed for size %d (%s)\n",
227 tdb->map_size, strerror(errno)));
237 /* expand a file. we prefer to use ftruncate, as that is what POSIX
238 says to use for mmap expansion
 *
 * Grows the file behind tdb->fd from size to size + addition bytes and then
 * fills the new region with TDB1_PAD_BYTE.
 *
 * size:     current length; the fill writes are issued at this offset.
 * addition: number of bytes to add.
 *
 * tdb1_expand treats a non-zero result as failure.
 * NOTE(review): this listing omits many lines (declarations of b and buf,
 * the loop header, the tests around the retries, the updates of size and
 * addition, and the returns), so only the visible statements are
 * described. */
239 static int tdb1_expand_file(struct tdb1_context *tdb, tdb1_off_t size, tdb1_off_t addition)
/* growing is refused on a read-only db and during a read traverse */
243 if (tdb->read_only || tdb->traverse_read) {
244 tdb->ecode = TDB1_ERR_RDONLY;
/* if ftruncate fails, fall back to writing one byte at the new last offset */
248 if (ftruncate(tdb->fd, size+addition) == -1) {
250 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
252 /* try once more, potentially revealing errno */
253 written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
256 /* again - give up, guessing errno */
260 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
261 size+addition, strerror(errno)));
266 /* now fill the file with something. This ensures that the
267 file isn't sparse, which would be very bad if we ran out of
268 disk. This must be done with write, not via mmap */
269 memset(buf, TDB1_PAD_BYTE, sizeof(buf));
/* each chunk is at most sizeof(buf) bytes */
271 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
272 ssize_t written = pwrite(tdb->fd, buf, n, size);
274 /* prevent infinite loops: try _once_ more */
275 written = pwrite(tdb->fd, buf, n, size);
278 /* give up, trying to provide a useful errno */
279 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "expand_file write "
280 "returned 0 twice: giving up!\n"));
/* a hard error from pwrite is logged as fatal */
283 } else if (written == -1) {
284 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "expand_file write of "
285 "%d bytes failed (%s)\n", (int)n,
/* a partial chunk is only a warning; the message says it will be retried.
 * NOTE(review): written is ssize_t and n is size_t, so this comparison
 * mixes signed and unsigned operands. */
288 } else if (written != n) {
289 TDB1_LOG((tdb, TDB1_DEBUG_WARNING, "expand_file: wrote "
290 "only %d of %d bytes - retrying\n", (int)written,
300 /* expand the database by at least size bytes by expanding the underlying
301 file and doing the mmap again if necessary
 *
 * Takes the write lock, refreshes the known size, computes a page-aligned
 * growth amount, grows the file (or the realloc'ed buffer for a
 * TDB1_INTERNAL db) and hands the new space to tdb1_free as one record.
 *
 * NOTE(review): this listing omits many lines (braces, else branches,
 * returns, the munmap/mmap calls that the comment fragments below refer
 * to, and the failure handling), so only the visible statements are
 * described. */
302 int tdb1_expand(struct tdb1_context *tdb, tdb1_off_t size)
304 struct tdb1_record rec;
305 tdb1_off_t offset, new_size, top_size, map_size;
/* take the write lock with -1 as the list argument; failure is logged */
307 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
308 TDB1_LOG((tdb, TDB1_DEBUG_ERROR, "lock failed in tdb1_expand\n"));
312 /* must know about any previous expansions by another process */
/* called with probe = 1; the result is deliberately not checked here */
313 tdb->methods->tdb1_oob(tdb, tdb->map_size + 1, 1);
315 /* limit size in order to avoid using up huge amounts of memory for
316 * in memory tdbs if an oddball huge record creeps in */
/* requests above 100KiB get a 2x margin; the other assignment uses 100x */
317 if (size > 100 * 1024) {
318 top_size = tdb->map_size + size * 2;
320 top_size = tdb->map_size + size * 100;
323 /* always grow to at least top_size, and by at least 25% of the
324 current size; if the DB is already larger than 100MiB,
325 grow it by 10% only. */
/* NOTE(review): these are floating-point multiplications whose result is
 * stored back into a tdb1_off_t. */
326 if (tdb->map_size > 100 * 1024 * 1024) {
327 map_size = tdb->map_size * 1.10;
329 map_size = tdb->map_size * 1.25;
332 /* Round the database up to a multiple of the page size */
/* from here on size is the number of bytes actually being added */
333 new_size = MAX(top_size, map_size);
334 size = TDB1_ALIGN(new_size, tdb->page_size) - tdb->map_size;
336 if (!(tdb->flags & TDB1_INTERNAL))
340 * We must ensure the file is unmapped before doing this
341 * to ensure consistency with systems like OpenBSD where
342 * writes and mmaps are not consistent.
345 /* expand the file itself */
346 if (!(tdb->flags & TDB1_INTERNAL)) {
347 if (tdb->methods->tdb1_expand_file(tdb, tdb->map_size, size) != 0)
351 tdb->map_size += size;
/* TDB1_INTERNAL db: grow the buffer via a temporary pointer so that
 * tdb->map_ptr is only replaced by the assignment further down; the
 * map_size increment is undone below (presumably when realloc fails -
 * TODO confirm) */
353 if (tdb->flags & TDB1_INTERNAL) {
354 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
357 tdb->map_size -= size;
360 tdb->map_ptr = new_map_ptr;
363 * We must ensure the file is remapped before adding the space
364 * to ensure consistency with systems like OpenBSD where
365 * writes and mmaps are not consistent.
368 /* We're ok if the mmap fails as we'll fallback to read/write */
372 /* form a new freelist record */
/* the record header itself is carved out of the newly added space */
373 memset(&rec,'\0',sizeof(rec));
374 rec.rec_len = size - sizeof(rec);
376 /* link it into the free list */
/* the new record starts where the old end of the db was */
377 offset = tdb->map_size - size;
378 if (tdb1_free(tdb, offset, &rec) == -1)
/* two unlock calls are visible - presumably one per exit path */
381 tdb1_unlock(tdb, -1, F_WRLCK);
384 tdb1_unlock(tdb, -1, F_WRLCK);
388 /* read/write a tdb1_off_t
 *
 * tdb1_ofs_read: read sizeof(*d) bytes at offset into *d through the
 * methods table, passing TDB1_DOCONV() as the convert flag. Returns whatever
 * the tdb1_read method returns. */
389 int tdb1_ofs_read(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
391 return tdb->methods->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
/* tdb1_ofs_write: write sizeof(*d) bytes at offset through the methods
 * table. The data written is TDB1_CONV(off).
 * NOTE(review): the declaration of off is not visible in this listing -
 * presumably a local copy of *d, so that the caller's value is not
 * modified by the conversion; TODO confirm.
 * Returns whatever the tdb1_write method returns. */
394 int tdb1_ofs_write(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
397 return tdb->methods->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
401 /* read a lump of data, allocating the space for it
 *
 * Allocates a buffer with malloc and fills it with len bytes read from
 * offset through the methods table, with the convert flag set to 0.
 * tdb1_parse_data treats a null result as failure and otherwise uses the
 * result as the data pointer.
 *
 * NOTE(review): the declaration of buf, the failure returns, any cleanup
 * after a failed read and the final return are not visible in this
 * listing. */
402 unsigned char *tdb1_alloc_read(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_len_t len)
406 /* some systems don't like zero length malloc */
/* hence a one-byte allocation when len is 0 */
408 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
409 /* Ensure ecode is set for log fn. */
410 tdb->ecode = TDB1_ERR_OOM;
411 TDB1_LOG((tdb, TDB1_DEBUG_ERROR,"tdb1_alloc_read malloc failed len=%d (%s)\n",
412 len, strerror(errno)));
/* -1 from the read method is the failure case */
415 if (tdb->methods->tdb1_read(tdb, offset, buf, len, 0) == -1) {
422 /* Give a piece of tdb data to a parser
 *
 * Calls parser(key, data, private_data), with data.dptr pointing at the
 * len bytes found at offset. Two paths are visible: one points dptr
 * directly into tdb->map_ptr, the other obtains a buffer from
 * tdb1_alloc_read.
 *
 * NOTE(review): the rest of the parameter list, the local declarations,
 * the assignment of the data length, the failure returns and any release
 * of the allocated buffer are not visible in this listing. */
424 int tdb1_parse_data(struct tdb1_context *tdb, TDB1_DATA key,
425 tdb1_off_t offset, tdb1_len_t len,
426 int (*parser)(TDB1_DATA key, TDB1_DATA data,
/* direct path: no transaction is active and the db has a map pointer */
435 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
437 * Optimize by avoiding the malloc/memcpy/free, point the
438 * parser directly at the mmap area.
/* the whole range must be inside the db before it is exposed */
440 if (tdb->methods->tdb1_oob(tdb, offset+len, 0) != 0) {
443 data.dptr = offset + (unsigned char *)tdb->map_ptr;
444 return parser(key, data, private_data);
/* copy path: the parser sees a buffer filled by tdb1_alloc_read */
447 if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
451 result = parser(key, data, private_data);
456 /* read/write a record
 *
 * tdb1_rec_read: read sizeof(*rec) bytes at offset into *rec (convert flag
 * TDB1_DOCONV()), reject the record if TDB1_BAD_MAGIC(rec) is true, and
 * finally bounds-check rec->next. The value returned on the last line is
 * the result of that tdb1_oob call.
 *
 * NOTE(review): the returns taken on the two failure tests are not visible
 * in this listing. */
457 int tdb1_rec_read(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
459 if (tdb->methods->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
461 if (TDB1_BAD_MAGIC(rec)) {
462 /* Ensure ecode is set for log fn. */
463 tdb->ecode = TDB1_ERR_CORRUPT;
464 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
/* a record header located at rec->next must fit inside the db; probe is 0 */
467 return tdb->methods->tdb1_oob(tdb, rec->next+sizeof(*rec), 0);
/* tdb1_rec_write: write the record *rec at offset through the methods
 * table. TDB1_CONV is applied to the local copy r rather than to *rec.
 * Returns whatever the tdb1_write method returns. */
470 int tdb1_rec_write(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
472 struct tdb1_record r = *rec;
473 return tdb->methods->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
/* Methods table that tdb1_io_init installs as tdb->methods. Code in this
 * file calls tdb1_read, tdb1_write, tdb1_oob and tdb1_expand_file through
 * it.
 * NOTE(review): only one initialiser is visible in this listing; the other
 * entries and their order must be checked against struct tdb1_methods. */
476 static const struct tdb1_methods io1_methods = {
479 tdb1_next_hash_chain,
485 initialise the default methods table
/* tdb1_io_init(tdb): point tdb->methods at the file-local io1_methods
 * table. */
487 void tdb1_io_init(struct tdb1_context *tdb)
489 tdb->methods = &io1_methods;