2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "tdb1_private.h"
/* Yields the larger of a and b.  Being a function-like macro, it evaluates
   one of its arguments twice, so callers must not pass expressions that
   have side effects. */
31 #define MAX(a,b) ((a) > (b) ? (a) : (b))
/* check for an out of bounds access - if it is out of bounds then
   see if the database has been expanded by someone else and, if so,
   pick up the new size.
   note that "len" is the minimum length needed for the db */
/*
 * Bounds check for the byte range starting at "off" with length "len".
 * Visible steps, in order:
 *   - detect a range whose end wraps around and log it;
 *   - accept a range that already fits inside tdb->file->map_size;
 *   - for TDB_INTERNAL databases, log that the range is beyond the
 *     internal allocation;
 *   - otherwise fstat() the file; if the file is still too short, log it;
 *     if its size does not fit in tdb1_off_t, log it; else unmap and
 *     store the new size in tdb->file->map_size.
 * NOTE(review): the remainder of the parameter list, the local
 * declarations and the return statements are not visible in this listing.
 * Callers in this file pass a fourth argument (0, 1 or true) and compare
 * the result against 0 - confirm the exact contract against the full
 * source.
 */
39 static int tdb1_oob(struct tdb_context *tdb, tdb1_off_t off, tdb1_len_t len,
/* a sum smaller than one of its operands means off + len wrapped around
   (assumes both types are unsigned - TODO confirm) */
43 if (len + off < len) {
45 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
46 "tdb1_oob off %d len %d wrap\n",
/* fast path: the range lies inside the size we already know about */
52 if (off + len <= tdb->file->map_size)
/* internal databases: the message below describes map_size as the
   "internal malloc size"; presumably this branch ends in an error return
   rather than reaching fstat() - not visible here */
54 if (tdb->flags & TDB_INTERNAL) {
56 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
/* NOTE(review): the format uses %u for the second value while both
   arguments are cast to int */
57 "tdb1_oob len %d beyond internal malloc size %u",
58 (int)(off + len), (int)tdb->file->map_size);
/* ask the filesystem for the current file size */
63 if (fstat(tdb->file->fd, &st) == -1) {
64 tdb->last_error = TDB_ERR_IO;
/* the file on disk is still shorter than the requested range */
68 if (st.st_size < (size_t)off + len) {
70 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
71 "tdb1_oob len %u beyond eof at %u",
72 (int)(off + len), (int)st.st_size);
77 /* Beware >4G files! */
/* the round-trip through tdb1_off_t fails when st_size does not fit */
78 if ((tdb1_off_t)st.st_size != st.st_size) {
79 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
80 "tdb1_oob len %llu too large!\n",
81 (long long)st.st_size);
85 /* Unmap, update size, remap */
86 if (tdb1_munmap(tdb) == -1) {
87 tdb->last_error = TDB_ERR_IO;
/* record the size reported by fstat(); the remap step named in the
   comment above is not visible in this listing */
90 tdb->file->map_size = st.st_size;
95 /* write a lump of data at a specified offset */
/*
 * Copies "len" bytes from "buf" to offset "off" of the database.
 * Sets last_error to TDB_ERR_RDONLY when the TDB_RDONLY flag is set or
 * tdb1.traverse_read is non-zero.  The target range is first validated
 * through the io method table's tdb1_oob.  When the file is mapped the
 * data is copied with memcpy(); otherwise pwrite() is used, and a short
 * (but not failed) write is logged as a warning and retried once for the
 * remaining bytes.
 * NOTE(review): return statements and some branch headers are not visible
 * in this listing; sibling functions compare the result against -1.
 */
96 static int tdb1_write(struct tdb_context *tdb, tdb1_off_t off,
97 const void *buf, tdb1_len_t len)
103 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
104 tdb->last_error = TDB_ERR_RDONLY;
/* make sure [off, off + len) is inside the database */
108 if (tdb->tdb1.io->tdb1_oob(tdb, off, len, 0) != 0)
/* mapped: write straight into the mapping */
111 if (tdb->file->map_ptr) {
112 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
/* not mapped: go through the file descriptor */
114 ssize_t written = pwrite(tdb->file->fd, buf, len, off);
/* partial write that is not an outright failure: warn and retry */
115 if ((written != (ssize_t)len) && (written != -1)) {
116 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
117 "tdb1_write: wrote only "
118 "%d of %d bytes at %d, trying once more",
/* NOTE(review): len and off are passed uncast to %d conversions */
119 (int)written, len, off);
/* second attempt resumes after the bytes already written */
120 written = pwrite(tdb->file->fd,
121 (const char *)buf+written,
126 /* Ensure ecode is set for log fn. */
127 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
128 "tdb1_write failed at %d "
130 off, len, strerror(errno));
/* the retry did not complete the write either */
132 } else if (written != (ssize_t)len) {
133 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
134 "tdb1_write: failed to "
135 "write %d bytes at %d in two attempts",
143 /* Endian conversion: we only ever deal with 4 byte quantities */
/*
 * Applies TDB1_BYTEREV in place to each 32-bit word of "buf".  "size" is a
 * byte count; size / 4 words are processed, so any trailing size % 4 bytes
 * are left untouched.  "buf" is accessed through a uint32_t pointer and
 * must therefore be suitably aligned for that type.
 * NOTE(review): the return statement is not visible in this listing.
 */
144 void *tdb1_convert(void *buf, uint32_t size)
146 uint32_t i, *p = (uint32_t *)buf;
147 for (i = 0; i < size / 4; i++)
148 p[i] = TDB1_BYTEREV(p[i]);
153 /* read a lump of data at a specified offset, maybe convert */
/*
 * Copies "len" bytes from offset "off" of the database into "buf".
 * The range is validated through the io method table's tdb1_oob first.
 * When the file is mapped the data comes from the mapping via memcpy();
 * otherwise pread() is used and anything other than a full read is logged
 * as TDB_ERR_IO.  tdb1_convert() is applied to the buffer afterwards.
 * NOTE(review): the condition guarding the conversion is not visible here;
 * presumably it is the "cv" parameter - confirm.  Return statements are
 * likewise not visible; callers in this file compare against -1 and 0.
 */
154 static int tdb1_read(struct tdb_context *tdb, tdb1_off_t off, void *buf,
155 tdb1_len_t len, int cv)
157 if (tdb->tdb1.io->tdb1_oob(tdb, off, len, 0) != 0) {
/* mapped: copy straight out of the mapping */
161 if (tdb->file->map_ptr) {
162 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
/* not mapped: read through the file descriptor; short reads are errors */
164 ssize_t ret = pread(tdb->file->fd, buf, len, off);
165 if (ret != (ssize_t)len) {
166 /* Ensure ecode is set for log fn. */
167 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
168 "tdb1_read failed at %d "
169 "len=%d ret=%d (%s) map_size=%d",
170 (int)off, (int)len, (int)ret,
172 (int)tdb->file->map_size);
/* byte-swap the 32-bit words that were just read */
177 tdb1_convert(buf, len);
/* do an unlocked scan of the hash table heads to find the next non-zero
   head. The value will then be confirmed with the lock held */
/*
 * Walks hash chain indices h up to tdb->tdb1.header.hash_size looking at
 * the head stored at TDB1_HASH_TOP(h).  With a mapping present the head is
 * read directly from mapped memory; without one it is fetched through
 * tdb1_ofs_read(), and the test also fires when that read fails.
 * NOTE(review): the initialisation of h, the action taken when a head
 * matches and the way the result is stored through "chain" are not
 * visible in this listing.
 */
188 static void tdb1_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
191 if (tdb->file->map_ptr) {
192 for (;h < tdb->tdb1.header.hash_size;h++) {
/* non-zero head found in the mapping */
193 if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->file->map_ptr)) {
199 for (;h < tdb->tdb1.header.hash_size;h++) {
/* read error or non-zero head */
200 if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
/*
 * Releases the current file mapping, if any: munmap() is called on
 * tdb->file->map_ptr with length tdb->file->map_size, and map_ptr is then
 * cleared to NULL.  TDB_INTERNAL databases are special-cased first.
 * NOTE(review): the statement taken for TDB_INTERNAL, the handling of a
 * munmap() failure and the return statements are not visible here;
 * tdb1_oob treats a result of -1 as failure.
 */
209 int tdb1_munmap(struct tdb_context *tdb)
211 if (tdb->flags & TDB_INTERNAL)
215 if (tdb->file->map_ptr) {
218 ret = munmap(tdb->file->map_ptr, tdb->file->map_size);
223 tdb->file->map_ptr = NULL;
/*
 * Tries to map the database file and stores the result in
 * tdb->file->map_ptr.  Mapping is only attempted when TDB_NOMMAP is not
 * set.  The mapping is shared (MAP_SHARED|MAP_FILE), starts at file offset
 * 0 and covers tdb->file->map_size bytes.  On failure map_ptr is set to
 * NULL and a warning is logged.  The remaining visible branches also leave
 * map_ptr as NULL.  TDB_INTERNAL databases are special-cased first
 * (statement not visible in this listing).
 */
227 void tdb1_mmap(struct tdb_context *tdb)
229 if (tdb->flags & TDB_INTERNAL)
233 if (!(tdb->flags & TDB_NOMMAP)) {
/* despite its name, mmap_flags holds the PROT_* protection bits:
   read-only when the file was opened O_RDONLY, read/write otherwise */
235 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY)
236 mmap_flags = PROT_READ;
238 mmap_flags = PROT_READ | PROT_WRITE;
240 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
242 MAP_SHARED|MAP_FILE, tdb->file->fd, 0);
245 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
/* normalise the failure value to NULL so callers can test map_ptr */
248 if (tdb->file->map_ptr == MAP_FAILED) {
249 tdb->file->map_ptr = NULL;
250 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
251 "tdb1_mmap failed for size %d (%s)",
252 tdb->file->map_size, strerror(errno));
255 tdb->file->map_ptr = NULL;
258 tdb->file->map_ptr = NULL;
262 /* expand a file. we prefer to use ftruncate, as that is what posix
263 says to use for mmap expansion */
/*
 * Grows the database file from "size" to "size + addition" bytes.
 * Sets last_error to TDB_ERR_RDONLY when the TDB_RDONLY flag is set or
 * tdb1.traverse_read is non-zero.  The file is first extended with
 * ftruncate(); if that fails, a single byte is written at the new last
 * offset with pwrite(), with one retry.  The new region is then filled
 * with TDB1_PAD_BYTE using pwrite() in chunks of at most sizeof(buf)
 * bytes, and tdb->stats.expands is incremented.
 * NOTE(review): local declarations, loop headers, several conditions and
 * all return statements are not visible in this listing; tdb1_expand
 * compares the result against 0.
 */
264 static int tdb1_expand_file(struct tdb_context *tdb, tdb1_off_t size, tdb1_off_t addition)
268 if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
269 tdb->last_error = TDB_ERR_RDONLY;
/* preferred way to grow the file; the pwrite() below is the fallback */
273 if (ftruncate(tdb->file->fd, size+addition) == -1) {
275 ssize_t written = pwrite(tdb->file->fd, &b, 1,
276 (size+addition) - 1);
278 /* try once more, potentially revealing errno */
279 written = pwrite(tdb->file->fd, &b, 1,
280 (size+addition) - 1);
283 /* again - give up, guessing errno */
287 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
288 "expand_file to %d failed (%s)",
295 /* now fill the file with something. This ensures that the
296 file isn't sparse, which would be very bad if we ran out of
297 disk. This must be done with write, not via mmap */
298 memset(buf, TDB1_PAD_BYTE, sizeof(buf));
/* write at most one buffer's worth per iteration */
300 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
301 ssize_t written = pwrite(tdb->file->fd, buf, n, size);
303 /* prevent infinite loops: try _once_ more */
304 written = pwrite(tdb->file->fd, buf, n, size);
307 /* give up, trying to provide a useful errno */
308 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
310 "returned 0 twice: giving up!");
/* hard write error */
313 } else if (written == -1) {
314 tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
315 "expand_file write of "
316 "%d bytes failed (%s)", (int)n,
/* short write: warn; the message says the write is retried */
319 } else if (written != n) {
320 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
321 "expand_file: wrote "
322 "only %d of %d bytes - retrying",
323 (int)written, (int)n);
328 tdb->stats.expands++;
333 /* expand the database at least size bytes by expanding the underlying
334 file and doing the mmap again if necessary */
/*
 * Visible sequence:
 *   1. take the write lock with tdb1_lock(tdb, -1, F_WRLCK);
 *   2. call tdb1_oob one byte past map_size so that growth done by another
 *      process is picked up;
 *   3. compute the growth: top_size adds size * 2 (requests above 100KiB)
 *      or size * 100 to map_size, while map_size is scaled by 1.10 (above
 *      100MiB) or 1.25; the larger of the two is aligned with TDB1_ALIGN
 *      to tdb1.page_size;
 *   4. for file-backed databases, grow the file through the io method
 *      table's tdb1_expand_file;
 *   5. add the growth to tdb->file->map_size; for TDB_INTERNAL databases
 *      realloc() the buffer and roll map_size back on failure;
 *   6. build a zeroed free record covering the new space and hand it to
 *      tdb1_free();
 *   7. release the lock.
 * NOTE(review): the unmap/remap calls described by the in-body comments,
 * some branch headers and all return statements are not visible in this
 * listing.
 */
335 int tdb1_expand(struct tdb_context *tdb, tdb1_off_t size)
337 struct tdb1_record rec;
338 tdb1_off_t offset, new_size, top_size, map_size;
340 if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
341 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
342 "lock failed in tdb1_expand");
346 /* must know about any previous expansions by another process */
347 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, 1);
349 /* limit size in order to avoid using up huge amounts of memory for
350 * in memory tdbs if an oddball huge record creeps in */
/* NOTE(review): the multiplications and additions below are not checked
   for wrap-around in the visible lines */
351 if (size > 100 * 1024) {
352 top_size = tdb->file->map_size + size * 2;
354 top_size = tdb->file->map_size + size * 100;
357 /* always make room for at least top_size more records, and at
358 least 25% more space. if the DB is smaller than 100MiB,
359 otherwise grow it by 10% only. */
/* the scaling is done in floating point and converted back on assignment */
360 if (tdb->file->map_size > 100 * 1024 * 1024) {
361 map_size = tdb->file->map_size * 1.10;
363 map_size = tdb->file->map_size * 1.25;
366 /* Round the database up to a multiple of the page size */
367 new_size = MAX(top_size, map_size);
/* from here on "size" is the number of bytes actually being added */
368 size = TDB1_ALIGN(new_size, tdb->tdb1.page_size) - tdb->file->map_size;
370 if (!(tdb->flags & TDB_INTERNAL))
374 * We must ensure the file is unmapped before doing this
375 * to ensure consistency with systems like OpenBSD where
376 * writes and mmaps are not consistent.
379 /* expand the file itself */
380 if (!(tdb->flags & TDB_INTERNAL)) {
381 if (tdb->tdb1.io->tdb1_expand_file(tdb, tdb->file->map_size, size) != 0)
385 tdb->file->map_size += size;
/* internal databases live in a heap buffer: grow it to the new size */
387 if (tdb->flags & TDB_INTERNAL) {
388 char *new_map_ptr = (char *)realloc(tdb->file->map_ptr,
389 tdb->file->map_size);
390 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM,
393 "tdb1_expand: no memory");
/* undo the size bump because the buffer was not grown */
394 tdb->file->map_size -= size;
397 tdb->file->map_ptr = new_map_ptr;
400 * We must ensure the file is remapped before adding the space
401 * to ensure consistency with systems like OpenBSD where
402 * writes and mmaps are not consistent.
405 /* We're ok if the mmap fails as we'll fallback to read/write */
409 /* form a new freelist record */
410 memset(&rec,'\0',sizeof(rec));
/* the record header itself occupies part of the new space */
411 rec.rec_len = size - sizeof(rec);
413 /* link it into the free list */
/* the new space starts where the database used to end */
414 offset = tdb->file->map_size - size;
415 if (tdb1_free(tdb, offset, &rec) == -1)
418 tdb1_unlock(tdb, -1, F_WRLCK);
421 tdb1_unlock(tdb, -1, F_WRLCK);
425 /* read/write a tdb1_off_t */
/*
 * Reads sizeof(*d) bytes at "offset" into *d through the io method table's
 * tdb1_read, passing TDB1_DOCONV() as the conversion flag, and returns
 * that call's result unchanged.
 */
426 int tdb1_ofs_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
428 return tdb->tdb1.io->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
/*
 * Writes a tdb1_off_t at "offset" through the io method table's tdb1_write
 * and returns that call's result unchanged.  The data written is
 * TDB1_CONV(off), sizeof(*d) bytes long.
 * NOTE(review): the declaration of "off" is not visible in this listing;
 * presumably it is a local copy of *d so the caller's value is not
 * modified by the conversion - confirm.
 */
431 int tdb1_ofs_write(struct tdb_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
434 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
438 /* read a lump of data, allocating the space for it */
/*
 * Allocates a buffer with malloc() and fills it with "len" bytes read from
 * "offset" through the io method table's tdb1_read (no conversion).
 * An allocation failure is logged as TDB_ERR_OOM.
 * NOTE(review): the return statements and the cleanup taken when the read
 * fails are not visible in this listing.  tdb1_parse_data treats a NULL
 * result as failure; presumably the caller owns and frees the returned
 * buffer - confirm.
 */
439 unsigned char *tdb1_alloc_read(struct tdb_context *tdb, tdb1_off_t offset, tdb1_len_t len)
443 /* some systems don't like zero length malloc */
/* a zero-length request is bumped to one byte */
445 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
446 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
447 "tdb1_alloc_read malloc failed"
449 len, strerror(errno));
452 if (tdb->tdb1.io->tdb1_read(tdb, offset, buf, len, 0) == -1) {
459 /* Give a piece of tdb data to a parser */
/*
 * Passes "len" bytes of database content at "offset" to "parser" together
 * with "key" and private_data.
 * Fast path (no transaction active and the file is mapped): the range is
 * validated with tdb1_oob and the parser is pointed directly at the mapped
 * memory; its result is returned.
 * Slow path: the data is copied with tdb1_alloc_read() and the parser is
 * run on the copy.
 * tdb->last_error is returned when the bounds check or the allocation/read
 * fails.
 * NOTE(review): the rest of the parameter list, the assignment of the data
 * length, the release of the temporary buffer and the final return are
 * not visible in this listing.
 */
460 enum TDB_ERROR tdb1_parse_data(struct tdb_context *tdb, TDB_DATA key,
461 tdb1_off_t offset, tdb1_len_t len,
462 enum TDB_ERROR (*parser)(TDB_DATA key,
468 enum TDB_ERROR result;
472 if ((tdb->tdb1.transaction == NULL) && (tdb->file->map_ptr != NULL)) {
474 * Optimize by avoiding the malloc/memcpy/free, point the
475 * parser directly at the mmap area.
477 if (tdb->tdb1.io->tdb1_oob(tdb, offset, len, 0) != 0) {
478 return tdb->last_error;
/* hand the parser a pointer into the mapping itself */
480 data.dptr = offset + (unsigned char *)tdb->file->map_ptr;
481 return parser(key, data, private_data);
/* otherwise work on a private copy of the data */
484 if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
485 return tdb->last_error;
488 result = parser(key, data, private_data);
493 /* read/write a record */
/*
 * Reads a struct tdb1_record from "offset" into *rec through the io method
 * table's tdb1_read, passing TDB1_DOCONV() as the conversion flag.
 * A record failing TDB1_BAD_MAGIC is logged as TDB_ERR_CORRUPT.  The final
 * result is the bounds check of a record-sized range at rec->next, so a
 * record whose "next" offset points outside the database is rejected.
 * NOTE(review): the return statements of the two error branches are not
 * visible in this listing.
 */
494 int tdb1_rec_read(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
496 if (tdb->tdb1.io->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
498 if (TDB1_BAD_MAGIC(rec)) {
499 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
500 "tdb1_rec_read bad magic 0x%x at offset=%d",
/* validate the link to the next record before the caller follows it */
504 return tdb->tdb1.io->tdb1_oob(tdb, rec->next, sizeof(*rec), 0);
/*
 * Writes *rec at "offset" through the io method table's tdb1_write and
 * returns that call's result unchanged.  The record is first copied into
 * the local "r", and TDB1_CONV(r) is what gets written.
 * NOTE(review): presumably the copy exists so that the conversion does not
 * alter the caller's record - confirm against the TDB1_CONV definition.
 */
507 int tdb1_rec_write(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
509 struct tdb1_record r = *rec;
510 return tdb->tdb1.io->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
/*
 * Default io method table; tdb1_io_init() installs a pointer to it in
 * tdb->tdb1.io.
 * NOTE(review): only the tdb1_next_hash_chain entry is visible in this
 * listing.  Other code in this file calls tdb1_read, tdb1_write, tdb1_oob
 * and tdb1_expand_file through the table, so entries for those presumably
 * exist - confirm order against struct tdb1_methods.
 */
513 static const struct tdb1_methods io1_methods = {
516 tdb1_next_hash_chain,
/* initialise the default methods table */
/* Points tdb->tdb1.io at the file-local io1_methods table. */
524 void tdb1_io_init(struct tdb_context *tdb)
526 tdb->tdb1.io = &io1_methods;
/*
 * Clears tdb->last_error to TDB_SUCCESS, runs the io method table's
 * tdb1_oob on the single byte just past the current map_size (fourth
 * argument true), and returns whatever last_error holds afterwards.
 * The return value of tdb1_oob itself is ignored, so only errors recorded
 * in last_error are reported to the caller.
 */
529 enum TDB_ERROR tdb1_probe_length(struct tdb_context *tdb)
531 tdb->last_error = TDB_SUCCESS;
532 tdb->tdb1.io->tdb1_oob(tdb, tdb->file->map_size, 1, true);
533 return tdb->last_error;