2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb1_private.h"
30 /* read a freelist record and check for simple errors */
/* NOTE(review): this extract is missing several original lines (opening
 * brace, "return -1;" bodies, closing braces); the integer at the start of
 * each line is a pasted-in line number, not code. */
31 int tdb1_rec_free_read(struct tdb1_context *tdb, tdb1_off_t off, struct tdb1_record *rec)
/* Pull the raw record header at 'off' into *rec (TDB1_DOCONV handles any
 * endian conversion). */
33 if (tdb->methods->tdb1_read(tdb, off, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
/* A live-data magic on the freelist is recoverable: it happens when a
 * process died mid-delete, so repair rather than fail hard. */
36 if (rec->magic == TDB1_MAGIC) {
37 /* this happens when an app is shut down while deleting a record - we should
38 not completely fail when this happens */
39 TDB1_LOG((tdb, TDB1_DEBUG_WARNING, "tdb1_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
/* Repair in place: stamp the record as free and write the header back. */
41 rec->magic = TDB1_FREE_MAGIC;
42 if (tdb->methods->tdb1_write(tdb, off, rec, sizeof(*rec)) == -1)
/* Any other magic value is genuine corruption - set ecode and report. */
46 if (rec->magic != TDB1_FREE_MAGIC) {
47 /* Ensure ecode is set for log fn. */
48 tdb->ecode = TDB1_ERR_CORRUPT;
49 TDB1_LOG((tdb, TDB1_DEBUG_WARNING, "tdb1_rec_free_read bad magic 0x%x at offset=%d\n",
/* Sanity-check that the next-pointer plus one header still lies inside
 * the mapped file. */
53 if (tdb->methods->tdb1_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
59 /* update a record tailer (must hold allocation lock) */
/* The tailer lives in the final tdb1_off_t slot of the record
 * (offset + totalsize - sizeof(tdb1_off_t)); tdb1_free reads it to jump
 * back to the left neighbour's header when merging adjacent free records.
 * NOTE(review): the value argument of the tdb1_ofs_write call is on a line
 * missing from this extract - presumably &totalsize; confirm against the
 * upstream source. */
60 static int update_tailer(struct tdb1_context *tdb, tdb1_off_t offset,
61 const struct tdb1_record *rec)
65 /* Offset of tailer from record header */
66 totalsize = sizeof(*rec) + rec->rec_len;
67 return tdb1_ofs_write(tdb, offset + totalsize - sizeof(tdb1_off_t),
71 /* Add an element into the freelist. Merge adjacent records if
/* NOTE(review): the tail of the comment above (likely "possible */") and a
 * number of body lines (braces, gotos/returns, local declarations) are
 * missing from this extract; the leading integers are pasted line numbers. */
73 int tdb1_free(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
75 /* Allocation and tailer lock */
76 if (tdb1_lock(tdb, -1, F_WRLCK) != 0)
79 /* set an initial tailer, so if we fail we don't leave a bogus record */
80 if (update_tailer(tdb, offset, rec) != 0) {
81 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_free: update_tailer failed!\n"));
/* Try to merge with the record physically to the left: its tailer sits in
 * the tdb1_off_t immediately before our header. Only attempted when that
 * position is past the start of the data area. */
86 if (offset - sizeof(tdb1_off_t) > TDB1_DATA_START(tdb->header.hash_size)) {
87 tdb1_off_t left = offset - sizeof(tdb1_off_t);
91 /* Read in tailer and jump back to header */
92 if (tdb1_ofs_read(tdb, left, &leftsize) == -1) {
93 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_free: left offset read failed at %u\n", left));
97 /* it could be uninitialised data */
98 if (leftsize == 0 || leftsize == TDB1_PAD_U32) {
/* the tailer holds the left record's total size, so subtracting it from
 * our offset lands on the left record's header */
102 left = offset - leftsize;
/* reject nonsense tailer values that would point before the data area
 * or before the start of the file */
104 if (leftsize > offset ||
105 left < TDB1_DATA_START(tdb->header.hash_size)) {
109 /* Now read in the left record */
110 if (tdb->methods->tdb1_read(tdb, left, &l, sizeof(l), TDB1_DOCONV()) == -1) {
111 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_free: left read failed at %u (%u)\n", left, leftsize));
115 /* If it's free, expand to include it. */
116 if (l.magic == TDB1_FREE_MAGIC) {
117 /* we now merge the new record into the left record, rather than the other
118 way around. This makes the operation O(1) instead of O(n). This change
119 prevents traverse from being O(n^2) after a lot of deletes */
120 l.rec_len += sizeof(*rec) + rec->rec_len;
121 if (tdb1_rec_write(tdb, left, &l) == -1) {
122 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_free: update_left failed at %u\n", left));
125 if (update_tailer(tdb, left, &l) == -1) {
126 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_free: update_tailer failed at %u\n", offset));
/* merged into the left free record - drop the lock and we are done */
129 tdb1_unlock(tdb, -1, F_WRLCK);
136 /* Now, prepend to free list */
137 rec->magic = TDB1_FREE_MAGIC;
/* link at the head: rec->next <- old freelist top, then top <- offset */
139 if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec->next) == -1 ||
140 tdb1_rec_write(tdb, offset, rec) == -1 ||
141 tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
142 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_free record write failed at offset=%d\n", offset));
146 /* And we're done. */
147 tdb1_unlock(tdb, -1, F_WRLCK);
/* error path: release the allocation lock before returning */
151 tdb1_unlock(tdb, -1, F_WRLCK);
158 the core of tdb1_allocate - called when we have decided which
159 free list entry to use
161 Note that we try to allocate by grabbing data from the end of an existing record,
162 not the beginning. This is so the left merge in a free is more likely to be
163 able to free up the record without fragmentation
/* NOTE(review): the comment delimiters around the text above and several
 * body lines (returns, braces, else) are missing from this extract. */
165 static tdb1_off_t tdb1_allocate_ofs(struct tdb1_context *tdb,
166 tdb1_len_t length, tdb1_off_t rec_ptr,
167 struct tdb1_record *rec, tdb1_off_t last_ptr)
/* smallest leftover worth keeping as its own free record: one header,
 * one tailer, and 8 bytes of data */
169 #define MIN_REC_SIZE (sizeof(struct tdb1_record) + sizeof(tdb1_off_t) + 8)
171 if (rec->rec_len < length + MIN_REC_SIZE) {
172 /* we have to grab the whole record */
174 /* unlink it from the previous record */
175 if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1) {
179 /* mark it not free */
180 rec->magic = TDB1_MAGIC;
181 if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
187 /* we're going to just shorten the existing record */
188 rec->rec_len -= (length + sizeof(*rec));
189 if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
192 if (update_tailer(tdb, rec_ptr, rec) == -1) {
196 /* and setup the new record */
/* the new allocation is carved from the END of the shortened record,
 * immediately after its remaining data */
197 rec_ptr += sizeof(*rec) + rec->rec_len;
199 memset(rec, '\0', sizeof(*rec));
200 rec->rec_len = length;
201 rec->magic = TDB1_MAGIC;
203 if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
207 if (update_tailer(tdb, rec_ptr, rec) == -1) {
214 /* allocate some space from the free list. The offset returned points
215 to a unconnected tdb1_record within the database with room for at
216 least length bytes of total data
218 0 is returned if the space could not be allocated
220 tdb1_off_t tdb1_allocate(struct tdb1_context *tdb, tdb1_len_t length, struct tdb1_record *rec)
222 tdb1_off_t rec_ptr, last_ptr, newrec_ptr;
/* NOTE(review): the declarations below apparently belong to a local
 * "struct { tdb1_off_t rec_ptr, last_ptr; tdb1_len_t rec_len; } bestfit;"
 * whose wrapper lines are missing from this extract - the bestfit.* uses
 * further down support that; confirm against the upstream source. */
224 tdb1_off_t rec_ptr, last_ptr;
227 float multiplier = 1.0;
229 if (tdb1_lock(tdb, -1, F_WRLCK) == -1)
232 /* over-allocate to reduce fragmentation */
235 /* Extra bytes required for tailer */
236 length += sizeof(tdb1_off_t);
237 length = TDB1_ALIGN(length, TDB1_ALIGNMENT);
/* start the scan at the head of the freelist */
240 last_ptr = TDB1_FREELIST_TOP;
242 /* read in the freelist top */
243 if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec_ptr) == -1)
247 bestfit.last_ptr = 0;
251 this is a best fit allocation strategy. Originally we used
252 a first fit strategy, but it suffered from massive fragmentation
253 issues when faced with a slowly increasing record size.
/* walk the free list, remembering the smallest record that still fits */
256 if (tdb1_rec_free_read(tdb, rec_ptr, rec) == -1) {
260 if (rec->rec_len >= length) {
261 if (bestfit.rec_ptr == 0 ||
262 rec->rec_len < bestfit.rec_len) {
263 bestfit.rec_len = rec->rec_len;
264 bestfit.rec_ptr = rec_ptr;
265 bestfit.last_ptr = last_ptr;
269 /* move to the next record */
273 /* if we've found a record that is big enough, then
274 stop searching if its also not too big. The
275 definition of 'too big' changes as we scan
277 if (bestfit.rec_len > 0 &&
278 bestfit.rec_len < length * multiplier) {
282 /* this multiplier means we only extremely rarely
283 search more than 50 or so records. At 50 records we
284 accept records up to 11 times larger than what we
289 if (bestfit.rec_ptr != 0) {
/* re-read the winner's header (rec was reused as scratch during the
 * scan) and carve the allocation out of it */
290 if (tdb1_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
294 newrec_ptr = tdb1_allocate_ofs(tdb, length, bestfit.rec_ptr,
295 rec, bestfit.last_ptr);
296 tdb1_unlock(tdb, -1, F_WRLCK);
300 /* we didn't find enough space. See if we can expand the
301 database and if we can then try again */
302 if (tdb1_expand(tdb, length + sizeof(*rec)) == 0)
305 tdb1_unlock(tdb, -1, F_WRLCK);