2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb1_private.h"
30 /* read a freelist record and check for simple errors */
31 int tdb1_rec_free_read(struct tdb_context *tdb, tdb1_off_t off, struct tdb1_record *rec)
33 if (tdb->tdb1.io->tdb1_read(tdb, off, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
36 if (rec->magic == TDB1_MAGIC) {
37 /* this happens when a app is showdown while deleting a record - we should
38 not completely fail when this happens */
39 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_WARNING,
40 "tdb1_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
42 rec->magic = TDB1_FREE_MAGIC;
43 if (tdb->tdb1.io->tdb1_write(tdb, off, rec, sizeof(*rec)) == -1)
47 if (rec->magic != TDB1_FREE_MAGIC) {
48 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
49 "tdb1_rec_free_read bad magic 0x%x at offset=%d\n",
53 if (tdb->tdb1.io->tdb1_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
59 /* update a record tailer (must hold allocation lock) */
60 static int update_tailer(struct tdb_context *tdb, tdb1_off_t offset,
61 const struct tdb1_record *rec)
65 /* Offset of tailer from record header */
66 totalsize = sizeof(*rec) + rec->rec_len;
67 return tdb1_ofs_write(tdb, offset + totalsize - sizeof(tdb1_off_t),
71 /* Add an element into the freelist. Merge adjacent records if
73 int tdb1_free(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
75 /* Allocation and tailer lock */
76 if (tdb1_lock(tdb, -1, F_WRLCK) != 0)
79 /* set an initial tailer, so if we fail we don't leave a bogus record */
80 if (update_tailer(tdb, offset, rec) != 0) {
81 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
82 "tdb_free: update_tailer failed!\n");
87 if (offset - sizeof(tdb1_off_t) > TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
88 tdb1_off_t left = offset - sizeof(tdb1_off_t);
92 /* Read in tailer and jump back to header */
93 if (tdb1_ofs_read(tdb, left, &leftsize) == -1) {
94 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
95 "tdb1_free: left offset read failed at %u", left);
99 /* it could be uninitialised data */
100 if (leftsize == 0 || leftsize == TDB1_PAD_U32) {
104 left = offset - leftsize;
106 if (leftsize > offset ||
107 left < TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
111 /* Now read in the left record */
112 if (tdb->tdb1.io->tdb1_read(tdb, left, &l, sizeof(l), TDB1_DOCONV()) == -1) {
113 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
114 "tdb1_free: left read failed at %u (%u)", left, leftsize);
118 /* If it's free, expand to include it. */
119 if (l.magic == TDB1_FREE_MAGIC) {
120 /* we now merge the new record into the left record, rather than the other
121 way around. This makes the operation O(1) instead of O(n). This change
122 prevents traverse from being O(n^2) after a lot of deletes */
123 l.rec_len += sizeof(*rec) + rec->rec_len;
124 if (tdb1_rec_write(tdb, left, &l) == -1) {
125 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
126 "tdb1_free: update_left failed at %u", left);
129 if (update_tailer(tdb, left, &l) == -1) {
130 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
131 "tdb1_free: update_tailer failed at %u", offset);
134 tdb1_unlock(tdb, -1, F_WRLCK);
141 /* Now, prepend to free list */
142 rec->magic = TDB1_FREE_MAGIC;
144 if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec->next) == -1 ||
145 tdb1_rec_write(tdb, offset, rec) == -1 ||
146 tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
147 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
148 "tdb1_free record write failed at offset=%d",
153 /* And we're done. */
154 tdb1_unlock(tdb, -1, F_WRLCK);
158 tdb1_unlock(tdb, -1, F_WRLCK);
165 the core of tdb1_allocate - called when we have decided which
166 free list entry to use
168 Note that we try to allocate by grabbing data from the end of an existing record,
169 not the beginning. This is so the left merge in a free is more likely to be
170 able to free up the record without fragmentation
172 static tdb1_off_t tdb1_allocate_ofs(struct tdb_context *tdb,
173 tdb1_len_t length, tdb1_off_t rec_ptr,
174 struct tdb1_record *rec, tdb1_off_t last_ptr)
176 #define MIN_REC_SIZE (sizeof(struct tdb1_record) + sizeof(tdb1_off_t) + 8)
178 if (rec->rec_len < length + MIN_REC_SIZE) {
179 /* we have to grab the whole record */
181 /* unlink it from the previous record */
182 if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1) {
186 /* mark it not free */
187 rec->magic = TDB1_MAGIC;
188 if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
194 /* we're going to just shorten the existing record */
195 rec->rec_len -= (length + sizeof(*rec));
196 if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
199 if (update_tailer(tdb, rec_ptr, rec) == -1) {
203 /* and setup the new record */
204 rec_ptr += sizeof(*rec) + rec->rec_len;
206 memset(rec, '\0', sizeof(*rec));
207 rec->rec_len = length;
208 rec->magic = TDB1_MAGIC;
210 if (tdb1_rec_write(tdb, rec_ptr, rec) == -1) {
214 if (update_tailer(tdb, rec_ptr, rec) == -1) {
221 /* allocate some space from the free list. The offset returned points
222 to a unconnected tdb1_record within the database with room for at
223 least length bytes of total data
225 0 is returned if the space could not be allocated
227 tdb1_off_t tdb1_allocate(struct tdb_context *tdb, tdb1_len_t length, struct tdb1_record *rec)
229 tdb1_off_t rec_ptr, last_ptr, newrec_ptr;
231 tdb1_off_t rec_ptr, last_ptr;
234 float multiplier = 1.0;
236 if (tdb1_lock(tdb, -1, F_WRLCK) == -1)
239 /* over-allocate to reduce fragmentation */
242 /* Extra bytes required for tailer */
243 length += sizeof(tdb1_off_t);
244 length = TDB1_ALIGN(length, TDB1_ALIGNMENT);
247 last_ptr = TDB1_FREELIST_TOP;
249 /* read in the freelist top */
250 if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec_ptr) == -1)
254 bestfit.last_ptr = 0;
258 this is a best fit allocation strategy. Originally we used
259 a first fit strategy, but it suffered from massive fragmentation
260 issues when faced with a slowly increasing record size.
263 if (tdb1_rec_free_read(tdb, rec_ptr, rec) == -1) {
267 if (rec->rec_len >= length) {
268 if (bestfit.rec_ptr == 0 ||
269 rec->rec_len < bestfit.rec_len) {
270 bestfit.rec_len = rec->rec_len;
271 bestfit.rec_ptr = rec_ptr;
272 bestfit.last_ptr = last_ptr;
276 /* move to the next record */
280 /* if we've found a record that is big enough, then
281 stop searching if its also not too big. The
282 definition of 'too big' changes as we scan
284 if (bestfit.rec_len > 0 &&
285 bestfit.rec_len < length * multiplier) {
289 /* this multiplier means we only extremely rarely
290 search more than 50 or so records. At 50 records we
291 accept records up to 11 times larger than what we
296 if (bestfit.rec_ptr != 0) {
297 if (tdb1_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
301 newrec_ptr = tdb1_allocate_ofs(tdb, length, bestfit.rec_ptr,
302 rec, bestfit.last_ptr);
303 tdb1_unlock(tdb, -1, F_WRLCK);
307 /* we didn't find enough space. See if we can expand the
308 database and if we can then try again */
309 if (tdb1_expand(tdb, length + sizeof(*rec)) == 0)
312 tdb1_unlock(tdb, -1, F_WRLCK);