2 Trivial Database 2: fetch, store and misc routines.
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include <ccan/asprintf/asprintf.h>
/*
 * Rewrite the header of an existing used record with a new key length and
 * data length, then write the header back to the database at `off`.
 * NOTE(review): some lines of this function (remaining parameters,
 * declarations, the return) are not visible in this excerpt.
 */
22 static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
26 struct tdb_used_record *rec,
/* Space available for data: the current data length plus its padding. */
29 uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
32 ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
33 keylen + dataroom, h);
/* Only write the header out if it could be built successfully. */
34 if (ecode == TDB_SUCCESS) {
35 ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
/*
 * Store key/dbuf in a freshly allocated record and hook that record into
 * the hash.  The visible code allocates with alloc(), either frees the old
 * record (add_free_record) and calls replace_in_hash(), or calls
 * add_to_hash(), and then writes the key and the data with twrite().
 * NOTE(review): the conditions selecting between the replace and add paths,
 * the remaining parameters and the error/return paths are on lines not
 * visible in this excerpt.
 */
40 static enum TDB_ERROR replace_data(struct tdb_context *tdb,
42 struct tdb_data key, struct tdb_data dbuf,
43 tdb_off_t old_off, tdb_len_t old_room,
49 /* Allocate a new record. */
50 new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
/* alloc() reports failure through an error-encoded offset. */
52 if (TDB_OFF_IS_ERR(new_off)) {
53 return TDB_OFF_TO_ERR(new_off);
56 /* We didn't like the existing one: remove it. */
59 ecode = add_free_record(tdb, old_off,
60 sizeof(struct tdb_used_record)
61 + key.dsize + old_room,
63 if (ecode == TDB_SUCCESS)
64 ecode = replace_in_hash(tdb, h, new_off);
66 ecode = add_to_hash(tdb, h, new_off);
68 if (ecode != TDB_SUCCESS) {
/* Step over the record header; the key is written directly after it. */
72 new_off += sizeof(struct tdb_used_record);
73 ecode = tdb->tdb2.io->twrite(tdb, new_off, key.dptr, key.dsize);
74 if (ecode != TDB_SUCCESS) {
/* NOTE(review): presumably new_off was advanced past the key on a line not
 * shown here, so that the data lands after the key; confirm. */
79 ecode = tdb->tdb2.io->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
80 if (ecode != TDB_SUCCESS) {
/* Extra work when TDB_SEQNUM is set; the action itself is not visible. */
84 if (tdb->flags & TDB_SEQNUM)
/*
 * Write dbuf at offset `off`; if that succeeds and `extra` is non-zero,
 * also write a single zero byte immediately after the data.
 * NOTE(review): the remaining parameters, declarations and the return are
 * on lines not visible in this excerpt.
 */
90 static enum TDB_ERROR update_data(struct tdb_context *tdb,
97 ecode = tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize);
98 if (ecode == TDB_SUCCESS && extra) {
99 /* Put a zero in; future versions may append other data. */
100 ecode = tdb->tdb2.io->twrite(tdb, off + dbuf.dsize, "", 1);
/* Extra work when TDB_SEQNUM is set; the action itself is not visible. */
102 if (tdb->flags & TDB_SEQNUM)
/*
 * Store dbuf under key, honouring `flag` (TDB_INSERT and TDB_MODIFY are
 * the values tested here).
 *
 * Databases flagged TDB_VERSION1 are handed to tdb1_store().  Otherwise the
 * key is looked up with a write lock (find_and_lock with F_WRLCK).  The
 * visible code can fail with TDB_ERR_EXISTS under TDB_INSERT and with
 * TDB_ERR_NOEXIST under TDB_MODIFY.  When the existing record's room (data
 * length plus padding) is at least dbuf.dsize it is updated in place via
 * update_rec_hdr() and update_data(); otherwise replace_data() is used.
 * The hash lock is released before returning and the result is also
 * recorded in tdb->last_error.
 * NOTE(review): the conditions guarding several branches are on lines not
 * visible in this excerpt.
 */
108 enum TDB_ERROR tdb_store(struct tdb_context *tdb,
109 struct tdb_data key, struct tdb_data dbuf, int flag)
113 tdb_len_t old_room = 0;
114 struct tdb_used_record rec;
115 enum TDB_ERROR ecode;
117 if (tdb->flags & TDB_VERSION1) {
118 if (tdb1_store(tdb, key, dbuf, flag) == -1)
119 return tdb->last_error;
123 off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
124 if (TDB_OFF_IS_ERR(off)) {
125 return tdb->last_error = TDB_OFF_TO_ERR(off);
128 /* Now we have lock on this hash bucket. */
129 if (flag == TDB_INSERT) {
131 ecode = TDB_ERR_EXISTS;
136 old_room = rec_data_length(&rec)
137 + rec_extra_padding(&rec);
138 if (old_room >= dbuf.dsize) {
139 /* Can modify in-place. Easy! */
140 ecode = update_rec_hdr(tdb, off,
141 key.dsize, dbuf.dsize,
143 if (ecode != TDB_SUCCESS) {
146 ecode = update_data(tdb,
149 old_room - dbuf.dsize);
150 if (ecode != TDB_SUCCESS) {
153 tdb_unlock_hashes(tdb, h.hlock_start,
154 h.hlock_range, F_WRLCK);
155 return tdb->last_error = TDB_SUCCESS;
158 if (flag == TDB_MODIFY) {
159 /* if the record doesn't exist and we
160 are in TDB_MODIFY mode then we should fail
162 ecode = TDB_ERR_NOEXIST;
168 /* If we didn't use the old record, this implies we're growing. */
/* NOTE(review): the final argument is `off` itself, whereas tdb_append()
 * passes `true` in the same position; presumably it is a boolean "growing"
 * flag that is true whenever an old record exists (off != 0); confirm. */
169 ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);
171 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
172 return tdb->last_error = ecode;
/*
 * Append dbuf to the data stored under key.
 *
 * Databases flagged TDB_VERSION1 are handed to tdb1_append().  Otherwise
 * the key is looked up with a write lock.  If the record's padding can hold
 * dbuf, the header is updated to the new data length and the new bytes are
 * written directly after the old data.  Otherwise a temporary buffer is
 * built holding the old data followed by dbuf, and replace_data() stores it
 * in a new record.  The hash lock is released before returning and the
 * result is also recorded in tdb->last_error.
 * NOTE(review): handling of a key that does not exist yet, and the cleanup
 * labels, are on lines not visible in this excerpt.
 */
175 enum TDB_ERROR tdb_append(struct tdb_context *tdb,
176 struct tdb_data key, struct tdb_data dbuf)
180 struct tdb_used_record rec;
181 tdb_len_t old_room = 0, old_dlen;
182 unsigned char *newdata;
183 struct tdb_data new_dbuf;
184 enum TDB_ERROR ecode;
186 if (tdb->flags & TDB_VERSION1) {
187 if (tdb1_append(tdb, key, dbuf) == -1)
188 return tdb->last_error;
192 off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
193 if (TDB_OFF_IS_ERR(off)) {
194 return tdb->last_error = TDB_OFF_TO_ERR(off);
198 old_dlen = rec_data_length(&rec);
199 old_room = old_dlen + rec_extra_padding(&rec);
201 /* Fast path: can append in place. */
202 if (rec_extra_padding(&rec) >= dbuf.dsize) {
203 ecode = update_rec_hdr(tdb, off, key.dsize,
204 old_dlen + dbuf.dsize, &rec,
206 if (ecode != TDB_SUCCESS) {
/* Move to the end of the old data: past the header, the key and old_dlen. */
210 off += sizeof(rec) + key.dsize + old_dlen;
211 ecode = update_data(tdb, off, dbuf,
212 rec_extra_padding(&rec));
/* NOTE(review): the allocation size includes key.dsize, but the visible
 * code only uses old_dlen + dbuf.dsize bytes of the buffer; this looks like
 * harmless over-allocation; confirm. */
217 newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
219 ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
221 " failed to allocate %zu bytes",
222 (size_t)(key.dsize + old_dlen
/* Read from the start of the record's existing data (after header and key). */
226 ecode = tdb->tdb2.io->tread(tdb, off + sizeof(rec) + key.dsize,
228 if (ecode != TDB_SUCCESS) {
229 goto out_free_newdata;
/* Place the appended bytes directly after the old data in the buffer. */
231 memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
232 new_dbuf.dptr = newdata;
233 new_dbuf.dsize = old_dlen + dbuf.dsize;
239 /* If they're using tdb_append(), it implies they're growing record. */
240 ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
245 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
246 return tdb->last_error = ecode;
/*
 * Look up key and return its data through *data.
 *
 * Databases flagged TDB_VERSION1 are handed to tdb1_fetch().  Otherwise the
 * key is looked up with a read lock; data->dsize is set to the record's
 * data length and data->dptr to the result of tdb_alloc_read() on the data
 * area (after the header and the key).  TDB_ERR_NOEXIST is one of the
 * visible results.  The hash lock is released before returning and the
 * result is also recorded in tdb->last_error.
 * NOTE(review): presumably the caller owns and must free data->dptr, since
 * it comes from tdb_alloc_read(); confirm.
 */
249 enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
250 struct tdb_data *data)
253 struct tdb_used_record rec;
255 enum TDB_ERROR ecode;
257 if (tdb->flags & TDB_VERSION1)
258 return tdb1_fetch(tdb, key, data);
260 off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
261 if (TDB_OFF_IS_ERR(off)) {
262 return tdb->last_error = TDB_OFF_TO_ERR(off);
266 ecode = TDB_ERR_NOEXIST;
268 data->dsize = rec_data_length(&rec);
269 data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
/* tdb_alloc_read() reports failure through an error-encoded pointer. */
271 if (TDB_PTR_IS_ERR(data->dptr)) {
272 ecode = TDB_PTR_ERR(data->dptr);
277 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
278 return tdb->last_error = ecode;
/*
 * Report whether key is present in the database.
 *
 * Databases flagged TDB_VERSION1 are handed to tdb1_exists().  Otherwise
 * the key is looked up with a read lock.  A lookup error is stored in
 * tdb->last_error; on a successful lookup the lock is released,
 * tdb->last_error is set to TDB_SUCCESS and the result is true exactly when
 * the returned offset is non-zero.
 * NOTE(review): the return on the lookup-error path is on a line not
 * visible in this excerpt.
 */
281 bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
284 struct tdb_used_record rec;
287 if (tdb->flags & TDB_VERSION1) {
288 return tdb1_exists(tdb, key);
291 off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
292 if (TDB_OFF_IS_ERR(off)) {
293 tdb->last_error = TDB_OFF_TO_ERR(off);
296 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
298 tdb->last_error = TDB_SUCCESS;
299 return off ? true : false;
/*
 * Delete the record stored under key.
 *
 * Databases flagged TDB_VERSION1 are handed to tdb1_delete().  Otherwise
 * the key is looked up with a write lock, the entry is removed from the
 * hash with delete_from_hash(), and the record's whole extent is handed to
 * add_free_record().  TDB_ERR_NOEXIST is one of the visible results.  The
 * hash lock is released before returning and the result is also recorded
 * in tdb->last_error.
 * NOTE(review): the branch conditions are on lines not visible in this
 * excerpt.
 */
302 enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
305 struct tdb_used_record rec;
307 enum TDB_ERROR ecode;
309 if (tdb->flags & TDB_VERSION1) {
310 if (tdb1_delete(tdb, key) == -1)
311 return tdb->last_error;
315 off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
316 if (TDB_OFF_IS_ERR(off)) {
317 return tdb->last_error = TDB_OFF_TO_ERR(off);
321 ecode = TDB_ERR_NOEXIST;
325 ecode = delete_from_hash(tdb, &h);
326 if (ecode != TDB_SUCCESS) {
330 /* Free the deleted entry. */
/* The freed length covers the header, the key, the data and the padding. */
332 ecode = add_free_record(tdb, off,
333 sizeof(struct tdb_used_record)
334 + rec_key_length(&rec)
335 + rec_data_length(&rec)
336 + rec_extra_padding(&rec),
337 TDB_LOCK_WAIT, true);
/* Extra work when TDB_SEQNUM is set; the action itself is not visible. */
339 if (tdb->flags & TDB_SEQNUM)
343 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
344 return tdb->last_error = ecode;
/*
 * NOTE(review): only the signature is visible in this excerpt; presumably
 * this returns the context's current flag bits; confirm against the body.
 */
347 unsigned int tdb_get_flags(struct tdb_context *tdb)
/*
 * Report whether a transaction is currently open on tdb.  The transaction
 * pointer that is checked depends on the TDB_VERSION1 flag: tdb1.transaction
 * when it is set, tdb2.transaction otherwise.
 */
352 static bool inside_transaction(const struct tdb_context *tdb)
354 if (tdb->flags & TDB_VERSION1)
355 return tdb->tdb1.transaction != NULL;
357 return tdb->tdb2.transaction != NULL;
/*
 * Check whether TDB_RDONLY may be changed right now.  Inside a transaction
 * a TDB_ERR_EINVAL error is logged and stored in tdb->last_error; `caller`
 * is presumably used to prefix that message.
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt; callers treat a true result as permission to change the flag.
 */
360 static bool readonly_changable(struct tdb_context *tdb, const char *caller)
362 if (inside_transaction(tdb)) {
363 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
366 " TDB_RDONLY inside transaction",
/*
 * Turn on one flag on an open database.
 *
 * Databases with TDB_INTERNAL set are rejected with TDB_ERR_EINVAL.  The
 * visible branches set TDB_NOLOCK, TDB_NOMMAP (also calling tdb_munmap on
 * the file), TDB_NOSYNC, TDB_SEQNUM, TDB_ALLOW_NESTING and TDB_RDONLY (only
 * when readonly_changable() allows it).  Any other value is logged as an
 * unknown flag with TDB_ERR_EINVAL.  Errors are reported only through
 * tdb->last_error since the function returns void.
 * NOTE(review): most case labels are on lines not visible in this excerpt.
 */
373 void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
375 if (tdb->flags & TDB_INTERNAL) {
376 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
378 "tdb_add_flag: internal db");
383 tdb->flags |= TDB_NOLOCK;
386 tdb->flags |= TDB_NOMMAP;
387 tdb_munmap(tdb->file);
390 tdb->flags |= TDB_NOSYNC;
393 tdb->flags |= TDB_SEQNUM;
395 case TDB_ALLOW_NESTING:
396 tdb->flags |= TDB_ALLOW_NESTING;
399 if (readonly_changable(tdb, "tdb_add_flag"))
400 tdb->flags |= TDB_RDONLY;
403 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
405 "tdb_add_flag: Unknown flag %u",
/*
 * Turn off one flag on an open database.
 *
 * Databases with TDB_INTERNAL set are rejected with TDB_ERR_EINVAL.  The
 * visible branches clear TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM,
 * TDB_ALLOW_NESTING and TDB_RDONLY.  Clearing TDB_RDONLY is refused with
 * TDB_ERR_EINVAL when the database was opened O_RDONLY, and otherwise only
 * happens when readonly_changable() allows it.  Any other value is logged
 * as an unknown flag with TDB_ERR_EINVAL.  Errors are reported only through
 * tdb->last_error since the function returns void.
 * NOTE(review): most case labels are on lines not visible in this excerpt.
 */
410 void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
412 if (tdb->flags & TDB_INTERNAL) {
413 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
415 "tdb_remove_flag: internal db");
420 tdb->flags &= ~TDB_NOLOCK;
423 tdb->flags &= ~TDB_NOMMAP;
427 tdb->flags &= ~TDB_NOSYNC;
430 tdb->flags &= ~TDB_SEQNUM;
432 case TDB_ALLOW_NESTING:
433 tdb->flags &= ~TDB_ALLOW_NESTING;
/* The access mode chosen at open time limits what can be re-enabled. */
436 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY) {
437 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
439 "tdb_remove_flag: can't"
440 " remove TDB_RDONLY on tdb"
441 " opened with O_RDONLY");
444 if (readonly_changable(tdb, "tdb_remove_flag"))
445 tdb->flags &= ~TDB_RDONLY;
448 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
450 "tdb_remove_flag: Unknown flag %u",
/*
 * Map a TDB_ERROR code to a constant, human-readable string.  The switch
 * runs on TDB_ERR_TO_OFF(ecode) and uses the same macro for each case
 * label; a code that matches no case yields "Invalid error code".
 */
455 const char *tdb_errorstr(enum TDB_ERROR ecode)
457 /* Gcc warns if you miss a case in the switch, so use that. */
458 switch (TDB_ERR_TO_OFF(ecode)) {
459 case TDB_ERR_TO_OFF(TDB_SUCCESS): return "Success";
460 case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT): return "Corrupt database";
461 case TDB_ERR_TO_OFF(TDB_ERR_IO): return "IO Error";
462 case TDB_ERR_TO_OFF(TDB_ERR_LOCK): return "Locking error";
463 case TDB_ERR_TO_OFF(TDB_ERR_OOM): return "Out of memory";
464 case TDB_ERR_TO_OFF(TDB_ERR_EXISTS): return "Record exists";
465 case TDB_ERR_TO_OFF(TDB_ERR_EINVAL): return "Invalid parameter";
466 case TDB_ERR_TO_OFF(TDB_ERR_NOEXIST): return "Record does not exist";
467 case TDB_ERR_TO_OFF(TDB_ERR_RDONLY): return "write not permitted";
469 return "Invalid error code";
/* Return the error code most recently recorded in tdb->last_error. */
472 enum TDB_ERROR tdb_error(struct tdb_context *tdb)
474 return tdb->last_error;
/*
 * Format a printf-style message and pass it to the database's log callback
 * (tdb->log_fn) together with `level` and `ecode`.
 *
 * errno is captured on entry.  The message is built with vasprintf(); if
 * that fails, an out-of-memory notice is logged first and the raw format
 * string is then logged in place of the formatted message.
 * NOTE(review): the checks around log_fn, the restoring of errno and the
 * return value are on lines not visible in this excerpt; callers assign the
 * result to an error variable, so presumably `ecode` is returned; confirm.
 */
477 enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
478 enum TDB_ERROR ecode,
479 enum tdb_log_level level,
480 const char *fmt, ...)
485 /* tdb_open paths care about errno, so save it. */
486 int saved_errno = errno;
492 len = vasprintf(&message, fmt, ap);
496 tdb->log_fn(tdb, TDB_LOG_ERROR, TDB_ERR_OOM,
497 "out of memory formatting message:", tdb->log_data);
/* Fall back to the unformatted format string so the message is not lost. */
498 tdb->log_fn(tdb, level, ecode, fmt, tdb->log_data);
500 tdb->log_fn(tdb, level, ecode, message, tdb->log_data);
/*
 * Look up key and hand its data to the caller's `parse` callback.
 *
 * Databases flagged TDB_VERSION1 are handed to tdb1_parse_record().
 * Otherwise the key is looked up with a read lock, the record's data is
 * obtained with tdb_access_read(), wrapped with tdb_mkdata() and passed to
 * parse(key, d, data); the callback's result becomes the return value.  The
 * access is released with tdb_access_release() after the callback returns,
 * so the data passed to the callback should not be assumed to remain valid
 * afterwards.  TDB_ERR_NOEXIST is one of the visible results.  The hash
 * lock is released before returning and the result is also recorded in
 * tdb->last_error.
 * NOTE(review): parts of the signature and the branch conditions are on
 * lines not visible in this excerpt.
 */
507 enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
509 enum TDB_ERROR (*parse)(TDB_DATA k,
515 struct tdb_used_record rec;
517 enum TDB_ERROR ecode;
519 if (tdb->flags & TDB_VERSION1) {
520 return tdb->last_error = tdb1_parse_record(tdb, key, parse,
524 off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
525 if (TDB_OFF_IS_ERR(off)) {
526 return tdb->last_error = TDB_OFF_TO_ERR(off);
530 ecode = TDB_ERR_NOEXIST;
/* The data area starts after the record header and the key. */
533 dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize,
534 rec_data_length(&rec), false);
535 if (TDB_PTR_IS_ERR(dptr)) {
536 ecode = TDB_PTR_ERR(dptr);
538 TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec));
540 ecode = parse(key, d, data);
541 tdb_access_release(tdb, dptr);
545 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
546 return tdb->last_error = ecode;
/*
 * NOTE(review): only the signature is visible in this excerpt; presumably
 * this returns the name associated with the database; confirm against the
 * body.
 */
549 const char *tdb_name(const struct tdb_context *tdb)
/*
 * Read the database sequence number.
 *
 * For TDB_VERSION1 databases, tdb->last_error is cleared, tdb1_get_seqnum()
 * is called, and if an error was recorded it is returned encoded with
 * TDB_ERR_TO_OFF().  Otherwise the seqnum field of struct tdb_header is
 * read with tdb_read_off(), and tdb->last_error is set either to the
 * decoded error or to TDB_SUCCESS.
 * NOTE(review): the final return statements are on lines not visible in
 * this excerpt.
 */
554 int64_t tdb_get_seqnum(struct tdb_context *tdb)
558 if (tdb->flags & TDB_VERSION1) {
/* Clear the stored error first so a failure in the call can be detected. */
560 tdb->last_error = TDB_SUCCESS;
561 val = tdb1_get_seqnum(tdb);
563 if (tdb->last_error != TDB_SUCCESS)
564 return TDB_ERR_TO_OFF(tdb->last_error);
569 off = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
570 if (TDB_OFF_IS_ERR(off))
571 tdb->last_error = TDB_OFF_TO_ERR(off);
573 tdb->last_error = TDB_SUCCESS;
/*
 * Return the file descriptor stored in tdb->file.  tdb->file is
 * dereferenced without a NULL check.
 */
578 int tdb_fd(const struct tdb_context *tdb)
580 return tdb->file->fd;
/*
 * State shared between tdb_repack() and its traverse callback: `error`
 * holds the result of the most recent store, `dest_db` is the database
 * records are copied into.
 */
583 struct traverse_state {
584 enum TDB_ERROR error;
585 struct tdb_context *dest_db;
589 traverse function for repacking
/*
 * Traverse callback used by tdb_repack(): store the visited key/data pair
 * into state->dest_db with TDB_INSERT and record the result in
 * state->error.
 * NOTE(review): the return values are on lines not visible in this
 * excerpt; presumably a non-zero value is returned on failure; confirm.
 */
591 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
592 struct traverse_state *state)
594 state->error = tdb_store(state->dest_db, key, data, TDB_INSERT);
595 if (state->error != TDB_SUCCESS) {
/*
 * Repack the database by copying every record out and back in, inside a
 * transaction.
 *
 * Visible steps: start a transaction on tdb; open an internal temporary
 * database ("tmpdb", TDB_INTERNAL); traverse tdb, storing each record into
 * the temporary database; wipe tdb with tdb_wipe_all(); traverse the
 * temporary database with the same callback; commit the transaction.  If
 * the temporary database cannot be opened, TDB_ERR_OOM is logged, the
 * transaction is cancelled and the error is returned.
 * NOTE(review): presumably state.dest_db is switched to tdb before the
 * second traverse and tmp_db is closed on every path, but those lines, and
 * the other failure paths ending in tdb_transaction_cancel(), are not
 * visible in this excerpt; confirm.
 */
601 enum TDB_ERROR tdb_repack(struct tdb_context *tdb)
603 struct tdb_context *tmp_db;
604 struct traverse_state state;
606 state.error = tdb_transaction_start(tdb);
607 if (state.error != TDB_SUCCESS) {
611 tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
612 if (tmp_db == NULL) {
613 state.error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
615 " Failed to create tmp_db");
616 tdb_transaction_cancel(tdb);
617 return tdb->last_error = state.error;
/* First pass: copy every record from tdb into the temporary database. */
620 state.dest_db = tmp_db;
621 if (tdb_traverse(tdb, repack_traverse, &state) < 0) {
625 state.error = tdb_wipe_all(tdb);
626 if (state.error != TDB_SUCCESS) {
/* Second pass: walk the temporary database with the same callback. */
631 if (tdb_traverse(tmp_db, repack_traverse, &state) < 0) {
636 return tdb_transaction_commit(tdb);
639 tdb_transaction_cancel(tdb);