git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb.c
tdb2: suppress failtest more than once on mmap.
[ccan] / ccan / tdb2 / tdb.c
1  /*
2    Trivial Database 2: fetch, store and misc routines.
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/asprintf/asprintf.h>
20 #include <stdarg.h>
21
22 static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
23                                      tdb_off_t off,
24                                      tdb_len_t keylen,
25                                      tdb_len_t datalen,
26                                      struct tdb_used_record *rec,
27                                      uint64_t h)
28 {
29         uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
30         enum TDB_ERROR ecode;
31
32         ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
33                            keylen + dataroom, h);
34         if (ecode == TDB_SUCCESS) {
35                 ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
36         }
37         return ecode;
38 }
39
40 static enum TDB_ERROR replace_data(struct tdb_context *tdb,
41                                    struct hash_info *h,
42                                    struct tdb_data key, struct tdb_data dbuf,
43                                    tdb_off_t old_off, tdb_len_t old_room,
44                                    bool growing)
45 {
46         tdb_off_t new_off;
47         enum TDB_ERROR ecode;
48
49         /* Allocate a new record. */
50         new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
51                         growing);
52         if (TDB_OFF_IS_ERR(new_off)) {
53                 return TDB_OFF_TO_ERR(new_off);
54         }
55
56         /* We didn't like the existing one: remove it. */
57         if (old_off) {
58                 tdb->stats.frees++;
59                 ecode = add_free_record(tdb, old_off,
60                                         sizeof(struct tdb_used_record)
61                                         + key.dsize + old_room,
62                                         TDB_LOCK_WAIT, true);
63                 if (ecode == TDB_SUCCESS)
64                         ecode = replace_in_hash(tdb, h, new_off);
65         } else {
66                 ecode = add_to_hash(tdb, h, new_off);
67         }
68         if (ecode != TDB_SUCCESS) {
69                 return ecode;
70         }
71
72         new_off += sizeof(struct tdb_used_record);
73         ecode = tdb->tdb2.io->twrite(tdb, new_off, key.dptr, key.dsize);
74         if (ecode != TDB_SUCCESS) {
75                 return ecode;
76         }
77
78         new_off += key.dsize;
79         ecode = tdb->tdb2.io->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
80         if (ecode != TDB_SUCCESS) {
81                 return ecode;
82         }
83
84         if (tdb->flags & TDB_SEQNUM)
85                 tdb_inc_seqnum(tdb);
86
87         return TDB_SUCCESS;
88 }
89
90 static enum TDB_ERROR update_data(struct tdb_context *tdb,
91                                   tdb_off_t off,
92                                   struct tdb_data dbuf,
93                                   tdb_len_t extra)
94 {
95         enum TDB_ERROR ecode;
96
97         ecode = tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize);
98         if (ecode == TDB_SUCCESS && extra) {
99                 /* Put a zero in; future versions may append other data. */
100                 ecode = tdb->tdb2.io->twrite(tdb, off + dbuf.dsize, "", 1);
101         }
102         if (tdb->flags & TDB_SEQNUM)
103                 tdb_inc_seqnum(tdb);
104
105         return ecode;
106 }
107
108 enum TDB_ERROR tdb_store(struct tdb_context *tdb,
109                          struct tdb_data key, struct tdb_data dbuf, int flag)
110 {
111         struct hash_info h;
112         tdb_off_t off;
113         tdb_len_t old_room = 0;
114         struct tdb_used_record rec;
115         enum TDB_ERROR ecode;
116
117         if (tdb->flags & TDB_VERSION1) {
118                 if (tdb1_store(tdb, key, dbuf, flag) == -1)
119                         return tdb->last_error;
120                 return TDB_SUCCESS;
121         }
122
123         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
124         if (TDB_OFF_IS_ERR(off)) {
125                 return tdb->last_error = TDB_OFF_TO_ERR(off);
126         }
127
128         /* Now we have lock on this hash bucket. */
129         if (flag == TDB_INSERT) {
130                 if (off) {
131                         ecode = TDB_ERR_EXISTS;
132                         goto out;
133                 }
134         } else {
135                 if (off) {
136                         old_room = rec_data_length(&rec)
137                                 + rec_extra_padding(&rec);
138                         if (old_room >= dbuf.dsize) {
139                                 /* Can modify in-place.  Easy! */
140                                 ecode = update_rec_hdr(tdb, off,
141                                                        key.dsize, dbuf.dsize,
142                                                        &rec, h.h);
143                                 if (ecode != TDB_SUCCESS) {
144                                         goto out;
145                                 }
146                                 ecode = update_data(tdb,
147                                                     off + sizeof(rec)
148                                                     + key.dsize, dbuf,
149                                                     old_room - dbuf.dsize);
150                                 if (ecode != TDB_SUCCESS) {
151                                         goto out;
152                                 }
153                                 tdb_unlock_hashes(tdb, h.hlock_start,
154                                                   h.hlock_range, F_WRLCK);
155                                 return tdb->last_error = TDB_SUCCESS;
156                         }
157                 } else {
158                         if (flag == TDB_MODIFY) {
159                                 /* if the record doesn't exist and we
160                                    are in TDB_MODIFY mode then we should fail
161                                    the store */
162                                 ecode = TDB_ERR_NOEXIST;
163                                 goto out;
164                         }
165                 }
166         }
167
168         /* If we didn't use the old record, this implies we're growing. */
169         ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);
170 out:
171         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
172         return tdb->last_error = ecode;
173 }
174
175 enum TDB_ERROR tdb_append(struct tdb_context *tdb,
176                           struct tdb_data key, struct tdb_data dbuf)
177 {
178         struct hash_info h;
179         tdb_off_t off;
180         struct tdb_used_record rec;
181         tdb_len_t old_room = 0, old_dlen;
182         unsigned char *newdata;
183         struct tdb_data new_dbuf;
184         enum TDB_ERROR ecode;
185
186         if (tdb->flags & TDB_VERSION1) {
187                 if (tdb1_append(tdb, key, dbuf) == -1)
188                         return tdb->last_error;
189                 return TDB_SUCCESS;
190         }
191
192         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
193         if (TDB_OFF_IS_ERR(off)) {
194                 return tdb->last_error = TDB_OFF_TO_ERR(off);
195         }
196
197         if (off) {
198                 old_dlen = rec_data_length(&rec);
199                 old_room = old_dlen + rec_extra_padding(&rec);
200
201                 /* Fast path: can append in place. */
202                 if (rec_extra_padding(&rec) >= dbuf.dsize) {
203                         ecode = update_rec_hdr(tdb, off, key.dsize,
204                                                old_dlen + dbuf.dsize, &rec,
205                                                h.h);
206                         if (ecode != TDB_SUCCESS) {
207                                 goto out;
208                         }
209
210                         off += sizeof(rec) + key.dsize + old_dlen;
211                         ecode = update_data(tdb, off, dbuf,
212                                             rec_extra_padding(&rec));
213                         goto out;
214                 }
215
216                 /* Slow path. */
217                 newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
218                 if (!newdata) {
219                         ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
220                                            "tdb_append:"
221                                            " failed to allocate %zu bytes",
222                                            (size_t)(key.dsize + old_dlen
223                                                     + dbuf.dsize));
224                         goto out;
225                 }
226                 ecode = tdb->tdb2.io->tread(tdb, off + sizeof(rec) + key.dsize,
227                                             newdata, old_dlen);
228                 if (ecode != TDB_SUCCESS) {
229                         goto out_free_newdata;
230                 }
231                 memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
232                 new_dbuf.dptr = newdata;
233                 new_dbuf.dsize = old_dlen + dbuf.dsize;
234         } else {
235                 newdata = NULL;
236                 new_dbuf = dbuf;
237         }
238
239         /* If they're using tdb_append(), it implies they're growing record. */
240         ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
241
242 out_free_newdata:
243         free(newdata);
244 out:
245         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
246         return tdb->last_error = ecode;
247 }
248
249 enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
250                          struct tdb_data *data)
251 {
252         tdb_off_t off;
253         struct tdb_used_record rec;
254         struct hash_info h;
255         enum TDB_ERROR ecode;
256
257         if (tdb->flags & TDB_VERSION1)
258                 return tdb1_fetch(tdb, key, data);
259
260         off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
261         if (TDB_OFF_IS_ERR(off)) {
262                 return tdb->last_error = TDB_OFF_TO_ERR(off);
263         }
264
265         if (!off) {
266                 ecode = TDB_ERR_NOEXIST;
267         } else {
268                 data->dsize = rec_data_length(&rec);
269                 data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
270                                             data->dsize);
271                 if (TDB_PTR_IS_ERR(data->dptr)) {
272                         ecode = TDB_PTR_ERR(data->dptr);
273                 } else
274                         ecode = TDB_SUCCESS;
275         }
276
277         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
278         return tdb->last_error = ecode;
279 }
280
281 bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
282 {
283         tdb_off_t off;
284         struct tdb_used_record rec;
285         struct hash_info h;
286
287         if (tdb->flags & TDB_VERSION1) {
288                 return tdb1_exists(tdb, key);
289         }
290
291         off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
292         if (TDB_OFF_IS_ERR(off)) {
293                 tdb->last_error = TDB_OFF_TO_ERR(off);
294                 return false;
295         }
296         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
297
298         tdb->last_error = TDB_SUCCESS;
299         return off ? true : false;
300 }
301
302 enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
303 {
304         tdb_off_t off;
305         struct tdb_used_record rec;
306         struct hash_info h;
307         enum TDB_ERROR ecode;
308
309         if (tdb->flags & TDB_VERSION1) {
310                 if (tdb1_delete(tdb, key) == -1)
311                         return tdb->last_error;
312                 return TDB_SUCCESS;
313         }
314
315         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
316         if (TDB_OFF_IS_ERR(off)) {
317                 return tdb->last_error = TDB_OFF_TO_ERR(off);
318         }
319
320         if (!off) {
321                 ecode = TDB_ERR_NOEXIST;
322                 goto unlock;
323         }
324
325         ecode = delete_from_hash(tdb, &h);
326         if (ecode != TDB_SUCCESS) {
327                 goto unlock;
328         }
329
330         /* Free the deleted entry. */
331         tdb->stats.frees++;
332         ecode = add_free_record(tdb, off,
333                                 sizeof(struct tdb_used_record)
334                                 + rec_key_length(&rec)
335                                 + rec_data_length(&rec)
336                                 + rec_extra_padding(&rec),
337                                 TDB_LOCK_WAIT, true);
338
339         if (tdb->flags & TDB_SEQNUM)
340                 tdb_inc_seqnum(tdb);
341
342 unlock:
343         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
344         return tdb->last_error = ecode;
345 }
346
347 unsigned int tdb_get_flags(struct tdb_context *tdb)
348 {
349         return tdb->flags;
350 }
351
352 static bool inside_transaction(const struct tdb_context *tdb)
353 {
354         if (tdb->flags & TDB_VERSION1)
355                 return tdb->tdb1.transaction != NULL;
356         else
357                 return tdb->tdb2.transaction != NULL;
358 }
359
360 static bool readonly_changable(struct tdb_context *tdb, const char *caller)
361 {
362         if (inside_transaction(tdb)) {
363                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
364                                              TDB_LOG_USE_ERROR,
365                                              "%s: can't change"
366                                              " TDB_RDONLY inside transaction",
367                                              caller);
368                 return false;
369         }
370         return true;
371 }
372
373 void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
374 {
375         if (tdb->flags & TDB_INTERNAL) {
376                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
377                                              TDB_LOG_USE_ERROR,
378                                              "tdb_add_flag: internal db");
379                 return;
380         }
381         switch (flag) {
382         case TDB_NOLOCK:
383                 tdb->flags |= TDB_NOLOCK;
384                 break;
385         case TDB_NOMMAP:
386                 tdb->flags |= TDB_NOMMAP;
387                 tdb_munmap(tdb->file);
388                 break;
389         case TDB_NOSYNC:
390                 tdb->flags |= TDB_NOSYNC;
391                 break;
392         case TDB_SEQNUM:
393                 tdb->flags |= TDB_SEQNUM;
394                 break;
395         case TDB_ALLOW_NESTING:
396                 tdb->flags |= TDB_ALLOW_NESTING;
397                 break;
398         case TDB_RDONLY:
399                 if (readonly_changable(tdb, "tdb_add_flag"))
400                         tdb->flags |= TDB_RDONLY;
401                 break;
402         default:
403                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
404                                              TDB_LOG_USE_ERROR,
405                                              "tdb_add_flag: Unknown flag %u",
406                                              flag);
407         }
408 }
409
410 void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
411 {
412         if (tdb->flags & TDB_INTERNAL) {
413                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
414                                              TDB_LOG_USE_ERROR,
415                                              "tdb_remove_flag: internal db");
416                 return;
417         }
418         switch (flag) {
419         case TDB_NOLOCK:
420                 tdb->flags &= ~TDB_NOLOCK;
421                 break;
422         case TDB_NOMMAP:
423                 tdb->flags &= ~TDB_NOMMAP;
424                 tdb_mmap(tdb);
425                 break;
426         case TDB_NOSYNC:
427                 tdb->flags &= ~TDB_NOSYNC;
428                 break;
429         case TDB_SEQNUM:
430                 tdb->flags &= ~TDB_SEQNUM;
431                 break;
432         case TDB_ALLOW_NESTING:
433                 tdb->flags &= ~TDB_ALLOW_NESTING;
434                 break;
435         case TDB_RDONLY:
436                 if ((tdb->open_flags & O_ACCMODE) == O_RDONLY) {
437                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
438                                                      TDB_LOG_USE_ERROR,
439                                                      "tdb_remove_flag: can't"
440                                                      " remove TDB_RDONLY on tdb"
441                                                      " opened with O_RDONLY");
442                         break;
443                 }
444                 if (readonly_changable(tdb, "tdb_remove_flag"))
445                         tdb->flags &= ~TDB_RDONLY;
446                 break;
447         default:
448                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
449                                              TDB_LOG_USE_ERROR,
450                                              "tdb_remove_flag: Unknown flag %u",
451                                              flag);
452         }
453 }
454
455 const char *tdb_errorstr(enum TDB_ERROR ecode)
456 {
457         /* Gcc warns if you miss a case in the switch, so use that. */
458         switch (TDB_ERR_TO_OFF(ecode)) {
459         case TDB_ERR_TO_OFF(TDB_SUCCESS): return "Success";
460         case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT): return "Corrupt database";
461         case TDB_ERR_TO_OFF(TDB_ERR_IO): return "IO Error";
462         case TDB_ERR_TO_OFF(TDB_ERR_LOCK): return "Locking error";
463         case TDB_ERR_TO_OFF(TDB_ERR_OOM): return "Out of memory";
464         case TDB_ERR_TO_OFF(TDB_ERR_EXISTS): return "Record exists";
465         case TDB_ERR_TO_OFF(TDB_ERR_EINVAL): return "Invalid parameter";
466         case TDB_ERR_TO_OFF(TDB_ERR_NOEXIST): return "Record does not exist";
467         case TDB_ERR_TO_OFF(TDB_ERR_RDONLY): return "write not permitted";
468         }
469         return "Invalid error code";
470 }
471
472 enum TDB_ERROR tdb_error(struct tdb_context *tdb)
473 {
474         return tdb->last_error;
475 }
476
477 enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
478                                enum TDB_ERROR ecode,
479                                enum tdb_log_level level,
480                                const char *fmt, ...)
481 {
482         char *message;
483         va_list ap;
484         size_t len;
485         /* tdb_open paths care about errno, so save it. */
486         int saved_errno = errno;
487
488         if (!tdb->log_fn)
489                 return ecode;
490
491         va_start(ap, fmt);
492         len = vasprintf(&message, fmt, ap);
493         va_end(ap);
494
495         if (len < 0) {
496                 tdb->log_fn(tdb, TDB_LOG_ERROR, TDB_ERR_OOM,
497                             "out of memory formatting message:", tdb->log_data);
498                 tdb->log_fn(tdb, level, ecode, fmt, tdb->log_data);
499         } else {
500                 tdb->log_fn(tdb, level, ecode, message, tdb->log_data);
501                 free(message);
502         }
503         errno = saved_errno;
504         return ecode;
505 }
506
507 enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
508                                  TDB_DATA key,
509                                  enum TDB_ERROR (*parse)(TDB_DATA k,
510                                                          TDB_DATA d,
511                                                          void *data),
512                                  void *data)
513 {
514         tdb_off_t off;
515         struct tdb_used_record rec;
516         struct hash_info h;
517         enum TDB_ERROR ecode;
518
519         if (tdb->flags & TDB_VERSION1) {
520                 return tdb->last_error = tdb1_parse_record(tdb, key, parse,
521                                                            data);
522         }
523
524         off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
525         if (TDB_OFF_IS_ERR(off)) {
526                 return tdb->last_error = TDB_OFF_TO_ERR(off);
527         }
528
529         if (!off) {
530                 ecode = TDB_ERR_NOEXIST;
531         } else {
532                 const void *dptr;
533                 dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize,
534                                        rec_data_length(&rec), false);
535                 if (TDB_PTR_IS_ERR(dptr)) {
536                         ecode = TDB_PTR_ERR(dptr);
537                 } else {
538                         TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec));
539
540                         ecode = parse(key, d, data);
541                         tdb_access_release(tdb, dptr);
542                 }
543         }
544
545         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
546         return tdb->last_error = ecode;
547 }
548
549 const char *tdb_name(const struct tdb_context *tdb)
550 {
551         return tdb->name;
552 }
553
554 int64_t tdb_get_seqnum(struct tdb_context *tdb)
555 {
556         tdb_off_t off;
557
558         if (tdb->flags & TDB_VERSION1) {
559                 tdb1_off_t val;
560                 tdb->last_error = TDB_SUCCESS;
561                 val = tdb1_get_seqnum(tdb);
562
563                 if (tdb->last_error != TDB_SUCCESS)
564                         return TDB_ERR_TO_OFF(tdb->last_error);
565                 else
566                         return val;
567         }
568
569         off = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
570         if (TDB_OFF_IS_ERR(off))
571                 tdb->last_error = TDB_OFF_TO_ERR(off);
572         else
573                 tdb->last_error = TDB_SUCCESS;
574         return off;
575 }
576         
577
578 int tdb_fd(const struct tdb_context *tdb)
579 {
580         return tdb->file->fd;
581 }
582
583 struct traverse_state {
584         enum TDB_ERROR error;
585         struct tdb_context *dest_db;
586 };
587
588 /*
589   traverse function for repacking
590  */
591 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
592                            struct traverse_state *state)
593 {
594         state->error = tdb_store(state->dest_db, key, data, TDB_INSERT);
595         if (state->error != TDB_SUCCESS) {
596                 return -1;
597         }
598         return 0;
599 }
600
601 enum TDB_ERROR tdb_repack(struct tdb_context *tdb)
602 {
603         struct tdb_context *tmp_db;
604         struct traverse_state state;
605
606         state.error = tdb_transaction_start(tdb);
607         if (state.error != TDB_SUCCESS) {
608                 return state.error;
609         }
610
611         tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
612         if (tmp_db == NULL) {
613                 state.error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
614                                          __location__
615                                          " Failed to create tmp_db");
616                 tdb_transaction_cancel(tdb);
617                 return tdb->last_error = state.error;
618         }
619
620         state.dest_db = tmp_db;
621         if (tdb_traverse(tdb, repack_traverse, &state) < 0) {
622                 goto fail;
623         }
624
625         state.error = tdb_wipe_all(tdb);
626         if (state.error != TDB_SUCCESS) {
627                 goto fail;
628         }
629
630         state.dest_db = tdb;
631         if (tdb_traverse(tmp_db, repack_traverse, &state) < 0) {
632                 goto fail;
633         }
634
635         tdb_close(tmp_db);
636         return tdb_transaction_commit(tdb);
637
638 fail:
639         tdb_transaction_cancel(tdb);
640         tdb_close(tmp_db);
641         return state.error;
642 }