git.ozlabs.org Git - ccan/blob - ccan/ntdb/ntdb.c
ntdb: assume HAVE_CCAN in header.
[ccan] / ccan / ntdb / ntdb.c
1  /*
2    Trivial Database 2: fetch, store and misc routines.
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #ifndef HAVE_LIBREPLACE
20 #include <stdarg.h>
21 #endif
22
23 static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
24                                      ntdb_off_t off,
25                                      ntdb_len_t keylen,
26                                      ntdb_len_t datalen,
27                                      struct ntdb_used_record *rec)
28 {
29         uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
30         enum NTDB_ERROR ecode;
31
32         ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
33                            keylen + dataroom);
34         if (ecode == NTDB_SUCCESS) {
35                 ecode = ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
36         }
37         return ecode;
38 }
39
40 static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
41                                    struct hash_info *h,
42                                    NTDB_DATA key, NTDB_DATA dbuf,
43                                    ntdb_off_t old_off, ntdb_len_t old_room,
44                                    bool growing)
45 {
46         ntdb_off_t new_off;
47         enum NTDB_ERROR ecode;
48
49         /* Allocate a new record. */
50         new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing);
51         if (NTDB_OFF_IS_ERR(new_off)) {
52                 return NTDB_OFF_TO_ERR(new_off);
53         }
54
55         /* We didn't like the existing one: remove it. */
56         if (old_off) {
57                 ntdb->stats.frees++;
58                 ecode = add_free_record(ntdb, old_off,
59                                         sizeof(struct ntdb_used_record)
60                                         + key.dsize + old_room,
61                                         NTDB_LOCK_WAIT, true);
62                 if (ecode == NTDB_SUCCESS)
63                         ecode = replace_in_hash(ntdb, h, new_off);
64         } else {
65                 ecode = add_to_hash(ntdb, h, new_off);
66         }
67         if (ecode != NTDB_SUCCESS) {
68                 return ecode;
69         }
70
71         new_off += sizeof(struct ntdb_used_record);
72         ecode = ntdb->io->twrite(ntdb, new_off, key.dptr, key.dsize);
73         if (ecode != NTDB_SUCCESS) {
74                 return ecode;
75         }
76
77         new_off += key.dsize;
78         ecode = ntdb->io->twrite(ntdb, new_off, dbuf.dptr, dbuf.dsize);
79         if (ecode != NTDB_SUCCESS) {
80                 return ecode;
81         }
82
83         if (ntdb->flags & NTDB_SEQNUM)
84                 ntdb_inc_seqnum(ntdb);
85
86         return NTDB_SUCCESS;
87 }
88
89 static enum NTDB_ERROR update_data(struct ntdb_context *ntdb,
90                                   ntdb_off_t off,
91                                   NTDB_DATA dbuf,
92                                   ntdb_len_t extra)
93 {
94         enum NTDB_ERROR ecode;
95
96         ecode = ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize);
97         if (ecode == NTDB_SUCCESS && extra) {
98                 /* Put a zero in; future versions may append other data. */
99                 ecode = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1);
100         }
101         if (ntdb->flags & NTDB_SEQNUM)
102                 ntdb_inc_seqnum(ntdb);
103
104         return ecode;
105 }
106
107 _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
108                          NTDB_DATA key, NTDB_DATA dbuf, int flag)
109 {
110         struct hash_info h;
111         ntdb_off_t off;
112         ntdb_len_t old_room = 0;
113         struct ntdb_used_record rec;
114         enum NTDB_ERROR ecode;
115
116         off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
117         if (NTDB_OFF_IS_ERR(off)) {
118                 return NTDB_OFF_TO_ERR(off);
119         }
120
121         /* Now we have lock on this hash bucket. */
122         if (flag == NTDB_INSERT) {
123                 if (off) {
124                         ecode = NTDB_ERR_EXISTS;
125                         goto out;
126                 }
127         } else {
128                 if (off) {
129                         old_room = rec_data_length(&rec)
130                                 + rec_extra_padding(&rec);
131                         if (old_room >= dbuf.dsize) {
132                                 /* Can modify in-place.  Easy! */
133                                 ecode = update_rec_hdr(ntdb, off,
134                                                        key.dsize, dbuf.dsize,
135                                                        &rec);
136                                 if (ecode != NTDB_SUCCESS) {
137                                         goto out;
138                                 }
139                                 ecode = update_data(ntdb,
140                                                     off + sizeof(rec)
141                                                     + key.dsize, dbuf,
142                                                     old_room - dbuf.dsize);
143                                 if (ecode != NTDB_SUCCESS) {
144                                         goto out;
145                                 }
146                                 ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
147                                 return NTDB_SUCCESS;
148                         }
149                 } else {
150                         if (flag == NTDB_MODIFY) {
151                                 /* if the record doesn't exist and we
152                                    are in NTDB_MODIFY mode then we should fail
153                                    the store */
154                                 ecode = NTDB_ERR_NOEXIST;
155                                 goto out;
156                         }
157                 }
158         }
159
160         /* If we didn't use the old record, this implies we're growing. */
161         ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off);
162 out:
163         ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
164         return ecode;
165 }
166
167 _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
168                           NTDB_DATA key, NTDB_DATA dbuf)
169 {
170         struct hash_info h;
171         ntdb_off_t off;
172         struct ntdb_used_record rec;
173         ntdb_len_t old_room = 0, old_dlen;
174         unsigned char *newdata;
175         NTDB_DATA new_dbuf;
176         enum NTDB_ERROR ecode;
177
178         off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
179         if (NTDB_OFF_IS_ERR(off)) {
180                 return NTDB_OFF_TO_ERR(off);
181         }
182
183         if (off) {
184                 old_dlen = rec_data_length(&rec);
185                 old_room = old_dlen + rec_extra_padding(&rec);
186
187                 /* Fast path: can append in place. */
188                 if (rec_extra_padding(&rec) >= dbuf.dsize) {
189                         ecode = update_rec_hdr(ntdb, off, key.dsize,
190                                                old_dlen + dbuf.dsize, &rec);
191                         if (ecode != NTDB_SUCCESS) {
192                                 goto out;
193                         }
194
195                         off += sizeof(rec) + key.dsize + old_dlen;
196                         ecode = update_data(ntdb, off, dbuf,
197                                             rec_extra_padding(&rec));
198                         goto out;
199                 }
200
201                 /* Slow path. */
202                 newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize,
203                                      ntdb->alloc_data);
204                 if (!newdata) {
205                         ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
206                                            "ntdb_append:"
207                                            " failed to allocate %zu bytes",
208                                            (size_t)(key.dsize + old_dlen
209                                                     + dbuf.dsize));
210                         goto out;
211                 }
212                 ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize,
213                                        newdata, old_dlen);
214                 if (ecode != NTDB_SUCCESS) {
215                         goto out_free_newdata;
216                 }
217                 memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
218                 new_dbuf.dptr = newdata;
219                 new_dbuf.dsize = old_dlen + dbuf.dsize;
220         } else {
221                 newdata = NULL;
222                 new_dbuf = dbuf;
223         }
224
225         /* If they're using ntdb_append(), it implies they're growing record. */
226         ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true);
227
228 out_free_newdata:
229         ntdb->free_fn(newdata, ntdb->alloc_data);
230 out:
231         ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
232         return ecode;
233 }
234
235 _PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
236                                     NTDB_DATA *data)
237 {
238         ntdb_off_t off;
239         struct ntdb_used_record rec;
240         struct hash_info h;
241         enum NTDB_ERROR ecode;
242         const char *keyp;
243
244         off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
245         if (NTDB_OFF_IS_ERR(off)) {
246                 return NTDB_OFF_TO_ERR(off);
247         }
248
249         if (!off) {
250                 ecode = NTDB_ERR_NOEXIST;
251         } else {
252                 data->dsize = rec_data_length(&rec);
253                 data->dptr = ntdb->alloc_fn(ntdb, data->dsize, ntdb->alloc_data);
254                 if (unlikely(!data->dptr)) {
255                         ecode = NTDB_ERR_OOM;
256                 } else {
257                         memcpy(data->dptr, keyp + key.dsize, data->dsize);
258                         ecode = NTDB_SUCCESS;
259                 }
260                 ntdb_access_release(ntdb, keyp);
261         }
262
263         ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
264         return ecode;
265 }
266
267 _PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
268 {
269         ntdb_off_t off;
270         struct ntdb_used_record rec;
271         struct hash_info h;
272
273         off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
274         if (NTDB_OFF_IS_ERR(off)) {
275                 return false;
276         }
277         ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
278
279         return off ? true : false;
280 }
281
282 _PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
283 {
284         ntdb_off_t off;
285         struct ntdb_used_record rec;
286         struct hash_info h;
287         enum NTDB_ERROR ecode;
288
289         off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
290         if (NTDB_OFF_IS_ERR(off)) {
291                 return NTDB_OFF_TO_ERR(off);
292         }
293
294         if (!off) {
295                 ecode = NTDB_ERR_NOEXIST;
296                 goto unlock;
297         }
298
299         ecode = delete_from_hash(ntdb, &h);
300         if (ecode != NTDB_SUCCESS) {
301                 goto unlock;
302         }
303
304         /* Free the deleted entry. */
305         ntdb->stats.frees++;
306         ecode = add_free_record(ntdb, off,
307                                 sizeof(struct ntdb_used_record)
308                                 + rec_key_length(&rec)
309                                 + rec_data_length(&rec)
310                                 + rec_extra_padding(&rec),
311                                 NTDB_LOCK_WAIT, true);
312
313         if (ntdb->flags & NTDB_SEQNUM)
314                 ntdb_inc_seqnum(ntdb);
315
316 unlock:
317         ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
318         return ecode;
319 }
320
321 _PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb)
322 {
323         return ntdb->flags;
324 }
325
326 static bool inside_transaction(const struct ntdb_context *ntdb)
327 {
328         return ntdb->transaction != NULL;
329 }
330
331 static bool readonly_changable(struct ntdb_context *ntdb, const char *caller)
332 {
333         if (inside_transaction(ntdb)) {
334                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
335                             "%s: can't change"
336                             " NTDB_RDONLY inside transaction",
337                             caller);
338                 return false;
339         }
340         return true;
341 }
342
343 _PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag)
344 {
345         if (ntdb->flags & NTDB_INTERNAL) {
346                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
347                             "ntdb_add_flag: internal db");
348                 return;
349         }
350         switch (flag) {
351         case NTDB_NOLOCK:
352                 ntdb->flags |= NTDB_NOLOCK;
353                 break;
354         case NTDB_NOMMAP:
355                 if (ntdb->file->direct_count) {
356                         ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
357                                     "ntdb_add_flag: Can't get NTDB_NOMMAP from"
358                                     " ntdb_parse_record!");
359                         return;
360                 }
361                 ntdb->flags |= NTDB_NOMMAP;
362 #ifndef HAVE_INCOHERENT_MMAP
363                 ntdb_munmap(ntdb);
364 #endif
365                 break;
366         case NTDB_NOSYNC:
367                 ntdb->flags |= NTDB_NOSYNC;
368                 break;
369         case NTDB_SEQNUM:
370                 ntdb->flags |= NTDB_SEQNUM;
371                 break;
372         case NTDB_ALLOW_NESTING:
373                 ntdb->flags |= NTDB_ALLOW_NESTING;
374                 break;
375         case NTDB_RDONLY:
376                 if (readonly_changable(ntdb, "ntdb_add_flag"))
377                         ntdb->flags |= NTDB_RDONLY;
378                 break;
379         default:
380                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
381                             "ntdb_add_flag: Unknown flag %u", flag);
382         }
383 }
384
385 _PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag)
386 {
387         if (ntdb->flags & NTDB_INTERNAL) {
388                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
389                             "ntdb_remove_flag: internal db");
390                 return;
391         }
392         switch (flag) {
393         case NTDB_NOLOCK:
394                 ntdb->flags &= ~NTDB_NOLOCK;
395                 break;
396         case NTDB_NOMMAP:
397                 ntdb->flags &= ~NTDB_NOMMAP;
398 #ifndef HAVE_INCOHERENT_MMAP
399                 /* If mmap incoherent, we were mmaping anyway. */
400                 ntdb_mmap(ntdb);
401 #endif
402                 break;
403         case NTDB_NOSYNC:
404                 ntdb->flags &= ~NTDB_NOSYNC;
405                 break;
406         case NTDB_SEQNUM:
407                 ntdb->flags &= ~NTDB_SEQNUM;
408                 break;
409         case NTDB_ALLOW_NESTING:
410                 ntdb->flags &= ~NTDB_ALLOW_NESTING;
411                 break;
412         case NTDB_RDONLY:
413                 if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) {
414                         ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
415                                     "ntdb_remove_flag: can't"
416                                     " remove NTDB_RDONLY on ntdb"
417                                     " opened with O_RDONLY");
418                         break;
419                 }
420                 if (readonly_changable(ntdb, "ntdb_remove_flag"))
421                         ntdb->flags &= ~NTDB_RDONLY;
422                 break;
423         default:
424                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
425                             "ntdb_remove_flag: Unknown flag %u",
426                             flag);
427         }
428 }
429
430 _PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode)
431 {
432         /* Gcc warns if you miss a case in the switch, so use that. */
433         switch (NTDB_ERR_TO_OFF(ecode)) {
434         case NTDB_ERR_TO_OFF(NTDB_SUCCESS): return "Success";
435         case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT): return "Corrupt database";
436         case NTDB_ERR_TO_OFF(NTDB_ERR_IO): return "IO Error";
437         case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK): return "Locking error";
438         case NTDB_ERR_TO_OFF(NTDB_ERR_OOM): return "Out of memory";
439         case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS): return "Record exists";
440         case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL): return "Invalid parameter";
441         case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST): return "Record does not exist";
442         case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY): return "write not permitted";
443         }
444         return "Invalid error code";
445 }
446
447 enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb,
448                                enum NTDB_ERROR ecode,
449                                enum ntdb_log_level level,
450                                const char *fmt, ...)
451 {
452         char *message;
453         va_list ap;
454         size_t len;
455         /* ntdb_open paths care about errno, so save it. */
456         int saved_errno = errno;
457
458         if (!ntdb->log_fn)
459                 return ecode;
460
461         va_start(ap, fmt);
462         len = vsnprintf(NULL, 0, fmt, ap);
463         va_end(ap);
464
465         message = ntdb->alloc_fn(ntdb, len + 1, ntdb->alloc_data);
466         if (!message) {
467                 ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM,
468                             "out of memory formatting message:", ntdb->log_data);
469                 ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
470         } else {
471                 va_start(ap, fmt);
472                 vsnprintf(message, len+1, fmt, ap);
473                 va_end(ap);
474                 ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data);
475                 ntdb->free_fn(message, ntdb->alloc_data);
476         }
477         errno = saved_errno;
478         return ecode;
479 }
480
481 _PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
482                                  NTDB_DATA key,
483                                  enum NTDB_ERROR (*parse)(NTDB_DATA k,
484                                                          NTDB_DATA d,
485                                                          void *data),
486                                  void *data)
487 {
488         ntdb_off_t off;
489         struct ntdb_used_record rec;
490         struct hash_info h;
491         enum NTDB_ERROR ecode;
492         const char *keyp;
493
494         off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
495         if (NTDB_OFF_IS_ERR(off)) {
496                 return NTDB_OFF_TO_ERR(off);
497         }
498
499         if (!off) {
500                 ecode = NTDB_ERR_NOEXIST;
501         } else {
502                 unsigned int old_flags;
503                 NTDB_DATA d = ntdb_mkdata(keyp + key.dsize,
504                                           rec_data_length(&rec));
505
506                 /*
507                  * Make sure they don't try to write db, since they
508                  * have read lock!  They can if they've done
509                  * ntdb_lockall(): if it was ntdb_lockall_read, that'll
510                  * stop them doing a write operation anyway.
511                  */
512                 old_flags = ntdb->flags;
513                 if (!ntdb->file->allrecord_lock.count &&
514                     !(ntdb->flags & NTDB_NOLOCK)) {
515                         ntdb->flags |= NTDB_RDONLY;
516                 }
517                 ecode = parse(key, d, data);
518                 ntdb->flags = old_flags;
519                 ntdb_access_release(ntdb, keyp);
520         }
521
522         ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
523         return ecode;
524 }
525
526 _PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb)
527 {
528         return ntdb->name;
529 }
530
531 _PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb)
532 {
533         return ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
534 }
535
536
537 _PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb)
538 {
539         return ntdb->file->fd;
540 }
541
542 struct traverse_state {
543         enum NTDB_ERROR error;
544         struct ntdb_context *dest_db;
545 };
546
547 /*
548   traverse function for repacking
549  */
550 static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data,
551                            struct traverse_state *state)
552 {
553         state->error = ntdb_store(state->dest_db, key, data, NTDB_INSERT);
554         if (state->error != NTDB_SUCCESS) {
555                 return -1;
556         }
557         return 0;
558 }
559
560 _PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb)
561 {
562         struct ntdb_context *tmp_db;
563         struct traverse_state state;
564
565         state.error = ntdb_transaction_start(ntdb);
566         if (state.error != NTDB_SUCCESS) {
567                 return state.error;
568         }
569
570         tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
571         if (tmp_db == NULL) {
572                 state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
573                                          __location__
574                                          " Failed to create tmp_db");
575                 ntdb_transaction_cancel(ntdb);
576                 return state.error;
577         }
578
579         state.dest_db = tmp_db;
580         if (ntdb_traverse(ntdb, repack_traverse, &state) < 0) {
581                 goto fail;
582         }
583
584         state.error = ntdb_wipe_all(ntdb);
585         if (state.error != NTDB_SUCCESS) {
586                 goto fail;
587         }
588
589         state.dest_db = ntdb;
590         if (ntdb_traverse(tmp_db, repack_traverse, &state) < 0) {
591                 goto fail;
592         }
593
594         ntdb_close(tmp_db);
595         return ntdb_transaction_commit(ntdb);
596
597 fail:
598         ntdb_transaction_cancel(ntdb);
599         ntdb_close(tmp_db);
600         return state.error;
601 }