tdb2: move mmap into struct tdb_file
[ccan] / ccan / tdb2 / tdb.c
1 #include "private.h"
2 #include <ccan/asprintf/asprintf.h>
3 #include <stdarg.h>
4
/* The null return: an empty tdb_data (NULL pointer, zero length). */
struct tdb_data tdb_null = { .dptr = NULL, .dsize = 0 };
7
8 static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
9                                      tdb_off_t off,
10                                      tdb_len_t keylen,
11                                      tdb_len_t datalen,
12                                      struct tdb_used_record *rec,
13                                      uint64_t h)
14 {
15         uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
16         enum TDB_ERROR ecode;
17
18         ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
19                            keylen + dataroom, h);
20         if (ecode == TDB_SUCCESS) {
21                 ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
22         }
23         return ecode;
24 }
25
26 static enum TDB_ERROR replace_data(struct tdb_context *tdb,
27                                    struct hash_info *h,
28                                    struct tdb_data key, struct tdb_data dbuf,
29                                    tdb_off_t old_off, tdb_len_t old_room,
30                                    bool growing)
31 {
32         tdb_off_t new_off;
33         enum TDB_ERROR ecode;
34
35         /* Allocate a new record. */
36         new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
37                         growing);
38         if (TDB_OFF_IS_ERR(new_off)) {
39                 return new_off;
40         }
41
42         /* We didn't like the existing one: remove it. */
43         if (old_off) {
44                 add_stat(tdb, frees, 1);
45                 ecode = add_free_record(tdb, old_off,
46                                         sizeof(struct tdb_used_record)
47                                         + key.dsize + old_room);
48                 if (ecode == TDB_SUCCESS)
49                         ecode = replace_in_hash(tdb, h, new_off);
50         } else {
51                 ecode = add_to_hash(tdb, h, new_off);
52         }
53         if (ecode != TDB_SUCCESS) {
54                 return ecode;
55         }
56
57         new_off += sizeof(struct tdb_used_record);
58         ecode = tdb->methods->twrite(tdb, new_off, key.dptr, key.dsize);
59         if (ecode != TDB_SUCCESS) {
60                 return ecode;
61         }
62
63         new_off += key.dsize;
64         ecode = tdb->methods->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
65         if (ecode != TDB_SUCCESS) {
66                 return ecode;
67         }
68
69         /* FIXME: tdb_increment_seqnum(tdb); */
70         return TDB_SUCCESS;
71 }
72
73 static enum TDB_ERROR update_data(struct tdb_context *tdb,
74                                   tdb_off_t off,
75                                   struct tdb_data dbuf,
76                                   tdb_len_t extra)
77 {
78         enum TDB_ERROR ecode;
79
80         ecode = tdb->methods->twrite(tdb, off, dbuf.dptr, dbuf.dsize);
81         if (ecode == TDB_SUCCESS && extra) {
82                 /* Put a zero in; future versions may append other data. */
83                 ecode = tdb->methods->twrite(tdb, off + dbuf.dsize, "", 1);
84         }
85         return ecode;
86 }
87
88 enum TDB_ERROR tdb_store(struct tdb_context *tdb,
89                          struct tdb_data key, struct tdb_data dbuf, int flag)
90 {
91         struct hash_info h;
92         tdb_off_t off;
93         tdb_len_t old_room = 0;
94         struct tdb_used_record rec;
95         enum TDB_ERROR ecode;
96
97         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
98         if (TDB_OFF_IS_ERR(off)) {
99                 return off;
100         }
101
102         /* Now we have lock on this hash bucket. */
103         if (flag == TDB_INSERT) {
104                 if (off) {
105                         ecode = TDB_ERR_EXISTS;
106                         goto out;
107                 }
108         } else {
109                 if (off) {
110                         old_room = rec_data_length(&rec)
111                                 + rec_extra_padding(&rec);
112                         if (old_room >= dbuf.dsize) {
113                                 /* Can modify in-place.  Easy! */
114                                 ecode = update_rec_hdr(tdb, off,
115                                                        key.dsize, dbuf.dsize,
116                                                        &rec, h.h);
117                                 if (ecode != TDB_SUCCESS) {
118                                         goto out;
119                                 }
120                                 ecode = update_data(tdb,
121                                                     off + sizeof(rec)
122                                                     + key.dsize, dbuf,
123                                                     old_room - dbuf.dsize);
124                                 if (ecode != TDB_SUCCESS) {
125                                         goto out;
126                                 }
127                                 tdb_unlock_hashes(tdb, h.hlock_start,
128                                                   h.hlock_range, F_WRLCK);
129                                 return TDB_SUCCESS;
130                         }
131                 } else {
132                         if (flag == TDB_MODIFY) {
133                                 /* if the record doesn't exist and we
134                                    are in TDB_MODIFY mode then we should fail
135                                    the store */
136                                 ecode = TDB_ERR_NOEXIST;
137                                 goto out;
138                         }
139                 }
140         }
141
142         /* If we didn't use the old record, this implies we're growing. */
143         ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);
144 out:
145         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
146         return ecode;
147 }
148
149 enum TDB_ERROR tdb_append(struct tdb_context *tdb,
150                           struct tdb_data key, struct tdb_data dbuf)
151 {
152         struct hash_info h;
153         tdb_off_t off;
154         struct tdb_used_record rec;
155         tdb_len_t old_room = 0, old_dlen;
156         unsigned char *newdata;
157         struct tdb_data new_dbuf;
158         enum TDB_ERROR ecode;
159
160         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
161         if (TDB_OFF_IS_ERR(off)) {
162                 return off;
163         }
164
165         if (off) {
166                 old_dlen = rec_data_length(&rec);
167                 old_room = old_dlen + rec_extra_padding(&rec);
168
169                 /* Fast path: can append in place. */
170                 if (rec_extra_padding(&rec) >= dbuf.dsize) {
171                         ecode = update_rec_hdr(tdb, off, key.dsize,
172                                                old_dlen + dbuf.dsize, &rec,
173                                                h.h);
174                         if (ecode != TDB_SUCCESS) {
175                                 goto out;
176                         }
177
178                         off += sizeof(rec) + key.dsize + old_dlen;
179                         ecode = update_data(tdb, off, dbuf,
180                                             rec_extra_padding(&rec));
181                         goto out;
182                 }
183
184                 /* Slow path. */
185                 newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
186                 if (!newdata) {
187                         ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
188                                            "tdb_append:"
189                                            " failed to allocate %zu bytes",
190                                            (size_t)(key.dsize + old_dlen
191                                                     + dbuf.dsize));
192                         goto out;
193                 }
194                 ecode = tdb->methods->tread(tdb, off + sizeof(rec) + key.dsize,
195                                             newdata, old_dlen);
196                 if (ecode != TDB_SUCCESS) {
197                         goto out_free_newdata;
198                 }
199                 memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
200                 new_dbuf.dptr = newdata;
201                 new_dbuf.dsize = old_dlen + dbuf.dsize;
202         } else {
203                 newdata = NULL;
204                 new_dbuf = dbuf;
205         }
206
207         /* If they're using tdb_append(), it implies they're growing record. */
208         ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
209
210 out_free_newdata:
211         free(newdata);
212 out:
213         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
214         return ecode;
215 }
216
217 enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
218                          struct tdb_data *data)
219 {
220         tdb_off_t off;
221         struct tdb_used_record rec;
222         struct hash_info h;
223         enum TDB_ERROR ecode;
224
225         off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
226         if (TDB_OFF_IS_ERR(off)) {
227                 return off;
228         }
229
230         if (!off) {
231                 ecode = TDB_ERR_NOEXIST;
232         } else {
233                 data->dsize = rec_data_length(&rec);
234                 data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
235                                             data->dsize);
236                 if (TDB_PTR_IS_ERR(data->dptr)) {
237                         ecode = TDB_PTR_ERR(data->dptr);
238                 } else
239                         ecode = TDB_SUCCESS;
240         }
241
242         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
243         return ecode;
244 }
245
246 enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
247 {
248         tdb_off_t off;
249         struct tdb_used_record rec;
250         struct hash_info h;
251         enum TDB_ERROR ecode;
252
253         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
254         if (TDB_OFF_IS_ERR(off)) {
255                 return off;
256         }
257
258         if (!off) {
259                 ecode = TDB_ERR_NOEXIST;
260                 goto unlock;
261         }
262
263         ecode = delete_from_hash(tdb, &h);
264         if (ecode != TDB_SUCCESS) {
265                 goto unlock;
266         }
267
268         /* Free the deleted entry. */
269         add_stat(tdb, frees, 1);
270         ecode = add_free_record(tdb, off,
271                                 sizeof(struct tdb_used_record)
272                                 + rec_key_length(&rec)
273                                 + rec_data_length(&rec)
274                                 + rec_extra_padding(&rec));
275
276 unlock:
277         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
278         return ecode;
279 }
280
281 unsigned int tdb_get_flags(struct tdb_context *tdb)
282 {
283         return tdb->flags;
284 }
285
286 void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
287 {
288         if (tdb->flags & TDB_INTERNAL) {
289                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
290                            "tdb_add_flag: internal db");
291                 return;
292         }
293         switch (flag) {
294         case TDB_NOLOCK:
295                 tdb->flags |= TDB_NOLOCK;
296                 break;
297         case TDB_NOMMAP:
298                 tdb->flags |= TDB_NOMMAP;
299                 tdb_munmap(tdb->file);
300                 break;
301         case TDB_NOSYNC:
302                 tdb->flags |= TDB_NOSYNC;
303                 break;
304         default:
305                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
306                            "tdb_add_flag: Unknown flag %u", flag);
307         }
308 }
309
310 void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
311 {
312         if (tdb->flags & TDB_INTERNAL) {
313                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
314                            "tdb_remove_flag: internal db");
315                 return;
316         }
317         switch (flag) {
318         case TDB_NOLOCK:
319                 tdb->flags &= ~TDB_NOLOCK;
320                 break;
321         case TDB_NOMMAP:
322                 tdb->flags &= ~TDB_NOMMAP;
323                 tdb_mmap(tdb);
324                 break;
325         case TDB_NOSYNC:
326                 tdb->flags &= ~TDB_NOSYNC;
327                 break;
328         default:
329                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
330                            "tdb_remove_flag: Unknown flag %u", flag);
331         }
332 }
333
334 const char *tdb_errorstr(enum TDB_ERROR ecode)
335 {
336         /* Gcc warns if you miss a case in the switch, so use that. */
337         switch (ecode) {
338         case TDB_SUCCESS: return "Success";
339         case TDB_ERR_CORRUPT: return "Corrupt database";
340         case TDB_ERR_IO: return "IO Error";
341         case TDB_ERR_LOCK: return "Locking error";
342         case TDB_ERR_OOM: return "Out of memory";
343         case TDB_ERR_EXISTS: return "Record exists";
344         case TDB_ERR_EINVAL: return "Invalid parameter";
345         case TDB_ERR_NOEXIST: return "Record does not exist";
346         case TDB_ERR_RDONLY: return "write not permitted";
347         }
348         return "Invalid error code";
349 }
350
351 enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
352                                enum TDB_ERROR ecode,
353                                enum tdb_log_level level,
354                                const char *fmt, ...)
355 {
356         char *message;
357         va_list ap;
358         size_t len;
359         /* tdb_open paths care about errno, so save it. */
360         int saved_errno = errno;
361
362         if (!tdb->logfn)
363                 return ecode;
364
365         va_start(ap, fmt);
366         len = vasprintf(&message, fmt, ap);
367         va_end(ap);
368
369         if (len < 0) {
370                 tdb->logfn(tdb, TDB_LOG_ERROR, tdb->log_private,
371                            "out of memory formatting message:");
372                 tdb->logfn(tdb, level, tdb->log_private, fmt);
373         } else {
374                 tdb->logfn(tdb, level, tdb->log_private, message);
375                 free(message);
376         }
377         errno = saved_errno;
378         return ecode;
379 }