tdb2: make tdb_check typesafe.
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
71                               bool probe)
72 {
73         struct stat st;
74         enum TDB_ERROR ecode;
75
76         /* We can't hold pointers during this: we could unmap! */
77         assert(!tdb->direct_access
78                || (tdb->flags & TDB_NOLOCK)
79                || tdb_has_expansion_lock(tdb));
80
81         if (len <= tdb->map_size)
82                 return 0;
83         if (tdb->flags & TDB_INTERNAL) {
84                 if (!probe) {
85                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86                                  "tdb_oob len %lld beyond internal"
87                                  " malloc size %lld",
88                                  (long long)len,
89                                  (long long)tdb->map_size);
90                 }
91                 return TDB_ERR_IO;
92         }
93
94         ecode = tdb_lock_expand(tdb, F_RDLCK);
95         if (ecode != TDB_SUCCESS) {
96                 return ecode;
97         }
98
99         if (fstat(tdb->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return TDB_ERR_IO;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return TDB_ERR_IO;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb);
119
120         tdb->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return TDB_SUCCESS;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* Return first non-zero offset in offset array, or end, or -ve error. */
137 /* FIXME: Return the off? */
138 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
139                               tdb_off_t base, uint64_t start, uint64_t end)
140 {
141         uint64_t i;
142         const uint64_t *val;
143
144         /* Zero vs non-zero is the same unconverted: minor optimization. */
145         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
146                               (end - start) * sizeof(tdb_off_t), false);
147         if (TDB_PTR_IS_ERR(val)) {
148                 return TDB_PTR_ERR(val);
149         }
150
151         for (i = 0; i < (end - start); i++) {
152                 if (val[i])
153                         break;
154         }
155         tdb_access_release(tdb, val);
156         return start + i;
157 }
158
159 /* Return first zero offset in num offset array, or num, or -ve error. */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
161                            uint64_t num)
162 {
163         uint64_t i;
164         const uint64_t *val;
165
166         /* Zero vs non-zero is the same unconverted: minor optimization. */
167         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168         if (TDB_PTR_IS_ERR(val)) {
169                 return TDB_PTR_ERR(val);
170         }
171
172         for (i = 0; i < num; i++) {
173                 if (!val[i])
174                         break;
175         }
176         tdb_access_release(tdb, val);
177         return i;
178 }
179
180 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
181 {
182         char buf[8192] = { 0 };
183         void *p = tdb->methods->direct(tdb, off, len, true);
184         enum TDB_ERROR ecode = TDB_SUCCESS;
185
186         assert(!tdb->read_only);
187         if (TDB_PTR_IS_ERR(p)) {
188                 return TDB_PTR_ERR(p);
189         }
190         if (p) {
191                 memset(p, 0, len);
192                 return ecode;
193         }
194         while (len) {
195                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
196                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
197                 if (ecode != TDB_SUCCESS) {
198                         break;
199                 }
200                 len -= todo;
201                 off += todo;
202         }
203         return ecode;
204 }
205
206 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
207 {
208         tdb_off_t ret;
209         enum TDB_ERROR ecode;
210
211         if (likely(!(tdb->flags & TDB_CONVERT))) {
212                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
213                                                     false);
214                 if (TDB_PTR_IS_ERR(p)) {
215                         return TDB_PTR_ERR(p);
216                 }
217                 if (p)
218                         return *p;
219         }
220
221         ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
222         if (ecode != TDB_SUCCESS) {
223                 return ecode;
224         }
225         return ret;
226 }
227
228 /* write a lump of data at a specified offset */
229 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
230                                 const void *buf, tdb_len_t len)
231 {
232         enum TDB_ERROR ecode;
233
234         if (tdb->read_only) {
235                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
236                                   "Write to read-only database");
237         }
238
239         ecode = tdb->methods->oob(tdb, off + len, 0);
240         if (ecode != TDB_SUCCESS) {
241                 return ecode;
242         }
243
244         if (tdb->map_ptr) {
245                 memcpy(off + (char *)tdb->map_ptr, buf, len);
246         } else {
247                 ssize_t ret;
248                 ret = pwrite(tdb->fd, buf, len, off);
249                 if (ret != len) {
250                         /* This shouldn't happen: we avoid sparse files. */
251                         if (ret >= 0)
252                                 errno = ENOSPC;
253
254                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
255                                           "tdb_write: %zi at %zu len=%zu (%s)",
256                                           ret, (size_t)off, (size_t)len,
257                                           strerror(errno));
258                 }
259         }
260         return TDB_SUCCESS;
261 }
262
263 /* read a lump of data at a specified offset */
264 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
265                                void *buf, tdb_len_t len)
266 {
267         enum TDB_ERROR ecode;
268
269         ecode = tdb->methods->oob(tdb, off + len, 0);
270         if (ecode != TDB_SUCCESS) {
271                 return ecode;
272         }
273
274         if (tdb->map_ptr) {
275                 memcpy(buf, off + (char *)tdb->map_ptr, len);
276         } else {
277                 ssize_t r = pread(tdb->fd, buf, len, off);
278                 if (r != len) {
279                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
280                                           "tdb_read failed with %zi at %zu "
281                                           "len=%zu (%s) map_size=%zu",
282                                           r, (size_t)off, (size_t)len,
283                                           strerror(errno),
284                                           (size_t)tdb->map_size);
285                 }
286         }
287         return TDB_SUCCESS;
288 }
289
290 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
291                                  const void *rec, size_t len)
292 {
293         enum TDB_ERROR ecode;
294
295         if (unlikely((tdb->flags & TDB_CONVERT))) {
296                 void *conv = malloc(len);
297                 if (!conv) {
298                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
299                                           "tdb_write: no memory converting"
300                                           " %zu bytes", len);
301                 }
302                 memcpy(conv, rec, len);
303                 ecode = tdb->methods->twrite(tdb, off,
304                                            tdb_convert(tdb, conv, len), len);
305                 free(conv);
306         } else {
307                 ecode = tdb->methods->twrite(tdb, off, rec, len);
308         }
309         return ecode;
310 }
311
312 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
313                                 void *rec, size_t len)
314 {
315         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
316         tdb_convert(tdb, rec, len);
317         return ecode;
318 }
319
320 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
321                              tdb_off_t off, tdb_off_t val)
322 {
323         if (tdb->read_only) {
324                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
325                                   "Write to read-only database");
326         }
327
328         if (likely(!(tdb->flags & TDB_CONVERT))) {
329                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
330                                                     true);
331                 if (TDB_PTR_IS_ERR(p)) {
332                         return TDB_PTR_ERR(p);
333                 }
334                 if (p) {
335                         *p = val;
336                         return TDB_SUCCESS;
337                 }
338         }
339         return tdb_write_convert(tdb, off, &val, sizeof(val));
340 }
341
342 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
343                              tdb_len_t len, unsigned int prefix)
344 {
345         void *buf;
346         enum TDB_ERROR ecode;
347
348         /* some systems don't like zero length malloc */
349         buf = malloc(prefix + len ? prefix + len : 1);
350         if (!buf) {
351                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
352                            "tdb_alloc_read malloc failed len=%zu",
353                            (size_t)(prefix + len));
354                 return TDB_ERR_PTR(TDB_ERR_OOM);
355         } else {
356                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
357                 if (unlikely(ecode != TDB_SUCCESS)) {
358                         free(buf);
359                         return TDB_ERR_PTR(ecode);
360                 }
361         }
362         return buf;
363 }
364
365 /* read a lump of data, allocating the space for it */
366 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
367 {
368         return _tdb_alloc_read(tdb, offset, len, 0);
369 }
370
371 static enum TDB_ERROR fill(struct tdb_context *tdb,
372                            const void *buf, size_t size,
373                            tdb_off_t off, tdb_len_t len)
374 {
375         while (len) {
376                 size_t n = len > size ? size : len;
377                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
378                 if (ret != n) {
379                         if (ret >= 0)
380                                 errno = ENOSPC;
381
382                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
383                                           "fill failed:"
384                                           " %zi at %zu len=%zu (%s)",
385                                           ret, (size_t)off, (size_t)len,
386                                           strerror(errno));
387                 }
388                 len -= n;
389                 off += n;
390         }
391         return TDB_SUCCESS;
392 }
393
394 /* expand a file.  we prefer to use ftruncate, as that is what posix
395   says to use for mmap expansion */
396 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
397                                       tdb_len_t addition)
398 {
399         char buf[8192];
400         enum TDB_ERROR ecode;
401
402         if (tdb->read_only) {
403                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
404                                   "Expand on read-only database");
405         }
406
407         if (tdb->flags & TDB_INTERNAL) {
408                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
409                 if (!new) {
410                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
411                                           "No memory to expand database");
412                 }
413                 tdb->map_ptr = new;
414                 tdb->map_size += addition;
415         } else {
416                 /* Unmap before trying to write; old TDB claimed OpenBSD had
417                  * problem with this otherwise. */
418                 tdb_munmap(tdb);
419
420                 /* If this fails, we try to fill anyway. */
421                 if (ftruncate(tdb->fd, tdb->map_size + addition))
422                         ;
423
424                 /* now fill the file with something. This ensures that the
425                    file isn't sparse, which would be very bad if we ran out of
426                    disk. This must be done with write, not via mmap */
427                 memset(buf, 0x43, sizeof(buf));
428                 ecode = fill(tdb, buf, sizeof(buf), tdb->map_size, addition);
429                 if (ecode != TDB_SUCCESS)
430                         return ecode;
431                 tdb->map_size += addition;
432                 tdb_mmap(tdb);
433         }
434         return TDB_SUCCESS;
435 }
436
437 const void *tdb_access_read(struct tdb_context *tdb,
438                             tdb_off_t off, tdb_len_t len, bool convert)
439 {
440         const void *ret = NULL;
441
442         if (likely(!(tdb->flags & TDB_CONVERT))) {
443                 ret = tdb->methods->direct(tdb, off, len, false);
444
445                 if (TDB_PTR_IS_ERR(ret)) {
446                         return ret;
447                 }
448         }
449         if (!ret) {
450                 struct tdb_access_hdr *hdr;
451                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
452                 if (TDB_PTR_IS_ERR(hdr)) {
453                         return hdr;
454                 }
455                 hdr->next = tdb->access;
456                 tdb->access = hdr;
457                 ret = hdr + 1;
458                 if (convert) {
459                         tdb_convert(tdb, (void *)ret, len);
460                 }
461         } else
462                 tdb->direct_access++;
463
464         return ret;
465 }
466
467 void *tdb_access_write(struct tdb_context *tdb,
468                        tdb_off_t off, tdb_len_t len, bool convert)
469 {
470         void *ret = NULL;
471
472         if (tdb->read_only) {
473                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
474                            "Write to read-only database");
475                 return TDB_ERR_PTR(TDB_ERR_RDONLY);
476         }
477
478         if (likely(!(tdb->flags & TDB_CONVERT))) {
479                 ret = tdb->methods->direct(tdb, off, len, true);
480
481                 if (TDB_PTR_IS_ERR(ret)) {
482                         return ret;
483                 }
484         }
485
486         if (!ret) {
487                 struct tdb_access_hdr *hdr;
488                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
489                 if (TDB_PTR_IS_ERR(hdr)) {
490                         return hdr;
491                 }
492                 hdr->next = tdb->access;
493                 tdb->access = hdr;
494                 hdr->off = off;
495                 hdr->len = len;
496                 hdr->convert = convert;
497                 ret = hdr + 1;
498                 if (convert)
499                         tdb_convert(tdb, (void *)ret, len);
500         } else
501                 tdb->direct_access++;
502
503         return ret;
504 }
505
506 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
507 {
508         struct tdb_access_hdr **hp;
509
510         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
511                 if (*hp + 1 == p)
512                         return hp;
513         }
514         return NULL;
515 }
516
517 void tdb_access_release(struct tdb_context *tdb, const void *p)
518 {
519         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
520
521         if (hp) {
522                 hdr = *hp;
523                 *hp = hdr->next;
524                 free(hdr);
525         } else
526                 tdb->direct_access--;
527 }
528
529 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
530 {
531         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
532         enum TDB_ERROR ecode;
533
534         if (hp) {
535                 hdr = *hp;
536                 if (hdr->convert)
537                         ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
538                 else
539                         ecode = tdb_write(tdb, hdr->off, p, hdr->len);
540                 *hp = hdr->next;
541                 free(hdr);
542         } else {
543                 tdb->direct_access--;
544                 ecode = TDB_SUCCESS;
545         }
546
547         return ecode;
548 }
549
550 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
551                         bool write_mode)
552 {
553         enum TDB_ERROR ecode;
554
555         if (unlikely(!tdb->map_ptr))
556                 return NULL;
557
558         ecode = tdb_oob(tdb, off + len, true);
559         if (unlikely(ecode != TDB_SUCCESS))
560                 return TDB_ERR_PTR(ecode);
561         return (char *)tdb->map_ptr + off;
562 }
563
564 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
565 {
566         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
567                 *s += val;
568 }
569
570 static const struct tdb_methods io_methods = {
571         tdb_read,
572         tdb_write,
573         tdb_oob,
574         tdb_expand_file,
575         tdb_direct,
576 };
577
578 /*
579   initialise the default methods table
580 */
581 void tdb_io_init(struct tdb_context *tdb)
582 {
583         tdb->methods = &io_methods;
584 }