a9016c4f38ea17a3148d9585302eae792dadc05c
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
71                               bool probe)
72 {
73         struct stat st;
74         enum TDB_ERROR ecode;
75
76         /* We can't hold pointers during this: we could unmap! */
77         assert(!tdb->direct_access
78                || (tdb->flags & TDB_NOLOCK)
79                || tdb_has_expansion_lock(tdb));
80
81         if (len <= tdb->map_size)
82                 return 0;
83         if (tdb->flags & TDB_INTERNAL) {
84                 if (!probe) {
85                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86                                  "tdb_oob len %lld beyond internal"
87                                  " malloc size %lld",
88                                  (long long)len,
89                                  (long long)tdb->map_size);
90                 }
91                 return TDB_ERR_IO;
92         }
93
94         ecode = tdb_lock_expand(tdb, F_RDLCK);
95         if (ecode != TDB_SUCCESS) {
96                 return ecode;
97         }
98
99         if (fstat(tdb->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return TDB_ERR_IO;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return TDB_ERR_IO;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb);
119
120         tdb->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return TDB_SUCCESS;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* Return first non-zero offset in offset array, or end, or -ve error. */
137 /* FIXME: Return the off? */
138 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
139                               tdb_off_t base, uint64_t start, uint64_t end)
140 {
141         uint64_t i;
142         const uint64_t *val;
143
144         /* Zero vs non-zero is the same unconverted: minor optimization. */
145         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
146                               (end - start) * sizeof(tdb_off_t), false);
147         if (TDB_PTR_IS_ERR(val)) {
148                 return TDB_PTR_ERR(val);
149         }
150
151         for (i = 0; i < (end - start); i++) {
152                 if (val[i])
153                         break;
154         }
155         tdb_access_release(tdb, val);
156         return start + i;
157 }
158
159 /* Return first zero offset in num offset array, or num, or -ve error. */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
161                            uint64_t num)
162 {
163         uint64_t i;
164         const uint64_t *val;
165
166         /* Zero vs non-zero is the same unconverted: minor optimization. */
167         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168         if (TDB_PTR_IS_ERR(val)) {
169                 return TDB_PTR_ERR(val);
170         }
171
172         for (i = 0; i < num; i++) {
173                 if (!val[i])
174                         break;
175         }
176         tdb_access_release(tdb, val);
177         return i;
178 }
179
180 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
181 {
182         char buf[8192] = { 0 };
183         void *p = tdb->methods->direct(tdb, off, len, true);
184         enum TDB_ERROR ecode = TDB_SUCCESS;
185
186         assert(!tdb->read_only);
187         if (TDB_PTR_IS_ERR(p)) {
188                 return TDB_PTR_ERR(p);
189         }
190         if (p) {
191                 memset(p, 0, len);
192                 return ecode;
193         }
194         while (len) {
195                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
196                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
197                 if (ecode != TDB_SUCCESS) {
198                         break;
199                 }
200                 len -= todo;
201                 off += todo;
202         }
203         return ecode;
204 }
205
206 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
207 {
208         tdb_off_t ret;
209         enum TDB_ERROR ecode;
210
211         if (likely(!(tdb->flags & TDB_CONVERT))) {
212                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
213                                                     false);
214                 if (TDB_PTR_IS_ERR(p)) {
215                         return TDB_PTR_ERR(p);
216                 }
217                 if (p)
218                         return *p;
219         }
220
221         ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
222         if (ecode != TDB_SUCCESS) {
223                 return ecode;
224         }
225         return ret;
226 }
227
228 /* write a lump of data at a specified offset */
229 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
230                                 const void *buf, tdb_len_t len)
231 {
232         enum TDB_ERROR ecode;
233
234         if (tdb->read_only) {
235                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
236                                   "Write to read-only database");
237         }
238
239         /* FIXME: Bogus optimization? */
240         if (len == 0) {
241                 return TDB_SUCCESS;
242         }
243
244         ecode = tdb->methods->oob(tdb, off + len, 0);
245         if (ecode != TDB_SUCCESS) {
246                 return ecode;
247         }
248
249         if (tdb->map_ptr) {
250                 memcpy(off + (char *)tdb->map_ptr, buf, len);
251         } else {
252                 ssize_t ret;
253                 ret = pwrite(tdb->fd, buf, len, off);
254                 if (ret < len) {
255                         /* This shouldn't happen: we avoid sparse files. */
256                         if (ret >= 0)
257                                 errno = ENOSPC;
258
259                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
260                                           "tdb_write: %zi at %zu len=%zu (%s)",
261                                           ret, (size_t)off, (size_t)len,
262                                           strerror(errno));
263                 }
264         }
265         return TDB_SUCCESS;
266 }
267
268 /* read a lump of data at a specified offset */
269 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
270                                void *buf, tdb_len_t len)
271 {
272         enum TDB_ERROR ecode;
273
274         ecode = tdb->methods->oob(tdb, off + len, 0);
275         if (ecode != TDB_SUCCESS) {
276                 return ecode;
277         }
278
279         if (tdb->map_ptr) {
280                 memcpy(buf, off + (char *)tdb->map_ptr, len);
281         } else {
282                 ssize_t r = pread(tdb->fd, buf, len, off);
283                 if (r != len) {
284                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
285                                           "tdb_read failed with %zi at %zu "
286                                           "len=%zu (%s) map_size=%zu",
287                                           r, (size_t)off, (size_t)len,
288                                           strerror(errno),
289                                           (size_t)tdb->map_size);
290                 }
291         }
292         return TDB_SUCCESS;
293 }
294
295 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
296                                  const void *rec, size_t len)
297 {
298         enum TDB_ERROR ecode;
299
300         if (unlikely((tdb->flags & TDB_CONVERT))) {
301                 void *conv = malloc(len);
302                 if (!conv) {
303                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
304                                           "tdb_write: no memory converting"
305                                           " %zu bytes", len);
306                 }
307                 memcpy(conv, rec, len);
308                 ecode = tdb->methods->twrite(tdb, off,
309                                            tdb_convert(tdb, conv, len), len);
310                 free(conv);
311         } else {
312                 ecode = tdb->methods->twrite(tdb, off, rec, len);
313         }
314         return ecode;
315 }
316
317 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
318                                 void *rec, size_t len)
319 {
320         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
321         tdb_convert(tdb, rec, len);
322         return ecode;
323 }
324
325 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
326                              tdb_off_t off, tdb_off_t val)
327 {
328         if (tdb->read_only) {
329                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
330                                   "Write to read-only database");
331         }
332
333         if (likely(!(tdb->flags & TDB_CONVERT))) {
334                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
335                                                     true);
336                 if (TDB_PTR_IS_ERR(p)) {
337                         return TDB_PTR_ERR(p);
338                 }
339                 if (p) {
340                         *p = val;
341                         return TDB_SUCCESS;
342                 }
343         }
344         return tdb_write_convert(tdb, off, &val, sizeof(val));
345 }
346
347 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
348                              tdb_len_t len, unsigned int prefix)
349 {
350         void *buf;
351         enum TDB_ERROR ecode;
352
353         /* some systems don't like zero length malloc */
354         buf = malloc(prefix + len ? prefix + len : 1);
355         if (!buf) {
356                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
357                            "tdb_alloc_read malloc failed len=%zu",
358                            (size_t)(prefix + len));
359                 return TDB_ERR_PTR(TDB_ERR_OOM);
360         } else {
361                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
362                 if (unlikely(ecode != TDB_SUCCESS)) {
363                         free(buf);
364                         return TDB_ERR_PTR(ecode);
365                 }
366         }
367         return buf;
368 }
369
370 /* read a lump of data, allocating the space for it */
371 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
372 {
373         return _tdb_alloc_read(tdb, offset, len, 0);
374 }
375
376 static enum TDB_ERROR fill(struct tdb_context *tdb,
377                            const void *buf, size_t size,
378                            tdb_off_t off, tdb_len_t len)
379 {
380         while (len) {
381                 size_t n = len > size ? size : len;
382                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
383                 if (ret < n) {
384                         if (ret >= 0)
385                                 errno = ENOSPC;
386
387                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
388                                           "fill failed:"
389                                           " %zi at %zu len=%zu (%s)",
390                                           ret, (size_t)off, (size_t)len,
391                                           strerror(errno));
392                 }
393                 len -= n;
394                 off += n;
395         }
396         return TDB_SUCCESS;
397 }
398
399 /* expand a file.  we prefer to use ftruncate, as that is what posix
400   says to use for mmap expansion */
401 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
402                                       tdb_len_t addition)
403 {
404         char buf[8192];
405         enum TDB_ERROR ecode;
406
407         if (tdb->read_only) {
408                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
409                                   "Expand on read-only database");
410         }
411
412         if (tdb->flags & TDB_INTERNAL) {
413                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
414                 if (!new) {
415                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
416                                           "No memory to expand database");
417                 }
418                 tdb->map_ptr = new;
419                 tdb->map_size += addition;
420         } else {
421                 /* Unmap before trying to write; old TDB claimed OpenBSD had
422                  * problem with this otherwise. */
423                 tdb_munmap(tdb);
424
425                 /* If this fails, we try to fill anyway. */
426                 if (ftruncate(tdb->fd, tdb->map_size + addition))
427                         ;
428
429                 /* now fill the file with something. This ensures that the
430                    file isn't sparse, which would be very bad if we ran out of
431                    disk. This must be done with write, not via mmap */
432                 memset(buf, 0x43, sizeof(buf));
433                 ecode = fill(tdb, buf, sizeof(buf), tdb->map_size, addition);
434                 if (ecode != TDB_SUCCESS)
435                         return ecode;
436                 tdb->map_size += addition;
437                 tdb_mmap(tdb);
438         }
439         return TDB_SUCCESS;
440 }
441
442 const void *tdb_access_read(struct tdb_context *tdb,
443                             tdb_off_t off, tdb_len_t len, bool convert)
444 {
445         const void *ret = NULL;
446
447         if (likely(!(tdb->flags & TDB_CONVERT))) {
448                 ret = tdb->methods->direct(tdb, off, len, false);
449
450                 if (TDB_PTR_IS_ERR(ret)) {
451                         return ret;
452                 }
453         }
454         if (!ret) {
455                 struct tdb_access_hdr *hdr;
456                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
457                 if (TDB_PTR_IS_ERR(hdr)) {
458                         return hdr;
459                 }
460                 hdr->next = tdb->access;
461                 tdb->access = hdr;
462                 ret = hdr + 1;
463                 if (convert) {
464                         tdb_convert(tdb, (void *)ret, len);
465                 }
466         } else
467                 tdb->direct_access++;
468
469         return ret;
470 }
471
472 void *tdb_access_write(struct tdb_context *tdb,
473                        tdb_off_t off, tdb_len_t len, bool convert)
474 {
475         void *ret = NULL;
476
477         if (tdb->read_only) {
478                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
479                            "Write to read-only database");
480                 return TDB_ERR_PTR(TDB_ERR_RDONLY);
481         }
482
483         if (likely(!(tdb->flags & TDB_CONVERT))) {
484                 ret = tdb->methods->direct(tdb, off, len, true);
485
486                 if (TDB_PTR_IS_ERR(ret)) {
487                         return ret;
488                 }
489         }
490
491         if (!ret) {
492                 struct tdb_access_hdr *hdr;
493                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
494                 if (TDB_PTR_IS_ERR(hdr)) {
495                         return hdr;
496                 }
497                 hdr->next = tdb->access;
498                 tdb->access = hdr;
499                 hdr->off = off;
500                 hdr->len = len;
501                 hdr->convert = convert;
502                 ret = hdr + 1;
503                 if (convert)
504                         tdb_convert(tdb, (void *)ret, len);
505         } else
506                 tdb->direct_access++;
507
508         return ret;
509 }
510
511 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
512 {
513         struct tdb_access_hdr **hp;
514
515         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
516                 if (*hp + 1 == p)
517                         return hp;
518         }
519         return NULL;
520 }
521
522 void tdb_access_release(struct tdb_context *tdb, const void *p)
523 {
524         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
525
526         if (hp) {
527                 hdr = *hp;
528                 *hp = hdr->next;
529                 free(hdr);
530         } else
531                 tdb->direct_access--;
532 }
533
534 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
535 {
536         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
537         enum TDB_ERROR ecode;
538
539         if (hp) {
540                 hdr = *hp;
541                 if (hdr->convert)
542                         ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
543                 else
544                         ecode = tdb_write(tdb, hdr->off, p, hdr->len);
545                 *hp = hdr->next;
546                 free(hdr);
547         } else {
548                 tdb->direct_access--;
549                 ecode = TDB_SUCCESS;
550         }
551
552         return ecode;
553 }
554
555 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
556                         bool write_mode)
557 {
558         enum TDB_ERROR ecode;
559
560         if (unlikely(!tdb->map_ptr))
561                 return NULL;
562
563         ecode = tdb_oob(tdb, off + len, true);
564         if (unlikely(ecode != TDB_SUCCESS))
565                 return TDB_ERR_PTR(ecode);
566         return (char *)tdb->map_ptr + off;
567 }
568
569 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
570 {
571         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
572                 *s += val;
573 }
574
575 static const struct tdb_methods io_methods = {
576         tdb_read,
577         tdb_write,
578         tdb_oob,
579         tdb_expand_file,
580         tdb_direct,
581 };
582
583 /*
584   initialise the default methods table
585 */
586 void tdb_io_init(struct tdb_context *tdb)
587 {
588         tdb->methods = &io_methods;
589 }