a0c5f5232554a3d37f1b36f3d1e2ff0fdda8657e
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
71                               bool probe)
72 {
73         struct stat st;
74         enum TDB_ERROR ecode;
75
76         /* We can't hold pointers during this: we could unmap! */
77         assert(!tdb->direct_access
78                || (tdb->flags & TDB_NOLOCK)
79                || tdb_has_expansion_lock(tdb));
80
81         if (len <= tdb->map_size)
82                 return 0;
83         if (tdb->flags & TDB_INTERNAL) {
84                 if (!probe) {
85                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86                                  "tdb_oob len %lld beyond internal"
87                                  " malloc size %lld",
88                                  (long long)len,
89                                  (long long)tdb->map_size);
90                 }
91                 return TDB_ERR_IO;
92         }
93
94         ecode = tdb_lock_expand(tdb, F_RDLCK);
95         if (ecode != TDB_SUCCESS) {
96                 return ecode;
97         }
98
99         if (fstat(tdb->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return TDB_ERR_IO;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return TDB_ERR_IO;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb);
119
120         tdb->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return TDB_SUCCESS;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* FIXME: Return the off? */
137 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
138                               tdb_off_t base, uint64_t start, uint64_t end)
139 {
140         uint64_t i;
141         const uint64_t *val;
142
143         /* Zero vs non-zero is the same unconverted: minor optimization. */
144         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
145                               (end - start) * sizeof(tdb_off_t), false);
146         if (!val)
147                 return end;
148
149         for (i = 0; i < (end - start); i++) {
150                 if (val[i])
151                         break;
152         }
153         tdb_access_release(tdb, val);
154         return start + i;
155 }
156
157 /* Return first zero offset in num offset array, or num. */
158 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
159                            uint64_t num)
160 {
161         uint64_t i;
162         const uint64_t *val;
163
164         /* Zero vs non-zero is the same unconverted: minor optimization. */
165         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
166         if (!val)
167                 return num;
168
169         for (i = 0; i < num; i++) {
170                 if (!val[i])
171                         break;
172         }
173         tdb_access_release(tdb, val);
174         return i;
175 }
176
177 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
178 {
179         char buf[8192] = { 0 };
180         void *p = tdb->methods->direct(tdb, off, len, true);
181         enum TDB_ERROR ecode;
182
183         assert(!tdb->read_only);
184         if (p) {
185                 memset(p, 0, len);
186                 return 0;
187         }
188         while (len) {
189                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
190                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
191                 if (ecode != TDB_SUCCESS) {
192                         tdb->ecode = ecode;
193                         return -1;
194                 }
195                 len -= todo;
196                 off += todo;
197         }
198         return 0;
199 }
200
201 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
202 {
203         tdb_off_t ret;
204
205         if (likely(!(tdb->flags & TDB_CONVERT))) {
206                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
207                                                     false);
208                 if (p)
209                         return *p;
210         }
211
212         if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
213                 return TDB_OFF_ERR;
214         return ret;
215 }
216
217 /* write a lump of data at a specified offset */
218 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
219                                 const void *buf, tdb_len_t len)
220 {
221         enum TDB_ERROR ecode;
222
223         if (tdb->read_only) {
224                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
225                                   "Write to read-only database");
226         }
227
228         /* FIXME: Bogus optimization? */
229         if (len == 0) {
230                 return TDB_SUCCESS;
231         }
232
233         ecode = tdb->methods->oob(tdb, off + len, 0);
234         if (ecode != TDB_SUCCESS) {
235                 return ecode;
236         }
237
238         if (tdb->map_ptr) {
239                 memcpy(off + (char *)tdb->map_ptr, buf, len);
240         } else {
241                 ssize_t ret;
242                 ret = pwrite(tdb->fd, buf, len, off);
243                 if (ret < len) {
244                         /* This shouldn't happen: we avoid sparse files. */
245                         if (ret >= 0)
246                                 errno = ENOSPC;
247
248                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
249                                           "tdb_write: %zi at %zu len=%zu (%s)",
250                                           ret, (size_t)off, (size_t)len,
251                                           strerror(errno));
252                 }
253         }
254         return TDB_SUCCESS;
255 }
256
257 /* read a lump of data at a specified offset */
258 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
259                                void *buf, tdb_len_t len)
260 {
261         enum TDB_ERROR ecode;
262
263         ecode = tdb->methods->oob(tdb, off + len, 0);
264         if (ecode != TDB_SUCCESS) {
265                 return ecode;
266         }
267
268         if (tdb->map_ptr) {
269                 memcpy(buf, off + (char *)tdb->map_ptr, len);
270         } else {
271                 ssize_t r = pread(tdb->fd, buf, len, off);
272                 if (r != len) {
273                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
274                                           "tdb_read failed with %zi at %zu "
275                                           "len=%zu (%s) map_size=%zu",
276                                           r, (size_t)off, (size_t)len,
277                                           strerror(errno),
278                                           (size_t)tdb->map_size);
279                 }
280         }
281         return TDB_SUCCESS;
282 }
283
284 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
285                       const void *rec, size_t len)
286 {
287         enum TDB_ERROR ecode;
288
289         if (unlikely((tdb->flags & TDB_CONVERT))) {
290                 void *conv = malloc(len);
291                 if (!conv) {
292                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
293                                    "tdb_write: no memory converting"
294                                    " %zu bytes", len);
295                         return -1;
296                 }
297                 memcpy(conv, rec, len);
298                 ecode = tdb->methods->twrite(tdb, off,
299                                            tdb_convert(tdb, conv, len), len);
300                 free(conv);
301         } else {
302                 ecode = tdb->methods->twrite(tdb, off, rec, len);
303         }
304
305         if (ecode != TDB_SUCCESS) {
306                 tdb->ecode = ecode;
307                 return -1;
308         }
309         return 0;
310 }
311
312 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
313                       void *rec, size_t len)
314 {
315         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
316         tdb_convert(tdb, rec, len);
317         if (ecode != TDB_SUCCESS) {
318                 tdb->ecode = ecode;
319                 return -1;
320         }
321         return 0;
322 }
323
324 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
325 {
326         if (tdb->read_only) {
327                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
328                            "Write to read-only database");
329                 return -1;
330         }
331
332         if (likely(!(tdb->flags & TDB_CONVERT))) {
333                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
334                                                     true);
335                 if (p) {
336                         *p = val;
337                         return 0;
338                 }
339         }
340         return tdb_write_convert(tdb, off, &val, sizeof(val));
341 }
342
343 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
344                              tdb_len_t len, unsigned int prefix)
345 {
346         void *buf;
347         enum TDB_ERROR ecode;
348
349         /* some systems don't like zero length malloc */
350         buf = malloc(prefix + len ? prefix + len : 1);
351         if (!buf) {
352                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
353                            "tdb_alloc_read malloc failed len=%zu",
354                            (size_t)(prefix + len));
355         } else {
356                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
357                 if (unlikely(ecode != TDB_SUCCESS)) {
358                         tdb->ecode = ecode;
359                         free(buf);
360                         buf = NULL;
361                 }
362         }
363         return buf;
364 }
365
366 /* read a lump of data, allocating the space for it */
367 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
368 {
369         return _tdb_alloc_read(tdb, offset, len, 0);
370 }
371
372 static int fill(struct tdb_context *tdb,
373                 const void *buf, size_t size,
374                 tdb_off_t off, tdb_len_t len)
375 {
376         while (len) {
377                 size_t n = len > size ? size : len;
378                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
379                 if (ret < n) {
380                         if (ret >= 0)
381                                 errno = ENOSPC;
382
383                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
384                                    "fill failed: %zi at %zu len=%zu (%s)",
385                                    ret, (size_t)off, (size_t)len,
386                                    strerror(errno));
387                         return -1;
388                 }
389                 len -= n;
390                 off += n;
391         }
392         return 0;
393 }
394
395 /* expand a file.  we prefer to use ftruncate, as that is what posix
396   says to use for mmap expansion */
397 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
398                                       tdb_len_t addition)
399 {
400         char buf[8192];
401
402         if (tdb->read_only) {
403                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
404                                   "Expand on read-only database");
405         }
406
407         if (tdb->flags & TDB_INTERNAL) {
408                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
409                 if (!new) {
410                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
411                                           "No memory to expand database");
412                 }
413                 tdb->map_ptr = new;
414                 tdb->map_size += addition;
415         } else {
416                 /* Unmap before trying to write; old TDB claimed OpenBSD had
417                  * problem with this otherwise. */
418                 tdb_munmap(tdb);
419
420                 /* If this fails, we try to fill anyway. */
421                 if (ftruncate(tdb->fd, tdb->map_size + addition))
422                         ;
423
424                 /* now fill the file with something. This ensures that the
425                    file isn't sparse, which would be very bad if we ran out of
426                    disk. This must be done with write, not via mmap */
427                 memset(buf, 0x43, sizeof(buf));
428                 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
429                         return tdb->ecode;
430                 tdb->map_size += addition;
431                 tdb_mmap(tdb);
432         }
433         return TDB_SUCCESS;
434 }
435
436 const void *tdb_access_read(struct tdb_context *tdb,
437                             tdb_off_t off, tdb_len_t len, bool convert)
438 {
439         const void *ret = NULL;
440
441         if (likely(!(tdb->flags & TDB_CONVERT)))
442                 ret = tdb->methods->direct(tdb, off, len, false);
443
444         if (!ret) {
445                 struct tdb_access_hdr *hdr;
446                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
447                 if (hdr) {
448                         hdr->next = tdb->access;
449                         tdb->access = hdr;
450                         ret = hdr + 1;
451                         if (convert)
452                                 tdb_convert(tdb, (void *)ret, len);
453                 }
454         } else
455                 tdb->direct_access++;
456
457         return ret;
458 }
459
460 void *tdb_access_write(struct tdb_context *tdb,
461                        tdb_off_t off, tdb_len_t len, bool convert)
462 {
463         void *ret = NULL;
464
465         if (tdb->read_only) {
466                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
467                            "Write to read-only database");
468                 return NULL;
469         }
470
471         if (likely(!(tdb->flags & TDB_CONVERT)))
472                 ret = tdb->methods->direct(tdb, off, len, true);
473
474         if (!ret) {
475                 struct tdb_access_hdr *hdr;
476                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
477                 if (hdr) {
478                         hdr->next = tdb->access;
479                         tdb->access = hdr;
480                         hdr->off = off;
481                         hdr->len = len;
482                         hdr->convert = convert;
483                         ret = hdr + 1;
484                         if (convert)
485                                 tdb_convert(tdb, (void *)ret, len);
486                 }
487         } else
488                 tdb->direct_access++;
489
490         return ret;
491 }
492
493 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
494 {
495         struct tdb_access_hdr **hp;
496
497         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
498                 if (*hp + 1 == p)
499                         return hp;
500         }
501         return NULL;
502 }
503
504 void tdb_access_release(struct tdb_context *tdb, const void *p)
505 {
506         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
507
508         if (hp) {
509                 hdr = *hp;
510                 *hp = hdr->next;
511                 free(hdr);
512         } else
513                 tdb->direct_access--;
514 }
515
516 int tdb_access_commit(struct tdb_context *tdb, void *p)
517 {
518         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
519         int ret = 0;
520
521         if (hp) {
522                 hdr = *hp;
523                 if (hdr->convert)
524                         ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
525                 else
526                         ret = tdb_write(tdb, hdr->off, p, hdr->len);
527                 *hp = hdr->next;
528                 free(hdr);
529         } else
530                 tdb->direct_access--;
531
532         return ret;
533 }
534
535 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
536                         bool write_mode)
537 {
538         if (unlikely(!tdb->map_ptr))
539                 return NULL;
540
541         if (unlikely(tdb_oob(tdb, off + len, true) != TDB_SUCCESS))
542                 return NULL;
543         return (char *)tdb->map_ptr + off;
544 }
545
546 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
547 {
548         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
549                 *s += val;
550 }
551
/* Default I/O method table, installed by tdb_io_init().  The entries are
 * positional, so their order must match the member order of
 * struct tdb_methods. */
static const struct tdb_methods io_methods = {
	tdb_read,		/* read a lump of data */
	tdb_write,		/* write a lump of data */
	tdb_oob,		/* out of bounds check */
	tdb_expand_file,	/* grow the database */
	tdb_direct,		/* direct pointer into the mapping */
};
559
560 /*
561   initialise the default methods table
562 */
563 void tdb_io_init(struct tdb_context *tdb)
564 {
565         tdb->methods = &io_methods;
566 }