tdb2: restore file filling code.
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
71 {
72         struct stat st;
73         enum TDB_ERROR ecode;
74
75         /* We can't hold pointers during this: we could unmap! */
76         assert(!tdb->direct_access
77                || (tdb->flags & TDB_NOLOCK)
78                || tdb_has_expansion_lock(tdb));
79
80         if (len <= tdb->map_size)
81                 return 0;
82         if (tdb->flags & TDB_INTERNAL) {
83                 if (!probe) {
84                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
85                                  "tdb_oob len %lld beyond internal"
86                                  " malloc size %lld",
87                                  (long long)len,
88                                  (long long)tdb->map_size);
89                 }
90                 return -1;
91         }
92
93         ecode = tdb_lock_expand(tdb, F_RDLCK);
94         if (ecode != TDB_SUCCESS) {
95                 tdb->ecode = ecode;
96                 return -1;
97         }
98
99         if (fstat(tdb->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return -1;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return -1;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb);
119
120         tdb->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return 0;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* FIXME: Return the off? */
137 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
138                               tdb_off_t base, uint64_t start, uint64_t end)
139 {
140         uint64_t i;
141         const uint64_t *val;
142
143         /* Zero vs non-zero is the same unconverted: minor optimization. */
144         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
145                               (end - start) * sizeof(tdb_off_t), false);
146         if (!val)
147                 return end;
148
149         for (i = 0; i < (end - start); i++) {
150                 if (val[i])
151                         break;
152         }
153         tdb_access_release(tdb, val);
154         return start + i;
155 }
156
157 /* Return first zero offset in num offset array, or num. */
158 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
159                            uint64_t num)
160 {
161         uint64_t i;
162         const uint64_t *val;
163
164         /* Zero vs non-zero is the same unconverted: minor optimization. */
165         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
166         if (!val)
167                 return num;
168
169         for (i = 0; i < num; i++) {
170                 if (!val[i])
171                         break;
172         }
173         tdb_access_release(tdb, val);
174         return i;
175 }
176
177 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
178 {
179         char buf[8192] = { 0 };
180         void *p = tdb->methods->direct(tdb, off, len, true);
181
182         assert(!tdb->read_only);
183         if (p) {
184                 memset(p, 0, len);
185                 return 0;
186         }
187         while (len) {
188                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
189                 if (tdb->methods->twrite(tdb, off, buf, todo) == -1)
190                         return -1;
191                 len -= todo;
192                 off += todo;
193         }
194         return 0;
195 }
196
197 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
198 {
199         tdb_off_t ret;
200
201         if (likely(!(tdb->flags & TDB_CONVERT))) {
202                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
203                                                     false);
204                 if (p)
205                         return *p;
206         }
207
208         if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
209                 return TDB_OFF_ERR;
210         return ret;
211 }
212
213 /* write a lump of data at a specified offset */
214 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
215                      const void *buf, tdb_len_t len)
216 {
217         if (tdb->read_only) {
218                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
219                            "Write to read-only database");
220                 return -1;
221         }
222
223         /* FIXME: Bogus optimization? */
224         if (len == 0) {
225                 return 0;
226         }
227
228         if (tdb->methods->oob(tdb, off + len, 0) != 0)
229                 return -1;
230
231         if (tdb->map_ptr) {
232                 memcpy(off + (char *)tdb->map_ptr, buf, len);
233         } else {
234                 ssize_t ret;
235                 ret = pwrite(tdb->fd, buf, len, off);
236                 if (ret < len) {
237                         /* This shouldn't happen: we avoid sparse files. */
238                         if (ret >= 0)
239                                 errno = ENOSPC;
240
241                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
242                                    "tdb_write: %zi at %zu len=%zu (%s)",
243                                    ret, (size_t)off, (size_t)len,
244                                    strerror(errno));
245                         return -1;
246                 }
247         }
248         return 0;
249 }
250
251 /* read a lump of data at a specified offset */
252 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
253                     tdb_len_t len)
254 {
255         if (tdb->methods->oob(tdb, off + len, 0) != 0) {
256                 return -1;
257         }
258
259         if (tdb->map_ptr) {
260                 memcpy(buf, off + (char *)tdb->map_ptr, len);
261         } else {
262                 ssize_t r = pread(tdb->fd, buf, len, off);
263                 if (r != len) {
264                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
265                                    "tdb_read failed with %zi at %zu "
266                                    "len=%zu (%s) map_size=%zu",
267                                    r, (size_t)off, (size_t)len,
268                                    strerror(errno),
269                                    (size_t)tdb->map_size);
270                         return -1;
271                 }
272         }
273         return 0;
274 }
275
276 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
277                       const void *rec, size_t len)
278 {
279         int ret;
280         if (unlikely((tdb->flags & TDB_CONVERT))) {
281                 void *conv = malloc(len);
282                 if (!conv) {
283                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
284                                    "tdb_write: no memory converting"
285                                    " %zu bytes", len);
286                         return -1;
287                 }
288                 memcpy(conv, rec, len);
289                 ret = tdb->methods->twrite(tdb, off,
290                                            tdb_convert(tdb, conv, len), len);
291                 free(conv);
292         } else
293                 ret = tdb->methods->twrite(tdb, off, rec, len);
294
295         return ret;
296 }
297
298 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
299                       void *rec, size_t len)
300 {
301         int ret = tdb->methods->tread(tdb, off, rec, len);
302         tdb_convert(tdb, rec, len);
303         return ret;
304 }
305
306 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
307 {
308         if (tdb->read_only) {
309                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
310                            "Write to read-only database");
311                 return -1;
312         }
313
314         if (likely(!(tdb->flags & TDB_CONVERT))) {
315                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
316                                                     true);
317                 if (p) {
318                         *p = val;
319                         return 0;
320                 }
321         }
322         return tdb_write_convert(tdb, off, &val, sizeof(val));
323 }
324
325 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
326                              tdb_len_t len, unsigned int prefix)
327 {
328         void *buf;
329
330         /* some systems don't like zero length malloc */
331         buf = malloc(prefix + len ? prefix + len : 1);
332         if (!buf) {
333                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
334                            "tdb_alloc_read malloc failed len=%zu",
335                            (size_t)(prefix + len));
336         } else if (unlikely(tdb->methods->tread(tdb, offset, buf+prefix, len)
337                             == -1)) {
338                 free(buf);
339                 buf = NULL;
340         }
341         return buf;
342 }
343
344 /* read a lump of data, allocating the space for it */
345 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
346 {
347         return _tdb_alloc_read(tdb, offset, len, 0);
348 }
349
350 static int fill(struct tdb_context *tdb,
351                 const void *buf, size_t size,
352                 tdb_off_t off, tdb_len_t len)
353 {
354         while (len) {
355                 size_t n = len > size ? size : len;
356                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
357                 if (ret < n) {
358                         if (ret >= 0)
359                                 errno = ENOSPC;
360
361                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
362                                    "fill failed: %zi at %zu len=%zu (%s)",
363                                    ret, (size_t)off, (size_t)len,
364                                    strerror(errno));
365                         return -1;
366                 }
367                 len -= n;
368                 off += n;
369         }
370         return 0;
371 }
372
373 /* expand a file.  we prefer to use ftruncate, as that is what posix
374   says to use for mmap expansion */
375 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
376 {
377         char buf[8192];
378
379         if (tdb->read_only) {
380                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
381                            "Expand on read-only database");
382                 return -1;
383         }
384
385         if (tdb->flags & TDB_INTERNAL) {
386                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
387                 if (!new) {
388                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
389                                    "No memory to expand database");
390                         return -1;
391                 }
392                 tdb->map_ptr = new;
393                 tdb->map_size += addition;
394         } else {
395                 /* Unmap before trying to write; old TDB claimed OpenBSD had
396                  * problem with this otherwise. */
397                 tdb_munmap(tdb);
398
399                 /* If this fails, we try to fill anyway. */
400                 if (ftruncate(tdb->fd, tdb->map_size + addition))
401                         ;
402
403                 /* now fill the file with something. This ensures that the
404                    file isn't sparse, which would be very bad if we ran out of
405                    disk. This must be done with write, not via mmap */
406                 memset(buf, 0x43, sizeof(buf));
407                 if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
408                         return -1;
409                 tdb->map_size += addition;
410                 tdb_mmap(tdb);
411         }
412         return 0;
413 }
414
415 const void *tdb_access_read(struct tdb_context *tdb,
416                             tdb_off_t off, tdb_len_t len, bool convert)
417 {
418         const void *ret = NULL;
419
420         if (likely(!(tdb->flags & TDB_CONVERT)))
421                 ret = tdb->methods->direct(tdb, off, len, false);
422
423         if (!ret) {
424                 struct tdb_access_hdr *hdr;
425                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
426                 if (hdr) {
427                         hdr->next = tdb->access;
428                         tdb->access = hdr;
429                         ret = hdr + 1;
430                         if (convert)
431                                 tdb_convert(tdb, (void *)ret, len);
432                 }
433         } else
434                 tdb->direct_access++;
435
436         return ret;
437 }
438
439 void *tdb_access_write(struct tdb_context *tdb,
440                        tdb_off_t off, tdb_len_t len, bool convert)
441 {
442         void *ret = NULL;
443
444         if (tdb->read_only) {
445                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
446                            "Write to read-only database");
447                 return NULL;
448         }
449
450         if (likely(!(tdb->flags & TDB_CONVERT)))
451                 ret = tdb->methods->direct(tdb, off, len, true);
452
453         if (!ret) {
454                 struct tdb_access_hdr *hdr;
455                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
456                 if (hdr) {
457                         hdr->next = tdb->access;
458                         tdb->access = hdr;
459                         hdr->off = off;
460                         hdr->len = len;
461                         hdr->convert = convert;
462                         ret = hdr + 1;
463                         if (convert)
464                                 tdb_convert(tdb, (void *)ret, len);
465                 }
466         } else
467                 tdb->direct_access++;
468
469         return ret;
470 }
471
472 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
473 {
474         struct tdb_access_hdr **hp;
475
476         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
477                 if (*hp + 1 == p)
478                         return hp;
479         }
480         return NULL;
481 }
482
483 void tdb_access_release(struct tdb_context *tdb, const void *p)
484 {
485         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
486
487         if (hp) {
488                 hdr = *hp;
489                 *hp = hdr->next;
490                 free(hdr);
491         } else
492                 tdb->direct_access--;
493 }
494
495 int tdb_access_commit(struct tdb_context *tdb, void *p)
496 {
497         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
498         int ret = 0;
499
500         if (hp) {
501                 hdr = *hp;
502                 if (hdr->convert)
503                         ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
504                 else
505                         ret = tdb_write(tdb, hdr->off, p, hdr->len);
506                 *hp = hdr->next;
507                 free(hdr);
508         } else
509                 tdb->direct_access--;
510
511         return ret;
512 }
513
514 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
515                         bool write_mode)
516 {
517         if (unlikely(!tdb->map_ptr))
518                 return NULL;
519
520         if (unlikely(tdb_oob(tdb, off + len, true) == -1))
521                 return NULL;
522         return (char *)tdb->map_ptr + off;
523 }
524
525 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
526 {
527         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
528                 *s += val;
529 }
530
531 static const struct tdb_methods io_methods = {
532         tdb_read,
533         tdb_write,
534         tdb_oob,
535         tdb_expand_file,
536         tdb_direct,
537 };
538
539 /*
540   initialise the default methods table
541 */
542 void tdb_io_init(struct tdb_context *tdb)
543 {
544         tdb->methods = &io_methods;
545 }