git.ozlabs.org Git - ccan/blob - ccan/tdb2/io.c
failtest: don't insist parents and children write the same thing to files.
[ccan] / ccan / tdb2 / io.c
1  /* 
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_DEBUG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary 
68    note that "len" is the minimum length needed for the db
69 */
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
71 {
72         struct stat st;
73
74         /* We can't hold pointers during this: we could unmap! */
75         assert(!tdb->direct_access
76                || (tdb->flags & TDB_NOLOCK)
77                || tdb_has_expansion_lock(tdb));
78
79         if (len <= tdb->map_size)
80                 return 0;
81         if (tdb->flags & TDB_INTERNAL) {
82                 if (!probe) {
83                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
84                                  "tdb_oob len %lld beyond internal"
85                                  " malloc size %lld",
86                                  (long long)len,
87                                  (long long)tdb->map_size);
88                 }
89                 return -1;
90         }
91
92         if (tdb_lock_expand(tdb, F_RDLCK) != 0)
93                 return -1;
94
95         if (fstat(tdb->fd, &st) != 0) {
96                 tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
97                            "Failed to fstat file: %s", strerror(errno));
98                 tdb_unlock_expand(tdb, F_RDLCK);
99                 return -1;
100         }
101
102         tdb_unlock_expand(tdb, F_RDLCK);
103
104         if (st.st_size < (size_t)len) {
105                 if (!probe) {
106                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
107                                    "tdb_oob len %zu beyond eof at %zu",
108                                    (size_t)len, st.st_size);
109                 }
110                 return -1;
111         }
112
113         /* Unmap, update size, remap */
114         tdb_munmap(tdb);
115
116         tdb->map_size = st.st_size;
117         tdb_mmap(tdb);
118         return 0;
119 }
120
121 /* Endian conversion: we only ever deal with 8 byte quantities */
122 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
123 {
124         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
125                 uint64_t i, *p = (uint64_t *)buf;
126                 for (i = 0; i < size / 8; i++)
127                         p[i] = bswap_64(p[i]);
128         }
129         return buf;
130 }
131
132 /* FIXME: Return the off? */
133 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
134                               tdb_off_t base, uint64_t start, uint64_t end)
135 {
136         uint64_t i;
137         const uint64_t *val;
138
139         /* Zero vs non-zero is the same unconverted: minor optimization. */
140         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
141                               (end - start) * sizeof(tdb_off_t), false);
142         if (!val)
143                 return end;
144
145         for (i = 0; i < (end - start); i++) {
146                 if (val[i])
147                         break;
148         }
149         tdb_access_release(tdb, val);
150         return start + i;
151 }
152
153 /* Return first zero offset in num offset array, or num. */
154 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
155                            uint64_t num)
156 {
157         uint64_t i;
158         const uint64_t *val;
159
160         /* Zero vs non-zero is the same unconverted: minor optimization. */
161         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
162         if (!val)
163                 return num;
164
165         for (i = 0; i < num; i++) {
166                 if (!val[i])
167                         break;
168         }
169         tdb_access_release(tdb, val);
170         return i;
171 }
172
173 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
174 {
175         char buf[8192] = { 0 };
176         void *p = tdb->methods->direct(tdb, off, len, true);
177
178         assert(!tdb->read_only);
179         if (p) {
180                 memset(p, 0, len);
181                 return 0;
182         }
183         while (len) {
184                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
185                 if (tdb->methods->write(tdb, off, buf, todo) == -1)
186                         return -1;
187                 len -= todo;
188                 off += todo;
189         }
190         return 0;
191 }
192
193 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
194 {
195         tdb_off_t ret;
196
197         if (likely(!(tdb->flags & TDB_CONVERT))) {
198                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
199                                                     false);
200                 if (p)
201                         return *p;
202         }
203
204         if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
205                 return TDB_OFF_ERR;
206         return ret;
207 }
208
209 /* Even on files, we can get partial writes due to signals. */
210 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
211 {
212         while (len) {
213                 ssize_t ret;
214                 ret = pwrite(fd, buf, len, off);
215                 if (ret < 0)
216                         return false;
217                 if (ret == 0) {
218                         errno = ENOSPC;
219                         return false;
220                 }
221                 buf = (char *)buf + ret;
222                 off += ret;
223                 len -= ret;
224         }
225         return true;
226 }
227
228 /* Even on files, we can get partial reads due to signals. */
229 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
230 {
231         while (len) {
232                 ssize_t ret;
233                 ret = pread(fd, buf, len, off);
234                 if (ret < 0)
235                         return false;
236                 if (ret == 0) {
237                         /* ETOOSHORT? */
238                         errno = EWOULDBLOCK;
239                         return false;
240                 }
241                 buf = (char *)buf + ret;
242                 off += ret;
243                 len -= ret;
244         }
245         return true;
246 }
247
/* Read exactly "len" bytes from the current position of "fd" into
 * "buf", repeating read() after short reads.  Returns false if read()
 * fails (errno from read) or hits end of file early (errno is set to
 * EWOULDBLOCK); true once "len" bytes have been read. */
bool tdb_read_all(int fd, void *buf, size_t len)
{
        char *cursor = buf;

        while (len != 0) {
                ssize_t got = read(fd, cursor, len);

                if (got < 0)
                        return false;
                if (got == 0) {
                        /* ETOOSHORT? */
                        errno = EWOULDBLOCK;
                        return false;
                }
                cursor += got;
                len -= got;
        }
        return true;
}
265
266 /* write a lump of data at a specified offset */
267 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
268                      const void *buf, tdb_len_t len)
269 {
270         if (tdb->read_only) {
271                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
272                            "Write to read-only database");
273                 return -1;
274         }
275
276         /* FIXME: Bogus optimization? */
277         if (len == 0) {
278                 return 0;
279         }
280
281         if (tdb->methods->oob(tdb, off + len, 0) != 0)
282                 return -1;
283
284         if (tdb->map_ptr) {
285                 memcpy(off + (char *)tdb->map_ptr, buf, len);
286         } else {
287                 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
288                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
289                                    "tdb_write failed at %zu len=%zu (%s)",
290                                    (size_t)off, (size_t)len, strerror(errno));
291                         return -1;
292                 }
293         }
294         return 0;
295 }
296
297 /* read a lump of data at a specified offset */
298 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
299                     tdb_len_t len)
300 {
301         if (tdb->methods->oob(tdb, off + len, 0) != 0) {
302                 return -1;
303         }
304
305         if (tdb->map_ptr) {
306                 memcpy(buf, off + (char *)tdb->map_ptr, len);
307         } else {
308                 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
309                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
310                                    "tdb_read failed at %zu "
311                                    "len=%zu (%s) map_size=%zu",
312                                  (size_t)off, (size_t)len,
313                                  strerror(errno),
314                                  (size_t)tdb->map_size);
315                         return -1;
316                 }
317         }
318         return 0;
319 }
320
321 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
322                       const void *rec, size_t len)
323 {
324         int ret;
325         if (unlikely((tdb->flags & TDB_CONVERT))) {
326                 void *conv = malloc(len);
327                 if (!conv) {
328                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL,
329                                    "tdb_write: no memory converting"
330                                    " %zu bytes", len);
331                         return -1;
332                 }
333                 memcpy(conv, rec, len);
334                 ret = tdb->methods->write(tdb, off,
335                                           tdb_convert(tdb, conv, len), len);
336                 free(conv);
337         } else
338                 ret = tdb->methods->write(tdb, off, rec, len);
339
340         return ret;
341 }
342
343 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
344                       void *rec, size_t len)
345 {
346         int ret = tdb->methods->read(tdb, off, rec, len);
347         tdb_convert(tdb, rec, len);
348         return ret;
349 }
350
351 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
352 {
353         if (tdb->read_only) {
354                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
355                            "Write to read-only database");
356                 return -1;
357         }
358
359         if (likely(!(tdb->flags & TDB_CONVERT))) {
360                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
361                                                     true);
362                 if (p) {
363                         *p = val;
364                         return 0;
365                 }
366         }
367         return tdb_write_convert(tdb, off, &val, sizeof(val));
368 }
369
370 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
371                              tdb_len_t len, unsigned int prefix)
372 {
373         void *buf;
374
375         /* some systems don't like zero length malloc */
376         buf = malloc(prefix + len ? prefix + len : 1);
377         if (!buf) {
378                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_ERROR,
379                            "tdb_alloc_read malloc failed len=%zu",
380                            (size_t)(prefix + len));
381         } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix,
382                                                len) == -1)) {
383                 free(buf);
384                 buf = NULL;
385         }
386         return buf;
387 }
388
389 /* read a lump of data, allocating the space for it */
390 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
391 {
392         return _tdb_alloc_read(tdb, offset, len, 0);
393 }
394
395 static int fill(struct tdb_context *tdb,
396                 const void *buf, size_t size,
397                 tdb_off_t off, tdb_len_t len)
398 {
399         while (len) {
400                 size_t n = len > size ? size : len;
401
402                 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
403                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
404                                  "fill write failed: giving up!");
405                         return -1;
406                 }
407                 len -= n;
408                 off += n;
409         }
410         return 0;
411 }
412
413 /* expand a file.  we prefer to use ftruncate, as that is what posix
414   says to use for mmap expansion */
415 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
416 {
417         char buf[8192];
418
419         if (tdb->read_only) {
420                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
421                            "Expand on read-only database");
422                 return -1;
423         }
424
425         if (tdb->flags & TDB_INTERNAL) {
426                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
427                 if (!new) {
428                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL,
429                                    "No memory to expand database");
430                         return -1;
431                 }
432                 tdb->map_ptr = new;
433                 tdb->map_size += addition;
434         } else {
435                 /* Unmap before trying to write; old TDB claimed OpenBSD had
436                  * problem with this otherwise. */
437                 tdb_munmap(tdb);
438
439                 /* If this fails, we try to fill anyway. */
440                 if (ftruncate(tdb->fd, tdb->map_size + addition))
441                         ;
442
443                 /* now fill the file with something. This ensures that the
444                    file isn't sparse, which would be very bad if we ran out of
445                    disk. This must be done with write, not via mmap */
446                 memset(buf, 0x43, sizeof(buf));
447                 if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
448                         return -1;
449                 tdb->map_size += addition;
450                 tdb_mmap(tdb);
451         }
452         return 0;
453 }
454
455 const void *tdb_access_read(struct tdb_context *tdb,
456                             tdb_off_t off, tdb_len_t len, bool convert)
457 {
458         const void *ret = NULL; 
459
460         if (likely(!(tdb->flags & TDB_CONVERT)))
461                 ret = tdb->methods->direct(tdb, off, len, false);
462
463         if (!ret) {
464                 struct tdb_access_hdr *hdr;
465                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
466                 if (hdr) {
467                         hdr->next = tdb->access;
468                         tdb->access = hdr;
469                         ret = hdr + 1;
470                         if (convert)
471                                 tdb_convert(tdb, (void *)ret, len);
472                 }
473         } else
474                 tdb->direct_access++;
475
476         return ret;
477 }
478
479 void *tdb_access_write(struct tdb_context *tdb,
480                        tdb_off_t off, tdb_len_t len, bool convert)
481 {
482         void *ret = NULL;
483
484         if (tdb->read_only) {
485                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
486                            "Write to read-only database");
487                 return NULL;
488         }
489
490         if (likely(!(tdb->flags & TDB_CONVERT)))
491                 ret = tdb->methods->direct(tdb, off, len, true);
492
493         if (!ret) {
494                 struct tdb_access_hdr *hdr;
495                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
496                 if (hdr) {
497                         hdr->next = tdb->access;
498                         tdb->access = hdr;
499                         hdr->off = off;
500                         hdr->len = len;
501                         hdr->convert = convert;
502                         ret = hdr + 1;
503                         if (convert)
504                                 tdb_convert(tdb, (void *)ret, len);
505                 }
506         } else
507                 tdb->direct_access++;
508
509         return ret;
510 }
511
512 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
513 {
514         struct tdb_access_hdr **hp;
515
516         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
517                 if (*hp + 1 == p)
518                         return hp;
519         }
520         return NULL;
521 }
522
523 void tdb_access_release(struct tdb_context *tdb, const void *p)
524 {
525         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
526
527         if (hp) {
528                 hdr = *hp;
529                 *hp = hdr->next;
530                 free(hdr);
531         } else
532                 tdb->direct_access--;
533 }
534
535 int tdb_access_commit(struct tdb_context *tdb, void *p)
536 {
537         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
538         int ret = 0;
539
540         if (hp) {
541                 hdr = *hp;
542                 if (hdr->convert)
543                         ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
544                 else
545                         ret = tdb_write(tdb, hdr->off, p, hdr->len);
546                 *hp = hdr->next;
547                 free(hdr);
548         } else
549                 tdb->direct_access--;
550
551         return ret;
552 }
553
554 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
555                         bool write)
556 {
557         if (unlikely(!tdb->map_ptr))
558                 return NULL;
559
560         if (unlikely(tdb_oob(tdb, off + len, true) == -1))
561                 return NULL;
562         return (char *)tdb->map_ptr + off;
563 }
564
565 void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val)
566 {
567         if ((uintptr_t)stat < (uintptr_t)tdb->stats + tdb->stats->size)
568                 *stat += val;
569 }
570
571 static const struct tdb_methods io_methods = {
572         tdb_read,
573         tdb_write,
574         tdb_oob,
575         tdb_expand_file,
576         tdb_direct,
577 };
578
579 /*
580   initialise the default methods table
581 */
582 void tdb_io_init(struct tdb_context *tdb)
583 {
584         tdb->methods = &io_methods;
585 }