82c49406abc1e962a93ad24096203e6a25906245
[ccan] / ccan / tdb2 / io.c
1  /* 
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_DEBUG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary 
68    note that "len" is the minimum length needed for the db
69 */
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
71 {
72         struct stat st;
73
74         /* We can't hold pointers during this: we could unmap! */
75         assert(!tdb->direct_access
76                || (tdb->flags & TDB_NOLOCK)
77                || tdb_has_expansion_lock(tdb));
78
79         if (len <= tdb->map_size)
80                 return 0;
81         if (tdb->flags & TDB_INTERNAL) {
82                 if (!probe) {
83                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
84                                  "tdb_oob len %lld beyond internal"
85                                  " malloc size %lld",
86                                  (long long)len,
87                                  (long long)tdb->map_size);
88                 }
89                 return -1;
90         }
91
92         if (tdb_lock_expand(tdb, F_RDLCK) != 0)
93                 return -1;
94
95         if (fstat(tdb->fd, &st) != 0) {
96                 tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
97                            "Failed to fstat file: %s", strerror(errno));
98                 tdb_unlock_expand(tdb, F_RDLCK);
99                 return -1;
100         }
101
102         tdb_unlock_expand(tdb, F_RDLCK);
103
104         if (st.st_size < (size_t)len) {
105                 if (!probe) {
106                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
107                                    "tdb_oob len %zu beyond eof at %zu",
108                                    (size_t)len, st.st_size);
109                 }
110                 return -1;
111         }
112
113         /* Unmap, update size, remap */
114         tdb_munmap(tdb);
115
116         tdb->map_size = st.st_size;
117         tdb_mmap(tdb);
118         return 0;
119 }
120
121 /* Endian conversion: we only ever deal with 8 byte quantities */
122 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
123 {
124         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
125                 uint64_t i, *p = (uint64_t *)buf;
126                 for (i = 0; i < size / 8; i++)
127                         p[i] = bswap_64(p[i]);
128         }
129         return buf;
130 }
131
132 /* FIXME: Return the off? */
133 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
134                               tdb_off_t base, uint64_t start, uint64_t end)
135 {
136         uint64_t i;
137         const uint64_t *val;
138
139         /* Zero vs non-zero is the same unconverted: minor optimization. */
140         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
141                               (end - start) * sizeof(tdb_off_t), false);
142         if (!val)
143                 return end;
144
145         for (i = 0; i < (end - start); i++) {
146                 if (val[i])
147                         break;
148         }
149         tdb_access_release(tdb, val);
150         return start + i;
151 }
152
153 /* Return first zero offset in num offset array, or num. */
154 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
155                            uint64_t num)
156 {
157         uint64_t i;
158         const uint64_t *val;
159
160         /* Zero vs non-zero is the same unconverted: minor optimization. */
161         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
162         if (!val)
163                 return num;
164
165         for (i = 0; i < num; i++) {
166                 if (!val[i])
167                         break;
168         }
169         tdb_access_release(tdb, val);
170         return i;
171 }
172
173 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
174 {
175         char buf[8192] = { 0 };
176         void *p = tdb->methods->direct(tdb, off, len);
177
178         assert(!tdb->read_only);
179         if (p) {
180                 memset(p, 0, len);
181                 return 0;
182         }
183         while (len) {
184                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
185                 if (tdb->methods->write(tdb, off, buf, todo) == -1)
186                         return -1;
187                 len -= todo;
188                 off += todo;
189         }
190         return 0;
191 }
192
193 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
194 {
195         tdb_off_t ret;
196
197         if (likely(!(tdb->flags & TDB_CONVERT))) {
198                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p));
199                 if (p)
200                         return *p;
201         }
202
203         if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
204                 return TDB_OFF_ERR;
205         return ret;
206 }
207
208 /* Even on files, we can get partial writes due to signals. */
209 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
210 {
211         while (len) {
212                 ssize_t ret;
213                 ret = pwrite(fd, buf, len, off);
214                 if (ret < 0)
215                         return false;
216                 if (ret == 0) {
217                         errno = ENOSPC;
218                         return false;
219                 }
220                 buf = (char *)buf + ret;
221                 off += ret;
222                 len -= ret;
223         }
224         return true;
225 }
226
227 /* Even on files, we can get partial reads due to signals. */
228 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
229 {
230         while (len) {
231                 ssize_t ret;
232                 ret = pread(fd, buf, len, off);
233                 if (ret < 0)
234                         return false;
235                 if (ret == 0) {
236                         /* ETOOSHORT? */
237                         errno = EWOULDBLOCK;
238                         return false;
239                 }
240                 buf = (char *)buf + ret;
241                 off += ret;
242                 len -= ret;
243         }
244         return true;
245 }
246
/* Read exactly "len" bytes from fd's current position into buf,
 * continuing after short reads and retrying when read() is interrupted by
 * a signal before transferring anything (EINTR).
 *
 * Returns true once the buffer is full (trivially so for len == 0).
 * Returns false with errno set on any other read() error; hitting end of
 * file early is reported as EWOULDBLOCK. */
bool tdb_read_all(int fd, void *buf, size_t len)
{
        char *dst = buf;

        while (len) {
                ssize_t ret = read(fd, dst, len);

                if (ret < 0) {
                        /* Interrupted before any data moved: try again. */
                        if (errno == EINTR)
                                continue;
                        return false;
                }
                if (ret == 0) {
                        /* ETOOSHORT? */
                        errno = EWOULDBLOCK;
                        return false;
                }
                dst += ret;
                len -= ret;
        }
        return true;
}
264
265 /* write a lump of data at a specified offset */
266 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
267                      const void *buf, tdb_len_t len)
268 {
269         if (tdb->read_only) {
270                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
271                            "Write to read-only database");
272                 return -1;
273         }
274
275         /* FIXME: Bogus optimization? */
276         if (len == 0) {
277                 return 0;
278         }
279
280         if (tdb->methods->oob(tdb, off + len, 0) != 0)
281                 return -1;
282
283         if (tdb->map_ptr) {
284                 memcpy(off + (char *)tdb->map_ptr, buf, len);
285         } else {
286                 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
287                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
288                                    "tdb_write failed at %zu len=%zu (%s)",
289                                    (size_t)off, (size_t)len, strerror(errno));
290                         return -1;
291                 }
292         }
293         return 0;
294 }
295
296 /* read a lump of data at a specified offset */
297 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
298                     tdb_len_t len)
299 {
300         if (tdb->methods->oob(tdb, off + len, 0) != 0) {
301                 return -1;
302         }
303
304         if (tdb->map_ptr) {
305                 memcpy(buf, off + (char *)tdb->map_ptr, len);
306         } else {
307                 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
308                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
309                                    "tdb_read failed at %zu "
310                                    "len=%zu (%s) map_size=%zu",
311                                  (size_t)off, (size_t)len,
312                                  strerror(errno),
313                                  (size_t)tdb->map_size);
314                         return -1;
315                 }
316         }
317         return 0;
318 }
319
320 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
321                       const void *rec, size_t len)
322 {
323         int ret;
324         if (unlikely((tdb->flags & TDB_CONVERT))) {
325                 void *conv = malloc(len);
326                 if (!conv) {
327                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL,
328                                    "tdb_write: no memory converting"
329                                    " %zu bytes", len);
330                         return -1;
331                 }
332                 memcpy(conv, rec, len);
333                 ret = tdb->methods->write(tdb, off,
334                                           tdb_convert(tdb, conv, len), len);
335                 free(conv);
336         } else
337                 ret = tdb->methods->write(tdb, off, rec, len);
338
339         return ret;
340 }
341
342 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
343                       void *rec, size_t len)
344 {
345         int ret = tdb->methods->read(tdb, off, rec, len);
346         tdb_convert(tdb, rec, len);
347         return ret;
348 }
349
350 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
351 {
352         if (tdb->read_only) {
353                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
354                            "Write to read-only database");
355                 return -1;
356         }
357
358         if (likely(!(tdb->flags & TDB_CONVERT))) {
359                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p));
360                 if (p) {
361                         *p = val;
362                         return 0;
363                 }
364         }
365         return tdb_write_convert(tdb, off, &val, sizeof(val));
366 }
367
368 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
369                              tdb_len_t len, unsigned int prefix)
370 {
371         void *buf;
372
373         /* some systems don't like zero length malloc */
374         buf = malloc(prefix + len ? prefix + len : 1);
375         if (!buf) {
376                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_ERROR,
377                            "tdb_alloc_read malloc failed len=%zu",
378                            (size_t)(prefix + len));
379         } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix,
380                                                len) == -1)) {
381                 free(buf);
382                 buf = NULL;
383         }
384         return buf;
385 }
386
387 /* read a lump of data, allocating the space for it */
388 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
389 {
390         return _tdb_alloc_read(tdb, offset, len, 0);
391 }
392
393 static int fill(struct tdb_context *tdb,
394                 const void *buf, size_t size,
395                 tdb_off_t off, tdb_len_t len)
396 {
397         while (len) {
398                 size_t n = len > size ? size : len;
399
400                 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
401                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
402                                  "fill write failed: giving up!");
403                         return -1;
404                 }
405                 len -= n;
406                 off += n;
407         }
408         return 0;
409 }
410
411 /* expand a file.  we prefer to use ftruncate, as that is what posix
412   says to use for mmap expansion */
413 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
414 {
415         char buf[8192];
416
417         if (tdb->read_only) {
418                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
419                            "Expand on read-only database");
420                 return -1;
421         }
422
423         if (tdb->flags & TDB_INTERNAL) {
424                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
425                 if (!new) {
426                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL,
427                                    "No memory to expand database");
428                         return -1;
429                 }
430                 tdb->map_ptr = new;
431                 tdb->map_size += addition;
432         } else {
433                 /* Unmap before trying to write; old TDB claimed OpenBSD had
434                  * problem with this otherwise. */
435                 tdb_munmap(tdb);
436
437                 /* If this fails, we try to fill anyway. */
438                 if (ftruncate(tdb->fd, tdb->map_size + addition))
439                         ;
440
441                 /* now fill the file with something. This ensures that the
442                    file isn't sparse, which would be very bad if we ran out of
443                    disk. This must be done with write, not via mmap */
444                 memset(buf, 0x43, sizeof(buf));
445                 if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
446                         return -1;
447                 tdb->map_size += addition;
448                 tdb_mmap(tdb);
449         }
450         return 0;
451 }
452
453 /* This is only needed for tdb_access_commit, but used everywhere to simplify.
 * Space for it is reserved directly in front of every malloc'ed access
 * buffer; tdb_access_write fills the fields in, tdb_access_read leaves
 * them unset. */
454 struct tdb_access_hdr {
455         tdb_off_t off; /* database offset the buffer was read from */
456         tdb_len_t len; /* length of the buffer in bytes */
457         bool convert; /* commit the buffer with tdb_write_convert instead of tdb_write */
458 };
459
460 const void *tdb_access_read(struct tdb_context *tdb,
461                             tdb_off_t off, tdb_len_t len, bool convert)
462 {
463         const void *ret = NULL; 
464
465         if (likely(!(tdb->flags & TDB_CONVERT)))
466                 ret = tdb->methods->direct(tdb, off, len);
467
468         if (!ret) {
469                 struct tdb_access_hdr *hdr;
470                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
471                 if (hdr) {
472                         ret = hdr + 1;
473                         if (convert)
474                                 tdb_convert(tdb, (void *)ret, len);
475                 }
476         } else
477                 tdb->direct_access++;
478
479         return ret;
480 }
481
482 void *tdb_access_write(struct tdb_context *tdb,
483                        tdb_off_t off, tdb_len_t len, bool convert)
484 {
485         void *ret = NULL;
486
487         if (tdb->read_only) {
488                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
489                            "Write to read-only database");
490                 return NULL;
491         }
492
493         if (likely(!(tdb->flags & TDB_CONVERT)))
494                 ret = tdb->methods->direct(tdb, off, len);
495
496         if (!ret) {
497                 struct tdb_access_hdr *hdr;
498                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
499                 if (hdr) {
500                         hdr->off = off;
501                         hdr->len = len;
502                         hdr->convert = convert;
503                         ret = hdr + 1;
504                         if (convert)
505                                 tdb_convert(tdb, (void *)ret, len);
506                 }
507         } else
508                 tdb->direct_access++;
509
510         return ret;
511 }
512
513 bool is_direct(const struct tdb_context *tdb, const void *p)
514 {
515         return (tdb->map_ptr
516                 && (char *)p >= (char *)tdb->map_ptr
517                 && (char *)p < (char *)tdb->map_ptr + tdb->map_size);
518 }
519
520 void tdb_access_release(struct tdb_context *tdb, const void *p)
521 {
522         if (is_direct(tdb, p))
523                 tdb->direct_access--;
524         else
525                 free((struct tdb_access_hdr *)p - 1);
526 }
527
528 int tdb_access_commit(struct tdb_context *tdb, void *p)
529 {
530         int ret = 0;
531
532         if (!tdb->map_ptr
533             || (char *)p < (char *)tdb->map_ptr
534             || (char *)p >= (char *)tdb->map_ptr + tdb->map_size) {
535                 struct tdb_access_hdr *hdr;
536
537                 hdr = (struct tdb_access_hdr *)p - 1;
538                 if (hdr->convert)
539                         ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
540                 else
541                         ret = tdb_write(tdb, hdr->off, p, hdr->len);
542                 free(hdr);
543         } else
544                 tdb->direct_access--;
545
546         return ret;
547 }
548
549 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
550 {
551         if (unlikely(!tdb->map_ptr))
552                 return NULL;
553
554         if (unlikely(tdb_oob(tdb, off + len, true) == -1))
555                 return NULL;
556         return (char *)tdb->map_ptr + off;
557 }
558
559 void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val)
560 {
561         if ((uintptr_t)stat < (uintptr_t)tdb->stats + tdb->stats->size)
562                 *stat += val;
563 }
564
/* Default method table; tdb_io_init() stores its address in tdb->methods.
 * The entries are positional.
 * NOTE(review): their order must match the field order of struct
 * tdb_methods, which is declared outside this file - confirm before
 * reordering or inserting entries. */
565 static const struct tdb_methods io_methods = {
566         tdb_read,
567         tdb_write,
568         tdb_oob,
569         tdb_expand_file,
570         tdb_direct,
571 };
572
573 /*
574   initialise the default methods table
575 */
576 void tdb_io_init(struct tdb_context *tdb)
577 {
578         tdb->methods = &io_methods;
579 }