7fc268ea551bc8dcaed8cc05a3c04999e09a9cfb
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_DEBUG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
71 {
72         struct stat st;
73
74         /* We can't hold pointers during this: we could unmap! */
75         assert(!tdb->direct_access
76                || (tdb->flags & TDB_NOLOCK)
77                || tdb_has_expansion_lock(tdb));
78
79         if (len <= tdb->map_size)
80                 return 0;
81         if (tdb->flags & TDB_INTERNAL) {
82                 if (!probe) {
83                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
84                                  "tdb_oob len %lld beyond internal"
85                                  " malloc size %lld",
86                                  (long long)len,
87                                  (long long)tdb->map_size);
88                 }
89                 return -1;
90         }
91
92         if (tdb_lock_expand(tdb, F_RDLCK) != 0)
93                 return -1;
94
95         if (fstat(tdb->fd, &st) != 0) {
96                 tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
97                            "Failed to fstat file: %s", strerror(errno));
98                 tdb_unlock_expand(tdb, F_RDLCK);
99                 return -1;
100         }
101
102         tdb_unlock_expand(tdb, F_RDLCK);
103
104         if (st.st_size < (size_t)len) {
105                 if (!probe) {
106                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
107                                    "tdb_oob len %zu beyond eof at %zu",
108                                    (size_t)len, st.st_size);
109                 }
110                 return -1;
111         }
112
113         /* Unmap, update size, remap */
114         tdb_munmap(tdb);
115
116         tdb->map_size = st.st_size;
117         tdb_mmap(tdb);
118         return 0;
119 }
120
121 /* Endian conversion: we only ever deal with 8 byte quantities */
122 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
123 {
124         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
125                 uint64_t i, *p = (uint64_t *)buf;
126                 for (i = 0; i < size / 8; i++)
127                         p[i] = bswap_64(p[i]);
128         }
129         return buf;
130 }
131
132 /* FIXME: Return the off? */
133 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
134                               tdb_off_t base, uint64_t start, uint64_t end)
135 {
136         uint64_t i;
137         const uint64_t *val;
138
139         /* Zero vs non-zero is the same unconverted: minor optimization. */
140         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
141                               (end - start) * sizeof(tdb_off_t), false);
142         if (!val)
143                 return end;
144
145         for (i = 0; i < (end - start); i++) {
146                 if (val[i])
147                         break;
148         }
149         tdb_access_release(tdb, val);
150         return start + i;
151 }
152
153 /* Return first zero offset in num offset array, or num. */
154 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
155                            uint64_t num)
156 {
157         uint64_t i;
158         const uint64_t *val;
159
160         /* Zero vs non-zero is the same unconverted: minor optimization. */
161         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
162         if (!val)
163                 return num;
164
165         for (i = 0; i < num; i++) {
166                 if (!val[i])
167                         break;
168         }
169         tdb_access_release(tdb, val);
170         return i;
171 }
172
173 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
174 {
175         char buf[8192] = { 0 };
176         void *p = tdb->methods->direct(tdb, off, len, true);
177
178         assert(!tdb->read_only);
179         if (p) {
180                 memset(p, 0, len);
181                 return 0;
182         }
183         while (len) {
184                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
185                 if (tdb->methods->twrite(tdb, off, buf, todo) == -1)
186                         return -1;
187                 len -= todo;
188                 off += todo;
189         }
190         return 0;
191 }
192
193 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
194 {
195         tdb_off_t ret;
196
197         if (likely(!(tdb->flags & TDB_CONVERT))) {
198                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
199                                                     false);
200                 if (p)
201                         return *p;
202         }
203
204         if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
205                 return TDB_OFF_ERR;
206         return ret;
207 }
208
209 /* write a lump of data at a specified offset */
210 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
211                      const void *buf, tdb_len_t len)
212 {
213         if (tdb->read_only) {
214                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
215                            "Write to read-only database");
216                 return -1;
217         }
218
219         /* FIXME: Bogus optimization? */
220         if (len == 0) {
221                 return 0;
222         }
223
224         if (tdb->methods->oob(tdb, off + len, 0) != 0)
225                 return -1;
226
227         if (tdb->map_ptr) {
228                 memcpy(off + (char *)tdb->map_ptr, buf, len);
229         } else {
230                 ssize_t ret;
231                 ret = pwrite(tdb->fd, buf, len, off);
232                 if (ret < len) {
233                         /* This shouldn't happen: we avoid sparse files. */
234                         if (ret >= 0)
235                                 errno = ENOSPC;
236
237                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
238                                    "tdb_write: %zi at %zu len=%zu (%s)",
239                                    ret, (size_t)off, (size_t)len,
240                                    strerror(errno));
241                         return -1;
242                 }
243         }
244         return 0;
245 }
246
247 /* read a lump of data at a specified offset */
248 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
249                     tdb_len_t len)
250 {
251         if (tdb->methods->oob(tdb, off + len, 0) != 0) {
252                 return -1;
253         }
254
255         if (tdb->map_ptr) {
256                 memcpy(buf, off + (char *)tdb->map_ptr, len);
257         } else {
258                 ssize_t r = pread(tdb->fd, buf, len, off);
259                 if (r != len) {
260                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
261                                    "tdb_read failed with %zi at %zu "
262                                    "len=%zu (%s) map_size=%zu",
263                                    r, (size_t)off, (size_t)len,
264                                    strerror(errno),
265                                    (size_t)tdb->map_size);
266                         return -1;
267                 }
268         }
269         return 0;
270 }
271
272 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
273                       const void *rec, size_t len)
274 {
275         int ret;
276         if (unlikely((tdb->flags & TDB_CONVERT))) {
277                 void *conv = malloc(len);
278                 if (!conv) {
279                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL,
280                                    "tdb_write: no memory converting"
281                                    " %zu bytes", len);
282                         return -1;
283                 }
284                 memcpy(conv, rec, len);
285                 ret = tdb->methods->twrite(tdb, off,
286                                            tdb_convert(tdb, conv, len), len);
287                 free(conv);
288         } else
289                 ret = tdb->methods->twrite(tdb, off, rec, len);
290
291         return ret;
292 }
293
294 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
295                       void *rec, size_t len)
296 {
297         int ret = tdb->methods->tread(tdb, off, rec, len);
298         tdb_convert(tdb, rec, len);
299         return ret;
300 }
301
302 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
303 {
304         if (tdb->read_only) {
305                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
306                            "Write to read-only database");
307                 return -1;
308         }
309
310         if (likely(!(tdb->flags & TDB_CONVERT))) {
311                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
312                                                     true);
313                 if (p) {
314                         *p = val;
315                         return 0;
316                 }
317         }
318         return tdb_write_convert(tdb, off, &val, sizeof(val));
319 }
320
321 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
322                              tdb_len_t len, unsigned int prefix)
323 {
324         void *buf;
325
326         /* some systems don't like zero length malloc */
327         buf = malloc(prefix + len ? prefix + len : 1);
328         if (!buf) {
329                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_ERROR,
330                            "tdb_alloc_read malloc failed len=%zu",
331                            (size_t)(prefix + len));
332         } else if (unlikely(tdb->methods->tread(tdb, offset, buf+prefix, len)
333                             == -1)) {
334                 free(buf);
335                 buf = NULL;
336         }
337         return buf;
338 }
339
340 /* read a lump of data, allocating the space for it */
341 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
342 {
343         return _tdb_alloc_read(tdb, offset, len, 0);
344 }
345
346 static int fill(struct tdb_context *tdb,
347                 const void *buf, size_t size,
348                 tdb_off_t off, tdb_len_t len)
349 {
350         while (len) {
351                 size_t n = len > size ? size : len;
352                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
353                 if (ret < n) {
354                         if (ret >= 0)
355                                 errno = ENOSPC;
356
357                         tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL,
358                                    "fill failed: %zi at %zu len=%zu (%s)",
359                                    ret, (size_t)off, (size_t)len,
360                                    strerror(errno));
361                         return -1;
362                 }
363                 len -= n;
364                 off += n;
365         }
366         return 0;
367 }
368
369 /* expand a file.  we prefer to use ftruncate, as that is what posix
370   says to use for mmap expansion */
371 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
372 {
373         char buf[8192];
374
375         if (tdb->read_only) {
376                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
377                            "Expand on read-only database");
378                 return -1;
379         }
380
381         if (tdb->flags & TDB_INTERNAL) {
382                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
383                 if (!new) {
384                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL,
385                                    "No memory to expand database");
386                         return -1;
387                 }
388                 tdb->map_ptr = new;
389                 tdb->map_size += addition;
390         } else {
391                 /* Unmap before trying to write; old TDB claimed OpenBSD had
392                  * problem with this otherwise. */
393                 tdb_munmap(tdb);
394
395                 /* If this fails, we try to fill anyway. */
396                 if (ftruncate(tdb->fd, tdb->map_size + addition))
397                         ;
398
399                 /* now fill the file with something. This ensures that the
400                    file isn't sparse, which would be very bad if we ran out of
401                    disk. This must be done with write, not via mmap */
402                 memset(buf, 0x43, sizeof(buf));
403                 if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
404                         return -1;
405                 tdb->map_size += addition;
406                 tdb_mmap(tdb);
407         }
408         return 0;
409 }
410
411 const void *tdb_access_read(struct tdb_context *tdb,
412                             tdb_off_t off, tdb_len_t len, bool convert)
413 {
414         const void *ret = NULL;
415
416         if (likely(!(tdb->flags & TDB_CONVERT)))
417                 ret = tdb->methods->direct(tdb, off, len, false);
418
419         if (!ret) {
420                 struct tdb_access_hdr *hdr;
421                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
422                 if (hdr) {
423                         hdr->next = tdb->access;
424                         tdb->access = hdr;
425                         ret = hdr + 1;
426                         if (convert)
427                                 tdb_convert(tdb, (void *)ret, len);
428                 }
429         } else
430                 tdb->direct_access++;
431
432         return ret;
433 }
434
435 void *tdb_access_write(struct tdb_context *tdb,
436                        tdb_off_t off, tdb_len_t len, bool convert)
437 {
438         void *ret = NULL;
439
440         if (tdb->read_only) {
441                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING,
442                            "Write to read-only database");
443                 return NULL;
444         }
445
446         if (likely(!(tdb->flags & TDB_CONVERT)))
447                 ret = tdb->methods->direct(tdb, off, len, true);
448
449         if (!ret) {
450                 struct tdb_access_hdr *hdr;
451                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
452                 if (hdr) {
453                         hdr->next = tdb->access;
454                         tdb->access = hdr;
455                         hdr->off = off;
456                         hdr->len = len;
457                         hdr->convert = convert;
458                         ret = hdr + 1;
459                         if (convert)
460                                 tdb_convert(tdb, (void *)ret, len);
461                 }
462         } else
463                 tdb->direct_access++;
464
465         return ret;
466 }
467
468 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
469 {
470         struct tdb_access_hdr **hp;
471
472         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
473                 if (*hp + 1 == p)
474                         return hp;
475         }
476         return NULL;
477 }
478
479 void tdb_access_release(struct tdb_context *tdb, const void *p)
480 {
481         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
482
483         if (hp) {
484                 hdr = *hp;
485                 *hp = hdr->next;
486                 free(hdr);
487         } else
488                 tdb->direct_access--;
489 }
490
491 int tdb_access_commit(struct tdb_context *tdb, void *p)
492 {
493         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
494         int ret = 0;
495
496         if (hp) {
497                 hdr = *hp;
498                 if (hdr->convert)
499                         ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
500                 else
501                         ret = tdb_write(tdb, hdr->off, p, hdr->len);
502                 *hp = hdr->next;
503                 free(hdr);
504         } else
505                 tdb->direct_access--;
506
507         return ret;
508 }
509
510 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
511                         bool write_mode)
512 {
513         if (unlikely(!tdb->map_ptr))
514                 return NULL;
515
516         if (unlikely(tdb_oob(tdb, off + len, true) == -1))
517                 return NULL;
518         return (char *)tdb->map_ptr + off;
519 }
520
521 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
522 {
523         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
524                 *s += val;
525 }
526
/* Default I/O method table, installed by tdb_io_init().
 *
 * The initialisers are positional, so their order has to match the member
 * order of struct tdb_methods (declared elsewhere).  Elsewhere in this
 * file the members are used as tread, twrite, oob and direct; the name of
 * the member holding tdb_expand_file is not visible here. */
static const struct tdb_methods io_methods = {
        tdb_read,               /* read a range of the database */
        tdb_write,              /* write a range of the database */
        tdb_oob,                /* bounds check, remapping if the file grew */
        tdb_expand_file,        /* grow the database */
        tdb_direct,             /* pointer into the mapping, if mapped */
};
534
535 /*
536   initialise the default methods table
537 */
538 void tdb_io_init(struct tdb_context *tdb)
539 {
540         tdb->methods = &io_methods;
541 }