]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/io.c
71238afea9aeed1e5ee10b23a038670d2aeab436
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
71                               bool probe)
72 {
73         struct stat st;
74         enum TDB_ERROR ecode;
75
76         /* We can't hold pointers during this: we could unmap! */
77         assert(!tdb->direct_access
78                || (tdb->flags & TDB_NOLOCK)
79                || tdb_has_expansion_lock(tdb));
80
81         if (len <= tdb->map_size)
82                 return 0;
83         if (tdb->flags & TDB_INTERNAL) {
84                 if (!probe) {
85                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86                                  "tdb_oob len %lld beyond internal"
87                                  " malloc size %lld",
88                                  (long long)len,
89                                  (long long)tdb->map_size);
90                 }
91                 return TDB_ERR_IO;
92         }
93
94         ecode = tdb_lock_expand(tdb, F_RDLCK);
95         if (ecode != TDB_SUCCESS) {
96                 return ecode;
97         }
98
99         if (fstat(tdb->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return TDB_ERR_IO;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return TDB_ERR_IO;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb);
119
120         tdb->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return TDB_SUCCESS;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* FIXME: Return the off? */
137 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
138                               tdb_off_t base, uint64_t start, uint64_t end)
139 {
140         uint64_t i;
141         const uint64_t *val;
142
143         /* Zero vs non-zero is the same unconverted: minor optimization. */
144         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
145                               (end - start) * sizeof(tdb_off_t), false);
146         if (!val)
147                 return end;
148
149         for (i = 0; i < (end - start); i++) {
150                 if (val[i])
151                         break;
152         }
153         tdb_access_release(tdb, val);
154         return start + i;
155 }
156
157 /* Return first zero offset in num offset array, or num. */
158 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
159                            uint64_t num)
160 {
161         uint64_t i;
162         const uint64_t *val;
163
164         /* Zero vs non-zero is the same unconverted: minor optimization. */
165         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
166         if (!val)
167                 return num;
168
169         for (i = 0; i < num; i++) {
170                 if (!val[i])
171                         break;
172         }
173         tdb_access_release(tdb, val);
174         return i;
175 }
176
177 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
178 {
179         char buf[8192] = { 0 };
180         void *p = tdb->methods->direct(tdb, off, len, true);
181         enum TDB_ERROR ecode = TDB_SUCCESS;
182
183         assert(!tdb->read_only);
184         if (p) {
185                 memset(p, 0, len);
186                 return ecode;
187         }
188         while (len) {
189                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
190                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
191                 if (ecode != TDB_SUCCESS) {
192                         break;
193                 }
194                 len -= todo;
195                 off += todo;
196         }
197         return ecode;
198 }
199
200 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
201 {
202         tdb_off_t ret;
203         enum TDB_ERROR ecode;
204
205         if (likely(!(tdb->flags & TDB_CONVERT))) {
206                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
207                                                     false);
208                 if (p)
209                         return *p;
210         }
211
212         ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
213         if (ecode != TDB_SUCCESS) {
214                 tdb->ecode = ecode;
215                 return TDB_OFF_ERR;
216         }
217         return ret;
218 }
219
220 /* write a lump of data at a specified offset */
221 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
222                                 const void *buf, tdb_len_t len)
223 {
224         enum TDB_ERROR ecode;
225
226         if (tdb->read_only) {
227                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
228                                   "Write to read-only database");
229         }
230
231         /* FIXME: Bogus optimization? */
232         if (len == 0) {
233                 return TDB_SUCCESS;
234         }
235
236         ecode = tdb->methods->oob(tdb, off + len, 0);
237         if (ecode != TDB_SUCCESS) {
238                 return ecode;
239         }
240
241         if (tdb->map_ptr) {
242                 memcpy(off + (char *)tdb->map_ptr, buf, len);
243         } else {
244                 ssize_t ret;
245                 ret = pwrite(tdb->fd, buf, len, off);
246                 if (ret < len) {
247                         /* This shouldn't happen: we avoid sparse files. */
248                         if (ret >= 0)
249                                 errno = ENOSPC;
250
251                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
252                                           "tdb_write: %zi at %zu len=%zu (%s)",
253                                           ret, (size_t)off, (size_t)len,
254                                           strerror(errno));
255                 }
256         }
257         return TDB_SUCCESS;
258 }
259
260 /* read a lump of data at a specified offset */
261 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
262                                void *buf, tdb_len_t len)
263 {
264         enum TDB_ERROR ecode;
265
266         ecode = tdb->methods->oob(tdb, off + len, 0);
267         if (ecode != TDB_SUCCESS) {
268                 return ecode;
269         }
270
271         if (tdb->map_ptr) {
272                 memcpy(buf, off + (char *)tdb->map_ptr, len);
273         } else {
274                 ssize_t r = pread(tdb->fd, buf, len, off);
275                 if (r != len) {
276                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
277                                           "tdb_read failed with %zi at %zu "
278                                           "len=%zu (%s) map_size=%zu",
279                                           r, (size_t)off, (size_t)len,
280                                           strerror(errno),
281                                           (size_t)tdb->map_size);
282                 }
283         }
284         return TDB_SUCCESS;
285 }
286
287 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
288                                  const void *rec, size_t len)
289 {
290         enum TDB_ERROR ecode;
291
292         if (unlikely((tdb->flags & TDB_CONVERT))) {
293                 void *conv = malloc(len);
294                 if (!conv) {
295                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
296                                           "tdb_write: no memory converting"
297                                           " %zu bytes", len);
298                 }
299                 memcpy(conv, rec, len);
300                 ecode = tdb->methods->twrite(tdb, off,
301                                            tdb_convert(tdb, conv, len), len);
302                 free(conv);
303         } else {
304                 ecode = tdb->methods->twrite(tdb, off, rec, len);
305         }
306         return ecode;
307 }
308
309 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
310                                 void *rec, size_t len)
311 {
312         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
313         tdb_convert(tdb, rec, len);
314         return ecode;
315 }
316
317 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
318                              tdb_off_t off, tdb_off_t val)
319 {
320         if (tdb->read_only) {
321                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
322                                   "Write to read-only database");
323         }
324
325         if (likely(!(tdb->flags & TDB_CONVERT))) {
326                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
327                                                     true);
328                 if (p) {
329                         *p = val;
330                         return TDB_SUCCESS;
331                 }
332         }
333         return tdb_write_convert(tdb, off, &val, sizeof(val));
334 }
335
336 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
337                              tdb_len_t len, unsigned int prefix)
338 {
339         void *buf;
340         enum TDB_ERROR ecode;
341
342         /* some systems don't like zero length malloc */
343         buf = malloc(prefix + len ? prefix + len : 1);
344         if (!buf) {
345                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
346                            "tdb_alloc_read malloc failed len=%zu",
347                            (size_t)(prefix + len));
348         } else {
349                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
350                 if (unlikely(ecode != TDB_SUCCESS)) {
351                         tdb->ecode = ecode;
352                         free(buf);
353                         buf = NULL;
354                 }
355         }
356         return buf;
357 }
358
359 /* read a lump of data, allocating the space for it */
360 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
361 {
362         return _tdb_alloc_read(tdb, offset, len, 0);
363 }
364
365 static enum TDB_ERROR fill(struct tdb_context *tdb,
366                            const void *buf, size_t size,
367                            tdb_off_t off, tdb_len_t len)
368 {
369         while (len) {
370                 size_t n = len > size ? size : len;
371                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
372                 if (ret < n) {
373                         if (ret >= 0)
374                                 errno = ENOSPC;
375
376                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
377                                           "fill failed:"
378                                           " %zi at %zu len=%zu (%s)",
379                                           ret, (size_t)off, (size_t)len,
380                                           strerror(errno));
381                 }
382                 len -= n;
383                 off += n;
384         }
385         return TDB_SUCCESS;
386 }
387
388 /* expand a file.  we prefer to use ftruncate, as that is what posix
389   says to use for mmap expansion */
390 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
391                                       tdb_len_t addition)
392 {
393         char buf[8192];
394         enum TDB_ERROR ecode;
395
396         if (tdb->read_only) {
397                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
398                                   "Expand on read-only database");
399         }
400
401         if (tdb->flags & TDB_INTERNAL) {
402                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
403                 if (!new) {
404                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
405                                           "No memory to expand database");
406                 }
407                 tdb->map_ptr = new;
408                 tdb->map_size += addition;
409         } else {
410                 /* Unmap before trying to write; old TDB claimed OpenBSD had
411                  * problem with this otherwise. */
412                 tdb_munmap(tdb);
413
414                 /* If this fails, we try to fill anyway. */
415                 if (ftruncate(tdb->fd, tdb->map_size + addition))
416                         ;
417
418                 /* now fill the file with something. This ensures that the
419                    file isn't sparse, which would be very bad if we ran out of
420                    disk. This must be done with write, not via mmap */
421                 memset(buf, 0x43, sizeof(buf));
422                 ecode = fill(tdb, buf, sizeof(buf), tdb->map_size, addition);
423                 if (ecode != TDB_SUCCESS)
424                         return ecode;
425                 tdb->map_size += addition;
426                 tdb_mmap(tdb);
427         }
428         return TDB_SUCCESS;
429 }
430
431 const void *tdb_access_read(struct tdb_context *tdb,
432                             tdb_off_t off, tdb_len_t len, bool convert)
433 {
434         const void *ret = NULL;
435
436         if (likely(!(tdb->flags & TDB_CONVERT)))
437                 ret = tdb->methods->direct(tdb, off, len, false);
438
439         if (!ret) {
440                 struct tdb_access_hdr *hdr;
441                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
442                 if (hdr) {
443                         hdr->next = tdb->access;
444                         tdb->access = hdr;
445                         ret = hdr + 1;
446                         if (convert)
447                                 tdb_convert(tdb, (void *)ret, len);
448                 }
449         } else
450                 tdb->direct_access++;
451
452         return ret;
453 }
454
455 void *tdb_access_write(struct tdb_context *tdb,
456                        tdb_off_t off, tdb_len_t len, bool convert)
457 {
458         void *ret = NULL;
459
460         if (tdb->read_only) {
461                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
462                            "Write to read-only database");
463                 return NULL;
464         }
465
466         if (likely(!(tdb->flags & TDB_CONVERT)))
467                 ret = tdb->methods->direct(tdb, off, len, true);
468
469         if (!ret) {
470                 struct tdb_access_hdr *hdr;
471                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
472                 if (hdr) {
473                         hdr->next = tdb->access;
474                         tdb->access = hdr;
475                         hdr->off = off;
476                         hdr->len = len;
477                         hdr->convert = convert;
478                         ret = hdr + 1;
479                         if (convert)
480                                 tdb_convert(tdb, (void *)ret, len);
481                 }
482         } else
483                 tdb->direct_access++;
484
485         return ret;
486 }
487
488 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
489 {
490         struct tdb_access_hdr **hp;
491
492         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
493                 if (*hp + 1 == p)
494                         return hp;
495         }
496         return NULL;
497 }
498
499 void tdb_access_release(struct tdb_context *tdb, const void *p)
500 {
501         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
502
503         if (hp) {
504                 hdr = *hp;
505                 *hp = hdr->next;
506                 free(hdr);
507         } else
508                 tdb->direct_access--;
509 }
510
511 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
512 {
513         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
514         enum TDB_ERROR ecode;
515
516         if (hp) {
517                 hdr = *hp;
518                 if (hdr->convert)
519                         ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
520                 else
521                         ecode = tdb_write(tdb, hdr->off, p, hdr->len);
522                 *hp = hdr->next;
523                 free(hdr);
524         } else {
525                 tdb->direct_access--;
526                 ecode = TDB_SUCCESS;
527         }
528
529         return ecode;
530 }
531
532 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
533                         bool write_mode)
534 {
535         if (unlikely(!tdb->map_ptr))
536                 return NULL;
537
538         if (unlikely(tdb_oob(tdb, off + len, true) != TDB_SUCCESS))
539                 return NULL;
540         return (char *)tdb->map_ptr + off;
541 }
542
543 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
544 {
545         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
546                 *s += val;
547 }
548
/* Default I/O method table, installed by tdb_io_init().
 * The initializer is positional, so the entries must stay in the order in
 * which struct tdb_methods declares its members (declared elsewhere;
 * NOTE(review): confirm the order against that declaration before
 * reordering anything here). */
static const struct tdb_methods io_methods = {
	tdb_read,		/* read a lump of data at an offset */
	tdb_write,		/* write a lump of data at an offset */
	tdb_oob,		/* out-of-bounds check, remapping if the file grew */
	tdb_expand_file,	/* grow the database */
	tdb_direct,		/* pointer straight into the mapping, or NULL */
};
556
557 /*
558   initialise the default methods table
559 */
560 void tdb_io_init(struct tdb_context *tdb)
561 {
562         tdb->methods = &io_methods;
563 }