]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/io.c
tdb2: cleanup oob handling.
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_file *file)
33 {
34         if (file->fd == -1)
35                 return;
36
37         if (file->map_ptr) {
38                 munmap(file->map_ptr, file->map_size);
39                 file->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         /* size_t can be smaller than off_t. */
52         if ((size_t)tdb->file->map_size == tdb->file->map_size) {
53                 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
54                                           tdb->mmap_flags,
55                                           MAP_SHARED, tdb->file->fd, 0);
56         } else
57                 tdb->file->map_ptr = MAP_FAILED;
58
59         /*
60          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
61          */
62         if (tdb->file->map_ptr == MAP_FAILED) {
63                 tdb->file->map_ptr = NULL;
64                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
65                            "tdb_mmap failed for size %lld (%s)",
66                            (long long)tdb->file->map_size, strerror(errno));
67         }
68 }
69
70 /* check for an out of bounds access - if it is out of bounds then
71    see if the database has been expanded by someone else and expand
72    if necessary
73    note that "len" is the minimum length needed for the db.
74
75    If probe is true, len being too large isn't a failure.
76 */
77 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
78                               bool probe)
79 {
80         struct stat st;
81         enum TDB_ERROR ecode;
82
83         /* We can't hold pointers during this: we could unmap! */
84         assert(!tdb->direct_access
85                || (tdb->flags & TDB_NOLOCK)
86                || tdb_has_expansion_lock(tdb));
87
88         if (len <= tdb->file->map_size)
89                 return TDB_SUCCESS;
90         if (tdb->flags & TDB_INTERNAL) {
91                 if (probe)
92                         return TDB_SUCCESS;
93
94                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
95                            "tdb_oob len %lld beyond internal"
96                            " malloc size %lld",
97                            (long long)len,
98                            (long long)tdb->file->map_size);
99                 return TDB_ERR_IO;
100         }
101
102         ecode = tdb_lock_expand(tdb, F_RDLCK);
103         if (ecode != TDB_SUCCESS) {
104                 return ecode;
105         }
106
107         if (fstat(tdb->file->fd, &st) != 0) {
108                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
109                            "Failed to fstat file: %s", strerror(errno));
110                 tdb_unlock_expand(tdb, F_RDLCK);
111                 return TDB_ERR_IO;
112         }
113
114         tdb_unlock_expand(tdb, F_RDLCK);
115
116         if (st.st_size < (size_t)len) {
117                 if (probe)
118                         return TDB_SUCCESS;
119
120                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
121                            "tdb_oob len %zu beyond eof at %zu",
122                            (size_t)len, st.st_size);
123                 return TDB_ERR_IO;
124         }
125
126         /* Unmap, update size, remap */
127         tdb_munmap(tdb->file);
128
129         tdb->file->map_size = st.st_size;
130         tdb_mmap(tdb);
131         return TDB_SUCCESS;
132 }
133
134 /* Endian conversion: we only ever deal with 8 byte quantities */
135 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
136 {
137         assert(size % 8 == 0);
138         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
139                 uint64_t i, *p = (uint64_t *)buf;
140                 for (i = 0; i < size / 8; i++)
141                         p[i] = bswap_64(p[i]);
142         }
143         return buf;
144 }
145
146 /* Return first non-zero offset in offset array, or end, or -ve error. */
147 /* FIXME: Return the off? */
148 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
149                               tdb_off_t base, uint64_t start, uint64_t end)
150 {
151         uint64_t i;
152         const uint64_t *val;
153
154         /* Zero vs non-zero is the same unconverted: minor optimization. */
155         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
156                               (end - start) * sizeof(tdb_off_t), false);
157         if (TDB_PTR_IS_ERR(val)) {
158                 return TDB_PTR_ERR(val);
159         }
160
161         for (i = 0; i < (end - start); i++) {
162                 if (val[i])
163                         break;
164         }
165         tdb_access_release(tdb, val);
166         return start + i;
167 }
168
169 /* Return first zero offset in num offset array, or num, or -ve error. */
170 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
171                            uint64_t num)
172 {
173         uint64_t i;
174         const uint64_t *val;
175
176         /* Zero vs non-zero is the same unconverted: minor optimization. */
177         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
178         if (TDB_PTR_IS_ERR(val)) {
179                 return TDB_PTR_ERR(val);
180         }
181
182         for (i = 0; i < num; i++) {
183                 if (!val[i])
184                         break;
185         }
186         tdb_access_release(tdb, val);
187         return i;
188 }
189
190 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
191 {
192         char buf[8192] = { 0 };
193         void *p = tdb->methods->direct(tdb, off, len, true);
194         enum TDB_ERROR ecode = TDB_SUCCESS;
195
196         assert(!tdb->read_only);
197         if (TDB_PTR_IS_ERR(p)) {
198                 return TDB_PTR_ERR(p);
199         }
200         if (p) {
201                 memset(p, 0, len);
202                 return ecode;
203         }
204         while (len) {
205                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
206                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
207                 if (ecode != TDB_SUCCESS) {
208                         break;
209                 }
210                 len -= todo;
211                 off += todo;
212         }
213         return ecode;
214 }
215
216 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
217 {
218         tdb_off_t ret;
219         enum TDB_ERROR ecode;
220
221         if (likely(!(tdb->flags & TDB_CONVERT))) {
222                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
223                                                     false);
224                 if (TDB_PTR_IS_ERR(p)) {
225                         return TDB_PTR_ERR(p);
226                 }
227                 if (p)
228                         return *p;
229         }
230
231         ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
232         if (ecode != TDB_SUCCESS) {
233                 return ecode;
234         }
235         return ret;
236 }
237
238 /* write a lump of data at a specified offset */
239 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
240                                 const void *buf, tdb_len_t len)
241 {
242         enum TDB_ERROR ecode;
243
244         if (tdb->read_only) {
245                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
246                                   "Write to read-only database");
247         }
248
249         ecode = tdb->methods->oob(tdb, off + len, false);
250         if (ecode != TDB_SUCCESS) {
251                 return ecode;
252         }
253
254         if (tdb->file->map_ptr) {
255                 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
256         } else {
257                 ssize_t ret;
258                 ret = pwrite(tdb->file->fd, buf, len, off);
259                 if (ret != len) {
260                         /* This shouldn't happen: we avoid sparse files. */
261                         if (ret >= 0)
262                                 errno = ENOSPC;
263
264                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
265                                           "tdb_write: %zi at %zu len=%zu (%s)",
266                                           ret, (size_t)off, (size_t)len,
267                                           strerror(errno));
268                 }
269         }
270         return TDB_SUCCESS;
271 }
272
273 /* read a lump of data at a specified offset */
274 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
275                                void *buf, tdb_len_t len)
276 {
277         enum TDB_ERROR ecode;
278
279         ecode = tdb->methods->oob(tdb, off + len, false);
280         if (ecode != TDB_SUCCESS) {
281                 return ecode;
282         }
283
284         if (tdb->file->map_ptr) {
285                 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
286         } else {
287                 ssize_t r = pread(tdb->file->fd, buf, len, off);
288                 if (r != len) {
289                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
290                                           "tdb_read failed with %zi at %zu "
291                                           "len=%zu (%s) map_size=%zu",
292                                           r, (size_t)off, (size_t)len,
293                                           strerror(errno),
294                                           (size_t)tdb->file->map_size);
295                 }
296         }
297         return TDB_SUCCESS;
298 }
299
300 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
301                                  const void *rec, size_t len)
302 {
303         enum TDB_ERROR ecode;
304
305         if (unlikely((tdb->flags & TDB_CONVERT))) {
306                 void *conv = malloc(len);
307                 if (!conv) {
308                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
309                                           "tdb_write: no memory converting"
310                                           " %zu bytes", len);
311                 }
312                 memcpy(conv, rec, len);
313                 ecode = tdb->methods->twrite(tdb, off,
314                                            tdb_convert(tdb, conv, len), len);
315                 free(conv);
316         } else {
317                 ecode = tdb->methods->twrite(tdb, off, rec, len);
318         }
319         return ecode;
320 }
321
322 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
323                                 void *rec, size_t len)
324 {
325         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
326         tdb_convert(tdb, rec, len);
327         return ecode;
328 }
329
330 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
331                              tdb_off_t off, tdb_off_t val)
332 {
333         if (tdb->read_only) {
334                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
335                                   "Write to read-only database");
336         }
337
338         if (likely(!(tdb->flags & TDB_CONVERT))) {
339                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
340                                                     true);
341                 if (TDB_PTR_IS_ERR(p)) {
342                         return TDB_PTR_ERR(p);
343                 }
344                 if (p) {
345                         *p = val;
346                         return TDB_SUCCESS;
347                 }
348         }
349         return tdb_write_convert(tdb, off, &val, sizeof(val));
350 }
351
352 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
353                              tdb_len_t len, unsigned int prefix)
354 {
355         unsigned char *buf;
356         enum TDB_ERROR ecode;
357
358         /* some systems don't like zero length malloc */
359         buf = malloc(prefix + len ? prefix + len : 1);
360         if (!buf) {
361                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
362                            "tdb_alloc_read malloc failed len=%zu",
363                            (size_t)(prefix + len));
364                 return TDB_ERR_PTR(TDB_ERR_OOM);
365         } else {
366                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
367                 if (unlikely(ecode != TDB_SUCCESS)) {
368                         free(buf);
369                         return TDB_ERR_PTR(ecode);
370                 }
371         }
372         return buf;
373 }
374
/* Read a lump of data, allocating the space for it.
 *
 * Thin wrapper around _tdb_alloc_read() with no prefix bytes reserved:
 * returns a malloc'ed buffer holding "len" bytes read from "offset", or
 * an error pointer (see TDB_PTR_IS_ERR) on failure. */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
{
        return _tdb_alloc_read(tdb, offset, len, 0);
}
380
381 static enum TDB_ERROR fill(struct tdb_context *tdb,
382                            const void *buf, size_t size,
383                            tdb_off_t off, tdb_len_t len)
384 {
385         while (len) {
386                 size_t n = len > size ? size : len;
387                 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
388                 if (ret != n) {
389                         if (ret >= 0)
390                                 errno = ENOSPC;
391
392                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
393                                           "fill failed:"
394                                           " %zi at %zu len=%zu (%s)",
395                                           ret, (size_t)off, (size_t)len,
396                                           strerror(errno));
397                 }
398                 len -= n;
399                 off += n;
400         }
401         return TDB_SUCCESS;
402 }
403
404 /* expand a file.  we prefer to use ftruncate, as that is what posix
405   says to use for mmap expansion */
406 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
407                                       tdb_len_t addition)
408 {
409         char buf[8192];
410         enum TDB_ERROR ecode;
411
412         if (tdb->read_only) {
413                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
414                                   "Expand on read-only database");
415         }
416
417         if (tdb->flags & TDB_INTERNAL) {
418                 char *new = realloc(tdb->file->map_ptr,
419                                     tdb->file->map_size + addition);
420                 if (!new) {
421                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
422                                           "No memory to expand database");
423                 }
424                 tdb->file->map_ptr = new;
425                 tdb->file->map_size += addition;
426         } else {
427                 /* Unmap before trying to write; old TDB claimed OpenBSD had
428                  * problem with this otherwise. */
429                 tdb_munmap(tdb->file);
430
431                 /* If this fails, we try to fill anyway. */
432                 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
433                         ;
434
435                 /* now fill the file with something. This ensures that the
436                    file isn't sparse, which would be very bad if we ran out of
437                    disk. This must be done with write, not via mmap */
438                 memset(buf, 0x43, sizeof(buf));
439                 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
440                              addition);
441                 if (ecode != TDB_SUCCESS)
442                         return ecode;
443                 tdb->file->map_size += addition;
444                 tdb_mmap(tdb);
445         }
446         return TDB_SUCCESS;
447 }
448
449 const void *tdb_access_read(struct tdb_context *tdb,
450                             tdb_off_t off, tdb_len_t len, bool convert)
451 {
452         void *ret = NULL;
453
454         if (likely(!(tdb->flags & TDB_CONVERT))) {
455                 ret = tdb->methods->direct(tdb, off, len, false);
456
457                 if (TDB_PTR_IS_ERR(ret)) {
458                         return ret;
459                 }
460         }
461         if (!ret) {
462                 struct tdb_access_hdr *hdr;
463                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
464                 if (TDB_PTR_IS_ERR(hdr)) {
465                         return hdr;
466                 }
467                 hdr->next = tdb->access;
468                 tdb->access = hdr;
469                 ret = hdr + 1;
470                 if (convert) {
471                         tdb_convert(tdb, (void *)ret, len);
472                 }
473         } else
474                 tdb->direct_access++;
475
476         return ret;
477 }
478
479 void *tdb_access_write(struct tdb_context *tdb,
480                        tdb_off_t off, tdb_len_t len, bool convert)
481 {
482         void *ret = NULL;
483
484         if (tdb->read_only) {
485                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
486                            "Write to read-only database");
487                 return TDB_ERR_PTR(TDB_ERR_RDONLY);
488         }
489
490         if (likely(!(tdb->flags & TDB_CONVERT))) {
491                 ret = tdb->methods->direct(tdb, off, len, true);
492
493                 if (TDB_PTR_IS_ERR(ret)) {
494                         return ret;
495                 }
496         }
497
498         if (!ret) {
499                 struct tdb_access_hdr *hdr;
500                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
501                 if (TDB_PTR_IS_ERR(hdr)) {
502                         return hdr;
503                 }
504                 hdr->next = tdb->access;
505                 tdb->access = hdr;
506                 hdr->off = off;
507                 hdr->len = len;
508                 hdr->convert = convert;
509                 ret = hdr + 1;
510                 if (convert)
511                         tdb_convert(tdb, (void *)ret, len);
512         } else
513                 tdb->direct_access++;
514
515         return ret;
516 }
517
518 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
519 {
520         struct tdb_access_hdr **hp;
521
522         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
523                 if (*hp + 1 == p)
524                         return hp;
525         }
526         return NULL;
527 }
528
529 void tdb_access_release(struct tdb_context *tdb, const void *p)
530 {
531         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
532
533         if (hp) {
534                 hdr = *hp;
535                 *hp = hdr->next;
536                 free(hdr);
537         } else
538                 tdb->direct_access--;
539 }
540
541 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
542 {
543         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
544         enum TDB_ERROR ecode;
545
546         if (hp) {
547                 hdr = *hp;
548                 if (hdr->convert)
549                         ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
550                 else
551                         ecode = tdb_write(tdb, hdr->off, p, hdr->len);
552                 *hp = hdr->next;
553                 free(hdr);
554         } else {
555                 tdb->direct_access--;
556                 ecode = TDB_SUCCESS;
557         }
558
559         return ecode;
560 }
561
562 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
563                         bool write_mode)
564 {
565         enum TDB_ERROR ecode;
566
567         if (unlikely(!tdb->file->map_ptr))
568                 return NULL;
569
570         ecode = tdb_oob(tdb, off + len, false);
571         if (unlikely(ecode != TDB_SUCCESS))
572                 return TDB_ERR_PTR(ecode);
573         return (char *)tdb->file->map_ptr + off;
574 }
575
576 void tdb_inc_seqnum(struct tdb_context *tdb)
577 {
578         tdb_off_t seq;
579
580         if (likely(!(tdb->flags & TDB_CONVERT))) {
581                 int64_t *direct;
582
583                 direct = tdb->methods->direct(tdb,
584                                               offsetof(struct tdb_header,
585                                                        seqnum),
586                                               sizeof(*direct), true);
587                 if (likely(direct)) {
588                         /* Don't let it go negative, even briefly */
589                         if (unlikely((*direct) + 1) < 0)
590                                 *direct = 0;
591                         (*direct)++;
592                         return;
593                 }
594         }
595
596         seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
597         if (!TDB_OFF_IS_ERR(seq)) {
598                 seq++;
599                 if (unlikely((int64_t)seq < 0))
600                         seq = 0;
601                 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
602         }
603 }
604
/* Default method table: the plain file/mmap implementations defined in
 * this file.  The initializers are positional, so their order has to
 * match the member order of struct tdb_methods (declared elsewhere). */
static const struct tdb_methods io_methods = {
        tdb_read,
        tdb_write,
        tdb_oob,
        tdb_expand_file,
        tdb_direct,
};
612
/*
  Initialise the default methods table: point tdb->methods at the
  file-scope io_methods table.
*/
void tdb_io_init(struct tdb_context *tdb)
{
        tdb->methods = &io_methods;
}