git.ozlabs.org Git - ccan/blob - ccan/tdb2/io.c
tdb2: rework some io.c functions to encode errors in their pointer returns.
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
71                               bool probe)
72 {
73         struct stat st;
74         enum TDB_ERROR ecode;
75
76         /* We can't hold pointers during this: we could unmap! */
77         assert(!tdb->direct_access
78                || (tdb->flags & TDB_NOLOCK)
79                || tdb_has_expansion_lock(tdb));
80
81         if (len <= tdb->map_size)
82                 return 0;
83         if (tdb->flags & TDB_INTERNAL) {
84                 if (!probe) {
85                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86                                  "tdb_oob len %lld beyond internal"
87                                  " malloc size %lld",
88                                  (long long)len,
89                                  (long long)tdb->map_size);
90                 }
91                 return TDB_ERR_IO;
92         }
93
94         ecode = tdb_lock_expand(tdb, F_RDLCK);
95         if (ecode != TDB_SUCCESS) {
96                 return ecode;
97         }
98
99         if (fstat(tdb->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return TDB_ERR_IO;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return TDB_ERR_IO;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb);
119
120         tdb->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return TDB_SUCCESS;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* FIXME: Return the off? */
137 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
138                               tdb_off_t base, uint64_t start, uint64_t end)
139 {
140         uint64_t i;
141         const uint64_t *val;
142
143         /* Zero vs non-zero is the same unconverted: minor optimization. */
144         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
145                               (end - start) * sizeof(tdb_off_t), false);
146         if (TDB_PTR_IS_ERR(val)) {
147                 tdb->ecode = TDB_PTR_ERR(val);
148                 return end;
149         }
150
151         for (i = 0; i < (end - start); i++) {
152                 if (val[i])
153                         break;
154         }
155         tdb_access_release(tdb, val);
156         return start + i;
157 }
158
159 /* Return first zero offset in num offset array, or num. */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
161                            uint64_t num)
162 {
163         uint64_t i;
164         const uint64_t *val;
165
166         /* Zero vs non-zero is the same unconverted: minor optimization. */
167         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168         if (TDB_PTR_IS_ERR(val)) {
169                 tdb->ecode = TDB_PTR_ERR(val);
170                 return num;
171         }
172
173         for (i = 0; i < num; i++) {
174                 if (!val[i])
175                         break;
176         }
177         tdb_access_release(tdb, val);
178         return i;
179 }
180
181 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
182 {
183         char buf[8192] = { 0 };
184         void *p = tdb->methods->direct(tdb, off, len, true);
185         enum TDB_ERROR ecode = TDB_SUCCESS;
186
187         assert(!tdb->read_only);
188         if (TDB_PTR_IS_ERR(p)) {
189                 return TDB_PTR_ERR(p);
190         }
191         if (p) {
192                 memset(p, 0, len);
193                 return ecode;
194         }
195         while (len) {
196                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
197                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
198                 if (ecode != TDB_SUCCESS) {
199                         break;
200                 }
201                 len -= todo;
202                 off += todo;
203         }
204         return ecode;
205 }
206
207 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
208 {
209         tdb_off_t ret;
210         enum TDB_ERROR ecode;
211
212         if (likely(!(tdb->flags & TDB_CONVERT))) {
213                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
214                                                     false);
215                 if (TDB_PTR_IS_ERR(p)) {
216                         tdb->ecode = TDB_PTR_ERR(p);
217                         return TDB_OFF_ERR;
218                 }
219                 if (p)
220                         return *p;
221         }
222
223         ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
224         if (ecode != TDB_SUCCESS) {
225                 tdb->ecode = ecode;
226                 return TDB_OFF_ERR;
227         }
228         return ret;
229 }
230
231 /* write a lump of data at a specified offset */
232 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
233                                 const void *buf, tdb_len_t len)
234 {
235         enum TDB_ERROR ecode;
236
237         if (tdb->read_only) {
238                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
239                                   "Write to read-only database");
240         }
241
242         /* FIXME: Bogus optimization? */
243         if (len == 0) {
244                 return TDB_SUCCESS;
245         }
246
247         ecode = tdb->methods->oob(tdb, off + len, 0);
248         if (ecode != TDB_SUCCESS) {
249                 return ecode;
250         }
251
252         if (tdb->map_ptr) {
253                 memcpy(off + (char *)tdb->map_ptr, buf, len);
254         } else {
255                 ssize_t ret;
256                 ret = pwrite(tdb->fd, buf, len, off);
257                 if (ret < len) {
258                         /* This shouldn't happen: we avoid sparse files. */
259                         if (ret >= 0)
260                                 errno = ENOSPC;
261
262                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
263                                           "tdb_write: %zi at %zu len=%zu (%s)",
264                                           ret, (size_t)off, (size_t)len,
265                                           strerror(errno));
266                 }
267         }
268         return TDB_SUCCESS;
269 }
270
271 /* read a lump of data at a specified offset */
272 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
273                                void *buf, tdb_len_t len)
274 {
275         enum TDB_ERROR ecode;
276
277         ecode = tdb->methods->oob(tdb, off + len, 0);
278         if (ecode != TDB_SUCCESS) {
279                 return ecode;
280         }
281
282         if (tdb->map_ptr) {
283                 memcpy(buf, off + (char *)tdb->map_ptr, len);
284         } else {
285                 ssize_t r = pread(tdb->fd, buf, len, off);
286                 if (r != len) {
287                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
288                                           "tdb_read failed with %zi at %zu "
289                                           "len=%zu (%s) map_size=%zu",
290                                           r, (size_t)off, (size_t)len,
291                                           strerror(errno),
292                                           (size_t)tdb->map_size);
293                 }
294         }
295         return TDB_SUCCESS;
296 }
297
298 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
299                                  const void *rec, size_t len)
300 {
301         enum TDB_ERROR ecode;
302
303         if (unlikely((tdb->flags & TDB_CONVERT))) {
304                 void *conv = malloc(len);
305                 if (!conv) {
306                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
307                                           "tdb_write: no memory converting"
308                                           " %zu bytes", len);
309                 }
310                 memcpy(conv, rec, len);
311                 ecode = tdb->methods->twrite(tdb, off,
312                                            tdb_convert(tdb, conv, len), len);
313                 free(conv);
314         } else {
315                 ecode = tdb->methods->twrite(tdb, off, rec, len);
316         }
317         return ecode;
318 }
319
320 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
321                                 void *rec, size_t len)
322 {
323         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
324         tdb_convert(tdb, rec, len);
325         return ecode;
326 }
327
328 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
329                              tdb_off_t off, tdb_off_t val)
330 {
331         if (tdb->read_only) {
332                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
333                                   "Write to read-only database");
334         }
335
336         if (likely(!(tdb->flags & TDB_CONVERT))) {
337                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
338                                                     true);
339                 if (TDB_PTR_IS_ERR(p)) {
340                         return TDB_PTR_ERR(p);
341                 }
342                 if (p) {
343                         *p = val;
344                         return TDB_SUCCESS;
345                 }
346         }
347         return tdb_write_convert(tdb, off, &val, sizeof(val));
348 }
349
350 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
351                              tdb_len_t len, unsigned int prefix)
352 {
353         void *buf;
354         enum TDB_ERROR ecode;
355
356         /* some systems don't like zero length malloc */
357         buf = malloc(prefix + len ? prefix + len : 1);
358         if (!buf) {
359                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
360                            "tdb_alloc_read malloc failed len=%zu",
361                            (size_t)(prefix + len));
362                 return TDB_ERR_PTR(TDB_ERR_OOM);
363         } else {
364                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
365                 if (unlikely(ecode != TDB_SUCCESS)) {
366                         free(buf);
367                         return TDB_ERR_PTR(ecode);
368                 }
369         }
370         return buf;
371 }
372
373 /* read a lump of data, allocating the space for it */
374 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
375 {
376         return _tdb_alloc_read(tdb, offset, len, 0);
377 }
378
379 static enum TDB_ERROR fill(struct tdb_context *tdb,
380                            const void *buf, size_t size,
381                            tdb_off_t off, tdb_len_t len)
382 {
383         while (len) {
384                 size_t n = len > size ? size : len;
385                 ssize_t ret = pwrite(tdb->fd, buf, n, off);
386                 if (ret < n) {
387                         if (ret >= 0)
388                                 errno = ENOSPC;
389
390                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
391                                           "fill failed:"
392                                           " %zi at %zu len=%zu (%s)",
393                                           ret, (size_t)off, (size_t)len,
394                                           strerror(errno));
395                 }
396                 len -= n;
397                 off += n;
398         }
399         return TDB_SUCCESS;
400 }
401
402 /* expand a file.  we prefer to use ftruncate, as that is what posix
403   says to use for mmap expansion */
404 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
405                                       tdb_len_t addition)
406 {
407         char buf[8192];
408         enum TDB_ERROR ecode;
409
410         if (tdb->read_only) {
411                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
412                                   "Expand on read-only database");
413         }
414
415         if (tdb->flags & TDB_INTERNAL) {
416                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
417                 if (!new) {
418                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
419                                           "No memory to expand database");
420                 }
421                 tdb->map_ptr = new;
422                 tdb->map_size += addition;
423         } else {
424                 /* Unmap before trying to write; old TDB claimed OpenBSD had
425                  * problem with this otherwise. */
426                 tdb_munmap(tdb);
427
428                 /* If this fails, we try to fill anyway. */
429                 if (ftruncate(tdb->fd, tdb->map_size + addition))
430                         ;
431
432                 /* now fill the file with something. This ensures that the
433                    file isn't sparse, which would be very bad if we ran out of
434                    disk. This must be done with write, not via mmap */
435                 memset(buf, 0x43, sizeof(buf));
436                 ecode = fill(tdb, buf, sizeof(buf), tdb->map_size, addition);
437                 if (ecode != TDB_SUCCESS)
438                         return ecode;
439                 tdb->map_size += addition;
440                 tdb_mmap(tdb);
441         }
442         return TDB_SUCCESS;
443 }
444
445 const void *tdb_access_read(struct tdb_context *tdb,
446                             tdb_off_t off, tdb_len_t len, bool convert)
447 {
448         const void *ret = NULL;
449
450         if (likely(!(tdb->flags & TDB_CONVERT))) {
451                 ret = tdb->methods->direct(tdb, off, len, false);
452
453                 if (TDB_PTR_IS_ERR(ret)) {
454                         return ret;
455                 }
456         }
457         if (!ret) {
458                 struct tdb_access_hdr *hdr;
459                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
460                 if (TDB_PTR_IS_ERR(hdr)) {
461                         return hdr;
462                 }
463                 hdr->next = tdb->access;
464                 tdb->access = hdr;
465                 ret = hdr + 1;
466                 if (convert) {
467                         tdb_convert(tdb, (void *)ret, len);
468                 }
469         } else
470                 tdb->direct_access++;
471
472         return ret;
473 }
474
475 void *tdb_access_write(struct tdb_context *tdb,
476                        tdb_off_t off, tdb_len_t len, bool convert)
477 {
478         void *ret = NULL;
479
480         if (tdb->read_only) {
481                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
482                            "Write to read-only database");
483                 return TDB_ERR_PTR(TDB_ERR_RDONLY);
484         }
485
486         if (likely(!(tdb->flags & TDB_CONVERT))) {
487                 ret = tdb->methods->direct(tdb, off, len, true);
488
489                 if (TDB_PTR_IS_ERR(ret)) {
490                         return ret;
491                 }
492         }
493
494         if (!ret) {
495                 struct tdb_access_hdr *hdr;
496                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
497                 if (TDB_PTR_IS_ERR(hdr)) {
498                         return hdr;
499                 }
500                 hdr->next = tdb->access;
501                 tdb->access = hdr;
502                 hdr->off = off;
503                 hdr->len = len;
504                 hdr->convert = convert;
505                 ret = hdr + 1;
506                 if (convert)
507                         tdb_convert(tdb, (void *)ret, len);
508         } else
509                 tdb->direct_access++;
510
511         return ret;
512 }
513
514 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
515 {
516         struct tdb_access_hdr **hp;
517
518         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
519                 if (*hp + 1 == p)
520                         return hp;
521         }
522         return NULL;
523 }
524
525 void tdb_access_release(struct tdb_context *tdb, const void *p)
526 {
527         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
528
529         if (hp) {
530                 hdr = *hp;
531                 *hp = hdr->next;
532                 free(hdr);
533         } else
534                 tdb->direct_access--;
535 }
536
537 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
538 {
539         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
540         enum TDB_ERROR ecode;
541
542         if (hp) {
543                 hdr = *hp;
544                 if (hdr->convert)
545                         ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
546                 else
547                         ecode = tdb_write(tdb, hdr->off, p, hdr->len);
548                 *hp = hdr->next;
549                 free(hdr);
550         } else {
551                 tdb->direct_access--;
552                 ecode = TDB_SUCCESS;
553         }
554
555         return ecode;
556 }
557
558 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
559                         bool write_mode)
560 {
561         enum TDB_ERROR ecode;
562
563         if (unlikely(!tdb->map_ptr))
564                 return NULL;
565
566         ecode = tdb_oob(tdb, off + len, true);
567         if (unlikely(ecode != TDB_SUCCESS))
568                 return TDB_ERR_PTR(ecode);
569         return (char *)tdb->map_ptr + off;
570 }
571
572 void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
573 {
574         if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
575                 *s += val;
576 }
577
/* Default I/O method table, installed by tdb_io_init().
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of struct tdb_methods (declared outside this file).
 * NOTE(review): code in this file calls the members tread, twrite, oob
 * and direct; the mapping of entries to member names below is presumed
 * from that -- confirm against the struct declaration. */
static const struct tdb_methods io_methods = {
	tdb_read,		/* read a lump of data at an offset */
	tdb_write,		/* write a lump of data at an offset */
	tdb_oob,		/* bounds check, remapping if the file grew */
	tdb_expand_file,	/* grow the database */
	tdb_direct,		/* pointer straight into the mapping */
};
585
/*
  Initialise the default methods table: point tdb->methods at this
  file's io_methods.
*/
void tdb_io_init(struct tdb_context *tdb)
{
	tdb->methods = &io_methods;
}