]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/io.c
tdb2: TDB_ATTRIBUTE_STATS access via tdb_get_attribute.
[ccan] / ccan / tdb2 / io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_file *file)
33 {
34         if (file->fd == -1)
35                 return;
36
37         if (file->map_ptr) {
38                 munmap(file->map_ptr, file->map_size);
39                 file->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->file->map_ptr = mmap(NULL, tdb->file->map_size, tdb->mmap_flags,
52                                   MAP_SHARED, tdb->file->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->file->map_ptr == MAP_FAILED) {
58                 tdb->file->map_ptr = NULL;
59                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
60                            "tdb_mmap failed for size %lld (%s)",
61                            (long long)tdb->file->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary
68    note that "len" is the minimum length needed for the db
69 */
70 static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
71                               bool probe)
72 {
73         struct stat st;
74         enum TDB_ERROR ecode;
75
76         /* We can't hold pointers during this: we could unmap! */
77         assert(!tdb->direct_access
78                || (tdb->flags & TDB_NOLOCK)
79                || tdb_has_expansion_lock(tdb));
80
81         if (len <= tdb->file->map_size)
82                 return 0;
83         if (tdb->flags & TDB_INTERNAL) {
84                 if (!probe) {
85                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
86                                  "tdb_oob len %lld beyond internal"
87                                  " malloc size %lld",
88                                  (long long)len,
89                                  (long long)tdb->file->map_size);
90                 }
91                 return TDB_ERR_IO;
92         }
93
94         ecode = tdb_lock_expand(tdb, F_RDLCK);
95         if (ecode != TDB_SUCCESS) {
96                 return ecode;
97         }
98
99         if (fstat(tdb->file->fd, &st) != 0) {
100                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
101                            "Failed to fstat file: %s", strerror(errno));
102                 tdb_unlock_expand(tdb, F_RDLCK);
103                 return TDB_ERR_IO;
104         }
105
106         tdb_unlock_expand(tdb, F_RDLCK);
107
108         if (st.st_size < (size_t)len) {
109                 if (!probe) {
110                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
111                                    "tdb_oob len %zu beyond eof at %zu",
112                                    (size_t)len, st.st_size);
113                 }
114                 return TDB_ERR_IO;
115         }
116
117         /* Unmap, update size, remap */
118         tdb_munmap(tdb->file);
119
120         tdb->file->map_size = st.st_size;
121         tdb_mmap(tdb);
122         return TDB_SUCCESS;
123 }
124
125 /* Endian conversion: we only ever deal with 8 byte quantities */
126 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
127 {
128         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
129                 uint64_t i, *p = (uint64_t *)buf;
130                 for (i = 0; i < size / 8; i++)
131                         p[i] = bswap_64(p[i]);
132         }
133         return buf;
134 }
135
136 /* Return first non-zero offset in offset array, or end, or -ve error. */
137 /* FIXME: Return the off? */
138 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
139                               tdb_off_t base, uint64_t start, uint64_t end)
140 {
141         uint64_t i;
142         const uint64_t *val;
143
144         /* Zero vs non-zero is the same unconverted: minor optimization. */
145         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
146                               (end - start) * sizeof(tdb_off_t), false);
147         if (TDB_PTR_IS_ERR(val)) {
148                 return TDB_PTR_ERR(val);
149         }
150
151         for (i = 0; i < (end - start); i++) {
152                 if (val[i])
153                         break;
154         }
155         tdb_access_release(tdb, val);
156         return start + i;
157 }
158
159 /* Return first zero offset in num offset array, or num, or -ve error. */
160 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
161                            uint64_t num)
162 {
163         uint64_t i;
164         const uint64_t *val;
165
166         /* Zero vs non-zero is the same unconverted: minor optimization. */
167         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
168         if (TDB_PTR_IS_ERR(val)) {
169                 return TDB_PTR_ERR(val);
170         }
171
172         for (i = 0; i < num; i++) {
173                 if (!val[i])
174                         break;
175         }
176         tdb_access_release(tdb, val);
177         return i;
178 }
179
180 enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
181 {
182         char buf[8192] = { 0 };
183         void *p = tdb->methods->direct(tdb, off, len, true);
184         enum TDB_ERROR ecode = TDB_SUCCESS;
185
186         assert(!tdb->read_only);
187         if (TDB_PTR_IS_ERR(p)) {
188                 return TDB_PTR_ERR(p);
189         }
190         if (p) {
191                 memset(p, 0, len);
192                 return ecode;
193         }
194         while (len) {
195                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
196                 ecode = tdb->methods->twrite(tdb, off, buf, todo);
197                 if (ecode != TDB_SUCCESS) {
198                         break;
199                 }
200                 len -= todo;
201                 off += todo;
202         }
203         return ecode;
204 }
205
206 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
207 {
208         tdb_off_t ret;
209         enum TDB_ERROR ecode;
210
211         if (likely(!(tdb->flags & TDB_CONVERT))) {
212                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
213                                                     false);
214                 if (TDB_PTR_IS_ERR(p)) {
215                         return TDB_PTR_ERR(p);
216                 }
217                 if (p)
218                         return *p;
219         }
220
221         ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
222         if (ecode != TDB_SUCCESS) {
223                 return ecode;
224         }
225         return ret;
226 }
227
228 /* write a lump of data at a specified offset */
229 static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
230                                 const void *buf, tdb_len_t len)
231 {
232         enum TDB_ERROR ecode;
233
234         if (tdb->read_only) {
235                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
236                                   "Write to read-only database");
237         }
238
239         ecode = tdb->methods->oob(tdb, off + len, 0);
240         if (ecode != TDB_SUCCESS) {
241                 return ecode;
242         }
243
244         if (tdb->file->map_ptr) {
245                 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
246         } else {
247                 ssize_t ret;
248                 ret = pwrite(tdb->file->fd, buf, len, off);
249                 if (ret != len) {
250                         /* This shouldn't happen: we avoid sparse files. */
251                         if (ret >= 0)
252                                 errno = ENOSPC;
253
254                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
255                                           "tdb_write: %zi at %zu len=%zu (%s)",
256                                           ret, (size_t)off, (size_t)len,
257                                           strerror(errno));
258                 }
259         }
260         return TDB_SUCCESS;
261 }
262
263 /* read a lump of data at a specified offset */
264 static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
265                                void *buf, tdb_len_t len)
266 {
267         enum TDB_ERROR ecode;
268
269         ecode = tdb->methods->oob(tdb, off + len, 0);
270         if (ecode != TDB_SUCCESS) {
271                 return ecode;
272         }
273
274         if (tdb->file->map_ptr) {
275                 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
276         } else {
277                 ssize_t r = pread(tdb->file->fd, buf, len, off);
278                 if (r != len) {
279                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
280                                           "tdb_read failed with %zi at %zu "
281                                           "len=%zu (%s) map_size=%zu",
282                                           r, (size_t)off, (size_t)len,
283                                           strerror(errno),
284                                           (size_t)tdb->file->map_size);
285                 }
286         }
287         return TDB_SUCCESS;
288 }
289
290 enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
291                                  const void *rec, size_t len)
292 {
293         enum TDB_ERROR ecode;
294
295         if (unlikely((tdb->flags & TDB_CONVERT))) {
296                 void *conv = malloc(len);
297                 if (!conv) {
298                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
299                                           "tdb_write: no memory converting"
300                                           " %zu bytes", len);
301                 }
302                 memcpy(conv, rec, len);
303                 ecode = tdb->methods->twrite(tdb, off,
304                                            tdb_convert(tdb, conv, len), len);
305                 free(conv);
306         } else {
307                 ecode = tdb->methods->twrite(tdb, off, rec, len);
308         }
309         return ecode;
310 }
311
312 enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
313                                 void *rec, size_t len)
314 {
315         enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
316         tdb_convert(tdb, rec, len);
317         return ecode;
318 }
319
320 enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
321                              tdb_off_t off, tdb_off_t val)
322 {
323         if (tdb->read_only) {
324                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
325                                   "Write to read-only database");
326         }
327
328         if (likely(!(tdb->flags & TDB_CONVERT))) {
329                 tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
330                                                     true);
331                 if (TDB_PTR_IS_ERR(p)) {
332                         return TDB_PTR_ERR(p);
333                 }
334                 if (p) {
335                         *p = val;
336                         return TDB_SUCCESS;
337                 }
338         }
339         return tdb_write_convert(tdb, off, &val, sizeof(val));
340 }
341
342 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
343                              tdb_len_t len, unsigned int prefix)
344 {
345         unsigned char *buf;
346         enum TDB_ERROR ecode;
347
348         /* some systems don't like zero length malloc */
349         buf = malloc(prefix + len ? prefix + len : 1);
350         if (!buf) {
351                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
352                            "tdb_alloc_read malloc failed len=%zu",
353                            (size_t)(prefix + len));
354                 return TDB_ERR_PTR(TDB_ERR_OOM);
355         } else {
356                 ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
357                 if (unlikely(ecode != TDB_SUCCESS)) {
358                         free(buf);
359                         return TDB_ERR_PTR(ecode);
360                 }
361         }
362         return buf;
363 }
364
365 /* read a lump of data, allocating the space for it */
366 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
367 {
368         return _tdb_alloc_read(tdb, offset, len, 0);
369 }
370
371 static enum TDB_ERROR fill(struct tdb_context *tdb,
372                            const void *buf, size_t size,
373                            tdb_off_t off, tdb_len_t len)
374 {
375         while (len) {
376                 size_t n = len > size ? size : len;
377                 ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
378                 if (ret != n) {
379                         if (ret >= 0)
380                                 errno = ENOSPC;
381
382                         return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
383                                           "fill failed:"
384                                           " %zi at %zu len=%zu (%s)",
385                                           ret, (size_t)off, (size_t)len,
386                                           strerror(errno));
387                 }
388                 len -= n;
389                 off += n;
390         }
391         return TDB_SUCCESS;
392 }
393
394 /* expand a file.  we prefer to use ftruncate, as that is what posix
395   says to use for mmap expansion */
396 static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
397                                       tdb_len_t addition)
398 {
399         char buf[8192];
400         enum TDB_ERROR ecode;
401
402         if (tdb->read_only) {
403                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
404                                   "Expand on read-only database");
405         }
406
407         if (tdb->flags & TDB_INTERNAL) {
408                 char *new = realloc(tdb->file->map_ptr,
409                                     tdb->file->map_size + addition);
410                 if (!new) {
411                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
412                                           "No memory to expand database");
413                 }
414                 tdb->file->map_ptr = new;
415                 tdb->file->map_size += addition;
416         } else {
417                 /* Unmap before trying to write; old TDB claimed OpenBSD had
418                  * problem with this otherwise. */
419                 tdb_munmap(tdb->file);
420
421                 /* If this fails, we try to fill anyway. */
422                 if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
423                         ;
424
425                 /* now fill the file with something. This ensures that the
426                    file isn't sparse, which would be very bad if we ran out of
427                    disk. This must be done with write, not via mmap */
428                 memset(buf, 0x43, sizeof(buf));
429                 ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
430                              addition);
431                 if (ecode != TDB_SUCCESS)
432                         return ecode;
433                 tdb->file->map_size += addition;
434                 tdb_mmap(tdb);
435         }
436         return TDB_SUCCESS;
437 }
438
439 const void *tdb_access_read(struct tdb_context *tdb,
440                             tdb_off_t off, tdb_len_t len, bool convert)
441 {
442         void *ret = NULL;
443
444         if (likely(!(tdb->flags & TDB_CONVERT))) {
445                 ret = tdb->methods->direct(tdb, off, len, false);
446
447                 if (TDB_PTR_IS_ERR(ret)) {
448                         return ret;
449                 }
450         }
451         if (!ret) {
452                 struct tdb_access_hdr *hdr;
453                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
454                 if (TDB_PTR_IS_ERR(hdr)) {
455                         return hdr;
456                 }
457                 hdr->next = tdb->access;
458                 tdb->access = hdr;
459                 ret = hdr + 1;
460                 if (convert) {
461                         tdb_convert(tdb, (void *)ret, len);
462                 }
463         } else
464                 tdb->direct_access++;
465
466         return ret;
467 }
468
469 void *tdb_access_write(struct tdb_context *tdb,
470                        tdb_off_t off, tdb_len_t len, bool convert)
471 {
472         void *ret = NULL;
473
474         if (tdb->read_only) {
475                 tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
476                            "Write to read-only database");
477                 return TDB_ERR_PTR(TDB_ERR_RDONLY);
478         }
479
480         if (likely(!(tdb->flags & TDB_CONVERT))) {
481                 ret = tdb->methods->direct(tdb, off, len, true);
482
483                 if (TDB_PTR_IS_ERR(ret)) {
484                         return ret;
485                 }
486         }
487
488         if (!ret) {
489                 struct tdb_access_hdr *hdr;
490                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
491                 if (TDB_PTR_IS_ERR(hdr)) {
492                         return hdr;
493                 }
494                 hdr->next = tdb->access;
495                 tdb->access = hdr;
496                 hdr->off = off;
497                 hdr->len = len;
498                 hdr->convert = convert;
499                 ret = hdr + 1;
500                 if (convert)
501                         tdb_convert(tdb, (void *)ret, len);
502         } else
503                 tdb->direct_access++;
504
505         return ret;
506 }
507
508 static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
509 {
510         struct tdb_access_hdr **hp;
511
512         for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
513                 if (*hp + 1 == p)
514                         return hp;
515         }
516         return NULL;
517 }
518
519 void tdb_access_release(struct tdb_context *tdb, const void *p)
520 {
521         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
522
523         if (hp) {
524                 hdr = *hp;
525                 *hp = hdr->next;
526                 free(hdr);
527         } else
528                 tdb->direct_access--;
529 }
530
531 enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
532 {
533         struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
534         enum TDB_ERROR ecode;
535
536         if (hp) {
537                 hdr = *hp;
538                 if (hdr->convert)
539                         ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len);
540                 else
541                         ecode = tdb_write(tdb, hdr->off, p, hdr->len);
542                 *hp = hdr->next;
543                 free(hdr);
544         } else {
545                 tdb->direct_access--;
546                 ecode = TDB_SUCCESS;
547         }
548
549         return ecode;
550 }
551
552 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
553                         bool write_mode)
554 {
555         enum TDB_ERROR ecode;
556
557         if (unlikely(!tdb->file->map_ptr))
558                 return NULL;
559
560         ecode = tdb_oob(tdb, off + len, true);
561         if (unlikely(ecode != TDB_SUCCESS))
562                 return TDB_ERR_PTR(ecode);
563         return (char *)tdb->file->map_ptr + off;
564 }
565
566 void tdb_inc_seqnum(struct tdb_context *tdb)
567 {
568         tdb_off_t seq;
569
570         if (likely(!(tdb->flags & TDB_CONVERT))) {
571                 int64_t *direct;
572
573                 direct = tdb->methods->direct(tdb,
574                                               offsetof(struct tdb_header,
575                                                        seqnum),
576                                               sizeof(*direct), true);
577                 if (likely(direct)) {
578                         /* Don't let it go negative, even briefly */
579                         if (unlikely((*direct) + 1) < 0)
580                                 *direct = 0;
581                         (*direct)++;
582                         return;
583                 }
584         }
585
586         seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
587         if (!TDB_OFF_IS_ERR(seq)) {
588                 seq++;
589                 if (unlikely((int64_t)seq < 0))
590                         seq = 0;
591                 tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
592         }
593 }
594
595 static const struct tdb_methods io_methods = {
596         tdb_read,
597         tdb_write,
598         tdb_oob,
599         tdb_expand_file,
600         tdb_direct,
601 };
602
603 /*
604   initialise the default methods table
605 */
606 void tdb_io_init(struct tdb_context *tdb)
607 {
608         tdb->methods = &io_methods;
609 }