tdb2: remove tdb_get()
[ccan] / ccan / tdb2 / io.c
1  /* 
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9    Copyright (C) Rusty Russell                     2010
10
11      ** NOTE! The following LGPL license applies to the tdb
12      ** library. This does NOT imply that all of Samba is released
13      ** under the LGPL
14
15    This library is free software; you can redistribute it and/or
16    modify it under the terms of the GNU Lesser General Public
17    License as published by the Free Software Foundation; either
18    version 3 of the License, or (at your option) any later version.
19
20    This library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 */
28 #include "private.h"
29 #include <assert.h>
30 #include <ccan/likely/likely.h>
31
32 void tdb_munmap(struct tdb_context *tdb)
33 {
34         if (tdb->flags & TDB_INTERNAL)
35                 return;
36
37         if (tdb->map_ptr) {
38                 munmap(tdb->map_ptr, tdb->map_size);
39                 tdb->map_ptr = NULL;
40         }
41 }
42
43 void tdb_mmap(struct tdb_context *tdb)
44 {
45         if (tdb->flags & TDB_INTERNAL)
46                 return;
47
48         if (tdb->flags & TDB_NOMMAP)
49                 return;
50
51         tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
52                             MAP_SHARED, tdb->fd, 0);
53
54         /*
55          * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
56          */
57         if (tdb->map_ptr == MAP_FAILED) {
58                 tdb->map_ptr = NULL;
59                 tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
60                          "tdb_mmap failed for size %lld (%s)\n", 
61                          (long long)tdb->map_size, strerror(errno));
62         }
63 }
64
65 /* check for an out of bounds access - if it is out of bounds then
66    see if the database has been expanded by someone else and expand
67    if necessary 
68    note that "len" is the minimum length needed for the db
69 */
70 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
71 {
72         struct stat st;
73         int ret;
74
75         /* We can't hold pointers during this: we could unmap! */
76         assert(!tdb->direct_access
77                || (tdb->flags & TDB_NOLOCK)
78                || tdb_has_expansion_lock(tdb));
79
80         if (len <= tdb->map_size)
81                 return 0;
82         if (tdb->flags & TDB_INTERNAL) {
83                 if (!probe) {
84                         /* Ensure ecode is set for log fn. */
85                         tdb->ecode = TDB_ERR_IO;
86                         tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
87                                  "tdb_oob len %lld beyond internal"
88                                  " malloc size %lld\n",
89                                  (long long)len,
90                                  (long long)tdb->map_size);
91                 }
92                 return -1;
93         }
94
95         if (tdb_lock_expand(tdb, F_RDLCK) != 0)
96                 return -1;
97
98         ret = fstat(tdb->fd, &st);
99
100         tdb_unlock_expand(tdb, F_RDLCK);
101
102         if (ret == -1) {
103                 tdb->ecode = TDB_ERR_IO;
104                 return -1;
105         }
106
107         if (st.st_size < (size_t)len) {
108                 if (!probe) {
109                         /* Ensure ecode is set for log fn. */
110                         tdb->ecode = TDB_ERR_IO;
111                         tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
112                                  "tdb_oob len %lld beyond eof at %lld\n",
113                                  (long long)len, (long long)st.st_size);
114                 }
115                 return -1;
116         }
117
118         /* Unmap, update size, remap */
119         tdb_munmap(tdb);
120
121         tdb->map_size = st.st_size;
122         tdb_mmap(tdb);
123         return 0;
124 }
125
126 /* Endian conversion: we only ever deal with 8 byte quantities */
127 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
128 {
129         if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
130                 uint64_t i, *p = (uint64_t *)buf;
131                 for (i = 0; i < size / 8; i++)
132                         p[i] = bswap_64(p[i]);
133         }
134         return buf;
135 }
136
137 /* FIXME: Return the off? */
138 uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
139                               tdb_off_t base, uint64_t start, uint64_t end)
140 {
141         uint64_t i;
142         const uint64_t *val;
143
144         /* Zero vs non-zero is the same unconverted: minor optimization. */
145         val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
146                               (end - start) * sizeof(tdb_off_t), false);
147         if (!val)
148                 return end;
149
150         for (i = 0; i < (end - start); i++) {
151                 if (val[i])
152                         break;
153         }
154         tdb_access_release(tdb, val);
155         return start + i;
156 }
157
158 /* Return first zero offset in num offset array, or num. */
159 uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
160                            uint64_t num)
161 {
162         uint64_t i;
163         const uint64_t *val;
164
165         /* Zero vs non-zero is the same unconverted: minor optimization. */
166         val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
167         if (!val)
168                 return num;
169
170         for (i = 0; i < num; i++) {
171                 if (!val[i])
172                         break;
173         }
174         tdb_access_release(tdb, val);
175         return i;
176 }
177
178 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
179 {
180         char buf[8192] = { 0 };
181         void *p = tdb->methods->direct(tdb, off, len);
182         if (p) {
183                 memset(p, 0, len);
184                 return 0;
185         }
186         while (len) {
187                 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
188                 if (tdb->methods->write(tdb, off, buf, todo) == -1)
189                         return -1;
190                 len -= todo;
191                 off += todo;
192         }
193         return 0;
194 }
195
196 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
197 {
198         tdb_off_t ret;
199
200         if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
201                 return TDB_OFF_ERR;
202         return ret;
203 }
204
205 /* Even on files, we can get partial writes due to signals. */
206 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
207 {
208         while (len) {
209                 ssize_t ret;
210                 ret = pwrite(fd, buf, len, off);
211                 if (ret < 0)
212                         return false;
213                 if (ret == 0) {
214                         errno = ENOSPC;
215                         return false;
216                 }
217                 buf = (char *)buf + ret;
218                 off += ret;
219                 len -= ret;
220         }
221         return true;
222 }
223
224 /* Even on files, we can get partial reads due to signals. */
225 bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
226 {
227         while (len) {
228                 ssize_t ret;
229                 ret = pread(fd, buf, len, off);
230                 if (ret < 0)
231                         return false;
232                 if (ret == 0) {
233                         /* ETOOSHORT? */
234                         errno = EWOULDBLOCK;
235                         return false;
236                 }
237                 buf = (char *)buf + ret;
238                 off += ret;
239                 len -= ret;
240         }
241         return true;
242 }
243
/*
 * Read exactly "len" bytes from the current position of "fd" into "buf".
 *
 * Short reads are continued until the buffer is full, and a call
 * interrupted before it transferred anything (EINTR) is retried.
 *
 * Returns true on success.  On failure returns false with errno set by
 * read(), or to EWOULDBLOCK if end of file was hit first.
 */
bool tdb_read_all(int fd, void *buf, size_t len)
{
	char *dst = buf;

	while (len) {
		ssize_t ret = read(fd, dst, len);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			return false;
		}
		if (ret == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		dst += ret;
		len -= ret;
	}
	return true;
}
261
262 /* write a lump of data at a specified offset */
263 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
264                      const void *buf, tdb_len_t len)
265 {
266         if (len == 0) {
267                 return 0;
268         }
269
270         if (tdb->read_only) {
271                 tdb->ecode = TDB_ERR_RDONLY;
272                 return -1;
273         }
274
275         if (tdb->methods->oob(tdb, off + len, 0) != 0)
276                 return -1;
277
278         if (tdb->map_ptr) {
279                 memcpy(off + (char *)tdb->map_ptr, buf, len);
280         } else {
281                 if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
282                         tdb->ecode = TDB_ERR_IO;
283                         tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
284                                  "tdb_write failed at %llu len=%llu (%s)\n",
285                                  (long long)off, (long long)len,
286                                  strerror(errno));
287                         return -1;
288                 }
289         }
290         return 0;
291 }
292
293 /* read a lump of data at a specified offset */
294 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
295                     tdb_len_t len)
296 {
297         if (tdb->methods->oob(tdb, off + len, 0) != 0) {
298                 return -1;
299         }
300
301         if (tdb->map_ptr) {
302                 memcpy(buf, off + (char *)tdb->map_ptr, len);
303         } else {
304                 if (!tdb_pread_all(tdb->fd, buf, len, off)) {
305                         /* Ensure ecode is set for log fn. */
306                         tdb->ecode = TDB_ERR_IO;
307                         tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
308                                  "tdb_read failed at %lld "
309                                  "len=%lld (%s) map_size=%lld\n",
310                                  (long long)off, (long long)len,
311                                  strerror(errno),
312                                  (long long)tdb->map_size);
313                         return -1;
314                 }
315         }
316         return 0;
317 }
318
319 int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
320                       const void *rec, size_t len)
321 {
322         int ret;
323         if (unlikely((tdb->flags & TDB_CONVERT))) {
324                 void *conv = malloc(len);
325                 if (!conv) {
326                         tdb->ecode = TDB_ERR_OOM;
327                         tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
328                                  "tdb_write: no memory converting %zu bytes\n",
329                                  len);
330                         return -1;
331                 }
332                 memcpy(conv, rec, len);
333                 ret = tdb->methods->write(tdb, off,
334                                           tdb_convert(tdb, conv, len), len);
335                 free(conv);
336         } else
337                 ret = tdb->methods->write(tdb, off, rec, len);
338
339         return ret;
340 }
341
342 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
343                       void *rec, size_t len)
344 {
345         int ret = tdb->methods->read(tdb, off, rec, len);
346         tdb_convert(tdb, rec, len);
347         return ret;
348 }
349
350 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
351 {
352         return tdb_write_convert(tdb, off, &val, sizeof(val));
353 }
354
355 static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
356                              tdb_len_t len, unsigned int prefix)
357 {
358         void *buf;
359
360         /* some systems don't like zero length malloc */
361         buf = malloc(prefix + len ? prefix + len : 1);
362         if (unlikely(!buf)) {
363                 tdb->ecode = TDB_ERR_OOM;
364                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
365                          "tdb_alloc_read malloc failed len=%lld\n",
366                          (long long)prefix + len);
367         } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix, len))) {
368                 free(buf);
369                 buf = NULL;
370         }
371         return buf;
372 }
373
374 /* read a lump of data, allocating the space for it */
375 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
376 {
377         return _tdb_alloc_read(tdb, offset, len, 0);
378 }
379
380 static int fill(struct tdb_context *tdb,
381                 const void *buf, size_t size,
382                 tdb_off_t off, tdb_len_t len)
383 {
384         while (len) {
385                 size_t n = len > size ? size : len;
386
387                 if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
388                         tdb->ecode = TDB_ERR_IO;
389                         tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
390                                  "fill write failed: giving up!\n");
391                         return -1;
392                 }
393                 len -= n;
394                 off += n;
395         }
396         return 0;
397 }
398
399 /* expand a file.  we prefer to use ftruncate, as that is what posix
400   says to use for mmap expansion */
401 static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
402 {
403         char buf[8192];
404
405         if (tdb->read_only) {
406                 tdb->ecode = TDB_ERR_RDONLY;
407                 return -1;
408         }
409
410         if (tdb->flags & TDB_INTERNAL) {
411                 char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
412                 if (!new) {
413                         tdb->ecode = TDB_ERR_OOM;
414                         return -1;
415                 }
416                 tdb->map_ptr = new;
417                 tdb->map_size += addition;
418         } else {
419                 /* Unmap before trying to write; old TDB claimed OpenBSD had
420                  * problem with this otherwise. */
421                 tdb_munmap(tdb);
422
423                 /* If this fails, we try to fill anyway. */
424                 if (ftruncate(tdb->fd, tdb->map_size + addition))
425                         ;
426
427                 /* now fill the file with something. This ensures that the
428                    file isn't sparse, which would be very bad if we ran out of
429                    disk. This must be done with write, not via mmap */
430                 memset(buf, 0x43, sizeof(buf));
431                 if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
432                         return -1;
433                 tdb->map_size += addition;
434                 tdb_mmap(tdb);
435         }
436         return 0;
437 }
438
439 /* This is only needed for tdb_access_commit, but used everywhere to simplify.
 * It sits immediately in front of every heap buffer handed out by
 * tdb_access_read/tdb_access_write; only tdb_access_write fills it in. */
440 struct tdb_access_hdr {
441         tdb_off_t off;          /* database offset the buffer was read from */
442         tdb_len_t len;          /* length of the buffer in bytes */
443         bool convert;           /* commit with tdb_write_convert(), not tdb_write() */
444 };
445
446 const void *tdb_access_read(struct tdb_context *tdb,
447                             tdb_off_t off, tdb_len_t len, bool convert)
448 {
449         const void *ret = NULL; 
450
451         if (likely(!(tdb->flags & TDB_CONVERT)))
452                 ret = tdb->methods->direct(tdb, off, len);
453
454         if (!ret) {
455                 struct tdb_access_hdr *hdr;
456                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
457                 if (hdr) {
458                         ret = hdr + 1;
459                         if (convert)
460                                 tdb_convert(tdb, (void *)ret, len);
461                 }
462         } else
463                 tdb->direct_access++;
464
465         return ret;
466 }
467
468 void *tdb_access_write(struct tdb_context *tdb,
469                        tdb_off_t off, tdb_len_t len, bool convert)
470 {
471         void *ret = NULL;
472
473         if (likely(!(tdb->flags & TDB_CONVERT)))
474                 ret = tdb->methods->direct(tdb, off, len);
475
476         if (!ret) {
477                 struct tdb_access_hdr *hdr;
478                 hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
479                 if (hdr) {
480                         hdr->off = off;
481                         hdr->len = len;
482                         hdr->convert = convert;
483                         ret = hdr + 1;
484                         if (convert)
485                                 tdb_convert(tdb, (void *)ret, len);
486                 }
487         } else
488                 tdb->direct_access++;
489
490         return ret;
491 }
492
493 bool is_direct(const struct tdb_context *tdb, const void *p)
494 {
495         return (tdb->map_ptr
496                 && (char *)p >= (char *)tdb->map_ptr
497                 && (char *)p < (char *)tdb->map_ptr + tdb->map_size);
498 }
499
500 void tdb_access_release(struct tdb_context *tdb, const void *p)
501 {
502         if (is_direct(tdb, p))
503                 tdb->direct_access--;
504         else
505                 free((struct tdb_access_hdr *)p - 1);
506 }
507
508 int tdb_access_commit(struct tdb_context *tdb, void *p)
509 {
510         int ret = 0;
511
512         if (!tdb->map_ptr
513             || (char *)p < (char *)tdb->map_ptr
514             || (char *)p >= (char *)tdb->map_ptr + tdb->map_size) {
515                 struct tdb_access_hdr *hdr;
516
517                 hdr = (struct tdb_access_hdr *)p - 1;
518                 if (hdr->convert)
519                         ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
520                 else
521                         ret = tdb_write(tdb, hdr->off, p, hdr->len);
522                 free(hdr);
523         } else
524                 tdb->direct_access--;
525
526         return ret;
527 }
528
529 static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
530 {
531         if (unlikely(!tdb->map_ptr))
532                 return NULL;
533
534         if (unlikely(tdb_oob(tdb, off + len, true) == -1))
535                 return NULL;
536         return (char *)tdb->map_ptr + off;
537 }
538
539 void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val)
540 {
541         if ((uintptr_t)stat < (uintptr_t)tdb->stats + tdb->stats->size)
542                 *stat += val;
543 }
544
/* Default I/O method table, installed by tdb_io_init().  The initializer is
 * positional, so the entries must stay in the member order of struct
 * tdb_methods (declared outside this file).
 * NOTE(review): confirm the slot order against that declaration. */
545 static const struct tdb_methods io_methods = {
546         tdb_read,               /* read a lump of data at an offset */
547         tdb_write,              /* write a lump of data at an offset */
548         tdb_oob,                /* bounds check; remaps if the file has grown */
549         tdb_expand_file,        /* grow the database by a number of bytes */
550         tdb_direct,             /* pointer into the mapping, or NULL */
551 };
552
553 /*
554   initialise the default methods table
555 */
556 void tdb_io_init(struct tdb_context *tdb)
557 {
558         tdb->methods = &io_methods;
559 }