tdb2: Make tdb1 use the tdb_file structure.
[ccan] / ccan / tdb2 / tdb1_io.c
1  /*
2    Unix SMB/CIFS implementation.
3
4    trivial database library
5
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell              2000
8    Copyright (C) Jeremy Allison                    2000-2003
9
10      ** NOTE! The following LGPL license applies to the tdb
11      ** library. This does NOT imply that all of Samba is released
12      ** under the LGPL
13
14    This library is free software; you can redistribute it and/or
15    modify it under the terms of the GNU Lesser General Public
16    License as published by the Free Software Foundation; either
17    version 3 of the License, or (at your option) any later version.
18
19    This library is distributed in the hope that it will be useful,
20    but WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    Lesser General Public License for more details.
23
24    You should have received a copy of the GNU Lesser General Public
25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 */
27
28
29 #include "tdb1_private.h"
30 #ifndef MAX
31 #define MAX(a,b) ((a) > (b) ? (a) : (b))
32 #endif
33
34 /* check for an out of bounds access - if it is out of bounds then
35    see if the database has been expanded by someone else and expand
36    if necessary
37    note that "len" is the minimum length needed for the db
38 */
39 static int tdb1_oob(struct tdb1_context *tdb, tdb1_off_t len, int probe)
40 {
41         struct stat st;
42         if (len <= tdb->file->map_size)
43                 return 0;
44         if (tdb->flags & TDB_INTERNAL) {
45                 if (!probe) {
46                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
47                                                 "tdb1_oob len %d beyond internal malloc size %d",
48                                                 (int)len, (int)tdb->file->map_size);
49                 }
50                 return -1;
51         }
52
53         if (fstat(tdb->file->fd, &st) == -1) {
54                 tdb->last_error = TDB_ERR_IO;
55                 return -1;
56         }
57
58         if (st.st_size < (size_t)len) {
59                 if (!probe) {
60                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
61                                                 "tdb1_oob len %d beyond eof at %d",
62                                                 (int)len, (int)st.st_size);
63                 }
64                 return -1;
65         }
66
67         /* Unmap, update size, remap */
68         if (tdb1_munmap(tdb) == -1) {
69                 tdb->last_error = TDB_ERR_IO;
70                 return -1;
71         }
72         tdb->file->map_size = st.st_size;
73         tdb1_mmap(tdb);
74         return 0;
75 }
76
77 /* write a lump of data at a specified offset */
78 static int tdb1_write(struct tdb1_context *tdb, tdb1_off_t off,
79                      const void *buf, tdb1_len_t len)
80 {
81         if (len == 0) {
82                 return 0;
83         }
84
85         if (tdb->read_only || tdb->traverse_read) {
86                 tdb->last_error = TDB_ERR_RDONLY;
87                 return -1;
88         }
89
90         if (tdb->methods->tdb1_oob(tdb, off + len, 0) != 0)
91                 return -1;
92
93         if (tdb->file->map_ptr) {
94                 memcpy(off + (char *)tdb->file->map_ptr, buf, len);
95         } else {
96                 ssize_t written = pwrite(tdb->file->fd, buf, len, off);
97                 if ((written != (ssize_t)len) && (written != -1)) {
98                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
99                                    "tdb1_write: wrote only "
100                                    "%d of %d bytes at %d, trying once more",
101                                    (int)written, len, off);
102                         written = pwrite(tdb->file->fd,
103                                          (const char *)buf+written,
104                                          len-written,
105                                          off+written);
106                 }
107                 if (written == -1) {
108                         /* Ensure ecode is set for log fn. */
109                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
110                                                 "tdb1_write failed at %d "
111                                                 "len=%d (%s)",
112                                                 off, len, strerror(errno));
113                         return -1;
114                 } else if (written != (ssize_t)len) {
115                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
116                                                 "tdb1_write: failed to "
117                                                 "write %d bytes at %d in two attempts",
118                                                 len, off);
119                         return -1;
120                 }
121         }
122         return 0;
123 }
124
/*
 * Endian conversion, in place: TDB1_BYTEREV is applied to each whole
 * 4-byte word of "buf".  "size" is a byte count; bytes beyond the last
 * whole word are left untouched.  Returns "buf".
 */
void *tdb1_convert(void *buf, uint32_t size)
{
        uint32_t *word = (uint32_t *)buf;
        uint32_t left;

        for (left = size / 4; left != 0; left--, word++)
                *word = TDB1_BYTEREV(*word);
        return buf;
}
133
134
135 /* read a lump of data at a specified offset, maybe convert */
136 static int tdb1_read(struct tdb1_context *tdb, tdb1_off_t off, void *buf,
137                     tdb1_len_t len, int cv)
138 {
139         if (tdb->methods->tdb1_oob(tdb, off + len, 0) != 0) {
140                 return -1;
141         }
142
143         if (tdb->file->map_ptr) {
144                 memcpy(buf, off + (char *)tdb->file->map_ptr, len);
145         } else {
146                 ssize_t ret = pread(tdb->file->fd, buf, len, off);
147                 if (ret != (ssize_t)len) {
148                         /* Ensure ecode is set for log fn. */
149                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
150                                                 "tdb1_read failed at %d "
151                                                 "len=%d ret=%d (%s) map_size=%d",
152                                                 (int)off, (int)len, (int)ret,
153                                                 strerror(errno),
154                                                 (int)tdb->file->map_size);
155                         return -1;
156                 }
157         }
158         if (cv) {
159                 tdb1_convert(buf, len);
160         }
161         return 0;
162 }
163
164
165
166 /*
167   do an unlocked scan of the hash table heads to find the next non-zero head. The value
168   will then be confirmed with the lock held
169 */
170 static void tdb1_next_hash_chain(struct tdb1_context *tdb, uint32_t *chain)
171 {
172         uint32_t h = *chain;
173         if (tdb->file->map_ptr) {
174                 for (;h < tdb->header.hash_size;h++) {
175                         if (0 != *(uint32_t *)(TDB1_HASH_TOP(h) + (unsigned char *)tdb->file->map_ptr)) {
176                                 break;
177                         }
178                 }
179         } else {
180                 uint32_t off=0;
181                 for (;h < tdb->header.hash_size;h++) {
182                         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(h), &off) != 0 || off != 0) {
183                                 break;
184                         }
185                 }
186         }
187         (*chain) = h;
188 }
189
190
191 int tdb1_munmap(struct tdb1_context *tdb)
192 {
193         if (tdb->flags & TDB_INTERNAL)
194                 return 0;
195
196 #if HAVE_MMAP
197         if (tdb->file->map_ptr) {
198                 int ret;
199
200                 ret = munmap(tdb->file->map_ptr, tdb->file->map_size);
201                 if (ret != 0)
202                         return ret;
203         }
204 #endif
205         tdb->file->map_ptr = NULL;
206         return 0;
207 }
208
209 void tdb1_mmap(struct tdb1_context *tdb)
210 {
211         if (tdb->flags & TDB_INTERNAL)
212                 return;
213
214 #if HAVE_MMAP
215         if (!(tdb->flags & TDB_NOMMAP)) {
216                 tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
217                                     PROT_READ|(tdb->read_only? 0:PROT_WRITE),
218                                     MAP_SHARED|MAP_FILE, tdb->file->fd, 0);
219
220                 /*
221                  * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
222                  */
223
224                 if (tdb->file->map_ptr == MAP_FAILED) {
225                         tdb->file->map_ptr = NULL;
226                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
227                                    "tdb1_mmap failed for size %d (%s)",
228                                    tdb->file->map_size, strerror(errno));
229                 }
230         } else {
231                 tdb->file->map_ptr = NULL;
232         }
233 #else
234         tdb->file->map_ptr = NULL;
235 #endif
236 }
237
238 /* expand a file.  we prefer to use ftruncate, as that is what posix
239   says to use for mmap expansion */
240 static int tdb1_expand_file(struct tdb1_context *tdb, tdb1_off_t size, tdb1_off_t addition)
241 {
242         char buf[8192];
243
244         if (tdb->read_only || tdb->traverse_read) {
245                 tdb->last_error = TDB_ERR_RDONLY;
246                 return -1;
247         }
248
249         if (ftruncate(tdb->file->fd, size+addition) == -1) {
250                 char b = 0;
251                 ssize_t written = pwrite(tdb->file->fd, &b, 1,
252                                          (size+addition) - 1);
253                 if (written == 0) {
254                         /* try once more, potentially revealing errno */
255                         written = pwrite(tdb->file->fd, &b, 1,
256                                          (size+addition) - 1);
257                 }
258                 if (written == 0) {
259                         /* again - give up, guessing errno */
260                         errno = ENOSPC;
261                 }
262                 if (written != 1) {
263                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
264                                                 "expand_file to %d failed (%s)",
265                                                 size+addition,
266                                                 strerror(errno));
267                         return -1;
268                 }
269         }
270
271         /* now fill the file with something. This ensures that the
272            file isn't sparse, which would be very bad if we ran out of
273            disk. This must be done with write, not via mmap */
274         memset(buf, TDB1_PAD_BYTE, sizeof(buf));
275         while (addition) {
276                 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
277                 ssize_t written = pwrite(tdb->file->fd, buf, n, size);
278                 if (written == 0) {
279                         /* prevent infinite loops: try _once_ more */
280                         written = pwrite(tdb->file->fd, buf, n, size);
281                 }
282                 if (written == 0) {
283                         /* give up, trying to provide a useful errno */
284                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
285                                                 "expand_file write "
286                                                 "returned 0 twice: giving up!");
287                         errno = ENOSPC;
288                         return -1;
289                 } else if (written == -1) {
290                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
291                                                 "expand_file write of "
292                                                 "%d bytes failed (%s)", (int)n,
293                                                 strerror(errno));
294                         return -1;
295                 } else if (written != n) {
296                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_WARNING,
297                                    "expand_file: wrote "
298                                    "only %d of %d bytes - retrying",
299                                    (int)written, (int)n);
300                 }
301                 addition -= written;
302                 size += written;
303         }
304         return 0;
305 }
306
307
308 /* expand the database at least size bytes by expanding the underlying
309    file and doing the mmap again if necessary */
310 int tdb1_expand(struct tdb1_context *tdb, tdb1_off_t size)
311 {
312         struct tdb1_record rec;
313         tdb1_off_t offset, new_size, top_size, map_size;
314
315         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
316                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
317                            "lock failed in tdb1_expand");
318                 return -1;
319         }
320
321         /* must know about any previous expansions by another process */
322         tdb->methods->tdb1_oob(tdb, tdb->file->map_size + 1, 1);
323
324         /* limit size in order to avoid using up huge amounts of memory for
325          * in memory tdbs if an oddball huge record creeps in */
326         if (size > 100 * 1024) {
327                 top_size = tdb->file->map_size + size * 2;
328         } else {
329                 top_size = tdb->file->map_size + size * 100;
330         }
331
332         /* always make room for at least top_size more records, and at
333            least 25% more space. if the DB is smaller than 100MiB,
334            otherwise grow it by 10% only. */
335         if (tdb->file->map_size > 100 * 1024 * 1024) {
336                 map_size = tdb->file->map_size * 1.10;
337         } else {
338                 map_size = tdb->file->map_size * 1.25;
339         }
340
341         /* Round the database up to a multiple of the page size */
342         new_size = MAX(top_size, map_size);
343         size = TDB1_ALIGN(new_size, tdb->page_size) - tdb->file->map_size;
344
345         if (!(tdb->flags & TDB_INTERNAL))
346                 tdb1_munmap(tdb);
347
348         /*
349          * We must ensure the file is unmapped before doing this
350          * to ensure consistency with systems like OpenBSD where
351          * writes and mmaps are not consistent.
352          */
353
354         /* expand the file itself */
355         if (!(tdb->flags & TDB_INTERNAL)) {
356                 if (tdb->methods->tdb1_expand_file(tdb, tdb->file->map_size, size) != 0)
357                         goto fail;
358         }
359
360         tdb->file->map_size += size;
361
362         if (tdb->flags & TDB_INTERNAL) {
363                 char *new_map_ptr = (char *)realloc(tdb->file->map_ptr,
364                                                     tdb->file->map_size);
365                 if (!new_map_ptr) {
366                         tdb->file->map_size -= size;
367                         goto fail;
368                 }
369                 tdb->file->map_ptr = new_map_ptr;
370         } else {
371                 /*
372                  * We must ensure the file is remapped before adding the space
373                  * to ensure consistency with systems like OpenBSD where
374                  * writes and mmaps are not consistent.
375                  */
376
377                 /* We're ok if the mmap fails as we'll fallback to read/write */
378                 tdb1_mmap(tdb);
379         }
380
381         /* form a new freelist record */
382         memset(&rec,'\0',sizeof(rec));
383         rec.rec_len = size - sizeof(rec);
384
385         /* link it into the free list */
386         offset = tdb->file->map_size - size;
387         if (tdb1_free(tdb, offset, &rec) == -1)
388                 goto fail;
389
390         tdb1_unlock(tdb, -1, F_WRLCK);
391         return 0;
392  fail:
393         tdb1_unlock(tdb, -1, F_WRLCK);
394         return -1;
395 }
396
397 /* read/write a tdb1_off_t */
398 int tdb1_ofs_read(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
399 {
400         return tdb->methods->tdb1_read(tdb, offset, (char*)d, sizeof(*d), TDB1_DOCONV());
401 }
402
403 int tdb1_ofs_write(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_off_t *d)
404 {
405         tdb1_off_t off = *d;
406         return tdb->methods->tdb1_write(tdb, offset, TDB1_CONV(off), sizeof(*d));
407 }
408
409
410 /* read a lump of data, allocating the space for it */
411 unsigned char *tdb1_alloc_read(struct tdb1_context *tdb, tdb1_off_t offset, tdb1_len_t len)
412 {
413         unsigned char *buf;
414
415         /* some systems don't like zero length malloc */
416
417         if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
418                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
419                                              "tdb1_alloc_read malloc failed"
420                                              " len=%d (%s)",
421                                              len, strerror(errno));
422                 return NULL;
423         }
424         if (tdb->methods->tdb1_read(tdb, offset, buf, len, 0) == -1) {
425                 SAFE_FREE(buf);
426                 return NULL;
427         }
428         return buf;
429 }
430
431 /* Give a piece of tdb data to a parser */
432
433 int tdb1_parse_data(struct tdb1_context *tdb, TDB_DATA key,
434                    tdb1_off_t offset, tdb1_len_t len,
435                    int (*parser)(TDB_DATA key, TDB_DATA data,
436                                  void *private_data),
437                    void *private_data)
438 {
439         TDB_DATA data;
440         int result;
441
442         data.dsize = len;
443
444         if ((tdb->transaction == NULL) && (tdb->file->map_ptr != NULL)) {
445                 /*
446                  * Optimize by avoiding the malloc/memcpy/free, point the
447                  * parser directly at the mmap area.
448                  */
449                 if (tdb->methods->tdb1_oob(tdb, offset+len, 0) != 0) {
450                         return -1;
451                 }
452                 data.dptr = offset + (unsigned char *)tdb->file->map_ptr;
453                 return parser(key, data, private_data);
454         }
455
456         if (!(data.dptr = tdb1_alloc_read(tdb, offset, len))) {
457                 return -1;
458         }
459
460         result = parser(key, data, private_data);
461         free(data.dptr);
462         return result;
463 }
464
465 /* read/write a record */
466 int tdb1_rec_read(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
467 {
468         if (tdb->methods->tdb1_read(tdb, offset, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
469                 return -1;
470         if (TDB1_BAD_MAGIC(rec)) {
471                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
472                                         "tdb1_rec_read bad magic 0x%x at offset=%d",
473                                         rec->magic, offset);
474                 return -1;
475         }
476         return tdb->methods->tdb1_oob(tdb, rec->next+sizeof(*rec), 0);
477 }
478
479 int tdb1_rec_write(struct tdb1_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
480 {
481         struct tdb1_record r = *rec;
482         return tdb->methods->tdb1_write(tdb, offset, TDB1_CONV(r), sizeof(r));
483 }
484
/*
 * Default I/O method table, installed by tdb1_io_init().
 *
 * The initializer is positional: the entries have to stay in the order in
 * which struct tdb1_methods declares its members (that declaration is not
 * visible in this file).
 */
static const struct tdb1_methods io1_methods = {
        tdb1_read,
        tdb1_write,
        tdb1_next_hash_chain,
        tdb1_oob,
        tdb1_expand_file,
};
492
493 /*
494   initialise the default methods table
495 */
496 void tdb1_io_init(struct tdb1_context *tdb)
497 {
498         tdb->methods = &io1_methods;
499 }