git.ozlabs.org Git - ccan/blobdiff - ccan/tdb2/transaction.c
tdb2: handle non-transaction-page-aligned sizes in recovery.
[ccan] / ccan / tdb2 / transaction.c
index dd2f0e2507d095cb367ca06f865baea26d6ed1a2..73ceb9620265ee70b86320871c57aba0488c429f 100644 (file)
@@ -615,7 +615,7 @@ static tdb_len_t tdb_recovery_size(struct tdb_context *tdb)
        tdb_len_t recovery_size = 0;
        int i;
 
-       recovery_size = sizeof(tdb_len_t);
+       recovery_size = 0;
        for (i=0;i<tdb->transaction->num_blocks;i++) {
                if (i * PAGESIZE >= tdb->transaction->old_map_size) {
                        break;
@@ -688,7 +688,8 @@ static enum TDB_ERROR tdb_recovery_allocate(struct tdb_context *tdb,
        if (recovery_head != 0) {
                tdb->stats.frees++;
                ecode = add_free_record(tdb, recovery_head,
-                                       sizeof(rec) + rec.max_len);
+                                       sizeof(rec) + rec.max_len,
+                                       TDB_LOCK_WAIT, true);
                if (ecode != TDB_SUCCESS) {
                        return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
                                          "tdb_recovery_allocate:"
@@ -700,10 +701,11 @@ static enum TDB_ERROR tdb_recovery_allocate(struct tdb_context *tdb,
        /* the tdb_free() call might have increased the recovery size */
        *recovery_size = tdb_recovery_size(tdb);
 
-       /* round up to a multiple of page size */
+       /* round up to a multiple of page size. Overallocate, since each
+        * such allocation forces us to expand the file. */
        *recovery_max_size
-               = (((sizeof(rec) + *recovery_size) + PAGESIZE-1)
-                  & ~(PAGESIZE-1))
+               = (((sizeof(rec) + *recovery_size + *recovery_size / 2)
+                   + PAGESIZE-1) & ~(PAGESIZE-1))
                - sizeof(rec);
        *recovery_offset = tdb->file->map_size;
        recovery_head = *recovery_offset;
@@ -768,7 +770,7 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
        const struct tdb_methods *methods = tdb->transaction->io_methods;
        struct tdb_recovery_record *rec;
        tdb_off_t old_map_size = tdb->transaction->old_map_size;
-       uint64_t magic, tailer;
+       uint64_t magic;
        int i;
        enum TDB_ERROR ecode;
 
@@ -813,6 +815,7 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
                if (offset >= old_map_size) {
                        continue;
                }
+
                if (offset + length > tdb->file->map_size) {
                        free(data);
                        return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
@@ -827,9 +830,19 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
                /* the recovery area contains the old data, not the
                   new data, so we have to call the original tdb_read
                   method to get it */
-               ecode = methods->tread(tdb, offset,
-                                      p + sizeof(offset) + sizeof(length),
-                                      length);
+               if (offset + length > old_map_size) {
+                       /* Short read at EOF, and zero fill. */
+                       unsigned int len = old_map_size - offset;
+                       ecode = methods->tread(tdb, offset,
+                                              p + sizeof(offset) + sizeof(length),
+                                              len);
+                       memset(p + sizeof(offset) + sizeof(length) + len, 0,
+                              length - len);
+               } else {
+                       ecode = methods->tread(tdb, offset,
+                                              p + sizeof(offset) + sizeof(length),
+                                              length);
+               }
                if (ecode != TDB_SUCCESS) {
                        free(data);
                        return ecode;
@@ -837,11 +850,6 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
                p += sizeof(offset) + sizeof(length) + length;
        }
 
-       /* and the tailer */
-       tailer = sizeof(*rec) + recovery_max_size;
-       memcpy(p, &tailer, sizeof(tailer));
-       tdb_convert(tdb, p, sizeof(tailer));
-
        /* write the recovery data to the recovery area */
        ecode = methods->twrite(tdb, recovery_offset, data,
                                sizeof(*rec) + recovery_size);