tdb: don't free old recovery area when expanding if already at EOF.
author: Rusty Russell <rusty@rustcorp.com.au>
Tue, 20 Dec 2011 07:10:57 +0000 (17:40 +1030)
committer: Rusty Russell <rusty@rustcorp.com.au>
Tue, 20 Dec 2011 07:10:57 +0000 (17:40 +1030)
When the existing recovery area is too small, we allocate a new one by
expanding the file.  But if the recovery area is already at the end of
the file (as seen in at least one client case), we can simply expand
that record in place, rather than freeing it and creating a new one.

ccan/tdb/test/run-transaction-expand.c
ccan/tdb/transaction.c

index a46669d6458bb56f98b2e872ae76255da8badc7e..26426b2f1fb2092c0b4fa03ffd291445aba02d54 100644 (file)
@@ -55,7 +55,7 @@ int main(int argc, char *argv[])
        struct tdb_record rec;
        tdb_off_t off;
 
-       plan_tests(2);
+       plan_tests(4);
        tdb = tdb_open_ex("run-transaction-expand.tdb",
                          1024, TDB_CLEAR_IF_FIRST,
                          O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL);
@@ -76,6 +76,30 @@ int main(int argc, char *argv[])
        /* We should only be about 5 times larger than largest record. */
        ok1(tdb->map_size < 6 * i * getpagesize());
        tdb_close(tdb);
+
+       tdb = tdb_open_ex("run-transaction-expand.tdb",
+                         1024, TDB_CLEAR_IF_FIRST,
+                         O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL);
+       ok1(tdb);
+
+       data.dsize = 0;
+
+       /* Simulate a slowly growing record, repacking to keep
+        * recovery area at end. */
+       for (i = 0; i < 1000; i++) {
+               write_record(tdb, getpagesize(), &data);
+               if (i % 10 == 0)
+                       tdb_repack(tdb);
+       }
+
+       tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &off);
+       tdb_read(tdb, off, &rec, sizeof(rec), DOCONV());
+       diag("TDB size = %zu, recovery = %u-%u",
+            (size_t)tdb->map_size, off, off + sizeof(rec) + rec.rec_len);
+
+       /* We should only be about 4 times larger than largest record. */
+       ok1(tdb->map_size < 5 * i * getpagesize());
+       tdb_close(tdb);
        free(data.dptr);
 
        return exit_status();
index 525e0642d2b6088bb92d1a4bfd1f8b1232d4a42f..e34dee05c6bd80d148e6965ba1908e9265d7b01a 100644 (file)
@@ -653,7 +653,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb,
 {
        struct tdb_record rec;
        const struct tdb_methods *methods = tdb->transaction->io_methods;
-       tdb_off_t recovery_head;
+       tdb_off_t recovery_head, new_end;
 
        if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery head\n"));
@@ -676,6 +676,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb,
 
        *recovery_size = tdb_recovery_size(tdb);
 
+       /* Existing recovery area? */
        if (recovery_head != 0 && *recovery_size <= rec.rec_len) {
                /* it fits in the existing area */
                *recovery_max_size = rec.rec_len;
@@ -683,33 +684,45 @@ static int tdb_recovery_allocate(struct tdb_context *tdb,
                return 0;
        }
 
-       /* we need to free up the old recovery area, then allocate a
-          new one at the end of the file. Note that we cannot use
-          tdb_allocate() to allocate the new one as that might return
-          us an area that is being currently used (as of the start of
-          the transaction) */
-       if (recovery_head != 0) {
-               if (tdb_free(tdb, recovery_head, &rec) == -1) {
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to free previous recovery area\n"));
-                       return -1;
+       /* If recovery area in middle of file, we need a new one. */
+       if (recovery_head == 0
+           || recovery_head + sizeof(rec) + rec.rec_len != tdb->map_size) {
+               /* we need to free up the old recovery area, then allocate a
+                  new one at the end of the file. Note that we cannot use
+                  tdb_allocate() to allocate the new one as that might return
+                  us an area that is being currently used (as of the start of
+                  the transaction) */
+               if (recovery_head) {
+                       if (tdb_free(tdb, recovery_head, &rec) == -1) {
+                               TDB_LOG((tdb, TDB_DEBUG_FATAL,
+                                        "tdb_recovery_allocate: failed to"
+                                        " free previous recovery area\n"));
+                               return -1;
+                       }
+
+                       /* the tdb_free() call might have increased
+                        * the recovery size */
+                       *recovery_size = tdb_recovery_size(tdb);
                }
+
+               /* New head will be at end of file. */
+               recovery_head = tdb->map_size;
        }
 
-       /* the tdb_free() call might have increased the recovery size */
-       *recovery_size = tdb_recovery_size(tdb);
+       /* Now we know where it will be. */
+       *recovery_offset = recovery_head;
 
-       /* round up to a multiple of page size */
+       /* Expand by more than we need, so we don't do it often. */
        *recovery_max_size = tdb_expand_adjust(tdb->map_size,
                                               *recovery_size,
                                               tdb->page_size)
                - sizeof(rec);
 
-       *recovery_offset = tdb->map_size;
-       recovery_head = *recovery_offset;
+       new_end = recovery_head + sizeof(rec) + *recovery_max_size;
 
        if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 
-                                    (tdb->map_size - tdb->transaction->old_map_size) +
-                                    sizeof(rec) + *recovery_max_size) == -1) {
+                                    new_end - tdb->transaction->old_map_size)
+           == -1) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to create recovery area\n"));
                return -1;
        }