git.ozlabs.org Git - ccan/blobdiff - ccan/tdb2/transaction.c
tdb2: reduce transaction before writing to recovery area.
[ccan] / ccan / tdb2 / transaction.c
index 7a2ebbe9a3bb111e0a2c7ea2c4573a6ac2f82b44..4bdc3f32d019b6abe6e9519be298b027a7b804e6 100644 (file)
@@ -757,6 +757,44 @@ static void set_recovery_header(struct tdb_recovery_record *rec,
        rec->eof = oldsize;
 }
 
+/* Count the leading bytes (at most length) where new[] matches old[]. */
+static unsigned int same(const unsigned char *new,
+                        const unsigned char *old,
+                        unsigned int length)
+{
+       unsigned int matched = 0;
+
+       while (matched < length && new[matched] == old[matched])
+               matched++;
+
+       return matched;
+}
+
+/* Returns how many bytes precede the first run of >= min_same equal
+ * bytes (length if there is none); *samelen gets that run's length. */
+static unsigned int different(const unsigned char *new,
+                             const unsigned char *old,
+                             unsigned int length,
+                             unsigned int min_same,
+                             unsigned int *samelen)
+{
+       unsigned int pos, run = 0;
+
+       for (pos = 0; pos < length; pos++) {
+               if (new[pos] == old[pos]) {
+                       run++;
+                       continue;
+               }
+               /* Mismatch: stop if a long-enough equal run just ended. */
+               if (run >= min_same)
+                       break;
+               run = 0;
+       }
+       /* A trailing equal run that is too short is not reported. */
+       *samelen = (run >= min_same) ? run : 0;
+       return pos - *samelen;
+}
+
 /*
   setup the recovery data that will be used on a crash during commit
 */
@@ -791,9 +829,6 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
        }
 
        rec = (struct tdb_recovery_record *)data;
-       set_recovery_header(rec, TDB_RECOVERY_INVALID_MAGIC,
-                           recovery_size, recovery_max_size, old_map_size);
-       tdb_convert(tdb, rec, sizeof(*rec));
 
        /* build the recovery data into a single blob to allow us to do a single
           large write, which should be more efficient */
@@ -801,6 +836,8 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
        for (i=0;i<tdb->transaction->num_blocks;i++) {
                tdb_off_t offset;
                tdb_len_t length;
+               unsigned int off;
+               unsigned char buffer[PAGESIZE];
 
                if (tdb->transaction->blocks[i] == NULL) {
                        continue;
@@ -815,6 +852,7 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
                if (offset >= old_map_size) {
                        continue;
                }
+
                if (offset + length > tdb->file->map_size) {
                        free(data);
                        return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
@@ -822,40 +860,60 @@ static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb,
                                          " transaction data over new region"
                                          " boundary");
                }
-               memcpy(p, &offset, sizeof(offset));
-               memcpy(p + sizeof(offset), &length, sizeof(length));
-               tdb_convert(tdb, p, sizeof(offset) + sizeof(length));
-
-               /* the recovery area contains the old data, not the
-                  new data, so we have to call the original tdb_read
-                  method to get it */
-               ecode = methods->tread(tdb, offset,
-                                      p + sizeof(offset) + sizeof(length),
-                                      length);
+               if (offset + length > old_map_size) {
+                       /* Short read at EOF. */
+                       length = old_map_size - offset;
+               }
+               ecode = methods->tread(tdb, offset, buffer, length);
                if (ecode != TDB_SUCCESS) {
                        free(data);
                        return ecode;
                }
-               p += sizeof(offset) + sizeof(length) + length;
+
+               /* Skip over anything the same at the start. */
+               off = same(tdb->transaction->blocks[i], buffer, length);
+               offset += off;
+
+               while (off < length) {
+                       tdb_len_t len;
+                       unsigned int samelen;
+
+                       len = different(tdb->transaction->blocks[i] + off,
+                                       buffer + off, length - off,
+                                       sizeof(offset) + sizeof(len) + 1,
+                                       &samelen);
+
+                       memcpy(p, &offset, sizeof(offset));
+                       memcpy(p + sizeof(offset), &len, sizeof(len));
+                       tdb_convert(tdb, p, sizeof(offset) + sizeof(len));
+                       p += sizeof(offset) + sizeof(len);
+                       memcpy(p, buffer + off, len);
+                       p += len;
+                       off += len + samelen;
+                       offset += len + samelen;
+               }
        }
 
+       /* Now we know size, set up rec header. */
+       set_recovery_header(rec, TDB_RECOVERY_INVALID_MAGIC,
+                           p - data - sizeof(*rec),
+                           recovery_max_size, old_map_size);
+       tdb_convert(tdb, rec, sizeof(*rec));
+
        /* write the recovery data to the recovery area */
-       ecode = methods->twrite(tdb, recovery_offset, data,
-                               sizeof(*rec) + recovery_size);
+       ecode = methods->twrite(tdb, recovery_offset, data, p - data);
        if (ecode != TDB_SUCCESS) {
                free(data);
                return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
                                  "tdb_transaction_setup_recovery:"
                                  " failed to write recovery data");
        }
-       transaction_write_existing(tdb, recovery_offset, data,
-                                  sizeof(*rec) + recovery_size);
+       transaction_write_existing(tdb, recovery_offset, data, p - data);
 
        /* as we don't have ordered writes, we have to sync the recovery
           data before we update the magic to indicate that the recovery
           data is present */
-       ecode = transaction_sync(tdb, recovery_offset,
-                                sizeof(*rec) + recovery_size);
+       ecode = transaction_sync(tdb, recovery_offset, p - data);
        if (ecode != TDB_SUCCESS) {
                free(data);
                return ecode;