Import 898b5edfe757cb145960b8f3631029bfd5592119 from ctdb:
author Rusty Russell <rusty@rustcorp.com.au>
Tue, 2 Feb 2010 02:00:40 +0000 (12:30 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
Tue, 2 Feb 2010 02:00:40 +0000 (12:30 +1030)
Author: Volker Lendecke <vl@samba.org>  2010-01-30 03:51:09

    tdb: fix an early release of the global lock that can cause data corruption

    There was a bug in tdb where the

                    tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);

    (ending the transaction-"mutex") was done before the

                            /* remove the recovery marker */

    This means that when a transaction is committed there is a window where another
    opener of the file sees the transaction marker while the transaction committer
    is still fully functional and working on it. This led to the transaction being
    rolled back by that second opener of the file while transaction_commit() gave
    no error to the caller.

    This patch moves the F_UNLCK to after the recovery marker has been removed, closing
    this window.

ccan/tdb/test/external-transaction.c
ccan/tdb/transaction.c

index 1211da9e76d5c1d54702f3725fc34e30eb8d9658..d899a6c669b01ef21621ae853437d561cd49689c 100644 (file)
@@ -10,6 +10,7 @@
 #include <errno.h>
 #include <ccan/tdb/tdb.h>
 #include <ccan/tap/tap.h>
+#include <stdio.h>
 
 static volatile sig_atomic_t alarmed;
 static void do_alarm(int signum)
index 742f188f500dbd4bba8dcbe4f6d080a2663ca7ab..c5580f205d39c074fbbe52f9f5545659638112c3 100644 (file)
@@ -135,6 +135,9 @@ struct tdb_transaction {
        bool prepared;
        tdb_off_t magic_offset;
 
+       /* set when the GLOBAL_LOCK has been taken */
+       bool global_lock_taken;
+
        /* old file size before transaction */
        tdb_len_t old_map_size;
 
@@ -501,6 +504,11 @@ int _tdb_transaction_cancel(struct tdb_context *tdb, int ltype)
                }
        }
 
+       if (tdb->transaction->global_lock_taken) {
+               tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
+               tdb->transaction->global_lock_taken = false;
+       }
+
        /* remove any global lock created during the transaction */
        if (tdb->global_lock.count != 0) {
                tdb_brunlock(tdb, tdb->global_lock.ltype,
@@ -960,11 +968,12 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                return -1;
        }
 
+       tdb->transaction->global_lock_taken = true;
+
        if (!(tdb->flags & TDB_NOSYNC)) {
                /* write the recovery data to the end of the file */
                if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n"));
-                       tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
                        _tdb_transaction_cancel(tdb, F_WRLCK);
                        return -1;
                }
@@ -979,7 +988,6 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                                             tdb->transaction->old_map_size) == -1) {
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: expansion failed\n"));
-                       tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
                        _tdb_transaction_cancel(tdb, F_WRLCK);
                        return -1;
                }
@@ -1069,7 +1077,6 @@ int tdb_transaction_commit(struct tdb_context *tdb)
                        tdb_transaction_recover(tdb); 
 
                        _tdb_transaction_cancel(tdb, F_WRLCK);
-                       tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
 
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n"));
                        return -1;
@@ -1085,8 +1092,6 @@ int tdb_transaction_commit(struct tdb_context *tdb)
                return -1;
        }
 
-       tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
-
        /*
          TODO: maybe write to some dummy hdr field, or write to magic
          offset without mmap, before the last sync, instead of the