tdb: new test for dying during a transaction.
authorRusty Russell <rusty@rustcorp.com.au>
Thu, 4 Feb 2010 01:45:12 +0000 (12:15 +1030)
committerRusty Russell <rusty@rustcorp.com.au>
Thu, 4 Feb 2010 01:45:12 +0000 (12:15 +1030)
This demonstrates some serious failings, which get cleaned up in
following patches.

12 files changed:
ccan/tdb/test/external-transaction.c
ccan/tdb/test/external-transaction.h
ccan/tdb/test/run-3G-file.c
ccan/tdb/test/run-bad-tdb-header.c
ccan/tdb/test/run-die-during-transaction.c [new file with mode: 0644]
ccan/tdb/test/run-endian.c
ccan/tdb/test/run-nested-transactions.c
ccan/tdb/test/run-nested-traverse.c
ccan/tdb/test/run-open-during-transaction.c
ccan/tdb/test/run-traverse-in-transaction.c
ccan/tdb/test/run-zero-append.c
ccan/tdb/test/run.c

index d899a6c669b01ef21621ae853437d561cd49689c..37dd50841eb14b66d00d8b01dd435a5eaaa9a681 100644 (file)
@@ -9,8 +9,12 @@
 #include <string.h>
 #include <errno.h>
 #include <ccan/tdb/tdb.h>
+#include <ccan/tdb/tdb_private.h>
 #include <ccan/tap/tap.h>
 #include <stdio.h>
+#include <stdarg.h>
+
+static struct tdb_context *tdb;
 
 static volatile sig_atomic_t alarmed;
 static void do_alarm(int signum)
@@ -18,20 +22,61 @@ static void do_alarm(int signum)
        alarmed++;
 }
 
+static void taplog(struct tdb_context *tdb,
+                  enum tdb_debug_level level,
+                  const char *fmt, ...)
+{
+       va_list ap;
+       char line[200];
+
+       va_start(ap, fmt);
+       vsprintf(line, fmt, ap);
+       va_end(ap);
+
+       diag("external: %s", line);
+}
+
 static int do_operation(enum operation op, const char *name)
 {
+       struct tdb_logging_context logctx = { taplog, NULL };
+
        TDB_DATA k = { .dptr = (void *)"a", .dsize = 1 };
        TDB_DATA d = { .dptr = (void *)"b", .dsize = 1 };
-       struct tdb_context *tdb;
 
-       tdb = tdb_open(name, 0, op == OPEN_WITH_CLEAR_IF_FIRST ?
-                      TDB_CLEAR_IF_FIRST : TDB_DEFAULT, O_RDWR, 0);
-       if (!tdb)
-               return -1;
+       if (op <= KEEP_OPENED) {
+               tdb = tdb_open_ex(name, 0, op == OPEN_WITH_CLEAR_IF_FIRST ?
+                                 TDB_CLEAR_IF_FIRST : TDB_DEFAULT, O_RDWR, 0,
+                                 &logctx, NULL);
+               if (!tdb)
+                       return -1;
+       }
 
-       if (op == OPEN || op == OPEN_WITH_CLEAR_IF_FIRST) {
-               tdb_close(tdb);
+       if (op == KEEP_OPENED) {
                return 0;
+       } else if (op == OPEN || op == OPEN_WITH_CLEAR_IF_FIRST || op == CLOSE) {
+               tdb_close(tdb);
+               tdb = NULL;
+               return 1;
+       } else if (op == STORE_KEEP_OPENED) {
+               if (tdb_store(tdb, k, d, 0) != 0)
+                       return -2;
+               return 1;
+       } else if (op == FETCH_KEEP_OPENED) {
+               TDB_DATA ret;
+               ret = tdb_fetch(tdb, k);
+               if (ret.dptr == NULL) {
+                       if (tdb_error(tdb) == TDB_ERR_NOEXIST)
+                               return 1;
+                       return -3;
+               }
+               if (ret.dsize != 1 || *(char *)ret.dptr != 'b')
+                       return -4;
+               free(ret.dptr);
+               return 1;
+       } else if (op == CHECK_KEEP_OPENED) {
+               return tdb_check(tdb, NULL, 0) == 0;
+       } else if (op == NEEDS_RECOVERY_KEEP_OPENED) {
+               return tdb_maybe_needs_recovery(tdb);
        }
 
        alarmed = 0;
@@ -41,21 +86,29 @@ static int do_operation(enum operation op, const char *name)
        if (tdb_transaction_start(tdb) != 0)
                goto maybe_alarmed;
 
+       alarm(0);
        if (tdb_store(tdb, k, d, 0) != 0) {
                tdb_transaction_cancel(tdb);
                tdb_close(tdb);
+               tdb = NULL;
                return -2;
        }
 
        if (tdb_transaction_commit(tdb) == 0) {
                tdb_delete(tdb, k);
-               tdb_close(tdb);
+               if (op != TRANSACTION_KEEP_OPENED) {
+                       tdb_close(tdb);
+                       tdb = NULL;
+               }
                return 1;
        }
 
        tdb_delete(tdb, k);
 maybe_alarmed:
-       tdb_close(tdb);
+       if (op != TRANSACTION_KEEP_OPENED) {
+               tdb_close(tdb);
+               tdb = NULL;
+       }
        if (alarmed)
                return 0;
        return -3;
@@ -68,7 +121,7 @@ struct agent {
 /* Do this before doing any tdb stuff.  Return handle, or NULL. */
 struct agent *prepare_external_agent(void)
 {
-       int pid;
+       int pid, ret;
        int command[2], response[2];
        struct sigaction act = { .sa_handler = do_alarm };
        char name[1+PATH_MAX];
@@ -94,7 +147,7 @@ struct agent *prepare_external_agent(void)
        close(response[0]);
        sigaction(SIGALRM, &act, NULL);
 
-       while (read(command[0], name, sizeof(name)) != 0) {
+       while ((ret = read(command[0], name, sizeof(name))) > 0) {
                int result;
 
                result = do_operation(name[0], name+1);
@@ -102,11 +155,12 @@ struct agent *prepare_external_agent(void)
                    != sizeof(result))
                        err(1, "Writing response");
        }
+       diag("external: read %i: %s", ret, strerror(errno));
        exit(0);
 }
 
 /* Ask the external agent to try to do an operation. */
-bool external_agent_operation(struct agent *agent,
+int external_agent_operation(struct agent *agent,
                              enum operation op, const char *tdbname)
 {
        int res;
index 843b697917522f13690352cbe03798fb253a273e..c0197b194a6bb04f8ee4df0182482e331faf6d4c 100644 (file)
@@ -6,15 +6,22 @@ enum operation {
        OPEN,
        OPEN_WITH_CLEAR_IF_FIRST,
        TRANSACTION,
+       KEEP_OPENED,
+       TRANSACTION_KEEP_OPENED,
+       FETCH_KEEP_OPENED,
+       STORE_KEEP_OPENED,
+       CHECK_KEEP_OPENED,
+       NEEDS_RECOVERY_KEEP_OPENED,
+       CLOSE,
 };
 
 /* Do this before doing any tdb stuff.  Return handle, or -1. */
 struct agent *prepare_external_agent(void);
 
 /* Ask the external agent to try to do an operation. */
-bool external_agent_operation(struct agent *handle,
-                             enum operation op,
-                             const char *tdbname);
+int external_agent_operation(struct agent *handle,
+                            enum operation op,
+                            const char *tdbname);
 
 /* Ask... */
 void external_agent_operation_start(struct agent *agent,
index 11923458cdbd2a784a97c1f8d06801763f932647..0e24e890562f7ad8ad6e2f71006b9cb2facc7601 100644 (file)
@@ -10,6 +10,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>
index 08753402470e33f993c87e25a8f5b78f43fc3a98..3f9b66913870308796de478b36482784f4d4cff1 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>
diff --git a/ccan/tdb/test/run-die-during-transaction.c b/ccan/tdb/test/run-die-during-transaction.c
new file mode 100644 (file)
index 0000000..7e61b48
--- /dev/null
@@ -0,0 +1,280 @@
+#define _XOPEN_SOURCE 500
+#include <unistd.h>
+static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
+static ssize_t write_check(int fd, const void *buf, size_t count);
+static int fcntl_check(int fd, int cmd, ... /* arg */ );
+static int ftruncate_check(int fd, off_t length);
+
+#define pwrite pwrite_check
+#define write write_check
+#define fcntl fcntl_check
+#define ftruncate ftruncate_check
+
+#include <ccan/tdb/tdb.h>
+#include <ccan/tdb/io.c>
+#include <ccan/tdb/tdb.c>
+#include <ccan/tdb/lock.c>
+#include <ccan/tdb/freelist.c>
+#include <ccan/tdb/traverse.c>
+#include <ccan/tdb/transaction.c>
+#include <ccan/tdb/error.c>
+#include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
+#include <ccan/tap/tap.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <err.h>
+#include <setjmp.h>
+#include "external-transaction.h"
+
+#undef write
+#undef pwrite
+#undef fcntl
+#undef ftruncate
+
+static bool in_transaction;
+static bool suppress_logging;
+static int target, current;
+static jmp_buf jmpbuf;
+#define TEST_DBNAME "/tmp/test7.tdb"
+
+/* We save the locks so we can reaquire them. */
+struct lock {
+       struct lock *next;
+       off_t off;
+       unsigned int len;
+       int type;
+};
+static struct lock *locks;
+
+static void taplog(struct tdb_context *tdb,
+                  enum tdb_debug_level level,
+                  const char *fmt, ...)
+{
+       va_list ap;
+       char line[200];
+
+       if (suppress_logging)
+               return;
+
+       va_start(ap, fmt);
+       vsprintf(line, fmt, ap);
+       va_end(ap);
+
+       diag("%s", line);
+}
+
+static void check_file_contents(int fd)
+{
+       if (current++ == target) {
+               longjmp(jmpbuf, 1);
+       }
+}
+
+static ssize_t pwrite_check(int fd,
+                           const void *buf, size_t count, off_t offset)
+{
+       ssize_t ret;
+
+       if (in_transaction)
+               check_file_contents(fd);
+
+       ret = pwrite(fd, buf, count, offset);
+       if (ret != count)
+               return ret;
+
+       if (in_transaction)
+               check_file_contents(fd);
+       return ret;
+}
+
+static ssize_t write_check(int fd, const void *buf, size_t count)
+{
+       ssize_t ret;
+
+       if (in_transaction)
+               check_file_contents(fd);
+
+       ret = write(fd, buf, count);
+       if (ret != count)
+               return ret;
+
+       if (in_transaction)
+               check_file_contents(fd);
+       return ret;
+}
+
+extern int fcntl(int fd, int cmd, ... /* arg */ );
+
+static int fcntl_check(int fd, int cmd, ... /* arg */ )
+{
+       va_list ap;
+       int ret, arg3;
+       struct flock *fl;
+
+       if (cmd != F_SETLK && cmd != F_SETLKW) {
+               /* This may be totally bogus, but we don't know in general. */
+               va_start(ap, cmd);
+               arg3 = va_arg(ap, int);
+               va_end(ap);
+
+               return fcntl(fd, cmd, arg3);
+       }
+
+       va_start(ap, cmd);
+       fl = va_arg(ap, struct flock *);
+       va_end(ap);
+
+       ret = fcntl(fd, cmd, fl);
+       if (ret == 0) {
+               if (fl->l_type == F_UNLCK) {
+                       struct lock **l;
+                       struct lock *old = NULL;
+                       
+                       for (l = &locks; *l; l = &(*l)->next) {
+                               if ((*l)->off == fl->l_start
+                                   && (*l)->len == fl->l_len) {
+                                       old = *l;
+                                       *l = (*l)->next;
+                                       free(old);
+                                       break;
+                               }
+                       }
+                       if (!old)
+                               errx(1, "Unknown lock");
+               } else {
+                       struct lock *new = malloc(sizeof *new);
+                       new->off = fl->l_start;
+                       new->len = fl->l_len;
+                       new->type = fl->l_type;
+                       new->next = locks;
+                       locks = new;
+               }
+       }
+
+       if (in_transaction && fl->l_type == F_UNLCK)
+               check_file_contents(fd);
+       return ret;
+}
+
+static int ftruncate_check(int fd, off_t length)
+{
+       int ret;
+
+       if (in_transaction)
+               check_file_contents(fd);
+
+       ret = ftruncate(fd, length);
+
+       if (in_transaction)
+               check_file_contents(fd);
+       return ret;
+}
+
+static bool test_death(enum operation op, struct agent *agent)
+{
+       struct tdb_context *tdb;
+       TDB_DATA key, data;
+       struct tdb_logging_context logctx = { taplog, NULL };
+       int needed_recovery = 0;
+
+       current = target = 0;
+reset:
+       if (setjmp(jmpbuf) != 0) {
+               /* We're partway through.  Simulate our death. */
+               close(tdb->fd);
+               in_transaction = false;
+
+               if (external_agent_operation(agent, NEEDS_RECOVERY_KEEP_OPENED,
+                                            ""))
+                       needed_recovery++;
+
+               if (external_agent_operation(agent, op, "") != 1) {
+                       diag("Step %u op failed", current);
+                       return false;
+               }
+
+               if (external_agent_operation(agent, NEEDS_RECOVERY_KEEP_OPENED,
+                                            "")) {
+                       diag("Still needs recovery after step %u", current);
+                       return false;
+               }
+
+               if (external_agent_operation(agent, CHECK_KEEP_OPENED, "")
+                   != 1) {
+                       diag("Step %u check failed", current);
+                       return false;
+               }
+
+               external_agent_operation(agent, CLOSE, "");
+               /* Suppress logging as this tries to use closed fd. */
+               suppress_logging = true;
+               tdb_close(tdb);
+               suppress_logging = false;
+               target++;
+               current = 0;
+               goto reset;
+       }
+
+       unlink(TEST_DBNAME);
+       tdb = tdb_open_ex(TEST_DBNAME, 1024, TDB_NOMMAP,
+                         O_CREAT|O_TRUNC|O_RDWR, 0600, &logctx, NULL);
+
+       if (external_agent_operation(agent, KEEP_OPENED, TEST_DBNAME) != 0)
+               errx(1, "Agent failed to open?");
+
+       if (tdb_transaction_start(tdb) != 0)
+               return false;
+
+       in_transaction = true;
+       key.dsize = strlen("hi");
+       key.dptr = (void *)"hi";
+       data.dptr = (void *)"world";
+       data.dsize = strlen("world");
+
+       if (tdb_store(tdb, key, data, TDB_INSERT) != 0)
+               return false;
+       if (tdb_transaction_commit(tdb) != 0)
+               return false;
+
+       in_transaction = false;
+
+       /* We made it! */
+       diag("Completed %u runs", current);
+       tdb_close(tdb);
+       external_agent_operation(agent, CLOSE, "");
+
+       ok1(needed_recovery);
+       return true;
+}
+
+int main(int argc, char *argv[])
+{
+       enum operation ops[] = { FETCH_KEEP_OPENED,
+                                STORE_KEEP_OPENED,
+                                TRANSACTION_KEEP_OPENED };
+       struct agent *agent;
+       int i;
+
+       plan_tests(6);
+       agent = prepare_external_agent();
+       if (!agent)
+               err(1, "preparing agent");
+
+       /* Nice ourselves down: we can't tell the difference between agent
+        * blocking on lock, and agent not scheduled. */
+       nice(15);
+
+       for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) {
+               diag("Testing %s after death",
+                    ops[i] == TRANSACTION_KEEP_OPENED ? "transaction"
+                    : ops[i] == FETCH_KEEP_OPENED ? "fetch"
+                    : ops[i] == STORE_KEEP_OPENED ? "store"
+                    : NULL);
+
+               ok1(test_death(ops[i], agent));
+       }
+
+       return exit_status();
+}
index 6583c53ce0b06c778e7889cab831281a410d3d82..25335a7f5710c71482bf1107fde4242e8e610cfc 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>
index 50da919e24001db187bd17d1cee3d997ef8125e3..fd28895527d6e9d5e159b4f826d4fc3b4413603e 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>
index 4470610a37c342b876f119e7ec4b56910f2a3bf0..054af0d21bc3c5fb62c8b4d34a4632f294d9f97c 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>
index 5119004b97fb6d2af561f83ced86fc9c4afa3af8..27356e785f6b00c59539887b5e82ae1a641c77a6 100644 (file)
@@ -19,6 +19,7 @@ static int ftruncate_check(int fd, off_t length);
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>
@@ -80,7 +81,7 @@ static void check_for_agent(int fd, bool block)
        if (!external_agent_operation_check(agent, block, &res))
                return;
 
-       if (res != 0)
+       if (res != 1)
                err(1, "Agent failed open");
        agent_pending = false;
 
index 37a4f6d3ffd9ee760fef70de4746fc922f66285b..0a74055b6b0b778f35f0d81d327e31acd1723d64 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>
index 35f4fbe32cf4d824172ac169d2a95af7d3b50810..999fe1d7f406da41a59ddb383fabb460c3a1d30b 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>
index 53057433d50037e922fb49a4fdf9ac8cecc67471..f7fc2a0a08d8edb7d2ace398691858d0cce759d6 100644 (file)
@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>