tdb2: use failtest for opening and checking database.
author Rusty Russell <rusty@rustcorp.com.au>
Tue, 1 Mar 2011 12:49:20 +0000 (23:19 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
Tue, 1 Mar 2011 12:49:20 +0000 (23:19 +1030)
This is a fairly sophisticated use of failtest:
1) There are a few places where we can inject failures without revealing it
   at the API level, eg. opening /dev/urandom, or allocation failure in logging.
2) We want to be sure that (almost) all failures cause a message to be logged.
3) We need to exit as soon as possible when a failure is injected, to avoid
   combinatorial explosion.
4) We don't want to simply exit on any log message, since we want to be sure
   that cleanup happens.

This test found four different bugs in failure paths.  Erk!

ccan/tdb2/_info
ccan/tdb2/test/run-01-new_database.c

index 49131d2edc511d5d03ca7f2e6e1fccfa33ebd1eb..ae740cc7174fd7395fd3e6fba25657da2f7d9edb 100644 (file)
@@ -80,6 +80,7 @@ int main(int argc, char *argv[])
                printf("ccan/compiler\n");
                printf("ccan/build_assert\n");
                printf("ccan/ilog\n");
+               printf("ccan/failtest\n");
                printf("ccan/tally\n");
                return 0;
        }
index ea385b60e325dbcd54eccf742c62f373509cc62f..defba0b00f3a2a3f794b299468b3bc03caa3868d 100644 (file)
@@ -1,3 +1,4 @@
+#include <ccan/failtest/failtest_override.h>
 #include <ccan/tdb2/tdb.c>
 #include <ccan/tdb2/free.c>
 #include <ccan/tdb2/lock.c>
@@ -6,8 +7,115 @@
 #include <ccan/tdb2/transaction.c>
 #include <ccan/tdb2/check.c>
 #include <ccan/tap/tap.h>
+#include <ccan/failtest/failtest.h>
 #include "logging.h"
 
+/* FIXME: Check these! */
+#define INITIAL_TDB_MALLOC     "tdb.c", 182, FAILTEST_MALLOC
+#define LOGGING_MALLOC         "tdb.c", 739, FAILTEST_MALLOC
+#define URANDOM_OPEN           "tdb.c", 49, FAILTEST_OPEN
+#define URANDOM_READ           "tdb.c", 29, FAILTEST_READ
+
+/*
+ * Does @call describe the given call site?  The type and line must be
+ * equal, and call->file must either equal @file exactly or end with a
+ * '/' immediately followed by @file.
+ */
+static bool failmatch(const struct failtest_call *call,
+                     const char *file, int line, enum failtest_call_type type)
+{
+       size_t call_len, file_len;
+
+       if (call->type != type || call->line != line)
+               return false;
+
+       if (strcmp(call->file, file) == 0)
+               return true;
+
+       /* Not identical: accept only a longer path that ends in "/<file>". */
+       if (!strends(call->file, file))
+               return false;
+
+       call_len = strlen(call->file);
+       file_len = strlen(file);
+       return call->file[call_len - file_len - 1] == '/';
+}
+
+/*
+ * Scan [start, end) in order and return the first entry that failmatch()
+ * considers to have the same file, line and type as @call, or NULL if
+ * there is no such entry.
+ */
+static const struct failtest_call *
+find_repeat(const struct failtest_call *start, const struct failtest_call *end,
+           const struct failtest_call *call)
+{
+       const struct failtest_call *cur = start;
+
+       while (cur < end) {
+               if (failmatch(cur, call->file, call->line, call->type))
+                       return cur;
+               cur++;
+       }
+       return NULL;
+}
+
+/* True if @call is a recorded fcntl() whose command is F_SETLK. */
+static bool is_nonblocking_lock(const struct failtest_call *call)
+{
+       if (call->type != FAILTEST_FCNTL)
+               return false;
+       return call->u.fcntl.cmd == F_SETLK;
+}
+
+/*
+ * failtest hook.  Some places we soldier on despite errors: only fail them
+ * once.
+ *
+ * @history: calls recorded so far; the last entry is the one being decided.
+ * @num: number of entries in @history (the code assumes num >= 1).
+ *
+ * Returns FAIL_DONT_FAIL when an equivalent call already appears earlier in
+ * @history, FAIL_PROBE the first time such a call is seen, and FAIL_OK for
+ * any call that is not covered by the fail-once rules below.
+ */
+static enum failtest_result
+block_repeat_failures(struct failtest_call *history, unsigned num)
+{
+       const struct failtest_call *i, *last = &history[num-1];
+
+       if (failmatch(last, INITIAL_TDB_MALLOC)
+           || failmatch(last, LOGGING_MALLOC)
+           || failmatch(last, URANDOM_OPEN)
+           || failmatch(last, URANDOM_READ)) {
+               if (find_repeat(history, last, last))
+                       return FAIL_DONT_FAIL;
+               return FAIL_PROBE;
+       }
+
+       /* Unlock or non-blocking lock is fail-once. */
+       /* NOTE(review): l_type is read without checking that the fcntl
+        * command is a locking one -- confirm this is valid for every
+        * recorded fcntl call. */
+       if (last->type == FAILTEST_FCNTL
+           && last->u.fcntl.arg.fl.l_type == F_UNLCK) {
+               /*
+                * Find a previous unlock at this point?  Resume each search
+                * just past the previous hit: find_repeat() returns the
+                * first match in its range, so re-searching [history, i)
+                * could never find anything and later matches were skipped.
+                */
+               for (i = find_repeat(history, last, last);
+                    i;
+                    i = find_repeat(i + 1, last, last)) {
+                       if (i->u.fcntl.arg.fl.l_type == F_UNLCK)
+                               return FAIL_DONT_FAIL;
+               }
+               return FAIL_PROBE;
+       } else if (is_nonblocking_lock(last)) {
+               /* Find a previous non-blocking lock at this point?  Same
+                * forward walk over every earlier call from this site. */
+               for (i = find_repeat(history, last, last);
+                    i;
+                    i = find_repeat(i + 1, last, last)) {
+                       if (is_nonblocking_lock(i))
+                               return FAIL_DONT_FAIL;
+               }
+               return FAIL_PROBE;
+       }
+
+       return FAIL_OK;
+}
+
+/*
+ * Is @call one of the injected failures that is not expected to produce a
+ * log message?
+ */
+static bool failure_need_not_log(const struct failtest_call *call)
+{
+       /* Failing the /dev/urandom open doesn't count: we fall back. */
+       if (failmatch(call, URANDOM_OPEN))
+               return true;
+
+       /* Similarly with read fail. */
+       if (failmatch(call, URANDOM_READ))
+               return true;
+
+       /* Initial allocation of tdb doesn't log. */
+       if (failmatch(call, INITIAL_TDB_MALLOC))
+               return true;
+
+       /* We don't block "failures" on non-blocking locks. */
+       return is_nonblocking_lock(call);
+}
+
+/*
+ * failtest exit check.  Finds the first entry in @history that was failed
+ * and is not exempt from logging; the result is then whether any log
+ * message was recorded (a diagnostic is printed when none was).  Returns
+ * true if no such entry exists.
+ */
+static bool exit_check(struct failtest_call *history, unsigned num)
+{
+       unsigned int idx;
+
+       for (idx = 0; idx < num; idx++) {
+               const struct failtest_call *call = &history[idx];
+
+               if (!call->fail || failure_need_not_log(call))
+                       continue;
+
+               /* Only the first relevant failure decides the outcome. */
+               if (!tap_log_messages)
+                       diag("We didn't log for %u (%s:%u)",
+                            idx, call->file, call->line);
+               return tap_log_messages != 0;
+       }
+       return true;
+}
+
 int main(int argc, char *argv[])
 {
        unsigned int i;
@@ -16,16 +124,23 @@ int main(int argc, char *argv[])
                        TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, 
                        TDB_NOMMAP|TDB_CONVERT };
 
-       plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
+       failtest_init(argc, argv);
+       failtest_hook = block_repeat_failures;
+       failtest_exit_check = exit_check;
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
        for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
                tdb = tdb_open("run-new_database.tdb", flags[i],
                               O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
-               ok1(tdb);
+               if (!ok1(tdb))
+                       failtest_exit(exit_status());
                if (tdb) {
-                       ok1(tdb_check(tdb, NULL, NULL) == 0);
+                       bool ok = ok1(tdb_check(tdb, NULL, NULL) == 0);
                        tdb_close(tdb);
+                       if (!ok)
+                               failtest_exit(exit_status());
                }
+               if (!ok1(tap_log_messages == 0))
+                       break;
        }
-       ok1(tap_log_messages == 0);
-       return exit_status();
+       failtest_exit(exit_status());
 }