git.ozlabs.org Git - ccan/blobdiff - ccan/failtest/failtest.c
failtest: handle EINTR from poll.
[ccan] / ccan / failtest / failtest.c
index 1874d620286d697c9c063319f8435e52dd5823aa..7a6f5220e60cd5ed7d1035922b267751c999a84e 100644 (file)
@@ -1,3 +1,4 @@
+#include "config.h"
 #include <stdarg.h>
 #include <string.h>
 #include <stdio.h>
 #include <ccan/failtest/failtest.h>
 #include <ccan/build_assert/build_assert.h>
 
-bool (*failtest_hook)(struct failtest_call *history, unsigned num)
-= failtest_default_hook;
+enum failtest_result (*failtest_hook)(struct failtest_call *, unsigned);
 
 static int tracefd = -1;
 
 unsigned int failtest_timeout_ms = 20000;
 
 const char *failpath;
+const char *debugpath;
 
 enum info_type {
        WRITE,
@@ -47,6 +48,7 @@ static struct failtest_call *history = NULL;
 static unsigned int history_num = 0;
 static int control_fd = -1;
 static struct timeval start;
+static unsigned int probe_count = 0;
 
 static struct write_call *child_writes = NULL;
 static unsigned int child_writes_num = 0;
@@ -55,7 +57,7 @@ static pid_t lock_owner;
 static struct lock_info *locks = NULL;
 static unsigned int lock_num = 0;
 
-static const char info_to_arg[] = "mceoprwf";
+static const char info_to_arg[] = "mceoxprwf";
 
 /* Dummy call used for failtest_undo wrappers. */
 static struct failtest_call unrecorded_call;
@@ -82,13 +84,9 @@ static struct failtest_call *add_history_(enum failtest_call_type type,
 #define add_history(type, file, line, elem) \
        add_history_((type), (file), (line), (elem), sizeof(*(elem)))
 
+/* We do a fake call inside a sizeof(), to check types. */
 #define set_cleanup(call, clean, type)                 \
-       (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL)), (clean))
-
-bool failtest_default_hook(struct failtest_call *history, unsigned num)
-{
-       return true;
-}
+       (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
 
 static bool read_write_info(int fd)
 {
@@ -146,11 +144,23 @@ static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
        exit(1);
 }
 
+static void trace(const char *fmt, ...)
+{
+       va_list ap;
+
+       if (tracefd == -1)
+               return;
+
+       va_start(ap, fmt);
+       vdprintf(tracefd, fmt, ap);
+       va_end(ap);
+}
+
 static pid_t child;
 
-static void hand_down(int signal)
+static void hand_down(int signum)
 {
-       kill(child, signal);
+       kill(child, signum);
 }
 
 static void release_locks(void)
@@ -219,18 +229,6 @@ static void get_locks(void)
        lock_owner = getpid();
 }
 
-static void trace_str(const char *str)
-{
-       ssize_t ret;
-
-       while ((ret = write(tracefd, str, strlen(str))) <= 0) {
-               str += ret;
-               if (!*str)
-                       return;
-       }
-       err(1, "Writing trace.");
-}
-
 struct saved_file {
        struct saved_file *next;
        int fd;
@@ -304,6 +302,48 @@ static void restore_files(struct saved_file *s)
        }
 }
 
+/* Free up memory, so valgrind doesn't report leaks. */
+static void free_everything(void)
+{
+       unsigned int i;
+
+       /* We don't do this in cleanup: needed even for failed opens. */
+       for (i = 0; i < history_num; i++) {
+               if (history[i].type == FAILTEST_OPEN)
+                       free((char *)history[i].u.open.pathname);
+       }
+       free(history);
+}
+
+static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
+{
+       int i;
+
+       /* For children, we don't care if they "failed" the testing. */
+       if (control_fd != -1)
+               status = 0;
+
+       if (forced_cleanup)
+               history_num--;
+
+       /* Cleanup everything, in reverse order. */
+       for (i = history_num - 1; i >= 0; i--) {
+               if (!history[i].cleanup)
+                       continue;
+               if (!forced_cleanup) {
+                       printf("Leak at %s:%u: --failpath=%s\n",
+                              history[i].file, history[i].line,
+                              failpath_string());
+                       status = 1;
+               }
+               history[i].cleanup(&history[i].u);
+       }
+
+       free_everything();
+       tell_parent(SUCCESS);
+       exit(status);
+}
+
 static bool should_fail(struct failtest_call *call)
 {
        int status;
@@ -313,6 +353,10 @@ static bool should_fail(struct failtest_call *call)
        size_t outlen = 0;
        struct saved_file *files;
 
+       /* Are we probing? */
+       if (probe_count && --probe_count == 0)
+               failtest_cleanup(true, 0);
+
        if (call == &unrecorded_call)
                return false;
 
@@ -321,17 +365,56 @@ static bool should_fail(struct failtest_call *call)
                if (*failpath == '+')
                        failpath = NULL;
                else {
-                       if (tolower(*failpath) != info_to_arg[call->type])
+                       if (tolower((unsigned char)*failpath)
+                           != info_to_arg[call->type])
                                errx(1, "Failpath expected '%c' got '%c'\n",
                                     info_to_arg[call->type], *failpath);
-                       call->fail = isupper(*(failpath++));
+                       call->fail = isupper((unsigned char)*(failpath++));
                        return call->fail;
                }
        }
 
-       if (!failtest_hook(history, history_num)) {
-               call->fail = false;
-               return false;
+       /* Attach debugger if they asked for it. */
+       if (debugpath && history_num == strlen(debugpath)) {
+               unsigned int i;
+
+               for (i = 0; i < history_num; i++) {
+                       unsigned char c = info_to_arg[history[i].type];
+                       if (history[i].fail)
+                               c = toupper(c);
+                       if (c != debugpath[i])
+                               break;
+               }
+               if (i == history_num) {
+                       char str[80];
+
+                       /* Don't timeout. */
+                       signal(SIGUSR1, SIG_IGN);
+                       sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
+                               getpid(), getpid());
+                       system(str);
+                       sleep(5);
+               }
+       }
+
+       if (failtest_hook) {
+               switch (failtest_hook(history, history_num)) {
+               case FAIL_OK:
+                       break;
+               case FAIL_DONT_FAIL:
+                       call->fail = false;
+                       return false;
+               case FAIL_PROBE:
+                       /* Already down probe path?  Stop now. */
+                       if (probe_count)
+                               failtest_cleanup(true, 0);
+                       /* FIXME: We should run *parent* and run probe until
+                        * calls match up again. */
+                       probe_count = 3;
+                       break;
+               default:
+                       abort();
+               }
        }
 
        files = save_files();
@@ -350,7 +433,7 @@ static bool should_fail(struct failtest_call *call)
        if (child == 0) {
                if (tracefd != -1) {
                        struct timeval now;
-                       char str[50], *p;
+                       const char *p;
                        gettimeofday(&now, NULL);
                        if (now.tv_usec < start.tv_usec) {
                                now.tv_sec--;
@@ -358,20 +441,16 @@ static bool should_fail(struct failtest_call *call)
                        }
                        now.tv_usec -= start.tv_usec;
                        now.tv_sec -= start.tv_sec;
-                       sprintf(str, "%u (%u.%02u): ", getpid(),
-                               (int)now.tv_sec, (int)now.tv_usec / 10000);
-                       trace_str(str);
                        p = failpath_string();
-                       trace_str(p);
-                       free(p);
-                       trace_str("(");
-                       p = strchr(history[history_num-1].file, '/');
+                       trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
+                             (int)now.tv_sec, (int)now.tv_usec / 10000, p);
+                       free((char *)p);
+                       p = strrchr(history[history_num-1].file, '/');
                        if (p)
-                               trace_str(p+1);
+                               trace("%s", p+1);
                        else
-                               trace_str(history[history_num-1].file);
-                       sprintf(str, ":%u)\n", history[history_num-1].line);
-                       trace_str(str);
+                               trace("%s", history[history_num-1].file);
+                       trace(":%u)\n", history[history_num-1].line);
                }
                close(control[0]);
                close(output[0]);
@@ -404,8 +483,13 @@ static bool should_fail(struct failtest_call *call)
                else
                        ret = poll(pfd, 2, failtest_timeout_ms);
 
-               if (ret <= 0)
+               if (ret == 0)
                        hand_down(SIGUSR1);
+               if (ret < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       err(1, "Poll returned %i", ret);
+               }
 
                if (pfd[0].revents & POLLIN) {
                        ssize_t len;
@@ -431,9 +515,13 @@ static bool should_fail(struct failtest_call *call)
        close(output[0]);
        close(control[0]);
        waitpid(child, &status, 0);
-       if (!WIFEXITED(status))
-               child_fail(out, outlen, "Killed by signal %u: ",
-                          WTERMSIG(status));
+       if (!WIFEXITED(status)) {
+               if (WTERMSIG(status) == SIGUSR1)
+                       child_fail(out, outlen, "Timed out");
+               else
+                       child_fail(out, outlen, "Killed by signal %u: ",
+                                  WTERMSIG(status));
+       }
        /* Child printed failure already, just pass up exit code. */
        if (type == FAILURE) {
                fprintf(stderr, "%.*s", (int)outlen, out);
@@ -797,9 +885,20 @@ add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
 }
 
 /* We trap this so we can record it: we don't fail it. */
-int failtest_close(int fd)
+int failtest_close(int fd, const char *file, unsigned line)
 {
        int i;
+       struct close_call call;
+       struct failtest_call *p;
+
+       call.fd = fd;
+       p = add_history(FAILTEST_CLOSE, file, line, &call);
+       p->fail = false;
+
+       /* Consume close from failpath. */
+       if (failpath)
+               if (should_fail(p))
+                       abort();
 
        if (fd < 0)
                return close(fd);
@@ -889,7 +988,6 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
        }
 
        p = add_history(FAILTEST_FCNTL, file, line, &call);
-       get_locks();
 
        if (should_fail(p)) {
                p->u.fcntl.ret = -1;
@@ -898,6 +996,7 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
                else
                        p->error = EDEADLK;
        } else {
+               get_locks();
                p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
                                       &p->u.fcntl.arg.fl);
                if (p->u.fcntl.ret == -1)
@@ -927,44 +1026,20 @@ void failtest_init(int argc, char *argv[])
                } else if (strcmp(argv[i], "--tracepath") == 0) {
                        tracefd = dup(STDERR_FILENO);
                        failtest_timeout_ms = -1;
+               } else if (!strncmp(argv[i], "--debugpath=",
+                                   strlen("--debugpath="))) {
+                       debugpath = argv[i] + strlen("--debugpath=");
                }
        }
        gettimeofday(&start, NULL);
 }
 
-/* Free up memory, so valgrind doesn't report leaks. */
-static void free_everything(void)
-{
-       unsigned int i;
-
-       /* We don't do this in cleanup: needed even for failed opens. */
-       for (i = 0; i < history_num; i++) {
-               if (history[i].type == FAILTEST_OPEN)
-                       free((char *)history[i].u.open.pathname);
-       }
-       free(history);
-}
-
 void failtest_exit(int status)
 {
-       int i;
-
-       if (control_fd == -1) {
-               free_everything();
-               exit(status);
-       }
-
        if (failtest_exit_check) {
                if (!failtest_exit_check(history, history_num))
                        child_fail(NULL, 0, "failtest_exit_check failed\n");
        }
 
-       /* Cleanup everything, in reverse order. */
-       for (i = history_num - 1; i >= 0; i--)
-               if (history[i].cleanup)
-                       history[i].cleanup(&history[i].u);
-
-       free_everything();
-       tell_parent(SUCCESS);
-       exit(0);
+       failtest_cleanup(false, status);
 }