X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ffailtest%2Ffailtest.c;h=bac0aa350ac24378e8af2b949cb0781a7699906a;hp=2a39679f4827eadcd05b183587534bb040aa1c8a;hb=f18b792838ee010b42091639b673c7b28606aa47;hpb=f0d8f32b2868b84ec7e6bba8ae892950eef491c4 diff --git a/ccan/failtest/failtest.c b/ccan/failtest/failtest.c index 2a39679f..bac0aa35 100644 --- a/ccan/failtest/failtest.c +++ b/ccan/failtest/failtest.c @@ -1,3 +1,5 @@ +/* Licensed under LGPL - see LICENSE file for details */ +#include #include #include #include @@ -11,20 +13,21 @@ #include #include #include +#include #include +#include #include #include -#include #include -bool (*failtest_hook)(struct failtest_call *history, unsigned num) -= failtest_default_hook; +enum failtest_result (*failtest_hook)(struct failtest_call *, unsigned); static int tracefd = -1; unsigned int failtest_timeout_ms = 20000; const char *failpath; +const char *debugpath; enum info_type { WRITE, @@ -47,6 +50,7 @@ static struct failtest_call *history = NULL; static unsigned int history_num = 0; static int control_fd = -1; static struct timeval start; +static unsigned int probe_count = 0; static struct write_call *child_writes = NULL; static unsigned int child_writes_num = 0; @@ -55,7 +59,9 @@ static pid_t lock_owner; static struct lock_info *locks = NULL; static unsigned int lock_num = 0; -static const char info_to_arg[] = "mceoprwf"; +static pid_t orig_pid; + +static const char info_to_arg[] = "mceoxprwf"; /* Dummy call used for failtest_undo wrappers. */ static struct failtest_call unrecorded_call; @@ -82,13 +88,9 @@ static struct failtest_call *add_history_(enum failtest_call_type type, #define add_history(type, file, line, elem) \ add_history_((type), (file), (line), (elem), sizeof(*(elem))) +/* We do a fake call inside a sizeof(), to check types. */ #define set_cleanup(call, clean, type) \ - (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL)), (clean)) - -bool failtest_default_hook(struct failtest_call *history, unsigned num) -{ - return true; -} + (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean)) static bool read_write_info(int fd) { @@ -146,11 +148,23 @@ static void child_fail(const char *out, size_t outlen, const char *fmt, ...) exit(1); } +static void trace(const char *fmt, ...) +{ + va_list ap; + + if (tracefd == -1) + return; + + va_start(ap, fmt); + vdprintf(tracefd, fmt, ap); + va_end(ap); +} + static pid_t child; -static void hand_down(int signal) +static void hand_down(int signum) { - kill(child, signal); + kill(child, signum); } static void release_locks(void) @@ -219,40 +233,28 @@ static void get_locks(void) lock_owner = getpid(); } -static void trace_str(const char *str) -{ - ssize_t ret; - - while ((ret = write(tracefd, str, strlen(str))) <= 0) { - str += ret; - if (!*str) - return; - } - err(1, "Writing trace."); -} - struct saved_file { struct saved_file *next; int fd; void *contents; - off_t len; + off_t off, len; }; static struct saved_file *save_file(struct saved_file *next, int fd) { struct saved_file *s = malloc(sizeof(*s)); - off_t orig = lseek(fd, 0, SEEK_CUR); - - /* Special file? Erk... */ - assert(orig != -1); s->next = next; s->fd = fd; + s->off = lseek(fd, 0, SEEK_CUR); + /* Special file? Erk... */ + assert(s->off != -1); s->len = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); s->contents = malloc(s->len); - read(fd, s->contents, s->len); - lseek(fd, orig, SEEK_SET); + if (read(fd, s->contents, s->len) != s->len) + err(1, "Failed to save %zu bytes", (size_t)s->len); + lseek(fd, s->off, SEEK_SET); return s; } @@ -264,34 +266,26 @@ static struct saved_file *save_files(void) { struct saved_file *files = NULL; int i; - fd_set closed; /* Figure out the set of live fds. */ - FD_ZERO(&closed); for (i = history_num - 2; i >= 0; i--) { - /* FIXME: Handle dup. */ - if (history[i].type == FAILTEST_CLOSE) { - assert(!FD_ISSET(history[i].u.close.fd, &closed)); - FD_SET(history[i].u.close.fd, &closed); - } else if (history[i].type == FAILTEST_OPEN) { + if (history[i].type == FAILTEST_OPEN) { int fd = history[i].u.open.ret; /* Only do successful, writable fds. */ if (fd < 0) continue; - /* If it wasn't closed again... */ - if (!FD_ISSET(fd, &closed)) { - if ((history[i].u.open.flags & O_RDWR) - == O_RDWR) { - files = save_file(files, fd); - } else if ((history[i].u.open.flags & O_WRONLY) - == O_WRONLY) { - /* FIXME: Handle O_WRONLY. Open with - * O_RDWR? */ - abort(); - } - } else - FD_CLR(history[i].u.open.ret, &closed); + /* If it was closed, cleanup == NULL. */ + if (!history[i].cleanup) + continue; + + if ((history[i].u.open.flags & O_RDWR) == O_RDWR) { + files = save_file(files, fd); + } else if ((history[i].u.open.flags & O_WRONLY) + == O_WRONLY) { + /* FIXME: Handle O_WRONLY. Open with O_RDWR? */ + abort(); + } } } @@ -302,17 +296,72 @@ static void restore_files(struct saved_file *s) { while (s) { struct saved_file *next = s->next; - off_t orig = lseek(s->fd, 0, SEEK_CUR); lseek(s->fd, 0, SEEK_SET); - write(s->fd, s->contents, s->len); + if (write(s->fd, s->contents, s->len) != s->len) + err(1, "Failed to restore %zu bytes", (size_t)s->len); + if (ftruncate(s->fd, s->len) != 0) + err(1, "Failed to trim file to length %zu", + (size_t)s->len); + free(s->contents); + lseek(s->fd, s->off, SEEK_SET); + free(s); + s = next; + } +} + +static void free_files(struct saved_file *s) +{ + while (s) { + struct saved_file *next = s->next; free(s->contents); - lseek(s->fd, orig, SEEK_SET); free(s); s = next; } } +/* Free up memory, so valgrind doesn't report leaks. */ +static void free_everything(void) +{ + unsigned int i; + + /* We don't do this in cleanup: needed even for failed opens. */ + for (i = 0; i < history_num; i++) { + if (history[i].type == FAILTEST_OPEN) + free((char *)history[i].u.open.pathname); + } + free(history); +} + +static NORETURN void failtest_cleanup(bool forced_cleanup, int status) +{ + int i; + + /* For children, we don't care if they "failed" the testing. */ + if (control_fd != -1) + status = 0; + + if (forced_cleanup) + history_num--; + + /* Cleanup everything, in reverse order. */ + for (i = history_num - 1; i >= 0; i--) { + if (!history[i].cleanup) + continue; + if (!forced_cleanup) { + printf("Leak at %s:%u: --failpath=%s\n", + history[i].file, history[i].line, + failpath_string()); + status = 1; + } + history[i].cleanup(&history[i].u); + } + + free_everything(); + tell_parent(SUCCESS); + exit(status); +} + static bool should_fail(struct failtest_call *call) { int status; @@ -322,6 +371,10 @@ static bool should_fail(struct failtest_call *call) size_t outlen = 0; struct saved_file *files; + /* Are we probing? */ + if (probe_count && --probe_count == 0 && control_fd != -1) + failtest_cleanup(true, 0); + if (call == &unrecorded_call) return false; @@ -329,18 +382,65 @@ static bool should_fail(struct failtest_call *call) /* + means continue after end, like normal. */ if (*failpath == '+') failpath = NULL; - else { - if (tolower(*failpath) != info_to_arg[call->type]) + else if (*failpath == '\0') { + /* Continue, but don't inject errors. */ + return call->fail = false; + } else { + if (tolower((unsigned char)*failpath) + != info_to_arg[call->type]) errx(1, "Failpath expected '%c' got '%c'\n", info_to_arg[call->type], *failpath); - call->fail = isupper(*(failpath++)); + call->fail = isupper((unsigned char)*(failpath++)); return call->fail; } } - if (!failtest_hook(history, history_num)) { - call->fail = false; - return false; + /* Attach debugger if they asked for it. */ + if (debugpath && history_num == strlen(debugpath)) { + unsigned int i; + + for (i = 0; i < history_num; i++) { + unsigned char c = info_to_arg[history[i].type]; + if (history[i].fail) + c = toupper(c); + if (c != debugpath[i]) + break; + } + if (i == history_num) { + char str[80]; + + /* Don't timeout. */ + signal(SIGUSR1, SIG_IGN); + sprintf(str, "xterm -e gdb /proc/%d/exe %d &", + getpid(), getpid()); + if (system(str) == 0) + sleep(5); + } + } + + if (failtest_hook) { + switch (failtest_hook(history, history_num)) { + case FAIL_OK: + break; + case FAIL_PROBE: + /* Already down probe path? Stop now. */ + if (!probe_count) { + /* FIXME: We should run *parent* and + * run probe until calls match up again. */ + probe_count = 3; + break; + } else { + /* Child should give up now. */ + if (control_fd != -1) + failtest_cleanup(true, 0); + /* Parent, don't fail again. */ + } + case FAIL_DONT_FAIL: + call->fail = false; + return false; + default: + abort(); + } } files = save_files(); @@ -358,29 +458,20 @@ static bool should_fail(struct failtest_call *call) if (child == 0) { if (tracefd != -1) { - struct timeval now; - char str[50], *p; - gettimeofday(&now, NULL); - if (now.tv_usec < start.tv_usec) { - now.tv_sec--; - now.tv_usec += 1000000; - } - now.tv_usec -= start.tv_usec; - now.tv_sec -= start.tv_sec; - sprintf(str, "%u (%u.%02u): ", getpid(), - (int)now.tv_sec, (int)now.tv_usec / 10000); - trace_str(str); + struct timeval diff; + const char *p; + + diff = time_sub(time_now(), start); p = failpath_string(); - trace_str(p); - free(p); - trace_str("("); - p = strchr(history[history_num-1].file, '/'); + trace("%u->%u (%u.%02u): %s (", getppid(), getpid(), + (int)diff.tv_sec, (int)diff.tv_usec / 10000, p); + free((char *)p); + p = strrchr(history[history_num-1].file, '/'); if (p) - trace_str(p+1); + trace("%s", p+1); else - trace_str(history[history_num-1].file); - sprintf(str, ":%u)\n", history[history_num-1].line); - trace_str(str); + trace("%s", history[history_num-1].file); + trace(":%u)\n", history[history_num-1].line); } close(control[0]); close(output[0]); @@ -389,6 +480,8 @@ static bool should_fail(struct failtest_call *call) if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO) close(output[1]); control_fd = control[1]; + /* Valgrind spots the leak if we don't free these. */ + free_files(files); return true; } @@ -413,8 +506,13 @@ static bool should_fail(struct failtest_call *call) else ret = poll(pfd, 2, failtest_timeout_ms); - if (ret <= 0) + if (ret == 0) hand_down(SIGUSR1); + if (ret < 0) { + if (errno == EINTR) + continue; + err(1, "Poll returned %i", ret); + } if (pfd[0].revents & POLLIN) { ssize_t len; @@ -440,9 +538,13 @@ static bool should_fail(struct failtest_call *call) close(output[0]); close(control[0]); waitpid(child, &status, 0); - if (!WIFEXITED(status)) - child_fail(out, outlen, "Killed by signal %u: ", - WTERMSIG(status)); + if (!WIFEXITED(status)) { + if (WTERMSIG(status) == SIGUSR1) + child_fail(out, outlen, "Timed out"); + else + child_fail(out, outlen, "Killed by signal %u: ", + WTERMSIG(status)); + } /* Child printed failure already, just pass up exit code. */ if (type == FAILURE) { fprintf(stderr, "%.*s", (int)outlen, out); @@ -590,21 +692,25 @@ int failtest_open(const char *pathname, va_start(ap, line); call.flags = va_arg(ap, int); if (call.flags & O_CREAT) { - call.mode = va_arg(ap, mode_t); + call.mode = va_arg(ap, int); va_end(ap); } p = add_history(FAILTEST_OPEN, file, line, &call); /* Avoid memory leak! */ if (p == &unrecorded_call) free((char *)call.pathname); - if (should_fail(p)) { + p->u.open.ret = open(pathname, call.flags, call.mode); + + if (!failpath && p->u.open.ret == -1) { + p->fail = false; + p->error = errno; + } else if (should_fail(p)) { + close(p->u.open.ret); p->u.open.ret = -1; /* FIXME: Play with error codes? */ p->error = EACCES; } else { - p->u.open.ret = open(pathname, call.flags, call.mode); set_cleanup(p, cleanup_open, struct open_call); - p->u.open.dup_fd = p->u.open.ret; } errno = p->error; return p->u.open.ret; @@ -639,11 +745,6 @@ int failtest_pipe(int pipefd[2], const char *file, unsigned line) return p->u.pipe.ret; } -static void cleanup_read(struct read_call *call) -{ - lseek(call->fd, call->off, SEEK_SET); -} - ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off, const char *file, unsigned line) { @@ -661,61 +762,26 @@ ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off, p->error = EIO; } else { p->u.read.ret = pread(fd, buf, count, off); - set_cleanup(p, cleanup_read, struct read_call); } errno = p->error; return p->u.read.ret; } -static void cleanup_write(struct write_call *call) -{ - lseek(call->dup_fd, call->off, SEEK_SET); - write(call->dup_fd, call->saved_contents, call->saved_len); - lseek(call->dup_fd, call->off, SEEK_SET); - ftruncate(call->dup_fd, call->old_filelen); - free(call->saved_contents); -} - ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off, const char *file, unsigned line) { struct failtest_call *p; struct write_call call; - call.fd = call.dup_fd = fd; + call.fd = fd; call.buf = buf; call.count = count; call.off = off; p = add_history(FAILTEST_WRITE, file, line, &call); - /* Save old contents if we can */ - if (p->u.write.off != -1) { - ssize_t ret; - p->u.write.old_filelen = lseek(fd, 0, SEEK_END); - - /* Write past end of file? Nothing to save.*/ - if (p->u.write.old_filelen <= p->u.write.off) - p->u.write.saved_len = 0; - /* Write which goes over end of file? Partial save. */ - else if (p->u.write.off + count > p->u.write.old_filelen) - p->u.write.saved_len = p->u.write.old_filelen - - p->u.write.off; - /* Full save. */ - else - p->u.write.saved_len = count; - - p->u.write.saved_contents = malloc(p->u.write.saved_len); - lseek(fd, p->u.write.off, SEEK_SET); - ret = read(fd, p->u.write.saved_contents, p->u.write.saved_len); - if (ret != p->u.write.saved_len) - err(1, "Expected %i bytes, got %i", - (int)p->u.write.saved_len, (int)ret); - lseek(fd, p->u.write.off, SEEK_SET); - set_cleanup(p, cleanup_write, struct write_call); - } - - /* If we're a child, tell parent about write. */ - if (control_fd != -1) { + /* If we're a child, we need to make sure we write the same thing + * to non-files as the parent does, so tell it. */ + if (control_fd != -1 && off == (off_t)-1) { enum info_type type = WRITE; write_all(control_fd, &type, sizeof(type)); @@ -729,7 +795,7 @@ ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off, p->error = EIO; } else { /* FIXME: We assume same write order in parent and child */ - if (child_writes_num != 0) { + if (off == (off_t)-1 && child_writes_num != 0) { if (child_writes[0].fd != fd) errx(1, "Child wrote to fd %u, not %u?", child_writes[0].fd, fd); @@ -841,49 +907,50 @@ add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type) return locks; } -/* We only trap this so we can dup fds in case we need to restore. */ -int failtest_close(int fd) +/* We trap this so we can record it: we don't fail it. */ +int failtest_close(int fd, const char *file, unsigned line) { - int new_fd = -1, i; + int i; + struct close_call call; + struct failtest_call *p; + + call.fd = fd; + p = add_history(FAILTEST_CLOSE, file, line, &call); + p->fail = false; + + /* Consume close from failpath. */ + if (failpath) + if (should_fail(p)) + abort(); if (fd < 0) return close(fd); - /* Trace history to find source of fd, and if we need to cleanup writes. */ + /* Trace history to find source of fd. */ for (i = history_num-1; i >= 0; i--) { switch (history[i].type) { - case FAILTEST_WRITE: - if (history[i].u.write.fd != fd) - break; - if (!history[i].cleanup) - break; - /* We need to save fd so we can restore file. */ - if (new_fd == -1) - new_fd = dup(fd); - history[i].u.write.dup_fd = new_fd; - break; - case FAILTEST_READ: - /* We don't need to cleanup reads on closed fds. */ - if (history[i].u.read.fd != fd) - break; - history[i].cleanup = NULL; - break; case FAILTEST_PIPE: - /* From a pipe? We don't ever restore pipes... */ + /* From a pipe? */ if (history[i].u.pipe.fds[0] == fd) { - assert(new_fd == -1); + assert(!history[i].u.pipe.closed[0]); history[i].u.pipe.closed[0] = true; + if (history[i].u.pipe.closed[1]) + history[i].cleanup = NULL; goto out; } if (history[i].u.pipe.fds[1] == fd) { - assert(new_fd == -1); + assert(!history[i].u.pipe.closed[1]); history[i].u.pipe.closed[1] = true; + if (history[i].u.pipe.closed[0]) + history[i].cleanup = NULL; goto out; } break; case FAILTEST_OPEN: if (history[i].u.open.ret == fd) { - history[i].u.open.dup_fd = new_fd; + assert((void *)history[i].cleanup + == (void *)cleanup_open); + history[i].cleanup = NULL; goto out; } break; @@ -944,7 +1011,6 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...) } p = add_history(FAILTEST_FCNTL, file, line, &call); - get_locks(); if (should_fail(p)) { p->u.fcntl.ret = -1; @@ -953,6 +1019,7 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...) else p->error = EDEADLK; } else { + get_locks(); p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd, &p->u.fcntl.arg.fl); if (p->u.fcntl.ret == -1) @@ -972,54 +1039,44 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...) return p->u.fcntl.ret; } +pid_t failtest_getpid(const char *file, unsigned line) +{ + /* You must call failtest_init first! */ + assert(orig_pid); + return orig_pid; +} + void failtest_init(int argc, char *argv[]) { unsigned int i; + orig_pid = getpid(); + for (i = 1; i < argc; i++) { if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) { failpath = argv[i] + strlen("--failpath="); } else if (strcmp(argv[i], "--tracepath") == 0) { tracefd = dup(STDERR_FILENO); failtest_timeout_ms = -1; + } else if (!strncmp(argv[i], "--debugpath=", + strlen("--debugpath="))) { + debugpath = argv[i] + strlen("--debugpath="); } } - gettimeofday(&start, NULL); + start = time_now(); } -/* Free up memory, so valgrind doesn't report leaks. */ -static void free_everything(void) +bool failtest_has_failed(void) { - unsigned int i; - - /* We don't do this in cleanup: needed even for failed opens. */ - for (i = 0; i < history_num; i++) { - if (history[i].type == FAILTEST_OPEN) - free((char *)history[i].u.open.pathname); - } - free(history); + return control_fd != -1; } void failtest_exit(int status) { - int i; - - if (control_fd == -1) { - free_everything(); - exit(status); - } - if (failtest_exit_check) { if (!failtest_exit_check(history, history_num)) child_fail(NULL, 0, "failtest_exit_check failed\n"); } - /* Cleanup everything, in reverse order. */ - for (i = history_num - 1; i >= 0; i--) - if (history[i].cleanup) - history[i].cleanup(&history[i].u); - - free_everything(); - tell_parent(SUCCESS); - exit(0); + failtest_cleanup(false, status); }