X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ffailtest%2Ffailtest.c;h=ad7e0feac1ec00be4b161c545e3e5c497a8cd8f6;hp=4b5ab10ada23714123f65cfaf24f2e6a3909a72f;hb=df34919b32720329544195900ce57f1a229842f8;hpb=0d66a226c98d0c5b5bced64be3663e52ea6cfb9d diff --git a/ccan/failtest/failtest.c b/ccan/failtest/failtest.c index 4b5ab10a..ad7e0fea 100644 --- a/ccan/failtest/failtest.c +++ b/ccan/failtest/failtest.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -19,9 +20,12 @@ bool (*failtest_hook)(struct failtest_call *history, unsigned num) = failtest_default_hook; +static int tracefd = -1; + unsigned int failtest_timeout_ms = 20000; const char *failpath; +const char *debugpath; enum info_type { WRITE, @@ -31,26 +35,6 @@ enum info_type { UNEXPECTED }; -struct write_info_hdr { - size_t len; - off_t offset; - int fd; -}; - -struct fd_orig { - int fd; - off_t offset; - size_t size; - bool dupped; -}; - -struct write_info { - struct write_info_hdr hdr; - char *data; - size_t oldlen; - char *olddata; -}; - struct lock_info { int fd; /* end is inclusive: you can't have a 0-byte lock. 
*/ @@ -63,16 +47,11 @@ bool (*failtest_exit_check)(struct failtest_call *history, unsigned num); static struct failtest_call *history = NULL; static unsigned int history_num = 0; static int control_fd = -1; +static struct timeval start; -static struct write_info *writes = NULL; -static unsigned int writes_num = 0; - -static struct write_info *child_writes = NULL; +static struct write_call *child_writes = NULL; static unsigned int child_writes_num = 0; -static struct fd_orig *fd_orig = NULL; -static unsigned int fd_orig_num = 0; - static pid_t lock_owner; static struct lock_info *locks = NULL; static unsigned int lock_num = 0; @@ -96,6 +75,7 @@ static struct failtest_call *add_history_(enum failtest_call_type type, history[history_num].type = type; history[history_num].file = file; history[history_num].line = line; + history[history_num].cleanup = NULL; memcpy(&history[history_num].u, elem, elem_size); return &history[history_num++]; } @@ -103,22 +83,8 @@ static struct failtest_call *add_history_(enum failtest_call_type type, #define add_history(type, file, line, elem) \ add_history_((type), (file), (line), (elem), sizeof(*(elem))) -static void save_fd_orig(int fd) -{ - unsigned int i; - - for (i = 0; i < fd_orig_num; i++) - if (fd_orig[i].fd == fd) - return; - - fd_orig = realloc(fd_orig, (fd_orig_num + 1) * sizeof(*fd_orig)); - fd_orig[fd_orig_num].fd = fd; - fd_orig[fd_orig_num].dupped = false; - fd_orig[fd_orig_num].offset = lseek(fd, 0, SEEK_CUR); - fd_orig[fd_orig_num].size = lseek(fd, 0, SEEK_END); - lseek(fd, fd_orig[fd_orig_num].offset, SEEK_SET); - fd_orig_num++; -} +#define set_cleanup(call, clean, type) \ + (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL)), (clean)) bool failtest_default_hook(struct failtest_call *history, unsigned num) { @@ -127,34 +93,36 @@ bool failtest_default_hook(struct failtest_call *history, unsigned num) static bool read_write_info(int fd) { - struct write_info_hdr hdr; - - if (!read_all(fd, &hdr, sizeof(hdr))) - 
return false; + struct write_call *w; + char *buf; + /* We don't need all of this, but it's simple. */ child_writes = realloc(child_writes, (child_writes_num+1) * sizeof(child_writes[0])); - child_writes[child_writes_num].hdr = hdr; - child_writes[child_writes_num].data = malloc(hdr.len); - if (!read_all(fd, child_writes[child_writes_num].data, hdr.len)) + w = &child_writes[child_writes_num]; + if (!read_all(fd, w, sizeof(*w))) + return false; + + w->buf = buf = malloc(w->count); + if (!read_all(fd, buf, w->count)) return false; child_writes_num++; return true; } -static void print_reproduce(void) +static char *failpath_string(void) { unsigned int i; + char *ret = malloc(history_num + 1); - printf("To reproduce: --failpath="); for (i = 0; i < history_num; i++) { + ret[i] = info_to_arg[history[i].type]; if (history[i].fail) - printf("%c", toupper(info_to_arg[history[i].type])); - else - printf("%c", info_to_arg[history[i].type]); + ret[i] = toupper(ret[i]); } - printf("\n"); + ret[i] = '\0'; + return ret; } static void tell_parent(enum info_type type) @@ -166,13 +134,15 @@ static void tell_parent(enum info_type type) static void child_fail(const char *out, size_t outlen, const char *fmt, ...) 
{ va_list ap; + char *path = failpath_string(); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "%.*s", (int)outlen, out); - print_reproduce(); + printf("To reproduce: --failpath=%s\n", path); + free(path); tell_parent(FAILURE); exit(1); } @@ -250,6 +220,91 @@ static void get_locks(void) lock_owner = getpid(); } +static void trace_str(const char *str) +{ + ssize_t ret; + + while ((ret = write(tracefd, str, strlen(str))) <= 0) { + str += ret; + if (!*str) + return; + } + err(1, "Writing trace."); +} + +struct saved_file { + struct saved_file *next; + int fd; + void *contents; + off_t off, len; +}; + +static struct saved_file *save_file(struct saved_file *next, int fd) +{ + struct saved_file *s = malloc(sizeof(*s)); + + s->next = next; + s->fd = fd; + s->off = lseek(fd, 0, SEEK_CUR); + /* Special file? Erk... */ + assert(s->off != -1); + s->len = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + s->contents = malloc(s->len); + read(fd, s->contents, s->len); + lseek(fd, s->off, SEEK_SET); + return s; +} + +/* We have little choice but to save and restore open files: mmap means we + * can't really intercept changes in the child. + * + * We could do non-mmap'ed files on demand, however. */ +static struct saved_file *save_files(void) +{ + struct saved_file *files = NULL; + int i; + + /* Figure out the set of live fds. */ + for (i = history_num - 2; i >= 0; i--) { + if (history[i].type == FAILTEST_OPEN) { + int fd = history[i].u.open.ret; + /* Only do successful, writable fds. */ + if (fd < 0) + continue; + + /* If it was closed, cleanup == NULL. */ + if (!history[i].cleanup) + continue; + + if ((history[i].u.open.flags & O_RDWR) == O_RDWR) { + files = save_file(files, fd); + } else if ((history[i].u.open.flags & O_WRONLY) + == O_WRONLY) { + /* FIXME: Handle O_WRONLY. Open with O_RDWR? 
*/ + abort(); + } + } + } + + return files; +} + +static void restore_files(struct saved_file *s) +{ + while (s) { + struct saved_file *next = s->next; + + lseek(s->fd, 0, SEEK_SET); + write(s->fd, s->contents, s->len); + ftruncate(s->fd, s->len); + free(s->contents); + lseek(s->fd, s->off, SEEK_SET); + free(s); + s = next; + } +} + static bool should_fail(struct failtest_call *call) { int status; @@ -257,16 +312,45 @@ static bool should_fail(struct failtest_call *call) enum info_type type = UNEXPECTED; char *out = NULL; size_t outlen = 0; + struct saved_file *files; if (call == &unrecorded_call) return false; if (failpath) { - if (tolower(*failpath) != info_to_arg[call->type]) - errx(1, "Failpath expected '%c' got '%c'\n", - info_to_arg[call->type], *failpath); - call->fail = isupper(*(failpath++)); - return call->fail; + /* + means continue after end, like normal. */ + if (*failpath == '+') + failpath = NULL; + else { + if (tolower(*failpath) != info_to_arg[call->type]) + errx(1, "Failpath expected '%c' got '%c'\n", + info_to_arg[call->type], *failpath); + call->fail = isupper(*(failpath++)); + return call->fail; + } + } + + /* Attach debugger if they asked for it. */ + if (debugpath && history_num == strlen(debugpath)) { + unsigned int i; + + for (i = 0; i < history_num; i++) { + char c = info_to_arg[history[i].type]; + if (history[i].fail) + c = toupper(c); + if (c != debugpath[i]) + break; + } + if (i == history_num) { + char str[80]; + + /* Don't timeout. */ + signal(SIGUSR1, SIG_IGN); + sprintf(str, "xterm -e gdb /proc/%d/exe %d &", + getpid(), getpid()); + system(str); + sleep(5); + } } if (!failtest_hook(history, history_num)) { @@ -274,6 +358,8 @@ static bool should_fail(struct failtest_call *call) return false; } + files = save_files(); + /* We're going to fail in the child. 
*/ call->fail = true; if (pipe(control) != 0 || pipe(output) != 0) @@ -286,6 +372,31 @@ static bool should_fail(struct failtest_call *call) err(1, "forking failed"); if (child == 0) { + if (tracefd != -1) { + struct timeval now; + char str[50], *p; + gettimeofday(&now, NULL); + if (now.tv_usec < start.tv_usec) { + now.tv_sec--; + now.tv_usec += 1000000; + } + now.tv_usec -= start.tv_usec; + now.tv_sec -= start.tv_sec; + sprintf(str, "%u (%u.%02u): ", getpid(), + (int)now.tv_sec, (int)now.tv_usec / 10000); + trace_str(str); + p = failpath_string(); + trace_str(p); + free(p); + trace_str("("); + p = strchr(history[history_num-1].file, '/'); + if (p) + trace_str(p+1); + else + trace_str(history[history_num-1].file); + sprintf(str, ":%u)\n", history[history_num-1].line); + trace_str(str); + } close(control[0]); close(output[0]); dup2(output[1], STDOUT_FILENO); @@ -360,11 +471,18 @@ static bool should_fail(struct failtest_call *call) free(out); signal(SIGUSR1, SIG_DFL); + restore_files(files); + /* We continue onwards without failing. 
*/ call->fail = false; return false; } +static void cleanup_calloc(struct calloc_call *call) +{ + free(call->ret); +} + void *failtest_calloc(size_t nmemb, size_t size, const char *file, unsigned line) { @@ -379,11 +497,17 @@ void *failtest_calloc(size_t nmemb, size_t size, p->error = ENOMEM; } else { p->u.calloc.ret = calloc(nmemb, size); + set_cleanup(p, cleanup_calloc, struct calloc_call); } errno = p->error; return p->u.calloc.ret; } +static void cleanup_malloc(struct malloc_call *call) +{ + free(call->ret); +} + void *failtest_malloc(size_t size, const char *file, unsigned line) { struct failtest_call *p; @@ -396,11 +520,49 @@ void *failtest_malloc(size_t size, const char *file, unsigned line) p->error = ENOMEM; } else { p->u.calloc.ret = malloc(size); + set_cleanup(p, cleanup_malloc, struct malloc_call); } errno = p->error; return p->u.calloc.ret; } +static void cleanup_realloc(struct realloc_call *call) +{ + free(call->ret); +} + +/* Walk back and find out if we got this ptr from a previous routine. */ +static void fixup_ptr_history(void *ptr, unsigned int last) +{ + int i; + + /* Start at end of history, work back. 
*/ + for (i = last - 1; i >= 0; i--) { + switch (history[i].type) { + case FAILTEST_REALLOC: + if (history[i].u.realloc.ret == ptr) { + history[i].cleanup = NULL; + return; + } + break; + case FAILTEST_MALLOC: + if (history[i].u.malloc.ret == ptr) { + history[i].cleanup = NULL; + return; + } + break; + case FAILTEST_CALLOC: + if (history[i].u.calloc.ret == ptr) { + history[i].cleanup = NULL; + return; + } + break; + default: + break; + } + } +} + void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line) { struct failtest_call *p; @@ -413,12 +575,25 @@ void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line) p->u.realloc.ret = NULL; p->error = ENOMEM; } else { + fixup_ptr_history(ptr, history_num-1); p->u.realloc.ret = realloc(ptr, size); + set_cleanup(p, cleanup_realloc, struct realloc_call); } errno = p->error; return p->u.realloc.ret; } +void failtest_free(void *ptr) +{ + fixup_ptr_history(ptr, history_num); + free(ptr); +} + +static void cleanup_open(struct open_call *call) +{ + close(call->ret); +} + int failtest_open(const char *pathname, const char *file, unsigned line, ...) { @@ -437,17 +612,31 @@ int failtest_open(const char *pathname, /* Avoid memory leak! */ if (p == &unrecorded_call) free((char *)call.pathname); - if (should_fail(p)) { + p->u.open.ret = open(pathname, call.flags, call.mode); + + if (!failpath && p->u.open.ret == -1) { + p->fail = false; + p->error = errno; + } else if (should_fail(p)) { + close(p->u.open.ret); p->u.open.ret = -1; /* FIXME: Play with error codes? 
*/ p->error = EACCES; } else { - p->u.open.ret = open(pathname, call.flags, call.mode); + set_cleanup(p, cleanup_open, struct open_call); } errno = p->error; return p->u.open.ret; } +static void cleanup_pipe(struct pipe_call *call) +{ + if (!call->closed[0]) + close(call->fds[0]); + if (!call->closed[1]) + close(call->fds[1]); +} + int failtest_pipe(int pipefd[2], const char *file, unsigned line) { struct failtest_call *p; @@ -460,6 +649,8 @@ int failtest_pipe(int pipefd[2], const char *file, unsigned line) p->error = EMFILE; } else { p->u.pipe.ret = pipe(p->u.pipe.fds); + p->u.pipe.closed[0] = p->u.pipe.closed[1] = false; + set_cleanup(p, cleanup_pipe, struct pipe_call); } /* This causes valgrind to notice if they use pipefd[] after failure */ memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds)); @@ -478,10 +669,6 @@ ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off, call.off = off; p = add_history(FAILTEST_READ, file, line, &call); - /* This is going to change seek offset, so save it. */ - if (control_fd != -1) - save_fd_orig(fd); - /* FIXME: Try partial read returns. */ if (should_fail(p)) { p->u.read.ret = -1; @@ -493,18 +680,11 @@ ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off, return p->u.read.ret; } -static struct write_info *new_write(void) -{ - writes = realloc(writes, (writes_num + 1) * sizeof(*writes)); - return &writes[writes_num++]; -} - ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off, const char *file, unsigned line) { struct failtest_call *p; struct write_call call; - off_t offset; call.fd = fd; call.buf = buf; @@ -512,30 +692,14 @@ ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off, call.off = off; p = add_history(FAILTEST_WRITE, file, line, &call); - offset = lseek(fd, 0, SEEK_CUR); - - /* If we're a child, save contents and tell parent about write. 
*/ - if (control_fd != -1) { - struct write_info *winfo = new_write(); + /* If we're a child, we need to make sure we write the same thing + * to non-files as the parent does, so tell it. */ + if (control_fd != -1 && off == (off_t)-1) { enum info_type type = WRITE; - save_fd_orig(fd); - - winfo->hdr.len = count; - winfo->hdr.fd = fd; - winfo->data = malloc(count); - memcpy(winfo->data, buf, count); - winfo->hdr.offset = offset; - if (winfo->hdr.offset != (off_t)-1) { - lseek(fd, offset, SEEK_SET); - winfo->olddata = malloc(count); - winfo->oldlen = read(fd, winfo->olddata, count); - if (winfo->oldlen == -1) - winfo->oldlen = 0; - } write_all(control_fd, &type, sizeof(type)); - write_all(control_fd, &winfo->hdr, sizeof(winfo->hdr)); - write_all(control_fd, winfo->data, count); + write_all(control_fd, &p->u.write, sizeof(p->u.write)); + write_all(control_fd, buf, count); } /* FIXME: Try partial write returns. */ @@ -544,30 +708,30 @@ ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off, p->error = EIO; } else { /* FIXME: We assume same write order in parent and child */ - if (child_writes_num != 0) { - if (child_writes[0].hdr.fd != fd) + if (off == (off_t)-1 && child_writes_num != 0) { + if (child_writes[0].fd != fd) errx(1, "Child wrote to fd %u, not %u?", - child_writes[0].hdr.fd, fd); - if (child_writes[0].hdr.offset != offset) + child_writes[0].fd, fd); + if (child_writes[0].off != p->u.write.off) errx(1, "Child wrote to offset %zu, not %zu?", - (size_t)child_writes[0].hdr.offset, - (size_t)offset); - if (child_writes[0].hdr.len != count) + (size_t)child_writes[0].off, + (size_t)p->u.write.off); + if (child_writes[0].count != count) errx(1, "Child wrote length %zu, not %zu?", - child_writes[0].hdr.len, count); - if (memcmp(child_writes[0].data, buf, count)) { + child_writes[0].count, count); + if (memcmp(child_writes[0].buf, buf, count)) { child_fail(NULL, 0, "Child wrote differently to" " fd %u than we did!\n", fd); } - 
free(child_writes[0].data); + free((char *)child_writes[0].buf); child_writes_num--; memmove(&child_writes[0], &child_writes[1], sizeof(child_writes[0]) * child_writes_num); /* Is this is a socket or pipe, child wrote it already. */ - if (offset == (off_t)-1) { + if (p->u.write.off == (off_t)-1) { p->u.write.ret = count; errno = p->error; return p->u.write.ret; @@ -656,24 +820,48 @@ add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type) return locks; } -/* We only trap this so we can dup fds in case we need to restore. */ +/* We trap this so we can record it: we don't fail it. */ int failtest_close(int fd) { - unsigned int i; - int newfd = -1; - - for (i = 0; i < fd_orig_num; i++) { - if (fd_orig[i].fd == fd) { - fd_orig[i].fd = newfd = dup(fd); - fd_orig[i].dupped = true; + int i; + + if (fd < 0) + return close(fd); + + /* Trace history to find source of fd. */ + for (i = history_num-1; i >= 0; i--) { + switch (history[i].type) { + case FAILTEST_PIPE: + /* From a pipe? */ + if (history[i].u.pipe.fds[0] == fd) { + assert(!history[i].u.pipe.closed[0]); + history[i].u.pipe.closed[0] = true; + if (history[i].u.pipe.closed[1]) + history[i].cleanup = NULL; + goto out; + } + if (history[i].u.pipe.fds[1] == fd) { + assert(!history[i].u.pipe.closed[1]); + history[i].u.pipe.closed[1] = true; + if (history[i].u.pipe.closed[0]) + history[i].cleanup = NULL; + goto out; + } + break; + case FAILTEST_OPEN: + if (history[i].u.open.ret == fd) { + assert((void *)history[i].cleanup + == (void *)cleanup_open); + history[i].cleanup = NULL; + goto out; + } + break; + default: + break; } } - for (i = 0; i < writes_num; i++) { - if (writes[i].hdr.fd == fd) - writes[i].hdr.fd = newfd; - } - +out: locks = add_lock(locks, fd, 0, off_max(), F_UNLCK); return close(fd); } @@ -755,10 +943,20 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...) 
void failtest_init(int argc, char *argv[]) { - if (argc == 2 - && strncmp(argv[1], "--failpath=", strlen("--failpath=")) == 0) { - failpath = argv[1] + strlen("--failpath="); + unsigned int i; + + for (i = 1; i < argc; i++) { + if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) { + failpath = argv[i] + strlen("--failpath="); + } else if (strcmp(argv[i], "--tracepath") == 0) { + tracefd = dup(STDERR_FILENO); + failtest_timeout_ms = -1; + } else if (!strncmp(argv[i], "--debugpath=", + strlen("--debugpath="))) { + debugpath = argv[i] + strlen("--debugpath="); + } } + gettimeofday(&start, NULL); } /* Free up memory, so valgrind doesn't report leaks. */ @@ -766,13 +964,7 @@ static void free_everything(void) { unsigned int i; - for (i = 0; i < writes_num; i++) { - free(writes[i].data); - if (writes[i].hdr.offset != (off_t)-1) - free(writes[i].olddata); - } - free(writes); - free(fd_orig); + /* We don't do this in cleanup: needed even for failed opens. */ for (i = 0; i < history_num; i++) { if (history[i].type == FAILTEST_OPEN) free((char *)history[i].u.open.pathname); @@ -782,7 +974,7 @@ static void free_everything(void) void failtest_exit(int status) { - unsigned int i; + int i; if (control_fd == -1) { free_everything(); @@ -794,26 +986,10 @@ void failtest_exit(int status) child_fail(NULL, 0, "failtest_exit_check failed\n"); } - /* Restore any stuff we overwrote. */ - for (i = 0; i < writes_num; i++) { - if (writes[i].hdr.offset == (off_t)-1) - continue; - if (writes[i].oldlen != 0) { - lseek(writes[i].hdr.fd, writes[i].hdr.offset, - SEEK_SET); - write(writes[i].hdr.fd, writes[i].olddata, - writes[i].oldlen); - } - } - - /* Fix up fd offsets, restore sizes. */ - for (i = 0; i < fd_orig_num; i++) { - lseek(fd_orig[i].fd, fd_orig[i].offset, SEEK_SET); - ftruncate(fd_orig[i].fd, fd_orig[i].size); - /* Free up any file descriptors we dup'ed. */ - if (fd_orig[i].dupped) - close(fd_orig[i].fd); - } + /* Cleanup everything, in reverse order. 
*/ + for (i = history_num - 1; i >= 0; i--) + if (history[i].cleanup) + history[i].cleanup(&history[i].u); free_everything(); tell_parent(SUCCESS);