X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ffailtest%2Ffailtest.c;h=56ace7431847c5ac9fd059b77d4b9271958a3fb7;hp=5d7a54f187f1f627ed6c950e9c7f06dfef82b1e1;hb=acb6106ca2778d74af91f9b92bc32df179195b6b;hpb=020f8ef45a751b9fc9f3739e8e8f8f9ddae69be8 diff --git a/ccan/failtest/failtest.c b/ccan/failtest/failtest.c index 5d7a54f1..56ace743 100644 --- a/ccan/failtest/failtest.c +++ b/ccan/failtest/failtest.c @@ -1,3 +1,5 @@ +/* Licensed under LGPL - see LICENSE file for details */ +#include #include #include #include @@ -10,138 +12,238 @@ #include #include #include -#include +#include +#include +#include +#include +#include #include #include -#include +#include +#include +#include +#include + +enum failtest_result (*failtest_hook)(struct tlist_calls *); -bool (*failtest_hook)(struct failtest_call *history, unsigned num) -= failtest_default_hook; +static int tracefd = -1; +static int warnfd; unsigned int failtest_timeout_ms = 20000; const char *failpath; +const char *debugpath; enum info_type { WRITE, + RELEASE_LOCKS, FAILURE, SUCCESS, UNEXPECTED }; -struct write_info_hdr { - size_t len; - off_t offset; +struct lock_info { int fd; + /* end is inclusive: you can't have a 0-byte lock. */ + off_t start, end; + int type; }; -struct fd_orig { - int fd; - off_t offset; - size_t size; - bool dupped; -}; +/* We hash the call location together with its backtrace. */ +static size_t hash_call(const struct failtest_call *call) +{ + return hash(call->file, strlen(call->file), + hash(&call->line, 1, + hash(call->backtrace, call->backtrace_num, + call->type))); +} -struct write_info { - struct write_info_hdr hdr; - char *data; - size_t oldlen; - char *olddata; -}; +static bool call_eq(const struct failtest_call *call1, + const struct failtest_call *call2) +{ + unsigned int i; -bool (*failtest_exit_check)(struct failtest_call *history, unsigned num); + if (strcmp(call1->file, call2->file) != 0 + || call1->line != call2->line + || call1->type != call2->type + || call1->backtrace_num != call2->backtrace_num) + return false; -static struct failtest_call *history = NULL; -static unsigned int history_num = 0; -static int control_fd = -1; + for (i = 0; i < call1->backtrace_num; i++) + if (call1->backtrace[i] != call2->backtrace[i]) + return false; -static struct write_info *writes = NULL; -static unsigned int writes_num = 0; + return true; +} + +/* Defines struct failtable. */ +HTABLE_DEFINE_TYPE(struct failtest_call, (struct failtest_call *), hash_call, + call_eq, failtable); + +bool (*failtest_exit_check)(struct tlist_calls *history); + +static struct tlist_calls history = TLIST_INIT(history); +static int control_fd = -1; +static struct timeval start; +static bool probing = false; +static struct failtable failtable; -static struct write_info *child_writes = NULL; +static struct write_call *child_writes = NULL; static unsigned int child_writes_num = 0; -static struct fd_orig *fd_orig = NULL; -static unsigned int fd_orig_num = 0; +static pid_t lock_owner; +static struct lock_info *locks = NULL; +static unsigned int lock_num = 0; -static const char info_to_arg[] = "mceoprw"; +static pid_t orig_pid; + +static const char info_to_arg[] = "mceoxprwfa"; /* Dummy call used for failtest_undo wrappers. */ static struct failtest_call unrecorded_call; +#if HAVE_BACKTRACE +#include + +static void **get_backtrace(unsigned int *num) +{ + static unsigned int max_back = 100; + void **ret; + +again: + ret = malloc(max_back * sizeof(void *)); + *num = backtrace(ret, max_back); + if (*num == max_back) { + free(ret); + max_back *= 2; + goto again; + } + return ret; +} +#else +/* This will test slightly less, since will consider all of the same + * calls as identical. But, it's slightly faster! */ +static void **get_backtrace(unsigned int *num) +{ + *num = 0; + return NULL; +} +#endif /* HAVE_BACKTRACE */ + static struct failtest_call *add_history_(enum failtest_call_type type, const char *file, unsigned int line, const void *elem, size_t elem_size) { + struct failtest_call *call; + /* NULL file is how we suppress failure. */ if (!file) return &unrecorded_call; - history = realloc(history, (history_num + 1) * sizeof(*history)); - history[history_num].type = type; - history[history_num].file = file; - history[history_num].line = line; - memcpy(&history[history_num].u, elem, elem_size); - return &history[history_num++]; + call = malloc(sizeof *call); + call->type = type; + call->file = file; + call->line = line; + call->cleanup = NULL; + call->backtrace = get_backtrace(&call->backtrace_num); + memcpy(&call->u, elem, elem_size); + tlist_add_tail(&history, call, list); + return call; } #define add_history(type, file, line, elem) \ add_history_((type), (file), (line), (elem), sizeof(*(elem))) -static void save_fd_orig(int fd) -{ - unsigned int i; +/* We do a fake call inside a sizeof(), to check types. */ +#define set_cleanup(call, clean, type) \ + (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean)) - for (i = 0; i < fd_orig_num; i++) - if (fd_orig[i].fd == fd) - return; - - fd_orig = realloc(fd_orig, (fd_orig_num + 1) * sizeof(*fd_orig)); - fd_orig[fd_orig_num].fd = fd; - fd_orig[fd_orig_num].dupped = false; - fd_orig[fd_orig_num].offset = lseek(fd, 0, SEEK_CUR); - fd_orig[fd_orig_num].size = lseek(fd, 0, SEEK_END); - lseek(fd, fd_orig[fd_orig_num].offset, SEEK_SET); - fd_orig_num++; -} -bool failtest_default_hook(struct failtest_call *history, unsigned num) +/* Dup the fd to a high value (out of the way I hope!), and close the old fd. */ +static int move_fd_to_high(int fd) { - return true; + int i; + + for (i = FD_SETSIZE - 1; i >= 0; i--) { + if (fcntl(i, F_GETFL) == -1 && errno == EBADF) { + if (dup2(fd, i) == -1) + err(1, "Failed to dup fd %i to %i", fd, i); + close(fd); + return i; + } + } + /* Nothing? Really? Er... ok? */ + return fd; } static bool read_write_info(int fd) { - struct write_info_hdr hdr; - - if (!read_all(fd, &hdr, sizeof(hdr))) - return false; + struct write_call *w; + char *buf; + /* We don't need all of this, but it's simple. */ child_writes = realloc(child_writes, (child_writes_num+1) * sizeof(child_writes[0])); - child_writes[child_writes_num].hdr = hdr; - child_writes[child_writes_num].data = malloc(hdr.len); - if (!read_all(fd, child_writes[child_writes_num].data, hdr.len)) + w = &child_writes[child_writes_num]; + if (!read_all(fd, w, sizeof(*w))) + return false; + + w->buf = buf = malloc(w->count); + if (!read_all(fd, buf, w->count)) return false; child_writes_num++; return true; } -static void print_reproduce(void) +static char *failpath_string(void) { - unsigned int i; - - printf("To reproduce: --failpath="); - for (i = 0; i < history_num; i++) { - if (history[i].fail) - printf("%c", toupper(info_to_arg[history[i].type])); - else - printf("%c", info_to_arg[history[i].type]); + struct failtest_call *i; + char *ret = strdup(""); + unsigned len = 0; + + /* Inefficient, but who cares? */ + tlist_for_each(&history, i, list) { + ret = realloc(ret, len + 2); + ret[len] = info_to_arg[i->type]; + if (i->fail) + ret[len] = toupper(ret[len]); + ret[++len] = '\0'; } - printf("\n"); + return ret; +} + +static void warn_via_fd(int e, const char *fmt, va_list ap) +{ + char *p = failpath_string(); + + vdprintf(warnfd, fmt, ap); + if (e != -1) + dprintf(warnfd, ": %s", strerror(e)); + dprintf(warnfd, " [%s]\n", p); + free(p); +} + +static void fwarn(const char *fmt, ...) +{ + va_list ap; + int e = errno; + + va_start(ap, fmt); + warn_via_fd(e, fmt, ap); + va_end(ap); +} + + +static void fwarnx(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + warn_via_fd(-1, fmt, ap); + va_end(ap); } static void tell_parent(enum info_type type) @@ -153,22 +255,244 @@ static void tell_parent(enum info_type type) static void child_fail(const char *out, size_t outlen, const char *fmt, ...) { va_list ap; + char *path = failpath_string(); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "%.*s", (int)outlen, out); - print_reproduce(); + printf("To reproduce: --failpath=%s\n", path); + free(path); tell_parent(FAILURE); exit(1); } +static void trace(const char *fmt, ...) +{ + va_list ap; + + if (tracefd == -1) + return; + + va_start(ap, fmt); + vdprintf(tracefd, fmt, ap); + va_end(ap); +} + static pid_t child; -static void hand_down(int signal) +static void hand_down(int signum) { - kill(child, signal); + kill(child, signum); +} + +static void release_locks(void) +{ + /* Locks were never acquired/reacquired? */ + if (lock_owner == 0) + return; + + /* We own them? Release them all. */ + if (lock_owner == getpid()) { + unsigned int i; + struct flock fl; + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + for (i = 0; i < lock_num; i++) + fcntl(locks[i].fd, F_SETLK, &fl); + } else { + /* Our parent must have them; pass request up. */ + enum info_type type = RELEASE_LOCKS; + assert(control_fd != -1); + write_all(control_fd, &type, sizeof(type)); + } + lock_owner = 0; +} + +/* off_t is a signed type. Getting its max is non-trivial. */ +static off_t off_max(void) +{ + BUILD_ASSERT(sizeof(off_t) == 4 || sizeof(off_t) == 8); + if (sizeof(off_t) == 4) + return (off_t)0x7FFFFFF; + else + return (off_t)0x7FFFFFFFFFFFFFFULL; +} + +static void get_locks(void) +{ + unsigned int i; + struct flock fl; + + if (lock_owner == getpid()) + return; + + if (lock_owner != 0) { + enum info_type type = RELEASE_LOCKS; + assert(control_fd != -1); + write_all(control_fd, &type, sizeof(type)); + } + + fl.l_whence = SEEK_SET; + + for (i = 0; i < lock_num; i++) { + fl.l_type = locks[i].type; + fl.l_start = locks[i].start; + if (locks[i].end == off_max()) + fl.l_len = 0; + else + fl.l_len = locks[i].end - locks[i].start + 1; + + if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0) + abort(); + } + lock_owner = getpid(); +} + +struct saved_file { + struct saved_file *next; + int fd; + void *contents; + off_t off, len; +}; + +static struct saved_file *save_file(struct saved_file *next, int fd) +{ + struct saved_file *s = malloc(sizeof(*s)); + + s->next = next; + s->fd = fd; + s->off = lseek(fd, 0, SEEK_CUR); + /* Special file? Erk... */ + assert(s->off != -1); + s->len = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + s->contents = malloc(s->len); + if (read(fd, s->contents, s->len) != s->len) + err(1, "Failed to save %zu bytes", (size_t)s->len); + lseek(fd, s->off, SEEK_SET); + return s; +} + +/* We have little choice but to save and restore open files: mmap means we + * can really intercept changes in the child. + * + * We could do non-mmap'ed files on demand, however. */ +static struct saved_file *save_files(void) +{ + struct saved_file *files = NULL; + struct failtest_call *i; + + /* Figure out the set of live fds. */ + tlist_for_each_rev(&history, i, list) { + if (i->type == FAILTEST_OPEN) { + int fd = i->u.open.ret; + /* Only do successful, writable fds. */ + if (fd < 0) + continue; + + /* If it was closed, cleanup == NULL. */ + if (!i->cleanup) + continue; + + if ((i->u.open.flags & O_RDWR) == O_RDWR) { + files = save_file(files, fd); + } else if ((i->u.open.flags & O_WRONLY) + == O_WRONLY) { + /* FIXME: Handle O_WRONLY. Open with O_RDWR? */ + abort(); + } + } + } + + return files; +} + +static void restore_files(struct saved_file *s) +{ + while (s) { + struct saved_file *next = s->next; + + lseek(s->fd, 0, SEEK_SET); + if (write(s->fd, s->contents, s->len) != s->len) + err(1, "Failed to restore %zu bytes", (size_t)s->len); + if (ftruncate(s->fd, s->len) != 0) + err(1, "Failed to trim file to length %zu", + (size_t)s->len); + free(s->contents); + lseek(s->fd, s->off, SEEK_SET); + free(s); + s = next; + } +} + +static void free_files(struct saved_file *s) +{ + while (s) { + struct saved_file *next = s->next; + free(s->contents); + free(s); + s = next; + } +} + +static void free_call(struct failtest_call *call) +{ + /* We don't do this in cleanup: needed even for failed opens. */ + if (call->type == FAILTEST_OPEN) + free((char *)call->u.open.pathname); + free(call->backtrace); + tlist_del_from(&history, call, list); + free(call); +} + +/* Free up memory, so valgrind doesn't report leaks. */ +static void free_everything(void) +{ + struct failtest_call *i; + + while ((i = tlist_top(&history, struct failtest_call, list)) != NULL) + free_call(i); + + failtable_clear(&failtable); +} + +static NORETURN void failtest_cleanup(bool forced_cleanup, int status) +{ + struct failtest_call *i; + + /* For children, we don't care if they "failed" the testing. */ + if (control_fd != -1) + status = 0; + + if (forced_cleanup) { + /* We didn't actually do final operation: remove it. */ + i = tlist_tail(&history, struct failtest_call, list); + free_call(i); + } + + /* Cleanup everything, in reverse order. */ + tlist_for_each_rev(&history, i, list) { + if (!i->cleanup) + continue; + if (!forced_cleanup) { + printf("Leak at %s:%u: --failpath=%s\n", + i->file, i->line, failpath_string()); + status = 1; + } + i->cleanup(&i->u); + } + + free_everything(); + if (status == 0) + tell_parent(SUCCESS); + else + tell_parent(FAILURE); + exit(status); } static bool should_fail(struct failtest_call *call) @@ -178,22 +502,88 @@ static bool should_fail(struct failtest_call *call) enum info_type type = UNEXPECTED; char *out = NULL; size_t outlen = 0; + struct saved_file *files; + struct failtest_call *dup; if (call == &unrecorded_call) return false; if (failpath) { - if (tolower(*failpath) != info_to_arg[call->type]) - errx(1, "Failpath expected '%c' got '%c'\n", - info_to_arg[call->type], *failpath); - return isupper(*(failpath++)); + /* + means continue after end, like normal. */ + if (*failpath == '+') + failpath = NULL; + else if (*failpath == '\0') { + /* Continue, but don't inject errors. */ + return call->fail = false; + } else { + if (tolower((unsigned char)*failpath) + != info_to_arg[call->type]) + errx(1, "Failpath expected '%c' got '%c'\n", + info_to_arg[call->type], *failpath); + call->fail = cisupper(*(failpath++)); + return call->fail; + } } - if (!failtest_hook(history, history_num)) { - call->fail = false; - return false; + /* Attach debugger if they asked for it. */ + if (debugpath) { + char *path; + + /* Pretend this last call matches whatever path wanted: + * keeps valgrind happy. */ + call->fail = cisupper(debugpath[strlen(debugpath)-1]); + path = failpath_string(); + + if (streq(path, debugpath)) { + char str[80]; + + /* Don't timeout. */ + signal(SIGUSR1, SIG_IGN); + sprintf(str, "xterm -e gdb /proc/%d/exe %d &", + getpid(), getpid()); + if (system(str) == 0) + sleep(5); + } else { + /* Ignore last character: could be upper or lower. */ + path[strlen(path)-1] = '\0'; + if (!strstarts(debugpath, path)) { + fprintf(stderr, + "--debugpath not followed: %s\n", path); + debugpath = NULL; + } + } + free(path); + } + + /* Are we probing? If so, we never fail twice. */ + if (probing) + return call->fail = false; + + /* Don't more than once in the same place. */ + dup = failtable_get(&failtable, call); + if (dup) + return call->fail = false; + + if (failtest_hook) { + switch (failtest_hook(&history)) { + case FAIL_OK: + break; + case FAIL_PROBE: + probing = true; + break; + case FAIL_DONT_FAIL: + call->fail = false; + return false; + default: + abort(); + } } + /* Add it to our table of calls. */ + failtable_add(&failtable, call); + + files = save_files(); + /* We're going to fail in the child. */ call->fail = true; if (pipe(control) != 0 || pipe(output) != 0) @@ -206,13 +596,35 @@ static bool should_fail(struct failtest_call *call) err(1, "forking failed"); if (child == 0) { + if (tracefd != -1) { + struct timeval diff; + const char *p; + char *failpath; + struct failtest_call *c; + + c = tlist_tail(&history, struct failtest_call, list); + diff = time_sub(time_now(), start); + failpath = failpath_string(); + trace("%u->%u (%u.%02u): %s (", getppid(), getpid(), + (int)diff.tv_sec, (int)diff.tv_usec / 10000, + failpath); + free(failpath); + p = strrchr(c->file, '/'); + if (p) + trace("%s", p+1); + else + trace("%s", c->file); + trace(":%u)\n", c->line); + } close(control[0]); close(output[0]); dup2(output[1], STDOUT_FILENO); dup2(output[1], STDERR_FILENO); if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO) close(output[1]); - control_fd = control[1]; + control_fd = move_fd_to_high(control[1]); + /* Valgrind spots the leak if we don't free these. */ + free_files(files); return true; } @@ -237,8 +649,13 @@ static bool should_fail(struct failtest_call *call) else ret = poll(pfd, 2, failtest_timeout_ms); - if (ret <= 0) + if (ret == 0) hand_down(SIGUSR1); + if (ret < 0) { + if (errno == EINTR) + continue; + err(1, "Poll returned %i", ret); + } if (pfd[0].revents & POLLIN) { ssize_t len; @@ -251,6 +668,9 @@ static bool should_fail(struct failtest_call *call) if (type == WRITE) { if (!read_write_info(control[0])) break; + } else if (type == RELEASE_LOCKS) { + release_locks(); + /* FIXME: Tell them we're done... */ } } } else if (pfd[0].revents & POLLHUP) { @@ -261,9 +681,13 @@ static bool should_fail(struct failtest_call *call) close(output[0]); close(control[0]); waitpid(child, &status, 0); - if (!WIFEXITED(status)) - child_fail(out, outlen, "Killed by signal %u: ", - WTERMSIG(status)); + if (!WIFEXITED(status)) { + if (WTERMSIG(status) == SIGUSR1) + child_fail(out, outlen, "Timed out"); + else + child_fail(out, outlen, "Killed by signal %u: ", + WTERMSIG(status)); + } /* Child printed failure already, just pass up exit code. */ if (type == FAILURE) { fprintf(stderr, "%.*s", (int)outlen, out); @@ -277,11 +701,21 @@ static bool should_fail(struct failtest_call *call) free(out); signal(SIGUSR1, SIG_DFL); + restore_files(files); + + /* Only child does probe. */ + probing = false; + /* We continue onwards without failing. */ call->fail = false; return false; } +static void cleanup_calloc(struct calloc_call *call) +{ + free(call->ret); +} + void *failtest_calloc(size_t nmemb, size_t size, const char *file, unsigned line) { @@ -296,11 +730,17 @@ void *failtest_calloc(size_t nmemb, size_t size, p->error = ENOMEM; } else { p->u.calloc.ret = calloc(nmemb, size); + set_cleanup(p, cleanup_calloc, struct calloc_call); } errno = p->error; return p->u.calloc.ret; } +static void cleanup_malloc(struct malloc_call *call) +{ + free(call->ret); +} + void *failtest_malloc(size_t size, const char *file, unsigned line) { struct failtest_call *p; @@ -309,13 +749,51 @@ void *failtest_malloc(size_t size, const char *file, unsigned line) p = add_history(FAILTEST_MALLOC, file, line, &call); if (should_fail(p)) { - p->u.calloc.ret = NULL; + p->u.malloc.ret = NULL; p->error = ENOMEM; } else { - p->u.calloc.ret = malloc(size); + p->u.malloc.ret = malloc(size); + set_cleanup(p, cleanup_malloc, struct malloc_call); } errno = p->error; - return p->u.calloc.ret; + return p->u.malloc.ret; +} + +static void cleanup_realloc(struct realloc_call *call) +{ + free(call->ret); +} + +/* Walk back and find out if we got this ptr from a previous routine. */ +static void fixup_ptr_history(void *ptr) +{ + struct failtest_call *i; + + /* Start at end of history, work back. */ + tlist_for_each_rev(&history, i, list) { + switch (i->type) { + case FAILTEST_REALLOC: + if (i->u.realloc.ret == ptr) { + i->cleanup = NULL; + return; + } + break; + case FAILTEST_MALLOC: + if (i->u.malloc.ret == ptr) { + i->cleanup = NULL; + return; + } + break; + case FAILTEST_CALLOC: + if (i->u.calloc.ret == ptr) { + i->cleanup = NULL; + return; + } + break; + default: + break; + } + } } void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line) @@ -330,41 +808,94 @@ void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line) p->u.realloc.ret = NULL; p->error = ENOMEM; } else { + /* Don't catch this one in the history fixup... */ + p->u.realloc.ret = NULL; + fixup_ptr_history(ptr); p->u.realloc.ret = realloc(ptr, size); + set_cleanup(p, cleanup_realloc, struct realloc_call); } errno = p->error; return p->u.realloc.ret; } -int failtest_open(const char *pathname, int flags, +void failtest_free(void *ptr) +{ + fixup_ptr_history(ptr); + free(ptr); +} + +static void cleanup_open(struct open_call *call) +{ + close(call->ret); +} + +int failtest_open(const char *pathname, const char *file, unsigned line, ...) { struct failtest_call *p; struct open_call call; + va_list ap; call.pathname = strdup(pathname); - call.flags = flags; - if (flags & O_CREAT) { - va_list ap; - va_start(ap, line); - call.mode = va_arg(ap, mode_t); + va_start(ap, line); + call.flags = va_arg(ap, int); + if (call.flags & O_CREAT) { + call.mode = va_arg(ap, int); va_end(ap); } p = add_history(FAILTEST_OPEN, file, line, &call); /* Avoid memory leak! */ if (p == &unrecorded_call) free((char *)call.pathname); - if (should_fail(p)) { + p->u.open.ret = open(pathname, call.flags, call.mode); + + if (p->u.open.ret == -1) { + p->fail = false; + p->error = errno; + } else if (should_fail(p)) { + close(p->u.open.ret); p->u.open.ret = -1; /* FIXME: Play with error codes? */ p->error = EACCES; } else { - p->u.open.ret = open(pathname, flags, call.mode); + set_cleanup(p, cleanup_open, struct open_call); } errno = p->error; return p->u.open.ret; } +void *failtest_mmap(void *addr, size_t length, int prot, int flags, + int fd, off_t offset, const char *file, unsigned line) +{ + struct failtest_call *p; + struct mmap_call call; + + call.addr = addr; + call.length = length; + call.prot = prot; + call.flags = flags; + call.offset = offset; + call.fd = fd; + + p = add_history(FAILTEST_MMAP, file, line, &call); + if (should_fail(p)) { + p->u.mmap.ret = MAP_FAILED; + p->error = ENOMEM; + } else { + p->u.mmap.ret = mmap(addr, length, prot, flags, fd, offset); + } + errno = p->error; + return p->u.mmap.ret; +} + +static void cleanup_pipe(struct pipe_call *call) +{ + if (!call->closed[0]) + close(call->fds[0]); + if (!call->closed[1]) + close(call->fds[1]); +} + int failtest_pipe(int pipefd[2], const char *file, unsigned line) { struct failtest_call *p; @@ -377,6 +908,8 @@ int failtest_pipe(int pipefd[2], const char *file, unsigned line) p->error = EMFILE; } else { p->u.pipe.ret = pipe(p->u.pipe.fds); + p->u.pipe.closed[0] = p->u.pipe.closed[1] = false; + set_cleanup(p, cleanup_pipe, struct pipe_call); } /* This causes valgrind to notice if they use pipefd[] after failure */ memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds)); @@ -384,73 +917,48 @@ int failtest_pipe(int pipefd[2], const char *file, unsigned line) return p->u.pipe.ret; } -ssize_t failtest_read(int fd, void *buf, size_t count, - const char *file, unsigned line) +ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off, + const char *file, unsigned line) { struct failtest_call *p; struct read_call call; call.fd = fd; call.buf = buf; call.count = count; + call.off = off; p = add_history(FAILTEST_READ, file, line, &call); - /* This is going to change seek offset, so save it. */ - if (control_fd != -1) - save_fd_orig(fd); - /* FIXME: Try partial read returns. */ if (should_fail(p)) { p->u.read.ret = -1; p->error = EIO; } else { - p->u.read.ret = read(fd, buf, count); + p->u.read.ret = pread(fd, buf, count, off); } errno = p->error; return p->u.read.ret; } -static struct write_info *new_write(void) -{ - writes = realloc(writes, (writes_num + 1) * sizeof(*writes)); - return &writes[writes_num++]; -} - -ssize_t failtest_write(int fd, const void *buf, size_t count, - const char *file, unsigned line) +ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off, + const char *file, unsigned line) { struct failtest_call *p; struct write_call call; - off_t offset; call.fd = fd; call.buf = buf; call.count = count; + call.off = off; p = add_history(FAILTEST_WRITE, file, line, &call); - offset = lseek(fd, 0, SEEK_CUR); - - /* If we're a child, save contents and tell parent about write. */ - if (control_fd != -1) { - struct write_info *winfo = new_write(); + /* If we're a child, we need to make sure we write the same thing + * to non-files as the parent does, so tell it. */ + if (control_fd != -1 && off == (off_t)-1) { enum info_type type = WRITE; - save_fd_orig(fd); - - winfo->hdr.len = count; - winfo->hdr.fd = fd; - winfo->data = malloc(count); - memcpy(winfo->data, buf, count); - winfo->hdr.offset = offset; - if (winfo->hdr.offset != (off_t)-1) { - lseek(fd, offset, SEEK_SET); - winfo->olddata = malloc(count); - winfo->oldlen = read(fd, winfo->olddata, count); - if (winfo->oldlen == -1) - winfo->oldlen = 0; - } write_all(control_fd, &type, sizeof(type)); - write_all(control_fd, &winfo->hdr, sizeof(winfo->hdr)); - write_all(control_fd, winfo->data, count); + write_all(control_fd, &p->u.write, sizeof(p->u.write)); + write_all(control_fd, buf, count); } /* FIXME: Try partial write returns. */ @@ -459,124 +967,290 @@ ssize_t failtest_write(int fd, const void *buf, size_t count, p->error = EIO; } else { /* FIXME: We assume same write order in parent and child */ - if (child_writes_num != 0) { - if (child_writes[0].hdr.fd != fd) + if (off == (off_t)-1 && child_writes_num != 0) { + if (child_writes[0].fd != fd) errx(1, "Child wrote to fd %u, not %u?", - child_writes[0].hdr.fd, fd); - if (child_writes[0].hdr.offset != offset) + child_writes[0].fd, fd); + if (child_writes[0].off != p->u.write.off) errx(1, "Child wrote to offset %zu, not %zu?", - (size_t)child_writes[0].hdr.offset, - (size_t)offset); - if (child_writes[0].hdr.len != count) + (size_t)child_writes[0].off, + (size_t)p->u.write.off); + if (child_writes[0].count != count) errx(1, "Child wrote length %zu, not %zu?", - child_writes[0].hdr.len, count); - if (memcmp(child_writes[0].data, buf, count)) { + child_writes[0].count, count); + if (memcmp(child_writes[0].buf, buf, count)) { child_fail(NULL, 0, "Child wrote differently to" " fd %u than we did!\n", fd); } - free(child_writes[0].data); + free((char *)child_writes[0].buf); child_writes_num--; memmove(&child_writes[0], &child_writes[1], sizeof(child_writes[0]) * child_writes_num); /* Is this is a socket or pipe, child wrote it already. */ - if (offset == (off_t)-1) { + if (p->u.write.off == (off_t)-1) { p->u.write.ret = count; errno = p->error; return p->u.write.ret; } } - p->u.write.ret = write(fd, buf, count); + p->u.write.ret = pwrite(fd, buf, count, off); } errno = p->error; return p->u.write.ret; } -/* We only trap this so we can dup fds in case we need to restore. */ -int failtest_close(int fd) +ssize_t failtest_read(int fd, void *buf, size_t count, + const char *file, unsigned line) +{ + return failtest_pread(fd, buf, count, lseek(fd, 0, SEEK_CUR), + file, line); +} + +ssize_t failtest_write(int fd, const void *buf, size_t count, + const char *file, unsigned line) +{ + return failtest_pwrite(fd, buf, count, lseek(fd, 0, SEEK_CUR), + file, line); +} + +static struct lock_info *WARN_UNUSED_RESULT +add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type) { unsigned int i; - int newfd = -1; + struct lock_info *l; + + for (i = 0; i < lock_num; i++) { + l = &locks[i]; - for (i = 0; i < fd_orig_num; i++) { - if (fd_orig[i].fd == fd) { - fd_orig[i].fd = newfd = dup(fd); - fd_orig[i].dupped = true; + if (l->fd != fd) + continue; + /* Four cases we care about: + * Start overlap: + * l = | | + * new = | | + * Mid overlap: + * l = | | + * new = | | + * End overlap: + * l = | | + * new = | | + * Total overlap: + * l = | | + * new = | | + */ + if (start > l->start && end < l->end) { + /* Mid overlap: trim entry, add new one. */ + off_t new_start, new_end; + new_start = end + 1; + new_end = l->end; + l->end = start - 1; + locks = add_lock(locks, + fd, new_start, new_end, l->type); + l = &locks[i]; + } else if (start <= l->start && end >= l->end) { + /* Total overlap: eliminate entry. */ + l->end = 0; + l->start = 1; + } else if (end >= l->start && end < l->end) { + /* Start overlap: trim entry. */ + l->start = end + 1; + } else if (start > l->start && start <= l->end) { + /* End overlap: trim entry. */ + l->end = start-1; + } + /* Nothing left? Remove it. */ + if (l->end < l->start) { + memmove(l, l + 1, (--lock_num - i) * sizeof(l[0])); + i--; } } - for (i = 0; i < writes_num; i++) { - if (writes[i].hdr.fd == fd) - writes[i].hdr.fd = newfd; + if (type != F_UNLCK) { + locks = realloc(locks, (lock_num + 1) * sizeof(*locks)); + l = &locks[lock_num++]; + l->fd = fd; + l->start = start; + l->end = end; + l->type = type; } - return close(fd); + return locks; } -void failtest_init(int argc, char *argv[]) +/* We trap this so we can record it: we don't fail it. */ +int failtest_close(int fd, const char *file, unsigned line) { - if (argc == 2 - && strncmp(argv[1], "--failpath=", strlen("--failpath=")) == 0) { - failpath = argv[1] + strlen("--failpath="); + struct failtest_call *i; + struct close_call call; + struct failtest_call *p; + + call.fd = fd; + p = add_history(FAILTEST_CLOSE, file, line, &call); + p->fail = false; + + /* Consume close from failpath. */ + if (failpath) + if (should_fail(p)) + abort(); + + if (fd < 0) + return close(fd); + + /* Trace history to find source of fd. */ + tlist_for_each_rev(&history, i, list) { + switch (i->type) { + case FAILTEST_PIPE: + /* From a pipe? */ + if (i->u.pipe.fds[0] == fd) { + assert(!i->u.pipe.closed[0]); + i->u.pipe.closed[0] = true; + if (i->u.pipe.closed[1]) + i->cleanup = NULL; + goto out; + } + if (i->u.pipe.fds[1] == fd) { + assert(!i->u.pipe.closed[1]); + i->u.pipe.closed[1] = true; + if (i->u.pipe.closed[0]) + i->cleanup = NULL; + goto out; + } + break; + case FAILTEST_OPEN: + if (i->u.open.ret == fd) { + assert((void *)i->cleanup + == (void *)cleanup_open); + i->cleanup = NULL; + goto out; + } + break; + default: + break; + } } + +out: + locks = add_lock(locks, fd, 0, off_max(), F_UNLCK); + return close(fd); } -/* Free up memory, so valgrind doesn't report leaks. */ -static void free_everything(void) +/* Zero length means "to end of file" */ +static off_t end_of(off_t start, off_t len) { - unsigned int i; + if (len == 0) + return off_max(); + return start + len - 1; +} + +/* FIXME: This only handles locks, really. */ +int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...) +{ + struct failtest_call *p; + struct fcntl_call call; + va_list ap; - for (i = 0; i < writes_num; i++) { - free(writes[i].data); - if (writes[i].hdr.offset != (off_t)-1) - free(writes[i].olddata); + call.fd = fd; + call.cmd = cmd; + + /* Argument extraction. */ + switch (cmd) { + case F_SETFL: + case F_SETFD: + va_start(ap, cmd); + call.arg.l = va_arg(ap, long); + va_end(ap); + return fcntl(fd, cmd, call.arg.l); + case F_GETFD: + case F_GETFL: + return fcntl(fd, cmd); + case F_GETLK: + get_locks(); + va_start(ap, cmd); + call.arg.fl = *va_arg(ap, struct flock *); + va_end(ap); + return fcntl(fd, cmd, &call.arg.fl); + case F_SETLK: + case F_SETLKW: + va_start(ap, cmd); + call.arg.fl = *va_arg(ap, struct flock *); + va_end(ap); + break; + default: + /* This means you need to implement it here. */ + err(1, "failtest: unknown fcntl %u", cmd); } - free(writes); - free(fd_orig); - for (i = 0; i < history_num; i++) { - if (history[i].type == FAILTEST_OPEN) - free((char *)history[i].u.open.pathname); + + p = add_history(FAILTEST_FCNTL, file, line, &call); + + if (should_fail(p)) { + p->u.fcntl.ret = -1; + if (p->u.fcntl.cmd == F_SETLK) + p->error = EAGAIN; + else + p->error = EDEADLK; + } else { + get_locks(); + p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd, + &p->u.fcntl.arg.fl); + if (p->u.fcntl.ret == -1) + p->error = errno; + else { + /* We don't handle anything else yet. */ + assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET); + locks = add_lock(locks, + p->u.fcntl.fd, + p->u.fcntl.arg.fl.l_start, + end_of(p->u.fcntl.arg.fl.l_start, + p->u.fcntl.arg.fl.l_len), + p->u.fcntl.arg.fl.l_type); + } } - free(history); + errno = p->error; + return p->u.fcntl.ret; } -void failtest_exit(int status) +pid_t failtest_getpid(const char *file, unsigned line) +{ + /* You must call failtest_init first! */ + assert(orig_pid); + return orig_pid; +} + +void failtest_init(int argc, char *argv[]) { unsigned int i; - if (control_fd == -1) { - free_everything(); - exit(status); + orig_pid = getpid(); + + warnfd = move_fd_to_high(dup(STDERR_FILENO)); + for (i = 1; i < argc; i++) { + if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) { + failpath = argv[i] + strlen("--failpath="); + } else if (strcmp(argv[i], "--tracepath") == 0) { + tracefd = warnfd; + failtest_timeout_ms = -1; + } else if (!strncmp(argv[i], "--debugpath=", + strlen("--debugpath="))) { + debugpath = argv[i] + strlen("--debugpath="); + } } + failtable_init(&failtable); + start = time_now(); +} +bool failtest_has_failed(void) +{ + return control_fd != -1; +} + +void failtest_exit(int status) +{ if (failtest_exit_check) { - if (!failtest_exit_check(history, history_num)) + if (!failtest_exit_check(&history)) child_fail(NULL, 0, "failtest_exit_check failed\n"); } - /* Restore any stuff we overwrote. */ - for (i = 0; i < writes_num; i++) { - if (writes[i].hdr.offset == (off_t)-1) - continue; - if (writes[i].oldlen != 0) { - lseek(writes[i].hdr.fd, writes[i].hdr.offset, - SEEK_SET); - write(writes[i].hdr.fd, writes[i].olddata, - writes[i].oldlen); - } - } - - /* Fix up fd offsets, restore sizes. */ - for (i = 0; i < fd_orig_num; i++) { - lseek(fd_orig[i].fd, fd_orig[i].offset, SEEK_SET); - ftruncate(fd_orig[i].fd, fd_orig[i].size); - /* Free up any file descriptors we dup'ed. */ - if (fd_orig[i].dupped) - close(fd_orig[i].fd); - } - - free_everything(); - tell_parent(SUCCESS); - exit(0); + failtest_cleanup(false, status); }