X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ffailtest%2Ffailtest.c;h=bac0aa350ac24378e8af2b949cb0781a7699906a;hp=2a39679f4827eadcd05b183587534bb040aa1c8a;hb=f18b792838ee010b42091639b673c7b28606aa47;hpb=f0d8f32b2868b84ec7e6bba8ae892950eef491c4

diff --git a/ccan/failtest/failtest.c b/ccan/failtest/failtest.c
index 2a39679f..bac0aa35 100644
--- a/ccan/failtest/failtest.c
+++ b/ccan/failtest/failtest.c
@@ -1,3 +1,5 @@
+/* Licensed under LGPL - see LICENSE file for details */
+#include <ccan/failtest/failtest.h>
 #include <stdarg.h>
 #include <string.h>
 #include <stdio.h>
@@ -11,20 +13,21 @@
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <sys/time.h>
+#include <signal.h>
 #include <assert.h>
+#include <ccan/time/time.h>
 #include <ccan/read_write_all/read_write_all.h>
 #include <ccan/failtest/failtest_proto.h>
-#include <ccan/failtest/failtest.h>
 #include <ccan/build_assert/build_assert.h>
 
-bool (*failtest_hook)(struct failtest_call *history, unsigned num)
-= failtest_default_hook;
+enum failtest_result (*failtest_hook)(struct failtest_call *, unsigned);
 
 static int tracefd = -1;
 
 unsigned int failtest_timeout_ms = 20000;
 
 const char *failpath;
+const char *debugpath;
 
 enum info_type {
 	WRITE,
@@ -47,6 +50,7 @@ static struct failtest_call *history = NULL;
 static unsigned int history_num = 0;
 static int control_fd = -1;
 static struct timeval start;
+static unsigned int probe_count = 0;
 
 static struct write_call *child_writes = NULL;
 static unsigned int child_writes_num = 0;
@@ -55,7 +59,9 @@ static pid_t lock_owner;
 static struct lock_info *locks = NULL;
 static unsigned int lock_num = 0;
 
-static const char info_to_arg[] = "mceoprwf";
+static pid_t orig_pid;
+
+static const char info_to_arg[] = "mceoxprwf";
 
 /* Dummy call used for failtest_undo wrappers. */
 static struct failtest_call unrecorded_call;
@@ -82,13 +88,9 @@ static struct failtest_call *add_history_(enum failtest_call_type type,
 #define add_history(type, file, line, elem) \
 	add_history_((type), (file), (line), (elem), sizeof(*(elem)))
 
+/* We do a fake call inside a sizeof(), to check types. */
 #define set_cleanup(call, clean, type)			\
-	(call)->cleanup = (void *)((void)sizeof(clean((type *)NULL)), (clean))
-
-bool failtest_default_hook(struct failtest_call *history, unsigned num)
-{
-	return true;
-}
+	(call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
 
 static bool read_write_info(int fd)
 {
@@ -146,11 +148,23 @@ static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
 	exit(1);
 }
 
+static void trace(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (tracefd == -1)	/* tracing not enabled: nothing to do */
+		return;
+	/* printf-style: format the message straight onto tracefd. */
+	va_start(ap, fmt);
+	vdprintf(tracefd, fmt, ap);
+	va_end(ap);
+}
+
 static pid_t child;
 
-static void hand_down(int signal)
+static void hand_down(int signum)
 {
-	kill(child, signal);
+	kill(child, signum);
 }
 
 static void release_locks(void)
@@ -219,40 +233,28 @@ static void get_locks(void)
 	lock_owner = getpid();
 }
 
-static void trace_str(const char *str)
-{
-	ssize_t ret;
-
-	while ((ret = write(tracefd, str, strlen(str))) <= 0) {
-		str += ret;
-		if (!*str)
-			return;
-	}
-	err(1, "Writing trace.");
-}
-
 struct saved_file {
 	struct saved_file *next;
 	int fd;
 	void *contents;
-	off_t len;
+	off_t off, len;
 };
 
 static struct saved_file *save_file(struct saved_file *next, int fd)
 {
 	struct saved_file *s = malloc(sizeof(*s));
-	off_t orig = lseek(fd, 0, SEEK_CUR);
-
-	/* Special file?  Erk... */
-	assert(orig != -1);
 
 	s->next = next;
 	s->fd = fd;
+	s->off = lseek(fd, 0, SEEK_CUR);
+	/* Special file?  Erk... */
+	assert(s->off != -1);
 	s->len = lseek(fd, 0, SEEK_END);
 	lseek(fd, 0, SEEK_SET);
 	s->contents = malloc(s->len);
-	read(fd, s->contents, s->len);
-	lseek(fd, orig, SEEK_SET);
+	if (read(fd, s->contents, s->len) != s->len)
+		err(1, "Failed to save %zu bytes", (size_t)s->len);
+	lseek(fd, s->off, SEEK_SET);
 	return s;
 }
 	
@@ -264,34 +266,26 @@ static struct saved_file *save_files(void)
 {
 	struct saved_file *files = NULL;
 	int i;
-	fd_set closed;
 
 	/* Figure out the set of live fds. */
-	FD_ZERO(&closed);
 	for (i = history_num - 2; i >= 0; i--) {
-		/* FIXME: Handle dup. */
-		if (history[i].type == FAILTEST_CLOSE) {
-			assert(!FD_ISSET(history[i].u.close.fd, &closed));
-			FD_SET(history[i].u.close.fd, &closed);
-		} else if (history[i].type == FAILTEST_OPEN) {
+		if (history[i].type == FAILTEST_OPEN) {
 			int fd = history[i].u.open.ret;
 			/* Only do successful, writable fds. */
 			if (fd < 0)
 				continue;
 
-			/* If it wasn't closed again... */
-			if (!FD_ISSET(fd, &closed)) {
-				if ((history[i].u.open.flags & O_RDWR)
-				    == O_RDWR) {
-					files = save_file(files, fd);
-				} else if ((history[i].u.open.flags & O_WRONLY)
-					   == O_WRONLY) {
-					/* FIXME: Handle O_WRONLY.  Open with
-					 * O_RDWR? */
-					abort();
-				}
-			} else
-				FD_CLR(history[i].u.open.ret, &closed);
+			/* If it was closed, cleanup == NULL. */
+			if (!history[i].cleanup)
+				continue;
+
+			if ((history[i].u.open.flags & O_RDWR) == O_RDWR) {
+				files = save_file(files, fd);
+			} else if ((history[i].u.open.flags & O_WRONLY)
+				   == O_WRONLY) {
+				/* FIXME: Handle O_WRONLY.  Open with O_RDWR? */
+				abort();
+			}
 		}
 	}
 
@@ -302,17 +296,72 @@ static void restore_files(struct saved_file *s)
 {
 	while (s) {
 		struct saved_file *next = s->next;
-		off_t orig = lseek(s->fd, 0, SEEK_CUR);
 
 		lseek(s->fd, 0, SEEK_SET);
-		write(s->fd, s->contents, s->len);
+		if (write(s->fd, s->contents, s->len) != s->len)
+			err(1, "Failed to restore %zu bytes", (size_t)s->len);
+		if (ftruncate(s->fd, s->len) != 0)
+			err(1, "Failed to trim file to length %zu",
+			    (size_t)s->len);
+		free(s->contents);
+		lseek(s->fd, s->off, SEEK_SET);
+		free(s);
+		s = next;
+	}
+}
+
+static void free_files(struct saved_file *s)
+{
+	while (s) {
+		struct saved_file *next = s->next;
 		free(s->contents);
-		lseek(s->fd, orig, SEEK_SET);
 		free(s);
 		s = next;
 	}
 }
 
+/* Free history and its open pathnames, so valgrind doesn't report leaks. */
+static void free_everything(void)
+{
+	unsigned int i;
+
+	/* Not left to the cleanup hooks: needed even for failed opens. */
+	for (i = 0; i < history_num; i++) {
+		if (history[i].type == FAILTEST_OPEN)
+			free((char *)history[i].u.open.pathname);
+	}
+	free(history);
+}
+
+/* Run pending cleanup handlers, reporting leaks unless forced, then exit. */
+static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
+{
+	int i;
+
+	/* For children, we don't care if they "failed" the testing. */
+	if (control_fd != -1)
+		status = 0;
+	if (forced_cleanup)
+		history_num--;	/* presumably the call still in progress */
+
+	/* Cleanup everything, in reverse order. */
+	for (i = history_num - 1; i >= 0; i--) {
+		if (!history[i].cleanup)
+			continue;
+		if (!forced_cleanup) {
+			char *path = failpath_string();
+			printf("Leak at %s:%u: --failpath=%s\n",
+			       history[i].file, history[i].line, path);
+			free(path);	/* failpath_string() allocates */
+			status = 1;
+		}
+		history[i].cleanup(&history[i].u);
+	}
+	free_everything();
+	tell_parent(SUCCESS);
+	exit(status);
+}
+
 static bool should_fail(struct failtest_call *call)
 {
 	int status;
@@ -322,6 +371,10 @@ static bool should_fail(struct failtest_call *call)
 	size_t outlen = 0;
 	struct saved_file *files;
 
+	/* Are we probing? */
+	if (probe_count && --probe_count == 0 && control_fd != -1)
+		failtest_cleanup(true, 0);
+
 	if (call == &unrecorded_call)
 		return false;
 
@@ -329,18 +382,65 @@ static bool should_fail(struct failtest_call *call)
 		/* + means continue after end, like normal. */
 		if (*failpath == '+')
 			failpath = NULL;
-		else {
-			if (tolower(*failpath) != info_to_arg[call->type])
+		else if (*failpath == '\0') {
+			/* Continue, but don't inject errors. */
+			return call->fail = false;
+		} else {
+			if (tolower((unsigned char)*failpath)
+			    != info_to_arg[call->type])
 				errx(1, "Failpath expected '%c' got '%c'\n",
 				     info_to_arg[call->type], *failpath);
-			call->fail = isupper(*(failpath++));
+			call->fail = isupper((unsigned char)*(failpath++));
 			return call->fail;
 		}
 	}
 
-	if (!failtest_hook(history, history_num)) {
-		call->fail = false;
-		return false;
+	/* Attach debugger if they asked for it. */
+	if (debugpath && history_num == strlen(debugpath)) {
+		unsigned int i;
+
+		for (i = 0; i < history_num; i++) {
+			unsigned char c = info_to_arg[history[i].type];
+			if (history[i].fail)
+				c = toupper(c);
+			if (c != debugpath[i])
+				break;
+		}
+		if (i == history_num) {
+			char str[80];
+
+			/* Don't timeout. */
+			signal(SIGUSR1, SIG_IGN);
+			sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
+				getpid(), getpid());
+			if (system(str) == 0)
+				sleep(5);
+		}
+	}
+
+	if (failtest_hook) {
+		switch (failtest_hook(history, history_num)) {
+		case FAIL_OK:
+			break;
+		case FAIL_PROBE:
+			/* Already down probe path?  Stop now. */
+			if (!probe_count) {
+				/* FIXME: We should run *parent* and
+				 * run probe until calls match up again. */
+				probe_count = 3;
+				break;
+			} else {
+				/* Child should give up now. */
+				if (control_fd != -1)
+					failtest_cleanup(true, 0);
+				/* Parent: don't fail again, so fall through. */
+			}
+		case FAIL_DONT_FAIL:
+			call->fail = false;
+			return false;
+		default:
+			abort();
+		}
 	}
 
 	files = save_files();
@@ -358,29 +458,20 @@ static bool should_fail(struct failtest_call *call)
 
 	if (child == 0) {
 		if (tracefd != -1) {
-			struct timeval now;
-			char str[50], *p;
-			gettimeofday(&now, NULL);
-			if (now.tv_usec < start.tv_usec) {
-				now.tv_sec--;
-				now.tv_usec += 1000000;
-			}
-			now.tv_usec -= start.tv_usec;
-			now.tv_sec -= start.tv_sec;
-			sprintf(str, "%u (%u.%02u): ", getpid(),
-				(int)now.tv_sec, (int)now.tv_usec / 10000);
-			trace_str(str);
+			struct timeval diff;
+			const char *p;
+
+			diff = time_sub(time_now(), start);
 			p = failpath_string();
-			trace_str(p);
-			free(p);
-			trace_str("(");
-			p = strchr(history[history_num-1].file, '/');
+			trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
+			      (int)diff.tv_sec, (int)diff.tv_usec / 10000, p);
+			free((char *)p);
+			p = strrchr(history[history_num-1].file, '/');
 			if (p)
-				trace_str(p+1);
+				trace("%s", p+1);
 			else
-				trace_str(history[history_num-1].file);
-			sprintf(str, ":%u)\n", history[history_num-1].line);
-			trace_str(str);
+				trace("%s", history[history_num-1].file);
+			trace(":%u)\n", history[history_num-1].line);
 		}
 		close(control[0]);
 		close(output[0]);
@@ -389,6 +480,8 @@ static bool should_fail(struct failtest_call *call)
 		if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
 			close(output[1]);
 		control_fd = control[1];
+		/* Valgrind spots the leak if we don't free these. */
+		free_files(files);
 		return true;
 	}
 
@@ -413,8 +506,13 @@ static bool should_fail(struct failtest_call *call)
 		else
 			ret = poll(pfd, 2, failtest_timeout_ms);
 
-		if (ret <= 0)
+		if (ret == 0)
 			hand_down(SIGUSR1);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			err(1, "Poll returned %i", ret);
+		}
 
 		if (pfd[0].revents & POLLIN) {
 			ssize_t len;
@@ -440,9 +538,13 @@ static bool should_fail(struct failtest_call *call)
 	close(output[0]);
 	close(control[0]);
 	waitpid(child, &status, 0);
-	if (!WIFEXITED(status))
-		child_fail(out, outlen, "Killed by signal %u: ",
-			   WTERMSIG(status));
+	if (!WIFEXITED(status)) {
+		if (WTERMSIG(status) == SIGUSR1)
+			child_fail(out, outlen, "Timed out");
+		else
+			child_fail(out, outlen, "Killed by signal %u: ",
+				   WTERMSIG(status));
+	}
 	/* Child printed failure already, just pass up exit code. */
 	if (type == FAILURE) {
 		fprintf(stderr, "%.*s", (int)outlen, out);
@@ -590,21 +692,25 @@ int failtest_open(const char *pathname,
 	va_start(ap, line);
 	call.flags = va_arg(ap, int);
 	if (call.flags & O_CREAT) {
-		call.mode = va_arg(ap, mode_t);
+		call.mode = va_arg(ap, int);
 		va_end(ap);
 	}
 	p = add_history(FAILTEST_OPEN, file, line, &call);
 	/* Avoid memory leak! */
 	if (p == &unrecorded_call)
 		free((char *)call.pathname);
-	if (should_fail(p)) {
+	p->u.open.ret = open(pathname, call.flags, call.mode);
+
+	if (!failpath && p->u.open.ret == -1) {
+		p->fail = false;
+		p->error = errno;
+	} else if (should_fail(p)) {
+		close(p->u.open.ret);
 		p->u.open.ret = -1;
 		/* FIXME: Play with error codes? */
 		p->error = EACCES;
 	} else {
-		p->u.open.ret = open(pathname, call.flags, call.mode);
 		set_cleanup(p, cleanup_open, struct open_call);
-		p->u.open.dup_fd = p->u.open.ret;
 	}
 	errno = p->error;
 	return p->u.open.ret;
@@ -639,11 +745,6 @@ int failtest_pipe(int pipefd[2], const char *file, unsigned line)
 	return p->u.pipe.ret;
 }
 
-static void cleanup_read(struct read_call *call)
-{
-	lseek(call->fd, call->off, SEEK_SET);
-}
-
 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
 		       const char *file, unsigned line)
 {
@@ -661,61 +762,26 @@ ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
 		p->error = EIO;
 	} else {
 		p->u.read.ret = pread(fd, buf, count, off);
-		set_cleanup(p, cleanup_read, struct read_call);
 	}
 	errno = p->error;
 	return p->u.read.ret;
 }
 
-static void cleanup_write(struct write_call *call)
-{
-	lseek(call->dup_fd, call->off, SEEK_SET);
-	write(call->dup_fd, call->saved_contents, call->saved_len);
-	lseek(call->dup_fd, call->off, SEEK_SET);
-	ftruncate(call->dup_fd, call->old_filelen);
-	free(call->saved_contents);
-}
-
 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
 			const char *file, unsigned line)
 {
 	struct failtest_call *p;
 	struct write_call call;
 
-	call.fd = call.dup_fd = fd;
+	call.fd = fd;
 	call.buf = buf;
 	call.count = count;
 	call.off = off;
 	p = add_history(FAILTEST_WRITE, file, line, &call);
 
-	/* Save old contents if we can */
-	if (p->u.write.off != -1) {
-		ssize_t ret;
-		p->u.write.old_filelen = lseek(fd, 0, SEEK_END);
-
-		/* Write past end of file?  Nothing to save.*/
-		if (p->u.write.old_filelen <= p->u.write.off)
-			p->u.write.saved_len = 0;
-		/* Write which goes over end of file?  Partial save. */
-		else if (p->u.write.off + count > p->u.write.old_filelen)
-			p->u.write.saved_len = p->u.write.old_filelen
-				- p->u.write.off;
-		/* Full save. */
-		else
-			p->u.write.saved_len = count;
-
-		p->u.write.saved_contents = malloc(p->u.write.saved_len);
-		lseek(fd, p->u.write.off, SEEK_SET);
-		ret = read(fd, p->u.write.saved_contents, p->u.write.saved_len);
-		if (ret != p->u.write.saved_len)
-			err(1, "Expected %i bytes, got %i",
-			    (int)p->u.write.saved_len, (int)ret);
-		lseek(fd, p->u.write.off, SEEK_SET);
-		set_cleanup(p, cleanup_write, struct write_call);
-	}
-
-	/* If we're a child, tell parent about write. */
-	if (control_fd != -1) {
+	/* If we're a child, we need to make sure we write the same thing
+	 * to non-files as the parent does, so tell it. */
+	if (control_fd != -1 && off == (off_t)-1) {
 		enum info_type type = WRITE;
 
 		write_all(control_fd, &type, sizeof(type));
@@ -729,7 +795,7 @@ ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
 		p->error = EIO;
 	} else {
 		/* FIXME: We assume same write order in parent and child */
-		if (child_writes_num != 0) {
+		if (off == (off_t)-1 && child_writes_num != 0) {
 			if (child_writes[0].fd != fd)
 				errx(1, "Child wrote to fd %u, not %u?",
 				     child_writes[0].fd, fd);
@@ -841,49 +907,50 @@ add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
 	return locks;
 }
 
-/* We only trap this so we can dup fds in case we need to restore. */
-int failtest_close(int fd)
+/* We trap this so we can record it: we don't fail it. */
+int failtest_close(int fd, const char *file, unsigned line)
 {
-	int new_fd = -1, i;
+	int i;
+	struct close_call call;
+	struct failtest_call *p;
+
+	call.fd = fd;
+	p = add_history(FAILTEST_CLOSE, file, line, &call);
+	p->fail = false;
+
+	/* Consume close from failpath. */
+	if (failpath)
+		if (should_fail(p))
+			abort();
 
 	if (fd < 0)
 		return close(fd);
 
-	/* Trace history to find source of fd, and if we need to cleanup writes. */
+	/* Trace history to find source of fd. */
 	for (i = history_num-1; i >= 0; i--) {
 		switch (history[i].type) {
-		case FAILTEST_WRITE:
-			if (history[i].u.write.fd != fd)
-				break;
-			if (!history[i].cleanup)
-				break;
-			/* We need to save fd so we can restore file. */
-			if (new_fd == -1)
-				new_fd = dup(fd);
-			history[i].u.write.dup_fd = new_fd;
-			break;
-		case FAILTEST_READ:
-			/* We don't need to cleanup reads on closed fds. */
-			if (history[i].u.read.fd != fd)
-				break;
-			history[i].cleanup = NULL;
-			break;
 		case FAILTEST_PIPE:
-			/* From a pipe?  We don't ever restore pipes... */
+			/* From a pipe? */
 			if (history[i].u.pipe.fds[0] == fd) {
-				assert(new_fd == -1);
+				assert(!history[i].u.pipe.closed[0]);
 				history[i].u.pipe.closed[0] = true;
+				if (history[i].u.pipe.closed[1])
+					history[i].cleanup = NULL;
 				goto out;
 			}
 			if (history[i].u.pipe.fds[1] == fd) {
-				assert(new_fd == -1);
+				assert(!history[i].u.pipe.closed[1]);
 				history[i].u.pipe.closed[1] = true;
+				if (history[i].u.pipe.closed[0])
+					history[i].cleanup = NULL;
 				goto out;
 			}
 			break;
 		case FAILTEST_OPEN:
 			if (history[i].u.open.ret == fd) {
-				history[i].u.open.dup_fd = new_fd;
+				assert((void *)history[i].cleanup
+				       == (void *)cleanup_open);
+				history[i].cleanup = NULL;
 				goto out;
 			}
 			break;
@@ -944,7 +1011,6 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
 	}
 
 	p = add_history(FAILTEST_FCNTL, file, line, &call);
-	get_locks();
 
 	if (should_fail(p)) {
 		p->u.fcntl.ret = -1;
@@ -953,6 +1019,7 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
 		else
 			p->error = EDEADLK;
 	} else {
+		get_locks();
 		p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
 				       &p->u.fcntl.arg.fl);
 		if (p->u.fcntl.ret == -1)
@@ -972,54 +1039,44 @@ int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
 	return p->u.fcntl.ret;
 }
 
+pid_t failtest_getpid(const char *file, unsigned line)
+{
+	/* Reports the pid saved by failtest_init(): call that first! */
+	assert(orig_pid);
+	return orig_pid;	/* file and line are not used here */
+}
+	
 void failtest_init(int argc, char *argv[])
 {
 	unsigned int i;
 
+	orig_pid = getpid();
+		
 	for (i = 1; i < argc; i++) {
 		if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
 			failpath = argv[i] + strlen("--failpath=");
 		} else if (strcmp(argv[i], "--tracepath") == 0) {
 			tracefd = dup(STDERR_FILENO);
 			failtest_timeout_ms = -1;
+		} else if (!strncmp(argv[i], "--debugpath=",
+				    strlen("--debugpath="))) {
+			debugpath = argv[i] + strlen("--debugpath=");
 		}
 	}
-	gettimeofday(&start, NULL);
+	start = time_now();
 }
 
-/* Free up memory, so valgrind doesn't report leaks. */
-static void free_everything(void)
+bool failtest_has_failed(void)
 {
-	unsigned int i;
-
-	/* We don't do this in cleanup: needed even for failed opens. */
-	for (i = 0; i < history_num; i++) {
-		if (history[i].type == FAILTEST_OPEN)
-			free((char *)history[i].u.open.pathname);
-	}
-	free(history);
+	return control_fd != -1;
 }
 
 void failtest_exit(int status)
 {
-	int i;
-
-	if (control_fd == -1) {
-		free_everything();
-		exit(status);
-	}
-
 	if (failtest_exit_check) {
 		if (!failtest_exit_check(history, history_num))
 			child_fail(NULL, 0, "failtest_exit_check failed\n");
 	}
 
-	/* Cleanup everything, in reverse order. */
-	for (i = history_num - 1; i >= 0; i--)
-		if (history[i].cleanup)
-			history[i].cleanup(&history[i].u);
-
-	free_everything();
-	tell_parent(SUCCESS);
-	exit(0);
+	failtest_cleanup(false, status);
 }