10 #include <sys/types.h>
15 #include <ccan/read_write_all/read_write_all.h>
16 #include <ccan/failtest/failtest_proto.h>
17 #include <ccan/failtest/failtest.h>
18 #include <ccan/build_assert/build_assert.h>
/* User-overridable hook consulted before each injected failure;
 * defaults to failtest_default_hook. */
20 bool (*failtest_hook)(struct failtest_call *history, unsigned num)
21 = failtest_default_hook;
/* fd for --tracepath trace output; -1 when tracing is disabled. */
23 static int tracefd = -1;
/* Parent's poll() timeout waiting for the failing child, in ms.
 * Set to -1 (infinite, per poll() semantics) under --tracepath. */
25 unsigned int failtest_timeout_ms = 20000;
/* Wire header describing one recorded write (fd/offset/len — the field
 * list is not fully visible in this view). */
37 struct write_info_hdr {
/* NOTE(review): fragmentary view — this hdr field belongs to the
 * enclosing struct write_info, which also carries a data pointer. */
44 struct write_info_hdr hdr;
50 /* end is inclusive: you can't have a 0-byte lock. */
/* Optional user callback run at failtest_exit() to validate history. */
55 bool (*failtest_exit_check)(struct failtest_call *history, unsigned num);
/* Growable log of every intercepted call in this process. */
57 static struct failtest_call *history = NULL;
58 static unsigned int history_num = 0;
/* Pipe fd to our parent; -1 in the top-level (non-child) process. */
59 static int control_fd = -1;
/* Wall-clock start time, for trace timestamps. */
60 static struct timeval start;
/* Writes this process performed (reported up to the parent). */
62 static struct write_info *writes = NULL;
63 static unsigned int writes_num = 0;
/* Writes a failing child reported to us, replayed for comparison. */
65 static struct write_info *child_writes = NULL;
66 static unsigned int child_writes_num = 0;
/* pid currently holding the advisory locks (0 = nobody yet). */
68 static pid_t lock_owner;
/* Advisory-lock ranges we must be able to reacquire/release. */
69 static struct lock_info *locks = NULL;
70 static unsigned int lock_num = 0;
/* One letter per failtest_call_type, used to build --failpath strings. */
72 static const char info_to_arg[] = "mceoprwf";
74 /* Dummy call used for failtest_undo wrappers. */
75 static struct failtest_call unrecorded_call;
/* Append one intercepted call to the history log and return its slot.
 * elem is copied (elem_size bytes) into the union member for this type.
 * NOTE(review): realloc return is assigned straight back and unchecked —
 * on OOM this dereferences NULL; presumably acceptable for a test tool. */
77 static struct failtest_call *add_history_(enum failtest_call_type type,
83 /* NULL file is how we suppress failure. */
85 return &unrecorded_call;
87 history = realloc(history, (history_num + 1) * sizeof(*history));
88 history[history_num].type = type;
89 history[history_num].file = file;
90 history[history_num].line = line;
91 history[history_num].cleanup = NULL;
92 memcpy(&history[history_num].u, elem, elem_size);
93 return &history[history_num++];
/* Convenience wrapper: infers elem_size from the pointed-to type. */
96 #define add_history(type, file, line, elem) \
97 add_history_((type), (file), (line), (elem), sizeof(*(elem)))
/* Install a cleanup callback.  The (void)sizeof(clean((type *)NULL))
 * never evaluates clean at runtime; it only forces a compile error if
 * clean's parameter type doesn't match "type *". */
99 #define set_cleanup(call, clean, type) \
100 (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL)), (clean))
/* Default failure hook; body not visible here — presumably always
 * returns true ("go ahead and try failing this call"). TODO confirm. */
102 bool failtest_default_hook(struct failtest_call *history, unsigned num)
/* Read one write report (header + data) from a child over fd and append
 * it to child_writes.  Returns false if either read_all fails.
 * NOTE(review): realloc and malloc results are not checked before use. */
107 static bool read_write_info(int fd)
109 struct write_info_hdr hdr;
111 if (!read_all(fd, &hdr, sizeof(hdr)))
114 child_writes = realloc(child_writes,
115 (child_writes_num+1) * sizeof(child_writes[0]));
116 child_writes[child_writes_num].hdr = hdr;
117 child_writes[child_writes_num].data = malloc(hdr.len);
118 if (!read_all(fd, child_writes[child_writes_num].data, hdr.len))
/* Build a malloc'd --failpath string: one letter per history entry
 * (from info_to_arg), uppercased for calls that were made to fail.
 * Caller frees.  NOTE(review): malloc result unchecked. */
125 static char *failpath_string(void)
128 char *ret = malloc(history_num + 1);
130 for (i = 0; i < history_num; i++) {
131 ret[i] = info_to_arg[history[i].type];
133 ret[i] = toupper(ret[i]);
/* Send a status code up the control pipe, if we have a parent. */
139 static void tell_parent(enum info_type type)
141 if (control_fd != -1)
142 write_all(control_fd, &type, sizeof(type));
/* Report a child failure: print the formatted reason, the child's
 * captured output (out/outlen), and the --failpath reproduction string,
 * then notify the parent.  Does not return normally (exit elided here). */
145 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
148 char *path = failpath_string();
151 vfprintf(stderr, fmt, ap);
154 fprintf(stderr, "%.*s", (int)outlen, out);
155 printf("To reproduce: --failpath=%s\n", path);
157 tell_parent(FAILURE);
/* SIGUSR1 handler while a child runs; body not visible — presumably
 * forwards the signal down to the child.  TODO confirm. */
163 static void hand_down(int signal)
/* Drop all tracked advisory locks.  If we own them, F_UNLCK each range;
 * otherwise ask our parent (the holder) to release them via the control
 * pipe. */
168 static void release_locks(void)
170 /* Locks were never acquired/reacquired? */
174 /* We own them? Release them all. */
175 if (lock_owner == getpid()) {
179 fl.l_whence = SEEK_SET;
183 for (i = 0; i < lock_num; i++)
184 fcntl(locks[i].fd, F_SETLK, &fl);
186 /* Our parent must have them; pass request up. */
187 enum info_type type = RELEASE_LOCKS;
188 assert(control_fd != -1);
189 write_all(control_fd, &type, sizeof(type));
/* off_t is a signed type.  Getting its maximum is non-trivial, so derive
 * it from the type's width instead of hard-coding it.  (The previous
 * hard-coded constants 0x7FFFFFF and 0x7FFFFFFFFFFFFFF were each one
 * nibble short of the true 32-/64-bit maxima 0x7FFFFFFF and
 * 0x7FFFFFFFFFFFFFFF, so "whole file" lock ranges ended early.) */
static off_t off_max(void)
{
	/* Maximum of a signed two's-complement type: every bit set except
	 * the sign bit.  Assumes 8-bit bytes; works for any off_t width up
	 * to 64 bits, so the old 4-or-8-bytes build assertion is no longer
	 * needed. */
	return (off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1);
}
/* (Re)acquire every tracked lock in this process.  If another pid holds
 * them, first ask it (via the control pipe) to release, then F_SETLKW
 * each recorded range.  locks[i].end == off_max() encodes "to EOF"
 * (l_len = 0); otherwise l_len is the inclusive-range length. */
204 static void get_locks(void)
209 if (lock_owner == getpid())
212 if (lock_owner != 0) {
213 enum info_type type = RELEASE_LOCKS;
214 assert(control_fd != -1);
215 write_all(control_fd, &type, sizeof(type));
218 fl.l_whence = SEEK_SET;
220 for (i = 0; i < lock_num; i++) {
221 fl.l_type = locks[i].type;
222 fl.l_start = locks[i].start;
223 if (locks[i].end == off_max())
226 fl.l_len = locks[i].end - locks[i].start + 1;
228 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
231 lock_owner = getpid();
/* Write str to the trace fd, retrying on short/failed writes; aborts
 * via err() if the write cannot make progress. */
234 static void trace_str(const char *str)
238 while ((ret = write(tracefd, str, strlen(str))) <= 0) {
243 err(1, "Writing trace.");
/* Decide whether this intercepted call should fail.  Three modes:
 * - explicit --failpath string: consume one letter per call, case
 *   selects pass/fail, mismatched letters abort;
 * - failtest_hook veto;
 * - otherwise fork: the CHILD returns true (and fails the call) with
 *   its stdout/stderr redirected into a pipe, while the PARENT polls
 *   the output and control pipes, collects the child's writes and lock
 *   requests, then inspects the exit status.  Parent returns false and
 *   the program continues un-failed. */
246 static bool should_fail(struct failtest_call *call)
249 int control[2], output[2];
250 enum info_type type = UNEXPECTED;
/* Calls suppressed via failtest_undo wrappers never fail. */
254 if (call == &unrecorded_call)
258 /* + means continue after end, like normal. */
259 if (*failpath == '+')
262 if (tolower(*failpath) != info_to_arg[call->type])
263 errx(1, "Failpath expected '%c' got '%c'\n",
264 info_to_arg[call->type], *failpath)
265 call->fail = isupper(*(failpath++));
270 if (!failtest_hook(history, history_num)) {
275 /* We're going to fail in the child. */
277 if (pipe(control) != 0 || pipe(output) != 0)
278 err(1, "opening pipe");
280 /* Prevent double-printing (in child and parent) */
284 err(1, "forking failed");
/* Parent, tracing: emit "pid (elapsed): failpath (file:line)". */
290 gettimeofday(&now, NULL);
291 if (now.tv_usec < start.tv_usec) {
293 now.tv_usec += 1000000;
295 now.tv_usec -= start.tv_usec;
296 now.tv_sec -= start.tv_sec;
297 sprintf(str, "%u (%u.%02u): ", getpid(),
298 (int)now.tv_sec, (int)now.tv_usec / 10000);
300 p = failpath_string();
304 p = strchr(history[history_num-1].file, '/');
308 trace_str(history[history_num-1].file);
309 sprintf(str, ":%u)\n", history[history_num-1].line);
/* Child: capture all output into the pipe, keep the control fd. */
314 dup2(output[1], STDOUT_FILENO);
315 dup2(output[1], STDERR_FILENO);
316 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
318 control_fd = control[1];
322 signal(SIGUSR1, hand_down);
327 /* We grab output so we can display it; we grab writes so we
330 struct pollfd pfd[2];
333 pfd[0].fd = output[0];
334 pfd[0].events = POLLIN|POLLHUP;
335 pfd[1].fd = control[0];
336 pfd[1].events = POLLIN|POLLHUP;
/* After SUCCESS only the output pipe still matters. */
339 ret = poll(pfd, 1, failtest_timeout_ms);
341 ret = poll(pfd, 2, failtest_timeout_ms);
346 if (pfd[0].revents & POLLIN) {
349 out = realloc(out, outlen + 8192);
350 len = read(output[0], out + outlen, 8192);
352 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
353 if (read_all(control[0], &type, sizeof(type))) {
355 if (!read_write_info(control[0]))
357 } else if (type == RELEASE_LOCKS) {
359 /* FIXME: Tell them we're done... */
362 } else if (pfd[0].revents & POLLHUP) {
365 } while (type != FAILURE);
369 waitpid(child, &status, 0);
370 if (!WIFEXITED(status))
371 child_fail(out, outlen, "Killed by signal %u: ",
373 /* Child printed failure already, just pass up exit code. */
374 if (type == FAILURE) {
375 fprintf(stderr, "%.*s", (int)outlen, out);
377 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
379 if (WEXITSTATUS(status) != 0)
380 child_fail(out, outlen, "Exited with status %i: ",
381 WEXITSTATUS(status));
384 signal(SIGUSR1, SIG_DFL);
386 /* We continue onwards without failing. */
/* Undo a recorded calloc (frees the allocation; body elided here). */
391 static void cleanup_calloc(struct calloc_call *call)
/* calloc() wrapper: records the call, and either injects a NULL-return
 * failure (in the failing child) or performs the real calloc and
 * registers cleanup_calloc so it can be undone. */
396 void *failtest_calloc(size_t nmemb, size_t size,
397 const char *file, unsigned line)
399 struct failtest_call *p;
400 struct calloc_call call;
403 p = add_history(FAILTEST_CALLOC, file, line, &call);
405 if (should_fail(p)) {
406 p->u.calloc.ret = NULL;
409 p->u.calloc.ret = calloc(nmemb, size);
410 set_cleanup(p, cleanup_calloc, struct calloc_call);
413 return p->u.calloc.ret;
/* Undo a recorded malloc (frees the allocation; body elided here). */
416 static void cleanup_malloc(struct malloc_call *call)
/* malloc() wrapper: same shape as failtest_calloc.
 * NOTE(review): stores the result in u.calloc.ret rather than
 * u.malloc.ret — works only if the union members are layout-compatible;
 * inconsistent with the FAILTEST_MALLOC case in fixup_ptr_history,
 * which reads u.malloc.ret.  Worth unifying. */
421 void *failtest_malloc(size_t size, const char *file, unsigned line)
423 struct failtest_call *p;
424 struct malloc_call call;
427 p = add_history(FAILTEST_MALLOC, file, line, &call);
428 if (should_fail(p)) {
429 p->u.calloc.ret = NULL;
432 p->u.calloc.ret = malloc(size);
433 set_cleanup(p, cleanup_malloc, struct malloc_call);
436 return p->u.calloc.ret;
/* Undo a recorded realloc (body elided here). */
439 static void cleanup_realloc(struct realloc_call *call)
444 /* Walk back and find out if we got this ptr from a previous routine. */
/* Clears the cleanup on whichever earlier malloc/calloc/realloc produced
 * ptr, since ownership has moved on (it was realloc'd or freed).
 * NOTE(review): loop requires i to be a signed type for "i >= 0" to
 * terminate — declaration not visible here; confirm. */
445 static void fixup_ptr_history(void *ptr, unsigned int last)
449 /* Start at end of history, work back. */
450 for (i = last - 1; i >= 0; i--) {
451 switch (history[i].type) {
452 case FAILTEST_REALLOC:
453 if (history[i].u.realloc.ret == ptr) {
454 history[i].cleanup = NULL;
458 case FAILTEST_MALLOC:
459 if (history[i].u.malloc.ret == ptr) {
460 history[i].cleanup = NULL;
464 case FAILTEST_CALLOC:
465 if (history[i].u.calloc.ret == ptr) {
466 history[i].cleanup = NULL;
/* realloc() wrapper: records the call, may inject a NULL failure,
 * otherwise detaches ptr from its originating history entry (the old
 * cleanup would double-free) before doing the real realloc. */
476 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
478 struct failtest_call *p;
479 struct realloc_call call;
481 p = add_history(FAILTEST_REALLOC, file, line, &call);
483 /* FIXME: Try one child moving allocation, one not. */
484 if (should_fail(p)) {
485 p->u.realloc.ret = NULL;
488 fixup_ptr_history(ptr, history_num-1);
489 p->u.realloc.ret = realloc(ptr, size);
490 set_cleanup(p, cleanup_realloc, struct realloc_call);
493 return p->u.realloc.ret;
/* free() wrapper: detach ptr's history entry so its cleanup doesn't
 * later free the now-dead pointer (actual free elided in this view). */
496 void failtest_free(void *ptr)
498 fixup_ptr_history(ptr, history_num);
/* Undo a recorded open (closes the fd; body elided here). */
502 static void cleanup_open(struct open_call *call)
/* open() wrapper (variadic like the real open: mode follows only when
 * O_CREAT is set).  Records the call with a strdup'd pathname, may
 * inject a failure, else opens for real; dup_fd mirrors ret so the file
 * can still be restored after failtest_close(). */
507 int failtest_open(const char *pathname,
508 const char *file, unsigned line, ...)
510 struct failtest_call *p;
511 struct open_call call;
514 call.pathname = strdup(pathname);
516 call.flags = va_arg(ap, int);
517 if (call.flags & O_CREAT) {
518 call.mode = va_arg(ap, mode_t);
521 p = add_history(FAILTEST_OPEN, file, line, &call);
522 /* Avoid memory leak! */
523 if (p == &unrecorded_call)
524 free((char *)call.pathname);
525 if (should_fail(p)) {
527 /* FIXME: Play with error codes? */
530 p->u.open.ret = open(pathname, call.flags, call.mode);
531 set_cleanup(p, cleanup_open, struct open_call);
532 p->u.open.dup_fd = p->u.open.ret;
535 return p->u.open.ret;
/* Undo a recorded pipe: close whichever ends weren't already closed. */
538 static void cleanup_pipe(struct pipe_call *call)
540 if (!call->closed[0])
542 if (!call->closed[1])
/* pipe() wrapper: records, may inject failure, else creates the pipe
 * and tracks both fds so cleanup can close them. */
546 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
548 struct failtest_call *p;
549 struct pipe_call call;
551 p = add_history(FAILTEST_PIPE, file, line, &call);
552 if (should_fail(p)) {
554 /* FIXME: Play with error codes? */
557 p->u.pipe.ret = pipe(p->u.pipe.fds);
558 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
559 set_cleanup(p, cleanup_pipe, struct pipe_call);
561 /* This causes valgrind to notice if they use pipefd[] after failure */
562 memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
564 return p->u.pipe.ret;
/* Undo a recorded read by restoring the file offset it started from. */
567 static void cleanup_read(struct read_call *call)
569 lseek(call->fd, call->off, SEEK_SET);
/* pread() wrapper: records, may inject failure, else reads for real and
 * registers the offset-restoring cleanup. */
572 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
573 const char *file, unsigned line)
575 struct failtest_call *p;
576 struct read_call call;
581 p = add_history(FAILTEST_READ, file, line, &call);
583 /* FIXME: Try partial read returns. */
584 if (should_fail(p)) {
588 p->u.read.ret = pread(fd, buf, count, off);
589 set_cleanup(p, cleanup_read, struct read_call);
592 return p->u.read.ret;
/* Append an empty slot to the writes array and return it.
 * NOTE(review): realloc return unchecked, same pattern as add_history_. */
595 static struct write_info *new_write(void)
597 writes = realloc(writes, (writes_num + 1) * sizeof(*writes));
598 return &writes[writes_num++];
/* Undo a recorded write: restore the saved bytes at the original
 * offset, truncate back to the old length, and free the saved copy.
 * Uses dup_fd, which survives failtest_close() of the original fd. */
601 static void cleanup_write(struct write_call *call)
603 lseek(call->dup_fd, call->off, SEEK_SET);
604 write(call->dup_fd, call->saved_contents, call->saved_len);
605 lseek(call->dup_fd, call->off, SEEK_SET);
606 ftruncate(call->dup_fd, call->old_filelen);
607 free(call->saved_contents);
/* pwrite() wrapper.  Before writing: snapshot the bytes about to be
 * overwritten (unless off == -1, i.e. a non-seekable fd) so cleanup can
 * restore them; if we're a child, report the write to the parent.  If
 * this call is chosen to fail, fail it; otherwise, if the parent has a
 * pending child-write record, verify we are writing exactly what the
 * failing child wrote (same fd/offset/len/bytes), then do the real
 * pwrite. */
610 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
611 const char *file, unsigned line)
613 struct failtest_call *p;
614 struct write_call call;
616 call.fd = call.dup_fd = fd;
620 p = add_history(FAILTEST_WRITE, file, line, &call);
622 /* Save old contents if we can */
623 if (p->u.write.off != -1) {
625 p->u.write.old_filelen = lseek(fd, 0, SEEK_END);
627 /* Write past end of file? Nothing to save.*/
628 if (p->u.write.old_filelen <= p->u.write.off)
629 p->u.write.saved_len = 0;
630 /* Write which goes over end of file? Partial save. */
631 else if (p->u.write.off + count > p->u.write.old_filelen)
632 p->u.write.saved_len = p->u.write.old_filelen
636 p->u.write.saved_len = count;
638 p->u.write.saved_contents = malloc(p->u.write.saved_len);
639 lseek(fd, p->u.write.off, SEEK_SET);
640 ret = read(fd, p->u.write.saved_contents, p->u.write.saved_len);
641 if (ret != p->u.write.saved_len)
642 err(1, "Expected %i bytes, got %i",
643 (int)p->u.write.saved_len, (int)ret);
644 lseek(fd, p->u.write.off, SEEK_SET);
645 set_cleanup(p, cleanup_write, struct write_call);
648 /* If we're a child, tell parent about write. */
649 if (control_fd != -1) {
650 struct write_info *winfo = new_write();
651 enum info_type type = WRITE;
653 winfo->hdr.len = count;
655 winfo->data = malloc(count);
656 memcpy(winfo->data, buf, count);
657 winfo->hdr.offset = off;
658 write_all(control_fd, &type, sizeof(type));
659 write_all(control_fd, &winfo->hdr, sizeof(winfo->hdr));
660 write_all(control_fd, winfo->data, count);
663 /* FIXME: Try partial write returns. */
664 if (should_fail(p)) {
668 /* FIXME: We assume same write order in parent and child */
669 if (child_writes_num != 0) {
670 if (child_writes[0].hdr.fd != fd)
671 errx(1, "Child wrote to fd %u, not %u?",
672 child_writes[0].hdr.fd, fd);
673 if (child_writes[0].hdr.offset != p->u.write.off)
674 errx(1, "Child wrote to offset %zu, not %zu?",
675 (size_t)child_writes[0].hdr.offset,
676 (size_t)p->u.write.off);
677 if (child_writes[0].hdr.len != count)
678 errx(1, "Child wrote length %zu, not %zu?",
679 child_writes[0].hdr.len, count);
680 if (memcmp(child_writes[0].data, buf, count)) {
682 "Child wrote differently to"
683 " fd %u than we did!\n", fd);
/* Consume the matched record: free data, shift remainder down. */
685 free(child_writes[0].data);
687 memmove(&child_writes[0], &child_writes[1],
688 sizeof(child_writes[0]) * child_writes_num);
690 /* If this is a socket or pipe, child wrote it
692 if (p->u.write.off == (off_t)-1) {
693 p->u.write.ret = count;
695 return p->u.write.ret;
698 p->u.write.ret = pwrite(fd, buf, count, off);
701 return p->u.write.ret;
/* read()/write() wrappers: delegate to the p{read,write} versions at
 * the fd's current offset (so the save/restore machinery applies). */
704 ssize_t failtest_read(int fd, void *buf, size_t count,
705 const char *file, unsigned line)
707 return failtest_pread(fd, buf, count, lseek(fd, 0, SEEK_CUR),
711 ssize_t failtest_write(int fd, const void *buf, size_t count,
712 const char *file, unsigned line)
714 return failtest_pwrite(fd, buf, count, lseek(fd, 0, SEEK_CUR),
/* Merge a new lock/unlock over [start,end] (inclusive) into the locks
 * array: carve the range out of every overlapping entry (splitting,
 * trimming, or deleting as needed), then append a fresh entry unless
 * this was an F_UNLCK.  Returns the (possibly realloc-moved) array. */
718 static struct lock_info *WARN_UNUSED_RESULT
719 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
724 for (i = 0; i < lock_num; i++) {
729 /* Four cases we care about:
743 if (start > l->start && end < l->end) {
744 /* Mid overlap: trim entry, add new one. */
745 off_t new_start, new_end;
749 locks = add_lock(locks,
750 fd, new_start, new_end, l->type);
752 } else if (start <= l->start && end >= l->end) {
753 /* Total overlap: eliminate entry. */
756 } else if (end >= l->start && end < l->end) {
757 /* Start overlap: trim entry. */
759 } else if (start > l->start && start <= l->end) {
760 /* End overlap: trim entry. */
763 /* Nothing left? Remove it. */
764 if (l->end < l->start) {
765 memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
770 if (type != F_UNLCK) {
771 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
772 l = &locks[lock_num++];
781 /* We only trap this so we can dup fds in case we need to restore. */
/* close() wrapper.  Walks history newest-first to find where fd came
 * from: writes needing cleanup get a dup'd fd saved so the file can be
 * restored after close; reads on this fd lose their cleanup; pipe/open
 * entries record the close.  Finally any locks on fd are dropped from
 * our tracking (the kernel drops them on close anyway). */
782 int failtest_close(int fd)
789 /* Trace history to find source of fd, and if we need to cleanup writes. */
790 for (i = history_num-1; i >= 0; i--) {
791 switch (history[i].type) {
793 if (history[i].u.write.fd != fd)
795 if (!history[i].cleanup)
797 /* We need to save fd so we can restore file. */
800 history[i].u.write.dup_fd = new_fd;
803 /* We don't need to cleanup reads on closed fds. */
804 if (history[i].u.read.fd != fd)
806 history[i].cleanup = NULL;
809 /* From a pipe? We don't ever restore pipes... */
810 if (history[i].u.pipe.fds[0] == fd) {
811 assert(new_fd == -1);
812 history[i].u.pipe.closed[0] = true;
815 if (history[i].u.pipe.fds[1] == fd) {
816 assert(new_fd == -1);
817 history[i].u.pipe.closed[1] = true;
822 if (history[i].u.open.ret == fd) {
823 history[i].u.open.dup_fd = new_fd;
833 locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
837 /* Zero length means "to end of file" */
/* Convert a flock-style (start, len) into an inclusive end offset;
 * the len == 0 ("to EOF") branch is elided from this view —
 * presumably it returns off_max().  TODO confirm. */
838 static off_t end_of(off_t start, off_t len)
842 return start + len - 1;
845 /* FIXME: This only handles locks, really. */
/* fcntl() wrapper.  Non-locking commands (long-arg, no-arg, F_GETLK)
 * pass straight through; only F_SETLK/F_SETLKW go through the
 * fail/record machinery.  On a successful real F_SETLK(W), the lock
 * range is merged into our tracking via add_lock; unknown commands
 * abort so they get implemented here. */
846 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
848 struct failtest_call *p;
849 struct fcntl_call call;
855 /* Argument extraction. */
860 call.arg.l = va_arg(ap, long);
862 return fcntl(fd, cmd, call.arg.l);
865 return fcntl(fd, cmd);
869 call.arg.fl = *va_arg(ap, struct flock *);
871 return fcntl(fd, cmd, &call.arg.fl);
875 call.arg.fl = *va_arg(ap, struct flock *);
879 /* This means you need to implement it here. */
880 err(1, "failtest: unknown fcntl %u", cmd);
883 p = add_history(FAILTEST_FCNTL, file, line, &call);
886 if (should_fail(p)) {
/* Injected failure: F_SETLK fails EAGAIN-style (exact errno elided). */
888 if (p->u.fcntl.cmd == F_SETLK)
893 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
895 if (p->u.fcntl.ret == -1)
898 /* We don't handle anything else yet. */
899 assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
900 locks = add_lock(locks,
902 p->u.fcntl.arg.fl.l_start,
903 end_of(p->u.fcntl.arg.fl.l_start,
904 p->u.fcntl.arg.fl.l_len),
905 p->u.fcntl.arg.fl.l_type);
909 return p->u.fcntl.ret;
/* Parse failtest's own command-line options (--failpath=<letters> to
 * replay a failure, --tracepath to trace to a dup of stderr with no
 * child timeout) and record the start time for trace timestamps. */
912 void failtest_init(int argc, char *argv[])
916 for (i = 1; i < argc; i++) {
917 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
918 failpath = argv[i] + strlen("--failpath=");
919 } else if (strcmp(argv[i], "--tracepath") == 0) {
920 tracefd = dup(STDERR_FILENO);
921 failtest_timeout_ms = -1;
924 gettimeofday(&start, NULL);
927 /* Free up memory, so valgrind doesn't report leaks. */
/* Release the global writes data and the strdup'd open() pathnames
 * (pathnames are freed here, not in cleanup, because even failed opens
 * allocated one). */
928 static void free_everything(void)
932 for (i = 0; i < writes_num; i++) {
933 free(writes[i].data);
937 /* We don't do this in cleanup: needed even for failed opens. */
938 for (i = 0; i < history_num; i++) {
939 if (history[i].type == FAILTEST_OPEN)
940 free((char *)history[i].u.open.pathname);
/* Exit path for a failing child: run the optional exit-check hook, run
 * every pending cleanup newest-first (restoring files/offsets/fds),
 * then report SUCCESS to the parent.  The control_fd == -1 branch
 * handles the top-level process (details elided in this view). */
945 void failtest_exit(int status)
949 if (control_fd == -1) {
954 if (failtest_exit_check) {
955 if (!failtest_exit_check(history, history_num))
956 child_fail(NULL, 0, "failtest_exit_check failed\n");
959 /* Cleanup everything, in reverse order. */
960 for (i = history_num - 1; i >= 0; i--)
961 if (history[i].cleanup)
962 history[i].cleanup(&history[i].u);
965 tell_parent(SUCCESS);