]> git.ozlabs.org Git - ccan/blob - ccan/failtest/failtest.c
failtest: override getpid() as well.
[ccan] / ccan / failtest / failtest.c
1 #include "config.h"
2 #include <stdarg.h>
3 #include <string.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include <ctype.h>
7 #include <err.h>
8 #include <unistd.h>
9 #include <poll.h>
10 #include <errno.h>
11 #include <sys/types.h>
12 #include <sys/wait.h>
13 #include <sys/stat.h>
14 #include <sys/time.h>
15 #include <signal.h>
16 #include <assert.h>
17 #include <ccan/read_write_all/read_write_all.h>
18 #include <ccan/failtest/failtest_proto.h>
19 #include <ccan/failtest/failtest.h>
20 #include <ccan/build_assert/build_assert.h>
21
/* Optional user hook: should_fail() calls it with the history so far and
 * acts on the returned FAIL_OK / FAIL_DONT_FAIL / FAIL_PROBE verdict. */
enum failtest_result (*failtest_hook)(struct failtest_call *, unsigned);

/* File descriptor trace() writes to; -1 means tracing is off. */
static int tracefd = -1;

/* Timeout handed to poll() while the parent waits on a child. */
unsigned int failtest_timeout_ms = 20000;

/* Scripted failure path, consumed one character per call by should_fail(). */
const char *failpath;
/* History pattern at which should_fail() tries to attach a debugger. */
const char *debugpath;
/* Message tags sent from a child to its parent over the control pipe. */
enum info_type {
        /* Followed by a struct write_call and the written bytes. */
        WRITE = 0,
        /* Ask the parent to drop the file locks it holds. */
        RELEASE_LOCKS = 1,
        /* Sent by child_fail() before exiting. */
        FAILURE = 2,
        /* Sent by failtest_cleanup() before exiting. */
        SUCCESS = 3,
        /* Initial value in should_fail() before any message arrives. */
        UNEXPECTED = 4
};
38
/* One recorded byte-range lock on a file descriptor. */
struct lock_info {
        /* Descriptor the lock was taken on. */
        int fd;
        /* First locked byte. */
        off_t start;
        /* Last locked byte; end is inclusive: you can't have a 0-byte lock. */
        off_t end;
        /* Lock type as stored into struct flock l_type by get_locks(). */
        int type;
};
45
46 bool (*failtest_exit_check)(struct failtest_call *history, unsigned num);
47
48 static struct failtest_call *history = NULL;
49 static unsigned int history_num = 0;
50 static int control_fd = -1;
51 static struct timeval start;
52 static unsigned int probe_count = 0;
53
54 static struct write_call *child_writes = NULL;
55 static unsigned int child_writes_num = 0;
56
57 static pid_t lock_owner;
58 static struct lock_info *locks = NULL;
59 static unsigned int lock_num = 0;
60
61 static pid_t orig_pid;
62
63 static const char info_to_arg[] = "mceoxprwf";
64
65 /* Dummy call used for failtest_undo wrappers. */
66 static struct failtest_call unrecorded_call;
67
68 static struct failtest_call *add_history_(enum failtest_call_type type,
69                                           const char *file,
70                                           unsigned int line,
71                                           const void *elem,
72                                           size_t elem_size)
73 {
74         /* NULL file is how we suppress failure. */
75         if (!file)
76                 return &unrecorded_call;
77
78         history = realloc(history, (history_num + 1) * sizeof(*history));
79         history[history_num].type = type;
80         history[history_num].file = file;
81         history[history_num].line = line;
82         history[history_num].cleanup = NULL;
83         memcpy(&history[history_num].u, elem, elem_size);
84         return &history[history_num++];
85 }
86
/* Record a call; the copy size is taken from the pointed-to argument struct. */
#define add_history(type, file, line, elem)                             \
        add_history_((type), (file), (line), (elem), sizeof(*(elem)))

/* Install a cleanup handler on a call record.
 * We do a fake call inside a sizeof(), to check types: clean must accept
 * a (type *), but the call is never evaluated. */
#define set_cleanup(call, clean, type)                                  \
        (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
93
94 static bool read_write_info(int fd)
95 {
96         struct write_call *w;
97         char *buf;
98
99         /* We don't need all of this, but it's simple. */
100         child_writes = realloc(child_writes,
101                                (child_writes_num+1) * sizeof(child_writes[0]));
102         w = &child_writes[child_writes_num];
103         if (!read_all(fd, w, sizeof(*w)))
104                 return false;
105
106         w->buf = buf = malloc(w->count);
107         if (!read_all(fd, buf, w->count))
108                 return false;
109
110         child_writes_num++;
111         return true;
112 }
113
114 static char *failpath_string(void)
115 {
116         unsigned int i;
117         char *ret = malloc(history_num + 1);
118
119         for (i = 0; i < history_num; i++) {
120                 ret[i] = info_to_arg[history[i].type];
121                 if (history[i].fail)
122                         ret[i] = toupper(ret[i]);
123         }
124         ret[i] = '\0';
125         return ret;
126 }
127
128 static void tell_parent(enum info_type type)
129 {
130         if (control_fd != -1)
131                 write_all(control_fd, &type, sizeof(type));
132 }
133
134 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
135 {
136         va_list ap;
137         char *path = failpath_string();
138
139         va_start(ap, fmt);
140         vfprintf(stderr, fmt, ap);
141         va_end(ap);
142
143         fprintf(stderr, "%.*s", (int)outlen, out);
144         printf("To reproduce: --failpath=%s\n", path);
145         free(path);
146         tell_parent(FAILURE);
147         exit(1);
148 }
149
150 static void trace(const char *fmt, ...)
151 {
152         va_list ap;
153
154         if (tracefd == -1)
155                 return;
156
157         va_start(ap, fmt);
158         vdprintf(tracefd, fmt, ap);
159         va_end(ap);
160 }
161
/* Pid returned by the most recent fork() in should_fail(). */
static pid_t child;

/* Forward signum to the current child.  Installed as the SIGUSR1 handler
 * while waiting on a child, and also called directly on poll timeout. */
static void hand_down(int signum)
{
        (void)kill(child, signum);
}
168
169 static void release_locks(void)
170 {
171         /* Locks were never acquired/reacquired? */
172         if (lock_owner == 0)
173                 return;
174
175         /* We own them?  Release them all. */
176         if (lock_owner == getpid()) {
177                 unsigned int i;
178                 struct flock fl;
179                 fl.l_type = F_UNLCK;
180                 fl.l_whence = SEEK_SET;
181                 fl.l_start = 0;
182                 fl.l_len = 0;
183
184                 for (i = 0; i < lock_num; i++)
185                         fcntl(locks[i].fd, F_SETLK, &fl);
186         } else {
187                 /* Our parent must have them; pass request up. */
188                 enum info_type type = RELEASE_LOCKS;
189                 assert(control_fd != -1);
190                 write_all(control_fd, &type, sizeof(type));
191         }
192         lock_owner = 0;
193 }
194
/*
 * Return the largest value an off_t can hold.
 *
 * off_t is a signed type with no standard *_MAX macro, so the value is
 * computed as 2^(bits-1) - 1 from its size rather than spelled out as a
 * hex literal (where a miscounted digit silently gives the wrong limit).
 * POSIX fixes bytes at 8 bits.
 */
static off_t off_max(void)
{
        /* Compile-time check: the shift below must fit in unsigned long long. */
        typedef char off_t_fits_ull[sizeof(off_t) <= sizeof(unsigned long long)
                                    ? 1 : -1];
        const unsigned int value_bits = (unsigned int)sizeof(off_t) * 8 - 1;

        (void)sizeof(off_t_fits_ull);
        return (off_t)((1ULL << value_bits) - 1);
}
204
205 static void get_locks(void)
206 {
207         unsigned int i;
208         struct flock fl;
209
210         if (lock_owner == getpid())
211                 return;
212
213         if (lock_owner != 0) {
214                 enum info_type type = RELEASE_LOCKS;
215                 assert(control_fd != -1);
216                 write_all(control_fd, &type, sizeof(type));
217         }
218
219         fl.l_whence = SEEK_SET;
220
221         for (i = 0; i < lock_num; i++) {
222                 fl.l_type = locks[i].type;
223                 fl.l_start = locks[i].start;
224                 if (locks[i].end == off_max())
225                         fl.l_len = 0;
226                 else
227                         fl.l_len = locks[i].end - locks[i].start + 1;
228
229                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
230                         abort();
231         }
232         lock_owner = getpid();
233 }
234
/* Snapshot of one open file, kept in a singly linked list. */
struct saved_file {
        /* Next snapshot in the list, or NULL. */
        struct saved_file *next;
        /* Descriptor the snapshot was taken from. */
        int fd;
        /* malloc'ed copy of the file's bytes. */
        void *contents;
        /* File position at snapshot time. */
        off_t off;
        /* File length at snapshot time. */
        off_t len;
};
241
242 static struct saved_file *save_file(struct saved_file *next, int fd)
243 {
244         struct saved_file *s = malloc(sizeof(*s));
245
246         s->next = next;
247         s->fd = fd;
248         s->off = lseek(fd, 0, SEEK_CUR);
249         /* Special file?  Erk... */
250         assert(s->off != -1);
251         s->len = lseek(fd, 0, SEEK_END);
252         lseek(fd, 0, SEEK_SET);
253         s->contents = malloc(s->len);
254         if (read(fd, s->contents, s->len) != s->len)
255                 err(1, "Failed to save %zu bytes", (size_t)s->len);
256         lseek(fd, s->off, SEEK_SET);
257         return s;
258 }
259         
260 /* We have little choice but to save and restore open files: mmap means we
261  * can really intercept changes in the child.
262  *
263  * We could do non-mmap'ed files on demand, however. */
264 static struct saved_file *save_files(void)
265 {
266         struct saved_file *files = NULL;
267         int i;
268
269         /* Figure out the set of live fds. */
270         for (i = history_num - 2; i >= 0; i--) {
271                 if (history[i].type == FAILTEST_OPEN) {
272                         int fd = history[i].u.open.ret;
273                         /* Only do successful, writable fds. */
274                         if (fd < 0)
275                                 continue;
276
277                         /* If it was closed, cleanup == NULL. */
278                         if (!history[i].cleanup)
279                                 continue;
280
281                         if ((history[i].u.open.flags & O_RDWR) == O_RDWR) {
282                                 files = save_file(files, fd);
283                         } else if ((history[i].u.open.flags & O_WRONLY)
284                                    == O_WRONLY) {
285                                 /* FIXME: Handle O_WRONLY.  Open with O_RDWR? */
286                                 abort();
287                         }
288                 }
289         }
290
291         return files;
292 }
293
294 static void restore_files(struct saved_file *s)
295 {
296         while (s) {
297                 struct saved_file *next = s->next;
298
299                 lseek(s->fd, 0, SEEK_SET);
300                 if (write(s->fd, s->contents, s->len) != s->len)
301                         err(1, "Failed to restore %zu bytes", (size_t)s->len);
302                 if (ftruncate(s->fd, s->len) != 0)
303                         err(1, "Failed to trim file to length %zu",
304                             (size_t)s->len);
305                 free(s->contents);
306                 lseek(s->fd, s->off, SEEK_SET);
307                 free(s);
308                 s = next;
309         }
310 }
311
312 /* Free up memory, so valgrind doesn't report leaks. */
313 static void free_everything(void)
314 {
315         unsigned int i;
316
317         /* We don't do this in cleanup: needed even for failed opens. */
318         for (i = 0; i < history_num; i++) {
319                 if (history[i].type == FAILTEST_OPEN)
320                         free((char *)history[i].u.open.pathname);
321         }
322         free(history);
323 }
324
325 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
326 {
327         int i;
328
329         /* For children, we don't care if they "failed" the testing. */
330         if (control_fd != -1)
331                 status = 0;
332
333         if (forced_cleanup)
334                 history_num--;
335
336         /* Cleanup everything, in reverse order. */
337         for (i = history_num - 1; i >= 0; i--) {
338                 if (!history[i].cleanup)
339                         continue;
340                 if (!forced_cleanup) {
341                         printf("Leak at %s:%u: --failpath=%s\n",
342                                history[i].file, history[i].line,
343                                failpath_string());
344                         status = 1;
345                 }
346                 history[i].cleanup(&history[i].u);
347         }
348
349         free_everything();
350         tell_parent(SUCCESS);
351         exit(status);
352 }
353
354 static bool should_fail(struct failtest_call *call)
355 {
356         int status;
357         int control[2], output[2];
358         enum info_type type = UNEXPECTED;
359         char *out = NULL;
360         size_t outlen = 0;
361         struct saved_file *files;
362
363         /* Are we probing? */
364         if (probe_count && --probe_count == 0)
365                 failtest_cleanup(true, 0);
366
367         if (call == &unrecorded_call)
368                 return false;
369
370         if (failpath) {
371                 /* + means continue after end, like normal. */
372                 if (*failpath == '+')
373                         failpath = NULL;
374                 else if (*failpath == '\0') {
375                         /* Continue, but don't inject errors. */
376                         return call->fail = false;
377                 } else {
378                         if (tolower((unsigned char)*failpath)
379                             != info_to_arg[call->type])
380                                 errx(1, "Failpath expected '%c' got '%c'\n",
381                                      info_to_arg[call->type], *failpath);
382                         call->fail = isupper((unsigned char)*(failpath++));
383                         return call->fail;
384                 }
385         }
386
387         /* Attach debugger if they asked for it. */
388         if (debugpath && history_num == strlen(debugpath)) {
389                 unsigned int i;
390
391                 for (i = 0; i < history_num; i++) {
392                         unsigned char c = info_to_arg[history[i].type];
393                         if (history[i].fail)
394                                 c = toupper(c);
395                         if (c != debugpath[i])
396                                 break;
397                 }
398                 if (i == history_num) {
399                         char str[80];
400
401                         /* Don't timeout. */
402                         signal(SIGUSR1, SIG_IGN);
403                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
404                                 getpid(), getpid());
405                         if (system(str) == 0)
406                                 sleep(5);
407                 }
408         }
409
410         if (failtest_hook) {
411                 switch (failtest_hook(history, history_num)) {
412                 case FAIL_OK:
413                         break;
414                 case FAIL_DONT_FAIL:
415                         call->fail = false;
416                         return false;
417                 case FAIL_PROBE:
418                         /* Already down probe path?  Stop now. */
419                         if (probe_count)
420                                 failtest_cleanup(true, 0);
421                         /* FIXME: We should run *parent* and run probe until
422                          * calls match up again. */
423                         probe_count = 3;
424                         break;
425                 default:
426                         abort();
427                 }
428         }
429
430         files = save_files();
431
432         /* We're going to fail in the child. */
433         call->fail = true;
434         if (pipe(control) != 0 || pipe(output) != 0)
435                 err(1, "opening pipe");
436
437         /* Prevent double-printing (in child and parent) */
438         fflush(stdout);
439         child = fork();
440         if (child == -1)
441                 err(1, "forking failed");
442
443         if (child == 0) {
444                 if (tracefd != -1) {
445                         struct timeval now;
446                         const char *p;
447                         gettimeofday(&now, NULL);
448                         if (now.tv_usec < start.tv_usec) {
449                                 now.tv_sec--;
450                                 now.tv_usec += 1000000;
451                         }
452                         now.tv_usec -= start.tv_usec;
453                         now.tv_sec -= start.tv_sec;
454                         p = failpath_string();
455                         trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
456                               (int)now.tv_sec, (int)now.tv_usec / 10000, p);
457                         free((char *)p);
458                         p = strrchr(history[history_num-1].file, '/');
459                         if (p)
460                                 trace("%s", p+1);
461                         else
462                                 trace("%s", history[history_num-1].file);
463                         trace(":%u)\n", history[history_num-1].line);
464                 }
465                 close(control[0]);
466                 close(output[0]);
467                 dup2(output[1], STDOUT_FILENO);
468                 dup2(output[1], STDERR_FILENO);
469                 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
470                         close(output[1]);
471                 control_fd = control[1];
472                 return true;
473         }
474
475         signal(SIGUSR1, hand_down);
476
477         close(control[1]);
478         close(output[1]);
479
480         /* We grab output so we can display it; we grab writes so we
481          * can compare. */
482         do {
483                 struct pollfd pfd[2];
484                 int ret;
485
486                 pfd[0].fd = output[0];
487                 pfd[0].events = POLLIN|POLLHUP;
488                 pfd[1].fd = control[0];
489                 pfd[1].events = POLLIN|POLLHUP;
490
491                 if (type == SUCCESS)
492                         ret = poll(pfd, 1, failtest_timeout_ms);
493                 else
494                         ret = poll(pfd, 2, failtest_timeout_ms);
495
496                 if (ret == 0)
497                         hand_down(SIGUSR1);
498                 if (ret < 0) {
499                         if (errno == EINTR)
500                                 continue;
501                         err(1, "Poll returned %i", ret);
502                 }
503
504                 if (pfd[0].revents & POLLIN) {
505                         ssize_t len;
506
507                         out = realloc(out, outlen + 8192);
508                         len = read(output[0], out + outlen, 8192);
509                         outlen += len;
510                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
511                         if (read_all(control[0], &type, sizeof(type))) {
512                                 if (type == WRITE) {
513                                         if (!read_write_info(control[0]))
514                                                 break;
515                                 } else if (type == RELEASE_LOCKS) {
516                                         release_locks();
517                                         /* FIXME: Tell them we're done... */
518                                 }
519                         }
520                 } else if (pfd[0].revents & POLLHUP) {
521                         break;
522                 }
523         } while (type != FAILURE);
524
525         close(output[0]);
526         close(control[0]);
527         waitpid(child, &status, 0);
528         if (!WIFEXITED(status)) {
529                 if (WTERMSIG(status) == SIGUSR1)
530                         child_fail(out, outlen, "Timed out");
531                 else
532                         child_fail(out, outlen, "Killed by signal %u: ",
533                                    WTERMSIG(status));
534         }
535         /* Child printed failure already, just pass up exit code. */
536         if (type == FAILURE) {
537                 fprintf(stderr, "%.*s", (int)outlen, out);
538                 tell_parent(type);
539                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
540         }
541         if (WEXITSTATUS(status) != 0)
542                 child_fail(out, outlen, "Exited with status %i: ",
543                            WEXITSTATUS(status));
544
545         free(out);
546         signal(SIGUSR1, SIG_DFL);
547
548         restore_files(files);
549
550         /* We continue onwards without failing. */
551         call->fail = false;
552         return false;
553 }
554
555 static void cleanup_calloc(struct calloc_call *call)
556 {
557         free(call->ret);
558 }
559
560 void *failtest_calloc(size_t nmemb, size_t size,
561                       const char *file, unsigned line)
562 {
563         struct failtest_call *p;
564         struct calloc_call call;
565         call.nmemb = nmemb;
566         call.size = size;
567         p = add_history(FAILTEST_CALLOC, file, line, &call);
568
569         if (should_fail(p)) {
570                 p->u.calloc.ret = NULL;
571                 p->error = ENOMEM;
572         } else {
573                 p->u.calloc.ret = calloc(nmemb, size);
574                 set_cleanup(p, cleanup_calloc, struct calloc_call);
575         }
576         errno = p->error;
577         return p->u.calloc.ret;
578 }
579
580 static void cleanup_malloc(struct malloc_call *call)
581 {
582         free(call->ret);
583 }
584
585 void *failtest_malloc(size_t size, const char *file, unsigned line)
586 {
587         struct failtest_call *p;
588         struct malloc_call call;
589         call.size = size;
590
591         p = add_history(FAILTEST_MALLOC, file, line, &call);
592         if (should_fail(p)) {
593                 p->u.calloc.ret = NULL;
594                 p->error = ENOMEM;
595         } else {
596                 p->u.calloc.ret = malloc(size);
597                 set_cleanup(p, cleanup_malloc, struct malloc_call);
598         }
599         errno = p->error;
600         return p->u.calloc.ret;
601 }
602
603 static void cleanup_realloc(struct realloc_call *call)
604 {
605         free(call->ret);
606 }
607
608 /* Walk back and find out if we got this ptr from a previous routine. */
609 static void fixup_ptr_history(void *ptr, unsigned int last)
610 {
611         int i;
612
613         /* Start at end of history, work back. */
614         for (i = last - 1; i >= 0; i--) {
615                 switch (history[i].type) {
616                 case FAILTEST_REALLOC:
617                         if (history[i].u.realloc.ret == ptr) {
618                                 history[i].cleanup = NULL;
619                                 return;
620                         }
621                         break;
622                 case FAILTEST_MALLOC:
623                         if (history[i].u.malloc.ret == ptr) {
624                                 history[i].cleanup = NULL;
625                                 return;
626                         }
627                         break;
628                 case FAILTEST_CALLOC:
629                         if (history[i].u.calloc.ret == ptr) {
630                                 history[i].cleanup = NULL;
631                                 return;
632                         }
633                         break;
634                 default:
635                         break;
636                 }
637         }
638 }
639
640 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
641 {
642         struct failtest_call *p;
643         struct realloc_call call;
644         call.size = size;
645         p = add_history(FAILTEST_REALLOC, file, line, &call);
646
647         /* FIXME: Try one child moving allocation, one not. */
648         if (should_fail(p)) {
649                 p->u.realloc.ret = NULL;
650                 p->error = ENOMEM;
651         } else {
652                 fixup_ptr_history(ptr, history_num-1);
653                 p->u.realloc.ret = realloc(ptr, size);
654                 set_cleanup(p, cleanup_realloc, struct realloc_call);
655         }
656         errno = p->error;
657         return p->u.realloc.ret;
658 }
659
660 void failtest_free(void *ptr)
661 {
662         fixup_ptr_history(ptr, history_num);
663         free(ptr);
664 }
665
666 static void cleanup_open(struct open_call *call)
667 {
668         close(call->ret);
669 }
670
671 int failtest_open(const char *pathname,
672                   const char *file, unsigned line, ...)
673 {
674         struct failtest_call *p;
675         struct open_call call;
676         va_list ap;
677
678         call.pathname = strdup(pathname);
679         va_start(ap, line);
680         call.flags = va_arg(ap, int);
681         if (call.flags & O_CREAT) {
682                 call.mode = va_arg(ap, int);
683                 va_end(ap);
684         }
685         p = add_history(FAILTEST_OPEN, file, line, &call);
686         /* Avoid memory leak! */
687         if (p == &unrecorded_call)
688                 free((char *)call.pathname);
689         p->u.open.ret = open(pathname, call.flags, call.mode);
690
691         if (!failpath && p->u.open.ret == -1) {
692                 p->fail = false;
693                 p->error = errno;
694         } else if (should_fail(p)) {
695                 close(p->u.open.ret);
696                 p->u.open.ret = -1;
697                 /* FIXME: Play with error codes? */
698                 p->error = EACCES;
699         } else {
700                 set_cleanup(p, cleanup_open, struct open_call);
701         }
702         errno = p->error;
703         return p->u.open.ret;
704 }
705
706 static void cleanup_pipe(struct pipe_call *call)
707 {
708         if (!call->closed[0])
709                 close(call->fds[0]);
710         if (!call->closed[1])
711                 close(call->fds[1]);
712 }
713
714 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
715 {
716         struct failtest_call *p;
717         struct pipe_call call;
718
719         p = add_history(FAILTEST_PIPE, file, line, &call);
720         if (should_fail(p)) {
721                 p->u.open.ret = -1;
722                 /* FIXME: Play with error codes? */
723                 p->error = EMFILE;
724         } else {
725                 p->u.pipe.ret = pipe(p->u.pipe.fds);
726                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
727                 set_cleanup(p, cleanup_pipe, struct pipe_call);
728         }
729         /* This causes valgrind to notice if they use pipefd[] after failure */
730         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
731         errno = p->error;
732         return p->u.pipe.ret;
733 }
734
735 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
736                        const char *file, unsigned line)
737 {
738         struct failtest_call *p;
739         struct read_call call;
740         call.fd = fd;
741         call.buf = buf;
742         call.count = count;
743         call.off = off;
744         p = add_history(FAILTEST_READ, file, line, &call);
745
746         /* FIXME: Try partial read returns. */
747         if (should_fail(p)) {
748                 p->u.read.ret = -1;
749                 p->error = EIO;
750         } else {
751                 p->u.read.ret = pread(fd, buf, count, off);
752         }
753         errno = p->error;
754         return p->u.read.ret;
755 }
756
757 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
758                         const char *file, unsigned line)
759 {
760         struct failtest_call *p;
761         struct write_call call;
762
763         call.fd = fd;
764         call.buf = buf;
765         call.count = count;
766         call.off = off;
767         p = add_history(FAILTEST_WRITE, file, line, &call);
768
769         /* If we're a child, we need to make sure we write the same thing
770          * to non-files as the parent does, so tell it. */
771         if (control_fd != -1 && off == (off_t)-1) {
772                 enum info_type type = WRITE;
773
774                 write_all(control_fd, &type, sizeof(type));
775                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
776                 write_all(control_fd, buf, count);
777         }
778
779         /* FIXME: Try partial write returns. */
780         if (should_fail(p)) {
781                 p->u.write.ret = -1;
782                 p->error = EIO;
783         } else {
784                 /* FIXME: We assume same write order in parent and child */
785                 if (off == (off_t)-1 && child_writes_num != 0) {
786                         if (child_writes[0].fd != fd)
787                                 errx(1, "Child wrote to fd %u, not %u?",
788                                      child_writes[0].fd, fd);
789                         if (child_writes[0].off != p->u.write.off)
790                                 errx(1, "Child wrote to offset %zu, not %zu?",
791                                      (size_t)child_writes[0].off,
792                                      (size_t)p->u.write.off);
793                         if (child_writes[0].count != count)
794                                 errx(1, "Child wrote length %zu, not %zu?",
795                                      child_writes[0].count, count);
796                         if (memcmp(child_writes[0].buf, buf, count)) {
797                                 child_fail(NULL, 0,
798                                            "Child wrote differently to"
799                                            " fd %u than we did!\n", fd);
800                         }
801                         free((char *)child_writes[0].buf);
802                         child_writes_num--;
803                         memmove(&child_writes[0], &child_writes[1],
804                                 sizeof(child_writes[0]) * child_writes_num);
805
806                         /* Is this is a socket or pipe, child wrote it
807                            already. */
808                         if (p->u.write.off == (off_t)-1) {
809                                 p->u.write.ret = count;
810                                 errno = p->error;
811                                 return p->u.write.ret;
812                         }
813                 }
814                 p->u.write.ret = pwrite(fd, buf, count, off);
815         }
816         errno = p->error;
817         return p->u.write.ret;
818 }
819
/*
 * read() wrapper with failure injection, built on failtest_pread().
 *
 * The current file position is passed as the offset.  A positioned read
 * leaves the file position where it was, so after a successful read on a
 * seekable descriptor the position is moved past the bytes read, as
 * read() would have done.  errno as set by failtest_pread() is preserved.
 */
ssize_t failtest_read(int fd, void *buf, size_t count,
                      const char *file, unsigned line)
{
        off_t off = lseek(fd, 0, SEEK_CUR);
        ssize_t ret = failtest_pread(fd, buf, count, off, file, line);

        if (ret > 0 && off != (off_t)-1) {
                int saved_errno = errno;

                lseek(fd, off + ret, SEEK_SET);
                errno = saved_errno;
        }
        return ret;
}
826
/*
 * write() wrapper with failure injection, built on failtest_pwrite().
 *
 * The current file position is passed as the offset.  A positioned write
 * leaves the file position where it was, so after a successful write on a
 * seekable descriptor the position is moved past the bytes written, as
 * write() would have done.  errno as set by failtest_pwrite() is preserved.
 */
ssize_t failtest_write(int fd, const void *buf, size_t count,
                       const char *file, unsigned line)
{
        off_t off = lseek(fd, 0, SEEK_CUR);
        ssize_t ret = failtest_pwrite(fd, buf, count, off, file, line);

        if (ret > 0 && off != (off_t)-1) {
                int saved_errno = errno;

                lseek(fd, off + ret, SEEK_SET);
                errno = saved_errno;
        }
        return ret;
}
833
834 static struct lock_info *WARN_UNUSED_RESULT
835 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
836 {
837         unsigned int i;
838         struct lock_info *l;
839
840         for (i = 0; i < lock_num; i++) {
841                 l = &locks[i];
842
843                 if (l->fd != fd)
844                         continue;
845                 /* Four cases we care about:
846                  * Start overlap:
847                  *      l =    |      |
848                  *      new = |   |
849                  * Mid overlap:
850                  *      l =    |      |
851                  *      new =    |  |
852                  * End overlap:
853                  *      l =    |      |
854                  *      new =      |    |
855                  * Total overlap:
856                  *      l =    |      |
857                  *      new = |         |
858                  */
859                 if (start > l->start && end < l->end) {
860                         /* Mid overlap: trim entry, add new one. */
861                         off_t new_start, new_end;
862                         new_start = end + 1;
863                         new_end = l->end;
864                         l->end = start - 1;
865                         locks = add_lock(locks,
866                                          fd, new_start, new_end, l->type);
867                         l = &locks[i];
868                 } else if (start <= l->start && end >= l->end) {
869                         /* Total overlap: eliminate entry. */
870                         l->end = 0;
871                         l->start = 1;
872                 } else if (end >= l->start && end < l->end) {
873                         /* Start overlap: trim entry. */
874                         l->start = end + 1;
875                 } else if (start > l->start && start <= l->end) {
876                         /* End overlap: trim entry. */
877                         l->end = start-1;
878                 }
879                 /* Nothing left?  Remove it. */
880                 if (l->end < l->start) {
881                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
882                         i--;
883                 }
884         }
885
886         if (type != F_UNLCK) {
887                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
888                 l = &locks[lock_num++];
889                 l->fd = fd;
890                 l->start = start;
891                 l->end = end;
892                 l->type = type;
893         }
894         return locks;
895 }
896
897 /* We trap this so we can record it: we don't fail it. */
898 int failtest_close(int fd, const char *file, unsigned line)
899 {
900         int i;
901         struct close_call call;
902         struct failtest_call *p;
903
904         call.fd = fd;
905         p = add_history(FAILTEST_CLOSE, file, line, &call);
906         p->fail = false;
907
908         /* Consume close from failpath. */
909         if (failpath)
910                 if (should_fail(p))
911                         abort();
912
913         if (fd < 0)
914                 return close(fd);
915
916         /* Trace history to find source of fd. */
917         for (i = history_num-1; i >= 0; i--) {
918                 switch (history[i].type) {
919                 case FAILTEST_PIPE:
920                         /* From a pipe? */
921                         if (history[i].u.pipe.fds[0] == fd) {
922                                 assert(!history[i].u.pipe.closed[0]);
923                                 history[i].u.pipe.closed[0] = true;
924                                 if (history[i].u.pipe.closed[1])
925                                         history[i].cleanup = NULL;
926                                 goto out;
927                         }
928                         if (history[i].u.pipe.fds[1] == fd) {
929                                 assert(!history[i].u.pipe.closed[1]);
930                                 history[i].u.pipe.closed[1] = true;
931                                 if (history[i].u.pipe.closed[0])
932                                         history[i].cleanup = NULL;
933                                 goto out;
934                         }
935                         break;
936                 case FAILTEST_OPEN:
937                         if (history[i].u.open.ret == fd) {
938                                 assert((void *)history[i].cleanup
939                                        == (void *)cleanup_open);
940                                 history[i].cleanup = NULL;
941                                 goto out;
942                         }
943                         break;
944                 default:
945                         break;
946                 }
947         }
948
949 out:
950         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
951         return close(fd);
952 }
953
/* Inclusive end offset of a len-byte range beginning at start; a zero
 * len means "to end of file", which we represent as off_max(). */
static off_t end_of(off_t start, off_t len)
{
        return len == 0 ? off_max() : start + len - 1;
}
961
962 /* FIXME: This only handles locks, really. */
963 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
964 {
965         struct failtest_call *p;
966         struct fcntl_call call;
967         va_list ap;
968
969         call.fd = fd;
970         call.cmd = cmd;
971
972         /* Argument extraction. */
973         switch (cmd) {
974         case F_SETFL:
975         case F_SETFD:
976                 va_start(ap, cmd);
977                 call.arg.l = va_arg(ap, long);
978                 va_end(ap);
979                 return fcntl(fd, cmd, call.arg.l);
980         case F_GETFD:
981         case F_GETFL:
982                 return fcntl(fd, cmd);
983         case F_GETLK:
984                 get_locks();
985                 va_start(ap, cmd);
986                 call.arg.fl = *va_arg(ap, struct flock *);
987                 va_end(ap);
988                 return fcntl(fd, cmd, &call.arg.fl);
989         case F_SETLK:
990         case F_SETLKW:
991                 va_start(ap, cmd);
992                 call.arg.fl = *va_arg(ap, struct flock *);
993                 va_end(ap);
994                 break;
995         default:
996                 /* This means you need to implement it here. */
997                 err(1, "failtest: unknown fcntl %u", cmd);
998         }
999
1000         p = add_history(FAILTEST_FCNTL, file, line, &call);
1001
1002         if (should_fail(p)) {
1003                 p->u.fcntl.ret = -1;
1004                 if (p->u.fcntl.cmd == F_SETLK)
1005                         p->error = EAGAIN;
1006                 else
1007                         p->error = EDEADLK;
1008         } else {
1009                 get_locks();
1010                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1011                                        &p->u.fcntl.arg.fl);
1012                 if (p->u.fcntl.ret == -1)
1013                         p->error = errno;
1014                 else {
1015                         /* We don't handle anything else yet. */
1016                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1017                         locks = add_lock(locks,
1018                                          p->u.fcntl.fd,
1019                                          p->u.fcntl.arg.fl.l_start,
1020                                          end_of(p->u.fcntl.arg.fl.l_start,
1021                                                 p->u.fcntl.arg.fl.l_len),
1022                                          p->u.fcntl.arg.fl.l_type);
1023                 }
1024         }
1025         errno = p->error;
1026         return p->u.fcntl.ret;
1027 }
1028
1029 pid_t failtest_getpid(const char *file, unsigned line)
1030 {
1031         /* You must call failtest_init first! */
1032         assert(orig_pid);
1033         return orig_pid;
1034 }
1035         
1036 void failtest_init(int argc, char *argv[])
1037 {
1038         unsigned int i;
1039
1040         orig_pid = getpid();
1041                 
1042         for (i = 1; i < argc; i++) {
1043                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1044                         failpath = argv[i] + strlen("--failpath=");
1045                 } else if (strcmp(argv[i], "--tracepath") == 0) {
1046                         tracefd = dup(STDERR_FILENO);
1047                         failtest_timeout_ms = -1;
1048                 } else if (!strncmp(argv[i], "--debugpath=",
1049                                     strlen("--debugpath="))) {
1050                         debugpath = argv[i] + strlen("--debugpath=");
1051                 }
1052         }
1053         gettimeofday(&start, NULL);
1054 }
1055
1056 void failtest_exit(int status)
1057 {
1058         if (failtest_exit_check) {
1059                 if (!failtest_exit_check(history, history_num))
1060                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1061         }
1062
1063         failtest_cleanup(false, status);
1064 }