failtest: use a linked list for history, not an array.
[ccan] / ccan / failtest / failtest.c
1 /* Licensed under LGPL - see LICENSE file for details */
2 #include <ccan/failtest/failtest.h>
3 #include <stdarg.h>
4 #include <string.h>
5 #include <stdio.h>
6 #include <stdarg.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <unistd.h>
10 #include <poll.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <sys/time.h>
16 #include <signal.h>
17 #include <assert.h>
18 #include <ccan/time/time.h>
19 #include <ccan/read_write_all/read_write_all.h>
20 #include <ccan/failtest/failtest_proto.h>
21 #include <ccan/build_assert/build_assert.h>
22 #include <ccan/str/str.h>
23
/* Optional user hook.  should_fail() calls it with the call history before
 * injecting a failure; the result picks between failing, probing and
 * not failing. */
enum failtest_result (*failtest_hook)(struct tlist_calls *);

/* Descriptor trace() prints to; -1 means tracing is disabled. */
static int tracefd = -1;

/* Poll timeout used while waiting on a child; on expiry the parent sends
 * the child SIGUSR1. */
unsigned int failtest_timeout_ms = 20000;

/* Scripted path: should_fail() consumes one letter per recorded call. */
const char *failpath;
/* Failure path at which should_fail() tries to start a debugger. */
const char *debugpath;
32
/* Message tags written down the control pipe from child to parent. */
enum info_type {
	/* A struct write_call and its payload follow. */
	WRITE,
	/* Ask the parent to drop the fcntl locks it holds. */
	RELEASE_LOCKS,
	/* The child hit a failure (sent by child_fail). */
	FAILURE,
	/* The child ran to completion (sent by failtest_cleanup). */
	SUCCESS,
	/* Parent-side initial value: no terminal message received. */
	UNEXPECTED
};
40
/* One byte-range lock we are tracking on a file descriptor. */
struct lock_info {
	int fd;
	/* end is inclusive: you can't have a 0-byte lock. */
	off_t start;
	off_t end;
	/* Lock type as handed to fcntl() in struct flock's l_type. */
	int type;
};
47
48 bool (*failtest_exit_check)(struct tlist_calls *history);
49
50 static struct tlist_calls history = TLIST_INIT(history);
51 static int control_fd = -1;
52 static struct timeval start;
53 static unsigned int probe_count = 0;
54
55 static struct write_call *child_writes = NULL;
56 static unsigned int child_writes_num = 0;
57
58 static pid_t lock_owner;
59 static struct lock_info *locks = NULL;
60 static unsigned int lock_num = 0;
61
62 static pid_t orig_pid;
63
64 static const char info_to_arg[] = "mceoxprwf";
65
66 /* Dummy call used for failtest_undo wrappers. */
67 static struct failtest_call unrecorded_call;
68
69 static struct failtest_call *add_history_(enum failtest_call_type type,
70                                           const char *file,
71                                           unsigned int line,
72                                           const void *elem,
73                                           size_t elem_size)
74 {
75         struct failtest_call *call;
76
77         /* NULL file is how we suppress failure. */
78         if (!file)
79                 return &unrecorded_call;
80
81         call = malloc(sizeof *call);
82         call->type = type;
83         call->file = file;
84         call->line = line;
85         call->cleanup = NULL;
86         memcpy(&call->u, elem, elem_size);
87         tlist_add_tail(&history, call, list);
88         return call;
89 }
90
/* Record a call; the size to copy is taken from what @elem points at. */
#define add_history(type, file, line, elem) \
	add_history_((type), (file), (line), (elem), sizeof *(elem))

/* Install @clean as the entry's cleanup routine.  The sizeof() wraps a
 * fake call that is never evaluated: it only makes the compiler check
 * that @clean accepts a (type *). */
#define set_cleanup(call, clean, type)			\
	(call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
97
98 static bool read_write_info(int fd)
99 {
100         struct write_call *w;
101         char *buf;
102
103         /* We don't need all of this, but it's simple. */
104         child_writes = realloc(child_writes,
105                                (child_writes_num+1) * sizeof(child_writes[0]));
106         w = &child_writes[child_writes_num];
107         if (!read_all(fd, w, sizeof(*w)))
108                 return false;
109
110         w->buf = buf = malloc(w->count);
111         if (!read_all(fd, buf, w->count))
112                 return false;
113
114         child_writes_num++;
115         return true;
116 }
117
118 static char *failpath_string(void)
119 {
120         struct failtest_call *i;
121         char *ret = strdup("");
122         unsigned len = 0;
123
124         /* Inefficient, but who cares? */
125         tlist_for_each(&history, i, list) {
126                 ret = realloc(ret, len + 2);
127                 ret[len] = info_to_arg[i->type];
128                 if (i->fail)
129                         ret[len] = toupper(ret[len]);
130                 ret[++len] = '\0';
131         }
132         return ret;
133 }
134
135 static void tell_parent(enum info_type type)
136 {
137         if (control_fd != -1)
138                 write_all(control_fd, &type, sizeof(type));
139 }
140
141 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
142 {
143         va_list ap;
144         char *path = failpath_string();
145
146         va_start(ap, fmt);
147         vfprintf(stderr, fmt, ap);
148         va_end(ap);
149
150         fprintf(stderr, "%.*s", (int)outlen, out);
151         printf("To reproduce: --failpath=%s\n", path);
152         free(path);
153         tell_parent(FAILURE);
154         exit(1);
155 }
156
157 static void trace(const char *fmt, ...)
158 {
159         va_list ap;
160
161         if (tracefd == -1)
162                 return;
163
164         va_start(ap, fmt);
165         vdprintf(tracefd, fmt, ap);
166         va_end(ap);
167 }
168
/* pid returned by the most recent fork() in should_fail(). */
static pid_t child;

/* Relay @signum to that child; installed as the SIGUSR1 handler and also
 * called directly when the poll on the child times out. */
static void hand_down(int signum)
{
	(void)kill(child, signum);
}
175
176 static void release_locks(void)
177 {
178         /* Locks were never acquired/reacquired? */
179         if (lock_owner == 0)
180                 return;
181
182         /* We own them?  Release them all. */
183         if (lock_owner == getpid()) {
184                 unsigned int i;
185                 struct flock fl;
186                 fl.l_type = F_UNLCK;
187                 fl.l_whence = SEEK_SET;
188                 fl.l_start = 0;
189                 fl.l_len = 0;
190
191                 for (i = 0; i < lock_num; i++)
192                         fcntl(locks[i].fd, F_SETLK, &fl);
193         } else {
194                 /* Our parent must have them; pass request up. */
195                 enum info_type type = RELEASE_LOCKS;
196                 assert(control_fd != -1);
197                 write_all(control_fd, &type, sizeof(type));
198         }
199         lock_owner = 0;
200 }
201
/*
 * off_t is a signed type with no portable *_MAX macro, so build its
 * largest value by hand: take all-ones in unsigned long long and shift
 * away the bits off_t does not have, plus one more for the sign bit.
 *
 * Assumes 8-bit bytes and that off_t is no wider than unsigned long long.
 */
static off_t off_max(void)
{
	unsigned long long all_ones = ~0ULL;

	assert(sizeof(off_t) <= sizeof(all_ones));
	return (off_t)(all_ones
		       >> (8 * (sizeof(all_ones) - sizeof(off_t)) + 1));
}
211
212 static void get_locks(void)
213 {
214         unsigned int i;
215         struct flock fl;
216
217         if (lock_owner == getpid())
218                 return;
219
220         if (lock_owner != 0) {
221                 enum info_type type = RELEASE_LOCKS;
222                 assert(control_fd != -1);
223                 write_all(control_fd, &type, sizeof(type));
224         }
225
226         fl.l_whence = SEEK_SET;
227
228         for (i = 0; i < lock_num; i++) {
229                 fl.l_type = locks[i].type;
230                 fl.l_start = locks[i].start;
231                 if (locks[i].end == off_max())
232                         fl.l_len = 0;
233                 else
234                         fl.l_len = locks[i].end - locks[i].start + 1;
235
236                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
237                         abort();
238         }
239         lock_owner = getpid();
240 }
241
/* Snapshot of one open file, kept on a singly linked list. */
struct saved_file {
	struct saved_file *next;
	int fd;
	/* Copy of the whole file, len bytes long. */
	void *contents;
	/* File offset at the time of the snapshot. */
	off_t off;
	off_t len;
};
248
249 static struct saved_file *save_file(struct saved_file *next, int fd)
250 {
251         struct saved_file *s = malloc(sizeof(*s));
252
253         s->next = next;
254         s->fd = fd;
255         s->off = lseek(fd, 0, SEEK_CUR);
256         /* Special file?  Erk... */
257         assert(s->off != -1);
258         s->len = lseek(fd, 0, SEEK_END);
259         lseek(fd, 0, SEEK_SET);
260         s->contents = malloc(s->len);
261         if (read(fd, s->contents, s->len) != s->len)
262                 err(1, "Failed to save %zu bytes", (size_t)s->len);
263         lseek(fd, s->off, SEEK_SET);
264         return s;
265 }
266         
267 /* We have little choice but to save and restore open files: mmap means we
268  * can really intercept changes in the child.
269  *
270  * We could do non-mmap'ed files on demand, however. */
271 static struct saved_file *save_files(void)
272 {
273         struct saved_file *files = NULL;
274         struct failtest_call *i;
275
276         /* Figure out the set of live fds. */
277         tlist_for_each_rev(&history, i, list) {
278                 if (i->type == FAILTEST_OPEN) {
279                         int fd = i->u.open.ret;
280                         /* Only do successful, writable fds. */
281                         if (fd < 0)
282                                 continue;
283
284                         /* If it was closed, cleanup == NULL. */
285                         if (!i->cleanup)
286                                 continue;
287
288                         if ((i->u.open.flags & O_RDWR) == O_RDWR) {
289                                 files = save_file(files, fd);
290                         } else if ((i->u.open.flags & O_WRONLY)
291                                    == O_WRONLY) {
292                                 /* FIXME: Handle O_WRONLY.  Open with O_RDWR? */
293                                 abort();
294                         }
295                 }
296         }
297
298         return files;
299 }
300
301 static void restore_files(struct saved_file *s)
302 {
303         while (s) {
304                 struct saved_file *next = s->next;
305
306                 lseek(s->fd, 0, SEEK_SET);
307                 if (write(s->fd, s->contents, s->len) != s->len)
308                         err(1, "Failed to restore %zu bytes", (size_t)s->len);
309                 if (ftruncate(s->fd, s->len) != 0)
310                         err(1, "Failed to trim file to length %zu",
311                             (size_t)s->len);
312                 free(s->contents);
313                 lseek(s->fd, s->off, SEEK_SET);
314                 free(s);
315                 s = next;
316         }
317 }
318
319 static void free_files(struct saved_file *s)
320 {
321         while (s) {
322                 struct saved_file *next = s->next;
323                 free(s->contents);
324                 free(s);
325                 s = next;
326         }
327 }
328
329 static void free_call(struct failtest_call *call)
330 {
331         /* We don't do this in cleanup: needed even for failed opens. */
332         if (call->type == FAILTEST_OPEN)
333                 free((char *)call->u.open.pathname);
334         tlist_del_from(&history, call, list);
335         free(call);
336 }
337
338 /* Free up memory, so valgrind doesn't report leaks. */
339 static void free_everything(void)
340 {
341         struct failtest_call *i;
342
343         while ((i = tlist_top(&history, struct failtest_call, list)) != NULL)
344                 free_call(i);
345 }
346
347 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
348 {
349         struct failtest_call *i;
350
351         /* For children, we don't care if they "failed" the testing. */
352         if (control_fd != -1)
353                 status = 0;
354
355         if (forced_cleanup) {
356                 /* We didn't actually do final operation: remove it. */
357                 i = tlist_tail(&history, struct failtest_call, list);
358                 free_call(i);
359         }
360
361         /* Cleanup everything, in reverse order. */
362         tlist_for_each_rev(&history, i, list) {
363                 if (!i->cleanup)
364                         continue;
365                 if (!forced_cleanup) {
366                         printf("Leak at %s:%u: --failpath=%s\n",
367                                i->file, i->line, failpath_string());
368                         status = 1;
369                 }
370                 i->cleanup(&i->u);
371         }
372
373         free_everything();
374         tell_parent(SUCCESS);
375         exit(status);
376 }
377
378 static bool should_fail(struct failtest_call *call)
379 {
380         int status;
381         int control[2], output[2];
382         enum info_type type = UNEXPECTED;
383         char *out = NULL;
384         size_t outlen = 0;
385         struct saved_file *files;
386
387         /* Are we probing? */
388         if (probe_count && --probe_count == 0 && control_fd != -1)
389                 failtest_cleanup(true, 0);
390
391         if (call == &unrecorded_call)
392                 return false;
393
394         if (failpath) {
395                 /* + means continue after end, like normal. */
396                 if (*failpath == '+')
397                         failpath = NULL;
398                 else if (*failpath == '\0') {
399                         /* Continue, but don't inject errors. */
400                         return call->fail = false;
401                 } else {
402                         if (tolower((unsigned char)*failpath)
403                             != info_to_arg[call->type])
404                                 errx(1, "Failpath expected '%c' got '%c'\n",
405                                      info_to_arg[call->type], *failpath);
406                         call->fail = cisupper(*(failpath++));
407                         return call->fail;
408                 }
409         }
410
411         /* Attach debugger if they asked for it. */
412         if (debugpath) {
413                 char *path = failpath_string();
414
415                 if (streq(path, debugpath)) {
416                         char str[80];
417
418                         /* Don't timeout. */
419                         signal(SIGUSR1, SIG_IGN);
420                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
421                                 getpid(), getpid());
422                         if (system(str) == 0)
423                                 sleep(5);
424                 } else if (!strstarts(path, debugpath)) {
425                         fprintf(stderr, "--debugpath not followed: %s\n", path);
426                         debugpath = NULL;
427                 }
428                 free(path);
429         }
430
431         if (failtest_hook) {
432                 switch (failtest_hook(&history)) {
433                 case FAIL_OK:
434                         break;
435                 case FAIL_PROBE:
436                         /* Already down probe path?  Stop now. */
437                         if (!probe_count) {
438                                 /* FIXME: We should run *parent* and
439                                  * run probe until calls match up again. */
440                                 probe_count = 3;
441                                 break;
442                         } else {
443                                 /* Child should give up now. */
444                                 if (control_fd != -1)
445                                         failtest_cleanup(true, 0);
446                                 /* Parent, don't fail again. */
447                         }
448                 case FAIL_DONT_FAIL:
449                         call->fail = false;
450                         return false;
451                 default:
452                         abort();
453                 }
454         }
455
456         files = save_files();
457
458         /* We're going to fail in the child. */
459         call->fail = true;
460         if (pipe(control) != 0 || pipe(output) != 0)
461                 err(1, "opening pipe");
462
463         /* Prevent double-printing (in child and parent) */
464         fflush(stdout);
465         child = fork();
466         if (child == -1)
467                 err(1, "forking failed");
468
469         if (child == 0) {
470                 if (tracefd != -1) {
471                         struct timeval diff;
472                         const char *p;
473                         char *failpath;
474                         struct failtest_call *c;
475
476                         c = tlist_tail(&history, struct failtest_call, list);
477                         diff = time_sub(time_now(), start);
478                         failpath = failpath_string();
479                         trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
480                               (int)diff.tv_sec, (int)diff.tv_usec / 10000,
481                               failpath);
482                         free(failpath);
483                         p = strrchr(c->file, '/');
484                         if (p)
485                                 trace("%s", p+1);
486                         else
487                                 trace("%s", c->file);
488                         trace(":%u)\n", c->line);
489                 }
490                 close(control[0]);
491                 close(output[0]);
492                 dup2(output[1], STDOUT_FILENO);
493                 dup2(output[1], STDERR_FILENO);
494                 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
495                         close(output[1]);
496                 control_fd = control[1];
497                 /* Valgrind spots the leak if we don't free these. */
498                 free_files(files);
499                 return true;
500         }
501
502         signal(SIGUSR1, hand_down);
503
504         close(control[1]);
505         close(output[1]);
506
507         /* We grab output so we can display it; we grab writes so we
508          * can compare. */
509         do {
510                 struct pollfd pfd[2];
511                 int ret;
512
513                 pfd[0].fd = output[0];
514                 pfd[0].events = POLLIN|POLLHUP;
515                 pfd[1].fd = control[0];
516                 pfd[1].events = POLLIN|POLLHUP;
517
518                 if (type == SUCCESS)
519                         ret = poll(pfd, 1, failtest_timeout_ms);
520                 else
521                         ret = poll(pfd, 2, failtest_timeout_ms);
522
523                 if (ret == 0)
524                         hand_down(SIGUSR1);
525                 if (ret < 0) {
526                         if (errno == EINTR)
527                                 continue;
528                         err(1, "Poll returned %i", ret);
529                 }
530
531                 if (pfd[0].revents & POLLIN) {
532                         ssize_t len;
533
534                         out = realloc(out, outlen + 8192);
535                         len = read(output[0], out + outlen, 8192);
536                         outlen += len;
537                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
538                         if (read_all(control[0], &type, sizeof(type))) {
539                                 if (type == WRITE) {
540                                         if (!read_write_info(control[0]))
541                                                 break;
542                                 } else if (type == RELEASE_LOCKS) {
543                                         release_locks();
544                                         /* FIXME: Tell them we're done... */
545                                 }
546                         }
547                 } else if (pfd[0].revents & POLLHUP) {
548                         break;
549                 }
550         } while (type != FAILURE);
551
552         close(output[0]);
553         close(control[0]);
554         waitpid(child, &status, 0);
555         if (!WIFEXITED(status)) {
556                 if (WTERMSIG(status) == SIGUSR1)
557                         child_fail(out, outlen, "Timed out");
558                 else
559                         child_fail(out, outlen, "Killed by signal %u: ",
560                                    WTERMSIG(status));
561         }
562         /* Child printed failure already, just pass up exit code. */
563         if (type == FAILURE) {
564                 fprintf(stderr, "%.*s", (int)outlen, out);
565                 tell_parent(type);
566                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
567         }
568         if (WEXITSTATUS(status) != 0)
569                 child_fail(out, outlen, "Exited with status %i: ",
570                            WEXITSTATUS(status));
571
572         free(out);
573         signal(SIGUSR1, SIG_DFL);
574
575         restore_files(files);
576
577         /* We continue onwards without failing. */
578         call->fail = false;
579         return false;
580 }
581
582 static void cleanup_calloc(struct calloc_call *call)
583 {
584         free(call->ret);
585 }
586
587 void *failtest_calloc(size_t nmemb, size_t size,
588                       const char *file, unsigned line)
589 {
590         struct failtest_call *p;
591         struct calloc_call call;
592         call.nmemb = nmemb;
593         call.size = size;
594         p = add_history(FAILTEST_CALLOC, file, line, &call);
595
596         if (should_fail(p)) {
597                 p->u.calloc.ret = NULL;
598                 p->error = ENOMEM;
599         } else {
600                 p->u.calloc.ret = calloc(nmemb, size);
601                 set_cleanup(p, cleanup_calloc, struct calloc_call);
602         }
603         errno = p->error;
604         return p->u.calloc.ret;
605 }
606
607 static void cleanup_malloc(struct malloc_call *call)
608 {
609         free(call->ret);
610 }
611
612 void *failtest_malloc(size_t size, const char *file, unsigned line)
613 {
614         struct failtest_call *p;
615         struct malloc_call call;
616         call.size = size;
617
618         p = add_history(FAILTEST_MALLOC, file, line, &call);
619         if (should_fail(p)) {
620                 p->u.malloc.ret = NULL;
621                 p->error = ENOMEM;
622         } else {
623                 p->u.malloc.ret = malloc(size);
624                 set_cleanup(p, cleanup_malloc, struct malloc_call);
625         }
626         errno = p->error;
627         return p->u.malloc.ret;
628 }
629
630 static void cleanup_realloc(struct realloc_call *call)
631 {
632         free(call->ret);
633 }
634
635 /* Walk back and find out if we got this ptr from a previous routine. */
636 static void fixup_ptr_history(void *ptr)
637 {
638         struct failtest_call *i;
639
640         /* Start at end of history, work back. */
641         tlist_for_each_rev(&history, i, list) {
642                 switch (i->type) {
643                 case FAILTEST_REALLOC:
644                         if (i->u.realloc.ret == ptr) {
645                                 i->cleanup = NULL;
646                                 return;
647                         }
648                         break;
649                 case FAILTEST_MALLOC:
650                         if (i->u.malloc.ret == ptr) {
651                                 i->cleanup = NULL;
652                                 return;
653                         }
654                         break;
655                 case FAILTEST_CALLOC:
656                         if (i->u.calloc.ret == ptr) {
657                                 i->cleanup = NULL;
658                                 return;
659                         }
660                         break;
661                 default:
662                         break;
663                 }
664         }
665 }
666
667 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
668 {
669         struct failtest_call *p;
670         struct realloc_call call;
671         call.size = size;
672         p = add_history(FAILTEST_REALLOC, file, line, &call);
673
674         /* FIXME: Try one child moving allocation, one not. */
675         if (should_fail(p)) {
676                 p->u.realloc.ret = NULL;
677                 p->error = ENOMEM;
678         } else {
679                 /* Don't catch this one in the history fixup... */
680                 p->u.realloc.ret = NULL;
681                 fixup_ptr_history(ptr);
682                 p->u.realloc.ret = realloc(ptr, size);
683                 set_cleanup(p, cleanup_realloc, struct realloc_call);
684         }
685         errno = p->error;
686         return p->u.realloc.ret;
687 }
688
/* free() replacement: cancel the cleanup of whichever history entry
 * handed out @ptr, then really free it. */
void failtest_free(void *ptr)
{
	/* Must come first, so the entry's cleanup won't free it again. */
	fixup_ptr_history(ptr);

	free(ptr);
}
694
695 static void cleanup_open(struct open_call *call)
696 {
697         close(call->ret);
698 }
699
700 int failtest_open(const char *pathname,
701                   const char *file, unsigned line, ...)
702 {
703         struct failtest_call *p;
704         struct open_call call;
705         va_list ap;
706
707         call.pathname = strdup(pathname);
708         va_start(ap, line);
709         call.flags = va_arg(ap, int);
710         if (call.flags & O_CREAT) {
711                 call.mode = va_arg(ap, int);
712                 va_end(ap);
713         }
714         p = add_history(FAILTEST_OPEN, file, line, &call);
715         /* Avoid memory leak! */
716         if (p == &unrecorded_call)
717                 free((char *)call.pathname);
718         p->u.open.ret = open(pathname, call.flags, call.mode);
719
720         if (p->u.open.ret == -1) {
721                 p->fail = false;
722                 p->error = errno;
723         } else if (should_fail(p)) {
724                 close(p->u.open.ret);
725                 p->u.open.ret = -1;
726                 /* FIXME: Play with error codes? */
727                 p->error = EACCES;
728         } else {
729                 set_cleanup(p, cleanup_open, struct open_call);
730         }
731         errno = p->error;
732         return p->u.open.ret;
733 }
734
735 static void cleanup_pipe(struct pipe_call *call)
736 {
737         if (!call->closed[0])
738                 close(call->fds[0]);
739         if (!call->closed[1])
740                 close(call->fds[1]);
741 }
742
743 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
744 {
745         struct failtest_call *p;
746         struct pipe_call call;
747
748         p = add_history(FAILTEST_PIPE, file, line, &call);
749         if (should_fail(p)) {
750                 p->u.open.ret = -1;
751                 /* FIXME: Play with error codes? */
752                 p->error = EMFILE;
753         } else {
754                 p->u.pipe.ret = pipe(p->u.pipe.fds);
755                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
756                 set_cleanup(p, cleanup_pipe, struct pipe_call);
757         }
758         /* This causes valgrind to notice if they use pipefd[] after failure */
759         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
760         errno = p->error;
761         return p->u.pipe.ret;
762 }
763
764 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
765                        const char *file, unsigned line)
766 {
767         struct failtest_call *p;
768         struct read_call call;
769         call.fd = fd;
770         call.buf = buf;
771         call.count = count;
772         call.off = off;
773         p = add_history(FAILTEST_READ, file, line, &call);
774
775         /* FIXME: Try partial read returns. */
776         if (should_fail(p)) {
777                 p->u.read.ret = -1;
778                 p->error = EIO;
779         } else {
780                 p->u.read.ret = pread(fd, buf, count, off);
781         }
782         errno = p->error;
783         return p->u.read.ret;
784 }
785
786 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
787                         const char *file, unsigned line)
788 {
789         struct failtest_call *p;
790         struct write_call call;
791
792         call.fd = fd;
793         call.buf = buf;
794         call.count = count;
795         call.off = off;
796         p = add_history(FAILTEST_WRITE, file, line, &call);
797
798         /* If we're a child, we need to make sure we write the same thing
799          * to non-files as the parent does, so tell it. */
800         if (control_fd != -1 && off == (off_t)-1) {
801                 enum info_type type = WRITE;
802
803                 write_all(control_fd, &type, sizeof(type));
804                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
805                 write_all(control_fd, buf, count);
806         }
807
808         /* FIXME: Try partial write returns. */
809         if (should_fail(p)) {
810                 p->u.write.ret = -1;
811                 p->error = EIO;
812         } else {
813                 /* FIXME: We assume same write order in parent and child */
814                 if (off == (off_t)-1 && child_writes_num != 0) {
815                         if (child_writes[0].fd != fd)
816                                 errx(1, "Child wrote to fd %u, not %u?",
817                                      child_writes[0].fd, fd);
818                         if (child_writes[0].off != p->u.write.off)
819                                 errx(1, "Child wrote to offset %zu, not %zu?",
820                                      (size_t)child_writes[0].off,
821                                      (size_t)p->u.write.off);
822                         if (child_writes[0].count != count)
823                                 errx(1, "Child wrote length %zu, not %zu?",
824                                      child_writes[0].count, count);
825                         if (memcmp(child_writes[0].buf, buf, count)) {
826                                 child_fail(NULL, 0,
827                                            "Child wrote differently to"
828                                            " fd %u than we did!\n", fd);
829                         }
830                         free((char *)child_writes[0].buf);
831                         child_writes_num--;
832                         memmove(&child_writes[0], &child_writes[1],
833                                 sizeof(child_writes[0]) * child_writes_num);
834
835                         /* Is this is a socket or pipe, child wrote it
836                            already. */
837                         if (p->u.write.off == (off_t)-1) {
838                                 p->u.write.ret = count;
839                                 errno = p->error;
840                                 return p->u.write.ret;
841                         }
842                 }
843                 p->u.write.ret = pwrite(fd, buf, count, off);
844         }
845         errno = p->error;
846         return p->u.write.ret;
847 }
848
/*
 * read() replacement, built on failtest_pread() at the descriptor's
 * current offset.
 *
 * pread() leaves the file offset alone, so after a successful read on a
 * seekable descriptor we advance it ourselves; otherwise every call would
 * return the same bytes.  When lseek() fails, (off_t)-1 is passed down
 * and the offset is not touched.
 */
ssize_t failtest_read(int fd, void *buf, size_t count,
		      const char *file, unsigned line)
{
	off_t off = lseek(fd, 0, SEEK_CUR);
	ssize_t ret = failtest_pread(fd, buf, count, off, file, line);

	if (ret > 0 && off != (off_t)-1) {
		/* Don't let the bookkeeping seek disturb errno. */
		int saved_errno = errno;

		lseek(fd, off + ret, SEEK_SET);
		errno = saved_errno;
	}
	return ret;
}
855
/*
 * write() replacement, built on failtest_pwrite() at the descriptor's
 * current offset.
 *
 * pwrite() leaves the file offset alone, so after a successful write on a
 * seekable descriptor we advance it ourselves; otherwise every call would
 * overwrite the same bytes.  When lseek() fails, (off_t)-1 is passed down
 * and the offset is not touched.
 *
 * NOTE(review): descriptors opened with O_APPEND are not special-cased.
 */
ssize_t failtest_write(int fd, const void *buf, size_t count,
		       const char *file, unsigned line)
{
	off_t off = lseek(fd, 0, SEEK_CUR);
	ssize_t ret = failtest_pwrite(fd, buf, count, off, file, line);

	if (ret > 0 && off != (off_t)-1) {
		/* Don't let the bookkeeping seek disturb errno. */
		int saved_errno = errno;

		lseek(fd, off + ret, SEEK_SET);
		errno = saved_errno;
	}
	return ret;
}
862
863 static struct lock_info *WARN_UNUSED_RESULT
864 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
865 {
866         unsigned int i;
867         struct lock_info *l;
868
869         for (i = 0; i < lock_num; i++) {
870                 l = &locks[i];
871
872                 if (l->fd != fd)
873                         continue;
874                 /* Four cases we care about:
875                  * Start overlap:
876                  *      l =    |      |
877                  *      new = |   |
878                  * Mid overlap:
879                  *      l =    |      |
880                  *      new =    |  |
881                  * End overlap:
882                  *      l =    |      |
883                  *      new =      |    |
884                  * Total overlap:
885                  *      l =    |      |
886                  *      new = |         |
887                  */
888                 if (start > l->start && end < l->end) {
889                         /* Mid overlap: trim entry, add new one. */
890                         off_t new_start, new_end;
891                         new_start = end + 1;
892                         new_end = l->end;
893                         l->end = start - 1;
894                         locks = add_lock(locks,
895                                          fd, new_start, new_end, l->type);
896                         l = &locks[i];
897                 } else if (start <= l->start && end >= l->end) {
898                         /* Total overlap: eliminate entry. */
899                         l->end = 0;
900                         l->start = 1;
901                 } else if (end >= l->start && end < l->end) {
902                         /* Start overlap: trim entry. */
903                         l->start = end + 1;
904                 } else if (start > l->start && start <= l->end) {
905                         /* End overlap: trim entry. */
906                         l->end = start-1;
907                 }
908                 /* Nothing left?  Remove it. */
909                 if (l->end < l->start) {
910                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
911                         i--;
912                 }
913         }
914
915         if (type != F_UNLCK) {
916                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
917                 l = &locks[lock_num++];
918                 l->fd = fd;
919                 l->start = start;
920                 l->end = end;
921                 l->type = type;
922         }
923         return locks;
924 }
925
926 /* We trap this so we can record it: we don't fail it. */
927 int failtest_close(int fd, const char *file, unsigned line)
928 {
929         struct failtest_call *i;
930         struct close_call call;
931         struct failtest_call *p;
932
933         call.fd = fd;
934         p = add_history(FAILTEST_CLOSE, file, line, &call);
935         p->fail = false;
936
937         /* Consume close from failpath. */
938         if (failpath)
939                 if (should_fail(p))
940                         abort();
941
942         if (fd < 0)
943                 return close(fd);
944
945         /* Trace history to find source of fd. */
946         tlist_for_each_rev(&history, i, list) {
947                 switch (i->type) {
948                 case FAILTEST_PIPE:
949                         /* From a pipe? */
950                         if (i->u.pipe.fds[0] == fd) {
951                                 assert(!i->u.pipe.closed[0]);
952                                 i->u.pipe.closed[0] = true;
953                                 if (i->u.pipe.closed[1])
954                                         i->cleanup = NULL;
955                                 goto out;
956                         }
957                         if (i->u.pipe.fds[1] == fd) {
958                                 assert(!i->u.pipe.closed[1]);
959                                 i->u.pipe.closed[1] = true;
960                                 if (i->u.pipe.closed[0])
961                                         i->cleanup = NULL;
962                                 goto out;
963                         }
964                         break;
965                 case FAILTEST_OPEN:
966                         if (i->u.open.ret == fd) {
967                                 assert((void *)i->cleanup
968                                        == (void *)cleanup_open);
969                                 i->cleanup = NULL;
970                                 goto out;
971                         }
972                         break;
973                 default:
974                         break;
975                 }
976         }
977
978 out:
979         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
980         return close(fd);
981 }
982
/*
 * Inclusive end offset of a range beginning at start and covering len
 * bytes.  Zero length means "to end of file", reported as off_max().
 */
static off_t end_of(off_t start, off_t len)
{
        return len == 0 ? off_max() : start + len - 1;
}
990
991 /* FIXME: This only handles locks, really. */
992 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
993 {
994         struct failtest_call *p;
995         struct fcntl_call call;
996         va_list ap;
997
998         call.fd = fd;
999         call.cmd = cmd;
1000
1001         /* Argument extraction. */
1002         switch (cmd) {
1003         case F_SETFL:
1004         case F_SETFD:
1005                 va_start(ap, cmd);
1006                 call.arg.l = va_arg(ap, long);
1007                 va_end(ap);
1008                 return fcntl(fd, cmd, call.arg.l);
1009         case F_GETFD:
1010         case F_GETFL:
1011                 return fcntl(fd, cmd);
1012         case F_GETLK:
1013                 get_locks();
1014                 va_start(ap, cmd);
1015                 call.arg.fl = *va_arg(ap, struct flock *);
1016                 va_end(ap);
1017                 return fcntl(fd, cmd, &call.arg.fl);
1018         case F_SETLK:
1019         case F_SETLKW:
1020                 va_start(ap, cmd);
1021                 call.arg.fl = *va_arg(ap, struct flock *);
1022                 va_end(ap);
1023                 break;
1024         default:
1025                 /* This means you need to implement it here. */
1026                 err(1, "failtest: unknown fcntl %u", cmd);
1027         }
1028
1029         p = add_history(FAILTEST_FCNTL, file, line, &call);
1030
1031         if (should_fail(p)) {
1032                 p->u.fcntl.ret = -1;
1033                 if (p->u.fcntl.cmd == F_SETLK)
1034                         p->error = EAGAIN;
1035                 else
1036                         p->error = EDEADLK;
1037         } else {
1038                 get_locks();
1039                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1040                                        &p->u.fcntl.arg.fl);
1041                 if (p->u.fcntl.ret == -1)
1042                         p->error = errno;
1043                 else {
1044                         /* We don't handle anything else yet. */
1045                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1046                         locks = add_lock(locks,
1047                                          p->u.fcntl.fd,
1048                                          p->u.fcntl.arg.fl.l_start,
1049                                          end_of(p->u.fcntl.arg.fl.l_start,
1050                                                 p->u.fcntl.arg.fl.l_len),
1051                                          p->u.fcntl.arg.fl.l_type);
1052                 }
1053         }
1054         errno = p->error;
1055         return p->u.fcntl.ret;
1056 }
1057
1058 pid_t failtest_getpid(const char *file, unsigned line)
1059 {
1060         /* You must call failtest_init first! */
1061         assert(orig_pid);
1062         return orig_pid;
1063 }
1064         
1065 void failtest_init(int argc, char *argv[])
1066 {
1067         unsigned int i;
1068
1069         orig_pid = getpid();
1070                 
1071         for (i = 1; i < argc; i++) {
1072                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1073                         failpath = argv[i] + strlen("--failpath=");
1074                 } else if (strcmp(argv[i], "--tracepath") == 0) {
1075                         tracefd = dup(STDERR_FILENO);
1076                         failtest_timeout_ms = -1;
1077                 } else if (!strncmp(argv[i], "--debugpath=",
1078                                     strlen("--debugpath="))) {
1079                         debugpath = argv[i] + strlen("--debugpath=");
1080                 }
1081         }
1082         start = time_now();
1083 }
1084
1085 bool failtest_has_failed(void)
1086 {
1087         return control_fd != -1;
1088 }
1089
1090 void failtest_exit(int status)
1091 {
1092         if (failtest_exit_check) {
1093                 if (!failtest_exit_check(&history))
1094                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1095         }
1096
1097         failtest_cleanup(false, status);
1098 }