tdb2: copy tdb1's changed expansion logic.
[ccan] / ccan / failtest / failtest.c
1 /* Licensed under LGPL - see LICENSE file for details */
2 #include <ccan/failtest/failtest.h>
3 #include <stdarg.h>
4 #include <string.h>
5 #include <stdio.h>
6 #include <stdarg.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <unistd.h>
10 #include <poll.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <sys/time.h>
16 #include <sys/mman.h>
17 #include <signal.h>
18 #include <assert.h>
19 #include <ccan/time/time.h>
20 #include <ccan/read_write_all/read_write_all.h>
21 #include <ccan/failtest/failtest_proto.h>
22 #include <ccan/build_assert/build_assert.h>
23 #include <ccan/hash/hash.h>
24 #include <ccan/htable/htable_type.h>
25 #include <ccan/str/str.h>
26 #include <ccan/compiler/compiler.h>
27
/*
 * Optional user hook.  should_fail() calls it with the call history before
 * injecting a failure; it answers FAIL_OK, FAIL_PROBE or FAIL_DONT_FAIL.
 */
enum failtest_result (*failtest_hook)(struct tlist_calls *);

/* Stream trace() writes to; trace() does nothing while this is NULL. */
static FILE *tracef = NULL;
/* Stream do_warn() writes warnings to. */
static FILE *warnf;
/* Number of two-space indents trace() prints; bumped in each forked child. */
static int traceindent = 0;

/* poll() timeout used by should_fail(); on expiry the child gets SIGUSR1. */
unsigned int failtest_timeout_ms = 20000;

/* When set, should_fail() replays this path via follow_path(). */
const char *failpath;
/* When set, should_fail() starts a debugger once the current path equals it. */
const char *debugpath;
37
/* Message kinds a child writes to its parent over control_fd. */
enum info_type {
	/* A write record follows; the parent reads it with read_write_info(). */
	WRITE = 0,
	/* The parent should call release_locks(). */
	RELEASE_LOCKS = 1,
	/* The child failed; the parent stops listening and exits. */
	FAILURE = 2,
	/* The child succeeded; the parent stops polling the control pipe. */
	SUCCESS = 3,
	/* Initial value in should_fail() before any message arrives. */
	UNEXPECTED = 4
};
45
/* One fcntl byte-range lock we track so it can be released and re-taken. */
struct lock_info {
	/* File descriptor the lock is held on. */
	int fd;
	/* First byte covered by the lock. */
	off_t start;
	/* Last byte covered (inclusive: you can't have a 0-byte lock). */
	off_t end;
	/* Lock type, copied into flock.l_type by get_locks(). */
	int type;
};
52
53 /* We hash the call location together with its backtrace. */
54 static size_t hash_call(const struct failtest_call *call)
55 {
56         return hash(call->file, strlen(call->file),
57                     hash(&call->line, 1,
58                          hash(call->backtrace, call->backtrace_num,
59                               call->type)));
60 }
61
62 static bool call_eq(const struct failtest_call *call1,
63                     const struct failtest_call *call2)
64 {
65         unsigned int i;
66
67         if (strcmp(call1->file, call2->file) != 0
68             || call1->line != call2->line
69             || call1->type != call2->type
70             || call1->backtrace_num != call2->backtrace_num)
71                 return false;
72
73         for (i = 0; i < call1->backtrace_num; i++)
74                 if (call1->backtrace[i] != call2->backtrace[i])
75                         return false;
76
77         return true;
78 }
79
80 /* Defines struct failtable. */
81 HTABLE_DEFINE_TYPE(struct failtest_call, (struct failtest_call *), hash_call,
82                    call_eq, failtable);
83
84 bool (*failtest_exit_check)(struct tlist_calls *history);
85
86 /* The entire history of all calls. */
87 static struct tlist_calls history = TLIST_INIT(history);
88 /* If we're a child, the fd two write control info to the parent. */
89 static int control_fd = -1;
90 /* If we're a child, this is the first call we did ourselves. */
91 static struct failtest_call *our_history_start = NULL;
92 /* For printing runtime with --trace. */
93 static struct timeval start;
94 /* Set when failtest_hook returns FAIL_PROBE */
95 static bool probing = false;
96 /* Table to track duplicates. */
97 static struct failtable failtable;
98
99 /* Array of writes which our child did.  We report them on failure. */
100 static struct write_call *child_writes = NULL;
101 static unsigned int child_writes_num = 0;
102
103 /* fcntl locking info. */
104 static pid_t lock_owner;
105 static struct lock_info *locks = NULL;
106 static unsigned int lock_num = 0;
107
108 /* Our original pid, which we return to anyone who asks. */
109 static pid_t orig_pid;
110
111 /* Mapping from failtest_type to char. */
112 static const char info_to_arg[] = "mceoxprwfal";
113
114 /* Dummy call used for failtest_undo wrappers. */
115 static struct failtest_call unrecorded_call;
116
/*
 * A snapshot of part of a file, taken by save_contents() and written back
 * by restore_contents().  Allocated as sizeof(struct) + count bytes.
 */
struct contents_saved {
	/* Number of bytes actually held in contents[]. */
	size_t count;
	/* File offset the bytes were read from. */
	off_t off;
	/* Length of the file at the time of the snapshot. */
	off_t old_len;
	/* The saved bytes (C99 flexible array member; was a [1] struct hack). */
	char contents[];
};
123
/*
 * One entry in the list of file snapshots taken for mmapped files.
 * The list belongs to this child only.
 */
struct saved_mmapped_file {
	/* Next entry, or NULL at the end of the list. */
	struct saved_mmapped_file *next;
	/* The open call whose file was snapshotted. */
	struct failtest_call *opener;
	/* The snapshot itself, as returned by save_contents(). */
	struct contents_saved *s;
};

/* Head of the snapshot list; NULL when nothing is saved. */
static struct saved_mmapped_file *saved_mmapped_files;
132
#if HAVE_BACKTRACE
#include <execinfo.h>

/*
 * Capture the current backtrace into a freshly allocated array.
 *
 * @num: set to the number of frames stored.
 * Returns the array (caller frees), or NULL with *num == 0 if memory
 * could not be allocated.
 */
static void **get_backtrace(unsigned int *num)
{
	/* Remembered across calls so we only grow the buffer once. */
	static unsigned int max_back = 100;

	for (;;) {
		void **frames = malloc(max_back * sizeof(void *));

		if (!frames) {
			/* Degrade to "no backtrace" rather than crash. */
			*num = 0;
			return NULL;
		}

		*num = backtrace(frames, max_back);
		if (*num != max_back)
			return frames;

		/* Buffer was filled: it may be truncated, so retry larger. */
		free(frames);
		max_back *= 2;
	}
}
#else
/* This will test slightly less, since will consider all of the same
 * calls as identical.  But, it's slightly faster! */
static void **get_backtrace(unsigned int *num)
{
	*num = 0;
	return NULL;
}
#endif /* HAVE_BACKTRACE */
160
161 static struct failtest_call *add_history_(enum failtest_call_type type,
162                                           bool can_leak,
163                                           const char *file,
164                                           unsigned int line,
165                                           const void *elem,
166                                           size_t elem_size)
167 {
168         struct failtest_call *call;
169
170         /* NULL file is how we suppress failure. */
171         if (!file)
172                 return &unrecorded_call;
173
174         call = malloc(sizeof *call);
175         call->type = type;
176         call->can_leak = can_leak;
177         call->file = file;
178         call->line = line;
179         call->cleanup = NULL;
180         call->backtrace = get_backtrace(&call->backtrace_num);
181         memcpy(&call->u, elem, elem_size);
182         tlist_add_tail(&history, call, list);
183         return call;
184 }
185
/* Record a call; the argument struct's size is taken from its own type. */
#define add_history(kind, leaky, src_file, src_line, args)		\
	add_history_((kind), (leaky), (src_file), (src_line), (args),	\
		     sizeof(*(args)))

/*
 * Install a cleanup handler on a call.  The handler is "called" inside a
 * sizeof() (so never executed) purely to make the compiler check that it
 * accepts (arg_type *, bool).
 */
#define set_cleanup(entry, fn, arg_type)				\
	(entry)->cleanup = (void *)((void)sizeof(fn((arg_type *)NULL, false),1), (fn))
192
/*
 * Dup the fd to a high value (out of the way I hope!), and close the old fd.
 *
 * Scans downward for an unused descriptor number, starting just below
 * FD_SETSIZE or the process's open-file limit, whichever is lower: dup2()
 * onto a number at or above the limit fails, which used to be fatal.
 *
 * Returns the new descriptor, or @fd unchanged if no free slot was found.
 * Exits via err() if dup2() fails.
 */
static int move_fd_to_high(int fd)
{
	long limit = sysconf(_SC_OPEN_MAX);
	int i;

	/* Unknown limit, or one above FD_SETSIZE: keep the old starting point. */
	if (limit < 0 || limit > FD_SETSIZE)
		limit = FD_SETSIZE;

	for (i = (int)limit - 1; i >= 0; i--) {
		/* EBADF from F_GETFL means nothing is open at this number. */
		if (fcntl(i, F_GETFL) == -1 && errno == EBADF) {
			if (dup2(fd, i) == -1)
				err(1, "Failed to dup fd %i to %i", fd, i);
			close(fd);
			return i;
		}
	}
	/* Nothing?  Really?  Er... ok? */
	return fd;
}
209
210 static bool read_write_info(int fd)
211 {
212         struct write_call *w;
213         char *buf;
214
215         /* We don't need all of this, but it's simple. */
216         child_writes = realloc(child_writes,
217                                (child_writes_num+1) * sizeof(child_writes[0]));
218         w = &child_writes[child_writes_num];
219         if (!read_all(fd, w, sizeof(*w)))
220                 return false;
221
222         w->buf = buf = malloc(w->count);
223         if (!read_all(fd, buf, w->count))
224                 return false;
225
226         child_writes_num++;
227         return true;
228 }
229
230 static char *failpath_string(void)
231 {
232         struct failtest_call *i;
233         char *ret = strdup("");
234         unsigned len = 0;
235
236         /* Inefficient, but who cares? */
237         tlist_for_each(&history, i, list) {
238                 ret = realloc(ret, len + 2);
239                 ret[len] = info_to_arg[i->type];
240                 if (i->fail)
241                         ret[len] = toupper(ret[len]);
242                 ret[++len] = '\0';
243         }
244         return ret;
245 }
246
247 static void do_warn(int e, const char *fmt, va_list ap)
248 {
249         char *p = failpath_string();
250
251         vfprintf(warnf, fmt, ap);
252         if (e != -1)
253                 fprintf(warnf, ": %s", strerror(e));
254         fprintf(warnf, " [%s]\n", p);
255         free(p);
256 }
257
/* Warn with a printf-style message followed by the errno description. */
static void fwarn(const char *fmt, ...)
{
	/* Grab errno first, before anything else can change it. */
	int saved_errno = errno;
	va_list args;

	va_start(args, fmt);
	do_warn(saved_errno, fmt, args);
	va_end(args);
}
267
268
/* Warn with a printf-style message only (no errno description). */
static void fwarnx(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	/* -1 tells do_warn() to skip the strerror() suffix. */
	do_warn(-1, fmt, args);
	va_end(args);
}
277
278 static void tell_parent(enum info_type type)
279 {
280         if (control_fd != -1)
281                 write_all(control_fd, &type, sizeof(type));
282 }
283
284 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
285 {
286         va_list ap;
287         char *path = failpath_string();
288
289         va_start(ap, fmt);
290         vfprintf(stderr, fmt, ap);
291         va_end(ap);
292
293         fprintf(stderr, "%.*s", (int)outlen, out);
294         printf("To reproduce: --failpath=%s\n", path);
295         free(path);
296         tell_parent(FAILURE);
297         exit(1);
298 }
299
300 static void PRINTF_FMT(1, 2) trace(const char *fmt, ...)
301 {
302         va_list ap;
303         unsigned int i;
304         char *p;
305         static int idx;
306
307         if (!tracef)
308                 return;
309
310         for (i = 0; i < traceindent; i++)
311                 fprintf(tracef, "  ");
312
313         p = failpath_string();
314         fprintf(tracef, "%i: %u: %s ", idx++, getpid(), p);
315         va_start(ap, fmt);
316         vfprintf(tracef, fmt, ap);
317         va_end(ap);
318         free(p);
319 }
320
/* Pid of the child most recently forked by should_fail(). */
static pid_t child;

/* Signal handler: forward the signal we received to our child. */
static void hand_down(int signum)
{
	(void)kill(child, signum);
}
327
328 static void release_locks(void)
329 {
330         /* Locks were never acquired/reacquired? */
331         if (lock_owner == 0)
332                 return;
333
334         /* We own them?  Release them all. */
335         if (lock_owner == getpid()) {
336                 unsigned int i;
337                 struct flock fl;
338                 fl.l_type = F_UNLCK;
339                 fl.l_whence = SEEK_SET;
340                 fl.l_start = 0;
341                 fl.l_len = 0;
342
343                 trace("Releasing %u locks\n", lock_num);
344                 for (i = 0; i < lock_num; i++)
345                         fcntl(locks[i].fd, F_SETLK, &fl);
346         } else {
347                 /* Our parent must have them; pass request up. */
348                 enum info_type type = RELEASE_LOCKS;
349                 assert(control_fd != -1);
350                 write_all(control_fd, &type, sizeof(type));
351         }
352         lock_owner = 0;
353 }
354
/*
 * Return the largest value an off_t can hold.
 *
 * off_t is a signed type with no standard *_MAX macro, so build the value
 * from its width: every bit set except the sign bit.  (The previous
 * hard-coded constants were each one hex digit short of the real maximum.)
 */
static off_t off_max(void)
{
	/* POSIX fixes bytes at 8 bits; one bit of off_t is the sign. */
	const unsigned int value_bits = sizeof(off_t) * 8 - 1;

	/* The shift below is done in unsigned long long. */
	assert(sizeof(off_t) <= sizeof(unsigned long long));

	return (off_t)((1ULL << value_bits) - 1);
}
364
365 static void get_locks(void)
366 {
367         unsigned int i;
368         struct flock fl;
369
370         if (lock_owner == getpid())
371                 return;
372
373         if (lock_owner != 0) {
374                 enum info_type type = RELEASE_LOCKS;
375                 assert(control_fd != -1);
376                 trace("Asking parent to release locks\n");
377                 write_all(control_fd, &type, sizeof(type));
378         }
379
380         fl.l_whence = SEEK_SET;
381
382         for (i = 0; i < lock_num; i++) {
383                 fl.l_type = locks[i].type;
384                 fl.l_start = locks[i].start;
385                 if (locks[i].end == off_max())
386                         fl.l_len = 0;
387                 else
388                         fl.l_len = locks[i].end - locks[i].start + 1;
389
390                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
391                         abort();
392         }
393         trace("Acquired %u locks\n", lock_num);
394         lock_owner = getpid();
395 }
396
397
398 static struct contents_saved *save_contents(const char *filename,
399                                             int fd, size_t count, off_t off,
400                                             const char *why)
401 {
402         struct contents_saved *s = malloc(sizeof(*s) + count);
403         ssize_t ret;
404
405         s->off = off;
406
407         ret = pread(fd, s->contents, count, off);
408         if (ret < 0) {
409                 fwarn("failtest_write: failed to save old contents!");
410                 s->count = 0;
411         } else
412                 s->count = ret;
413
414         /* Use lseek to get the size of file, but we have to restore
415          * file offset */
416         off = lseek(fd, 0, SEEK_CUR);
417         s->old_len = lseek(fd, 0, SEEK_END);
418         lseek(fd, off, SEEK_SET);
419
420         trace("Saving %p %s %zu@%llu after %s (filelength %llu) via fd %i\n",
421               s, filename, s->count, (long long)s->off, why,
422               (long long)s->old_len, fd);
423         return s;
424 }
425
426 static void restore_contents(struct failtest_call *opener,
427                              struct contents_saved *s,
428                              bool restore_offset,
429                              const char *caller)
430 {
431         int fd;
432
433         /* The top parent doesn't need to restore. */
434         if (control_fd == -1)
435                 return;
436
437         /* Has the fd been closed? */
438         if (opener->u.open.closed) {
439                 /* Reopen, replace fd, close silently as we clean up. */
440                 fd = open(opener->u.open.pathname, O_RDWR);
441                 if (fd < 0) {
442                         fwarn("failtest: could not reopen %s to clean up %s!",
443                               opener->u.open.pathname, caller);
444                         return;
445                 }
446                 /* Make it clearly distinguisable from a "normal" fd. */
447                 fd = move_fd_to_high(fd);
448                 trace("Reopening %s to restore it (was fd %i, now %i)\n",
449                       opener->u.open.pathname, opener->u.open.ret, fd);
450                 opener->u.open.ret = fd;
451                 opener->u.open.closed = false;
452         }
453         fd = opener->u.open.ret;
454
455         trace("Restoring %p %s %zu@%llu after %s (filelength %llu) via fd %i\n",
456               s, opener->u.open.pathname, s->count, (long long)s->off, caller,
457               (long long)s->old_len, fd);
458         if (pwrite(fd, s->contents, s->count, s->off) != s->count) {
459                 fwarn("failtest: write failed cleaning up %s for %s!",
460                       opener->u.open.pathname, caller);
461         }
462
463         if (ftruncate(fd, s->old_len) != 0) {
464                 fwarn("failtest_write: truncate failed cleaning up %s for %s!",
465                       opener->u.open.pathname, caller);
466         }
467
468         if (restore_offset) {
469                 trace("Restoring offset of fd %i to %llu\n",
470                       fd, (long long)s->off);
471                 lseek(fd, s->off, SEEK_SET);
472         }
473 }
474
475 /* We save/restore most things on demand, but always do mmaped files. */
476 static void save_mmapped_files(void)
477 {
478         struct failtest_call *i;
479         trace("Saving mmapped files in child\n");
480
481         tlist_for_each_rev(&history, i, list) {
482                 struct mmap_call *m = &i->u.mmap;
483                 struct saved_mmapped_file *s;
484
485                 if (i->type != FAILTEST_MMAP)
486                         continue;
487
488                 /* FIXME: We only handle mmapped files where fd is still open. */
489                 if (m->opener->u.open.closed)
490                         continue;
491
492                 s = malloc(sizeof *s);
493                 s->s = save_contents(m->opener->u.open.pathname,
494                                      m->fd, m->length, m->offset,
495                                      "mmapped file before fork");
496                 s->opener = m->opener;
497                 s->next = saved_mmapped_files;
498                 saved_mmapped_files = s;
499         }
500 }
501
502 static void free_mmapped_files(bool restore)
503 {
504         trace("%s mmapped files in child\n",
505               restore ? "Restoring" : "Discarding");
506         while (saved_mmapped_files) {
507                 struct saved_mmapped_file *next = saved_mmapped_files->next;
508                 if (restore)
509                         restore_contents(saved_mmapped_files->opener,
510                                          saved_mmapped_files->s, false,
511                                          "saved mmap");
512                 free(saved_mmapped_files->s);
513                 free(saved_mmapped_files);
514                 saved_mmapped_files = next;
515         }
516 }
517
518 /* Returns a FAILTEST_OPEN, FAILTEST_PIPE or NULL. */
519 static struct failtest_call *opener_of(int fd)
520 {
521         struct failtest_call *i;
522
523         /* Don't get confused and match genuinely failed opens. */
524         if (fd < 0)
525                 return NULL;
526
527         /* Figure out the set of live fds. */
528         tlist_for_each_rev(&history, i, list) {
529                 if (i->fail)
530                         continue;
531                 switch (i->type) {
532                 case FAILTEST_CLOSE:
533                         if (i->u.close.fd == fd) {
534                                 return NULL;
535                         }
536                         break;
537                 case FAILTEST_OPEN:
538                         if (i->u.open.ret == fd) {
539                                 if (i->u.open.closed)
540                                         return NULL;
541                                 return i;
542                         }
543                         break;
544                 case FAILTEST_PIPE:
545                         if (i->u.pipe.fds[0] == fd || i->u.pipe.fds[1] == fd) {
546                                 return i;
547                         }
548                         break;
549                 default:
550                         break;
551                 }
552         }
553
554         /* FIXME: socket, dup, etc are untracked! */
555         return NULL;
556 }
557
558 static void free_call(struct failtest_call *call)
559 {
560         /* We don't do this in cleanup: needed even for failed opens. */
561         if (call->type == FAILTEST_OPEN)
562                 free((char *)call->u.open.pathname);
563         free(call->backtrace);
564         tlist_del_from(&history, call, list);
565         free(call);
566 }
567
568 /* Free up memory, so valgrind doesn't report leaks. */
569 static void free_everything(void)
570 {
571         struct failtest_call *i;
572
573         while ((i = tlist_top(&history, list)) != NULL)
574                 free_call(i);
575
576         failtable_clear(&failtable);
577 }
578
579 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
580 {
581         struct failtest_call *i;
582         bool restore = true;
583
584         /* For children, we don't care if they "failed" the testing. */
585         if (control_fd != -1)
586                 status = 0;
587         else
588                 /* We don't restore contents for original parent. */
589                 restore = false;
590
591         /* Cleanup everything, in reverse order. */
592         tlist_for_each_rev(&history, i, list) {
593                 /* Don't restore things our parent did. */
594                 if (i == our_history_start)
595                         restore = false;
596
597                 if (i->fail)
598                         continue;
599
600                 if (i->cleanup)
601                         i->cleanup(&i->u, restore);
602
603                 /* But their program shouldn't leak, even on failure. */
604                 if (!forced_cleanup && i->can_leak) {
605                         printf("Leak at %s:%u: --failpath=%s\n",
606                                i->file, i->line, failpath_string());
607                         status = 1;
608                 }
609         }
610
611         /* Put back mmaped files the way our parent (if any) expects. */
612         free_mmapped_files(true);
613
614         free_everything();
615         if (status == 0)
616                 tell_parent(SUCCESS);
617         else
618                 tell_parent(FAILURE);
619         exit(status);
620 }
621
622 static bool following_path(void)
623 {
624         if (!failpath)
625                 return false;
626         /* + means continue after end, like normal. */
627         if (*failpath == '+') {
628                 failpath = NULL;
629                 return false;
630         }
631         return true;
632 }
633
634 static bool follow_path(struct failtest_call *call)
635 {
636         if (*failpath == '\0') {
637                 /* Continue, but don't inject errors. */
638                 return call->fail = false;
639         }
640
641         if (tolower((unsigned char)*failpath) != info_to_arg[call->type])
642                 errx(1, "Failpath expected '%s' got '%c'\n",
643                      failpath, info_to_arg[call->type]);
644         call->fail = cisupper(*(failpath++));
645                         if (call->fail)
646                                 call->can_leak = false;
647         return call->fail;
648 }
649
650 static bool should_fail(struct failtest_call *call)
651 {
652         int status;
653         int control[2], output[2];
654         enum info_type type = UNEXPECTED;
655         char *out = NULL;
656         size_t outlen = 0;
657         struct failtest_call *dup;
658
659         if (call == &unrecorded_call)
660                 return false;
661
662         if (following_path())
663                 return follow_path(call);
664
665         /* Attach debugger if they asked for it. */
666         if (debugpath) {
667                 char *path;
668
669                 /* Pretend this last call matches whatever path wanted:
670                  * keeps valgrind happy. */
671                 call->fail = cisupper(debugpath[strlen(debugpath)-1]);
672                 path = failpath_string();
673
674                 if (streq(path, debugpath)) {
675                         char str[80];
676
677                         /* Don't timeout. */
678                         signal(SIGUSR1, SIG_IGN);
679                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
680                                 getpid(), getpid());
681                         if (system(str) == 0)
682                                 sleep(5);
683                 } else {
684                         /* Ignore last character: could be upper or lower. */
685                         path[strlen(path)-1] = '\0';
686                         if (!strstarts(debugpath, path)) {
687                                 fprintf(stderr,
688                                         "--debugpath not followed: %s\n", path);
689                                 debugpath = NULL;
690                         }
691                 }
692                 free(path);
693         }
694
695         /* Are we probing?  If so, we never fail twice. */
696         if (probing) {
697                 trace("Not failing %c due to FAIL_PROBE return\n",
698                       info_to_arg[call->type]);
699                 return call->fail = false;
700         }
701
702         /* Don't fail more than once in the same place. */
703         dup = failtable_get(&failtable, call);
704         if (dup) {
705                 trace("Not failing %c due to duplicate\n",
706                       info_to_arg[call->type]);
707                 return call->fail = false;
708         }
709
710         if (failtest_hook) {
711                 switch (failtest_hook(&history)) {
712                 case FAIL_OK:
713                         break;
714                 case FAIL_PROBE:
715                         probing = true;
716                         break;
717                 case FAIL_DONT_FAIL:
718                         trace("Not failing %c due to failhook return\n",
719                               info_to_arg[call->type]);
720                         call->fail = false;
721                         return false;
722                 default:
723                         abort();
724                 }
725         }
726
727         /* Add it to our table of calls. */
728         failtable_add(&failtable, call);
729
730         /* We're going to fail in the child. */
731         call->fail = true;
732         if (pipe(control) != 0 || pipe(output) != 0)
733                 err(1, "opening pipe");
734
735         /* Move out the way, to high fds. */
736         control[0] = move_fd_to_high(control[0]);
737         control[1] = move_fd_to_high(control[1]);
738         output[0] = move_fd_to_high(output[0]);
739         output[1] = move_fd_to_high(output[1]);
740
741         /* Prevent double-printing (in child and parent) */
742         fflush(stdout);
743         fflush(warnf);
744         if (tracef)
745                 fflush(tracef);
746         child = fork();
747         if (child == -1)
748                 err(1, "forking failed");
749
750         if (child == 0) {
751                 traceindent++;
752                 if (tracef) {
753                         struct timeval diff;
754                         const char *p;
755                         char *failpath;
756                         struct failtest_call *c;
757
758                         c = tlist_tail(&history, list);
759                         diff = time_sub(time_now(), start);
760                         failpath = failpath_string();
761                         p = strrchr(c->file, '/');
762                         if (p)
763                                 p++;
764                         else
765                                 p = c->file;
766                         trace("%u->%u (%u.%02u): %s (%s:%u)\n",
767                               getppid(), getpid(),
768                               (int)diff.tv_sec, (int)diff.tv_usec / 10000,
769                               failpath, p, c->line);
770                         free(failpath);
771                 }
772                 /* From here on, we have to clean up! */
773                 our_history_start = tlist_tail(&history, list);
774                 close(control[0]);
775                 close(output[0]);
776                 /* Don't swallow stderr if we're tracing. */
777                 if (!tracef) {
778                         dup2(output[1], STDOUT_FILENO);
779                         dup2(output[1], STDERR_FILENO);
780                         if (output[1] != STDOUT_FILENO
781                             && output[1] != STDERR_FILENO)
782                                 close(output[1]);
783                 }
784                 control_fd = move_fd_to_high(control[1]);
785
786                 /* Forget any of our parent's saved files. */
787                 free_mmapped_files(false);
788
789                 /* Now, save any files we need to. */
790                 save_mmapped_files();
791
792                 /* Failed calls can't leak. */
793                 call->can_leak = false;
794
795                 return true;
796         }
797
798         signal(SIGUSR1, hand_down);
799
800         close(control[1]);
801         close(output[1]);
802
803         /* We grab output so we can display it; we grab writes so we
804          * can compare. */
805         do {
806                 struct pollfd pfd[2];
807                 int ret;
808
809                 pfd[0].fd = output[0];
810                 pfd[0].events = POLLIN|POLLHUP;
811                 pfd[1].fd = control[0];
812                 pfd[1].events = POLLIN|POLLHUP;
813
814                 if (type == SUCCESS)
815                         ret = poll(pfd, 1, failtest_timeout_ms);
816                 else
817                         ret = poll(pfd, 2, failtest_timeout_ms);
818
819                 if (ret == 0)
820                         hand_down(SIGUSR1);
821                 if (ret < 0) {
822                         if (errno == EINTR)
823                                 continue;
824                         err(1, "Poll returned %i", ret);
825                 }
826
827                 if (pfd[0].revents & POLLIN) {
828                         ssize_t len;
829
830                         out = realloc(out, outlen + 8192);
831                         len = read(output[0], out + outlen, 8192);
832                         outlen += len;
833                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
834                         if (read_all(control[0], &type, sizeof(type))) {
835                                 if (type == WRITE) {
836                                         if (!read_write_info(control[0]))
837                                                 break;
838                                 } else if (type == RELEASE_LOCKS) {
839                                         release_locks();
840                                         /* FIXME: Tell them we're done... */
841                                 }
842                         }
843                 } else if (pfd[0].revents & POLLHUP) {
844                         break;
845                 }
846         } while (type != FAILURE);
847
848         close(output[0]);
849         close(control[0]);
850         waitpid(child, &status, 0);
851         if (!WIFEXITED(status)) {
852                 if (WTERMSIG(status) == SIGUSR1)
853                         child_fail(out, outlen, "Timed out");
854                 else
855                         child_fail(out, outlen, "Killed by signal %u: ",
856                                    WTERMSIG(status));
857         }
858         /* Child printed failure already, just pass up exit code. */
859         if (type == FAILURE) {
860                 fprintf(stderr, "%.*s", (int)outlen, out);
861                 tell_parent(type);
862                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
863         }
864         if (WEXITSTATUS(status) != 0)
865                 child_fail(out, outlen, "Exited with status %i: ",
866                            WEXITSTATUS(status));
867
868         free(out);
869         signal(SIGUSR1, SIG_DFL);
870
871         /* Only child does probe. */
872         probing = false;
873
874         /* We continue onwards without failing. */
875         call->fail = false;
876         return false;
877 }
878
879 static void cleanup_calloc(struct calloc_call *call, bool restore)
880 {
881         trace("undoing calloc %p\n", call->ret);
882         free(call->ret);
883 }
884
885 void *failtest_calloc(size_t nmemb, size_t size,
886                       const char *file, unsigned line)
887 {
888         struct failtest_call *p;
889         struct calloc_call call;
890         call.nmemb = nmemb;
891         call.size = size;
892         p = add_history(FAILTEST_CALLOC, true, file, line, &call);
893
894         if (should_fail(p)) {
895                 p->u.calloc.ret = NULL;
896                 p->error = ENOMEM;
897         } else {
898                 p->u.calloc.ret = calloc(nmemb, size);
899                 set_cleanup(p, cleanup_calloc, struct calloc_call);
900         }
901         trace("calloc %zu x %zu %s:%u -> %p\n",
902               nmemb, size, file, line, p->u.calloc.ret);
903         errno = p->error;
904         return p->u.calloc.ret;
905 }
906
907 static void cleanup_malloc(struct malloc_call *call, bool restore)
908 {
909         trace("undoing malloc %p\n", call->ret);
910         free(call->ret);
911 }
912
913 void *failtest_malloc(size_t size, const char *file, unsigned line)
914 {
915         struct failtest_call *p;
916         struct malloc_call call;
917         call.size = size;
918
919         p = add_history(FAILTEST_MALLOC, true, file, line, &call);
920         if (should_fail(p)) {
921                 p->u.malloc.ret = NULL;
922                 p->error = ENOMEM;
923         } else {
924                 p->u.malloc.ret = malloc(size);
925                 set_cleanup(p, cleanup_malloc, struct malloc_call);
926         }
927         trace("malloc %zu %s:%u -> %p\n",
928               size, file, line, p->u.malloc.ret);
929         errno = p->error;
930         return p->u.malloc.ret;
931 }
932
933 static void cleanup_realloc(struct realloc_call *call, bool restore)
934 {
935         trace("undoing realloc %p\n", call->ret);
936         free(call->ret);
937 }
938
939 /* Walk back and find out if we got this ptr from a previous routine. */
940 static void fixup_ptr_history(void *ptr, const char *why)
941 {
942         struct failtest_call *i;
943
944         /* Start at end of history, work back. */
945         tlist_for_each_rev(&history, i, list) {
946                 switch (i->type) {
947                 case FAILTEST_REALLOC:
948                         if (i->u.realloc.ret == ptr) {
949                                 trace("found realloc %p %s:%u matching %s\n",
950                                       ptr, i->file, i->line, why);
951                                 i->cleanup = NULL;
952                                 i->can_leak = false;
953                                 return;
954                         }
955                         break;
956                 case FAILTEST_MALLOC:
957                         if (i->u.malloc.ret == ptr) {
958                                 trace("found malloc %p %s:%u matching %s\n",
959                                       ptr, i->file, i->line, why);
960                                 i->cleanup = NULL;
961                                 i->can_leak = false;
962                                 return;
963                         }
964                         break;
965                 case FAILTEST_CALLOC:
966                         if (i->u.calloc.ret == ptr) {
967                                 trace("found calloc %p %s:%u matching %s\n",
968                                       ptr, i->file, i->line, why);
969                                 i->cleanup = NULL;
970                                 i->can_leak = false;
971                                 return;
972                         }
973                         break;
974                 default:
975                         break;
976                 }
977         }
978         trace("Did not find %p matching %s\n", ptr, why);
979 }
980
981 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
982 {
983         struct failtest_call *p;
984         struct realloc_call call;
985         call.size = size;
986         p = add_history(FAILTEST_REALLOC, true, file, line, &call);
987
988         /* FIXME: Try one child moving allocation, one not. */
989         if (should_fail(p)) {
990                 p->u.realloc.ret = NULL;
991                 p->error = ENOMEM;
992         } else {
993                 /* Don't catch this one in the history fixup... */
994                 p->u.realloc.ret = NULL;
995                 fixup_ptr_history(ptr, "realloc");
996                 p->u.realloc.ret = realloc(ptr, size);
997                 set_cleanup(p, cleanup_realloc, struct realloc_call);
998         }
999         trace("realloc %p %s:%u -> %p\n",
1000               ptr, file, line, p->u.realloc.ret);
1001         errno = p->error;
1002         return p->u.realloc.ret;
1003 }
1004
/* free() wrapper.  Before releasing @ptr, the matching allocation in the
 * history (if any) is told not to clean it up again.
 *
 * FIXME: Record free, so we can terminate fixup_ptr_history correctly.
 * If there's an alloc we don't see, it could get confusing if it matches
 * a previous allocation we did see. */
void failtest_free(void *ptr)
{
	const char *reason = "free";

	fixup_ptr_history(ptr, reason);
	trace("free %p\n", ptr);
	free(ptr);
}
1014
1015
/* Snapshot the whole of @pathname via save_contents(), using the offset
 * returned by seeking to the end as the length.
 * Returns NULL if the file cannot be opened read-only; otherwise whatever
 * save_contents() returns. */
static struct contents_saved *save_file(const char *pathname)
{
	struct contents_saved *saved;
	off_t len;
	int fd = open(pathname, O_RDONLY);

	if (fd < 0)
		return NULL;

	len = lseek(fd, 0, SEEK_END);
	saved = save_contents(pathname, fd, len, 0, "open with O_TRUNC");
	close(fd);
	return saved;
}
1030
/* Optimization: don't create a child for an open which *we know*
 * would fail anyway.
 *
 * Returns true only when access() shows that the requested access mode is
 * not available on @pathname.  With O_CREAT the file may simply be created
 * by the open, so nothing is predicted in that case and false is returned
 * (this includes O_RDONLY|O_CREAT on a missing file). */
static bool open_would_fail(const char *pathname, int flags)
{
	int accmode = flags & O_ACCMODE;

	/* FIXME: We could check if it exists, for O_CREAT|O_EXCL */
	if (flags & O_CREAT)
		return false;

	if (accmode == O_RDONLY)
		return access(pathname, R_OK) != 0;
	if (accmode == O_WRONLY)
		return access(pathname, W_OK) != 0;
	if (accmode == O_RDWR)
		return access(pathname, W_OK) != 0
			|| access(pathname, R_OK) != 0;

	return false;
}
1047
1048 static void cleanup_open(struct open_call *call, bool restore)
1049 {
1050         if (restore && call->saved)
1051                 restore_contents(container_of(call, struct failtest_call,
1052                                               u.open),
1053                                  call->saved, false, "open with O_TRUNC");
1054         if (!call->closed) {
1055                 trace("Cleaning up open %s by closing fd %i\n",
1056                       call->pathname, call->ret);
1057                 close(call->ret);
1058                 call->closed = true;
1059         }
1060         free(call->saved);
1061 }
1062
1063 int failtest_open(const char *pathname,
1064                   const char *file, unsigned line, ...)
1065 {
1066         struct failtest_call *p;
1067         struct open_call call;
1068         va_list ap;
1069
1070         call.pathname = strdup(pathname);
1071         va_start(ap, line);
1072         call.flags = va_arg(ap, int);
1073         call.always_save = false;
1074         call.closed = false;
1075         if (call.flags & O_CREAT) {
1076                 call.mode = va_arg(ap, int);
1077                 va_end(ap);
1078         }
1079         p = add_history(FAILTEST_OPEN, true, file, line, &call);
1080         /* Avoid memory leak! */
1081         if (p == &unrecorded_call)
1082                 free((char *)call.pathname);
1083
1084         if (should_fail(p)) {
1085                 /* Don't bother inserting failures that would happen anyway. */
1086                 if (open_would_fail(pathname, call.flags)) {
1087                         trace("Open would have failed anyway: stopping\n");
1088                         failtest_cleanup(true, 0);
1089                 }
1090                 p->u.open.ret = -1;
1091                 /* FIXME: Play with error codes? */
1092                 p->error = EACCES;
1093         } else {
1094                 /* Save the old version if they're truncating it. */
1095                 if (call.flags & O_TRUNC)
1096                         p->u.open.saved = save_file(pathname);
1097                 else
1098                         p->u.open.saved = NULL;
1099                 p->u.open.ret = open(pathname, call.flags, call.mode);
1100                 if (p->u.open.ret == -1) {
1101                         p->u.open.closed = true;
1102                         p->can_leak = false;
1103                 } else {
1104                         set_cleanup(p, cleanup_open, struct open_call);
1105                 }
1106         }
1107         trace("open %s %s:%u -> %i (opener %p)\n",
1108               pathname, file, line, p->u.open.ret, &p->u.open);
1109         errno = p->error;
1110         return p->u.open.ret;
1111 }
1112
1113 static void cleanup_mmap(struct mmap_call *mmap, bool restore)
1114 {
1115         trace("cleaning up mmap @%p (opener %p)\n",
1116               mmap->ret, mmap->opener);
1117         if (restore)
1118                 restore_contents(mmap->opener, mmap->saved, false, "mmap");
1119         free(mmap->saved);
1120 }
1121
1122 void *failtest_mmap(void *addr, size_t length, int prot, int flags,
1123                     int fd, off_t offset, const char *file, unsigned line)
1124 {
1125         struct failtest_call *p;
1126         struct mmap_call call;
1127
1128         call.addr = addr;
1129         call.length = length;
1130         call.prot = prot;
1131         call.flags = flags;
1132         call.offset = offset;
1133         call.fd = fd;
1134         call.opener = opener_of(fd);
1135
1136         /* If we don't know what file it was, don't fail. */
1137         if (!call.opener) {
1138                 if (fd != -1) {
1139                         fwarnx("failtest_mmap: couldn't figure out source for"
1140                                " fd %i at %s:%u", fd, file, line);
1141                 }
1142                 addr = mmap(addr, length, prot, flags, fd, offset);
1143                 trace("mmap of fd %i -> %p (opener = NULL)\n", fd, addr);
1144                 return addr;
1145         }
1146
1147         p = add_history(FAILTEST_MMAP, false, file, line, &call);
1148         if (should_fail(p)) {
1149                 p->u.mmap.ret = MAP_FAILED;
1150                 p->error = ENOMEM;
1151         } else {
1152                 p->u.mmap.ret = mmap(addr, length, prot, flags, fd, offset);
1153                 /* Save contents if we're writing to a normal file */
1154                 if (p->u.mmap.ret != MAP_FAILED
1155                     && (prot & PROT_WRITE)
1156                     && call.opener->type == FAILTEST_OPEN) {
1157                         const char *fname = call.opener->u.open.pathname;
1158                         p->u.mmap.saved = save_contents(fname, fd, length,
1159                                                         offset, "being mmapped");
1160                         set_cleanup(p, cleanup_mmap, struct mmap_call);
1161                 }
1162         }
1163         trace("mmap of fd %i %s:%u -> %p (opener = %p)\n",
1164               fd, file, line, addr, call.opener);
1165         errno = p->error;
1166         return p->u.mmap.ret;
1167 }
1168
1169 static void cleanup_pipe(struct pipe_call *call, bool restore)
1170 {
1171         trace("cleaning up pipe fd=%i%s,%i%s\n",
1172               call->fds[0], call->closed[0] ? "(already closed)" : "",
1173               call->fds[1], call->closed[1] ? "(already closed)" : "");
1174         if (!call->closed[0])
1175                 close(call->fds[0]);
1176         if (!call->closed[1])
1177                 close(call->fds[1]);
1178 }
1179
1180 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
1181 {
1182         struct failtest_call *p;
1183         struct pipe_call call;
1184
1185         p = add_history(FAILTEST_PIPE, true, file, line, &call);
1186         if (should_fail(p)) {
1187                 p->u.open.ret = -1;
1188                 /* FIXME: Play with error codes? */
1189                 p->error = EMFILE;
1190         } else {
1191                 p->u.pipe.ret = pipe(p->u.pipe.fds);
1192                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
1193                 set_cleanup(p, cleanup_pipe, struct pipe_call);
1194         }
1195
1196         trace("pipe %s:%u -> %i,%i\n", file, line,
1197               p->u.pipe.ret ? -1 : p->u.pipe.fds[0],
1198               p->u.pipe.ret ? -1 : p->u.pipe.fds[1]);
1199
1200         /* This causes valgrind to notice if they use pipefd[] after failure */
1201         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
1202         errno = p->error;
1203         return p->u.pipe.ret;
1204 }
1205
1206 static void cleanup_read(struct read_call *call, bool restore)
1207 {
1208         if (restore) {
1209                 trace("cleaning up read on fd %i: seeking to %llu\n",
1210                       call->fd, (long long)call->off);
1211
1212                 /* Read (not readv!) moves file offset! */
1213                 if (lseek(call->fd, call->off, SEEK_SET) != call->off) {
1214                         fwarn("Restoring lseek pointer failed (read)");
1215                 }
1216         }
1217 }
1218
1219 static ssize_t failtest_add_read(int fd, void *buf, size_t count, off_t off,
1220                                  bool is_pread, const char *file, unsigned line)
1221 {
1222         struct failtest_call *p;
1223         struct read_call call;
1224         call.fd = fd;
1225         call.buf = buf;
1226         call.count = count;
1227         call.off = off;
1228         p = add_history(FAILTEST_READ, false, file, line, &call);
1229
1230         /* FIXME: Try partial read returns. */
1231         if (should_fail(p)) {
1232                 p->u.read.ret = -1;
1233                 p->error = EIO;
1234         } else {
1235                 if (is_pread)
1236                         p->u.read.ret = pread(fd, buf, count, off);
1237                 else {
1238                         p->u.read.ret = read(fd, buf, count);
1239                         if (p->u.read.ret != -1)
1240                                 set_cleanup(p, cleanup_read, struct read_call);
1241                 }
1242         }
1243         trace("%sread %s:%u fd %i %zu@%llu -> %i\n",
1244               is_pread ? "p" : "", file, line, fd, count, (long long)off,
1245               p->u.read.ret);
1246         errno = p->error;
1247         return p->u.read.ret;
1248 }
1249
1250 static void cleanup_write(struct write_call *write, bool restore)
1251 {
1252         trace("cleaning up write on %s\n", write->opener->u.open.pathname);
1253         if (restore)
1254                 restore_contents(write->opener, write->saved, !write->is_pwrite,
1255                                  "write");
1256         free(write->saved);
1257 }
1258
1259 static ssize_t failtest_add_write(int fd, const void *buf,
1260                                   size_t count, off_t off,
1261                                   bool is_pwrite,
1262                                   const char *file, unsigned line)
1263 {
1264         struct failtest_call *p;
1265         struct write_call call;
1266
1267         call.fd = fd;
1268         call.buf = buf;
1269         call.count = count;
1270         call.off = off;
1271         call.is_pwrite = is_pwrite;
1272         call.opener = opener_of(fd);
1273         p = add_history(FAILTEST_WRITE, false, file, line, &call);
1274
1275         /* If we're a child, we need to make sure we write the same thing
1276          * to non-files as the parent does, so tell it. */
1277         if (control_fd != -1 && off == (off_t)-1) {
1278                 enum info_type type = WRITE;
1279
1280                 write_all(control_fd, &type, sizeof(type));
1281                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
1282                 write_all(control_fd, buf, count);
1283         }
1284
1285         /* FIXME: Try partial write returns. */
1286         if (should_fail(p)) {
1287                 p->u.write.ret = -1;
1288                 p->error = EIO;
1289         } else {
1290                 bool is_file;
1291                 assert(call.opener == p->u.write.opener);
1292
1293                 if (p->u.write.opener) {
1294                         is_file = (p->u.write.opener->type == FAILTEST_OPEN);
1295                 } else {
1296                         /* We can't unwind it, so at least check same
1297                          * in parent and child. */
1298                         is_file = false;
1299                 }
1300
1301                 /* FIXME: We assume same write order in parent and child */
1302                 if (!is_file && child_writes_num != 0) {
1303                         if (child_writes[0].fd != fd)
1304                                 errx(1, "Child wrote to fd %u, not %u?",
1305                                      child_writes[0].fd, fd);
1306                         if (child_writes[0].off != p->u.write.off)
1307                                 errx(1, "Child wrote to offset %zu, not %zu?",
1308                                      (size_t)child_writes[0].off,
1309                                      (size_t)p->u.write.off);
1310                         if (child_writes[0].count != count)
1311                                 errx(1, "Child wrote length %zu, not %zu?",
1312                                      child_writes[0].count, count);
1313                         if (memcmp(child_writes[0].buf, buf, count)) {
1314                                 child_fail(NULL, 0,
1315                                            "Child wrote differently to"
1316                                            " fd %u than we did!\n", fd);
1317                         }
1318                         free((char *)child_writes[0].buf);
1319                         child_writes_num--;
1320                         memmove(&child_writes[0], &child_writes[1],
1321                                 sizeof(child_writes[0]) * child_writes_num);
1322
1323                         /* Child wrote it already. */
1324                         trace("write %s:%i on fd %i already done by child\n",
1325                               file, line, fd);
1326                         p->u.write.ret = count;
1327                         errno = p->error;
1328                         return p->u.write.ret;
1329                 }
1330
1331                 if (is_file) {
1332                         p->u.write.saved = save_contents(call.opener->u.open.pathname,
1333                                                          fd, count, off,
1334                                                          "being overwritten");
1335                         set_cleanup(p, cleanup_write, struct write_call);
1336                 }
1337
1338                 /* Though off is current seek ptr for write case, we need to
1339                  * move it.  write() does that for us. */
1340                 if (p->u.write.is_pwrite)
1341                         p->u.write.ret = pwrite(fd, buf, count, off);
1342                 else
1343                         p->u.write.ret = write(fd, buf, count);
1344         }
1345         trace("%swrite %s:%i %zu@%llu on fd %i -> %i\n",
1346               p->u.write.is_pwrite ? "p" : "",
1347               file, line, count, (long long)off, fd, p->u.write.ret);
1348         errno = p->error;
1349         return p->u.write.ret;
1350 }
1351
/* pwrite() wrapper: forwards to failtest_add_write() with is_pwrite set. */
ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t offset,
			const char *file, unsigned line)
{
	const bool positional = true;

	return failtest_add_write(fd, buf, count, offset, positional,
				  file, line);
}
1357
/* write() wrapper: forwards to failtest_add_write() with the descriptor's
 * current offset (as reported by lseek) and is_pwrite clear. */
ssize_t failtest_write(int fd, const void *buf, size_t count,
		       const char *file, unsigned line)
{
	off_t here = lseek(fd, 0, SEEK_CUR);

	return failtest_add_write(fd, buf, count, here, false, file, line);
}
1364
/* pread() wrapper: forwards to failtest_add_read() with is_pread set. */
ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
		       const char *file, unsigned line)
{
	const bool positional = true;

	return failtest_add_read(fd, buf, count, off, positional, file, line);
}
1370
/* read() wrapper: forwards to failtest_add_read() with the descriptor's
 * current offset (as reported by lseek) and is_pread clear. */
ssize_t failtest_read(int fd, void *buf, size_t count,
		      const char *file, unsigned line)
{
	off_t here = lseek(fd, 0, SEEK_CUR);

	return failtest_add_read(fd, buf, count, here, false, file, line);
}
1377
1378 static struct lock_info *WARN_UNUSED_RESULT
1379 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
1380 {
1381         unsigned int i;
1382         struct lock_info *l;
1383
1384         for (i = 0; i < lock_num; i++) {
1385                 l = &locks[i];
1386
1387                 if (l->fd != fd)
1388                         continue;
1389                 /* Four cases we care about:
1390                  * Start overlap:
1391                  *      l =    |      |
1392                  *      new = |   |
1393                  * Mid overlap:
1394                  *      l =    |      |
1395                  *      new =    |  |
1396                  * End overlap:
1397                  *      l =    |      |
1398                  *      new =      |    |
1399                  * Total overlap:
1400                  *      l =    |      |
1401                  *      new = |         |
1402                  */
1403                 if (start > l->start && end < l->end) {
1404                         /* Mid overlap: trim entry, add new one. */
1405                         off_t new_start, new_end;
1406                         new_start = end + 1;
1407                         new_end = l->end;
1408                         trace("splitting lock on fd %i from %llu-%llu"
1409                               " to %llu-%llu\n",
1410                               fd, (long long)l->start, (long long)l->end,
1411                               (long long)l->start, (long long)start - 1);
1412                         l->end = start - 1;
1413                         locks = add_lock(locks,
1414                                          fd, new_start, new_end, l->type);
1415                         l = &locks[i];
1416                 } else if (start <= l->start && end >= l->end) {
1417                         /* Total overlap: eliminate entry. */
1418                         trace("erasing lock on fd %i %llu-%llu\n",
1419                               fd, (long long)l->start, (long long)l->end);
1420                         l->end = 0;
1421                         l->start = 1;
1422                 } else if (end >= l->start && end < l->end) {
1423                         trace("trimming lock on fd %i from %llu-%llu"
1424                               " to %llu-%llu\n",
1425                               fd, (long long)l->start, (long long)l->end,
1426                               (long long)end + 1, (long long)l->end);
1427                         /* Start overlap: trim entry. */
1428                         l->start = end + 1;
1429                 } else if (start > l->start && start <= l->end) {
1430                         trace("trimming lock on fd %i from %llu-%llu"
1431                               " to %llu-%llu\n",
1432                               fd, (long long)l->start, (long long)l->end,
1433                               (long long)l->start, (long long)start - 1);
1434                         /* End overlap: trim entry. */
1435                         l->end = start-1;
1436                 }
1437                 /* Nothing left?  Remove it. */
1438                 if (l->end < l->start) {
1439                         trace("forgetting lock on fd %i\n", fd);
1440                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
1441                         i--;
1442                 }
1443         }
1444
1445         if (type != F_UNLCK) {
1446                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
1447                 l = &locks[lock_num++];
1448                 l->fd = fd;
1449                 l->start = start;
1450                 l->end = end;
1451                 l->type = type;
1452                 trace("new lock on fd %i %llu-%llu\n",
1453                       fd, (long long)l->start, (long long)l->end);
1454         }
1455         return locks;
1456 }
1457
1458 /* We trap this so we can record it: we don't fail it. */
1459 int failtest_close(int fd, const char *file, unsigned line)
1460 {
1461         struct close_call call;
1462         struct failtest_call *p, *opener;
1463
1464         /* Do this before we add ourselves to history! */
1465         opener = opener_of(fd);
1466
1467         call.fd = fd;
1468         p = add_history(FAILTEST_CLOSE, false, file, line, &call);
1469         p->fail = false;
1470
1471         /* Consume close from failpath (shouldn't tell us to fail). */
1472         if (following_path()) {
1473                 if (follow_path(p))
1474                         abort();
1475         }
1476
1477         trace("close on fd %i\n", fd);
1478         if (fd < 0)
1479                 return close(fd);
1480
1481         /* Mark opener as not leaking, remove its cleanup function. */
1482         if (opener) {
1483                 trace("close on fd %i found opener %p\n", fd, opener);
1484                 if (opener->type == FAILTEST_PIPE) {
1485                         /* From a pipe? */
1486                         if (opener->u.pipe.fds[0] == fd) {
1487                                 assert(!opener->u.pipe.closed[0]);
1488                                 opener->u.pipe.closed[0] = true;
1489                         } else if (opener->u.pipe.fds[1] == fd) {
1490                                 assert(!opener->u.pipe.closed[1]);
1491                                 opener->u.pipe.closed[1] = true;
1492                         } else
1493                                 abort();
1494                         opener->can_leak = (!opener->u.pipe.closed[0]
1495                                             || !opener->u.pipe.closed[1]);
1496                 } else if (opener->type == FAILTEST_OPEN) {
1497                         opener->u.open.closed = true;
1498                         opener->can_leak = false;
1499                 } else
1500                         abort();
1501         }
1502
1503         /* Restore offset now, in case parent shared (can't do after close!). */
1504         if (control_fd != -1) {
1505                 struct failtest_call *i;
1506
1507                 tlist_for_each_rev(&history, i, list) {
1508                         if (i == our_history_start)
1509                                 break;
1510                         if (i == opener)
1511                                 break;
1512                         if (i->type == FAILTEST_LSEEK && i->u.lseek.fd == fd) {
1513                                 trace("close on fd %i undoes lseek\n", fd);
1514                                 /* This seeks back. */
1515                                 i->cleanup(&i->u, true);
1516                                 i->cleanup = NULL;
1517                         } else if (i->type == FAILTEST_WRITE
1518                                    && i->u.write.fd == fd
1519                                    && !i->u.write.is_pwrite) {
1520                                 trace("close on fd %i undoes write"
1521                                       " offset change\n", fd);
1522                                 /* Write (not pwrite!) moves file offset! */
1523                                 if (lseek(fd, i->u.write.off, SEEK_SET)
1524                                     != i->u.write.off) {
1525                                         fwarn("Restoring lseek pointer failed (write)");
1526                                 }
1527                         } else if (i->type == FAILTEST_READ
1528                                    && i->u.read.fd == fd) {
1529                                 /* preads don't *have* cleanups */
1530                                 if (i->cleanup) {
1531                                         trace("close on fd %i undoes read"
1532                                               " offset change\n", fd);
1533                                         /* This seeks back. */
1534                                         i->cleanup(&i->u, true);
1535                                         i->cleanup = NULL;
1536                                 }
1537                         }
1538                 }
1539         }
1540
1541         /* Close unlocks everything. */
1542         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
1543         return close(fd);
1544 }
1545
/* Inclusive end offset of a range starting at @start and @len bytes long.
 * Zero length means "to end of file", represented here by off_max(). */
static off_t end_of(off_t start, off_t len)
{
	return len == 0 ? off_max() : start + len - 1;
}
1553
1554 /* FIXME: This only handles locks, really. */
1555 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
1556 {
1557         struct failtest_call *p;
1558         struct fcntl_call call;
1559         va_list ap;
1560
1561         call.fd = fd;
1562         call.cmd = cmd;
1563
1564         /* Argument extraction. */
1565         switch (cmd) {
1566         case F_SETFL:
1567         case F_SETFD:
1568                 va_start(ap, cmd);
1569                 call.arg.l = va_arg(ap, long);
1570                 va_end(ap);
1571                 trace("fcntl on fd %i F_SETFL/F_SETFD\n", fd);
1572                 return fcntl(fd, cmd, call.arg.l);
1573         case F_GETFD:
1574         case F_GETFL:
1575                 trace("fcntl on fd %i F_GETFL/F_GETFD\n", fd);
1576                 return fcntl(fd, cmd);
1577         case F_GETLK:
1578                 trace("fcntl on fd %i F_GETLK\n", fd);
1579                 get_locks();
1580                 va_start(ap, cmd);
1581                 call.arg.fl = *va_arg(ap, struct flock *);
1582                 va_end(ap);
1583                 return fcntl(fd, cmd, &call.arg.fl);
1584         case F_SETLK:
1585         case F_SETLKW:
1586                 trace("fcntl on fd %i F_SETLK%s\n",
1587                       fd, cmd == F_SETLKW ? "W" : "");
1588                 va_start(ap, cmd);
1589                 call.arg.fl = *va_arg(ap, struct flock *);
1590                 va_end(ap);
1591                 break;
1592         default:
1593                 /* This means you need to implement it here. */
1594                 err(1, "failtest: unknown fcntl %u", cmd);
1595         }
1596
1597         p = add_history(FAILTEST_FCNTL, false, file, line, &call);
1598
1599         if (should_fail(p)) {
1600                 p->u.fcntl.ret = -1;
1601                 if (p->u.fcntl.cmd == F_SETLK)
1602                         p->error = EAGAIN;
1603                 else
1604                         p->error = EDEADLK;
1605         } else {
1606                 get_locks();
1607                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1608                                        &p->u.fcntl.arg.fl);
1609                 if (p->u.fcntl.ret == -1)
1610                         p->error = errno;
1611                 else {
1612                         /* We don't handle anything else yet. */
1613                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1614                         locks = add_lock(locks,
1615                                          p->u.fcntl.fd,
1616                                          p->u.fcntl.arg.fl.l_start,
1617                                          end_of(p->u.fcntl.arg.fl.l_start,
1618                                                 p->u.fcntl.arg.fl.l_len),
1619                                          p->u.fcntl.arg.fl.l_type);
1620                 }
1621         }
1622         trace("fcntl on fd %i -> %i\n", fd, p->u.fcntl.ret);
1623         errno = p->error;
1624         return p->u.fcntl.ret;
1625 }
1626
1627 static void cleanup_lseek(struct lseek_call *call, bool restore)
1628 {
1629         if (restore) {
1630                 trace("cleaning up lseek on fd %i -> %llu\n",
1631                       call->fd, (long long)call->old_off);
1632                 if (lseek(call->fd, call->old_off, SEEK_SET) != call->old_off)
1633                         fwarn("Restoring lseek pointer failed");
1634         }
1635 }
1636
1637 /* We trap this so we can undo it: we don't fail it. */
1638 off_t failtest_lseek(int fd, off_t offset, int whence, const char *file,
1639                      unsigned int line)
1640 {
1641         struct failtest_call *p;
1642         struct lseek_call call;
1643         call.fd = fd;
1644         call.offset = offset;
1645         call.whence = whence;
1646         call.old_off = lseek(fd, 0, SEEK_CUR);
1647
1648         p = add_history(FAILTEST_LSEEK, false, file, line, &call);
1649         p->fail = false;
1650
1651         /* Consume lseek from failpath. */
1652         if (failpath)
1653                 if (should_fail(p))
1654                         abort();
1655
1656         p->u.lseek.ret = lseek(fd, offset, whence);
1657
1658         if (p->u.lseek.ret != (off_t)-1)
1659                 set_cleanup(p, cleanup_lseek, struct lseek_call);
1660
1661         trace("lseek %s:%u on fd %i from %llu to %llu%s\n",
1662               file, line, fd, (long long)call.old_off, (long long)offset,
1663               whence == SEEK_CUR ? " (from current off)" :
1664               whence == SEEK_END ? " (from end)" :
1665               whence == SEEK_SET ? "" : " (invalid whence)");
1666         return p->u.lseek.ret;
1667 }
1668
1669
1670 pid_t failtest_getpid(const char *file, unsigned line)
1671 {
1672         /* You must call failtest_init first! */
1673         assert(orig_pid);
1674         return orig_pid;
1675 }
1676         
1677 void failtest_init(int argc, char *argv[])
1678 {
1679         unsigned int i;
1680
1681         orig_pid = getpid();
1682
1683         warnf = fdopen(move_fd_to_high(dup(STDERR_FILENO)), "w");
1684         for (i = 1; i < argc; i++) {
1685                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1686                         failpath = argv[i] + strlen("--failpath=");
1687                 } else if (strcmp(argv[i], "--trace") == 0) {
1688                         tracef = warnf;
1689                         failtest_timeout_ms = -1;
1690                 } else if (!strncmp(argv[i], "--debugpath=",
1691                                     strlen("--debugpath="))) {
1692                         debugpath = argv[i] + strlen("--debugpath=");
1693                 }
1694         }
1695         failtable_init(&failtable);
1696         start = time_now();
1697 }
1698
1699 bool failtest_has_failed(void)
1700 {
1701         return control_fd != -1;
1702 }
1703
1704 void failtest_exit(int status)
1705 {
1706         trace("failtest_exit with status %i\n", status);
1707         if (failtest_exit_check) {
1708                 if (!failtest_exit_check(&history))
1709                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1710         }
1711
1712         failtest_cleanup(false, status);
1713 }