failtest: use 64 bit offsets if available.
[ccan] / ccan / failtest / failtest.c
1 /* Licensed under LGPL - see LICENSE file for details */
2 #include <ccan/failtest/failtest.h>
3 #include <stdarg.h>
4 #include <string.h>
5 #include <stdio.h>
6 #include <stdarg.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <unistd.h>
10 #include <poll.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <sys/time.h>
16 #include <signal.h>
17 #include <assert.h>
18 #include <ccan/read_write_all/read_write_all.h>
19 #include <ccan/failtest/failtest_proto.h>
20 #include <ccan/build_assert/build_assert.h>
21
/* Optional user hook.  should_fail() calls it with the history array and
 * its length; the result (FAIL_OK, FAIL_PROBE or FAIL_DONT_FAIL) decides
 * whether a failure is injected for the newest call. */
22 enum failtest_result (*failtest_hook)(struct failtest_call *, unsigned);
23
/* File descriptor trace() writes to; -1 turns tracing off. */
24 static int tracefd = -1;
25
/* poll() timeout used by the parent in should_fail(); when it expires the
 * parent sends SIGUSR1 to the child. */
26 unsigned int failtest_timeout_ms = 20000;
27
/* Replay path consumed one character per call by should_fail(): the letter
 * must match info_to_arg[] for the call type, upper case means "fail this
 * call", and a '+' means carry on with normal fork-and-fail testing. */
28 const char *failpath;
/* When the history so far spells exactly this path, should_fail() launches
 * gdb in an xterm attached to the current process. */
29 const char *debugpath;
30
/* Message tags a child writes on control_fd; the parent reads them in the
 * poll loop of should_fail(). */
31 enum info_type {
        /* Followed by a struct write_call and then the written bytes
         * (see failtest_pwrite() and read_write_info()). */
32         WRITE,
        /* Asks the parent to drop the file locks it holds. */
33         RELEASE_LOCKS,
        /* Sent by child_fail(); ends the parent's poll loop. */
34         FAILURE,
        /* Sent by failtest_cleanup() just before exiting. */
35         SUCCESS,
        /* Initial value in should_fail(): nothing received yet. */
36         UNEXPECTED
37 };
38
/* One byte-range lock we believe is held on a file descriptor. */
39 struct lock_info {
        /* Descriptor the lock was taken on. */
40         int fd;
41         /* end is inclusive: you can't have a 0-byte lock. */
42         off_t start, end;
        /* Value copied into flock.l_type when the lock is re-acquired. */
43         int type;
44 };
45
/* NOTE(review): not referenced in the visible part of this file; presumably
 * an optional user check run at exit -- confirm against failtest.h. */
46 bool (*failtest_exit_check)(struct failtest_call *history, unsigned num);
47
/* Growable array of every recorded call, appended to by add_history_(). */
48 static struct failtest_call *history = NULL;
49 static unsigned int history_num = 0;
/* Write end of the pipe to our parent; stays -1 in a process that was not
 * forked by should_fail(). */
50 static int control_fd = -1;
/* Reference time subtracted from "now" in trace output (assigned outside
 * the visible part of the file). */
51 static struct timeval start;
/* Non-zero while probing: set to 3 on FAIL_PROBE and decremented on each
 * should_fail() call; a child gives up when it reaches zero. */
52 static unsigned int probe_count = 0;
53
/* Writes reported by the child via WRITE messages, in arrival order. */
54 static struct write_call *child_writes = NULL;
55 static unsigned int child_writes_num = 0;
56
/* Pid of the process currently holding the locks in locks[]; 0 = nobody. */
57 static pid_t lock_owner;
58 static struct lock_info *locks = NULL;
59 static unsigned int lock_num = 0;
60
/* NOTE(review): not used in the visible part of the file; presumably the
 * pid of the original test process -- confirm. */
61 static pid_t orig_pid;
62
/* Failpath letter for each call type, indexed by failtest_call.type. */
63 static const char info_to_arg[] = "mceoxprwf";
64
65 /* Dummy call used for failtest_undo wrappers. */
66 static struct failtest_call unrecorded_call;
67
68 static struct failtest_call *add_history_(enum failtest_call_type type,
69                                           const char *file,
70                                           unsigned int line,
71                                           const void *elem,
72                                           size_t elem_size)
73 {
74         /* NULL file is how we suppress failure. */
75         if (!file)
76                 return &unrecorded_call;
77
78         history = realloc(history, (history_num + 1) * sizeof(*history));
79         history[history_num].type = type;
80         history[history_num].file = file;
81         history[history_num].line = line;
82         history[history_num].cleanup = NULL;
83         memcpy(&history[history_num].u, elem, elem_size);
84         return &history[history_num++];
85 }
86
/* Convenience wrapper: the element size is taken from the pointed-to type,
 * so callers only pass the address of their *_call struct. */
87 #define add_history(type, file, line, elem) \
88         add_history_((type), (file), (line), (elem), sizeof(*(elem)))
89
90 /* We do a fake call inside a sizeof(), to check types. */
/* The sizeof() operand is never evaluated; it only makes the compiler check
 * that clean() accepts a (type *).  The macro then stores clean, cast to
 * void *, in call->cleanup. */
91 #define set_cleanup(call, clean, type)                  \
92         (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
93
94 static bool read_write_info(int fd)
95 {
96         struct write_call *w;
97         char *buf;
98
99         /* We don't need all of this, but it's simple. */
100         child_writes = realloc(child_writes,
101                                (child_writes_num+1) * sizeof(child_writes[0]));
102         w = &child_writes[child_writes_num];
103         if (!read_all(fd, w, sizeof(*w)))
104                 return false;
105
106         w->buf = buf = malloc(w->count);
107         if (!read_all(fd, buf, w->count))
108                 return false;
109
110         child_writes_num++;
111         return true;
112 }
113
114 static char *failpath_string(void)
115 {
116         unsigned int i;
117         char *ret = malloc(history_num + 1);
118
119         for (i = 0; i < history_num; i++) {
120                 ret[i] = info_to_arg[history[i].type];
121                 if (history[i].fail)
122                         ret[i] = toupper(ret[i]);
123         }
124         ret[i] = '\0';
125         return ret;
126 }
127
128 static void tell_parent(enum info_type type)
129 {
130         if (control_fd != -1)
131                 write_all(control_fd, &type, sizeof(type));
132 }
133
134 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
135 {
136         va_list ap;
137         char *path = failpath_string();
138
139         va_start(ap, fmt);
140         vfprintf(stderr, fmt, ap);
141         va_end(ap);
142
143         fprintf(stderr, "%.*s", (int)outlen, out);
144         printf("To reproduce: --failpath=%s\n", path);
145         free(path);
146         tell_parent(FAILURE);
147         exit(1);
148 }
149
150 static void trace(const char *fmt, ...)
151 {
152         va_list ap;
153
154         if (tracefd == -1)
155                 return;
156
157         va_start(ap, fmt);
158         vdprintf(tracefd, fmt, ap);
159         va_end(ap);
160 }
161
/* Pid returned by the most recent fork() in should_fail(). */
162 static pid_t child;
163
/* Forward signum to the current child.  Installed as the SIGUSR1 handler
 * while the parent waits, and called directly when poll() times out. */
164 static void hand_down(int signum)
165 {
166         kill(child, signum);
167 }
168
169 static void release_locks(void)
170 {
171         /* Locks were never acquired/reacquired? */
172         if (lock_owner == 0)
173                 return;
174
175         /* We own them?  Release them all. */
176         if (lock_owner == getpid()) {
177                 unsigned int i;
178                 struct flock fl;
179                 fl.l_type = F_UNLCK;
180                 fl.l_whence = SEEK_SET;
181                 fl.l_start = 0;
182                 fl.l_len = 0;
183
184                 for (i = 0; i < lock_num; i++)
185                         fcntl(locks[i].fd, F_SETLK, &fl);
186         } else {
187                 /* Our parent must have them; pass request up. */
188                 enum info_type type = RELEASE_LOCKS;
189                 assert(control_fd != -1);
190                 write_all(control_fd, &type, sizeof(type));
191         }
192         lock_owner = 0;
193 }
194
/*
 * off_t is a signed type.  Getting its max is non-trivial.
 *
 * Returns the largest positive value of a 4- or 8-byte off_t.  Lock ranges
 * use this value as the "extends to end of file" marker, so it has to be
 * the true maximum: anything smaller would collide with a real offset.
 */
static off_t off_max(void)
{
        BUILD_ASSERT(sizeof(off_t) == 4 || sizeof(off_t) == 8);
        if (sizeof(off_t) == 4)
                return (off_t)0x7FFFFFFF;               /* 2^31 - 1 */
        else
                return (off_t)0x7FFFFFFFFFFFFFFFULL;    /* 2^63 - 1 */
}
204
205 static void get_locks(void)
206 {
207         unsigned int i;
208         struct flock fl;
209
210         if (lock_owner == getpid())
211                 return;
212
213         if (lock_owner != 0) {
214                 enum info_type type = RELEASE_LOCKS;
215                 assert(control_fd != -1);
216                 write_all(control_fd, &type, sizeof(type));
217         }
218
219         fl.l_whence = SEEK_SET;
220
221         for (i = 0; i < lock_num; i++) {
222                 fl.l_type = locks[i].type;
223                 fl.l_start = locks[i].start;
224                 if (locks[i].end == off_max())
225                         fl.l_len = 0;
226                 else
227                         fl.l_len = locks[i].end - locks[i].start + 1;
228
229                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
230                         abort();
231         }
232         lock_owner = getpid();
233 }
234
/* Snapshot of one open file, taken by save_file() before forking. */
235 struct saved_file {
        /* Next snapshot in the singly linked list (NULL ends it). */
236         struct saved_file *next;
        /* Descriptor the snapshot was read from and is restored to. */
237         int fd;
        /* malloc'ed copy of the whole file, len bytes long. */
238         void *contents;
        /* off: file offset at save time; len: file length at save time. */
239         off_t off, len;
240 };
241
242 static struct saved_file *save_file(struct saved_file *next, int fd)
243 {
244         struct saved_file *s = malloc(sizeof(*s));
245
246         s->next = next;
247         s->fd = fd;
248         s->off = lseek(fd, 0, SEEK_CUR);
249         /* Special file?  Erk... */
250         assert(s->off != -1);
251         s->len = lseek(fd, 0, SEEK_END);
252         lseek(fd, 0, SEEK_SET);
253         s->contents = malloc(s->len);
254         if (read(fd, s->contents, s->len) != s->len)
255                 err(1, "Failed to save %zu bytes", (size_t)s->len);
256         lseek(fd, s->off, SEEK_SET);
257         return s;
258 }
259         
260 /* We have little choice but to save and restore open files: mmap means we
261  * can really intercept changes in the child.
262  *
263  * We could do non-mmap'ed files on demand, however. */
264 static struct saved_file *save_files(void)
265 {
266         struct saved_file *files = NULL;
267         int i;
268
269         /* Figure out the set of live fds. */
270         for (i = history_num - 2; i >= 0; i--) {
271                 if (history[i].type == FAILTEST_OPEN) {
272                         int fd = history[i].u.open.ret;
273                         /* Only do successful, writable fds. */
274                         if (fd < 0)
275                                 continue;
276
277                         /* If it was closed, cleanup == NULL. */
278                         if (!history[i].cleanup)
279                                 continue;
280
281                         if ((history[i].u.open.flags & O_RDWR) == O_RDWR) {
282                                 files = save_file(files, fd);
283                         } else if ((history[i].u.open.flags & O_WRONLY)
284                                    == O_WRONLY) {
285                                 /* FIXME: Handle O_WRONLY.  Open with O_RDWR? */
286                                 abort();
287                         }
288                 }
289         }
290
291         return files;
292 }
293
294 static void restore_files(struct saved_file *s)
295 {
296         while (s) {
297                 struct saved_file *next = s->next;
298
299                 lseek(s->fd, 0, SEEK_SET);
300                 if (write(s->fd, s->contents, s->len) != s->len)
301                         err(1, "Failed to restore %zu bytes", (size_t)s->len);
302                 if (ftruncate(s->fd, s->len) != 0)
303                         err(1, "Failed to trim file to length %zu",
304                             (size_t)s->len);
305                 free(s->contents);
306                 lseek(s->fd, s->off, SEEK_SET);
307                 free(s);
308                 s = next;
309         }
310 }
311
312 static void free_files(struct saved_file *s)
313 {
314         while (s) {
315                 struct saved_file *next = s->next;
316                 free(s->contents);
317                 free(s);
318                 s = next;
319         }
320 }
321
322 /* Free up memory, so valgrind doesn't report leaks. */
323 static void free_everything(void)
324 {
325         unsigned int i;
326
327         /* We don't do this in cleanup: needed even for failed opens. */
328         for (i = 0; i < history_num; i++) {
329                 if (history[i].type == FAILTEST_OPEN)
330                         free((char *)history[i].u.open.pathname);
331         }
332         free(history);
333 }
334
335 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
336 {
337         int i;
338
339         /* For children, we don't care if they "failed" the testing. */
340         if (control_fd != -1)
341                 status = 0;
342
343         if (forced_cleanup)
344                 history_num--;
345
346         /* Cleanup everything, in reverse order. */
347         for (i = history_num - 1; i >= 0; i--) {
348                 if (!history[i].cleanup)
349                         continue;
350                 if (!forced_cleanup) {
351                         printf("Leak at %s:%u: --failpath=%s\n",
352                                history[i].file, history[i].line,
353                                failpath_string());
354                         status = 1;
355                 }
356                 history[i].cleanup(&history[i].u);
357         }
358
359         free_everything();
360         tell_parent(SUCCESS);
361         exit(status);
362 }
363
364 static bool should_fail(struct failtest_call *call)
365 {
366         int status;
367         int control[2], output[2];
368         enum info_type type = UNEXPECTED;
369         char *out = NULL;
370         size_t outlen = 0;
371         struct saved_file *files;
372
373         /* Are we probing? */
374         if (probe_count && --probe_count == 0 && control_fd != -1)
375                 failtest_cleanup(true, 0);
376
377         if (call == &unrecorded_call)
378                 return false;
379
380         if (failpath) {
381                 /* + means continue after end, like normal. */
382                 if (*failpath == '+')
383                         failpath = NULL;
384                 else if (*failpath == '\0') {
385                         /* Continue, but don't inject errors. */
386                         return call->fail = false;
387                 } else {
388                         if (tolower((unsigned char)*failpath)
389                             != info_to_arg[call->type])
390                                 errx(1, "Failpath expected '%c' got '%c'\n",
391                                      info_to_arg[call->type], *failpath);
392                         call->fail = isupper((unsigned char)*(failpath++));
393                         return call->fail;
394                 }
395         }
396
397         /* Attach debugger if they asked for it. */
398         if (debugpath && history_num == strlen(debugpath)) {
399                 unsigned int i;
400
401                 for (i = 0; i < history_num; i++) {
402                         unsigned char c = info_to_arg[history[i].type];
403                         if (history[i].fail)
404                                 c = toupper(c);
405                         if (c != debugpath[i])
406                                 break;
407                 }
408                 if (i == history_num) {
409                         char str[80];
410
411                         /* Don't timeout. */
412                         signal(SIGUSR1, SIG_IGN);
413                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
414                                 getpid(), getpid());
415                         if (system(str) == 0)
416                                 sleep(5);
417                 }
418         }
419
420         if (failtest_hook) {
421                 switch (failtest_hook(history, history_num)) {
422                 case FAIL_OK:
423                         break;
424                 case FAIL_PROBE:
425                         /* Already down probe path?  Stop now. */
426                         if (!probe_count) {
427                                 /* FIXME: We should run *parent* and
428                                  * run probe until calls match up again. */
429                                 probe_count = 3;
430                                 break;
431                         } else {
432                                 /* Child should give up now. */
433                                 if (control_fd != -1)
434                                         failtest_cleanup(true, 0);
435                                 /* Parent, don't fail again. */
436                         }
437                 case FAIL_DONT_FAIL:
438                         call->fail = false;
439                         return false;
440                 default:
441                         abort();
442                 }
443         }
444
445         files = save_files();
446
447         /* We're going to fail in the child. */
448         call->fail = true;
449         if (pipe(control) != 0 || pipe(output) != 0)
450                 err(1, "opening pipe");
451
452         /* Prevent double-printing (in child and parent) */
453         fflush(stdout);
454         child = fork();
455         if (child == -1)
456                 err(1, "forking failed");
457
458         if (child == 0) {
459                 if (tracefd != -1) {
460                         struct timeval now;
461                         const char *p;
462                         gettimeofday(&now, NULL);
463                         if (now.tv_usec < start.tv_usec) {
464                                 now.tv_sec--;
465                                 now.tv_usec += 1000000;
466                         }
467                         now.tv_usec -= start.tv_usec;
468                         now.tv_sec -= start.tv_sec;
469                         p = failpath_string();
470                         trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
471                               (int)now.tv_sec, (int)now.tv_usec / 10000, p);
472                         free((char *)p);
473                         p = strrchr(history[history_num-1].file, '/');
474                         if (p)
475                                 trace("%s", p+1);
476                         else
477                                 trace("%s", history[history_num-1].file);
478                         trace(":%u)\n", history[history_num-1].line);
479                 }
480                 close(control[0]);
481                 close(output[0]);
482                 dup2(output[1], STDOUT_FILENO);
483                 dup2(output[1], STDERR_FILENO);
484                 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
485                         close(output[1]);
486                 control_fd = control[1];
487                 /* Valgrind spots the leak if we don't free these. */
488                 free_files(files);
489                 return true;
490         }
491
492         signal(SIGUSR1, hand_down);
493
494         close(control[1]);
495         close(output[1]);
496
497         /* We grab output so we can display it; we grab writes so we
498          * can compare. */
499         do {
500                 struct pollfd pfd[2];
501                 int ret;
502
503                 pfd[0].fd = output[0];
504                 pfd[0].events = POLLIN|POLLHUP;
505                 pfd[1].fd = control[0];
506                 pfd[1].events = POLLIN|POLLHUP;
507
508                 if (type == SUCCESS)
509                         ret = poll(pfd, 1, failtest_timeout_ms);
510                 else
511                         ret = poll(pfd, 2, failtest_timeout_ms);
512
513                 if (ret == 0)
514                         hand_down(SIGUSR1);
515                 if (ret < 0) {
516                         if (errno == EINTR)
517                                 continue;
518                         err(1, "Poll returned %i", ret);
519                 }
520
521                 if (pfd[0].revents & POLLIN) {
522                         ssize_t len;
523
524                         out = realloc(out, outlen + 8192);
525                         len = read(output[0], out + outlen, 8192);
526                         outlen += len;
527                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
528                         if (read_all(control[0], &type, sizeof(type))) {
529                                 if (type == WRITE) {
530                                         if (!read_write_info(control[0]))
531                                                 break;
532                                 } else if (type == RELEASE_LOCKS) {
533                                         release_locks();
534                                         /* FIXME: Tell them we're done... */
535                                 }
536                         }
537                 } else if (pfd[0].revents & POLLHUP) {
538                         break;
539                 }
540         } while (type != FAILURE);
541
542         close(output[0]);
543         close(control[0]);
544         waitpid(child, &status, 0);
545         if (!WIFEXITED(status)) {
546                 if (WTERMSIG(status) == SIGUSR1)
547                         child_fail(out, outlen, "Timed out");
548                 else
549                         child_fail(out, outlen, "Killed by signal %u: ",
550                                    WTERMSIG(status));
551         }
552         /* Child printed failure already, just pass up exit code. */
553         if (type == FAILURE) {
554                 fprintf(stderr, "%.*s", (int)outlen, out);
555                 tell_parent(type);
556                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
557         }
558         if (WEXITSTATUS(status) != 0)
559                 child_fail(out, outlen, "Exited with status %i: ",
560                            WEXITSTATUS(status));
561
562         free(out);
563         signal(SIGUSR1, SIG_DFL);
564
565         restore_files(files);
566
567         /* We continue onwards without failing. */
568         call->fail = false;
569         return false;
570 }
571
/* Cleanup handler registered by failtest_calloc(): frees the block that
 * the recorded calloc returned. */
572 static void cleanup_calloc(struct calloc_call *call)
573 {
574         free(call->ret);
575 }
576
577 void *failtest_calloc(size_t nmemb, size_t size,
578                       const char *file, unsigned line)
579 {
580         struct failtest_call *p;
581         struct calloc_call call;
582         call.nmemb = nmemb;
583         call.size = size;
584         p = add_history(FAILTEST_CALLOC, file, line, &call);
585
586         if (should_fail(p)) {
587                 p->u.calloc.ret = NULL;
588                 p->error = ENOMEM;
589         } else {
590                 p->u.calloc.ret = calloc(nmemb, size);
591                 set_cleanup(p, cleanup_calloc, struct calloc_call);
592         }
593         errno = p->error;
594         return p->u.calloc.ret;
595 }
596
/* Cleanup handler registered by failtest_malloc(): frees the block that
 * the recorded malloc returned. */
597 static void cleanup_malloc(struct malloc_call *call)
598 {
599         free(call->ret);
600 }
601
602 void *failtest_malloc(size_t size, const char *file, unsigned line)
603 {
604         struct failtest_call *p;
605         struct malloc_call call;
606         call.size = size;
607
608         p = add_history(FAILTEST_MALLOC, file, line, &call);
609         if (should_fail(p)) {
610                 p->u.calloc.ret = NULL;
611                 p->error = ENOMEM;
612         } else {
613                 p->u.calloc.ret = malloc(size);
614                 set_cleanup(p, cleanup_malloc, struct malloc_call);
615         }
616         errno = p->error;
617         return p->u.calloc.ret;
618 }
619
/* Cleanup handler registered by failtest_realloc(): frees the block that
 * the recorded realloc returned. */
620 static void cleanup_realloc(struct realloc_call *call)
621 {
622         free(call->ret);
623 }
624
625 /* Walk back and find out if we got this ptr from a previous routine. */
626 static void fixup_ptr_history(void *ptr, unsigned int last)
627 {
628         int i;
629
630         /* Start at end of history, work back. */
631         for (i = last - 1; i >= 0; i--) {
632                 switch (history[i].type) {
633                 case FAILTEST_REALLOC:
634                         if (history[i].u.realloc.ret == ptr) {
635                                 history[i].cleanup = NULL;
636                                 return;
637                         }
638                         break;
639                 case FAILTEST_MALLOC:
640                         if (history[i].u.malloc.ret == ptr) {
641                                 history[i].cleanup = NULL;
642                                 return;
643                         }
644                         break;
645                 case FAILTEST_CALLOC:
646                         if (history[i].u.calloc.ret == ptr) {
647                                 history[i].cleanup = NULL;
648                                 return;
649                         }
650                         break;
651                 default:
652                         break;
653                 }
654         }
655 }
656
657 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
658 {
659         struct failtest_call *p;
660         struct realloc_call call;
661         call.size = size;
662         p = add_history(FAILTEST_REALLOC, file, line, &call);
663
664         /* FIXME: Try one child moving allocation, one not. */
665         if (should_fail(p)) {
666                 p->u.realloc.ret = NULL;
667                 p->error = ENOMEM;
668         } else {
669                 fixup_ptr_history(ptr, history_num-1);
670                 p->u.realloc.ret = realloc(ptr, size);
671                 set_cleanup(p, cleanup_realloc, struct realloc_call);
672         }
673         errno = p->error;
674         return p->u.realloc.ret;
675 }
676
/* free() replacement: stop tracking ptr in the history (so it is neither
 * reported as a leak nor freed again at cleanup), then free it.  This call
 * is not itself recorded. */
677 void failtest_free(void *ptr)
678 {
679         fixup_ptr_history(ptr, history_num);
680         free(ptr);
681 }
682
/* Cleanup handler registered by failtest_open(): closes the descriptor
 * that the recorded open returned. */
683 static void cleanup_open(struct open_call *call)
684 {
685         close(call->ret);
686 }
687
688 int failtest_open(const char *pathname,
689                   const char *file, unsigned line, ...)
690 {
691         struct failtest_call *p;
692         struct open_call call;
693         va_list ap;
694
695         call.pathname = strdup(pathname);
696         va_start(ap, line);
697         call.flags = va_arg(ap, int);
698         if (call.flags & O_CREAT) {
699                 call.mode = va_arg(ap, int);
700                 va_end(ap);
701         }
702         p = add_history(FAILTEST_OPEN, file, line, &call);
703         /* Avoid memory leak! */
704         if (p == &unrecorded_call)
705                 free((char *)call.pathname);
706         p->u.open.ret = open(pathname, call.flags, call.mode);
707
708         if (!failpath && p->u.open.ret == -1) {
709                 p->fail = false;
710                 p->error = errno;
711         } else if (should_fail(p)) {
712                 close(p->u.open.ret);
713                 p->u.open.ret = -1;
714                 /* FIXME: Play with error codes? */
715                 p->error = EACCES;
716         } else {
717                 set_cleanup(p, cleanup_open, struct open_call);
718         }
719         errno = p->error;
720         return p->u.open.ret;
721 }
722
723 static void cleanup_pipe(struct pipe_call *call)
724 {
725         if (!call->closed[0])
726                 close(call->fds[0]);
727         if (!call->closed[1])
728                 close(call->fds[1]);
729 }
730
731 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
732 {
733         struct failtest_call *p;
734         struct pipe_call call;
735
736         p = add_history(FAILTEST_PIPE, file, line, &call);
737         if (should_fail(p)) {
738                 p->u.open.ret = -1;
739                 /* FIXME: Play with error codes? */
740                 p->error = EMFILE;
741         } else {
742                 p->u.pipe.ret = pipe(p->u.pipe.fds);
743                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
744                 set_cleanup(p, cleanup_pipe, struct pipe_call);
745         }
746         /* This causes valgrind to notice if they use pipefd[] after failure */
747         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
748         errno = p->error;
749         return p->u.pipe.ret;
750 }
751
752 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
753                        const char *file, unsigned line)
754 {
755         struct failtest_call *p;
756         struct read_call call;
757         call.fd = fd;
758         call.buf = buf;
759         call.count = count;
760         call.off = off;
761         p = add_history(FAILTEST_READ, file, line, &call);
762
763         /* FIXME: Try partial read returns. */
764         if (should_fail(p)) {
765                 p->u.read.ret = -1;
766                 p->error = EIO;
767         } else {
768                 p->u.read.ret = pread(fd, buf, count, off);
769         }
770         errno = p->error;
771         return p->u.read.ret;
772 }
773
774 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
775                         const char *file, unsigned line)
776 {
777         struct failtest_call *p;
778         struct write_call call;
779
780         call.fd = fd;
781         call.buf = buf;
782         call.count = count;
783         call.off = off;
784         p = add_history(FAILTEST_WRITE, file, line, &call);
785
786         /* If we're a child, we need to make sure we write the same thing
787          * to non-files as the parent does, so tell it. */
788         if (control_fd != -1 && off == (off_t)-1) {
789                 enum info_type type = WRITE;
790
791                 write_all(control_fd, &type, sizeof(type));
792                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
793                 write_all(control_fd, buf, count);
794         }
795
796         /* FIXME: Try partial write returns. */
797         if (should_fail(p)) {
798                 p->u.write.ret = -1;
799                 p->error = EIO;
800         } else {
801                 /* FIXME: We assume same write order in parent and child */
802                 if (off == (off_t)-1 && child_writes_num != 0) {
803                         if (child_writes[0].fd != fd)
804                                 errx(1, "Child wrote to fd %u, not %u?",
805                                      child_writes[0].fd, fd);
806                         if (child_writes[0].off != p->u.write.off)
807                                 errx(1, "Child wrote to offset %zu, not %zu?",
808                                      (size_t)child_writes[0].off,
809                                      (size_t)p->u.write.off);
810                         if (child_writes[0].count != count)
811                                 errx(1, "Child wrote length %zu, not %zu?",
812                                      child_writes[0].count, count);
813                         if (memcmp(child_writes[0].buf, buf, count)) {
814                                 child_fail(NULL, 0,
815                                            "Child wrote differently to"
816                                            " fd %u than we did!\n", fd);
817                         }
818                         free((char *)child_writes[0].buf);
819                         child_writes_num--;
820                         memmove(&child_writes[0], &child_writes[1],
821                                 sizeof(child_writes[0]) * child_writes_num);
822
823                         /* Is this is a socket or pipe, child wrote it
824                            already. */
825                         if (p->u.write.off == (off_t)-1) {
826                                 p->u.write.ret = count;
827                                 errno = p->error;
828                                 return p->u.write.ret;
829                         }
830                 }
831                 p->u.write.ret = pwrite(fd, buf, count, off);
832         }
833         errno = p->error;
834         return p->u.write.ret;
835 }
836
/*
 * read() replacement built on failtest_pread().
 *
 * The current file offset is looked up and handed to failtest_pread().
 * Since a positioned read leaves the descriptor's offset alone, the offset
 * is then moved past the bytes read, as a real read() would have done.
 * Descriptors with no position (lseek() returned -1) are left to
 * failtest_pread() to handle.  errno from the read is preserved.
 */
ssize_t failtest_read(int fd, void *buf, size_t count,
                      const char *file, unsigned line)
{
        off_t pos = lseek(fd, 0, SEEK_CUR);
        ssize_t ret = failtest_pread(fd, buf, count, pos, file, line);

        if (ret > 0 && pos != (off_t)-1) {
                int saved_errno = errno;

                lseek(fd, pos + ret, SEEK_SET);
                errno = saved_errno;
        }
        return ret;
}
843
/*
 * write() replacement built on failtest_pwrite().
 *
 * The current file offset is looked up and handed to failtest_pwrite().
 * Since a positioned write leaves the descriptor's offset alone, the
 * offset is then moved past the bytes written, as a real write() would
 * have done.  Descriptors with no position (lseek() returned -1) are left
 * to failtest_pwrite() to handle.  errno from the write is preserved.
 */
ssize_t failtest_write(int fd, const void *buf, size_t count,
                       const char *file, unsigned line)
{
        off_t pos = lseek(fd, 0, SEEK_CUR);
        ssize_t ret = failtest_pwrite(fd, buf, count, pos, file, line);

        if (ret > 0 && pos != (off_t)-1) {
                int saved_errno = errno;

                lseek(fd, pos + ret, SEEK_SET);
                errno = saved_errno;
        }
        return ret;
}
850
851 static struct lock_info *WARN_UNUSED_RESULT
852 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
853 {
854         unsigned int i;
855         struct lock_info *l;
856
857         for (i = 0; i < lock_num; i++) {
858                 l = &locks[i];
859
860                 if (l->fd != fd)
861                         continue;
862                 /* Four cases we care about:
863                  * Start overlap:
864                  *      l =    |      |
865                  *      new = |   |
866                  * Mid overlap:
867                  *      l =    |      |
868                  *      new =    |  |
869                  * End overlap:
870                  *      l =    |      |
871                  *      new =      |    |
872                  * Total overlap:
873                  *      l =    |      |
874                  *      new = |         |
875                  */
876                 if (start > l->start && end < l->end) {
877                         /* Mid overlap: trim entry, add new one. */
878                         off_t new_start, new_end;
879                         new_start = end + 1;
880                         new_end = l->end;
881                         l->end = start - 1;
882                         locks = add_lock(locks,
883                                          fd, new_start, new_end, l->type);
884                         l = &locks[i];
885                 } else if (start <= l->start && end >= l->end) {
886                         /* Total overlap: eliminate entry. */
887                         l->end = 0;
888                         l->start = 1;
889                 } else if (end >= l->start && end < l->end) {
890                         /* Start overlap: trim entry. */
891                         l->start = end + 1;
892                 } else if (start > l->start && start <= l->end) {
893                         /* End overlap: trim entry. */
894                         l->end = start-1;
895                 }
896                 /* Nothing left?  Remove it. */
897                 if (l->end < l->start) {
898                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
899                         i--;
900                 }
901         }
902
903         if (type != F_UNLCK) {
904                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
905                 l = &locks[lock_num++];
906                 l->fd = fd;
907                 l->start = start;
908                 l->end = end;
909                 l->type = type;
910         }
911         return locks;
912 }
913
914 /* We trap this so we can record it: we don't fail it. */
915 int failtest_close(int fd, const char *file, unsigned line)
916 {
917         int i;
918         struct close_call call;
919         struct failtest_call *p;
920
921         call.fd = fd;
922         p = add_history(FAILTEST_CLOSE, file, line, &call);
923         p->fail = false;
924
925         /* Consume close from failpath. */
926         if (failpath)
927                 if (should_fail(p))
928                         abort();
929
930         if (fd < 0)
931                 return close(fd);
932
933         /* Trace history to find source of fd. */
934         for (i = history_num-1; i >= 0; i--) {
935                 switch (history[i].type) {
936                 case FAILTEST_PIPE:
937                         /* From a pipe? */
938                         if (history[i].u.pipe.fds[0] == fd) {
939                                 assert(!history[i].u.pipe.closed[0]);
940                                 history[i].u.pipe.closed[0] = true;
941                                 if (history[i].u.pipe.closed[1])
942                                         history[i].cleanup = NULL;
943                                 goto out;
944                         }
945                         if (history[i].u.pipe.fds[1] == fd) {
946                                 assert(!history[i].u.pipe.closed[1]);
947                                 history[i].u.pipe.closed[1] = true;
948                                 if (history[i].u.pipe.closed[0])
949                                         history[i].cleanup = NULL;
950                                 goto out;
951                         }
952                         break;
953                 case FAILTEST_OPEN:
954                         if (history[i].u.open.ret == fd) {
955                                 assert((void *)history[i].cleanup
956                                        == (void *)cleanup_open);
957                                 history[i].cleanup = NULL;
958                                 goto out;
959                         }
960                         break;
961                 default:
962                         break;
963                 }
964         }
965
966 out:
967         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
968         return close(fd);
969 }
970
/*
 * Inclusive end offset of a range of len bytes beginning at start.
 * A zero len stands for "to end of file" and yields off_max().
 */
static off_t end_of(off_t start, off_t len)
{
        return len == 0 ? off_max() : start + len - 1;
}
978
979 /* FIXME: This only handles locks, really. */
980 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
981 {
982         struct failtest_call *p;
983         struct fcntl_call call;
984         va_list ap;
985
986         call.fd = fd;
987         call.cmd = cmd;
988
989         /* Argument extraction. */
990         switch (cmd) {
991         case F_SETFL:
992         case F_SETFD:
993                 va_start(ap, cmd);
994                 call.arg.l = va_arg(ap, long);
995                 va_end(ap);
996                 return fcntl(fd, cmd, call.arg.l);
997         case F_GETFD:
998         case F_GETFL:
999                 return fcntl(fd, cmd);
1000         case F_GETLK:
1001                 get_locks();
1002                 va_start(ap, cmd);
1003                 call.arg.fl = *va_arg(ap, struct flock *);
1004                 va_end(ap);
1005                 return fcntl(fd, cmd, &call.arg.fl);
1006         case F_SETLK:
1007         case F_SETLKW:
1008                 va_start(ap, cmd);
1009                 call.arg.fl = *va_arg(ap, struct flock *);
1010                 va_end(ap);
1011                 break;
1012         default:
1013                 /* This means you need to implement it here. */
1014                 err(1, "failtest: unknown fcntl %u", cmd);
1015         }
1016
1017         p = add_history(FAILTEST_FCNTL, file, line, &call);
1018
1019         if (should_fail(p)) {
1020                 p->u.fcntl.ret = -1;
1021                 if (p->u.fcntl.cmd == F_SETLK)
1022                         p->error = EAGAIN;
1023                 else
1024                         p->error = EDEADLK;
1025         } else {
1026                 get_locks();
1027                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1028                                        &p->u.fcntl.arg.fl);
1029                 if (p->u.fcntl.ret == -1)
1030                         p->error = errno;
1031                 else {
1032                         /* We don't handle anything else yet. */
1033                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1034                         locks = add_lock(locks,
1035                                          p->u.fcntl.fd,
1036                                          p->u.fcntl.arg.fl.l_start,
1037                                          end_of(p->u.fcntl.arg.fl.l_start,
1038                                                 p->u.fcntl.arg.fl.l_len),
1039                                          p->u.fcntl.arg.fl.l_type);
1040                 }
1041         }
1042         errno = p->error;
1043         return p->u.fcntl.ret;
1044 }
1045
1046 pid_t failtest_getpid(const char *file, unsigned line)
1047 {
1048         /* You must call failtest_init first! */
1049         assert(orig_pid);
1050         return orig_pid;
1051 }
1052         
1053 void failtest_init(int argc, char *argv[])
1054 {
1055         unsigned int i;
1056
1057         orig_pid = getpid();
1058                 
1059         for (i = 1; i < argc; i++) {
1060                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1061                         failpath = argv[i] + strlen("--failpath=");
1062                 } else if (strcmp(argv[i], "--tracepath") == 0) {
1063                         tracefd = dup(STDERR_FILENO);
1064                         failtest_timeout_ms = -1;
1065                 } else if (!strncmp(argv[i], "--debugpath=",
1066                                     strlen("--debugpath="))) {
1067                         debugpath = argv[i] + strlen("--debugpath=");
1068                 }
1069         }
1070         gettimeofday(&start, NULL);
1071 }
1072
1073 bool failtest_has_failed(void)
1074 {
1075         return control_fd != -1;
1076 }
1077
1078 void failtest_exit(int status)
1079 {
1080         if (failtest_exit_check) {
1081                 if (!failtest_exit_check(history, history_num))
1082                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1083         }
1084
1085         failtest_cleanup(false, status);
1086 }