git.ozlabs.org Git - ccan/blob - ccan/failtest/failtest.c
failtest: plug memory leak.
[ccan] / ccan / failtest / failtest.c
1 /* Licensed under LGPL - see LICENSE file for details */
2 #include "config.h"
3 #include <stdarg.h>
4 #include <string.h>
5 #include <stdio.h>
6 #include <stdarg.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <unistd.h>
10 #include <poll.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <sys/time.h>
16 #include <signal.h>
17 #include <assert.h>
18 #include <ccan/read_write_all/read_write_all.h>
19 #include <ccan/failtest/failtest_proto.h>
20 #include <ccan/failtest/failtest.h>
21 #include <ccan/build_assert/build_assert.h>
22
/* Optional user hook.  should_fail() calls it with the history array and
 * its length, and switches on the result: FAIL_OK, FAIL_PROBE or
 * FAIL_DONT_FAIL (anything else aborts). */
23 enum failtest_result (*failtest_hook)(struct failtest_call *, unsigned);
24
/* File descriptor that trace() writes to; -1 turns tracing off. */
25 static int tracefd = -1;
26
/* Timeout in milliseconds handed to poll() while a parent waits on its
 * child's pipes; on expiry the child is sent SIGUSR1. */
27 unsigned int failtest_timeout_ms = 20000;
28
/* Scripted failure path consumed one character per call by should_fail():
 * an upper-case letter injects a failure, '+' resumes normal exploration,
 * and running off the end means "continue without injecting". */
29 const char *failpath;
/* When the history so far spells exactly this path, should_fail() tries to
 * attach gdb in an xterm. */
30 const char *debugpath;
31
/* Message tags written to the control pipe by a child and read by its
 * parent in should_fail(). */
32 enum info_type {
33         WRITE,          /* followed by a struct write_call and its data */
34         RELEASE_LOCKS,  /* asks the parent to drop its file locks */
35         FAILURE,        /* sent by child_fail() and relayed upwards */
36         SUCCESS,        /* sent by failtest_cleanup() just before exit */
37         UNEXPECTED      /* initial value in should_fail(): nothing received */
38 };
39
/* One byte-range lock tracked by add_lock() and re-taken by get_locks(). */
40 struct lock_info {
41         int fd;
42         /* end is inclusive: you can't have a 0-byte lock. */
43         off_t start, end;
        /* Copied into struct flock's l_type by get_locks(). */
44         int type;
45 };
46
/* NOTE(review): not referenced in the visible part of the file; presumably
 * consulted at exit by code further down - confirm. */
47 bool (*failtest_exit_check)(struct failtest_call *history, unsigned num);
48
/* Growable array of every recorded call, and how many entries it holds. */
49 static struct failtest_call *history = NULL;
50 static unsigned int history_num = 0;
/* Write end of the control pipe to our parent; -1 when we have no parent
 * (see tell_parent() and the child branch of should_fail()). */
51 static int control_fd = -1;
/* Reference time subtracted from "now" in trace output; where it is set is
 * not visible here. */
52 static struct timeval start;
/* Non-zero while probing: counts down on each should_fail() entry. */
53 static unsigned int probe_count = 0;
54
/* Writes reported by a child over the control pipe, in arrival order. */
55 static struct write_call *child_writes = NULL;
56 static unsigned int child_writes_num = 0;
57
/* Pid believed to hold the locks in locks[], or 0 if nobody does. */
58 static pid_t lock_owner;
59 static struct lock_info *locks = NULL;
60 static unsigned int lock_num = 0;
61
/* NOTE(review): unused in the visible code; presumably the pid of the
 * top-level test process - confirm. */
62 static pid_t orig_pid;
63
/* One lower-case letter per call type, indexed by the record's type field;
 * used to build and parse failpath strings. */
64 static const char info_to_arg[] = "mceoxprwf";
65
66 /* Dummy call used for failtest_undo wrappers. */
67 static struct failtest_call unrecorded_call;
68
69 static struct failtest_call *add_history_(enum failtest_call_type type,
70                                           const char *file,
71                                           unsigned int line,
72                                           const void *elem,
73                                           size_t elem_size)
74 {
75         /* NULL file is how we suppress failure. */
76         if (!file)
77                 return &unrecorded_call;
78
79         history = realloc(history, (history_num + 1) * sizeof(*history));
80         history[history_num].type = type;
81         history[history_num].file = file;
82         history[history_num].line = line;
83         history[history_num].cleanup = NULL;
84         memcpy(&history[history_num].u, elem, elem_size);
85         return &history[history_num++];
86 }
87
/* Convenience wrapper: records *elem, taking its size from the pointed-to
 * type so callers cannot pass a mismatched length. */
88 #define add_history(type, file, line, elem) \
89         add_history_((type), (file), (line), (elem), sizeof(*(elem)))
90
91 /* We do a fake call inside a sizeof(), to check types: clean is "called"
 * with a (type *) argument but never evaluated, then stored in the record's
 * cleanup member through a void * cast. */
92 #define set_cleanup(call, clean, type)                  \
93         (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
94
95 static bool read_write_info(int fd)
96 {
97         struct write_call *w;
98         char *buf;
99
100         /* We don't need all of this, but it's simple. */
101         child_writes = realloc(child_writes,
102                                (child_writes_num+1) * sizeof(child_writes[0]));
103         w = &child_writes[child_writes_num];
104         if (!read_all(fd, w, sizeof(*w)))
105                 return false;
106
107         w->buf = buf = malloc(w->count);
108         if (!read_all(fd, buf, w->count))
109                 return false;
110
111         child_writes_num++;
112         return true;
113 }
114
115 static char *failpath_string(void)
116 {
117         unsigned int i;
118         char *ret = malloc(history_num + 1);
119
120         for (i = 0; i < history_num; i++) {
121                 ret[i] = info_to_arg[history[i].type];
122                 if (history[i].fail)
123                         ret[i] = toupper(ret[i]);
124         }
125         ret[i] = '\0';
126         return ret;
127 }
128
129 static void tell_parent(enum info_type type)
130 {
131         if (control_fd != -1)
132                 write_all(control_fd, &type, sizeof(type));
133 }
134
135 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
136 {
137         va_list ap;
138         char *path = failpath_string();
139
140         va_start(ap, fmt);
141         vfprintf(stderr, fmt, ap);
142         va_end(ap);
143
144         fprintf(stderr, "%.*s", (int)outlen, out);
145         printf("To reproduce: --failpath=%s\n", path);
146         free(path);
147         tell_parent(FAILURE);
148         exit(1);
149 }
150
151 static void trace(const char *fmt, ...)
152 {
153         va_list ap;
154
155         if (tracefd == -1)
156                 return;
157
158         va_start(ap, fmt);
159         vdprintf(tracefd, fmt, ap);
160         va_end(ap);
161 }
162
/* Pid of the child most recently forked by should_fail(). */
static pid_t child;

/* Relay a signal to that child.  Installed as the SIGUSR1 handler while a
 * parent waits, and also called directly when poll() times out. */
static void hand_down(int signum)
{
        (void)kill(child, signum);
}
169
170 static void release_locks(void)
171 {
172         /* Locks were never acquired/reacquired? */
173         if (lock_owner == 0)
174                 return;
175
176         /* We own them?  Release them all. */
177         if (lock_owner == getpid()) {
178                 unsigned int i;
179                 struct flock fl;
180                 fl.l_type = F_UNLCK;
181                 fl.l_whence = SEEK_SET;
182                 fl.l_start = 0;
183                 fl.l_len = 0;
184
185                 for (i = 0; i < lock_num; i++)
186                         fcntl(locks[i].fd, F_SETLK, &fl);
187         } else {
188                 /* Our parent must have them; pass request up. */
189                 enum info_type type = RELEASE_LOCKS;
190                 assert(control_fd != -1);
191                 write_all(control_fd, &type, sizeof(type));
192         }
193         lock_owner = 0;
194 }
195
/*
 * Largest value an off_t can hold.
 *
 * off_t is a signed type with no standard *_MAX macro, so build the value
 * from its width (assuming 8-bit bytes and no padding bits, as on POSIX
 * systems): 2^(bits-2) + (2^(bits-2) - 1) == 2^(bits-1) - 1, computed
 * without ever overflowing or touching the sign bit.
 */
static off_t off_max(void)
{
        off_t half = (off_t)1 << (sizeof(off_t) * 8 - 2);

        return half + (half - 1);
}
205
206 static void get_locks(void)
207 {
208         unsigned int i;
209         struct flock fl;
210
211         if (lock_owner == getpid())
212                 return;
213
214         if (lock_owner != 0) {
215                 enum info_type type = RELEASE_LOCKS;
216                 assert(control_fd != -1);
217                 write_all(control_fd, &type, sizeof(type));
218         }
219
220         fl.l_whence = SEEK_SET;
221
222         for (i = 0; i < lock_num; i++) {
223                 fl.l_type = locks[i].type;
224                 fl.l_start = locks[i].start;
225                 if (locks[i].end == off_max())
226                         fl.l_len = 0;
227                 else
228                         fl.l_len = locks[i].end - locks[i].start + 1;
229
230                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
231                         abort();
232         }
233         lock_owner = getpid();
234 }
235
/* Snapshot of one open file, taken by save_file() before forking so that
 * restore_files() can put it back afterwards. */
236 struct saved_file {
237         struct saved_file *next;        /* singly linked list */
238         int fd;
239         void *contents;                 /* malloc'ed copy of the whole file */
240         off_t off, len;                 /* file offset and length when saved */
241 };
242
243 static struct saved_file *save_file(struct saved_file *next, int fd)
244 {
245         struct saved_file *s = malloc(sizeof(*s));
246
247         s->next = next;
248         s->fd = fd;
249         s->off = lseek(fd, 0, SEEK_CUR);
250         /* Special file?  Erk... */
251         assert(s->off != -1);
252         s->len = lseek(fd, 0, SEEK_END);
253         lseek(fd, 0, SEEK_SET);
254         s->contents = malloc(s->len);
255         if (read(fd, s->contents, s->len) != s->len)
256                 err(1, "Failed to save %zu bytes", (size_t)s->len);
257         lseek(fd, s->off, SEEK_SET);
258         return s;
259 }
260         
261 /* We have little choice but to save and restore open files: mmap means we
262  * can really intercept changes in the child.
263  *
264  * We could do non-mmap'ed files on demand, however. */
265 static struct saved_file *save_files(void)
266 {
267         struct saved_file *files = NULL;
268         int i;
269
270         /* Figure out the set of live fds. */
271         for (i = history_num - 2; i >= 0; i--) {
272                 if (history[i].type == FAILTEST_OPEN) {
273                         int fd = history[i].u.open.ret;
274                         /* Only do successful, writable fds. */
275                         if (fd < 0)
276                                 continue;
277
278                         /* If it was closed, cleanup == NULL. */
279                         if (!history[i].cleanup)
280                                 continue;
281
282                         if ((history[i].u.open.flags & O_RDWR) == O_RDWR) {
283                                 files = save_file(files, fd);
284                         } else if ((history[i].u.open.flags & O_WRONLY)
285                                    == O_WRONLY) {
286                                 /* FIXME: Handle O_WRONLY.  Open with O_RDWR? */
287                                 abort();
288                         }
289                 }
290         }
291
292         return files;
293 }
294
295 static void restore_files(struct saved_file *s)
296 {
297         while (s) {
298                 struct saved_file *next = s->next;
299
300                 lseek(s->fd, 0, SEEK_SET);
301                 if (write(s->fd, s->contents, s->len) != s->len)
302                         err(1, "Failed to restore %zu bytes", (size_t)s->len);
303                 if (ftruncate(s->fd, s->len) != 0)
304                         err(1, "Failed to trim file to length %zu",
305                             (size_t)s->len);
306                 free(s->contents);
307                 lseek(s->fd, s->off, SEEK_SET);
308                 free(s);
309                 s = next;
310         }
311 }
312
313 static void free_files(struct saved_file *s)
314 {
315         while (s) {
316                 struct saved_file *next = s->next;
317                 free(s->contents);
318                 free(s);
319                 s = next;
320         }
321 }
322
323 /* Free up memory, so valgrind doesn't report leaks. */
324 static void free_everything(void)
325 {
326         unsigned int i;
327
328         /* We don't do this in cleanup: needed even for failed opens. */
329         for (i = 0; i < history_num; i++) {
330                 if (history[i].type == FAILTEST_OPEN)
331                         free((char *)history[i].u.open.pathname);
332         }
333         free(history);
334 }
335
336 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
337 {
338         int i;
339
340         /* For children, we don't care if they "failed" the testing. */
341         if (control_fd != -1)
342                 status = 0;
343
344         if (forced_cleanup)
345                 history_num--;
346
347         /* Cleanup everything, in reverse order. */
348         for (i = history_num - 1; i >= 0; i--) {
349                 if (!history[i].cleanup)
350                         continue;
351                 if (!forced_cleanup) {
352                         printf("Leak at %s:%u: --failpath=%s\n",
353                                history[i].file, history[i].line,
354                                failpath_string());
355                         status = 1;
356                 }
357                 history[i].cleanup(&history[i].u);
358         }
359
360         free_everything();
361         tell_parent(SUCCESS);
362         exit(status);
363 }
364
365 static bool should_fail(struct failtest_call *call)
366 {
367         int status;
368         int control[2], output[2];
369         enum info_type type = UNEXPECTED;
370         char *out = NULL;
371         size_t outlen = 0;
372         struct saved_file *files;
373
374         /* Are we probing? */
375         if (probe_count && --probe_count == 0 && control_fd != -1)
376                 failtest_cleanup(true, 0);
377
378         if (call == &unrecorded_call)
379                 return false;
380
381         if (failpath) {
382                 /* + means continue after end, like normal. */
383                 if (*failpath == '+')
384                         failpath = NULL;
385                 else if (*failpath == '\0') {
386                         /* Continue, but don't inject errors. */
387                         return call->fail = false;
388                 } else {
389                         if (tolower((unsigned char)*failpath)
390                             != info_to_arg[call->type])
391                                 errx(1, "Failpath expected '%c' got '%c'\n",
392                                      info_to_arg[call->type], *failpath);
393                         call->fail = isupper((unsigned char)*(failpath++));
394                         return call->fail;
395                 }
396         }
397
398         /* Attach debugger if they asked for it. */
399         if (debugpath && history_num == strlen(debugpath)) {
400                 unsigned int i;
401
402                 for (i = 0; i < history_num; i++) {
403                         unsigned char c = info_to_arg[history[i].type];
404                         if (history[i].fail)
405                                 c = toupper(c);
406                         if (c != debugpath[i])
407                                 break;
408                 }
409                 if (i == history_num) {
410                         char str[80];
411
412                         /* Don't timeout. */
413                         signal(SIGUSR1, SIG_IGN);
414                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
415                                 getpid(), getpid());
416                         if (system(str) == 0)
417                                 sleep(5);
418                 }
419         }
420
421         if (failtest_hook) {
422                 switch (failtest_hook(history, history_num)) {
423                 case FAIL_OK:
424                         break;
425                 case FAIL_PROBE:
426                         /* Already down probe path?  Stop now. */
427                         if (!probe_count) {
428                                 /* FIXME: We should run *parent* and
429                                  * run probe until calls match up again. */
430                                 probe_count = 3;
431                                 break;
432                         } else {
433                                 /* Child should give up now. */
434                                 if (control_fd != -1)
435                                         failtest_cleanup(true, 0);
436                                 /* Parent, don't fail again. */
437                         }
438                 case FAIL_DONT_FAIL:
439                         call->fail = false;
440                         return false;
441                 default:
442                         abort();
443                 }
444         }
445
446         files = save_files();
447
448         /* We're going to fail in the child. */
449         call->fail = true;
450         if (pipe(control) != 0 || pipe(output) != 0)
451                 err(1, "opening pipe");
452
453         /* Prevent double-printing (in child and parent) */
454         fflush(stdout);
455         child = fork();
456         if (child == -1)
457                 err(1, "forking failed");
458
459         if (child == 0) {
460                 if (tracefd != -1) {
461                         struct timeval now;
462                         const char *p;
463                         gettimeofday(&now, NULL);
464                         if (now.tv_usec < start.tv_usec) {
465                                 now.tv_sec--;
466                                 now.tv_usec += 1000000;
467                         }
468                         now.tv_usec -= start.tv_usec;
469                         now.tv_sec -= start.tv_sec;
470                         p = failpath_string();
471                         trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
472                               (int)now.tv_sec, (int)now.tv_usec / 10000, p);
473                         free((char *)p);
474                         p = strrchr(history[history_num-1].file, '/');
475                         if (p)
476                                 trace("%s", p+1);
477                         else
478                                 trace("%s", history[history_num-1].file);
479                         trace(":%u)\n", history[history_num-1].line);
480                 }
481                 close(control[0]);
482                 close(output[0]);
483                 dup2(output[1], STDOUT_FILENO);
484                 dup2(output[1], STDERR_FILENO);
485                 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
486                         close(output[1]);
487                 control_fd = control[1];
488                 /* Valgrind spots the leak if we don't free these. */
489                 free_files(files);
490                 return true;
491         }
492
493         signal(SIGUSR1, hand_down);
494
495         close(control[1]);
496         close(output[1]);
497
498         /* We grab output so we can display it; we grab writes so we
499          * can compare. */
500         do {
501                 struct pollfd pfd[2];
502                 int ret;
503
504                 pfd[0].fd = output[0];
505                 pfd[0].events = POLLIN|POLLHUP;
506                 pfd[1].fd = control[0];
507                 pfd[1].events = POLLIN|POLLHUP;
508
509                 if (type == SUCCESS)
510                         ret = poll(pfd, 1, failtest_timeout_ms);
511                 else
512                         ret = poll(pfd, 2, failtest_timeout_ms);
513
514                 if (ret == 0)
515                         hand_down(SIGUSR1);
516                 if (ret < 0) {
517                         if (errno == EINTR)
518                                 continue;
519                         err(1, "Poll returned %i", ret);
520                 }
521
522                 if (pfd[0].revents & POLLIN) {
523                         ssize_t len;
524
525                         out = realloc(out, outlen + 8192);
526                         len = read(output[0], out + outlen, 8192);
527                         outlen += len;
528                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
529                         if (read_all(control[0], &type, sizeof(type))) {
530                                 if (type == WRITE) {
531                                         if (!read_write_info(control[0]))
532                                                 break;
533                                 } else if (type == RELEASE_LOCKS) {
534                                         release_locks();
535                                         /* FIXME: Tell them we're done... */
536                                 }
537                         }
538                 } else if (pfd[0].revents & POLLHUP) {
539                         break;
540                 }
541         } while (type != FAILURE);
542
543         close(output[0]);
544         close(control[0]);
545         waitpid(child, &status, 0);
546         if (!WIFEXITED(status)) {
547                 if (WTERMSIG(status) == SIGUSR1)
548                         child_fail(out, outlen, "Timed out");
549                 else
550                         child_fail(out, outlen, "Killed by signal %u: ",
551                                    WTERMSIG(status));
552         }
553         /* Child printed failure already, just pass up exit code. */
554         if (type == FAILURE) {
555                 fprintf(stderr, "%.*s", (int)outlen, out);
556                 tell_parent(type);
557                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
558         }
559         if (WEXITSTATUS(status) != 0)
560                 child_fail(out, outlen, "Exited with status %i: ",
561                            WEXITSTATUS(status));
562
563         free(out);
564         signal(SIGUSR1, SIG_DFL);
565
566         restore_files(files);
567
568         /* We continue onwards without failing. */
569         call->fail = false;
570         return false;
571 }
572
573 static void cleanup_calloc(struct calloc_call *call)
574 {
575         free(call->ret);
576 }
577
578 void *failtest_calloc(size_t nmemb, size_t size,
579                       const char *file, unsigned line)
580 {
581         struct failtest_call *p;
582         struct calloc_call call;
583         call.nmemb = nmemb;
584         call.size = size;
585         p = add_history(FAILTEST_CALLOC, file, line, &call);
586
587         if (should_fail(p)) {
588                 p->u.calloc.ret = NULL;
589                 p->error = ENOMEM;
590         } else {
591                 p->u.calloc.ret = calloc(nmemb, size);
592                 set_cleanup(p, cleanup_calloc, struct calloc_call);
593         }
594         errno = p->error;
595         return p->u.calloc.ret;
596 }
597
598 static void cleanup_malloc(struct malloc_call *call)
599 {
600         free(call->ret);
601 }
602
603 void *failtest_malloc(size_t size, const char *file, unsigned line)
604 {
605         struct failtest_call *p;
606         struct malloc_call call;
607         call.size = size;
608
609         p = add_history(FAILTEST_MALLOC, file, line, &call);
610         if (should_fail(p)) {
611                 p->u.calloc.ret = NULL;
612                 p->error = ENOMEM;
613         } else {
614                 p->u.calloc.ret = malloc(size);
615                 set_cleanup(p, cleanup_malloc, struct malloc_call);
616         }
617         errno = p->error;
618         return p->u.calloc.ret;
619 }
620
621 static void cleanup_realloc(struct realloc_call *call)
622 {
623         free(call->ret);
624 }
625
626 /* Walk back and find out if we got this ptr from a previous routine. */
627 static void fixup_ptr_history(void *ptr, unsigned int last)
628 {
629         int i;
630
631         /* Start at end of history, work back. */
632         for (i = last - 1; i >= 0; i--) {
633                 switch (history[i].type) {
634                 case FAILTEST_REALLOC:
635                         if (history[i].u.realloc.ret == ptr) {
636                                 history[i].cleanup = NULL;
637                                 return;
638                         }
639                         break;
640                 case FAILTEST_MALLOC:
641                         if (history[i].u.malloc.ret == ptr) {
642                                 history[i].cleanup = NULL;
643                                 return;
644                         }
645                         break;
646                 case FAILTEST_CALLOC:
647                         if (history[i].u.calloc.ret == ptr) {
648                                 history[i].cleanup = NULL;
649                                 return;
650                         }
651                         break;
652                 default:
653                         break;
654                 }
655         }
656 }
657
658 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
659 {
660         struct failtest_call *p;
661         struct realloc_call call;
662         call.size = size;
663         p = add_history(FAILTEST_REALLOC, file, line, &call);
664
665         /* FIXME: Try one child moving allocation, one not. */
666         if (should_fail(p)) {
667                 p->u.realloc.ret = NULL;
668                 p->error = ENOMEM;
669         } else {
670                 fixup_ptr_history(ptr, history_num-1);
671                 p->u.realloc.ret = realloc(ptr, size);
672                 set_cleanup(p, cleanup_realloc, struct realloc_call);
673         }
674         errno = p->error;
675         return p->u.realloc.ret;
676 }
677
678 void failtest_free(void *ptr)
679 {
680         fixup_ptr_history(ptr, history_num);
681         free(ptr);
682 }
683
684 static void cleanup_open(struct open_call *call)
685 {
686         close(call->ret);
687 }
688
689 int failtest_open(const char *pathname,
690                   const char *file, unsigned line, ...)
691 {
692         struct failtest_call *p;
693         struct open_call call;
694         va_list ap;
695
696         call.pathname = strdup(pathname);
697         va_start(ap, line);
698         call.flags = va_arg(ap, int);
699         if (call.flags & O_CREAT) {
700                 call.mode = va_arg(ap, int);
701                 va_end(ap);
702         }
703         p = add_history(FAILTEST_OPEN, file, line, &call);
704         /* Avoid memory leak! */
705         if (p == &unrecorded_call)
706                 free((char *)call.pathname);
707         p->u.open.ret = open(pathname, call.flags, call.mode);
708
709         if (!failpath && p->u.open.ret == -1) {
710                 p->fail = false;
711                 p->error = errno;
712         } else if (should_fail(p)) {
713                 close(p->u.open.ret);
714                 p->u.open.ret = -1;
715                 /* FIXME: Play with error codes? */
716                 p->error = EACCES;
717         } else {
718                 set_cleanup(p, cleanup_open, struct open_call);
719         }
720         errno = p->error;
721         return p->u.open.ret;
722 }
723
724 static void cleanup_pipe(struct pipe_call *call)
725 {
726         if (!call->closed[0])
727                 close(call->fds[0]);
728         if (!call->closed[1])
729                 close(call->fds[1]);
730 }
731
732 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
733 {
734         struct failtest_call *p;
735         struct pipe_call call;
736
737         p = add_history(FAILTEST_PIPE, file, line, &call);
738         if (should_fail(p)) {
739                 p->u.open.ret = -1;
740                 /* FIXME: Play with error codes? */
741                 p->error = EMFILE;
742         } else {
743                 p->u.pipe.ret = pipe(p->u.pipe.fds);
744                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
745                 set_cleanup(p, cleanup_pipe, struct pipe_call);
746         }
747         /* This causes valgrind to notice if they use pipefd[] after failure */
748         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
749         errno = p->error;
750         return p->u.pipe.ret;
751 }
752
753 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
754                        const char *file, unsigned line)
755 {
756         struct failtest_call *p;
757         struct read_call call;
758         call.fd = fd;
759         call.buf = buf;
760         call.count = count;
761         call.off = off;
762         p = add_history(FAILTEST_READ, file, line, &call);
763
764         /* FIXME: Try partial read returns. */
765         if (should_fail(p)) {
766                 p->u.read.ret = -1;
767                 p->error = EIO;
768         } else {
769                 p->u.read.ret = pread(fd, buf, count, off);
770         }
771         errno = p->error;
772         return p->u.read.ret;
773 }
774
775 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
776                         const char *file, unsigned line)
777 {
778         struct failtest_call *p;
779         struct write_call call;
780
781         call.fd = fd;
782         call.buf = buf;
783         call.count = count;
784         call.off = off;
785         p = add_history(FAILTEST_WRITE, file, line, &call);
786
787         /* If we're a child, we need to make sure we write the same thing
788          * to non-files as the parent does, so tell it. */
789         if (control_fd != -1 && off == (off_t)-1) {
790                 enum info_type type = WRITE;
791
792                 write_all(control_fd, &type, sizeof(type));
793                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
794                 write_all(control_fd, buf, count);
795         }
796
797         /* FIXME: Try partial write returns. */
798         if (should_fail(p)) {
799                 p->u.write.ret = -1;
800                 p->error = EIO;
801         } else {
802                 /* FIXME: We assume same write order in parent and child */
803                 if (off == (off_t)-1 && child_writes_num != 0) {
804                         if (child_writes[0].fd != fd)
805                                 errx(1, "Child wrote to fd %u, not %u?",
806                                      child_writes[0].fd, fd);
807                         if (child_writes[0].off != p->u.write.off)
808                                 errx(1, "Child wrote to offset %zu, not %zu?",
809                                      (size_t)child_writes[0].off,
810                                      (size_t)p->u.write.off);
811                         if (child_writes[0].count != count)
812                                 errx(1, "Child wrote length %zu, not %zu?",
813                                      child_writes[0].count, count);
814                         if (memcmp(child_writes[0].buf, buf, count)) {
815                                 child_fail(NULL, 0,
816                                            "Child wrote differently to"
817                                            " fd %u than we did!\n", fd);
818                         }
819                         free((char *)child_writes[0].buf);
820                         child_writes_num--;
821                         memmove(&child_writes[0], &child_writes[1],
822                                 sizeof(child_writes[0]) * child_writes_num);
823
824                         /* Is this is a socket or pipe, child wrote it
825                            already. */
826                         if (p->u.write.off == (off_t)-1) {
827                                 p->u.write.ret = count;
828                                 errno = p->error;
829                                 return p->u.write.ret;
830                         }
831                 }
832                 p->u.write.ret = pwrite(fd, buf, count, off);
833         }
834         errno = p->error;
835         return p->u.write.ret;
836 }
837
/*
 * read() replacement built on failtest_pread().
 *
 * The current file offset is looked up and passed along; lseek() yields
 * (off_t)-1 for descriptors that have no offset.  Because pread() leaves
 * the file offset untouched, the offset is moved past the bytes read
 * afterwards so that consecutive calls progress through the file the way
 * read() does.  errno is preserved across that adjustment.
 *
 * NOTE(review): for a descriptor without an offset the underlying pread()
 * is still handed -1 - confirm how that case is meant to behave.
 */
ssize_t failtest_read(int fd, void *buf, size_t count,
                      const char *file, unsigned line)
{
        off_t off = lseek(fd, 0, SEEK_CUR);
        ssize_t ret = failtest_pread(fd, buf, count, off, file, line);

        if (ret > 0 && off != (off_t)-1) {
                int saved_errno = errno;

                lseek(fd, off + ret, SEEK_SET);
                errno = saved_errno;
        }
        return ret;
}
844
/*
 * write() replacement built on failtest_pwrite().
 *
 * The current file offset is looked up and passed along; lseek() yields
 * (off_t)-1 for descriptors that have no offset, which failtest_pwrite()
 * treats as a pipe/socket write.  Because pwrite() leaves the file offset
 * untouched, the offset is moved past the bytes written afterwards so
 * that consecutive calls progress through the file the way write() does.
 * errno is preserved across that adjustment.
 *
 * NOTE(review): a descriptor opened with O_APPEND would normally end up
 * at end-of-file instead - confirm whether that case matters here.
 */
ssize_t failtest_write(int fd, const void *buf, size_t count,
                       const char *file, unsigned line)
{
        off_t off = lseek(fd, 0, SEEK_CUR);
        ssize_t ret = failtest_pwrite(fd, buf, count, off, file, line);

        if (ret > 0 && off != (off_t)-1) {
                int saved_errno = errno;

                lseek(fd, off + ret, SEEK_SET);
                errno = saved_errno;
        }
        return ret;
}
851
/*
 * Record a lock operation on [start, end] (end inclusive) of fd in the
 * given lock array, whose length is the file-scope lock_num.
 *
 * Every existing entry on the same fd that overlaps the new range is
 * trimmed, split or removed first; then, unless type is F_UNLCK, the new
 * range is appended.  Returns the (possibly reallocated) array, which the
 * caller must use in place of the one passed in.
 */
852 static struct lock_info *WARN_UNUSED_RESULT
853 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
854 {
855         unsigned int i;
856         struct lock_info *l;
857
858         for (i = 0; i < lock_num; i++) {
859                 l = &locks[i];
860
861                 if (l->fd != fd)
862                         continue;
863                 /* Four cases we care about:
864                  * Start overlap:
865                  *      l =    |      |
866                  *      new = |   |
867                  * Mid overlap:
868                  *      l =    |      |
869                  *      new =    |  |
870                  * End overlap:
871                  *      l =    |      |
872                  *      new =      |    |
873                  * Total overlap:
874                  *      l =    |      |
875                  *      new = |         |
876                  */
877                 if (start > l->start && end < l->end) {
878                         /* Mid overlap: trim entry, add new one. */
879                         off_t new_start, new_end;
880                         new_start = end + 1;
881                         new_end = l->end;
882                         l->end = start - 1;
                        /* The recursive call appends the tail piece and may
                         * move the array, so l is looked up again after. */
883                         locks = add_lock(locks,
884                                          fd, new_start, new_end, l->type);
885                         l = &locks[i];
886                 } else if (start <= l->start && end >= l->end) {
887                         /* Total overlap: eliminate entry. */
                        /* end < start marks it empty for the check below. */
888                         l->end = 0;
889                         l->start = 1;
890                 } else if (end >= l->start && end < l->end) {
891                         /* Start overlap: trim entry. */
892                         l->start = end + 1;
893                 } else if (start > l->start && start <= l->end) {
894                         /* End overlap: trim entry. */
895                         l->end = start-1;
896                 }
897                 /* Nothing left?  Remove it. */
898                 if (l->end < l->start) {
                        /* Close the gap, then step back so the entry that
                         * slid into slot i is examined next (for i == 0 the
                         * unsigned wrap is undone by the loop's i++). */
899                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
900                         i--;
901                 }
902         }
903
        /* An unlock only removes coverage; anything else adds an entry. */
904         if (type != F_UNLCK) {
905                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
906                 l = &locks[lock_num++];
907                 l->fd = fd;
908                 l->start = start;
909                 l->end = end;
910                 l->type = type;
911         }
912         return locks;
913 }
914
915 /* We trap this so we can record it: we don't fail it. */
916 int failtest_close(int fd, const char *file, unsigned line)
917 {
918         int i;
919         struct close_call call;
920         struct failtest_call *p;
921
922         call.fd = fd;
923         p = add_history(FAILTEST_CLOSE, file, line, &call);
924         p->fail = false;
925
926         /* Consume close from failpath. */
927         if (failpath)
928                 if (should_fail(p))
929                         abort();
930
931         if (fd < 0)
932                 return close(fd);
933
934         /* Trace history to find source of fd. */
935         for (i = history_num-1; i >= 0; i--) {
936                 switch (history[i].type) {
937                 case FAILTEST_PIPE:
938                         /* From a pipe? */
939                         if (history[i].u.pipe.fds[0] == fd) {
940                                 assert(!history[i].u.pipe.closed[0]);
941                                 history[i].u.pipe.closed[0] = true;
942                                 if (history[i].u.pipe.closed[1])
943                                         history[i].cleanup = NULL;
944                                 goto out;
945                         }
946                         if (history[i].u.pipe.fds[1] == fd) {
947                                 assert(!history[i].u.pipe.closed[1]);
948                                 history[i].u.pipe.closed[1] = true;
949                                 if (history[i].u.pipe.closed[0])
950                                         history[i].cleanup = NULL;
951                                 goto out;
952                         }
953                         break;
954                 case FAILTEST_OPEN:
955                         if (history[i].u.open.ret == fd) {
956                                 assert((void *)history[i].cleanup
957                                        == (void *)cleanup_open);
958                                 history[i].cleanup = NULL;
959                                 goto out;
960                         }
961                         break;
962                 default:
963                         break;
964                 }
965         }
966
967 out:
968         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
969         return close(fd);
970 }
971
/*
 * Inclusive last offset of a range beginning at start and spanning len
 * bytes.  A len of zero means "to end of file", reported as off_max().
 */
static off_t end_of(off_t start, off_t len)
{
        return len ? start + len - 1 : off_max();
}
979
980 /* FIXME: This only handles locks, really. */
981 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
982 {
983         struct failtest_call *p;
984         struct fcntl_call call;
985         va_list ap;
986
987         call.fd = fd;
988         call.cmd = cmd;
989
990         /* Argument extraction. */
991         switch (cmd) {
992         case F_SETFL:
993         case F_SETFD:
994                 va_start(ap, cmd);
995                 call.arg.l = va_arg(ap, long);
996                 va_end(ap);
997                 return fcntl(fd, cmd, call.arg.l);
998         case F_GETFD:
999         case F_GETFL:
1000                 return fcntl(fd, cmd);
1001         case F_GETLK:
1002                 get_locks();
1003                 va_start(ap, cmd);
1004                 call.arg.fl = *va_arg(ap, struct flock *);
1005                 va_end(ap);
1006                 return fcntl(fd, cmd, &call.arg.fl);
1007         case F_SETLK:
1008         case F_SETLKW:
1009                 va_start(ap, cmd);
1010                 call.arg.fl = *va_arg(ap, struct flock *);
1011                 va_end(ap);
1012                 break;
1013         default:
1014                 /* This means you need to implement it here. */
1015                 err(1, "failtest: unknown fcntl %u", cmd);
1016         }
1017
1018         p = add_history(FAILTEST_FCNTL, file, line, &call);
1019
1020         if (should_fail(p)) {
1021                 p->u.fcntl.ret = -1;
1022                 if (p->u.fcntl.cmd == F_SETLK)
1023                         p->error = EAGAIN;
1024                 else
1025                         p->error = EDEADLK;
1026         } else {
1027                 get_locks();
1028                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1029                                        &p->u.fcntl.arg.fl);
1030                 if (p->u.fcntl.ret == -1)
1031                         p->error = errno;
1032                 else {
1033                         /* We don't handle anything else yet. */
1034                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1035                         locks = add_lock(locks,
1036                                          p->u.fcntl.fd,
1037                                          p->u.fcntl.arg.fl.l_start,
1038                                          end_of(p->u.fcntl.arg.fl.l_start,
1039                                                 p->u.fcntl.arg.fl.l_len),
1040                                          p->u.fcntl.arg.fl.l_type);
1041                 }
1042         }
1043         errno = p->error;
1044         return p->u.fcntl.ret;
1045 }
1046
1047 pid_t failtest_getpid(const char *file, unsigned line)
1048 {
1049         /* You must call failtest_init first! */
1050         assert(orig_pid);
1051         return orig_pid;
1052 }
1053         
1054 void failtest_init(int argc, char *argv[])
1055 {
1056         unsigned int i;
1057
1058         orig_pid = getpid();
1059                 
1060         for (i = 1; i < argc; i++) {
1061                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1062                         failpath = argv[i] + strlen("--failpath=");
1063                 } else if (strcmp(argv[i], "--tracepath") == 0) {
1064                         tracefd = dup(STDERR_FILENO);
1065                         failtest_timeout_ms = -1;
1066                 } else if (!strncmp(argv[i], "--debugpath=",
1067                                     strlen("--debugpath="))) {
1068                         debugpath = argv[i] + strlen("--debugpath=");
1069                 }
1070         }
1071         gettimeofday(&start, NULL);
1072 }
1073
1074 bool failtest_has_failed(void)
1075 {
1076         return control_fd != -1;
1077 }
1078
1079 void failtest_exit(int status)
1080 {
1081         if (failtest_exit_check) {
1082                 if (!failtest_exit_check(history, history_num))
1083                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1084         }
1085
1086         failtest_cleanup(false, status);
1087 }