/*
 * Source: git.ozlabs.org Git - ccan, blob ccan/failtest/failtest.c
 * Commit subject: "failtest: generic cleanup hooks"
 */
1 #include <stdarg.h>
2 #include <string.h>
3 #include <stdio.h>
4 #include <stdarg.h>
5 #include <ctype.h>
6 #include <err.h>
7 #include <unistd.h>
8 #include <poll.h>
9 #include <errno.h>
10 #include <sys/types.h>
11 #include <sys/wait.h>
12 #include <sys/stat.h>
13 #include <sys/time.h>
14 #include <assert.h>
15 #include <ccan/read_write_all/read_write_all.h>
16 #include <ccan/failtest/failtest_proto.h>
17 #include <ccan/failtest/failtest.h>
18 #include <ccan/build_assert/build_assert.h>
19
20 bool (*failtest_hook)(struct failtest_call *history, unsigned num)
21 = failtest_default_hook;
22
23 static int tracefd = -1;
24
25 unsigned int failtest_timeout_ms = 20000;
26
27 const char *failpath;
28
/* Message tags a child writes on the control pipe (see tell_parent() and the
 * reader loop in should_fail()). */
enum info_type {
	WRITE = 0,	/* followed by a write_info_hdr and the written bytes */
	RELEASE_LOCKS,	/* asks the receiver to drop the file locks it holds */
	FAILURE,	/* sent by child_fail() before exiting */
	SUCCESS,	/* sent by failtest_exit() before exiting */
	UNEXPECTED	/* initial value in should_fail(); never sent in this file */
};
36
/* Fixed-size description of one write; sent verbatim over the control pipe
 * ahead of the written bytes. */
struct write_info_hdr {
	size_t len;	/* number of data bytes that follow */
	off_t offset;	/* file offset the write was aimed at */
	int fd;		/* descriptor that was written to */
};

/* One recorded write: its header plus a heap copy of the data. */
struct write_info {
	struct write_info_hdr hdr;
	char *data;	/* malloc'd, hdr.len bytes */
};
47
/* One byte-range lock we believe is held on fd. */
struct lock_info {
	int fd;
	/* Both bounds are inclusive, so a lock always covers at least one
	 * byte; a zero-byte lock cannot be represented. */
	off_t start, end;
	/* Lock type as stored in struct flock's l_type. */
	int type;
};
54
55 bool (*failtest_exit_check)(struct failtest_call *history, unsigned num);
56
57 static struct failtest_call *history = NULL;
58 static unsigned int history_num = 0;
59 static int control_fd = -1;
60 static struct timeval start;
61
62 static struct write_info *writes = NULL;
63 static unsigned int writes_num = 0;
64
65 static struct write_info *child_writes = NULL;
66 static unsigned int child_writes_num = 0;
67
68 static pid_t lock_owner;
69 static struct lock_info *locks = NULL;
70 static unsigned int lock_num = 0;
71
72 static const char info_to_arg[] = "mceoprwf";
73
74 /* Dummy call used for failtest_undo wrappers. */
75 static struct failtest_call unrecorded_call;
76
77 static struct failtest_call *add_history_(enum failtest_call_type type,
78                                           const char *file,
79                                           unsigned int line,
80                                           const void *elem,
81                                           size_t elem_size)
82 {
83         /* NULL file is how we suppress failure. */
84         if (!file)
85                 return &unrecorded_call;
86
87         history = realloc(history, (history_num + 1) * sizeof(*history));
88         history[history_num].type = type;
89         history[history_num].file = file;
90         history[history_num].line = line;
91         history[history_num].cleanup = NULL;
92         memcpy(&history[history_num].u, elem, elem_size);
93         return &history[history_num++];
94 }
95
/* Record a call in the history; the number of bytes copied into the record
 * is taken from the type elem points at. */
#define add_history(type, file, line, elem)				\
	add_history_((type), (file), (line), (elem), sizeof(*(elem)))

/* Install clean as the record's cleanup hook.  The sizeof() operand is never
 * evaluated: it only makes the compiler check that clean can be called with
 * a (type *) argument before the function pointer is cast to void *. */
#define set_cleanup(call, clean, type)					\
	(call)->cleanup = (void *)((void)sizeof(clean((type *)NULL)), (clean))
101
/* Default failtest_hook: ignores both arguments and always answers true. */
bool failtest_default_hook(struct failtest_call *history, unsigned num)
{
	(void)history;
	(void)num;
	return true;
}
106
107 static bool read_write_info(int fd)
108 {
109         struct write_info_hdr hdr;
110
111         if (!read_all(fd, &hdr, sizeof(hdr)))
112                 return false;
113
114         child_writes = realloc(child_writes,
115                                (child_writes_num+1) * sizeof(child_writes[0]));
116         child_writes[child_writes_num].hdr = hdr;
117         child_writes[child_writes_num].data = malloc(hdr.len);
118         if (!read_all(fd, child_writes[child_writes_num].data, hdr.len))
119                 return false;
120
121         child_writes_num++;
122         return true;
123 }
124
125 static char *failpath_string(void)
126 {
127         unsigned int i;
128         char *ret = malloc(history_num + 1);
129
130         for (i = 0; i < history_num; i++) {
131                 ret[i] = info_to_arg[history[i].type];
132                 if (history[i].fail)
133                         ret[i] = toupper(ret[i]);
134         }
135         ret[i] = '\0';
136         return ret;
137 }
138
139 static void tell_parent(enum info_type type)
140 {
141         if (control_fd != -1)
142                 write_all(control_fd, &type, sizeof(type));
143 }
144
145 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
146 {
147         va_list ap;
148         char *path = failpath_string();
149
150         va_start(ap, fmt);
151         vfprintf(stderr, fmt, ap);
152         va_end(ap);
153
154         fprintf(stderr, "%.*s", (int)outlen, out);
155         printf("To reproduce: --failpath=%s\n", path);
156         free(path);
157         tell_parent(FAILURE);
158         exit(1);
159 }
160
/* Pid returned by the most recent fork() in should_fail(). */
static pid_t child;

/* Relay a signal to the current child.  Installed as the SIGUSR1 handler by
 * should_fail(), which also calls it directly when poll() times out. */
static void hand_down(int signal)
{
	(void)kill(child, signal);
}
167
168 static void release_locks(void)
169 {
170         /* Locks were never acquired/reacquired? */
171         if (lock_owner == 0)
172                 return;
173
174         /* We own them?  Release them all. */
175         if (lock_owner == getpid()) {
176                 unsigned int i;
177                 struct flock fl;
178                 fl.l_type = F_UNLCK;
179                 fl.l_whence = SEEK_SET;
180                 fl.l_start = 0;
181                 fl.l_len = 0;
182
183                 for (i = 0; i < lock_num; i++)
184                         fcntl(locks[i].fd, F_SETLK, &fl);
185         } else {
186                 /* Our parent must have them; pass request up. */
187                 enum info_type type = RELEASE_LOCKS;
188                 assert(control_fd != -1);
189                 write_all(control_fd, &type, sizeof(type));
190         }
191         lock_owner = 0;
192 }
193
/*
 * Largest value an off_t can hold.
 *
 * off_t is a signed type with no portable *_MAX macro, so the value
 * 2^(bits-1) - 1 is built from the type's width.  It is assembled in two
 * steps because computing 2^(bits-1) directly would overflow.  (The previous
 * hard-coded constants were each one hex digit short.)
 */
static off_t off_max(void)
{
	/* POSIX guarantees 8-bit bytes; one bit is the sign. */
	const unsigned int value_bits = sizeof(off_t) * 8 - 1;
	/* 2^(value_bits - 1) - 1: the top half of the range, minus one. */
	const off_t half = ((off_t)1 << (value_bits - 1)) - 1;

	return half * 2 + 1;
}
203
204 static void get_locks(void)
205 {
206         unsigned int i;
207         struct flock fl;
208
209         if (lock_owner == getpid())
210                 return;
211
212         if (lock_owner != 0) {
213                 enum info_type type = RELEASE_LOCKS;
214                 assert(control_fd != -1);
215                 write_all(control_fd, &type, sizeof(type));
216         }
217
218         fl.l_whence = SEEK_SET;
219
220         for (i = 0; i < lock_num; i++) {
221                 fl.l_type = locks[i].type;
222                 fl.l_start = locks[i].start;
223                 if (locks[i].end == off_max())
224                         fl.l_len = 0;
225                 else
226                         fl.l_len = locks[i].end - locks[i].start + 1;
227
228                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
229                         abort();
230         }
231         lock_owner = getpid();
232 }
233
234 static void trace_str(const char *str)
235 {
236         ssize_t ret;
237
238         while ((ret = write(tracefd, str, strlen(str))) <= 0) {
239                 str += ret;
240                 if (!*str)
241                         return;
242         }
243         err(1, "Writing trace.");
244 }
245
246 static bool should_fail(struct failtest_call *call)
247 {
248         int status;
249         int control[2], output[2];
250         enum info_type type = UNEXPECTED;
251         char *out = NULL;
252         size_t outlen = 0;
253
254         if (call == &unrecorded_call)
255                 return false;
256
257         if (failpath) {
258                 /* + means continue after end, like normal. */
259                 if (*failpath == '+')
260                         failpath = NULL;
261                 else {
262                         if (tolower(*failpath) != info_to_arg[call->type])
263                                 errx(1, "Failpath expected '%c' got '%c'\n",
264                                      info_to_arg[call->type], *failpath);
265                         call->fail = isupper(*(failpath++));
266                         return call->fail;
267                 }
268         }
269
270         if (!failtest_hook(history, history_num)) {
271                 call->fail = false;
272                 return false;
273         }
274
275         /* We're going to fail in the child. */
276         call->fail = true;
277         if (pipe(control) != 0 || pipe(output) != 0)
278                 err(1, "opening pipe");
279
280         /* Prevent double-printing (in child and parent) */
281         fflush(stdout);
282         child = fork();
283         if (child == -1)
284                 err(1, "forking failed");
285
286         if (child == 0) {
287                 if (tracefd != -1) {
288                         struct timeval now;
289                         char str[50], *p;
290                         gettimeofday(&now, NULL);
291                         if (now.tv_usec < start.tv_usec) {
292                                 now.tv_sec--;
293                                 now.tv_usec += 1000000;
294                         }
295                         now.tv_usec -= start.tv_usec;
296                         now.tv_sec -= start.tv_sec;
297                         sprintf(str, "%u (%u.%02u): ", getpid(),
298                                 (int)now.tv_sec, (int)now.tv_usec / 10000);
299                         trace_str(str);
300                         p = failpath_string();
301                         trace_str(p);
302                         free(p);
303                         trace_str("(");
304                         p = strchr(history[history_num-1].file, '/');
305                         if (p)
306                                 trace_str(p+1);
307                         else
308                                 trace_str(history[history_num-1].file);
309                         sprintf(str, ":%u)\n", history[history_num-1].line);
310                         trace_str(str);
311                 }
312                 close(control[0]);
313                 close(output[0]);
314                 dup2(output[1], STDOUT_FILENO);
315                 dup2(output[1], STDERR_FILENO);
316                 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
317                         close(output[1]);
318                 control_fd = control[1];
319                 return true;
320         }
321
322         signal(SIGUSR1, hand_down);
323
324         close(control[1]);
325         close(output[1]);
326
327         /* We grab output so we can display it; we grab writes so we
328          * can compare. */
329         do {
330                 struct pollfd pfd[2];
331                 int ret;
332
333                 pfd[0].fd = output[0];
334                 pfd[0].events = POLLIN|POLLHUP;
335                 pfd[1].fd = control[0];
336                 pfd[1].events = POLLIN|POLLHUP;
337
338                 if (type == SUCCESS)
339                         ret = poll(pfd, 1, failtest_timeout_ms);
340                 else
341                         ret = poll(pfd, 2, failtest_timeout_ms);
342
343                 if (ret <= 0)
344                         hand_down(SIGUSR1);
345
346                 if (pfd[0].revents & POLLIN) {
347                         ssize_t len;
348
349                         out = realloc(out, outlen + 8192);
350                         len = read(output[0], out + outlen, 8192);
351                         outlen += len;
352                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
353                         if (read_all(control[0], &type, sizeof(type))) {
354                                 if (type == WRITE) {
355                                         if (!read_write_info(control[0]))
356                                                 break;
357                                 } else if (type == RELEASE_LOCKS) {
358                                         release_locks();
359                                         /* FIXME: Tell them we're done... */
360                                 }
361                         }
362                 } else if (pfd[0].revents & POLLHUP) {
363                         break;
364                 }
365         } while (type != FAILURE);
366
367         close(output[0]);
368         close(control[0]);
369         waitpid(child, &status, 0);
370         if (!WIFEXITED(status))
371                 child_fail(out, outlen, "Killed by signal %u: ",
372                            WTERMSIG(status));
373         /* Child printed failure already, just pass up exit code. */
374         if (type == FAILURE) {
375                 fprintf(stderr, "%.*s", (int)outlen, out);
376                 tell_parent(type);
377                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
378         }
379         if (WEXITSTATUS(status) != 0)
380                 child_fail(out, outlen, "Exited with status %i: ",
381                            WEXITSTATUS(status));
382
383         free(out);
384         signal(SIGUSR1, SIG_DFL);
385
386         /* We continue onwards without failing. */
387         call->fail = false;
388         return false;
389 }
390
391 static void cleanup_calloc(struct calloc_call *call)
392 {
393         free(call->ret);
394 }
395
396 void *failtest_calloc(size_t nmemb, size_t size,
397                       const char *file, unsigned line)
398 {
399         struct failtest_call *p;
400         struct calloc_call call;
401         call.nmemb = nmemb;
402         call.size = size;
403         p = add_history(FAILTEST_CALLOC, file, line, &call);
404
405         if (should_fail(p)) {
406                 p->u.calloc.ret = NULL;
407                 p->error = ENOMEM;
408         } else {
409                 p->u.calloc.ret = calloc(nmemb, size);
410                 set_cleanup(p, cleanup_calloc, struct calloc_call);
411         }
412         errno = p->error;
413         return p->u.calloc.ret;
414 }
415
416 static void cleanup_malloc(struct malloc_call *call)
417 {
418         free(call->ret);
419 }
420
421 void *failtest_malloc(size_t size, const char *file, unsigned line)
422 {
423         struct failtest_call *p;
424         struct malloc_call call;
425         call.size = size;
426
427         p = add_history(FAILTEST_MALLOC, file, line, &call);
428         if (should_fail(p)) {
429                 p->u.calloc.ret = NULL;
430                 p->error = ENOMEM;
431         } else {
432                 p->u.calloc.ret = malloc(size);
433                 set_cleanup(p, cleanup_malloc, struct malloc_call);
434         }
435         errno = p->error;
436         return p->u.calloc.ret;
437 }
438
439 static void cleanup_realloc(struct realloc_call *call)
440 {
441         free(call->ret);
442 }
443
444 /* Walk back and find out if we got this ptr from a previous routine. */
445 static void fixup_ptr_history(void *ptr, unsigned int last)
446 {
447         int i;
448
449         /* Start at end of history, work back. */
450         for (i = last - 1; i >= 0; i--) {
451                 switch (history[i].type) {
452                 case FAILTEST_REALLOC:
453                         if (history[i].u.realloc.ret == ptr) {
454                                 history[i].cleanup = NULL;
455                                 return;
456                         }
457                         break;
458                 case FAILTEST_MALLOC:
459                         if (history[i].u.malloc.ret == ptr) {
460                                 history[i].cleanup = NULL;
461                                 return;
462                         }
463                         break;
464                 case FAILTEST_CALLOC:
465                         if (history[i].u.calloc.ret == ptr) {
466                                 history[i].cleanup = NULL;
467                                 return;
468                         }
469                         break;
470                 default:
471                         break;
472                 }
473         }
474 }
475
476 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
477 {
478         struct failtest_call *p;
479         struct realloc_call call;
480         call.size = size;
481         p = add_history(FAILTEST_REALLOC, file, line, &call);
482
483         /* FIXME: Try one child moving allocation, one not. */
484         if (should_fail(p)) {
485                 p->u.realloc.ret = NULL;
486                 p->error = ENOMEM;
487         } else {
488                 fixup_ptr_history(ptr, history_num-1);
489                 p->u.realloc.ret = realloc(ptr, size);
490                 set_cleanup(p, cleanup_realloc, struct realloc_call);
491         }
492         errno = p->error;
493         return p->u.realloc.ret;
494 }
495
496 void failtest_free(void *ptr)
497 {
498         fixup_ptr_history(ptr, history_num);
499         free(ptr);
500 }
501
502 static void cleanup_open(struct open_call *call)
503 {
504         close(call->ret);
505 }
506
507 int failtest_open(const char *pathname,
508                   const char *file, unsigned line, ...)
509 {
510         struct failtest_call *p;
511         struct open_call call;
512         va_list ap;
513
514         call.pathname = strdup(pathname);
515         va_start(ap, line);
516         call.flags = va_arg(ap, int);
517         if (call.flags & O_CREAT) {
518                 call.mode = va_arg(ap, mode_t);
519                 va_end(ap);
520         }
521         p = add_history(FAILTEST_OPEN, file, line, &call);
522         /* Avoid memory leak! */
523         if (p == &unrecorded_call)
524                 free((char *)call.pathname);
525         if (should_fail(p)) {
526                 p->u.open.ret = -1;
527                 /* FIXME: Play with error codes? */
528                 p->error = EACCES;
529         } else {
530                 p->u.open.ret = open(pathname, call.flags, call.mode);
531                 set_cleanup(p, cleanup_open, struct open_call);
532                 p->u.open.dup_fd = p->u.open.ret;
533         }
534         errno = p->error;
535         return p->u.open.ret;
536 }
537
538 static void cleanup_pipe(struct pipe_call *call)
539 {
540         if (!call->closed[0])
541                 close(call->fds[0]);
542         if (!call->closed[1])
543                 close(call->fds[1]);
544 }
545
546 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
547 {
548         struct failtest_call *p;
549         struct pipe_call call;
550
551         p = add_history(FAILTEST_PIPE, file, line, &call);
552         if (should_fail(p)) {
553                 p->u.open.ret = -1;
554                 /* FIXME: Play with error codes? */
555                 p->error = EMFILE;
556         } else {
557                 p->u.pipe.ret = pipe(p->u.pipe.fds);
558                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
559                 set_cleanup(p, cleanup_pipe, struct pipe_call);
560         }
561         /* This causes valgrind to notice if they use pipefd[] after failure */
562         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
563         errno = p->error;
564         return p->u.pipe.ret;
565 }
566
567 static void cleanup_read(struct read_call *call)
568 {
569         lseek(call->fd, call->off, SEEK_SET);
570 }
571
572 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
573                        const char *file, unsigned line)
574 {
575         struct failtest_call *p;
576         struct read_call call;
577         call.fd = fd;
578         call.buf = buf;
579         call.count = count;
580         call.off = off;
581         p = add_history(FAILTEST_READ, file, line, &call);
582
583         /* FIXME: Try partial read returns. */
584         if (should_fail(p)) {
585                 p->u.read.ret = -1;
586                 p->error = EIO;
587         } else {
588                 p->u.read.ret = pread(fd, buf, count, off);
589                 set_cleanup(p, cleanup_read, struct read_call);
590         }
591         errno = p->error;
592         return p->u.read.ret;
593 }
594
595 static struct write_info *new_write(void)
596 {
597         writes = realloc(writes, (writes_num + 1) * sizeof(*writes));
598         return &writes[writes_num++];
599 }
600
601 static void cleanup_write(struct write_call *call)
602 {
603         lseek(call->dup_fd, call->off, SEEK_SET);
604         write(call->dup_fd, call->saved_contents, call->saved_len);
605         lseek(call->dup_fd, call->off, SEEK_SET);
606         ftruncate(call->dup_fd, call->old_filelen);
607         free(call->saved_contents);
608 }
609
610 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
611                         const char *file, unsigned line)
612 {
613         struct failtest_call *p;
614         struct write_call call;
615
616         call.fd = call.dup_fd = fd;
617         call.buf = buf;
618         call.count = count;
619         call.off = off;
620         p = add_history(FAILTEST_WRITE, file, line, &call);
621
622         /* Save old contents if we can */
623         if (p->u.write.off != -1) {
624                 ssize_t ret;
625                 p->u.write.old_filelen = lseek(fd, 0, SEEK_END);
626
627                 /* Write past end of file?  Nothing to save.*/
628                 if (p->u.write.old_filelen <= p->u.write.off)
629                         p->u.write.saved_len = 0;
630                 /* Write which goes over end of file?  Partial save. */
631                 else if (p->u.write.off + count > p->u.write.old_filelen)
632                         p->u.write.saved_len = p->u.write.old_filelen
633                                 - p->u.write.off;
634                 /* Full save. */
635                 else
636                         p->u.write.saved_len = count;
637
638                 p->u.write.saved_contents = malloc(p->u.write.saved_len);
639                 lseek(fd, p->u.write.off, SEEK_SET);
640                 ret = read(fd, p->u.write.saved_contents, p->u.write.saved_len);
641                 if (ret != p->u.write.saved_len)
642                         err(1, "Expected %i bytes, got %i",
643                             (int)p->u.write.saved_len, (int)ret);
644                 lseek(fd, p->u.write.off, SEEK_SET);
645                 set_cleanup(p, cleanup_write, struct write_call);
646         }
647
648         /* If we're a child, tell parent about write. */
649         if (control_fd != -1) {
650                 struct write_info *winfo = new_write();
651                 enum info_type type = WRITE;
652
653                 winfo->hdr.len = count;
654                 winfo->hdr.fd = fd;
655                 winfo->data = malloc(count);
656                 memcpy(winfo->data, buf, count);
657                 winfo->hdr.offset = off;
658                 write_all(control_fd, &type, sizeof(type));
659                 write_all(control_fd, &winfo->hdr, sizeof(winfo->hdr));
660                 write_all(control_fd, winfo->data, count);
661         }
662
663         /* FIXME: Try partial write returns. */
664         if (should_fail(p)) {
665                 p->u.write.ret = -1;
666                 p->error = EIO;
667         } else {
668                 /* FIXME: We assume same write order in parent and child */
669                 if (child_writes_num != 0) {
670                         if (child_writes[0].hdr.fd != fd)
671                                 errx(1, "Child wrote to fd %u, not %u?",
672                                      child_writes[0].hdr.fd, fd);
673                         if (child_writes[0].hdr.offset != p->u.write.off)
674                                 errx(1, "Child wrote to offset %zu, not %zu?",
675                                      (size_t)child_writes[0].hdr.offset,
676                                      (size_t)p->u.write.off);
677                         if (child_writes[0].hdr.len != count)
678                                 errx(1, "Child wrote length %zu, not %zu?",
679                                      child_writes[0].hdr.len, count);
680                         if (memcmp(child_writes[0].data, buf, count)) {
681                                 child_fail(NULL, 0,
682                                            "Child wrote differently to"
683                                            " fd %u than we did!\n", fd);
684                         }
685                         free(child_writes[0].data);
686                         child_writes_num--;
687                         memmove(&child_writes[0], &child_writes[1],
688                                 sizeof(child_writes[0]) * child_writes_num);
689
690                         /* Is this is a socket or pipe, child wrote it
691                            already. */
692                         if (p->u.write.off == (off_t)-1) {
693                                 p->u.write.ret = count;
694                                 errno = p->error;
695                                 return p->u.write.ret;
696                         }
697                 }
698                 p->u.write.ret = pwrite(fd, buf, count, off);
699         }
700         errno = p->error;
701         return p->u.write.ret;
702 }
703
/*
 * read() replacement, implemented as failtest_pread() at the descriptor's
 * current file position.
 *
 * pread() leaves the file position untouched, whereas read() must advance
 * it by the number of bytes read, so the position is moved forward here
 * after a successful read.  Nothing is adjusted when lseek() reported no
 * position (off == -1, e.g. a pipe).  errno as set by failtest_pread() is
 * preserved.
 */
ssize_t failtest_read(int fd, void *buf, size_t count,
		      const char *file, unsigned line)
{
	off_t off = lseek(fd, 0, SEEK_CUR);
	ssize_t ret = failtest_pread(fd, buf, count, off, file, line);

	if (ret > 0 && off != (off_t)-1) {
		int saved_errno = errno;

		lseek(fd, off + ret, SEEK_SET);
		errno = saved_errno;
	}
	return ret;
}
710
/*
 * write() replacement, implemented as failtest_pwrite() at the descriptor's
 * current file position.
 *
 * pwrite() leaves the file position untouched, whereas write() must advance
 * it by the number of bytes written, so the position is moved forward here
 * after a successful write.  Nothing is adjusted when lseek() reported no
 * position (off == -1, e.g. a pipe).  errno as set by failtest_pwrite() is
 * preserved.
 */
ssize_t failtest_write(int fd, const void *buf, size_t count,
		       const char *file, unsigned line)
{
	off_t off = lseek(fd, 0, SEEK_CUR);
	ssize_t ret = failtest_pwrite(fd, buf, count, off, file, line);

	if (ret > 0 && off != (off_t)-1) {
		int saved_errno = errno;

		lseek(fd, off + ret, SEEK_SET);
		errno = saved_errno;
	}
	return ret;
}
717
718 static struct lock_info *WARN_UNUSED_RESULT
719 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
720 {
721         unsigned int i;
722         struct lock_info *l;
723
724         for (i = 0; i < lock_num; i++) {
725                 l = &locks[i];
726
727                 if (l->fd != fd)
728                         continue;
729                 /* Four cases we care about:
730                  * Start overlap:
731                  *      l =    |      |
732                  *      new = |   |
733                  * Mid overlap:
734                  *      l =    |      |
735                  *      new =    |  |
736                  * End overlap:
737                  *      l =    |      |
738                  *      new =      |    |
739                  * Total overlap:
740                  *      l =    |      |
741                  *      new = |         |
742                  */
743                 if (start > l->start && end < l->end) {
744                         /* Mid overlap: trim entry, add new one. */
745                         off_t new_start, new_end;
746                         new_start = end + 1;
747                         new_end = l->end;
748                         l->end = start - 1;
749                         locks = add_lock(locks,
750                                          fd, new_start, new_end, l->type);
751                         l = &locks[i];
752                 } else if (start <= l->start && end >= l->end) {
753                         /* Total overlap: eliminate entry. */
754                         l->end = 0;
755                         l->start = 1;
756                 } else if (end >= l->start && end < l->end) {
757                         /* Start overlap: trim entry. */
758                         l->start = end + 1;
759                 } else if (start > l->start && start <= l->end) {
760                         /* End overlap: trim entry. */
761                         l->end = start-1;
762                 }
763                 /* Nothing left?  Remove it. */
764                 if (l->end < l->start) {
765                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
766                         i--;
767                 }
768         }
769
770         if (type != F_UNLCK) {
771                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
772                 l = &locks[lock_num++];
773                 l->fd = fd;
774                 l->start = start;
775                 l->end = end;
776                 l->type = type;
777         }
778         return locks;
779 }
780
781 /* We only trap this so we can dup fds in case we need to restore. */
782 int failtest_close(int fd)
783 {
784         int new_fd = -1, i;
785
786         if (fd < 0)
787                 return close(fd);
788
789         /* Trace history to find source of fd, and if we need to cleanup writes. */
790         for (i = history_num-1; i >= 0; i--) {
791                 switch (history[i].type) {
792                 case FAILTEST_WRITE:
793                         if (history[i].u.write.fd != fd)
794                                 break;
795                         if (!history[i].cleanup)
796                                 break;
797                         /* We need to save fd so we can restore file. */
798                         if (new_fd == -1)
799                                 new_fd = dup(fd);
800                         history[i].u.write.dup_fd = new_fd;
801                         break;
802                 case FAILTEST_READ:
803                         /* We don't need to cleanup reads on closed fds. */
804                         if (history[i].u.read.fd != fd)
805                                 break;
806                         history[i].cleanup = NULL;
807                         break;
808                 case FAILTEST_PIPE:
809                         /* From a pipe?  We don't ever restore pipes... */
810                         if (history[i].u.pipe.fds[0] == fd) {
811                                 assert(new_fd == -1);
812                                 history[i].u.pipe.closed[0] = true;
813                                 goto out;
814                         }
815                         if (history[i].u.pipe.fds[1] == fd) {
816                                 assert(new_fd == -1);
817                                 history[i].u.pipe.closed[1] = true;
818                                 goto out;
819                         }
820                         break;
821                 case FAILTEST_OPEN:
822                         if (history[i].u.open.ret == fd) {
823                                 history[i].u.open.dup_fd = new_fd;
824                                 goto out;
825                         }
826                         break;
827                 default:
828                         break;
829                 }
830         }
831
832 out:
833         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
834         return close(fd);
835 }
836
/* Last byte (inclusive) of a range given as start and length.  A length of
 * zero means "to end of file" and maps to off_max(). */
static off_t end_of(off_t start, off_t len)
{
	return (len == 0) ? off_max() : start + len - 1;
}
844
845 /* FIXME: This only handles locks, really. */
846 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
847 {
848         struct failtest_call *p;
849         struct fcntl_call call;
850         va_list ap;
851
852         call.fd = fd;
853         call.cmd = cmd;
854
855         /* Argument extraction. */
856         switch (cmd) {
857         case F_SETFL:
858         case F_SETFD:
859                 va_start(ap, cmd);
860                 call.arg.l = va_arg(ap, long);
861                 va_end(ap);
862                 return fcntl(fd, cmd, call.arg.l);
863         case F_GETFD:
864         case F_GETFL:
865                 return fcntl(fd, cmd);
866         case F_GETLK:
867                 get_locks();
868                 va_start(ap, cmd);
869                 call.arg.fl = *va_arg(ap, struct flock *);
870                 va_end(ap);
871                 return fcntl(fd, cmd, &call.arg.fl);
872         case F_SETLK:
873         case F_SETLKW:
874                 va_start(ap, cmd);
875                 call.arg.fl = *va_arg(ap, struct flock *);
876                 va_end(ap);
877                 break;
878         default:
879                 /* This means you need to implement it here. */
880                 err(1, "failtest: unknown fcntl %u", cmd);
881         }
882
883         p = add_history(FAILTEST_FCNTL, file, line, &call);
884         get_locks();
885
886         if (should_fail(p)) {
887                 p->u.fcntl.ret = -1;
888                 if (p->u.fcntl.cmd == F_SETLK)
889                         p->error = EAGAIN;
890                 else
891                         p->error = EDEADLK;
892         } else {
893                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
894                                        &p->u.fcntl.arg.fl);
895                 if (p->u.fcntl.ret == -1)
896                         p->error = errno;
897                 else {
898                         /* We don't handle anything else yet. */
899                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
900                         locks = add_lock(locks,
901                                          p->u.fcntl.fd,
902                                          p->u.fcntl.arg.fl.l_start,
903                                          end_of(p->u.fcntl.arg.fl.l_start,
904                                                 p->u.fcntl.arg.fl.l_len),
905                                          p->u.fcntl.arg.fl.l_type);
906                 }
907         }
908         errno = p->error;
909         return p->u.fcntl.ret;
910 }
911
912 void failtest_init(int argc, char *argv[])
913 {
914         unsigned int i;
915
916         for (i = 1; i < argc; i++) {
917                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
918                         failpath = argv[i] + strlen("--failpath=");
919                 } else if (strcmp(argv[i], "--tracepath") == 0) {
920                         tracefd = dup(STDERR_FILENO);
921                         failtest_timeout_ms = -1;
922                 }
923         }
924         gettimeofday(&start, NULL);
925 }
926
927 /* Free up memory, so valgrind doesn't report leaks. */
928 static void free_everything(void)
929 {
930         unsigned int i;
931
932         for (i = 0; i < writes_num; i++) {
933                 free(writes[i].data);
934         }
935         free(writes);
936
937         /* We don't do this in cleanup: needed even for failed opens. */
938         for (i = 0; i < history_num; i++) {
939                 if (history[i].type == FAILTEST_OPEN)
940                         free((char *)history[i].u.open.pathname);
941         }
942         free(history);
943 }
944
945 void failtest_exit(int status)
946 {
947         int i;
948
949         if (control_fd == -1) {
950                 free_everything();
951                 exit(status);
952         }
953
954         if (failtest_exit_check) {
955                 if (!failtest_exit_check(history, history_num))
956                         child_fail(NULL, 0, "failtest_exit_check failed\n");
957         }
958
959         /* Cleanup everything, in reverse order. */
960         for (i = history_num - 1; i >= 0; i--)
961                 if (history[i].cleanup)
962                         history[i].cleanup(&history[i].u);
963
964         free_everything();
965         tell_parent(SUCCESS);
966         exit(0);
967 }