1 /* Licensed under LGPL - see LICENSE file for details */
2 #include <ccan/failtest/failtest.h>
3 #include <stdarg.h>
4 #include <string.h>
5 #include <stdio.h>
6 #include <stdarg.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <unistd.h>
10 #include <poll.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <sys/time.h>
16 #include <sys/mman.h>
17 #include <signal.h>
18 #include <assert.h>
19 #include <ccan/time/time.h>
20 #include <ccan/read_write_all/read_write_all.h>
21 #include <ccan/failtest/failtest_proto.h>
22 #include <ccan/build_assert/build_assert.h>
23 #include <ccan/hash/hash.h>
24 #include <ccan/htable/htable_type.h>
25 #include <ccan/str/str.h>
26 #include <ccan/compiler/compiler.h>
27
28 enum failtest_result (*failtest_hook)(struct tlist_calls *);
29
30 static FILE *tracef = NULL, *warnf;
31 static int traceindent = 0;
32
33 unsigned int failtest_timeout_ms = 20000;
34
35 const char *failpath;
36 const char *debugpath;
37
38 enum info_type {
39         WRITE,
40         RELEASE_LOCKS,
41         FAILURE,
42         SUCCESS,
43         UNEXPECTED
44 };
45
46 struct lock_info {
47         int fd;
48         /* end is inclusive: you can't have a 0-byte lock. */
49         off_t start, end;
50         int type;
51 };
52
53 /* We hash the call location together with its backtrace. */
54 static size_t hash_call(const struct failtest_call *call)
55 {
56         return hash(call->file, strlen(call->file),
57                     hash(&call->line, 1,
58                          hash(call->backtrace, call->backtrace_num,
59                               call->type)));
60 }
61
62 static bool call_eq(const struct failtest_call *call1,
63                     const struct failtest_call *call2)
64 {
65         unsigned int i;
66
67         if (strcmp(call1->file, call2->file) != 0
68             || call1->line != call2->line
69             || call1->type != call2->type
70             || call1->backtrace_num != call2->backtrace_num)
71                 return false;
72
73         for (i = 0; i < call1->backtrace_num; i++)
74                 if (call1->backtrace[i] != call2->backtrace[i])
75                         return false;
76
77         return true;
78 }
79
80 /* Defines struct failtable. */
81 HTABLE_DEFINE_TYPE(struct failtest_call, (struct failtest_call *), hash_call,
82                    call_eq, failtable);
83
84 bool (*failtest_exit_check)(struct tlist_calls *history);
85
86 /* The entire history of all calls. */
87 static struct tlist_calls history = TLIST_INIT(history);
89 /* If we're a child, the fd to write control info to the parent. */
89 static int control_fd = -1;
90 /* If we're a child, this is the first call we did ourselves. */
91 static struct failtest_call *our_history_start = NULL;
92 /* For printing runtime with --trace. */
93 static struct timeval start;
94 /* Set when failtest_hook returns FAIL_PROBE */
95 static bool probing = false;
96 /* Table to track duplicates. */
97 static struct failtable failtable;
98
99 /* Array of writes which our child did.  We report them on failure. */
100 static struct write_call *child_writes = NULL;
101 static unsigned int child_writes_num = 0;
102
103 /* fcntl locking info. */
104 static pid_t lock_owner;
105 static struct lock_info *locks = NULL;
106 static unsigned int lock_num = 0;
107
108 /* Our original pid, which we return to anyone who asks. */
109 static pid_t orig_pid;
110
111 /* Mapping from failtest_type to char. */
112 static const char info_to_arg[] = "mceoxprwfal";
113
114 /* Dummy call used for failtest_undo wrappers. */
115 static struct failtest_call unrecorded_call;
116
117 struct contents_saved {
118         size_t count;
119         off_t off;
120         off_t old_len;
121         char contents[1];
122 };
123
124 /* File contents, saved in this child only. */
125 struct saved_mmapped_file {
126         struct saved_mmapped_file *next;
127         struct failtest_call *opener;
128         struct contents_saved *s;
129 };
130
131 static struct saved_mmapped_file *saved_mmapped_files;
132
133 #if HAVE_BACKTRACE
134 #include <execinfo.h>
135
136 static void **get_backtrace(unsigned int *num)
137 {
138         static unsigned int max_back = 100;
139         void **ret;
140
141 again:
142         ret = malloc(max_back * sizeof(void *));
143         *num = backtrace(ret, max_back);
144         if (*num == max_back) {
145                 free(ret);
146                 max_back *= 2;
147                 goto again;
148         }
149         return ret;
150 }
151 #else
153 /* This will test slightly less, since it will consider all of the same
154  * calls as identical.  But, it's slightly faster! */
154 static void **get_backtrace(unsigned int *num)
155 {
156         *num = 0;
157         return NULL;
158 }
159 #endif /* HAVE_BACKTRACE */
160
161 static struct failtest_call *add_history_(enum failtest_call_type type,
162                                           bool can_leak,
163                                           const char *file,
164                                           unsigned int line,
165                                           const void *elem,
166                                           size_t elem_size)
167 {
168         struct failtest_call *call;
169
170         /* NULL file is how we suppress failure. */
171         if (!file)
172                 return &unrecorded_call;
173
174         call = malloc(sizeof *call);
175         call->type = type;
176         call->can_leak = can_leak;
177         call->file = file;
178         call->line = line;
179         call->cleanup = NULL;
180         call->backtrace = get_backtrace(&call->backtrace_num);
181         memcpy(&call->u, elem, elem_size);
182         tlist_add_tail(&history, call, list);
183         return call;
184 }
185
186 #define add_history(type, can_leak, file, line, elem)           \
187         add_history_((type), (can_leak), (file), (line), (elem), sizeof(*(elem)))
188
189 /* We do a fake call inside a sizeof(), to check types. */
190 #define set_cleanup(call, clean, type)                  \
191         (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL, false),1), (clean))
192
193 /* Dup the fd to a high value (out of the way I hope!), and close the old fd. */
194 static int move_fd_to_high(int fd)
195 {
196         int i;
197
198         for (i = FD_SETSIZE - 1; i >= 0; i--) {
199                 if (fcntl(i, F_GETFL) == -1 && errno == EBADF) {
200                         if (dup2(fd, i) == -1)
201                                 err(1, "Failed to dup fd %i to %i", fd, i);
202                         close(fd);
203                         return i;
204                 }
205         }
206         /* Nothing?  Really?  Er... ok? */
207         return fd;
208 }
209
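/* Read one WRITE record (struct write_call plus its data) from the child's
 * control fd and append it to child_writes. */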
210 static bool read_write_info(int fd)
211 {
212         struct write_call *w;
213         char *buf;
214
215         /* We don't need all of this, but it's simple. */
216         child_writes = realloc(child_writes,
217                                (child_writes_num+1) * sizeof(child_writes[0]));
218         w = &child_writes[child_writes_num];
219         if (!read_all(fd, w, sizeof(*w)))
220                 return false;
221
222         w->buf = buf = malloc(w->count);
223         if (!read_all(fd, buf, w->count))
224                 return false;
225
226         child_writes_num++;
227         return true;
228 }
229
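/* Render the history as a --failpath string: one letter per call, uppercased
 * where we injected a failure. */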
230 static char *failpath_string(void)
231 {
232         struct failtest_call *i;
233         char *ret = strdup("");
234         unsigned len = 0;
235
236         /* Inefficient, but who cares? */
237         tlist_for_each(&history, i, list) {
238                 ret = realloc(ret, len + 2);
239                 ret[len] = info_to_arg[i->type];
240                 if (i->fail)
241                         ret[len] = toupper(ret[len]);
242                 ret[++len] = '\0';
243         }
244         return ret;
245 }
246
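/* Warn via warnf, appending strerror(e) unless e is -1, plus the current
 * failpath. */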
247 static void do_warn(int e, const char *fmt, va_list ap)
248 {
249         char *p = failpath_string();
250
251         vfprintf(warnf, fmt, ap);
252         if (e != -1)
253                 fprintf(warnf, ": %s", strerror(e));
254         fprintf(warnf, " [%s]\n", p);
255         free(p);
256 }
257
258 static void fwarn(const char *fmt, ...)
259 {
260         va_list ap;
261         int e = errno;
262
263         va_start(ap, fmt);
264         do_warn(e, fmt, ap);
265         va_end(ap);
266 }
267
268
269 static void fwarnx(const char *fmt, ...)
270 {
271         va_list ap;
272
273         va_start(ap, fmt);
274         do_warn(-1, fmt, ap);
275         va_end(ap);
276 }
277
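/* Report a status code up the control pipe, if we are a child. */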
278 static void tell_parent(enum info_type type)
279 {
280         if (control_fd != -1)
281                 write_all(control_fd, &type, sizeof(type));
282 }
283
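/* A child went wrong: print its captured output and a reproduction
 * --failpath, tell the parent, and exit. */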
284 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
285 {
286         va_list ap;
287         char *path = failpath_string();
288
289         va_start(ap, fmt);
290         vfprintf(stderr, fmt, ap);
291         va_end(ap);
292
293         fprintf(stderr, "%.*s", (int)outlen, out);
294         printf("To reproduce: --failpath=%s\n", path);
295         free(path);
296         tell_parent(FAILURE);
297         exit(1);
298 }
299
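/* Write an indented trace line (sequence number, pid, failpath, message),
 * but only if tracing is enabled. */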
300 static void PRINTF_FMT(1, 2) trace(const char *fmt, ...)
301 {
302         va_list ap;
303         unsigned int i;
304         char *p;
305         static int idx;
306
307         if (!tracef)
308                 return;
309
310         for (i = 0; i < traceindent; i++)
311                 fprintf(tracef, "  ");
312
313         p = failpath_string();
314         fprintf(tracef, "%i: %u: %s ", idx++, getpid(), p);
315         va_start(ap, fmt);
316         vfprintf(tracef, fmt, ap);
317         va_end(ap);
318         free(p);
319 }
320
321 static pid_t child;
322
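/* Pass a signal (e.g. the timeout SIGUSR1) down to our child. */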
323 static void hand_down(int signum)
324 {
325         kill(child, signum);
326 }
327
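/* Release every fcntl lock we hold, or ask the parent to if it owns them. */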
328 static void release_locks(void)
329 {
330         /* Locks were never acquired/reacquired? */
331         if (lock_owner == 0)
332                 return;
333
334         /* We own them?  Release them all. */
335         if (lock_owner == getpid()) {
336                 unsigned int i;
337                 struct flock fl;
338                 fl.l_type = F_UNLCK;
339                 fl.l_whence = SEEK_SET;
340                 fl.l_start = 0;
341                 fl.l_len = 0;
342
343                 trace("Releasing %u locks\n", lock_num);
344                 for (i = 0; i < lock_num; i++)
345                         fcntl(locks[i].fd, F_SETLK, &fl);
346         } else {
347                 /* Our parent must have them; pass request up. */
348                 enum info_type type = RELEASE_LOCKS;
349                 assert(control_fd != -1);
350                 write_all(control_fd, &type, sizeof(type));
351         }
352         lock_owner = 0;
353 }
354
355 /* off_t is a signed type.  Getting its max is non-trivial. */
356 static off_t off_max(void)
357 {
358         BUILD_ASSERT(sizeof(off_t) == 4 || sizeof(off_t) == 8);
359         if (sizeof(off_t) == 4)
360                 return (off_t)0x7FFFFFFF;
361         else
362                 return (off_t)0x7FFFFFFFFFFFFFFFULL;
363 }
364
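/* Become the lock owner: get the current owner to release, then re-acquire
 * every recorded lock in this process. */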
365 static void get_locks(void)
366 {
367         unsigned int i;
368         struct flock fl;
369
370         if (lock_owner == getpid())
371                 return;
372
373         if (lock_owner != 0) {
374                 enum info_type type = RELEASE_LOCKS;
375                 assert(control_fd != -1);
376                 trace("Asking parent to release locks\n");
377                 write_all(control_fd, &type, sizeof(type));
378         }
379
380         fl.l_whence = SEEK_SET;
381
382         for (i = 0; i < lock_num; i++) {
383                 fl.l_type = locks[i].type;
384                 fl.l_start = locks[i].start;
385                 if (locks[i].end == off_max())
386                         fl.l_len = 0;
387                 else
388                         fl.l_len = locks[i].end - locks[i].start + 1;
389
390                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
391                         abort();
392         }
393         trace("Acquired %u locks\n", lock_num);
394         lock_owner = getpid();
395 }
396
397
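/* Snapshot count bytes at off (and the file length) so they can be put back
 * when a child is cleaned up. */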
398 static struct contents_saved *save_contents(const char *filename,
399                                             int fd, size_t count, off_t off,
400                                             const char *why)
401 {
402         struct contents_saved *s = malloc(sizeof(*s) + count);
403         ssize_t ret;
404
405         s->off = off;
406
407         ret = pread(fd, s->contents, count, off);
408         if (ret < 0) {
409                 fwarn("failtest_write: failed to save old contents!");
410                 s->count = 0;
411         } else
412                 s->count = ret;
413
414         /* Use lseek to get the size of the file, but we have to restore
415          * the file offset. */
416         off = lseek(fd, 0, SEEK_CUR);
417         s->old_len = lseek(fd, 0, SEEK_END);
418         lseek(fd, off, SEEK_SET);
419
420         trace("Saving %p %s %zu@%llu after %s (filelength %llu) via fd %i\n",
421               s, filename, s->count, (long long)s->off, why,
422               (long long)s->old_len, fd);
423         return s;
424 }
425
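/* Write a snapshot back (reopening the file if its fd was closed), restore
 * the old length, and optionally the file offset. */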
426 static void restore_contents(struct failtest_call *opener,
427                              struct contents_saved *s,
428                              bool restore_offset,
429                              const char *caller)
430 {
431         int fd;
432
433         /* The top parent doesn't need to restore. */
434         if (control_fd == -1)
435                 return;
436
437         /* Has the fd been closed? */
438         if (opener->u.open.closed) {
439                 /* Reopen, replace fd, close silently as we clean up. */
440                 fd = open(opener->u.open.pathname, O_RDWR);
441                 if (fd < 0) {
442                         fwarn("failtest: could not reopen %s to clean up %s!",
443                               opener->u.open.pathname, caller);
444                         return;
445                 }
446                 /* Make it clearly distinguishable from a "normal" fd. */
447                 fd = move_fd_to_high(fd);
448                 trace("Reopening %s to restore it (was fd %i, now %i)\n",
449                       opener->u.open.pathname, opener->u.open.ret, fd);
450                 opener->u.open.ret = fd;
451                 opener->u.open.closed = false;
452         }
453         fd = opener->u.open.ret;
454
455         trace("Restoring %p %s %zu@%llu after %s (filelength %llu) via fd %i\n",
456               s, opener->u.open.pathname, s->count, (long long)s->off, caller,
457               (long long)s->old_len, fd);
458         if (pwrite(fd, s->contents, s->count, s->off) != s->count) {
459                 fwarn("failtest: write failed cleaning up %s for %s!",
460                       opener->u.open.pathname, caller);
461         }
462
463         if (ftruncate(fd, s->old_len) != 0) {
464                 fwarn("failtest_write: truncate failed cleaning up %s for %s!",
465                       opener->u.open.pathname, caller);
466         }
467
468         if (restore_offset) {
469                 trace("Restoring offset of fd %i to %llu\n",
470                       fd, (long long)s->off);
471                 lseek(fd, s->off, SEEK_SET);
472         }
473 }
474
475 /* We save/restore most things on demand, but always do mmapped files. */
476 static void save_mmapped_files(void)
477 {
478         struct failtest_call *i;
479         trace("Saving mmapped files in child\n");
480
481         tlist_for_each_rev(&history, i, list) {
482                 struct mmap_call *m = &i->u.mmap;
483                 struct saved_mmapped_file *s;
484
485                 if (i->type != FAILTEST_MMAP)
486                         continue;
487
488                 /* FIXME: We only handle mmapped files where fd is still open. */
489                 if (m->opener->u.open.closed)
490                         continue;
491
492                 s = malloc(sizeof *s);
493                 s->s = save_contents(m->opener->u.open.pathname,
494                                      m->fd, m->length, m->offset,
495                                      "mmapped file before fork");
496                 s->opener = m->opener;
497                 s->next = saved_mmapped_files;
498                 saved_mmapped_files = s;
499         }
500 }
501
502 static void free_mmapped_files(bool restore)
503 {
504         trace("%s mmapped files in child\n",
505               restore ? "Restoring" : "Discarding");
506         while (saved_mmapped_files) {
507                 struct saved_mmapped_file *next = saved_mmapped_files->next;
508                 if (restore)
509                         restore_contents(saved_mmapped_files->opener,
510                                          saved_mmapped_files->s, false,
511                                          "saved mmap");
512                 free(saved_mmapped_files->s);
513                 free(saved_mmapped_files);
514                 saved_mmapped_files = next;
515         }
516 }
517
518 /* Returns a FAILTEST_OPEN, FAILTEST_PIPE or NULL. */
519 static struct failtest_call *opener_of(int fd)
520 {
521         struct failtest_call *i;
522
523         /* Don't get confused and match genuinely failed opens. */
524         if (fd < 0)
525                 return NULL;
526
527         /* Figure out the set of live fds. */
528         tlist_for_each_rev(&history, i, list) {
529                 if (i->fail)
530                         continue;
531                 switch (i->type) {
532                 case FAILTEST_CLOSE:
533                         if (i->u.close.fd == fd) {
534                                 return NULL;
535                         }
536                         break;
537                 case FAILTEST_OPEN:
538                         if (i->u.open.ret == fd) {
539                                 if (i->u.open.closed)
540                                         return NULL;
541                                 return i;
542                         }
543                         break;
544                 case FAILTEST_PIPE:
545                         if (i->u.pipe.fds[0] == fd || i->u.pipe.fds[1] == fd) {
546                                 return i;
547                         }
548                         break;
549                 default:
550                         break;
551                 }
552         }
553
554         /* FIXME: socket, dup, etc are untracked! */
555         return NULL;
556 }
557
558 static void free_call(struct failtest_call *call)
559 {
560         /* We don't do this in cleanup: needed even for failed opens. */
561         if (call->type == FAILTEST_OPEN)
562                 free((char *)call->u.open.pathname);
563         free(call->backtrace);
564         tlist_del_from(&history, call, list);
565         free(call);
566 }
567
568 /* Free up memory, so valgrind doesn't report leaks. */
569 static void free_everything(void)
570 {
571         struct failtest_call *i;
572
573         while ((i = tlist_top(&history, struct failtest_call, list)) != NULL)
574                 free_call(i);
575
576         failtable_clear(&failtable);
577 }
578
579 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
580 {
581         struct failtest_call *i;
582         bool restore = true;
583
584         /* For children, we don't care if they "failed" the testing. */
585         if (control_fd != -1)
586                 status = 0;
587         else
588                 /* We don't restore contents for original parent. */
589                 restore = false;
590
591         /* Cleanup everything, in reverse order. */
592         tlist_for_each_rev(&history, i, list) {
593                 /* Don't restore things our parent did. */
594                 if (i == our_history_start)
595                         restore = false;
596
597                 if (i->fail)
598                         continue;
599
600                 if (i->cleanup)
601                         i->cleanup(&i->u, restore);
602
603                 /* But their program shouldn't leak, even on failure. */
604                 if (!forced_cleanup && i->can_leak) {
605                         printf("Leak at %s:%u: --failpath=%s\n",
606                                i->file, i->line, failpath_string());
607                         status = 1;
608                 }
609         }
610
611         /* Put back mmaped files the way our parent (if any) expects. */
612         free_mmapped_files(true);
613
614         free_everything();
615         if (status == 0)
616                 tell_parent(SUCCESS);
617         else
618                 tell_parent(FAILURE);
619         exit(status);
620 }
621
622 static bool following_path(void)
623 {
624         if (!failpath)
625                 return false;
626         /* + means continue after end, like normal. */
627         if (*failpath == '+') {
628                 failpath = NULL;
629                 return false;
630         }
631         return true;
632 }
633
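/* --failpath is driving us: the next character says whether this call
 * should fail. */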
634 static bool follow_path(struct failtest_call *call)
635 {
636         if (*failpath == '\0') {
637                 /* Continue, but don't inject errors. */
638                 return call->fail = false;
639         }
640
641         if (tolower((unsigned char)*failpath) != info_to_arg[call->type])
642                 errx(1, "Failpath expected '%s' got '%c'",
643                      failpath, info_to_arg[call->type]);
644         call->fail = cisupper(*(failpath++));
645         if (call->fail)
646                 call->can_leak = false;
647         return call->fail;
648 }
649
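/* The core of failtest: decide whether this call fails.  Unless a failpath,
 * duplicate, or hook says otherwise, we fork a child which fails the call;
 * the parent collects its output and control messages, checks how it exited,
 * then carries on without failing. */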
650 static bool should_fail(struct failtest_call *call)
651 {
652         int status;
653         int control[2], output[2];
654         enum info_type type = UNEXPECTED;
655         char *out = NULL;
656         size_t outlen = 0;
657         struct failtest_call *dup;
658
659         if (call == &unrecorded_call)
660                 return false;
661
662         if (following_path())
663                 return follow_path(call);
664
665         /* Attach debugger if they asked for it. */
666         if (debugpath) {
667                 char *path;
668
669                 /* Pretend this last call matches whatever path wanted:
670                  * keeps valgrind happy. */
671                 call->fail = cisupper(debugpath[strlen(debugpath)-1]);
672                 path = failpath_string();
673
674                 if (streq(path, debugpath)) {
675                         char str[80];
676
677                         /* Don't timeout. */
678                         signal(SIGUSR1, SIG_IGN);
679                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
680                                 getpid(), getpid());
681                         if (system(str) == 0)
682                                 sleep(5);
683                 } else {
684                         /* Ignore last character: could be upper or lower. */
685                         path[strlen(path)-1] = '\0';
686                         if (!strstarts(debugpath, path)) {
687                                 fprintf(stderr,
688                                         "--debugpath not followed: %s\n", path);
689                                 debugpath = NULL;
690                         }
691                 }
692                 free(path);
693         }
694
695         /* Are we probing?  If so, we never fail twice. */
696         if (probing) {
697                 trace("Not failing %c due to FAIL_PROBE return\n",
698                       info_to_arg[call->type]);
699                 return call->fail = false;
700         }
701
702         /* Don't fail more than once in the same place. */
703         dup = failtable_get(&failtable, call);
704         if (dup) {
705                 trace("Not failing %c due to duplicate\n",
706                       info_to_arg[call->type]);
707                 return call->fail = false;
708         }
709
710         if (failtest_hook) {
711                 switch (failtest_hook(&history)) {
712                 case FAIL_OK:
713                         break;
714                 case FAIL_PROBE:
715                         probing = true;
716                         break;
717                 case FAIL_DONT_FAIL:
718                         trace("Not failing %c due to failhook return\n",
719                               info_to_arg[call->type]);
720                         call->fail = false;
721                         return false;
722                 default:
723                         abort();
724                 }
725         }
726
727         /* Add it to our table of calls. */
728         failtable_add(&failtable, call);
729
730         /* We're going to fail in the child. */
731         call->fail = true;
732         if (pipe(control) != 0 || pipe(output) != 0)
733                 err(1, "opening pipe");
734
735         /* Move out the way, to high fds. */
736         control[0] = move_fd_to_high(control[0]);
737         control[1] = move_fd_to_high(control[1]);
738         output[0] = move_fd_to_high(output[0]);
739         output[1] = move_fd_to_high(output[1]);
740
741         /* Prevent double-printing (in child and parent) */
742         fflush(stdout);
743         fflush(warnf);
744         if (tracef)
745                 fflush(tracef);
746         child = fork();
747         if (child == -1)
748                 err(1, "forking failed");
749
750         if (child == 0) {
751                 traceindent++;
752                 if (tracef) {
753                         struct timeval diff;
754                         const char *p;
755                         char *failpath;
756                         struct failtest_call *c;
757
758                         c = tlist_tail(&history, struct failtest_call, list);
759                         diff = time_sub(time_now(), start);
760                         failpath = failpath_string();
761                         p = strrchr(c->file, '/');
762                         if (p)
763                                 p++;
764                         else
765                                 p = c->file;
766                         trace("%u->%u (%u.%02u): %s (%s:%u)\n",
767                               getppid(), getpid(),
768                               (int)diff.tv_sec, (int)diff.tv_usec / 10000,
769                               failpath, p, c->line);
770                         free(failpath);
771                 }
772                 /* From here on, we have to clean up! */
773                 our_history_start = tlist_tail(&history, struct failtest_call,
774                                                list);
775                 close(control[0]);
776                 close(output[0]);
777                 /* Don't swallow stderr if we're tracing. */
778                 if (!tracef) {
779                         dup2(output[1], STDOUT_FILENO);
780                         dup2(output[1], STDERR_FILENO);
781                         if (output[1] != STDOUT_FILENO
782                             && output[1] != STDERR_FILENO)
783                                 close(output[1]);
784                 }
785                 control_fd = move_fd_to_high(control[1]);
786
787                 /* Forget any of our parent's saved files. */
788                 free_mmapped_files(false);
789
790                 /* Now, save any files we need to. */
791                 save_mmapped_files();
792
793                 /* Failed calls can't leak. */
794                 call->can_leak = false;
795
796                 return true;
797         }
798
799         signal(SIGUSR1, hand_down);
800
801         close(control[1]);
802         close(output[1]);
803
804         /* We grab output so we can display it; we grab writes so we
805          * can compare. */
806         do {
807                 struct pollfd pfd[2];
808                 int ret;
809
810                 pfd[0].fd = output[0];
811                 pfd[0].events = POLLIN|POLLHUP;
812                 pfd[1].fd = control[0];
813                 pfd[1].events = POLLIN|POLLHUP;
814
815                 if (type == SUCCESS)
816                         ret = poll(pfd, 1, failtest_timeout_ms);
817                 else
818                         ret = poll(pfd, 2, failtest_timeout_ms);
819
820                 if (ret == 0)
821                         hand_down(SIGUSR1);
822                 if (ret < 0) {
823                         if (errno == EINTR)
824                                 continue;
825                         err(1, "Poll returned %i", ret);
826                 }
827
828                 if (pfd[0].revents & POLLIN) {
829                         ssize_t len;
830
831                         out = realloc(out, outlen + 8192);
832                         len = read(output[0], out + outlen, 8192);
833                         outlen += len;
834                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
835                         if (read_all(control[0], &type, sizeof(type))) {
836                                 if (type == WRITE) {
837                                         if (!read_write_info(control[0]))
838                                                 break;
839                                 } else if (type == RELEASE_LOCKS) {
840                                         release_locks();
841                                         /* FIXME: Tell them we're done... */
842                                 }
843                         }
844                 } else if (pfd[0].revents & POLLHUP) {
845                         break;
846                 }
847         } while (type != FAILURE);
848
849         close(output[0]);
850         close(control[0]);
851         waitpid(child, &status, 0);
852         if (!WIFEXITED(status)) {
853                 if (WTERMSIG(status) == SIGUSR1)
854                         child_fail(out, outlen, "Timed out");
855                 else
856                         child_fail(out, outlen, "Killed by signal %u: ",
857                                    WTERMSIG(status));
858         }
859         /* Child printed failure already, just pass up exit code. */
860         if (type == FAILURE) {
861                 fprintf(stderr, "%.*s", (int)outlen, out);
862                 tell_parent(type);
863                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
864         }
865         if (WEXITSTATUS(status) != 0)
866                 child_fail(out, outlen, "Exited with status %i: ",
867                            WEXITSTATUS(status));
868
869         free(out);
870         signal(SIGUSR1, SIG_DFL);
871
872         /* Only child does probe. */
873         probing = false;
874
875         /* We continue onwards without failing. */
876         call->fail = false;
877         return false;
878 }
879
880 static void cleanup_calloc(struct calloc_call *call, bool restore)
881 {
882         trace("undoing calloc %p\n", call->ret);
883         free(call->ret);
884 }
885
886 void *failtest_calloc(size_t nmemb, size_t size,
887                       const char *file, unsigned line)
888 {
889         struct failtest_call *p;
890         struct calloc_call call;
891         call.nmemb = nmemb;
892         call.size = size;
893         p = add_history(FAILTEST_CALLOC, true, file, line, &call);
894
895         if (should_fail(p)) {
896                 p->u.calloc.ret = NULL;
897                 p->error = ENOMEM;
898         } else {
899                 p->u.calloc.ret = calloc(nmemb, size);
900                 set_cleanup(p, cleanup_calloc, struct calloc_call);
901         }
902         trace("calloc %zu x %zu %s:%u -> %p\n",
903               nmemb, size, file, line, p->u.calloc.ret);
904         errno = p->error;
905         return p->u.calloc.ret;
906 }
907
908 static void cleanup_malloc(struct malloc_call *call, bool restore)
909 {
910         trace("undoing malloc %p\n", call->ret);
911         free(call->ret);
912 }
913
914 void *failtest_malloc(size_t size, const char *file, unsigned line)
915 {
916         struct failtest_call *p;
917         struct malloc_call call;
918         call.size = size;
919
920         p = add_history(FAILTEST_MALLOC, true, file, line, &call);
921         if (should_fail(p)) {
922                 p->u.malloc.ret = NULL;
923                 p->error = ENOMEM;
924         } else {
925                 p->u.malloc.ret = malloc(size);
926                 set_cleanup(p, cleanup_malloc, struct malloc_call);
927         }
928         trace("malloc %zu %s:%u -> %p\n",
929               size, file, line, p->u.malloc.ret);
930         errno = p->error;
931         return p->u.malloc.ret;
932 }
933
934 static void cleanup_realloc(struct realloc_call *call, bool restore)
935 {
936         trace("undoing realloc %p\n", call->ret);
937         free(call->ret);
938 }
939
940 /* Walk back and find out if we got this ptr from a previous routine. */
941 static void fixup_ptr_history(void *ptr, const char *why)
942 {
943         struct failtest_call *i;
944
945         /* Start at end of history, work back. */
946         tlist_for_each_rev(&history, i, list) {
947                 switch (i->type) {
948                 case FAILTEST_REALLOC:
949                         if (i->u.realloc.ret == ptr) {
950                                 trace("found realloc %p %s:%u matching %s\n",
951                                       ptr, i->file, i->line, why);
952                                 i->cleanup = NULL;
953                                 i->can_leak = false;
954                                 return;
955                         }
956                         break;
957                 case FAILTEST_MALLOC:
958                         if (i->u.malloc.ret == ptr) {
959                                 trace("found malloc %p %s:%u matching %s\n",
960                                       ptr, i->file, i->line, why);
961                                 i->cleanup = NULL;
962                                 i->can_leak = false;
963                                 return;
964                         }
965                         break;
966                 case FAILTEST_CALLOC:
967                         if (i->u.calloc.ret == ptr) {
968                                 trace("found calloc %p %s:%u matching %s\n",
969                                       ptr, i->file, i->line, why);
970                                 i->cleanup = NULL;
971                                 i->can_leak = false;
972                                 return;
973                         }
974                         break;
975                 default:
976                         break;
977                 }
978         }
979         trace("Did not find %p matching %s\n", ptr, why);
980 }
981
982 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
983 {
984         struct failtest_call *p;
985         struct realloc_call call;
986         call.size = size;
987         p = add_history(FAILTEST_REALLOC, true, file, line, &call);
988
989         /* FIXME: Try one child moving allocation, one not. */
990         if (should_fail(p)) {
991                 p->u.realloc.ret = NULL;
992                 p->error = ENOMEM;
993         } else {
994                 /* Don't catch this one in the history fixup... */
995                 p->u.realloc.ret = NULL;
996                 fixup_ptr_history(ptr, "realloc");
997                 p->u.realloc.ret = realloc(ptr, size);
998                 set_cleanup(p, cleanup_realloc, struct realloc_call);
999         }
1000         trace("realloc %p %s:%u -> %p\n",
1001               ptr, file, line, p->u.realloc.ret);
1002         errno = p->error;
1003         return p->u.realloc.ret;
1004 }
1005
1006 /* FIXME: Record free, so we can terminate fixup_ptr_history correctly.
1007  * If there's an alloc we don't see, it could get confusing if it matches
1008  * a previous allocation we did see. */
1009 void failtest_free(void *ptr)
1010 {
1011         fixup_ptr_history(ptr, "free");
1012         trace("free %p\n", ptr);
1013         free(ptr);
1014 }
1015
1016
1017 static struct contents_saved *save_file(const char *pathname)
1018 {
1019         int fd;
1020         struct contents_saved *s;
1021
1022         fd = open(pathname, O_RDONLY);
1023         if (fd < 0)
1024                 return NULL;
1025
1026         s = save_contents(pathname, fd, lseek(fd, 0, SEEK_END), 0,
1027                           "open with O_TRUNC");
1028         close(fd);
1029         return s;
1030 }
1031
1032 /* Optimization: don't create a child for an open which *we know*
1033  * would fail anyway. */
1034 static bool open_would_fail(const char *pathname, int flags)
1035 {
1036         if ((flags & O_ACCMODE) == O_RDONLY)
1037                 return access(pathname, R_OK) != 0;
1038         if (!(flags & O_CREAT)) {
1039                 if ((flags & O_ACCMODE) == O_WRONLY)
1040                         return access(pathname, W_OK) != 0;
1041                 if ((flags & O_ACCMODE) == O_RDWR)
1042                         return access(pathname, W_OK) != 0
1043                                 || access(pathname, R_OK) != 0;
1044         }
1045         /* FIXME: We could check if it exists, for O_CREAT|O_EXCL */
1046         return false;
1047 }
1048
1049 static void cleanup_open(struct open_call *call, bool restore)
1050 {
1051         if (restore && call->saved)
1052                 restore_contents(container_of(call, struct failtest_call,
1053                                               u.open),
1054                                  call->saved, false, "open with O_TRUNC");
1055         if (!call->closed) {
1056                 trace("Cleaning up open %s by closing fd %i\n",
1057                       call->pathname, call->ret);
1058                 close(call->ret);
1059                 call->closed = true;
1060         }
1061         free(call->saved);
1062 }
1063
1064 int failtest_open(const char *pathname,
1065                   const char *file, unsigned line, ...)
1066 {
1067         struct failtest_call *p;
1068         struct open_call call;
1069         va_list ap;
1070
1071         call.pathname = strdup(pathname);
1072         va_start(ap, line);
1073         call.flags = va_arg(ap, int);
1074         call.always_save = false;
1075         call.closed = false;
1076         if (call.flags & O_CREAT) {
1077                 call.mode = va_arg(ap, int);
1078         }
1079         va_end(ap);
1080         p = add_history(FAILTEST_OPEN, true, file, line, &call);
1081         /* Avoid memory leak! */
1082         if (p == &unrecorded_call)
1083                 free((char *)call.pathname);
1084
1085         if (should_fail(p)) {
1086                 /* Don't bother inserting failures that would happen anyway. */
1087                 if (open_would_fail(pathname, call.flags)) {
1088                         trace("Open would have failed anyway: stopping\n");
1089                         failtest_cleanup(true, 0);
1090                 }
1091                 p->u.open.ret = -1;
1092                 /* FIXME: Play with error codes? */
1093                 p->error = EACCES;
1094         } else {
1095                 /* Save the old version if they're truncating it. */
1096                 if (call.flags & O_TRUNC)
1097                         p->u.open.saved = save_file(pathname);
1098                 else
1099                         p->u.open.saved = NULL;
1100                 p->u.open.ret = open(pathname, call.flags, call.mode);
1101                 if (p->u.open.ret == -1) {
1102                         p->u.open.closed = true;
1103                         p->can_leak = false;
1104                 } else {
1105                         set_cleanup(p, cleanup_open, struct open_call);
1106                 }
1107         }
1108         trace("open %s %s:%u -> %i (opener %p)\n",
1109               pathname, file, line, p->u.open.ret, &p->u.open);
1110         errno = p->error;
1111         return p->u.open.ret;
1112 }
1113
1114 static void cleanup_mmap(struct mmap_call *mmap, bool restore)
1115 {
1116         trace("cleaning up mmap @%p (opener %p)\n",
1117               mmap->ret, mmap->opener);
1118         if (restore)
1119                 restore_contents(mmap->opener, mmap->saved, false, "mmap");
1120         free(mmap->saved);
1121 }
1122
1123 void *failtest_mmap(void *addr, size_t length, int prot, int flags,
1124                     int fd, off_t offset, const char *file, unsigned line)
1125 {
1126         struct failtest_call *p;
1127         struct mmap_call call;
1128
1129         call.addr = addr;
1130         call.length = length;
1131         call.prot = prot;
1132         call.flags = flags;
1133         call.offset = offset;
1134         call.fd = fd;
1135         call.opener = opener_of(fd);
1136
1137         /* If we don't know what file it was, don't fail. */
1138         if (!call.opener) {
1139                 if (fd != -1) {
1140                         fwarnx("failtest_mmap: couldn't figure out source for"
1141                                " fd %i at %s:%u", fd, file, line);
1142                 }
1143                 addr = mmap(addr, length, prot, flags, fd, offset);
1144                 trace("mmap of fd %i -> %p (opener = NULL)\n", fd, addr);
1145                 return addr;
1146         }
1147
1148         p = add_history(FAILTEST_MMAP, false, file, line, &call);
1149         if (should_fail(p)) {
1150                 p->u.mmap.ret = MAP_FAILED;
1151                 p->error = ENOMEM;
1152         } else {
1153                 p->u.mmap.ret = mmap(addr, length, prot, flags, fd, offset);
1154                 /* Save contents if we're writing to a normal file */
1155                 if (p->u.mmap.ret != MAP_FAILED
1156                     && (prot & PROT_WRITE)
1157                     && call.opener->type == FAILTEST_OPEN) {
1158                         const char *fname = call.opener->u.open.pathname;
1159                         p->u.mmap.saved = save_contents(fname, fd, length,
1160                                                         offset, "being mmapped");
1161                         set_cleanup(p, cleanup_mmap, struct mmap_call);
1162                 }
1163         }
1164         trace("mmap of fd %i %s:%u -> %p (opener = %p)\n",
1165               fd, file, line, p->u.mmap.ret, call.opener);
1166         errno = p->error;
1167         return p->u.mmap.ret;
1168 }
1169
1170 static void cleanup_pipe(struct pipe_call *call, bool restore)
1171 {
1172         trace("cleaning up pipe fd=%i%s,%i%s\n",
1173               call->fds[0], call->closed[0] ? "(already closed)" : "",
1174               call->fds[1], call->closed[1] ? "(already closed)" : "");
1175         if (!call->closed[0])
1176                 close(call->fds[0]);
1177         if (!call->closed[1])
1178                 close(call->fds[1]);
1179 }
1180
1181 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
1182 {
1183         struct failtest_call *p;
1184         struct pipe_call call;
1185
1186         p = add_history(FAILTEST_PIPE, true, file, line, &call);
1187         if (should_fail(p)) {
1188                 p->u.pipe.ret = -1;
1189                 /* FIXME: Play with error codes? */
1190                 p->error = EMFILE;
1191         } else {
1192                 p->u.pipe.ret = pipe(p->u.pipe.fds);
1193                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
1194                 set_cleanup(p, cleanup_pipe, struct pipe_call);
1195         }
1196
1197         trace("pipe %s:%u -> %i,%i\n", file, line,
1198               p->u.pipe.ret ? -1 : p->u.pipe.fds[0],
1199               p->u.pipe.ret ? -1 : p->u.pipe.fds[1]);
1200
1201         /* This causes valgrind to notice if they use pipefd[] after failure */
1202         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
1203         errno = p->error;
1204         return p->u.pipe.ret;
1205 }
1206
1207 static void cleanup_read(struct read_call *call, bool restore)
1208 {
1209         if (restore) {
1210                 trace("cleaning up read on fd %i: seeking to %llu\n",
1211                       call->fd, (long long)call->off);
1212
1213                 /* Read (not readv!) moves file offset! */
1214                 if (lseek(call->fd, call->off, SEEK_SET) != call->off) {
1215                         fwarn("Restoring lseek pointer failed (read)");
1216                 }
1217         }
1218 }
1219
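/* Common implementation of read()/pread().  Only plain read gets a cleanup,
 * since it moves the file offset. */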
1220 static ssize_t failtest_add_read(int fd, void *buf, size_t count, off_t off,
1221                                  bool is_pread, const char *file, unsigned line)
1222 {
1223         struct failtest_call *p;
1224         struct read_call call;
1225         call.fd = fd;
1226         call.buf = buf;
1227         call.count = count;
1228         call.off = off;
1229         p = add_history(FAILTEST_READ, false, file, line, &call);
1230
1231         /* FIXME: Try partial read returns. */
1232         if (should_fail(p)) {
1233                 p->u.read.ret = -1;
1234                 p->error = EIO;
1235         } else {
1236                 if (is_pread)
1237                         p->u.read.ret = pread(fd, buf, count, off);
1238                 else {
1239                         p->u.read.ret = read(fd, buf, count);
1240                         if (p->u.read.ret != -1)
1241                                 set_cleanup(p, cleanup_read, struct read_call);
1242                 }
1243         }
1244         trace("%sread %s:%u fd %i %zu@%llu -> %i\n",
1245               is_pread ? "p" : "", file, line, fd, count, (long long)off,
1246               p->u.read.ret);
1247         errno = p->error;
1248         return p->u.read.ret;
1249 }
1250
1251 static void cleanup_write(struct write_call *write, bool restore)
1252 {
1253         trace("cleaning up write on %s\n", write->opener->u.open.pathname);
1254         if (restore)
1255                 restore_contents(write->opener, write->saved, !write->is_pwrite,
1256                                  "write");
1257         free(write->saved);
1258 }
1259
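/* Common implementation of write()/pwrite().  Children report non-file
 * writes to the parent for comparison; file contents are saved so a failing
 * child can restore them. */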
1260 static ssize_t failtest_add_write(int fd, const void *buf,
1261                                   size_t count, off_t off,
1262                                   bool is_pwrite,
1263                                   const char *file, unsigned line)
1264 {
1265         struct failtest_call *p;
1266         struct write_call call;
1267
1268         call.fd = fd;
1269         call.buf = buf;
1270         call.count = count;
1271         call.off = off;
1272         call.is_pwrite = is_pwrite;
1273         call.opener = opener_of(fd);
1274         p = add_history(FAILTEST_WRITE, false, file, line, &call);
1275
1276         /* If we're a child, we need to make sure we write the same thing
1277          * to non-files as the parent does, so tell it. */
1278         if (control_fd != -1 && off == (off_t)-1) {
1279                 enum info_type type = WRITE;
1280
1281                 write_all(control_fd, &type, sizeof(type));
1282                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
1283                 write_all(control_fd, buf, count);
1284         }
1285
1286         /* FIXME: Try partial write returns. */
1287         if (should_fail(p)) {
1288                 p->u.write.ret = -1;
1289                 p->error = EIO;
1290         } else {
1291                 bool is_file;
1292                 assert(call.opener == p->u.write.opener);
1293
1294                 if (p->u.write.opener) {
1295                         is_file = (p->u.write.opener->type == FAILTEST_OPEN);
1296                 } else {
1297                         /* We can't unwind it, so at least check same
1298                          * in parent and child. */
1299                         is_file = false;
1300                 }
1301
1302                 /* FIXME: We assume same write order in parent and child */
1303                 if (!is_file && child_writes_num != 0) {
1304                         if (child_writes[0].fd != fd)
1305                                 errx(1, "Child wrote to fd %u, not %u?",
1306                                      child_writes[0].fd, fd);
1307                         if (child_writes[0].off != p->u.write.off)
1308                                 errx(1, "Child wrote to offset %zu, not %zu?",
1309                                      (size_t)child_writes[0].off,
1310                                      (size_t)p->u.write.off);
1311                         if (child_writes[0].count != count)
1312                                 errx(1, "Child wrote length %zu, not %zu?",
1313                                      child_writes[0].count, count);
1314                         if (memcmp(child_writes[0].buf, buf, count)) {
1315                                 child_fail(NULL, 0,
1316                                            "Child wrote differently to"
1317                                            " fd %u than we did!\n", fd);
1318                         }
1319                         free((char *)child_writes[0].buf);
1320                         child_writes_num--;
1321                         memmove(&child_writes[0], &child_writes[1],
1322                                 sizeof(child_writes[0]) * child_writes_num);
1323
1324                         /* Child wrote it already. */
1325                         trace("write %s:%i on fd %i already done by child\n",
1326                               file, line, fd);
1327                         p->u.write.ret = count;
1328                         errno = p->error;
1329                         return p->u.write.ret;
1330                 }
1331
1332                 if (is_file) {
1333                         p->u.write.saved = save_contents(call.opener->u.open.pathname,
1334                                                          fd, count, off,
1335                                                          "being overwritten");
1336                         set_cleanup(p, cleanup_write, struct write_call);
1337                 }
1338
1339                 /* Though off is current seek ptr for write case, we need to
1340                  * move it.  write() does that for us. */
1341                 if (p->u.write.is_pwrite)
1342                         p->u.write.ret = pwrite(fd, buf, count, off);
1343                 else
1344                         p->u.write.ret = write(fd, buf, count);
1345         }
1346         trace("%swrite %s:%i %zu@%llu on fd %i -> %i\n",
1347               p->u.write.is_pwrite ? "p" : "",
1348               file, line, count, (long long)off, fd, p->u.write.ret);
1349         errno = p->error;
1350         return p->u.write.ret;
1351 }
1352
1353 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t offset,
1354                         const char *file, unsigned line)
1355 {
1356         return failtest_add_write(fd, buf, count, offset, true, file, line);
1357 }
1358
1359 ssize_t failtest_write(int fd, const void *buf, size_t count,
1360                        const char *file, unsigned line)
1361 {
1362         return failtest_add_write(fd, buf, count, lseek(fd, 0, SEEK_CUR), false,
1363                                   file, line);
1364 }
1365
1366 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
1367                        const char *file, unsigned line)
1368 {
1369         return failtest_add_read(fd, buf, count, off, true, file, line);
1370 }
1371
1372 ssize_t failtest_read(int fd, void *buf, size_t count,
1373                       const char *file, unsigned line)
1374 {
1375         return failtest_add_read(fd, buf, count, lseek(fd, 0, SEEK_CUR), false,
1376                                  file, line);
1377 }
1378
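/* Record a lock (or unlock) of [start, end] on fd, trimming or splitting any
 * overlapping entries. */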
1379 static struct lock_info *WARN_UNUSED_RESULT
1380 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
1381 {
1382         unsigned int i;
1383         struct lock_info *l;
1384
1385         for (i = 0; i < lock_num; i++) {
1386                 l = &locks[i];
1387
1388                 if (l->fd != fd)
1389                         continue;
1390                 /* Four cases we care about:
1391                  * Start overlap:
1392                  *      l =    |      |
1393                  *      new = |   |
1394                  * Mid overlap:
1395                  *      l =    |      |
1396                  *      new =    |  |
1397                  * End overlap:
1398                  *      l =    |      |
1399                  *      new =      |    |
1400                  * Total overlap:
1401                  *      l =    |      |
1402                  *      new = |         |
1403                  */
1404                 if (start > l->start && end < l->end) {
1405                         /* Mid overlap: trim entry, add new one. */
1406                         off_t new_start, new_end;
1407                         new_start = end + 1;
1408                         new_end = l->end;
1409                         trace("splitting lock on fd %i from %llu-%llu"
1410                               " to %llu-%llu\n",
1411                               fd, (long long)l->start, (long long)l->end,
1412                               (long long)l->start, (long long)start - 1);
1413                         l->end = start - 1;
1414                         locks = add_lock(locks,
1415                                          fd, new_start, new_end, l->type);
1416                         l = &locks[i];
1417                 } else if (start <= l->start && end >= l->end) {
1418                         /* Total overlap: eliminate entry. */
1419                         trace("erasing lock on fd %i %llu-%llu\n",
1420                               fd, (long long)l->start, (long long)l->end);
1421                         l->end = 0;
1422                         l->start = 1;
1423                 } else if (end >= l->start && end < l->end) {
1424                         trace("trimming lock on fd %i from %llu-%llu"
1425                               " to %llu-%llu\n",
1426                               fd, (long long)l->start, (long long)l->end,
1427                               (long long)end + 1, (long long)l->end);
1428                         /* Start overlap: trim entry. */
1429                         l->start = end + 1;
1430                 } else if (start > l->start && start <= l->end) {
1431                         trace("trimming lock on fd %i from %llu-%llu"
1432                               " to %llu-%llu\n",
1433                               fd, (long long)l->start, (long long)l->end,
1434                               (long long)l->start, (long long)start - 1);
1435                         /* End overlap: trim entry. */
1436                         l->end = start-1;
1437                 }
1438                 /* Nothing left?  Remove it. */
1439                 if (l->end < l->start) {
1440                         trace("forgetting lock on fd %i\n", fd);
1441                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
1442                         i--;
1443                 }
1444         }
1445
1446         if (type != F_UNLCK) {
1447                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
1448                 l = &locks[lock_num++];
1449                 l->fd = fd;
1450                 l->start = start;
1451                 l->end = end;
1452                 l->type = type;
1453                 trace("new lock on fd %i %llu-%llu\n",
1454                       fd, (long long)l->start, (long long)l->end);
1455         }
1456         return locks;
1457 }
1458
1459 /* We trap this so we can record it: we don't fail it. */
1460 int failtest_close(int fd, const char *file, unsigned line)
1461 {
1462         struct close_call call;
1463         struct failtest_call *p, *opener;
1464
1465         /* Do this before we add ourselves to history! */
1466         opener = opener_of(fd);
1467
1468         call.fd = fd;
1469         p = add_history(FAILTEST_CLOSE, false, file, line, &call);
1470         p->fail = false;
1471
1472         /* Consume close from failpath (shouldn't tell us to fail). */
1473         if (following_path()) {
1474                 if (follow_path(p))
1475                         abort();
1476         }
1477
1478         trace("close on fd %i\n", fd);
1479         if (fd < 0)
1480                 return close(fd);
1481
1482         /* Mark opener as not leaking, remove its cleanup function. */
1483         if (opener) {
1484                 trace("close on fd %i found opener %p\n", fd, opener);
1485                 if (opener->type == FAILTEST_PIPE) {
1486                         /* From a pipe? */
1487                         if (opener->u.pipe.fds[0] == fd) {
1488                                 assert(!opener->u.pipe.closed[0]);
1489                                 opener->u.pipe.closed[0] = true;
1490                         } else if (opener->u.pipe.fds[1] == fd) {
1491                                 assert(!opener->u.pipe.closed[1]);
1492                                 opener->u.pipe.closed[1] = true;
1493                         } else
1494                                 abort();
1495                         opener->can_leak = (!opener->u.pipe.closed[0]
1496                                             || !opener->u.pipe.closed[1]);
1497                 } else if (opener->type == FAILTEST_OPEN) {
1498                         opener->u.open.closed = true;
1499                         opener->can_leak = false;
1500                 } else
1501                         abort();
1502         }
1503
1504         /* Restore offset now, in case parent shared (can't do after close!). */
1505         if (control_fd != -1) {
1506                 struct failtest_call *i;
1507
1508                 tlist_for_each_rev(&history, i, list) {
1509                         if (i == our_history_start)
1510                                 break;
1511                         if (i == opener)
1512                                 break;
1513                         if (i->type == FAILTEST_LSEEK && i->u.lseek.fd == fd) {
1514                                 trace("close on fd %i undoes lseek\n", fd);
1515                                 /* This seeks back. */
1516                                 i->cleanup(&i->u, true);
1517                                 i->cleanup = NULL;
1518                         } else if (i->type == FAILTEST_WRITE
1519                                    && i->u.write.fd == fd
1520                                    && !i->u.write.is_pwrite) {
1521                                 trace("close on fd %i undoes write"
1522                                       " offset change\n", fd);
1523                                 /* Write (not pwrite!) moves file offset! */
1524                                 if (lseek(fd, i->u.write.off, SEEK_SET)
1525                                     != i->u.write.off) {
1526                                         fwarn("Restoring lseek pointer failed (write)");
1527                                 }
1528                         } else if (i->type == FAILTEST_READ
1529                                    && i->u.read.fd == fd) {
1530                                 /* preads don't *have* cleanups */
1531                                 if (i->cleanup) {
1532                                         trace("close on fd %i undoes read"
1533                                               " offset change\n", fd);
1534                                         /* This seeks back. */
1535                                         i->cleanup(&i->u, true);
1536                                         i->cleanup = NULL;
1537                                 }
1538                         }
1539                 }
1540         }
1541
1542         /* Close unlocks everything. */
1543         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
1544         return close(fd);
1545 }
1546
1547 /* Zero length means "to end of file" */
1548 static off_t end_of(off_t start, off_t len)
1549 {
1550         if (len == 0)
1551                 return off_max();
1552         return start + len - 1;
1553 }
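/*
 * Added examples (illustrative): end_of(10, 5) == 14, the last byte of a
 * 5-byte range starting at offset 10; end_of(10, 0) == off_max(), since a
 * zero length means "to end of file".
 */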
1554
1555 /* FIXME: This only handles locks, really. */
1556 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
1557 {
1558         struct failtest_call *p;
1559         struct fcntl_call call;
1560         va_list ap;
1561
1562         call.fd = fd;
1563         call.cmd = cmd;
1564
1565         /* Argument extraction. */
1566         switch (cmd) {
1567         case F_SETFL:
1568         case F_SETFD:
1569                 va_start(ap, cmd);
1570                 call.arg.l = va_arg(ap, long);
1571                 va_end(ap);
1572                 trace("fcntl on fd %i F_SETFL/F_SETFD\n", fd);
1573                 return fcntl(fd, cmd, call.arg.l);
1574         case F_GETFD:
1575         case F_GETFL:
1576                 trace("fcntl on fd %i F_GETFL/F_GETFD\n", fd);
1577                 return fcntl(fd, cmd);
1578         case F_GETLK:
1579                 trace("fcntl on fd %i F_GETLK\n", fd);
1580                 get_locks();
1581                 va_start(ap, cmd);
1582                 call.arg.fl = *va_arg(ap, struct flock *);
1583                 va_end(ap);
1584                 return fcntl(fd, cmd, &call.arg.fl);
1585         case F_SETLK:
1586         case F_SETLKW:
1587                 trace("fcntl on fd %i F_SETLK%s\n",
1588                       fd, cmd == F_SETLKW ? "W" : "");
1589                 va_start(ap, cmd);
1590                 call.arg.fl = *va_arg(ap, struct flock *);
1591                 va_end(ap);
1592                 break;
1593         default:
1594                 /* This means you need to implement it here. */
1595                 errx(1, "failtest: unknown fcntl %i", cmd);
1596         }
1597
1598         p = add_history(FAILTEST_FCNTL, false, file, line, &call);
1599
1600         if (should_fail(p)) {
1601                 p->u.fcntl.ret = -1;
1602                 if (p->u.fcntl.cmd == F_SETLK)
1603                         p->error = EAGAIN;
1604                 else
1605                         p->error = EDEADLK;
1606         } else {
1607                 get_locks();
1608                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1609                                        &p->u.fcntl.arg.fl);
1610                 if (p->u.fcntl.ret == -1)
1611                         p->error = errno;
1612                 else {
1613                         /* We don't handle anything else yet. */
1614                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1615                         locks = add_lock(locks,
1616                                          p->u.fcntl.fd,
1617                                          p->u.fcntl.arg.fl.l_start,
1618                                          end_of(p->u.fcntl.arg.fl.l_start,
1619                                                 p->u.fcntl.arg.fl.l_len),
1620                                          p->u.fcntl.arg.fl.l_type);
1621                 }
1622         }
1623         trace("fcntl on fd %i -> %i\n", fd, p->u.fcntl.ret);
1624         errno = p->error;
1625         return p->u.fcntl.ret;
1626 }
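/*
 * Added usage sketch (illustrative only; in a test this is normally
 * reached via the failtest override wrappers rather than called
 * directly): taking a whole-file write lock goes through the F_SETLK
 * branch above.  l_whence must be SEEK_SET (asserted above), and
 * should_fail() may inject EAGAIN instead of running the real fcntl.
 *
 *	struct flock fl;
 *	fl.l_type = F_WRLCK;
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = 0;
 *	fl.l_len = 0;		// zero length == to end of file, see end_of()
 *	if (failtest_fcntl(fd, __FILE__, __LINE__, F_SETLK, &fl) == -1)
 *		;		// handle the (possibly injected) EAGAIN
 */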
1627
1628 static void cleanup_lseek(struct lseek_call *call, bool restore)
1629 {
1630         if (restore) {
1631                 trace("cleaning up lseek on fd %i -> %llu\n",
1632                       call->fd, (long long)call->old_off);
1633                 if (lseek(call->fd, call->old_off, SEEK_SET) != call->old_off)
1634                         fwarn("Restoring lseek pointer failed");
1635         }
1636 }
1637
1638 /* We trap this so we can undo it: we don't fail it. */
1639 off_t failtest_lseek(int fd, off_t offset, int whence, const char *file,
1640                      unsigned int line)
1641 {
1642         struct failtest_call *p;
1643         struct lseek_call call;
1644         call.fd = fd;
1645         call.offset = offset;
1646         call.whence = whence;
1647         call.old_off = lseek(fd, 0, SEEK_CUR);
1648
1649         p = add_history(FAILTEST_LSEEK, false, file, line, &call);
1650         p->fail = false;
1651
1652         /* Consume lseek from failpath. */
1653         if (failpath)
1654                 if (should_fail(p))
1655                         abort();
1656
1657         p->u.lseek.ret = lseek(fd, offset, whence);
1658
1659         if (p->u.lseek.ret != (off_t)-1)
1660                 set_cleanup(p, cleanup_lseek, struct lseek_call);
1661
1662         trace("lseek %s:%u on fd %i from %llu to %llu%s\n",
1663               file, line, fd, (long long)call.old_off, (long long)offset,
1664               whence == SEEK_CUR ? " (from current off)" :
1665               whence == SEEK_END ? " (from end)" :
1666               whence == SEEK_SET ? "" : " (invalid whence)");
1667         return p->u.lseek.ret;
1668 }
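/*
 * Added note: recording old_off and registering cleanup_lseek() is what
 * lets failtest_close() above seek a descriptor the parent may share back
 * to the offset it had before this call.
 */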
1669
1670
1671 pid_t failtest_getpid(const char *file, unsigned line)
1672 {
1673         /* You must call failtest_init first! */
1674         assert(orig_pid);
1675         return orig_pid;
1676 }
1677
1678 void failtest_init(int argc, char *argv[])
1679 {
1680         unsigned int i;
1681
1682         orig_pid = getpid();
1683
1684         warnf = fdopen(move_fd_to_high(dup(STDERR_FILENO)), "w");
1685         for (i = 1; i < argc; i++) {
1686                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1687                         failpath = argv[i] + strlen("--failpath=");
1688                 } else if (strcmp(argv[i], "--trace") == 0) {
1689                         tracef = warnf;
1690                         failtest_timeout_ms = -1; /* effectively disables the timeout */
1691                 } else if (!strncmp(argv[i], "--debugpath=",
1692                                     strlen("--debugpath="))) {
1693                         debugpath = argv[i] + strlen("--debugpath=");
1694                 }
1695         }
1696         failtable_init(&failtable);
1697         start = time_now();
1698 }
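/*
 * Added usage sketch (illustrative): a test program calls failtest_init()
 * before any wrapped calls and leaves through failtest_exit():
 *
 *	int main(int argc, char *argv[])
 *	{
 *		failtest_init(argc, argv);
 *		// ... test body using the wrapped calls ...
 *		failtest_exit(0);
 *	}
 *
 * Arguments recognized above: --failpath=..., --trace (send trace output
 * to the warn stream and effectively disable the timeout), and
 * --debugpath=...
 */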
1699
1700 bool failtest_has_failed(void)
1701 {
1702         return control_fd != -1;
1703 }
1704
1705 void failtest_exit(int status)
1706 {
1707         trace("failtest_exit with status %i\n", status);
1708         if (failtest_exit_check) {
1709                 if (!failtest_exit_check(&history))
1710                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1711         }
1712
1713         failtest_cleanup(false, status);
1714 }