git.ozlabs.org Git - ccan/blob - ccan/failtest/failtest.c

   1 /* Licensed under LGPL - see LICENSE file for details */
   2 #include <ccan/failtest/failtest.h>
   3 #include <stdarg.h>
   4 #include <string.h>
   5 #include <stdio.h>
   6 #include <stdarg.h>
   7 #include <ctype.h>
   8 #include <err.h>
   9 #include <unistd.h>
  10 #include <poll.h>
  11 #include <errno.h>
  12 #include <sys/types.h>
  13 #include <sys/wait.h>
  14 #include <sys/stat.h>
  15 #include <sys/time.h>
  16 #include <sys/mman.h>
  17 #include <signal.h>
  18 #include <assert.h>
  19 #include <ccan/time/time.h>
  20 #include <ccan/read_write_all/read_write_all.h>
  21 #include <ccan/failtest/failtest_proto.h>
  22 #include <ccan/build_assert/build_assert.h>
  23 #include <ccan/hash/hash.h>
  24 #include <ccan/htable/htable_type.h>
  25 #include <ccan/str/str.h>
  26
/* Optional user hook.  When non-NULL, should_fail() calls it with the call
 * history before forking a failing child and acts on the result. */
enum failtest_result (*failtest_hook)(struct tlist_calls *);

/* fd that trace() writes to; -1 means tracing is off. */
static int tracefd = -1;
/* fd that warn_via_fd() writes warnings to. */
static int warnfd;

/* Timeout handed to poll() while the parent waits on a child, in ms. */
unsigned int failtest_timeout_ms = 20000;

/* Scripted sequence of calls to replay; consumed by follow_path(). */
const char *failpath;
/* Path at which should_fail() tries to attach a debugger. */
const char *debugpath;
  36
/* Message types written to / read from the control fd between a child and
 * its parent (see tell_parent() and the poll loop in should_fail()). */
enum info_type {
        /* Followed by a struct write_call and its buffer (read_write_info). */
        WRITE,
        /* Ask the parent to run release_locks(). */
        RELEASE_LOCKS,
        /* Sent by child_fail() and by failtest_cleanup() on non-zero status. */
        FAILURE,
        /* Sent by failtest_cleanup() when status is zero. */
        SUCCESS,
        /* Initial value in should_fail() before anything has been read. */
        UNEXPECTED
};
  44
/* One recorded fcntl() byte-range lock, replayed by get_locks(). */
struct lock_info {
        /* File descriptor the lock applies to. */
        int fd;
        /* end is inclusive: you can't have a 0-byte lock. */
        off_t start, end;
        /* Value stored into struct flock's l_type when re-acquiring. */
        int type;
};
  51
  52 /* We hash the call location together with its backtrace. */
  53 static size_t hash_call(const struct failtest_call *call)
  54 {
  55         return hash(call->file, strlen(call->file),
  56                     hash(&call->line, 1,
  57                          hash(call->backtrace, call->backtrace_num,
  58                               call->type)));
  59 }
  60
  61 static bool call_eq(const struct failtest_call *call1,
  62                     const struct failtest_call *call2)
  63 {
  64         unsigned int i;
  65
  66         if (strcmp(call1->file, call2->file) != 0
  67             || call1->line != call2->line
  68             || call1->type != call2->type
  69             || call1->backtrace_num != call2->backtrace_num)
  70                 return false;
  71
  72         for (i = 0; i < call1->backtrace_num; i++)
  73                 if (call1->backtrace[i] != call2->backtrace[i])
  74                         return false;
  75
  76         return true;
  77 }
  78
/* Defines struct failtable. */
/* Entries are struct failtest_call, hashed by hash_call() and compared with
 * call_eq(). */
HTABLE_DEFINE_TYPE(struct failtest_call, (struct failtest_call *), hash_call,
                   call_eq, failtable);

/* Optional user-supplied exit check taking the call history. */
bool (*failtest_exit_check)(struct tlist_calls *history);

/* Every recorded call, oldest first (add_history_ appends at the tail). */
static struct tlist_calls history = TLIST_INIT(history);
/* Pipe to our parent; -1 when we are not a forked child. */
static int control_fd = -1;
/* Reference time for the elapsed time printed by trace output. */
static struct timeval start;
/* Set when failtest_hook returned FAIL_PROBE; suppresses further failures. */
static bool probing = false;
/* Calls we have already failed once (see should_fail()). */
static struct failtable failtable;

/* Writes reported by children over the control fd (read_write_info()). */
static struct write_call *child_writes = NULL;
static unsigned int child_writes_num = 0;

/* pid currently holding the recorded locks; 0 if nobody does. */
static pid_t lock_owner;
static struct lock_info *locks = NULL;
static unsigned int lock_num = 0;

static pid_t orig_pid;

/* One letter per call type, indexed by call->type; failpath_string()
 * upper-cases the letter for calls that failed. */
static const char info_to_arg[] = "mceoxprwfa";

/* Dummy call used for failtest_undo wrappers. */
/* add_history_() returns this when file is NULL, and should_fail() never
 * fails it. */
static struct failtest_call unrecorded_call;
 104
 105 #if HAVE_BACKTRACE
 106 #include <execinfo.h>
 107
 108 static void **get_backtrace(unsigned int *num)
 109 {
 110         static unsigned int max_back = 100;
 111         void **ret;
 112
 113 again:
 114         ret = malloc(max_back * sizeof(void *));
 115         *num = backtrace(ret, max_back);
 116         if (*num == max_back) {
 117                 free(ret);
 118                 max_back *= 2;
 119                 goto again;
 120         }
 121         return ret;
 122 }
 123 #else
 124 /* This will test slightly less, since will consider all of the same
 125  * calls as identical.  But, it's slightly faster! */
 126 static void **get_backtrace(unsigned int *num)
 127 {
 128         *num = 0;
 129         return NULL;
 130 }
 131 #endif /* HAVE_BACKTRACE */
 132
 133 static struct failtest_call *add_history_(enum failtest_call_type type,
 134                                           const char *file,
 135                                           unsigned int line,
 136                                           const void *elem,
 137                                           size_t elem_size)
 138 {
 139         struct failtest_call *call;
 140
 141         /* NULL file is how we suppress failure. */
 142         if (!file)
 143                 return &unrecorded_call;
 144
 145         call = malloc(sizeof *call);
 146         call->type = type;
 147         call->file = file;
 148         call->line = line;
 149         call->cleanup = NULL;
 150         call->backtrace = get_backtrace(&call->backtrace_num);
 151         memcpy(&call->u, elem, elem_size);
 152         tlist_add_tail(&history, call, list);
 153         return call;
 154 }
 155
/* Record a call in the history.  elem points at the per-call argument
 * struct; its size is taken with sizeof so callers need not pass it. */
#define add_history(type, file, line, elem) \
        add_history_((type), (file), (line), (elem), sizeof(*(elem)))

/* We do a fake call inside a sizeof(), to check types. */
/* The sizeof operand is never evaluated: it only makes the compiler check
 * that clean accepts a (type *).  The comma expression then yields clean,
 * which is stored into call->cleanup through a void * cast. */
#define set_cleanup(call, clean, type)                  \
        (call)->cleanup = (void *)((void)sizeof(clean((type *)NULL),1), (clean))
 162
 163
 164 /* Dup the fd to a high value (out of the way I hope!), and close the old fd. */
 165 static int move_fd_to_high(int fd)
 166 {
 167         int i;
 168
 169         for (i = FD_SETSIZE - 1; i >= 0; i--) {
 170                 if (fcntl(i, F_GETFL) == -1 && errno == EBADF) {
 171                         if (dup2(fd, i) == -1)
 172                                 err(1, "Failed to dup fd %i to %i", fd, i);
 173                         close(fd);
 174                         return i;
 175                 }
 176         }
 177         /* Nothing?  Really?  Er... ok? */
 178         return fd;
 179 }
 180
 181 static bool read_write_info(int fd)
 182 {
 183         struct write_call *w;
 184         char *buf;
 185
 186         /* We don't need all of this, but it's simple. */
 187         child_writes = realloc(child_writes,
 188                                (child_writes_num+1) * sizeof(child_writes[0]));
 189         w = &child_writes[child_writes_num];
 190         if (!read_all(fd, w, sizeof(*w)))
 191                 return false;
 192
 193         w->buf = buf = malloc(w->count);
 194         if (!read_all(fd, buf, w->count))
 195                 return false;
 196
 197         child_writes_num++;
 198         return true;
 199 }
 200
 201 static char *failpath_string(void)
 202 {
 203         struct failtest_call *i;
 204         char *ret = strdup("");
 205         unsigned len = 0;
 206
 207         /* Inefficient, but who cares? */
 208         tlist_for_each(&history, i, list) {
 209                 ret = realloc(ret, len + 2);
 210                 ret[len] = info_to_arg[i->type];
 211                 if (i->fail)
 212                         ret[len] = toupper(ret[len]);
 213                 ret[++len] = '\0';
 214         }
 215         return ret;
 216 }
 217
 218 static void warn_via_fd(int e, const char *fmt, va_list ap)
 219 {
 220         char *p = failpath_string();
 221
 222         vdprintf(warnfd, fmt, ap);
 223         if (e != -1)
 224                 dprintf(warnfd, ": %s", strerror(e));
 225         dprintf(warnfd, " [%s]\n", p);
 226         free(p);
 227 }
 228
 229 static void fwarn(const char *fmt, ...)
 230 {
 231         va_list ap;
 232         int e = errno;
 233
 234         va_start(ap, fmt);
 235         warn_via_fd(e, fmt, ap);
 236         va_end(ap);
 237 }
 238
 239
 240 static void fwarnx(const char *fmt, ...)
 241 {
 242         va_list ap;
 243
 244         va_start(ap, fmt);
 245         warn_via_fd(-1, fmt, ap);
 246         va_end(ap);
 247 }
 248
 249 static void tell_parent(enum info_type type)
 250 {
 251         if (control_fd != -1)
 252                 write_all(control_fd, &type, sizeof(type));
 253 }
 254
 255 static void child_fail(const char *out, size_t outlen, const char *fmt, ...)
 256 {
 257         va_list ap;
 258         char *path = failpath_string();
 259
 260         va_start(ap, fmt);
 261         vfprintf(stderr, fmt, ap);
 262         va_end(ap);
 263
 264         fprintf(stderr, "%.*s", (int)outlen, out);
 265         printf("To reproduce: --failpath=%s\n", path);
 266         free(path);
 267         tell_parent(FAILURE);
 268         exit(1);
 269 }
 270
 271 static void trace(const char *fmt, ...)
 272 {
 273         va_list ap;
 274
 275         if (tracefd == -1)
 276                 return;
 277
 278         va_start(ap, fmt);
 279         vdprintf(tracefd, fmt, ap);
 280         va_end(ap);
 281 }
 282
 283 static pid_t child;
 284
 285 static void hand_down(int signum)
 286 {
 287         kill(child, signum);
 288 }
 289
 290 static void release_locks(void)
 291 {
 292         /* Locks were never acquired/reacquired? */
 293         if (lock_owner == 0)
 294                 return;
 295
 296         /* We own them?  Release them all. */
 297         if (lock_owner == getpid()) {
 298                 unsigned int i;
 299                 struct flock fl;
 300                 fl.l_type = F_UNLCK;
 301                 fl.l_whence = SEEK_SET;
 302                 fl.l_start = 0;
 303                 fl.l_len = 0;
 304
 305                 for (i = 0; i < lock_num; i++)
 306                         fcntl(locks[i].fd, F_SETLK, &fl);
 307         } else {
 308                 /* Our parent must have them; pass request up. */
 309                 enum info_type type = RELEASE_LOCKS;
 310                 assert(control_fd != -1);
 311                 write_all(control_fd, &type, sizeof(type));
 312         }
 313         lock_owner = 0;
 314 }
 315
 316 /* off_t is a signed type.  Getting its max is non-trivial. */
 317 static off_t off_max(void)
 318 {
 319         BUILD_ASSERT(sizeof(off_t) == 4 || sizeof(off_t) == 8);
 320         if (sizeof(off_t) == 4)
 321                 return (off_t)0x7FFFFFF;
 322         else
 323                 return (off_t)0x7FFFFFFFFFFFFFFULL;
 324 }
 325
 326 static void get_locks(void)
 327 {
 328         unsigned int i;
 329         struct flock fl;
 330
 331         if (lock_owner == getpid())
 332                 return;
 333
 334         if (lock_owner != 0) {
 335                 enum info_type type = RELEASE_LOCKS;
 336                 assert(control_fd != -1);
 337                 write_all(control_fd, &type, sizeof(type));
 338         }
 339
 340         fl.l_whence = SEEK_SET;
 341
 342         for (i = 0; i < lock_num; i++) {
 343                 fl.l_type = locks[i].type;
 344                 fl.l_start = locks[i].start;
 345                 if (locks[i].end == off_max())
 346                         fl.l_len = 0;
 347                 else
 348                         fl.l_len = locks[i].end - locks[i].start + 1;
 349
 350                 if (fcntl(locks[i].fd, F_SETLKW, &fl) != 0)
 351                         abort();
 352         }
 353         lock_owner = getpid();
 354 }
 355
/* Snapshot of one open file, taken by save_file() before forking and
 * written back by restore_files(). */
struct saved_file {
        /* Next snapshot in the singly linked list. */
        struct saved_file *next;
        /* Descriptor the snapshot was taken from. */
        int fd;
        /* malloc'ed copy of the file's bytes (len bytes). */
        void *contents;
        /* off: file position at snapshot time; len: file length. */
        off_t off, len;
};
 362
 363 static struct saved_file *save_file(struct saved_file *next, int fd)
 364 {
 365         struct saved_file *s = malloc(sizeof(*s));
 366
 367         s->next = next;
 368         s->fd = fd;
 369         s->off = lseek(fd, 0, SEEK_CUR);
 370         /* Special file?  Erk... */
 371         assert(s->off != -1);
 372         s->len = lseek(fd, 0, SEEK_END);
 373         lseek(fd, 0, SEEK_SET);
 374         s->contents = malloc(s->len);
 375         if (read(fd, s->contents, s->len) != s->len)
 376                 err(1, "Failed to save %zu bytes", (size_t)s->len);
 377         lseek(fd, s->off, SEEK_SET);
 378         return s;
 379 }
 380
 381 /* We have little choice but to save and restore open files: mmap means we
 382  * can really intercept changes in the child.
 383  *
 384  * We could do non-mmap'ed files on demand, however. */
 385 static struct saved_file *save_files(void)
 386 {
 387         struct saved_file *files = NULL;
 388         struct failtest_call *i;
 389
 390         /* Figure out the set of live fds. */
 391         tlist_for_each_rev(&history, i, list) {
 392                 if (i->type == FAILTEST_OPEN) {
 393                         int fd = i->u.open.ret;
 394                         /* Only do successful, writable fds. */
 395                         if (fd < 0)
 396                                 continue;
 397
 398                         /* If it was closed, cleanup == NULL. */
 399                         if (!i->cleanup)
 400                                 continue;
 401
 402                         if ((i->u.open.flags & O_RDWR) == O_RDWR) {
 403                                 files = save_file(files, fd);
 404                         } else if ((i->u.open.flags & O_WRONLY)
 405                                    == O_WRONLY) {
 406                                 /* FIXME: Handle O_WRONLY.  Open with O_RDWR? */
 407                                 abort();
 408                         }
 409                 }
 410         }
 411
 412         return files;
 413 }
 414
 415 static void restore_files(struct saved_file *s)
 416 {
 417         while (s) {
 418                 struct saved_file *next = s->next;
 419
 420                 lseek(s->fd, 0, SEEK_SET);
 421                 if (write(s->fd, s->contents, s->len) != s->len)
 422                         err(1, "Failed to restore %zu bytes", (size_t)s->len);
 423                 if (ftruncate(s->fd, s->len) != 0)
 424                         err(1, "Failed to trim file to length %zu",
 425                             (size_t)s->len);
 426                 free(s->contents);
 427                 lseek(s->fd, s->off, SEEK_SET);
 428                 free(s);
 429                 s = next;
 430         }
 431 }
 432
 433 static void free_files(struct saved_file *s)
 434 {
 435         while (s) {
 436                 struct saved_file *next = s->next;
 437                 free(s->contents);
 438                 free(s);
 439                 s = next;
 440         }
 441 }
 442
 443 static void free_call(struct failtest_call *call)
 444 {
 445         /* We don't do this in cleanup: needed even for failed opens. */
 446         if (call->type == FAILTEST_OPEN)
 447                 free((char *)call->u.open.pathname);
 448         free(call->backtrace);
 449         tlist_del_from(&history, call, list);
 450         free(call);
 451 }
 452
 453 /* Free up memory, so valgrind doesn't report leaks. */
 454 static void free_everything(void)
 455 {
 456         struct failtest_call *i;
 457
 458         while ((i = tlist_top(&history, struct failtest_call, list)) != NULL)
 459                 free_call(i);
 460
 461         failtable_clear(&failtable);
 462 }
 463
 464 static NORETURN void failtest_cleanup(bool forced_cleanup, int status)
 465 {
 466         struct failtest_call *i;
 467
 468         /* For children, we don't care if they "failed" the testing. */
 469         if (control_fd != -1)
 470                 status = 0;
 471
 472         if (forced_cleanup) {
 473                 /* We didn't actually do final operation: remove it. */
 474                 i = tlist_tail(&history, struct failtest_call, list);
 475                 free_call(i);
 476         }
 477
 478         /* Cleanup everything, in reverse order. */
 479         tlist_for_each_rev(&history, i, list) {
 480                 if (!i->cleanup)
 481                         continue;
 482                 if (!forced_cleanup) {
 483                         printf("Leak at %s:%u: --failpath=%s\n",
 484                                i->file, i->line, failpath_string());
 485                         status = 1;
 486                 }
 487                 i->cleanup(&i->u);
 488         }
 489
 490         free_everything();
 491         if (status == 0)
 492                 tell_parent(SUCCESS);
 493         else
 494                 tell_parent(FAILURE);
 495         exit(status);
 496 }
 497
 498 static bool following_path(void)
 499 {
 500         if (!failpath)
 501                 return false;
 502         /* + means continue after end, like normal. */
 503         if (*failpath == '+') {
 504                 failpath = NULL;
 505                 return false;
 506         }
 507         return true;
 508 }
 509
 510 static bool follow_path(struct failtest_call *call)
 511 {
 512         if (*failpath == '\0') {
 513                 /* Continue, but don't inject errors. */
 514                 return call->fail = false;
 515         }
 516
 517         if (tolower((unsigned char)*failpath) != info_to_arg[call->type])
 518                 errx(1, "Failpath expected '%s' got '%c'\n",
 519                      failpath, info_to_arg[call->type]);
 520         call->fail = cisupper(*(failpath++));
 521         return call->fail;
 522 }
 523
 524 static bool should_fail(struct failtest_call *call)
 525 {
 526         int status;
 527         int control[2], output[2];
 528         enum info_type type = UNEXPECTED;
 529         char *out = NULL;
 530         size_t outlen = 0;
 531         struct saved_file *files;
 532         struct failtest_call *dup;
 533
 534         if (call == &unrecorded_call)
 535                 return false;
 536
 537         if (following_path())
 538                 return follow_path(call);
 539
 540         /* Attach debugger if they asked for it. */
 541         if (debugpath) {
 542                 char *path;
 543
 544                 /* Pretend this last call matches whatever path wanted:
 545                  * keeps valgrind happy. */
 546                 call->fail = cisupper(debugpath[strlen(debugpath)-1]);
 547                 path = failpath_string();
 548
 549                 if (streq(path, debugpath)) {
 550                         char str[80];
 551
 552                         /* Don't timeout. */
 553                         signal(SIGUSR1, SIG_IGN);
 554                         sprintf(str, "xterm -e gdb /proc/%d/exe %d &",
 555                                 getpid(), getpid());
 556                         if (system(str) == 0)
 557                                 sleep(5);
 558                 } else {
 559                         /* Ignore last character: could be upper or lower. */
 560                         path[strlen(path)-1] = '\0';
 561                         if (!strstarts(debugpath, path)) {
 562                                 fprintf(stderr,
 563                                         "--debugpath not followed: %s\n", path);
 564                                 debugpath = NULL;
 565                         }
 566                 }
 567                 free(path);
 568         }
 569
 570         /* Are we probing?  If so, we never fail twice. */
 571         if (probing)
 572                 return call->fail = false;
 573
 574         /* Don't more than once in the same place. */
 575         dup = failtable_get(&failtable, call);
 576         if (dup)
 577                 return call->fail = false;
 578
 579         if (failtest_hook) {
 580                 switch (failtest_hook(&history)) {
 581                 case FAIL_OK:
 582                         break;
 583                 case FAIL_PROBE:
 584                         probing = true;
 585                         break;
 586                 case FAIL_DONT_FAIL:
 587                         call->fail = false;
 588                         return false;
 589                 default:
 590                         abort();
 591                 }
 592         }
 593
 594         /* Add it to our table of calls. */
 595         failtable_add(&failtable, call);
 596
 597         files = save_files();
 598
 599         /* We're going to fail in the child. */
 600         call->fail = true;
 601         if (pipe(control) != 0 || pipe(output) != 0)
 602                 err(1, "opening pipe");
 603
 604         /* Prevent double-printing (in child and parent) */
 605         fflush(stdout);
 606         child = fork();
 607         if (child == -1)
 608                 err(1, "forking failed");
 609
 610         if (child == 0) {
 611                 if (tracefd != -1) {
 612                         struct timeval diff;
 613                         const char *p;
 614                         char *failpath;
 615                         struct failtest_call *c;
 616
 617                         c = tlist_tail(&history, struct failtest_call, list);
 618                         diff = time_sub(time_now(), start);
 619                         failpath = failpath_string();
 620                         trace("%u->%u (%u.%02u): %s (", getppid(), getpid(),
 621                               (int)diff.tv_sec, (int)diff.tv_usec / 10000,
 622                               failpath);
 623                         free(failpath);
 624                         p = strrchr(c->file, '/');
 625                         if (p)
 626                                 trace("%s", p+1);
 627                         else
 628                                 trace("%s", c->file);
 629                         trace(":%u)\n", c->line);
 630                 }
 631                 close(control[0]);
 632                 close(output[0]);
 633                 dup2(output[1], STDOUT_FILENO);
 634                 dup2(output[1], STDERR_FILENO);
 635                 if (output[1] != STDOUT_FILENO && output[1] != STDERR_FILENO)
 636                         close(output[1]);
 637                 control_fd = move_fd_to_high(control[1]);
 638                 /* Valgrind spots the leak if we don't free these. */
 639                 free_files(files);
 640                 return true;
 641         }
 642
 643         signal(SIGUSR1, hand_down);
 644
 645         close(control[1]);
 646         close(output[1]);
 647
 648         /* We grab output so we can display it; we grab writes so we
 649          * can compare. */
 650         do {
 651                 struct pollfd pfd[2];
 652                 int ret;
 653
 654                 pfd[0].fd = output[0];
 655                 pfd[0].events = POLLIN|POLLHUP;
 656                 pfd[1].fd = control[0];
 657                 pfd[1].events = POLLIN|POLLHUP;
 658
 659                 if (type == SUCCESS)
 660                         ret = poll(pfd, 1, failtest_timeout_ms);
 661                 else
 662                         ret = poll(pfd, 2, failtest_timeout_ms);
 663
 664                 if (ret == 0)
 665                         hand_down(SIGUSR1);
 666                 if (ret < 0) {
 667                         if (errno == EINTR)
 668                                 continue;
 669                         err(1, "Poll returned %i", ret);
 670                 }
 671
 672                 if (pfd[0].revents & POLLIN) {
 673                         ssize_t len;
 674
 675                         out = realloc(out, outlen + 8192);
 676                         len = read(output[0], out + outlen, 8192);
 677                         outlen += len;
 678                 } else if (type != SUCCESS && (pfd[1].revents & POLLIN)) {
 679                         if (read_all(control[0], &type, sizeof(type))) {
 680                                 if (type == WRITE) {
 681                                         if (!read_write_info(control[0]))
 682                                                 break;
 683                                 } else if (type == RELEASE_LOCKS) {
 684                                         release_locks();
 685                                         /* FIXME: Tell them we're done... */
 686                                 }
 687                         }
 688                 } else if (pfd[0].revents & POLLHUP) {
 689                         break;
 690                 }
 691         } while (type != FAILURE);
 692
 693         close(output[0]);
 694         close(control[0]);
 695         waitpid(child, &status, 0);
 696         if (!WIFEXITED(status)) {
 697                 if (WTERMSIG(status) == SIGUSR1)
 698                         child_fail(out, outlen, "Timed out");
 699                 else
 700                         child_fail(out, outlen, "Killed by signal %u: ",
 701                                    WTERMSIG(status));
 702         }
 703         /* Child printed failure already, just pass up exit code. */
 704         if (type == FAILURE) {
 705                 fprintf(stderr, "%.*s", (int)outlen, out);
 706                 tell_parent(type);
 707                 exit(WEXITSTATUS(status) ? WEXITSTATUS(status) : 1);
 708         }
 709         if (WEXITSTATUS(status) != 0)
 710                 child_fail(out, outlen, "Exited with status %i: ",
 711                            WEXITSTATUS(status));
 712
 713         free(out);
 714         signal(SIGUSR1, SIG_DFL);
 715
 716         restore_files(files);
 717
 718         /* Only child does probe. */
 719         probing = false;
 720
 721         /* We continue onwards without failing. */
 722         call->fail = false;
 723         return false;
 724 }
 725
 726 static void cleanup_calloc(struct calloc_call *call)
 727 {
 728         free(call->ret);
 729 }
 730
 731 void *failtest_calloc(size_t nmemb, size_t size,
 732                       const char *file, unsigned line)
 733 {
 734         struct failtest_call *p;
 735         struct calloc_call call;
 736         call.nmemb = nmemb;
 737         call.size = size;
 738         p = add_history(FAILTEST_CALLOC, file, line, &call);
 739
 740         if (should_fail(p)) {
 741                 p->u.calloc.ret = NULL;
 742                 p->error = ENOMEM;
 743         } else {
 744                 p->u.calloc.ret = calloc(nmemb, size);
 745                 set_cleanup(p, cleanup_calloc, struct calloc_call);
 746         }
 747         errno = p->error;
 748         return p->u.calloc.ret;
 749 }
 750
 751 static void cleanup_malloc(struct malloc_call *call)
 752 {
 753         free(call->ret);
 754 }
 755
 756 void *failtest_malloc(size_t size, const char *file, unsigned line)
 757 {
 758         struct failtest_call *p;
 759         struct malloc_call call;
 760         call.size = size;
 761
 762         p = add_history(FAILTEST_MALLOC, file, line, &call);
 763         if (should_fail(p)) {
 764                 p->u.malloc.ret = NULL;
 765                 p->error = ENOMEM;
 766         } else {
 767                 p->u.malloc.ret = malloc(size);
 768                 set_cleanup(p, cleanup_malloc, struct malloc_call);
 769         }
 770         errno = p->error;
 771         return p->u.malloc.ret;
 772 }
 773
 774 static void cleanup_realloc(struct realloc_call *call)
 775 {
 776         free(call->ret);
 777 }
 778
 779 /* Walk back and find out if we got this ptr from a previous routine. */
 780 static void fixup_ptr_history(void *ptr)
 781 {
 782         struct failtest_call *i;
 783
 784         /* Start at end of history, work back. */
 785         tlist_for_each_rev(&history, i, list) {
 786                 switch (i->type) {
 787                 case FAILTEST_REALLOC:
 788                         if (i->u.realloc.ret == ptr) {
 789                                 i->cleanup = NULL;
 790                                 return;
 791                         }
 792                         break;
 793                 case FAILTEST_MALLOC:
 794                         if (i->u.malloc.ret == ptr) {
 795                                 i->cleanup = NULL;
 796                                 return;
 797                         }
 798                         break;
 799                 case FAILTEST_CALLOC:
 800                         if (i->u.calloc.ret == ptr) {
 801                                 i->cleanup = NULL;
 802                                 return;
 803                         }
 804                         break;
 805                 default:
 806                         break;
 807                 }
 808         }
 809 }
 810
 811 void *failtest_realloc(void *ptr, size_t size, const char *file, unsigned line)
 812 {
 813         struct failtest_call *p;
 814         struct realloc_call call;
 815         call.size = size;
 816         p = add_history(FAILTEST_REALLOC, file, line, &call);
 817
 818         /* FIXME: Try one child moving allocation, one not. */
 819         if (should_fail(p)) {
 820                 p->u.realloc.ret = NULL;
 821                 p->error = ENOMEM;
 822         } else {
 823                 /* Don't catch this one in the history fixup... */
 824                 p->u.realloc.ret = NULL;
 825                 fixup_ptr_history(ptr);
 826                 p->u.realloc.ret = realloc(ptr, size);
 827                 set_cleanup(p, cleanup_realloc, struct realloc_call);
 828         }
 829         errno = p->error;
 830         return p->u.realloc.ret;
 831 }
 832
 833 void failtest_free(void *ptr)
 834 {
 835         fixup_ptr_history(ptr);
 836         free(ptr);
 837 }
 838
 839 static void cleanup_open(struct open_call *call)
 840 {
 841         close(call->ret);
 842 }
 843
 844 int failtest_open(const char *pathname,
 845                   const char *file, unsigned line, ...)
 846 {
 847         struct failtest_call *p;
 848         struct open_call call;
 849         va_list ap;
 850
 851         call.pathname = strdup(pathname);
 852         va_start(ap, line);
 853         call.flags = va_arg(ap, int);
 854         if (call.flags & O_CREAT) {
 855                 call.mode = va_arg(ap, int);
 856                 va_end(ap);
 857         }
 858         p = add_history(FAILTEST_OPEN, file, line, &call);
 859         /* Avoid memory leak! */
 860         if (p == &unrecorded_call)
 861                 free((char *)call.pathname);
 862         p->u.open.ret = open(pathname, call.flags, call.mode);
 863
 864         if (p->u.open.ret == -1) {
 865                 if (following_path())
 866                         follow_path(p);
 867                 p->fail = false;
 868                 p->error = errno;
 869         } else if (should_fail(p)) {
 870                 close(p->u.open.ret);
 871                 p->u.open.ret = -1;
 872                 /* FIXME: Play with error codes? */
 873                 p->error = EACCES;
 874         } else {
 875                 set_cleanup(p, cleanup_open, struct open_call);
 876         }
 877         errno = p->error;
 878         return p->u.open.ret;
 879 }
 880
 881 void *failtest_mmap(void *addr, size_t length, int prot, int flags,
 882                     int fd, off_t offset, const char *file, unsigned line)
 883 {
 884         struct failtest_call *p;
 885         struct mmap_call call;
 886
 887         call.addr = addr;
 888         call.length = length;
 889         call.prot = prot;
 890         call.flags = flags;
 891         call.offset = offset;
 892         call.fd = fd;
 893
 894         p = add_history(FAILTEST_MMAP, file, line, &call);
 895         if (should_fail(p)) {
 896                 p->u.mmap.ret = MAP_FAILED;
 897                 p->error = ENOMEM;
 898         } else {
 899                 p->u.mmap.ret = mmap(addr, length, prot, flags, fd, offset);
 900         }
 901         errno = p->error;
 902         return p->u.mmap.ret;
 903 }
 904
 905 static void cleanup_pipe(struct pipe_call *call)
 906 {
 907         if (!call->closed[0])
 908                 close(call->fds[0]);
 909         if (!call->closed[1])
 910                 close(call->fds[1]);
 911 }
 912
 913 int failtest_pipe(int pipefd[2], const char *file, unsigned line)
 914 {
 915         struct failtest_call *p;
 916         struct pipe_call call;
 917
 918         p = add_history(FAILTEST_PIPE, file, line, &call);
 919         if (should_fail(p)) {
 920                 p->u.open.ret = -1;
 921                 /* FIXME: Play with error codes? */
 922                 p->error = EMFILE;
 923         } else {
 924                 p->u.pipe.ret = pipe(p->u.pipe.fds);
 925                 p->u.pipe.closed[0] = p->u.pipe.closed[1] = false;
 926                 set_cleanup(p, cleanup_pipe, struct pipe_call);
 927         }
 928         /* This causes valgrind to notice if they use pipefd[] after failure */
 929         memcpy(pipefd, p->u.pipe.fds, sizeof(p->u.pipe.fds));
 930         errno = p->error;
 931         return p->u.pipe.ret;
 932 }
 933
 934 ssize_t failtest_pread(int fd, void *buf, size_t count, off_t off,
 935                        const char *file, unsigned line)
 936 {
 937         struct failtest_call *p;
 938         struct read_call call;
 939         call.fd = fd;
 940         call.buf = buf;
 941         call.count = count;
 942         call.off = off;
 943         p = add_history(FAILTEST_READ, file, line, &call);
 944
 945         /* FIXME: Try partial read returns. */
 946         if (should_fail(p)) {
 947                 p->u.read.ret = -1;
 948                 p->error = EIO;
 949         } else {
 950                 p->u.read.ret = pread(fd, buf, count, off);
 951         }
 952         errno = p->error;
 953         return p->u.read.ret;
 954 }
 955
 956 ssize_t failtest_pwrite(int fd, const void *buf, size_t count, off_t off,
 957                         const char *file, unsigned line)
 958 {
 959         struct failtest_call *p;
 960         struct write_call call;
 961
 962         call.fd = fd;
 963         call.buf = buf;
 964         call.count = count;
 965         call.off = off;
 966         p = add_history(FAILTEST_WRITE, file, line, &call);
 967
 968         /* If we're a child, we need to make sure we write the same thing
 969          * to non-files as the parent does, so tell it. */
 970         if (control_fd != -1 && off == (off_t)-1) {
 971                 enum info_type type = WRITE;
 972
 973                 write_all(control_fd, &type, sizeof(type));
 974                 write_all(control_fd, &p->u.write, sizeof(p->u.write));
 975                 write_all(control_fd, buf, count);
 976         }
 977
 978         /* FIXME: Try partial write returns. */
 979         if (should_fail(p)) {
 980                 p->u.write.ret = -1;
 981                 p->error = EIO;
 982         } else {
 983                 /* FIXME: We assume same write order in parent and child */
 984                 if (off == (off_t)-1 && child_writes_num != 0) {
 985                         if (child_writes[0].fd != fd)
 986                                 errx(1, "Child wrote to fd %u, not %u?",
 987                                      child_writes[0].fd, fd);
 988                         if (child_writes[0].off != p->u.write.off)
 989                                 errx(1, "Child wrote to offset %zu, not %zu?",
 990                                      (size_t)child_writes[0].off,
 991                                      (size_t)p->u.write.off);
 992                         if (child_writes[0].count != count)
 993                                 errx(1, "Child wrote length %zu, not %zu?",
 994                                      child_writes[0].count, count);
 995                         if (memcmp(child_writes[0].buf, buf, count)) {
 996                                 child_fail(NULL, 0,
 997                                            "Child wrote differently to"
 998                                            " fd %u than we did!\n", fd);
 999                         }
1000                         free((char *)child_writes[0].buf);
1001                         child_writes_num--;
1002                         memmove(&child_writes[0], &child_writes[1],
1003                                 sizeof(child_writes[0]) * child_writes_num);
1004
1005                         /* Is this is a socket or pipe, child wrote it
1006                            already. */
1007                         if (p->u.write.off == (off_t)-1) {
1008                                 p->u.write.ret = count;
1009                                 errno = p->error;
1010                                 return p->u.write.ret;
1011                         }
1012                 }
1013                 p->u.write.ret = pwrite(fd, buf, count, off);
1014         }
1015         errno = p->error;
1016         return p->u.write.ret;
1017 }
1018
/*
 * read() wrapper, built on failtest_pread() using the descriptor's current
 * position as the offset.
 *
 * NOTE(review): pread() does not move the file offset, so it looks like the
 * descriptor's position is left unchanged by this call -- confirm intended.
 */
ssize_t failtest_read(int fd, void *buf, size_t count,
                      const char *file, unsigned line)
{
        off_t here = lseek(fd, 0, SEEK_CUR);

        return failtest_pread(fd, buf, count, here, file, line);
}
1025
/*
 * write() wrapper, built on failtest_pwrite() using the descriptor's current
 * position as the offset.  When lseek() fails the offset passed on is
 * (off_t)-1, which failtest_pwrite() treats as "not a file".
 *
 * NOTE(review): pwrite() does not move the file offset, so it looks like the
 * descriptor's position is left unchanged by this call -- confirm intended.
 */
ssize_t failtest_write(int fd, const void *buf, size_t count,
                       const char *file, unsigned line)
{
        off_t here = lseek(fd, 0, SEEK_CUR);

        return failtest_pwrite(fd, buf, count, here, file, line);
}
1032
1033 static struct lock_info *WARN_UNUSED_RESULT
1034 add_lock(struct lock_info *locks, int fd, off_t start, off_t end, int type)
1035 {
1036         unsigned int i;
1037         struct lock_info *l;
1038
1039         for (i = 0; i < lock_num; i++) {
1040                 l = &locks[i];
1041
1042                 if (l->fd != fd)
1043                         continue;
1044                 /* Four cases we care about:
1045                  * Start overlap:
1046                  *      l =    |      |
1047                  *      new = |   |
1048                  * Mid overlap:
1049                  *      l =    |      |
1050                  *      new =    |  |
1051                  * End overlap:
1052                  *      l =    |      |
1053                  *      new =      |    |
1054                  * Total overlap:
1055                  *      l =    |      |
1056                  *      new = |         |
1057                  */
1058                 if (start > l->start && end < l->end) {
1059                         /* Mid overlap: trim entry, add new one. */
1060                         off_t new_start, new_end;
1061                         new_start = end + 1;
1062                         new_end = l->end;
1063                         l->end = start - 1;
1064                         locks = add_lock(locks,
1065                                          fd, new_start, new_end, l->type);
1066                         l = &locks[i];
1067                 } else if (start <= l->start && end >= l->end) {
1068                         /* Total overlap: eliminate entry. */
1069                         l->end = 0;
1070                         l->start = 1;
1071                 } else if (end >= l->start && end < l->end) {
1072                         /* Start overlap: trim entry. */
1073                         l->start = end + 1;
1074                 } else if (start > l->start && start <= l->end) {
1075                         /* End overlap: trim entry. */
1076                         l->end = start-1;
1077                 }
1078                 /* Nothing left?  Remove it. */
1079                 if (l->end < l->start) {
1080                         memmove(l, l + 1, (--lock_num - i) * sizeof(l[0]));
1081                         i--;
1082                 }
1083         }
1084
1085         if (type != F_UNLCK) {
1086                 locks = realloc(locks, (lock_num + 1) * sizeof(*locks));
1087                 l = &locks[lock_num++];
1088                 l->fd = fd;
1089                 l->start = start;
1090                 l->end = end;
1091                 l->type = type;
1092         }
1093         return locks;
1094 }
1095
1096 /* We trap this so we can record it: we don't fail it. */
1097 int failtest_close(int fd, const char *file, unsigned line)
1098 {
1099         struct failtest_call *i;
1100         struct close_call call;
1101         struct failtest_call *p;
1102
1103         call.fd = fd;
1104         p = add_history(FAILTEST_CLOSE, file, line, &call);
1105         p->fail = false;
1106
1107         /* Consume close from failpath (shouldn't tell us to fail). */
1108         if (following_path()) {
1109                 if (follow_path(p))
1110                         abort();
1111         }
1112
1113         if (fd < 0)
1114                 return close(fd);
1115
1116         /* Trace history to find source of fd. */
1117         tlist_for_each_rev(&history, i, list) {
1118                 switch (i->type) {
1119                 case FAILTEST_PIPE:
1120                         /* From a pipe? */
1121                         if (i->u.pipe.fds[0] == fd) {
1122                                 assert(!i->u.pipe.closed[0]);
1123                                 i->u.pipe.closed[0] = true;
1124                                 if (i->u.pipe.closed[1])
1125                                         i->cleanup = NULL;
1126                                 goto out;
1127                         }
1128                         if (i->u.pipe.fds[1] == fd) {
1129                                 assert(!i->u.pipe.closed[1]);
1130                                 i->u.pipe.closed[1] = true;
1131                                 if (i->u.pipe.closed[0])
1132                                         i->cleanup = NULL;
1133                                 goto out;
1134                         }
1135                         break;
1136                 case FAILTEST_OPEN:
1137                         if (i->u.open.ret == fd) {
1138                                 assert((void *)i->cleanup
1139                                        == (void *)cleanup_open);
1140                                 i->cleanup = NULL;
1141                                 goto out;
1142                         }
1143                         break;
1144                 default:
1145                         break;
1146                 }
1147         }
1148
1149 out:
1150         locks = add_lock(locks, fd, 0, off_max(), F_UNLCK);
1151         return close(fd);
1152 }
1153
/* Inclusive last byte of a range beginning at start and len bytes long.
 * A len of zero means "to end of file", reported as off_max(). */
static off_t end_of(off_t start, off_t len)
{
        if (len != 0)
                return start + len - 1;
        return off_max();
}
1161
1162 /* FIXME: This only handles locks, really. */
1163 int failtest_fcntl(int fd, const char *file, unsigned line, int cmd, ...)
1164 {
1165         struct failtest_call *p;
1166         struct fcntl_call call;
1167         va_list ap;
1168
1169         call.fd = fd;
1170         call.cmd = cmd;
1171
1172         /* Argument extraction. */
1173         switch (cmd) {
1174         case F_SETFL:
1175         case F_SETFD:
1176                 va_start(ap, cmd);
1177                 call.arg.l = va_arg(ap, long);
1178                 va_end(ap);
1179                 return fcntl(fd, cmd, call.arg.l);
1180         case F_GETFD:
1181         case F_GETFL:
1182                 return fcntl(fd, cmd);
1183         case F_GETLK:
1184                 get_locks();
1185                 va_start(ap, cmd);
1186                 call.arg.fl = *va_arg(ap, struct flock *);
1187                 va_end(ap);
1188                 return fcntl(fd, cmd, &call.arg.fl);
1189         case F_SETLK:
1190         case F_SETLKW:
1191                 va_start(ap, cmd);
1192                 call.arg.fl = *va_arg(ap, struct flock *);
1193                 va_end(ap);
1194                 break;
1195         default:
1196                 /* This means you need to implement it here. */
1197                 err(1, "failtest: unknown fcntl %u", cmd);
1198         }
1199
1200         p = add_history(FAILTEST_FCNTL, file, line, &call);
1201
1202         if (should_fail(p)) {
1203                 p->u.fcntl.ret = -1;
1204                 if (p->u.fcntl.cmd == F_SETLK)
1205                         p->error = EAGAIN;
1206                 else
1207                         p->error = EDEADLK;
1208         } else {
1209                 get_locks();
1210                 p->u.fcntl.ret = fcntl(p->u.fcntl.fd, p->u.fcntl.cmd,
1211                                        &p->u.fcntl.arg.fl);
1212                 if (p->u.fcntl.ret == -1)
1213                         p->error = errno;
1214                 else {
1215                         /* We don't handle anything else yet. */
1216                         assert(p->u.fcntl.arg.fl.l_whence == SEEK_SET);
1217                         locks = add_lock(locks,
1218                                          p->u.fcntl.fd,
1219                                          p->u.fcntl.arg.fl.l_start,
1220                                          end_of(p->u.fcntl.arg.fl.l_start,
1221                                                 p->u.fcntl.arg.fl.l_len),
1222                                          p->u.fcntl.arg.fl.l_type);
1223                 }
1224         }
1225         errno = p->error;
1226         return p->u.fcntl.ret;
1227 }
1228
1229 pid_t failtest_getpid(const char *file, unsigned line)
1230 {
1231         /* You must call failtest_init first! */
1232         assert(orig_pid);
1233         return orig_pid;
1234 }
1235
1236 void failtest_init(int argc, char *argv[])
1237 {
1238         unsigned int i;
1239
1240         orig_pid = getpid();
1241
1242         warnfd = move_fd_to_high(dup(STDERR_FILENO));
1243         for (i = 1; i < argc; i++) {
1244                 if (!strncmp(argv[i], "--failpath=", strlen("--failpath="))) {
1245                         failpath = argv[i] + strlen("--failpath=");
1246                 } else if (strcmp(argv[i], "--tracepath") == 0) {
1247                         tracefd = warnfd;
1248                         failtest_timeout_ms = -1;
1249                 } else if (!strncmp(argv[i], "--debugpath=",
1250                                     strlen("--debugpath="))) {
1251                         debugpath = argv[i] + strlen("--debugpath=");
1252                 }
1253         }
1254         failtable_init(&failtable);
1255         start = time_now();
1256 }
1257
1258 bool failtest_has_failed(void)
1259 {
1260         return control_fd != -1;
1261 }
1262
1263 void failtest_exit(int status)
1264 {
1265         if (failtest_exit_check) {
1266                 if (!failtest_exit_check(&history))
1267                         child_fail(NULL, 0, "failtest_exit_check failed\n");
1268         }
1269
1270         failtest_cleanup(false, status);
1271 }