git.ozlabs.org Git - ccan/blob - ccan/tdb/tools/replay_trace.c

   1 #include <ccan/tdb/tdb.h>
   2 #include <ccan/grab_file/grab_file.h>
   3 #include <ccan/hash/hash.h>
   4 #include <ccan/talloc/talloc.h>
   5 #include <ccan/str_talloc/str_talloc.h>
   6 #include <ccan/str/str.h>
   7 #include <ccan/list/list.h>
   8 #include <err.h>
   9 #include <ctype.h>
  10 #include <string.h>
  11 #include <unistd.h>
  12 #include <sys/types.h>
  13 #include <sys/wait.h>
  14 #include <sys/time.h>
  15 #include <errno.h>
  16 #include <signal.h>
  17 #include <assert.h>
  18
  19 #define STRINGIFY2(x) #x
  20 #define STRINGIFY(x) STRINGIFY2(x)
  21
  22 /* Avoid mod by zero */
  23 static unsigned int total_keys = 1;
  24
  25 #define DEBUG_DEPS 1
  26
  27 /* Traversals block transactions in the current implementation. */
  28 #define TRAVERSALS_TAKE_TRANSACTION_LOCK 1
  29
  30 struct pipe {
  31         int fd[2];
  32 };
  33 static struct pipe *pipes;
  34
  35 static void __attribute__((noreturn)) fail(const char *filename,
  36                                            unsigned int line,
  37                                            const char *fmt, ...)
  38 {
  39         va_list ap;
  40
  41         va_start(ap, fmt);
  42         fprintf(stderr, "%s:%u: FAIL: ", filename, line);
  43         vfprintf(stderr, fmt, ap);
  44         fprintf(stderr, "\n");
  45         va_end(ap);
  46         exit(1);
  47 }
  48
  49 /* Try or die. */
  50 #define try(expr, expect)                                               \
  51         do {                                                            \
  52                 int ret = (expr);                                       \
  53                 if (ret != (expect))                                    \
  54                         fail(filename[file], i+1,                       \
  55                              STRINGIFY(expr) "= %i", ret);              \
  56         } while (0)
  57
  58 /* Try or imitate results. */
  59 #define unreliable(expr, expect, force, undo)                           \
  60         do {                                                            \
  61                 int ret = expr;                                         \
  62                 if (ret != expect) {                                    \
  63                         fprintf(stderr, "%s:%u: %s gave %i not %i",     \
  64                                 filename[file], i+1, STRINGIFY(expr),   \
  65                                 ret, expect);                           \
  66                         if (expect == 0)                                \
  67                                 force;                                  \
  68                         else                                            \
  69                                 undo;                                   \
  70                 }                                                       \
  71         } while (0)
  72
  73 static bool key_eq(TDB_DATA a, TDB_DATA b)
  74 {
  75         if (a.dsize != b.dsize)
  76                 return false;
  77         return memcmp(a.dptr, b.dptr, a.dsize) == 0;
  78 }
  79
  80 /* This is based on the hash algorithm from gdbm */
  81 static unsigned int hash_key(TDB_DATA *key)
  82 {
  83         uint32_t value; /* Used to compute the hash value.  */
  84         uint32_t   i;   /* Used to cycle through random values. */
  85
  86         /* Set the initial value from the key size. */
  87         for (value = 0x238F13AF ^ key->dsize, i=0; i < key->dsize; i++)
  88                 value = (value + (key->dptr[i] << (i*5 % 24)));
  89
  90         return (1103515243 * value + 12345);
  91 }
  92
  93 enum op_type {
  94         OP_TDB_LOCKALL,
  95         OP_TDB_LOCKALL_MARK,
  96         OP_TDB_LOCKALL_UNMARK,
  97         OP_TDB_LOCKALL_NONBLOCK,
  98         OP_TDB_UNLOCKALL,
  99         OP_TDB_LOCKALL_READ,
 100         OP_TDB_LOCKALL_READ_NONBLOCK,
 101         OP_TDB_UNLOCKALL_READ,
 102         OP_TDB_CHAINLOCK,
 103         OP_TDB_CHAINLOCK_NONBLOCK,
 104         OP_TDB_CHAINLOCK_MARK,
 105         OP_TDB_CHAINLOCK_UNMARK,
 106         OP_TDB_CHAINUNLOCK,
 107         OP_TDB_CHAINLOCK_READ,
 108         OP_TDB_CHAINUNLOCK_READ,
 109         OP_TDB_PARSE_RECORD,
 110         OP_TDB_EXISTS,
 111         OP_TDB_STORE,
 112         OP_TDB_APPEND,
 113         OP_TDB_GET_SEQNUM,
 114         OP_TDB_WIPE_ALL,
 115         OP_TDB_TRANSACTION_START,
 116         OP_TDB_TRANSACTION_CANCEL,
 117         OP_TDB_TRANSACTION_COMMIT,
 118         OP_TDB_TRAVERSE_READ_START,
 119         OP_TDB_TRAVERSE_START,
 120         OP_TDB_TRAVERSE_END,
 121         OP_TDB_TRAVERSE,
 122         OP_TDB_FIRSTKEY,
 123         OP_TDB_NEXTKEY,
 124         OP_TDB_FETCH,
 125         OP_TDB_DELETE,
 126 };
 127
 128 struct op {
 129         unsigned int serial;
 130         enum op_type op;
 131         TDB_DATA key;
 132         TDB_DATA data;
 133         int ret;
 134
 135         /* Who is waiting for us? */
 136         struct list_head post;
 137         /* What are we waiting for? */
 138         struct list_head pre;
 139
 140         /* If I'm part of a group (traverse/transaction) where is
 141          * start?  (Otherwise, 0) */
 142         unsigned int group_start;
 143
 144         union {
 145                 int flag; /* open and store */
 146                 struct {  /* append */
 147                         TDB_DATA pre;
 148                         TDB_DATA post;
 149                 } append;
 150                 unsigned int group_len; /* transaction/traverse start */
 151         };
 152 };
 153
 154 static unsigned char hex_char(const char *filename, unsigned int line, char c)
 155 {
 156         c = toupper(c);
 157         if (c >= 'A' && c <= 'F')
 158                 return c - 'A' + 10;
 159         if (c >= '0' && c <= '9')
 160                 return c - '0';
 161         fail(filename, line, "invalid hex character '%c'", c);
 162 }
 163
 164 /* TDB data is <size>:<%02x>* */
 165 static TDB_DATA make_tdb_data(const void *ctx,
 166                               const char *filename, unsigned int line,
 167                               const char *word)
 168 {
 169         TDB_DATA data;
 170         unsigned int i;
 171         const char *p;
 172
 173         if (streq(word, "NULL"))
 174                 return tdb_null;
 175
 176         data.dsize = atoi(word);
 177         data.dptr = talloc_array(ctx, unsigned char, data.dsize);
 178         p = strchr(word, ':');
 179         if (!p)
 180                 fail(filename, line, "invalid tdb data '%s'", word);
 181         p++;
 182         for (i = 0; i < data.dsize; i++)
 183                 data.dptr[i] = hex_char(filename, line, p[i*2])*16
 184                         + hex_char(filename, line, p[i*2+1]);
 185
 186         return data;
 187 }
 188
 189 static void add_op(const char *filename, struct op **op, unsigned int i,
 190                    unsigned int serial, enum op_type type)
 191 {
 192         struct op *new;
 193         *op = talloc_realloc(NULL, *op, struct op, i+1);
 194         new = (*op) + i;
 195         new->op = type;
 196         new->serial = serial;
 197         new->ret = 0;
 198         new->group_start = 0;
 199 }
 200
 201 static void op_add_nothing(const char *filename,
 202                            struct op op[], unsigned int op_num, char *words[])
 203 {
 204         if (words[2])
 205                 fail(filename, op_num+1, "Expected no arguments");
 206         op[op_num].key = tdb_null;
 207 }
 208
 209 static void op_add_key(const char *filename,
 210                        struct op op[], unsigned int op_num, char *words[])
 211 {
 212         if (words[2] == NULL || words[3])
 213                 fail(filename, op_num+1, "Expected just a key");
 214
 215         op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
 216         if (op[op_num].op != OP_TDB_TRAVERSE)
 217                 total_keys++;
 218 }
 219
 220 static void op_add_key_ret(const char *filename,
 221                            struct op op[], unsigned int op_num, char *words[])
 222 {
 223         if (!words[2] || !words[3] || !words[4] || words[5]
 224             || !streq(words[3], "="))
 225                 fail(filename, op_num+1, "Expected <key> = <ret>");
 226         op[op_num].ret = atoi(words[4]);
 227         op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
 228         /* May only be a unique key if it fails */
 229         if (op[op_num].ret != 0)
 230                 total_keys++;
 231 }
 232
 233 static void op_add_key_data(const char *filename,
 234                             struct op op[], unsigned int op_num, char *words[])
 235 {
 236         if (!words[2] || !words[3] || !words[4] || words[5]
 237             || !streq(words[3], "="))
 238                 fail(filename, op_num+1, "Expected <key> = <data>");
 239         op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
 240         op[op_num].data = make_tdb_data(op, filename, op_num+1, words[4]);
 241         /* May only be a unique key if it fails */
 242         if (!op[op_num].data.dptr)
 243                 total_keys++;
 244 }
 245
 246 /* <serial> tdb_store <rec> <rec> <flag> = <ret> */
 247 static void op_add_store(const char *filename,
 248                          struct op op[], unsigned int op_num, char *words[])
 249 {
 250         if (!words[2] || !words[3] || !words[4] || !words[5] || !words[6]
 251             || words[7] || !streq(words[5], "="))
 252                 fail(filename, op_num+1, "Expect <key> <data> <flag> = <ret>");
 253
 254         op[op_num].flag = strtoul(words[4], NULL, 0);
 255         op[op_num].ret = atoi(words[6]);
 256         op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
 257         op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]);
 258         total_keys++;
 259 }
 260
 261 /* <serial> tdb_append <rec> <rec> = <rec> */
 262 static void op_add_append(const char *filename,
 263                           struct op op[], unsigned int op_num, char *words[])
 264 {
 265         if (!words[2] || !words[3] || !words[4] || !words[5] || words[6]
 266             || !streq(words[4], "="))
 267                 fail(filename, op_num+1, "Expect <key> <data> = <rec>");
 268
 269         op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
 270         op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]);
 271
 272         op[op_num].append.post
 273                 = make_tdb_data(op, filename, op_num+1, words[5]);
 274
 275         /* By subtraction, figure out what previous data was. */
 276         op[op_num].append.pre.dptr = op[op_num].append.post.dptr;
 277         op[op_num].append.pre.dsize
 278                 = op[op_num].append.post.dsize - op[op_num].data.dsize;
 279         total_keys++;
 280 }
 281
 282 /* <serial> tdb_get_seqnum = <ret> */
 283 static void op_add_seqnum(const char *filename,
 284                           struct op op[], unsigned int op_num, char *words[])
 285 {
 286         if (!words[2] || !words[3] || words[4] || !streq(words[2], "="))
 287                 fail(filename, op_num+1, "Expect = <ret>");
 288
 289         op[op_num].key = tdb_null;
 290         op[op_num].ret = atoi(words[3]);
 291 }
 292
 293 static void op_add_traverse(const char *filename,
 294                             struct op op[], unsigned int op_num, char *words[])
 295 {
 296         if (words[2])
 297                 fail(filename, op_num+1, "Expect no arguments");
 298
 299         op[op_num].key = tdb_null;
 300         op[op_num].group_len = 0;
 301 }
 302
 303 static void op_add_transaction(const char *filename, struct op op[],
 304                                unsigned int op_num, char *words[])
 305 {
 306         if (words[2])
 307                 fail(filename, op_num+1, "Expect no arguments");
 308
 309         op[op_num].key = tdb_null;
 310         op[op_num].group_len = 0;
 311 }
 312
 313 static int op_transaction_start(struct op op[], unsigned int op_num)
 314 {
 315         unsigned int i;
 316
 317         for (i = op_num-1; i > 0; i--) {
 318                 if (op[i].op == OP_TDB_TRANSACTION_START && !op[i].group_len)
 319                         return i;
 320         }
 321         return 0;
 322 }
 323
 324 static void op_analyze_transaction(const char *filename,
 325                                    struct op op[], unsigned int op_num,
 326                                    char *words[])
 327 {
 328         unsigned int start, i;
 329
 330         op[op_num].key = tdb_null;
 331
 332         if (words[2])
 333                 fail(filename, op_num+1, "Expect no arguments");
 334
 335         start = op_transaction_start(op, op_num);
 336         if (!start)
 337                 fail(filename, op_num+1, "no transaction start found");
 338
 339         op[start].group_len = op_num - start;
 340
 341         /* This rolls in nested transactions.  I think that's right. */
 342         for (i = start; i <= op_num; i++)
 343                 op[i].group_start = start;
 344 }
 345
 346 struct traverse_hash {
 347         TDB_DATA key;
 348         unsigned int index;
 349 };
 350
 351 static void op_analyze_traverse(const char *filename,
 352                                 struct op op[], unsigned int op_num,
 353                                 char *words[])
 354 {
 355         int i, start;
 356
 357         op[op_num].key = tdb_null;
 358
 359         /* = %u means traverse function terminated. */
 360         if (words[2]) {
 361                 if (!streq(words[2], "=") || !words[3] || words[4])
 362                         fail(filename, op_num+1, "expect = <num>");
 363                 op[op_num].ret = atoi(words[3]);
 364         } else
 365                 op[op_num].ret = 0;
 366
 367         for (i = op_num-1; i >= 0; i--) {
 368                 if (op[i].op != OP_TDB_TRAVERSE_READ_START
 369                     && op[i].op != OP_TDB_TRAVERSE_START)
 370                         continue;
 371                 if (op[i].group_len)
 372                         continue;
 373                 break;
 374         }
 375
 376         if (i < 0)
 377                 fail(filename, op_num+1, "no traversal start found");
 378
 379         start = i;
 380         op[start].group_len = op_num - start;
 381
 382         for (i = start; i <= op_num; i++)
 383                 op[i].group_start = start;
 384 }
 385
 386 /* Keep -Wmissing-declarations happy: */
 387 const struct op_table *
 388 find_keyword (register const char *str, register unsigned int len);
 389
 390 #include "keywords.c"
 391
 392 struct depend {
 393         /* We can have more than one */
 394         struct list_node pre_list;
 395         struct list_node post_list;
 396         unsigned int needs_file;
 397         unsigned int needs_opnum;
 398         unsigned int satisfies_file;
 399         unsigned int satisfies_opnum;
 400 };
 401
 402 static void check_deps(const char *filename, struct op op[], unsigned int num)
 403 {
 404 #ifdef DEBUG_DEPS
 405         unsigned int i;
 406
 407         for (i = 1; i < num; i++)
 408                 if (!list_empty(&op[i].pre))
 409                         fail(filename, i+1, "Still has dependencies");
 410 #endif
 411 }
 412
 413 static void dump_pre(char *filename[], struct op *op[],
 414                      unsigned int file, unsigned int i)
 415 {
 416         struct depend *dep;
 417
 418         printf("%s:%u (%u) still waiting for:\n", filename[file], i+1,
 419                 op[file][i].serial);
 420         list_for_each(&op[file][i].pre, dep, pre_list)
 421                 printf("    %s:%u (%u)\n",
 422                        filename[dep->satisfies_file], dep->satisfies_opnum+1,
 423                        op[dep->satisfies_file][dep->satisfies_opnum].serial);
 424         check_deps(filename[file], op[file], i);
 425 }
 426
 427 /* We simply read/write pointers, since we all are children. */
 428 static void do_pre(struct tdb_context *tdb,
 429                    char *filename[], struct op *op[],
 430                    unsigned int file, int pre_fd, unsigned int i)
 431 {
 432         while (!list_empty(&op[file][i].pre)) {
 433                 struct depend *dep;
 434
 435 #if DEBUG_DEPS
 436                 printf("%s:%u:waiting for pre\n", filename[file], i+1);
 437                 fflush(stdout);
 438 #endif
 439                 alarm(10);
 440                 while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) {
 441                         if (errno == EINTR) {
 442                                 dump_pre(filename, op, file, i);
 443                                 exit(1);
 444                         } else
 445                                 errx(1, "Reading from pipe");
 446                 }
 447                 alarm(0);
 448
 449 #if DEBUG_DEPS
 450                 printf("%s:%u:got pre %u from %s:%u\n", filename[file], i+1,
 451                        dep->needs_opnum+1, filename[dep->satisfies_file],
 452                        dep->satisfies_opnum+1);
 453                 fflush(stdout);
 454 #endif
 455                 /* This could be any op, not just this one. */
 456                 talloc_free(dep);
 457         }
 458 }
 459
 460 static void do_post(char *filename[], struct op *op[],
 461                     unsigned int file, unsigned int i)
 462 {
 463         struct depend *dep;
 464
 465         list_for_each(&op[file][i].post, dep, post_list) {
 466 #if DEBUG_DEPS
 467                 printf("%s:%u:sending to file %s:%u\n", filename[file], i+1,
 468                        filename[dep->needs_file], dep->needs_opnum+1);
 469 #endif
 470                 if (write(pipes[dep->needs_file].fd[1], &dep, sizeof(dep))
 471                     != sizeof(dep))
 472                         err(1, "%s:%u failed to tell file %s",
 473                             filename[file], i+1, filename[dep->needs_file]);
 474         }
 475 }
 476
 477 static int get_len(TDB_DATA key, TDB_DATA data, void *private_data)
 478 {
 479         return data.dsize;
 480 }
 481
 482 static unsigned run_ops(struct tdb_context *tdb,
 483                         int pre_fd,
 484                         char *filename[],
 485                         struct op *op[],
 486                         unsigned int file,
 487                         unsigned int start, unsigned int stop);
 488
 489 struct traverse_info {
 490         struct op **op;
 491         char **filename;
 492         unsigned file;
 493         int pre_fd;
 494         unsigned int start;
 495         unsigned int i;
 496 };
 497
 498 /* More complex.  Just do whatever's they did at the n'th entry. */
 499 static int nontrivial_traverse(struct tdb_context *tdb,
 500                                TDB_DATA key, TDB_DATA data,
 501                                void *_tinfo)
 502 {
 503         struct traverse_info *tinfo = _tinfo;
 504         unsigned int trav_len = tinfo->op[tinfo->file][tinfo->start].group_len;
 505
 506         if (tinfo->i == tinfo->start + trav_len) {
 507                 /* This can happen if traverse expects to be empty. */
 508                 if (trav_len == 1)
 509                         return 1;
 510                 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
 511                      "traverse did not terminate");
 512         }
 513
 514         if (tinfo->op[tinfo->file][tinfo->i].op != OP_TDB_TRAVERSE)
 515                 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
 516                      "%s:%u:traverse terminated early");
 517
 518         /* Run any normal ops. */
 519         tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->op,
 520                            tinfo->file, tinfo->i+1, tinfo->start + trav_len);
 521
 522         if (tinfo->i == tinfo->start + trav_len)
 523                 return 1;
 524
 525         return 0;
 526 }
 527
 528 static unsigned op_traverse(struct tdb_context *tdb,
 529                             int pre_fd,
 530                             char *filename[],
 531                             unsigned int file,
 532                             int (*traversefn)(struct tdb_context *,
 533                                               tdb_traverse_func, void *),
 534                             struct op *op[],
 535                             unsigned int start)
 536 {
 537         struct traverse_info tinfo = { op, filename, file, pre_fd,
 538                                        start, start+1 };
 539
 540         traversefn(tdb, nontrivial_traverse, &tinfo);
 541
 542         /* Traversing in wrong order can have strange effects: eg. if
 543          * original traverse went A (delete A), B, we might do B
 544          * (delete A).  So if we have ops left over, we do it now. */
 545         while (tinfo.i != start + op[file][start].group_len) {
 546                 if (op[file][tinfo.i].op == OP_TDB_TRAVERSE)
 547                         tinfo.i++;
 548                 else
 549                         tinfo.i = run_ops(tdb, pre_fd, filename, op, file,
 550                                           tinfo.i,
 551                                           start + op[file][start].group_len);
 552         }
 553
 554         return tinfo.i;
 555 }
 556
 557 static void break_out(int sig)
 558 {
 559 }
 560
 561 static __attribute__((noinline))
 562 unsigned run_ops(struct tdb_context *tdb,
 563                  int pre_fd,
 564                  char *filename[],
 565                  struct op *op[],
 566                  unsigned int file,
 567                  unsigned int start, unsigned int stop)
 568 {
 569         unsigned int i;
 570         struct sigaction sa;
 571
 572         sa.sa_handler = break_out;
 573         sa.sa_flags = 0;
 574
 575         sigaction(SIGALRM, &sa, NULL);
 576         for (i = start; i < stop; i++) {
 577                 do_pre(tdb, filename, op, file, pre_fd, i);
 578
 579                 switch (op[file][i].op) {
 580                 case OP_TDB_LOCKALL:
 581                         try(tdb_lockall(tdb), op[file][i].ret);
 582                         break;
 583                 case OP_TDB_LOCKALL_MARK:
 584                         try(tdb_lockall_mark(tdb), op[file][i].ret);
 585                         break;
 586                 case OP_TDB_LOCKALL_UNMARK:
 587                         try(tdb_lockall_unmark(tdb), op[file][i].ret);
 588                         break;
 589                 case OP_TDB_LOCKALL_NONBLOCK:
 590                         unreliable(tdb_lockall_nonblock(tdb), op[file][i].ret,
 591                                    tdb_lockall(tdb), tdb_unlockall(tdb));
 592                         break;
 593                 case OP_TDB_UNLOCKALL:
 594                         try(tdb_unlockall(tdb), op[file][i].ret);
 595                         break;
 596                 case OP_TDB_LOCKALL_READ:
 597                         try(tdb_lockall_read(tdb), op[file][i].ret);
 598                         break;
 599                 case OP_TDB_LOCKALL_READ_NONBLOCK:
 600                         unreliable(tdb_lockall_read_nonblock(tdb),
 601                                    op[file][i].ret,
 602                                    tdb_lockall_read(tdb),
 603                                    tdb_unlockall_read(tdb));
 604                         break;
 605                 case OP_TDB_UNLOCKALL_READ:
 606                         try(tdb_unlockall_read(tdb), op[file][i].ret);
 607                         break;
 608                 case OP_TDB_CHAINLOCK:
 609                         try(tdb_chainlock(tdb, op[file][i].key),
 610                             op[file][i].ret);
 611                         break;
 612                 case OP_TDB_CHAINLOCK_NONBLOCK:
 613                         unreliable(tdb_chainlock_nonblock(tdb, op[file][i].key),
 614                                    op[file][i].ret,
 615                                    tdb_chainlock(tdb, op[file][i].key),
 616                                    tdb_chainunlock(tdb, op[file][i].key));
 617                         break;
 618                 case OP_TDB_CHAINLOCK_MARK:
 619                         try(tdb_chainlock_mark(tdb, op[file][i].key),
 620                             op[file][i].ret);
 621                         break;
 622                 case OP_TDB_CHAINLOCK_UNMARK:
 623                         try(tdb_chainlock_unmark(tdb, op[file][i].key),
 624                             op[file][i].ret);
 625                         break;
 626                 case OP_TDB_CHAINUNLOCK:
 627                         try(tdb_chainunlock(tdb, op[file][i].key),
 628                             op[file][i].ret);
 629                         break;
 630                 case OP_TDB_CHAINLOCK_READ:
 631                         try(tdb_chainlock_read(tdb, op[file][i].key),
 632                             op[file][i].ret);
 633                         break;
 634                 case OP_TDB_CHAINUNLOCK_READ:
 635                         try(tdb_chainunlock_read(tdb, op[file][i].key),
 636                             op[file][i].ret);
 637                         break;
 638                 case OP_TDB_PARSE_RECORD:
 639                         try(tdb_parse_record(tdb, op[file][i].key, get_len,
 640                                              NULL),
 641                             op[file][i].ret);
 642                         break;
 643                 case OP_TDB_EXISTS:
 644                         try(tdb_exists(tdb, op[file][i].key), op[file][i].ret);
 645                         break;
 646                 case OP_TDB_STORE:
 647                         try(tdb_store(tdb, op[file][i].key, op[file][i].data,
 648                                       op[file][i].flag),
 649                             op[file][i].ret);
 650                         break;
 651                 case OP_TDB_APPEND:
 652                         try(tdb_append(tdb, op[file][i].key, op[file][i].data),
 653                             op[file][i].ret);
 654                         break;
 655                 case OP_TDB_GET_SEQNUM:
 656                         try(tdb_get_seqnum(tdb), op[file][i].ret);
 657                         break;
 658                 case OP_TDB_WIPE_ALL:
 659                         try(tdb_wipe_all(tdb), op[file][i].ret);
 660                         break;
 661                 case OP_TDB_TRANSACTION_START:
 662                         try(tdb_transaction_start(tdb), op[file][i].ret);
 663                         break;
 664                 case OP_TDB_TRANSACTION_CANCEL:
 665                         try(tdb_transaction_cancel(tdb), op[file][i].ret);
 666                         break;
 667                 case OP_TDB_TRANSACTION_COMMIT:
 668                         try(tdb_transaction_commit(tdb), op[file][i].ret);
 669                         break;
 670                 case OP_TDB_TRAVERSE_READ_START:
 671                         i = op_traverse(tdb, pre_fd, filename, file,
 672                                         tdb_traverse_read, op, i);
 673                         break;
 674                 case OP_TDB_TRAVERSE_START:
 675                         i = op_traverse(tdb, pre_fd, filename, file,
 676                                         tdb_traverse, op, i);
 677                         break;
 678                 case OP_TDB_TRAVERSE:
 679                         /* Terminate: we're in a traverse, and we've
 680                          * done our ops. */
 681                         return i;
 682                 case OP_TDB_TRAVERSE_END:
 683                         fail(filename[file], i+1, "unexpected end traverse");
 684                 /* FIXME: These must be treated like traverse. */
 685                 case OP_TDB_FIRSTKEY:
 686                         if (!key_eq(tdb_firstkey(tdb), op[file][i].data))
 687                                 fail(filename[file], i+1, "bad firstkey");
 688                         break;
 689                 case OP_TDB_NEXTKEY:
 690                         if (!key_eq(tdb_nextkey(tdb, op[file][i].key),
 691                                     op[file][i].data))
 692                                 fail(filename[file], i+1, "bad nextkey");
 693                         break;
 694                 case OP_TDB_FETCH: {
 695                         TDB_DATA f = tdb_fetch(tdb, op[file][i].key);
 696                         if (!key_eq(f, op[file][i].data))
 697                                 fail(filename[file], i+1, "bad fetch %u",
 698                                      f.dsize);
 699                         break;
 700                 }
 701                 case OP_TDB_DELETE:
 702                         try(tdb_delete(tdb, op[file][i].key), op[file][i].ret);
 703                         break;
 704                 }
 705                 do_post(filename, op, file, i);
 706         }
 707         return i;
 708 }
 709
 710 /* tdbtorture, in particular, can do a tdb_close with a transaction in
 711  * progress. */
 712 static struct op *maybe_cancel_transaction(const char *filename,
 713                                            struct op *op, unsigned int *num)
 714 {
 715         unsigned int start = op_transaction_start(op, *num);
 716
 717         if (start) {
 718                 char *words[] = { "<unknown>", "tdb_close", NULL };
 719                 add_op(filename, &op, *num, op[start].serial,
 720                        OP_TDB_TRANSACTION_CANCEL);
 721                 op_analyze_transaction(filename, op, *num, words);
 722                 (*num)++;
 723         }
 724         return op;
 725 }
 726
 727 static struct op *load_tracefile(const char *filename, unsigned int *num,
 728                                  unsigned int *hashsize,
 729                                  unsigned int *tdb_flags,
 730                                  unsigned int *open_flags)
 731 {
 732         unsigned int i;
 733         struct op *op = talloc_array(NULL, struct op, 1);
 734         char **words;
 735         char **lines;
 736         char *file;
 737
 738         file = grab_file(NULL, filename, NULL);
 739         if (!file)
 740                 err(1, "Reading %s", filename);
 741
 742         lines = strsplit(file, file, "\n", NULL);
 743         if (!lines[0])
 744                 errx(1, "%s is empty", filename);
 745
 746         words = strsplit(lines, lines[0], " ", NULL);
 747         if (!streq(words[1], "tdb_open"))
 748                 fail(filename, 1, "does not start with tdb_open");
 749
 750         *hashsize = atoi(words[2]);
 751         *tdb_flags = strtoul(words[3], NULL, 0);
 752         *open_flags = strtoul(words[4], NULL, 0);
 753
 754         for (i = 1; lines[i]; i++) {
 755                 const struct op_table *opt;
 756
 757                 words = strsplit(lines, lines[i], " ", NULL);
 758                 if (!words[0] || !words[1])
 759                         fail(filename, i+1, "Expected serial number and op");
 760
 761                 opt = find_keyword(words[1], strlen(words[1]));
 762                 if (!opt) {
 763                         if (streq(words[1], "tdb_close")) {
 764                                 if (lines[i+1])
 765                                         fail(filename, i+2,
 766                                              "lines after tdb_close");
 767                                 *num = i;
 768                                 talloc_free(lines);
 769                                 return maybe_cancel_transaction(filename,
 770                                                                 op, num);
 771                         }
 772                         fail(filename, i+1, "Unknown operation '%s'", words[1]);
 773                 }
 774
 775                 add_op(filename, &op, i, atoi(words[0]), opt->type);
 776                 opt->enhance_op(filename, op, i, words);
 777         }
 778
 779         fprintf(stderr, "%s:%u:last operation is not tdb_close: incomplete?",
 780               filename, i);
 781         talloc_free(lines);
 782         *num = i - 1;
 783         return maybe_cancel_transaction(filename, op, num);
 784 }
 785
 786 /* We remember all the keys we've ever seen, and who has them. */
 787 struct key_user {
 788         unsigned int file;
 789         unsigned int op_num;
 790 };
 791
 792 struct keyinfo {
 793         TDB_DATA key;
 794         unsigned int num_users;
 795         struct key_user *user;
 796 };
 797
 798 static const TDB_DATA must_not_exist;
 799 static const TDB_DATA must_exist;
 800 static const TDB_DATA not_exists_or_empty;
 801
 802 /* NULL means doesn't care if it exists or not, &must_exist means
 803  * it must exist but we don't care what, &must_not_exist means it must
 804  * not exist, otherwise the data it needs. */
 805 static const TDB_DATA *needs(const struct op *op)
 806 {
 807         switch (op->op) {
 808         /* FIXME: Pull forward deps, since we can deadlock */
 809         case OP_TDB_CHAINLOCK:
 810         case OP_TDB_CHAINLOCK_NONBLOCK:
 811         case OP_TDB_CHAINLOCK_MARK:
 812         case OP_TDB_CHAINLOCK_UNMARK:
 813         case OP_TDB_CHAINUNLOCK:
 814         case OP_TDB_CHAINLOCK_READ:
 815         case OP_TDB_CHAINUNLOCK_READ:
 816                 return NULL;
 817
 818         case OP_TDB_APPEND:
 819                 if (op->append.pre.dsize == 0)
 820                         return &not_exists_or_empty;
 821                 return &op->append.pre;
 822
 823         case OP_TDB_STORE:
 824                 if (op->flag == TDB_INSERT) {
 825                         if (op->ret < 0)
 826                                 return &must_exist;
 827                         else
 828                                 return &must_not_exist;
 829                 } else if (op->flag == TDB_MODIFY) {
 830                         if (op->ret < 0)
 831                                 return &must_not_exist;
 832                         else
 833                                 return &must_exist;
 834                 }
 835                 /* No flags?  Don't care */
 836                 return NULL;
 837
 838         case OP_TDB_EXISTS:
 839                 if (op->ret == 1)
 840                         return &must_exist;
 841                 else
 842                         return &must_not_exist;
 843
 844         case OP_TDB_PARSE_RECORD:
 845                 if (op->ret < 0)
 846                         return &must_not_exist;
 847                 return &must_exist;
 848
 849         /* FIXME: handle these. */
 850         case OP_TDB_WIPE_ALL:
 851         case OP_TDB_FIRSTKEY:
 852         case OP_TDB_NEXTKEY:
 853         case OP_TDB_GET_SEQNUM:
 854         case OP_TDB_TRAVERSE:
 855         case OP_TDB_TRANSACTION_COMMIT:
 856         case OP_TDB_TRANSACTION_CANCEL:
 857         case OP_TDB_TRANSACTION_START:
 858                 return NULL;
 859
 860         case OP_TDB_FETCH:
 861                 if (!op->data.dptr)
 862                         return &must_not_exist;
 863                 return &op->data;
 864
 865         case OP_TDB_DELETE:
 866                 if (op->ret < 0)
 867                         return &must_not_exist;
 868                 return &must_exist;
 869
 870         default:
 871                 errx(1, "Unexpected op %i", op->op);
 872         }
 873
 874 }
 875
 876 static bool is_transaction(const struct op *op)
 877 {
 878         return op->op == OP_TDB_TRANSACTION_START;
 879 }
 880
 881 /* What's the data after this op?  pre if nothing changed. */
 882 static const TDB_DATA *gives(const TDB_DATA *key, const TDB_DATA *pre,
 883                              const struct op *op)
 884 {
 885         if (is_transaction(op)) {
 886                 unsigned int i;
 887
 888                 /* Cancelled transactions don't change anything. */
 889                 if (op[op->group_len].op == OP_TDB_TRANSACTION_CANCEL)
 890                         return pre;
 891                 assert(op[op->group_len].op == OP_TDB_TRANSACTION_COMMIT);
 892
 893                 for (i = 1; i < op->group_len; i++) {
 894                         /* This skips nested transactions, too */
 895                         if (op[i].op != OP_TDB_TRAVERSE
 896                             && key_eq(op[i].key, *key))
 897                                 pre = gives(key, pre, &op[i]);
 898                 }
 899                 return pre;
 900         }
 901
 902         /* Failed ops don't change state of db. */
 903         if (op->ret < 0)
 904                 return pre;
 905
 906         if (op->op == OP_TDB_DELETE || op->op == OP_TDB_WIPE_ALL)
 907                 return &tdb_null;
 908
 909         if (op->op == OP_TDB_APPEND)
 910                 return &op->append.post;
 911
 912         if (op->op == OP_TDB_STORE)
 913                 return &op->data;
 914
 915         return pre;
 916 }
 917
 918 static bool in_transaction(const struct op op[], unsigned int i)
 919 {
 920         return op[i].group_start && is_transaction(&op[op[i].group_start]);
 921 }
 922
 923 static bool in_traverse(const struct op op[], unsigned int i)
 924 {
 925         return op[i].group_start && !is_transaction(&op[op[i].group_start]);
 926 }
 927
 928 static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[],
 929                                 unsigned int num)
 930 {
 931         unsigned int i, j, h;
 932         struct keyinfo *hash;
 933
 934         hash = talloc_zero_array(op[0], struct keyinfo, total_keys*2);
 935         for (i = 0; i < num; i++) {
 936                 for (j = 1; j < num_ops[i]; j++) {
 937                         /* We can't do this on allocation, due to realloc. */
 938                         list_head_init(&op[i][j].post);
 939                         list_head_init(&op[i][j].pre);
 940
 941                         if (!op[i][j].key.dptr)
 942                                 continue;
 943
 944                         /* We don't wait for traverse keys */
 945                         /* FIXME: We should, for trivial traversals. */
 946                         if (op[i][j].op == OP_TDB_TRAVERSE)
 947                                 continue;
 948
 949                         h = hash_key(&op[i][j].key) % (total_keys * 2);
 950                         while (!key_eq(hash[h].key, op[i][j].key)) {
 951                                 if (!hash[h].key.dptr) {
 952                                         hash[h].key = op[i][j].key;
 953                                         break;
 954                                 }
 955                                 h = (h + 1) % (total_keys * 2);
 956                         }
 957                         /* Might as well save some memory if we can. */
 958                         if (op[i][j].key.dptr != hash[h].key.dptr) {
 959                                 talloc_free(op[i][j].key.dptr);
 960                                 op[i][j].key.dptr = hash[h].key.dptr;
 961                         }
 962                         hash[h].user = talloc_realloc(hash, hash[h].user,
 963                                                      struct key_user,
 964                                                      hash[h].num_users+1);
 965
 966                         /* If it's in a transaction, it's the transaction which
 967                          * matters from an analysis POV. */
 968                         if (in_transaction(op[i], j)) {
 969                                 unsigned start = op[i][j].group_start;
 970
 971                                 /* Don't include twice. */
 972                                 if (hash[h].num_users
 973                                     && hash[h].user[hash[h].num_users-1].file
 974                                         == i
 975                                     && hash[h].user[hash[h].num_users-1].op_num
 976                                         == start)
 977                                         continue;
 978
 979                                 hash[h].user[hash[h].num_users].op_num = start;
 980                         } else
 981                                 hash[h].user[hash[h].num_users].op_num = j;
 982                         hash[h].user[hash[h].num_users].file = i;
 983                         hash[h].num_users++;
 984                 }
 985         }
 986
 987         return hash;
 988 }
 989
 990 static bool satisfies(const TDB_DATA *key, const TDB_DATA *data,
 991                       const struct op *op)
 992 {
 993         const TDB_DATA *need = NULL;
 994
 995         if (is_transaction(op)) {
 996                 unsigned int i;
 997
 998                 /* Look through for an op in this transaction which
 999                  * needs this key. */
1000                 for (i = 1; i < op->group_len; i++) {
1001                         if (op[i].op != OP_TDB_TRAVERSE
1002                             && key_eq(op[i].key, *key)) {
1003                                 need = needs(&op[i]);
1004                                 /* tdb_exists() is special: there might be
1005                                  * something in the transaction with more
1006                                  * specific requirements.  Other ops don't have
1007                                  * specific requirements (eg. store or delete),
1008                                  * but they change the value so we can't get
1009                                  * more information from future ops. */
1010                                 if (op[i].op != OP_TDB_EXISTS)
1011                                         break;
1012                         }
1013                 }
1014         } else
1015                 need = needs(op);
1016
1017         /* Don't need anything?  Cool. */
1018         if (!need)
1019                 return true;
1020
1021         /* This should be tdb_null or a real value. */
1022         assert(data != &must_exist);
1023         assert(data != &must_not_exist);
1024         assert(data != &not_exists_or_empty);
1025
1026         /* must_not_exist == must_not_exist, must_exist == must_exist, or
1027            not_exists_or_empty == not_exists_or_empty. */
1028         if (data->dsize == need->dsize && data->dptr == need->dptr)
1029                 return true;
1030
1031         /* Must not exist?  data must not exist. */
1032         if (need == &must_not_exist)
1033                 return data->dptr == NULL;
1034
1035         /* Must exist? */
1036         if (need == &must_exist)
1037                 return data->dptr != NULL;
1038
1039         /* Either noexist or empty. */
1040         if (need == &not_exists_or_empty)
1041                 return data->dsize == 0;
1042
1043         /* Needs something specific. */
1044         return key_eq(*data, *need);
1045 }
1046
1047 static void move_to_front(struct key_user res[], unsigned off, unsigned elem)
1048 {
1049         if (elem != off) {
1050                 struct key_user tmp = res[elem];
1051                 memmove(res + off + 1, res + off, (elem - off)*sizeof(res[0]));
1052                 res[off] = tmp;
1053         }
1054 }
1055
1056 static void restore_to_pos(struct key_user res[], unsigned off, unsigned elem)
1057 {
1058         if (elem != off) {
1059                 struct key_user tmp = res[off];
1060                 memmove(res + off, res + off + 1, (elem - off)*sizeof(res[0]));
1061                 res[elem] = tmp;
1062         }
1063 }
1064
1065 static bool sort_deps(char *filename[], struct op *op[],
1066                       struct key_user res[],
1067                       unsigned off, unsigned num,
1068                       const TDB_DATA *key, const TDB_DATA *data,
1069                       unsigned num_files, unsigned fuzz)
1070 {
1071         unsigned int i, files_done;
1072         struct op *this_op;
1073         bool done[num_files];
1074
1075         /* Does this make serial numbers go backwards?  Allow a little fuzz. */
1076         if (off > 0) {
1077                 int serial1 = op[res[off-1].file][res[off-1].op_num].serial;
1078                 int serial2 = op[res[off].file][res[off].op_num].serial;
1079
1080                 if (serial1 - serial2 > (int)fuzz) {
1081 #if DEBUG_DEPS
1082                         printf("Serial jump too far (%u -> %u)\n",
1083                                serial1, serial2);
1084 #endif
1085                         return false;
1086                 }
1087         }
1088
1089         /* One or none left?  We're sorted. */
1090         if (off + 1 >= num)
1091                 return true;
1092
1093         memset(done, 0, sizeof(done));
1094
1095         /* Since ops within a trace file are ordered, we just need to figure
1096          * out which file to try next.  Since we don't take into account
1097          * inter-key relationships (which exist by virtue of trace file order),
1098          * we minimize the chance of harm by trying to keep in serial order. */
1099         for (files_done = 0, i = off; i < num && files_done < num_files; i++) {
1100                 if (done[res[i].file])
1101                         continue;
1102
1103                 this_op = &op[res[i].file][res[i].op_num];
1104
1105                 /* Is what we have good enough for this op? */
1106                 if (satisfies(key, data, this_op)) {
1107                         move_to_front(res, off, i);
1108                         if (sort_deps(filename, op, res, off+1, num,
1109                                       key, gives(key, data, this_op),
1110                                       num_files, fuzz))
1111                                 return true;
1112                         restore_to_pos(res, off, i);
1113                 }
1114                 done[res[i].file] = true;
1115                 files_done++;
1116         }
1117
1118         /* No combination worked. */
1119         return false;
1120 }
1121
1122 static void check_dep_sorting(struct key_user user[], unsigned num_users,
1123                               unsigned num_files)
1124 {
1125 #if DEBUG_DEPS
1126         unsigned int i;
1127         unsigned minima[num_files];
1128
1129         memset(minima, 0, sizeof(minima));
1130         for (i = 0; i < num_users; i++) {
1131                 assert(minima[user[i].file] < user[i].op_num);
1132                 minima[user[i].file] = user[i].op_num;
1133         }
1134 #endif
1135 }
1136
1137 /* All these ops happen on the same key.  Which comes first?
1138  *
1139  * This can happen both because read ops or failed write ops don't
1140  * change serial number, and also due to race since we access the
1141  * number unlocked (the race can cause less detectable ordering problems,
1142  * in which case we'll deadlock and report: fix manually in that case).
1143  */
1144 static void figure_deps(char *filename[], struct op *op[],
1145                         const TDB_DATA *key, struct key_user user[],
1146                         unsigned num_users, unsigned num_files)
1147 {
1148         /* We assume database starts empty. */
1149         const struct TDB_DATA *data = &tdb_null;
1150         unsigned int fuzz;
1151
1152         /* We prefer to keep strict serial order if possible: it's the
1153          * most likely.  We get more lax if that fails. */
1154         for (fuzz = 0; fuzz < 100; fuzz = (fuzz + 1)*2) {
1155                 if (sort_deps(filename, op, user, 0, num_users, key, data,
1156                               num_files, fuzz))
1157                         break;
1158         }
1159
1160         if (fuzz >= 100)
1161                 fail(filename[user[0].file], user[0].op_num+1,
1162                      "Could not resolve inter-dependencies");
1163
1164         check_dep_sorting(user, num_users, num_files);
1165 }
1166
1167 static void sort_ops(struct keyinfo hash[], char *filename[], struct op *op[],
1168                      unsigned int num)
1169 {
1170         unsigned int h;
1171
1172         /* Gcc nexted function extension.  How cool is this? */
1173         int compare_serial(const void *_a, const void *_b)
1174         {
1175                 const struct key_user *a = _a, *b = _b;
1176
1177                 /* First, maintain order within any trace file. */
1178                 if (a->file == b->file)
1179                         return a->op_num - b->op_num;
1180
1181                 /* Otherwise, arrange by serial order. */
1182                 return op[a->file][a->op_num].serial
1183                         - op[b->file][b->op_num].serial;
1184         }
1185
1186         /* Now sort into serial order. */
1187         for (h = 0; h < total_keys * 2; h++) {
1188                 struct key_user *user = hash[h].user;
1189
1190                 qsort(user, hash[h].num_users, sizeof(user[0]), compare_serial);
1191                 figure_deps(filename, op, &hash[h].key, user, hash[h].num_users,
1192                             num);
1193         }
1194 }
1195
1196 static int destroy_depend(struct depend *dep)
1197 {
1198         list_del(&dep->pre_list);
1199         list_del(&dep->post_list);
1200         return 0;
1201 }
1202
1203 static void add_dependency(void *ctx,
1204                            struct op *op[],
1205                            char *filename[],
1206                            unsigned int needs_file,
1207                            unsigned int needs_opnum,
1208                            unsigned int satisfies_file,
1209                            unsigned int satisfies_opnum)
1210 {
1211         struct depend *dep;
1212
1213         /* We don't depend on ourselves. */
1214         if (needs_file == satisfies_file) {
1215                 assert(satisfies_opnum < needs_opnum);
1216                 return;
1217         }
1218
1219 #if DEBUG_DEPS
1220         printf("%s:%u: depends on %s:%u\n",
1221                filename[needs_file], needs_opnum+1,
1222                filename[satisfies_file], satisfies_opnum+1);
1223 #endif
1224
1225 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1226         /* If something in a traverse depends on something in another
1227          * traverse/transaction, it creates a dependency between the
1228          * two groups. */
1229         if ((in_traverse(op[satisfies_file], satisfies_opnum)
1230              && op[needs_file][needs_opnum].group_start)
1231             || (in_traverse(op[needs_file], needs_opnum)
1232                 && op[satisfies_file][satisfies_opnum].group_start)) {
1233                 unsigned int sat;
1234
1235                 /* We are satisfied by end of group. */
1236                 sat = op[satisfies_file][satisfies_opnum].group_start;
1237                 satisfies_opnum = sat + op[satisfies_file][sat].group_len;
1238                 /* And we need that done by start of our group. */
1239                 needs_opnum = op[needs_file][needs_opnum].group_start;
1240         }
1241
1242         /* There is also this case:
1243          *  <traverse> <read foo> ...
1244          *  <transaction> ... </transaction> <create foo>
1245          * Where if we start the traverse then wait, we could block
1246          * the transaction and deadlock.
1247          *
1248          * We try to address this by ensuring that where seqnum indicates it's
1249          * possible, we wait for <create foo> before *starting* traverse.
1250          */
1251         else if (in_traverse(op[needs_file], needs_opnum)) {
1252                 struct op *need = &op[needs_file][needs_opnum];
1253                 if (op[needs_file][need->group_start].serial <
1254                     op[satisfies_file][satisfies_opnum].serial) {
1255                         needs_opnum = need->group_start;
1256                 }
1257         }
1258 #endif
1259
1260         /* If you depend on a transaction, you actually depend on it ending. */
1261         if (is_transaction(&op[satisfies_file][satisfies_opnum])) {
1262                 satisfies_opnum
1263                         += op[satisfies_file][satisfies_opnum].group_len;
1264 #if DEBUG_DEPS
1265                 printf("-> Actually end of transaction %s:%u\n",
1266                        filename[satisfies_file], satisfies_opnum+1);
1267 #endif
1268         } else
1269                 /* We should never create a dependency from middle of
1270                  * a transaction. */
1271                 assert(!in_transaction(op[satisfies_file], satisfies_opnum)
1272                        || op[satisfies_file][satisfies_opnum].op
1273                        == OP_TDB_TRANSACTION_COMMIT
1274                        || op[satisfies_file][satisfies_opnum].op
1275                        == OP_TDB_TRANSACTION_CANCEL);
1276
1277         assert(op[needs_file][needs_opnum].op != OP_TDB_TRAVERSE);
1278         assert(op[satisfies_file][satisfies_opnum].op != OP_TDB_TRAVERSE);
1279
1280         dep = talloc(ctx, struct depend);
1281         dep->needs_file = needs_file;
1282         dep->needs_opnum = needs_opnum;
1283         dep->satisfies_file = satisfies_file;
1284         dep->satisfies_opnum = satisfies_opnum;
1285         list_add(&op[satisfies_file][satisfies_opnum].post, &dep->post_list);
1286         list_add(&op[needs_file][needs_opnum].pre, &dep->pre_list);
1287         talloc_set_destructor(dep, destroy_depend);
1288 }
1289
1290 static bool changes_db(const TDB_DATA *key, const struct op *op)
1291 {
1292         return gives(key, NULL, op) != NULL;
1293 }
1294
1295 static void depend_on_previous(struct op *op[],
1296                                char *filename[],
1297                                unsigned int num,
1298                                struct key_user user[],
1299                                unsigned int i,
1300                                int prev)
1301 {
1302         bool deps[num];
1303         int j;
1304
1305         if (i == 0)
1306                 return;
1307
1308         if (prev == i - 1) {
1309                 /* Just depend on previous. */
1310                 add_dependency(NULL, op, filename,
1311                                user[i].file, user[i].op_num,
1312                                user[prev].file, user[prev].op_num);
1313                 return;
1314         }
1315
1316         /* We have to wait for the readers.  Find last one in *each* file. */
1317         memset(deps, 0, sizeof(deps));
1318         deps[user[i].file] = true;
1319         for (j = i - 1; j > prev; j--) {
1320                 if (!deps[user[j].file]) {
1321                         add_dependency(NULL, op, filename,
1322                                        user[i].file, user[i].op_num,
1323                                        user[j].file, user[j].op_num);
1324                         deps[user[j].file] = true;
1325                 }
1326         }
1327 }
1328
1329 /* This is simple, but not complete.  We don't take into account
1330  * indirect dependencies. */
1331 static void optimize_dependencies(struct op *op[], unsigned int num_ops[],
1332                                   unsigned int num)
1333 {
1334         unsigned int i, j;
1335
1336         /* There can only be one real dependency on each file */
1337         for (i = 0; i < num; i++) {
1338                 for (j = 1; j < num_ops[i]; j++) {
1339                         struct depend *dep, *next;
1340                         struct depend *prev[num];
1341
1342                         memset(prev, 0, sizeof(prev));
1343
1344                         list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
1345                                 if (!prev[dep->satisfies_file]) {
1346                                         prev[dep->satisfies_file] = dep;
1347                                         continue;
1348                                 }
1349                                 if (prev[dep->satisfies_file]->satisfies_opnum
1350                                     < dep->satisfies_opnum) {
1351                                         talloc_free(prev[dep->satisfies_file]);
1352                                         prev[dep->satisfies_file] = dep;
1353                                 } else
1354                                         talloc_free(dep);
1355                         }
1356                 }
1357         }
1358
1359         for (i = 0; i < num; i++) {
1360                 int deps[num];
1361
1362                 for (j = 0; j < num; j++)
1363                         deps[j] = -1;
1364
1365                 for (j = 1; j < num_ops[i]; j++) {
1366                         struct depend *dep, *next;
1367
1368                         list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
1369                                 if (deps[dep->satisfies_file]
1370                                     >= (int)dep->satisfies_opnum)
1371                                         talloc_free(dep);
1372                                 else
1373                                         deps[dep->satisfies_file]
1374                                                 = dep->satisfies_opnum;
1375                         }
1376                 }
1377         }
1378 }
1379
1380 static void derive_dependencies(char *filename[],
1381                                 struct op *op[], unsigned int num_ops[],
1382                                 unsigned int num)
1383 {
1384         struct keyinfo *hash;
1385         unsigned int h, i;
1386
1387         /* Create hash table for faster key lookup. */
1388         hash = hash_ops(op, num_ops, num);
1389
1390         /* Sort them by serial number. */
1391         sort_ops(hash, filename, op, num);
1392
1393         /* Create dependencies back to the last change, rather than
1394          * creating false dependencies by naively making each one
1395          * depend on the previous.  This has two purposes: it makes
1396          * later optimization simpler, and it also avoids deadlock with
1397          * same sequence number ops inside traversals (if one
1398          * traversal doesn't write anything, two ops can have the same
1399          * sequence number yet we can create a traversal dependency
1400          * the other way). */
1401         for (h = 0; h < total_keys * 2; h++) {
1402                 int prev = -1;
1403
1404                 if (hash[h].num_users < 2)
1405                         continue;
1406
1407                 for (i = 0; i < hash[h].num_users; i++) {
1408                         if (changes_db(&hash[h].key, &op[hash[h].user[i].file]
1409                                        [hash[h].user[i].op_num])) {
1410                                 depend_on_previous(op, filename, num,
1411                                                    hash[h].user, i, prev);
1412                                 prev = i;
1413                         } else if (prev >= 0)
1414                                 add_dependency(hash, op, filename,
1415                                                hash[h].user[i].file,
1416                                                hash[h].user[i].op_num,
1417                                                hash[h].user[prev].file,
1418                                                hash[h].user[prev].op_num);
1419                 }
1420         }
1421
1422         optimize_dependencies(op, num_ops, num);
1423 }
1424
1425 int main(int argc, char *argv[])
1426 {
1427         struct timeval start, end;
1428         unsigned int i, num_ops[argc], hashsize[argc], tdb_flags[argc], open_flags[argc];
1429         struct op *op[argc];
1430         int fds[2];
1431         char c;
1432         bool ok = true;
1433
1434         if (argc < 3)
1435                 errx(1, "Usage: %s <tdbfile> <tracefile>...", argv[0]);
1436
1437         pipes = talloc_array(NULL, struct pipe, argc - 2);
1438         for (i = 0; i < argc - 2; i++) {
1439                 printf("Loading tracefile %s...", argv[2+i]);
1440                 fflush(stdout);
1441                 op[i] = load_tracefile(argv[2+i], &num_ops[i], &hashsize[i],
1442                                        &tdb_flags[i], &open_flags[i]);
1443                 if (pipe(pipes[i].fd) != 0)
1444                         err(1, "creating pipe");
1445                 printf("done\n");
1446         }
1447
1448         printf("Calculating inter-dependencies...");
1449         fflush(stdout);
1450         derive_dependencies(argv+2, op, num_ops, i);
1451         printf("done\n");
1452
1453         /* Don't fork for single arg case: simple debugging. */
1454         if (argc == 3) {
1455                 struct tdb_context *tdb;
1456                 tdb = tdb_open_ex(argv[1], hashsize[0], tdb_flags[0],
1457                                   open_flags[0], 0600,
1458                                   NULL, hash_key);
1459                 printf("Single threaded run...");
1460                 fflush(stdout);
1461
1462                 run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1, num_ops[0]);
1463                 check_deps(argv[2], op[0], num_ops[0]);
1464
1465                 printf("done\n");
1466                 exit(0);
1467         }
1468
1469         if (pipe(fds) != 0)
1470                 err(1, "creating pipe");
1471
1472         for (i = 0; i < argc - 2; i++) {
1473                 struct tdb_context *tdb;
1474
1475                 switch (fork()) {
1476                 case -1:
1477                         err(1, "fork failed");
1478                 case 0:
1479                         close(fds[1]);
1480                         tdb = tdb_open_ex(argv[1], hashsize[i], tdb_flags[i],
1481                                           open_flags[i], 0600,
1482                                           NULL, hash_key);
1483                         if (!tdb)
1484                                 err(1, "Opening tdb %s", argv[1]);
1485
1486                         /* This catches parent exiting. */
1487                         if (read(fds[0], &c, 1) != 1)
1488                                 exit(1);
1489                         run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1,
1490                                 num_ops[i]);
1491                         check_deps(argv[2+i], op[i], num_ops[i]);
1492                         exit(0);
1493                 default:
1494                         break;
1495                 }
1496         }
1497
1498         /* Let everything settle. */
1499         sleep(1);
1500
1501         printf("Starting run...");
1502         fflush(stdout);
1503         gettimeofday(&start, NULL);
1504         /* Tell them all to go!  Any write of sufficient length will do. */
1505         if (write(fds[1], hashsize, i) != i)
1506                 err(1, "Writing to wakeup pipe");
1507
1508         for (i = 0; i < argc - 2; i++) {
1509                 int status;
1510                 wait(&status);
1511                 if (!WIFEXITED(status)) {
1512                         warnx("Child died with signal %i", WTERMSIG(status));
1513                         ok = false;
1514                 } else if (WEXITSTATUS(status) != 0)
1515                         /* Assume child spat out error. */
1516                         ok = false;
1517         }
1518         if (!ok)
1519                 exit(1);
1520
1521         gettimeofday(&end, NULL);
1522         printf("done\n");
1523
1524         end.tv_sec -= start.tv_sec;
1525         printf("Time replaying: %lu usec\n",
1526                end.tv_sec * 1000000UL + (end.tv_usec - start.tv_usec));
1527
1528         exit(0);
1529 }