+ /* We are satisfied by end of group. */
+ sat = op[satisfies_file][satisfies_opnum].group_start;
+ satisfies_opnum = sat + op[satisfies_file][sat].group_len;
+ /* And we need that done by start of our group. */
+ needs_opnum = op[needs_file][needs_opnum].group_start;
+ }
+
+ /* There is also this case:
+ * <traverse> <read foo> ...
+ * <transaction> ... </transaction> <create foo>
+ * Where if we start the traverse then wait, we could block
+ * the transaction and deadlock.
+ *
+ * We try to address this by ensuring that where seqnum indicates it's
+ * possible, we wait for <create foo> before *starting* traverse.
+ */
+ else if (in_traverse(op[needs_file], needs_opnum)) {
+ struct op *need = &op[needs_file][needs_opnum];
+ if (op[needs_file][need->group_start].serial >
+ op[satisfies_file][satisfies_opnum].serial) {
+ needs_opnum = need->group_start;
+ }
+ }
+#endif
+
+ /* If you depend on a transaction, you actually depend on it ending. */
+ if (starts_transaction(&op[satisfies_file][satisfies_opnum])) {
+ satisfies_opnum
+ += op[satisfies_file][satisfies_opnum].group_len;
+#if DEBUG_DEPS
+ printf("-> Actually end of transaction %s:%u\n",
+ filename[satisfies_file], satisfies_opnum+1);
+#endif
+ } else
+ /* We should never create a dependency from middle of
+ * a transaction. */
+ assert(!in_transaction(op[satisfies_file], satisfies_opnum)
+ || op[satisfies_file][satisfies_opnum].op
+ == OP_TDB_TRANSACTION_COMMIT
+ || op[satisfies_file][satisfies_opnum].op
+ == OP_TDB_TRANSACTION_CANCEL);
+
+ assert(op[needs_file][needs_opnum].op != OP_TDB_TRAVERSE);
+ assert(op[satisfies_file][satisfies_opnum].op != OP_TDB_TRAVERSE);
+
+ dep = talloc(ctx, struct depend);
+ dep->needs_file = needs_file;
+ dep->needs_opnum = needs_opnum;
+ dep->satisfies_file = satisfies_file;
+ dep->satisfies_opnum = satisfies_opnum;
+ list_add(&op[satisfies_file][satisfies_opnum].post, &dep->post_list);
+ list_add(&op[needs_file][needs_opnum].pre, &dep->pre_list);
+ talloc_set_destructor(dep, destroy_depend);
+}
+
+ /* True when gives() yields a non-NULL result for @op on @key.  Callers use
+  * this to tell ops that change the key apart from ops that merely use it. */
+ static bool changes_db(const TDB_DATA *key, const struct op *op)
+ {
+ 	if (gives(key, NULL, op))
+ 		return true;
+ 	return false;
+ }
+
+ /* Make user[i] wait for the earlier users of the same key.
+  *
+  * @num sizes the per-file flag array, so it must exceed every .file index.
+  * @prev is the index within @user of the previous entry we must order
+  * against, or -1 when there is none.
+  *
+  * When @prev is the entry directly before @i, one dependency on it is
+  * enough.  Otherwise the entries strictly between @prev and @i are walked
+  * from newest to oldest, and user[i] is made to depend on the first
+  * (i.e. latest) entry seen for each file other than its own. */
+ static void depend_on_previous(struct op *op[],
+ 			       char *filename[],
+ 			       unsigned int num,
+ 			       struct key_user user[],
+ 			       unsigned int i,
+ 			       int prev)
+ {
+ 	const struct key_user *me = &user[i];
+ 	bool covered[num];
+ 	int idx;
+
+ 	/* The very first user has nothing before it. */
+ 	if (i == 0)
+ 		return;
+
+ 	/* Nothing sits between us and @prev: depend on it directly. */
+ 	if (i - 1 == (unsigned int)prev) {
+ 		add_dependency(NULL, op, filename,
+ 			       me->file, me->op_num,
+ 			       user[prev].file, user[prev].op_num);
+ 		return;
+ 	}
+
+ 	/* Our own file never needs a dependency on itself. */
+ 	memset(covered, 0, sizeof(covered));
+ 	covered[me->file] = true;
+
+ 	/* Walking backwards means the first hit per file is its last user. */
+ 	for (idx = i - 1; idx > prev; idx--) {
+ 		const struct key_user *other = &user[idx];
+
+ 		if (covered[other->file])
+ 			continue;
+
+ 		add_dependency(NULL, op, filename,
+ 			       me->file, me->op_num,
+ 			       other->file, other->op_num);
+ 		covered[other->file] = true;
+ 	}
+ }
+
+ /* Prune dependencies that are implied by others.
+  *
+  * This is deliberately simple and incomplete: only direct dependencies are
+  * examined, indirect ones are not taken into account.
+  *
+  * @op[f] holds @num_ops[f] operations for each of the @num files.  Index 0
+  * of every file is left untouched by both passes. */
+ static void optimize_dependencies(struct op *op[], unsigned int num_ops[],
+ 				  unsigned int num)
+ {
+ 	unsigned int f, k;
+
+ 	/* Pass 1: for a single op, keep at most one dependency per
+ 	 * satisfying file -- the one with the highest satisfies_opnum. */
+ 	for (f = 0; f < num; f++) {
+ 		for (k = 1; k < num_ops[f]; k++) {
+ 			struct op *cur = &op[f][k];
+ 			struct depend *latest[num];
+ 			struct depend *dep, *next;
+
+ 			memset(latest, 0, sizeof(latest));
+
+ 			list_for_each_safe(&cur->pre, dep, next, pre_list) {
+ 				struct depend **best;
+
+ 				best = &latest[dep->satisfies_file];
+
+ 				/* Existing one is at least as late: drop
+ 				 * the newcomer. */
+ 				if (*best && (*best)->satisfies_opnum
+ 				    >= dep->satisfies_opnum) {
+ 					talloc_free(dep);
+ 					continue;
+ 				}
+
+ 				/* Newcomer wins; retire what it replaces. */
+ 				if (*best)
+ 					talloc_free(*best);
+ 				*best = dep;
+ 			}
+ 		}
+ 	}
+
+ 	/* Pass 2: walking one file's ops in order, remember the highest
+ 	 * satisfies_opnum already waited for in each other file; any later
+ 	 * dependency that does not go beyond that mark is dropped. */
+ 	for (f = 0; f < num; f++) {
+ 		int waited[num];
+
+ 		for (k = 0; k < num; k++)
+ 			waited[k] = -1;
+
+ 		for (k = 1; k < num_ops[f]; k++) {
+ 			struct op *cur = &op[f][k];
+ 			struct depend *dep, *next;
+
+ 			list_for_each_safe(&cur->pre, dep, next, pre_list) {
+ 				int *mark = &waited[dep->satisfies_file];
+
+ 				if (*mark >= (int)dep->satisfies_opnum)
+ 					talloc_free(dep);
+ 				else
+ 					*mark = dep->satisfies_opnum;
+ 			}
+ 		}
+ 	}
+ }
+
+#if TRAVERSALS_TAKE_TRANSACTION_LOCK
+ /* Identifies one traverse-start op as op[file][op_num]. */
+ struct traverse_dep {
+ 	unsigned int file;
+ 	unsigned int op_num;
+ };
+
+ /* Impose an order on the traversals so they don't deadlock (as much).
+  *
+  * Every traverse start that is not inside a transaction is collected from
+  * all @num files, the collection is sorted by serial number, and each
+  * traversal is then made to depend on the end of the one sorted just
+  * before it.  Pairs of read traversals are left alone, as are pairs whose
+  * order the comparator cannot distinguish. */
+ static void make_traverse_depends(char *filename[],
+ 				  struct op *op[], unsigned int num_ops[],
+ 				  unsigned int num)
+ {
+ 	unsigned int f, t, count = 0;
+ 	int k;
+ 	struct traverse_dep *trav;
+
+ 	/* qsort() comparator, nested so that it can reach op[].
+ 	 * Orders by the serial of the traverse start; on a tie (one of them
+ 	 * made no changes) falls back to the serial at the group's end. */
+ 	int traverse_order(const void *_a, const void *_b)
+ 	{
+ 		const struct traverse_dep *ta = _a, *tb = _b;
+ 		const struct op *a = &op[ta->file][ta->op_num];
+ 		const struct op *b = &op[tb->file][tb->op_num];
+
+ 		if (a->serial == b->serial)
+ 			return a[a->group_len].serial
+ 				- b[b->group_len].serial;
+
+ 		return a->serial - b->serial;
+ 	}
+
+ 	trav = talloc_array(NULL, struct traverse_dep, 1);
+
+ 	/* Gather every traverse start outside a transaction; those inside
+ 	 * one are already covered by the transaction dependencies. */
+ 	for (f = 0; f < num; f++) {
+ 		for (k = 1; k < num_ops[f]; k++) {
+ 			if (!starts_traverse(&op[f][k]))
+ 				continue;
+ 			if (in_transaction(op[f], k))
+ 				continue;
+
+ 			trav = talloc_realloc(NULL, trav,
+ 					      struct traverse_dep,
+ 					      count+1);
+ 			trav[count].file = f;
+ 			trav[count].op_num = k;
+ 			count++;
+ 		}
+ 	}
+
+ 	/* Earliest-running traversal first. */
+ 	qsort(trav, count, sizeof(trav[0]), traverse_order);
+
+ 	for (t = 1; t < count; t++) {
+ 		const struct traverse_dep *earlier = &trav[t-1];
+ 		const struct traverse_dep *later = &trav[t];
+ 		const struct op *first = &op[earlier->file][earlier->op_num];
+ 		const struct op *second = &op[later->file][later->op_num];
+
+ 		/* Two read traverses only take read locks, so they need no
+ 		 * ordering between them. */
+ 		if (first->op == OP_TDB_TRAVERSE_READ_START
+ 		    && second->op == OP_TDB_TRAVERSE_READ_START)
+ 			continue;
+
+ 		/* An indistinguishable pair gives no clear order: skip it. */
+ 		if (!traverse_order(later, earlier))
+ 			continue;
+
+ 		/* The later traversal waits for the end of the earlier. */
+ 		add_dependency(NULL, op, filename,
+ 			       later->file, later->op_num,
+ 			       earlier->file,
+ 			       earlier->op_num + first->group_len);
+ 	}
+ 	talloc_free(trav);
+ }
+#endif
+
+ /* Work out which ops must wait for which, across all @num trace files.
+  *
+  * The ops are hashed by key and each key's users sorted by serial number.
+  * Then, per key, every op is tied back to the last op that changed the
+  * key, instead of naively to its immediate predecessor.  That avoids false
+  * dependencies, keeps the later optimization simple, and avoids deadlock
+  * with same-serial ops inside traversals (when a traversal writes nothing,
+  * two ops may share a serial number while a traversal dependency runs the
+  * other way).
+  *
+  * The hash table is not freed here: it is handed to add_dependency() as
+  * its first argument for the non-changing users. */
+ static void derive_dependencies(char *filename[],
+ 				struct op *op[], unsigned int num_ops[],
+ 				unsigned int num)
+ {
+ 	struct keyinfo *hash;
+ 	unsigned int slot, u;
+
+ 	/* Key lookup table first, then order each key's users by serial. */
+ 	hash = hash_ops(op, num_ops, num);
+ 	sort_ops(hash, filename, op, num);
+
+ 	for (slot = 0; slot < total_keys * 2; slot++) {
+ 		struct keyinfo *ki = &hash[slot];
+ 		int last_change = -1;
+
+ 		/* A key with fewer than two users cannot create ordering. */
+ 		if (ki->num_users < 2)
+ 			continue;
+
+ 		for (u = 0; u < ki->num_users; u++) {
+ 			struct key_user *who = &ki->user[u];
+
+ 			if (changes_db(&ki->key,
+ 				       &op[who->file][who->op_num])) {
+ 				/* A changer waits for the previous changer
+ 				 * and for the users in between. */
+ 				depend_on_previous(op, filename, num,
+ 						   ki->user, u, last_change);
+ 				last_change = u;
+ 				continue;
+ 			}
+
+ 			/* Nothing has changed this key yet: no dependency. */
+ 			if (last_change < 0)
+ 				continue;
+
+ 			/* Otherwise wait only for the latest change. */
+ 			add_dependency(hash, op, filename,
+ 				       who->file,
+ 				       who->op_num,
+ 				       ki->user[last_change].file,
+ 				       ki->user[last_change].op_num);
+ 		}
+ 	}
+
+#if TRAVERSALS_TAKE_TRANSACTION_LOCK
+ 	make_traverse_depends(filename, op, num_ops, num);
+#endif
+
+ 	optimize_dependencies(op, num_ops, num);
+ }
+
+int main(int argc, char *argv[])
+{
+ struct timeval start, end;
+ unsigned int i, num_ops[argc], hashsize[argc], tdb_flags[argc], open_flags[argc];
+ struct op *op[argc];
+ int fds[2];
+ char c;
+ bool ok = true;
+
+ if (argc < 3)
+ errx(1, "Usage: %s <tdbfile> <tracefile>...", argv[0]);
+
+ pipes = talloc_array(NULL, struct pipe, argc - 2);
+ for (i = 0; i < argc - 2; i++) {
+ printf("Loading tracefile %s...", argv[2+i]);
+ fflush(stdout);
+ op[i] = load_tracefile(argv[2+i], &num_ops[i], &hashsize[i],
+ &tdb_flags[i], &open_flags[i]);
+ if (pipe(pipes[i].fd) != 0)
+ err(1, "creating pipe");
+ printf("done\n");
+ }
+
+ printf("Calculating inter-dependencies...");
+ fflush(stdout);
+ derive_dependencies(argv+2, op, num_ops, i);
+ printf("done\n");
+
+ /* Don't fork for single arg case: simple debugging. */
+ if (argc == 3) {
+ struct tdb_context *tdb;
+ tdb = tdb_open_ex(argv[1], hashsize[0], tdb_flags[0]|TDB_NOSYNC,
+ open_flags[0], 0600, NULL, hash_key);
+ printf("Single threaded run...");
+ fflush(stdout);
+
+ run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1, num_ops[0],
+ false);
+ check_deps(argv[2], op[0], num_ops[0]);
+
+ printf("done\n");
+ exit(0);
+ }
+
+ if (pipe(fds) != 0)
+ err(1, "creating pipe");
+
+ for (i = 0; i < argc - 2; i++) {
+ struct tdb_context *tdb;
+
+ switch (fork()) {
+ case -1:
+ err(1, "fork failed");
+ case 0:
+ close(fds[1]);
+ tdb = tdb_open_ex(argv[1], hashsize[i],
+ tdb_flags[i]|TDB_NOSYNC,
+ open_flags[i], 0600, NULL, hash_key);
+ if (!tdb)
+ err(1, "Opening tdb %s", argv[1]);
+
+ /* This catches parent exiting. */
+ if (read(fds[0], &c, 1) != 1)
+ exit(1);
+ run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1,
+ num_ops[i], false);
+ check_deps(argv[2+i], op[i], num_ops[i]);
+ exit(0);
+ default:
+ break;
+ }
+ }
+
+ /* Let everything settle. */
+ sleep(1);
+
+ printf("Starting run...");
+ fflush(stdout);