X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ftdb%2Ftools%2Freplay_trace.c;h=db5a07be1cf7c7dd4b3cd7ae18a5770ca1d5ca63;hp=0bfaf6c5946ee7c50ed07c8cc7397a17eed3d8bd;hb=d9961fc330a057b0f5359b3d97a5317aee2d6efa;hpb=eb2cbb4bf503b6f386663f5cfa932145e56769a8 diff --git a/ccan/tdb/tools/replay_trace.c b/ccan/tdb/tools/replay_trace.c index 0bfaf6c5..db5a07be 100644 --- a/ccan/tdb/tools/replay_trace.c +++ b/ccan/tdb/tools/replay_trace.c @@ -14,6 +14,7 @@ #include #include #include +#include #define STRINGIFY2(x) #x #define STRINGIFY(x) STRINGIFY2(x) @@ -143,7 +144,10 @@ struct op { union { int flag; /* open and store */ struct traverse *trav; /* traverse start */ - TDB_DATA pre_append; /* append */ + struct { /* append */ + TDB_DATA pre; + TDB_DATA post; + } append; unsigned int transaction_end; /* transaction start */ }; }; @@ -259,8 +263,6 @@ static void op_add_store(const char *filename, static void op_add_append(const char *filename, struct op op[], unsigned int op_num, char *words[]) { - TDB_DATA post_append; - if (!words[2] || !words[3] || !words[4] || !words[5] || words[6] || !streq(words[4], "=")) fail(filename, op_num+1, "Expect = "); @@ -268,11 +270,13 @@ static void op_add_append(const char *filename, op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]); op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]); - post_append = make_tdb_data(op, filename, op_num+1, words[5]); + op[op_num].append.post + = make_tdb_data(op, filename, op_num+1, words[5]); /* By subtraction, figure out what previous data was. */ - op[op_num].pre_append.dptr = post_append.dptr; - op[op_num].pre_append.dsize = post_append.dsize - op[op_num].data.dsize; + op[op_num].append.pre.dptr = op[op_num].append.post.dptr; + op[op_num].append.pre.dsize + = op[op_num].append.post.dsize - op[op_num].data.dsize; total_keys++; } @@ -444,30 +448,14 @@ find_keyword (register const char *str, register unsigned int len); struct depend { /* We can have more than one */ - struct list_node list; - unsigned int file; - unsigned int op; -}; - -struct depend_xmit { - unsigned int dst_op; - unsigned int src_file, src_op; + struct list_node pre_list; + struct list_node post_list; + unsigned int needs_file; + unsigned int needs_opnum; + unsigned int satisfies_file; + unsigned int satisfies_opnum; }; -static void remove_matching_dep(struct list_head *deps, - unsigned int file, unsigned int op) -{ - struct depend *dep; - - list_for_each(deps, dep, list) { - if (dep->file == file && dep->op == op) { - list_del(&dep->list); - return; - } - } - errx(1, "Failed to find depend on file %u line %u\n", file, op+1); -} - static void check_deps(const char *filename, struct op op[], unsigned int num) { #ifdef DEBUG_DEPS @@ -485,16 +473,18 @@ static void dump_pre(char *filename[], unsigned int file, struct depend *dep; printf("%s:%u still waiting for:\n", filename[file], i+1); - list_for_each(&op[i].pre, dep, list) - printf(" %s:%u\n", filename[dep->file], dep->op+1); + list_for_each(&op[i].pre, dep, pre_list) + printf(" %s:%u\n", + filename[dep->satisfies_file], dep->satisfies_opnum+1); check_deps(filename[file], op, i); } +/* We simply read/write pointers, since we all are children. */ static void do_pre(char *filename[], unsigned int file, int pre_fd, struct op op[], unsigned int i) { while (!list_empty(&op[i].pre)) { - struct depend_xmit dep; + struct depend *dep; #if DEBUG_DEPS printf("%s:%u:waiting for pre\n", filename[file], i+1); @@ -512,12 +502,12 @@ static void do_pre(char *filename[], unsigned int file, int pre_fd, #if DEBUG_DEPS printf("%s:%u:got pre %u from %s:%u\n", filename[file], i+1, - dep.dst_op+1, filename[dep.src_file], dep.src_op+1); + dep->needs_opnum+1, filename[dep->satisfies_file], + dep->satisfies_opnum+1); fflush(stdout); #endif /* This could be any op, not just this one. */ - remove_matching_dep(&op[dep.dst_op].pre, - dep.src_file, dep.src_op); + talloc_free(dep); } } @@ -526,20 +516,15 @@ static void do_post(char *filename[], unsigned int file, { struct depend *dep; - list_for_each(&op[i].post, dep, list) { - struct depend_xmit dx; - - dx.src_file = file; - dx.src_op = i; - dx.dst_op = dep->op; + list_for_each(&op[i].post, dep, post_list) { #if DEBUG_DEPS printf("%s:%u:sending to file %s:%u\n", filename[file], i+1, - filename[dep->file], dep->op+1); + filename[dep->needs_file], dep->needs_opnum+1); #endif - if (write(pipes[dep->file].fd[1], &dx, sizeof(dx)) - != sizeof(dx)) + if (write(pipes[dep->needs_file].fd[1], &dep, sizeof(dep)) + != sizeof(dep)) err(1, "%s:%u failed to tell file %s", - filename[file], i+1, filename[dep->file]); + filename[file], i+1, filename[dep->needs_file]); } } @@ -737,11 +722,10 @@ unsigned run_ops(struct tdb_context *tdb, break; case OP_TDB_STORE: try(tdb_store(tdb, op[i].key, op[i].data, op[i].flag), - op[i].ret < 0 ? op[i].ret : 0); + op[i].ret); break; case OP_TDB_APPEND: - try(tdb_append(tdb, op[i].key, op[i].data), - op[i].ret < 0 ? op[i].ret : 0); + try(tdb_append(tdb, op[i].key, op[i].data), op[i].ret); break; case OP_TDB_GET_SEQNUM: try(tdb_get_seqnum(tdb), op[i].ret); @@ -888,9 +872,9 @@ static const TDB_DATA *needs(const struct op *op) return NULL; case OP_TDB_APPEND: - if (op->pre_append.dsize == 0) + if (op->append.pre.dsize == 0) return ¬_exists_or_empty; - return &op->pre_append; + return &op->append.pre; case OP_TDB_STORE: if (op->flag == TDB_INSERT) { @@ -945,74 +929,23 @@ static const TDB_DATA *needs(const struct op *op) } -enum satisfaction { - /* This op makes the other one possible. */ - SATISFIES, - /* This op makes the other one impossible. */ - DISSATISFIES, - /* This op makes no difference. */ - NO_CHANGE -}; - -static enum satisfaction satisfies(const struct op *op, const TDB_DATA *need) +/* What's the data after this op? pre if nothing changed. */ +static const TDB_DATA *gives(const struct op *op, const TDB_DATA *pre) { - bool deletes, creates; - /* Failed ops don't change state of db. */ if (op->ret < 0) - return NO_CHANGE; - - deletes = (op->op == OP_TDB_DELETE || op->op == OP_TDB_WIPE_ALL); - /* Append/store is creating the record if ret == 0 (1 means replaced) */ - if (op->op == OP_TDB_APPEND || op->op == OP_TDB_STORE) - creates = (op->ret == 0); - else - creates = false; - - if (need == &must_not_exist) { - if (deletes) - return SATISFIES; - if (creates) - return DISSATISFIES; - return NO_CHANGE; - } + return pre; - if (need == &must_exist) { - if (deletes) - return DISSATISFIES; - if (creates) - return SATISFIES; - return NO_CHANGE; - } + if (op->op == OP_TDB_DELETE || op->op == OP_TDB_WIPE_ALL) + return &tdb_null; - if (need == ¬_exists_or_empty) { - if (deletes) - return SATISFIES; - if (!creates) - return NO_CHANGE; - } + if (op->op == OP_TDB_APPEND) + return &op->append.post; - /* OK, we need an exact match. */ - if (deletes) - return DISSATISFIES; - - /* An append which results in the wrong data dissatisfies. */ - if (op->op == OP_TDB_APPEND) { - if (op->pre_append.dsize + op->data.dsize != need->dsize) - return DISSATISFIES; - if (memcmp(op->pre_append.dptr, need->dptr, - op->pre_append.dsize) != 0) - return DISSATISFIES; - if (memcmp(op->data.dptr, need->dptr + op->pre_append.dsize, - op->data.dsize) != 0) - return DISSATISFIES; - return SATISFIES; - } else if (op->op == OP_TDB_STORE) { - if (key_eq(op->data, *need)) - return SATISFIES; - return DISSATISFIES; - } - return NO_CHANGE; + if (op->op == OP_TDB_STORE) + return &op->data; + + return pre; } static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[], @@ -1021,46 +954,6 @@ static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[], unsigned int i, j, h; struct keyinfo *hash; - /* Gcc nexted function extension. How cool is this? */ - int compare_user_serial(const void *_a, const void *_b) - { - const struct key_user *a = _a, *b = _b; - int ret = op[a->file][a->op_num].serial - - op[b->file][b->op_num].serial; - - /* Serial is not completely reliable. First, fetches don't - * inc serial, second we don't lock to get seq number. - * This smooths things a little for simple cases. */ - if (ret == 0) { - const TDB_DATA *a_needs, *b_needs; - - b_needs = needs(&op[b->file][b->op_num]); - switch (satisfies(&op[a->file][a->op_num], b_needs)) { - case SATISFIES: - /* A comes first: it satisfies B. */ - return -1; - case DISSATISFIES: - /* A doesn't come first: it messes up B. */ - return 1; - default: - break; - } - - a_needs = needs(&op[a->file][a->op_num]); - switch (satisfies(&op[b->file][b->op_num], a_needs)) { - case SATISFIES: - /* B comes first: it satisfies A. */ - return 1; - case DISSATISFIES: - /* B doesn't come first: it messes up A. */ - return -1; - default: - break; - } - } - return ret; - } - hash = talloc_zero_array(op[0], struct keyinfo, total_keys*2); for (i = 0; i < num; i++) { for (j = 1; j < num_ops[i]; j++) { @@ -1098,14 +991,169 @@ static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[], } } - /* Now sort into seqnum order. */ - for (h = 0; h < total_keys * 2; h++) - qsort(hash[h].user, hash[h].num_users, sizeof(hash[h].user[0]), - compare_user_serial); - return hash; } +static bool satisfies(const TDB_DATA *data, const TDB_DATA *need) +{ + /* Don't need anything? Cool. */ + if (!need) + return true; + + /* This should be tdb_null or a real value. */ + assert(data != &must_exist); + assert(data != &must_not_exist); + assert(data != ¬_exists_or_empty); + + /* must_not_exist == must_not_exist, must_exist == must_exist, or + not_exists_or_empty == not_exists_or_empty. */ + if (data->dsize == need->dsize && data->dptr == need->dptr) + return true; + + /* Must not exist? data must not exist. */ + if (need == &must_not_exist) + return data->dptr == NULL; + + /* Must exist? */ + if (need == &must_exist) + return data->dptr != NULL; + + /* Either noexist or empty. */ + if (need == ¬_exists_or_empty) + return data->dsize == 0; + + /* Needs something specific. */ + return key_eq(*data, *need); +} + +static void move_to_front(struct key_user res[], unsigned int elem) +{ + if (elem != 0) { + struct key_user tmp = res[elem]; + memmove(res + 1, res, elem*sizeof(res[0])); + res[0] = tmp; + } +} + +static void restore_to_pos(struct key_user res[], unsigned int elem) +{ + if (elem != 0) { + struct key_user tmp = res[0]; + memmove(res, res + 1, elem*sizeof(res[0])); + res[elem] = tmp; + } +} + +static bool sort_deps(char *filename[], struct op *op[], + struct key_user res[], unsigned num, + const TDB_DATA *data, unsigned num_files) +{ + unsigned int i, files_done; + struct op *this_op; + bool done[num_files]; + + /* Nothing left? We're sorted. */ + if (num == 0) + return true; + + memset(done, 0, sizeof(done)); + + /* Since ops within a trace file are ordered, we just need to figure + * out which file to try next. Since we don't take into account + * inter-key relationships (which exist by virtue of trace file order), + * we minimize the chance of harm by trying to keep in serial order. */ + for (files_done = 0, i = 0; i < num && files_done < num_files; i++) { + if (done[res[i].file]) + continue; + + this_op = &op[res[i].file][res[i].op_num]; + /* Is what we have good enough for this op? */ + if (satisfies(data, needs(this_op))) { + move_to_front(res, i); + if (sort_deps(filename, op, res+1, num-1, + gives(this_op, data), num_files)) + return true; + restore_to_pos(res, i); + } + done[res[i].file] = true; + files_done++; + } + + /* No combination worked. */ + return false; +} + +static void check_dep_sorting(struct key_user user[], unsigned num_users, + unsigned num_files) +{ +#if DEBUG_DEPS + unsigned int i; + unsigned minima[num_files]; + + memset(minima, 0, sizeof(minima)); + for (i = 0; i < num_users; i++) { + assert(minima[user[i].file] < user[i].op_num); + minima[user[i].file] = user[i].op_num; + } +#endif +} + +/* All these ops have the same serial number. Which comes first? + * + * This can happen both because read ops or failed write ops don't + * change serial number, and also due to race since we access the + * number unlocked (the race can cause less detectable ordering problems, + * in which case we'll deadlock and report: fix manually in that case). + */ +static void figure_deps(char *filename[], struct op *op[], + struct key_user user[], unsigned num_users, + unsigned num_files) +{ + /* We assume database starts empty. */ + const struct TDB_DATA *data = &tdb_null; + + if (!sort_deps(filename, op, user, num_users, data, num_files)) + fail(filename[user[0].file], user[0].op_num+1, + "Could not resolve inter-dependencies"); + + check_dep_sorting(user, num_users, num_files); +} + +static void sort_ops(struct keyinfo hash[], char *filename[], struct op *op[], + unsigned int num) +{ + unsigned int h; + + /* Gcc nexted function extension. How cool is this? */ + int compare_serial(const void *_a, const void *_b) + { + const struct key_user *a = _a, *b = _b; + + /* First, maintain order within any trace file. */ + if (a->file == b->file) + return a->op_num - b->op_num; + + /* Otherwise, arrange by serial order. */ + return op[a->file][a->op_num].serial + - op[b->file][b->op_num].serial; + } + + /* Now sort into serial order. */ + for (h = 0; h < total_keys * 2; h++) { + struct key_user *user = hash[h].user; + + qsort(user, hash[h].num_users, sizeof(user[0]), compare_serial); + figure_deps(filename, op, user, hash[h].num_users, num); + } +} + +static int destroy_depend(struct depend *dep) +{ + list_del(&dep->pre_list); + list_del(&dep->post_list); + return 0; +} + static void add_dependency(void *ctx, struct op *op[], char *filename[], @@ -1114,12 +1162,14 @@ static void add_dependency(void *ctx, unsigned int satisfies_file, unsigned int satisfies_opnum) { - struct depend *post, *pre; + struct depend *dep; unsigned int needs_start, sat_start; /* We don't depend on ourselves. */ - if (needs_file == satisfies_file) + if (needs_file == satisfies_file) { + assert(satisfies_opnum < needs_opnum); return; + } #if DEBUG_DEPS printf("%s:%u: depends on %s:%u\n", @@ -1159,15 +1209,14 @@ static void add_dependency(void *ctx, } } - post = talloc(ctx, struct depend); - post->file = needs_file; - post->op = needs_opnum; - list_add(&op[satisfies_file][satisfies_opnum].post, &post->list); - - pre = talloc(ctx, struct depend); - pre->file = satisfies_file; - pre->op = satisfies_opnum; - list_add(&op[needs_file][needs_opnum].pre, &pre->list); + dep = talloc(ctx, struct depend); + dep->needs_file = needs_file; + dep->needs_opnum = needs_opnum; + dep->satisfies_file = satisfies_file; + dep->satisfies_opnum = satisfies_opnum; + list_add(&op[satisfies_file][satisfies_opnum].post, &dep->post_list); + list_add(&op[needs_file][needs_opnum].pre, &dep->pre_list); + talloc_set_destructor(dep, destroy_depend); } #if TRAVERSALS_TAKE_TRANSACTION_LOCK @@ -1227,31 +1276,143 @@ static void make_traverse_depends(char *filename[], } #endif /* TRAVERSALS_TAKE_TRANSACTION_LOCK */ +static bool changes_db(const struct op *op) +{ + return gives(op, NULL) != NULL; +} + +static void depend_on_previous(struct op *op[], + char *filename[], + unsigned int num, + struct key_user user[], + unsigned int i, + int prev) +{ + bool deps[num]; + int j; + + if (i == 0) + return; + + if (prev == i - 1) { + /* Just depend on previous. */ + add_dependency(NULL, op, filename, + user[i].file, user[i].op_num, + user[prev].file, user[prev].op_num); + return; + } + + /* We have to wait for the readers. Find last one in *each* file. */ + memset(deps, 0, sizeof(deps)); + deps[user[i].file] = true; + for (j = i - 1; j > prev; j--) { + if (!deps[user[j].file]) { + add_dependency(NULL, op, filename, + user[i].file, user[i].op_num, + user[j].file, user[j].op_num); + deps[user[j].file] = true; + } + } +} + +/* This is simple, but not complete. We don't take into account + * indirect dependencies. */ +static void optimize_dependencies(struct op *op[], unsigned int num_ops[], + unsigned int num) +{ + unsigned int i, j; + + /* There can only be one real dependency on each file */ + for (i = 0; i < num; i++) { + for (j = 1; j < num_ops[i]; j++) { + struct depend *dep, *next; + struct depend *prev[num]; + + memset(prev, 0, sizeof(prev)); + + list_for_each_safe(&op[i][j].pre, dep, next, pre_list) { + if (!prev[dep->satisfies_file]) { + prev[dep->satisfies_file] = dep; + continue; + } + if (prev[dep->satisfies_file]->satisfies_opnum + < dep->satisfies_opnum) { + talloc_free(prev[dep->satisfies_file]); + prev[dep->satisfies_file] = dep; + } else + talloc_free(dep); + } + } + } + + for (i = 0; i < num; i++) { + int deps[num]; + + for (j = 0; j < num; j++) + deps[j] = -1; + + for (j = 1; j < num_ops[i]; j++) { + struct depend *dep, *next; + + list_for_each_safe(&op[i][j].pre, dep, next, pre_list) { + if (deps[dep->satisfies_file] + >= (int)dep->satisfies_opnum) + talloc_free(dep); + else + deps[dep->satisfies_file] + = dep->satisfies_opnum; + } + } + } +} + static void derive_dependencies(char *filename[], struct op *op[], unsigned int num_ops[], unsigned int num) { struct keyinfo *hash; - unsigned int i, j; + unsigned int h, i; /* Create hash table for faster key lookup. */ hash = hash_ops(op, num_ops, num); - /* We make the naive assumption that two ops on the same key - * have to be ordered; it's overkill. */ - for (i = 0; i < total_keys * 2; i++) { - for (j = 1; j < hash[i].num_users; j++) { - add_dependency(hash, op, filename, - hash[i].user[j].file, - hash[i].user[j].op_num, - hash[i].user[j-1].file, - hash[i].user[j-1].op_num); + /* Sort them by serial number. */ + sort_ops(hash, filename, op, num); + + /* Create dependencies back to the last change, rather than + * creating false dependencies by naively making each one + * depend on the previous. This has two purposes: it makes + * later optimization simpler, and it also avoids deadlock with + * same sequence number ops inside traversals (if one + * traversal doesn't write anything, two ops can have the same + * sequence number yet we can create a traversal dependency + * the other way). */ + for (h = 0; h < total_keys * 2; h++) { + int prev = -1; + + if (hash[h].num_users < 2) + continue; + + for (i = 0; i < hash[h].num_users; i++) { + if (changes_db(&op[hash[h].user[i].file] + [hash[h].user[i].op_num])) { + depend_on_previous(op, filename, num, + hash[h].user, i, prev); + prev = i; + } else if (prev >= 0) + add_dependency(hash, op, filename, + hash[h].user[i].file, + hash[h].user[i].op_num, + hash[h].user[prev].file, + hash[h].user[prev].op_num); } } #if TRAVERSALS_TAKE_TRANSACTION_LOCK make_traverse_depends(filename, op, num_ops, num); #endif + + optimize_dependencies(op, num_ops, num); } int main(int argc, char *argv[])