1 #include <ccan/tdb/tdb.h>
2 #include <ccan/grab_file/grab_file.h>
3 #include <ccan/hash/hash.h>
4 #include <ccan/talloc/talloc.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/str/str.h>
7 #include <ccan/list/list.h>
12 #include <sys/types.h>
/* Two-step stringification: STRINGIFY(x) passes x through one more macro
 * level before STRINGIFY2 applies '#', so x is macro-expanded first.
 * The try() and unreliable() macros use it to put the text of the
 * attempted expression into their messages. */
20 #define STRINGIFY2(x) #x
21 #define STRINGIFY(x) STRINGIFY2(x)
23 /* Avoid mod by zero: the key hash is allocated with total_keys*2 slots
 * and indexed modulo total_keys*2, so this must never be 0. */
24 static unsigned int total_keys = 1;
26 /* All the wipe_all ops.  op_add_wipe_all() records the file and op number
 * of each one here; hash_ops() later adds them as users of the key hash
 * entries, since a wipe_all touches every key. */
27 static struct op_desc *wipe_alls = NULL;
28 static unsigned int num_wipe_alls = 0;
30 /* #define DEBUG_DEPS 1 */
32 /* Traversals block transactions in the current implementation.
 * When this is non-zero, nontrivial_traverse() sets avoid_deadlock before
 * running the ops inside a traverse, and add_dependency() compiles in its
 * special handling of dependencies between traverses and
 * traverses/transactions. */
33 #define TRAVERSALS_TAKE_TRANSACTION_LOCK 1
/* Indexed by trace file number: do_post() writes a dependency pointer to
 * pipes[file].fd[1] to tell that file that a prerequisite has completed. */
38 static struct pipe *pipes;
/* do_pre() writes a struct op_desc (file, op number) here when it reports
 * "avoiding deadlock".  Initially -1. */
39 static int backoff_fd = -1;
41 static void __attribute__((noreturn)) fail(const char *filename,
48 fprintf(stderr, "%s:%u: FAIL: ", filename, line);
49 vfprintf(stderr, fmt, ap);
50 fprintf(stderr, "\n");
56 #define try(expr, expect) \
59 if (ret != (expect)) \
60 fail(filename[file], i+1, \
61 STRINGIFY(expr) "= %i", ret); \
64 /* Try or imitate results. */
65 #define unreliable(expr, expect, force, undo) \
68 if (ret != expect) { \
69 fprintf(stderr, "%s:%u: %s gave %i not %i", \
70 filename[file], i+1, STRINGIFY(expr), \
79 static bool key_eq(TDB_DATA a, TDB_DATA b)
81 if (a.dsize != b.dsize)
83 return memcmp(a.dptr, b.dptr, a.dsize) == 0;
86 /* This is based on the hash algorithm from gdbm */
87 static unsigned int hash_key(TDB_DATA *key)
89 uint32_t value; /* Used to compute the hash value. */
90 uint32_t i; /* Used to cycle through random values. */
92 /* Set the initial value from the key size. */
93 for (value = 0x238F13AF ^ key->dsize, i=0; i < key->dsize; i++)
94 value = (value + (key->dptr[i] << (i*5 % 24)));
96 return (1103515243 * value + 12345);
102 OP_TDB_LOCKALL_UNMARK,
103 OP_TDB_LOCKALL_NONBLOCK,
106 OP_TDB_LOCKALL_READ_NONBLOCK,
107 OP_TDB_UNLOCKALL_READ,
109 OP_TDB_CHAINLOCK_NONBLOCK,
110 OP_TDB_CHAINLOCK_MARK,
111 OP_TDB_CHAINLOCK_UNMARK,
113 OP_TDB_CHAINLOCK_READ,
114 OP_TDB_CHAINUNLOCK_READ,
121 OP_TDB_TRANSACTION_START,
122 OP_TDB_TRANSACTION_CANCEL,
123 OP_TDB_TRANSACTION_PREPARE_COMMIT,
124 OP_TDB_TRANSACTION_COMMIT,
125 OP_TDB_TRAVERSE_READ_START,
126 OP_TDB_TRAVERSE_START,
129 OP_TDB_TRAVERSE_END_EARLY,
144 /* Who is waiting for us? */
145 struct list_head post;
146 /* What are we waiting for? */
147 struct list_head pre;
149 /* If I'm part of a group (traverse/transaction) where is
150 * start? (Otherwise, 0) */
151 unsigned int group_start;
154 int flag; /* open and store */
155 struct { /* append */
159 /* transaction/traverse start/chainlock */
160 unsigned int group_len;
169 static unsigned char hex_char(const char *filename, unsigned int line, char c)
172 if (c >= 'A' && c <= 'F')
174 if (c >= '0' && c <= '9')
176 fail(filename, line, "invalid hex character '%c'", c);
179 /* TDB data is <size>:<%02x>* */
180 static TDB_DATA make_tdb_data(const void *ctx,
181 const char *filename, unsigned int line,
188 if (streq(word, "NULL"))
191 data.dsize = atoi(word);
192 data.dptr = talloc_array(ctx, unsigned char, data.dsize);
193 p = strchr(word, ':');
195 fail(filename, line, "invalid tdb data '%s'", word);
197 for (i = 0; i < data.dsize; i++)
198 data.dptr[i] = hex_char(filename, line, p[i*2])*16
199 + hex_char(filename, line, p[i*2+1]);
204 static void add_op(const char *filename, struct op **op, unsigned int i,
205 unsigned int seqnum, enum op_type type)
208 *op = talloc_realloc(NULL, *op, struct op, i+1);
211 new->seqnum = seqnum;
213 new->group_start = 0;
216 static void op_add_nothing(char *filename[], struct op op[],
217 unsigned file, unsigned op_num, char *words[])
220 fail(filename[file], op_num+1, "Expected no arguments");
221 op[op_num].key = tdb_null;
224 static void op_add_key(char *filename[], struct op op[],
225 unsigned file, unsigned op_num, char *words[])
227 if (words[2] == NULL || words[3])
228 fail(filename[file], op_num+1, "Expected just a key");
230 op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
234 static void op_add_key_ret(char *filename[], struct op op[],
235 unsigned file, unsigned op_num, char *words[])
237 if (!words[2] || !words[3] || !words[4] || words[5]
238 || !streq(words[3], "="))
239 fail(filename[file], op_num+1, "Expected <key> = <ret>");
240 op[op_num].ret = atoi(words[4]);
241 op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
242 /* May only be a unique key if it fails */
243 if (op[op_num].ret != 0)
247 static void op_add_key_data(char *filename[], struct op op[],
248 unsigned file, unsigned op_num, char *words[])
250 if (!words[2] || !words[3] || !words[4] || words[5]
251 || !streq(words[3], "="))
252 fail(filename[file], op_num+1, "Expected <key> = <data>");
253 op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
254 op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[4]);
255 /* Likely to only be a unique key if it fails */
256 if (!op[op_num].data.dptr)
258 else if (random() % 2)
262 /* We don't record the keys or data for a traverse, as we don't use them. */
263 static void op_add_traverse(char *filename[], struct op op[],
264 unsigned file, unsigned op_num, char *words[])
266 if (!words[2] || !words[3] || !words[4] || words[5]
267 || !streq(words[3], "="))
268 fail(filename[file], op_num+1, "Expected <key> = <data>");
269 op[op_num].key = tdb_null;
272 /* Full traverse info is useful for debugging, but changing it to
273 * "traversefn" without the data makes the traces *much* smaller! */
274 static void op_add_traversefn(char *filename[], struct op op[],
275 unsigned file, unsigned op_num, char *words[])
278 fail(filename[file], op_num+1, "Expected no values");
279 op[op_num].key = tdb_null;
282 /* <seqnum> tdb_store <rec> <rec> <flag> = <ret> */
283 static void op_add_store(char *filename[], struct op op[],
284 unsigned file, unsigned op_num, char *words[])
286 if (!words[2] || !words[3] || !words[4] || !words[5] || !words[6]
287 || words[7] || !streq(words[5], "="))
288 fail(filename[file], op_num+1, "Expect <key> <data> <flag> = <ret>");
290 op[op_num].flag = strtoul(words[4], NULL, 0);
291 op[op_num].ret = atoi(words[6]);
292 op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
293 op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[3]);
297 /* <seqnum> tdb_append <rec> <rec> = <rec> */
298 static void op_add_append(char *filename[], struct op op[],
299 unsigned file, unsigned op_num, char *words[])
301 if (!words[2] || !words[3] || !words[4] || !words[5] || words[6]
302 || !streq(words[4], "="))
303 fail(filename[file], op_num+1, "Expect <key> <data> = <rec>");
305 op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
306 op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[3]);
308 op[op_num].append.post
309 = make_tdb_data(op, filename[file], op_num+1, words[5]);
311 /* By subtraction, figure out what previous data was. */
312 op[op_num].append.pre.dptr = op[op_num].append.post.dptr;
313 op[op_num].append.pre.dsize
314 = op[op_num].append.post.dsize - op[op_num].data.dsize;
318 /* <seqnum> tdb_get_seqnum = <ret> */
319 static void op_add_seqnum(char *filename[], struct op op[],
320 unsigned file, unsigned op_num, char *words[])
322 if (!words[2] || !words[3] || words[4] || !streq(words[2], "="))
323 fail(filename[file], op_num+1, "Expect = <ret>");
325 op[op_num].key = tdb_null;
326 op[op_num].ret = atoi(words[3]);
329 static void op_add_traverse_start(char *filename[], struct op op[],
330 unsigned file, unsigned op_num, char *words[])
333 fail(filename[file], op_num+1, "Expect no arguments");
335 op[op_num].key = tdb_null;
336 op[op_num].group_len = 0;
339 static void op_add_transaction(char *filename[], struct op op[],
340 unsigned file, unsigned op_num, char *words[])
343 fail(filename[file], op_num+1, "Expect no arguments");
345 op[op_num].key = tdb_null;
346 op[op_num].group_len = 0;
349 static void op_add_chainlock(char *filename[], struct op op[],
350 unsigned file, unsigned op_num, char *words[])
352 if (words[2] == NULL || words[3])
353 fail(filename[file], op_num+1, "Expected just a key");
355 /* A chainlock key isn't a key in the normal sense; it doesn't
356 * have to be in the db at all. Also, we don't want to hash this op. */
357 op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[2]);
358 op[op_num].key = tdb_null;
359 op[op_num].group_len = 0;
362 static void op_add_chainlock_ret(char *filename[], struct op op[],
363 unsigned file, unsigned op_num, char *words[])
365 if (!words[2] || !words[3] || !words[4] || words[5]
366 || !streq(words[3], "="))
367 fail(filename[file], op_num+1, "Expected <key> = <ret>");
368 op[op_num].ret = atoi(words[4]);
369 op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[2]);
370 op[op_num].key = tdb_null;
371 op[op_num].group_len = 0;
375 static void op_add_wipe_all(char *filename[], struct op op[],
376 unsigned file, unsigned op_num, char *words[])
379 fail(filename[file], op_num+1, "Expected no arguments");
380 op[op_num].key = tdb_null;
381 wipe_alls = talloc_realloc(NULL, wipe_alls, struct op_desc,
383 wipe_alls[num_wipe_alls].file = file;
384 wipe_alls[num_wipe_alls].op_num = op_num;
388 static int op_find_start(struct op op[], unsigned int op_num, enum op_type type)
392 for (i = op_num-1; i > 0; i--) {
393 if (op[i].type == type && !op[i].group_len)
399 static void op_analyze_transaction(char *filename[], struct op op[],
400 unsigned file, unsigned op_num,
403 unsigned int start, i;
405 op[op_num].key = tdb_null;
408 fail(filename[file], op_num+1, "Expect no arguments");
410 start = op_find_start(op, op_num, OP_TDB_TRANSACTION_START);
412 fail(filename[file], op_num+1, "no transaction start found");
414 op[start].group_len = op_num - start;
416 /* This rolls in nested transactions. I think that's right. */
417 for (i = start; i <= op_num; i++)
418 op[i].group_start = start;
421 /* We treat chainlocks a lot like transactions, even though that's overkill */
422 static void op_analyze_chainlock(char *filename[], struct op op[],
423 unsigned file, unsigned op_num, char *words[])
425 unsigned int i, start;
427 if (words[2] == NULL || words[3])
428 fail(filename[file], op_num+1, "Expected just a key");
430 op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[2]);
431 op[op_num].key = tdb_null;
434 start = op_find_start(op, op_num, OP_TDB_CHAINLOCK);
436 start = op_find_start(op, op_num, OP_TDB_CHAINLOCK_READ);
438 fail(filename[file], op_num+1, "no initial chainlock found");
440 /* FIXME: We'd have to do something clever to make this work
442 if (!key_eq(op[start].data, op[op_num].data))
443 fail(filename[file], op_num+1, "nested chainlock calls?");
445 op[start].group_len = op_num - start;
446 for (i = start; i <= op_num; i++)
447 op[i].group_start = start;
450 static void op_analyze_traverse(char *filename[], struct op op[],
451 unsigned file, unsigned op_num, char *words[])
455 op[op_num].key = tdb_null;
457 /* = %u means traverse function terminated. */
459 if (!streq(words[2], "=") || !words[3] || words[4])
460 fail(filename[file], op_num+1, "expect = <num>");
461 op[op_num].ret = atoi(words[3]);
465 start = op_find_start(op, op_num, OP_TDB_TRAVERSE_START);
467 start = op_find_start(op, op_num, OP_TDB_TRAVERSE_READ_START);
469 fail(filename[file], op_num+1, "no traversal start found");
471 op[start].group_len = op_num - start;
473 /* Don't roll in nested traverse/chainlock */
474 for (i = start; i <= op_num; i++)
475 if (!op[i].group_start)
476 op[i].group_start = start;
479 /* Keep -Wmissing-declarations happy: this prototype precedes the
 * inclusion of keywords.c just below (presumably where find_keyword is
 * defined -- confirm against the generated file).  load_tracefile()
 * calls it with an operation name and its strlen, then uses the
 * returned entry's type and enhance_op members. */
480 const struct op_table *
481 find_keyword (register const char *str, register unsigned int len);
483 #include "keywords.c"
486 /* We can have more than one */
487 struct list_node pre_list;
488 struct list_node post_list;
489 struct op_desc needs;
490 struct op_desc prereq;
493 static void check_deps(const char *filename, struct op op[], unsigned int num)
498 for (i = 1; i < num; i++)
499 if (!list_empty(&op[i].pre))
500 fail(filename, i+1, "Still has dependencies");
504 static void dump_pre(char *filename[], struct op *op[],
505 unsigned int file, unsigned int i)
509 printf("%s:%u (%u) still waiting for:\n", filename[file], i+1,
511 list_for_each(&op[file][i].pre, dep, pre_list)
512 printf(" %s:%u (%u)\n",
513 filename[dep->prereq.file], dep->prereq.op_num+1,
514 op[dep->prereq.file][dep->prereq.op_num].seqnum);
515 check_deps(filename[file], op[file], i);
518 /* We simply read/write pointers, since we all are children. */
519 static bool do_pre(struct tdb_context *tdb,
520 char *filename[], struct op *op[],
521 unsigned int file, int pre_fd, unsigned int i,
524 while (!list_empty(&op[file][i].pre)) {
528 printf("%s:%u:waiting for pre\n", filename[file], i+1);
535 while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) {
536 if (errno == EINTR) {
538 struct op_desc desc = { file,i };
539 warnx("%s:%u:avoiding deadlock",
540 filename[file], i+1);
541 if (write(backoff_fd, &desc,
542 sizeof(desc)) != sizeof(desc))
543 err(1, "writing backoff_fd");
546 dump_pre(filename, op, file, i);
549 errx(1, "Reading from pipe");
554 printf("%s:%u:got pre %u from %s:%u\n", filename[file], i+1,
555 dep->needs.op_num+1, filename[dep->prereq.file],
556 dep->prereq.op_num+1);
559 /* This could be any op, not just this one. */
565 static void do_post(char *filename[], struct op *op[],
566 unsigned int file, unsigned int i)
570 list_for_each(&op[file][i].post, dep, post_list) {
572 printf("%s:%u:sending to file %s:%u\n", filename[file], i+1,
573 filename[dep->needs.file], dep->needs.op_num+1);
575 if (write(pipes[dep->needs.file].fd[1], &dep, sizeof(dep))
577 err(1, "%s:%u failed to tell file %s",
578 filename[file], i+1, filename[dep->needs.file]);
582 static int get_len(TDB_DATA key, TDB_DATA data, void *private_data)
587 static unsigned run_ops(struct tdb_context *tdb,
592 unsigned int start, unsigned int stop,
595 struct traverse_info {
604 /* More complex. Just do whatever they did at the n'th entry. */
605 static int nontrivial_traverse(struct tdb_context *tdb,
606 TDB_DATA key, TDB_DATA data,
609 struct traverse_info *tinfo = _tinfo;
610 unsigned int trav_len = tinfo->op[tinfo->file][tinfo->start].group_len;
611 bool avoid_deadlock = false;
613 if (tinfo->i == tinfo->start + trav_len) {
614 /* This can happen if traverse expects to be empty. */
617 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
618 "traverse did not terminate");
621 if (tinfo->op[tinfo->file][tinfo->i].type != OP_TDB_TRAVERSE)
622 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
623 "%s:%u:traverse terminated early");
625 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
626 avoid_deadlock = true;
629 /* Run any normal ops. */
630 tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->op,
631 tinfo->file, tinfo->i+1, tinfo->start + trav_len,
634 /* We backed off, or we hit OP_TDB_TRAVERSE_END/EARLY. */
635 if (tinfo->op[tinfo->file][tinfo->i].type != OP_TDB_TRAVERSE)
641 static unsigned op_traverse(struct tdb_context *tdb,
645 int (*traversefn)(struct tdb_context *,
646 tdb_traverse_func, void *),
650 struct traverse_info tinfo = { op, filename, file, pre_fd,
653 traversefn(tdb, nontrivial_traverse, &tinfo);
655 /* Traversing in wrong order can have strange effects: eg. if
656 * original traverse went A (delete A), B, we might do B
657 * (delete A). So if we have ops left over, we do it now. */
658 while (tinfo.i != start + op[file][start].group_len) {
659 if (op[file][tinfo.i].type == OP_TDB_TRAVERSE
660 || op[file][tinfo.i].type == OP_TDB_TRAVERSE_END_EARLY)
663 tinfo.i = run_ops(tdb, pre_fd, filename, op, file,
665 start + op[file][start].group_len,
672 static void break_out(int sig)
676 static __attribute__((noinline))
677 unsigned run_ops(struct tdb_context *tdb,
682 unsigned int start, unsigned int stop,
688 sa.sa_handler = break_out;
691 sigaction(SIGALRM, &sa, NULL);
692 for (i = start; i < stop; i++) {
693 if (!do_pre(tdb, filename, op, file, pre_fd, i, backoff))
696 switch (op[file][i].type) {
698 try(tdb_lockall(tdb), op[file][i].ret);
700 case OP_TDB_LOCKALL_MARK:
701 try(tdb_lockall_mark(tdb), op[file][i].ret);
703 case OP_TDB_LOCKALL_UNMARK:
704 try(tdb_lockall_unmark(tdb), op[file][i].ret);
706 case OP_TDB_LOCKALL_NONBLOCK:
707 unreliable(tdb_lockall_nonblock(tdb), op[file][i].ret,
708 tdb_lockall(tdb), tdb_unlockall(tdb));
710 case OP_TDB_UNLOCKALL:
711 try(tdb_unlockall(tdb), op[file][i].ret);
713 case OP_TDB_LOCKALL_READ:
714 try(tdb_lockall_read(tdb), op[file][i].ret);
716 case OP_TDB_LOCKALL_READ_NONBLOCK:
717 unreliable(tdb_lockall_read_nonblock(tdb),
719 tdb_lockall_read(tdb),
720 tdb_unlockall_read(tdb));
722 case OP_TDB_UNLOCKALL_READ:
723 try(tdb_unlockall_read(tdb), op[file][i].ret);
725 case OP_TDB_CHAINLOCK:
726 try(tdb_chainlock(tdb, op[file][i].key),
729 case OP_TDB_CHAINLOCK_NONBLOCK:
730 unreliable(tdb_chainlock_nonblock(tdb, op[file][i].key),
732 tdb_chainlock(tdb, op[file][i].key),
733 tdb_chainunlock(tdb, op[file][i].key));
735 case OP_TDB_CHAINLOCK_MARK:
736 try(tdb_chainlock_mark(tdb, op[file][i].key),
739 case OP_TDB_CHAINLOCK_UNMARK:
740 try(tdb_chainlock_unmark(tdb, op[file][i].key),
743 case OP_TDB_CHAINUNLOCK:
744 try(tdb_chainunlock(tdb, op[file][i].key),
747 case OP_TDB_CHAINLOCK_READ:
748 try(tdb_chainlock_read(tdb, op[file][i].key),
751 case OP_TDB_CHAINUNLOCK_READ:
752 try(tdb_chainunlock_read(tdb, op[file][i].key),
755 case OP_TDB_PARSE_RECORD:
756 try(tdb_parse_record(tdb, op[file][i].key, get_len,
761 try(tdb_exists(tdb, op[file][i].key), op[file][i].ret);
764 try(tdb_store(tdb, op[file][i].key, op[file][i].data,
769 try(tdb_append(tdb, op[file][i].key, op[file][i].data),
772 case OP_TDB_GET_SEQNUM:
773 try(tdb_get_seqnum(tdb), op[file][i].ret);
775 case OP_TDB_WIPE_ALL:
776 try(tdb_wipe_all(tdb), op[file][i].ret);
778 case OP_TDB_TRANSACTION_START:
779 try(tdb_transaction_start(tdb), op[file][i].ret);
781 case OP_TDB_TRANSACTION_CANCEL:
782 try(tdb_transaction_cancel(tdb), op[file][i].ret);
784 case OP_TDB_TRANSACTION_PREPARE_COMMIT:
785 try(tdb_transaction_prepare_commit(tdb),
788 case OP_TDB_TRANSACTION_COMMIT:
789 try(tdb_transaction_commit(tdb), op[file][i].ret);
791 case OP_TDB_TRAVERSE_READ_START:
792 i = op_traverse(tdb, pre_fd, filename, file,
793 tdb_traverse_read, op, i);
795 case OP_TDB_TRAVERSE_START:
796 i = op_traverse(tdb, pre_fd, filename, file,
797 tdb_traverse, op, i);
799 case OP_TDB_TRAVERSE:
800 case OP_TDB_TRAVERSE_END_EARLY:
801 /* Terminate: we're in a traverse, and we've
804 case OP_TDB_TRAVERSE_END:
805 fail(filename[file], i+1, "unexpected end traverse");
806 /* FIXME: These must be treated like traverse. */
807 case OP_TDB_FIRSTKEY:
808 if (!key_eq(tdb_firstkey(tdb), op[file][i].data))
809 fail(filename[file], i+1, "bad firstkey");
812 if (!key_eq(tdb_nextkey(tdb, op[file][i].key),
814 fail(filename[file], i+1, "bad nextkey");
817 TDB_DATA f = tdb_fetch(tdb, op[file][i].key);
818 if (!key_eq(f, op[file][i].data))
819 fail(filename[file], i+1, "bad fetch %u",
824 try(tdb_delete(tdb, op[file][i].key), op[file][i].ret);
827 /* We do nothing here: the transaction and traverse are
828 * traced. It's in the trace to mark it, since it
829 * may become unnecessary in future. */
832 do_post(filename, op, file, i);
837 /* tdbtorture, in particular, can do a tdb_close with a transaction in
839 static struct op *maybe_cancel_transaction(char *filename[], unsigned int file,
840 struct op *op, unsigned int *num)
842 unsigned int start = op_find_start(op, *num, OP_TDB_TRANSACTION_START);
845 char *words[] = { "<unknown>", "tdb_close", NULL };
846 add_op(filename[file], &op, *num, op[start].seqnum,
847 OP_TDB_TRANSACTION_CANCEL);
848 op_analyze_transaction(filename, op, file, *num, words);
854 static struct op *load_tracefile(char *filename[],
857 unsigned int *hashsize,
858 unsigned int *tdb_flags,
859 unsigned int *open_flags)
862 struct op *op = talloc_array(NULL, struct op, 1);
867 contents = grab_file(NULL, filename[file], NULL);
869 err(1, "Reading %s", filename[file]);
871 lines = strsplit(contents, contents, "\n", NULL);
873 errx(1, "%s is empty", filename[file]);
875 words = strsplit(lines, lines[0], " ", NULL);
876 if (!streq(words[1], "tdb_open"))
877 fail(filename[file], 1, "does not start with tdb_open");
879 *hashsize = atoi(words[2]);
880 *tdb_flags = strtoul(words[3], NULL, 0);
881 *open_flags = strtoul(words[4], NULL, 0);
883 for (i = 1; lines[i]; i++) {
884 const struct op_table *opt;
886 words = strsplit(lines, lines[i], " ", NULL);
887 if (!words[0] || !words[1])
888 fail(filename[file], i+1,
889 "Expected seqnum number and op");
891 opt = find_keyword(words[1], strlen(words[1]));
893 if (streq(words[1], "tdb_close")) {
895 fail(filename[file], i+2,
896 "lines after tdb_close");
899 return maybe_cancel_transaction(filename, file,
902 fail(filename[file], i+1,
903 "Unknown operation '%s'", words[1]);
906 add_op(filename[file], &op, i, atoi(words[0]), opt->type);
907 opt->enhance_op(filename, op, file, i, words);
910 fprintf(stderr, "%s:%u:last operation is not tdb_close: incomplete?",
914 return maybe_cancel_transaction(filename, file, op, num);
917 /* We remember all the keys we've ever seen, and who has them. */
920 unsigned int num_users;
921 struct op_desc *user;
924 static bool starts_transaction(const struct op *op)
926 return op->type == OP_TDB_TRANSACTION_START;
929 static bool in_transaction(const struct op op[], unsigned int i)
931 return op[i].group_start && starts_transaction(&op[op[i].group_start]);
934 static bool successful_transaction(const struct op *op)
936 return starts_transaction(op)
937 && op[op->group_len].type == OP_TDB_TRANSACTION_COMMIT;
940 static bool starts_traverse(const struct op *op)
942 return op->type == OP_TDB_TRAVERSE_START
943 || op->type == OP_TDB_TRAVERSE_READ_START;
946 static bool in_traverse(const struct op op[], unsigned int i)
948 return op[i].group_start && starts_traverse(&op[op[i].group_start]);
951 static bool starts_chainlock(const struct op *op)
953 return op->type == OP_TDB_CHAINLOCK_READ
954 || op->type == OP_TDB_CHAINLOCK;
957 static bool in_chainlock(const struct op op[], unsigned int i)
959 return op[i].group_start && starts_chainlock(&op[op[i].group_start]);
/* Sentinel requirement values handed back by needs().  satisfies()
 * recognises each of them by comparing addresses, and asserts that the
 * data it is given is never one of these sentinels. */
962 static const TDB_DATA must_not_exist;
963 static const TDB_DATA must_exist;
964 static const TDB_DATA not_exists_or_empty;
966 /* NULL means doesn't care if it exists or not, &must_exist means
967 * it must exist but we don't care what, &must_not_exist means it must
968 * not exist, otherwise the data it needs. */
969 static const TDB_DATA *needs(const TDB_DATA *key, const struct op *op)
971 /* Look through for an op in this transaction which needs this key. */
972 if (starts_transaction(op) || starts_chainlock(op)) {
974 const TDB_DATA *need = NULL;
976 for (i = 1; i < op->group_len; i++) {
977 if (key_eq(op[i].key, *key)
978 || op[i].type == OP_TDB_WIPE_ALL) {
979 need = needs(key, &op[i]);
980 /* tdb_exists() is special: there might be
981 * something in the transaction with more
982 * specific requirements. Other ops don't have
983 * specific requirements (eg. store or delete),
984 * but they change the value so we can't get
985 * more information from future ops. */
986 if (op[i].type != OP_TDB_EXISTS)
995 /* FIXME: Pull forward deps, since we can deadlock */
996 case OP_TDB_CHAINLOCK:
997 case OP_TDB_CHAINLOCK_NONBLOCK:
998 case OP_TDB_CHAINLOCK_MARK:
999 case OP_TDB_CHAINLOCK_UNMARK:
1000 case OP_TDB_CHAINUNLOCK:
1001 case OP_TDB_CHAINLOCK_READ:
1002 case OP_TDB_CHAINUNLOCK_READ:
1006 if (op->append.pre.dsize == 0)
1007 return ¬_exists_or_empty;
1008 return &op->append.pre;
1011 if (op->flag == TDB_INSERT) {
1015 return &must_not_exist;
1016 } else if (op->flag == TDB_MODIFY) {
1018 return &must_not_exist;
1022 /* No flags? Don't care */
1029 return &must_not_exist;
1031 case OP_TDB_PARSE_RECORD:
1033 return &must_not_exist;
1036 /* FIXME: handle these. */
1037 case OP_TDB_WIPE_ALL:
1038 case OP_TDB_FIRSTKEY:
1039 case OP_TDB_NEXTKEY:
1040 case OP_TDB_GET_SEQNUM:
1041 case OP_TDB_TRAVERSE:
1042 case OP_TDB_TRANSACTION_COMMIT:
1043 case OP_TDB_TRANSACTION_CANCEL:
1044 case OP_TDB_TRANSACTION_START:
1049 return &must_not_exist;
1054 return &must_not_exist;
1058 errx(1, "Unexpected op type %i", op->type);
1063 /* What's the data after this op? pre if nothing changed. */
1064 static const TDB_DATA *gives(const TDB_DATA *key, const TDB_DATA *pre,
1065 const struct op *op)
1067 if (starts_transaction(op) || starts_chainlock(op)) {
1070 /* Cancelled transactions don't change anything. */
1071 if (op[op->group_len].type == OP_TDB_TRANSACTION_CANCEL)
1073 assert(op[op->group_len].type == OP_TDB_TRANSACTION_COMMIT
1074 || op[op->group_len].type == OP_TDB_CHAINUNLOCK_READ
1075 || op[op->group_len].type == OP_TDB_CHAINUNLOCK);
1077 for (i = 1; i < op->group_len; i++) {
1078 /* This skips nested transactions, too */
1079 if (key_eq(op[i].key, *key)
1080 || op[i].type == OP_TDB_WIPE_ALL)
1081 pre = gives(key, pre, &op[i]);
1086 /* Failed ops don't change state of db. */
1090 if (op->type == OP_TDB_DELETE || op->type == OP_TDB_WIPE_ALL)
1093 if (op->type == OP_TDB_APPEND)
1094 return &op->append.post;
1096 if (op->type == OP_TDB_STORE)
1102 static void add_hash_user(struct keyinfo *hash,
1106 unsigned int op_num)
1108 hash[h].user = talloc_realloc(hash, hash[h].user,
1109 struct op_desc, hash[h].num_users+1);
1111 /* If it's in a transaction, it's the transaction which
1112 * matters from an analysis POV. */
1113 if (in_transaction(op[file], op_num)
1114 || in_chainlock(op[file], op_num)) {
1117 op_num = op[file][op_num].group_start;
1119 /* Don't include twice. */
1120 for (i = 0; i < hash[h].num_users; i++) {
1121 if (hash[h].user[i].file == file
1122 && hash[h].user[i].op_num == op_num)
1126 hash[h].user[hash[h].num_users].op_num = op_num;
1127 hash[h].user[hash[h].num_users].file = file;
1128 hash[h].num_users++;
1131 static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[],
1134 unsigned int i, j, h;
1135 struct keyinfo *hash;
1137 hash = talloc_zero_array(op[0], struct keyinfo, total_keys*2);
1138 for (i = 0; i < num; i++) {
1139 for (j = 1; j < num_ops[i]; j++) {
1140 /* We can't do this on allocation, due to realloc. */
1141 list_head_init(&op[i][j].post);
1142 list_head_init(&op[i][j].pre);
1144 if (!op[i][j].key.dptr)
1147 h = hash_key(&op[i][j].key) % (total_keys * 2);
1148 while (!key_eq(hash[h].key, op[i][j].key)) {
1149 if (!hash[h].key.dptr) {
1150 hash[h].key = op[i][j].key;
1153 h = (h + 1) % (total_keys * 2);
1155 /* Might as well save some memory if we can. */
1156 if (op[i][j].key.dptr != hash[h].key.dptr) {
1157 talloc_free(op[i][j].key.dptr);
1158 op[i][j].key.dptr = hash[h].key.dptr;
1161 add_hash_user(hash, h, op, i, j);
1165 /* Any wipe all entries need adding to all hash entries. */
1166 for (h = 0; h < total_keys*2; h++) {
1167 if (!hash[h].num_users)
1170 for (i = 0; i < num_wipe_alls; i++)
1171 add_hash_user(hash, h, op,
1172 wipe_alls[i].file, wipe_alls[i].op_num);
1178 static bool satisfies(const TDB_DATA *key, const TDB_DATA *data,
1179 const struct op *op)
1181 const TDB_DATA *need = needs(key, op);
1183 /* Don't need anything? Cool. */
1187 /* This should be tdb_null or a real value. */
1188 assert(data != &must_exist);
1189 assert(data != &must_not_exist);
1190 assert(data != ¬_exists_or_empty);
1192 /* Must not exist? data must not exist. */
1193 if (need == &must_not_exist)
1194 return data == &tdb_null;
1197 if (need == &must_exist)
1198 return data != &tdb_null;
1200 /* Either noexist or empty. */
1201 if (need == ¬_exists_or_empty)
1202 return data->dsize == 0;
1204 /* Needs something specific. */
1205 return key_eq(*data, *need);
1208 static void move_to_front(struct op_desc res[], unsigned off, unsigned elem)
1211 struct op_desc tmp = res[elem];
1212 memmove(res + off + 1, res + off, (elem - off)*sizeof(res[0]));
1217 static void restore_to_pos(struct op_desc res[], unsigned off, unsigned elem)
1220 struct op_desc tmp = res[off];
1221 memmove(res + off, res + off + 1, (elem - off)*sizeof(res[0]));
1226 static bool sort_deps(char *filename[], struct op *op[],
1227 struct op_desc res[],
1228 unsigned off, unsigned num,
1229 const TDB_DATA *key, const TDB_DATA *data,
1230 unsigned num_files, unsigned fuzz)
1232 unsigned int i, files_done;
1234 bool done[num_files];
1236 /* None left? We're sorted. */
1240 /* Does this make sequence number go backwards? Allow a little fuzz. */
1242 int seqnum1 = op[res[off-1].file][res[off-1].op_num].seqnum;
1243 int seqnum2 = op[res[off].file][res[off].op_num].seqnum;
1245 if (seqnum1 - seqnum2 > (int)fuzz) {
1247 printf("Seqnum jump too far (%u -> %u)\n",
1254 memset(done, 0, sizeof(done));
1256 /* Since ops within a trace file are ordered, we just need to figure
1257 * out which file to try next. Since we don't take into account
1258 * inter-key relationships (which exist by virtue of trace file order),
1259 * we minimize the chance of harm by trying to keep in seqnum order. */
1260 for (files_done = 0, i = off; i < num && files_done < num_files; i++) {
1261 if (done[res[i].file])
1264 this_op = &op[res[i].file][res[i].op_num];
1266 /* Is what we have good enough for this op? */
1267 if (satisfies(key, data, this_op)) {
1268 move_to_front(res, off, i);
1269 if (sort_deps(filename, op, res, off+1, num,
1270 key, gives(key, data, this_op),
1273 restore_to_pos(res, off, i);
1275 done[res[i].file] = true;
1279 /* No combination worked. */
1283 static void check_dep_sorting(struct op_desc user[], unsigned num_users,
1288 unsigned minima[num_files];
1290 memset(minima, 0, sizeof(minima));
1291 for (i = 0; i < num_users; i++) {
1292 assert(minima[user[i].file] < user[i].op_num);
1293 minima[user[i].file] = user[i].op_num;
1298 /* All these ops happen on the same key. Which comes first?
1300 * This can happen both because read ops or failed write ops don't
1301 * change sequence number, and also due to race since we access the
1302 * number unlocked (the race can cause less detectable ordering problems,
1303 * in which case we'll deadlock and report: fix manually in that case).
1305 static bool figure_deps(char *filename[], struct op *op[],
1306 const TDB_DATA *key, const TDB_DATA *data,
1307 struct op_desc user[],
1308 unsigned num_users, unsigned num_files)
1312 /* We prefer to keep strict seqnum order if possible: it's the
1313 * most likely. We get more lax if that fails. */
1314 for (fuzz = 0; fuzz < 100; fuzz = (fuzz + 1)*2) {
1315 if (sort_deps(filename, op, user, 0, num_users, key, data,
1323 check_dep_sorting(user, num_users, num_files);
1327 /* We're having trouble sorting out dependencies for this key. Assume that it's
1328 * a pre-existing record in the db, so determine a likely value. */
1329 static const TDB_DATA *preexisting_data(char *filename[], struct op *op[],
1330 const TDB_DATA *key,
1331 struct op_desc *user,
1332 unsigned int num_users)
1335 const TDB_DATA *data;
1337 for (i = 0; i < num_users; i++) {
1338 data = needs(key, &op[user->file][user->op_num]);
1339 if (data && data != &must_not_exist) {
1340 printf("%s:%u: needs pre-existing record\n",
1341 filename[user->file], user->op_num+1);
1348 static void sort_ops(struct tdb_context *tdb,
1349 struct keyinfo hash[], char *filename[], struct op *op[],
1354 /* GCC nested function extension. How cool is this? */
1355 int compare_seqnum(const void *_a, const void *_b)
1357 const struct op_desc *a = _a, *b = _b;
1359 /* First, maintain order within any trace file. */
1360 if (a->file == b->file)
1361 return a->op_num - b->op_num;
1363 /* Otherwise, arrange by seqnum order. */
1364 if (op[a->file][a->op_num].seqnum !=
1365 op[b->file][b->op_num].seqnum)
1366 return op[a->file][a->op_num].seqnum
1367 - op[b->file][b->op_num].seqnum;
1369 /* Cancelled transactions are assumed to happen first. */
1370 if (starts_transaction(&op[a->file][a->op_num])
1371 && !successful_transaction(&op[a->file][a->op_num]))
1373 if (starts_transaction(&op[b->file][b->op_num])
1374 && !successful_transaction(&op[b->file][b->op_num]))
1381 /* Now sort into seqnum order. */
1382 for (h = 0; h < total_keys * 2; h++) {
1383 struct op_desc *user = hash[h].user;
1385 qsort(user, hash[h].num_users, sizeof(user[0]), compare_seqnum);
1386 if (!figure_deps(filename, op, &hash[h].key, &tdb_null, user,
1387 hash[h].num_users, num)) {
1388 const TDB_DATA *data;
1390 data = preexisting_data(filename, op, &hash[h].key,
1391 user, hash[h].num_users);
1392 /* Give the first op what it wants: does that help? */
1393 if (!figure_deps(filename, op, &hash[h].key, data, user,
1394 hash[h].num_users, num))
1395 fail(filename[user[0].file], user[0].op_num+1,
1396 "Could not resolve inter-dependencies");
1397 if (tdb_store(tdb, hash[h].key, *data, TDB_INSERT) != 0)
1398 errx(1, "Could not store initial value");
/* talloc destructor for a struct depend (installed by add_dependency()
 * through talloc_set_destructor()): unlinks dep from the two lists it is
 * threaded on via pre_list and post_list. The return statement is not
 * visible here. */
1403 static int destroy_depend(struct depend *dep)
1405 list_del(&dep->pre_list);
1406 list_del(&dep->post_list);
/* Record that the op described by *needs must wait for the op described by
 * *prereq. A struct depend is allocated under the talloc context ctx and
 * filled with copies of both descriptors. Those copies may then be
 * retargeted (see the comments below: traverse/transaction groups, and
 * prerequisites that start a transaction or chainlock) before the
 * dependency is linked to the prerequisite op's post list and the
 * dependent op's pre list. destroy_depend is installed as the talloc
 * destructor, so freeing the dependency also unlinks it.
 * When both descriptors name the same file the visible code only asserts
 * that prereq comes earlier; what follows that assert is not shown here. */
1410 static void add_dependency(void *ctx,
1413 const struct op_desc *needs,
1414 const struct op_desc *prereq)
1418 /* We don't depend on ourselves. */
1419 if (needs->file == prereq->file) {
1420 assert(prereq->op_num < needs->op_num);
1425 printf("%s:%u: depends on %s:%u\n",
1426 filename[needs->file], needs->op_num+1,
1427 filename[prereq->file], prereq->op_num+1);
1430 dep = talloc(ctx, struct depend);
1431 dep->needs = *needs;
1432 dep->prereq = *prereq;
1434 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1435 /* If something in a traverse depends on something in another
1436 * traverse/transaction, it creates a dependency between the
1438 if ((in_traverse(op[prereq->file], prereq->op_num)
1439 && (starts_transaction(&op[needs->file][needs->op_num])
1440 || starts_traverse(&op[needs->file][needs->op_num])))
1441 || (in_traverse(op[needs->file], needs->op_num)
1442 && (starts_transaction(&op[prereq->file][prereq->op_num])
1443 || starts_traverse(&op[prereq->file][prereq->op_num])))) {
1446 /* We are satisfied by end of group. */
1447 start = op[prereq->file][prereq->op_num].group_start;
1448 dep->prereq.op_num = start + op[prereq->file][start].group_len;
1449 /* And we need that done by start of our group. */
1450 dep->needs.op_num = op[needs->file][needs->op_num].group_start;
1453 /* There is also this case:
1454 * <traverse> <read foo> ...
1455 * <transaction> ... </transaction> <create foo>
1456 * Where if we start the traverse then wait, we could block
1457 * the transaction and deadlock.
1459 * We try to address this by ensuring that where seqnum indicates it's
1460 * possible, we wait for <create foo> before *starting* traverse.
1462 else if (in_traverse(op[needs->file], needs->op_num)) {
1463 struct op *need = &op[needs->file][needs->op_num];
1464 if (op[needs->file][need->group_start].seqnum >
1465 op[prereq->file][prereq->op_num].seqnum) {
1466 dep->needs.op_num = need->group_start;
1471 /* If you depend on a transaction or chainlock, you actually
1472 * depend on it ending. */
1473 if (starts_transaction(&op[prereq->file][dep->prereq.op_num])
1474 || starts_chainlock(&op[prereq->file][dep->prereq.op_num])) {
1476 += op[dep->prereq.file][dep->prereq.op_num].group_len;
/* NOTE(review): dep->prereq is held by value (it is assigned from *prereq
 * above and accessed with '.' everywhere else in this function), so the
 * '->' accesses in the printf below look like typos for dep->prereq.file
 * and dep->prereq.op_num. Presumably this only builds because the printf
 * sits inside a disabled debug #if -- confirm. */
1478 printf("-> Actually end of transaction %s:%u\n",
1479 filename[dep->prereq->file], dep->prereq->op_num+1);
1482 /* We should never create a dependency from middle of
1484 assert(!in_transaction(op[prereq->file], dep->prereq.op_num)
1485 || op[prereq->file][dep->prereq.op_num].type
1486 == OP_TDB_TRANSACTION_COMMIT
1487 || op[prereq->file][dep->prereq.op_num].type
1488 == OP_TDB_TRANSACTION_CANCEL);
1490 list_add(&op[dep->prereq.file][dep->prereq.op_num].post,
1492 list_add(&op[dep->needs.file][dep->needs.op_num].pre,
1494 talloc_set_destructor(dep, destroy_depend);
/* True when gives() returns non-NULL for this key and op (NULL is passed
 * as gives()'s second argument); going by the function name, that is taken
 * to mean the op changes the database record for *key. */
1497 static bool changes_db(const TDB_DATA *key, const struct op *op)
1499 return gives(key, NULL, op) != NULL;
/* Add the dependencies required by user[i] relative to user[prev];
 * derive_dependencies() calls this for ops that change the key.
 * If prev is the entry immediately before i, a single dependency on
 * user[prev] is added (presumably followed by an early return, which is
 * not visible here). Otherwise the entries between prev and i are walked
 * backwards from i-1, and a dependency is added on the first entry met for
 * each file other than user[i]'s own, i.e. the latest one per file.
 * NOTE(review): the parameter list and the declaration of deps[] are only
 * partly visible; deps[] is used as a per-file "already handled" flag
 * array -- confirm its size covers every file index. */
1502 static void depend_on_previous(struct op *op[],
1505 struct op_desc user[],
1515 if (prev == i - 1) {
1516 /* Just depend on previous. */
1517 add_dependency(NULL, op, filename, &user[i], &user[prev]);
1521 /* We have to wait for the readers. Find last one in *each* file. */
1522 memset(deps, 0, sizeof(deps));
1523 deps[user[i].file] = true;
1524 for (j = i - 1; j > prev; j--) {
1525 if (!deps[user[j].file]) {
1526 add_dependency(NULL, op, filename, &user[i], &user[j]);
1527 deps[user[j].file] = true;
1532 /* This is simple, but not complete. We don't take into account
1533 * indirect dependencies. */
/* Prunes entries from each op's pre list in two passes.
 * First pass: for every op, prev[] remembers one dependency per
 * prerequisite file. When another list entry for the same file has a
 * higher prereq.op_num, the remembered one is released with talloc_free()
 * (its destructor unlinks it) and replaced.
 * Second pass: for each file i, deps[] holds a prereq.op_num per
 * prerequisite file and is compared with each dependency's prereq.op_num.
 * The action taken when deps[] is already >= that value is not visible
 * here (presumably the dependency is dropped as already satisfied);
 * the initialisation of deps[] is not visible either. */
1534 static void optimize_dependencies(struct op *op[], unsigned int num_ops[],
1539 /* There can only be one real dependency on each file */
1540 for (i = 0; i < num; i++) {
1541 for (j = 1; j < num_ops[i]; j++) {
1542 struct depend *dep, *next;
1543 struct depend *prev[num];
1545 memset(prev, 0, sizeof(prev));
1547 list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
1548 if (!prev[dep->prereq.file]) {
1549 prev[dep->prereq.file] = dep;
1552 if (prev[dep->prereq.file]->prereq.op_num
1553 < dep->prereq.op_num) {
1554 talloc_free(prev[dep->prereq.file]);
1555 prev[dep->prereq.file] = dep;
1562 for (i = 0; i < num; i++) {
1565 for (j = 0; j < num; j++)
1568 for (j = 1; j < num_ops[i]; j++) {
1569 struct depend *dep, *next;
1571 list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
1572 if (deps[dep->prereq.file]
1573 >= (int)dep->prereq.op_num)
1576 deps[dep->prereq.file]
1577 = dep->prereq.op_num;
1583 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1584 /* Force an order among the traversals, so they don't deadlock (as much) */
/* Collects an op_desc for every traverse start that is not inside a
 * transaction (scanning ops 1..num_ops[i]-1 of each file) and sorts them
 * with compare_traverse_desc. Then, for each adjacent pair in sorted order
 * whose comparison is non-zero, the later traverse is made to depend on
 * the end of the earlier one (its start op_num plus group_len).
 * The test for two read traverses has its action on a line not shown
 * here; per the comment, such pairs get no dependency. */
1585 static void make_traverse_depends(char *filename[],
1586 struct op *op[], unsigned int num_ops[],
1589 unsigned int i, num_traversals = 0;
1591 struct op_desc *desc;
1593 /* Sort by which one runs first. */
/* Nested function (GCC extension) so that it can read the enclosing
 * function's op[] argument. */
1594 int compare_traverse_desc(const void *_a, const void *_b)
1596 const struct op_desc *da = _a, *db = _b;
1597 const struct op *a = &op[da->file][da->op_num],
1598 *b = &op[db->file][db->op_num];
1600 if (a->seqnum != b->seqnum)
1601 return a->seqnum - b->seqnum;
1603 /* If they have same seqnum, it means one didn't make any
1604 * changes. Thus sort by end in that case. */
/* a[a->group_len] indexes forward from the start op by group_len ops. */
1605 return a[a->group_len].seqnum - b[b->group_len].seqnum;
1608 desc = talloc_array(NULL, struct op_desc, 1);
1611 for (i = 0; i < num; i++) {
1612 for (j = 1; j < num_ops[i]; j++) {
1613 /* Traverse start (ignore those in
1614 * transactions; they're already covered by
1615 * transaction dependencies). */
1616 if (starts_traverse(&op[i][j])
1617 && !in_transaction(op[i], j)) {
1618 desc = talloc_realloc(NULL, desc,
1621 desc[num_traversals].file = i;
1622 desc[num_traversals].op_num = j;
1627 qsort(desc, num_traversals, sizeof(desc[0]), compare_traverse_desc);
1629 for (i = 1; i < num_traversals; i++) {
1630 const struct op *prev = &op[desc[i-1].file][desc[i-1].op_num];
1631 const struct op *curr = &op[desc[i].file][desc[i].op_num];
1633 /* Read traverses don't depend on each other (read lock). */
1634 if (prev->type == OP_TDB_TRAVERSE_READ_START
1635 && curr->type == OP_TDB_TRAVERSE_READ_START)
1638 /* Only make dependency if it's clear. */
1639 if (compare_traverse_desc(&desc[i], &desc[i-1])) {
1640 /* i depends on end of traverse i-1. */
1641 struct op_desc end = desc[i-1];
1642 end.op_num += prev->group_len;
1643 add_dependency(NULL, op, filename, &desc[i], &end);
/* Adds O_NONBLOCK to fd's file status flags (F_GETFL, then F_SETFL) and
 * exits via err() if F_SETFL fails. The result of the inner F_GETFL call
 * is not checked for failure before being OR-ed with O_NONBLOCK. */
1649 static void set_nonblock(int fd)
1651 if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL)|O_NONBLOCK) != 0)
1652 err(1, "Setting pipe nonblocking");
/* Reads op_desc records from fd (main passes the read end of the backoff
 * pipe) and, for each one, scans backwards from desc.op_num through that
 * file's op array. An OP_TDB_TRAVERSE entry is turned into
 * OP_TDB_TRAVERSE_END_EARLY. At a traverse start, the start op is saved,
 * the ops after it are shifted down one slot with memmove, the saved
 * start is written at index desc.op_num, and group_start of the moved ops
 * is rewritten.
 * NOTE(review): the loop continues for any read() result other than -1,
 * so a 0-byte or short read would be processed as a full record --
 * confirm this cannot happen for this pipe.
 * NOTE(review): memmove shifts desc.op_num-i-1 entries, so index
 * desc.op_num-1 is not rewritten by the visible lines -- confirm the
 * intended count.
 * The lines that update and return `handled` are not visible here. */
1655 static bool handle_backoff(struct op *op[], int fd)
1657 struct op_desc desc;
1658 bool handled = false;
1660 /* Sloppy coding: we assume PIPEBUF never fills. */
1661 while (read(fd, &desc, sizeof(desc)) != -1) {
1664 for (i = desc.op_num; i > 0; i--) {
1665 if (op[desc.file][i].type == OP_TDB_TRAVERSE) {
1666 /* We insert a fake end here. */
1667 op[desc.file][i].type
1668 = OP_TDB_TRAVERSE_END_EARLY;
1670 } else if (starts_traverse(&op[desc.file][i])) {
1671 unsigned int start = i;
1672 struct op tmp = op[desc.file][i];
1673 /* Move the ops outside traverse. */
1674 memmove(&op[desc.file][i],
1675 &op[desc.file][i+1],
1676 (desc.op_num-i-1) * sizeof(op[0][0]));
1677 op[desc.file][desc.op_num] = tmp;
1678 while (op[desc.file][i].group_start == start) {
1679 op[desc.file][i++].group_start
1689 #else /* !TRAVERSALS_TAKE_TRANSACTION_LOCK */
/* Alternative definition of handle_backoff, selected by the #else branch
 * when TRAVERSALS_TAKE_TRANSACTION_LOCK is disabled; its body is not
 * visible here. */
1690 static bool handle_backoff(struct op *op[], int fd)
/* Builds the dependency graph between the ops of all trace files:
 * ops are hashed by key (hash_ops) and each key's users are ordered
 * (sort_ops). Every hash slot is then walked in order: an op that changes
 * the key goes through depend_on_previous(), any other op gets a
 * dependency on user[prev] when prev >= 0. Slots with fewer than two users
 * are tested for first (presumably skipped; that line is not visible).
 * Finally make_traverse_depends() adds traverse ordering when
 * TRAVERSALS_TAKE_TRANSACTION_LOCK is set, and optimize_dependencies()
 * prunes the result.
 * Dependencies added directly here use `hash` as their talloc context,
 * whereas depend_on_previous() passes NULL.
 * NOTE(review): prev is presumably the index of the most recent op that
 * changed the key (negative while there is none); its declaration and
 * updates are not visible here -- confirm. */
1696 static void derive_dependencies(struct tdb_context *tdb,
1698 struct op *op[], unsigned int num_ops[],
1701 struct keyinfo *hash;
1704 /* Create hash table for faster key lookup. */
1705 hash = hash_ops(op, num_ops, num);
1707 /* Sort them by sequence number. */
1708 sort_ops(tdb, hash, filename, op, num);
1710 /* Create dependencies back to the last change, rather than
1711 * creating false dependencies by naively making each one
1712 * depend on the previous. This has two purposes: it makes
1713 * later optimization simpler, and it also avoids deadlock with
1714 * same sequence number ops inside traversals (if one
1715 * traversal doesn't write anything, two ops can have the same
1716 * sequence number yet we can create a traversal dependency
1717 * the other way). */
1718 for (h = 0; h < total_keys * 2; h++) {
1721 if (hash[h].num_users < 2)
1724 for (i = 0; i < hash[h].num_users; i++) {
1725 if (changes_db(&hash[h].key, &op[hash[h].user[i].file]
1726 [hash[h].user[i].op_num])) {
1727 depend_on_previous(op, filename, num,
1728 hash[h].user, i, prev);
1730 } else if (prev >= 0)
1731 add_dependency(hash, op, filename,
1733 &hash[h].user[prev]);
1737 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1738 make_traverse_depends(filename, op, num_ops, num);
1741 optimize_dependencies(op, num_ops, num);
/* Runs one timed replay. For each trace file (argv[2] onwards) the visible
 * lines open the tdb named by argv[1] with that file's hash size and
 * flags, block on a one-byte read from fds[0], and then run that file's
 * ops with run_ops() followed by check_deps(). The "fork failed" message
 * indicates this happens in a forked child; the fork call itself is not
 * visible here.
 * The parent takes a start timestamp, writes i bytes (one per trace file)
 * to fds[1] to release the waiting readers, inspects an exit status per
 * trace file, and takes an end timestamp. The difference is computed into
 * diff; when end.tv_usec is smaller than start.tv_usec, 1000000 is added
 * to it first (the matching seconds adjustment is not visible here).
 * Presumably diff is the return value -- the return statement and the
 * remaining parameters are not visible here. */
1744 static struct timeval run_test(char *argv[],
1745 unsigned int num_ops[],
1746 unsigned int hashsize[],
1747 unsigned int tdb_flags[],
1748 unsigned int open_flags[],
1753 struct timeval start, end, diff;
1756 for (i = 0; argv[i+2]; i++) {
1757 struct tdb_context *tdb;
1762 err(1, "fork failed");
1765 tdb = tdb_open_ex(argv[1], hashsize[i],
1767 open_flags[i], 0600, NULL, hash_key);
1769 err(1, "Opening tdb %s", argv[1]);
1771 /* This catches parent exiting. */
1772 if (read(fds[0], &c, 1) != 1)
1774 run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1,
1776 check_deps(argv[2+i], op[i], num_ops[i]);
1783 /* Let everything settle. */
1786 printf("Starting run...");
1788 gettimeofday(&start, NULL);
1789 /* Tell them all to go! Any write of sufficient length will do. */
/* hashsize is used only as a convenient source buffer for the i bytes. */
1790 if (write(fds[1], hashsize, i) != i)
1791 err(1, "Writing to wakeup pipe");
1793 for (i = 0; argv[i + 2]; i++) {
1796 if (!WIFEXITED(status)) {
1797 warnx("Child died with signal %i", WTERMSIG(status));
1799 } else if (WEXITSTATUS(status) != 0)
1800 /* Assume child spat out error. */
1806 gettimeofday(&end, NULL);
1809 if (end.tv_usec < start.tv_usec) {
1810 end.tv_usec += 1000000;
1813 diff.tv_sec = end.tv_sec - start.tv_sec;
1814 diff.tv_usec = end.tv_usec - start.tv_usec;
/* Entry point. Per the usage message, argv[1] is the tdb file and
 * argv[2] onwards are the trace files.
 * Each trace file is loaded with load_tracefile() and given its own pipe.
 * Its recorded flags are adjusted: O_TRUNC and TDB_CLEAR_IF_FIRST are
 * removed, TDB_NOSYNC is added. A fresh tdb is then created so that
 * derive_dependencies() can store seed records, and the inter-file
 * dependencies are computed.
 * Per the comment below, the single-trace case is replayed without
 * forking; otherwise run_test() is repeated for as long as
 * handle_backoff() returns true. Finally the replay time held in diff is
 * printed in microseconds.
 * pipes[] has argc-1 entries: one per trace file plus, at index argc-2,
 * the backoff pipe set up under TRAVERSALS_TAKE_TRANSACTION_LOCK (both of
 * its ends are made non-blocking). */
1818 int main(int argc, char *argv[])
1820 struct timeval diff;
1821 unsigned int i, num_ops[argc], hashsize[argc], tdb_flags[argc], open_flags[argc];
1822 struct op *op[argc];
1824 struct tdb_context *tdb;
1827 errx(1, "Usage: %s <tdbfile> <tracefile>...", argv[0]);
1829 pipes = talloc_array(NULL, struct pipe, argc - 1);
1830 for (i = 0; i < argc - 2; i++) {
1831 printf("Loading tracefile %s...", argv[2+i]);
1833 op[i] = load_tracefile(argv+2, i, &num_ops[i], &hashsize[i],
1834 &tdb_flags[i], &open_flags[i]);
1835 if (pipe(pipes[i].fd) != 0)
1836 err(1, "creating pipe");
1837 /* Don't truncate, or clear if first: we do that. */
1838 open_flags[i] &= ~(O_TRUNC);
1839 tdb_flags[i] &= ~(TDB_CLEAR_IF_FIRST);
1840 /* Open NOSYNC, to save time. */
1841 tdb_flags[i] |= TDB_NOSYNC;
1845 /* Dependency may figure we need to create seed records. */
1846 tdb = tdb_open_ex(argv[1], hashsize[0], TDB_CLEAR_IF_FIRST|TDB_NOSYNC,
1847 O_CREAT|O_TRUNC|O_RDWR, 0600, NULL, hash_key);
1849 printf("Calculating inter-dependencies...");
/* i equals the number of trace files once the loading loop has finished. */
1851 derive_dependencies(tdb, argv+2, op, num_ops, i);
1855 /* Don't fork for single arg case: simple debugging. */
1857 tdb = tdb_open_ex(argv[1], hashsize[0], tdb_flags[0],
1858 open_flags[0], 0600, NULL, hash_key);
1859 printf("Single threaded run...");
1862 run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1, num_ops[0],
1864 check_deps(argv[2], op[0], num_ops[0]);
1871 err(1, "creating pipe");
1873 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1874 if (pipe(pipes[argc-2].fd) != 0)
1875 err(1, "creating pipe");
1876 backoff_fd = pipes[argc-2].fd[1];
1877 set_nonblock(pipes[argc-2].fd[1]);
1878 set_nonblock(pipes[argc-2].fd[0]);
1882 diff = run_test(argv, num_ops, hashsize, tdb_flags, open_flags,
1884 } while (handle_backoff(op, pipes[argc-2].fd[0]));
1886 printf("Time replaying: %lu usec\n",
1887 diff.tv_sec * 1000000UL + diff.tv_usec);