1 #include <ccan/tdb/tdb.h>
2 #include <ccan/grab_file/grab_file.h>
3 #include <ccan/hash/hash.h>
4 #include <ccan/talloc/talloc.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/str/str.h>
7 #include <ccan/list/list.h>
12 #include <sys/types.h>
/* Two-step stringification: STRINGIFY2 applies the # operator directly,
 * STRINGIFY routes through it so a macro argument is expanded before
 * being turned into a string literal.  Used by try()/unreliable() to
 * embed the failing expression text in diagnostics. */
18 #define STRINGIFY2(x) #x
19 #define STRINGIFY(x) STRINGIFY2(x)
21 /* Avoid mod by zero: hash_ops() and derive_dependencies() size and index
 * the key table with (total_keys * 2), so this must never be 0. */
22 static unsigned int total_keys = 1;
24 /* #define DEBUG_DEPS 1 */
26 /* Traversals block transactions in the current implementation.
 * When non-zero, make_traverse_depends() is compiled in and called from
 * derive_dependencies(). */
27 #define TRAVERSALS_TAKE_TRANSACTION_LOCK 1
/* One pipe per trace file (allocated in main()).  do_post() writes
 * dependency notifications to pipes[file].fd[1]; main() passes
 * pipes[file].fd[0] to run_ops() as pre_fd. */
32 static struct pipe *pipes;
/* Report a fatal replay/parse error.
 * Writes "<filename>:<line>: FAIL: " to stderr, then the printf-style
 * message (fmt plus varargs, via vfprintf), then a newline.
 * Declared noreturn; the statement that actually terminates the process
 * is not visible in this listing. */
34 static void __attribute__((noreturn)) fail(const char *filename,
41 fprintf(stderr, "%s:%u: FAIL: ", filename, line);
42 vfprintf(stderr, fmt, ap);
43 fprintf(stderr, "\n");
/* try(expr, expect): check that a replayed call gives the recorded result.
 * If ret differs from expect, fail() is called with the stringified
 * expression and the value obtained.  (The assignment of expr to ret is
 * presumably on a line not shown here.)
 * Relies on `filename`, `file` and `i` being in scope at the call site,
 * as they are inside run_ops(). */
49 #define try(expr, expect) \
52 if (ret != (expect)) \
53 fail(filename[file], i+1, \
54 STRINGIFY(expr) "= %i", ret); \
57 /* Try or imitate results.
 * unreliable(expr, expect, force, undo): like try(), but for calls whose
 * outcome may legitimately differ on replay (the non-blocking lock ops in
 * run_ops()).  On a mismatch a diagnostic is printed to stderr rather
 * than calling fail(); `force` and `undo` are the blocking acquire and the
 * matching release passed by the callers -- how they are applied is on
 * lines not visible here.  Uses `filename`, `file`, `i` from the caller. */
58 #define unreliable(expr, expect, force, undo) \
61 if (ret != expect) { \
62 fprintf(stderr, "%s:%u: %s gave %i not %i", \
63 filename[file], i+1, STRINGIFY(expr), \
/* Compare two TDB_DATA blobs for equality.
 * Sizes are compared first; when they match, the result is whether the
 * first a.dsize bytes are identical (memcmp).  The body of the
 * size-mismatch branch is not visible here (presumably returns false). */
72 static bool key_eq(TDB_DATA a, TDB_DATA b)
74 if (a.dsize != b.dsize)
76 return memcmp(a.dptr, b.dptr, a.dsize) == 0;
79 /* This is based on the hash algorithm from gdbm.
 * Returns a 32-bit hash of key->dptr[0 .. key->dsize-1]; callers reduce
 * it modulo their own table size. */
80 static unsigned int hash_key(TDB_DATA *key)
82 uint32_t value; /* Used to compute the hash value. */
83 uint32_t i; /* Byte index into the key. */
85 /* Seed from a constant XOR the key size, then add in each byte shifted
 * left by (i*5 % 24) bits. */
86 for (value = 0x238F13AF ^ key->dsize, i=0; i < key->dsize; i++)
87 value = (value + (key->dptr[i] << (i*5 % 24)));
/* Final linear scramble of the accumulated value. */
89 return (1103515243 * value + 12345);
95 OP_TDB_LOCKALL_UNMARK,
96 OP_TDB_LOCKALL_NONBLOCK,
99 OP_TDB_LOCKALL_READ_NONBLOCK,
100 OP_TDB_UNLOCKALL_READ,
102 OP_TDB_CHAINLOCK_NONBLOCK,
103 OP_TDB_CHAINLOCK_MARK,
104 OP_TDB_CHAINLOCK_UNMARK,
106 OP_TDB_CHAINLOCK_READ,
107 OP_TDB_CHAINUNLOCK_READ,
114 OP_TDB_TRANSACTION_START,
115 OP_TDB_TRANSACTION_CANCEL,
116 OP_TDB_TRANSACTION_COMMIT,
117 OP_TDB_TRAVERSE_READ_START,
118 OP_TDB_TRAVERSE_START,
134 /* Who is waiting for us? */
135 struct list_head post;
136 /* What are we waiting for? */
137 struct list_head pre;
139 /* If I'm part of a group (traverse/transaction) where is
140 * start? (Otherwise, 0) */
141 unsigned int group_start;
144 int flag; /* open and store */
145 struct traverse *trav; /* traverse start */
146 TDB_DATA pre_append; /* append */
147 unsigned int transaction_end; /* transaction start */
/* Convert one hex digit character to its value for make_tdb_data().
 * The visible tests accept 'A'-'F' and '0'-'9' (the returned values are on
 * lines not shown); any character that matches neither is reported through
 * fail() using the caller's filename/line.
 * NOTE(review): whether lowercase 'a'-'f' is normalised first cannot be
 * seen from this listing. */
151 static unsigned char hex_char(const char *filename, unsigned int line, char c)
154 if (c >= 'A' && c <= 'F')
156 if (c >= '0' && c <= '9')
158 fail(filename, line, "invalid hex character '%c'", c);
161 /* TDB data is <size>:<%02x>*
 * Parse one trace-file word into a TDB_DATA.
 *   ctx            talloc parent for the allocated byte buffer
 *   filename/line  used only for fail() diagnostics
 * The literal word "NULL" is special-cased (handling not visible here).
 * Otherwise the leading decimal number gives dsize, a buffer of that many
 * bytes is allocated, and dsize pairs of hex digits are decoded into it. */
162 static TDB_DATA make_tdb_data(const void *ctx,
163 const char *filename, unsigned int line,
170 if (streq(word, "NULL"))
/* atoi() stops at the ':' separator, leaving just the size. */
173 data.dsize = atoi(word);
174 data.dptr = talloc_array(ctx, unsigned char, data.dsize);
175 p = strchr(word, ':');
177 fail(filename, line, "invalid tdb data '%s'", word);
/* Two hex characters per output byte, high nibble first. */
179 for (i = 0; i < data.dsize; i++)
180 data.dptr[i] = hex_char(filename, line, p[i*2])*16
181 + hex_char(filename, line, p[i*2+1]);
/* Grow the op array so that index i exists and initialise that slot.
 * *op is reallocated to i+1 elements (so it may move; callers must use
 * the updated pointer), then the new entry gets its serial number and
 * group_start = 0, i.e. "not part of a traverse/transaction group".
 * Setting of the op type from `type` is on a line not visible here. */
186 static void add_op(const char *filename, struct op **op, unsigned int i,
187 unsigned int serial, enum op_type type)
190 *op = talloc_realloc(NULL, *op, struct op, i+1);
193 new->serial = serial;
195 new->group_start = 0;
/* Parser hook for ops that take no arguments: reports "Expected no
 * arguments" via fail() (the guarding condition is not visible here) and
 * records that the op has no key. */
198 static void op_add_nothing(const char *filename,
199 struct op op[], unsigned int op_num, char *words[])
202 fail(filename, op_num+1, "Expected no arguments");
203 op[op_num].key = tdb_null;
/* Parser hook for "<serial> <op> <key>": exactly one argument must follow
 * the op name, and it is decoded into op[op_num].key.
 * Ops other than OP_TDB_TRAVERSE get extra handling on a line not visible
 * here (presumably key accounting -- confirm against the full file). */
206 static void op_add_key(const char *filename,
207 struct op op[], unsigned int op_num, char *words[])
209 if (words[2] == NULL || words[3])
210 fail(filename, op_num+1, "Expected just a key");
212 op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
213 if (op[op_num].op != OP_TDB_TRAVERSE)
/* Parser hook for "<serial> <op> <key> = <ret>".
 * Requires exactly words[2..4] with words[3] == "=", stores the integer
 * result in .ret and the decoded key in .key. */
217 static void op_add_key_ret(const char *filename,
218 struct op op[], unsigned int op_num, char *words[])
220 if (!words[2] || !words[3] || !words[4] || words[5]
221 || !streq(words[3], "="))
222 fail(filename, op_num+1, "Expected <key> = <ret>");
223 op[op_num].ret = atoi(words[4]);
224 op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
225 /* May only be a unique key if it fails */
226 if (op[op_num].ret != 0)
/* Parser hook for "<serial> <op> <key> = <data>".
 * Requires exactly words[2..4] with words[3] == "=", and decodes both the
 * key and the resulting data. */
230 static void op_add_key_data(const char *filename,
231 struct op op[], unsigned int op_num, char *words[])
233 if (!words[2] || !words[3] || !words[4] || words[5]
234 || !streq(words[3], "="))
235 fail(filename, op_num+1, "Expected <key> = <data>");
236 op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
237 op[op_num].data = make_tdb_data(op, filename, op_num+1, words[4]);
238 /* May only be a unique key if it fails */
239 if (!op[op_num].data.dptr)
243 /* <serial> tdb_store <rec> <rec> <flag> = <ret>
 * words[2] is the key, words[3] the data, words[4] the store flag (parsed
 * with base auto-detection), words[5] must be "=", words[6] is the
 * recorded return value. */
244 static void op_add_store(const char *filename,
245 struct op op[], unsigned int op_num, char *words[])
247 if (!words[2] || !words[3] || !words[4] || !words[5] || !words[6]
248 || words[7] || !streq(words[5], "="))
249 fail(filename, op_num+1, "Expect <key> <data> <flag> = <ret>");
251 op[op_num].flag = strtoul(words[4], NULL, 0);
252 op[op_num].ret = atoi(words[6]);
253 op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
254 op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]);
258 /* <serial> tdb_append <rec> <rec> = <rec>
 * words[2] is the key, words[3] the appended data and words[5] the full
 * record after the append. */
259 static void op_add_append(const char *filename,
260 struct op op[], unsigned int op_num, char *words[])
262 TDB_DATA post_append;
264 if (!words[2] || !words[3] || !words[4] || !words[5] || words[6]
265 || !streq(words[4], "="))
266 fail(filename, op_num+1, "Expect <key> <data> = <rec>");
268 op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
269 op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]);
271 post_append = make_tdb_data(op, filename, op_num+1, words[5]);
273 /* By subtraction, figure out what previous data was: pre_append shares
 * post_append's buffer and is simply shortened by the appended length.
 * NOTE(review): no visible check that post_append.dsize >= data.dsize. */
274 op[op_num].pre_append.dptr = post_append.dptr;
275 op[op_num].pre_append.dsize = post_append.dsize - op[op_num].data.dsize;
279 /* <serial> tdb_get_seqnum = <ret>
 * No key; only the recorded return value is stored. */
280 static void op_add_seqnum(const char *filename,
281 struct op op[], unsigned int op_num, char *words[])
283 if (!words[2] || !words[3] || words[4] || !streq(words[2], "="))
284 fail(filename, op_num+1, "Expect = <ret>");
286 op[op_num].key = tdb_null;
287 op[op_num].ret = atoi(words[3]);
/* Parser hook for a traverse-start op: takes no arguments, has no key,
 * and starts with no struct traverse attached (op_analyze_traverse()
 * fills .trav in when the matching end is parsed). */
290 static void op_add_traverse(const char *filename,
291 struct op op[], unsigned int op_num, char *words[])
294 fail(filename, op_num+1, "Expect no arguments");
296 op[op_num].key = tdb_null;
297 op[op_num].trav = NULL;
/* Parser hook for a transaction-start op: takes no arguments, has no key,
 * and transaction_end starts at 0 meaning "end not seen yet"
 * (op_analyze_transaction() sets it and tests for 0 to find open ones). */
300 static void op_add_transaction(const char *filename, struct op op[],
301 unsigned int op_num, char *words[])
304 fail(filename, op_num+1, "Expect no arguments");
306 op[op_num].key = tdb_null;
307 op[op_num].transaction_end = 0;
/* Parser hook for the op that ends a transaction (commit/cancel).
 * Searches backwards from op_num for the nearest transaction start whose
 * transaction_end is still 0, records op_num as its end, and marks every
 * op after the start up to and including op_num as belonging to that
 * group via group_start.
 * NOTE(review): the `i >= 0` loop test only terminates if i is a signed
 * type; its declaration is not visible in this listing. */
310 static void op_analyze_transaction(const char *filename,
311 struct op op[], unsigned int op_num,
316 op[op_num].key = tdb_null;
319 fail(filename, op_num+1, "Expect no arguments");
321 for (i = op_num-1; i >= 0; i--) {
322 if (op[i].op == OP_TDB_TRANSACTION_START &&
323 !op[i].transaction_end)
328 fail(filename, op_num+1, "no transaction start found");
331 op[start].transaction_end = op_num;
333 /* This rolls in nested transactions. I think that's right. */
334 for (i++; i <= op_num; i++)
335 op[i].group_start = start;
338 struct traverse_hash {
343 /* A traverse is a hash of keys, each one associated with ops. */
345 /* How many traversal callouts should I do? */
348 /* Where is traversal end op? */
351 /* For trivial traversals. */
352 struct traverse_hash *hash;
355 /* A trivial traversal is one which doesn't terminate early and only
356 * plays with its own record. We can reliably replay these even if
357 * traverse order changes.
 *
 * op points at the traverse start and `end` is the offset of the traverse
 * end op relative to it.  The visible checks are: the end op's recorded
 * return must be 0, and every op inside must use the key of the most
 * recent OP_TDB_TRAVERSE callout.
 * NOTE(review): the trailing comment suggests the function currently
 * answers "not trivial" unconditionally; the return statements are not
 * visible here, so confirm against the full file. */
358 static bool is_trivial_traverse(struct op op[], unsigned int end)
362 TDB_DATA cur = tdb_null;
364 if (op[end].ret != 0)
367 for (i = 0; i < end; i++) {
370 if (op[i].op == OP_TDB_TRAVERSE)
372 if (!key_eq(cur, op[i].key))
377 /* With multiple things happening at once, no traverse is trivial. */
/* Parser hook for a traverse-end op.
 * Optionally parses "= <num>" as the recorded return, walks backwards to
 * find the matching traverse start, attaches a freshly allocated
 * struct traverse to it, and tags every op from start to op_num with
 * group_start = start.  For a trivial traversal it also builds an
 * open-addressed hash (2 * trav->num slots, linear probing) mapping each
 * callout key to the index of the op following that callout. */
381 static void op_analyze_traverse(const char *filename,
382 struct op op[], unsigned int op_num,
386 struct traverse *trav = talloc(op, struct traverse);
388 op[op_num].key = tdb_null;
390 /* = %u means traverse function terminated. */
392 if (!streq(words[2], "=") || !words[3] || words[4])
393 fail(filename, op_num+1, "expect = <num>");
394 op[op_num].ret = atoi(words[3]);
/* Scan back towards the start op, looking at each OP_TDB_TRAVERSE
 * callout on the way (what is done with them is not visible here). */
400 for (i = op_num-1; i >= 0; i--) {
401 if (op[i].op == OP_TDB_TRAVERSE)
403 if (op[i].op != OP_TDB_TRAVERSE_READ_START
404 && op[i].op != OP_TDB_TRAVERSE_START)
412 fail(filename, op_num+1, "no traversal start found");
415 op[start].trav = trav;
417 for (i = start; i <= op_num; i++)
418 op[i].group_start = start;
/* NOTE(review): unless i is reset on a line not shown, i == op_num+1
 * after the loop above, so op+i / op_num-i / op[i].trav below would refer
 * past the end op rather than to `start`.  Verify against the full file. */
420 if (is_trivial_traverse(op+i, op_num-i)) {
421 /* Fill in a plentiful hash table. */
422 op[start].trav->hash = talloc_zero_array(op[i].trav,
423 struct traverse_hash,
425 for (i = start; i < op_num; i++) {
427 if (op[i].op != OP_TDB_TRAVERSE)
429 h = hash_key(&op[i].key) % (trav->num * 2);
/* index == 0 marks an empty slot, hence the stored value is i+1. */
430 while (trav->hash[h].index)
431 h = (h + 1) % (trav->num * 2);
432 trav->hash[h].index = i+1;
433 trav->hash[h].key = op[i].key;
439 /* Keep -Wmissing-declarations happy: */
440 const struct op_table *
441 find_keyword (register const char *str, register unsigned int len);
443 #include "keywords.c"
446 /* We can have more than one */
447 struct list_node list;
454 unsigned int src_file, src_op;
/* Remove from `deps` the dependency entry that names (file, op).
 * The first matching entry is unlinked with list_del(); if the visible
 * errx() is reached the process exits with status 1.  How control leaves
 * the loop after a match is on lines not shown.
 * NOTE(review): errx() appends its own newline, so the "\n" in the
 * message yields an extra blank line. */
457 static void remove_matching_dep(struct list_head *deps,
458 unsigned int file, unsigned int op)
462 list_for_each(deps, dep, list) {
463 if (dep->file == file && dep->op == op) {
464 list_del(&dep->list);
468 errx(1, "Failed to find depend on file %u line %u\n", file, op+1);
/* Sanity check after (or during) a run: every op in [1, num) must have an
 * empty `pre` list, i.e. all dependencies it was waiting for arrived.
 * The first op that still has one is reported through fail().  Index 0
 * (the tdb_open line) is skipped. */
471 static void check_deps(const char *filename, struct op op[], unsigned int num)
476 for (i = 1; i < num; i++)
477 if (!list_empty(&op[i].pre))
478 fail(filename, i+1, "Still has dependencies");
/* Diagnostic used when op i of `file` is stuck waiting: prints each
 * outstanding dependency (file:line) from op[i].pre to stdout, then runs
 * check_deps() over the ops before i, which fail()s if any earlier op
 * also still has dependencies. */
482 static void dump_pre(char *filename[], unsigned int file,
483 struct op op[], unsigned int i)
487 printf("%s:%u still waiting for:\n", filename[file], i+1);
488 list_for_each(&op[i].pre, dep, list)
489 printf(" %s:%u\n", filename[dep->file], dep->op+1);
490 check_deps(filename[file], op, i);
/* Block until every dependency of op i has been satisfied.
 * While op[i].pre is non-empty, reads one struct depend_xmit at a time
 * from pre_fd and removes the matching entry from the `pre` list of the
 * op it is addressed to (dep.dst_op), which may be a later op than i. */
493 static void do_pre(char *filename[], unsigned int file, int pre_fd,
494 struct op op[], unsigned int i)
496 while (!list_empty(&op[i].pre)) {
497 struct depend_xmit dep;
500 printf("%s:%u:waiting for pre\n", filename[file], i+1);
/* NOTE(review): errno is examined for any result other than a full
 * struct, including a short read or EOF, where read() does not set it. */
504 while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) {
505 if (errno == EINTR) {
506 dump_pre(filename, file, op, i);
509 errx(1, "Reading from pipe");
514 printf("%s:%u:got pre %u from %s:%u\n", filename[file], i+1,
515 dep.dst_op+1, filename[dep.src_file], dep.src_op+1);
518 /* This could be any op, not just this one. */
519 remove_matching_dep(&op[dep.dst_op].pre,
520 dep.src_file, dep.src_op);
/* Notify everyone waiting on op i of `file` that it has completed.
 * For each entry in op[i].post, a struct depend_xmit is written to the
 * write end of the destination file's pipe (pipes[dep->file].fd[1]); a
 * failed write is fatal via err().  Filling in of `dx` is on lines not
 * visible here. */
524 static void do_post(char *filename[], unsigned int file,
525 const struct op op[], unsigned int i)
529 list_for_each(&op[i].post, dep, list) {
530 struct depend_xmit dx;
536 printf("%s:%u:sending to file %s:%u\n", filename[file], i+1,
537 filename[dep->file], dep->op+1);
539 if (write(pipes[dep->file].fd[1], &dx, sizeof(dx))
541 err(1, "%s:%u failed to tell file %s",
542 filename[file], i+1, filename[dep->file]);
546 static int get_len(TDB_DATA key, TDB_DATA data, void *private_data)
551 static unsigned run_ops(struct tdb_context *tdb,
556 unsigned int start, unsigned int stop);
558 struct traverse_info {
567 /* Trivial case: do whatever they did for this key.
 * Traverse callback.  Looks the visited key up in the hash built by
 * op_analyze_traverse() (same hash function, same 2 * trav->num table
 * size, linear probing until an empty slot) and replays the ops recorded
 * from the stored index.  A key that is not in the table is fatal. */
568 static int trivial_traverse(struct tdb_context *tdb,
569 TDB_DATA key, TDB_DATA data,
572 struct traverse_info *tinfo = _tinfo;
573 struct traverse *trav = tinfo->op[tinfo->start].trav;
574 unsigned int h = hash_key(&key) % (trav->num * 2);
576 while (trav->hash[h].index) {
577 if (key_eq(trav->hash[h].key, key)) {
578 run_ops(tdb, tinfo->pre_fd, tinfo->filename,
579 tinfo->file, tinfo->op, trav->hash[h].index,
584 h = (h + 1) % (trav->num * 2);
586 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
587 "unexpected traverse key");
590 /* More complex. Just do whatever they did at the n'th entry.
 * Traverse callback used when the recorded traversal is not trivial.
 * tinfo->i tracks the current position in the recorded op stream:
 *  - if it already sits on the traverse end, the recording expected no
 *    (more) records, which is only tolerated for an empty traverse;
 *  - otherwise the op at tinfo->i must be an OP_TDB_TRAVERSE callout, and
 *    the ops that follow it are replayed with run_ops() up to the next
 *    callout or the traverse end.
 * Return values are on lines not visible in this listing. */
591 static int nontrivial_traverse(struct tdb_context *tdb,
592 TDB_DATA key, TDB_DATA data,
595 struct traverse_info *tinfo = _tinfo;
596 struct traverse *trav = tinfo->op[tinfo->start].trav;
598 if (tinfo->i == trav->end) {
599 /* This can happen if traverse expects to be empty. */
600 if (tinfo->start + 1 == trav->end)
602 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
603 "traverse did not terminate");
/* fail() already prefixes "<file>:<line>: FAIL: ", and no arguments are
 * passed here, so the message must not contain conversion specifiers. */
606 if (tinfo->op[tinfo->i].op != OP_TDB_TRAVERSE)
607 fail(tinfo->filename[tinfo->file], tinfo->start + 1,
608 "traverse terminated early");
610 /* Run any normal ops. */
611 tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->file,
612 tinfo->op, tinfo->i+1, trav->end);
614 if (tinfo->i == trav->end)
/* Replay one recorded traversal starting at op[start].
 * traversefn is the traverse entry point to use (run_ops() passes
 * tdb_traverse or tdb_traverse_read).  The trivial_traverse callback path
 * checks that the number of records visited equals trav->num; the other
 * path uses nontrivial_traverse (the condition choosing between them is
 * not visible here).  Any ops left over afterwards are replayed directly. */
620 static unsigned op_traverse(struct tdb_context *tdb,
624 int (*traversefn)(struct tdb_context *,
625 tdb_traverse_func, void *),
629 struct traverse *trav = op[start].trav;
630 struct traverse_info tinfo = { op, filename, file, pre_fd,
635 int ret = traversefn(tdb, trivial_traverse, &tinfo);
636 if (ret != trav->num)
637 fail(filename[file], start+1,
638 "short traversal %i", ret);
642 traversefn(tdb, nontrivial_traverse, &tinfo);
644 /* Traversing in wrong order can have strange effects: eg. if
645 * original traverse went A (delete A), B, we might do B
646 * (delete A). So if we have ops left over, we do it now. */
647 while (tinfo.i != trav->end) {
648 if (op[tinfo.i].op == OP_TDB_TRAVERSE)
651 tinfo.i = run_ops(tdb, pre_fd, filename, file, op,
658 static void break_out(int sig)
/* Replay ops [start, stop) of one trace file against `tdb`.
 * For each op: wait for its dependencies (do_pre), perform the recorded
 * tdb call and compare the result with the recording (try()/unreliable()
 * or an explicit key_eq() check), then notify dependants (do_post).
 * Traverse-start ops hand off to op_traverse(), which returns the index
 * to continue from.  A SIGALRM handler (break_out) is installed first.
 * The returned index is produced on lines not visible in this listing. */
662 static __attribute__((noinline))
663 unsigned run_ops(struct tdb_context *tdb,
667 struct op op[], unsigned int start, unsigned int stop)
672 sa.sa_handler = break_out;
675 sigaction(SIGALRM, &sa, NULL);
676 for (i = start; i < stop; i++) {
677 do_pre(filename, file, pre_fd, op, i);
681 try(tdb_lockall(tdb), op[i].ret);
683 case OP_TDB_LOCKALL_MARK:
684 try(tdb_lockall_mark(tdb), op[i].ret);
686 case OP_TDB_LOCKALL_UNMARK:
687 try(tdb_lockall_unmark(tdb), op[i].ret);
689 case OP_TDB_LOCKALL_NONBLOCK:
690 unreliable(tdb_lockall_nonblock(tdb), op[i].ret,
691 tdb_lockall(tdb), tdb_unlockall(tdb));
693 case OP_TDB_UNLOCKALL:
694 try(tdb_unlockall(tdb), op[i].ret);
696 case OP_TDB_LOCKALL_READ:
697 try(tdb_lockall_read(tdb), op[i].ret);
699 case OP_TDB_LOCKALL_READ_NONBLOCK:
700 unreliable(tdb_lockall_read_nonblock(tdb), op[i].ret,
701 tdb_lockall_read(tdb),
702 tdb_unlockall_read(tdb));
704 case OP_TDB_UNLOCKALL_READ:
705 try(tdb_unlockall_read(tdb), op[i].ret);
707 case OP_TDB_CHAINLOCK:
708 try(tdb_chainlock(tdb, op[i].key), op[i].ret);
710 case OP_TDB_CHAINLOCK_NONBLOCK:
711 unreliable(tdb_chainlock_nonblock(tdb, op[i].key),
713 tdb_chainlock(tdb, op[i].key),
714 tdb_chainunlock(tdb, op[i].key));
716 case OP_TDB_CHAINLOCK_MARK:
717 try(tdb_chainlock_mark(tdb, op[i].key), op[i].ret);
719 case OP_TDB_CHAINLOCK_UNMARK:
720 try(tdb_chainlock_unmark(tdb, op[i].key), op[i].ret);
722 case OP_TDB_CHAINUNLOCK:
723 try(tdb_chainunlock(tdb, op[i].key), op[i].ret);
725 case OP_TDB_CHAINLOCK_READ:
726 try(tdb_chainlock_read(tdb, op[i].key), op[i].ret);
728 case OP_TDB_CHAINUNLOCK_READ:
729 try(tdb_chainunlock_read(tdb, op[i].key), op[i].ret);
731 case OP_TDB_PARSE_RECORD:
732 try(tdb_parse_record(tdb, op[i].key, get_len, NULL),
736 try(tdb_exists(tdb, op[i].key), op[i].ret);
/* For store/append only a negative recorded result is compared as-is;
 * any non-negative recording is expected to replay as 0. */
739 try(tdb_store(tdb, op[i].key, op[i].data, op[i].flag),
740 op[i].ret < 0 ? op[i].ret : 0);
743 try(tdb_append(tdb, op[i].key, op[i].data),
744 op[i].ret < 0 ? op[i].ret : 0);
746 case OP_TDB_GET_SEQNUM:
747 try(tdb_get_seqnum(tdb), op[i].ret);
749 case OP_TDB_WIPE_ALL:
750 try(tdb_wipe_all(tdb), op[i].ret);
752 case OP_TDB_TRANSACTION_START:
753 try(tdb_transaction_start(tdb), op[i].ret);
755 case OP_TDB_TRANSACTION_CANCEL:
756 try(tdb_transaction_cancel(tdb), op[i].ret);
758 case OP_TDB_TRANSACTION_COMMIT:
759 try(tdb_transaction_commit(tdb), op[i].ret);
761 case OP_TDB_TRAVERSE_READ_START:
762 i = op_traverse(tdb, pre_fd, filename, file,
763 tdb_traverse_read, op, i);
765 case OP_TDB_TRAVERSE_START:
766 i = op_traverse(tdb, pre_fd, filename, file,
767 tdb_traverse, op, i);
769 case OP_TDB_TRAVERSE:
770 /* Terminate: we're in a traverse, and we've
773 case OP_TDB_TRAVERSE_END:
774 fail(filename[file], i+1, "unepxected end traverse");
775 /* FIXME: These must be treated like traverse. */
776 case OP_TDB_FIRSTKEY:
777 if (!key_eq(tdb_firstkey(tdb), op[i].data))
778 fail(filename[file], i+1, "bad firstkey");
781 if (!key_eq(tdb_nextkey(tdb, op[i].key), op[i].data))
782 fail(filename[file], i+1, "bad nextkey");
785 TDB_DATA f = tdb_fetch(tdb, op[i].key);
786 if (!key_eq(f, op[i].data))
787 fail(filename[file], i+1, "bad fetch %u",
792 try(tdb_delete(tdb, op[i].key), op[i].ret);
795 do_post(filename, file, op, i);
/* Read and parse one trace file into an array of struct op.
 *   filename    trace file to load (fatal error if unreadable or empty)
 *   num         out: presumably the number of ops parsed -- the store to
 *               it is not visible here
 *   hashsize, tdb_flags, open_flags
 *               out: parameters of the initial tdb_open line
 * The first line must be a tdb_open; every later line needs at least a
 * serial number and an op name, which is looked up with find_keyword()
 * and handed to the op's enhance_op hook.  A warning is printed to stderr
 * when the trace does not end with tdb_close. */
800 static struct op *load_tracefile(const char *filename, unsigned int *num,
801 unsigned int *hashsize,
802 unsigned int *tdb_flags,
803 unsigned int *open_flags)
806 struct op *op = talloc_array(NULL, struct op, 1);
811 file = grab_file(NULL, filename, NULL);
813 err(1, "Reading %s", filename);
815 lines = strsplit(file, file, "\n", NULL);
817 errx(1, "%s is empty", filename);
/* NOTE(review): words[1]..words[4] of the first line are used below
 * without a visible NULL check. */
819 words = strsplit(lines, lines[0], " ", NULL);
820 if (!streq(words[1], "tdb_open"))
821 fail(filename, 1, "does not start with tdb_open");
823 *hashsize = atoi(words[2]);
824 *tdb_flags = strtoul(words[3], NULL, 0);
825 *open_flags = strtoul(words[4], NULL, 0);
827 for (i = 1; lines[i]; i++) {
828 const struct op_table *opt;
830 words = strsplit(lines, lines[i], " ", NULL);
831 if (!words[0] || !words[1])
832 fail(filename, i+1, "Expected serial number and op");
834 opt = find_keyword(words[1], strlen(words[1]));
836 if (streq(words[1], "tdb_close")) {
839 "lines after tdb_close");
844 fail(filename, i+1, "Unknown operation '%s'", words[1]);
/* add_op() may move the array, so it takes &op; the hook then fills in
 * the op-specific fields of slot i. */
847 add_op(filename, &op, i, atoi(words[0]), opt->type);
848 opt->enhance_op(filename, op, i, words);
851 fprintf(stderr, "%s:%u:last operation is not tdb_close: incomplete?",
858 /* We remember all the keys we've ever seen, and who has them. */
866 unsigned int num_users;
867 struct key_user *user;
/* Sentinel objects: only their addresses matter.  needs() returns a
 * pointer to one of them to express a requirement about a key's state,
 * and satisfies() compares the pointer it is given against them. */
870 static const TDB_DATA must_not_exist;
871 static const TDB_DATA must_exist;
872 static const TDB_DATA not_exists_or_empty;
874 /* NULL means doesn't care if it exists or not, &must_exist means
875 * it must exist but we don't care what, &must_not_exist means it must
876 * not exist, otherwise the data it needs.
 * An append whose pre-existing data is empty yields &not_exists_or_empty.
 *
 * Returns the precondition that `op` places on its key, based on the op
 * type and what the trace recorded.  Unknown op types are fatal (errx).
 * Several case labels and conditions are on lines not visible here. */
877 static const TDB_DATA *needs(const struct op *op)
880 /* FIXME: Pull forward deps, since we can deadlock */
881 case OP_TDB_CHAINLOCK:
882 case OP_TDB_CHAINLOCK_NONBLOCK:
883 case OP_TDB_CHAINLOCK_MARK:
884 case OP_TDB_CHAINLOCK_UNMARK:
885 case OP_TDB_CHAINUNLOCK:
886 case OP_TDB_CHAINLOCK_READ:
887 case OP_TDB_CHAINUNLOCK_READ:
/* Append: no prior data means the record was absent or empty; otherwise
 * the record must have held exactly pre_append. */
891 if (op->pre_append.dsize == 0)
892 return &not_exists_or_empty;
893 return &op->pre_append;
896 if (op->flag == TDB_INSERT) {
900 return &must_not_exist;
901 } else if (op->flag == TDB_MODIFY) {
903 return &must_not_exist;
907 /* No flags? Don't care */
914 return &must_not_exist;
916 case OP_TDB_PARSE_RECORD:
918 return &must_not_exist;
921 /* FIXME: handle these. */
922 case OP_TDB_WIPE_ALL:
923 case OP_TDB_FIRSTKEY:
925 case OP_TDB_GET_SEQNUM:
926 case OP_TDB_TRAVERSE:
927 case OP_TDB_TRANSACTION_COMMIT:
928 case OP_TDB_TRANSACTION_CANCEL:
929 case OP_TDB_TRANSACTION_START:
934 return &must_not_exist;
939 return &must_not_exist;
943 errx(1, "Unexpected op %i", op->op);
949 /* This op makes the other one possible. */
951 /* This op makes the other one impossible. */
953 /* This op makes no difference. */
/* Decide what effect `op` has on a requirement `need` (as returned by
 * needs()) for the same key: it can make the requirement true, make it
 * false (DISSATISFIES), or leave it unaffected.
 * `need` is either one of the sentinel addresses &must_not_exist,
 * &must_exist, &not_exists_or_empty, or a pointer to the exact data
 * required.  Several return statements are on lines not visible here. */
957 static enum satisfaction satisfies(const struct op *op, const TDB_DATA *need)
959 bool deletes, creates;
961 /* Failed ops don't change state of db. */
965 deletes = (op->op == OP_TDB_DELETE || op->op == OP_TDB_WIPE_ALL);
966 /* Append/store is creating the record if ret == 0 (1 means replaced) */
967 if (op->op == OP_TDB_APPEND || op->op == OP_TDB_STORE)
968 creates = (op->ret == 0);
/* Sentinel requirements are recognised by address. */
972 if (need == &must_not_exist) {
980 if (need == &must_exist) {
988 if (need == &not_exists_or_empty) {
995 /* OK, we need an exact match. */
999 /* An append which results in the wrong data dissatisfies: the result is
 * pre_append followed by data, and both parts must match *need. */
1000 if (op->op == OP_TDB_APPEND) {
1001 if (op->pre_append.dsize + op->data.dsize != need->dsize)
1002 return DISSATISFIES;
1003 if (memcmp(op->pre_append.dptr, need->dptr,
1004 op->pre_append.dsize) != 0)
1005 return DISSATISFIES;
1006 if (memcmp(op->data.dptr, need->dptr + op->pre_append.dsize,
1007 op->data.dsize) != 0)
1008 return DISSATISFIES;
1010 } else if (op->op == OP_TDB_STORE) {
1011 if (key_eq(op->data, *need))
1013 return DISSATISFIES;
/* Build a table of every key used by any trace file and who uses it.
 *   op[f]       ops of file f; num_ops[f] their count
 * The table has total_keys * 2 slots (open addressing, linear probing)
 * and is allocated as a talloc child of op[0].  Each slot collects the
 * (file, op_num) pairs that touch its key; the users of every slot are
 * finally sorted with compare_user_serial.  As a side effect each op's
 * `post`/`pre` lists are initialised and duplicate key buffers are freed
 * so that equal keys share one dptr. */
1018 static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[],
1021 unsigned int i, j, h;
1022 struct keyinfo *hash;
1024 /* Gcc nested function extension. How cool is this?
 * qsort comparator: primary order is the recorded serial number; the
 * needs()/satisfies() checks below refine the order between two users
 * (under a condition not visible in this listing). */
1025 int compare_user_serial(const void *_a, const void *_b)
1027 const struct key_user *a = _a, *b = _b;
1028 int ret = op[a->file][a->op_num].serial
1029 - op[b->file][b->op_num].serial;
1031 /* Serial is not completely reliable. First, fetches don't
1032 * inc serial, second we don't lock to get seq number.
1033 * This smooths things a little for simple cases. */
1035 const TDB_DATA *a_needs, *b_needs;
1037 b_needs = needs(&op[b->file][b->op_num]);
1038 switch (satisfies(&op[a->file][a->op_num], b_needs)) {
1040 /* A comes first: it satisfies B. */
1043 /* A doesn't come first: it messes up B. */
1049 a_needs = needs(&op[a->file][a->op_num]);
1050 switch (satisfies(&op[b->file][b->op_num], a_needs)) {
1052 /* B comes first: it satisfies A. */
1055 /* B doesn't come first: it messes up A. */
1064 hash = talloc_zero_array(op[0], struct keyinfo, total_keys*2);
1065 for (i = 0; i < num; i++) {
1066 for (j = 1; j < num_ops[i]; j++) {
1067 /* We can't do this on allocation, due to realloc. */
1068 list_head_init(&op[i][j].post);
1069 list_head_init(&op[i][j].pre);
1071 if (!op[i][j].key.dptr)
1074 /* We don't wait for traverse keys */
1075 /* FIXME: We should, for trivial traversals. */
1076 if (op[i][j].op == OP_TDB_TRAVERSE)
/* Probe until we find this key or an empty slot to claim for it. */
1079 h = hash_key(&op[i][j].key) % (total_keys * 2);
1080 while (!key_eq(hash[h].key, op[i][j].key)) {
1081 if (!hash[h].key.dptr) {
1082 hash[h].key = op[i][j].key;
1085 h = (h + 1) % (total_keys * 2);
1087 /* Might as well save some memory if we can. */
1088 if (op[i][j].key.dptr != hash[h].key.dptr) {
1089 talloc_free(op[i][j].key.dptr);
1090 op[i][j].key.dptr = hash[h].key.dptr;
/* Append (file i, op j) to this key's user list. */
1092 hash[h].user = talloc_realloc(hash, hash[h].user,
1094 hash[h].num_users+1);
1095 hash[h].user[hash[h].num_users].op_num = j;
1096 hash[h].user[hash[h].num_users].file = i;
1097 hash[h].num_users++;
1101 /* Now sort into seqnum order. */
1102 for (h = 0; h < total_keys * 2; h++)
1103 qsort(hash[h].user, hash[h].num_users, sizeof(hash[h].user[0]),
1104 compare_user_serial);
/* Record that op (needs_file, needs_opnum) must wait for op
 * (satisfies_file, satisfies_opnum).
 * Ops in the same file never depend on each other.  When either op lies
 * inside a transaction group, the dependency is moved: the waiting side
 * back to the transaction start, the satisfying side forward to the
 * transaction end.  Two struct depend nodes (talloc children of ctx) are
 * then linked in: one on the satisfier's `post` list and one on the
 * waiter's `pre` list. */
1109 static void add_dependency(void *ctx,
1112 unsigned int needs_file,
1113 unsigned int needs_opnum,
1114 unsigned int satisfies_file,
1115 unsigned int satisfies_opnum)
1117 struct depend *post, *pre;
1118 unsigned int needs_start, sat_start;
1120 /* We don't depend on ourselves. */
1121 if (needs_file == satisfies_file)
1125 printf("%s:%u: depends on %s:%u\n",
1126 filename[needs_file], needs_opnum+1,
1127 filename[satisfies_file], satisfies_opnum+1);
1130 needs_start = op[needs_file][needs_opnum].group_start;
1131 sat_start = op[satisfies_file][satisfies_opnum].group_start;
1133 /* If needs is in a transaction, we need it before start. */
1135 switch (op[needs_file][needs_start].op) {
1136 case OP_TDB_TRANSACTION_START:
1137 needs_opnum = needs_start;
1139 printf(" -> Back to %u\n", needs_start+1);
1148 /* If satisfies is in a transaction, we wait until after commit. */
1149 /* FIXME: If transaction is cancelled, don't need dependency. */
1151 if (op[satisfies_file][sat_start].op
1152 == OP_TDB_TRANSACTION_START) {
1154 = op[satisfies_file][sat_start].transaction_end;
1156 printf(" -> Depends on %u\n", satisfies_opnum+1);
/* "post" on the satisfier names who to notify... */
1162 post = talloc(ctx, struct depend);
1163 post->file = needs_file;
1164 post->op = needs_opnum;
1165 list_add(&op[satisfies_file][satisfies_opnum].post, &post->list);
/* ...and "pre" on the waiter names who it is waiting for. */
1167 pre = talloc(ctx, struct depend);
1168 pre->file = satisfies_file;
1169 pre->op = satisfies_opnum;
1170 list_add(&op[needs_file][needs_opnum].pre, &pre->list);
1173 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1174 struct traverse_dep {
1176 unsigned int op_num;
1177 const struct op *op;
1180 /* Sort by which one runs first.
 * qsort comparator over struct traverse_dep.  Each entry's `op` points at
 * its traverse-start op, so op[trav->end - op_num] is the matching
 * traverse-end op of the same file. */
1181 static int compare_traverse_dep(const void *_a, const void *_b)
1183 const struct traverse_dep *a = _a, *b = _b;
1184 const struct traverse *trava = a->op->trav, *travb = b->op->trav;
1186 if (a->op->serial != b->op->serial)
1187 return a->op->serial - b->op->serial;
1189 /* If they have same serial, it means one didn't make any changes.
1190 * Thus sort by end in that case. */
1191 return a->op[trava->end - a->op_num].serial
1192 - b->op[travb->end - b->op_num].serial;
1195 /* Traversals can deadlock against each other. Force order.
 * Collects every traverse-start op (read or write) from all files, sorts
 * them with compare_traverse_dep, and makes each traversal depend on the
 * end op of the one sorted immediately before it. */
1196 static void make_traverse_depends(char *filename[],
1197 struct op *op[], unsigned int num_ops[],
1200 unsigned int i, j, num_traversals = 0;
1201 struct traverse_dep *dep;
1203 dep = talloc_array(NULL, struct traverse_dep, 1);
1206 for (i = 0; i < num; i++) {
1207 for (j = 0; j < num_ops[i]; j++) {
1208 if (op[i][j].op == OP_TDB_TRAVERSE_START
1209 || op[i][j].op == OP_TDB_TRAVERSE_READ_START) {
1210 dep = talloc_realloc(NULL, dep,
1211 struct traverse_dep,
1213 dep[num_traversals].file = i;
1214 dep[num_traversals].op_num = j;
1215 dep[num_traversals].op = &op[i][j];
1220 qsort(dep, num_traversals, sizeof(dep[0]), compare_traverse_dep);
1221 for (i = 1; i < num_traversals; i++) {
1222 /* i depends on end of traverse i-1. */
1223 add_dependency(NULL, op, filename, dep[i].file, dep[i].op_num,
1224 dep[i-1].file, dep[i-1].op->trav->end);
1228 #endif /* TRAVERSALS_TAKE_TRANSACTION_LOCK */
/* Work out the cross-file ordering constraints for all loaded traces.
 * Builds the per-key user table with hash_ops(), then for every key makes
 * each user depend on the user sorted immediately before it
 * (add_dependency() ignores pairs from the same file).  When
 * TRAVERSALS_TAKE_TRANSACTION_LOCK is set, traversals are additionally
 * serialised via make_traverse_depends(). */
1230 static void derive_dependencies(char *filename[],
1231 struct op *op[], unsigned int num_ops[],
1234 struct keyinfo *hash;
1237 /* Create hash table for faster key lookup. */
1238 hash = hash_ops(op, num_ops, num);
1240 /* We make the naive assumption that two ops on the same key
1241 * have to be ordered; it's overkill. */
1242 for (i = 0; i < total_keys * 2; i++) {
1243 for (j = 1; j < hash[i].num_users; j++) {
1244 add_dependency(hash, op, filename,
1245 hash[i].user[j].file,
1246 hash[i].user[j].op_num,
1247 hash[i].user[j-1].file,
1248 hash[i].user[j-1].op_num);
1252 #if TRAVERSALS_TAKE_TRANSACTION_LOCK
1253 make_traverse_depends(filename, op, num_ops, num);
/* Entry point: replay_trace <tdbfile> <tracefile>...
 * Loads every trace file (creating one pipe per file), derives the
 * dependencies between them, then replays.  With a single trace the ops
 * run in-process; otherwise one child is forked per trace, each opens the
 * tdb and blocks on a shared wakeup pipe until the parent releases them
 * all at once.  The parent then collects the children and prints the
 * elapsed replay time in microseconds. */
1257 int main(int argc, char *argv[])
1259 struct timeval start, end;
1260 unsigned int i, num_ops[argc], hashsize[argc], tdb_flags[argc], open_flags[argc];
1261 struct op *op[argc];
1267 errx(1, "Usage: %s <tdbfile> <tracefile>...", argv[0]);
/* argv[2..] are the trace files, hence argc - 2 of everything. */
1269 pipes = talloc_array(NULL, struct pipe, argc - 2);
1270 for (i = 0; i < argc - 2; i++) {
1271 printf("Loading tracefile %s...", argv[2+i]);
1273 op[i] = load_tracefile(argv[2+i], &num_ops[i], &hashsize[i],
1274 &tdb_flags[i], &open_flags[i]);
1275 if (pipe(pipes[i].fd) != 0)
1276 err(1, "creating pipe");
1280 printf("Calculating inter-dependencies...");
/* i == number of trace files after the loop above. */
1282 derive_dependencies(argv+2, op, num_ops, i);
1285 /* Don't fork for single arg case: simple debugging. */
1287 struct tdb_context *tdb;
1288 tdb = tdb_open_ex(argv[1], hashsize[0], tdb_flags[0],
1289 open_flags[0], 0600,
1291 printf("Single threaded run...");
/* Op 0 is the tdb_open line itself, so replay starts at index 1. */
1294 run_ops(tdb, pipes[0].fd[0], argv+2, 0, op[0], 1, num_ops[0]);
1295 check_deps(argv[2], op[0], num_ops[0]);
1302 err(1, "creating pipe");
1304 for (i = 0; i < argc - 2; i++) {
1305 struct tdb_context *tdb;
1309 err(1, "fork failed");
1312 tdb = tdb_open_ex(argv[1], hashsize[i], tdb_flags[i],
1313 open_flags[i], 0600,
1316 err(1, "Opening tdb %s", argv[1]);
1318 /* This catches parent exiting. */
1319 if (read(fds[0], &c, 1) != 1)
1321 run_ops(tdb, pipes[i].fd[0], argv+2, i, op[i], 1,
1323 check_deps(argv[2+i], op[i], num_ops[i]);
1330 /* Let everything settle. */
1333 printf("Starting run...");
1335 gettimeofday(&start, NULL);
1336 /* Tell them all to go! Any write of sufficient length will do:
 * one byte per child is written, the contents are irrelevant. */
1337 if (write(fds[1], hashsize, i) != i)
1338 err(1, "Writing to wakeup pipe");
1340 for (i = 0; i < argc - 2; i++) {
1343 if (!WIFEXITED(status)) {
1344 warnx("Child died with signal %i", WTERMSIG(status));
1346 } else if (WEXITSTATUS(status) != 0)
1347 /* Assume child spat out error. */
1353 gettimeofday(&end, NULL);
1356 end.tv_sec -= start.tv_sec;
1357 printf("Time replaying: %lu usec\n",
1358 end.tv_sec * 1000000UL + (end.tv_usec - start.tv_usec));