]> git.ozlabs.org Git - ccan/commitdiff
timers: implementation of lazily-ordered timers.
authorRusty Russell <rusty@rustcorp.com.au>
Fri, 5 Apr 2013 06:13:27 +0000 (16:43 +1030)
committerRusty Russell <rusty@rustcorp.com.au>
Fri, 5 Apr 2013 06:21:05 +0000 (16:51 +1030)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
13 files changed:
Makefile-ccan
ccan/timer/LICENSE [new symlink]
ccan/timer/_info [new file with mode: 0644]
ccan/timer/benchmarks/Makefile [new file with mode: 0644]
ccan/timer/benchmarks/benchmark.c [new file with mode: 0644]
ccan/timer/benchmarks/expected-usage.c [new file with mode: 0644]
ccan/timer/design.txt [new file with mode: 0644]
ccan/timer/test/run-add.c [new file with mode: 0644]
ccan/timer/test/run-expiry.c [new file with mode: 0644]
ccan/timer/test/run-ff.c [new file with mode: 0644]
ccan/timer/test/run.c [new file with mode: 0644]
ccan/timer/timer.c [new file with mode: 0644]
ccan/timer/timer.h [new file with mode: 0644]

index c7eec337fba7c510df38adfc014f91f9b3b4f3c1..7ae3863a349a513376be5baca39f5310ec1cd0b3 100644 (file)
@@ -88,6 +88,7 @@ MODS_WITH_SRC := antithread \
        tally \
        tap \
        time \
+       timer \
        ttxml \
        wwviaudio
 
diff --git a/ccan/timer/LICENSE b/ccan/timer/LICENSE
new file mode 120000 (symlink)
index 0000000..dc314ec
--- /dev/null
@@ -0,0 +1 @@
+../../licenses/LGPL-2.1
\ No newline at end of file
diff --git a/ccan/timer/_info b/ccan/timer/_info
new file mode 100644 (file)
index 0000000..cb6808b
--- /dev/null
@@ -0,0 +1,80 @@
+#include <string.h>
+#include "config.h"
+
+/**
+ * timer - efficient implementation of rarely-expiring timers.
+ *
+ * This is a lazy implementation of timers: you can add and delete timers
+ * very quickly, and they are only sorted as their expiry approaches.
+ *
+ * This is a common case for timeouts, which must often be set, but
+ * rarely expire.
+ *
+ * Example:
+ *     // Silly example which outputs strings until timers expire.
+ *     #include <ccan/timer/timer.h>
+ *     #include <ccan/time/time.h>
+ *     #include <stdlib.h>
+ *     #include <stdio.h>
+ *
+ *     struct timed_string {
+ *             struct list_node node;
+ *             struct timer timer;
+ *             const char *string;
+ *     };
+ *
+ *     int main(int argc, char *argv[])
+ *     {
+ *             struct timers timers;
+ *             struct list_head strings;
+ *             struct list_head expired;
+ *             struct timed_string *s;
+ *
+ *             timers_init(&timers, time_now());
+ *             list_head_init(&strings);
+ *
+ *             while (argv[1]) {
+ *                     s = malloc(sizeof(*s));
+ *                     s->string = argv[1];
+ *                     timer_add(&timers, &s->timer,
+ *                               time_add(time_now(),
+ *                                        time_from_msec(atol(argv[2]))));
+ *                     list_add_tail(&strings, &s->node);
+ *                     argv += 2;
+ *             }
+ *
+ *             while (!list_empty(&strings)) {
+ *                     struct timespec now = time_now();
+ *                     list_for_each(&strings, s, node)
+ *                             printf("%s", s->string);
+ *                     timers_expire(&timers, now, &expired);
+ *                     while ((s = list_pop(&expired, struct timed_string,
+ *                                          timer.list)) != NULL) {
+ *                             list_del_from(&strings, &s->node);
+ *                             free(s);
+ *                     }
+ *             }
+ *
+ *             exit(0);
+ *     }
+ *
+ * License: LGPL (v2.1 or any later version)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+       /* Expect exactly one argument */
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0) {
+               printf("ccan/array_size\n");
+               printf("ccan/ilog\n");
+               printf("ccan/likely\n");
+               printf("ccan/list\n");
+               printf("ccan/time\n");
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/ccan/timer/benchmarks/Makefile b/ccan/timer/benchmarks/Makefile
new file mode 100644 (file)
index 0000000..7e0653a
--- /dev/null
@@ -0,0 +1,35 @@
+ALL:=expected-usage
+CCANDIR:=../../..
+CFLAGS:=-Wall -I$(CCANDIR) -O3 -flto
+LDFLAGS:=-O3 -flto
+LDLIBS:=-lrt
+
+OBJS:=time.o timer.o list.o opt_opt.o opt_parse.o opt_usage.o opt_helpers.o expected-usage.o
+
+default: $(ALL)
+
+expected-usage: $(OBJS)
+
+opt_parse.o: $(CCANDIR)/ccan/opt/parse.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+opt_usage.o: $(CCANDIR)/ccan/opt/usage.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+opt_helpers.o: $(CCANDIR)/ccan/opt/helpers.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+opt_opt.o: $(CCANDIR)/ccan/opt/opt.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+time.o: $(CCANDIR)/ccan/time/time.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+timer.o: $(CCANDIR)/ccan/timer/timer.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+list.o: $(CCANDIR)/ccan/list/list.c
+       $(CC) $(CFLAGS) -c -o $@ $<
+
+clean:
+       $(RM) *.o $(ALL)
diff --git a/ccan/timer/benchmarks/benchmark.c b/ccan/timer/benchmarks/benchmark.c
new file mode 100644 (file)
index 0000000..cb7120a
--- /dev/null
@@ -0,0 +1,53 @@
+#include <ccan/time/time.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef FIRST_APPROX
+#include "first-approx.c"
+#endif
+#ifdef SECOND_APPROX
+#include "second-approx.c"
+#endif
+#ifdef NO_APPROX
+#include "no-approx.c"
+#endif
+
+int main(int argc, char *argv[])
+{
+       struct timespec start, val, val2, end, diff;
+       unsigned int i, j, limit = atoi(argv[1] ?: "100000");
+       uint64_t val64;
+
+       val = start = time_now();
+       val64 = to_u64(start);
+       val2.tv_sec = 0;
+       val2.tv_nsec = 1;
+
+       for (j = 0; j < limit; j++) {
+               for (i = 0; i < limit; i++) {
+                       val = time_add(val, val2);
+                       val64 += to_u64(val2);
+               }
+       }
+
+       end = time_now();
+
+       printf("val64 says %lu.%09lu\n",
+              from_u64(val64).tv_sec,
+              from_u64(val64).tv_nsec);
+
+       printf("val says %lu.%09lu\n",
+              val.tv_sec,
+              val.tv_nsec);
+
+       if (time_greater(val, from_u64(val64)))
+               diff = time_sub(val, from_u64(val64));
+       else
+               diff = time_sub(from_u64(val64), val);
+
+       printf("Time %lluns, error = %i%%\n",
+              (long long)time_to_nsec(time_sub(end, start)),
+              (int)(100 * time_to_nsec(diff) / time_to_nsec(time_sub(val, start))));
+       return 0;
+}
diff --git a/ccan/timer/benchmarks/expected-usage.c b/ccan/timer/benchmarks/expected-usage.c
new file mode 100644 (file)
index 0000000..9777856
--- /dev/null
@@ -0,0 +1,71 @@
+/* We expect a timer to rarely go off, so benchmark that case:
+ * Every 1ms a connection comes in, we set up a 30 second timer for it.
+ * After 8192ms we finish the connection (and thus delete the timer).
+ */
+#include <ccan/timer/timer.h>
+#include <ccan/opt/opt.h>
+#include <ccan/array_size/array_size.h>
+#include <stdio.h>
+
+#define PER_CONN_TIME 8192
+#define CONN_TIMEOUT_MS 30000
+
+int main(int argc, char *argv[])
+{
+       struct timespec start, curr;
+       struct timers timers;
+       struct list_head expired;
+       struct timer t[PER_CONN_TIME];
+       unsigned int i, num;
+       bool check = false;
+
+       opt_register_noarg("-c|--check", opt_set_bool, &check,
+                          "Check timer structure during progress");
+
+       opt_parse(&argc, argv, opt_log_stderr_exit);
+
+       num = argv[1] ? atoi(argv[1]) : (check ? 10000 : 1000000);
+
+       list_head_init(&expired);
+       curr = start = time_now();
+       timers_init(&timers, start);
+
+       for (i = 0; i < num; i++) {
+               curr = time_add(curr, time_from_msec(1));
+               if (check)
+                       timers_check(&timers, NULL);
+               timers_expire(&timers, curr, &expired);
+               if (check)
+                       timers_check(&timers, NULL);
+               assert(list_empty(&expired));
+
+               if (i >= PER_CONN_TIME) {
+                       timer_del(&timers, &t[i%PER_CONN_TIME]);
+                       if (check)
+                               timers_check(&timers, NULL);
+               }
+               timer_add(&timers, &t[i%PER_CONN_TIME],
+                         time_add(curr, time_from_msec(CONN_TIMEOUT_MS)));
+               if (check)
+                       timers_check(&timers, NULL);
+       }
+       if (num > PER_CONN_TIME) {
+               for (i = 0; i < PER_CONN_TIME; i++)
+                       timer_del(&timers, &t[i]);
+       }
+
+       curr = time_sub(time_now(), start);
+       if (check)
+               timers_check(&timers, NULL);
+       timers_cleanup(&timers);
+       opt_free_table();
+
+       for (i = 0; i < ARRAY_SIZE(timers.level); i++)
+               if (!timers.level[i])
+                       break;
+
+       printf("%u in %lu.%09lu (%u levels / %zu)\n",
+              num, (long)curr.tv_sec, curr.tv_nsec,
+              i, ARRAY_SIZE(timers.level));
+       return 0;
+}
diff --git a/ccan/timer/design.txt b/ccan/timer/design.txt
new file mode 100644 (file)
index 0000000..c1fae66
--- /dev/null
@@ -0,0 +1,76 @@
+Cascading timer design.
+
+Inspired by the Linux kernel approach, documented roughly at:
+       https://lwn.net/Articles/152436/
+
+For easy description, we use whole seconds and powers of 10: in the
+implementation, we use powers of 2 (eg. 256 entries) and smaller
+granularities.
+
+We start with a simple data structure:
+
+struct timer_level {
+       struct timer_level *next;
+
+       /* Ten buckets:         [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] */
+       struct list_head bucket[10];
+};
+
+struct timers {
+       /* We can never have a timer before this, aka "now". */
+       time_t offset;
+
+       struct timer_level *level;
+
+       /* Anything too far in the future. */
+       struct list_head far;
+}
+
+The first level of timers holds anything which will happen in the next
+10 seconds.  The next level holds things which will happen in the next
+100 seconds.  And so on.
+
+When we want to add a new timer into the structure, we need to figure
+out first what level it goes into, and second, which bucket.  Say our
+offset is 500,000,001 (about Tue Nov 5, 1985 in Unix time).  And our
+timer is set to go off in 5 seconds, ie. 500,000,006.
+
+The level is easy: the difference between the timer and the offset is
+5, and that's less than 10, so it's in the first level.  The position,
+however, depends on the absolute time, in this case the last digit 6,
+so it's in bucket 6.
+
+Adding a timer at 500,000,123?  The difference is > 100 and < 1000, so
+it's in the third level.  The bucket is 1.  If there's no third level,
+we just add it to the 'far' list for stuff which is in the far future.
+
+Deleting a timer is as simple as removing it; there is no external
+bookkeeping in this scheme.  This matters, since timers used for
+timeouts are almost always deleted before they expire.
+
+Now, when a second passes, we need to know if there are any timers
+which are due.  We increment the offset to 500,000,002, and look in
+the first level, bucket 2 for any timers, so lookup is simple.
+
+We do this eight more times, and we increment the offset to
+500,000,010.  We've swept around back to bucket 0, though it may not
+be empty if we added more timers as we were going.
+
+But we need to look into the next level since a timer at 500,000,010
+added when the offset was 500,000,000 would have gone up there.  We
+empty bucket 1 (due to the '1' in 500,000,010) into these buckets,
+which will contain timers between 500,000,010 and 500,000,019, which
+all now are less than 10 seconds away, so belong in the bottom level.
+
+Similarly, at 500,000,020 we will empty bucket 1 of the second level
+into the first level.  And at 500,000,100 we will empty bucket 1 of
+the third level into the second level then bucket 0 of the second
+level into the first level.  We do it in this order, since emptying
+bucket 1 on the third level (500,000,100 - 500,000,199) may put more
+entries (500,000,100 - 500,000,109) into bucket 0 on the second level.
+
+When we get to 500,001,000 we should empty the fourth level.  If there
+is no fourth level, that's when we sort through the 'far' list and
+empty any which are less than 500,002,000.  If there are many entries
+in the far list, we should add more levels to reduce the number, or at
+least the frequency we have to check it.
diff --git a/ccan/timer/test/run-add.c b/ccan/timer/test/run-add.c
new file mode 100644 (file)
index 0000000..79f029a
--- /dev/null
@@ -0,0 +1,52 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+/* More than 32 bits */
+#define MAX_ORD 34
+
+/* 0...17, 63, 64, 65, 127, 128, 129, 255, 256, 257, ... */
+static uint64_t next(uint64_t base)
+{
+       if (base > 16 && ((base - 1) & ((base - 1) >> 1)) == 0)
+               return base * 2 - 3;
+       return base+1;
+}
+
+int main(void)
+{
+       struct timers timers;
+       struct timer t;
+       uint64_t diff;
+       unsigned int i;
+
+       /* This is how many tests you plan to run */
+       plan_tests(2 + (18 + (MAX_ORD - 4) * 3) * (18 + (MAX_ORD - 4) * 3));
+
+       timers_init(&timers, time_from_nsec(0));
+       ok1(timers_check(&timers, NULL));
+
+       for (i = 0; i < 4; i++)
+               add_level(&timers, i);
+
+       i = 0;
+       for (diff = 0; diff < (1ULL << MAX_ORD)+2; diff = next(diff)) {
+               i++;
+               for (timers.base = 0;
+                    timers.base < (1ULL << MAX_ORD)+2;
+                    timers.base = next(timers.base)) {
+                       t.time = timers.base + diff;
+                       timer_add_raw(&timers, &t);
+                       ok1(timers_check(&timers, NULL));
+                       timer_del(&timers, &t);
+               }
+       }
+
+       ok1(timers_check(&timers, NULL));
+
+       timers_cleanup(&timers);
+
+       /* This exits depending on whether all tests passed */
+       return exit_status();
+}
diff --git a/ccan/timer/test/run-expiry.c b/ccan/timer/test/run-expiry.c
new file mode 100644 (file)
index 0000000..e53de6a
--- /dev/null
@@ -0,0 +1,31 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+       struct timers timers;
+       struct timer t;
+       struct list_head list;
+
+       /* This is how many tests you plan to run */
+       plan_tests(7);
+
+       timers_init(&timers, grains_to_time(1364984760903400ULL));
+       ok1(timers.base == 1364984760903400ULL);
+       timer_add(&timers, &t, grains_to_time(1364984761003398ULL));
+       ok1(t.time == 1364984761003398ULL);
+       ok1(timers.first == 1364984761003398ULL);
+       timers_expire(&timers, grains_to_time(1364984760903444ULL), &list);
+       ok1(timers_check(&timers, NULL));
+       ok1(list_pop(&list, struct timer, list) == NULL);
+       timers_expire(&timers, grains_to_time(1364984761002667ULL), &list);
+       ok1(timers_check(&timers, NULL));
+       ok1(list_pop(&list, struct timer, list) == NULL);
+
+       timers_cleanup(&timers);
+
+       /* This exits depending on whether all tests passed */
+       return exit_status();
+}
diff --git a/ccan/timer/test/run-ff.c b/ccan/timer/test/run-ff.c
new file mode 100644 (file)
index 0000000..c40e3b1
--- /dev/null
@@ -0,0 +1,27 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+       struct timers timers;
+       struct timer t;
+       struct list_head expired;
+
+       /* This is how many tests you plan to run */
+       plan_tests(3);
+
+       timers_init(&timers, time_from_usec(1364726722653919ULL));
+       timer_add(&timers, &t, time_from_usec(1364726722703919ULL));
+       timers_expire(&timers, time_from_usec(1364726722653920ULL), &expired);
+       ok1(list_empty(&expired));
+       timers_expire(&timers, time_from_usec(1364726725454187ULL), &expired);
+       ok1(!list_empty(&expired));
+       ok1(list_top(&expired, struct timer, list) == &t);
+
+       timers_cleanup(&timers);
+
+       /* This exits depending on whether all tests passed */
+       return exit_status();
+}
diff --git a/ccan/timer/test/run.c b/ccan/timer/test/run.c
new file mode 100644 (file)
index 0000000..2946ba1
--- /dev/null
@@ -0,0 +1,84 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+       struct timers timers;
+       struct timer t[64];
+       struct list_head expired;
+       struct timespec earliest;
+       uint64_t i;
+
+       /* This is how many tests you plan to run */
+       plan_tests(488);
+
+       timers_init(&timers, time_from_nsec(0));
+       ok1(timers_check(&timers, NULL));
+       ok1(!timer_earliest(&timers, &earliest));
+
+       timer_add(&timers, &t[0], time_from_nsec(1));
+       ok1(timers_check(&timers, NULL));
+       ok1(timer_earliest(&timers, &earliest));
+       ok1(time_eq(earliest, grains_to_time(t[0].time)));
+       timer_del(&timers, &t[0]);
+       ok1(timers_check(&timers, NULL));
+       ok1(!timer_earliest(&timers, &earliest));
+
+       /* Check timer ordering. */
+       for (i = 0; i < 32; i++) {
+               timer_add(&timers, &t[i*2], time_from_nsec(1ULL << i));
+               ok1(timers_check(&timers, NULL));
+               timer_add(&timers, &t[i*2+1], time_from_nsec((1ULL << i) + 1));
+               ok1(timers_check(&timers, NULL));
+       }
+
+       for (i = 0; i < 32; i++) {
+               const struct timer *t1, *t2;
+
+               t1 = get_first(&timers);
+               ok1(t1 == &t[i*2] || t1 == &t[i*2+1]);
+               timer_del(&timers, (struct timer *)t1);
+               ok1(timers_check(&timers, NULL));
+
+               t2 = get_first(&timers);
+               ok1(t2 != t1 && (t2 == &t[i*2] || t2 == &t[i*2+1]));
+               timer_del(&timers, (struct timer *)t2);
+               ok1(timers_check(&timers, NULL));
+       }
+
+       /* Check expiry. */
+       for (i = 0; i < 32; i++) {
+               uint64_t exp = (uint64_t)TIMER_GRANULARITY << i;
+
+               timer_add(&timers, &t[i*2], time_from_nsec(exp));
+               ok1(timers_check(&timers, NULL));
+               timer_add(&timers, &t[i*2+1], time_from_nsec(exp + 1));
+               ok1(timers_check(&timers, NULL));
+       }
+
+       for (i = 0; i < 32; i++) {
+               struct timer *t1, *t2;
+
+               ok1(timer_earliest(&timers, &earliest));
+               timers_expire(&timers, earliest, &expired);
+
+               t1 = list_pop(&expired, struct timer, list);
+               ok1(t1);
+               t2 = list_pop(&expired, struct timer, list);
+               ok1(t2);
+               ok1(list_empty(&expired));
+
+               ok1(t1 == &t[i*2] || t1 == &t[i*2+1]);
+               ok1(t2 != t1 && (t2 == &t[i*2] || t2 == &t[i*2+1]));
+               ok1(timers_check(&timers, NULL));
+       }
+
+       ok1(!timer_earliest(&timers, &earliest));
+
+       timers_cleanup(&timers);
+
+       /* This exits depending on whether all tests passed */
+       return exit_status();
+}
diff --git a/ccan/timer/timer.c b/ccan/timer/timer.c
new file mode 100644 (file)
index 0000000..9486e6b
--- /dev/null
@@ -0,0 +1,431 @@
+/* LGPL (v2.1 or any later version) - see LICENSE file for details */
+#include <ccan/timer/timer.h>
+#include <ccan/array_size/array_size.h>
+#include <ccan/ilog/ilog.h>
+#include <ccan/likely/likely.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define PER_LEVEL (1ULL << TIMER_LEVEL_BITS)
+
+struct timer_level {
+       struct list_head list[PER_LEVEL];
+};
+
+static uint64_t time_to_grains(struct timespec ts)
+{
+       return ts.tv_sec * ((uint64_t)1000000000 / TIMER_GRANULARITY)
+               + (ts.tv_nsec / TIMER_GRANULARITY);
+}
+
+static struct timespec grains_to_time(uint64_t grains)
+{
+       struct timespec ts;
+
+       ts.tv_sec = grains / (1000000000 / TIMER_GRANULARITY);
+       ts.tv_nsec = (grains % (1000000000 / TIMER_GRANULARITY))
+               * TIMER_GRANULARITY;
+       return ts;
+}
+
+void timers_init(struct timers *timers, struct timespec start)
+{
+       unsigned int i;
+
+       list_head_init(&timers->far);
+       timers->base = time_to_grains(start);
+       for (i = 0; i < ARRAY_SIZE(timers->level); i++)
+               timers->level[i] = NULL;
+}
+
+static void timer_add_raw(struct timers *timers, struct timer *t)
+{
+       struct list_head *l;
+       uint64_t diff;
+       unsigned int level;
+
+       /* Level depends how far away it is. */
+       diff = t->time - timers->base;
+       level = ilog64(diff / 2) / TIMER_LEVEL_BITS;
+
+       if (!timers->level[level])
+               l = &timers->far;
+       else {
+               int off = (t->time >> (level*TIMER_LEVEL_BITS)) & (PER_LEVEL-1);
+               l = &timers->level[level]->list[off];
+       }
+
+       list_add_tail(l, &t->list);
+}
+
+void timer_add(struct timers *timers, struct timer *t, struct timespec when)
+{
+       t->time = time_to_grains(when);
+
+       /* Added in the past?  Treat it as imminent. */
+       if (t->time < timers->base)
+               t->time = timers->base;
+
+       timer_add_raw(timers, t);
+}
+
+/* FIXME: inline */
+void timer_del(struct timers *timers, struct timer *t)
+{
+       list_del(&t->list);
+}
+
+static void timers_far_get(struct timers *timers,
+                          struct list_head *list,
+                          uint64_t when)
+{
+       struct timer *i, *next;
+
+       list_for_each_safe(&timers->far, i, next, list) {
+               if (i->time <= when) {
+                       list_del_from(&timers->far, &i->list);
+                       list_add_tail(list, &i->list);
+               }
+       }
+}
+
+static void add_level(struct timers *timers, unsigned int level)
+{
+       struct timer_level *l;
+       struct timer *t;
+       unsigned int i;
+       struct list_head from_far;
+
+       l = malloc(sizeof(*l));
+       if (!l)
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(l->list); i++)
+               list_head_init(&l->list[i]);
+       timers->level[level] = l;
+
+       list_head_init(&from_far);
+       timers_far_get(timers, &from_far,
+                      timers->base + (1ULL << ((level+1)*TIMER_LEVEL_BITS)) - 1);
+
+       while ((t = list_pop(&from_far, struct timer, list)) != NULL)
+               timer_add_raw(timers, t);
+}
+
+/* Take timers from level and distribute them down one. */
+static void cascade(struct timers *timers, unsigned int level)
+{
+       struct timer *i;
+       struct list_head from_far, *list;
+
+       if (level == ARRAY_SIZE(timers->level) || !timers->level[level]) {
+               list_head_init(&from_far);
+               timers_far_get(timers, &from_far,
+                              timers->base
+                              + (1ULL << (level*TIMER_LEVEL_BITS))-1);
+               list = &from_far;
+               if (level != ARRAY_SIZE(timers->level))
+                       add_level(timers, level);
+       } else {
+               unsigned src;
+
+               src = (timers->base >> (level * TIMER_LEVEL_BITS)) % PER_LEVEL;
+               if (src == 0)
+                       cascade(timers, level + 1);
+               list = &timers->level[level]->list[src];
+       }
+
+       while ((i = list_pop(list, struct timer, list)) != NULL) {
+               unsigned dst;
+
+               assert(i->time >= timers->base);
+               assert(i->time < (timers->base
+                                 + (1ULL << ((level+1)*TIMER_LEVEL_BITS))));
+
+               dst = (i->time >> ((level-1)*TIMER_LEVEL_BITS)) % PER_LEVEL;
+               list_add_tail(&timers->level[level-1]->list[dst], &i->list);
+       }
+}
+
+static const struct timer *find_first(const struct list_head *list,
+                                     const struct timer *prev)
+{
+       struct timer *t;
+
+       list_for_each(list, t, list) {
+               if (!prev || t->time < prev->time)
+                       prev = t;
+       }
+       return prev;
+}
+
+static struct timer *get_first(const struct timers *timers)
+{
+       unsigned int level = 0, i, off;
+       bool need_next;
+       uint64_t base = timers->base;
+       const struct timer *found = NULL;
+       struct list_head *h;
+
+next:
+       if (!timers->level[level])
+               return (struct timer *)find_first(&timers->far, NULL);
+
+       need_next = false;
+       off = base % PER_LEVEL;
+       for (i = 0; i < PER_LEVEL; i++) {
+               h = &timers->level[level]->list[(i+off) % PER_LEVEL];
+
+               if (!list_empty(h))
+                       break;
+
+               /* We haven't cascaded yet, so if we wrap, we'll need to
+                * check next level, too. */
+               if (i + off == PER_LEVEL)
+                       need_next = true;
+       }
+       if (i == PER_LEVEL) {
+               level++;
+               base >>= TIMER_LEVEL_BITS;
+               goto next;
+       }
+
+       /* Level 0 is exact, so they're all the same. */
+       if (level == 0)
+               found = list_top(h, struct timer, list);
+       else
+               found = find_first(h, NULL);
+
+       if (need_next) {
+               if (!timers->level[level+1]) {
+                       found = find_first(&timers->far, found);
+               } else {
+                       base >>= TIMER_LEVEL_BITS;
+                       off = base % PER_LEVEL;
+                       h = &timers->level[level+1]->list[off];
+                       found = find_first(h, found);
+               }
+       }
+
+       return (struct timer *)found;
+}
+
+bool timer_earliest(const struct timers *timers, struct timespec *first)
+{
+       struct timer *found = get_first(timers);
+
+       if (!found)
+               return false;
+       *first = grains_to_time(found->time);
+       return true;
+}
+
+/* Assume no timers before 'time', cascade down and update base time. */
+static void timer_fast_forward(struct timers *timers, uint64_t time)
+{
+       unsigned int level, changed;
+       int need_level = -1;
+       struct list_head list;
+       struct timer *i;
+
+       /* How many bits changed between base and time?
+        * Each time we wrap, we need to empty buckets from above. */
+       if (time == timers->base)
+               return;
+
+       changed = ilog64_nz(time ^ timers->base);
+       level = (changed - 1) / TIMER_LEVEL_BITS;
+
+       /* Buckets always empty downwards, so we could cascade manually,
+        * but it's rarely very many so we just remove and re-add */
+       list_head_init(&list);
+
+       do {
+               if (!timers->level[level]) {
+                       /* We need any which belong on this level. */
+                       timers_far_get(timers, &list,
+                                      timers->base
+                                      + (1ULL << ((level+1)*TIMER_LEVEL_BITS))-1);
+                       need_level = level;
+               } else {
+                       unsigned src;
+
+                       /* Get all timers from this bucket. */
+                       src = (time >> (level * TIMER_LEVEL_BITS)) % PER_LEVEL;
+                       list_append_list(&list,
+                                        &timers->level[level]->list[src]);
+               }
+       } while (level--);
+
+       /* Did we hit the last level?  If so, add. */
+       if (need_level != -1)
+               add_level(timers, need_level);
+
+       /* Fast-forward the time, and re-add everyone. */
+       timers->base = time;
+       while ((i = list_pop(&list, struct timer, list)) != NULL)
+               timer_add_raw(timers, i);
+}
+
+/* Fills list of expired timers. */
+void timers_expire(struct timers *timers,
+                  struct timespec expire,
+                  struct list_head *list)
+{
+       uint64_t now = time_to_grains(expire);
+       unsigned int off;
+       const struct timer *first;
+
+       assert(now >= timers->base);
+
+       list_head_init(list);
+
+       if (!timers->level[0]) {
+               if (list_empty(&timers->far))
+                       return;
+               add_level(timers, 0);
+       }
+
+       while ((first = get_first(timers)) != NULL) {
+               assert(first->time >= timers->base);
+               if (first->time > now) {
+                       timer_fast_forward(timers, now);
+                       break;
+               }
+
+               timer_fast_forward(timers, first->time);
+               off = timers->base % PER_LEVEL;
+
+               list_append_list(list, &timers->level[0]->list[off]);
+               if (timers->base == now)
+                       break;
+       }
+}
+
+static bool timer_list_check(const struct list_head *l,
+                            uint64_t min, uint64_t max,
+                            const char *abortstr)
+{
+       const struct timer *t;
+
+       if (!list_check(l, abortstr))
+               return false;
+
+       list_for_each(l, t, list) {
+               if (t->time < min || t->time > max) {
+                       if (abortstr) {
+                               fprintf(stderr,
+                                       "%s: timer %p %llu not %llu-%llu\n",
+                                       abortstr, t, t->time, min, max);
+                               abort();
+                       }
+                       return false;
+               }
+       }
+       return true;
+}
+
+struct timers *timers_check(const struct timers *timers, const char *abortstr)
+{
+       unsigned int l, i, off;
+       uint64_t base;
+
+       l = 0;
+       if (!timers->level[0])
+               goto past_levels;
+
+       /* First level is simple. */
+       off = timers->base % PER_LEVEL;
+       for (i = 0; i < PER_LEVEL; i++) {
+               struct list_head *h;
+
+               h = &timers->level[l]->list[(i+off) % PER_LEVEL];
+               if (!timer_list_check(h, timers->base + i, timers->base + i,
+                                     abortstr))
+                       return NULL;
+       }
+
+       /* For other levels, "current" bucket has been emptied, and may contain
+        * entries for the current + level_size bucket. */
+       for (l = 1; timers->level[l] && l < PER_LEVEL; l++) {
+               uint64_t per_bucket = 1ULL << (TIMER_LEVEL_BITS * l);
+
+               off = ((timers->base >> (l*TIMER_LEVEL_BITS)) % PER_LEVEL);
+               /* We start at *next* bucket. */
+               base = (timers->base & ~(per_bucket - 1)) + per_bucket;
+
+               for (i = 1; i <= PER_LEVEL; i++) {
+                       struct list_head *h;
+
+                       h = &timers->level[l]->list[(i+off) % PER_LEVEL];
+                       if (!timer_list_check(h, base, base + per_bucket - 1,
+                                             abortstr))
+                               return NULL;
+                       base += per_bucket;
+               }
+       }
+
+past_levels:
+       base = (timers->base & ~((1ULL << (TIMER_LEVEL_BITS * l)) - 1))
+               + (1ULL << (TIMER_LEVEL_BITS * l)) - 1;
+       if (!timer_list_check(&timers->far, base, -1ULL, abortstr))
+               return NULL;
+
+       return (struct timers *)timers;
+}
+
+//#ifdef CCAN_TIMER_DEBUG
+void timers_dump(const struct timers *timers, FILE *fp)
+{
+       unsigned int l, i;
+       uint64_t min, max, num;
+       struct timer *t;
+
+       if (!fp)
+               fp = stderr;
+
+       fprintf(fp, "Base: %llu\n", timers->base);
+
+       for (l = 0; timers->level[l] && l < ARRAY_SIZE(timers->level); l++) {
+               fprintf(fp, "Level %i (+%llu):\n",
+                       l, (uint64_t)1 << (TIMER_LEVEL_BITS * l));
+               for (i = 0; i < (1 << TIMER_LEVEL_BITS); i++) {
+
+                       if (list_empty(&timers->level[l]->list[i]))
+                               continue;
+                       min = -1ULL;
+                       max = 0;
+                       num = 0;
+                       list_for_each(&timers->level[l]->list[i], t, list) {
+                               if (t->time < min)
+                                       min = t->time;
+                               if (t->time > max)
+                                       max = t->time;
+                               num++;
+                       }
+                       fprintf(stderr, "  %llu (+%llu-+%llu)\n",
+                               num, min - timers->base, max - timers->base);
+               }
+       }
+
+       min = -1ULL;
+       max = 0;
+       num = 0;
+       list_for_each(&timers->far, t, list) {
+               if (t->time < min)
+                       min = t->time;
+               if (t->time > max)
+                       max = t->time;
+               num++;
+       }
+       fprintf(stderr, "Far: %llu (%llu-%llu)\n", num, min, max);
+}
+//#endif
+
+void timers_cleanup(struct timers *timers)
+{
+       unsigned int l;
+
+       for (l = 0; l < ARRAY_SIZE(timers->level); l++)
+               free(timers->level[l]);
+}
diff --git a/ccan/timer/timer.h b/ccan/timer/timer.h
new file mode 100644 (file)
index 0000000..7b5d199
--- /dev/null
@@ -0,0 +1,143 @@
+/* LGPL (v2.1 or any later version) - see LICENSE file for details */
+#ifndef CCAN_TIMER_H
+#define CCAN_TIMER_H
+#include <ccan/time/time.h>
+#include <ccan/list/list.h>
+#include <stdint.h>
+
+/* We divide all nsec values by 1000, reducing it to usec granularity. */
+#define TIMER_GRANULARITY 1000
+/* This gives 16 pointers per level, up to 13 levels deep. */
+#define TIMER_LEVEL_BITS 4
+
+struct timers;
+struct timer;
+
+/**
+ * timers_init - initialize a timers struct.
+ * @timers: the struct timers
+ * @start: the minimum time which will ever be added.
+ *
+ * This sets up a timers struct: any timers added before @start will be
+ * set to expire immediately.
+ */
+void timers_init(struct timers *timers, struct timespec start);
+
+/**
+ * timers_cleanup - free allocations within timers struct.
+ * @timers: the struct timers
+ *
+ * This frees any timer layers allocated during use.
+ */
+void timers_cleanup(struct timers *timers);
+
+/**
+ * timer_add - insert a timer.
+ * @timers: the struct timers
+ * @timer: the (uninitialized) timer to add
+ * @when: when @timer expires.
+ *
+ * This efficiently adds @timer to @timers, to expire @when (rounded to
+ * TIMER_GRANULARITY nanoseconds).
+ */
+void timer_add(struct timers *timers, struct timer *timer,
+              struct timespec when);
+
+/**
+ * timer_del - remove an unexpired timer.
+ * @timers: the struct timers
+ * @timer: the timer previously added with timer_add()
+ *
+ * This efficiently removes @timer from @timers.
+ */
+void timer_del(struct timers *timers, struct timer *timer);
+
+/**
+ * timer_earliest - find out the first time when a timer will expire
+ * @timers: the struct timers
+ * @first: the time, only set if there is a timer.
+ *
+ * This returns false, and doesn't alter @first if there are no
+ * timers.  Otherwise, it sets @first to the expiry time of the first
+ * timer (rounded to TIMER_GRANULARITY nanoseconds), and returns true.
+ */
+bool timer_earliest(const struct timers *timers, struct timespec *first);
+
+/**
+ * timer_expire - update timers structure and remove expired timers.
+ * @timers: the struct timers
+ * @expire: the current time
+ * @list: the list for expired timers.
+ *
+ * @list will be initialized to the empty list, then all timers added
+ * with a @when arg less than or equal to @expire will be added to it in
+ * expiry order (within TIMER_GRANULARITY nanosecond precision).
+ *
+ * After this, @expire is considered the current time, and adding any
+ * timers with @when before this value will be silently changed to
+ * adding them with immediate expiration.
+ *
+ * You should not move @expire backwards, though it need not move
+ * forwards.
+ */
+void timers_expire(struct timers *timers,
+                  struct timespec expire,
+                  struct list_head *list);
+
+/**
+ * timers_check - check timer structure for consistency
+ * @t: the struct timers
+ * @abortstr: the location to print on aborting, or NULL.
+ *
+ * Because timers have redundant information, consistency checking can
+ * be done on the tree.  This is useful as a debugging check.  If
+ * @abortstr is non-NULL, that will be printed in a diagnostic if the
+ * timers structure is inconsistent, and the function will abort.
+ *
+ * Returns the timers struct if it is consistent, NULL if not (it can
+ * never return NULL if @abortstr is set).
+ */
+struct timers *timers_check(const struct timers *t, const char *abortstr);
+
+#ifdef CCAN_TIMER_DEBUG
+#include <stdio.h>
+
+/**
+ * timers_dump - dump the timers datastructure (for debugging it)
+ * @t: the struct timers
+ * @fp: the FILE to dump to (stderr if @fp is NULL)
+ */
+void timers_dump(const struct timers *timers, FILE *fp);
+#endif
+
+/**
+ * struct timers - structure to hold a set of timers.
+ *
+ * Initialized using timers_init, the levels of the timer are
+ * allocated as necessary, using malloc.
+ *
+ * See Also:
+ *     timers_init(), timers_cleanup()
+ */
+struct timers {
+       /* Far in the future. */
+       struct list_head far;
+       uint64_t base;
+
+       struct timer_level *level[(64 + TIMER_LEVEL_BITS-1) / TIMER_LEVEL_BITS];
+};
+
+/**
+ * struct timer - a single timer.
+ *
+ * Set up by timer_add(), this is usually contained within an
+ * application-specific structure.
+ *
+ * See Also:
+ *     ccan/container_of, timer_add(), timer_del()
+ */
+struct timer {
+       struct list_node list;
+       uint64_t time;
+};
+#endif /* CCAN_TIMER_H */