+ memset(&block, 0, sizeof(block)); /* start from an all-zero block */
+ sha256(&block.h, &n, sizeof(n)); /* seed: block.h = sha256 over the bytes of n */
+ /* Baseline: time n chained sha256() calls, each hashing the previous digest */
+ start = time_now();
+ for (i = 0; i < n; i++) {
+ sha256(&block.h, &block.h, sizeof(block.h)); /* block.h is both input and output */
+ }
+ diff = time_divide(time_between(time_now(), start), n); /* elapsed time divided by the n iterations */
+ printf("Normal gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n",
+ block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
+ block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
+ (unsigned long long)time_to_nsec(diff));
+
+ /* Now write the padding into the block once, then call Transform directly each iteration */
+ memset(&block, 0, sizeof(block));
+ sha256(&block.h, &n, sizeof(n)); /* same seed as the sha256() loop above */
+ block.u8[sizeof(block.h)] = 0x80; /* 0x80 marker byte directly after the digest */
+ /* Message size is 256 bits (0x0100): second-to-last byte of the block is set to 1 */
+ block.u8[sizeof(block)-2] = 1;
+
+ start = time_now();
+ for (i = 0; i < n; i++) {
+ struct sha256_ctx ctx = SHA256_INIT; /* fresh initial state every iteration */
+ size_t j;
+ Transform(ctx.s, block.u32); /* arguments: state first, then the block's words */
+ for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) /* every word of ctx.s */
+ block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); /* write state back into block.h as big-endian words */
+ }
+ diff = time_divide(time_between(time_now(), start), n); /* elapsed time divided by the n iterations */
+ printf("Transform gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n",
+ block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
+ block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
+ (unsigned long long)time_to_nsec(diff));
+
+ /* Now, assembler variants: same loop shape, with sha256_rorx in place of Transform */
+ sha256(&block.h, &n, sizeof(n)); /* reseeds block.h only; the block is not re-zeroed or re-padded here */
+
+ start = time_now();
+ for (i = 0; i < n; i++) {
+ struct sha256_ctx ctx = SHA256_INIT; /* fresh initial state every iteration */
+ size_t j;
+ sha256_rorx(block.u32, ctx.s, 1); /* data first, then state (reverse of Transform); NOTE(review): the 1 is presumably a block count - confirm */
+ for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) /* every word of ctx.s */
+ block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); /* write state back into block.h as big-endian words */
+ }
+ diff = time_divide(time_between(time_now(), start), n); /* elapsed time divided by the n iterations */
+ printf("Asm rorx for %02x%02x%02x%02x%02x%02x... is %llu nsec\n",
+ block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
+ block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
+ (unsigned long long)time_to_nsec(diff));
+
+ sha256(&block.h, &n, sizeof(n));
+