X-Git-Url: https://git.ozlabs.org/?a=blobdiff_plain;f=ccan%2Fcrypto%2Fsha256%2Fbenchmarks%2Fdouble-sha-bench.c;h=4478b16379a0940854122cae18b408b846c2225f;hb=HEAD;hp=93fd9d31d418cd6d41b62e391c43359e6ce1e32e;hpb=02e6fb3c3422b8af26133638bca38c8a5a5fa3fb;p=ccan diff --git a/ccan/crypto/sha256/benchmarks/double-sha-bench.c b/ccan/crypto/sha256/benchmarks/double-sha-bench.c index 93fd9d31..4478b163 100644 --- a/ccan/crypto/sha256/benchmarks/double-sha-bench.c +++ b/ccan/crypto/sha256/benchmarks/double-sha-bench.c @@ -1,23 +1,122 @@ /* Bitcoin does a lot of SHA of SHA. Benchmark that. */ -#include +#include #include #include +void sha256_avx(void *input_data, uint32_t digest[8], uint64_t num_blks); +void sha256_rorx(void *input_data, uint32_t digest[8], uint64_t num_blks); +void sha256_rorx_x8ms(void *input_data, uint32_t digest[8], uint64_t num_blks); +void sha256_sse4(void *input_data, uint32_t digest[8], uint64_t num_blks); + int main(int argc, char *argv[]) { struct timeabs start; struct timerel diff; size_t i, n; - struct sha256 h; + union { + struct sha256 h; + uint32_t u32[16]; + uint8_t u8[64]; + } block; n = atoi(argv[1] ? argv[1] : "1000000"); - sha256(&h, &n, sizeof(n)); + memset(&block, 0, sizeof(block)); + sha256(&block.h, &n, sizeof(n)); + + start = time_now(); + for (i = 0; i < n; i++) { + sha256(&block.h, &block.h, sizeof(block.h)); + } + diff = time_divide(time_between(time_now(), start), n); + printf("Normal gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n", + block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], + block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], + (unsigned long long)time_to_nsec(diff)); + + /* Now, don't re-initialize every time; use Transform */ + memset(&block, 0, sizeof(block)); + sha256(&block.h, &n, sizeof(n)); + block.u8[sizeof(block.h)] = 0x80; + /* Size is 256 bits */ + block.u8[sizeof(block)-2] = 1; + + start = time_now(); + for (i = 0; i < n; i++) { + struct sha256_ctx ctx = SHA256_INIT; + size_t j; + Transform(ctx.s, block.u32); + for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) + block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); + } + diff = time_divide(time_between(time_now(), start), n); + printf("Transform gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n", + block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], + block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], + (unsigned long long)time_to_nsec(diff)); + + /* Now, assembler variants */ + sha256(&block.h, &n, sizeof(n)); + + start = time_now(); + for (i = 0; i < n; i++) { + struct sha256_ctx ctx = SHA256_INIT; + size_t j; + sha256_rorx(block.u32, ctx.s, 1); + for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) + block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); + } + diff = time_divide(time_between(time_now(), start), n); + printf("Asm rorx for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", + block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], + block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], + (unsigned long long)time_to_nsec(diff)); + + sha256(&block.h, &n, sizeof(n)); + start = time_now(); - for (i = 0; i < n; i++) - sha256(&h, &h, sizeof(h)); + for (i = 0; i < n; i++) { + struct sha256_ctx ctx = SHA256_INIT; + size_t j; + sha256_sse4(block.u32, ctx.s, 1); + for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) + block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); + } diff = time_divide(time_between(time_now(), start), n); - printf("Hashing time for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", - h.u.u8[0], h.u.u8[1], h.u.u8[2], h.u.u8[3], h.u.u8[4], h.u.u8[5], + printf("Asm SSE4 for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", + block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], + block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], (unsigned long long)time_to_nsec(diff)); + + sha256(&block.h, &n, sizeof(n)); + start = time_now(); + for (i = 0; i < n; i++) { + struct sha256_ctx ctx = SHA256_INIT; + size_t j; + sha256_rorx_x8ms(block.u32, ctx.s, 1); + for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) + block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); + } + diff = time_divide(time_between(time_now(), start), n); + printf("Asm RORx-x8ms for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", + block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], + block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], + (unsigned long long)time_to_nsec(diff)); + + sha256(&block.h, &n, sizeof(n)); + start = time_now(); + for (i = 0; i < n; i++) { + struct sha256_ctx ctx = SHA256_INIT; + size_t j; + sha256_avx(block.u32, ctx.s, 1); + for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) + block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); + } + diff = time_divide(time_between(time_now(), start), n); + printf("Asm AVX for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", + block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], + block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], + (unsigned long long)time_to_nsec(diff)); + return 0; } +