From 2c148ec2ad0459d29ba19ec5b1c5515717cc8d80 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 11 Jun 2019 14:24:56 +0930 Subject: [PATCH 1/1] ccan/crc32c: new module for accelerated CRC32 (on x86-64). Note: the previous code in ccan/crc is wrong, so I started fresh with actual test vectors. Signed-off-by: Rusty Russell --- ccan/crc32c/_info | 46 ++++ ccan/crc32c/benchmark/Makefile | 24 ++ ccan/crc32c/benchmark/bench.c | 57 +++++ ccan/crc32c/crc32c.c | 415 +++++++++++++++++++++++++++++++++ ccan/crc32c/crc32c.h | 48 ++++ ccan/crc32c/test/api-crc32c.c | 109 +++++++++ ccan/crc32c/test/run-crc32c.c | 111 +++++++++ 7 files changed, 810 insertions(+) create mode 100644 ccan/crc32c/_info create mode 100644 ccan/crc32c/benchmark/Makefile create mode 100644 ccan/crc32c/benchmark/bench.c create mode 100644 ccan/crc32c/crc32c.c create mode 100644 ccan/crc32c/crc32c.h create mode 100644 ccan/crc32c/test/api-crc32c.c create mode 100644 ccan/crc32c/test/run-crc32c.c diff --git a/ccan/crc32c/_info b/ccan/crc32c/_info new file mode 100644 index 00000000..5fe710eb --- /dev/null +++ b/ccan/crc32c/_info @@ -0,0 +1,46 @@ +#include "config.h" +#include +#include + +/** + * crc32c - routine for Castagnoli CRC (crc32c) of bytes + * + * Cyclic Redundancy Check routine, optimized for x86-64. Reasonably fast + * checksum routines, but not suitable for cryptographic use. + * + * They are useful for simple error detection, eg. a 32-bit CRC will + * detect a single error burst of up to 32 bits. + * + * Example: + * #include + * #include + * #include + * + * // Given "123456789" outputs 0xe3069283 + * int main(int argc, char *argv[]) + * { + * if (argc != 2) { + * fprintf(stderr, "Usage: %s \n" + * "Prints 32 bit CRC of the string\n", argv[0]); + * exit(1); + * } + * printf("0x%08x\n", crc32c(0, argv[1], strlen(argv[1]))); + * exit(0); + * } + * + * License: MIT + * Author: Mark Adler + * Maintainer: Rusty Russell + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/compiler\n"); + return 0; + } + + return 1; +} diff --git a/ccan/crc32c/benchmark/Makefile b/ccan/crc32c/benchmark/Makefile new file mode 100644 index 00000000..61127593 --- /dev/null +++ b/ccan/crc32c/benchmark/Makefile @@ -0,0 +1,24 @@ +CCANDIR=../../.. +CFLAGS=-Wall -Werror -O3 -I$(CCANDIR) -flto +#CFLAGS=-Wall -Werror -g3 -I$(CCANDIR) +LDFLAGS := -flto -O3 + +all: bench + +CCAN_OBJS:=ccan-tal.o ccan-tal-grab_file.o ccan-noerr.o ccan-take.o ccan-time.o + +bench: bench.o $(CCAN_OBJS) + +clean: + rm -f bench *.o + +ccan-time.o: $(CCANDIR)/ccan/time/time.c + $(CC) $(CFLAGS) -c -o $@ $< +ccan-tal.o: $(CCANDIR)/ccan/tal/tal.c + $(CC) $(CFLAGS) -c -o $@ $< +ccan-take.o: $(CCANDIR)/ccan/take/take.c + $(CC) $(CFLAGS) -c -o $@ $< +ccan-noerr.o: $(CCANDIR)/ccan/noerr/noerr.c + $(CC) $(CFLAGS) -c -o $@ $< +ccan-tal-grab_file.o: $(CCANDIR)/ccan/tal/grab_file/grab_file.c + $(CC) $(CFLAGS) -c -o $@ $< diff --git a/ccan/crc32c/benchmark/bench.c b/ccan/crc32c/benchmark/bench.c new file mode 100644 index 00000000..54b2d97a --- /dev/null +++ b/ccan/crc32c/benchmark/bench.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include + +#define RUNS 65536 + +int main(int argc, char *argv[]) +{ + void *p; + struct timeabs start, end; + size_t len, runs; + uint64_t sums = 0; + bool sw = false, hw = false; + + if (argv[1]) { + if (streq(argv[1], "--software")) { + sw = true; + argv++; + argc--; + + } else if (streq(argv[1], "--hardware")) { + hw = true; + argv++; + argc--; + } + } + + if (argc < 2 || (runs = atol(argv[1])) == 0) + errx(1, "Usage: bench []"); + + p = grab_file(NULL, argv[2]); + if (!p) + err(1, "Reading %s", argv[2] ? argv[2] : ""); + len = tal_count(p) - 1; + start = time_now(); + if (sw) { + for (size_t i = 0; i < runs; i++) + sums += crc32c_sw(0, p, len); + } else if (hw) { + for (size_t i = 0; i < runs; i++) + sums += crc32c_hw(0, p, len); + } else { + for (size_t i = 0; i < runs; i++) + sums += crc32c(0, p, len); + } + end = time_now(); + + assert(sums % runs == 0); + printf("%u usec for %zu bytes, sum=%08x\n", + (int)time_to_usec(time_divide(time_between(end, start), runs)), + len, + (unsigned int)(sums / runs)); + return 0; +} diff --git a/ccan/crc32c/crc32c.c b/ccan/crc32c/crc32c.c new file mode 100644 index 00000000..9eaec79b --- /dev/null +++ b/ccan/crc32c/crc32c.c @@ -0,0 +1,415 @@ +/* MIT (BSD) license - see LICENSE file for details */ +/* crc32c.c -- compute CRC-32C using the Intel crc32 instruction + * Copyright (C) 2013, 2015 Mark Adler + * Version 1.3 31 Dec 2015 Mark Adler + */ + +/* + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler + madler@alumni.caltech.edu + */ + +/* Use hardware CRC instruction on Intel SSE 4.2 processors. This computes a + CRC-32C, *not* the CRC-32 used by Ethernet and zip, gzip, etc. A software + version is provided as a fall-back, as well as for speed comparisons. */ + +/* Version history: + 1.0 10 Feb 2013 First version + 1.1 1 Aug 2013 Correct comments on why three crc instructions in parallel + 1.2 1 Nov 2015 Add const qualifier to avoid compiler warning + Load entire input into memory (test code) + Argument gives number of times to repeat (test code) + Argument < 0 forces software implementation (test code) + 1.3 31 Dec 2015 Check for Intel architecture using compiler macro + Support big-endian processors in software calculation + Add header for external use + */ + +#include "crc32c.h" +#include +#include + +static uint32_t crc32c_sw(uint32_t crc, void const *buf, size_t len); + +/* CRC-32C (iSCSI) polynomial in reversed bit order. */ +#define POLY 0x82f63b78 + +#ifdef __x86_64__ + +/* Hardware CRC-32C for Intel and compatible processors. */ + +/* Multiply a matrix times a vector over the Galois field of two elements, + GF(2). Each element is a bit in an unsigned integer. mat must have at + least as many entries as the power of two for most significant one bit in + vec. */ +static inline uint32_t gf2_matrix_times(uint32_t *mat, uint32_t vec) { + uint32_t sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* Multiply a matrix by itself over GF(2). Both mat and square must have 32 + rows. */ +static inline void gf2_matrix_square(uint32_t *square, uint32_t *mat) { + for (unsigned n = 0; n < 32; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* Construct an operator to apply len zeros to a crc. len must be a power of + two. If len is not a power of two, then the result is the same as for the + largest power of two less than len. The result for len == 0 is the same as + for len == 1. A version of this routine could be easily written for any + len, but that is not needed for this application. */ +static void crc32c_zeros_op(uint32_t *even, size_t len) { + uint32_t odd[32]; /* odd-power-of-two zeros operator */ + + /* put operator for one zero bit in odd */ + odd[0] = POLY; /* CRC-32C polynomial */ + uint32_t row = 1; + for (unsigned n = 1; n < 32; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* first square will put the operator for one zero byte (eight zero bits), + in even -- next square puts operator for two zero bytes in odd, and so + on, until len has been rotated down to zero */ + do { + gf2_matrix_square(even, odd); + len >>= 1; + if (len == 0) + return; + gf2_matrix_square(odd, even); + len >>= 1; + } while (len); + + /* answer ended up in odd -- copy to even */ + for (unsigned n = 0; n < 32; n++) + even[n] = odd[n]; +} + +/* Take a length and build four lookup tables for applying the zeros operator + for that length, byte-by-byte on the operand. */ +static void crc32c_zeros(uint32_t zeros[][256], size_t len) { + uint32_t op[32]; + + crc32c_zeros_op(op, len); + for (unsigned n = 0; n < 256; n++) { + zeros[0][n] = gf2_matrix_times(op, n); + zeros[1][n] = gf2_matrix_times(op, n << 8); + zeros[2][n] = gf2_matrix_times(op, n << 16); + zeros[3][n] = gf2_matrix_times(op, n << 24); + } +} + +/* Apply the zeros operator table to crc. */ +static inline uint32_t crc32c_shift(uint32_t zeros[][256], uint32_t crc) { + return zeros[0][crc & 0xff] ^ zeros[1][(crc >> 8) & 0xff] ^ + zeros[2][(crc >> 16) & 0xff] ^ zeros[3][crc >> 24]; +} + +/* Block sizes for three-way parallel crc computation. LONG and SHORT must + both be powers of two. The associated string constants must be set + accordingly, for use in constructing the assembler instructions. */ +#define LONG 8192 +#define LONGx1 "8192" +#define LONGx2 "16384" +#define SHORT 256 +#define SHORTx1 "256" +#define SHORTx2 "512" + +/* Tables for hardware crc that shift a crc by LONG and SHORT zeros. */ +static bool crc32c_once_hw; +static uint32_t crc32c_long[4][256]; +static uint32_t crc32c_short[4][256]; + +/* Initialize tables for shifting crcs. */ +static void crc32c_init_hw(void) { + crc32c_once_hw = true; + crc32c_zeros(crc32c_long, LONG); + crc32c_zeros(crc32c_short, SHORT); +} + +/* Compute CRC-32C using the Intel hardware instruction. */ +static uint32_t crc32c_hw(uint32_t crc, void const *buf, size_t len) { + /* populate shift tables the first time through */ + if (!crc32c_once_hw) + crc32c_init_hw(); + + /* pre-process the crc */ + crc = ~crc; + uint64_t crc0 = crc; /* 64-bits for crc32q instruction */ + + /* compute the crc for up to seven leading bytes to bring the data pointer + to an eight-byte boundary */ + unsigned char const *next = buf; + while (len && ((uintptr_t)next & 7) != 0) { + __asm__("crc32b\t" "(%1), %0" + : "=r"(crc0) + : "r"(next), "0"(crc0)); + next++; + len--; + } + + /* compute the crc on sets of LONG*3 bytes, executing three independent crc + instructions, each on LONG bytes -- this is optimized for the Nehalem, + Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a + throughput of one crc per cycle, but a latency of three cycles */ + while (len >= LONG*3) { + uint64_t crc1 = 0; + uint64_t crc2 = 0; + unsigned char const * const end = next + LONG; + do { + __asm__("crc32q\t" "(%3), %0\n\t" + "crc32q\t" LONGx1 "(%3), %1\n\t" + "crc32q\t" LONGx2 "(%3), %2" + : "=r"(crc0), "=r"(crc1), "=r"(crc2) + : "r"(next), "0"(crc0), "1"(crc1), "2"(crc2)); + next += 8; + } while (next < end); + crc0 = crc32c_shift(crc32c_long, crc0) ^ crc1; + crc0 = crc32c_shift(crc32c_long, crc0) ^ crc2; + next += LONG*2; + len -= LONG*3; + } + + /* do the same thing, but now on SHORT*3 blocks for the remaining data less + than a LONG*3 block */ + while (len >= SHORT*3) { + uint64_t crc1 = 0; + uint64_t crc2 = 0; + unsigned char const * const end = next + SHORT; + do { + __asm__("crc32q\t" "(%3), %0\n\t" + "crc32q\t" SHORTx1 "(%3), %1\n\t" + "crc32q\t" SHORTx2 "(%3), %2" + : "=r"(crc0), "=r"(crc1), "=r"(crc2) + : "r"(next), "0"(crc0), "1"(crc1), "2"(crc2)); + next += 8; + } while (next < end); + crc0 = crc32c_shift(crc32c_short, crc0) ^ crc1; + crc0 = crc32c_shift(crc32c_short, crc0) ^ crc2; + next += SHORT*2; + len -= SHORT*3; + } + + /* compute the crc on the remaining eight-byte units less than a SHORT*3 + block */ + { + unsigned char const * const end = next + (len - (len & 7)); + while (next < end) { + __asm__("crc32q\t" "(%1), %0" + : "=r"(crc0) + : "r"(next), "0"(crc0)); + next += 8; + } + len &= 7; + } + + /* compute the crc for up to seven trailing bytes */ + while (len) { + __asm__("crc32b\t" "(%1), %0" + : "=r"(crc0) + : "r"(next), "0"(crc0)); + next++; + len--; + } + + /* return a post-processed crc */ + return ~crc0; +} + +/* Compute a CRC-32C. If the crc32 instruction is available, use the hardware + version. Otherwise, use the software version. */ +uint32_t crc32c(uint32_t crc, void const *buf, size_t len) { + + return cpu_supports("sse4.2") ? crc32c_hw(crc, buf, len) : crc32c_sw(crc, buf, len); +} + +#else /* !__x86_64__ */ + +uint32_t crc32c(uint32_t crc, void const *buf, size_t len) { + return crc32c_sw(crc, buf, len); +} + +#endif + +/* Construct table for software CRC-32C little-endian calculation. */ +static bool crc32c_once_little; +static uint32_t crc32c_table_little[8][256]; +static void crc32c_init_sw_little(void) { + for (unsigned n = 0; n < 256; n++) { + uint32_t crc = n; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc32c_table_little[0][n] = crc; + } + for (unsigned n = 0; n < 256; n++) { + uint32_t crc = crc32c_table_little[0][n]; + for (unsigned k = 1; k < 8; k++) { + crc = crc32c_table_little[0][crc & 0xff] ^ (crc >> 8); + crc32c_table_little[k][n] = crc; + } + } + crc32c_once_little = true; +} + +/* Compute a CRC-32C in software assuming a little-endian architecture, + constructing the required table if that hasn't already been done. */ +static uint32_t crc32c_sw_little(uint32_t crc, void const *buf, size_t len) { + unsigned char const *next = buf; + + if (!crc32c_once_little) + crc32c_init_sw_little(); + crc = ~crc; + while (len && ((uintptr_t)next & 7) != 0) { + crc = crc32c_table_little[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + if (len >= 8) { + uint64_t crcw = crc; + do { + crcw ^= *(uint64_t const *)next; + crcw = crc32c_table_little[7][crcw & 0xff] ^ + crc32c_table_little[6][(crcw >> 8) & 0xff] ^ + crc32c_table_little[5][(crcw >> 16) & 0xff] ^ + crc32c_table_little[4][(crcw >> 24) & 0xff] ^ + crc32c_table_little[3][(crcw >> 32) & 0xff] ^ + crc32c_table_little[2][(crcw >> 40) & 0xff] ^ + crc32c_table_little[1][(crcw >> 48) & 0xff] ^ + crc32c_table_little[0][crcw >> 56]; + next += 8; + len -= 8; + } while (len >= 8); + crc = crcw; + } + while (len) { + crc = crc32c_table_little[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + return ~crc; +} + +/* Swap the bytes in a uint64_t. (Only for big-endian.) */ +#if defined(__has_builtin) || (defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) +# define swap __builtin_bswap64 +#else +static inline uint64_t swap(uint64_t x) { + x = ((x << 8) & 0xff00ff00ff00ff00) | ((x >> 8) & 0xff00ff00ff00ff); + x = ((x << 16) & 0xffff0000ffff0000) | ((x >> 16) & 0xffff0000ffff); + return (x << 32) | (x >> 32); +} +#endif + +/* Construct tables for software CRC-32C big-endian calculation. */ +static bool crc32c_once_big; +static uint32_t crc32c_table_big_byte[256]; +static uint64_t crc32c_table_big[8][256]; +static void crc32c_init_sw_big(void) { + for (unsigned n = 0; n < 256; n++) { + uint32_t crc = n; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; + crc32c_table_big_byte[n] = crc; + } + for (unsigned n = 0; n < 256; n++) { + uint32_t crc = crc32c_table_big_byte[n]; + crc32c_table_big[0][n] = swap(crc); + for (unsigned k = 1; k < 8; k++) { + crc = crc32c_table_big_byte[crc & 0xff] ^ (crc >> 8); + crc32c_table_big[k][n] = swap(crc); + } + } + crc32c_once_big = true; +} + +/* Compute a CRC-32C in software assuming a big-endian architecture, + constructing the required tables if that hasn't already been done. */ +static uint32_t crc32c_sw_big(uint32_t crc, void const *buf, size_t len) { + unsigned char const *next = buf; + + if (!crc32c_once_big) + crc32c_init_sw_big(); + crc = ~crc; + while (len && ((uintptr_t)next & 7) != 0) { + crc = crc32c_table_big_byte[(crc ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + if (len >= 8) { + uint64_t crcw = swap(crc); + do { + crcw ^= *(uint64_t const *)next; + crcw = crc32c_table_big[0][crcw & 0xff] ^ + crc32c_table_big[1][(crcw >> 8) & 0xff] ^ + crc32c_table_big[2][(crcw >> 16) & 0xff] ^ + crc32c_table_big[3][(crcw >> 24) & 0xff] ^ + crc32c_table_big[4][(crcw >> 32) & 0xff] ^ + crc32c_table_big[5][(crcw >> 40) & 0xff] ^ + crc32c_table_big[6][(crcw >> 48) & 0xff] ^ + crc32c_table_big[7][(crcw >> 56)]; + next += 8; + len -= 8; + } while (len >= 8); + crc = swap(crcw); + } + while (len) { + crc = crc32c_table_big_byte[(crc ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + return ~crc; +} + +/* Table-driven software CRC-32C. This is about 15 times slower than using the + hardware instructions. Determine the endianess of the processor and proceed + accordingly. Ideally the endianess will be determined at compile time, in + which case the unused functions and tables for the other endianess will be + removed by the optimizer. If not, then the proper routines and tables will + be used, even if the endianess is changed mid-stream. (Yes, there are + processors that permit that -- go figure.) */ +static uint32_t crc32c_sw(uint32_t crc, void const *buf, size_t len) { + static int const little = 1; + if (*(char const *)&little) + return crc32c_sw_little(crc, buf, len); + else + return crc32c_sw_big(crc, buf, len); +} diff --git a/ccan/crc32c/crc32c.h b/ccan/crc32c/crc32c.h new file mode 100644 index 00000000..49e3337f --- /dev/null +++ b/ccan/crc32c/crc32c.h @@ -0,0 +1,48 @@ +/* Licensed under MIT - see LICENSE file for details */ +#ifndef CCAN_CRC32C_H +#define CCAN_CRC32C_H +#include +#include + +/** + * crc32c - Castagnoli 32 bit crc of string of bytes + * @start_crc: the initial crc (usually 0) + * @buf: pointer to bytes + * @size: length of buffer + * + * If you don't know what crc32 to use, use this one: it's the best. + * + * @Article{castagnoli-crc, + * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, + * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits}}, + * journal = IEEE Transactions on Communication, + * year = {1993}, + * volume = {41}, + * number = {6}, + * pages = {}, + * month = {June}, + *} + * 32 bit CRC checksum using polynomial + * X^32+X^28+X^27+X^26+X^25+X^23+X^22+X^20+X^19+X^18+X^14+X^13+X^11+X^10+X^9+X^8+X^6+X^0. + * + * You can calculate the CRC of non-contiguous arrays by passing @start_crc + * as 0 the first time, and the current crc result from then on. + * + * Example: + * #include + * ... + * // Check that iovec has the crc we expect (Castagnoli version) + * static bool check_crc(uint32_t expected, const struct iovec *iov, int l) + * { + * uint32_t crc = 0; + * while (l >= 0) { + * crc = crc32c(crc, iov->iov_base, iov->iov_len); + * iov++; + * } + * return crc == expected; + * } + */ +uint32_t crc32c(uint32_t start_crc, const void *buf, size_t size); + +#endif /* CCAN_CRC32C_H */ diff --git a/ccan/crc32c/test/api-crc32c.c b/ccan/crc32c/test/api-crc32c.c new file mode 100644 index 00000000..681a2f2d --- /dev/null +++ b/ccan/crc32c/test/api-crc32c.c @@ -0,0 +1,109 @@ +/* Test vectors from https://tools.ietf.org/html/rfc3720#appendix-B.4 */ +#include +#include +#include + +#define BSWAP_32(val) \ + ((((uint32_t)(val) & 0x000000ff) << 24) \ + | (((uint32_t)(val) & 0x0000ff00) << 8) \ + | (((uint32_t)(val) & 0x00ff0000) >> 8) \ + | (((uint32_t)(val) & 0xff000000) >> 24)) + +#if HAVE_LITTLE_ENDIAN +#define BE32_TO_CPU(le_val) BSWAP_32((uint32_t)le_val) +#else +#define BE32_TO_CPU(le_val) ((uint32_t)(le_val)) +#endif + +int main(void) +{ + unsigned char m[48]; + + plan_tests(5); + + /* 32 bytes of zeroes: + + Byte: 0 1 2 3 + + 0: 00 00 00 00 + ... + 28: 00 00 00 00 + + CRC: aa 36 91 8a + */ + + memset(m, 0, 32); + ok1(crc32c(0, m, 32) == BE32_TO_CPU(0xaa36918a)); + + /* 32 bytes of ones: + + Byte: 0 1 2 3 + + 0: ff ff ff ff + ... + 28: ff ff ff ff + + CRC: 43 ab a8 62 + */ + memset(m, 0xff, 32); + ok1(crc32c(0, m, 32) == BE32_TO_CPU(0x43aba862)); + + /* 32 bytes of incrementing 00..1f: + + Byte: 0 1 2 3 + + 0: 00 01 02 03 + ... + 28: 1c 1d 1e 1f + + CRC: 4e 79 dd 46 + */ + for (size_t i = 0; i < 32; i++) + m[i] = i; + ok1(crc32c(0, m, 32) == BE32_TO_CPU(0x4e79dd46)); + + /* 32 bytes of decrementing 1f..00: + + Byte: 0 1 2 3 + + 0: 1f 1e 1d 1c + ... + 28: 03 02 01 00 + + CRC: 5c db 3f 11 + */ + for (size_t i = 0; i < 32; i++) + m[i] = 31 - i; + ok1(crc32c(0, m, 32) == BE32_TO_CPU(0x5cdb3f11)); + + /* An iSCSI - SCSI Read (10) Command PDU + Byte: 0 1 2 3 + + 0: 01 c0 00 00 + 4: 00 00 00 00 + 8: 00 00 00 00 + 12: 00 00 00 00 + 16: 14 00 00 00 + 20: 00 00 04 00 + 24: 00 00 00 14 + 28: 00 00 00 18 + 32: 28 00 00 00 + 36: 00 00 00 00 + 40: 02 00 00 00 + 44: 00 00 00 00 + + CRC: 56 3a 96 d9 + */ + memset(m, 0, sizeof(m)); + m[0] = 0x01; + m[1] = 0xc0; + m[16] = 0x14; + m[22] = 0x04; + m[27] = 0x14; + m[31] = 0x18; + m[32] = 0x28; + m[40] = 0x02; + ok1(crc32c(0, m, sizeof(m)) == BE32_TO_CPU(0x563a96d9)); + + return exit_status(); +} diff --git a/ccan/crc32c/test/run-crc32c.c b/ccan/crc32c/test/run-crc32c.c new file mode 100644 index 00000000..304a3d2f --- /dev/null +++ b/ccan/crc32c/test/run-crc32c.c @@ -0,0 +1,111 @@ +/* Test vectors from https://tools.ietf.org/html/rfc3720#appendix-B.4 */ + +/* Get access to sw version explicitly */ +#include +#include +#include + +#define BSWAP_32(val) \ + ((((uint32_t)(val) & 0x000000ff) << 24) \ + | (((uint32_t)(val) & 0x0000ff00) << 8) \ + | (((uint32_t)(val) & 0x00ff0000) >> 8) \ + | (((uint32_t)(val) & 0xff000000) >> 24)) + +#if HAVE_LITTLE_ENDIAN +#define BE32_TO_CPU(le_val) BSWAP_32((uint32_t)le_val) +#else +#define BE32_TO_CPU(le_val) ((uint32_t)(le_val)) +#endif + +int main(void) +{ + unsigned char m[48]; + + plan_tests(5); + + /* 32 bytes of zeroes: + + Byte: 0 1 2 3 + + 0: 00 00 00 00 + ... + 28: 00 00 00 00 + + CRC: aa 36 91 8a + */ + + memset(m, 0, 32); + ok1(crc32c_sw(0, m, 32) == BE32_TO_CPU(0xaa36918a)); + + /* 32 bytes of ones: + + Byte: 0 1 2 3 + + 0: ff ff ff ff + ... + 28: ff ff ff ff + + CRC: 43 ab a8 62 + */ + memset(m, 0xff, 32); + ok1(crc32c_sw(0, m, 32) == BE32_TO_CPU(0x43aba862)); + + /* 32 bytes of incrementing 00..1f: + + Byte: 0 1 2 3 + + 0: 00 01 02 03 + ... + 28: 1c 1d 1e 1f + + CRC: 4e 79 dd 46 + */ + for (size_t i = 0; i < 32; i++) + m[i] = i; + ok1(crc32c_sw(0, m, 32) == BE32_TO_CPU(0x4e79dd46)); + + /* 32 bytes of decrementing 1f..00: + + Byte: 0 1 2 3 + + 0: 1f 1e 1d 1c + ... + 28: 03 02 01 00 + + CRC: 5c db 3f 11 + */ + for (size_t i = 0; i < 32; i++) + m[i] = 31 - i; + ok1(crc32c_sw(0, m, 32) == BE32_TO_CPU(0x5cdb3f11)); + + /* An iSCSI - SCSI Read (10) Command PDU + Byte: 0 1 2 3 + + 0: 01 c0 00 00 + 4: 00 00 00 00 + 8: 00 00 00 00 + 12: 00 00 00 00 + 16: 14 00 00 00 + 20: 00 00 04 00 + 24: 00 00 00 14 + 28: 00 00 00 18 + 32: 28 00 00 00 + 36: 00 00 00 00 + 40: 02 00 00 00 + 44: 00 00 00 00 + + CRC: 56 3a 96 d9 + */ + memset(m, 0, sizeof(m)); + m[0] = 0x01; + m[1] = 0xc0; + m[16] = 0x14; + m[22] = 0x04; + m[27] = 0x14; + m[31] = 0x18; + m[32] = 0x28; + m[40] = 0x02; + ok1(crc32c_sw(0, m, sizeof(m)) == BE32_TO_CPU(0x563a96d9)); + + return exit_status(); +} -- 2.39.2