X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcrcsync%2Fcrcsync.c;h=6e87dac43ec8d0a58d65847967a1a0abd737b46b;hp=cb768806f6e03e829293f624ea06db540fe02e32;hb=56023cca5f66a40646a1e807c3d10af6e5913623;hpb=94ff9fc87d90173145772a41f79987988f9f0972 diff --git a/ccan/crcsync/crcsync.c b/ccan/crcsync/crcsync.c index cb768806..6e87dac4 100644 --- a/ccan/crcsync/crcsync.c +++ b/ccan/crcsync/crcsync.c @@ -1,76 +1,95 @@ +/* Licensed under LGPLv2.1+ - see LICENSE file for details */ #include "crcsync.h" #include #include #include +#include + +/* FIXME: That 64-bit CRC takes a while to warm the lower bits. Do + * some quantitative tests and replace it? Meanwhile, use upper bits. */ +static uint64_t mask_of(unsigned int crcbits) +{ + return -1ULL << (64 - crcbits); +} void crc_of_blocks(const void *data, size_t len, unsigned int block_size, - unsigned int crcbits, uint32_t crc[]) + unsigned int crcbits, uint64_t crc[]) { unsigned int i; const uint8_t *buf = data; - uint32_t crcmask = crcbits < 32 ? (1 << crcbits) - 1 : 0xFFFFFFFF; + uint64_t crcmask = mask_of(crcbits); for (i = 0; len >= block_size; i++) { - crc[i] = (crc32c(0, buf, block_size) & crcmask); + crc[i] = (crc64_iso(0, buf, block_size) & crcmask); buf += block_size; len -= block_size; } if (len) - crc[i] = (crc32c(0, buf, len) & crcmask); + crc[i] = (crc64_iso(0, buf, len) & crcmask); } struct crc_context { size_t block_size; - uint32_t crcmask; + uint64_t crcmask; /* Saved old buffer bytes (block_size bytes). */ void *buffer; size_t buffer_start, buffer_end; /* Progress so far. */ - uint32_t running_crc; + uint64_t running_crc; size_t literal_bytes; size_t total_bytes; int have_match; + /* Final block is special (if a different size) */ + size_t tail_size; + uint64_t tail_crc; + /* Uncrc tab. */ - uint32_t uncrc_tab[256]; + uint64_t uncrc_tab[256]; + /* This doesn't count the last CRC. */ unsigned int num_crcs; - uint32_t crc[]; + uint64_t crc[]; }; /* Calculate the how the crc changes when we take a give char out of the * crc'd area. */ -static void init_uncrc_tab(uint32_t uncrc_tab[], unsigned int wsize) +static void init_uncrc_tab(uint64_t uncrc_tab[], unsigned int wsize) { unsigned int i; - uint32_t part_crc; + uint64_t part_crc; uint8_t buffer[wsize]; /* Calculate crc(buffer+1, wsize-1) once, since it doesn't change. */ memset(buffer, 0, wsize); - part_crc = crc32c(0, buffer+1, wsize-1); + part_crc = crc64_iso(0, buffer+1, wsize-1); for (i = 0; i < 256; i++) { buffer[0] = i; - uncrc_tab[i] = (crc32c(0, buffer, wsize) ^ part_crc); + uncrc_tab[i] = (crc64_iso(0, buffer, wsize) ^ part_crc); } } struct crc_context *crc_context_new(size_t block_size, unsigned crcbits, - const uint32_t crc[], unsigned num_crcs) + const uint64_t crc[], unsigned num_crcs, + size_t tail_size) { struct crc_context *ctx; + assert(num_crcs > 0); + assert(block_size > 0); + assert(tail_size < block_size); + ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs); if (ctx) { ctx->block_size = block_size; - /* Technically, 1 << 32 is undefined. */ - if (crcbits >= 32) - ctx->crcmask = 0xFFFFFFFF; - else - ctx->crcmask = (1 << crcbits)-1; + ctx->tail_size = tail_size; + if (tail_size) + ctx->tail_crc = crc[--num_crcs]; + + ctx->crcmask = mask_of(crcbits); ctx->num_crcs = num_crcs; memcpy(ctx->crc, crc, sizeof(crc[0])*num_crcs); ctx->buffer_end = 0; @@ -103,19 +122,27 @@ static int crc_matches(const struct crc_context *ctx) return -1; } -static uint32_t crc_add_byte(uint32_t crc, uint8_t newbyte) +static bool tail_matches(const struct crc_context *ctx) +{ + if (ctx->literal_bytes != ctx->tail_size) + return false; + + return (ctx->running_crc & ctx->crcmask) == ctx->tail_crc; +} + +static uint64_t crc_add_byte(uint64_t crc, uint8_t newbyte) { - return crc32c(crc, &newbyte, 1); + return crc64_iso(crc, &newbyte, 1); } -static uint32_t crc_remove_byte(uint32_t crc, uint8_t oldbyte, - const uint32_t uncrc_tab[]) +static uint64_t crc_remove_byte(uint64_t crc, uint8_t oldbyte, + const uint64_t uncrc_tab[]) { return crc ^ uncrc_tab[oldbyte]; } -static uint32_t crc_roll(uint32_t crc, uint8_t oldbyte, uint8_t newbyte, - const uint32_t uncrc_tab[]) +static uint64_t crc_roll(uint64_t crc, uint8_t oldbyte, uint8_t newbyte, + const uint64_t uncrc_tab[]) { return crc_add_byte(crc_remove_byte(crc, oldbyte, uncrc_tab), newbyte); } @@ -138,44 +165,45 @@ size_t crc_read_block(struct crc_context *ctx, long *result, goto have_match; } + /* old is the trailing edge of the checksum window. */ if (buffer_size(ctx) >= ctx->block_size) - old = ctx->buffer + ctx->buffer_start; + old = (uint8_t *)ctx->buffer + ctx->buffer_start; else old = NULL; - while (!old || (crcmatch = crc_matches(ctx)) < 0) { + while (ctx->literal_bytes < ctx->block_size + || (crcmatch = crc_matches(ctx)) < 0) { if (consumed == buflen) break; + /* Increment these now in case we hit goto: below. */ + ctx->literal_bytes++; + ctx->total_bytes++; + consumed++; + /* Update crc. */ if (old) { ctx->running_crc = crc_roll(ctx->running_crc, *old, *p, ctx->uncrc_tab); old++; - if (old == ctx->buffer + ctx->buffer_end) + /* End of stored buffer? Start on data they gave us. */ + if (old == (uint8_t *)ctx->buffer + ctx->buffer_end) old = buf; } else { ctx->running_crc = crc_add_byte(ctx->running_crc, *p); if (p == (uint8_t *)buf + ctx->block_size - 1) old = buf; + /* We don't roll this csum, we only look for it after + * a block match. It's simpler and faster. */ + if (tail_matches(ctx)) { + crcmatch = ctx->num_crcs; + goto have_match; + } } - - ctx->literal_bytes++; - ctx->total_bytes++; - consumed++; p++; } - /* Make sure we have a copy of the last block_size bytes. - * First, copy down the old data. */ - if (buffer_size(ctx)) { - memmove(ctx->buffer, ctx->buffer + ctx->buffer_start, - buffer_size(ctx)); - ctx->buffer_end -= ctx->buffer_start; - ctx->buffer_start = 0; - } - if (crcmatch >= 0) { /* We have a match! */ if (ctx->literal_bytes > ctx->block_size) { @@ -187,81 +215,71 @@ size_t crc_read_block(struct crc_context *ctx, long *result, } else { have_match: *result = -crcmatch-1; - ctx->literal_bytes -= ctx->block_size; - assert(ctx->literal_bytes == 0); + if (crcmatch == ctx->num_crcs) + assert(ctx->literal_bytes == ctx->tail_size); + else + assert(ctx->literal_bytes == ctx->block_size); + ctx->literal_bytes = 0; ctx->have_match = -1; ctx->running_crc = 0; + /* Nothing more in the buffer. */ + ctx->buffer_start = ctx->buffer_end = 0; } } else { /* Output literal if it's more than 1 block ago. */ if (ctx->literal_bytes > ctx->block_size) { *result = ctx->literal_bytes - ctx->block_size; - ctx->literal_bytes = ctx->block_size; + ctx->literal_bytes -= *result; + /* Advance buffer. */ + if (*result >= buffer_size(ctx)) + ctx->buffer_start = ctx->buffer_end = 0; + else + ctx->buffer_start += *result; } else *result = 0; /* Now save any literal bytes we'll need in future. */ len = ctx->literal_bytes - buffer_size(ctx); - memcpy(ctx->buffer + ctx->buffer_end, buf, len); + + /* Move down old data if we don't have room. */ + if (ctx->buffer_end + len > ctx->block_size) { + memmove(ctx->buffer, + (uint8_t *)ctx->buffer + ctx->buffer_start, + buffer_size(ctx)); + ctx->buffer_end -= ctx->buffer_start; + ctx->buffer_start = 0; + } + + /* Copy len bytes from tail of buffer. */ + memcpy((uint8_t *)ctx->buffer + ctx->buffer_end, + (const uint8_t *)buf + buflen - len, len); ctx->buffer_end += len; assert(buffer_size(ctx) <= ctx->block_size); } return consumed; } -/* We could try many techniques to match the final block. We can - * simply try to checksum whatever's left at the end and see if it - * matches the final block checksum. This works for the exact-match - * case. - * - * We can do slightly better than this: if we try to match the checksum - * on every block (starting with block_size 1) from where we end to EOF, - * we can capture the "data appended" case as well. - */ -static size_t final_block_match(struct crc_context *ctx) -{ - size_t size; - uint32_t crc; - - if (ctx->num_crcs == 0) - return 0; - - crc = 0; - for (size = 0; size < buffer_size(ctx); size++) { - const uint8_t *p = ctx->buffer; - crc = crc_add_byte(crc, p[ctx->buffer_start+size]); - if ((crc & ctx->crcmask) == ctx->crc[ctx->num_crcs-1]) - return size+1; - } - return 0; -} - long crc_read_flush(struct crc_context *ctx) { long ret; - /* In case we ended on a whole block match. */ - if (ctx->have_match == -1) { - size_t final; - - final = final_block_match(ctx); - if (!final) { - /* This is how many bytes we're about to consume. */ - ret = buffer_size(ctx); - ctx->buffer_start += ret; - ctx->literal_bytes -= ret; - - return ret; - } - ctx->buffer_start += final; - ctx->literal_bytes -= final; - ctx->have_match = ctx->num_crcs-1; + /* We might have ended right on a matched block. */ + if (ctx->have_match != -1) { + ctx->literal_bytes -= ctx->block_size; + assert(ctx->literal_bytes == 0); + ret = -ctx->have_match-1; + ctx->have_match = -1; + ctx->running_crc = 0; + /* Nothing more in the buffer. */ + ctx->buffer_start = ctx->buffer_end; + return ret; } - /* It might be a partial block match, so no assert */ + /* The rest is just a literal. */ + ret = buffer_size(ctx); + assert(ctx->literal_bytes == ret); + ctx->buffer_start = ctx->buffer_end = 0; ctx->literal_bytes = 0; - ret = -ctx->have_match-1; - ctx->have_match = -1; return ret; }