X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcrcsync%2Fcrcsync.c;h=b6eb65b007fb891339689ad297f9f03ae0709cfd;hp=6c251f1ff967d1c1a49a9a42592f50926e1223c0;hb=2a1c08c3e86ac3a9750bfb6f77c236356a2a3f3d;hpb=f5129b0c08976fbaa07d3d610de40677e81ad344;ds=sidebyside diff --git a/ccan/crcsync/crcsync.c b/ccan/crcsync/crcsync.c index 6c251f1f..b6eb65b0 100644 --- a/ccan/crcsync/crcsync.c +++ b/ccan/crcsync/crcsync.c @@ -2,6 +2,7 @@ #include #include #include +#include void crc_of_blocks(const void *data, size_t len, unsigned int block_size, unsigned int crcbits, uint32_t crc[]) @@ -33,9 +34,14 @@ struct crc_context { size_t total_bytes; int have_match; + /* Final block is special (if a different size) */ + size_t tail_size; + uint32_t tail_crc; + /* Uncrc tab. */ uint32_t uncrc_tab[256]; + /* This doesn't count the last CRC. */ unsigned int num_crcs; uint32_t crc[]; }; @@ -59,13 +65,22 @@ static void init_uncrc_tab(uint32_t uncrc_tab[], unsigned int wsize) } struct crc_context *crc_context_new(size_t block_size, unsigned crcbits, - const uint32_t crc[], unsigned num_crcs) + const uint32_t crc[], unsigned num_crcs, + size_t tail_size) { struct crc_context *ctx; + assert(num_crcs > 0); + assert(block_size > 0); + assert(tail_size < block_size); + ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs); if (ctx) { ctx->block_size = block_size; + ctx->tail_size = tail_size; + if (tail_size) + ctx->tail_crc = crc[--num_crcs]; + /* Technically, 1 << 32 is undefined. */ if (crcbits >= 32) ctx->crcmask = 0xFFFFFFFF; @@ -103,6 +118,14 @@ static int crc_matches(const struct crc_context *ctx) return -1; } +static bool tail_matches(const struct crc_context *ctx) +{ + if (ctx->literal_bytes != ctx->tail_size) + return false; + + return (ctx->running_crc & ctx->crcmask) == ctx->tail_crc; +} + static uint32_t crc_add_byte(uint32_t crc, uint8_t newbyte) { return crc32c(crc, &newbyte, 1); @@ -138,40 +161,45 @@ size_t crc_read_block(struct crc_context *ctx, long *result, goto have_match; } + /* old is the trailing edge of the checksum window. */ if (buffer_size(ctx) >= ctx->block_size) old = ctx->buffer + ctx->buffer_start; else old = NULL; - while (!old || (crcmatch = crc_matches(ctx)) < 0) { + while (ctx->literal_bytes < ctx->block_size + || (crcmatch = crc_matches(ctx)) < 0) { if (consumed == buflen) break; + /* Increment these now in case we hit goto: below. */ + ctx->literal_bytes++; + ctx->total_bytes++; + consumed++; + /* Update crc. */ if (old) { ctx->running_crc = crc_roll(ctx->running_crc, *old, *p, ctx->uncrc_tab); old++; + /* End of stored buffer? Start on data they gave us. */ if (old == ctx->buffer + ctx->buffer_end) old = buf; } else { ctx->running_crc = crc_add_byte(ctx->running_crc, *p); if (p == (uint8_t *)buf + ctx->block_size - 1) old = buf; + /* We don't roll this csum, we only look for it after + * a block match. It's simpler and faster. */ + if (tail_matches(ctx)) { + crcmatch = ctx->num_crcs; + goto have_match; + } } - - ctx->literal_bytes++; - ctx->total_bytes++; - consumed++; p++; } - /* Make sure we have a copy of the last block_size bytes. - * First, copy down the old data. */ - if (buffer_size(ctx)) { - } - if (crcmatch >= 0) { /* We have a match! */ if (ctx->literal_bytes > ctx->block_size) { @@ -183,16 +211,26 @@ size_t crc_read_block(struct crc_context *ctx, long *result, } else { have_match: *result = -crcmatch-1; - ctx->literal_bytes -= ctx->block_size; - assert(ctx->literal_bytes == 0); + if (crcmatch == ctx->num_crcs) + assert(ctx->literal_bytes == ctx->tail_size); + else + assert(ctx->literal_bytes == ctx->block_size); + ctx->literal_bytes = 0; ctx->have_match = -1; ctx->running_crc = 0; + /* Nothing more in the buffer. */ + ctx->buffer_start = ctx->buffer_end = 0; } } else { /* Output literal if it's more than 1 block ago. */ if (ctx->literal_bytes > ctx->block_size) { *result = ctx->literal_bytes - ctx->block_size; - ctx->literal_bytes = ctx->block_size; + ctx->literal_bytes -= *result; + /* Advance buffer. */ + if (*result >= buffer_size(ctx)) + ctx->buffer_start = ctx->buffer_end = 0; + else + ctx->buffer_start += *result; } else *result = 0; @@ -206,66 +244,36 @@ size_t crc_read_block(struct crc_context *ctx, long *result, ctx->buffer_end -= ctx->buffer_start; ctx->buffer_start = 0; } - memcpy(ctx->buffer + ctx->buffer_end, buf, len); + + /* Copy len bytes from tail of buffer. */ + memcpy(ctx->buffer + ctx->buffer_end, buf + buflen - len, len); ctx->buffer_end += len; assert(buffer_size(ctx) <= ctx->block_size); } return consumed; } -/* We could try many techniques to match the final block. We can - * simply try to checksum whatever's left at the end and see if it - * matches the final block checksum. This works for the exact-match - * case. - * - * We can do slightly better than this: if we try to match the checksum - * on every block (starting with block_size 1) from where we end to EOF, - * we can capture the "data appended" case as well. - */ -static size_t final_block_match(struct crc_context *ctx) -{ - size_t size; - uint32_t crc; - - if (ctx->num_crcs == 0) - return 0; - - crc = 0; - for (size = 0; size < buffer_size(ctx); size++) { - const uint8_t *p = ctx->buffer; - crc = crc_add_byte(crc, p[ctx->buffer_start+size]); - if ((crc & ctx->crcmask) == ctx->crc[ctx->num_crcs-1]) - return size+1; - } - return 0; -} - long crc_read_flush(struct crc_context *ctx) { long ret; - /* In case we ended on a whole block match. */ - if (ctx->have_match == -1) { - size_t final; - - final = final_block_match(ctx); - if (!final) { - /* This is how many bytes we're about to consume. */ - ret = buffer_size(ctx); - ctx->buffer_start += ret; - ctx->literal_bytes -= ret; - - return ret; - } - ctx->buffer_start += final; - ctx->literal_bytes -= final; - ctx->have_match = ctx->num_crcs-1; + /* We might have ended right on a matched block. */ + if (ctx->have_match != -1) { + ctx->literal_bytes -= ctx->block_size; + assert(ctx->literal_bytes == 0); + ret = -ctx->have_match-1; + ctx->have_match = -1; + ctx->running_crc = 0; + /* Nothing more in the buffer. */ + ctx->buffer_start = ctx->buffer_end; + return ret; } - /* It might be a partial block match, so no assert */ + /* The rest is just a literal. */ + ret = buffer_size(ctx); + assert(ctx->literal_bytes == ret); + ctx->buffer_start = ctx->buffer_end = 0; ctx->literal_bytes = 0; - ret = -ctx->have_match-1; - ctx->have_match = -1; return ret; }