From c934a7a21d9ea8aa421d62a65a93f46c95adb535 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 4 Apr 2009 22:05:46 +1030 Subject: [PATCH] crcsync change: better detection of final block. --- ccan/crcsync/crcsync.c | 102 +++++++++++++++-------------- ccan/crcsync/crcsync.h | 4 +- ccan/crcsync/test/run-crash.c | 10 ++- ccan/crcsync/test/{api.c => run.c} | 11 +++- 4 files changed, 70 insertions(+), 57 deletions(-) rename ccan/crcsync/test/{api.c => run.c} (95%) diff --git a/ccan/crcsync/crcsync.c b/ccan/crcsync/crcsync.c index 2301bc10..459010a7 100644 --- a/ccan/crcsync/crcsync.c +++ b/ccan/crcsync/crcsync.c @@ -2,6 +2,7 @@ #include #include #include +#include void crc_of_blocks(const void *data, size_t len, unsigned int block_size, unsigned int crcbits, uint32_t crc[]) @@ -33,9 +34,14 @@ struct crc_context { size_t total_bytes; int have_match; + /* Final block is special (if a different size) */ + size_t final_size; + uint32_t final_crc; + /* Uncrc tab. */ uint32_t uncrc_tab[256]; + /* This doesn't count the last CRC. */ unsigned int num_crcs; uint32_t crc[]; }; @@ -59,13 +65,27 @@ static void init_uncrc_tab(uint32_t uncrc_tab[], unsigned int wsize) } struct crc_context *crc_context_new(size_t block_size, unsigned crcbits, - const uint32_t crc[], unsigned num_crcs) + const uint32_t crc[], unsigned num_crcs, + size_t final_size) { struct crc_context *ctx; + assert(num_crcs > 0); + assert(block_size > 0); + assert(final_size > 0); + assert(final_size <= block_size); + ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs); if (ctx) { ctx->block_size = block_size; + if (final_size != block_size) { + ctx->final_size = final_size; + ctx->final_crc = crc[--num_crcs]; + } else { + /* If this is 0, we never compare against it. */ + ctx->final_size = 0; + } + /* Technically, 1 << 32 is undefined. */ if (crcbits >= 32) ctx->crcmask = 0xFFFFFFFF; @@ -103,6 +123,14 @@ static int crc_matches(const struct crc_context *ctx) return -1; } +static bool final_matches(const struct crc_context *ctx) +{ + if (ctx->literal_bytes != ctx->final_size) + return false; + + return (ctx->running_crc & ctx->crcmask) == ctx->final_crc; +} + static uint32_t crc_add_byte(uint32_t crc, uint8_t newbyte) { return crc32c(crc, &newbyte, 1); @@ -144,10 +172,16 @@ size_t crc_read_block(struct crc_context *ctx, long *result, else old = NULL; - while (!old || (crcmatch = crc_matches(ctx)) < 0) { + while (ctx->literal_bytes < ctx->block_size + || (crcmatch = crc_matches(ctx)) < 0) { if (consumed == buflen) break; + /* Increment these now in case we hit goto: below. */ + ctx->literal_bytes++; + ctx->total_bytes++; + consumed++; + /* Update crc. */ if (old) { ctx->running_crc = crc_roll(ctx->running_crc, @@ -161,11 +195,13 @@ size_t crc_read_block(struct crc_context *ctx, long *result, ctx->running_crc = crc_add_byte(ctx->running_crc, *p); if (p == (uint8_t *)buf + ctx->block_size - 1) old = buf; + /* We don't roll this csum, we only look for it after + * a block match. It's simpler and faster. */ + if (final_matches(ctx)) { + crcmatch = ctx->num_crcs; + goto have_match; + } } - - ctx->literal_bytes++; - ctx->total_bytes++; - consumed++; p++; } @@ -180,8 +216,11 @@ size_t crc_read_block(struct crc_context *ctx, long *result, } else { have_match: *result = -crcmatch-1; - ctx->literal_bytes -= ctx->block_size; - assert(ctx->literal_bytes == 0); + if (crcmatch == ctx->num_crcs) + assert(ctx->literal_bytes == ctx->final_size); + else + assert(ctx->literal_bytes == ctx->block_size); + ctx->literal_bytes = 0; ctx->have_match = -1; ctx->running_crc = 0; /* Nothing more in the buffer. */ @@ -219,37 +258,9 @@ size_t crc_read_block(struct crc_context *ctx, long *result, return consumed; } -/* We could try many techniques to match the final block. We can - * simply try to checksum whatever's left at the end and see if it - * matches the final block checksum. This works for the exact-match - * case. - * - * We can do slightly better than this: if we try to match the checksum - * on every block (starting with block_size 1) from where we end to EOF, - * we can capture the "data appended" case as well. - */ -static size_t final_block_match(struct crc_context *ctx) -{ - size_t size; - uint32_t crc; - - if (ctx->num_crcs == 0) - return 0; - - crc = 0; - for (size = 0; size < buffer_size(ctx); size++) { - const uint8_t *p = ctx->buffer; - crc = crc_add_byte(crc, p[ctx->buffer_start+size]); - if ((crc & ctx->crcmask) == ctx->crc[ctx->num_crcs-1]) - return size+1; - } - return 0; -} - long crc_read_flush(struct crc_context *ctx) { long ret; - size_t final; /* We might have ended right on a matched block. */ if (ctx->have_match != -1) { @@ -263,20 +274,11 @@ long crc_read_flush(struct crc_context *ctx) return ret; } - /* Look for truncated final block. */ - final = final_block_match(ctx); - if (!final) { - /* Nope? Just a literal. */ - ret = buffer_size(ctx); - ctx->buffer_start += ret; - ctx->literal_bytes -= ret; - return ret; - } - - /* We matched (some of) what's left. */ - ret = -((int)ctx->num_crcs-1)-1; - ctx->buffer_start += final; - ctx->literal_bytes -= final; + /* The rest is just a literal. */ + ret = buffer_size(ctx); + assert(ctx->literal_bytes == ret); + ctx->buffer_start = ctx->buffer_end = 0; + ctx->literal_bytes = 0; return ret; } diff --git a/ccan/crcsync/crcsync.h b/ccan/crcsync/crcsync.h index 8ea33457..4b25562b 100644 --- a/ccan/crcsync/crcsync.h +++ b/ccan/crcsync/crcsync.h @@ -23,12 +23,14 @@ void crc_of_blocks(const void *data, size_t len, unsigned int blocksize, * @crcbits: the bits valid in the CRCs (<= 32) * @crc: array of block crcs * @num_crcs: number of block crcs + * @final_size: the final block size (<= blocksize). * * Returns an allocated pointer to the structure for crc_find_block, * or NULL. Makes a copy of @crc and @num_crcs. */ struct crc_context *crc_context_new(size_t blocksize, unsigned crcbits, - const uint32_t crc[], unsigned num_crcs); + const uint32_t crc[], unsigned num_crcs, + size_t final_size); /** * crc_read_block - search for block matches in the buffer. diff --git a/ccan/crcsync/test/run-crash.c b/ccan/crcsync/test/run-crash.c index c047cce2..7fb1df45 100644 --- a/ccan/crcsync/test/run-crash.c +++ b/ccan/crcsync/test/run-crash.c @@ -39,26 +39,30 @@ int main(int argc, char *argv[]) "pqr-a-very-long-test-that-differs-between-two-invokations-of-the-same-page-st" /* MATCH */ "uvwxy" "z ABC" "DEFGH" "IJKLM" "NOPQR" "STUVW" "XYZ 0" "12345" + "6789" /* NO MATCH */ - "6789ab"; + "ab"; int expected[] = { 4, -2, -3, 77, -5, -6, -7, -8, -9, -10, -11, -12, - 6 }; + -13, + 2 }; crc_info_t crc_info1; struct crc_context *crcctx; long result; size_t ndigested; size_t offset = 0; size_t len2 = strlen(data2); + size_t finalsize = strlen(data1) % BLOCKSIZE ?: BLOCKSIZE; int expected_i = 0; plan_tests(ARRAY_SIZE(expected) + 2); crcblocks(&crc_info1, data1, strlen(data1), BLOCKSIZE); - crcctx = crc_context_new(BLOCKSIZE, 30, crc_info1.crcs, crc_info1.block_count); + crcctx = crc_context_new(BLOCKSIZE, 30, crc_info1.crcs, crc_info1.block_count, + finalsize); while ( offset < len2) { ndigested = crc_read_block(crcctx, &result, data2+offset, len2 - offset); diff --git a/ccan/crcsync/test/api.c b/ccan/crcsync/test/run.c similarity index 95% rename from ccan/crcsync/test/api.c rename to ccan/crcsync/test/run.c index 2f07cb69..5f821b6c 100644 --- a/ccan/crcsync/test/api.c +++ b/ccan/crcsync/test/run.c @@ -1,4 +1,5 @@ #include "crcsync/crcsync.h" +#include "../crcsync.c" #include "tap/tap.h" #include #include @@ -64,14 +65,17 @@ static void test_sync(const char *buffer1, size_t len1, const struct result results[], size_t num_results) { struct crc_context *ctx; - size_t used, ret, i, curr_literal; + size_t used, ret, i, curr_literal, finalsize; long result; uint32_t crcs[num_blocks(len1, block_size)]; crc_of_blocks(buffer1, len1, block_size, 32, crcs); + finalsize = len1 % block_size ?: block_size; + /* Normal method. */ - ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs)); + ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs), + finalsize); curr_literal = 0; for (used = 0, i = 0; used < len2; used += ret) { @@ -89,7 +93,8 @@ static void test_sync(const char *buffer1, size_t len1, crc_context_free(ctx); /* Byte-at-a-time method. */ - ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs)); + ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs), + finalsize); curr_literal = 0; for (used = 0, i = 0; used < len2; used += ret) { -- 2.39.2