+/* Licensed under LGPLv2.1+ - see LICENSE file for details */
#include "crcsync.h"
#include <ccan/crc/crc.h>
#include <string.h>
#include <assert.h>
+#include <stdbool.h>
+
+/* FIXME: That 64-bit CRC takes a while to warm the lower bits. Do
+ * some quantitative tests and replace it? Meanwhile, use upper bits. */
/* Mask selecting the top @crcbits bits of a 64-bit CRC value.
 *
 * crcbits == 0 is handled explicitly: shifting a 64-bit value by 64 is
 * undefined behavior in C, so an empty mask is returned instead.
 * crcbits is expected to be <= 64. */
static uint64_t mask_of(unsigned int crcbits)
{
	return crcbits ? -1ULL << (64 - crcbits) : 0;
}
/* Fill crc[] with the masked CRC of each block_size-sized chunk of data.
 * A trailing chunk shorter than block_size gets its own CRC entry. */
void crc_of_blocks(const void *data, size_t len, unsigned int block_size,
		   unsigned int crcbits, uint64_t crc[])
{
	const uint8_t *p = data;
	const uint64_t keep = mask_of(crcbits);
	unsigned int n = 0;

	while (len >= block_size) {
		crc[n++] = crc64_iso(0, p, block_size) & keep;
		p += block_size;
		len -= block_size;
	}
	if (len != 0)
		crc[n] = crc64_iso(0, p, len) & keep;
}
/* State for the rolling-CRC reader side of the sync protocol. */
struct crc_context {
	/* Size in bytes of a full block. */
	size_t block_size;
	/* Mask selecting the significant (upper) bits of each 64-bit CRC. */
	uint64_t crcmask;

	/* Saved old buffer bytes (block_size bytes). */
	void *buffer;
	size_t buffer_start, buffer_end;

	/* Progress so far. */
	uint64_t running_crc;
	/* Bytes currently accumulated that have not matched a block. */
	size_t literal_bytes;
	size_t total_bytes;
	/* Index of the block just matched, or -1 when none pending. */
	int have_match;

	/* Final block is special (if a different size) */
	size_t tail_size;
	/* Expected CRC of the short tail; only meaningful if tail_size != 0. */
	uint64_t tail_crc;

	/* Uncrc tab: per-byte correction factors from init_uncrc_tab(). */
	uint64_t uncrc_tab[256];

	/* This doesn't count the last CRC. */
	unsigned int num_crcs;
	/* Flexible array member: the per-block CRCs we try to match. */
	uint64_t crc[];
};
/* Calculate how the crc changes when we take a given char out of the
 * crc'd area, for every possible byte value. */
static void init_uncrc_tab(uint64_t uncrc_tab[], unsigned int wsize)
{
	uint8_t window[wsize];
	uint64_t tail_crc;
	unsigned int b;

	/* CRC of wsize-1 zero bytes: identical for every table entry,
	 * so compute it once up front. */
	memset(window, 0, wsize);
	tail_crc = crc64_iso(0, window + 1, wsize - 1);

	for (b = 0; b < 256; b++) {
		window[0] = b;
		uncrc_tab[b] = crc64_iso(0, window, wsize) ^ tail_crc;
	}
}
struct crc_context *crc_context_new(size_t block_size, unsigned crcbits,
- const uint32_t crc[], unsigned num_crcs)
+ const uint64_t crc[], unsigned num_crcs,
+ size_t tail_size)
{
struct crc_context *ctx;
+ assert(num_crcs > 0);
+ assert(block_size > 0);
+ assert(tail_size < block_size);
+
ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs);
if (ctx) {
ctx->block_size = block_size;
- /* Technically, 1 << 32 is undefined. */
- if (crcbits >= 32)
- ctx->crcmask = 0xFFFFFFFF;
- else
- ctx->crcmask = (1 << crcbits)-1;
+ ctx->tail_size = tail_size;
+ if (tail_size)
+ ctx->tail_crc = crc[--num_crcs];
+
+ ctx->crcmask = mask_of(crcbits);
ctx->num_crcs = num_crcs;
memcpy(ctx->crc, crc, sizeof(crc[0])*num_crcs);
ctx->buffer_end = 0;
return -1;
}
-static uint32_t crc_add_byte(uint32_t crc, uint8_t newbyte)
+static bool tail_matches(const struct crc_context *ctx)
+{
+ if (ctx->literal_bytes != ctx->tail_size)
+ return false;
+
+ return (ctx->running_crc & ctx->crcmask) == ctx->tail_crc;
+}
+
/* Extend the running CRC by one incoming byte. */
static uint64_t crc_add_byte(uint64_t crc, uint8_t newbyte)
{
	uint8_t b = newbyte;

	return crc64_iso(crc, &b, 1);
}
/* Undo the contribution of the byte leaving the CRC window, using the
 * per-byte correction factor precomputed by init_uncrc_tab(). */
static uint64_t crc_remove_byte(uint64_t crc, uint8_t oldbyte,
				const uint64_t uncrc_tab[])
{
	uint64_t correction = uncrc_tab[oldbyte];

	return crc ^ correction;
}
/* Slide the CRC window one byte forward: drop oldbyte, take in newbyte. */
static uint64_t crc_roll(uint64_t crc, uint8_t oldbyte, uint8_t newbyte,
			 const uint64_t uncrc_tab[])
{
	uint64_t stripped = crc_remove_byte(crc, oldbyte, uncrc_tab);

	return crc_add_byte(stripped, newbyte);
}
/* old is the trailing edge of the checksum window. */
if (buffer_size(ctx) >= ctx->block_size)
- old = ctx->buffer + ctx->buffer_start;
+ old = (uint8_t *)ctx->buffer + ctx->buffer_start;
else
old = NULL;
- while (!old || (crcmatch = crc_matches(ctx)) < 0) {
+ while (ctx->literal_bytes < ctx->block_size
+ || (crcmatch = crc_matches(ctx)) < 0) {
if (consumed == buflen)
break;
+ /* Increment these now in case we hit goto: below. */
+ ctx->literal_bytes++;
+ ctx->total_bytes++;
+ consumed++;
+
/* Update crc. */
if (old) {
ctx->running_crc = crc_roll(ctx->running_crc,
ctx->uncrc_tab);
old++;
/* End of stored buffer? Start on data they gave us. */
- if (old == ctx->buffer + ctx->buffer_end)
+ if (old == (uint8_t *)ctx->buffer + ctx->buffer_end)
old = buf;
} else {
ctx->running_crc = crc_add_byte(ctx->running_crc, *p);
if (p == (uint8_t *)buf + ctx->block_size - 1)
old = buf;
+ /* We don't roll this csum, we only look for it after
+ * a block match. It's simpler and faster. */
+ if (tail_matches(ctx)) {
+ crcmatch = ctx->num_crcs;
+ goto have_match;
+ }
}
-
- ctx->literal_bytes++;
- ctx->total_bytes++;
- consumed++;
p++;
}
} else {
have_match:
*result = -crcmatch-1;
- ctx->literal_bytes -= ctx->block_size;
- assert(ctx->literal_bytes == 0);
+ if (crcmatch == ctx->num_crcs)
+ assert(ctx->literal_bytes == ctx->tail_size);
+ else
+ assert(ctx->literal_bytes == ctx->block_size);
+ ctx->literal_bytes = 0;
ctx->have_match = -1;
ctx->running_crc = 0;
/* Nothing more in the buffer. */
/* Move down old data if we don't have room. */
if (ctx->buffer_end + len > ctx->block_size) {
- memmove(ctx->buffer, ctx->buffer + ctx->buffer_start,
+ memmove(ctx->buffer,
+ (uint8_t *)ctx->buffer + ctx->buffer_start,
buffer_size(ctx));
ctx->buffer_end -= ctx->buffer_start;
ctx->buffer_start = 0;
}
/* Copy len bytes from tail of buffer. */
- memcpy(ctx->buffer + ctx->buffer_end, buf + buflen - len, len);
+ memcpy((uint8_t *)ctx->buffer + ctx->buffer_end,
+ (const uint8_t *)buf + buflen - len, len);
ctx->buffer_end += len;
assert(buffer_size(ctx) <= ctx->block_size);
}
return consumed;
}
-/* We could try many techniques to match the final block. We can
- * simply try to checksum whatever's left at the end and see if it
- * matches the final block checksum. This works for the exact-match
- * case.
- *
- * We can do slightly better than this: if we try to match the checksum
- * on every block (starting with block_size 1) from where we end to EOF,
- * we can capture the "data appended" case as well.
- */
-static size_t final_block_match(struct crc_context *ctx)
-{
- size_t size;
- uint32_t crc;
-
- if (ctx->num_crcs == 0)
- return 0;
-
- crc = 0;
- for (size = 0; size < buffer_size(ctx); size++) {
- const uint8_t *p = ctx->buffer;
- crc = crc_add_byte(crc, p[ctx->buffer_start+size]);
- if ((crc & ctx->crcmask) == ctx->crc[ctx->num_crcs-1])
- return size+1;
- }
- return 0;
-}
-
long crc_read_flush(struct crc_context *ctx)
{
long ret;
- size_t final;
/* We might have ended right on a matched block. */
if (ctx->have_match != -1) {
return ret;
}
- /* Look for truncated final block. */
- final = final_block_match(ctx);
- if (!final) {
- /* Nope? Just a literal. */
- ret = buffer_size(ctx);
- ctx->buffer_start += ret;
- ctx->literal_bytes -= ret;
- return ret;
- }
-
- /* We matched (some of) what's left. */
- ret = -((int)ctx->num_crcs-1)-1;
- ctx->buffer_start += final;
- ctx->literal_bytes -= final;
+ /* The rest is just a literal. */
+ ret = buffer_size(ctx);
+ assert(ctx->literal_bytes == ret);
+ ctx->buffer_start = ctx->buffer_end = 0;
+ ctx->literal_bytes = 0;
return ret;
}