From d1a8796172f3298d087003c623a049f1177fd060 Mon Sep 17 00:00:00 2001 From: Joey Adams Date: Fri, 19 Aug 2011 22:29:44 -0400 Subject: [PATCH] bdelta: new module for binary diff/patch --- ccan/bdelta/LICENSE | 1 + ccan/bdelta/_info | 143 ++++++ ccan/bdelta/bdelta.c | 838 +++++++++++++++++++++++++++++++++ ccan/bdelta/bdelta.h | 112 +++++ ccan/bdelta/test/common.h | 294 ++++++++++++ ccan/bdelta/test/run-medium.c | 79 ++++ ccan/bdelta/test/run-myers.c | 202 ++++++++ ccan/bdelta/test/run-trivial.c | 67 +++ 8 files changed, 1736 insertions(+) create mode 120000 ccan/bdelta/LICENSE create mode 100644 ccan/bdelta/_info create mode 100644 ccan/bdelta/bdelta.c create mode 100644 ccan/bdelta/bdelta.h create mode 100644 ccan/bdelta/test/common.h create mode 100644 ccan/bdelta/test/run-medium.c create mode 100644 ccan/bdelta/test/run-myers.c create mode 100644 ccan/bdelta/test/run-trivial.c diff --git a/ccan/bdelta/LICENSE b/ccan/bdelta/LICENSE new file mode 120000 index 00000000..2354d129 --- /dev/null +++ b/ccan/bdelta/LICENSE @@ -0,0 +1 @@ +../../licenses/BSD-MIT \ No newline at end of file diff --git a/ccan/bdelta/_info b/ccan/bdelta/_info new file mode 100644 index 00000000..8b1135ad --- /dev/null +++ b/ccan/bdelta/_info @@ -0,0 +1,143 @@ +#include +#include "config.h" + +/** + * bdelta - Generate and apply binary deltas + * + * This library takes two strings containing binary data, and produces a + * "patch" that can be applied to the first one to produce the second one. + * It can be used to save bandwidth and disk space when many texts differing + * by a small number of bytes need to be transmitted or stored. + * + * Patches produced by this version of the library can be applied using future + * versions, but not past versions. + * + * bdelta implements the algorithm described in + * An O(ND) Difference Algorithm and Its Variations by Eugene W. Myers. + * Because its memory usage and expected running time are O(N + D^2), + * it works well only when the strings differ by a small number of bytes. + * This implementation stops trying when the strings differ by more than + * 1000 bytes, and falls back to producing a patch that simply emits the new + * string. + * + * Thus, bdelta does not save any space when given two strings that differ by + * more than 1000 bytes. This may be improved in a future version of the + * library. + * + * Example: + * #include + * #include + * #include + * #include + * + * static void gulp(const char *filename, void **data_out, size_t *size_out); + * + * static int usage(const char *prog) + * { + * fprintf( + * stderr, + * "Usage: %s diff > \n" + * " %s patch > \n", + * prog, prog + * ); + * return 1; + * } + * + * int main(int argc, char *argv[]) + * { + * void *old, *new_, *patch; + * size_t old_size, new_size, patch_size; + * BDELTAcode rc; + * + * if (argc != 4) + * return usage(argv[0]); + * + * if (strcmp(argv[1], "diff") == 0) { + * gulp(argv[2], &old, &old_size); + * gulp(argv[3], &new_, &new_size); + * + * rc = bdelta_diff(old, old_size, new_, new_size, &patch, &patch_size); + * if (rc != BDELTA_OK) { + * bdelta_perror("bdelta_diff", rc); + * return 1; + * } + * + * if (fwrite(patch, 1, patch_size, stdout) != patch_size) { + * perror("stdout"); + * return 1; + * } + * } else if (strcmp(argv[1], "patch") == 0) { + * gulp(argv[2], &old, &old_size); + * gulp(argv[3], &patch, &patch_size); + * + * rc = bdelta_patch(old, old_size, patch, patch_size, &new_, &new_size); + * if (rc != BDELTA_OK) { + * bdelta_perror("bdelta_patch", rc); + * return 1; + * } + * + * if (fwrite(new_, 1, new_size, stdout) != new_size) { + * perror("stdout"); + * return 1; + * } + * } else { + * return usage(argv[0]); + * } + * + * free(old); + * free(new_); + * free(patch); + * return 0; + * } + * + * static void gulp(const char *filename, void **data_out, size_t *size_out) + * { + * FILE *f = fopen(filename, "rb"); + * size_t size = 0; + * size_t alloc = 16; + * char *data = malloc(alloc); + * + * if (f == NULL || data == NULL) + * goto error; + * + * for (;;) { + * size += fread(data + size, 1, alloc - size, f); + * if (size < alloc) { + * if (!feof(f)) + * goto error; + * break; + * } + * data = realloc(data, alloc *= 2); + * if (data == NULL) + * goto error; + * } + * + * if (fclose(f) != 0) + * goto error; + * + * *data_out = data; + * *size_out = size; + * return; + * + * error: + * perror(filename); + * exit(EXIT_FAILURE); + * } + * + * Author: Joey Adams + * License: MIT + * Version: 0.1.1 + */ +int main(int argc, char *argv[]) +{ + /* Expect exactly one argument */ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + /* Nothing */ + return 0; + } + + return 1; +} diff --git a/ccan/bdelta/bdelta.c b/ccan/bdelta/bdelta.c new file mode 100644 index 00000000..121ff46f --- /dev/null +++ b/ccan/bdelta/bdelta.c @@ -0,0 +1,838 @@ +/* + * Copyright (C) 2011 Joseph Adams + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "bdelta.h" + +#include +#include +#include +#include +#include +#include + +typedef struct +{ + unsigned char *cur; /* End of string; insertion point for new bytes */ + unsigned char *end; /* End of buffer */ + unsigned char *start; /* Beginning of string */ +} SB; + +/* sb is evaluated multiple times in these macros. */ +#define sb_size(sb) ((size_t)((sb)->cur - (sb)->start)) +#define sb_avail(sb) ((size_t)((sb)->end - (sb)->cur)) + +/* sb and need may be evaluated multiple times. */ +#define sb_need(sb, need) do { \ + if (sb_avail(sb) < (need)) \ + if (sb_grow(sb, need) != 0) \ + goto out_of_memory; \ + } while (0) + +static int sb_init(SB *sb) +{ + sb->start = malloc(17); + if (sb->start == NULL) + return -1; + sb->cur = sb->start; + sb->end = sb->start + 16; + return 0; +} + +static int sb_grow(SB *sb, size_t need) +{ + size_t length = sb->cur - sb->start; + size_t alloc = sb->end - sb->start; + unsigned char *tmp; + + do { + alloc *= 2; + } while (alloc < length + need); + + tmp = realloc(sb->start, alloc + 1); + if (tmp == NULL) + return -1; + sb->start = tmp; + sb->cur = tmp + length; + sb->end = tmp + alloc; + return 0; +} + +static int sb_putc(SB *sb, unsigned char c) +{ + sb_need(sb, 1); + *sb->cur++ = c; + return 0; + +out_of_memory: + return -1; +} + +static int sb_write(SB *sb, const void *data, size_t size) +{ + sb_need(sb, size); + memcpy(sb->cur, data, size); + sb->cur += size; + return 0; + +out_of_memory: + return -1; +} + +static void sb_return(SB *sb, void **data_out, size_t *length_out) +{ + *sb->cur = 0; + if (data_out) + *data_out = sb->start; + else + free(sb->start); + if (length_out) + *length_out = sb->cur - sb->start; +} + +static void sb_discard(SB *sb, void **data_out, size_t *length_out) +{ + free(sb->start); + if (data_out) + *data_out = NULL; + if (length_out) + *length_out = 0; +} + +/* + * The first byte in a patch is the "patch type", which indicates how the + * patch is formatted. This keeps the patch format flexible while retaining + * backward compatibility. Patches produced with an older version of + * the library can be applied with a newer version. + * + * PT_LITERAL + * Contains nothing more than the content of the new text. + * + * PT_CSI32 + * A string of copy, skip, and insert instructions for generating the new + * string from the old. + * + * copy(size): Copy @size bytes from old to new. + * skip(size): Skip @size bytes of old. + * insert(text): Insert @text into new. + * + * The syntax is as follows: + * + * copy: instruction_byte(1) size + * skip: instruction_byte(2) size + * insert: instruction_byte(3) size $size*OCTET + * + * 0 <= size_param_length <= 4 + * instruction_byte(op) = op | size_param_length << 2 + * size: $size_param_length*OCTET + * -- size is an unsigned integer encoded in big endian. + * -- However, if size_param_length is 0, the operation size is 1. + * + * Simply put, an instruction starts with an opcode and operation size. + * An insert instruction is followed by the bytes to be inserted. + */ +#define PT_LITERAL 10 +#define PT_CSI32 11 + +#define OP_COPY 1 +#define OP_SKIP 2 +#define OP_INSERT 3 + +static unsigned int bytes_needed_for_size(uint32_t size) +{ + if (size == 1) + return 0; + else if (size <= 0xFF) + return 1; + else if (size <= 0xFFFF) + return 2; + else if (size <= 0xFFFFFF) + return 3; + else + return 4; +} + +/* + * Return values: + * + * BDELTA_OK: Success + * BDELTA_MEMORY: Memory allocation failed + */ +static BDELTAcode csi32_emit_op(SB *patch_out, int op, uint32_t size, const char **new_) +{ + unsigned int i; + unsigned int size_param_length; + size_t need; + uint32_t tmp; + + assert(op >= 1 && op <= 3); + + if (size == 0) + return BDELTA_OK; + size_param_length = bytes_needed_for_size(size); + + need = 1 + size_param_length; + if (op == OP_INSERT) + need += size; + sb_need(patch_out, need); + + *patch_out->cur++ = (unsigned int)op | size_param_length << 2; + for (i = size_param_length, tmp = size; i-- > 0; tmp >>= 8) + patch_out->cur[i] = tmp & 0xFF; + patch_out->cur += size_param_length; + + switch (op) { + case OP_COPY: + *new_ += size; + break; + case OP_SKIP: + break; + case OP_INSERT: + memcpy(patch_out->cur, *new_, size); + patch_out->cur += size; + *new_ += size; + break; + default: + assert(0); + } + + return BDELTA_OK; + +out_of_memory: + return BDELTA_MEMORY; +} + +/* + * On success, returns 1, advances *sp past the parsed text, and sets *op_out and *size_out. + * On error or EOF, returns 0. + */ +static int csi32_parse_op( + const unsigned char **sp, const unsigned char *e, + int *op_out, uint32_t *size_out) +{ + const unsigned char *s = *sp; + int op; + unsigned int i; + unsigned int size_param_length; + uint32_t size; + + if (s >= e) + return 0; + op = *s & 3; + size_param_length = *s >> 2; + s++; + if (op == 0 || size_param_length > 4) + return 0; + + if (size_param_length == 0) { + size = 1; + } else { + if ((size_t)(e - s) < size_param_length) + return 0; + size = 0; + for (i = 0; i < size_param_length; i++) { + size <<= 8; + size |= *s++ & 0xFF; + } + } + + /* Make sure insert data fits in the patch, but don't consume it. */ + if (op == OP_INSERT && (size_t)(e - s) < size) + return 0; + + *op_out = op; + *size_out = size; + *sp = s; + return 1; +} + +/* + * bdelta uses the algorithm described in: + * + * Myers, E. (1986). An O(ND) Difference Algorithm and Its Variations. + * Retrieved from http://www.xmailserver.org/diff2.pdf + * + * The pseudocode in Myers' paper (Figure 2) uses an array called V, + * where (V[k], V[k] - k) is the endpoint of the furthest-reaching + * D-path ending on diagonal k. + * + * The structure below holds the V array for every iteration of the outer loop. + * Because each iteration produces D+1 values, a triangle is formed: + * + * k + * -5 -4 -3 -2 -1 0 1 2 3 4 5 + * ---------------------------------- + * 0 | 0 (copy 0) + * | \ skip 1 + * 1 | 0 1 + * | \ skip 1, then copy 1 + * 2 | 2 2 3 + * D | / insert 1, then copy 2 + * 3 | 3 4 5 5 + * | \ skip 1, then copy 1 + * 4 | 3 4 5 7 7 + * | / insert 1 + * 5 | 3 4 5 7 - - + * + * @data will literally contain: 0 0 1 2 2 3 3 4 5 5 3 4 5 7 7 3 4 5 7 + * + * To convert this to an edit script, we first climb back to the top, + * using the same procedure as was used when the triangle was generated: + * + * If k = -D, climb right (the only way we can go). + * If k = +D, climb left (the only way we can go). + * Otherwise, favor the greater number. + * If the numbers are the same, climb left. + * + * Finally, we convert the descent to the solution to a patch script: + * + * The top number n corresponds to: + * copy n + * + * A descent left from a to b corresponds to: + * insert 1 + * copy b-a + * + * A descent right from a to b corresponds to: + * skip 1 + * copy b-a-1 + */ +typedef struct +{ + uint32_t *data; + int solution_d; + int solution_k; + uint32_t *solution_ptr; +} Triangle; + +/* + * Return values: + * + * BDELTA_OK: Success + * BDELTA_MEMORY: Memory allocation failed + * BDELTA_INTERNAL_DMAX_EXCEEDED: d_max exceeded (strings are too different) + */ +static BDELTAcode build_triangle( + const char *old, uint32_t old_size, + const char *new_, uint32_t new_size, + int d_max, + Triangle *triangle_out) +{ + int d, k; + uint32_t x, y; + uint32_t *data; + uint32_t *vprev; /* position within previous row */ + uint32_t *v; /* position within current row */ + uint32_t *vcur; /* beginning of current row */ + size_t data_alloc = 16; + + memset(triangle_out, 0, sizeof(*triangle_out)); + + data = malloc(data_alloc * sizeof(*data)); + if (data == NULL) + return BDELTA_MEMORY; + + /* Allow dmax < 0 to mean "no limit". */ + if (d_max < 0) + d_max = old_size + new_size; + + /* + * Compute the farthest-reaching 0-path so the loop after this + * will have a "previous" row to start with. + */ + for (x = 0; x < old_size && x < new_size && old[x] == new_[x]; ) + x++; + *data = x; + if (x >= old_size && x >= new_size) { + /* Strings are equal, so return a triangle with one row (a dot). */ + assert(x == old_size && x == new_size); + triangle_out->data = data; + triangle_out->solution_d = 0; + triangle_out->solution_k = 0; + triangle_out->solution_ptr = data; + return BDELTA_OK; + } + vprev = data; + vcur = v = data + 1; + + /* + * Here is the core of the Myers diff algorithm. + * + * This is a direct translation of the pseudocode in Myers' paper, + * with implementation-specific adaptations: + * + * * Every V array is preserved per iteration of the outer loop. + * This is necessary so we can determine the actual patch, not just + * the length of the shortest edit string. See the coment above + * the definition of Triangle for an in-depth explanation. + * + * * Array items are stored consecutively so as to not waste space. + * + * * The buffer holding the V arrays is expanded dynamically. + */ + for (d = 1; d <= d_max; d++, vprev = vcur, vcur = v) { + /* Ensure that the buffer has enough room for this row. */ + if ((size_t)(v - data + d + 1) > data_alloc) { + size_t vprev_idx = vprev - data; + size_t v_idx = v - data; + size_t vcur_idx = vcur - data; + uint32_t *tmp; + + do { + data_alloc *= 2; + } while ((size_t)(v - data + d + 1) > data_alloc); + + tmp = realloc(data, data_alloc * sizeof(*data)); + if (tmp == NULL) { + free(data); + return BDELTA_MEMORY; + } + data = tmp; + + /* Relocate pointers to the buffer we just expanded. */ + vprev = data + vprev_idx; + v = data + v_idx; + vcur = data + vcur_idx; + } + + for (k = -d; k <= d; k += 2, vprev++) { + if (k == -d || (k != d && vprev[-1] < vprev[0])) + x = vprev[0]; + else + x = vprev[-1] + 1; + y = x - k; + while (x < old_size && y < new_size && old[x] == new_[y]) + x++, y++; + *v++ = x; + if (x >= old_size && y >= new_size) { + /* Shortest edit string found. */ + assert(x == old_size && y == new_size); + triangle_out->data = data; + triangle_out->solution_d = d; + triangle_out->solution_k = k; + triangle_out->solution_ptr = v - 1; + return BDELTA_OK; + } + } + } + + free(data); + return BDELTA_INTERNAL_DMAX_EXCEEDED; +} + +/* + * Trace a solution back to the top, returning a string of instructions + * for descending from the top to the solution. + * + * An instruction is one of the following: + * + * -1: Descend left. + * +1: Descend right. + * 0: Finished. You should be at the solution now. + * + * If memory allocation fails, this function will return NULL. + */ +static signed char *climb_triangle(const Triangle *triangle) +{ + signed char *descent; + int d, k; + uint32_t *p; + + assert(triangle->solution_d >= 0); + + descent = malloc(triangle->solution_d + 1); + if (descent == NULL) + return NULL; + d = triangle->solution_d; + k = triangle->solution_k; + p = triangle->solution_ptr; + descent[d] = 0; + + while (d > 0) { + if (k == -d || (k != d && *(p-d-1) < *(p-d))) { + /* Climb right */ + k++; + p = p - d; + descent[--d] = -1; + } else { + /* Climb left */ + k--; + p = p - d - 1; + descent[--d] = 1; + } + } + + return descent; +} + +/* + * Generate the actual patch, given data produced by build_triangle and + * climb_triangle. new_ is needed for the content of the insertions. + * + * See the comment above the definition of Triangle. It concisely documents + * how a descent down the triangle corresponds to a patch script. + * + * The resulting patch, including the patch type byte, is appended to patch_out. + * + * Return values: + * + * BDELTA_OK: Success + * BDELTA_MEMORY: Memory allocation failed + */ +static BDELTAcode descent_to_patch( + const signed char *descent, + const Triangle *triangle, + const char *new_, uint32_t new_size, + SB *patch_out) +{ + const char *new_end = new_ + new_size; + uint32_t *p = triangle->data; + uint32_t *p2; + int d = 0; + int k = 0; + int pending_op = 0; + int current_op; + uint32_t pending_length = 0; + uint32_t copy_length; + + if (sb_putc(patch_out, PT_CSI32) != 0) + return BDELTA_MEMORY; + + if (*p > 0) { + if (csi32_emit_op(patch_out, OP_COPY, *p, &new_) != BDELTA_OK) + return BDELTA_MEMORY; + } + + for (; *descent != 0; descent++, p = p2) { + if (*descent < 0) { + /* Descend left. */ + d++; + k--; + p2 = p + d; + current_op = OP_INSERT; + assert(*p2 >= *p); + copy_length = *p2 - *p; + } else { + /* Descend right. */ + d++; + k++; + p2 = p + d + 1; + current_op = OP_SKIP; + assert(*p2 > *p); + copy_length = *p2 - *p - 1; + } + + if (pending_op == current_op) { + pending_length++; + } else { + if (pending_op != 0) { + if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK) + return BDELTA_MEMORY; + } + pending_op = current_op; + pending_length = 1; + } + + if (copy_length > 0) { + if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK) + return BDELTA_MEMORY; + pending_op = 0; + if (csi32_emit_op(patch_out, OP_COPY, copy_length, &new_) != BDELTA_OK) + return BDELTA_MEMORY; + } + } + assert(d == triangle->solution_d); + assert(k == triangle->solution_k); + assert(p == triangle->solution_ptr); + + /* Emit the last pending op, unless it's a skip. */ + if (pending_op != 0 && pending_op != OP_SKIP) { + if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK) + return BDELTA_MEMORY; + } + + assert(new_ == new_end); + return BDELTA_OK; +} + +/* + * Generate a patch using Myers' O(ND) algorithm. + * + * The patch is appended to @patch_out, which must be initialized before calling. + * + * Return values: + * + * BDELTA_OK: Success + * BDELTA_MEMORY: Memory allocation failed + * BDELTA_INTERNAL_INPUTS_TOO_LARGE: Input sizes are too large + * BDELTA_INTERNAL_DMAX_EXCEEDED: d_max exceeded (strings are too different) + */ +static BDELTAcode diff_myers( + const char *old, size_t old_size, + const char *new_, size_t new_size, + SB *patch_out) +{ + Triangle triangle; + signed char *descent; + BDELTAcode rc; + + /* Make sure old_size + new_size does not overflow int or uint32_t. */ + if (old_size >= UINT32_MAX || + new_size >= UINT32_MAX - old_size || + old_size >= (unsigned int)INT_MAX || + new_size >= (unsigned int)INT_MAX - old_size) + return BDELTA_INTERNAL_INPUTS_TOO_LARGE; + + rc = build_triangle(old, old_size, new_, new_size, 1000, &triangle); + if (rc != BDELTA_OK) + return rc; + + descent = climb_triangle(&triangle); + if (descent == NULL) + goto oom1; + + if (descent_to_patch(descent, &triangle, new_, new_size, patch_out) != BDELTA_OK) + goto oom2; + + free(descent); + free(triangle.data); + return BDELTA_OK; + +oom2: + free(descent); +oom1: + free(triangle.data); + return BDELTA_MEMORY; +} + +BDELTAcode bdelta_diff( + const void *old, size_t old_size, + const void *new_, size_t new_size, + void **patch_out, size_t *patch_size_out) +{ + SB patch; + + if (sb_init(&patch) != 0) + goto out_of_memory; + + if (new_size == 0) + goto emit_new_literally; + + if (diff_myers(old, old_size, new_, new_size, &patch) != BDELTA_OK) + goto emit_new_literally; + + if (sb_size(&patch) > new_size) { + /* + * A literal copy of new is no longer than this patch. + * All that for nothing. + */ + goto emit_new_literally; + } + + /* + * Verify that patch, when applied to old, produces the correct text. + * If it doesn't, it's a bug, but fall back to a simple emit + * to avert data corruption. + */ + { + void *result; + size_t result_size; + BDELTAcode rc; + int correct; + + rc = bdelta_patch( + old, old_size, + patch.start, patch.cur - patch.start, + &result, &result_size + ); + + switch (rc) { + case BDELTA_OK: + correct = (result_size == new_size && + memcmp(result, new_, new_size) == 0); + free(result); + break; + + case BDELTA_MEMORY: + goto out_of_memory; + + default: + correct = 0; + break; + } + + if (!correct) { + assert(0); + goto emit_new_literally; + } + } + + sb_return(&patch, patch_out, patch_size_out); + return BDELTA_OK; + +emit_new_literally: + if (patch.cur != patch.start) { + free(patch.start); + if (sb_init(&patch) != 0) + goto out_of_memory; + } + if (sb_putc(&patch, PT_LITERAL) != 0 || sb_write(&patch, new_, new_size) != 0) + goto out_of_memory; + sb_return(&patch, patch_out, patch_size_out); + return BDELTA_OK; + +out_of_memory: + sb_discard(&patch, patch_out, patch_size_out); + return BDELTA_MEMORY; +} + +/* + * Return values: + * + * BDELTA_OK: Success + * BDELTA_PATCH_INVALID: Patch is malformed + * BDELTA_PATCH_MISMATCH: Old string is too small + * BDELTA_MEMORY: Memory allocation failed + */ +static BDELTAcode patch_csi32( + const unsigned char *o, const unsigned char *oe, + const unsigned char *p, const unsigned char *pe, + SB *new_out) +{ + int op; + uint32_t size; + + while (csi32_parse_op(&p, pe, &op, &size)) { + if ((op == OP_COPY || op == OP_SKIP) && (size_t)(oe - o) < size) { + /* Copy or skip instruction exceeds length of old string. */ + return BDELTA_PATCH_MISMATCH; + } + if (op == OP_COPY || op == OP_INSERT) + sb_need(new_out, size); + + switch (op) { + case OP_COPY: /* Copy @size bytes from old string. */ + memcpy(new_out->cur, o, size); + new_out->cur += size; + o += size; + break; + + case OP_SKIP: /* Skip @size bytes of old string. */ + o += size; + break; + + case OP_INSERT: /* Insert @size new bytes (from the patch script). */ + memcpy(new_out->cur, p, size); + new_out->cur += size; + p += size; + break; + + default: + assert(0); + } + } + if (p != pe) + return BDELTA_PATCH_INVALID; + + return BDELTA_OK; + +out_of_memory: + return BDELTA_MEMORY; +} + +BDELTAcode bdelta_patch( + const void *old, size_t old_size, + const void *patch, size_t patch_size, + void **new_out, size_t *new_size_out) +{ + const unsigned char *o = old; + const unsigned char *oe = o + old_size; + const unsigned char *p = patch; + const unsigned char *pe = p + patch_size; + SB result; + BDELTAcode rc; + + if (sb_init(&result) != 0) { + rc = BDELTA_MEMORY; + goto discard; + } + + if (p >= pe) { + rc = BDELTA_PATCH_INVALID; + goto discard; + } + + switch (*p++) { + case PT_LITERAL: + if (sb_write(&result, p, pe - p) != 0) { + rc = BDELTA_MEMORY; + goto discard; + } + break; + + case PT_CSI32: + rc = patch_csi32(o, oe, p, pe, &result); + if (rc != BDELTA_OK) + goto discard; + break; + + default: + rc = BDELTA_PATCH_INVALID; + goto discard; + } + + sb_return(&result, new_out, new_size_out); + return BDELTA_OK; + +discard: + sb_discard(&result, new_out, new_size_out); + return rc; +} + +const char *bdelta_strerror(BDELTAcode code) +{ + switch (code) { + case BDELTA_OK: + return "Success"; + case BDELTA_MEMORY: + return "Could not allocate memory"; + case BDELTA_PATCH_INVALID: + return "Patch is invalid"; + case BDELTA_PATCH_MISMATCH: + return "Patch applied to wrong data"; + + case BDELTA_INTERNAL_DMAX_EXCEEDED: + return "Difference threshold exceeded (internal error)"; + case BDELTA_INTERNAL_INPUTS_TOO_LARGE: + return "Inputs are too large (internal error)"; + + default: + return "Invalid error code"; + } +} + +void bdelta_perror(const char *s, BDELTAcode code) +{ + if (s != NULL && *s != '\0') + fprintf(stderr, "%s: %s\n", s, bdelta_strerror(code)); + else + fprintf(stderr, "%s\n", bdelta_strerror(code)); +} diff --git a/ccan/bdelta/bdelta.h b/ccan/bdelta/bdelta.h new file mode 100644 index 00000000..67ed73d9 --- /dev/null +++ b/ccan/bdelta/bdelta.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2011 Joseph Adams + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef CCAN_BDELTA_H +#define CCAN_BDELTA_H + +#include + +typedef enum { + BDELTA_OK = 0, /* Operation succeeded. */ + + BDELTA_MEMORY = 1, /* Memory allocation failed. */ + BDELTA_PATCH_INVALID = 2, /* Patch is malformed. */ + BDELTA_PATCH_MISMATCH = 3, /* Patch applied to wrong original string. */ + + /* Internal error codes. These will never be returned by API functions. */ + BDELTA_INTERNAL_DMAX_EXCEEDED = -10, + BDELTA_INTERNAL_INPUTS_TOO_LARGE = -11, +} BDELTAcode; + +/* + * bdelta_diff - Given two byte strings, generate a "patch" (also a byte string) + * that describes how to transform the old string into the new string. + * + * On success, returns BDELTA_OK, and passes a malloc'd block + * and its size through *patch_out and *patch_size_out. + * + * On failure, returns an error code, and clears *patch_out and *patch_size_out. + * + * Example: + * const char *old = "abcabba"; + * const char *new_ = "cbabac"; + * void *patch; + * size_t patch_size; + * BDELTAcode rc; + * + * rc = bdelta_diff(old, strlen(old), new_, strlen(new_), &patch, &patch_size); + * if (rc != BDELTA_OK) { + * bdelta_perror("bdelta_diff", rc); + * return; + * } + * ... + * free(patch); + */ +BDELTAcode bdelta_diff( + const void *old, size_t old_size, + const void *new_, size_t new_size, + void **patch_out, size_t *patch_size_out +); + +/* + * bdelta_patch - Apply a patch produced by bdelta_diff to the + * old string to recover the new string. + * + * On success, returns BDELTA_OK, and passes a malloc'd block + * and its size through *new_out and *new_size_out. + * + * On failure, returns an error code, and clears *new_out and *new_size_out. + * + * Example: + * const char *old = "abcabba"; + * void *new_; + * size_t new_size; + * BDELTAcode rc; + * + * rc = bdelta_patch(old, strlen(old), patch, patch_size, &new_, &new_size); + * if (rc != BDELTA_OK) { + * bdelta_perror("bdelta_patch", rc); + * return; + * } + * fwrite(new_, 1, new_size, stdout); + * putchar('\n'); + * free(new_); + */ +BDELTAcode bdelta_patch( + const void *old, size_t old_size, + const void *patch, size_t patch_size, + void **new_out, size_t *new_size_out +); + +/* + * bdelta_strerror - Return a string describing a bdelta error code. + */ +const char *bdelta_strerror(BDELTAcode code); + +/* + * bdelta_perror - Print a bdelta error message to stderr. + * + * This function handles @s the same way perror does. + */ +void bdelta_perror(const char *s, BDELTAcode code); + +#endif diff --git a/ccan/bdelta/test/common.h b/ccan/bdelta/test/common.h new file mode 100644 index 00000000..f58e4ee3 --- /dev/null +++ b/ccan/bdelta/test/common.h @@ -0,0 +1,294 @@ +#include +#include +#include + +#include +#include +#include + +/* + * Finds a pseudorandom 32-bit number from 0 to 2^32-1 . + * Uses the BCPL linear congruential generator method. + * + * Used instead of system RNG to ensure tests are consistent. + */ +static uint32_t rand32(void) +{ +#if 0 + /* + * Tests should be run with a different random function + * from time to time. I've found that the method below + * sometimes behaves poorly for testing purposes. + * For example, rand32() % N might only return even numbers. + */ + assert(RAND_MAX == 2147483647); + return ((random() & 0xFFFF) << 16) | (random() & 0xFFFF); +#else + static uint32_t rand32_state = 0; + rand32_state *= (uint32_t)0x7FF8A3ED; + rand32_state += (uint32_t)0x2AA01D31; + return rand32_state; +#endif +} + +#define RSTRING_OK 0 /* Operation succeeded */ +#define RSTRING_MEMORY 1 /* Memory allocation failed */ +#define RSTRING_ZERO_CARDINALITY 2 /* Cardinality of byte range is zero */ +#define RSTRING_RANGE_OVERLAP 3 /* Byte range contains overlapping characters */ + +struct rstring_byte_range { + unsigned int cardinality; + unsigned int multiplier; + unsigned int offset; +}; + +static int check_range(const struct rstring_byte_range *range) +{ + unsigned int i; + unsigned int c; + char set[256]; + + if (range == NULL) + return RSTRING_OK; + + if (range->cardinality == 0) + return RSTRING_ZERO_CARDINALITY; + + memset(set, 0, sizeof(set)); + + c = range->offset & 0xFF; + for (i = 0; i < range->cardinality; i++) { + if (set[c]) + return RSTRING_RANGE_OVERLAP; + set[c] = 1; + c += range->multiplier; + c &= 0xFF; + } + + return RSTRING_OK; +} + +/* + * Generate a pseudorandom string of the given length, + * writing it into a user-supplied buffer. + */ +static uint8_t *random_string_into(uint8_t *str, uint32_t size, const struct rstring_byte_range *range) +{ + uint32_t i; + uint32_t r; + + if (range == NULL) { + for (i = 0; size - i >= 4; ) { + r = rand32(); + str[i++] = r & 0xFF; + r >>= 8; + str[i++] = r & 0xFF; + r >>= 8; + str[i++] = r & 0xFF; + r >>= 8; + str[i++] = r & 0xFF; + } + + if (i < size) { + r = rand32(); + do { + str[i++] = r & 0xFF; + r >>= 8; + } while (i < size); + } + } else { + for (i = 0; i < size; ) + str[i++] = ((rand32() % range->cardinality) * range->multiplier + range->offset) & 0xFF; + } + + return str; +} + +/* + * Generate a pseudorandom string of the given length. + */ +static uint8_t *random_string(uint32_t size, const struct rstring_byte_range *range) +{ + uint8_t *str; + + if (check_range(range) != RSTRING_OK) { + fprintf(stderr, "Invalid byte range for random string generation\n"); + exit(EXIT_FAILURE); + } + + str = malloc(size); + if (str == NULL) + return NULL; + + return random_string_into(str, size, range); +} + +/* + * Generate two byte strings: + * + * An "old" string, of length @old_size + * A "new" string, differing from "old" by at most @diff_size bytes + * (where modifying a character is considered one change). + * + * Returns RSTRING_OK on success, RSTRING_MEMORY if memory allocation fails. + */ +static int random_string_pair( + uint32_t old_size, uint32_t diff_size, const struct rstring_byte_range *range, + uint8_t **old_out, uint8_t **new_out, uint32_t *new_size_out) +{ + uint8_t *old; + uint8_t *new_; + uint8_t *nptr; + uint32_t new_size; + uint32_t i; + uint32_t j; + + /* insertions[i] is the number of characters to insert before position i. */ + uint32_t *insertions; + + /* + * changes[i] indicates what to do to the character at position i: + * + * 0: Leave it as-is. + * 1: Delete it. + * 2: Change it to another byte (possibly the same byte). + */ + char *changes; + + /* String of new bytes to use for insertions and changes. */ + uint8_t *new_bytes; + uint8_t *new_bytes_ptr; + uint32_t new_bytes_size; + + old = random_string(old_size, range); + if (old == NULL) + goto oom0; + + insertions = calloc(old_size + 1, sizeof(*insertions)); + if (insertions == NULL) + goto oom1; + + changes = calloc(old_size, sizeof(*changes)); + if (changes == NULL) + goto oom2; + + /* + * Generate a collection of edits, which will be + * applied to the old string "simultaneously". + */ + new_size = old_size; + new_bytes_size = 0; + for (i = 0; i < diff_size; i++) { + uint32_t pos; + + switch (rand32() % 3) { + case 0: /* Insert a byte. */ + pos = rand32() % (old_size + 1); + insertions[pos]++; + new_size++; + new_bytes_size++; + break; + + case 1: /* Delete a byte. */ + pos = rand32() % old_size; + if (changes[pos]) { + /* + * This character is already deleted or changed. + * Do something else instead. + * + * For the observative: i will overflow if it's 0. + * However, overflow of unsigned integers is well-defined + * by the C standard. i will wrap around to UINT32_MAX, + * then the for-loop increment above will wrap it back to 0. + */ + i--; + } else { + changes[pos] = 1; + new_size--; + } + break; + + default: /* Change a byte. */ + pos = rand32() % old_size; + if (changes[pos]) { + /* + * This character is already deleted or changed. + * Do something else instead. + */ + i--; + } else { + changes[pos] = 2; + new_bytes_size++; + } + break; + } + } + + new_bytes = malloc(new_bytes_size); + if (new_bytes == NULL) + goto oom3; + random_string_into(new_bytes, new_bytes_size, range); + + new_ = malloc(new_size); + if (new_ == NULL) + goto oom4; + + /* Apply the insertions and changes generated above. */ + nptr = new_; + new_bytes_ptr = new_bytes; + for (i = 0; i < old_size; i++) { + for (j = 0; j < insertions[i]; j++) + *nptr++ = *new_bytes_ptr++; + + switch (changes[i]) { + case 0: /* No change */ + *nptr++ = old[i]; + break; + + case 1: /* Delete */ + break; + + default: /* Change value */ + *nptr++ = *new_bytes_ptr++; + break; + } + } + for (j = 0; j < insertions[i]; j++) + *nptr++ = *new_bytes_ptr++; + assert((size_t)(nptr - new_) == new_size); + assert((size_t)(new_bytes_ptr - new_bytes) == new_bytes_size); + + free(new_bytes); + free(changes); + free(insertions); + + if (old_out) + *old_out = old; + else + free(old); + if (new_out) + *new_out = new_; + else + free(new_); + if (new_size_out) + *new_size_out = new_size; + + return RSTRING_OK; + +oom4: + free(new_bytes); +oom3: + free(changes); +oom2: + free(insertions); +oom1: + free(old); +oom0: + if (old_out) + *old_out = NULL; + if (new_out) + *new_out = NULL; + if (new_size_out) + *new_size_out = 0; + return RSTRING_MEMORY; +} diff --git a/ccan/bdelta/test/run-medium.c b/ccan/bdelta/test/run-medium.c new file mode 100644 index 00000000..743eeb5d --- /dev/null +++ b/ccan/bdelta/test/run-medium.c @@ -0,0 +1,79 @@ +#include "common.h" + +/* + * Note that bdelta_diff verifies the patch before returning it (except for + * when it returns a PT_LITERAL patch, as its correctness is easy to prove). + * Only the trivial tests check the result explicitly using bdiff_patch. + */ +static int test_random( + uint32_t old_size, uint32_t diff_size, + unsigned int cardinality, unsigned int multiplier, unsigned int offset) +{ + struct rstring_byte_range range; + uint8_t *old; + uint8_t *new_; + uint32_t new_size; + BDELTAcode rc; + + range.cardinality = cardinality; + range.multiplier = multiplier; + range.offset = offset; + + if (random_string_pair(old_size, diff_size, cardinality == 0 ? NULL : &range, + &old, &new_, &new_size) != RSTRING_OK) + { + fprintf(stderr, "Error generating random string pair\n"); + exit(EXIT_FAILURE); + } + + rc = bdelta_diff(old, old_size, new_, new_size, NULL, NULL); + if (rc != BDELTA_OK) { + bdelta_perror("bdelta_diff", rc); + return 0; + } + + free(new_); + free(old); + return 1; +} + +int main(void) +{ + int i; + int count = 25; + + plan_tests(count * 14); + + for (i = 0; i < count; i++) + ok1(test_random(100, 10, 0, 0, 0)); + for (i = 0; i < count; i++) + ok1(test_random(100, rand32() % 200, 0, 0, 0)); + for (i = 0; i < count; i++) + ok1(test_random(1000, rand32() % 200, 0, 0, 0)); + for (i = 0; i < count; i++) + ok1(test_random(1000, rand32() % 2000, 0, 0, 0)); + for (i = 0; i < count; i++) + ok1(test_random(10000, rand32() % 200, 0, 0, 0)); + for (i = 0; i < count; i++) + ok1(test_random(10000, rand32() % 2000, 0, 0, 0)); + for (i = 0; i < count; i++) + ok1(test_random(rand32() % 20000, rand32() % 20000, 0, 0, 0)); + + /* Low-cardinality tests */ + for (i = 0; i < count; i++) + ok1(test_random(100, 10, rand32() % 20 + 1, 1, i)); + for (i = 0; i < count; i++) + ok1(test_random(100, rand32() % 200, rand32() % 20 + 1, 1, i)); + for (i = 0; i < count; i++) + ok1(test_random(1000, rand32() % 200, rand32() % 20 + 1, 1, i)); + for (i = 0; i < count; i++) + ok1(test_random(1000, rand32() % 2000, rand32() % 20 + 1, 1, i)); + for (i = 0; i < count; i++) + ok1(test_random(10000, rand32() % 200, rand32() % 20 + 1, 1, i)); + for (i = 0; i < count; i++) + ok1(test_random(10000, rand32() % 2000, rand32() % 20 + 1, 1, i)); + for (i = 0; i < count; i++) + ok1(test_random(rand32() % 20000, rand32() % 20000, rand32() % 20 + 1, 1, i)); + + return exit_status(); +} diff --git a/ccan/bdelta/test/run-myers.c b/ccan/bdelta/test/run-myers.c new file mode 100644 index 00000000..7042e2ce --- /dev/null +++ b/ccan/bdelta/test/run-myers.c @@ -0,0 +1,202 @@ +#include "common.h" + +#define is_digit(c) ((c) >= '0' && (c) <= '9') + +static uint32_t parse_int(const char **sp) +{ + const char *s = *sp; + uint32_t n = 0; + + while (is_digit(*s)) { + n *= 10; + n += *s++ - '0'; + } + + *sp = s; + return n; +} + +static const char *op_name(int op) +{ + switch (op) { + case OP_COPY: + return "copy"; + case OP_SKIP: + return "skip"; + case OP_INSERT: + return "insert"; + + default: + return ""; + } +} + +static const char *verify_csi32( + const unsigned char *patch_start, const unsigned char *patch_end, + const char *expected) +{ + const unsigned char *p = patch_start; + + if (p >= patch_end) + return "Patch type byte missing"; + if (*p++ != PT_CSI32) + return "Patch type byte is not PT_CSI32"; + + for (;;) { + int patch_op; + uint32_t patch_size; + int expected_op; + uint32_t expected_size; + + while (*expected == ' ') + expected++; + if (*expected == '\0') + break; + + if (!csi32_parse_op(&p, patch_end, &patch_op, &patch_size)) { + if (p == patch_end) + return "Patch shorter than expected."; + else + return "Patch contains invalid CSI-32"; + } + + switch (*expected) { + case 'c': + expected_op = OP_COPY; + break; + case 's': + expected_op = OP_SKIP; + break; + case 'i': + expected_op = OP_INSERT; + break; + + default: + diag("verify_csi32: Unexpected character %c", *expected); + return "Syntax error in expected difference"; + } + expected++; + + while (*expected == ' ') + expected++; + + if (patch_op != expected_op) { + diag("verify_csi32: Expected %s, but next op is %s %lu", + op_name(expected_op), op_name(patch_op), (unsigned long)patch_size); + return "Operation mismatch"; + } + + if (expected_op == OP_COPY || expected_op == OP_SKIP) { + if (!is_digit(*expected)) { + diag("verify_csi32: Expected size after %s instruction", + op_name(expected_op)); + return "Syntax error in expected difference"; + } + expected_size = parse_int(&expected); + + if (patch_size != expected_size) { + diag("verify_csi32: Expected %s %lu, but patch says %s %lu", + op_name(expected_op), (unsigned long)expected_size, + op_name(expected_op), (unsigned long)patch_size); + return "Operation size mismatch"; + } + } else { + if (*expected++ != '\'') { + diag("verify_csi32: Expected single-quoted string after insert instruction"); + return "Syntax error in expected difference"; + } + + for (expected_size = 0; ; expected_size++) { + unsigned char c; + + if (*expected == '\'' && *(expected + 1) == '\'') { + c = '\''; + expected += 2; + } else if (*expected == '\'') { + expected++; + break; + } else if (*expected == '\0') { + diag("verify_csi32: Missing end quote"); + return "Syntax error in expected difference"; + } else { + c = *expected++; + } + + if (!(p < patch_end && *p++ == c)) + return "Insertion string mismatch"; + } + + if (patch_size != expected_size) + return "Insertion string mismatch"; + } + } + + if (p != patch_end) + return "Patch longer than expected."; + + return NULL; +} + +static void test_myers(const char *old, const char *new_, const char *expected_difference) +{ + SB patch; + BDELTAcode rc; + const char *verify_msg; + + if (sb_init(&patch) != 0) { + fail("Out of memory"); + return; + } + + rc = diff_myers(old, strlen(old), new_, strlen(new_), &patch); + if (rc != BDELTA_OK) { + fail("test_myers(%s, %s, %s): diff_myers failed: %s", old, new_, expected_difference, bdelta_strerror(rc)); + sb_discard(&patch, NULL, NULL); + return; + } + + verify_msg = verify_csi32(patch.start, patch.cur, expected_difference); + sb_discard(&patch, NULL, NULL); + + if (verify_msg != NULL) { + fail("test_myers(%s, %s, %s): %s", old, new_, expected_difference, verify_msg); + return; + } + + pass("test_myers(%s, %s, %s)", old, new_, expected_difference); +} + +int main(void) +{ + (void) random_string_pair; + + plan_tests(17); + + test_myers("abcabba", "cbabac", "s2 c1 i'b' c2 s1 c1 i'c'"); + test_myers("abcdefg", "abcdefg", "c7"); + test_myers("abcdefg", "abcde", "c5"); + test_myers("abcdefg", "abcdefga", "c7 i'a'"); + test_myers("abbbbbb", "bbbbbb", "s1 c6"); + test_myers("abbbbbb", "bbbbbbb", "s1 c6 i'b'"); + test_myers("bbbbbb", "abbbbbb", "i'a' c6"); + test_myers("bbbbbbb", "abbbbbb", "i'a' c6"); + test_myers("bbbbbb", "abbbbbbb", "i'a' c6 i'b'"); + + test_myers("ac", "ba", "i'b' c1"); + test_myers("ac", "bc", "s1 i'b' c1"); + test_myers("abcd", "cabd", "i'c' c2 s1 c1"); + test_myers("", "abc", "i'abc'"); + test_myers("abc", "", ""); + test_myers("abcd", "d", "s3 c1"); + test_myers("", "", ""); + + /* + * In the event the strings have no characters in common, diff_myers will + * emit a skip of all the characters in the old string. Although it is + * unnecessary, it is tricky to get rid of. bdelta_diff will discard the + * patch anyway, because it's bigger than a literal. + */ + test_myers("aaaaaaa", "bbbbbbb", "s7 i'bbbbbbb'"); + + return exit_status(); +} diff --git a/ccan/bdelta/test/run-trivial.c b/ccan/bdelta/test/run-trivial.c new file mode 100644 index 00000000..8dd06c1f --- /dev/null +++ b/ccan/bdelta/test/run-trivial.c @@ -0,0 +1,67 @@ +#include +#include +#include + +static int test_trivial(const char *old, const char *new_) +{ + void *patch; + size_t patch_size; + BDELTAcode rc; + + void *new2; + size_t new2_size; + + rc = bdelta_diff(old, strlen(old), new_, strlen(new_), &patch, &patch_size); + if (rc != BDELTA_OK) { + bdelta_perror("bdelta_diff", rc); + return 0; + } + + if (patch_size > strlen(new_) + 1) { + fprintf(stderr, "bdelta_diff produced a patch larger than a simple literal emitting the new string.\n"); + return 0; + } + + rc = bdelta_patch(old, strlen(old), patch, patch_size, &new2, &new2_size); + if (rc != BDELTA_OK) { + bdelta_perror("bdelta_patch", rc); + return 0; + } + + if (new2_size != strlen(new_) || strcmp(new2, new_) != 0) { + fprintf(stderr, "patch(old, diff(old, new)) != new\n"); + return 0; + } + + /* Make sure bdelta_diff properly discards unwanted return values. */ + rc = bdelta_diff(old, strlen(old), new_, strlen(new_), NULL, NULL); + if (rc != BDELTA_OK) { + bdelta_perror("bdelta_diff (second time)", rc); + return 0; + } + + free(new2); + free(patch); + return 1; +} + +int main(void) +{ + plan_tests(13); + + ok1(test_trivial("abcabba", "cbabac")); + ok1(test_trivial("aaabbbcdaabcc", "aaabbcdaabeca")); + ok1(test_trivial("aaaaaaaa", "bbbbbbbb")); + ok1(test_trivial("aaaaaaaa", "")); + ok1(test_trivial("", "bbbbbbbb")); + ok1(test_trivial("", "")); + ok1(test_trivial("aaaaaaaa", "aaaaaaaabbbbbbbb")); + ok1(test_trivial("aaaaaaaa", "bbbbbbbbaaaaaaaa")); + ok1(test_trivial("aaaaaaaabbbbbbbb", "aaaaaaaa")); + ok1(test_trivial("aaaaaaaabbbbbbbb", "bbbbbbbb")); + ok1(test_trivial("aaaaaaaabbbbbbbb", "bbbbbbbb")); + ok1(test_trivial("abababababababab", "babababababababa")); + ok1(test_trivial("aababcabcdabcde", "aababcabcdabcde")); + + return exit_status(); +} -- 2.39.2