1 /* Because this code is derived from the 4.3BSD compress source:
4 * Copyright (c) 1985, 1986 The Regents of the University of California.
7 * This code is derived from software contributed to Berkeley by
8 * James A. Woods, derived from original work by Spencer Thomas
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * This version is for use with STREAMS under SunOS 4.x.
43 * $Id: bsd-comp.c,v 1.3 1994/08/31 23:58:53 paulus Exp $
46 #include <sys/param.h>
47 #include <sys/types.h>
48 #include <sys/stream.h>
49 #include <sys/kmem_alloc.h>
50 #include <net/ppp_str.h>
53 #include <net/ppp-comp.h>
56 * PPP "BSD compress" compression
57 * The differences between this compression and the classic BSD LZW
58 * source are obvious from the requirement that the classic code worked
59 * with files while this handles arbitrarily long streams that
60 * are broken into packets. They are:
62 * When the code size expands, a block of junk is not emitted by
63 * the compressor and not expected by the decompressor.
65 * New codes are not necessarily assigned every time an old
66 * code is output by the compressor. This is because a packet
67 * end forces a code to be emitted, but does not imply that a
68 * new sequence has been seen.
70 * The compression ratio is checked at the first end of a packet
71 * after the appropriate gap. Besides simplifying and speeding
72 * things up, this makes it more likely that the transmitter
73 * and receiver will agree when the dictionary is cleared when
74 * compression is not going well.
78 * A dictionary for doing BSD compress.
81 int totlen; /* length of this structure */
82 u_int hsize; /* size of the hash table */
83 u_char hshift; /* used in hash function */
84 u_char n_bits; /* current bits/code */
88 u_int maxmaxcode; /* largest valid code */
89 u_int max_ent; /* largest code in use */
90 u_long seqno; /* # of last byte of packet */
91 u_long in_count; /* uncompressed bytes */
92 u_long bytes_out; /* compressed bytes */
93 u_long ratio; /* recent compression ratio */
94 u_long checkpoint; /* when to next check the ratio */
95 int clear_count; /* times dictionary cleared */
96 int incomp_count; /* incompressible packets */
97 u_short *lens; /* array of lengths of codes */
99 union { /* hash value */
102 #ifdef BSD_LITTLE_ENDIAN
103 u_short prefix; /* preceding code */
104 u_char suffix; /* last character of new code */
108 u_char suffix; /* last character of new code */
109 u_short prefix; /* preceding code */
113 u_short codem1; /* output of hash table -1 */
114 u_short cptr; /* map code to hash table entry */
118 #define BSD_OVHD 3 /* BSD compress overhead/packet */
119 #define MIN_BSD_BITS 9
120 #define BSD_INIT_BITS MIN_BSD_BITS
121 #define MAX_BSD_BITS 15
123 static void *bsd_comp_alloc __P((u_char *options, int opt_len));
124 static void *bsd_decomp_alloc __P((u_char *options, int opt_len));
125 static void bsd_free __P((void *state));
126 static int bsd_comp_init __P((void *state, u_char *options, int opt_len,
127 int unit, int debug));
128 static int bsd_decomp_init __P((void *state, u_char *options, int opt_len,
129 int unit, int mru, int debug));
130 static int bsd_compress __P((void *state, mblk_t **mret,
131 mblk_t *mp, int slen, int maxolen));
132 static void bsd_incomp __P((void *state, mblk_t *dmsg));
133 static mblk_t *bsd_decompress __P((void *state, mblk_t *cmp, int hdroff));
134 static void bsd_reset __P((void *state));
137 * Procedures exported to ppp_comp.c.
139 struct compressor ppp_bsd_compress = {
140 0x21, /* compress_proto */
141 bsd_comp_alloc, /* comp_alloc */
142 bsd_free, /* comp_free */
143 bsd_comp_init, /* comp_init */
144 bsd_reset, /* comp_reset */
145 bsd_compress, /* compress */
146 bsd_decomp_alloc, /* decomp_alloc */
147 bsd_free, /* decomp_free */
148 bsd_decomp_init, /* decomp_init */
149 bsd_reset, /* decomp_reset */
150 bsd_decompress, /* decompress */
151 bsd_incomp, /* incomp */
155 * the next two codes should not be changed lightly, as they must not
156 * lie within the contiguous general code space.
158 #define CLEAR 256 /* table clear output code */
159 #define FIRST 257 /* first free entry */
/* Largest code value representable in b bits. */
#define MAXCODE(b) ((1 << (b)) - 1)
/*
 * codem1 value stored in a hash table entry to mark it invalid.
 * No trailing semicolon, so the macro is usable inside expressions and
 * does not leave a stray empty statement after `x = BADCODEM1;`.
 */
#define BADCODEM1 MAXCODE(MAX_BSD_BITS)
165 #define BSD_HASH(prefix,suffix,hshift) ((((u_long)(suffix)) << (hshift)) \
167 #define BSD_KEY(prefix,suffix) ((((u_long)(suffix)) << 16) + (u_long)(prefix))
169 #define CHECK_GAP 10000 /* Ratio check interval */
173 * clear the dictionary
180 db->max_ent = FIRST-1;
181 db->n_bits = BSD_INIT_BITS;
185 db->incomp_count = 0;
186 db->checkpoint = CHECK_GAP;
190 * If the dictionary is full, then see if it is time to reset it.
192 * Compute the compression ratio using fixed-point arithmetic
193 * with 8 fractional bits.
195 * Since we have an infinite stream instead of a single file,
196 * watch only the local compression ratio.
198 * Since both peers must reset the dictionary at the same time even in
199 * the absence of CLEAR codes (while packets are incompressible), they
200 * must compute the same ratio.
202 static int /* 1=output CLEAR */
208 if (db->in_count >= db->checkpoint) {
209 /* age the ratio by limiting the size of the counts */
210 if (db->in_count >= 0x7fffff
211 || db->bytes_out >= 0x7fffff) {
212 db->in_count -= db->in_count/4;
213 db->bytes_out -= db->bytes_out/4;
216 db->checkpoint = db->in_count + CHECK_GAP;
218 if (db->max_ent >= db->maxmaxcode) {
219 /* Reset the dictionary only if the ratio is worse,
220 * or if it looks as if it has been poisoned
221 * by incompressible data.
223 * This does not overflow, because
224 * db->in_count <= 0x7fffff.
226 new_ratio = db->in_count<<8;
227 if (db->bytes_out != 0)
228 new_ratio /= db->bytes_out;
230 if (new_ratio < db->ratio || new_ratio < 256) {
234 db->ratio = new_ratio;
241 * Reset state, as on a CCP ResetReq.
247 struct bsd_db *db = (struct bsd_db *) state;
255 * Allocate space for a (de) compressor.
258 bsd_alloc(options, opt_len, decomp)
263 u_int newlen, hsize, hshift, maxmaxcode;
266 if (opt_len != 3 || options[0] != 0x21 || options[1] != 3)
270 case 9: /* needs 82152 for both directions */
271 case 10: /* needs 84144 */
272 case 11: /* needs 88240 */
273 case 12: /* needs 96432 */
277 case 13: /* needs 176784 */
281 case 14: /* needs 353744 */
285 case 15: /* needs 691440 */
289 case 16: /* needs 1366160--far too much, */
290 /* hsize = 69001; */ /* and 69001 is too big for cptr */
291 /* hshift = 8; */ /* in struct bsd_db */
297 maxmaxcode = MAXCODE(bits);
298 newlen = sizeof(*db) + (hsize-1) * (sizeof(db->dict[0]));
299 db = (struct bsd_db *) kmem_alloc(newlen, KMEM_NOSLEEP);
302 bzero(db, sizeof(*db) - sizeof(db->dict));
307 db->lens = (u_short *) kmem_alloc((maxmaxcode+1) * sizeof(db->lens[0]),
310 kmem_free(db, newlen);
318 db->maxmaxcode = maxmaxcode;
327 struct bsd_db *db = (struct bsd_db *) state;
330 kmem_free(db->lens, (db->maxmaxcode+1) * sizeof(db->lens[0]));
331 kmem_free(db, db->totlen);
335 bsd_comp_alloc(options, opt_len)
339 return bsd_alloc(options, opt_len, 0);
343 bsd_decomp_alloc(options, opt_len)
347 return bsd_alloc(options, opt_len, 1);
351 * Initialize the database.
354 bsd_init(db, options, opt_len, unit, mru, debug, decomp)
357 int opt_len, unit, mru, debug, decomp;
361 if (opt_len != 3 || options[0] != 0x21 || options[1] != 3
362 || MAXCODE(options[2]) != db->maxmaxcode
363 || decomp && db->lens == NULL)
373 db->dict[--i].codem1 = BADCODEM1;
374 db->dict[i].cptr = 0;
379 db->clear_count = -1;
389 bsd_comp_init(state, options, opt_len, unit, debug)
392 int opt_len, unit, debug;
394 return bsd_init((struct bsd_db *) state, options, opt_len,
399 bsd_decomp_init(state, options, opt_len, unit, mru, debug)
402 int opt_len, unit, mru, debug;
404 return bsd_init((struct bsd_db *) state, options, opt_len,
405 unit, mru, debug, 1);
412 * Assume the protocol is known to be >= 0x21 and < 0xff.
413 * One change from the BSD compress command is that when the
414 * code size expands, we do not output a bunch of padding.
416 static int /* new slen */
417 bsd_compress(state, mret, mp, slen, maxolen)
419 mblk_t **mret; /* return compressed mbuf chain here */
420 mblk_t *mp; /* from here */
421 int slen; /* uncompressed length */
422 int maxolen; /* max compressed length */
424 struct bsd_db *db = (struct bsd_db *) state;
425 int hshift = db->hshift;
426 u_int max_ent = db->max_ent;
427 u_int n_bits = db->n_bits;
430 struct bsd_dict *dictp;
433 long hval, disp, ent;
441 #define PUTBYTE(v) { \
444 if (wptr >= cp_end) { \
449 cp_end = m->b_datap->db_lim; \
457 #define OUTPUT(ent) { \
459 accm |= ((ent) << bitno); \
461 PUTBYTE(accm >> 24); \
464 } while (bitno <= 24); \
467 /* Don't generate compressed packets which are larger than
468 the uncompressed packet. */
472 /* Allocate enough message blocks to give maxolen total space. */
474 for (olen = maxolen; olen > 0; ) {
475 m = allocb((olen < 4096? olen: 4096), BPRI_MED);
485 olen -= m->b_datap->db_lim - m->b_wptr;
490 if ((m = *mret) != NULL) {
492 cp_end = m->b_datap->db_lim;
494 wptr = cp_end = NULL;
498 * Copy the PPP header over, changing the protocol,
499 * and install the 3-byte sequence number.
501 slen += db->seqno - PPP_HDRLEN + 1;
504 wptr[0] = rptr[0]; /* assumes the ppp header is */
505 wptr[1] = rptr[1]; /* all in one mblk */
506 wptr[2] = 0; /* change the protocol */
511 wptr += PPP_HDRLEN + BSD_OVHD;
514 /* start with the protocol byte */
517 slen = mp->b_wptr - rptr;
518 db->in_count += slen + 1;
525 slen = np->b_wptr - rptr;
528 continue; /* handle 0-length buffers */
529 db->in_count += slen;
534 fcode = BSD_KEY(ent, c);
535 hval = BSD_HASH(ent, c, hshift);
536 dictp = &db->dict[hval];
538 /* Validate and then check the entry. */
539 if (dictp->codem1 >= max_ent)
541 if (dictp->f.fcode == fcode) {
542 ent = dictp->codem1+1;
543 continue; /* found (prefix,suffix) */
546 /* continue probing until a match or invalid entry */
547 disp = (hval == 0) ? 1 : hval;
550 if (hval >= db->hsize)
552 dictp = &db->dict[hval];
553 if (dictp->codem1 >= max_ent)
555 } while (dictp->f.fcode != fcode);
556 ent = dictp->codem1+1; /* finally found (prefix,suffix) */
560 OUTPUT(ent); /* output the prefix */
562 /* code -> hashtable */
563 if (max_ent < db->maxmaxcode) {
564 struct bsd_dict *dictp2;
565 /* expand code size if needed */
566 if (max_ent >= MAXCODE(n_bits))
567 db->n_bits = ++n_bits;
569 /* Invalidate old hash table entry using
570 * this code, and then take it over.
572 dictp2 = &db->dict[max_ent+1];
573 if (db->dict[dictp2->cptr].codem1 == max_ent)
574 db->dict[dictp2->cptr].codem1 = BADCODEM1;
576 dictp->codem1 = max_ent;
577 dictp->f.fcode = fcode;
579 db->max_ent = ++max_ent;
584 OUTPUT(ent); /* output the last code */
585 db->bytes_out += olen;
588 OUTPUT(CLEAR); /* do not count the CLEAR */
590 /* Pad dribble bits of last code with ones.
591 * Do not emit a completely useless byte of ones.
594 PUTBYTE((accm | (0xff << (bitno-8))) >> 24);
596 /* Increase code size if we would have without the packet
597 * boundary and as the decompressor will.
599 if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode)
602 if (olen + PPP_HDRLEN + BSD_OVHD > maxolen && *mret != NULL) {
603 /* throw away the compressed stuff if it is longer than uncompressed */
606 } else if (wptr != NULL) {
614 return olen + PPP_HDRLEN + BSD_OVHD;
621 * Update the "BSD Compress" dictionary on the receiver for
622 * incompressible data by pretending to compress the incoming data.
623 * The protocol is assumed to be < 0x100.
626 bsd_incomp(state, dmsg)
630 struct bsd_db *db = (struct bsd_db *) state;
631 u_int hshift = db->hshift;
632 u_int max_ent = db->max_ent;
633 u_int n_bits = db->n_bits;
634 struct bsd_dict *dictp;
646 db->in_count++; /* count the protocol as 1 byte */
648 ent = rptr[3]; /* get the protocol */
651 slen = dmsg->b_wptr - rptr;
657 continue; /* skip zero-length buffers */
659 db->in_count += slen;
664 fcode = BSD_KEY(ent, c);
665 hval = BSD_HASH(ent, c, hshift);
666 dictp = &db->dict[hval];
668 /* validate and then check the entry */
669 if (dictp->codem1 >= max_ent)
671 if (dictp->f.fcode == fcode) {
672 ent = dictp->codem1+1;
673 continue; /* found (prefix,suffix) */
676 /* continue probing until a match or invalid entry */
677 disp = (hval == 0) ? 1 : hval;
680 if (hval >= db->hsize)
682 dictp = &db->dict[hval];
683 if (dictp->codem1 >= max_ent)
685 } while (dictp->f.fcode != fcode);
686 ent = dictp->codem1+1;
687 continue; /* finally found (prefix,suffix) */
689 nomatch: /* output (count) the prefix */
692 /* code -> hashtable */
693 if (max_ent < db->maxmaxcode) {
694 struct bsd_dict *dictp2;
695 /* expand code size if needed */
696 if (max_ent >= MAXCODE(n_bits))
697 db->n_bits = ++n_bits;
699 /* Invalidate previous hash table entry
700 * assigned this code, and then take it over.
702 dictp2 = &db->dict[max_ent+1];
703 if (db->dict[dictp2->cptr].codem1 == max_ent)
704 db->dict[dictp2->cptr].codem1 = BADCODEM1;
706 dictp->codem1 = max_ent;
707 dictp->f.fcode = fcode;
709 db->max_ent = ++max_ent;
710 db->lens[max_ent] = db->lens[ent]+1;
713 } while (--slen != 0);
715 bitno += n_bits; /* output (count) the last code */
716 db->bytes_out += bitno/8;
719 /* Increase code size if we would have without the packet
720 * boundary and as the decompressor will.
722 if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode)
728 * Decompress "BSD Compress"
730 static mblk_t * /* 0=failed, so zap CCP */
731 bsd_decompress(state, cmsg, hdroff)
736 struct bsd_db *db = (struct bsd_db *) state;
737 u_int max_ent = db->max_ent;
739 u_int bitno = 32; /* 1st valid bit in accm */
740 u_int n_bits = db->n_bits;
741 u_int tgtbitno = 32-n_bits; /* bitno when we have a code */
742 struct bsd_dict *dictp;
743 int explen, i, seq, len;
744 u_int incode, oldcode, finchar;
745 u_char *p, *rptr, *wptr;
748 int dlen, space, codelen;
751 * Get at least the BSD Compress header in the first buffer
754 if (rptr + PPP_HDRLEN + BSD_OVHD <= cmsg->b_wptr) {
755 if (!pullupmsg(cmsg, PPP_HDRLEN + BSD_OVHD + 1)) {
757 printf("bsd_decomp%d: failed to pullup\n", db->unit);
764 * Save the address/control from the PPP header
765 * and then get the sequence number.
767 adrs = PPP_ADDRESS(rptr);
768 ctrl = PPP_CONTROL(rptr);
770 seq = (rptr[0] << 16) + (rptr[1] << 8) + rptr[2];
772 len = cmsg->b_wptr - rptr;
775 * Check the sequence number and give up if the length is nonsense.
776 * The check is against mru+1 because we compress one byte of protocol.
778 explen = (seq - db->seqno) & 0xffffff;
780 if (explen > db->mru + 1 || explen < 1) {
782 printf("bsd_decomp%d: bad length 0x%x\n", db->unit, explen);
786 /* allocate enough message blocks for the decompressed message */
787 dlen = explen + PPP_HDRLEN - 1 + hdroff;
788 /* XXX assume decompressed packet fits in a single block */
789 dmsg = allocb(dlen, BPRI_HI);
791 /* give up if cannot get an uncompressed buffer */
796 /* Fill in the ppp header, but not the last byte of the protocol
797 (that comes from the decompressed data). */
801 wptr += PPP_HDRLEN - 1;
802 space = dmsg->b_datap->db_lim - wptr;
804 db->bytes_out += len;
810 if (!cmsg) { /* quit at end of message */
814 printf("bsd_decomp%d: lost %d bytes\n",
821 len = cmsg->b_wptr - rptr;
822 db->bytes_out += len;
823 continue; /* handle 0-length buffers */
826 /* Accumulate bytes until we have a complete code.
827 * Then get the next code, relying on the 32-bit,
828 * unsigned accm to mask the result.
831 accm |= *rptr++ << bitno;
833 if (tgtbitno < bitno)
835 incode = accm >> tgtbitno;
839 if (incode == CLEAR) {
840 /* The dictionary must only be cleared at
841 * the end of a packet. But there could be an
842 * empty message block at the end.
844 if (len > 0 || cmsg->b_cont != 0) {
846 len += msgdsize(cmsg->b_cont);
850 printf("bsd_decomp%d: bad CLEAR\n", db->unit);
859 /* Special case for KwKwK string. */
860 if (incode > max_ent) {
861 if (incode > max_ent+2 || incode > db->maxmaxcode
862 || oldcode == CLEAR) {
865 printf("bsd_decomp%d: bad code 0x%x oldcode=0x%x ",
866 db->unit, incode, oldcode);
867 printf("max_ent=0x%x dlen=%d seqno=%d\n",
868 max_ent, dlen, db->seqno);
877 codelen = db->lens[finchar];
882 printf("bsd_decomp%d: ran out of buffer\n", db->unit);
886 /* decode code and install in decompressed buffer */
891 len += msgdsize(cmsg->b_cont);
892 printf("bsd_decomp%d: overran output by %d with %d bytes left\n",
893 db->unit, -space, len);
898 p = (wptr += codelen);
899 while (finchar > LAST) {
900 dictp = &db->dict[db->dict[finchar].cptr];
905 printf("bsd_decomp%d: fell off end of chain ", db->unit);
906 printf("0x%x at 0x%x by 0x%x, max_ent=0x%x\n",
907 incode, finchar, db->dict[finchar].cptr, max_ent);
910 if (dictp->codem1 != finchar-1) {
912 printf("bsd_decomp%d: bad code chain 0x%x finchar=0x%x ",
913 db->unit, incode, finchar);
914 printf("oldcode=0x%x cptr=0x%x codem1=0x%x\n", oldcode,
915 db->dict[finchar].cptr, dictp->codem1);
919 *--p = dictp->f.hs.suffix;
920 finchar = dictp->f.hs.prefix;
926 printf("bsd_decomp%d: short by %d after code 0x%x, max_ent=0x%x\n",
927 db->unit, codelen, incode, max_ent);
930 if (incode > max_ent) { /* the KwKwK case again */
936 * If not first code in a packet, and
937 * if not out of code space, then allocate a new code.
939 * Keep the hash table correct so it can be used
940 * with uncompressed packets.
942 if (oldcode != CLEAR && max_ent < db->maxmaxcode) {
943 struct bsd_dict *dictp2;
947 fcode = BSD_KEY(oldcode,finchar);
948 hval = BSD_HASH(oldcode,finchar,db->hshift);
949 dictp = &db->dict[hval];
951 /* look for a free hash table entry */
952 if (dictp->codem1 < max_ent) {
953 disp = (hval == 0) ? 1 : hval;
956 if (hval >= db->hsize)
958 dictp = &db->dict[hval];
959 } while (dictp->codem1 < max_ent);
962 /* Invalidate previous hash table entry
963 * assigned this code, and then take it over
965 dictp2 = &db->dict[max_ent+1];
966 if (db->dict[dictp2->cptr].codem1 == max_ent) {
967 db->dict[dictp2->cptr].codem1 = BADCODEM1;
970 dictp->codem1 = max_ent;
971 dictp->f.fcode = fcode;
973 db->max_ent = ++max_ent;
974 db->lens[max_ent] = db->lens[oldcode]+1;
976 /* Expand code size if needed. */
977 if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode) {
978 db->n_bits = ++n_bits;
979 tgtbitno = 32-n_bits;
986 /* fail on packets with bad lengths/sequence numbers */
992 /* Keep the checkpoint right so that incompressible packets
993 * clear the dictionary at the right times.
995 db->in_count += explen;
996 if (bsd_check(db) && db->debug) {
997 printf("bsd_decomp%d: peer should have cleared dictionary\n",