xref: /haiku/src/bin/unzip/unshrink.c (revision 17049c451a91f427aec94b944b75876b611103e7)
1 /*
2   Copyright (c) 1990-2001 Info-ZIP.  All rights reserved.
3 
4   See the accompanying file LICENSE, version 2000-Apr-09 or later
5   (the contents of which are also included in unzip.h) for terms of use.
6   If, for some reason, all these files are missing, the Info-ZIP license
7   also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8 */
9 /*---------------------------------------------------------------------------
10 
11   unshrink.c                     version 1.21                     23 Nov 95
12 
13 
14        NOTE:  This code may or may not infringe on the so-called "Welch
15        patent" owned by Unisys.  (From reading the patent, it appears
16        that a pure LZW decompressor is *not* covered, but this claim has
17        not been tested in court, and Unisys is reported to believe other-
18        wise.)  It is therefore the responsibility of the user to acquire
19        whatever license(s) may be required for legal use of this code.
20 
21        THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
22        IN VIOLATION OF APPLICABLE PATENT LAW.
23 
24 
25   Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
26   up to 13 bits; in addition, there is provision for partial clearing of
27   leaf nodes.  PKWARE uses the special code 256 (decimal) to indicate a
28   change in code size or a partial clear of the code tree:  256,1 for the
29   former and 256,2 for the latter.  [Note that partial clearing can "orphan"
30   nodes:  the parent-to-be can be cleared before its new child is added,
31   but the child is added anyway (as an orphan, as though the parent still
32   existed).  When the tree fills up to the point where the parent node is
33   reused, the orphan is effectively "adopted."  Versions prior to 1.05 were
34   affected more due to greater use of pointers (to children and siblings
35   as well as parents).]
36 
37   This replacement version of unshrink.c was written from scratch.  It is
38   based only on the algorithms described in Mark Nelson's _The Data Compres-
39   sion Book_ and in Terry Welch's original paper in the June 1984 issue of
40   IEEE _Computer_; no existing source code, including any in Nelson's book,
41   was used.
42 
43   Memory requirements have been reduced in this version and are now no more
44   than the original Sam Smith code.  This is still larger than any of the
45   other algorithms:  at a minimum, 8K+8K+16K (stack+values+parents) assuming
46   16-bit short ints, and this does not even include the output buffer (the
47   other algorithms leave the uncompressed data in the work area, typically
48   called slide[]).  For machines with a 64KB data space this is a problem,
49   particularly when text conversion is required and line endings have more
50   than one character.  UnZip's solution is to use two roughly equal halves
51   of outbuf for the ASCII conversion in such a case; the "unshrink" argument
52   to flush() signals that this is the case.
53 
54   For large-memory machines, a second outbuf is allocated for translations,
55   but only if unshrinking and only if translations are required.
56 
57               | binary mode  |        text mode
58     ---------------------------------------------------
59     big mem   |  big outbuf  | big outbuf + big outbuf2  <- malloc'd here
60     small mem | small outbuf | half + half small outbuf
61 
62   Copyright 1994, 1995 Greg Roelofs.  See the accompanying file "COPYING"
63   in UnZip 5.20 (or later) source or binary distributions.
64 
65   ---------------------------------------------------------------------------*/
66 
67 
68 #define __UNSHRINK_C    /* identifies this source module */
69 #define UNZIP_INTERNAL
70 #include "unzip.h"      /* defines LZW_CLEAN by default */
71 
72 
73 #ifndef LZW_CLEAN
74 
75 static void  partial_clear  OF((__GPRO));
76 
77 #ifdef DEBUG
78 #  define OUTDBG(c) \
79    if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
80 #else
81 #  define OUTDBG(c)
82 #endif
83 
84 /* HSIZE is defined as 2^13 (8192) in unzip.h */
85 #define BOGUSCODE  256
86 #define FLAG_BITS  parent        /* upper bits of parent[] used as flag bits */
87 #define CODE_MASK  (HSIZE - 1)   /* 0x1fff (lower bits are parent's index) */
88 #define FREE_CODE  HSIZE         /* 0x2000 (code is unused or was cleared) */
89 #define HAS_CHILD  (HSIZE << 1)  /* 0x4000 (code has a child--do not clear) */
90 
91 #define parent G.area.shrink.Parent
92 #define Value  G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
93 #define stack  G.area.shrink.Stack
94 
95 
96 /***********************/
97 /* Function unshrink() */
98 /***********************/
99 
unshrink(__G)100 int unshrink(__G)
101      __GDEF
102 {
103     int offset = (HSIZE - 1);
104     uch *stacktop = stack + offset;
105     register uch *newstr;
106     int codesize=9, len, KwKwK, error;
107     shrint code, oldcode, freecode, curcode;
108     shrint lastfreecode;
109     unsigned int outbufsiz;
110 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
111     /* Normally realbuf and outbuf will be the same.  However, if the data
112      * are redirected to a large memory buffer, realbuf will point to the
113      * new location while outbuf will remain pointing to the malloc'd
114      * memory buffer. */
115     uch *realbuf = G.outbuf;
116 #else
117 #   define realbuf G.outbuf
118 #endif
119 
120 
121 /*---------------------------------------------------------------------------
122     Initialize various variables.
123   ---------------------------------------------------------------------------*/
124 
125     lastfreecode = BOGUSCODE;
126 
127 #ifndef VMS     /* VMS uses its own buffer scheme for textmode flush(). */
128 #ifndef SMALL_MEM
129     /* non-memory-limited machines:  allocate second (large) buffer for
130      * textmode conversion in flush(), but only if needed */
131     if (G.pInfo->textmode && !G.outbuf2 &&
132         (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
133         return PK_MEM3;
134 #endif
135 #endif /* !VMS */
136 
137     for (code = 0;  code < BOGUSCODE;  ++code) {
138         Value[code] = (uch)code;
139         parent[code] = BOGUSCODE;
140     }
141     for (code = BOGUSCODE+1;  code < HSIZE;  ++code)
142         parent[code] = FREE_CODE;
143 
144 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
145     if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
146         realbuf = G.redirect_buffer;
147         outbufsiz = (unsigned)G.redirect_size;
148     } else
149 #endif
150 #ifdef DLL
151     if (G.pInfo->textmode && !G.redirect_data)
152 #else
153     if (G.pInfo->textmode)
154 #endif
155         outbufsiz = RAWBUFSIZ;
156     else
157         outbufsiz = OUTBUFSIZ;
158     G.outptr = realbuf;
159     G.outcnt = 0L;
160 
161 /*---------------------------------------------------------------------------
162     Get and output first code, then loop over remaining ones.
163   ---------------------------------------------------------------------------*/
164 
165     READBITS(codesize, oldcode)
166     if (!G.zipeof) {
167         *G.outptr++ = (uch)oldcode;
168         OUTDBG((uch)oldcode)
169         ++G.outcnt;
170     }
171 
172     do {
173         READBITS(codesize, code)
174         if (G.zipeof)
175             break;
176         if (code == BOGUSCODE) {   /* possible to have consecutive escapes? */
177             READBITS(codesize, code)
178             if (code == 1) {
179                 ++codesize;
180                 Trace((stderr, " (codesize now %d bits)\n", codesize));
181             } else if (code == 2) {
182                 Trace((stderr, " (partial clear code)\n"));
183                 partial_clear(__G);   /* clear leafs (nodes with no children) */
184                 Trace((stderr, " (done with partial clear)\n"));
185                 lastfreecode = BOGUSCODE;  /* reset start of free-node search */
186             }
187             continue;
188         }
189 
190     /*-----------------------------------------------------------------------
191         Translate code:  traverse tree from leaf back to root.
192       -----------------------------------------------------------------------*/
193 
194         newstr = stacktop;
195         curcode = code;
196 
197         if (parent[curcode] == FREE_CODE) {
198             /* or (FLAG_BITS[curcode] & FREE_CODE)? */
199             KwKwK = TRUE;
200             Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
201               oldcode));
202             --newstr;   /* last character will be same as first character */
203             curcode = oldcode;
204         } else
205             KwKwK = FALSE;
206 
207         do {
208             *newstr-- = Value[curcode];
209             curcode = (shrint)(parent[curcode] & CODE_MASK);
210         } while (curcode != BOGUSCODE);
211 
212         len = (int)(stacktop - newstr++);
213         if (KwKwK)
214             *stacktop = *newstr;
215 
216     /*-----------------------------------------------------------------------
217         Write expanded string in reverse order to output buffer.
218       -----------------------------------------------------------------------*/
219 
220         Trace((stderr, "code %4d; oldcode %4d; char %3d (%c); string [", code,
221           oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr));
222 
223         {
224             register uch *p;
225 
226             for (p = newstr;  p < newstr+len;  ++p) {
227                 *G.outptr++ = *p;
228                 OUTDBG(*p)
229                 if (++G.outcnt == outbufsiz) {
230                     Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
231                     if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
232                         Trace((stderr, "unshrink:  flush() error (%d)\n",
233                           error));
234                         return error;
235                     }
236                     G.outptr = realbuf;
237                     G.outcnt = 0L;
238                     Trace((stderr, "done with flush()\n"));
239                 }
240             }
241         }
242 
243     /*-----------------------------------------------------------------------
244         Add new leaf (first character of newstr) to tree as child of oldcode.
245       -----------------------------------------------------------------------*/
246 
247         /* search for freecode */
248         freecode = (shrint)(lastfreecode + 1);
249         /* add if-test before loop for speed? */
250         while (parent[freecode] != FREE_CODE)
251             ++freecode;
252         lastfreecode = freecode;
253         Trace((stderr, "]; newcode %d\n", freecode));
254 
255         Value[freecode] = *newstr;
256         parent[freecode] = oldcode;
257         oldcode = code;
258 
259     } while (!G.zipeof);
260 
261 /*---------------------------------------------------------------------------
262     Flush any remaining data and return to sender...
263   ---------------------------------------------------------------------------*/
264 
265     if (G.outcnt > 0L) {
266         Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
267         if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
268             Trace((stderr, "unshrink:  flush() error (%d)\n", error));
269             return error;
270         }
271         Trace((stderr, "done with flush()\n"));
272     }
273 
274     return PK_OK;
275 
276 } /* end function unshrink() */
277 
278 
279 
280 
281 
282 /****************************/
283 /* Function partial_clear() */      /* no longer recursive... */
284 /****************************/
285 
partial_clear(__G)286 static void partial_clear(__G)
287      __GDEF
288 {
289     register shrint code;
290 
291     /* clear all nodes which have no children (i.e., leaf nodes only) */
292 
293     /* first loop:  mark each parent as such */
294     for (code = BOGUSCODE+1;  code < HSIZE;  ++code) {
295         register shrint cparent = (shrint)(parent[code] & CODE_MASK);
296 
297         if (cparent > BOGUSCODE && cparent != FREE_CODE)
298             FLAG_BITS[cparent] |= HAS_CHILD;   /* set parent's child-bit */
299     }
300 
301     /* second loop:  clear all nodes *not* marked as parents; reset flag bits */
302     for (code = BOGUSCODE+1;  code < HSIZE;  ++code) {
303         if (FLAG_BITS[code] & HAS_CHILD)    /* just clear child-bit */
304             FLAG_BITS[code] &= ~HAS_CHILD;
305         else {                              /* leaf:  lose it */
306             Trace((stderr, "%d\n", code));
307             parent[code] = FREE_CODE;
308         }
309     }
310 
311     return;
312 }
313 
314 #endif /* !LZW_CLEAN */
315