1 /*
2 Copyright (c) 1990-2001 Info-ZIP. All rights reserved.
3
4 See the accompanying file LICENSE, version 2000-Apr-09 or later
5 (the contents of which are also included in unzip.h) for terms of use.
6 If, for some reason, all these files are missing, the Info-ZIP license
7 also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
8 */
9 /*---------------------------------------------------------------------------
10
11 unshrink.c version 1.21 23 Nov 95
12
13
14 NOTE: This code may or may not infringe on the so-called "Welch
15 patent" owned by Unisys. (From reading the patent, it appears
16 that a pure LZW decompressor is *not* covered, but this claim has
17 not been tested in court, and Unisys is reported to believe other-
18 wise.) It is therefore the responsibility of the user to acquire
19 whatever license(s) may be required for legal use of this code.
20
21 THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
22 IN VIOLATION OF APPLICABLE PATENT LAW.
23
24
25 Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
26 up to 13 bits; in addition, there is provision for partial clearing of
27 leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a
28 change in code size or a partial clear of the code tree: 256,1 for the
29 former and 256,2 for the latter. [Note that partial clearing can "orphan"
30 nodes: the parent-to-be can be cleared before its new child is added,
31 but the child is added anyway (as an orphan, as though the parent still
32 existed). When the tree fills up to the point where the parent node is
33 reused, the orphan is effectively "adopted." Versions prior to 1.05 were
34 affected more due to greater use of pointers (to children and siblings
35 as well as parents).]
36
37 This replacement version of unshrink.c was written from scratch. It is
38 based only on the algorithms described in Mark Nelson's _The Data Compres-
39 sion Book_ and in Terry Welch's original paper in the June 1984 issue of
40 IEEE _Computer_; no existing source code, including any in Nelson's book,
41 was used.
42
43 Memory requirements have been reduced in this version and are now no more
44 than the original Sam Smith code. This is still larger than any of the
45 other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming
46 16-bit short ints, and this does not even include the output buffer (the
47 other algorithms leave the uncompressed data in the work area, typically
48 called slide[]). For machines with a 64KB data space this is a problem,
49 particularly when text conversion is required and line endings have more
50 than one character. UnZip's solution is to use two roughly equal halves
51 of outbuf for the ASCII conversion in such a case; the "unshrink" argument
52 to flush() signals that this is the case.
53
54 For large-memory machines, a second outbuf is allocated for translations,
55 but only if unshrinking and only if translations are required.
56
57 | binary mode | text mode
58 ---------------------------------------------------
59 big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here
60 small mem | small outbuf | half + half small outbuf
61
62 Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING"
63 in UnZip 5.20 (or later) source or binary distributions.
64
65 ---------------------------------------------------------------------------*/
66
67
68 #define __UNSHRINK_C /* identifies this source module */
69 #define UNZIP_INTERNAL
70 #include "unzip.h" /* defines LZW_CLEAN by default */
71
72
73 #ifndef LZW_CLEAN
74
75 static void partial_clear OF((__GPRO));
76
77 #ifdef DEBUG
78 # define OUTDBG(c) \
79 if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
80 #else
81 # define OUTDBG(c)
82 #endif
83
84 /* HSIZE is defined as 2^13 (8192) in unzip.h */
85 #define BOGUSCODE 256
86 #define FLAG_BITS parent /* upper bits of parent[] used as flag bits */
87 #define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */
88 #define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */
89 #define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */
90
91 #define parent G.area.shrink.Parent
92 #define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
93 #define stack G.area.shrink.Stack
94
95
96 /***********************/
97 /* Function unshrink() */
98 /***********************/
99
unshrink(__G)100 int unshrink(__G)
101 __GDEF
102 {
103 int offset = (HSIZE - 1);
104 uch *stacktop = stack + offset;
105 register uch *newstr;
106 int codesize=9, len, KwKwK, error;
107 shrint code, oldcode, freecode, curcode;
108 shrint lastfreecode;
109 unsigned int outbufsiz;
110 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
111 /* Normally realbuf and outbuf will be the same. However, if the data
112 * are redirected to a large memory buffer, realbuf will point to the
113 * new location while outbuf will remain pointing to the malloc'd
114 * memory buffer. */
115 uch *realbuf = G.outbuf;
116 #else
117 # define realbuf G.outbuf
118 #endif
119
120
121 /*---------------------------------------------------------------------------
122 Initialize various variables.
123 ---------------------------------------------------------------------------*/
124
125 lastfreecode = BOGUSCODE;
126
127 #ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */
128 #ifndef SMALL_MEM
129 /* non-memory-limited machines: allocate second (large) buffer for
130 * textmode conversion in flush(), but only if needed */
131 if (G.pInfo->textmode && !G.outbuf2 &&
132 (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
133 return PK_MEM3;
134 #endif
135 #endif /* !VMS */
136
137 for (code = 0; code < BOGUSCODE; ++code) {
138 Value[code] = (uch)code;
139 parent[code] = BOGUSCODE;
140 }
141 for (code = BOGUSCODE+1; code < HSIZE; ++code)
142 parent[code] = FREE_CODE;
143
144 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
145 if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
146 realbuf = G.redirect_buffer;
147 outbufsiz = (unsigned)G.redirect_size;
148 } else
149 #endif
150 #ifdef DLL
151 if (G.pInfo->textmode && !G.redirect_data)
152 #else
153 if (G.pInfo->textmode)
154 #endif
155 outbufsiz = RAWBUFSIZ;
156 else
157 outbufsiz = OUTBUFSIZ;
158 G.outptr = realbuf;
159 G.outcnt = 0L;
160
161 /*---------------------------------------------------------------------------
162 Get and output first code, then loop over remaining ones.
163 ---------------------------------------------------------------------------*/
164
165 READBITS(codesize, oldcode)
166 if (!G.zipeof) {
167 *G.outptr++ = (uch)oldcode;
168 OUTDBG((uch)oldcode)
169 ++G.outcnt;
170 }
171
172 do {
173 READBITS(codesize, code)
174 if (G.zipeof)
175 break;
176 if (code == BOGUSCODE) { /* possible to have consecutive escapes? */
177 READBITS(codesize, code)
178 if (code == 1) {
179 ++codesize;
180 Trace((stderr, " (codesize now %d bits)\n", codesize));
181 } else if (code == 2) {
182 Trace((stderr, " (partial clear code)\n"));
183 partial_clear(__G); /* clear leafs (nodes with no children) */
184 Trace((stderr, " (done with partial clear)\n"));
185 lastfreecode = BOGUSCODE; /* reset start of free-node search */
186 }
187 continue;
188 }
189
190 /*-----------------------------------------------------------------------
191 Translate code: traverse tree from leaf back to root.
192 -----------------------------------------------------------------------*/
193
194 newstr = stacktop;
195 curcode = code;
196
197 if (parent[curcode] == FREE_CODE) {
198 /* or (FLAG_BITS[curcode] & FREE_CODE)? */
199 KwKwK = TRUE;
200 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
201 oldcode));
202 --newstr; /* last character will be same as first character */
203 curcode = oldcode;
204 } else
205 KwKwK = FALSE;
206
207 do {
208 *newstr-- = Value[curcode];
209 curcode = (shrint)(parent[curcode] & CODE_MASK);
210 } while (curcode != BOGUSCODE);
211
212 len = (int)(stacktop - newstr++);
213 if (KwKwK)
214 *stacktop = *newstr;
215
216 /*-----------------------------------------------------------------------
217 Write expanded string in reverse order to output buffer.
218 -----------------------------------------------------------------------*/
219
220 Trace((stderr, "code %4d; oldcode %4d; char %3d (%c); string [", code,
221 oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr));
222
223 {
224 register uch *p;
225
226 for (p = newstr; p < newstr+len; ++p) {
227 *G.outptr++ = *p;
228 OUTDBG(*p)
229 if (++G.outcnt == outbufsiz) {
230 Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
231 if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
232 Trace((stderr, "unshrink: flush() error (%d)\n",
233 error));
234 return error;
235 }
236 G.outptr = realbuf;
237 G.outcnt = 0L;
238 Trace((stderr, "done with flush()\n"));
239 }
240 }
241 }
242
243 /*-----------------------------------------------------------------------
244 Add new leaf (first character of newstr) to tree as child of oldcode.
245 -----------------------------------------------------------------------*/
246
247 /* search for freecode */
248 freecode = (shrint)(lastfreecode + 1);
249 /* add if-test before loop for speed? */
250 while (parent[freecode] != FREE_CODE)
251 ++freecode;
252 lastfreecode = freecode;
253 Trace((stderr, "]; newcode %d\n", freecode));
254
255 Value[freecode] = *newstr;
256 parent[freecode] = oldcode;
257 oldcode = code;
258
259 } while (!G.zipeof);
260
261 /*---------------------------------------------------------------------------
262 Flush any remaining data and return to sender...
263 ---------------------------------------------------------------------------*/
264
265 if (G.outcnt > 0L) {
266 Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
267 if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
268 Trace((stderr, "unshrink: flush() error (%d)\n", error));
269 return error;
270 }
271 Trace((stderr, "done with flush()\n"));
272 }
273
274 return PK_OK;
275
276 } /* end function unshrink() */
277
278
279
280
281
282 /****************************/
283 /* Function partial_clear() */ /* no longer recursive... */
284 /****************************/
285
partial_clear(__G)286 static void partial_clear(__G)
287 __GDEF
288 {
289 register shrint code;
290
291 /* clear all nodes which have no children (i.e., leaf nodes only) */
292
293 /* first loop: mark each parent as such */
294 for (code = BOGUSCODE+1; code < HSIZE; ++code) {
295 register shrint cparent = (shrint)(parent[code] & CODE_MASK);
296
297 if (cparent > BOGUSCODE && cparent != FREE_CODE)
298 FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */
299 }
300
301 /* second loop: clear all nodes *not* marked as parents; reset flag bits */
302 for (code = BOGUSCODE+1; code < HSIZE; ++code) {
303 if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */
304 FLAG_BITS[code] &= ~HAS_CHILD;
305 else { /* leaf: lose it */
306 Trace((stderr, "%d\n", code));
307 parent[code] = FREE_CODE;
308 }
309 }
310
311 return;
312 }
313
314 #endif /* !LZW_CLEAN */
315