xref: /haiku/src/add-ons/kernel/file_systems/ntfs/libntfs/compress.c (revision a1163de83ea633463a79de234b8742ee106531b2)
1 /**
2  * compress.c - Compressed attribute handling code.  Originated from the Linux-NTFS
3  *		project.
4  *
5  * Copyright (c) 2004-2005 Anton Altaparmakov
6  * Copyright (c) 2004-2006 Szabolcs Szakacsits
7  * Copyright (c)      2005 Yura Pakhuchiy
8  *
9  * This program/include file is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as published
11  * by the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program/include file is distributed in the hope that it will be
15  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program (in the main directory of the NTFS-3G
21  * distribution in the file COPYING); if not, write to the Free Software
22  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23  */
24 
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28 
29 #ifdef HAVE_STDIO_H
30 #include <stdio.h>
31 #endif
32 #ifdef HAVE_STRING_H
33 #include <string.h>
34 #endif
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #ifdef HAVE_ERRNO_H
39 #include <errno.h>
40 #endif
41 
42 #include "attrib.h"
43 #include "debug.h"
44 #include "volume.h"
45 #include "types.h"
46 #include "layout.h"
47 #include "runlist.h"
48 #include "compress.h"
49 #include "logging.h"
50 #include "misc.h"
51 
52 /**
53  * enum ntfs_compression_constants - constants used in the compression code
54  */
55 typedef enum {
56 	/* Token types and access mask. */
57 	NTFS_SYMBOL_TOKEN	=	0,
58 	NTFS_PHRASE_TOKEN	=	1,
59 	NTFS_TOKEN_MASK		=	1,
60 
61 	/* Compression sub-block constants. */
62 	NTFS_SB_SIZE_MASK	=	0x0fff,
63 	NTFS_SB_SIZE		=	0x1000,
64 	NTFS_SB_IS_COMPRESSED	=	0x8000,
65 } ntfs_compression_constants;
66 
67 /**
68  * ntfs_decompress - decompress a compression block into an array of pages
69  * @dest:	buffer to which to write the decompressed data
70  * @dest_size:	size of buffer @dest in bytes
71  * @cb_start:	compression block to decompress
72  * @cb_size:	size of compression block @cb_start in bytes
73  *
74  * This decompresses the compression block @cb_start into the destination
75  * buffer @dest.
76  *
77  * @cb_start is a pointer to the compression block which needs decompressing
78  * and @cb_size is the size of @cb_start in bytes (8-64kiB).
79  *
80  * Return 0 if success or -EOVERFLOW on error in the compressed stream.
81  */
82 static int ntfs_decompress(u8 *dest, const u32 dest_size,
83 		u8 *const cb_start, const u32 cb_size)
84 {
85 	/*
86 	 * Pointers into the compressed data, i.e. the compression block (cb),
87 	 * and the therein contained sub-blocks (sb).
88 	 */
89 	u8 *cb_end = cb_start + cb_size; /* End of cb. */
90 	u8 *cb = cb_start;	/* Current position in cb. */
91 	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
92 	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */
93 	/* Variables for uncompressed data / destination. */
94 	u8 *dest_end = dest + dest_size;	/* End of dest buffer. */
95 	u8 *dest_sb_start;	/* Start of current sub-block in dest. */
96 	u8 *dest_sb_end;	/* End of current sb in dest. */
97 	/* Variables for tag and token parsing. */
98 	u8 tag;			/* Current tag. */
99 	int token;		/* Loop counter for the eight tokens in tag. */
100 
101 	ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
102 do_next_sb:
103 	ntfs_log_debug("Beginning sub-block at offset = %d in the cb.\n",
104 			(int)(cb - cb_start));
105 	/*
106 	 * Have we reached the end of the compression block or the end of the
107 	 * decompressed data?  The latter can happen for example if the current
108 	 * position in the compression block is one byte before its end so the
109 	 * first two checks do not detect it.
110 	 */
111 	if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) {
112 		ntfs_log_debug("Completed. Returning success (0).\n");
113 		return 0;
114 	}
115 	/* Setup offset for the current sub-block destination. */
116 	dest_sb_start = dest;
117 	dest_sb_end = dest + NTFS_SB_SIZE;
118 	/* Check that we are still within allowed boundaries. */
119 	if (dest_sb_end > dest_end)
120 		goto return_overflow;
121 	/* Does the minimum size of a compressed sb overflow valid range? */
122 	if (cb + 6 > cb_end)
123 		goto return_overflow;
124 	/* Setup the current sub-block source pointers and validate range. */
125 	cb_sb_start = cb;
126 	cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
127 			+ 3;
128 	if (cb_sb_end > cb_end)
129 		goto return_overflow;
130 	/* Now, we are ready to process the current sub-block (sb). */
131 	if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
132 		ntfs_log_debug("Found uncompressed sub-block.\n");
133 		/* This sb is not compressed, just copy it into destination. */
134 		/* Advance source position to first data byte. */
135 		cb += 2;
136 		/* An uncompressed sb must be full size. */
137 		if (cb_sb_end - cb != NTFS_SB_SIZE)
138 			goto return_overflow;
139 		/* Copy the block and advance the source position. */
140 		memcpy(dest, cb, NTFS_SB_SIZE);
141 		cb += NTFS_SB_SIZE;
142 		/* Advance destination position to next sub-block. */
143 		dest += NTFS_SB_SIZE;
144 		goto do_next_sb;
145 	}
146 	ntfs_log_debug("Found compressed sub-block.\n");
147 	/* This sb is compressed, decompress it into destination. */
148 	/* Forward to the first tag in the sub-block. */
149 	cb += 2;
150 do_next_tag:
151 	if (cb == cb_sb_end) {
152 		/* Check if the decompressed sub-block was not full-length. */
153 		if (dest < dest_sb_end) {
154 			int nr_bytes = dest_sb_end - dest;
155 
156 			ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
157 			/* Zero remainder and update destination position. */
158 			memset(dest, 0, nr_bytes);
159 			dest += nr_bytes;
160 		}
161 		/* We have finished the current sub-block. */
162 		goto do_next_sb;
163 	}
164 	/* Check we are still in range. */
165 	if (cb > cb_sb_end || dest > dest_sb_end)
166 		goto return_overflow;
167 	/* Get the next tag and advance to first token. */
168 	tag = *cb++;
169 	/* Parse the eight tokens described by the tag. */
170 	for (token = 0; token < 8; token++, tag >>= 1) {
171 		u16 lg, pt, length, max_non_overlap;
172 		register u16 i;
173 		u8 *dest_back_addr;
174 
175 		/* Check if we are done / still in range. */
176 		if (cb >= cb_sb_end || dest > dest_sb_end)
177 			break;
178 		/* Determine token type and parse appropriately.*/
179 		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
180 			/*
181 			 * We have a symbol token, copy the symbol across, and
182 			 * advance the source and destination positions.
183 			 */
184 			*dest++ = *cb++;
185 			/* Continue with the next token. */
186 			continue;
187 		}
188 		/*
189 		 * We have a phrase token. Make sure it is not the first tag in
190 		 * the sb as this is illegal and would confuse the code below.
191 		 */
192 		if (dest == dest_sb_start)
193 			goto return_overflow;
194 		/*
195 		 * Determine the number of bytes to go back (p) and the number
196 		 * of bytes to copy (l). We use an optimized algorithm in which
197 		 * we first calculate log2(current destination position in sb),
198 		 * which allows determination of l and p in O(1) rather than
199 		 * O(n). We just need an arch-optimized log2() function now.
200 		 */
201 		lg = 0;
202 		for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
203 			lg++;
204 		/* Get the phrase token into i. */
205 		pt = le16_to_cpup((u16*)cb);
206 		/*
207 		 * Calculate starting position of the byte sequence in
208 		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
209 		 * and make sure we don't go too far back.
210 		 */
211 		dest_back_addr = dest - (pt >> (12 - lg)) - 1;
212 		if (dest_back_addr < dest_sb_start)
213 			goto return_overflow;
214 		/* Now calculate the length of the byte sequence. */
215 		length = (pt & (0xfff >> lg)) + 3;
216 		/* Verify destination is in range. */
217 		if (dest + length > dest_sb_end)
218 			goto return_overflow;
219 		/* The number of non-overlapping bytes. */
220 		max_non_overlap = dest - dest_back_addr;
221 		if (length <= max_non_overlap) {
222 			/* The byte sequence doesn't overlap, just copy it. */
223 			memcpy(dest, dest_back_addr, length);
224 			/* Advance destination pointer. */
225 			dest += length;
226 		} else {
227 			/*
228 			 * The byte sequence does overlap, copy non-overlapping
229 			 * part and then do a slow byte by byte copy for the
230 			 * overlapping part. Also, advance the destination
231 			 * pointer.
232 			 */
233 			memcpy(dest, dest_back_addr, max_non_overlap);
234 			dest += max_non_overlap;
235 			dest_back_addr += max_non_overlap;
236 			length -= max_non_overlap;
237 			while (length--)
238 				*dest++ = *dest_back_addr++;
239 		}
240 		/* Advance source position and continue with the next token. */
241 		cb += 2;
242 	}
243 	/* No tokens left in the current tag. Continue with the next tag. */
244 	goto do_next_tag;
245 return_overflow:
246 	errno = EOVERFLOW;
247 	ntfs_log_perror("Failed to decompress file");
248 	return -1;
249 }
250 
251 /**
252  * ntfs_is_cb_compressed - internal function, do not use
253  *
254  * This is a very specialised function determining if a cb is compressed or
255  * uncompressed.  It is assumed that checking for a sparse cb has already been
256  * performed and that the cb is not sparse.  It makes all sorts of other
257  * assumptions as well and hence it is not useful anywhere other than where it
258  * is used at the moment.  Please, do not make this function available for use
259  * outside of compress.c as it is bound to confuse people and not do what they
260  * want.
261  *
262  * Return TRUE on errors so that the error will be detected later on in the
263  * code.  Might be a bit confusing to debug but there really should never be
264  * errors coming from here.
265  */
266 static BOOL ntfs_is_cb_compressed(ntfs_attr *na, runlist_element *rl,
267 				  VCN cb_start_vcn, int cb_clusters)
268 {
269 	/*
270 	 * The simplest case: the run starting at @cb_start_vcn contains
271 	 * @cb_clusters clusters which are all not sparse, thus the cb is not
272 	 * compressed.
273 	 */
274 restart:
275 	cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
276 	while (cb_clusters > 0) {
277 		/* Go to the next run. */
278 		rl++;
279 		/* Map the next runlist fragment if it is not mapped. */
280 		if (rl->lcn < LCN_HOLE || !rl->length) {
281 			cb_start_vcn = rl->vcn;
282 			rl = ntfs_attr_find_vcn(na, rl->vcn);
283 			if (!rl || rl->lcn < LCN_HOLE || !rl->length)
284 				return TRUE;
285 			/*
286 			 * If the runs were merged need to deal with the
287 			 * resulting partial run so simply restart.
288 			 */
289 			if (rl->vcn < cb_start_vcn)
290 				goto restart;
291 		}
292 		/* If the current run is sparse, the cb is compressed. */
293 		if (rl->lcn == LCN_HOLE)
294 			return TRUE;
295 		/* If the whole cb is not sparse, it is not compressed. */
296 		if (rl->length >= cb_clusters)
297 			return FALSE;
298 		cb_clusters -= rl->length;
299 	};
300 	/* All cb_clusters were not sparse thus the cb is not compressed. */
301 	return FALSE;
302 }
303 
304 /**
305  * ntfs_compressed_attr_pread - read from a compressed attribute
306  * @na:		ntfs attribute to read from
307  * @pos:	byte position in the attribute to begin reading from
308  * @count:	number of bytes to read
309  * @b:		output data buffer
310  *
311  * NOTE:  You probably want to be using attrib.c::ntfs_attr_pread() instead.
312  *
313  * This function will read @count bytes starting at offset @pos from the
314  * compressed ntfs attribute @na into the data buffer @b.
315  *
316  * On success, return the number of successfully read bytes.  If this number
317  * is lower than @count this means that the read reached end of file or that
318  * an error was encountered during the read so that the read is partial.
319  * 0 means end of file or nothing was read (also return 0 when @count is 0).
320  *
321  * On error and nothing has been read, return -1 with errno set appropriately
322  * to the return code of ntfs_pread(), or to EINVAL in case of invalid
323  * arguments.
324  */
325 s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
326 {
327 	s64 br, to_read, ofs, total, total2;
328 	u64 cb_size_mask;
329 	VCN start_vcn, vcn, end_vcn;
330 	ntfs_volume *vol;
331 	runlist_element *rl;
332 	u8 *dest, *cb, *cb_pos, *cb_end;
333 	u32 cb_size;
334 	int err;
335 	unsigned int nr_cbs, cb_clusters;
336 
337 	ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
338 			(unsigned long long)na->ni->mft_no, na->type,
339 			(long long)pos, (long long)count);
340 	if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b ||
341 			pos < 0 || count < 0) {
342 		errno = EINVAL;
343 		return -1;
344 	}
345 	/*
346 	 * Encrypted attributes are not supported.  We return access denied,
347 	 * which is what Windows NT4 does, too.
348 	 */
349 	if (NAttrEncrypted(na)) {
350 		errno = EACCES;
351 		return -1;
352 	}
353 	if (!count)
354 		return 0;
355 	/* Truncate reads beyond end of attribute. */
356 	if (pos + count > na->data_size) {
357 		if (pos >= na->data_size) {
358 			return 0;
359 		}
360 		count = na->data_size - pos;
361 	}
362 	/* If it is a resident attribute, simply use ntfs_attr_pread(). */
363 	if (!NAttrNonResident(na))
364 		return ntfs_attr_pread(na, pos, count, b);
365 	total = total2 = 0;
366 	/* Zero out reads beyond initialized size. */
367 	if (pos + count > na->initialized_size) {
368 		if (pos >= na->initialized_size) {
369 			memset(b, 0, count);
370 			return count;
371 		}
372 		total2 = pos + count - na->initialized_size;
373 		count -= total2;
374 		memset((u8*)b + count, 0, total2);
375 	}
376 	vol = na->ni->vol;
377 	cb_size = na->compression_block_size;
378 	cb_size_mask = cb_size - 1UL;
379 	cb_clusters = na->compression_block_clusters;
380 
381 	/* Need a temporary buffer for each loaded compression block. */
382 	cb = ntfs_malloc(cb_size);
383 	if (!cb)
384 		return -1;
385 
386 	/* Need a temporary buffer for each uncompressed block. */
387 	dest = ntfs_malloc(cb_size);
388 	if (!dest) {
389 		free(cb);
390 		return -1;
391 	}
392 	/*
393 	 * The first vcn in the first compression block (cb) which we need to
394 	 * decompress.
395 	 */
396 	start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
397 	/* Offset in the uncompressed cb at which to start reading data. */
398 	ofs = pos & cb_size_mask;
399 	/*
400 	 * The first vcn in the cb after the last cb which we need to
401 	 * decompress.
402 	 */
403 	end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
404 			vol->cluster_size_bits;
405 	/* Number of compression blocks (cbs) in the wanted vcn range. */
406 	nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
407 			na->compression_block_size_bits;
408 	cb_end = cb + cb_size;
409 do_next_cb:
410 	nr_cbs--;
411 	cb_pos = cb;
412 	vcn = start_vcn;
413 	start_vcn += cb_clusters;
414 
415 	/* Check whether the compression block is sparse. */
416 	rl = ntfs_attr_find_vcn(na, vcn);
417 	if (!rl || rl->lcn < LCN_HOLE) {
418 		free(cb);
419 		free(dest);
420 		if (total)
421 			return total;
422 		/* FIXME: Do we want EIO or the error code? (AIA) */
423 		errno = EIO;
424 		return -1;
425 	}
426 	if (rl->lcn == LCN_HOLE) {
427 		/* Sparse cb, zero out destination range overlapping the cb. */
428 		ntfs_log_debug("Found sparse compression block.\n");
429 		to_read = min(count, cb_size - ofs);
430 		memset(b, 0, to_read);
431 		ofs = 0;
432 		total += to_read;
433 		count -= to_read;
434 		b = (u8*)b + to_read;
435 	} else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
436 		s64 tdata_size, tinitialized_size;
437 		/*
438 		 * Uncompressed cb, read it straight into the destination range
439 		 * overlapping the cb.
440 		 */
441 		ntfs_log_debug("Found uncompressed compression block.\n");
442 		/*
443 		 * Read the uncompressed data into the destination buffer.
444 		 * NOTE: We cheat a little bit here by marking the attribute as
445 		 * not compressed in the ntfs_attr structure so that we can
446 		 * read the data by simply using ntfs_attr_pread().  (-8
447 		 * NOTE: we have to modify data_size and initialized_size
448 		 * temporarily as well...
449 		 */
450 		to_read = min(count, cb_size - ofs);
451 		ofs += vcn << vol->cluster_size_bits;
452 		NAttrClearCompressed(na);
453 		tdata_size = na->data_size;
454 		tinitialized_size = na->initialized_size;
455 		na->data_size = na->initialized_size = na->allocated_size;
456 		do {
457 			br = ntfs_attr_pread(na, ofs, to_read, b);
458 			if (br < 0) {
459 				err = errno;
460 				na->data_size = tdata_size;
461 				na->initialized_size = tinitialized_size;
462 				NAttrSetCompressed(na);
463 				free(cb);
464 				free(dest);
465 				if (total)
466 					return total;
467 				errno = err;
468 				return br;
469 			}
470 			total += br;
471 			count -= br;
472 			b = (u8*)b + br;
473 			to_read -= br;
474 			ofs += br;
475 		} while (to_read > 0);
476 		na->data_size = tdata_size;
477 		na->initialized_size = tinitialized_size;
478 		NAttrSetCompressed(na);
479 		ofs = 0;
480 	} else {
481 		s64 tdata_size, tinitialized_size;
482 
483 		/*
484 		 * Compressed cb, decompress it into the temporary buffer, then
485 		 * copy the data to the destination range overlapping the cb.
486 		 */
487 		ntfs_log_debug("Found compressed compression block.\n");
488 		/*
489 		 * Read the compressed data into the temporary buffer.
490 		 * NOTE: We cheat a little bit here by marking the attribute as
491 		 * not compressed in the ntfs_attr structure so that we can
492 		 * read the raw, compressed data by simply using
493 		 * ntfs_attr_pread().  (-8
494 		 * NOTE: We have to modify data_size and initialized_size
495 		 * temporarily as well...
496 		 */
497 		to_read = cb_size;
498 		NAttrClearCompressed(na);
499 		tdata_size = na->data_size;
500 		tinitialized_size = na->initialized_size;
501 		na->data_size = na->initialized_size = na->allocated_size;
502 		do {
503 			br = ntfs_attr_pread(na,
504 					(vcn << vol->cluster_size_bits) +
505 					(cb_pos - cb), to_read, cb_pos);
506 			if (br < 0) {
507 				err = errno;
508 				na->data_size = tdata_size;
509 				na->initialized_size = tinitialized_size;
510 				NAttrSetCompressed(na);
511 				free(cb);
512 				free(dest);
513 				if (total)
514 					return total;
515 				errno = err;
516 				return br;
517 			}
518 			cb_pos += br;
519 			to_read -= br;
520 		} while (to_read > 0);
521 		na->data_size = tdata_size;
522 		na->initialized_size = tinitialized_size;
523 		NAttrSetCompressed(na);
524 		/* Just a precaution. */
525 		if (cb_pos + 2 <= cb_end)
526 			*(u16*)cb_pos = 0;
527 		ntfs_log_debug("Successfully read the compression block.\n");
528 		if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
529 			err = errno;
530 			free(cb);
531 			free(dest);
532 			if (total)
533 				return total;
534 			errno = err;
535 			return -1;
536 		}
537 		to_read = min(count, cb_size - ofs);
538 		memcpy(b, dest + ofs, to_read);
539 		total += to_read;
540 		count -= to_read;
541 		b = (u8*)b + to_read;
542 		ofs = 0;
543 	}
544 	/* Do we have more work to do? */
545 	if (nr_cbs)
546 		goto do_next_cb;
547 	/* We no longer need the buffers. */
548 	free(cb);
549 	free(dest);
550 	/* Return number of bytes read. */
551 	return total + total2;
552 }
553