1 /** 2 * compress.c - Compressed attribute handling code. Originated from the Linux-NTFS 3 * project. 4 * 5 * Copyright (c) 2004-2005 Anton Altaparmakov 6 * Copyright (c) 2004-2006 Szabolcs Szakacsits 7 * Copyright (c) 2005 Yura Pakhuchiy 8 * 9 * This program/include file is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License as published 11 * by the Free Software Foundation; either version 2 of the License, or 12 * (at your option) any later version. 13 * 14 * This program/include file is distributed in the hope that it will be 15 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 16 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program (in the main directory of the NTFS-3G 21 * distribution in the file COPYING); if not, write to the Free Software 22 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 */ 24 25 #ifdef HAVE_CONFIG_H 26 #include "config.h" 27 #endif 28 29 #ifdef HAVE_STDIO_H 30 #include <stdio.h> 31 #endif 32 #ifdef HAVE_STRING_H 33 #include <string.h> 34 #endif 35 #ifdef HAVE_STDLIB_H 36 #include <stdlib.h> 37 #endif 38 #ifdef HAVE_ERRNO_H 39 #include <errno.h> 40 #endif 41 42 #include "attrib.h" 43 #include "debug.h" 44 #include "volume.h" 45 #include "types.h" 46 #include "layout.h" 47 #include "runlist.h" 48 #include "compress.h" 49 #include "logging.h" 50 #include "misc.h" 51 52 /** 53 * enum ntfs_compression_constants - constants used in the compression code 54 */ 55 typedef enum { 56 /* Token types and access mask. */ 57 NTFS_SYMBOL_TOKEN = 0, 58 NTFS_PHRASE_TOKEN = 1, 59 NTFS_TOKEN_MASK = 1, 60 61 /* Compression sub-block constants. */ 62 NTFS_SB_SIZE_MASK = 0x0fff, 63 NTFS_SB_SIZE = 0x1000, 64 NTFS_SB_IS_COMPRESSED = 0x8000, 65 } ntfs_compression_constants; 66 67 /** 68 * ntfs_decompress - decompress a compression block into an array of pages 69 * @dest: buffer to which to write the decompressed data 70 * @dest_size: size of buffer @dest in bytes 71 * @cb_start: compression block to decompress 72 * @cb_size: size of compression block @cb_start in bytes 73 * 74 * This decompresses the compression block @cb_start into the destination 75 * buffer @dest. 76 * 77 * @cb_start is a pointer to the compression block which needs decompressing 78 * and @cb_size is the size of @cb_start in bytes (8-64kiB). 79 * 80 * Return 0 if success or -EOVERFLOW on error in the compressed stream. 81 */ 82 static int ntfs_decompress(u8 *dest, const u32 dest_size, 83 u8 *const cb_start, const u32 cb_size) 84 { 85 /* 86 * Pointers into the compressed data, i.e. the compression block (cb), 87 * and the therein contained sub-blocks (sb). 88 */ 89 u8 *cb_end = cb_start + cb_size; /* End of cb. */ 90 u8 *cb = cb_start; /* Current position in cb. */ 91 u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */ 92 u8 *cb_sb_end; /* End of current sb / beginning of next sb. */ 93 /* Variables for uncompressed data / destination. */ 94 u8 *dest_end = dest + dest_size; /* End of dest buffer. */ 95 u8 *dest_sb_start; /* Start of current sub-block in dest. */ 96 u8 *dest_sb_end; /* End of current sb in dest. */ 97 /* Variables for tag and token parsing. */ 98 u8 tag; /* Current tag. */ 99 int token; /* Loop counter for the eight tokens in tag. */ 100 101 ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size); 102 do_next_sb: 103 ntfs_log_debug("Beginning sub-block at offset = %d in the cb.\n", 104 (int)(cb - cb_start)); 105 /* 106 * Have we reached the end of the compression block or the end of the 107 * decompressed data? The latter can happen for example if the current 108 * position in the compression block is one byte before its end so the 109 * first two checks do not detect it. 110 */ 111 if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) { 112 ntfs_log_debug("Completed. Returning success (0).\n"); 113 return 0; 114 } 115 /* Setup offset for the current sub-block destination. */ 116 dest_sb_start = dest; 117 dest_sb_end = dest + NTFS_SB_SIZE; 118 /* Check that we are still within allowed boundaries. */ 119 if (dest_sb_end > dest_end) 120 goto return_overflow; 121 /* Does the minimum size of a compressed sb overflow valid range? */ 122 if (cb + 6 > cb_end) 123 goto return_overflow; 124 /* Setup the current sub-block source pointers and validate range. */ 125 cb_sb_start = cb; 126 cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK) 127 + 3; 128 if (cb_sb_end > cb_end) 129 goto return_overflow; 130 /* Now, we are ready to process the current sub-block (sb). */ 131 if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) { 132 ntfs_log_debug("Found uncompressed sub-block.\n"); 133 /* This sb is not compressed, just copy it into destination. */ 134 /* Advance source position to first data byte. */ 135 cb += 2; 136 /* An uncompressed sb must be full size. */ 137 if (cb_sb_end - cb != NTFS_SB_SIZE) 138 goto return_overflow; 139 /* Copy the block and advance the source position. */ 140 memcpy(dest, cb, NTFS_SB_SIZE); 141 cb += NTFS_SB_SIZE; 142 /* Advance destination position to next sub-block. */ 143 dest += NTFS_SB_SIZE; 144 goto do_next_sb; 145 } 146 ntfs_log_debug("Found compressed sub-block.\n"); 147 /* This sb is compressed, decompress it into destination. */ 148 /* Forward to the first tag in the sub-block. */ 149 cb += 2; 150 do_next_tag: 151 if (cb == cb_sb_end) { 152 /* Check if the decompressed sub-block was not full-length. */ 153 if (dest < dest_sb_end) { 154 int nr_bytes = dest_sb_end - dest; 155 156 ntfs_log_debug("Filling incomplete sub-block with zeroes.\n"); 157 /* Zero remainder and update destination position. */ 158 memset(dest, 0, nr_bytes); 159 dest += nr_bytes; 160 } 161 /* We have finished the current sub-block. */ 162 goto do_next_sb; 163 } 164 /* Check we are still in range. */ 165 if (cb > cb_sb_end || dest > dest_sb_end) 166 goto return_overflow; 167 /* Get the next tag and advance to first token. */ 168 tag = *cb++; 169 /* Parse the eight tokens described by the tag. */ 170 for (token = 0; token < 8; token++, tag >>= 1) { 171 u16 lg, pt, length, max_non_overlap; 172 register u16 i; 173 u8 *dest_back_addr; 174 175 /* Check if we are done / still in range. */ 176 if (cb >= cb_sb_end || dest > dest_sb_end) 177 break; 178 /* Determine token type and parse appropriately.*/ 179 if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) { 180 /* 181 * We have a symbol token, copy the symbol across, and 182 * advance the source and destination positions. 183 */ 184 *dest++ = *cb++; 185 /* Continue with the next token. */ 186 continue; 187 } 188 /* 189 * We have a phrase token. Make sure it is not the first tag in 190 * the sb as this is illegal and would confuse the code below. 191 */ 192 if (dest == dest_sb_start) 193 goto return_overflow; 194 /* 195 * Determine the number of bytes to go back (p) and the number 196 * of bytes to copy (l). We use an optimized algorithm in which 197 * we first calculate log2(current destination position in sb), 198 * which allows determination of l and p in O(1) rather than 199 * O(n). We just need an arch-optimized log2() function now. 200 */ 201 lg = 0; 202 for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1) 203 lg++; 204 /* Get the phrase token into i. */ 205 pt = le16_to_cpup((u16*)cb); 206 /* 207 * Calculate starting position of the byte sequence in 208 * the destination using the fact that p = (pt >> (12 - lg)) + 1 209 * and make sure we don't go too far back. 210 */ 211 dest_back_addr = dest - (pt >> (12 - lg)) - 1; 212 if (dest_back_addr < dest_sb_start) 213 goto return_overflow; 214 /* Now calculate the length of the byte sequence. */ 215 length = (pt & (0xfff >> lg)) + 3; 216 /* Verify destination is in range. */ 217 if (dest + length > dest_sb_end) 218 goto return_overflow; 219 /* The number of non-overlapping bytes. */ 220 max_non_overlap = dest - dest_back_addr; 221 if (length <= max_non_overlap) { 222 /* The byte sequence doesn't overlap, just copy it. */ 223 memcpy(dest, dest_back_addr, length); 224 /* Advance destination pointer. */ 225 dest += length; 226 } else { 227 /* 228 * The byte sequence does overlap, copy non-overlapping 229 * part and then do a slow byte by byte copy for the 230 * overlapping part. Also, advance the destination 231 * pointer. 232 */ 233 memcpy(dest, dest_back_addr, max_non_overlap); 234 dest += max_non_overlap; 235 dest_back_addr += max_non_overlap; 236 length -= max_non_overlap; 237 while (length--) 238 *dest++ = *dest_back_addr++; 239 } 240 /* Advance source position and continue with the next token. */ 241 cb += 2; 242 } 243 /* No tokens left in the current tag. Continue with the next tag. */ 244 goto do_next_tag; 245 return_overflow: 246 errno = EOVERFLOW; 247 ntfs_log_perror("Failed to decompress file"); 248 return -1; 249 } 250 251 /** 252 * ntfs_is_cb_compressed - internal function, do not use 253 * 254 * This is a very specialised function determining if a cb is compressed or 255 * uncompressed. It is assumed that checking for a sparse cb has already been 256 * performed and that the cb is not sparse. It makes all sorts of other 257 * assumptions as well and hence it is not useful anywhere other than where it 258 * is used at the moment. Please, do not make this function available for use 259 * outside of compress.c as it is bound to confuse people and not do what they 260 * want. 261 * 262 * Return TRUE on errors so that the error will be detected later on in the 263 * code. Might be a bit confusing to debug but there really should never be 264 * errors coming from here. 265 */ 266 static BOOL ntfs_is_cb_compressed(ntfs_attr *na, runlist_element *rl, 267 VCN cb_start_vcn, int cb_clusters) 268 { 269 /* 270 * The simplest case: the run starting at @cb_start_vcn contains 271 * @cb_clusters clusters which are all not sparse, thus the cb is not 272 * compressed. 273 */ 274 restart: 275 cb_clusters -= rl->length - (cb_start_vcn - rl->vcn); 276 while (cb_clusters > 0) { 277 /* Go to the next run. */ 278 rl++; 279 /* Map the next runlist fragment if it is not mapped. */ 280 if (rl->lcn < LCN_HOLE || !rl->length) { 281 cb_start_vcn = rl->vcn; 282 rl = ntfs_attr_find_vcn(na, rl->vcn); 283 if (!rl || rl->lcn < LCN_HOLE || !rl->length) 284 return TRUE; 285 /* 286 * If the runs were merged need to deal with the 287 * resulting partial run so simply restart. 288 */ 289 if (rl->vcn < cb_start_vcn) 290 goto restart; 291 } 292 /* If the current run is sparse, the cb is compressed. */ 293 if (rl->lcn == LCN_HOLE) 294 return TRUE; 295 /* If the whole cb is not sparse, it is not compressed. */ 296 if (rl->length >= cb_clusters) 297 return FALSE; 298 cb_clusters -= rl->length; 299 }; 300 /* All cb_clusters were not sparse thus the cb is not compressed. */ 301 return FALSE; 302 } 303 304 /** 305 * ntfs_compressed_attr_pread - read from a compressed attribute 306 * @na: ntfs attribute to read from 307 * @pos: byte position in the attribute to begin reading from 308 * @count: number of bytes to read 309 * @b: output data buffer 310 * 311 * NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead. 312 * 313 * This function will read @count bytes starting at offset @pos from the 314 * compressed ntfs attribute @na into the data buffer @b. 315 * 316 * On success, return the number of successfully read bytes. If this number 317 * is lower than @count this means that the read reached end of file or that 318 * an error was encountered during the read so that the read is partial. 319 * 0 means end of file or nothing was read (also return 0 when @count is 0). 320 * 321 * On error and nothing has been read, return -1 with errno set appropriately 322 * to the return code of ntfs_pread(), or to EINVAL in case of invalid 323 * arguments. 324 */ 325 s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b) 326 { 327 s64 br, to_read, ofs, total, total2; 328 u64 cb_size_mask; 329 VCN start_vcn, vcn, end_vcn; 330 ntfs_volume *vol; 331 runlist_element *rl; 332 u8 *dest, *cb, *cb_pos, *cb_end; 333 u32 cb_size; 334 int err; 335 unsigned int nr_cbs, cb_clusters; 336 337 ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n", 338 (unsigned long long)na->ni->mft_no, na->type, 339 (long long)pos, (long long)count); 340 if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b || 341 pos < 0 || count < 0) { 342 errno = EINVAL; 343 return -1; 344 } 345 /* 346 * Encrypted attributes are not supported. We return access denied, 347 * which is what Windows NT4 does, too. 348 */ 349 if (NAttrEncrypted(na)) { 350 errno = EACCES; 351 return -1; 352 } 353 if (!count) 354 return 0; 355 /* Truncate reads beyond end of attribute. */ 356 if (pos + count > na->data_size) { 357 if (pos >= na->data_size) { 358 return 0; 359 } 360 count = na->data_size - pos; 361 } 362 /* If it is a resident attribute, simply use ntfs_attr_pread(). */ 363 if (!NAttrNonResident(na)) 364 return ntfs_attr_pread(na, pos, count, b); 365 total = total2 = 0; 366 /* Zero out reads beyond initialized size. */ 367 if (pos + count > na->initialized_size) { 368 if (pos >= na->initialized_size) { 369 memset(b, 0, count); 370 return count; 371 } 372 total2 = pos + count - na->initialized_size; 373 count -= total2; 374 memset((u8*)b + count, 0, total2); 375 } 376 vol = na->ni->vol; 377 cb_size = na->compression_block_size; 378 cb_size_mask = cb_size - 1UL; 379 cb_clusters = na->compression_block_clusters; 380 381 /* Need a temporary buffer for each loaded compression block. */ 382 cb = ntfs_malloc(cb_size); 383 if (!cb) 384 return -1; 385 386 /* Need a temporary buffer for each uncompressed block. */ 387 dest = ntfs_malloc(cb_size); 388 if (!dest) { 389 free(cb); 390 return -1; 391 } 392 /* 393 * The first vcn in the first compression block (cb) which we need to 394 * decompress. 395 */ 396 start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits; 397 /* Offset in the uncompressed cb at which to start reading data. */ 398 ofs = pos & cb_size_mask; 399 /* 400 * The first vcn in the cb after the last cb which we need to 401 * decompress. 402 */ 403 end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >> 404 vol->cluster_size_bits; 405 /* Number of compression blocks (cbs) in the wanted vcn range. */ 406 nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >> 407 na->compression_block_size_bits; 408 cb_end = cb + cb_size; 409 do_next_cb: 410 nr_cbs--; 411 cb_pos = cb; 412 vcn = start_vcn; 413 start_vcn += cb_clusters; 414 415 /* Check whether the compression block is sparse. */ 416 rl = ntfs_attr_find_vcn(na, vcn); 417 if (!rl || rl->lcn < LCN_HOLE) { 418 free(cb); 419 free(dest); 420 if (total) 421 return total; 422 /* FIXME: Do we want EIO or the error code? (AIA) */ 423 errno = EIO; 424 return -1; 425 } 426 if (rl->lcn == LCN_HOLE) { 427 /* Sparse cb, zero out destination range overlapping the cb. */ 428 ntfs_log_debug("Found sparse compression block.\n"); 429 to_read = min(count, cb_size - ofs); 430 memset(b, 0, to_read); 431 ofs = 0; 432 total += to_read; 433 count -= to_read; 434 b = (u8*)b + to_read; 435 } else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) { 436 s64 tdata_size, tinitialized_size; 437 /* 438 * Uncompressed cb, read it straight into the destination range 439 * overlapping the cb. 440 */ 441 ntfs_log_debug("Found uncompressed compression block.\n"); 442 /* 443 * Read the uncompressed data into the destination buffer. 444 * NOTE: We cheat a little bit here by marking the attribute as 445 * not compressed in the ntfs_attr structure so that we can 446 * read the data by simply using ntfs_attr_pread(). (-8 447 * NOTE: we have to modify data_size and initialized_size 448 * temporarily as well... 449 */ 450 to_read = min(count, cb_size - ofs); 451 ofs += vcn << vol->cluster_size_bits; 452 NAttrClearCompressed(na); 453 tdata_size = na->data_size; 454 tinitialized_size = na->initialized_size; 455 na->data_size = na->initialized_size = na->allocated_size; 456 do { 457 br = ntfs_attr_pread(na, ofs, to_read, b); 458 if (br < 0) { 459 err = errno; 460 na->data_size = tdata_size; 461 na->initialized_size = tinitialized_size; 462 NAttrSetCompressed(na); 463 free(cb); 464 free(dest); 465 if (total) 466 return total; 467 errno = err; 468 return br; 469 } 470 total += br; 471 count -= br; 472 b = (u8*)b + br; 473 to_read -= br; 474 ofs += br; 475 } while (to_read > 0); 476 na->data_size = tdata_size; 477 na->initialized_size = tinitialized_size; 478 NAttrSetCompressed(na); 479 ofs = 0; 480 } else { 481 s64 tdata_size, tinitialized_size; 482 483 /* 484 * Compressed cb, decompress it into the temporary buffer, then 485 * copy the data to the destination range overlapping the cb. 486 */ 487 ntfs_log_debug("Found compressed compression block.\n"); 488 /* 489 * Read the compressed data into the temporary buffer. 490 * NOTE: We cheat a little bit here by marking the attribute as 491 * not compressed in the ntfs_attr structure so that we can 492 * read the raw, compressed data by simply using 493 * ntfs_attr_pread(). (-8 494 * NOTE: We have to modify data_size and initialized_size 495 * temporarily as well... 496 */ 497 to_read = cb_size; 498 NAttrClearCompressed(na); 499 tdata_size = na->data_size; 500 tinitialized_size = na->initialized_size; 501 na->data_size = na->initialized_size = na->allocated_size; 502 do { 503 br = ntfs_attr_pread(na, 504 (vcn << vol->cluster_size_bits) + 505 (cb_pos - cb), to_read, cb_pos); 506 if (br < 0) { 507 err = errno; 508 na->data_size = tdata_size; 509 na->initialized_size = tinitialized_size; 510 NAttrSetCompressed(na); 511 free(cb); 512 free(dest); 513 if (total) 514 return total; 515 errno = err; 516 return br; 517 } 518 cb_pos += br; 519 to_read -= br; 520 } while (to_read > 0); 521 na->data_size = tdata_size; 522 na->initialized_size = tinitialized_size; 523 NAttrSetCompressed(na); 524 /* Just a precaution. */ 525 if (cb_pos + 2 <= cb_end) 526 *(u16*)cb_pos = 0; 527 ntfs_log_debug("Successfully read the compression block.\n"); 528 if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) { 529 err = errno; 530 free(cb); 531 free(dest); 532 if (total) 533 return total; 534 errno = err; 535 return -1; 536 } 537 to_read = min(count, cb_size - ofs); 538 memcpy(b, dest + ofs, to_read); 539 total += to_read; 540 count -= to_read; 541 b = (u8*)b + to_read; 542 ofs = 0; 543 } 544 /* Do we have more work to do? */ 545 if (nr_cbs) 546 goto do_next_cb; 547 /* We no longer need the buffers. */ 548 free(cb); 549 free(dest); 550 /* Return number of bytes read. */ 551 return total + total2; 552 } 553