xref: /haiku/src/kits/package/hpkg/PackageFileHeapWriter.cpp (revision d06cbe081b7ea043aea2012359744091de6d604d)
1 /*
2  * Copyright 2013-2014, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <package/hpkg/PackageFileHeapWriter.h>
8 
9 #include <algorithm>
10 #include <new>
11 
12 #include <ByteOrder.h>
13 #include <List.h>
14 #include <package/hpkg/ErrorOutput.h>
15 #include <package/hpkg/HPKGDefs.h>
16 
17 #include <AutoDeleter.h>
18 #include <package/hpkg/DataReader.h>
19 #include <package/hpkg/PackageFileHeapReader.h>
20 #include <RangeArray.h>
21 #include <CompressionAlgorithm.h>
22 
23 
24 // minimum length of data we require before trying to compress them
25 static const size_t kCompressionSizeThreshold = 64;
26 
27 
28 namespace BPackageKit {
29 
30 namespace BHPKG {
31 
32 namespace BPrivate {
33 
34 
// Descriptor of one compressed chunk in the on-disk heap, plus the in-memory
// buffer its raw (compressed) data are read into (NULL until read).
struct PackageFileHeapWriter::Chunk {
	uint64	offset;				// offset of the chunk in the compressed heap
	uint32	compressedSize;		// size of the chunk's data on disk
	uint32	uncompressedSize;	// size of the chunk's data once decompressed
	void*	buffer;				// chunk's compressed data, once read in
};
41 
42 
// A portion of a chunk's uncompressed data that shall be kept when removing
// ranges from the heap. Refers to its chunk by index into ChunkBuffer's
// chunk array.
struct PackageFileHeapWriter::ChunkSegment {
	ssize_t	chunkIndex;		// index of the chunk this segment belongs to
	uint32	toKeepOffset;	// start of the data to keep, within the chunk
	uint32	toKeepSize;		// size of the data to keep
};
48 
49 
50 struct PackageFileHeapWriter::ChunkBuffer {
51 	ChunkBuffer(PackageFileHeapWriter* writer, size_t bufferSize)
52 		:
53 		fWriter(writer),
54 		fChunks(),
55 		fCurrentChunkIndex(0),
56 		fNextReadIndex(0),
57 		fSegments(),
58 		fCurrentSegmentIndex(0),
59 		fBuffers(),
60 		fUnusedBuffers(),
61 		fBufferSize(bufferSize)
62 	{
63 	}
64 
65 	~ChunkBuffer()
66 	{
67 		for (int32 i = 0; void* buffer = fBuffers.ItemAt(i); i++)
68 			free(buffer);
69 	}
70 
71 	bool PushChunkSegment(uint64 chunkOffset, uint32 compressedSize,
72 		uint32 uncompressedSize, uint32 toKeepOffset, uint32 toKeepSize)
73 	{
74 		ChunkSegment segment;
75 		segment.toKeepOffset = toKeepOffset;
76 		segment.toKeepSize = toKeepSize;
77 
78 		// might refer to the last chunk
79 		segment.chunkIndex = fChunks.Count() - 1;
80 
81 		if (segment.chunkIndex < 0
82 			|| fChunks.ElementAt(segment.chunkIndex).offset != chunkOffset) {
83 			// no, need to push a new chunk
84 			segment.chunkIndex++;
85 
86 			Chunk chunk;
87 			chunk.offset = chunkOffset;
88 			chunk.compressedSize = compressedSize;
89 			chunk.uncompressedSize = uncompressedSize;
90 			chunk.buffer = NULL;
91 			if (!fChunks.Add(chunk))
92 				return false;
93 		}
94 
95 		return fSegments.Add(segment);
96 	}
97 
98 	bool IsEmpty() const
99 	{
100 		return fSegments.IsEmpty();
101 	}
102 
103 	bool HasMoreSegments() const
104 	{
105 		return fCurrentSegmentIndex < fSegments.Count();
106 	}
107 
108 	const ChunkSegment& CurrentSegment() const
109 	{
110 		return fSegments[fCurrentSegmentIndex];
111 	}
112 
113 	const Chunk& ChunkAt(ssize_t index) const
114 	{
115 		return fChunks[index];
116 	}
117 
118 	bool HasMoreChunksToRead() const
119 	{
120 		return fNextReadIndex < fChunks.Count();
121 	}
122 
123 	bool HasBufferedChunk() const
124 	{
125 		return fCurrentChunkIndex < fNextReadIndex;
126 	}
127 
128 	uint64 NextReadOffset() const
129 	{
130 		return fChunks[fNextReadIndex].offset;
131 	}
132 
133 	void ReadNextChunk()
134 	{
135 		if (!HasMoreChunksToRead())
136 			throw status_t(B_BAD_VALUE);
137 
138 		Chunk& chunk = fChunks[fNextReadIndex++];
139 		chunk.buffer = _GetBuffer();
140 
141 		status_t error = fWriter->ReadFileData(chunk.offset, chunk.buffer,
142 			chunk.compressedSize);
143 		if (error != B_OK)
144 			throw error;
145 	}
146 
147 	void CurrentSegmentDone()
148 	{
149 		// Unless the next segment refers to the same chunk, advance to the next
150 		// chunk.
151 		const ChunkSegment& segment = fSegments[fCurrentSegmentIndex++];
152 		if (!HasMoreSegments()
153 			|| segment.chunkIndex != CurrentSegment().chunkIndex) {
154 			_PutBuffer(fChunks[fCurrentChunkIndex++].buffer);
155 		}
156 	}
157 
158 private:
159 	void* _GetBuffer()
160 	{
161 		if (!fUnusedBuffers.IsEmpty())
162 			return fUnusedBuffers.RemoveItem(fUnusedBuffers.CountItems() - 1);
163 
164 		void* buffer = malloc(fBufferSize);
165 		if (buffer == NULL && !fBuffers.AddItem(buffer)) {
166 			free(buffer);
167 			throw std::bad_alloc();
168 		}
169 
170 		return buffer;
171 	}
172 
173 	void _PutBuffer(void* buffer)
174 	{
175 		if (buffer != NULL && !fUnusedBuffers.AddItem(buffer)) {
176 			fBuffers.RemoveItem(buffer);
177 			free(buffer);
178 		}
179 	}
180 
181 private:
182 	PackageFileHeapWriter*	fWriter;
183 
184 	Array<Chunk>			fChunks;
185 	ssize_t					fCurrentChunkIndex;
186 	ssize_t					fNextReadIndex;
187 
188 	Array<ChunkSegment>		fSegments;
189 	ssize_t					fCurrentSegmentIndex;
190 
191 	BList					fBuffers;
192 	BList					fUnusedBuffers;
193 	size_t					fBufferSize;
194 };
195 
196 
// Creates a writer for the package file heap starting at heapOffset in file.
// compressionAlgorithm (may be NULL) is used for writing new chunks; a
// reference to it is acquired here and released in the destructor.
// decompressionAlgorithm is handed to the base class for re-reading chunks.
PackageFileHeapWriter::PackageFileHeapWriter(BErrorOutput* errorOutput,
	BPositionIO* file, off_t heapOffset,
	CompressionAlgorithmOwner* compressionAlgorithm,
	DecompressionAlgorithmOwner* decompressionAlgorithm)
	:
	PackageFileHeapAccessorBase(errorOutput, file, heapOffset,
		decompressionAlgorithm),
	fPendingDataBuffer(NULL),
	fCompressedDataBuffer(NULL),
	fPendingDataSize(0),
	fOffsets(),
	fCompressionAlgorithm(compressionAlgorithm)
{
	if (fCompressionAlgorithm != NULL)
		fCompressionAlgorithm->AcquireReference();
}
213 
214 
PackageFileHeapWriter::~PackageFileHeapWriter()
{
	// free the data buffers
	_Uninit();

	// give up the reference acquired in the constructor
	if (fCompressionAlgorithm != NULL)
		fCompressionAlgorithm->ReleaseReference();
}
222 
223 
224 void
225 PackageFileHeapWriter::Init()
226 {
227 	// allocate data buffers
228 	fPendingDataBuffer = malloc(kChunkSize);
229 	fCompressedDataBuffer = malloc(kChunkSize);
230 	if (fPendingDataBuffer == NULL || fCompressedDataBuffer == NULL)
231 		throw std::bad_alloc();
232 }
233 
234 
235 void
236 PackageFileHeapWriter::Reinit(PackageFileHeapReader* heapReader)
237 {
238 	fHeapOffset = heapReader->HeapOffset();
239 	fCompressedHeapSize = heapReader->CompressedHeapSize();
240 	fUncompressedHeapSize = heapReader->UncompressedHeapSize();
241 	fPendingDataSize = 0;
242 
243 	// copy the offsets array
244 	size_t chunkCount = (fUncompressedHeapSize + kChunkSize - 1) / kChunkSize;
245 	if (chunkCount > 0) {
246 		if (!fOffsets.AddUninitialized(chunkCount))
247 			throw std::bad_alloc();
248 
249 		for (size_t i = 0; i < chunkCount; i++)
250 			fOffsets[i] = heapReader->Offsets()[i];
251 	}
252 
253 	_UnwriteLastPartialChunk();
254 }
255 
256 
257 status_t
258 PackageFileHeapWriter::AddData(BDataReader& dataReader, off_t size,
259 	uint64& _offset)
260 {
261 	_offset = fUncompressedHeapSize;
262 
263 	// copy the data to the heap
264 	off_t readOffset = 0;
265 	off_t remainingSize = size;
266 	while (remainingSize > 0) {
267 		// read data into pending data buffer
268 		size_t toCopy = std::min(remainingSize,
269 			off_t(kChunkSize - fPendingDataSize));
270 		status_t error = dataReader.ReadData(readOffset,
271 			(uint8*)fPendingDataBuffer + fPendingDataSize, toCopy);
272 		if (error != B_OK) {
273 			fErrorOutput->PrintError("Failed to read data: %s\n",
274 				strerror(error));
275 			return error;
276 		}
277 
278 		fPendingDataSize += toCopy;
279 		fUncompressedHeapSize += toCopy;
280 		remainingSize -= toCopy;
281 		readOffset += toCopy;
282 
283 		if (fPendingDataSize == kChunkSize) {
284 			error = _FlushPendingData();
285 			if (error != B_OK)
286 				return error;
287 		}
288 	}
289 
290 	return B_OK;
291 }
292 
293 
294 void
295 PackageFileHeapWriter::AddDataThrows(const void* buffer, size_t size)
296 {
297 	BBufferDataReader reader(buffer, size);
298 	uint64 dummyOffset;
299 	status_t error = AddData(reader, size, dummyOffset);
300 	if (error != B_OK)
301 		throw status_t(error);
302 }
303 
304 
// Removes the given (uncompressed heap offset) ranges from the heap,
// compacting all data behind the first affected chunk. Pending data are
// flushed first; the kept data are re-added via AddData(). Throws status_t
// or std::bad_alloc on error.
void
PackageFileHeapWriter::RemoveDataRanges(
	const ::BPrivate::RangeArray<uint64>& ranges)
{
	ssize_t rangeCount = ranges.CountRanges();
	if (rangeCount == 0)
		return;

	if (fUncompressedHeapSize == 0) {
		fErrorOutput->PrintError("Can't remove ranges from empty heap\n");
		throw status_t(B_BAD_VALUE);
	}

	// Before we begin flush any pending data, so we don't need any special
	// handling and also can use the pending data buffer.
	_FlushPendingData();

	// We potentially have to recompress all data from the first affected chunk
	// to the end (minus the removed ranges, of course). As a basic algorithm we
	// can use our usual data writing strategy, i.e. read a chunk, decompress it
	// to a temporary buffer, and write the data to keep via AddData(). There
	// are a few complications/optimizations, though:
	// * As data moves to other chunks, it may actually compress worse than
	//   before. While unlikely, we still have to take care of this case by
	//   making sure our reading end is at least a complete uncompressed chunk
	//   ahead of the writing end.
	// * When we run into the situation that we have to move complete aligned
	//   chunks, we want to avoid uncompressing and recompressing them
	//   needlessly.

	// Build a list of (possibly partial) chunks we want to keep.

	// the first partial chunk (if any) and all chunks between ranges
	ChunkBuffer chunkBuffer(this, kChunkSize);
	// start at the beginning of the chunk containing the first removed byte
	uint64 writeOffset = ranges[0].offset - ranges[0].offset % kChunkSize;
	uint64 readOffset = writeOffset;
	for (ssize_t i = 0; i < rangeCount; i++) {
		const Range<uint64>& range = ranges[i];
		if (range.size > 0) {
			// keep the data between the previous range and this one
			_PushChunks(chunkBuffer, readOffset, range.offset);
			readOffset = range.offset + range.size;
		}
	}

	if (readOffset == writeOffset) {
		fErrorOutput->PrintError("Only empty ranges to remove from heap\n");
		throw status_t(B_BAD_VALUE);
	}

	// all chunks after the last range
	_PushChunks(chunkBuffer, readOffset, fUncompressedHeapSize);

	// Reset our state to look like all chunks from the first affected one have
	// been removed and re-add all data we want to keep.

	// truncate the offsets array and reset the heap sizes
	ssize_t firstChunkIndex = ssize_t(writeOffset / kChunkSize);
	fCompressedHeapSize = fOffsets[firstChunkIndex];
	fUncompressedHeapSize = (uint64)firstChunkIndex * kChunkSize;
	fOffsets.Remove(firstChunkIndex, fOffsets.Count() - firstChunkIndex);

	// we need a decompression buffer
	void* decompressionBuffer = malloc(kChunkSize);
	if (decompressionBuffer == NULL)
		throw std::bad_alloc();
	MemoryDeleter decompressionBufferDeleter(decompressionBuffer);

	// the chunk whose decompressed data currently sit in decompressionBuffer
	const Chunk* decompressedChunk = NULL;

	while (chunkBuffer.HasMoreSegments()) {
		const ChunkSegment& segment = chunkBuffer.CurrentSegment();

		// If we have an aligned, complete chunk, copy its compressed data.
		bool copyCompressed = fPendingDataSize == 0 && segment.toKeepOffset == 0
			&& segment.toKeepSize == kChunkSize;

		// Read more chunks. We need at least one buffered one to do anything
		// and we want to buffer as many as necessary to ensure we don't
		// overwrite one we haven't buffered yet.
		while (chunkBuffer.HasMoreChunksToRead()
			&& (!chunkBuffer.HasBufferedChunk()
				|| (!copyCompressed
					&& chunkBuffer.NextReadOffset()
						< fCompressedHeapSize + kChunkSize))) {
			// read chunk
			chunkBuffer.ReadNextChunk();
		}

		// copy compressed chunk data, if possible
		const Chunk& chunk = chunkBuffer.ChunkAt(segment.chunkIndex);
		if (copyCompressed) {
			status_t error = _WriteChunk(chunk.buffer, chunk.compressedSize,
				false);
			if (error != B_OK)
				throw error;
			continue;
		}

		// decompress chunk, if compressed
		void* uncompressedData;
		if (chunk.uncompressedSize == chunk.compressedSize) {
			// chunk is stored uncompressed -- use the read buffer directly
			uncompressedData = chunk.buffer;
		} else if (decompressedChunk == &chunk) {
			// already decompressed for a previous segment of the same chunk
			uncompressedData = decompressionBuffer;
		} else {
			status_t error = DecompressChunkData(chunk.buffer,
				chunk.compressedSize, decompressionBuffer,
				chunk.uncompressedSize);
			if (error != B_OK)
				throw error;

			decompressedChunk = &chunk;
			uncompressedData = decompressionBuffer;
		}

		// add chunk data
		AddDataThrows((uint8*)uncompressedData + segment.toKeepOffset,
			segment.toKeepSize);

		chunkBuffer.CurrentSegmentDone();
	}

	// Make sure a last partial chunk ends up in the pending data buffer. This
	// is only necessary when we didn't have to move any chunk segments, since
	// the loop would otherwise have read it in and left it in the pending data
	// buffer.
	if (chunkBuffer.IsEmpty())
		_UnwriteLastPartialChunk();
}
434 
435 
// Finalizes the heap: flushes pending data and, when compression is used,
// appends the chunk sizes table (16-bit big-endian sizes, each stored as
// size - 1; the last chunk's size is implied and omitted).
status_t
PackageFileHeapWriter::Finish()
{
	// flush pending data, if any
	status_t error = _FlushPendingData();
	if (error != B_OK)
		return error;

	// write chunk sizes table

	// We don't need to do that, if we don't use any compression.
	if (fCompressionAlgorithm == NULL)
		return B_OK;

	// We don't need to write the last chunk size, since it is implied by the
	// total size minus the sum of all other chunk sizes.
	ssize_t offsetCount = fOffsets.Count();
	if (offsetCount < 2)
		return B_OK;

	// Convert the offsets to 16 bit sizes and write them. We use the (no longer
	// used) pending data buffer for the conversion.
	uint16* buffer = (uint16*)fPendingDataBuffer;
	for (ssize_t offsetIndex = 1; offsetIndex < offsetCount;) {
		// convert as many entries as fit into the buffer per iteration
		ssize_t toWrite = std::min(offsetCount - offsetIndex,
			ssize_t(kChunkSize / 2));

		for (ssize_t i = 0; i < toWrite; i++, offsetIndex++) {
			// store chunkSize - 1, so it fits 16 bit (chunks cannot be empty)
			buffer[i] = B_HOST_TO_BENDIAN_INT16(
				uint16(fOffsets[offsetIndex] - fOffsets[offsetIndex - 1] - 1));
		}

		error = _WriteDataUncompressed(buffer, toWrite * 2);
		if (error != B_OK)
			return error;
	}

	return B_OK;
}
476 
477 
478 status_t
479 PackageFileHeapWriter::ReadAndDecompressChunk(size_t chunkIndex,
480 	void* compressedDataBuffer, void* uncompressedDataBuffer)
481 {
482 	if (uint64(chunkIndex + 1) * kChunkSize > fUncompressedHeapSize) {
483 		// The chunk has not been written to disk yet. Its data are still in the
484 		// pending data buffer.
485 		memcpy(uncompressedDataBuffer, fPendingDataBuffer, fPendingDataSize);
486 		// TODO: This can be optimized. Since we write to a BDataIO anyway,
487 		// there's no need to copy the data.
488 		return B_OK;
489 	}
490 
491 	uint64 offset = fOffsets[chunkIndex];
492 	size_t compressedSize = chunkIndex + 1 == (size_t)fOffsets.Count()
493 		? fCompressedHeapSize - offset
494 		: fOffsets[chunkIndex + 1] - offset;
495 
496 	return ReadAndDecompressChunkData(offset, compressedSize, kChunkSize,
497 		compressedDataBuffer, uncompressedDataBuffer);
498 }
499 
500 
501 void
502 PackageFileHeapWriter::_Uninit()
503 {
504 	free(fPendingDataBuffer);
505 	free(fCompressedDataBuffer);
506 	fPendingDataBuffer = NULL;
507 	fCompressedDataBuffer = NULL;
508 }
509 
510 
511 status_t
512 PackageFileHeapWriter::_FlushPendingData()
513 {
514 	if (fPendingDataSize == 0)
515 		return B_OK;
516 
517 	status_t error = _WriteChunk(fPendingDataBuffer, fPendingDataSize, true);
518 	if (error == B_OK)
519 		fPendingDataSize = 0;
520 
521 	return error;
522 }
523 
524 
525 status_t
526 PackageFileHeapWriter::_WriteChunk(const void* data, size_t size,
527 	bool mayCompress)
528 {
529 	// add offset
530 	if (!fOffsets.Add(fCompressedHeapSize)) {
531 		fErrorOutput->PrintError("Out of memory!\n");
532 		return B_NO_MEMORY;
533 	}
534 
535 	// Try to use compression only for data large enough.
536 	bool compress = mayCompress && size >= (off_t)kCompressionSizeThreshold;
537 	if (compress) {
538 		status_t error = _WriteDataCompressed(data, size);
539 		if (error != B_OK) {
540 			if (error != B_BUFFER_OVERFLOW)
541 				return error;
542 			compress = false;
543 		}
544 	}
545 
546 	// Write uncompressed, if necessary.
547 	if (!compress) {
548 		status_t error = _WriteDataUncompressed(data, size);
549 		if (error != B_OK)
550 			return error;
551 	}
552 
553 	return B_OK;
554 }
555 
556 
557 status_t
558 PackageFileHeapWriter::_WriteDataCompressed(const void* data, size_t size)
559 {
560 	if (fCompressionAlgorithm == NULL)
561 		return B_BUFFER_OVERFLOW;
562 
563 	size_t compressedSize;
564 	status_t error = fCompressionAlgorithm->algorithm->CompressBuffer(data,
565 		size, fCompressedDataBuffer, size, compressedSize,
566 		fCompressionAlgorithm->parameters);
567 	if (error != B_OK) {
568 		if (error != B_BUFFER_OVERFLOW) {
569 			fErrorOutput->PrintError("Failed to compress chunk data: %s\n",
570 				strerror(error));
571 		}
572 		return error;
573 	}
574 
575 	// only use compressed data when we've actually saved space
576 	if (compressedSize == size)
577 		return B_BUFFER_OVERFLOW;
578 
579 	return _WriteDataUncompressed(fCompressedDataBuffer, compressedSize);
580 }
581 
582 
583 status_t
584 PackageFileHeapWriter::_WriteDataUncompressed(const void* data, size_t size)
585 {
586 	status_t error = fFile->WriteAtExactly(
587 		fHeapOffset + (off_t)fCompressedHeapSize, data, size);
588 	if (error != B_OK) {
589 		fErrorOutput->PrintError("Failed to write data: %s\n", strerror(error));
590 		return error;
591 	}
592 
593 	fCompressedHeapSize += size;
594 
595 	return B_OK;
596 }
597 
598 
// Appends (possibly partial) chunk segments covering the uncompressed heap
// range [startOffset, endOffset) to chunkBuffer. Throws status_t on an
// invalid range and std::bad_alloc on allocation failure.
void
PackageFileHeapWriter::_PushChunks(ChunkBuffer& chunkBuffer, uint64 startOffset,
	uint64 endOffset)
{
	if (endOffset > fUncompressedHeapSize) {
		fErrorOutput->PrintError("Invalid range to remove from heap\n");
		throw status_t(B_BAD_VALUE);
	}

	// index and uncompressed start offset of the chunk containing startOffset
	ssize_t chunkIndex = startOffset / kChunkSize;
	uint64 uncompressedChunkOffset = (uint64)chunkIndex * kChunkSize;

	while (startOffset < endOffset) {
		// the last chunk may be shorter than kChunkSize
		bool isLastChunk = fUncompressedHeapSize - uncompressedChunkOffset
			<= kChunkSize;
		uint32 inChunkOffset = uint32(startOffset - uncompressedChunkOffset);
		uint32 uncompressedChunkSize = isLastChunk
			? fUncompressedHeapSize - uncompressedChunkOffset
			: kChunkSize;
		// the chunk's location and size in the compressed heap
		uint64 compressedChunkOffset = fOffsets[chunkIndex];
		uint32 compressedChunkSize = isLastChunk
			? fCompressedHeapSize - compressedChunkOffset
			: fOffsets[chunkIndex + 1] - compressedChunkOffset;
		// keep at most up to the chunk end or the range end, whichever is
		// closer
		uint32 toKeepSize = uint32(std::min(
			(uint64)uncompressedChunkSize - inChunkOffset,
			endOffset - startOffset));

		if (!chunkBuffer.PushChunkSegment(compressedChunkOffset,
				compressedChunkSize, uncompressedChunkSize, inChunkOffset,
				toKeepSize)) {
			throw std::bad_alloc();
		}

		startOffset += toKeepSize;
		chunkIndex++;
		uncompressedChunkOffset += uncompressedChunkSize;
	}
}
637 
638 
// If the heap ends in a partial chunk, reads that chunk's data back into the
// pending data buffer and removes the chunk from the offsets array, so
// subsequent writes can append to it. Throws status_t on read error.
void
PackageFileHeapWriter::_UnwriteLastPartialChunk()
{
	// If the last chunk is partial, read it in and remove it from the offsets.
	size_t lastChunkSize = fUncompressedHeapSize % kChunkSize;
	if (lastChunkSize != 0) {
		status_t error = ReadData(fUncompressedHeapSize - lastChunkSize,
			fPendingDataBuffer, lastChunkSize);
		if (error != B_OK)
			throw error;

		// the partial chunk now counts as pending, not as written
		fPendingDataSize = lastChunkSize;
		fCompressedHeapSize = fOffsets[fOffsets.Count() - 1];
		fOffsets.Remove(fOffsets.Count() - 1);
	}
}
655 
656 
657 }	// namespace BPrivate
658 
659 }	// namespace BHPKG
660 
661 }	// namespace BPackageKit
662