xref: /haiku/src/kits/package/hpkg/PackageFileHeapWriter.cpp (revision b3263ad3e1662cc419849a2d2cf539f9636613dc)
1 /*
2  * Copyright 2013-2014, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <package/hpkg/PackageFileHeapWriter.h>
8 
9 #include <errno.h>
10 
11 #include <algorithm>
12 #include <new>
13 
14 #include <ByteOrder.h>
15 #include <List.h>
16 #include <package/hpkg/ErrorOutput.h>
17 #include <package/hpkg/HPKGDefs.h>
18 
19 #include <AutoDeleter.h>
20 #include <package/hpkg/DataReader.h>
21 #include <package/hpkg/PackageFileHeapReader.h>
22 #include <RangeArray.h>
23 #include <CompressionAlgorithm.h>
24 
25 
// Minimum length of data we require before trying to compress them.
// Shorter chunks are stored uncompressed right away (cf. _WriteChunk()),
// since compressing tiny payloads is unlikely to save space.
static const size_t kCompressionSizeThreshold = 64;
28 
29 
30 namespace BPackageKit {
31 
32 namespace BHPKG {
33 
34 namespace BPrivate {
35 
36 
// Descriptor of one compressed chunk in the on-disk heap, together with an
// in-memory buffer holding its raw (still compressed) data while the chunk
// is being relocated (see ChunkBuffer, RemoveDataRanges()).
struct PackageFileHeapWriter::Chunk {
	uint64	offset;				// offset of the chunk in the heap
	uint32	compressedSize;		// size of the chunk data as stored on disk
	uint32	uncompressedSize;	// size of the chunk data once decompressed
	void*	buffer;				// read buffer; NULL until the chunk data
								// have been read in (ChunkBuffer::ReadNextChunk())
};
43 
44 
// A contiguous range within a single chunk (identified by its index in the
// ChunkBuffer's chunk list) whose data shall be kept when removing ranges
// from the heap.
struct PackageFileHeapWriter::ChunkSegment {
	ssize_t	chunkIndex;		// index into ChunkBuffer::fChunks
	uint32	toKeepOffset;	// start of the data to keep, relative to the
							// beginning of the uncompressed chunk data
	uint32	toKeepSize;		// number of uncompressed bytes to keep
};
50 
51 
52 struct PackageFileHeapWriter::ChunkBuffer {
53 	ChunkBuffer(PackageFileHeapWriter* writer, size_t bufferSize)
54 		:
55 		fWriter(writer),
56 		fChunks(),
57 		fCurrentChunkIndex(0),
58 		fNextReadIndex(0),
59 		fSegments(),
60 		fCurrentSegmentIndex(0),
61 		fBuffers(),
62 		fUnusedBuffers(),
63 		fBufferSize(bufferSize)
64 	{
65 	}
66 
67 	~ChunkBuffer()
68 	{
69 		for (int32 i = 0; void* buffer = fBuffers.ItemAt(i); i++)
70 			free(buffer);
71 	}
72 
73 	bool PushChunkSegment(uint64 chunkOffset, uint32 compressedSize,
74 		uint32 uncompressedSize, uint32 toKeepOffset, uint32 toKeepSize)
75 	{
76 		ChunkSegment segment;
77 		segment.toKeepOffset = toKeepOffset;
78 		segment.toKeepSize = toKeepSize;
79 
80 		// might refer to the last chunk
81 		segment.chunkIndex = fChunks.Count() - 1;
82 
83 		if (segment.chunkIndex < 0
84 			|| fChunks.ElementAt(segment.chunkIndex).offset != chunkOffset) {
85 			// no, need to push a new chunk
86 			segment.chunkIndex++;
87 
88 			Chunk chunk;
89 			chunk.offset = chunkOffset;
90 			chunk.compressedSize = compressedSize;
91 			chunk.uncompressedSize = uncompressedSize;
92 			chunk.buffer = NULL;
93 			if (!fChunks.Add(chunk))
94 				return false;
95 		}
96 
97 		return fSegments.Add(segment);
98 	}
99 
100 	bool IsEmpty() const
101 	{
102 		return fSegments.IsEmpty();
103 	}
104 
105 	bool HasMoreSegments() const
106 	{
107 		return fCurrentSegmentIndex < fSegments.Count();
108 	}
109 
110 	const ChunkSegment& CurrentSegment() const
111 	{
112 		return fSegments[fCurrentSegmentIndex];
113 	}
114 
115 	const Chunk& ChunkAt(ssize_t index) const
116 	{
117 		return fChunks[index];
118 	}
119 
120 	bool HasMoreChunksToRead() const
121 	{
122 		return fNextReadIndex < fChunks.Count();
123 	}
124 
125 	bool HasBufferedChunk() const
126 	{
127 		return fCurrentChunkIndex < fNextReadIndex;
128 	}
129 
130 	uint64 NextReadOffset() const
131 	{
132 		return fChunks[fNextReadIndex].offset;
133 	}
134 
135 	void ReadNextChunk()
136 	{
137 		if (!HasMoreChunksToRead())
138 			throw status_t(B_BAD_VALUE);
139 
140 		Chunk& chunk = fChunks[fNextReadIndex++];
141 		chunk.buffer = _GetBuffer();
142 
143 		status_t error = fWriter->ReadFileData(chunk.offset, chunk.buffer,
144 			chunk.compressedSize);
145 		if (error != B_OK)
146 			throw error;
147 	}
148 
149 	void CurrentSegmentDone()
150 	{
151 		// Unless the next segment refers to the same chunk, advance to the next
152 		// chunk.
153 		const ChunkSegment& segment = fSegments[fCurrentSegmentIndex++];
154 		if (!HasMoreSegments()
155 			|| segment.chunkIndex != CurrentSegment().chunkIndex) {
156 			_PutBuffer(fChunks[fCurrentChunkIndex++].buffer);
157 		}
158 	}
159 
160 private:
161 	void* _GetBuffer()
162 	{
163 		if (!fUnusedBuffers.IsEmpty())
164 			return fUnusedBuffers.RemoveItem(fUnusedBuffers.CountItems() - 1);
165 
166 		void* buffer = malloc(fBufferSize);
167 		if (buffer == NULL && !fBuffers.AddItem(buffer)) {
168 			free(buffer);
169 			throw std::bad_alloc();
170 		}
171 
172 		return buffer;
173 	}
174 
175 	void _PutBuffer(void* buffer)
176 	{
177 		if (buffer != NULL && !fUnusedBuffers.AddItem(buffer)) {
178 			fBuffers.RemoveItem(buffer);
179 			free(buffer);
180 		}
181 	}
182 
183 private:
184 	PackageFileHeapWriter*	fWriter;
185 
186 	Array<Chunk>			fChunks;
187 	ssize_t					fCurrentChunkIndex;
188 	ssize_t					fNextReadIndex;
189 
190 	Array<ChunkSegment>		fSegments;
191 	ssize_t					fCurrentSegmentIndex;
192 
193 	BList					fBuffers;
194 	BList					fUnusedBuffers;
195 	size_t					fBufferSize;
196 };
197 
198 
// Creates a heap writer for the file referred to by fd, with the heap
// starting at heapOffset. compressionAlgorithm may be NULL, in which case
// chunks are always written uncompressed (cf. _WriteDataCompressed()). A
// reference to the compression algorithm is acquired here and released in
// the destructor. Init() or Reinit() must be called before use.
PackageFileHeapWriter::PackageFileHeapWriter(BErrorOutput* errorOutput, int fd,
	off_t heapOffset, CompressionAlgorithmOwner* compressionAlgorithm,
	DecompressionAlgorithmOwner* decompressionAlgorithm)
	:
	PackageFileHeapAccessorBase(errorOutput, fd, heapOffset,
		decompressionAlgorithm),
	fPendingDataBuffer(NULL),
	fCompressedDataBuffer(NULL),
	fPendingDataSize(0),
	fOffsets(),
	fCompressionAlgorithm(compressionAlgorithm)
{
	// keep the algorithm alive as long as this writer exists
	if (fCompressionAlgorithm != NULL)
		fCompressionAlgorithm->AcquireReference();
}
214 
215 
// Frees the data buffers and gives up the compression algorithm reference
// acquired in the constructor.
PackageFileHeapWriter::~PackageFileHeapWriter()
{
	_Uninit();

	if (fCompressionAlgorithm != NULL)
		fCompressionAlgorithm->ReleaseReference();
}
223 
224 
225 void
226 PackageFileHeapWriter::Init()
227 {
228 	// allocate data buffers
229 	fPendingDataBuffer = malloc(kChunkSize);
230 	fCompressedDataBuffer = malloc(kChunkSize);
231 	if (fPendingDataBuffer == NULL || fCompressedDataBuffer == NULL)
232 		throw std::bad_alloc();
233 }
234 
235 
// Re-initializes the writer from the state of an existing heap, as read by
// heapReader, so new data can be appended. Adopts the reader's geometry and
// chunk offsets; a trailing partial chunk is read back into the pending
// data buffer and dropped from the offsets (_UnwriteLastPartialChunk()), so
// it will be rewritten together with newly added data.
// Throws std::bad_alloc or a status_t on failure.
// NOTE(review): offsets are appended via AddUninitialized(), i.e. this
// assumes fOffsets is still empty -- TODO confirm callers Reinit() only a
// freshly constructed writer.
void
PackageFileHeapWriter::Reinit(PackageFileHeapReader* heapReader)
{
	fHeapOffset = heapReader->HeapOffset();
	fCompressedHeapSize = heapReader->CompressedHeapSize();
	fUncompressedHeapSize = heapReader->UncompressedHeapSize();
	fPendingDataSize = 0;

	// copy the offsets array (one entry per chunk, rounding up)
	size_t chunkCount = (fUncompressedHeapSize + kChunkSize - 1) / kChunkSize;
	if (chunkCount > 0) {
		if (!fOffsets.AddUninitialized(chunkCount))
			throw std::bad_alloc();

		for (size_t i = 0; i < chunkCount; i++)
			fOffsets[i] = heapReader->Offsets()[i];
	}

	_UnwriteLastPartialChunk();
}
256 
257 
258 status_t
259 PackageFileHeapWriter::AddData(BDataReader& dataReader, off_t size,
260 	uint64& _offset)
261 {
262 	_offset = fUncompressedHeapSize;
263 
264 	// copy the data to the heap
265 	off_t readOffset = 0;
266 	off_t remainingSize = size;
267 	while (remainingSize > 0) {
268 		// read data into pending data buffer
269 		size_t toCopy = std::min(remainingSize,
270 			off_t(kChunkSize - fPendingDataSize));
271 		status_t error = dataReader.ReadData(readOffset,
272 			(uint8*)fPendingDataBuffer + fPendingDataSize, toCopy);
273 		if (error != B_OK) {
274 			fErrorOutput->PrintError("Failed to read data: %s\n",
275 				strerror(error));
276 			return error;
277 		}
278 
279 		fPendingDataSize += toCopy;
280 		fUncompressedHeapSize += toCopy;
281 		remainingSize -= toCopy;
282 		readOffset += toCopy;
283 
284 		if (fPendingDataSize == kChunkSize) {
285 			error = _FlushPendingData();
286 			if (error != B_OK)
287 				return error;
288 		}
289 	}
290 
291 	return B_OK;
292 }
293 
294 
295 void
296 PackageFileHeapWriter::AddDataThrows(const void* buffer, size_t size)
297 {
298 	BBufferDataReader reader(buffer, size);
299 	uint64 dummyOffset;
300 	status_t error = AddData(reader, size, dummyOffset);
301 	if (error != B_OK)
302 		throw status_t(error);
303 }
304 
305 
306 void
307 PackageFileHeapWriter::RemoveDataRanges(
308 	const ::BPrivate::RangeArray<uint64>& ranges)
309 {
310 	ssize_t rangeCount = ranges.CountRanges();
311 	if (rangeCount == 0)
312 		return;
313 
314 	if (fUncompressedHeapSize == 0) {
315 		fErrorOutput->PrintError("Can't remove ranges from empty heap\n");
316 		throw status_t(B_BAD_VALUE);
317 	}
318 
319 	// Before we begin flush any pending data, so we don't need any special
320 	// handling and also can use the pending data buffer.
321 	_FlushPendingData();
322 
323 	// We potentially have to recompress all data from the first affected chunk
324 	// to the end (minus the removed ranges, of course). As a basic algorithm we
325 	// can use our usual data writing strategy, i.e. read a chunk, decompress it
326 	// to a temporary buffer, and write the data to keep via AddData(). There
327 	// are a few complications/optimizations, though:
328 	// * As data moves to other chunks, it may actually compress worse than
329 	//   before. While unlikely, we still have to take care of this case by
330 	//   making sure our reading end is at least a complete uncompressed chunk
331 	//   ahead of the writing end.
332 	// * When we run into the situation that we have to move complete aligned
333 	//   chunks, we want to avoid uncompressing and recompressing them
334 	//   needlessly.
335 
336 	// Build a list of (possibly partial) chunks we want to keep.
337 
338 	// the first partial chunk (if any) and all chunks between ranges
339 	ChunkBuffer chunkBuffer(this, kChunkSize);
340 	uint64 writeOffset = ranges[0].offset - ranges[0].offset % kChunkSize;
341 	uint64 readOffset = writeOffset;
342 	for (ssize_t i = 0; i < rangeCount; i++) {
343 		const Range<uint64>& range = ranges[i];
344 		if (range.size > 0) {
345 			_PushChunks(chunkBuffer, readOffset, range.offset);
346 			readOffset = range.offset + range.size;
347 		}
348 	}
349 
350 	if (readOffset == writeOffset) {
351 		fErrorOutput->PrintError("Only empty ranges to remove from heap\n");
352 		throw status_t(B_BAD_VALUE);
353 	}
354 
355 	// all chunks after the last range
356 	_PushChunks(chunkBuffer, readOffset, fUncompressedHeapSize);
357 
358 	// Reset our state to look like all chunks from the first affected one have
359 	// been removed and re-add all data we want to keep.
360 
361 	// truncate the offsets array and reset the heap sizes
362 	ssize_t firstChunkIndex = ssize_t(writeOffset / kChunkSize);
363 	fCompressedHeapSize = fOffsets[firstChunkIndex];
364 	fUncompressedHeapSize = (uint64)firstChunkIndex * kChunkSize;
365 	fOffsets.Remove(firstChunkIndex, fOffsets.Count() - firstChunkIndex);
366 
367 	// we need a decompression buffer
368 	void* decompressionBuffer = malloc(kChunkSize);
369 	if (decompressionBuffer == NULL)
370 		throw std::bad_alloc();
371 	MemoryDeleter decompressionBufferDeleter(decompressionBuffer);
372 
373 	const Chunk* decompressedChunk = NULL;
374 
375 	while (chunkBuffer.HasMoreSegments()) {
376 		const ChunkSegment& segment = chunkBuffer.CurrentSegment();
377 
378 		// If we have an aligned, complete chunk, copy its compressed data.
379 		bool copyCompressed = fPendingDataSize == 0 && segment.toKeepOffset == 0
380 			&& segment.toKeepSize == kChunkSize;
381 
382 		// Read more chunks. We need at least one buffered one to do anything
383 		// and we want to buffer as many as necessary to ensure we don't
384 		// overwrite one we haven't buffered yet.
385 		while (chunkBuffer.HasMoreChunksToRead()
386 			&& (!chunkBuffer.HasBufferedChunk()
387 				|| (!copyCompressed
388 					&& chunkBuffer.NextReadOffset()
389 						< fCompressedHeapSize + kChunkSize))) {
390 			// read chunk
391 			chunkBuffer.ReadNextChunk();
392 		}
393 
394 		// copy compressed chunk data, if possible
395 		const Chunk& chunk = chunkBuffer.ChunkAt(segment.chunkIndex);
396 		if (copyCompressed) {
397 			status_t error = _WriteChunk(chunk.buffer, chunk.compressedSize,
398 				false);
399 			if (error != B_OK)
400 				throw error;
401 			continue;
402 		}
403 
404 		// decompress chunk, if compressed
405 		void* uncompressedData;
406 		if (chunk.uncompressedSize == chunk.compressedSize) {
407 			uncompressedData = chunk.buffer;
408 		} else if (decompressedChunk == &chunk) {
409 			uncompressedData = decompressionBuffer;
410 		} else {
411 			status_t error = DecompressChunkData(chunk.buffer,
412 				chunk.compressedSize, decompressionBuffer,
413 				chunk.uncompressedSize);
414 			if (error != B_OK)
415 				throw error;
416 
417 			decompressedChunk = &chunk;
418 			uncompressedData = decompressionBuffer;
419 		}
420 
421 		// add chunk data
422 		AddDataThrows((uint8*)uncompressedData + segment.toKeepOffset,
423 			segment.toKeepSize);
424 
425 		chunkBuffer.CurrentSegmentDone();
426 	}
427 
428 	// Make sure a last partial chunk ends up in the pending data buffer. This
429 	// is only necessary when we didn't have to move any chunk segments, since
430 	// the loop would otherwise have read it in and left it in the pending data
431 	// buffer.
432 	if (chunkBuffer.IsEmpty())
433 		_UnwriteLastPartialChunk();
434 }
435 
436 
// Flushes any pending data and appends the chunk-sizes table that concludes
// the heap. Returns B_OK on success, an error code otherwise.
status_t
PackageFileHeapWriter::Finish()
{
	// flush pending data, if any
	status_t error = _FlushPendingData();
	if (error != B_OK)
		return error;

	// write chunk sizes table

	// We don't need to write the last chunk size, since it is implied by the
	// total size minus the sum of all other chunk sizes.
	ssize_t offsetCount = fOffsets.Count();
	if (offsetCount < 2)
		return B_OK;

	// Convert the offsets to 16 bit sizes and write them. We use the (no longer
	// used) pending data buffer for the conversion.
	uint16* buffer = (uint16*)fPendingDataBuffer;
	for (ssize_t offsetIndex = 1; offsetIndex < offsetCount;) {
		// convert as many entries as fit the buffer in one batch
		ssize_t toWrite = std::min(offsetCount - offsetIndex,
			ssize_t(kChunkSize / 2));

		for (ssize_t i = 0; i < toWrite; i++, offsetIndex++) {
			// store chunkSize - 1, so it fits 16 bit (chunks cannot be empty)
			buffer[i] = B_HOST_TO_BENDIAN_INT16(
				uint16(fOffsets[offsetIndex] - fOffsets[offsetIndex - 1] - 1));
		}

		// the table is always written uncompressed, 2 bytes per entry
		error = _WriteDataUncompressed(buffer, toWrite * 2);
		if (error != B_OK)
			return error;
	}

	return B_OK;
}
473 
474 
475 status_t
476 PackageFileHeapWriter::ReadAndDecompressChunk(size_t chunkIndex,
477 	void* compressedDataBuffer, void* uncompressedDataBuffer)
478 {
479 	if (uint64(chunkIndex + 1) * kChunkSize > fUncompressedHeapSize) {
480 		// The chunk has not been written to disk yet. Its data are still in the
481 		// pending data buffer.
482 		memcpy(uncompressedDataBuffer, fPendingDataBuffer, fPendingDataSize);
483 		// TODO: This can be optimized. Since we write to a BDataIO anyway,
484 		// there's no need to copy the data.
485 		return B_OK;
486 	}
487 
488 	uint64 offset = fOffsets[chunkIndex];
489 	size_t compressedSize = chunkIndex + 1 == (size_t)fOffsets.Count()
490 		? fCompressedHeapSize - offset
491 		: fOffsets[chunkIndex + 1] - offset;
492 
493 	return ReadAndDecompressChunkData(offset, compressedSize, kChunkSize,
494 		compressedDataBuffer, uncompressedDataBuffer);
495 }
496 
497 
498 void
499 PackageFileHeapWriter::_Uninit()
500 {
501 	free(fPendingDataBuffer);
502 	free(fCompressedDataBuffer);
503 	fPendingDataBuffer = NULL;
504 	fCompressedDataBuffer = NULL;
505 }
506 
507 
508 status_t
509 PackageFileHeapWriter::_FlushPendingData()
510 {
511 	if (fPendingDataSize == 0)
512 		return B_OK;
513 
514 	status_t error = _WriteChunk(fPendingDataBuffer, fPendingDataSize, true);
515 	if (error == B_OK)
516 		fPendingDataSize = 0;
517 
518 	return error;
519 }
520 
521 
522 status_t
523 PackageFileHeapWriter::_WriteChunk(const void* data, size_t size,
524 	bool mayCompress)
525 {
526 	// add offset
527 	if (!fOffsets.Add(fCompressedHeapSize)) {
528 		fErrorOutput->PrintError("Out of memory!\n");
529 		return B_NO_MEMORY;
530 	}
531 
532 	// Try to use compression only for data large enough.
533 	bool compress = mayCompress && size >= (off_t)kCompressionSizeThreshold;
534 	if (compress) {
535 		status_t error = _WriteDataCompressed(data, size);
536 		if (error != B_OK) {
537 			if (error != B_BUFFER_OVERFLOW)
538 				return error;
539 			compress = false;
540 		}
541 	}
542 
543 	// Write uncompressed, if necessary.
544 	if (!compress) {
545 		status_t error = _WriteDataUncompressed(data, size);
546 		if (error != B_OK)
547 			return error;
548 	}
549 
550 	return B_OK;
551 }
552 
553 
554 status_t
555 PackageFileHeapWriter::_WriteDataCompressed(const void* data, size_t size)
556 {
557 	if (fCompressionAlgorithm == NULL)
558 		return B_BUFFER_OVERFLOW;
559 
560 	size_t compressedSize;
561 	status_t error = fCompressionAlgorithm->algorithm->CompressBuffer(data,
562 		size, fCompressedDataBuffer, size, compressedSize,
563 		fCompressionAlgorithm->parameters);
564 	if (error != B_OK) {
565 		if (error != B_BUFFER_OVERFLOW) {
566 			fErrorOutput->PrintError("Failed to compress chunk data: %s\n",
567 				strerror(error));
568 		}
569 		return error;
570 	}
571 
572 	// only use compressed data when we've actually saved space
573 	if (compressedSize == size)
574 		return B_BUFFER_OVERFLOW;
575 
576 	return _WriteDataUncompressed(fCompressedDataBuffer, compressedSize);
577 }
578 
579 
580 status_t
581 PackageFileHeapWriter::_WriteDataUncompressed(const void* data, size_t size)
582 {
583 	ssize_t bytesWritten = pwrite(fFD, data, size,
584 		fHeapOffset + (off_t)fCompressedHeapSize);
585 	if (bytesWritten < 0) {
586 		fErrorOutput->PrintError("Failed to write data: %s\n", strerror(errno));
587 		return errno;
588 	}
589 	if ((size_t)bytesWritten != size) {
590 		fErrorOutput->PrintError("Failed to write all data\n");
591 		return B_ERROR;
592 	}
593 
594 	fCompressedHeapSize += bytesWritten;
595 
596 	return B_OK;
597 }
598 
599 
// Appends chunk segments covering the uncompressed range [startOffset,
// endOffset) to chunkBuffer. The range is split at chunk boundaries; for
// each touched chunk one segment describing the in-chunk offset and size of
// the data to keep is pushed. Throws B_BAD_VALUE for an out-of-bounds range
// and std::bad_alloc on allocation failure.
void
PackageFileHeapWriter::_PushChunks(ChunkBuffer& chunkBuffer, uint64 startOffset,
	uint64 endOffset)
{
	if (endOffset > fUncompressedHeapSize) {
		fErrorOutput->PrintError("Invalid range to remove from heap\n");
		throw status_t(B_BAD_VALUE);
	}

	ssize_t chunkIndex = startOffset / kChunkSize;
	uint64 uncompressedChunkOffset = (uint64)chunkIndex * kChunkSize;

	while (startOffset < endOffset) {
		// the last chunk may be shorter than kChunkSize
		bool isLastChunk = fUncompressedHeapSize - uncompressedChunkOffset
			<= kChunkSize;
		uint32 inChunkOffset = uint32(startOffset - uncompressedChunkOffset);
		uint32 uncompressedChunkSize = isLastChunk
			? fUncompressedHeapSize - uncompressedChunkOffset
			: kChunkSize;
		// the compressed size of the last chunk is implied by the total
		// compressed heap size, for all others by the next chunk's offset
		uint64 compressedChunkOffset = fOffsets[chunkIndex];
		uint32 compressedChunkSize = isLastChunk
			? fCompressedHeapSize - compressedChunkOffset
			: fOffsets[chunkIndex + 1] - compressedChunkOffset;
		// keep at most up to the end of this chunk or of the given range,
		// whichever comes first
		uint32 toKeepSize = uint32(std::min(
			(uint64)uncompressedChunkSize - inChunkOffset,
			endOffset - startOffset));

		if (!chunkBuffer.PushChunkSegment(compressedChunkOffset,
				compressedChunkSize, uncompressedChunkSize, inChunkOffset,
				toKeepSize)) {
			throw std::bad_alloc();
		}

		startOffset += toKeepSize;
		chunkIndex++;
		uncompressedChunkOffset += uncompressedChunkSize;
	}
}
638 
639 
640 void
641 PackageFileHeapWriter::_UnwriteLastPartialChunk()
642 {
643 	// If the last chunk is partial, read it in and remove it from the offsets.
644 	size_t lastChunkSize = fUncompressedHeapSize % kChunkSize;
645 	if (lastChunkSize != 0) {
646 		status_t error = ReadData(fUncompressedHeapSize - lastChunkSize,
647 			fPendingDataBuffer, lastChunkSize);
648 		if (error != B_OK)
649 			throw error;
650 
651 		fPendingDataSize = lastChunkSize;
652 		fCompressedHeapSize = fOffsets[fOffsets.Count() - 1];
653 		fOffsets.Remove(fOffsets.Count() - 1);
654 	}
655 }
656 
657 
658 }	// namespace BPrivate
659 
660 }	// namespace BHPKG
661 
662 }	// namespace BPackageKit
663