/*
 * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */


//! Block bitmap handling and allocation policies


#include "BlockAllocator.h"

#include "Debug.h"
#include "Inode.h"
#include "Volume.h"


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate an approximate ("unsharp") range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation; special handling for directories,
//   files, symlinks, etc. (type-sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and the size
// of partitions will only keep growing - so that's not an option.
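// (13 GB at 2048 bytes per block is roughly 6.3 million blocks, one bit each,
// which comes to roughly 800 kB of bitmap.)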
// Instead, each bitmap block is read in only when it is needed - since an
// allocation group can span several blocks in the block bitmap, the
// AllocationBlock class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// be validated against real-world workloads.

#if BFS_TRACING && !defined(FS_SHELL)
namespace BFSBlockTracing {


class Allocate : public AbstractTraceEntry {
public:
	Allocate(block_run run)
		:
		fRun(run)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:alloc %" B_PRId32 ".%" B_PRIu16 ".%" B_PRIu16,
			fRun.AllocationGroup(), fRun.Start(), fRun.Length());
	}

	const block_run& Run() const { return fRun; }

private:
			block_run			fRun;
};


class Free : public AbstractTraceEntry {
public:
	Free(block_run run)
		:
		fRun(run)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:free %" B_PRId32 ".%" B_PRIu16 ".%" B_PRIu16,
			fRun.AllocationGroup(), fRun.Start(), fRun.Length());
	}

	const block_run& Run() const { return fRun; }

private:
			block_run			fRun;
};


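// Computes a simple additive checksum over the leading 32-bit words of a
// block; it is only used to tag trace entries, not for on-disk integrity.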
static uint32
checksum(const uint8* data, size_t size)
{
	const uint32* data4 = (const uint32*)data;
	uint32 sum = 0;
	while (size >= 4) {
		sum += *data4;
		data4++;
		size -= 4;
	}
	return sum;
}


class Block : public AbstractTraceEntry {
public:
	Block(const char* label, off_t blockNumber, const uint8* data,
			size_t size, uint32 start = 0, uint32 length = 0)
		:
		fBlock(blockNumber),
		fData(data),
		fStart(start),
		fLength(length),
		fLabel(label)
	{
		fSum = checksum(data, size);
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:%s: block %lld (%p), sum %lu, s/l %lu/%lu", fLabel,
			fBlock, fData, fSum, fStart, fLength);
	}

private:
			off_t				fBlock;
			const uint8*		fData;
			uint32				fStart;
			uint32				fLength;
			uint32				fSum;
			const char*			fLabel;
};


class BlockChange : public AbstractTraceEntry {
public:
	BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData)
		:
		fBlock(block),
		fOldData(oldData),
		fNewData(newData),
		fLabel(label)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel,
			fBlock, fOldData, fNewData);
	}

private:
			int32				fBlock;
			uint32				fOldData;
			uint32				fNewData;
			const char*			fLabel;
};


}	// namespace BFSBlockTracing

#	define T(x) new(std::nothrow) BFSBlockTracing::x;
#else
#	define T(x) ;
#endif

#ifdef DEBUG_ALLOCATION_GROUPS
#	define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group)
#else
#	define CHECK_ALLOCATION_GROUP(group) ;
#endif


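// An AllocationBlock provides access to a single block of the on-disk block
// bitmap, either read-only via SetTo() or writable within a transaction via
// SetToWritable().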
class AllocationBlock : public CachedBlock {
public:
	AllocationBlock(Volume* volume);

	inline void Allocate(uint16 start, uint16 numBlocks);
	inline void Free(uint16 start, uint16 numBlocks);
	inline bool IsUsed(uint16 block);
	inline uint32 NextFree(uint16 startBlock);

	status_t SetTo(AllocationGroup& group, uint16 block);
	status_t SetToWritable(Transaction& transaction, AllocationGroup& group,
		uint16 block);

	uint32 NumBlockBits() const { return fNumBits; }
	uint32& Chunk(int32 index) { return ((uint32*)fBlock)[index]; }
	uint8* Block() const { return (uint8*)fBlock; }

private:
	uint32	fNumBits;
#ifdef DEBUG
	bool	fWritable;
#endif
};


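// An AllocationGroup caches per-group statistics (number of free bits, first
// free bit, largest known free run) that serve as hints for the allocator;
// the authoritative state always lives in the on-disk block bitmap.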
class AllocationGroup {
public:
	AllocationGroup();

	void AddFreeRange(int32 start, int32 blocks);
	bool IsFull() const { return fFreeBits == 0; }

	status_t Allocate(Transaction& transaction, uint16 start, int32 length);
	status_t Free(Transaction& transaction, uint16 start, int32 length);

	uint32 NumBits() const { return fNumBits; }
	uint32 NumBitmapBlocks() const { return fNumBitmapBlocks; }
	int32 Start() const { return fStart; }

private:
	friend class BlockAllocator;

	uint32	fNumBits;
	uint32	fNumBitmapBlocks;
	int32	fStart;
	int32	fFirstFree;
	int32	fFreeBits;

	int32	fLargestStart;
	int32	fLargestLength;
	bool	fLargestValid;
};


AllocationBlock::AllocationBlock(Volume* volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup& group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits than the others
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = false;
#endif
	return CachedBlock::SetTo(group.Start() + block);
}


status_t
AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group,
	uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in the last block
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = true;
#endif
	return CachedBlock::SetToWritable(transaction, group.Start() + block);
}


bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block > fNumBits)
		return true;

	// the block bitmap is accessed in 32-bit chunks
	return Chunk(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


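/*!	Returns the bit index of the next free block at or after \a startBlock
	within this bitmap block, or NumBlockBits() if there is none.
*/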
uint32
AllocationBlock::NextFree(uint16 startBlock)
{
	// Set all bits below the start block in the current chunk, to ignore them.
	uint32 ignoreNext = (1UL << (startBlock % 32)) - 1;

	for (uint32 offset = ROUNDDOWN(startBlock, 32);
			offset < fNumBits; offset += 32) {
		uint32 chunk = Chunk(offset >> 5);
		chunk |= ignoreNext;
		ignoreNext = 0;

		if (chunk == UINT32_MAX)
			continue;
		uint32 result = offset + ffs(~chunk) - 1;
		if (result >= fNumBits)
			break;
		return result;
	}

	return fNumBits;
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;

		T(BlockChange("b-alloc", block, Chunk(block),
			Chunk(block) | HOST_ENDIAN_TO_BFS_INT32(mask)));

#if KDEBUG
		// check for already set blocks
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already "
				"allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			panic("blocks already set!");
		}
#endif

		Chunk(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
	T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

		T(BlockChange("b-free", block, Chunk(block),
			Chunk(block) & HOST_ENDIAN_TO_BFS_INT32(~mask)));

		Chunk(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


//	#pragma mark -


/*!	The allocation groups are created and initialized in
	BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap()
	respectively.
*/
AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fFreeBits(0),
	fLargestValid(false)
{
}


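/*!	Registers a run of free blocks found while scanning the bitmap, updating
	the first-free and largest-free-range hints of this allocation group.
*/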
void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (!fLargestValid || fLargestLength < blocks) {
		fLargestStart = start;
		fLargestLength = blocks;
		fLargestValid = true;
	}

	fFreeBits += blocks;
}


/*!	Allocates the specified run in the allocation group.
	Doesn't check whether the run is valid or partially allocated already, nor
	does it maintain the free range hints or the volume's used blocks count.
	It only does the low-level work of allocating some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length)
{
	ASSERT(start + length <= (int32)fNumBits);

	// Update the allocation group info
	// TODO: this info will be incorrect if something goes wrong later
	// Note: the fFirstFree block doesn't necessarily have to be free itself
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

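	// Shrink the cached largest-free-range hint if this allocation overlaps
	// it; if the remainder might no longer be the largest free run in the
	// group, invalidate the hint instead.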
	if (fLargestValid) {
		bool cut = false;
		if (fLargestStart == start) {
			// cut from start
			fLargestStart += length;
			fLargestLength -= length;
			cut = true;
		} else if (start > fLargestStart
			&& start < fLargestStart + fLargestLength) {
			// cut from end
			fLargestLength = start - fLargestStart;
			cut = true;
		}
		if (cut && (fLargestLength < fLargestStart
				|| fLargestLength
						< (int32)fNumBits - (fLargestStart + fLargestLength))) {
			// might not be the largest block anymore
			fLargestValid = false;
		}
	}

	Volume* volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK) {
			fLargestValid = false;
			RETURN_ERROR(B_IO_ERROR);
		}

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/*!	Frees the specified run in the allocation group.
	Doesn't check whether the run is valid or actually completely allocated,
	nor does it maintain the free range hints or the volume's used blocks count.
	It only does the low-level work of freeing some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length)
{
	ASSERT(start + length <= (int32)fNumBits);

	// Update the allocation group info
	// TODO: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	// The range to be freed cannot be part of the valid largest range
	ASSERT(!fLargestValid || start + length <= fLargestStart
		|| start > fLargestStart);

	if (fLargestValid
		&& (start + length == fLargestStart
			|| fLargestStart + fLargestLength == start
			|| (start < fLargestStart && fLargestStart > fLargestLength)
			|| (start > fLargestStart
				&& (int32)fNumBits - (fLargestStart + fLargestLength)
						> fLargestLength))) {
		fLargestValid = false;
	}

	Volume* volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		T(Block("free-1", block, cached.Block(), volume->BlockSize()));
		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		length -= freeLength;
		start = 0;
		T(Block("free-2", block, cached.Block(), volume->BlockSize()));
		block++;
	}
	return B_OK;
}


//	#pragma mark -


BlockAllocator::BlockAllocator(Volume* volume)
	:
	fVolume(volume),
	fGroups(NULL)
	//fCheckBitmap(NULL),
	//fCheckCookie(NULL)
{
	recursive_lock_init(&fLock, "bfs allocator");
}


BlockAllocator::~BlockAllocator()
{
	recursive_lock_destroy(&fLock);
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	fNumBitmapBlocks = fVolume->NumBitmapBlocks();

	fGroups = new(std::nothrow) AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	recursive_lock_lock(&fLock);
		// the lock will be released by the _Initialize() method

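	// Scanning the whole block bitmap can take a while on large volumes, so
	// the scan runs in a low-priority kernel thread; if spawning that thread
	// fails, we fall back to scanning synchronously right here.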
	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize,
		"bfs block allocator", B_LOW_PRIORITY, this);
	if (id < B_OK)
		return _Initialize(this);

	recursive_lock_transfer_lock(&fLock, id);

	return resume_thread(id);
}


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction& transaction)
{
	status_t status = Initialize(false);
	if (status != B_OK)
		return status;

	uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize();
	uint32 blockShift = fVolume->BlockShift();

	uint32* buffer = (uint32*)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;
		// the bitmap starts directly after the superblock

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (write_pos(fVolume->Device(), offset << blockShift, buffer,
				fBlocksPerGroup << blockShift) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		if (i == fNumGroups - 1) {
			fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits;
			fGroups[i].fNumBitmapBlocks = 1 + ((fGroups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			fGroups[i].fNumBits = numBits;
			fGroups[i].fNumBitmapBlocks = fBlocksPerGroup;
		}
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits;
		fGroups[i].fLargestValid = true;

		offset += fBlocksPerGroup;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->ToBlock(fVolume->Log()) + fVolume->Log().Length();
	uint32 blocksToReserve = reservedBlocks;
	for (int32 i = 0; i < fNumGroups; i++) {
		int32 reservedBlocksInGroup = min_c(blocksToReserve, numBits);
		if (fGroups[i].Allocate(transaction, 0, reservedBlocksInGroup) < B_OK) {
			FATAL(("could not allocate reserved space for block bitmap/log!\n"));
			return B_ERROR;
		}
		blocksToReserve -= reservedBlocksInGroup;
		if (blocksToReserve == 0)
			break;
	}
	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}


status_t
BlockAllocator::_Initialize(BlockAllocator* allocator)
{
	// The lock must already be held at this point
	RecursiveLocker locker(allocator->fLock, true);

	Volume* volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 blockShift = volume->BlockShift();
	off_t freeBlocks = 0;

	uint32* buffer = (uint32*)malloc(blocks << blockShift);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	AllocationGroup* groups = allocator->fGroups;
	off_t offset = 1;
	uint32 bitsPerGroup = 8 * (blocks << blockShift);
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0; i < numGroups; i++) {
		if (read_pos(volume->Device(), offset << blockShift, buffer,
				blocks << blockShift) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		if (i == numGroups - 1) {
			groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup;
			groups[i].fNumBitmapBlocks = 1 + ((groups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			groups[i].fNumBits = bitsPerGroup;
			groups[i].fNumBitmapBlocks = blocks;
		}
		groups[i].fStart = offset;

		// finds all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 numBits = groups[i].fNumBits, bit = 0;
		int32 count = (numBits + 31) / 32;

		for (int32 k = 0; k < count; k++) {
			for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) {
				if (buffer[k] & (1UL << j)) {
					// block is in use
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0) {
					// block is free, start new free range
					start = bit;
				}
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check if block bitmap and log area are reserved
	uint32 reservedBlocks = volume->ToBlock(volume->Log()) + volume->Log().Length();

	if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) {
		if (volume->IsReadOnly()) {
			FATAL(("Space for block bitmap or log area is not reserved "
				"(volume is mounted read-only)!\n"));
		} else {
			Transaction transaction(volume, 0);
			if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) {
				FATAL(("Could not allocate reserved space for block "
					"bitmap/log!\n"));
				volume->Panic();
			} else {
				transaction.Done();
				FATAL(("Space for block bitmap or log area was not "
					"reserved!\n"));
			}
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk is in a dirty state at mount time, it's
		// normal for the values not to match
		INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %"
			B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	return B_OK;
}


void
BlockAllocator::Uninitialize()
{
	// We only have to make sure that the initializer thread isn't running
	// anymore.
	recursive_lock_lock(&fLock);
}


/*!	Tries to allocate between \a minimum and \a maximum blocks, starting
	at group \a groupIndex with offset \a start. The resulting allocation
	is put into \a run.

	The number of allocated blocks is always a multiple of \a minimum, which
	has to be a power of two.
*/
status_t
BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex,
	uint16 start, uint16 maximum, uint16 minimum, block_run& run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	FUNCTION_START(("group = %" B_PRId32 ", start = %" B_PRIu16
		", maximum = %" B_PRIu16 ", minimum = %" B_PRIu16 "\n",
		groupIndex, start, maximum, minimum));

	AllocationBlock cached(fVolume);
	RecursiveLocker lock(fLock);

	uint32 bitsPerFullBlock = fVolume->BlockSize() << 3;

	// Find the block_run that can fulfill the request best
	int32 bestGroup = -1;
	int32 bestStart = -1;
	int32 bestLength = -1;

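	// Note: we loop fNumGroups + 1 times because the starting group is first
	// scanned only from "start" onwards; the extra iteration lets us revisit
	// it from its beginning.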
	for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) {
		groupIndex = groupIndex % fNumGroups;
		AllocationGroup& group = fGroups[groupIndex];

		CHECK_ALLOCATION_GROUP(groupIndex);

		if (start >= group.NumBits() || group.IsFull())
			continue;

		// The wanted maximum is smaller than the largest free block in the
		// group or already smaller than the minimum

		if (start < group.fFirstFree)
			start = group.fFirstFree;

		if (group.fLargestValid) {
			if (group.fLargestLength < bestLength)
				continue;

			if (group.fLargestStart >= start) {
				if (group.fLargestLength >= bestLength) {
					bestGroup = groupIndex;
					bestStart = group.fLargestStart;
					bestLength = group.fLargestLength;

					if (bestLength >= maximum)
						break;
				}

				// We know everything we need to know about this group, so
				// let's skip to the next one.
				continue;
			}
		}

		// There may be more than one bitmap block per allocation group - we
		// iterate through them to find a place for the allocation.
		// (one allocation can't exceed one allocation group)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 currentStart = 0, currentLength = 0;
		int32 groupLargestStart = -1;
		int32 groupLargestLength = -1;
		int32 currentBit = start;
		bool canFindGroupLargest = start == 0;

		for (; block < group.NumBitmapBlocks(); block++) {
			if (cached.SetTo(group, block) < B_OK)
				RETURN_ERROR(B_ERROR);

			T(Block("alloc-in", group.Start() + block, cached.Block(),
				fVolume->BlockSize(), groupIndex, currentStart));

			// find a block large enough to hold the allocation
			for (uint32 bit = start % bitsPerFullBlock;
					bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (currentLength == 0) {
						// start new range
						currentStart = currentBit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++currentLength >= maximum) {
						bestGroup = groupIndex;
						bestStart = currentStart;
						bestLength = currentLength;
						break;
					}
				} else {
					if (currentLength) {
						// end of a range
						if (currentLength > bestLength) {
							bestGroup = groupIndex;
							bestStart = currentStart;
							bestLength = currentLength;
						}
						if (currentLength > groupLargestLength) {
							groupLargestStart = currentStart;
							groupLargestLength = currentLength;
						}
						currentLength = 0;
					}
					if (((int32)group.NumBits() - currentBit)
							<= groupLargestLength) {
						// We can't find a bigger block in this group anymore,
						// let's skip the rest.
						block = group.NumBitmapBlocks();
						break;
					}

					// Advance the current bit to one before the next free (or last) bit,
					// so that the next loop iteration will check the next free bit.
					const uint32 nextFreeOffset = cached.NextFree(bit) - bit;
					bit += nextFreeOffset - 1;
					currentBit += nextFreeOffset - 1;
				}
				currentBit++;
			}

			T(Block("alloc-out", block, cached.Block(),
				fVolume->BlockSize(), groupIndex, currentStart));

			if (bestLength >= maximum) {
				canFindGroupLargest = false;
				break;
			}

			// start from the beginning of the next block
			start = 0;
		}

		if (currentBit == (int32)group.NumBits()) {
			if (currentLength > bestLength) {
				bestGroup = groupIndex;
				bestStart = currentStart;
				bestLength = currentLength;
			}
			if (canFindGroupLargest && currentLength > groupLargestLength) {
				groupLargestStart = currentStart;
				groupLargestLength = currentLength;
			}
		}

		if (canFindGroupLargest && !group.fLargestValid
			&& groupLargestLength >= 0) {
			group.fLargestStart = groupLargestStart;
			group.fLargestLength = groupLargestLength;
			group.fLargestValid = true;
		}

		if (bestLength >= maximum)
			break;
	}

	// If we found a suitable range, mark the blocks as in use, and
	// write the updated block bitmap back to disk
	if (bestLength < minimum)
		return B_DEVICE_FULL;

	if (bestLength > maximum)
		bestLength = maximum;
	else if (minimum > 1) {
		// make sure bestLength is a multiple of minimum
		bestLength = round_down(bestLength, minimum);
	}

	if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK)
		RETURN_ERROR(B_IO_ERROR);

	CHECK_ALLOCATION_GROUP(bestGroup);

	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup);
	run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart);
	run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength);

	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength);
		// We are not writing back the disk's superblock - it's
		// either done by the journaling code, or when the disk
		// is unmounted.
		// If the value is not correct at mount time, it will be
		// fixed anyway.

	// We need to flush any remaining blocks in the new allocation to make sure
	// they won't interfere with the file cache.
	block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run),
		run.Length());

	T(Allocate(run));
	return B_OK;
}


status_t
BlockAllocator::AllocateForInode(Transaction& transaction,
	const block_run* parent, mode_t type, block_run& run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic Giampaolo's
	// "Practical File System Design", and see how well they work

	// Files go in the same allocation group as their parent,
	// sub-directories will be placed 8 allocation groups after
	// the one of the parent
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}


status_t
BlockAllocator::Allocate(Transaction& transaction, Inode* inode,
	off_t numBlocks, block_run& run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].NumBits())
		numBlocks = fGroups[0].NumBits();

	// since block_run.length is uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// TODO: if we drop compatibility, couldn't we do this any better?
	// There are basically three possibilities:
	// a) since a length of zero doesn't make any sense, use it to mean 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum number of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	// c) have BlockRun::Length() return (length + 1).
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// Are there already allocated blocks? (then just try to allocate near the
	// last one)
	if (inode->Size() > 0) {
		const data_stream& data = inode->Node().data;
		// TODO: we currently don't handle the case where the data stream
		// has already grown into the indirect ranges
		if (data.max_double_indirect_range == 0
			&& data.max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data.direct[last + 1].IsZero())
					break;

			group = data.direct[last].AllocationGroup();
			start = data.direct[last].Start() + data.direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go in the same allocation
		// group as the inode is in but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction& transaction, block_run run)
{
	RecursiveLocker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %" B_PRId32 ", start = %" B_PRIu16
		", length = %" B_PRIu16 "\n", group, start, length))
	T(Free(run));

	// We don't use Volume::IsValidBlockRun() here, because this check can be
	// stricter about the group size (the last group may have a different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].NumBits()
		|| uint32(start + length) > fGroups[group].NumBits()
		|| length == 0) {
		FATAL(("tried to free an invalid block_run"
			" (%" B_PRId32 ", %" B_PRIu16 ", %" B_PRIu16")\n",
			group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the
	// drive
	if (group < fVolume->Log().AllocationGroup()
		|| (group == fVolume->Log().AllocationGroup()
			&& start < uint32(fVolume->Log().Start()) + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run"
			" (%" B_PRId32 ", %" B_PRIu16 ", %" B_PRIu16")\n",
			group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) != B_OK)
		return B_BAD_DATA;
#endif

	CHECK_ALLOCATION_GROUP(group);

	if (fGroups[group].Free(transaction, start, length) != B_OK)
		RETURN_ERROR(B_IO_ERROR);

	CHECK_ALLOCATION_GROUP(group);

#ifdef DEBUG
	if (CheckBlockRun(run, NULL, false) != B_OK) {
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just "
			"freed)\n"));
	}
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


#ifdef DEBUG_FRAGMENTER
void
BlockAllocator::Fragment()
{
	AllocationBlock cached(fVolume);
	RecursiveLocker lock(fLock);

	// only leave 4 block holes
	static const uint32 kMask = 0x0f0f0f0f;
	uint32 valuesPerBlock = fVolume->BlockSize() / 4;

	for (int32 i = 0; i < fNumGroups; i++) {
		AllocationGroup& group = fGroups[i];

		for (uint32 block = 0; block < group.NumBitmapBlocks(); block++) {
			Transaction transaction(fVolume, 0);

			if (cached.SetToWritable(transaction, group, block) != B_OK)
				return;

			for (uint32 index = 0; index < valuesPerBlock; index++) {
				cached.Chunk(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask);
			}

			transaction.Done();
		}
	}
}
#endif	// DEBUG_FRAGMENTER


#ifdef DEBUG_ALLOCATION_GROUPS
void
BlockAllocator::_CheckGroup(int32 groupIndex) const
{
	AllocationBlock cached(fVolume);
	ASSERT_LOCKED_RECURSIVE(&fLock);

	AllocationGroup& group = fGroups[groupIndex];

	int32 currentStart = 0, currentLength = 0;
	int32 firstFree = -1;
	int32 largestStart = -1;
	int32 largestLength = 0;
	int32 currentBit = 0;

	for (uint32 block = 0; block < group.NumBitmapBlocks(); block++) {
		if (cached.SetTo(group, block) < B_OK) {
			panic("setting group block %d failed\n", (int)block);
			return;
		}

		for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) {
			if (!cached.IsUsed(bit)) {
				if (firstFree < 0) {
					firstFree = currentBit;
					if (!group.fLargestValid) {
						if (firstFree >= 0 && firstFree < group.fFirstFree) {
							// mostly harmless but noteworthy
							dprintf("group %d first free too late: should be "
								"%d, is %d\n", (int)groupIndex, (int)firstFree,
								(int)group.fFirstFree);
						}
						return;
					}
				}

				if (currentLength == 0) {
					// start new range
					currentStart = currentBit;
				}
				currentLength++;
			} else if (currentLength) {
				// end of a range
				if (currentLength > largestLength) {
					largestStart = currentStart;
					largestLength = currentLength;
				}
				currentLength = 0;
			}
			currentBit++;
		}
	}

	if (currentLength > largestLength) {
		largestStart = currentStart;
		largestLength = currentLength;
	}

	if (firstFree >= 0 && firstFree < group.fFirstFree) {
		// mostly harmless but noteworthy
		dprintf("group %d first free too late: should be %d, is %d\n",
			(int)groupIndex, (int)firstFree, (int)group.fFirstFree);
	}
	if (group.fLargestValid
		&& (largestStart != group.fLargestStart
			|| largestLength != group.fLargestLength)) {
		panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n",
			fVolume, (int)groupIndex, (int)group.fLargestStart,
			(int)group.fLargestLength, (int)largestStart, (int)largestLength);
	}
}
#endif	// DEBUG_ALLOCATION_GROUPS


status_t
BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize)
{
	// TODO: Remove this check when offset and size handling is implemented
	if (offset != 0
		|| fVolume->NumBlocks() < 0
		|| size < (uint64)fVolume->NumBlocks() * fVolume->BlockSize()) {
		INFORM(("BFS Trim: Ranges smaller than the file system size"
			" are not supported yet.\n"));
		return B_UNSUPPORTED;
	}

	const uint32 kTrimRanges = 128;
	fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data)
		+ 2 * sizeof(uint64) * (kTrimRanges - 1));
	if (trimData == NULL)
		return B_NO_MEMORY;

	MemoryDeleter deleter(trimData);
	RecursiveLocker locker(fLock);

	// TODO: take given offset and size into account!
	int32 lastGroup = fNumGroups - 1;
	uint32 firstBlock = 0;
	uint32 firstBit = 0;
	uint64 currentBlock = 0;
	uint32 blockShift = fVolume->BlockShift();

	uint64 firstFree = 0;
	uint64 freeLength = 0;

	trimData->range_count = 0;
	trimmedSize = 0;

	AllocationBlock cached(fVolume);
	for (int32 groupIndex = 0; groupIndex <= lastGroup; groupIndex++) {
		AllocationGroup& group = fGroups[groupIndex];

		for (uint32 block = firstBlock; block < group.NumBitmapBlocks(); block++) {
			cached.SetTo(group, block);

			for (uint32 i = firstBit; i < cached.NumBlockBits(); i++) {
				if (cached.IsUsed(i)) {
					// Block is in use
					if (freeLength > 0) {
						// Overflow is unlikely to happen, but check it anyway
						if ((firstFree << blockShift) >> blockShift
								!= firstFree
							|| (freeLength << blockShift) >> blockShift
								!= freeLength) {
							FATAL(("BlockAllocator::Trim:"
								" Overflow detected!\n"));
							return B_ERROR;
						}
						status_t status = _TrimNext(*trimData, kTrimRanges,
							firstFree << blockShift, freeLength << blockShift,
							false, trimmedSize);
						if (status != B_OK)
							return status;

						freeLength = 0;
					}
				} else if (freeLength++ == 0) {
					// Block is free, start new free range
					firstFree = currentBlock;
				}

				currentBlock++;
			}
		}

		firstBlock = 0;
		firstBit = 0;
	}

	return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift,
		freeLength << blockShift, true, trimmedSize);
}


//	#pragma mark -


/*!	Checks whether the specified block range is allocated or free, depending
	on the \a allocated argument.
*/
status_t
BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated,
	off_t* firstError)
{
	if (start < 0 || start + length > fVolume->NumBlocks())
		return B_BAD_VALUE;

	off_t block = start;

	int32 group = start >> fVolume->AllocationGroupShift();
	uint32 bitmapBlock = start / (fVolume->BlockSize() << 3);
	uint32 blockOffset = start % (fVolume->BlockSize() << 3);

	uint32 groupBlock = bitmapBlock % fBlocksPerGroup;

	AllocationBlock cached(fVolume);

	while (groupBlock < fGroups[group].NumBitmapBlocks() && length > 0) {
		if (cached.SetTo(fGroups[group], groupBlock) != B_OK)
			RETURN_ERROR(B_IO_ERROR);

		for (; blockOffset < cached.NumBlockBits() && length > 0;
				blockOffset++, length--, block++) {
			if (cached.IsUsed(blockOffset) != allocated) {
				PRINT(("CheckBlocks: Erroneous block (group = %" B_PRId32
					", groupBlock = %" B_PRIu32
					", blockOffset = %" B_PRIu32 ")!\n",
					group, groupBlock, blockOffset));

				if (firstError)
					*firstError = block;

				return B_BAD_DATA;
			}
		}

		blockOffset = 0;

		if (++groupBlock >= fGroups[group].NumBitmapBlocks()) {
			groupBlock = 0;
			group++;
		}
	}

	return B_OK;
}


bool
BlockAllocator::IsValidBlockRun(block_run run, const char* type)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length())
				> fGroups[run.AllocationGroup()].fNumBits
		|| run.length == 0) {
		PRINT(("%s: block_run(%" B_PRId32 ", %" B_PRIu16 ", %" B_PRIu16")"
			" is invalid!\n", type, run.AllocationGroup(), run.Start(),
			run.Length()));
		return false;
	}
	return true;
}


status_t
BlockAllocator::CheckBlockRun(block_run run, const char* type, bool allocated)
{
	if (!IsValidBlockRun(run, type))
		return B_BAD_DATA;

	status_t status = CheckBlocks(fVolume->ToBlock(run), run.Length(),
		allocated);
	if (status != B_OK) {
		PRINT(("%s: block_run(%" B_PRId32 ", %" B_PRIu16 ", %" B_PRIu16")"
			" is only partially allocated!\n", type, run.AllocationGroup(),
			run.Start(), run.Length()));
	}

	return status;
}


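/*!	Appends one free range to the batch in \a trimData. Empty ranges are
	ignored. Returns true when the batch has reached \a maxRanges entries
	and therefore needs to be submitted to the device.
*/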
bool
BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges,
	uint64 offset, uint64 size)
{
	ASSERT(trimData.range_count < maxRanges);
	if (size == 0)
		return false;

	trimData.ranges[trimData.range_count].offset = offset;
	trimData.ranges[trimData.range_count].size = size;
	trimData.range_count++;

	return (trimData.range_count == maxRanges);
}


status_t
BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges,
	uint64 offset, uint64 size, bool force, uint64& trimmedSize)
{
	PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %"
		B_PRIu64 ")\n", trimData.range_count, offset, size));

	const bool rangesFilled = _AddTrim(trimData, maxRanges, offset, size);

	if (rangesFilled || force) {
		// Trim now
		trimData.trimmed_size = 0;
#ifdef DEBUG_TRIM
		dprintf("TRIM: BFS: free ranges (bytes):\n");
		for (uint32 i = 0; i < trimData.range_count; i++) {
			dprintf("[%3" B_PRIu32 "] %" B_PRIu64 " : %" B_PRIu64 "\n", i,
				trimData.ranges[i].offset, trimData.ranges[i].size);
		}
#endif
		if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData,
				sizeof(fs_trim_data)
					+ 2 * sizeof(uint64) * (trimData.range_count - 1)) != 0) {
			return errno;
		}

		trimmedSize += trimData.trimmed_size;
		trimData.range_count = 0;
	}

	return B_OK;
}


//	#pragma mark - debugger commands


#ifdef BFS_DEBUGGER_COMMANDS

void
BlockAllocator::Dump(int32 index)
{
	kprintf("allocation groups: %" B_PRId32 " (base %p)\n", fNumGroups, fGroups);
	kprintf("blocks per group: %" B_PRId32 "\n", fBlocksPerGroup);

	for (int32 i = 0; i < fNumGroups; i++) {
		if (index != -1 && i != index)
			continue;

		AllocationGroup& group = fGroups[i];

		kprintf("[%3" B_PRId32 "] num bits:       %" B_PRIu32 "  (%p)\n", i,
			group.NumBits(), &group);
		kprintf("      num blocks:     %" B_PRIu32 "\n", group.NumBitmapBlocks());
		kprintf("      start:          %" B_PRId32 "\n", group.Start());
		kprintf("      first free:     %" B_PRId32 "\n", group.fFirstFree);
		kprintf("      largest start:  %" B_PRId32 "%s\n", group.fLargestStart,
			group.fLargestValid ? "" : "  (invalid)");
		kprintf("      largest length: %" B_PRId32 "\n", group.fLargestLength);
		kprintf("      free bits:      %" B_PRId32 "\n", group.fFreeBits);
	}
}


#if BFS_TRACING
static char kTraceBuffer[256];


int
dump_block_allocator_blocks(int argc, char** argv)
{
	if (argc != 3 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]);
		return 0;
	}

	Volume* volume = (Volume*)parse_expression(argv[1]);
	off_t block = parse_expression(argv[2]);

	// iterate over all tracing entries to find overlapping actions

	using namespace BFSBlockTracing;

	LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0);
	TraceEntryIterator iterator;
	while (TraceEntry* _entry = iterator.Next()) {
		if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) {
			off_t first = volume->ToBlock(entry->Run());
			off_t last = first - 1 + entry->Run().Length();
			if (block >= first && block <= last) {
				out.Clear();
				const char* dump = out.DumpEntry(entry);
				kprintf("%5ld. %s\n", iterator.Index(), dump);
			}
		} else if (Free* entry = dynamic_cast<Free*>(_entry)) {
			off_t first = volume->ToBlock(entry->Run());
			off_t last = first - 1 + entry->Run().Length();
			if (block >= first && block <= last) {
				out.Clear();
				const char* dump = out.DumpEntry(entry);
				kprintf("%5ld. %s\n", iterator.Index(), dump);
			}
		}
	}

	return 0;
}
#endif


int
dump_block_allocator(int argc, char** argv)
{
	int32 group = -1;
	if (argc == 3) {
		group = parse_expression(argv[2]);
		argc--;
	}

	if (argc != 2 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]);
		return 0;
	}

	Volume* volume = (Volume*)parse_expression(argv[1]);
	BlockAllocator& allocator = volume->Allocator();

	allocator.Dump(group);
	return 0;
}

#endif	// BFS_DEBUGGER_COMMANDS