xref: /haiku/src/add-ons/kernel/file_systems/ext2/BlockAllocator.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2001-2011, Haiku Inc. All rights reserved.
3  * This file may be used under the terms of the MIT License.
4  *
5  * Authors:
6  *		Janito V. Ferreira Filho
7  *		Jérôme Duval
8  */
9 
10 
11 #include "BlockAllocator.h"
12 
13 #include <util/AutoLock.h>
14 
15 #include "BitmapBlock.h"
16 #include "Inode.h"
17 
18 
19 #undef ASSERT
20 //#define TRACE_EXT2
21 #ifdef TRACE_EXT2
22 #	define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
23 #	define ASSERT(x) { if (!(x)) kernel_debugger("ext2: assert failed: " #x "\n"); }
24 #else
25 #	define TRACE(x...) ;
26 #	define ASSERT(x) ;
27 #endif
28 #define ERROR(x...) dprintf("\33[34mext2:\33[0m " x)
29 
30 
31 class AllocationBlockGroup : public TransactionListener {
32 public:
33 						AllocationBlockGroup();
34 						~AllocationBlockGroup();
35 
36 			status_t	Initialize(Volume* volume, uint32 blockGroup,
37 							uint32 numBits);
38 
39 			bool		IsFull() const;
40 
41 			status_t	Allocate(Transaction& transaction, fsblock_t start,
42 							uint32 length);
43 			status_t	Free(Transaction& transaction, uint32 start,
44 							uint32 length);
45 			status_t	FreeAll(Transaction& transaction);
46 			status_t	Check(uint32 start, uint32 length);
47 
48 			uint32		NumBits() const;
49 			uint32		FreeBits() const;
50 			fsblock_t	Start() const;
51 
52 			fsblock_t	LargestStart() const;
53 			uint32		LargestLength() const;
54 
55 			// TransactionListener implementation
56 			void		TransactionDone(bool success);
57 			void		RemovedFromTransaction();
58 
59 private:
60 			status_t	_ScanFreeRanges(bool verify = false);
61 			void		_AddFreeRange(uint32 start, uint32 length);
62 			void		_LockInTransaction(Transaction& transaction);
63 			status_t	_InitGroup(Transaction& transaction);
64 			bool		_IsSparse();
65 			uint32		_FirstFreeBlock();
66 			void		_SetBlockBitmapChecksum(BitmapBlock& block);
67 			bool		_VerifyBlockBitmapChecksum(BitmapBlock& block);
68 
69 			Volume*		fVolume;
70 			uint32		fBlockGroup;
71 			ext2_block_group* fGroupDescriptor;
72 
73 			mutex		fLock;
74 			mutex		fTransactionLock;
75 			int32		fCurrentTransaction;
76 
77 			fsblock_t	fStart;
78 			uint32		fNumBits;
79 			fsblock_t	fBitmapBlock;
80 
81 			uint32		fFreeBits;
82 			uint32		fFirstFree;
83 			uint32		fLargestStart;
84 			uint32		fLargestLength;
85 
86 			uint32		fPreviousFreeBits;
87 			uint32		fPreviousFirstFree;
88 			uint32		fPreviousLargestStart;
89 			uint32		fPreviousLargestLength;
90 };
91 
92 
93 AllocationBlockGroup::AllocationBlockGroup()
94 	:
95 	fVolume(NULL),
96 	fBlockGroup(0),
97 	fGroupDescriptor(NULL),
98 	fCurrentTransaction(-1),
99 	fStart(0),
100 	fNumBits(0),
101 	fBitmapBlock(0),
102 	fFreeBits(0),
103 	fFirstFree(0),
104 	fLargestStart(0),
105 	fLargestLength(0),
106 	fPreviousFreeBits(0),
107 	fPreviousFirstFree(0),
108 	fPreviousLargestStart(0),
109 	fPreviousLargestLength(0)
110 {
111 	mutex_init(&fLock, "ext2 allocation block group");
112 	mutex_init(&fTransactionLock, "ext2 allocation block group transaction");
113 }
114 
115 
116 AllocationBlockGroup::~AllocationBlockGroup()
117 {
118 	mutex_destroy(&fLock);
119 	mutex_destroy(&fTransactionLock);
120 }
121 
122 
123 status_t
124 AllocationBlockGroup::Initialize(Volume* volume, uint32 blockGroup,
125 	uint32 numBits)
126 {
127 	fVolume = volume;
128 	fBlockGroup = blockGroup;
129 	fNumBits = numBits;
130 	fStart = blockGroup * numBits + fVolume->FirstDataBlock();
131 
132 	status_t status = fVolume->GetBlockGroup(blockGroup, &fGroupDescriptor);
133 	if (status != B_OK)
134 		return status;
135 
136 	fBitmapBlock = fGroupDescriptor->BlockBitmap(fVolume->Has64bitFeature());
137 	if (fBitmapBlock == 0) {
138 		ERROR("AllocationBlockGroup(%" B_PRIu32 ")::Initialize(): "
139 			"blockBitmap is zero\n", fBlockGroup);
140 		return B_BAD_VALUE;
141 	}
142 
143 	if (fGroupDescriptor->Flags() & EXT2_BLOCK_GROUP_BLOCK_UNINIT) {
144 		fFreeBits = fGroupDescriptor->FreeBlocks(fVolume->Has64bitFeature());
145 		fLargestLength = fFreeBits;
146 		fLargestStart = _FirstFreeBlock();
147 		TRACE("Group %" B_PRIu32 " is uninit\n", fBlockGroup);
148 		return B_OK;
149 	}
150 
151 	status = _ScanFreeRanges(true);
152 	if (status != B_OK)
153 		return status;
154 
155 	if (fGroupDescriptor->FreeBlocks(fVolume->Has64bitFeature())
156 		!= fFreeBits) {
157 		ERROR("AllocationBlockGroup(%" B_PRIu32 ",%" B_PRIu64 ")::Initialize()"
158 			": Mismatch between counted free blocks (%" B_PRIu32 "/%" B_PRIu32
159 			") and what is set on the group descriptor (%" B_PRIu32 ")\n",
160 			fBlockGroup, fBitmapBlock, fFreeBits, fNumBits,
161 			fGroupDescriptor->FreeBlocks(fVolume->Has64bitFeature()));
162 		return B_BAD_DATA;
163 	}
164 
165 	fPreviousFreeBits = fFreeBits;
166 	fPreviousFirstFree = fFirstFree;
167 	fPreviousLargestStart = fLargestStart;
168 	fPreviousLargestLength = fLargestLength;
169 
170 	return status;
171 }
172 
173 
174 status_t
175 AllocationBlockGroup::_ScanFreeRanges(bool verify)
176 {
177 	TRACE("AllocationBlockGroup::_ScanFreeRanges() for group %" B_PRIu32 "\n",
178 		fBlockGroup);
179 	BitmapBlock block(fVolume, fNumBits);
180 
181 	if (!block.SetTo(fBitmapBlock))
182 		return B_ERROR;
183 
184 	if (verify && !_VerifyBlockBitmapChecksum(block)) {
185 		ERROR("AllocationBlockGroup(%" B_PRIu32 ")::_ScanFreeRanges(): "
186 			"blockGroup verification failed\n", fBlockGroup);
187 		return B_BAD_DATA;
188 	}
189 
190 	fFreeBits = 0;
191 	uint32 start = 0;
192 	uint32 end = 0;
193 
194 	while (end < fNumBits) {
195 		block.FindNextUnmarked(start);
196 		ASSERT(block.CheckMarked(end, start - end));
197 		end = start;
198 
199 		if (start != block.NumBits()) {
200 			block.FindNextMarked(end);
201 			_AddFreeRange(start, end - start);
202 			ASSERT(block.CheckUnmarked(fLargestStart, fLargestLength));
203 			start = end;
204 		}
205 	}
206 
207 	return B_OK;
208 }
209 
210 
211 bool
212 AllocationBlockGroup::IsFull() const
213 {
214 	return fFreeBits == 0;
215 }
216 
217 
218 status_t
219 AllocationBlockGroup::Allocate(Transaction& transaction, fsblock_t _start,
220 	uint32 length)
221 {
222 	uint32 start = _start - fStart;
223 	TRACE("AllocationBlockGroup::Allocate(%" B_PRIu32 ",%" B_PRIu32 ")\n",
224 		start, length);
225 	if (length == 0)
226 		return B_OK;
227 
228 	uint32 end = start + length;
229 	if (end > fNumBits)
230 		return B_BAD_VALUE;
231 
232 	_LockInTransaction(transaction);
233 	_InitGroup(transaction);
234 
235 	BitmapBlock block(fVolume, fNumBits);
236 
237 	if (!block.SetToWritable(transaction, fBitmapBlock))
238 		return B_ERROR;
239 
240 	TRACE("AllocationBlockGroup::Allocate(): Largest range in %" B_PRIu32 "-%"
241 		B_PRIu32 "\n", fLargestStart, fLargestStart + fLargestLength);
242 	ASSERT(block.CheckUnmarked(fLargestStart, fLargestLength));
243 
244 	if (!block.Mark(start, length)) {
245 		ERROR("Failed to allocate blocks from %" B_PRIu32 " to %" B_PRIu32
246 			". Some were already allocated.\n", start, start + length);
247 		return B_ERROR;
248 	}
249 
250 	fFreeBits -= length;
251 	fGroupDescriptor->SetFreeBlocks(fFreeBits, fVolume->Has64bitFeature());
252 	_SetBlockBitmapChecksum(block);
253 	fVolume->WriteBlockGroup(transaction, fBlockGroup);
254 
255 	if (start == fLargestStart) {
256 		if (fFirstFree == fLargestStart)
257 			fFirstFree += length;
258 
259 		fLargestStart += length;
260 		fLargestLength -= length;
261 	} else if (start + length == fLargestStart + fLargestLength) {
262 		fLargestLength -= length;
263 	} else if (start < fLargestStart + fLargestLength
264 			&& start > fLargestStart) {
265 		uint32 firstLength = start - fLargestStart;
266 		uint32 secondLength = fLargestStart + fLargestLength
267 			- (start + length);
268 
269 		if (firstLength >= secondLength) {
270 			fLargestLength = firstLength;
271 		} else {
272 			fLargestLength = secondLength;
273 			fLargestStart = start + length;
274 		}
275 	} else {
276 		// No need to revalidate the largest free range
277 		return B_OK;
278 	}
279 
280 	TRACE("AllocationBlockGroup::Allocate(): Largest range in %" B_PRIu32 "-%"
281 		B_PRIu32 "\n", fLargestStart, fLargestStart + fLargestLength);
282 	ASSERT(block.CheckUnmarked(fLargestStart, fLargestLength));
283 
284 	if (fLargestLength < fNumBits / 2)
285 		block.FindLargestUnmarkedRange(fLargestStart, fLargestLength);
286 	ASSERT(block.CheckUnmarked(fLargestStart, fLargestLength));
287 
288 	return B_OK;
289 }
290 
291 
292 status_t
293 AllocationBlockGroup::Free(Transaction& transaction, uint32 start,
294 	uint32 length)
295 {
296 	TRACE("AllocationBlockGroup::Free(): start: %" B_PRIu32 ", length %"
297 		B_PRIu32 "\n", start, length);
298 
299 	if (length == 0)
300 		return B_OK;
301 
302 	uint32 end = start + length;
303 	if (end > fNumBits)
304 		return B_BAD_VALUE;
305 
306 	_LockInTransaction(transaction);
307 	if (fGroupDescriptor->Flags() & EXT2_BLOCK_GROUP_BLOCK_UNINIT)
308 		panic("AllocationBlockGroup::Free() can't free blocks if uninit\n");
309 
310 	BitmapBlock block(fVolume, fNumBits);
311 
312 	if (!block.SetToWritable(transaction, fBitmapBlock))
313 		return B_ERROR;
314 
315 	TRACE("AllocationBlockGroup::Free(): Largest range in %" B_PRIu32 "-%"
316 		B_PRIu32 "\n", fLargestStart, fLargestStart + fLargestLength);
317 	ASSERT(block.CheckUnmarked(fLargestStart, fLargestLength));
318 
319 	if (!block.Unmark(start, length)) {
320 		ERROR("Failed to free blocks from %" B_PRIu32 " to %" B_PRIu32
321 			". Some were already freed.\n", start, start + length);
322 		return B_ERROR;
323 	}
324 
325 	TRACE("AllocationGroup::Free(): Unmarked bits in bitmap\n");
326 
327 	if (fFirstFree > start)
328 		fFirstFree = start;
329 
330 	if (start + length == fLargestStart) {
331 		fLargestStart = start;
332 		fLargestLength += length;
333 	} else if (start == fLargestStart + fLargestLength) {
334 		fLargestLength += length;
335 	} else if (fLargestLength <= fNumBits / 2) {
336 		// May have merged with some free blocks, becoming the largest
337 		uint32 newEnd = start + length;
338 		block.FindNextMarked(newEnd);
339 
340 		uint32 newStart = start;
341 		block.FindPreviousMarked(newStart);
342 		newStart++;
343 
344 		if (newEnd - newStart > fLargestLength) {
345 			fLargestLength = newEnd - newStart;
346 			fLargestStart = newStart;
347 		}
348 	}
349 
350 	TRACE("AllocationBlockGroup::Free(): Largest range in %" B_PRIu32 "-%"
351 		B_PRIu32 "\n", fLargestStart, fLargestStart + fLargestLength);
352 	ASSERT(block.CheckUnmarked(fLargestStart, fLargestLength));
353 
354 	fFreeBits += length;
355 	fGroupDescriptor->SetFreeBlocks(fFreeBits, fVolume->Has64bitFeature());
356 	_SetBlockBitmapChecksum(block);
357 	fVolume->WriteBlockGroup(transaction, fBlockGroup);
358 
359 	return B_OK;
360 }
361 
362 
363 status_t
364 AllocationBlockGroup::FreeAll(Transaction& transaction)
365 {
366 	return Free(transaction, 0, fNumBits);
367 }
368 
369 
370 uint32
371 AllocationBlockGroup::NumBits() const
372 {
373 	return fNumBits;
374 }
375 
376 
377 uint32
378 AllocationBlockGroup::FreeBits() const
379 {
380 	return fFreeBits;
381 }
382 
383 
384 fsblock_t
385 AllocationBlockGroup::Start() const
386 {
387 	return fStart;
388 }
389 
390 
391 fsblock_t
392 AllocationBlockGroup::LargestStart() const
393 {
394 	return fStart + fLargestStart;
395 }
396 
397 
398 uint32
399 AllocationBlockGroup::LargestLength() const
400 {
401 	return fLargestLength;
402 }
403 
404 
405 void
406 AllocationBlockGroup::_AddFreeRange(uint32 start, uint32 length)
407 {
408 	if (IsFull()) {
409 		fFirstFree = start;
410 		fLargestStart = start;
411 		fLargestLength = length;
412 	} else if (length > fLargestLength) {
413 		fLargestStart = start;
414 		fLargestLength = length;
415 	}
416 
417 	fFreeBits += length;
418 }
419 
420 
421 void
422 AllocationBlockGroup::_LockInTransaction(Transaction& transaction)
423 {
424 	mutex_lock(&fLock);
425 
426 	if (transaction.ID() != fCurrentTransaction) {
427 		mutex_unlock(&fLock);
428 
429 		mutex_lock(&fTransactionLock);
430 		mutex_lock(&fLock);
431 
432 		fCurrentTransaction = transaction.ID();
433 		transaction.AddListener(this);
434 	}
435 
436 	mutex_unlock(&fLock);
437 }
438 
439 
440 status_t
441 AllocationBlockGroup::_InitGroup(Transaction& transaction)
442 {
443 	TRACE("AllocationBlockGroup::_InitGroup()\n");
444 	uint16 flags = fGroupDescriptor->Flags();
445 	if ((flags & EXT2_BLOCK_GROUP_BLOCK_UNINIT) == 0)
446 		return B_OK;
447 
448 	TRACE("AllocationBlockGroup::_InitGroup() initing\n");
449 
450 	BitmapBlock blockBitmap(fVolume, fNumBits);
451 	if (!blockBitmap.SetToWritable(transaction, fBitmapBlock))
452 		return B_ERROR;
453 
454 	blockBitmap.Mark(0, _FirstFreeBlock(), true);
455 	blockBitmap.Unmark(0, fNumBits, true);
456 
457 	fGroupDescriptor->SetFlags(flags & ~EXT2_BLOCK_GROUP_BLOCK_UNINIT);
458 	_SetBlockBitmapChecksum(blockBitmap);
459 	fVolume->WriteBlockGroup(transaction, fBlockGroup);
460 
461 	status_t status = _ScanFreeRanges();
462 	if (status != B_OK)
463 		return status;
464 
465 	if (fGroupDescriptor->FreeBlocks(fVolume->Has64bitFeature())
466 		!= fFreeBits) {
467 		ERROR("AllocationBlockGroup(%" B_PRIu32 ",%" B_PRIu64 ")::_InitGroup()"
468 			": Mismatch between counted free blocks (%" B_PRIu32 "/%" B_PRIu32
469 			") and what is set on the group descriptor (%" B_PRIu32 ")\n",
470 			fBlockGroup, fBitmapBlock, fFreeBits, fNumBits,
471 			fGroupDescriptor->FreeBlocks(fVolume->Has64bitFeature()));
472 		return B_BAD_DATA;
473 	}
474 
475 	TRACE("AllocationBlockGroup::_InitGroup() init OK\n");
476 
477 	return B_OK;
478 }
479 
480 
481 bool
482 AllocationBlockGroup::_IsSparse()
483 {
484 	if (fBlockGroup <= 1)
485 		return true;
486 	if (fBlockGroup & 0x1)
487 		return false;
488 
489 	uint32 i = fBlockGroup;
490 	while (i % 7 == 0)
491 		i /= 7;
492 	if (i == 1)
493 		return true;
494 
495 	i = fBlockGroup;
496 	while (i % 5 == 0)
497 		i /= 5;
498 	if (i == 1)
499 		return true;
500 
501 	i = fBlockGroup;
502 	while (i % 3 == 0)
503 		i /= 3;
504 	if (i == 1)
505 		return true;
506 
507 	return false;
508 }
509 
510 
511 uint32
512 AllocationBlockGroup::_FirstFreeBlock()
513 {
514 	uint32 first = 1;
515 	if (_IsSparse())
516 		first = 0;
517 	else if (!fVolume->HasMetaGroupFeature()) {
518 		first += fVolume->SuperBlock().ReservedGDTBlocks();
519 		first += fVolume->NumGroups();
520 	}
521 	return first;
522 }
523 
524 
525 void
526 AllocationBlockGroup::_SetBlockBitmapChecksum(BitmapBlock& block)
527 {
528 	if (fVolume->HasMetaGroupChecksumFeature()) {
529 		uint32 checksum = block.Checksum(fVolume->BlocksPerGroup());
530 		fGroupDescriptor->block_bitmap_csum = checksum & 0xffff;
531 		if (fVolume->GroupDescriptorSize() >= offsetof(ext2_block_group,
532 			inode_bitmap_high)) {
533 			fGroupDescriptor->block_bitmap_csum_high = checksum >> 16;
534 		}
535 	}
536 }
537 
538 
539 bool
540 AllocationBlockGroup::_VerifyBlockBitmapChecksum(BitmapBlock& block) {
541 	if (fVolume->HasMetaGroupChecksumFeature()) {
542 		uint32 checksum = block.Checksum(fVolume->BlocksPerGroup());
543 		uint32 provided = fGroupDescriptor->block_bitmap_csum;
544 		if (fVolume->GroupDescriptorSize() >= offsetof(ext2_block_group,
545 			inode_bitmap_high)) {
546 			provided |= (fGroupDescriptor->block_bitmap_csum_high << 16);
547 		} else
548 			checksum &= 0xffff;
549 		return provided == checksum;
550 	}
551 	return true;
552 }
553 
554 
555 void
556 AllocationBlockGroup::TransactionDone(bool success)
557 {
558 	if (success) {
559 		TRACE("AllocationBlockGroup::TransactionDone(): The transaction "
560 			"succeeded, discarding previous state\n");
561 		fPreviousFreeBits = fFreeBits;
562 		fPreviousFirstFree = fFirstFree;
563 		fPreviousLargestStart = fLargestStart;
564 		fPreviousLargestLength = fLargestLength;
565 	} else {
566 		TRACE("AllocationBlockGroup::TransactionDone(): The transaction "
567 			"failed, restoring to previous state\n");
568 		fFreeBits = fPreviousFreeBits;
569 		fFirstFree = fPreviousFirstFree;
570 		fLargestStart = fPreviousLargestStart;
571 		fLargestLength = fPreviousLargestLength;
572 	}
573 }
574 
575 
576 void
577 AllocationBlockGroup::RemovedFromTransaction()
578 {
579 	mutex_unlock(&fTransactionLock);
580 	fCurrentTransaction = -1;
581 }
582 
583 
584 BlockAllocator::BlockAllocator(Volume* volume)
585 	:
586 	fVolume(volume),
587 	fGroups(NULL),
588 	fBlocksPerGroup(0),
589 	fNumBlocks(0),
590 	fNumGroups(0),
591 	fFirstBlock(0)
592 {
593 	mutex_init(&fLock, "ext2 block allocator");
594 }
595 
596 
597 BlockAllocator::~BlockAllocator()
598 {
599 	mutex_destroy(&fLock);
600 
601 	if (fGroups != NULL)
602 		delete [] fGroups;
603 }
604 
605 
606 status_t
607 BlockAllocator::Initialize()
608 {
609 	fBlocksPerGroup = fVolume->BlocksPerGroup();
610 	fNumGroups = fVolume->NumGroups();
611 	fFirstBlock = fVolume->FirstDataBlock();
612 	fNumBlocks = fVolume->NumBlocks();
613 
614 	TRACE("BlockAllocator::Initialize(): blocks per group: %" B_PRIu32
615 		", block groups: %" B_PRIu32 ", first block: %" B_PRIu64
616 		", num blocks: %" B_PRIu64 "\n", fBlocksPerGroup, fNumGroups,
617 		fFirstBlock, fNumBlocks);
618 
619 	fGroups = new(std::nothrow) AllocationBlockGroup[fNumGroups];
620 	if (fGroups == NULL)
621 		return B_NO_MEMORY;
622 
623 	TRACE("BlockAllocator::Initialize(): allocated allocation block groups\n");
624 
625 	mutex_lock(&fLock);
626 		// Released by _Initialize
627 
628 #if 0
629 	thread_id id = spawn_kernel_thread(
630 		(thread_func)BlockAllocator::_Initialize, "ext2 block allocator",
631 		B_LOW_PRIORITY, this);
632 	if (id < B_OK)
633 		return _Initialize(this);
634 
635 	mutex_transfer_lock(&fLock, id);
636 
637 	return resume_thread(id);
638 #else
639 	return _Initialize(this);
640 #endif
641 }
642 
643 
644 status_t
645 BlockAllocator::AllocateBlocks(Transaction& transaction, uint32 minimum,
646 	uint32 maximum, uint32& blockGroup, fsblock_t& start, uint32& length)
647 {
648 	TRACE("BlockAllocator::AllocateBlocks()\n");
649 	MutexLocker lock(fLock);
650 	TRACE("BlockAllocator::AllocateBlocks(): Acquired lock\n");
651 
652 	TRACE("BlockAllocator::AllocateBlocks(): transaction: %" B_PRId32 ", min: "
653 		"%" B_PRIu32 ", max: %" B_PRIu32 ", block group: %" B_PRIu32 ", start:"
654 		" %" B_PRIu64 ", num groups: %" B_PRIu32 "\n", transaction.ID(),
655 		minimum, maximum, blockGroup, start, fNumGroups);
656 
657 	fsblock_t bestStart = 0;
658 	uint32 bestLength = 0;
659 	uint32 bestGroup = 0;
660 
661 	uint32 groupNum = blockGroup;
662 
663 	AllocationBlockGroup* last = &fGroups[fNumGroups];
664 	AllocationBlockGroup* group = &fGroups[blockGroup];
665 
666 	for (int32 iterations = 0; iterations < 2; iterations++) {
667 		for (; group < last; ++group, ++groupNum) {
668 			TRACE("BlockAllocator::AllocateBlocks(): Group %" B_PRIu32
669 				" has largest length of %" B_PRIu32 "\n", groupNum,
670 				group->LargestLength());
671 
672 			if (group->LargestLength() > bestLength) {
673 				if (start <= group->LargestStart()) {
674 					bestStart = group->LargestStart();
675 					bestLength = group->LargestLength();
676 					bestGroup = groupNum;
677 
678 					TRACE("BlockAllocator::AllocateBlocks(): Found a better "
679 						"range: block group: %" B_PRIu32 ", %" B_PRIu64 "-%"
680 						B_PRIu64 "\n", groupNum, bestStart,
681 						bestStart + bestLength);
682 
683 					if (bestLength >= maximum)
684 						break;
685 				}
686 			}
687 
688 			start = 0;
689 		}
690 
691 		if (bestLength >= maximum)
692 			break;
693 
694 		groupNum = 0;
695 
696 		group = &fGroups[0];
697 		last = &fGroups[blockGroup + 1];
698 	}
699 
700 	if (bestLength < minimum) {
701 		TRACE("BlockAllocator::AllocateBlocks(): best range (length %" B_PRIu32
702 			") doesn't have minimum length of %" B_PRIu32 "\n", bestLength,
703 			minimum);
704 		return B_DEVICE_FULL;
705 	}
706 
707 	if (bestLength > maximum)
708 		bestLength = maximum;
709 
710 	TRACE("BlockAllocator::AllocateBlocks(): Selected range: block group %"
711 		B_PRIu32 ", %" B_PRIu64 "-%" B_PRIu64 "\n", bestGroup, bestStart,
712 		bestStart + bestLength);
713 
714 	status_t status = fGroups[bestGroup].Allocate(transaction, bestStart,
715 		bestLength);
716 	if (status != B_OK) {
717 		TRACE("BlockAllocator::AllocateBlocks(): Failed to allocate %" B_PRIu32
718 		" blocks inside block group %" B_PRIu32 ".\n", bestLength, bestGroup);
719 		return status;
720 	}
721 
722 	start = bestStart;
723 	length = bestLength;
724 	blockGroup = bestGroup;
725 
726 	return B_OK;
727 }
728 
729 
730 status_t
731 BlockAllocator::Allocate(Transaction& transaction, Inode* inode,
732 	off_t numBlocks, uint32 minimum, fsblock_t& start, uint32& allocated)
733 {
734 	if (numBlocks <= 0)
735 		return B_ERROR;
736 	if (start > fNumBlocks)
737 		return B_BAD_VALUE;
738 
739 	uint32 group = inode->ID() / fVolume->InodesPerGroup();
740 
741 	if (inode->Size() > 0) {
742 		// Try to allocate near it's last blocks
743 		ext2_data_stream* dataStream = &inode->Node().stream;
744 		uint32 numBlocks = inode->Size() / fVolume->BlockSize() + 1;
745 		uint32 lastBlock = 0;
746 
747 		// DANGER! What happens with sparse files?
748 		if (numBlocks < EXT2_DIRECT_BLOCKS) {
749 			// Only direct blocks
750 			lastBlock = dataStream->direct[numBlocks];
751 		} else {
752 			numBlocks -= EXT2_DIRECT_BLOCKS - 1;
753 			uint32 numIndexes = fVolume->BlockSize() / 4;
754 				// block size / sizeof(int32)
755 			uint32 numIndexes2 = numIndexes * numIndexes;
756 			uint32 numIndexes3 = numIndexes2 * numIndexes;
757 			uint32 indexesInIndirect = numIndexes;
758 			uint32 indexesInDoubleIndirect = indexesInIndirect
759 				+ numIndexes2;
760 			// uint32 indexesInTripleIndirect = indexesInDoubleIndirect
761 				// + numIndexes3;
762 
763 			uint32 doubleIndirectBlock = EXT2_DIRECT_BLOCKS + 1;
764 			uint32 indirectBlock = EXT2_DIRECT_BLOCKS;
765 
766 			CachedBlock cached(fVolume);
767 			uint32* indirectData;
768 
769 			if (numBlocks > indexesInDoubleIndirect) {
770 				// Triple indirect blocks
771 				indirectData = (uint32*)cached.SetTo(EXT2_DIRECT_BLOCKS + 2);
772 				if (indirectData == NULL)
773 					return B_IO_ERROR;
774 
775 				uint32 index = (numBlocks - indexesInDoubleIndirect)
776 					/ numIndexes3;
777 				doubleIndirectBlock = indirectData[index];
778 			}
779 
780 			if (numBlocks > indexesInIndirect) {
781 				// Double indirect blocks
782 				indirectData = (uint32*)cached.SetTo(doubleIndirectBlock);
783 				if (indirectData == NULL)
784 					return B_IO_ERROR;
785 
786 				uint32 index = (numBlocks - indexesInIndirect) / numIndexes2;
787 				indirectBlock = indirectData[index];
788 			}
789 
790 			indirectData = (uint32*)cached.SetTo(indirectBlock);
791 				if (indirectData == NULL)
792 					return B_IO_ERROR;
793 
794 			uint32 index = numBlocks / numIndexes;
795 			lastBlock = indirectData[index];
796 		}
797 
798 		group = (lastBlock - fFirstBlock) / fBlocksPerGroup;
799 	}
800 
801 	// TODO: Apply some more policies
802 
803 	return AllocateBlocks(transaction, minimum, minimum + 8, group, start,
804 		allocated);
805 }
806 
807 
808 status_t
809 BlockAllocator::Free(Transaction& transaction, fsblock_t start, uint32 length)
810 {
811 	TRACE("BlockAllocator::Free(%" B_PRIu64 ", %" B_PRIu32 ")\n", start,
812 		length);
813 	MutexLocker lock(fLock);
814 
815 	if (start <= fFirstBlock) {
816 		panic("Trying to free superblock!\n");
817 		return B_BAD_VALUE;
818 	}
819 
820 	if (length == 0)
821 		return B_OK;
822 	if (start > fNumBlocks || length > fNumBlocks)
823 		return B_BAD_VALUE;
824 
825 	TRACE("BlockAllocator::Free(): first block: %" B_PRIu64
826 		", blocks per group: %" B_PRIu32 "\n", fFirstBlock, fBlocksPerGroup);
827 
828 	start -= fFirstBlock;
829 	off_t end = start + length - 1;
830 
831 	uint32 group = start / fBlocksPerGroup;
832 	if (group >= fNumGroups) {
833 		panic("BlockAllocator::Free() group %" B_PRIu32 " too big (fNumGroups "
834 			"%" B_PRIu32 ")\n", group, fNumGroups);
835 	}
836 	uint32 lastGroup = end / fBlocksPerGroup;
837 	start = start % fBlocksPerGroup;
838 
839 	if (group == lastGroup)
840 		return fGroups[group].Free(transaction, start, length);
841 
842 	TRACE("BlockAllocator::Free(): Freeing from group %" B_PRIu32 ": %"
843 		B_PRIu64 ", %" B_PRIu64 "\n", group,
844 		start, fGroups[group].NumBits() - start);
845 
846 	status_t status = fGroups[group].Free(transaction, start,
847 		fGroups[group].NumBits() - start);
848 	if (status != B_OK)
849 		return status;
850 
851 	for (++group; group < lastGroup; ++group) {
852 		TRACE("BlockAllocator::Free(): Freeing all from group %" B_PRIu32 "\n",
853 			group);
854 		status = fGroups[group].FreeAll(transaction);
855 		if (status != B_OK)
856 			return status;
857 	}
858 
859 	TRACE("BlockAllocator::Free(): Freeing from group %" B_PRIu32 ": 0-%"
860 		B_PRIu64 " \n", group, end % fBlocksPerGroup);
861 	return fGroups[group].Free(transaction, 0, (end + 1) % fBlocksPerGroup);
862 }
863 
864 
865 /*static*/ status_t
866 BlockAllocator::_Initialize(BlockAllocator* allocator)
867 {
868 	TRACE("BlockAllocator::_Initialize()\n");
869 	// fLock is already heald
870 	Volume* volume = allocator->fVolume;
871 
872 	AllocationBlockGroup* groups = allocator->fGroups;
873 	uint32 numGroups = allocator->fNumGroups - 1;
874 
875 	off_t freeBlocks = 0;
876 	TRACE("BlockAllocator::_Initialize(): free blocks: %" B_PRIdOFF "\n",
877 		freeBlocks);
878 
879 	for (uint32 i = 0; i < numGroups; ++i) {
880 		status_t status = groups[i].Initialize(volume, i,
881 			allocator->fBlocksPerGroup);
882 		if (status != B_OK) {
883 			mutex_unlock(&allocator->fLock);
884 			return status;
885 		}
886 
887 		freeBlocks += groups[i].FreeBits();
888 		TRACE("BlockAllocator::_Initialize(): free blocks: %" B_PRIdOFF "\n",
889 			freeBlocks);
890 	}
891 
892 	// Last block group may have less blocks
893 	status_t status = groups[numGroups].Initialize(volume, numGroups,
894 		allocator->fNumBlocks - allocator->fBlocksPerGroup * numGroups
895 		- allocator->fFirstBlock);
896 	if (status != B_OK) {
897 		mutex_unlock(&allocator->fLock);
898 		return status;
899 	}
900 
901 	freeBlocks += groups[numGroups].FreeBits();
902 
903 	TRACE("BlockAllocator::_Initialize(): free blocks: %" B_PRIdOFF "\n",
904 		freeBlocks);
905 
906 	mutex_unlock(&allocator->fLock);
907 
908 	if (freeBlocks != volume->NumFreeBlocks()) {
909 		TRACE("Counted free blocks (%" B_PRIdOFF ") doesn't match value in the"
910 			" superblock (%" B_PRIdOFF ").\n", freeBlocks,
911 			volume->NumFreeBlocks());
912 		return B_BAD_DATA;
913 	}
914 
915 	return B_OK;
916 }
917 
918