xref: /haiku/src/add-ons/kernel/file_systems/ext2/BlockAllocator.cpp (revision 1294543de9ac0eff000eaea1b18368c36435d08e)
1 /*
2  * Copyright 2001-2010, Haiku Inc. All rights reserved.
3  * This file may be used under the terms of the MIT License.
4  *
5  * Authors:
6  *		Janito V. Ferreira Filho
7  */
8 
9 
10 #include "BlockAllocator.h"
11 
12 #include <util/AutoLock.h>
13 
14 #include "BitmapBlock.h"
15 #include "Inode.h"
16 
17 
18 //#define TRACE_EXT2
19 #ifdef TRACE_EXT2
20 #	define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
21 #else
22 #	define TRACE(x...) ;
23 #endif
24 
25 
26 class AllocationBlockGroup : public TransactionListener {
27 public:
28 						AllocationBlockGroup();
29 						~AllocationBlockGroup();
30 
31 			status_t	Initialize(Volume* volume, uint32 blockGroup,
32 							uint32 numBits);
33 
34 			status_t	ScanFreeRanges();
35 			bool		IsFull() const;
36 
37 			status_t	Allocate(Transaction& transaction, uint32 start,
38 							uint32 length);
39 			status_t	Free(Transaction& transaction, uint32 start,
40 							uint32 length);
41 			status_t	FreeAll(Transaction& transaction);
42 			status_t	Check(uint32 start, uint32 length);
43 
44 			uint32		NumBits() const;
45 			uint32		FreeBits() const;
46 			uint32		Start() const;
47 
48 			uint32		LargestStart() const;
49 			uint32		LargestLength() const;
50 
51 			// TransactionListener implementation
52 			void		TransactionDone(bool success);
53 			void		RemovedFromTransaction();
54 
55 private:
56 			void		_AddFreeRange(uint32 start, uint32 length);
57 			void		_LockInTransaction(Transaction& transaction);
58 
59 
60 			Volume*		fVolume;
61 			uint32		fBlockGroup;
62 			ext2_block_group* fGroupDescriptor;
63 
64 			mutex		fLock;
65 			mutex		fTransactionLock;
66 			int32		fCurrentTransaction;
67 
68 			uint32		fStart;
69 			uint32		fNumBits;
70 			uint32		fBitmapBlock;
71 
72 			uint32		fFreeBits;
73 			uint32		fFirstFree;
74 			uint32		fLargestStart;
75 			uint32		fLargestLength;
76 
77 			uint32		fPreviousFreeBits;
78 			uint32		fPreviousFirstFree;
79 			uint32		fPreviousLargestStart;
80 			uint32		fPreviousLargestLength;
81 };
82 
83 
84 AllocationBlockGroup::AllocationBlockGroup()
85 	:
86 	fVolume(NULL),
87 	fBlockGroup(0),
88 	fGroupDescriptor(NULL),
89 	fStart(0),
90 	fNumBits(0),
91 	fBitmapBlock(0),
92 	fFreeBits(0),
93 	fFirstFree(0),
94 	fLargestStart(0),
95 	fLargestLength(0),
96 	fPreviousFreeBits(0),
97 	fPreviousFirstFree(0),
98 	fPreviousLargestStart(0),
99 	fPreviousLargestLength(0)
100 {
101 	mutex_init(&fLock, "ext2 allocation block group");
102 	mutex_init(&fTransactionLock, "ext2 allocation block group transaction");
103 }
104 
105 
106 AllocationBlockGroup::~AllocationBlockGroup()
107 {
108 	mutex_destroy(&fLock);
109 	mutex_destroy(&fTransactionLock);
110 }
111 
112 
113 status_t
114 AllocationBlockGroup::Initialize(Volume* volume, uint32 blockGroup,
115 	uint32 numBits)
116 {
117 	fVolume = volume;
118 	fBlockGroup = blockGroup;
119 	fNumBits = numBits;
120 
121 	status_t status = fVolume->GetBlockGroup(blockGroup, &fGroupDescriptor);
122 	if (status != B_OK)
123 		return status;
124 
125 	fBitmapBlock = fGroupDescriptor->BlockBitmap();
126 
127 	status = ScanFreeRanges();
128 
129 	if (fGroupDescriptor->FreeBlocks() != fFreeBits) {
130 		TRACE("AllocationBlockGroup::Initialize(): Mismatch between counted "
131 			"free blocks (%lu) and what is set on the group descriptor "
132 			"(%lu)\n", fFreeBits, (uint32)fGroupDescriptor->FreeBlocks());
133 		return B_BAD_DATA;
134 	}
135 
136 	fPreviousFreeBits = fFreeBits;
137 	fPreviousFirstFree = fFirstFree;
138 	fPreviousLargestStart = fLargestStart;
139 	fPreviousLargestLength = fLargestLength;
140 
141 	return status;
142 }
143 
144 
145 status_t
146 AllocationBlockGroup::ScanFreeRanges()
147 {
148 	TRACE("AllocationBlockGroup::ScanFreeRanges()\n");
149 	BitmapBlock block(fVolume, fNumBits);
150 
151 	if (!block.SetTo(fBitmapBlock))
152 		return B_ERROR;
153 
154 	uint32 start = 0;
155 	uint32 end = 0;
156 
157 	while (end < fNumBits) {
158 		block.FindNextUnmarked(start);
159 		end = start;
160 
161 		if (start != block.NumBits()) {
162 			block.FindNextMarked(end);
163 			_AddFreeRange(start, end - start);
164 			start = end;
165 		}
166 	}
167 
168 	return B_OK;
169 }
170 
171 
172 bool
173 AllocationBlockGroup::IsFull() const
174 {
175 	return fFreeBits == 0;
176 }
177 
178 
179 status_t
180 AllocationBlockGroup::Allocate(Transaction& transaction, uint32 start,
181 	uint32 length)
182 {
183 	TRACE("AllocationBlockGroup::Allocate(%ld,%ld)\n", start, length);
184 	uint32 end = start + length;
185 	if (end > fNumBits)
186 		return B_BAD_DATA;
187 
188 	_LockInTransaction(transaction);
189 
190 	BitmapBlock block(fVolume, fNumBits);
191 
192 	if (!block.SetToWritable(transaction, fBitmapBlock))
193 		return B_ERROR;
194 
195 	if (!block.Mark(start, length)) {
196 		TRACE("Failed to allocate blocks from %lu to %lu. Some were "
197 			"already allocated.\n", start, start + length);
198 		return B_ERROR;
199 	}
200 
201 	fFreeBits -= length;
202 	fGroupDescriptor->SetFreeBlocks((uint16)fFreeBits);
203 	fVolume->WriteBlockGroup(transaction, fBlockGroup);
204 
205 	if (start == fLargestStart) {
206 		if (fFirstFree == fLargestStart)
207 			fFirstFree += length;
208 
209 		fLargestStart += length;
210 		fLargestLength -= length;
211 	} else if (start + length == fLargestStart + fLargestLength) {
212 		fLargestLength -= length;
213 	} else if (start < fLargestStart + fLargestLength
214 			&& start > fLargestStart) {
215 		uint32 firstLength = start - fLargestStart;
216 		uint32 secondLength = fLargestStart + fLargestLength
217 			- (start + length);
218 
219 		if (firstLength >= secondLength) {
220 			fLargestLength = firstLength;
221 		} else {
222 			fLargestLength = secondLength;
223 			fLargestStart = start + length;
224 		}
225 	} else {
226 		// No need to revalidate the largest free range
227 		return B_OK;
228 	}
229 
230 	if (fLargestLength < fNumBits / 2)
231 		block.FindLargestUnmarkedRange(fLargestStart, fLargestLength);
232 
233 	return B_OK;
234 }
235 
236 
237 status_t
238 AllocationBlockGroup::Free(Transaction& transaction, uint32 start,
239 	uint32 length)
240 {
241 	TRACE("AllocationBlockGroup::Free(): start: %lu, length %lu\n", start,
242 		length);
243 
244 	if (length == 0)
245 		return B_OK;
246 
247 	uint32 end = start + length;
248 
249 	if (end > fNumBits)
250 		return B_BAD_DATA;
251 
252 	_LockInTransaction(transaction);
253 
254 	BitmapBlock block(fVolume, fNumBits);
255 
256 	if (!block.SetToWritable(transaction, fBitmapBlock))
257 		return B_ERROR;
258 
259 	if (!block.Unmark(start, length)) {
260 		TRACE("Failed to free blocks from %lu to %lu. Some were "
261 			"already freed.\n", start, start + length);
262 		return B_ERROR;
263 	}
264 
265 	TRACE("AllocationGroup::Free(): Unmarked bits in bitmap\n");
266 
267 	if (fFirstFree > start)
268 		fFirstFree = start;
269 
270 	if (start + length == fLargestStart) {
271 		fLargestStart = start;
272 		fLargestLength += length;
273 	} else if (start == fLargestStart + fLargestLength) {
274 		fLargestLength += length;
275 	} else if (fLargestLength <= fNumBits / 2) {
276 		// May have merged with some free blocks, becoming the largest
277 		uint32 newEnd = start + length;
278 		block.FindNextMarked(newEnd);
279 
280 		uint32 newStart = start;
281 		block.FindPreviousMarked(newStart);
282 
283 		if (newEnd - newStart > fLargestLength) {
284 			fLargestLength = newEnd - newStart;
285 			fLargestStart = newStart;
286 		}
287 	}
288 
289 	fFreeBits += length;
290 	fGroupDescriptor->SetFreeBlocks((uint16)fFreeBits);
291 	fVolume->WriteBlockGroup(transaction, fBlockGroup);
292 
293 	return B_OK;
294 }
295 
296 
297 status_t
298 AllocationBlockGroup::FreeAll(Transaction& transaction)
299 {
300 	return Free(transaction, 0, fNumBits);
301 }
302 
303 
304 uint32
305 AllocationBlockGroup::NumBits() const
306 {
307 	return fNumBits;
308 }
309 
310 
311 uint32
312 AllocationBlockGroup::FreeBits() const
313 {
314 	return fFreeBits;
315 }
316 
317 
318 uint32
319 AllocationBlockGroup::Start() const
320 {
321 	return fStart;
322 }
323 
324 
325 uint32
326 AllocationBlockGroup::LargestStart() const
327 {
328 	return fLargestStart;
329 }
330 
331 
332 uint32
333 AllocationBlockGroup::LargestLength() const
334 {
335 	return fLargestLength;
336 }
337 
338 
339 void
340 AllocationBlockGroup::_AddFreeRange(uint32 start, uint32 length)
341 {
342 	if (IsFull()) {
343 		fFirstFree = start;
344 		fLargestStart = start;
345 		fLargestLength = length;
346 	} else if (length > fLargestLength) {
347 		fLargestStart = start;
348 		fLargestLength = length;
349 	}
350 
351 	fFreeBits += length;
352 }
353 
354 
355 void
356 AllocationBlockGroup::_LockInTransaction(Transaction& transaction)
357 {
358 	mutex_lock(&fLock);
359 
360 	if (transaction.ID() != fCurrentTransaction) {
361 		mutex_unlock(&fLock);
362 
363 		mutex_lock(&fTransactionLock);
364 		mutex_lock(&fLock);
365 
366 		fCurrentTransaction = transaction.ID();
367 		transaction.AddListener(this);
368 	}
369 
370 	mutex_unlock(&fLock);
371 }
372 
373 
374 void
375 AllocationBlockGroup::TransactionDone(bool success)
376 {
377 	if (success) {
378 		TRACE("AllocationBlockGroup::TransactionDone(): The transaction "
379 			"succeeded, discarding previous state\n");
380 		fPreviousFreeBits = fFreeBits;
381 		fPreviousFirstFree = fFirstFree;
382 		fPreviousLargestStart = fLargestStart;
383 		fPreviousLargestLength = fLargestLength;
384 	} else {
385 		TRACE("AllocationBlockGroup::TransactionDone(): The transaction "
386 			"failed, restoring to previous state\n");
387 		fFreeBits = fPreviousFreeBits;
388 		fFirstFree = fPreviousFirstFree;
389 		fLargestStart = fPreviousLargestStart;
390 		fLargestLength = fPreviousLargestLength;
391 	}
392 }
393 
394 
395 void
396 AllocationBlockGroup::RemovedFromTransaction()
397 {
398 	mutex_unlock(&fTransactionLock);
399 	fCurrentTransaction = -1;
400 }
401 
402 
403 BlockAllocator::BlockAllocator(Volume* volume)
404 	:
405 	fVolume(volume),
406 	fGroups(NULL),
407 	fBlocksPerGroup(0),
408 	fNumBlocks(0),
409 	fNumGroups(0)
410 {
411 	mutex_init(&fLock, "ext2 block allocator");
412 }
413 
414 
415 BlockAllocator::~BlockAllocator()
416 {
417 	mutex_destroy(&fLock);
418 
419 	if (fGroups != NULL)
420 		delete [] fGroups;
421 }
422 
423 
424 status_t
425 BlockAllocator::Initialize()
426 {
427 	fBlocksPerGroup = fVolume->BlocksPerGroup();
428 	fNumGroups = fVolume->NumGroups();
429 	fFirstBlock = fVolume->FirstDataBlock();
430 	fNumBlocks = fVolume->NumBlocks();
431 
432 	TRACE("BlockAllocator::Initialize(): blocks per group: %lu, block groups: "
433 		"%lu, first block: %lu, num blocks: %lu\n", fBlocksPerGroup,
434 		fNumGroups, fFirstBlock, fNumBlocks);
435 
436 	fGroups = new(std::nothrow) AllocationBlockGroup[fNumGroups];
437 	if (fGroups == NULL)
438 		return B_NO_MEMORY;
439 
440 	TRACE("BlockAllocator::Initialize(): allocated allocation block groups\n");
441 
442 	mutex_lock(&fLock);
443 		// Released by _Initialize
444 
445 	thread_id id = -1; // spawn_kernel_thread(
446 		// (thread_func)BlockAllocator::_Initialize, "ext2 block allocator",
447 		// B_LOW_PRIORITY, this);
448 	if (id < B_OK)
449 		return _Initialize(this);
450 
451 	// mutex_transfer_lock(&fLock, id);
452 
453 	// return resume_thread(id);
454 	panic("Failed to fall back to synchronous block allocator"
455 		"initialization.\n");
456 	return B_ERROR;
457 }
458 
459 
460 status_t
461 BlockAllocator::AllocateBlocks(Transaction& transaction, uint32 minimum,
462 	uint32 maximum, uint32& blockGroup, uint32& start, uint32& length)
463 {
464 	TRACE("BlockAllocator::AllocateBlocks()\n");
465 	MutexLocker lock(fLock);
466 	TRACE("BlockAllocator::AllocateBlocks(): Acquired lock\n");
467 
468 	TRACE("BlockAllocator::AllocateBlocks(): transaction: %ld, min: %lu, "
469 		"max: %lu, block group: %lu, start: %lu, num groups: %lu\n",
470 		transaction.ID(), minimum, maximum, blockGroup, start, fNumGroups);
471 
472 	uint32 bestStart = 0;
473 	uint32 bestLength = 0;
474 	uint32 bestGroup = 0;
475 
476 	uint32 groupNum = blockGroup;
477 
478 	AllocationBlockGroup* last = &fGroups[fNumGroups];
479 	AllocationBlockGroup* group = &fGroups[blockGroup];
480 
481 	for (int32 iterations = 0; iterations < 2; iterations++) {
482 		for (; group < last; ++group, ++groupNum) {
483 			TRACE("BlockAllocator::AllocateBlocks(): Group %lu has largest "
484 			"length of %lu\n", groupNum, group->LargestLength());
485 
486 			if (group->LargestLength() > bestLength) {
487 				if (start <= group->LargestStart()) {
488 					bestStart = group->LargestStart();
489 					bestLength = group->LargestLength();
490 					bestGroup = groupNum;
491 
492 					TRACE("BlockAllocator::AllocateBlocks(): Found a better "
493 						"range: block group: %lu, %lu-%lu\n", groupNum,
494 						bestStart, bestStart + bestLength);
495 
496 					if (bestLength >= maximum)
497 						break;
498 				}
499 			}
500 
501 			start = 0;
502 		}
503 
504 		if (bestLength >= maximum)
505 			break;
506 
507 		groupNum = 0;
508 
509 		group = &fGroups[0];
510 		last = &fGroups[blockGroup + 1];
511 	}
512 
513 	if (bestLength < minimum) {
514 		TRACE("BlockAllocator::AllocateBlocks(): best range (length %lu) "
515 			"doesn't have minimum length of %lu\n", bestLength, minimum);
516 		return B_DEVICE_FULL;
517 	}
518 
519 	if (bestLength > maximum)
520 		bestLength = maximum;
521 
522 	TRACE("BlockAllocator::AllocateBlocks(): Selected range: block group %lu, "
523 		"%lu-%lu\n", bestGroup, bestStart, bestStart + bestLength);
524 
525 	status_t status = fGroups[bestGroup].Allocate(transaction, bestStart,
526 		bestLength);
527 	if (status != B_OK) {
528 		TRACE("BlockAllocator::AllocateBlocks(): Failed to allocate %lu blocks "
529 			"inside block group %lu.\n", bestLength, bestGroup);
530 		return status;
531 	}
532 
533 	start = bestStart;
534 	length = bestLength;
535 	blockGroup = bestGroup;
536 
537 	return B_OK;
538 }
539 
540 
541 status_t
542 BlockAllocator::Allocate(Transaction& transaction, Inode* inode,
543 	off_t numBlocks, uint32 minimum, uint32& start, uint32& allocated)
544 {
545 	if (numBlocks <= 0)
546 		return B_ERROR;
547 
548 	uint32 group = inode->ID() / fVolume->InodesPerGroup();
549 	uint32 preferred = 0;
550 
551 	if (inode->Size() > 0) {
552 		// Try to allocate near it's last blocks
553 		ext2_data_stream* dataStream = &inode->Node().stream;
554 		uint32 numBlocks = inode->Size() / fVolume->BlockSize() + 1;
555 		uint32 lastBlock = 0;
556 
557 		// DANGER! What happens with sparse files?
558 		if (numBlocks < EXT2_DIRECT_BLOCKS) {
559 			// Only direct blocks
560 			lastBlock = dataStream->direct[numBlocks];
561 		} else {
562 			numBlocks -= EXT2_DIRECT_BLOCKS - 1;
563 			uint32 numIndexes = fVolume->BlockSize() / 4;
564 				// block size / sizeof(int32)
565 			uint32 numIndexes2 = numIndexes * numIndexes;
566 			uint32 numIndexes3 = numIndexes2 * numIndexes;
567 			uint32 indexesInIndirect = numIndexes;
568 			uint32 indexesInDoubleIndirect = indexesInIndirect
569 				+ numIndexes2;
570 			// uint32 indexesInTripleIndirect = indexesInDoubleIndirect
571 				// + numIndexes3;
572 
573 			uint32 doubleIndirectBlock = EXT2_DIRECT_BLOCKS + 1;
574 			uint32 indirectBlock = EXT2_DIRECT_BLOCKS;
575 
576 			CachedBlock cached(fVolume);
577 			uint32* indirectData;
578 
579 			if (numBlocks > indexesInDoubleIndirect) {
580 				// Triple indirect blocks
581 				indirectData = (uint32*)cached.SetTo(EXT2_DIRECT_BLOCKS + 2);
582 				if (indirectData == NULL)
583 					return B_IO_ERROR;
584 
585 				uint32 index = (numBlocks - indexesInDoubleIndirect)
586 					/ numIndexes3;
587 				doubleIndirectBlock = indirectData[index];
588 			}
589 
590 			if (numBlocks > indexesInIndirect) {
591 				// Double indirect blocks
592 				indirectData = (uint32*)cached.SetTo(doubleIndirectBlock);
593 				if (indirectData == NULL)
594 					return B_IO_ERROR;
595 
596 				uint32 index = (numBlocks - indexesInIndirect) / numIndexes2;
597 				indirectBlock = indirectData[index];
598 			}
599 
600 			indirectData = (uint32*)cached.SetTo(indirectBlock);
601 				if (indirectData == NULL)
602 					return B_IO_ERROR;
603 
604 			uint32 index = numBlocks / numIndexes;
605 			lastBlock = indirectData[index];
606 		}
607 
608 		group = (lastBlock - fFirstBlock) / fBlocksPerGroup;
609 		preferred = (lastBlock - fFirstBlock) % fBlocksPerGroup + 1;
610 	}
611 
612 	// TODO: Apply some more policies
613 
614 	return AllocateBlocks(transaction, minimum, minimum + 8, group, start,
615 		allocated);
616 }
617 
618 
619 status_t
620 BlockAllocator::Free(Transaction& transaction, uint32 start, uint32 length)
621 {
622 	TRACE("BlockAllocator::Free(%lu, %lu)\n", start, length);
623 	MutexLocker lock(fLock);
624 
625 	if (start <= fFirstBlock) {
626 		panic("Trying to free superblock!\n");
627 		return B_BAD_DATA;
628 	}
629 
630 	if (length == 0)
631 		return B_OK;
632 
633 	TRACE("BlockAllocator::Free(): first block: %lu, blocks per group: %lu\n",
634 		fFirstBlock, fBlocksPerGroup);
635 
636 	start -= fFirstBlock;
637 	uint32 end = start + length - 1;
638 
639 	uint32 group = start / fBlocksPerGroup;
640 	uint32 lastGroup = end / fBlocksPerGroup;
641 	start = start % fBlocksPerGroup;
642 
643 	if (group == lastGroup)
644 		return fGroups[group].Free(transaction, start, length);
645 
646 	TRACE("BlockAllocator::Free(): Freeing from group %lu: %lu, %lu\n", group,
647 		start, fGroups[group].NumBits() - start);
648 
649 	status_t status = fGroups[group].Free(transaction, start,
650 		fGroups[group].NumBits() - start);
651 	if (status != B_OK)
652 		return status;
653 
654 	for (++group; group < lastGroup; ++group) {
655 		TRACE("BlockAllocator::Free(): Freeing all from group %lu\n", group);
656 		status = fGroups[group].FreeAll(transaction);
657 		if (status != B_OK)
658 			return status;
659 	}
660 
661 	TRACE("BlockAllocator::Free(): Freeing from group %lu: 0-%lu \n", group,
662 		end % fBlocksPerGroup);
663 	return fGroups[group].Free(transaction, 0, (end + 1) % fBlocksPerGroup);
664 }
665 
666 
667 /*static*/ status_t
668 BlockAllocator::_Initialize(BlockAllocator* allocator)
669 {
670 	TRACE("BlockAllocator::_Initialize()\n");
671 	// fLock is already heald
672 	Volume* volume = allocator->fVolume;
673 
674 	AllocationBlockGroup* groups = allocator->fGroups;
675 	uint32 numGroups = allocator->fNumGroups - 1;
676 
677 	uint32 freeBlocks = 0;
678 	TRACE("BlockAllocator::_Initialize(): free blocks: %lu\n", freeBlocks);
679 
680 	for (uint32 i = 0; i < numGroups; ++i) {
681 		status_t status = groups[i].Initialize(volume, i,
682 			allocator->fBlocksPerGroup);
683 		if (status != B_OK) {
684 			mutex_unlock(&allocator->fLock);
685 			return status;
686 		}
687 
688 		freeBlocks += groups[i].FreeBits();
689 		TRACE("BlockAllocator::_Initialize(): free blocks: %lu\n", freeBlocks);
690 	}
691 
692 	// Last block group may have less blocks
693 	status_t status = groups[numGroups].Initialize(volume, numGroups,
694 		allocator->fNumBlocks - allocator->fBlocksPerGroup * numGroups
695 		- allocator->fFirstBlock);
696 	if (status != B_OK) {
697 		mutex_unlock(&allocator->fLock);
698 		return status;
699 	}
700 
701 	freeBlocks += groups[numGroups].FreeBits();
702 
703 	TRACE("BlockAllocator::_Initialize(): free blocks: %lu\n", freeBlocks);
704 
705 	mutex_unlock(&allocator->fLock);
706 
707 	if (freeBlocks != volume->NumFreeBlocks()) {
708 		TRACE("Counted free blocks (%lu) doesn't match value in the "
709 			"superblock (%lu).\n", freeBlocks, (uint32)volume->NumFreeBlocks());
710 		return B_BAD_DATA;
711 	}
712 
713 	return B_OK;
714 }
715