xref: /haiku/src/add-ons/kernel/file_systems/btrfs/ExtentAllocator.cpp (revision 46b7da1f4f40f7157d74fc7fb26ff9ec7f2416f2)
1 /*
2  * Copyright 2017, Chế Vũ Gia Hy, cvghy116@gmail.com.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 #include "ExtentAllocator.h"
8 #include "Chunk.h"
9 
10 
11 CachedExtent*
Create(uint64 offset,uint64 length,uint64 flags)12 CachedExtent::Create(uint64 offset, uint64 length, uint64 flags)
13 {
14 	CachedExtent* self = new(std::nothrow) CachedExtent();
15 	if (self == NULL)
16 		return NULL;
17 
18 	self->offset = offset;
19 	self->length = length;
20 	self->refCount = 0;
21 	if ((flags & BTRFS_EXTENT_FLAG_ALLOCATED) != 0)
22 		self->refCount = 1;
23 	self->flags = flags;
24 	self->item = NULL;
25 	return self;
26 }
27 
28 
29 void
Delete()30 CachedExtent::Delete()
31 {
32 	if (item != NULL)
33 		free(item);
34 	item = NULL;
35 	delete this;
36 }
37 
38 
39 bool
IsAllocated() const40 CachedExtent::IsAllocated() const
41 {
42 	return (flags & BTRFS_EXTENT_FLAG_ALLOCATED) != 0;
43 }
44 
45 
46 bool
IsData() const47 CachedExtent::IsData() const
48 {
49 	return (flags & BTRFS_EXTENT_FLAG_DATA) != 0;
50 }
51 
52 
53 void
Info() const54 CachedExtent::Info() const
55 {
56 	const char* extentType = "allocated tree block";
57 	if (IsAllocated() == false && IsData() == false)
58 		extentType = "free tree block";
59 	else if (IsAllocated() == false && IsData() == true)
60 		extentType = "free data extent";
61 	else if (IsAllocated() == true && IsData() == true)
62 		extentType = "allocated data extent";
63 
64 	TRACE("%s at %" B_PRIu64 " length %" B_PRIu64 " refCount %i\n",
65 		extentType, offset, length, refCount);
66 }
67 
68 
69 // ExtentTree Implementation
70 
71 
CachedExtentTree(const TreeDefinition & definition)72 CachedExtentTree::CachedExtentTree(const TreeDefinition& definition)
73 	:
74 	AVLTree<TreeDefinition>(definition)
75 {
76 }
77 
78 
~CachedExtentTree()79 CachedExtentTree::~CachedExtentTree()
80 {
81 	Delete();
82 }
83 
84 
85 /* Find extent that cover or after "offset" and has length >= "size"
86  * it must also satisfy the condition "type".
87  */
88 status_t
FindNext(CachedExtent ** chosen,uint64 offset,uint64 size,uint64 type)89 CachedExtentTree::FindNext(CachedExtent** chosen, uint64 offset, uint64 size,
90 	uint64 type)
91 {
92 	CachedExtent* found = Find(offset);
93 	while (found != NULL && (found->flags != type || found->length < size))
94 		found = Next(found);
95 
96 	if (found == NULL)
97 		return B_ENTRY_NOT_FOUND;
98 	*chosen = found;
99 	return B_OK;
100 }
101 
102 
103 /* this will insert all free extents that are holes,
104  * created by existed allocated extents in the tree
105  * from lowerBound to upperBound
106  */
107 status_t
FillFreeExtents(uint64 lowerBound,uint64 upperBound)108 CachedExtentTree::FillFreeExtents(uint64 lowerBound, uint64 upperBound)
109 {
110 	CachedExtent* node = FindClosest(lowerBound, false);
111 	CachedExtent* hole = NULL;
112 	status_t status = B_OK;
113 	uint64 flags = node->flags & (~BTRFS_EXTENT_FLAG_ALLOCATED);
114 	if (lowerBound < node->offset) {
115 		hole = CachedExtent::Create(lowerBound, node->offset - lowerBound,
116 			flags);
117 		status = _AddFreeExtent(hole);
118 		if (status != B_OK)
119 			return status;
120 	}
121 
122 	CachedExtent* next = NULL;
123 	while ((next = Next(node)) != NULL && next->End() < upperBound) {
124 		if (node->End() == next->offset) {
125 			node = next;
126 			continue;
127 		}
128 
129 		hole = CachedExtent::Create(node->End(), next->offset - node->End(),
130 			flags);
131 		status = _AddFreeExtent(hole);
132 		if (status != B_OK)
133 			return status;
134 		node = next;
135 	}
136 
137 	// final node should be a right most node
138 	if (upperBound > node->End()) {
139 		hole = CachedExtent::Create(node->End(), upperBound - node->End(),
140 			flags);
141 		status = _AddFreeExtent(hole);
142 	}
143 
144 	return status;
145 }
146 
147 
148 void
_RemoveExtent(CachedExtent * node)149 CachedExtentTree::_RemoveExtent(CachedExtent* node)
150 {
151 	node->refCount--;
152 	if (node->refCount <= 0) {
153 		Remove(node);
154 		node->Delete();
155 	}
156 }
157 
158 
159 status_t
_AddAllocatedExtent(CachedExtent * node)160 CachedExtentTree::_AddAllocatedExtent(CachedExtent* node)
161 {
162 	if (node == NULL || node->IsAllocated() == false)
163 		return B_BAD_VALUE;
164 
165 	CachedExtent* found = Find(node->offset);
166 	if (found == NULL) {
167 		Insert(node);
168 		return B_OK;
169 	}
170 
171 	if ((found->IsData() && !node->IsData())
172 		|| (!found->IsData() && node->IsData())) {
173 		// this shouldn't happen as because different type has
174 		// its different region for locating.
175 		node->Delete();
176 		return B_BAD_VALUE;
177 	}
178 
179 	if (found->IsAllocated() == false) {
180 		// split free extents (found)
181 		if (node->End() > found->End()) {
182 			// TODO: when to handle this ?
183 			node->Delete();
184 			return B_BAD_VALUE;
185 		}
186 
187 		// |----- found ------|
188 		//    |-- node ---|
189 		uint64 diff = node->offset - found->offset;
190 		found->offset += diff + node->length;
191 		found->length -= diff + node->length;
192 		// diff < 0 couldn't happen because of the Compare function
193 		if (diff > 0) {
194 			CachedExtent* leftEmpty = CachedExtent::Create(
195 				node->offset - diff, diff, found->flags);
196 			_AddFreeExtent(leftEmpty);
197 		}
198 
199 		if (found->length == 0) {
200 			// free-extent that has no space
201 			_RemoveExtent(found);
202 		}
203 
204 		Insert(node);
205 		return B_OK;
206 	}
207 
208 	if (found->length == node->length) {
209 		found->refCount++;
210 	} else {
211 		// TODO: What should we do in this case ?
212 		return B_BAD_VALUE;
213 	}
214 	node->Delete();
215 
216 	return B_OK;
217 }
218 
219 
220 status_t
_AddFreeExtent(CachedExtent * node)221 CachedExtentTree::_AddFreeExtent(CachedExtent* node)
222 {
223 	if (node == NULL || node->IsAllocated() == true)
224 		return B_BAD_VALUE;
225 
226 	CachedExtent* found = Find(node->offset);
227 	if (found == NULL) {
228 		Insert(node);
229 		_CombineFreeExtent(node);
230 		return B_OK;
231 	}
232 
233 	if ((found->IsData() && !node->IsData())
234 		|| (!found->IsData() && node->IsData())) {
235 		// this shouldn't happen as because different type has
236 		// its different region for locating.
237 		node->Delete();
238 		return B_BAD_VALUE;
239 	}
240 
241 	if (found->End() < node->End()) {
242 		// |---- found-----|
243 		//      |--- node ------|
244 		CachedExtent* rightEmpty = CachedExtent::Create(found->End(),
245 			node->End() - found->End(), node->flags);
246 		_AddFreeExtent(rightEmpty);
247 		node->length -= node->End() - found->End();
248 	}
249 
250 	if (found->IsAllocated() == true) {
251 		// free part of the allocated extents(found)
252 		// |----- found ------|
253 		//    |-- node ---|
254 		uint64 diff = node->offset - found->offset;
255 		found->offset += diff + node->length;
256 		found->length -= diff + node->length;
257 		// diff < 0 couldn't happen because of the Compare function
258 		if (diff > 0){
259 			CachedExtent* left = CachedExtent::Create(node->offset - diff,
260 				diff, found->flags);
261 			_AddAllocatedExtent(left);
262 		}
263 
264 		if (found->length == 0)
265 			_RemoveExtent(found);
266 
267 		Insert(node);
268 		_CombineFreeExtent(node);
269 	}
270 
271 	return B_OK;
272 }
273 
274 
275 status_t
AddExtent(CachedExtent * extent)276 CachedExtentTree::AddExtent(CachedExtent* extent)
277 {
278 	if (extent->IsAllocated() == true)
279 		return _AddAllocatedExtent(extent);
280 	return _AddFreeExtent(extent);
281 }
282 
283 
284 void
_CombineFreeExtent(CachedExtent * node)285 CachedExtentTree::_CombineFreeExtent(CachedExtent* node)
286 {
287 	// node should be inserted first to call this function,
288 	// otherwise it will overdelete.
289 	if (node->IsAllocated() == true)
290 		return;
291 
292 	CachedExtent* other = Next(node);
293 	if (other != NULL) {
294 		if (node->End() == other->offset && node->flags == other->flags) {
295 			node->length += other->length;
296 			_RemoveExtent(other);
297 		}
298 	}
299 
300 	other = Previous(node);
301 	if (other != NULL) {
302 		if (other->End() == node->offset && node->flags == other->flags) {
303 			other->length += node->length;
304 			_RemoveExtent(node);
305 		}
306 	}
307 }
308 
309 
310 void
_DumpInOrder(CachedExtent * node) const311 CachedExtentTree::_DumpInOrder(CachedExtent* node) const
312 {
313 	if (node == NULL)
314 		return;
315 	_DumpInOrder(_GetValue(node->left));
316 	node->Info();
317 	_DumpInOrder(_GetValue(node->right));
318 }
319 
320 
321 void
DumpInOrder() const322 CachedExtentTree::DumpInOrder() const
323 {
324 	TRACE("\n");
325 	_DumpInOrder(RootNode());
326 	TRACE("\n");
327 }
328 
329 
330 void
Delete()331 CachedExtentTree::Delete()
332 {
333 	_Delete(RootNode());
334 	Clear();
335 }
336 
337 
338 void
_Delete(CachedExtent * node)339 CachedExtentTree::_Delete(CachedExtent* node)
340 {
341 	if (node == NULL)
342 		return;
343 	_Delete(_GetValue(node->left));
344 	_Delete(_GetValue(node->right));
345 	node->Delete();
346 }
347 
348 
349 // BlockGroup
350 
351 
BlockGroup(BTree * extentTree)352 BlockGroup::BlockGroup(BTree* extentTree)
353 	:
354 	fItem(NULL)
355 {
356 	fCurrentExtentTree = new(std::nothrow) BTree(extentTree->SystemVolume(),
357 		extentTree->RootBlock());
358 }
359 
360 
~BlockGroup()361 BlockGroup::~BlockGroup()
362 {
363 	if (fItem != NULL) {
364 		free(fItem);
365 		fItem = NULL;
366 	}
367 
368 	delete fCurrentExtentTree;
369 	fCurrentExtentTree = NULL;
370 }
371 
372 
373 status_t
SetExtentTree(off_t rootAddress)374 BlockGroup::SetExtentTree(off_t rootAddress)
375 {
376 	status_t status = fCurrentExtentTree->SetRoot(rootAddress, NULL);
377 	if (status != B_OK)
378 		return status;
379 
380 	if (fItem != NULL) {
381 		// re-allocate BlockGroup;
382 		uint64 flags = Flags();
383 		return Initialize(flags);
384 	}
385 
386 	return B_OK;
387 }
388 
389 
390 /* Initialize BlockGroup that has flag
391  */
392 status_t
Initialize(uint64 flag)393 BlockGroup::Initialize(uint64 flag)
394 {
395 	if (fCurrentExtentTree == NULL)
396 		return B_NO_MEMORY;
397 
398 	if (fItem != NULL)
399 		free(fItem);
400 
401 	TRACE("BlockGroup::Initialize() find block group has flag: %"
402 		B_PRIu64 "\n", flag);
403 	BTree::Path path(fCurrentExtentTree);
404 	fKey.SetObjectID(0);
405 	// find first item in extent to get the ObjectID
406 	// ignore type because block_group is not always the first item
407 	fKey.SetType(BTRFS_KEY_TYPE_ANY);
408 	fCurrentExtentTree->FindNext(&path, fKey, NULL);
409 
410 	fKey.SetType(BTRFS_KEY_TYPE_BLOCKGROUP_ITEM);
411 	fKey.SetOffset(0);
412 	status_t status;
413 
414 	while (true) {
415 		status = fCurrentExtentTree->FindNext(&path, fKey, (void**)&fItem);
416 		if (status != B_OK || (Flags() & flag) == flag)
417 			break;
418 		fKey.SetObjectID(End());
419 		fKey.SetOffset(0);
420 	}
421 
422 	if (status != B_OK)
423 		ERROR("BlockGroup::Initialize() cannot find block group\n");
424 
425 	return status;
426 }
427 
428 
429 status_t
LoadExtent(CachedExtentTree * tree,bool inverse)430 BlockGroup::LoadExtent(CachedExtentTree* tree, bool inverse)
431 {
432 	if (fItem == NULL)
433 		return B_NO_INIT;
434 
435 	uint64 flags = (Flags() & BTRFS_BLOCKGROUP_FLAG_DATA) != 0 ?
436 		BTRFS_EXTENT_FLAG_DATA : BTRFS_EXTENT_FLAG_TREE_BLOCK;
437 	if (inverse == false)
438 		flags |= BTRFS_EXTENT_FLAG_ALLOCATED;
439 
440 	uint64 start = Start();
441 	CachedExtent* insert;
442 	void* data;
443 	uint64 extentSize = fCurrentExtentTree->SystemVolume()->BlockSize();
444 
445 	btrfs_key key;
446 	key.SetType(BTRFS_KEY_TYPE_METADATA_ITEM);
447 	if ((flags & BTRFS_EXTENT_FLAG_DATA) != 0)
448 		key.SetType(BTRFS_KEY_TYPE_EXTENT_ITEM);
449 	key.SetObjectID(start);
450 	key.SetOffset(0);
451 
452 	TreeIterator iterator(fCurrentExtentTree, key);
453 	status_t status;
454 	while (true) {
455 		status = iterator.GetNextEntry(&data);
456 		key = iterator.Key();
457 		if (status != B_OK) {
458 			if (key.ObjectID() != Start())
459 				break;
460 			// When we couldn't find the item and key has
461 			// objectid == BlockGroup's objectID, because
462 			// key's type < BLOCKGROUP_ITEM
463 			continue;
464 		}
465 
466 		if (inverse == false) {
467 			if ((flags & BTRFS_EXTENT_FLAG_DATA) != 0)
468 				extentSize = key.Offset();
469 			insert = CachedExtent::Create(key.ObjectID(), extentSize, flags);
470 			insert->item = (btrfs_extent*)data;
471 		} else {
472 			extentSize = key.ObjectID() - start;
473 			insert = CachedExtent::Create(start, extentSize, flags);
474 			free(data);		// free extent doesn't have data;
475 		}
476 		_InsertExtent(tree, insert);
477 		start = key.ObjectID() + extentSize;
478 	}
479 
480 	if (inverse == true)
481 		_InsertExtent(tree, start, End() - start, flags);
482 
483 	return B_OK;
484 }
485 
486 
487 status_t
_InsertExtent(CachedExtentTree * tree,uint64 start,uint64 length,uint64 flags)488 BlockGroup::_InsertExtent(CachedExtentTree* tree, uint64 start, uint64 length,
489 	uint64 flags)
490 {
491 	CachedExtent* extent = CachedExtent::Create(start, length, flags);
492 	return _InsertExtent(tree, extent);
493 }
494 
495 
496 status_t
_InsertExtent(CachedExtentTree * tree,CachedExtent * extent)497 BlockGroup::_InsertExtent(CachedExtentTree* tree, CachedExtent* extent)
498 {
499 	if (extent->length == 0)
500 		return B_BAD_VALUE;
501 
502 	status_t status = tree->AddExtent(extent);
503 	if (status != B_OK) {
504 		return status;
505 	}
506 	return B_OK;
507 }
508 
509 
510 // ExtentAllocator
511 
512 
ExtentAllocator(Volume * volume)513 ExtentAllocator::ExtentAllocator(Volume* volume)
514 	:
515 	fVolume(volume),
516 	fTree(NULL),
517 	fStart((uint64)-1),
518 	fEnd(0)
519 {
520 	fTree = new(std::nothrow) CachedExtentTree(TreeDefinition());
521 }
522 
523 
~ExtentAllocator()524 ExtentAllocator::~ExtentAllocator()
525 {
526 	delete fTree;
527 	fTree = NULL;
528 }
529 
530 
531 status_t
_LoadExtentTree(uint64 flags)532 ExtentAllocator::_LoadExtentTree(uint64 flags)
533 {
534 	TRACE("ExtentAllocator::_LoadExtentTree() flags: %" B_PRIu64 "\n", flags);
535 	BlockGroup blockGroup(fVolume->ExtentTree());
536 	status_t status = blockGroup.Initialize(flags);
537 	if (status != B_OK)
538 		return status;
539 
540 	for (int i = 0; i < BTRFS_NUM_ROOT_BACKUPS; i++) {
541 		uint64 extentRootAddr =
542 			fVolume->SuperBlock().backup_roots[i].ExtentRoot();
543 		if (extentRootAddr == 0)
544 			continue;	// new device has 0 root address
545 
546 		status = blockGroup.SetExtentTree(extentRootAddr);
547 		if (status != B_OK)
548 			return status;
549 		status = blockGroup.LoadExtent(fTree, false);
550 		if (status != B_OK)
551 			return status;
552 	}
553 
554 	if (fTree->IsEmpty())	// 4 backup roots is 0
555 		return B_OK;
556 
557 	uint64 lowerBound = blockGroup.Start();
558 	uint64 upperBound = blockGroup.End();
559 	status = fTree->FillFreeExtents(lowerBound, upperBound);
560 	if (status != B_OK) {
561 		ERROR("ExtentAllocator::_LoadExtentTree() could not fill free extents"
562 			"start %" B_PRIu64 " end %" B_PRIu64 "\n", lowerBound,
563 			upperBound);
564 		return status;
565 	}
566 
567 	if (fStart > lowerBound)
568 		fStart = lowerBound;
569 	if (fEnd < upperBound)
570 		fEnd = upperBound;
571 	return B_OK;
572 }
573 
574 
575 status_t
Initialize()576 ExtentAllocator::Initialize()
577 {
578 	TRACE("ExtentAllocator::Initialize()\n");
579 	if (fTree == NULL)
580 		return B_NO_MEMORY;
581 
582 	status_t status = _LoadExtentTree(BTRFS_BLOCKGROUP_FLAG_DATA);
583 	if (status != B_OK) {
584 		ERROR("ExtentAllocator:: could not load extent tree (data)\n");
585 		return status;
586 	}
587 
588 	status = _LoadExtentTree(BTRFS_BLOCKGROUP_FLAG_SYSTEM);
589 	if (status != B_OK) {
590 		ERROR("ExtentAllocator:: could not load extent tree (system)\n");
591 		return status;
592 	}
593 
594 	status = _LoadExtentTree(BTRFS_BLOCKGROUP_FLAG_METADATA);
595 	if (status != B_OK) {
596 		ERROR("ExtentAllocator:: could not load extent tree (metadata)\n");
597 		return status;
598 	}
599 
600 	fTree->DumpInOrder();
601 	return B_OK;
602 }
603 
604 
605 /* Allocate extent that
606  * startwith or after "start" and has size >= "size"
607  * For now the allocating strategy is "first fit"
608  */
609 status_t
_Allocate(uint64 & found,uint64 start,uint64 size,uint64 type)610 ExtentAllocator::_Allocate(uint64& found, uint64 start, uint64 size,
611 	uint64 type)
612 {
613 	TRACE("ExtentAllocator::_Allocate() start %" B_PRIu64 " size %" B_PRIu64
614 		" type %" B_PRIu64 "\n", start, size, type);
615 	CachedExtent* chosen;
616 	status_t status;
617 	while (true) {
618 		status = fTree->FindNext(&chosen, start, size, type);
619 		if (status != B_OK)
620 			return status;
621 
622 		if (TreeDefinition().Compare(start, chosen) == 0) {
623 			if (chosen->End() - start >= size) {
624 				// if covered and have enough space for allocating
625 				break;
626 			}
627 			start = chosen->End();	// set new start and retry
628 		} else
629 			start = chosen->offset;
630 	}
631 
632 	TRACE("ExtentAllocator::_Allocate() found %" B_PRIu64 "\n", start);
633 	chosen = CachedExtent::Create(start, size,
634 		type | BTRFS_EXTENT_FLAG_ALLOCATED);
635 	status = fTree->AddExtent(chosen);
636 	if (status != B_OK)
637 		return status;
638 
639 	found = start;
640 	return B_OK;
641 }
642 
643 
644 // Allocate block for metadata
645 // flags is BLOCKGROUP's flags
646 status_t
AllocateTreeBlock(uint64 & found,uint64 start,uint64 flags)647 ExtentAllocator::AllocateTreeBlock(uint64& found, uint64 start, uint64 flags)
648 {
649 	// TODO: implement more features here with flags, e.g DUP, RAID, etc
650 
651 	BlockGroup blockGroup(fVolume->ExtentTree());
652 	status_t status = blockGroup.Initialize(flags);
653 	if (status != B_OK)
654 		return status;
655 	if (start == (uint64)-1)
656 		start = blockGroup.Start();
657 
658 	// normalize inputs
659 	uint64 remainder = start % fVolume->BlockSize();
660 	if (remainder != 0)
661 		start += fVolume->BlockSize() - remainder;
662 
663 	status = _Allocate(found, start, fVolume->BlockSize(),
664 		BTRFS_EXTENT_FLAG_TREE_BLOCK);
665 	if (status != B_OK)
666 		return status;
667 
668 	// check here because tree block locate in 2 blockgroups (system and
669 	// metadata), and there might be a case one can get over the limit.
670 	if (found >= blockGroup.End())
671 		return B_BAD_DATA;
672 	return B_OK;
673 }
674 
675 
676 // Allocate block for file data
677 status_t
AllocateDataBlock(uint64 & found,uint64 size,uint64 start,uint64 flags)678 ExtentAllocator::AllocateDataBlock(uint64& found, uint64 size, uint64 start,
679 	uint64 flags)
680 {
681 	// TODO: implement more features here with flags, e.g DUP, RAID, etc
682 
683 	if (start == (uint64)-1) {
684 		BlockGroup blockGroup(fVolume->ExtentTree());
685 		status_t status = blockGroup.Initialize(flags);
686 		if (status != B_OK)
687 			return status;
688 		start = blockGroup.Start();
689 	}
690 
691 	// normalize inputs
692 	uint64 remainder = start % fVolume->SectorSize();
693 	if (remainder != 0)
694 		start += fVolume->SectorSize() - remainder;
695 	size = size / fVolume->SectorSize() * fVolume->SectorSize();
696 
697 	return _Allocate(found, start, size, BTRFS_EXTENT_FLAG_DATA);
698 }
699