xref: /haiku/src/add-ons/kernel/file_systems/btrfs/BTree.cpp (revision 2897df967633aab846ff4917b53e2af7d1e54eeb)
1 /*
2  * Copyright 2017, Chế Vũ Gia Hy, cvghy116@gmail.com.
3  * Copyright 2011, Jérôme Duval, korli@users.berlios.de.
4  * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
5  * This file may be used under the terms of the MIT License.
6  */
7 
8 
9 //! BTree implementation
10 
11 
12 #include "BTree.h"
13 #include "Journal.h"
14 
15 #include <algorithm>
16 
17 
18 //#define TRACE_BTRFS
19 #ifdef TRACE_BTRFS
20 #	define TRACE(x...) dprintf("\33[34mbtrfs:\33[0m " x)
21 #else
22 #	define TRACE(x...) ;
23 #endif
24 #	define ERROR(x...) dprintf("\33[34mbtrfs:\33[0m " x)
25 
26 
27 BTree::Node::Node(Volume* volume)
28 	:
29 	fNode(NULL),
30 	fVolume(volume),
31 	fBlockNumber(0),
32 	fWritable(false)
33 {
34 }
35 
36 
37 BTree::Node::Node(Volume* volume, off_t block)
38 	:
39 	fNode(NULL),
40 	fVolume(volume),
41 	fBlockNumber(0),
42 	fWritable(false)
43 {
44 	SetTo(block);
45 }
46 
47 
48 BTree::Node::~Node()
49 {
50 	Unset();
51 }
52 
53 
54 void
55 BTree::Node::Keep()
56 {
57 	fNode = NULL;
58 }
59 
60 void
61 BTree::Node::Unset()
62 {
63 	if (fNode != NULL) {
64 		block_cache_put(fVolume->BlockCache(), fBlockNumber);
65 		fNode = NULL;
66 	}
67 }
68 
69 
70 void
71 BTree::Node::SetTo(off_t block)
72 {
73 	Unset();
74 	fBlockNumber = block;
75 	fNode = (btrfs_stream*)block_cache_get(fVolume->BlockCache(), block);
76 }
77 
78 
79 void
80 BTree::Node::SetToWritable(off_t block, int32 transactionId, bool empty)
81 {
82 	Unset();
83 	fBlockNumber = block;
84 	fWritable = true;
85 	if (empty) {
86 		fNode = (btrfs_stream*)block_cache_get_empty(fVolume->BlockCache(),
87 			block, transactionId);
88 	} else {
89 		fNode = (btrfs_stream*)block_cache_get_writable(fVolume->BlockCache(),
90 			block, transactionId);
91 	}
92 }
93 
94 
95 status_t
96 BTree::Node::SearchSlot(const btrfs_key& key, int* slot, btree_traversing type)
97 	const
98 {
99 	//binary search for item slot in a node
100 	int entrySize = sizeof(btrfs_entry);
101 	if (Level() != 0) {
102 		// internal node
103 		entrySize = sizeof(btrfs_index);
104 	}
105 
106 	int high = ItemCount();
107 	int low = 0, mid = 0, comp = 0;
108 	uint8* base = (uint8*)fNode + sizeof(btrfs_header);
109 	const btrfs_key* other;
110 	while (low < high) {
111 		mid = (low + high) / 2;
112 		other = (const btrfs_key*)(base + mid * entrySize);
113 		comp = key.Compare(*other);
114 		if (comp < 0)
115 			high = mid;
116 		else if (comp > 0)
117 			low = mid + 1;
118 		else {
119 			*slot = mid;
120 			return B_OK; 		//if key is in node
121 		}
122 	}
123 
124 	// |--item1--|--item2--|--item3--|--etc--|
125 	//     m-1        m        m+1
126 	//           k          		: comp < 0
127 	//                     k		: comp > 0
128 	if (type == BTREE_BACKWARD && comp < 0)
129 		mid--;
130 	else if (type == BTREE_FORWARD && comp > 0)
131 		mid++;
132 
133 	if (type == BTREE_EXACT || mid < 0)
134 		return B_ENTRY_NOT_FOUND;
135 
136 	*slot = mid;
137 	TRACE("SearchSlot() found slot %" B_PRId32 " comp %" B_PRId32 "\n",
138 		*slot, comp);
139 	return B_OK;
140 }
141 
142 
143 /*
144  * calculate used space except the header.
145  * type is only for leaf node
146  * type 1: only item space
147  * type 2: only item data space
148  * type 3: both type 1 and 2
149  */
150 int
151 BTree::Node::_CalculateSpace(uint32 from, uint32 to, uint8 type) const
152 {
153 	if (to < from || to >= ItemCount())
154 		return 0;
155 
156 	if (Level() != 0)
157 		return sizeof(btrfs_index) * (to - from + 1);
158 
159 	uint32 result = 0;
160 	if ((type & 1) == 1) {
161 		result += sizeof(btrfs_entry) * (to - from + 1);
162 	}
163 	if ((type & 2) == 2) {
164 		result += Item(from)->Offset() + Item(from)->Size()
165 			- Item(to)->Offset();
166 	}
167 
168 	return result;
169 }
170 
171 
172 int
173 BTree::Node::SpaceUsed() const
174 {
175 	if (Level() == 0)
176 		return _CalculateSpace(0, ItemCount() - 1, 3);
177 	return _CalculateSpace(0, ItemCount() - 1, 0);
178 }
179 
180 
181 int
182 BTree::Node::SpaceLeft() const
183 {
184 	return fVolume->BlockSize() - SpaceUsed() - sizeof(btrfs_header);
185 }
186 
187 
188 void
189 BTree::Node::_Copy(const Node* origin, uint32 at, uint32 from, uint32 to,
190 	int length) const
191 {
192 	TRACE("Node::_Copy() at: %d from: %d to: %d length: %d\n",
193 		at, from, to, length);
194 
195 	if (Level() == 0) {
196 		memcpy(Item(at), origin->Item(from), origin->_CalculateSpace(from, to));
197 		// Item offset of copied node must be changed to get the
198 		// item data offset correctly. length can be zero
199 		// but let it there doesn't harm anything.
200 		for (uint32 i = at; i - at <= to - from; ++i)
201 			Item(i)->SetOffset(Item(i)->Offset() - length);
202 
203 		memcpy(ItemData(at + to - from), origin->ItemData(to),
204 			origin->_CalculateSpace(from, to, 2));
205 	} else {
206 		memcpy(Index(at), origin->Index(from),
207 			origin->_CalculateSpace(from, to));
208 	}
209 }
210 
211 
212 status_t
213 BTree::Node::_SpaceCheck(int length) const
214 {
215 	// this is a little bit weird here because we can't find
216 	// any suitable error code
217 	if (length < 0 && std::abs(length) >= SpaceUsed())
218 		return B_DIRECTORY_NOT_EMPTY;	// not enough data to delete
219 	if (length > 0 && length >= SpaceLeft())
220 		return B_DEVICE_FULL;			// no spare space
221 	return B_OK;
222 }
223 
224 
225 /*
226  * copy node header, items and items data
227  * length is size to insert/remove
228  * if node is a internal node, length isnt used
229  * length = 0: Copy a whole
230  * length < 0: removing
231  * length > 0: inserting
232  */
233 status_t
234 BTree::Node::Copy(const Node* origin, uint32 start, uint32 end, int length)
235 	const
236 {
237 	status_t status = origin->_SpaceCheck(length);
238 	if (status != B_OK)
239 		return status;
240 
241 	memcpy(fNode, origin->fNode, sizeof(btrfs_header));
242 	if (length == 0) {
243 		// like removing [0, start - 1] and keeping [start, end]
244 		length = -origin->_CalculateSpace(0, start - 1, 2);
245 		_Copy(origin, 0, start, end, length);
246 	} else if (length < 0) {
247 		//removing all items in [start, end]
248 		if (start > 0)
249 			_Copy(origin, 0, 0, start - 1, 0);	// <-- [start,...
250 		if (end + 1 < origin->ItemCount()) {
251 			// ..., end] -->
252 			// we only care data size
253 			length += origin->_CalculateSpace(start, end);
254 			_Copy(origin, start, end + 1, origin->ItemCount() - 1, length);
255 		}
256 	} else {
257 		//inserting in [start, end] - make a hole for later
258 		if (start > 0)
259 			_Copy(origin, 0, 0, start - 1, 0);
260 		if (start < origin->ItemCount()) {
261 			length -= origin->_CalculateSpace(start, end);
262 			_Copy(origin, end + 1, start, origin->ItemCount() - 1, length);
263 		}
264 	}
265 
266 	return B_OK;
267 }
268 
269 
270 /* Like copy but here we use memmove on current node.
271  */
272 status_t
273 BTree::Node::MoveEntries(uint32 start, uint32 end, int length) const
274 {
275 	status_t status = _SpaceCheck(length);
276 	if (status != B_OK || length == 0/*B_OK*/)
277 		return status;
278 
279 	int entrySize = sizeof(btrfs_entry);
280 	if (Level() != 0)
281 		entrySize = sizeof(btrfs_index);
282 
283 	uint8* base = (uint8*)fNode + sizeof(btrfs_header);
284 	end++;
285 	if (length < 0) {
286 		// removing [start, end]
287 		TRACE("Node::MoveEntries() removing ... start %" B_PRIu32 " end %"
288 			B_PRIu32 " length %i\n", start, end, length);
289 		length += _CalculateSpace(start, end - 1);
290 	} else {
291 		// length > 0
292 		// inserting into [start, end] - make room for later
293 		TRACE("Node::MoveEntries() inserting ... start %" B_PRIu32 " end %"
294 			B_PRIu32 " length %i\n", start, end, length);
295 		length -= _CalculateSpace(start, end - 1);
296 		std::swap(start, end);
297 	}
298 
299 	if (end >= ItemCount())
300 		return B_OK;
301 
302 	int dataSize = _CalculateSpace(end, ItemCount() - 1, 2);
303 	// moving items/block pointers
304 	memmove(base + start * entrySize, base + end * entrySize,
305 		_CalculateSpace(end, ItemCount() - 1));
306 	if (Level() == 0) {
307 		// moving item data
308 		int num = start - end;
309 		for (uint32 i = start; i < ItemCount() + num; ++i)
310 			Item(i)->SetOffset(Item(i)->Offset() - length);
311 
312 		memmove(ItemData(ItemCount() - 1) - length, ItemData(ItemCount() - 1),
313 			dataSize);
314 	}
315 
316 	return B_OK;
317 }
318 
319 
320 //	#pragma mark - BTree::Path implementation
321 
322 
323 BTree::Path::Path(BTree* tree)
324 	:
325 	fTree(tree)
326 {
327 	for (int i = 0; i < BTRFS_MAX_TREE_DEPTH; ++i) {
328 		fNodes[i] = NULL;
329 		fSlots[i] = 0;
330 	}
331 }
332 
333 
334 BTree::Path::~Path()
335 {
336 	for (int i = 0; i < BTRFS_MAX_TREE_DEPTH; ++i) {
337 		delete fNodes[i];
338 		fNodes[i] = NULL;
339 		fSlots[i] = 0;
340 	}
341 }
342 
343 
344 BTree::Node*
345 BTree::Path::GetNode(int level, int* _slot) const
346 {
347 	if (_slot != NULL)
348 		*_slot = fSlots[level];
349 	return fNodes[level];
350 }
351 
352 
353 BTree::Node*
354 BTree::Path::SetNode(off_t block, int slot)
355 {
356 	Node node(fTree->SystemVolume(), block);
357 	return SetNode(&node, slot);
358 }
359 
360 
361 BTree::Node*
362 BTree::Path::SetNode(const Node* node, int slot)
363 {
364 	uint8 level = node->Level();
365 	if (fNodes[level] == NULL) {
366 		fNodes[level] = new Node(fTree->SystemVolume(), node->BlockNum());
367 		if (fNodes[level] == NULL)
368 			return NULL;
369 	} else
370 		fNodes[level]->SetTo(node->BlockNum());
371 
372 	if (slot == -1)
373 		fSlots[level] = fNodes[level]->ItemCount() - 1;
374 	else
375 		fSlots[level] = slot;
376 	return fNodes[level];
377 }
378 
379 
380 int
381 BTree::Path::Move(int level, int step)
382 {
383 	fSlots[level] += step;
384 	if (fSlots[level] < 0)
385 		return -1;
386 	if (fSlots[level] >= fNodes[level]->ItemCount())
387 		return 1;
388 	return 0;
389 }
390 
391 
392 status_t
393 BTree::Path::GetEntry(int slot, btrfs_key* _key, void** _value, uint32* _size,
394 	uint32* _offset)
395 {
396 	BTree::Node* leaf = fNodes[0];
397 	if (slot < 0 || slot >= leaf->ItemCount())
398 		return B_ENTRY_NOT_FOUND;
399 
400 	if (_key != NULL)
401 		*_key = leaf->Item(slot)->key;
402 
403 	uint32 itemSize = leaf->Item(slot)->Size();
404 	if (_value != NULL) {
405 		*_value = malloc(itemSize);
406 		if (*_value == NULL)
407 			return B_NO_MEMORY;
408 
409 		memcpy(*_value, leaf->ItemData(slot), itemSize);
410 	}
411 
412 	if (_size != NULL)
413 		*_size = itemSize;
414 
415 	if (_offset != NULL)
416 		*_offset = leaf->Item(slot)->Offset();
417 
418 	return B_OK;
419 }
420 
421 
422 status_t
423 BTree::Path::SetEntry(int slot, const btrfs_entry& entry, void* value)
424 {
425 	if (slot < 0)
426 		return B_ENTRY_NOT_FOUND;
427 
428 	memcpy(fNodes[0]->Item(slot), &entry, sizeof(btrfs_entry));
429 	memcpy(fNodes[0]->ItemData(slot), value, entry.Size());
430 	return B_OK;
431 }
432 
433 
434 /*
435  * Allocate and copy block and do all the changes that it can.
436  * for now, we only copy-on-write tree block,
437  * file data is "nocow" by default.
438  *
439  * 	o	parent	o
440  * 	|	 ===> 	 \
441  * 	o	      	x o
442  */
443 status_t
444 BTree::Path::CopyOnWrite(Transaction& transaction, int level, uint32 start,
445 	int num, int length)
446 {
447 	Node* node = fNodes[level];
448 	if (node == NULL)
449 		return B_BAD_VALUE;
450 
451 	status_t status;
452 	if (transaction.HasBlock(node->BlockNum())) {
453 		// cow-ed block can not be cow-ed again
454 		status = node->MoveEntries(start, start + num - 1, length);
455 		if (status != B_OK)
456 			return status;
457 
458 		node->SetGeneration(transaction.SystemID());
459 		if (length < 0)
460 			node->SetItemCount(node->ItemCount() - num);
461 		else if (length > 0)
462 			node->SetItemCount(node->ItemCount() + num);
463 
464 		return B_OK;
465 	}
466 
467 	uint64 address;
468 	fsblock_t block;
469 	status = fTree->SystemVolume()->GetNewBlock(address, block);
470 	if (status != B_OK)
471 		return status;
472 
473 	fNodes[level] = new(std::nothrow) BTree::Node(fTree->SystemVolume());
474 	if (fNodes[level] == NULL)
475 		return B_NO_MEMORY;
476 
477 	fNodes[level]->SetToWritable(block, transaction.ID(), true);
478 
479 	status = fNodes[level]->Copy(node, start, start + num - 1, length);
480 	if (status != B_OK)
481 		return status;
482 
483 	fNodes[level]->SetGeneration(transaction.SystemID());
484 	fNodes[level]->SetLogicalAddress(address);
485 	if (length < 0)
486 		fNodes[level]->SetItemCount(node->ItemCount() - num);
487 	else if (length > 0)
488 		fNodes[level]->SetItemCount(node->ItemCount() + num);
489 	else
490 		fNodes[level]->SetItemCount(num);
491 
492 	// change pointer of this node in parent
493 	int parentSlot;
494 	Node* parentNode = GetNode(level + 1, &parentSlot);
495 	if (parentNode != NULL)
496 		parentNode->Index(parentSlot)->SetLogicalAddress(address);
497 
498 	if (level == fTree->RootLevel())
499 		fTree->SetRoot(fNodes[level]);
500 
501 	delete node;
502 	return B_OK;
503 }
504 
505 
506 /* Copy-On-Write all internal nodes start from a specific level.
507  * level > 0: to root
508  * level <= 0: to leaf
509  *
510  *		path	cow-path       path    cow-path
511  *	=================================================
512  *		root	cow-root       root	level < 0
513  *		 |  	|               |
514  *		 n1 	cow-n1         ...______
515  *		 |  	|               |       \
516  *		 n2 	cow-n2          n1     cow-n1
517  *		 |  	/               |        |
518  *		...____/                n2     cow-n2
519  *		 |  	                |        |
520  *		leaf	level > 0      leaf    cow-leaf
521  */
522 status_t
523 BTree::Path::InternalCopy(Transaction& transaction, int level)
524 {
525 	if (std::abs(level) >= fTree->RootLevel())
526 		return B_OK;
527 
528 	TRACE("Path::InternalCopy() level %i\n", level);
529 	int from, to;
530 	if (level > 0) {
531 		from = level;
532 		to = fTree->RootLevel();
533 	} else {
534 		from = 0;
535 		to = std::abs(level);
536 	}
537 
538 	Node* node = NULL;
539 	status_t status;
540 	while (from <= to) {
541 		node = fNodes[from];
542 		status = CopyOnWrite(transaction, from, 0, node->ItemCount(), 0);
543 		from++;
544 		if (status != B_OK)
545 			return status;
546 	}
547 
548 	return B_OK;
549 }
550 
551 
552 //	#pragma mark - BTree implementation
553 
554 
555 BTree::BTree(Volume* volume)
556 	:
557 	fRootBlock(0),
558 	fRootLevel(0),
559 	fVolume(volume)
560 {
561 	mutex_init(&fIteratorLock, "btrfs b+tree iterator");
562 }
563 
564 
565 BTree::BTree(Volume* volume, btrfs_stream* stream)
566 	:
567 	fRootBlock(0),
568 	fRootLevel(0),
569 	fVolume(volume)
570 {
571 	mutex_init(&fIteratorLock, "btrfs b+tree iterator");
572 }
573 
574 
575 BTree::BTree(Volume* volume, fsblock_t rootBlock)
576 	:
577 	fRootBlock(rootBlock),
578 	fVolume(volume)
579 {
580 	mutex_init(&fIteratorLock, "btrfs b+tree iterator");
581 }
582 
583 
584 BTree::~BTree()
585 {
586 	// if there are any TreeIterators left, we need to stop them
587 	// (can happen when the tree's inode gets deleted while
588 	// traversing the tree - a TreeIterator doesn't lock the inode)
589 	mutex_lock(&fIteratorLock);
590 
591 	SinglyLinkedList<TreeIterator>::Iterator iterator
592 		= fIterators.GetIterator();
593 	while (iterator.HasNext())
594 		iterator.Next()->Stop();
595 	mutex_destroy(&fIteratorLock);
596 }
597 
598 
599 int32
600 btrfs_key::Compare(const btrfs_key& key) const
601 {
602 	if (ObjectID() > key.ObjectID())
603 		return 1;
604 	if (ObjectID() < key.ObjectID())
605 		return -1;
606 	if (Type() > key.Type())
607 		return 1;
608 	if (Type() < key.Type())
609 		return -1;
610 	if (Offset() > key.Offset())
611 		return 1;
612 	if (Offset() < key.Offset())
613 		return -1;
614 	return 0;
615 }
616 
617 
618 /* Traverse from root to fill in the path along way its finding.
619  * Return current slot at leaf if successful.
620  */
621 status_t
622 BTree::Traverse(btree_traversing type, Path* path, const btrfs_key& key)
623 	const
624 {
625 	TRACE("BTree::Traverse() objectid %" B_PRId64 " type %d offset %"
626 		B_PRId64 " \n", key.ObjectID(),	key.Type(), key.Offset());
627 	fsblock_t physicalBlock = fRootBlock;
628 	Node node(fVolume, physicalBlock);
629 	int slot;
630 	status_t status = B_OK;
631 
632 	while (node.Level() != 0) {
633 		TRACE("BTree::Traverse() level %d count %d\n", node.Level(),
634 			node.ItemCount());
635 		status = node.SearchSlot(key, &slot, BTREE_BACKWARD);
636 		if (status != B_OK)
637 			return status;
638 		if (path->SetNode(&node, slot) == NULL)
639 			return B_NO_MEMORY;
640 
641 		TRACE("BTree::Traverse() getting index %" B_PRIu32 "\n", slot);
642 
643 		status = fVolume->FindBlock(node.Index(slot)->LogicalAddress(),
644 				physicalBlock);
645 		if (status != B_OK) {
646 			ERROR("BTree::Traverse() unmapped block %" B_PRId64 "\n",
647 				node.Index(slot)->LogicalAddress());
648 			return status;
649 		}
650 		node.SetTo(physicalBlock);
651 	}
652 
653 	TRACE("BTree::Traverse() dump count %" B_PRId32 "\n", node.ItemCount());
654 	status = node.SearchSlot(key, &slot, type);
655 	if (status != B_OK)
656 		return status;
657 	if (path->SetNode(&node, slot) == NULL)
658 		return B_NO_MEMORY;
659 
660 	TRACE("BTree::Traverse() found %" B_PRIu32 " %" B_PRIu32 "\n",
661 		node.Item(slot)->Offset(), node.Item(slot)->Size());
662 	return slot;
663 }
664 
665 
666 /*!	Searches the key in the tree, and stores the allocated found item in
667 	_value, if successful.
668 	Returns B_OK when the key could be found, B_ENTRY_NOT_FOUND if not.
669 	It can also return other errors to indicate that something went wrong.
670 */
671 status_t
672 BTree::_Find(Path* path, btrfs_key& wanted, void** _value, uint32* _size,
673 	uint32* _offset, btree_traversing type) const
674 {
675 	status_t status = Traverse(type, path, wanted);
676 	if (status < B_OK)
677 		return status;
678 
679 	btrfs_key found;
680 	status = path->GetCurrentEntry(&found, _value, _size, _offset);
681 	if (status != B_OK)
682 		return status;
683 
684 	if (found.Type() != wanted.Type() && wanted.Type() != BTRFS_KEY_TYPE_ANY) {
685 		ERROR("Find() not found wanted: %" B_PRIu64 " %" B_PRIu8 " %"
686 			B_PRIu64 " found: %" B_PRIu64 " %" B_PRIu8 " %" B_PRIu64 "\n",
687 			wanted.ObjectID(), wanted.Type(), wanted.Offset(), found.ObjectID(),
688 			found.Type(), found.Offset());
689 		return B_ENTRY_NOT_FOUND;
690 	}
691 
692 	wanted = found;
693 	return B_OK;
694 }
695 
696 
697 status_t
698 BTree::FindNext(Path* path, btrfs_key& key, void** _value, uint32* _size,
699 	uint32* _offset) const
700 {
701 	return _Find(path, key, _value, _size, _offset, BTREE_FORWARD);
702 }
703 
704 
705 status_t
706 BTree::FindPrevious(Path* path, btrfs_key& key, void** _value, uint32* _size,
707 	uint32* _offset) const
708 {
709 	return _Find(path, key, _value, _size, _offset, BTREE_BACKWARD);
710 }
711 
712 
713 status_t
714 BTree::FindExact(Path* path, btrfs_key& key, void** _value, uint32* _size,
715 	uint32* _offset) const
716 {
717 	return _Find(path, key, _value, _size, _offset, BTREE_EXACT);
718 }
719 
720 
721 /*
722  * Insert "num" of consecutive empty entries start with slot of "startKey"
723  * if successful return the starting slot, otherwise return error code.
724  */
725 status_t
726 BTree::MakeEntries(Transaction& transaction, Path* path,
727 	const btrfs_key& startKey, int num, int length)
728 {
729 	TRACE("BTree::MakeEntries() num %i key (% " B_PRIu64 " %" B_PRIu8 " %"
730 		B_PRIu64 ")\n", num, startKey.ObjectID(), startKey.Type(),
731 		startKey.Offset());
732 
733 	status_t status = Traverse(BTREE_FORWARD, path, startKey);
734 	if (status < B_OK)
735 		return status;
736 
737 	int slot = status;
738 	status = path->InternalCopy(transaction, 1);
739 	if (status != B_OK)
740 		return status;
741 
742 	status = path->CopyOnWrite(transaction, 0, slot, num, length);
743 	if (status == B_DEVICE_FULL) {
744 		// TODO: push data or split node
745 		return status;
746 	}
747 
748 	if (status != B_OK)
749 		return status;
750 	return slot;
751 }
752 
753 
754 /* MakeEntries and then fill in them.
755  */
756 status_t
757 BTree::InsertEntries(Transaction& transaction, Path* path,
758 	btrfs_entry* entries, void** data, int num)
759 {
760 	int totalLength = sizeof(btrfs_entry) * num;
761 	for (int i = 0; i < num; i++)
762 		totalLength += entries[i].Size();
763 
764 	status_t slot = MakeEntries(transaction, path, entries[0].key, num,
765 		totalLength);
766 	if (slot < B_OK)
767 		return slot;
768 
769 	uint32 upperLimit;
770 	if (slot > 0) {
771 		path->GetEntry(slot - 1, NULL, NULL, NULL, &upperLimit);
772 	} else
773 		upperLimit = fVolume->BlockSize() - sizeof(btrfs_header);
774 
775 	TRACE("BTree::InsertEntries() num: %i upper limit %" B_PRIu32 "\n", num,
776 		upperLimit);
777 	for (int i = 0; i < num; i++) {
778 		upperLimit -= entries[i].Size();
779 		entries[i].SetOffset(upperLimit);
780 		path->SetEntry(slot + i, entries[i], data[i]);
781 	}
782 
783 	return B_OK;
784 }
785 
786 
787 /* Like MakeEntries, but here we remove entries instead.
788  * Removed data stored in _data
789  * May merge those functions into one.
790  */
791 status_t
792 BTree::RemoveEntries(Transaction& transaction, Path* path,
793 	const btrfs_key& startKey, void** _data, int num)
794 {
795 	TRACE("BTree::RemoveEntries() num %i key (% " B_PRIu64 " %" B_PRIu8 " %"
796 		B_PRIu64 ")\n", num, startKey.ObjectID(), startKey.Type(),
797 		startKey.Offset());
798 
799 	status_t status = Traverse(BTREE_EXACT, path, startKey);
800 	if (status < B_OK)
801 		return status;
802 
803 	int slot = status;
804 	int length = -sizeof(btrfs_entry) * num;
805 	for (int i = 0; i < num; i++) {
806 		uint32 itemSize;
807 		path->GetEntry(slot + i, NULL, &_data[i], &itemSize);
808 		length -= itemSize;
809 	}
810 
811 	status = path->InternalCopy(transaction, 1);
812 	if (status != B_OK)
813 		return status;
814 
815 	status = path->CopyOnWrite(transaction, 0, slot, num, length);
816 	if (status == B_DIRECTORY_NOT_EMPTY) {
817 		// TODO: merge node or push data
818 	}
819 
820 	return status;
821 }
822 
823 
824 status_t
825 BTree::PreviousLeaf(Path* path) const
826 {
827 	// TODO: use Traverse() ???
828 	int level = 0;
829 	int slot;
830 	Node* node = NULL;
831 	// iterate to the root until satisfy the condition
832 	while (true) {
833 		node = path->GetNode(level, &slot);
834 		if (node == NULL || slot != 0)
835 			break;
836 		level++;
837 	}
838 
839 	// the current leaf is already the left most leaf or
840 	// path was not initialized
841 	if (node == NULL)
842 		return B_ENTRY_NOT_FOUND;
843 
844 	path->Move(level, BTREE_BACKWARD);
845 	fsblock_t physicalBlock;
846 	// change all nodes below this level and slot to the ending
847 	do {
848 		status_t status = fVolume->FindBlock(
849 			node->Index(slot)->LogicalAddress(), physicalBlock);
850 		if (status != B_OK)
851 			return status;
852 
853 		node = path->SetNode(physicalBlock, -1);
854 		if (node == NULL)
855 			return B_NO_MEMORY;
856 		slot = node->ItemCount() - 1;
857 		level--;
858 	} while (level != 0);
859 
860 	return B_OK;
861 }
862 
863 
864 status_t
865 BTree::NextLeaf(Path* path) const
866 {
867 	int level = 0;
868 	int slot;
869 	Node* node = NULL;
870 	// iterate to the root until satisfy the condition
871 	while (true) {
872 		node = path->GetNode(level, &slot);
873 		if (node == NULL || slot < node->ItemCount() - 1)
874 			break;
875 		level++;
876 	}
877 
878 	// the current leaf is already the right most leaf or
879 	// path was not initialized
880 	if (node == NULL)
881 		return B_ENTRY_NOT_FOUND;
882 
883 	path->Move(level, BTREE_FORWARD);
884 	fsblock_t physicalBlock;
885 	// change all nodes below this level and slot to the beginning
886 	do {
887 		status_t status = fVolume->FindBlock(
888 			node->Index(slot)->LogicalAddress(), physicalBlock);
889 		if (status != B_OK)
890 			return status;
891 
892 		node = path->SetNode(physicalBlock, 0);
893 		if (node == NULL)
894 			return B_NO_MEMORY;
895 		slot = 0;
896 		level--;
897 	} while (level != 0);
898 
899 	return B_OK;
900 }
901 
902 
903 /* Set root infomation, to use this function root must be valid and
904  * exists on disk.
905  */
906 status_t
907 BTree::SetRoot(off_t logical, fsblock_t* block)
908 {
909 	if (block != NULL) {
910 		fRootBlock = *block;
911 	} else {
912 		if (fVolume->FindBlock(logical, fRootBlock) != B_OK) {
913 			ERROR("SetRoot() unmapped block %" B_PRId64 " %" B_PRId64 "\n",
914 				logical, fRootBlock);
915 			return B_ERROR;
916 		}
917 	}
918 
919 	btrfs_header header;
920 	read_pos(fVolume->Device(), fRootBlock * fVolume->BlockSize(), &header,
921 		sizeof(btrfs_header));
922 	fRootLevel = header.Level();
923 	fLogicalRoot = header.LogicalAddress();
924 	return B_OK;
925 }
926 
927 
928 void
929 BTree::SetRoot(Node* root)
930 {
931 	fRootBlock = root->BlockNum();
932 	fLogicalRoot = root->LogicalAddress();
933 	fRootLevel = root->Level();
934 }
935 
936 
937 void
938 BTree::_AddIterator(TreeIterator* iterator)
939 {
940 	MutexLocker _(fIteratorLock);
941 	fIterators.Add(iterator);
942 }
943 
944 
945 void
946 BTree::_RemoveIterator(TreeIterator* iterator)
947 {
948 	MutexLocker _(fIteratorLock);
949 	fIterators.Remove(iterator);
950 }
951 
952 
953 //	#pragma mark -
954 
955 
956 TreeIterator::TreeIterator(BTree* tree, const btrfs_key& key)
957 	:
958 	fTree(tree),
959 	fKey(key),
960 	fIteratorStatus(B_NO_INIT)
961 {
962 	tree->_AddIterator(this);
963 	fPath = new(std::nothrow) BTree::Path(tree);
964 	if (fPath == NULL)
965 		fIteratorStatus = B_NO_MEMORY;
966 }
967 
968 
969 TreeIterator::~TreeIterator()
970 {
971 	if (fTree)
972 		fTree->_RemoveIterator(this);
973 
974 	delete fPath;
975 	fPath = NULL;
976 }
977 
978 
979 void
980 TreeIterator::Rewind(bool inverse)
981 {
982 	if (inverse)
983 		fKey.SetOffset(BTREE_END);
984 	else
985 		fKey.SetOffset(BTREE_BEGIN);
986 	fIteratorStatus = B_NO_INIT;
987 }
988 
989 
990 /*!	Iterates through the tree in the specified direction.
991 */
992 status_t
993 TreeIterator::_Traverse(btree_traversing direction)
994 {
995 	status_t status = fTree->Traverse(direction, fPath, fKey);
996 	if (status < B_OK) {
997 		ERROR("TreeIterator::Traverse() Find failed\n");
998 		return status;
999 	}
1000 
1001 	return (fIteratorStatus = B_OK);
1002 }
1003 
1004 
1005 // Like GetEntry in BTree::Path but here we check the type and moving.
1006 status_t
1007 TreeIterator::_GetEntry(btree_traversing type, void** _value, uint32* _size,
1008 	uint32* _offset)
1009 {
1010 	status_t status = B_OK;
1011 	if (fIteratorStatus == B_NO_INIT) {
1012 		status = _Traverse(type);
1013 		if (status != B_OK)
1014 			return status;
1015 		type = BTREE_EXACT;
1016 	}
1017 
1018 	if (fIteratorStatus != B_OK)
1019 		return fIteratorStatus;
1020 
1021 	int move = fPath->Move(0, type);
1022 	if (move > 0)
1023 		status = fTree->NextLeaf(fPath);
1024 	else if (move < 0)
1025 		status = fTree->PreviousLeaf(fPath);
1026 	if (status != B_OK)
1027 		return status;
1028 
1029 	btrfs_key found;
1030 	status = fPath->GetCurrentEntry(&found, _value, _size, _offset);
1031 	if (status != B_OK)
1032 		return status;
1033 
1034 	fKey.SetObjectID(found.ObjectID());
1035 	fKey.SetOffset(found.Offset());
1036 	if (fKey.Type() != found.Type() && fKey.Type() != BTRFS_KEY_TYPE_ANY)
1037 		return B_ENTRY_NOT_FOUND;
1038 
1039 	return B_OK;
1040 }
1041 
1042 
1043 /*!	just sets the current key in the iterator.
1044 */
1045 status_t
1046 TreeIterator::Find(const btrfs_key& key)
1047 {
1048 	if (fIteratorStatus == B_INTERRUPTED)
1049 		return fIteratorStatus;
1050 
1051 	fKey = key;
1052 	fIteratorStatus = B_NO_INIT;
1053 	return B_OK;
1054 }
1055 
1056 
1057 void
1058 TreeIterator::Stop()
1059 {
1060 	fTree = NULL;
1061 	fIteratorStatus = B_INTERRUPTED;
1062 }
1063