xref: /haiku/src/add-ons/kernel/file_systems/bfs/Inode.cpp (revision 2807c36668a1730dd59bc39de65e0b8f88cd5d0d)
1 /* Inode - inode access functions
2 **
3 ** Initial version by Axel Dörfler, axeld@pinc-software.de
4 ** This file may be used under the terms of the OpenBeOS License.
5 */
6 
7 
8 #include "Debug.h"
9 #include "cpp.h"
10 #include "Inode.h"
11 #include "BPlusTree.h"
12 #include "Stream.h"
13 #include "Index.h"
14 
15 #include <string.h>
16 
17 
18 class InodeAllocator {
19 	public:
20 		InodeAllocator(Transaction *transaction);
21 		~InodeAllocator();
22 
23 		status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode);
24 		status_t CreateTree();
25 		status_t Keep();
26 
27 	private:
28 		Transaction *fTransaction;
29 		block_run fRun;
30 		Inode *fInode;
31 };
32 
33 
34 InodeAllocator::InodeAllocator(Transaction *transaction)
35 	:
36 	fTransaction(transaction),
37 	fInode(NULL)
38 {
39 }
40 
41 
42 InodeAllocator::~InodeAllocator()
43 {
44 	if (fTransaction != NULL) {
45 		if (fInode != NULL) {
46 			fInode->Node()->flags &= ~INODE_IN_USE;
47 			fInode->Free(fTransaction);
48 		} else
49 			fTransaction->GetVolume()->Free(fTransaction, fRun);
50 	}
51 
52 	delete fInode;
53 }
54 
55 
56 status_t
57 InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode)
58 {
59 	Volume *volume = fTransaction->GetVolume();
60 
61 	status_t status = volume->AllocateForInode(fTransaction, parentRun, mode, fRun);
62 	if (status < B_OK) {
63 		// don't free the space in the destructor, because
64 		// the allocation failed
65 		fTransaction = NULL;
66 		RETURN_ERROR(status);
67 	}
68 
69 	run = fRun;
70 	fInode = new Inode(volume, volume->ToVnode(run), true);
71 	if (fInode == NULL)
72 		RETURN_ERROR(B_NO_MEMORY);
73 
74 	// initialize the on-disk bfs_inode structure
75 
76 	bfs_inode *node = fInode->Node();
77 
78 	node->magic1 = INODE_MAGIC1;
79 	node->inode_num = run;
80 	node->mode = mode;
81 	node->flags = INODE_IN_USE | INODE_NOT_READY;
82 		// INODE_NOT_READY prevents the inode from being opened - it is
83 		// cleared in InodeAllocator::Keep()
84 
85 	node->create_time = (bigtime_t)time(NULL) << INODE_TIME_SHIFT;
86 	node->last_modified_time = node->create_time | (volume->GetUniqueID() & INODE_TIME_MASK);
87 		// we use Volume::GetUniqueID() to avoid having too many duplicates in the
88 		// last_modified index
89 
90 	node->inode_size = volume->InodeSize();
91 
92 	*_inode = fInode;
93 	return B_OK;
94 }
95 
96 
97 status_t
98 InodeAllocator::CreateTree()
99 {
100 	Volume *volume = fTransaction->GetVolume();
101 
102 	// force S_STR_INDEX to be set, if no type is set
103 	if ((fInode->Mode() & S_INDEX_TYPES) == 0)
104 		fInode->Node()->mode |= S_STR_INDEX;
105 
106 	BPlusTree *tree = fInode->fTree = new BPlusTree(fTransaction, fInode);
107 	if (tree == NULL || tree->InitCheck() < B_OK)
108 		return B_ERROR;
109 
110 	if (fInode->IsRegularNode()) {
111 		if (tree->Insert(fTransaction, ".", fInode->ID()) < B_OK
112 			|| tree->Insert(fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK)
113 			return B_ERROR;
114 	}
115 	return B_OK;
116 }
117 
118 
119 status_t
120 InodeAllocator::Keep()
121 {
122 	ASSERT(fInode != NULL && fTransaction != NULL);
123 
124 	Volume *volume = fTransaction->GetVolume();
125 	fInode->Node()->flags &= ~INODE_NOT_READY;
126 	status_t status = fInode->WriteBack(fTransaction);
127 
128 	fTransaction = NULL;
129 	fInode = NULL;
130 
131 	return status;
132 }
133 
134 
135 //	#pragma mark -
136 
137 
138 Inode::Inode(Volume *volume, vnode_id id, bool empty, uint8 reenter)
139 	: CachedBlock(volume, volume->VnodeToBlock(id), empty),
140 	fTree(NULL),
141 	fLock("bfs inode")
142 {
143 	Node()->flags &= INODE_PERMANENT_FLAGS;
144 
145 	// these two will help to maintain the indices
146 	fOldSize = Size();
147 	fOldLastModified = LastModified();
148 }
149 
150 
151 Inode::~Inode()
152 {
153 	delete fTree;
154 }
155 
156 
157 status_t
158 Inode::InitCheck()
159 {
160 	if (!Node())
161 		RETURN_ERROR(B_IO_ERROR);
162 
163 	// test inode magic and flags
164 	if (Node()->magic1 != INODE_MAGIC1
165 		|| !(Node()->flags & INODE_IN_USE)
166 		|| Node()->inode_num.length != 1
167 		// matches inode size?
168 		|| Node()->inode_size != fVolume->InodeSize()
169 		// parent resides on disk?
170 		|| Node()->parent.allocation_group > fVolume->AllocationGroups()
171 		|| Node()->parent.allocation_group < 0
172 		|| Node()->parent.start > (1L << fVolume->AllocationGroupShift())
173 		|| Node()->parent.length != 1
174 		// attributes, too?
175 		|| Node()->attributes.allocation_group > fVolume->AllocationGroups()
176 		|| Node()->attributes.allocation_group < 0
177 		|| Node()->attributes.start > (1L << fVolume->AllocationGroupShift())) {
178 		FATAL(("inode at block %Ld corrupt!\n", fBlockNumber));
179 		RETURN_ERROR(B_BAD_DATA);
180 	}
181 
182 	if (Flags() & INODE_NOT_READY)
183 		return B_BUSY;
184 
185 	// ToDo: Add some tests to check the integrity of the other stuff here,
186 	// especially for the data_stream!
187 
188 	// it's more important to know that the inode is corrupt
189 	// so we check for the lock not until here
190 	return fLock.InitCheck();
191 }
192 
193 
194 status_t
195 Inode::CheckPermissions(int accessMode) const
196 {
197 	uid_t user = geteuid();
198 	gid_t group = getegid();
199 
200 	// you never have write access to a read-only volume
201 	if (accessMode & W_OK && fVolume->IsReadOnly())
202 		return B_READ_ONLY_DEVICE;
203 
204 	// root users always have full access (but they can't execute anything)
205 	if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0))
206 		return B_OK;
207 
208 	// shift mode bits, to check directly against accessMode
209 	mode_t mode = Mode();
210 	if (user == Node()->uid)
211 		mode >>= 6;
212 	else if (group == Node()->gid)
213 		mode >>= 3;
214 
215 	if (accessMode & ~(mode & S_IRWXO))
216 		return B_NOT_ALLOWED;
217 
218 	return B_OK;
219 }
220 
221 
222 //	#pragma mark -
223 
224 
225 void
226 Inode::AddIterator(AttributeIterator *iterator)
227 {
228 	if (fSmallDataLock.Lock() < B_OK)
229 		return;
230 
231 	fIterators.Add(iterator);
232 
233 	fSmallDataLock.Unlock();
234 }
235 
236 
237 void
238 Inode::RemoveIterator(AttributeIterator *iterator)
239 {
240 	if (fSmallDataLock.Lock() < B_OK)
241 		return;
242 
243 	fIterators.Remove(iterator);
244 
245 	fSmallDataLock.Unlock();
246 }
247 
248 
249 /**	Tries to free up "bytes" space in the small_data section by moving
250  *	attributes to real files. Used for system attributes like the name.
251  *	You need to hold the fSmallDataLock when you call this method
252  */
253 
254 status_t
255 Inode::MakeSpaceForSmallData(Transaction *transaction, const char *name, int32 bytes)
256 {
257 	ASSERT(fSmallDataLock.IsLocked());
258 
259 	while (bytes > 0) {
260 		small_data *item = Node()->small_data_start, *max = NULL;
261 		int32 index = 0, maxIndex = 0;
262 		for (; !item->IsLast(Node()); item = item->Next(), index++) {
263 			// should not remove those
264 			if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name()))
265 				continue;
266 
267 			if (max == NULL || max->Size() < item->Size()) {
268 				maxIndex = index;
269 				max = item;
270 			}
271 
272 			// remove the first one large enough to free the needed amount of bytes
273 			if (bytes < item->Size())
274 				break;
275 		}
276 
277 		if (item->IsLast(Node()) || item->Size() < bytes)
278 			return B_ERROR;
279 
280 		bytes -= max->Size();
281 
282 		// Move the attribute to a real attribute file
283 		// Luckily, this doesn't cause any index updates
284 
285 		Inode *attribute;
286 		status_t status = CreateAttribute(transaction, item->Name(), item->type, &attribute);
287 		if (status < B_OK)
288 			RETURN_ERROR(status);
289 
290 		size_t length = item->data_size;
291 		status = attribute->WriteAt(transaction, 0, item->Data(), &length);
292 
293 		ReleaseAttribute(attribute);
294 
295 		if (status < B_OK) {
296 			Vnode vnode(fVolume,Attributes());
297 			Inode *attributes;
298 			if (vnode.Get(&attributes) < B_OK
299 				|| attributes->Remove(transaction, name) < B_OK) {
300 				FATAL(("Could not remove newly created attribute!\n"));
301 			}
302 
303 			RETURN_ERROR(status);
304 		}
305 
306 		RemoveSmallData(max, maxIndex);
307 	}
308 	return B_OK;
309 }
310 
311 
312 /**	Private function which removes the given attribute from the small_data
313  *	section.
314  *	You need to hold the fSmallDataLock when you call this method
315  */
316 
317 status_t
318 Inode::RemoveSmallData(small_data *item, int32 index)
319 {
320 	ASSERT(fSmallDataLock.IsLocked());
321 
322 	small_data *next = item->Next();
323 	if (!next->IsLast(Node())) {
324 		// find the last attribute
325 		small_data *last = next;
326 		while (!last->IsLast(Node()))
327 			last = last->Next();
328 
329 		int32 size = (uint8 *)last - (uint8 *)next;
330 		if (size < 0 || size > (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)next)
331 			return B_BAD_DATA;
332 
333 		memmove(item, next, size);
334 
335 		// Move the "last" one to its new location and
336 		// correctly terminate the small_data section
337 		last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item));
338 		memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last);
339 	} else
340 		memset(item, 0, item->Size());
341 
342 	// update all current iterators
343 	AttributeIterator *iterator = NULL;
344 	while ((iterator = fIterators.Next(iterator)) != NULL)
345 		iterator->Update(index, -1);
346 
347 	return B_OK;
348 }
349 
350 
351 /**	Removes the given attribute from the small_data section.
352  *	Note that you need to write back the inode yourself after having called
353  *	that method.
354  */
355 
356 status_t
357 Inode::RemoveSmallData(Transaction *transaction, const char *name)
358 {
359 	if (name == NULL)
360 		return B_BAD_VALUE;
361 
362 	SimpleLocker locker(fSmallDataLock);
363 
364 	// search for the small_data item
365 
366 	small_data *item = Node()->small_data_start;
367 	int32 index = 0;
368 	while (!item->IsLast(Node()) && strcmp(item->Name(), name)) {
369 		item = item->Next();
370 		index++;
371 	}
372 
373 	if (item->IsLast(Node()))
374 		return B_ENTRY_NOT_FOUND;
375 
376 	return RemoveSmallData(item, index);
377 }
378 
379 
380 /**	Try to place the given attribute in the small_data section - if the
381  *	new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
382  *	In that case, the attribute should be written to a real attribute file;
383  *	if the attribute was already part of the small_data section, but the new
384  *	one wouldn't fit, the old one is automatically removed from the small_data
385  *	section.
386  *	Note that you need to write back the inode yourself after having called that
387  *	method - it's a bad API decision that it needs a transaction but enforces you
388  *	to write back the inode all by yourself, but it's just more efficient in most
389  *	cases...
390  */
391 
392 status_t
393 Inode::AddSmallData(Transaction *transaction, const char *name, uint32 type,
394 	const uint8 *data, size_t length, bool force)
395 {
396 	if (name == NULL || data == NULL || type == 0)
397 		return B_BAD_VALUE;
398 
399 	// reject any requests that can't fit into the small_data section
400 	uint32 nameLength = strlen(name);
401 	uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1;
402 	if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode))
403 		return B_DEVICE_FULL;
404 
405 	SimpleLocker locker(fSmallDataLock);
406 
407 	small_data *item = Node()->small_data_start;
408 	int32 index = 0;
409 	while (!item->IsLast(Node()) && strcmp(item->Name(), name)) {
410 		item = item->Next();
411 		index++;
412 	}
413 
414 	// is the attribute already in the small_data section?
415 	// then just replace the data part of that one
416 	if (!item->IsLast(Node())) {
417 		// find last attribute
418 		small_data *last = item;
419 		while (!last->IsLast(Node()))
420 			last = last->Next();
421 
422 		// try to change the attributes value
423 		if (item->data_size > length
424 			|| force
425 			|| ((uint8 *)last + length - item->data_size) <= ((uint8 *)Node() + fVolume->InodeSize())) {
426 			// make room for the new attribute if needed (and we are forced to do so)
427 			if (force
428 				&& ((uint8 *)last + length - item->data_size) > ((uint8 *)Node() + fVolume->InodeSize())) {
429 				// We also take the free space at the end of the small_data section
430 				// into account, and request only what's really needed
431 				uint32 needed = length - item->data_size -
432 						(uint32)((uint8 *)Node() + fVolume->InodeSize() - (uint8 *)last);
433 
434 				if (MakeSpaceForSmallData(transaction, name, needed) < B_OK)
435 					return B_ERROR;
436 
437 				// reset our pointers
438 				item = Node()->small_data_start;
439 				index = 0;
440 				while (!item->IsLast(Node()) && strcmp(item->Name(), name)) {
441 					item = item->Next();
442 					index++;
443 				}
444 
445 				last = item;
446 				while (!last->IsLast(Node()))
447 					last = last->Next();
448 			}
449 
450 			// move the attributes after the current one
451 			small_data *next = item->Next();
452 			if (!next->IsLast(Node()))
453 				memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next);
454 
455 			// Move the "last" one to its new location and
456 			// correctly terminate the small_data section
457 			last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded)));
458 			if ((uint8 *)last < (uint8 *)Node() + fVolume->BlockSize())
459 				memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last);
460 
461 			item->type = type;
462 			item->data_size = length;
463 			memcpy(item->Data(), data, length);
464 			item->Data()[length] = '\0';
465 
466 			return B_OK;
467 		}
468 
469 		// Could not replace the old attribute, so remove it to let
470 		// let the calling function create an attribute file for it
471 		if (RemoveSmallData(item, index) < B_OK)
472 			return B_ERROR;
473 
474 		return B_DEVICE_FULL;
475 	}
476 
477 	// try to add the new attribute!
478 
479 	if ((uint8 *)item + spaceNeeded > (uint8 *)Node() + fVolume->InodeSize()) {
480 		// there is not enough space for it!
481 		if (!force)
482 			return B_DEVICE_FULL;
483 
484 		// make room for the new attribute
485 		if (MakeSpaceForSmallData(transaction, name, spaceNeeded) < B_OK)
486 			return B_ERROR;
487 
488 		// get new last item!
489 		item = Node()->small_data_start;
490 		index = 0;
491 		while (!item->IsLast(Node())) {
492 			item = item->Next();
493 			index++;
494 		}
495 	}
496 
497 	memset(item, 0, spaceNeeded);
498 	item->type = type;
499 	item->name_size = nameLength;
500 	item->data_size = length;
501 	strcpy(item->Name(), name);
502 	memcpy(item->Data(), data, length);
503 
504 	// correctly terminate the small_data section
505 	item = item->Next();
506 	if (!item->IsLast(Node()))
507 		memset(item,0,(uint8 *)Node() + fVolume->InodeSize() - (uint8 *)item);
508 
509 	// update all current iterators
510 	AttributeIterator *iterator = NULL;
511 	while ((iterator = fIterators.Next(iterator)) != NULL)
512 		iterator->Update(index,1);
513 
514 	return B_OK;
515 }
516 
517 
518 /**	Iterates through the small_data section of an inode.
519  *	To start at the beginning of this section, you let smallData
520  *	point to NULL, like:
521  *		small_data *data = NULL;
522  *		while (inode->GetNextSmallData(&data) { ... }
523  *
524  *	This function is reentrant and doesn't allocate any memory;
525  *	you can safely stop calling it at any point (you don't need
526  *	to iterate through the whole list).
527  *	You need to hold the fSmallDataLock when you call this method
528  */
529 
530 status_t
531 Inode::GetNextSmallData(small_data **_smallData) const
532 {
533 	if (!Node())
534 		RETURN_ERROR(B_ERROR);
535 
536 	ASSERT(fSmallDataLock.IsLocked());
537 
538 	small_data *data = *_smallData;
539 
540 	// begin from the start?
541 	if (data == NULL)
542 		data = Node()->small_data_start;
543 	else
544 		data = data->Next();
545 
546 	// is already last item?
547 	if (data->IsLast(Node()))
548 		return B_ENTRY_NOT_FOUND;
549 
550 	*_smallData = data;
551 
552 	return B_OK;
553 }
554 
555 
556 /**	Finds the attribute "name" in the small data section, and
557  *	returns a pointer to it (or NULL if it doesn't exist).
558  *	You need to hold the fSmallDataLock when you call this method
559  */
560 
561 small_data *
562 Inode::FindSmallData(const char *name) const
563 {
564 	ASSERT(fSmallDataLock.IsLocked());
565 
566 	small_data *smallData = NULL;
567 	while (GetNextSmallData(&smallData) == B_OK) {
568 		if (!strcmp(smallData->Name(), name))
569 			return smallData;
570 	}
571 	return NULL;
572 }
573 
574 
575 /** Returns a pointer to the node's name if present in the small data
576  *	section, NULL otherwise.
577  *	You need to hold the fSmallDataLock when you call this method
578  */
579 
580 const char *
581 Inode::Name() const
582 {
583 	ASSERT(fSmallDataLock.IsLocked());
584 
585 	small_data *smallData = NULL;
586 	while (GetNextSmallData(&smallData) == B_OK) {
587 		if (*smallData->Name() == FILE_NAME_NAME && smallData->name_size == FILE_NAME_NAME_LENGTH)
588 			return (const char *)smallData->Data();
589 	}
590 	return NULL;
591 }
592 
593 
594 /** Copies the node's name into the provided buffer.
595  *	The buffer must be B_FILE_NAME_LENGTH bytes large.
596  */
597 
598 status_t
599 Inode::GetName(char *buffer) const
600 {
601 	SimpleLocker locker(fSmallDataLock);
602 
603 	const char *name = Name();
604 	if (name == NULL)
605 		return B_ENTRY_NOT_FOUND;
606 
607 	strlcpy(buffer, name, B_FILE_NAME_LENGTH);
608 	return B_OK;
609 }
610 
611 
612 /**	Changes or set the name of a file: in the inode small_data section only, it
613  *	doesn't change it in the parent directory's b+tree.
614  *	Note that you need to write back the inode yourself after having called
615  *	that method. It suffers from the same API decision as AddSmallData() does
616  *	(and for the same reason).
617  */
618 
619 status_t
620 Inode::SetName(Transaction *transaction, const char *name)
621 {
622 	if (name == NULL || *name == '\0')
623 		return B_BAD_VALUE;
624 
625 	const char nameTag[2] = {FILE_NAME_NAME, 0};
626 
627 	return AddSmallData(transaction, nameTag, FILE_NAME_TYPE, (uint8 *)name, strlen(name), true);
628 }
629 
630 
631 /**	Reads data from the specified attribute.
632  *	This is a high-level attribute function that understands attributes
633  *	in the small_data section as well as real attribute files.
634  */
635 
636 status_t
637 Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length)
638 {
639 	if (pos < 0)
640 		pos = 0;
641 
642 	// search in the small_data section (which has to be locked first)
643 	{
644 		SimpleLocker locker(fSmallDataLock);
645 
646 		small_data *smallData = FindSmallData(name);
647 		if (smallData != NULL) {
648 			size_t length = *_length;
649 			if (pos >= smallData->data_size) {
650 				*_length = 0;
651 				return B_OK;
652 			}
653 			if (length + pos > smallData->data_size)
654 				length = smallData->data_size - pos;
655 
656 			memcpy(buffer, smallData->Data() + pos, length);
657 			*_length = length;
658 			return B_OK;
659 		}
660 	}
661 
662 	// search in the attribute directory
663 	Inode *attribute;
664 	status_t status = GetAttribute(name, &attribute);
665 	if (status == B_OK) {
666 		if (attribute->Lock().Lock() == B_OK) {
667 			status = attribute->ReadAt(pos, (uint8 *)buffer, _length);
668 			attribute->Lock().Unlock();
669 		} else
670 			status = B_ERROR;
671 
672 		ReleaseAttribute(attribute);
673 	}
674 
675 	RETURN_ERROR(status);
676 }
677 
678 
679 /**	Writes data to the specified attribute.
680  *	This is a high-level attribute function that understands attributes
681  *	in the small_data section as well as real attribute files.
682  */
683 
684 status_t
685 Inode::WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos,
686 	const uint8 *buffer, size_t *_length)
687 {
688 	// needed to maintain the index
689 	uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL;
690 	size_t oldLength = 0;
691 
692 	// ToDo: we actually depend on that the contents of "buffer" are constant
693 	// if they get changed during the write (hey, user programs), we may mess
694 	// up our index trees!
695 
696 	Index index(fVolume);
697 	bool hasIndex = index.SetTo(name) == B_OK;
698 
699 	Inode *attribute = NULL;
700 	status_t status;
701 	if (GetAttribute(name, &attribute) < B_OK) {
702 		// save the old attribute data
703 		if (hasIndex) {
704 			fSmallDataLock.Lock();
705 
706 			small_data *smallData = FindSmallData(name);
707 			if (smallData != NULL) {
708 				oldLength = smallData->data_size;
709 				if (oldLength > BPLUSTREE_MAX_KEY_LENGTH)
710 					oldLength = BPLUSTREE_MAX_KEY_LENGTH;
711 				memcpy(oldData = oldBuffer, smallData->Data(), oldLength);
712 			}
713 			fSmallDataLock.Unlock();
714 		}
715 
716 		// if the attribute doesn't exist yet (as a file), try to put it in the
717 		// small_data section first - if that fails (due to insufficent space),
718 		// create a real attribute file
719 		status = AddSmallData(transaction, name, type, buffer, *_length);
720 		if (status == B_DEVICE_FULL) {
721 			status = CreateAttribute(transaction, name, type, &attribute);
722 			if (status < B_OK)
723 				RETURN_ERROR(status);
724 		} else if (status == B_OK)
725 			status = WriteBack(transaction);
726 	}
727 
728 	if (attribute != NULL) {
729 		if (attribute->Lock().LockWrite() == B_OK) {
730 			// save the old attribute data (if this fails, oldLength will reflect it)
731 			if (hasIndex) {
732 				oldLength = BPLUSTREE_MAX_KEY_LENGTH;
733 				if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK)
734 					oldData = oldBuffer;
735 			}
736 			// ToDo: check if the data fits in the inode now and delete the file if so
737 			status = attribute->WriteAt(transaction, pos, buffer, _length);
738 
739 			attribute->Lock().UnlockWrite();
740 		} else
741 			status = B_ERROR;
742 
743 		ReleaseAttribute(attribute);
744 	}
745 
746 	if (status == B_OK) {
747 		// ToDo: find a better way for that "pos" thing...
748 		// Update index
749 		if (hasIndex && pos == 0) {
750 			// index only the first BPLUSTREE_MAX_KEY_LENGTH bytes
751 			uint16 length = *_length;
752 			if (length > BPLUSTREE_MAX_KEY_LENGTH)
753 				length = BPLUSTREE_MAX_KEY_LENGTH;
754 
755 			index.Update(transaction, name, 0, oldData, oldLength, buffer, length, this);
756 		}
757 	}
758 	return status;
759 }
760 
761 
762 /**	Removes the specified attribute from the inode.
763  *	This is a high-level attribute function that understands attributes
764  *	in the small_data section as well as real attribute files.
765  */
766 
767 status_t
768 Inode::RemoveAttribute(Transaction *transaction, const char *name)
769 {
770 	Index index(fVolume);
771 	bool hasIndex = index.SetTo(name) == B_OK;
772 
773 	// update index for attributes in the small_data section
774 	if (hasIndex) {
775 		fSmallDataLock.Lock();
776 
777 		small_data *smallData = FindSmallData(name);
778 		if (smallData != NULL) {
779 			uint32 length = smallData->data_size;
780 			if (length > BPLUSTREE_MAX_KEY_LENGTH)
781 				length = BPLUSTREE_MAX_KEY_LENGTH;
782 			index.Update(transaction, name, 0, smallData->Data(), length, NULL, 0, this);
783 		}
784 		fSmallDataLock.Unlock();
785 	}
786 
787 	status_t status = RemoveSmallData(transaction, name);
788 	if (status == B_OK) {
789 		status = WriteBack(transaction);
790 	} else if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) {
791 		// remove the attribute file if it exists
792 		Vnode vnode(fVolume, Attributes());
793 		Inode *attributes;
794 		if ((status = vnode.Get(&attributes)) < B_OK)
795 			return status;
796 
797 		// update index
798 		Inode *attribute;
799 		if (hasIndex && GetAttribute(name, &attribute) == B_OK) {
800 			uint8 data[BPLUSTREE_MAX_KEY_LENGTH];
801 			size_t length = BPLUSTREE_MAX_KEY_LENGTH;
802 			if (attribute->ReadAt(0, data, &length) == B_OK)
803 				index.Update(transaction, name, 0, data, length, NULL, 0, this);
804 
805 			ReleaseAttribute(attribute);
806 		}
807 
808 		if ((status = attributes->Remove(transaction, name)) < B_OK)
809 			return status;
810 
811 		if (attributes->IsEmpty()) {
812 			// remove attribute directory (don't fail if that can't be done)
813 			if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) {
814 				// update the inode, so that no one will ever doubt it's deleted :-)
815 				attributes->Node()->flags |= INODE_DELETED;
816 				if (attributes->WriteBack(transaction) == B_OK) {
817 					Attributes().SetTo(0, 0, 0);
818 					WriteBack(transaction);
819 				} else
820 					unremove_vnode(fVolume->ID(), attributes->ID());
821 			}
822 		}
823 	}
824 	return status;
825 }
826 
827 
828 status_t
829 Inode::GetAttribute(const char *name, Inode **attribute)
830 {
831 	// does this inode even have attributes?
832 	if (Attributes().IsZero())
833 		return B_ENTRY_NOT_FOUND;
834 
835 	Vnode vnode(fVolume, Attributes());
836 	Inode *attributes;
837 	if (vnode.Get(&attributes) < B_OK) {
838 		FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name));
839 		return B_ERROR;
840 	}
841 
842 	BPlusTree *tree;
843 	status_t status = attributes->GetTree(&tree);
844 	if (status == B_OK) {
845 		vnode_id id;
846 		if ((status = tree->Find((uint8 *)name, (uint16)strlen(name), &id)) == B_OK) {
847 			Vnode vnode(fVolume, id);
848 			// Check if the attribute is really an attribute
849 			if (vnode.Get(attribute) < B_OK
850 				|| !(*attribute)->IsAttribute())
851 				return B_ERROR;
852 
853 			vnode.Keep();
854 			return B_OK;
855 		}
856 	}
857 	return status;
858 }
859 
860 
861 void
862 Inode::ReleaseAttribute(Inode *attribute)
863 {
864 	if (attribute == NULL)
865 		return;
866 
867 	put_vnode(fVolume->ID(), attribute->ID());
868 }
869 
870 
871 status_t
872 Inode::CreateAttribute(Transaction *transaction, const char *name, uint32 type, Inode **attribute)
873 {
874 	// do we need to create the attribute directory first?
875 	if (Attributes().IsZero()) {
876 		status_t status = Inode::Create(transaction, this, NULL, S_ATTR_DIR | 0666, 0, 0, NULL);
877 		if (status < B_OK)
878 			RETURN_ERROR(status);
879 	}
880 	Vnode vnode(fVolume, Attributes());
881 	Inode *attributes;
882 	if (vnode.Get(&attributes) < B_OK)
883 		return B_ERROR;
884 
885 	// Inode::Create() locks the inode for us
886 	return Inode::Create(transaction, attributes, name, S_ATTR | 0666, 0, type, NULL, attribute);
887 }
888 
889 
890 //	#pragma mark -
891 
892 
893 /**	Gives the caller direct access to the b+tree for a given directory.
894  *	The tree is created on demand, but lasts until the inode is
895  *	deleted.
896  */
897 
898 status_t
899 Inode::GetTree(BPlusTree **tree)
900 {
901 	if (fTree) {
902 		*tree = fTree;
903 		return B_OK;
904 	}
905 
906 	if (IsContainer()) {
907 		fTree = new BPlusTree(this);
908 		if (!fTree)
909 			RETURN_ERROR(B_NO_MEMORY);
910 
911 		*tree = fTree;
912 		status_t status = fTree->InitCheck();
913 		if (status < B_OK) {
914 			delete fTree;
915 			fTree = NULL;
916 		}
917 		RETURN_ERROR(status);
918 	}
919 	RETURN_ERROR(B_BAD_VALUE);
920 }
921 
922 
923 bool
924 Inode::IsEmpty()
925 {
926 	BPlusTree *tree;
927 	status_t status = GetTree(&tree);
928 	if (status < B_OK)
929 		return status;
930 
931 	TreeIterator iterator(tree);
932 
933 	// index and attribute directories are really empty when they are
934 	// empty - directories for standard files always contain ".", and
935 	// "..", so we need to ignore those two
936 
937 	uint32 count = 0;
938 	char name[BPLUSTREE_MAX_KEY_LENGTH];
939 	uint16 length;
940 	vnode_id id;
941 	while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id) == B_OK) {
942 		if (Mode() & (S_ATTR_DIR | S_INDEX_DIR))
943 			return false;
944 
945 		if (++count > 2 || strcmp(".", name) && strcmp("..", name))
946 			return false;
947 	}
948 	return true;
949 }
950 
951 
952 /** Finds the block_run where "pos" is located in the data_stream of
953  *	the inode.
954  *	If successful, "offset" will then be set to the file offset
955  *	of the block_run returned; so "pos - offset" is for the block_run
956  *	what "pos" is for the whole stream.
957  *	The caller has to make sure that "pos" is inside the stream.
958  */
959 
960 status_t
961 Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset)
962 {
963 	// The BPlusTree class will call this function, we'll provide
964 	// standard cached access only from here
965 	return ((Stream<Access::Cached> *)this)->FindBlockRun(pos, run, offset);
966 }
967 
968 
969 status_t
970 Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length)
971 {
972 	// call the right ReadAt() method, depending on the inode flags
973 
974 	if (Flags() & INODE_NO_CACHE)
975 		return ((Stream<Access::Uncached> *)this)->ReadAt(pos, buffer, _length);
976 
977 	if (Flags() & INODE_LOGGED)
978 		return ((Stream<Access::Logged> *)this)->ReadAt(pos, buffer, _length);
979 
980 	return ((Stream<Access::Cached> *)this)->ReadAt(pos, buffer, _length);
981 }
982 
983 
984 status_t
985 Inode::WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *_length)
986 {
987 	// call the right WriteAt() method, depending on the inode flags
988 
989 	// update the last modification time in memory, it will be written
990 	// back to the inode, and the index when the file is closed
991 	// ToDo: should update the internal last modified time only at this point!
992 	Node()->last_modified_time = (bigtime_t)time(NULL) << INODE_TIME_SHIFT;
993 
994 	if (Flags() & INODE_NO_CACHE)
995 		return ((Stream<Access::Uncached> *)this)->WriteAt(transaction, pos, buffer, _length);
996 
997 	if (Flags() & INODE_LOGGED)
998 		return ((Stream<Access::Logged> *)this)->WriteAt(transaction, pos, buffer, _length);
999 
1000 	return ((Stream<Access::Cached> *)this)->WriteAt(transaction, pos, buffer, _length);
1001 }
1002 
1003 
1004 /**	Fills the gap between the old file size and the new file size
1005  *	with zeros.
1006  *	It's more or less a copy of Inode::WriteAt() but it can handle
1007  *	length differences of more than just 4 GB, and it never uses
1008  *	the log, even if the INODE_LOGGED flag is set.
1009  */
1010 
1011 status_t
1012 Inode::FillGapWithZeros(off_t pos, off_t newSize)
1013 {
1014 	// ToDo: we currently do anything here, same as original BFS!
1015 	//if (pos >= newSize)
1016 		return B_OK;
1017 
1018 	block_run run;
1019 	off_t offset;
1020 	if (FindBlockRun(pos, run, offset) < B_OK)
1021 		RETURN_ERROR(B_BAD_VALUE);
1022 
1023 	off_t length = newSize - pos;
1024 	uint32 bytesWritten = 0;
1025 	uint32 blockSize = fVolume->BlockSize();
1026 	uint32 blockShift = fVolume->BlockShift();
1027 	uint8 *block;
1028 
1029 	// the first block_run we write could not be aligned to the block_size boundary
1030 	// (write partial block at the beginning)
1031 
1032 	// pos % block_size == (pos - offset) % block_size, offset % block_size == 0
1033 	if (pos % blockSize != 0) {
1034 		run.start += (pos - offset) / blockSize;
1035 		run.length -= (pos - offset) / blockSize;
1036 
1037 		CachedBlock cached(fVolume,run);
1038 		if ((block = cached.Block()) == NULL)
1039 			RETURN_ERROR(B_BAD_VALUE);
1040 
1041 		bytesWritten = blockSize - (pos % blockSize);
1042 		if (length < bytesWritten)
1043 			bytesWritten = length;
1044 
1045 		memset(block + (pos % blockSize), 0, bytesWritten);
1046 		if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK)
1047 			RETURN_ERROR(B_IO_ERROR);
1048 
1049 		pos += bytesWritten;
1050 
1051 		length -= bytesWritten;
1052 		if (length == 0)
1053 			return B_OK;
1054 
1055 		if (FindBlockRun(pos, run, offset) < B_OK)
1056 			RETURN_ERROR(B_BAD_VALUE);
1057 	}
1058 
1059 	while (length > 0) {
1060 		// offset is the offset to the current pos in the block_run
1061 		run.start += (pos - offset) >> blockShift;
1062 		run.length -= (pos - offset) >> blockShift;
1063 
1064 		CachedBlock cached(fVolume);
1065 		off_t blockNumber = fVolume->ToBlock(run);
1066 		for (int32 i = 0; i < run.length; i++) {
1067 			if ((block = cached.SetTo(blockNumber + i, true)) == NULL)
1068 				RETURN_ERROR(B_IO_ERROR);
1069 
1070 			if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK)
1071 				RETURN_ERROR(B_IO_ERROR);
1072 		}
1073 
1074 		int32 bytes = run.length << blockShift;
1075 		length -= bytes;
1076 		bytesWritten += bytes;
1077 
1078 		// since we don't respect a last partial block, length can be lower
1079 		if (length <= 0)
1080 			break;
1081 
1082 		pos += bytes;
1083 
1084 		if (FindBlockRun(pos, run, offset) < B_OK)
1085 			RETURN_ERROR(B_BAD_VALUE);
1086 	}
1087 	return B_OK;
1088 }
1089 
1090 
1091 /** Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing
1092  *	the indirect and double indirect range uses this method.
1093  *	The allocated block_run is saved in "run"
1094  */
1095 
1096 status_t
1097 Inode::AllocateBlockArray(Transaction *transaction, block_run &run)
1098 {
1099 	if (!run.IsZero())
1100 		return B_BAD_VALUE;
1101 
1102 	status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, run, NUM_ARRAY_BLOCKS);
1103 	if (status < B_OK)
1104 		return status;
1105 
1106 	// make sure those blocks are empty
1107 	CachedBlock cached(fVolume);
1108 	off_t block = fVolume->ToBlock(run);
1109 
1110 	for (int32 i = 0;i < run.length;i++) {
1111 		block_run *runs = (block_run *)cached.SetTo(block + i, true);
1112 		if (runs == NULL)
1113 			return B_IO_ERROR;
1114 
1115 		if (cached.WriteBack(transaction) < B_OK)
1116 			return B_IO_ERROR;
1117 	}
1118 	return B_OK;
1119 }
1120 
1121 
1122 status_t
1123 Inode::GrowStream(Transaction *transaction, off_t size)
1124 {
1125 	data_stream *data = &Node()->data;
1126 
1127 	// is the data stream already large enough to hold the new size?
1128 	// (can be the case with preallocated blocks)
1129 	if (size < data->max_direct_range
1130 		|| size < data->max_indirect_range
1131 		|| size < data->max_double_indirect_range) {
1132 		data->size = size;
1133 		return B_OK;
1134 	}
1135 
1136 	// how many bytes are still needed? (unused ranges are always zero)
1137 	uint16 minimum = 1;
1138 	off_t bytes;
1139 	if (data->size < data->max_double_indirect_range) {
1140 		bytes = size - data->max_double_indirect_range;
1141 		// the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS
1142 		minimum = NUM_ARRAY_BLOCKS;
1143 	} else if (data->size < data->max_indirect_range)
1144 		bytes = size - data->max_indirect_range;
1145 	else if (data->size < data->max_direct_range)
1146 		bytes = size - data->max_direct_range;
1147 	else
1148 		bytes = size - data->size;
1149 
1150 	// do we have enough free blocks on the disk?
1151 	off_t blocks = (bytes + fVolume->BlockSize() - 1) / fVolume->BlockSize();
1152 	if (blocks > fVolume->FreeBlocks())
1153 		return B_DEVICE_FULL;
1154 
1155 	// should we preallocate some blocks (currently, always 64k)?
1156 	off_t blocksNeeded = blocks;
1157 	if (blocks < (65536 >> fVolume->BlockShift()) && fVolume->FreeBlocks() > 128)
1158 		blocks = 65536 >> fVolume->BlockShift();
1159 
1160 	while (blocksNeeded > 0) {
1161 		// the requested blocks do not need to be returned with a
1162 		// single allocation, so we need to iterate until we have
1163 		// enough blocks allocated
1164 		block_run run;
1165 		status_t status = fVolume->Allocate(transaction, this, blocks, run, minimum);
1166 		if (status < B_OK)
1167 			return status;
1168 
1169 		// okay, we have the needed blocks, so just distribute them to the
1170 		// different ranges of the stream (direct, indirect & double indirect)
1171 
1172 		// ToDo: if anything goes wrong here, we probably want to free the
1173 		// blocks that couldn't be distributed into the stream!
1174 
1175 		blocksNeeded -= run.length;
1176 		// don't preallocate if the first allocation was already too small
1177 		blocks = blocksNeeded;
1178 		if (minimum > 1) {
1179 			// make sure that "blocks" is a multiple of minimum
1180 			blocks = (blocks + minimum - 1) & ~(minimum - 1);
1181 		}
1182 
1183 		// Direct block range
1184 
1185 		if (data->size <= data->max_direct_range) {
1186 			// let's try to put them into the direct block range
1187 			int32 free = 0;
1188 			for (;free < NUM_DIRECT_BLOCKS;free++)
1189 				if (data->direct[free].IsZero())
1190 					break;
1191 
1192 			if (free < NUM_DIRECT_BLOCKS) {
1193 				// can we merge the last allocated run with the new one?
1194 				int32 last = free - 1;
1195 				if (free > 0 && data->direct[last].MergeableWith(run))
1196 					data->direct[last].length += run.length;
1197 				else
1198 					data->direct[free] = run;
1199 
1200 				data->max_direct_range += run.length * fVolume->BlockSize();
1201 				data->size = blocksNeeded > 0 ? data->max_direct_range : size;
1202 				continue;
1203 			}
1204 		}
1205 
1206 		// Indirect block range
1207 
1208 		if (data->size <= data->max_indirect_range || !data->max_indirect_range) {
1209 			CachedBlock cached(fVolume);
1210 			block_run *runs = NULL;
1211 			int32 free = 0;
1212 			off_t block;
1213 
1214 			// if there is no indirect block yet, create one
1215 			if (data->indirect.IsZero()) {
1216 				status = AllocateBlockArray(transaction, data->indirect);
1217 				if (status < B_OK)
1218 					return status;
1219 
1220 				data->max_indirect_range = data->max_direct_range;
1221 				// insert the block_run in the first block
1222 				runs = (block_run *)cached.SetTo(data->indirect);
1223 			} else {
1224 				uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run);
1225 				block = fVolume->ToBlock(data->indirect);
1226 
1227 				// search first empty entry
1228 				int32 i = 0;
1229 				for (;i < data->indirect.length;i++) {
1230 					if ((runs = (block_run *)cached.SetTo(block + i)) == NULL)
1231 						return B_IO_ERROR;
1232 
1233 					for (free = 0; free < numberOfRuns; free++)
1234 						if (runs[free].IsZero())
1235 							break;
1236 
1237 					if (free < numberOfRuns)
1238 						break;
1239 				}
1240 				if (i == data->indirect.length)
1241 					runs = NULL;
1242 			}
1243 
1244 			if (runs != NULL) {
1245 				// try to insert the run to the last one - note that this doesn't
1246 				// take block borders into account, so it could be further optimized
1247 				int32 last = free - 1;
1248 				if (free > 0 && runs[last].MergeableWith(run))
1249 					runs[last].length += run.length;
1250 				else
1251 					runs[free] = run;
1252 
1253 				data->max_indirect_range += run.length << fVolume->BlockShift();
1254 				data->size = blocksNeeded > 0 ? data->max_indirect_range : size;
1255 
1256 				cached.WriteBack(transaction);
1257 				continue;
1258 			}
1259 		}
1260 
1261 		// Double indirect block range
1262 
1263 		if (data->size <= data->max_double_indirect_range || !data->max_double_indirect_range) {
1264 			while ((run.length % NUM_ARRAY_BLOCKS) != 0) {
1265 				// The number of allocated blocks isn't a multiple of NUM_ARRAY_BLOCKS,
1266 				// so we have to change this. This can happen the first time the stream
1267 				// grows into the double indirect range.
1268 				// First, free the remaining blocks that don't fit into a multiple
1269 				// of NUM_ARRAY_BLOCKS
1270 				int32 rest = run.length % NUM_ARRAY_BLOCKS;
1271 				run.length -= rest;
1272 
1273 				status = fVolume->Free(transaction, block_run::Run(run.allocation_group,
1274 					run.start + run.length, rest));
1275 				if (status < B_OK)
1276 					return status;
1277 
1278 				blocksNeeded += rest;
1279 				blocks = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) & ~(NUM_ARRAY_BLOCKS - 1);
1280 				minimum = NUM_ARRAY_BLOCKS;
1281 					// we make sure here that we have at minimum NUM_ARRAY_BLOCKS allocated,
1282 					// so if the allocation succeeds, we don't run into an endless loop
1283 
1284 				// Are there any blocks left in the run? If not, allocate a new one
1285 				if (run.length == 0)
1286 					continue;
1287 			}
1288 
1289 			// if there is no double indirect block yet, create one
1290 			if (data->double_indirect.IsZero()) {
1291 				status = AllocateBlockArray(transaction, data->double_indirect);
1292 				if (status < B_OK)
1293 					return status;
1294 
1295 				data->max_double_indirect_range = data->max_indirect_range;
1296 			}
1297 
1298 			// calculate the index where to insert the new blocks
1299 
1300 			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1301 			int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift())
1302 				* runsPerBlock;
1303 			int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift();
1304 			int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT;
1305 
1306 			off_t start = data->max_double_indirect_range - data->max_indirect_range;
1307 			int32 indirectIndex = start / indirectSize;
1308 			int32 index = start / directSize;
1309 
1310 			// distribute the blocks to the array and allocate
1311 			// new array blocks when needed
1312 
1313 			CachedBlock cached(fVolume);
1314 			CachedBlock cachedDirect(fVolume);
1315 			block_run *array = NULL;
1316 			uint32 runLength = run.length;
1317 
1318 			// ToDo: the following code is commented - it could be used to
1319 			// preallocate all needed block arrays to see in advance if the
1320 			// allocation will succeed.
1321 			// I will probably remove it later, because it's no perfect solution
1322 			// either: if the allocation was broken up before (blocksNeeded != 0),
1323 			// it doesn't guarantee anything.
1324 			// And since failing in this case is not that common, it doesn't have
1325 			// to be optimized in that way.
1326 			// Anyway, I wanted to have it in CVS - all those lines, and they will
1327 			// be removed soon :-)
1328 /*
1329 			// allocate new block arrays if needed
1330 
1331 			off_t block = -1;
1332 
1333 			for (int32 i = 0;i < needed;i++) {
1334 				// get the block to insert the run into
1335 				block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock;
1336 				if (cached.BlockNumber() != block)
1337 					array = (block_run *)cached.SetTo(block);
1338 
1339 				if (array == NULL)
1340 					return B_ERROR;
1341 
1342 				status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]);
1343 				if (status < B_OK)
1344 					return status;
1345 			}
1346 */
1347 
1348 			while (run.length) {
1349 				// get the indirect array block
1350 				if (array == NULL) {
1351 					if (cached.Block() != NULL
1352 						&& cached.WriteBack(transaction) < B_OK)
1353 						return B_IO_ERROR;
1354 
1355 					array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) +
1356 						indirectIndex / runsPerBlock);
1357 					if (array == NULL)
1358 						return B_IO_ERROR;
1359 				}
1360 
1361 				do {
1362 					// do we need a new array block?
1363 					if (array[indirectIndex % runsPerBlock].IsZero()) {
1364 						status = AllocateBlockArray(transaction, array[indirectIndex % runsPerBlock]);
1365 						if (status < B_OK)
1366 							return status;
1367 					}
1368 
1369 					block_run *runs = (block_run *)cachedDirect.SetTo(
1370 						fVolume->ToBlock(array[indirectIndex % runsPerBlock])
1371 						+ index / runsPerBlock);
1372 					if (runs == NULL)
1373 						return B_IO_ERROR;
1374 
1375 					do {
1376 						// insert the block_run into the array
1377 						runs[index % runsPerBlock] = run;
1378 						runs[index % runsPerBlock].length = NUM_ARRAY_BLOCKS;
1379 
1380 						// alter the remaining block_run
1381 						run.start += NUM_ARRAY_BLOCKS;
1382 						run.length -= NUM_ARRAY_BLOCKS;
1383 					} while ((++index % runsPerBlock) != 0 && run.length);
1384 
1385 					if (cachedDirect.WriteBack(transaction) < B_OK)
1386 						return B_IO_ERROR;
1387 				} while ((index % runsPerArray) != 0 && run.length);
1388 
1389 				if (++indirectIndex % runsPerBlock == 0) {
1390 					array = NULL;
1391 					index = 0;
1392 				}
1393 			}
1394 
1395 			data->max_double_indirect_range += runLength << fVolume->BlockShift();
1396 			data->size = blocksNeeded > 0 ? data->max_double_indirect_range : size;
1397 
1398 			continue;
1399 		}
1400 
1401 		RETURN_ERROR(EFBIG);
1402 	}
1403 	// update the size of the data stream
1404 	data->size = size;
1405 
1406 	return B_OK;
1407 }
1408 
1409 
1410 status_t
1411 Inode::FreeStaticStreamArray(Transaction *transaction, int32 level, block_run run,
1412 	off_t size, off_t offset, off_t &max)
1413 {
1414 	int32 indirectSize;
1415 	if (level == 0)
1416 		indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift()))
1417 			* (fVolume->BlockSize() / sizeof(block_run));
1418 	else if (level == 1)
1419 		indirectSize = 4 << fVolume->BlockShift();
1420 
1421 	off_t start;
1422 	if (size > offset)
1423 		start = size - offset;
1424 	else
1425 		start = 0;
1426 
1427 	int32 index = start / indirectSize;
1428 	int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1429 
1430 	CachedBlock cached(fVolume);
1431 	off_t blockNumber = fVolume->ToBlock(run);
1432 
1433 	// set the file offset to the current block run
1434 	offset += (off_t)index * indirectSize;
1435 
1436 	for (int32 i = index / runsPerBlock; i < run.length; i++) {
1437 		block_run *array = (block_run *)cached.SetTo(blockNumber + i);
1438 		if (array == NULL)
1439 			RETURN_ERROR(B_ERROR);
1440 
1441 		for (index = index % runsPerBlock; index < runsPerBlock; index++) {
1442 			if (array[index].IsZero()) {
1443 				// we also want to break out of the outer loop
1444 				i = run.length;
1445 				break;
1446 			}
1447 
1448 			status_t status = B_OK;
1449 			if (level == 0)
1450 				status = FreeStaticStreamArray(transaction, 1, array[index], size, offset, max);
1451 			else if (offset >= size)
1452 				status = fVolume->Free(transaction, array[index]);
1453 			else
1454 				max = offset + indirectSize;
1455 
1456 			if (status < B_OK)
1457 				RETURN_ERROR(status);
1458 
1459 			if (offset >= size)
1460 				array[index].SetTo(0, 0, 0);
1461 
1462 			offset += indirectSize;
1463 		}
1464 		index = 0;
1465 
1466 		cached.WriteBack(transaction);
1467 	}
1468 	return B_OK;
1469 }
1470 
1471 
1472 /** Frees all block_runs in the array which come after the specified size.
1473  *	It also trims the last block_run that contain the size.
1474  *	"offset" and "max" are maintained until the last block_run that doesn't
1475  *	have to be freed - after this, the values won't be correct anymore, but
1476  *	will still assure correct function for all subsequent calls.
1477  */
1478 
1479 status_t
1480 Inode::FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength,
1481 	off_t size, off_t &offset, off_t &max)
1482 {
1483 	off_t newOffset = offset;
1484 	uint32 i = 0;
1485 	for (; i < arrayLength; i++, offset = newOffset) {
1486 		if (array[i].IsZero())
1487 			break;
1488 
1489 		newOffset += (off_t)array[i].length << fVolume->BlockShift();
1490 		if (newOffset <= size)
1491 			continue;
1492 
1493 		block_run run = array[i];
1494 
1495 		// determine the block_run to be freed
1496 		if (newOffset > size && offset < size) {
1497 			// free partial block_run (and update the original block_run)
1498 			run.start = array[i].start + ((size - offset) >> fVolume->BlockShift()) + 1;
1499 			array[i].length = run.start - array[i].start;
1500 			run.length -= array[i].length;
1501 
1502 			if (run.length == 0)
1503 				continue;
1504 
1505 			// update maximum range
1506 			max = offset + ((off_t)array[i].length << fVolume->BlockShift());
1507 		} else {
1508 			// free the whole block_run
1509 			array[i].SetTo(0, 0, 0);
1510 
1511 			if (max > offset)
1512 				max = offset;
1513 		}
1514 
1515 		if (fVolume->Free(transaction, run) < B_OK)
1516 			return B_IO_ERROR;
1517 	}
1518 	return B_OK;
1519 }
1520 
1521 
1522 status_t
1523 Inode::ShrinkStream(Transaction *transaction, off_t size)
1524 {
1525 	data_stream *data = &Node()->data;
1526 
1527 	if (data->max_double_indirect_range > size) {
1528 		FreeStaticStreamArray(transaction, 0, data->double_indirect, size,
1529 			data->max_indirect_range, data->max_double_indirect_range);
1530 
1531 		if (size <= data->max_indirect_range) {
1532 			fVolume->Free(transaction, data->double_indirect);
1533 			data->double_indirect.SetTo(0, 0, 0);
1534 			data->max_double_indirect_range = 0;
1535 		}
1536 	}
1537 	if (data->max_indirect_range > size) {
1538 		CachedBlock cached(fVolume);
1539 		off_t block = fVolume->ToBlock(data->indirect);
1540 		off_t offset = data->max_direct_range;
1541 
1542 		for (int32 i = 0; i < data->indirect.length; i++) {
1543 			block_run *array = (block_run *)cached.SetTo(block + i);
1544 			if (array == NULL)
1545 				break;
1546 
1547 			if (FreeStreamArray(transaction, array, fVolume->BlockSize() / sizeof(block_run),
1548 					size, offset, data->max_indirect_range) == B_OK)
1549 				cached.WriteBack(transaction);
1550 		}
1551 		if (data->max_direct_range == data->max_indirect_range) {
1552 			fVolume->Free(transaction, data->indirect);
1553 			data->indirect.SetTo(0, 0, 0);
1554 			data->max_indirect_range = 0;
1555 		}
1556 	}
1557 	if (data->max_direct_range > size) {
1558 		off_t offset = 0;
1559 		FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, size, offset,
1560 			data->max_direct_range);
1561 	}
1562 
1563 	data->size = size;
1564 	return B_OK;
1565 }
1566 
1567 
1568 status_t
1569 Inode::SetFileSize(Transaction *transaction, off_t size)
1570 {
1571 	if (size < 0
1572 		// uncached files can't be resized (Stream<Cache>::WriteAt() specifically
1573 		// denies growing uncached files because of efficiency, so it had to be
1574 		// adapted if this ever changes [which will probably happen in OpenBeOS]).
1575 		|| Flags() & INODE_NO_CACHE)
1576 		return B_BAD_VALUE;
1577 
1578 	off_t oldSize = Node()->data.size;
1579 
1580 	if (size == oldSize)
1581 		return B_OK;
1582 
1583 	// should the data stream grow or shrink?
1584 	status_t status;
1585 	if (size > oldSize) {
1586 		status = GrowStream(transaction, size);
1587 		if (status < B_OK) {
1588 			// if the growing of the stream fails, the whole operation
1589 			// fails, so we should shrink the stream to its former size
1590 			ShrinkStream(transaction, oldSize);
1591 		}
1592 	}
1593 	else
1594 		status = ShrinkStream(transaction, size);
1595 
1596 	if (status < B_OK)
1597 		return status;
1598 
1599 	return WriteBack(transaction);
1600 }
1601 
1602 
1603 status_t
1604 Inode::Append(Transaction *transaction, off_t bytes)
1605 {
1606 	return SetFileSize(transaction, Size() + bytes);
1607 }
1608 
1609 
1610 status_t
1611 Inode::Trim(Transaction *transaction)
1612 {
1613 	status_t status = ShrinkStream(transaction, Size());
1614 	if (status < B_OK)
1615 		return status;
1616 
1617 	return WriteBack(transaction);
1618 }
1619 
1620 
1621 status_t
1622 Inode::Free(Transaction *transaction)
1623 {
1624 	// Perhaps there should be an implementation of Inode::ShrinkStream() that
1625 	// just frees the data_stream, but doesn't change the inode (since it is
1626 	// freed anyway) - that would make an undelete command possible
1627 	status_t status = SetFileSize(transaction, 0);
1628 	if (status < B_OK)
1629 		return status;
1630 
1631 	// Free all attributes, and remove their indices
1632 	{
1633 		// We have to limit the scope of AttributeIterator, so that its
1634 		// destructor is not called after the inode is deleted
1635 		AttributeIterator iterator(this);
1636 
1637 		char name[B_FILE_NAME_LENGTH];
1638 		uint32 type;
1639 		size_t length;
1640 		vnode_id id;
1641 		while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK)
1642 			RemoveAttribute(transaction, name);
1643 	}
1644 
1645 	if (WriteBack(transaction) < B_OK)
1646 		return B_ERROR;
1647 
1648 	return fVolume->Free(transaction, BlockRun());
1649 }
1650 
1651 
1652 status_t
1653 Inode::Sync()
1654 {
1655 	// We may also want to flush the attribute's data stream to
1656 	// disk here... (do we?)
1657 
1658 	data_stream *data = &Node()->data;
1659 	status_t status;
1660 
1661 	// flush direct range
1662 
1663 	for (int32 i = 0;i < NUM_DIRECT_BLOCKS;i++) {
1664 		if (data->direct[i].IsZero())
1665 			return B_OK;
1666 
1667 		status = flush_blocks(fVolume->Device(), fVolume->ToBlock(data->direct[i]),
1668 			data->direct[i].length);
1669 		if (status != B_OK)
1670 			return status;
1671 	}
1672 
1673 	// flush indirect range
1674 
1675 	if (data->max_indirect_range == 0)
1676 		return B_OK;
1677 
1678 	CachedBlock cached(fVolume);
1679 	off_t block = fVolume->ToBlock(data->indirect);
1680 	int32 count = fVolume->BlockSize() / sizeof(block_run);
1681 
1682 	for (int32 j = 0; j < data->indirect.length; j++) {
1683 		block_run *runs = (block_run *)cached.SetTo(block + j);
1684 		if (runs == NULL)
1685 			break;
1686 
1687 		for (int32 i = 0; i < count; i++) {
1688 			if (runs[i].IsZero())
1689 				return B_OK;
1690 
1691 			status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), runs[i].length);
1692 			if (status != B_OK)
1693 				return status;
1694 		}
1695 	}
1696 
1697 	// flush double indirect range
1698 
1699 	if (data->max_double_indirect_range == 0)
1700 		return B_OK;
1701 
1702 	off_t indirectBlock = fVolume->ToBlock(data->double_indirect);
1703 
1704 	for (int32 l = 0; l < data->double_indirect.length; l++) {
1705 		block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l);
1706 		if (indirectRuns == NULL)
1707 			return B_FILE_ERROR;
1708 
1709 		CachedBlock directCached(fVolume);
1710 
1711 		for (int32 k = 0; k < count; k++) {
1712 			if (indirectRuns[k].IsZero())
1713 				return B_OK;
1714 
1715 			block = fVolume->ToBlock(indirectRuns[k]);
1716 			for (int32 j = 0; j < indirectRuns[k].length; j++) {
1717 				block_run *runs = (block_run *)directCached.SetTo(block + j);
1718 				if (runs == NULL)
1719 					return B_FILE_ERROR;
1720 
1721 				for (int32 i = 0; i < count; i++) {
1722 					if (runs[i].IsZero())
1723 						return B_OK;
1724 
1725 					// ToDo: combine single block_runs to bigger ones when
1726 					// they are adjacent
1727 					status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]),
1728 						runs[i].length);
1729 					if (status != B_OK)
1730 						return status;
1731 				}
1732 			}
1733 		}
1734 	}
1735 	return B_OK;
1736 }
1737 
1738 
1739 status_t
1740 Inode::Remove(Transaction *transaction, const char *name, off_t *_id, bool isDirectory)
1741 {
1742 	BPlusTree *tree;
1743 	if (GetTree(&tree) != B_OK)
1744 		RETURN_ERROR(B_BAD_VALUE);
1745 
1746 	// does the file even exists?
1747 	off_t id;
1748 	if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK)
1749 		return B_ENTRY_NOT_FOUND;
1750 
1751 	if (_id)
1752 		*_id = id;
1753 
1754 	Vnode vnode(fVolume, id);
1755 	Inode *inode;
1756 	status_t status = vnode.Get(&inode);
1757 	if (status < B_OK) {
1758 		REPORT_ERROR(status);
1759 		return B_ENTRY_NOT_FOUND;
1760 	}
1761 
1762 	// You can't unlink a mounted or the VM file while it is being used - while
1763 	// this is not really necessary, it copies the behaviour of the original BFS
1764 	// and let you and me feel a little bit safer
1765 	if (inode->Flags() & INODE_NO_CACHE)
1766 		return B_NOT_ALLOWED;
1767 
1768 	// Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
1769 	// bit is set for indices in BFS, not for attribute directories) - but you
1770 	// should really be able to do whatever you want with your indices
1771 	// without having to remove all files first :)
1772 	if (!inode->IsIndex()) {
1773 		// if it's not of the correct type, don't delete it!
1774 		if (inode->IsContainer() != isDirectory)
1775 			return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;
1776 
1777 		// only delete empty directories
1778 		if (isDirectory && !inode->IsEmpty())
1779 			return B_DIRECTORY_NOT_EMPTY;
1780 	}
1781 
1782 	// remove_vnode() allows the inode to be accessed until the last put_vnode()
1783 	if (remove_vnode(fVolume->ID(), id) != B_OK)
1784 		return B_ERROR;
1785 
1786 	if (tree->Remove(transaction, name, id) < B_OK) {
1787 		unremove_vnode(fVolume->ID(), id);
1788 		RETURN_ERROR(B_ERROR);
1789 	}
1790 
1791 	// update the inode, so that no one will ever doubt it's deleted :-)
1792 	inode->Node()->flags |= INODE_DELETED;
1793 
1794 	// In balance to the Inode::Create() method, the main indices
1795 	// are updated here (name, size, & last_modified)
1796 
1797 	Index index(fVolume);
1798 	if (inode->IsRegularNode()) {
1799 		index.RemoveName(transaction, name, inode);
1800 			// If removing from the index fails, it is not regarded as a
1801 			// fatal error and will not be reported back!
1802 			// Deleted inodes won't be visible in queries anyway.
1803 	}
1804 
1805 	if ((inode->Mode() & (S_FILE | S_SYMLINK)) != 0) {
1806 		if (inode->IsFile())
1807 			index.RemoveSize(transaction, inode);
1808 		index.RemoveLastModified(transaction, inode);
1809 	}
1810 
1811 	if (inode->WriteBack(transaction) < B_OK)
1812 		return B_ERROR;
1813 
1814 	return B_OK;
1815 }
1816 
1817 
1818 /**	Creates the inode with the specified parent directory, and automatically
1819  *	adds the created inode to that parent directory. If an attribute directory
1820  *	is created, it will also automatically added to the parent inode as such.
1821  *	However, the indices root node, and the regular root node won't be added
1822  *	to the super block.
1823  *	It will also create the initial B+tree for the inode if it's a directory
1824  *	of any kind.
1825  *	If the "_id" or "_inode" variable is given and non-NULL to store the inode's
1826  *	ID, the inode stays locked - you have to call put_vnode() if you don't use it
1827  *	anymore.
1828  */
1829 
1830 status_t
1831 Inode::Create(Transaction *transaction, Inode *parent, const char *name, int32 mode,
1832 	int omode, uint32 type, off_t *_id, Inode **_inode)
1833 {
1834 	block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0);
1835 	Volume *volume = transaction->GetVolume();
1836 	BPlusTree *tree = NULL;
1837 
1838 	if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) {
1839 		// check if the file already exists in the directory
1840 		if (parent->GetTree(&tree) != B_OK)
1841 			RETURN_ERROR(B_BAD_VALUE);
1842 
1843 		// does the file already exist?
1844 		off_t offset;
1845 		if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) {
1846 			// return if the file should be a directory or opened in exclusive mode
1847 			if (mode & S_DIRECTORY || omode & O_EXCL)
1848 				return B_FILE_EXISTS;
1849 
1850 			Vnode vnode(volume, offset);
1851 			Inode *inode;
1852 			status_t status = vnode.Get(&inode);
1853 			if (status < B_OK) {
1854 				REPORT_ERROR(status);
1855 				return B_ENTRY_NOT_FOUND;
1856 			}
1857 
1858 			// if it's a directory, bail out!
1859 			if (inode->IsDirectory())
1860 				return B_IS_A_DIRECTORY;
1861 
1862 			// if it is a mounted device or the VM file, we don't allow to delete it
1863 			// while it is open and in use
1864 			if (inode->Flags() & INODE_NO_CACHE)
1865 				return B_NOT_ALLOWED;
1866 
1867 			// if omode & O_TRUNC, truncate the existing file
1868 			if (omode & O_TRUNC) {
1869 				WriteLocked locked(inode->Lock());
1870 
1871 				status_t status = inode->SetFileSize(transaction, 0);
1872 				if (status < B_OK)
1873 					return status;
1874 			}
1875 
1876 			if (_id)
1877 				*_id = offset;
1878 			if (_inode)
1879 				*_inode = inode;
1880 
1881 			// only keep the vnode in memory if the _id or _inode pointer is provided
1882 			if (_id != NULL || _inode != NULL)
1883 				vnode.Keep();
1884 
1885 			return B_OK;
1886 		}
1887 	} else if (parent && (mode & S_ATTR_DIR) == 0)
1888 		return B_BAD_VALUE;
1889 
1890 	// allocate space for the new inode
1891 	InodeAllocator allocator(transaction);
1892 	block_run run;
1893 	Inode *inode;
1894 	status_t status = allocator.New(&parentRun, mode, run, &inode);
1895 	if (status < B_OK)
1896 		return status;
1897 
1898 	// Initialize the parts of the bfs_inode structure that
1899 	// InodeAllocator::New() hasn't touched yet
1900 
1901 	bfs_inode *node = inode->Node();
1902 
1903 	node->parent = parentRun;
1904 
1905 	node->uid = geteuid();
1906 	node->gid = parent ? parent->Node()->gid : getegid();
1907 		// the group ID is inherited from the parent, if available
1908 
1909 	node->type = type;
1910 
1911 	// only add the name to regular files, directories, or symlinks
1912 	// don't add it to attributes, or indices
1913 	if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK)
1914 		return B_ERROR;
1915 
1916 	// Initialize b+tree if it's a directory (and add "." & ".." if it's
1917 	// a standard directory for files - not for attributes or indices)
1918 	if (inode->IsContainer()) {
1919 		status = allocator.CreateTree();
1920 		if (status < B_OK)
1921 			return status;
1922 	}
1923 
1924 	// Add a link to the inode from the parent, depending on its type
1925 	// (the INODE_NOT_READY flag is set, so it is safe to make the inode
1926 	// accessable to the file system here)
1927 	if (tree) {
1928 		status = tree->Insert(transaction, name, inode->ID());
1929 	} else if (parent && (mode & S_ATTR_DIR) != 0) {
1930 		parent->Attributes() = run;
1931 		status = parent->WriteBack(transaction);
1932 	}
1933 
1934 	// Note, we only care if the inode could be made accessable for the
1935 	// two cases above; the root node or the indices root node must
1936 	// handle this case on their own (or other cases where "parent" is
1937 	// NULL)
1938 	if (status < B_OK)
1939 		RETURN_ERROR(status);
1940 
1941 	// Update the main indices (name, size & last_modified)
1942 	// (live queries might want to access us after this)
1943 
1944 	Index index(volume);
1945 	if (inode->IsRegularNode()) {
1946 		// the name index only contains regular files
1947 		status = index.InsertName(transaction, name, inode);
1948 		if (status < B_OK && status != B_BAD_INDEX) {
1949 			// We have to remove the node from the parent at this point,
1950 			// because the InodeAllocator destructor can't handle this
1951 			// case (and if it fails, we can't do anything about it...)
1952 			if (tree)
1953 				tree->Remove(transaction, name, inode->ID());
1954 			else
1955 				parent->Node()->attributes.SetTo(0, 0, 0);
1956 
1957 			return status;
1958 		}
1959 	}
1960 
1961 	inode->UpdateOldLastModified();
1962 
1963 	// The "size" & "last_modified" indices don't contain directories
1964 	if ((mode & (S_FILE | S_SYMLINK)) != 0) {
1965 		// if adding to these indices fails, the inode creation will not be harmed;
1966 		// they are considered less important than the "name" index
1967 		if (inode->IsFile())
1968 			index.InsertSize(transaction, inode);
1969 		index.InsertLastModified(transaction, inode);
1970 	}
1971 
1972 	if (new_vnode(volume->ID(), inode->ID(), inode) != B_OK) {
1973 		// this is a really fatal error, and we can't recover from that
1974 		DIE(("new_vnode() failed for inode!"));
1975 	}
1976 
1977 	// Everything worked well until this point, we have a fully
1978 	// initialized inode, and we want to keep it
1979 	allocator.Keep();
1980 
1981 	if (_id != NULL)
1982 		*_id = inode->ID();
1983 	if (_inode != NULL)
1984 		*_inode = inode;
1985 
1986 	// if either _id or _inode is passed, we will keep the inode locked
1987 	if (_id == NULL && _inode == NULL)
1988 		put_vnode(volume->ID(), inode->ID());
1989 
1990 	return B_OK;
1991 }
1992 
1993 
1994 //	#pragma mark -
1995 
1996 
1997 AttributeIterator::AttributeIterator(Inode *inode)
1998 	:
1999 	fCurrentSmallData(0),
2000 	fInode(inode),
2001 	fAttributes(NULL),
2002 	fIterator(NULL),
2003 	fBuffer(NULL)
2004 {
2005 	inode->AddIterator(this);
2006 }
2007 
2008 
2009 AttributeIterator::~AttributeIterator()
2010 {
2011 	if (fAttributes)
2012 		put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID());
2013 
2014 	delete fIterator;
2015 	fInode->RemoveIterator(this);
2016 }
2017 
2018 
2019 status_t
2020 AttributeIterator::Rewind()
2021 {
2022 	fCurrentSmallData = 0;
2023 
2024 	if (fIterator != NULL)
2025 		fIterator->Rewind();
2026 
2027 	return B_OK;
2028 }
2029 
2030 
2031 status_t
2032 AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, vnode_id *_id)
2033 {
2034 	// read attributes out of the small data section
2035 
2036 	if (fCurrentSmallData >= 0) {
2037 		small_data *item = fInode->Node()->small_data_start;
2038 
2039 		fInode->SmallDataLock().Lock();
2040 
2041 		int32 i = 0;
2042 		for (;;item = item->Next()) {
2043 			if (item->IsLast(fInode->Node()))
2044 				break;
2045 
2046 			if (item->name_size == FILE_NAME_NAME_LENGTH
2047 				&& *item->Name() == FILE_NAME_NAME)
2048 				continue;
2049 
2050 			if (i++ == fCurrentSmallData)
2051 				break;
2052 		}
2053 
2054 		if (!item->IsLast(fInode->Node())) {
2055 			strncpy(name, item->Name(), B_FILE_NAME_LENGTH);
2056 			*_type = item->type;
2057 			*_length = item->name_size;
2058 			*_id = (vnode_id)fCurrentSmallData;
2059 
2060 			fCurrentSmallData = i;
2061 		}
2062 		else {
2063 			// stop traversing the small_data section
2064 			fCurrentSmallData = -1;
2065 		}
2066 
2067 		fInode->SmallDataLock().Unlock();
2068 
2069 		if (fCurrentSmallData != -1)
2070 			return B_OK;
2071 	}
2072 
2073 	// read attributes out of the attribute directory
2074 
2075 	if (fInode->Attributes().IsZero())
2076 		return B_ENTRY_NOT_FOUND;
2077 
2078 	Volume *volume = fInode->GetVolume();
2079 
2080 	// if you haven't yet access to the attributes directory, get it
2081 	if (fAttributes == NULL) {
2082 		if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()),
2083 				(void **)&fAttributes) != 0
2084 			|| fAttributes == NULL) {
2085 			FATAL(("get_vnode() failed in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name));
2086 			return B_ENTRY_NOT_FOUND;
2087 		}
2088 
2089 		BPlusTree *tree;
2090 		if (fAttributes->GetTree(&tree) < B_OK
2091 			|| (fIterator = new TreeIterator(tree)) == NULL) {
2092 			FATAL(("could not get tree in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name));
2093 			return B_ENTRY_NOT_FOUND;
2094 		}
2095 	}
2096 
2097 	block_run run;
2098 	uint16 length;
2099 	vnode_id id;
2100 	status_t status = fIterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id);
2101 	if (status < B_OK)
2102 		return status;
2103 
2104 	Vnode vnode(volume,id);
2105 	Inode *attribute;
2106 	if ((status = vnode.Get(&attribute)) == B_OK) {
2107 		*_type = attribute->Node()->type;
2108 		*_length = attribute->Node()->data.size;
2109 		*_id = id;
2110 	}
2111 
2112 	return status;
2113 }
2114 
2115 
2116 void
2117 AttributeIterator::Update(uint16 index, int8 change)
2118 {
2119 	// fCurrentSmallData points already to the next item
2120 	if (index < fCurrentSmallData)
2121 		fCurrentSmallData += change;
2122 }
2123 
2124