/*
 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */

//! block bitmap handling and allocation policies


#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "bfs_control.h"

#include "system_dependencies.h"


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size nearby a specific position
// - allocate an unsharp range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and
// partitions will only grow larger - so that's not an option.
// Instead, we read in each bitmap block as it is used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.

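// A quick back-of-the-envelope illustration of the sizes involved (the
// 13 GB example from above; illustrative only, nothing in this block is
// used by the code in this file):
#if 0
static void
bitmap_size_example()
{
	off_t volumeSize = 13LL << 30;				// 13 GB
	uint32 blockSize = 2048;
	off_t numBlocks = volumeSize / blockSize;	// 6815744 blocks
	off_t bitmapBytes = numBlocks / 8;			// one bit per block
		// => 851968 bytes, i.e. the ~800 kB mentioned above
}
#endif
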
// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// be tested against real world workloads.

#if defined(BFS_TRACING) && !defined(BFS_SHELL) && !defined(_BOOT_MODE)
namespace BFSBlockTracing {

class Allocate : public AbstractTraceEntry {
	public:
		Allocate(block_run run)
			:
			fRun(run)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("alloc %ld.%u.%u", fRun.AllocationGroup(),
				fRun.Start(), fRun.Length());
		}

	private:
		block_run	fRun;
};

class Free : public AbstractTraceEntry {
	public:
		Free(block_run run)
			:
			fRun(run)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("free %ld.%u.%u", fRun.AllocationGroup(),
				fRun.Start(), fRun.Length());
		}

	private:
		block_run	fRun;
};


static uint32
checksum(const uint8 *data, size_t size)
{
	const uint32 *data4 = (const uint32 *)data;
	uint32 sum = 0;
	while (size >= 4) {
		sum += *data4;
		data4++;
		size -= 4;
	}
	return sum;
}
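
// Note that this is a plain additive checksum over 32-bit words; up to
// three trailing bytes are ignored. That is good enough here, since it is
// only used to compare bitmap block snapshots in the trace entries below.
// An illustrative sketch of that property (not used by this file):
#if 0
static void
checksum_example()
{
	uint8 a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint8 b[8] = { 1, 2, 3, 4, 99, 6, 7, 8 };
	// checksum(a, 7) == checksum(b, 7): bytes 4-6 are trailing and skipped
	// checksum(a, 8) != checksum(b, 8): bytes 4-7 form a full 32-bit word
}
#endif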


class Block : public AbstractTraceEntry {
	public:
		Block(const char *label, off_t blockNumber, const uint8 *data,
				size_t size, uint32 start = 0, uint32 length = 0)
			:
			fBlock(blockNumber),
			fData(data),
			fStart(start),
			fLength(length)
		{
			strlcpy(fLabel, label, sizeof(fLabel));
			fSum = checksum(data, size);
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel,
				fBlock, fData, fSum, fStart, fLength);
		}

	private:
		off_t		fBlock;
		const uint8	*fData;
		uint32		fStart;
		uint32		fLength;
		uint32		fSum;
		char		fLabel[12];
};

}	// namespace BFSBlockTracing

#	define T(x) new(std::nothrow) BFSBlockTracing::x;
#else
#	define T(x) ;
#endif


struct check_cookie {
	check_cookie() {}

	block_run			current;
	Inode				*parent;
	mode_t				parent_mode;
	Stack<block_run>	stack;
	TreeIterator		*iterator;
};


class AllocationBlock : public CachedBlock {
	public:
		AllocationBlock(Volume *volume);

		void Allocate(uint16 start, uint16 numBlocks);
		void Free(uint16 start, uint16 numBlocks);
		inline bool IsUsed(uint16 block);

		status_t SetTo(AllocationGroup &group, uint16 block);
		status_t SetToWritable(Transaction &transaction, AllocationGroup &group,
			uint16 block);

		uint32 NumBlockBits() const { return fNumBits; }
		uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; }
		uint8 *Block() const { return (uint8 *)fBlock; }

	private:
		uint32	fNumBits;
#ifdef DEBUG
		bool	fWritable;
#endif
};


class AllocationGroup {
	public:
		AllocationGroup();

		void AddFreeRange(int32 start, int32 blocks);
		bool IsFull() const { return fFreeBits == 0; }

		status_t Allocate(Transaction &transaction, uint16 start, int32 length);
		status_t Free(Transaction &transaction, uint16 start, int32 length);

		uint32 NumBits() const { return fNumBits; }
		uint32 NumBlocks() const { return fNumBlocks; }
		int32 Start() const { return fStart; }

	private:
		friend class BlockAllocator;

		uint32	fNumBits;
		uint32	fNumBlocks;
		int32	fStart;
		int32	fFirstFree, fLargest, fLargestFirst;
			// ToDo: fLargest & fLargestFirst are not maintained (and therefore
			// not used) yet!
		int32	fFreeBits;
};


AllocationBlock::AllocationBlock(Volume *volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits than the others
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = false;
#endif
	return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR;
}


status_t
AllocationBlock::SetToWritable(Transaction &transaction, AllocationGroup &group,
	uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in the last block
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = true;
#endif
	return CachedBlock::SetToWritable(transaction, group.Start() + block)
		!= NULL ? B_OK : B_ERROR;
}


bool
AllocationBlock::IsUsed(uint16 block)
{
	// defensive: bits outside of this bitmap block are reported as used
	if (block >= fNumBits)
		return true;

	// the block bitmap is accessed in 32-bit blocks
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}
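
// The lookup above maps a block index within this bitmap block to a 32-bit
// word (index >> 5) and a bit inside that word (index % 32), with the
// words stored in BFS (little endian) byte order. A minimal sketch of the
// same mapping on a plain host-endian array (illustrative only, not used
// by this file):
#if 0
static bool
bit_is_set(const uint32 *bitmap, uint32 bitIndex)
{
	uint32 word = bitIndex >> 5;			// 32 bits per word
	uint32 mask = 1UL << (bitIndex % 32);	// bit within that word
	return (bitmap[word] & mask) != 0;
}
#endif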


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("Allocation::Allocate(): tried to allocate too many blocks: "
			"%u (numBlocks = %u)!\n", numBlocks, (unsigned)fNumBits));
		DIE(("Allocation::Allocate(): tried to allocate too many blocks"));
	}

	T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;
#ifdef DEBUG
		// check for already set blocks
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already "
				"allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			DEBUGGER(("blocks already set!"));
		}
#endif
		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
	T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));
}
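
// Allocate() builds one 32-bit mask per word: it sets bits from
// (start % 32) upwards until either the word is full or numBlocks runs
// out, then continues at bit 0 of the next word. A hedged sketch of how a
// run crossing a word boundary decomposes (illustrative values only):
#if 0
static void
mask_example()
{
	// Allocating 8 bits starting at bit 28 of a bitmap block:
	// word 0 gets bits 28..31, word 1 gets bits 0..3.
	uint32 firstMask = 0, secondMask = 0;
	for (int32 i = 28; i < 32; i++)
		firstMask |= 1UL << i;		// == 0xf0000000
	for (int32 i = 0; i < 4; i++)
		secondMask |= 1UL << i;		// == 0x0000000f
}
#endif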


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("Allocation::Free(): tried to free too many blocks: "
			"%u (numBlocks = %u)!\n", numBlocks, (unsigned)fNumBits));
		DIE(("Allocation::Free(): tried to free too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


//	#pragma mark -


/*!	The allocation groups are created and initialized in
	BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap()
	respectively.
*/
AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fLargest(-1),
	fLargestFirst(-1),
	fFreeBits(0)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (fLargest < blocks) {
		fLargest = blocks;
		fLargestFirst = start;
	}

	fFreeBits += blocks;
}


/*!	Allocates the specified run in the allocation group.
	Doesn't check if the run is valid or already allocated partially, nor
	does it maintain the free ranges hints or the volume's used blocks count.
	It only does the low-level work of allocating some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Allocate(Transaction &transaction, uint16 start, int32 length)
{
	if (start > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	// Note, the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	Volume *volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}
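
// The loop above splits a run that spans several bitmap blocks into one
// Allocate() call per block: the first chunk runs from `start` to the end
// of the current bitmap block, all following chunks begin at bit 0. A
// hedged sketch with made-up numbers (not taken from any real volume):
#if 0
static void
split_example()
{
	// bitsPerBlock = 16384 (2048-byte blocks), start = 16380, length = 10:
	//   chunk 1: bitmap block 0, bits 16380..16383 (4 bits)
	//   chunk 2: bitmap block 1, bits 0..5 (6 bits)
	uint32 bitsPerBlock = 16384;
	uint32 start = 16380, length = 10;
	uint32 first = bitsPerBlock - start;	// == 4
	uint32 second = length - first;			// == 6
}
#endif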


/*!	Frees the specified run in the allocation group.
	Doesn't check if the run is valid or was not completely allocated, nor
	does it maintain the free ranges hints or the volume's used blocks count.
	It only does the low-level work of freeing some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Free(Transaction &transaction, uint16 start, int32 length)
{
	if (start > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	Volume *volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		T(Block("free-1", block, cached.Block(), volume->BlockSize()));
		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		length -= freeLength;
		start = 0;
		T(Block("free-2", block, cached.Block(), volume->BlockSize()));
		block++;
	}
	return B_OK;
}


//	#pragma mark -


BlockAllocator::BlockAllocator(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs allocator"),
	fGroups(NULL),
	fCheckBitmap(NULL)
{
}


BlockAllocator::~BlockAllocator()
{
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	if (fLock.InitCheck() < B_OK)
		return B_ERROR;

	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	fGroups = new(std::nothrow) AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	fLock.Lock();
		// the lock will be released by the _Initialize() function

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize,
			"bfs block allocator", B_LOW_PRIORITY, (void *)this);
	if (id < B_OK)
		return _Initialize(this);

	return resume_thread(id);
}


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction &transaction)
{
	status_t status = Initialize(false);
	if (status < B_OK)
		return status;

	uint32 blocks = fBlocksPerGroup;
	uint32 numBits = 8 * blocks * fVolume->BlockSize();
	uint32 blockShift = fVolume->BlockShift();

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (write_pos(fVolume->Device(), offset << blockShift, buffer,
				blocks << blockShift) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		if (i == fNumGroups - 1) {
			fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits;
			fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			fGroups[i].fNumBits = numBits;
			fGroups[i].fNumBlocks = blocks;
		}
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits;

		offset += blocks;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

	if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
		FATAL(("could not allocate reserved space for block bitmap/log!\n"));
		return B_ERROR;
	}
	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}
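
// The block count of the last allocation group is a ceiling division:
// 1 + ((numBits - 1) >> (blockShift + 3)) rounds numBits divided by the
// number of bits per bitmap block upwards. A hedged sketch with sample
// numbers (illustrative only):
#if 0
static void
ceiling_example()
{
	uint32 blockShift = 11;							// 2048-byte blocks
	uint32 bitsPerBlock = 1UL << (blockShift + 3);	// 16384 bits per block
	uint32 numBits = 20000;							// bits in the last group
	uint32 numBlocks = 1 + ((numBits - 1) >> (blockShift + 3));
		// == 2: 20000 bits need two 16384-bit bitmap blocks
}
#endif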


status_t
BlockAllocator::_Initialize(BlockAllocator *allocator)
{
	// The lock must already be held at this point

	Volume *volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 blockShift = volume->BlockShift();
	off_t freeBlocks = 0;

	uint32 *buffer = (uint32 *)malloc(blocks << blockShift);
	if (buffer == NULL) {
		allocator->fLock.Unlock();
		RETURN_ERROR(B_NO_MEMORY);
	}

	AllocationGroup *groups = allocator->fGroups;
	off_t offset = 1;
	uint32 bitsPerGroup = 8 * (blocks << blockShift);
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0; i < numGroups; i++) {
		if (read_pos(volume->Device(), offset << blockShift, buffer,
				blocks << blockShift) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		if (i == numGroups - 1) {
			groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup;
			groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			groups[i].fNumBits = bitsPerGroup;
			groups[i].fNumBlocks = blocks;
		}
		groups[i].fStart = offset;

		// find all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 numBits = groups[i].fNumBits, bit = 0;
		int32 count = (numBits + 31) / 32;

		for (int32 k = 0; k < count; k++) {
			for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) {
				if (buffer[k] & (1UL << j)) {
					// block is in use
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0) {
					// block is free, start new free range
					start = bit;
				}
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check if block bitmap and log area are reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();
	if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) {
		Transaction transaction(volume, 0);
		if (groups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
			FATAL(("could not allocate reserved space for block bitmap/log!\n"));
			volume->Panic();
		} else {
			FATAL(("space for block bitmap or log area was not reserved!\n"));

			transaction.Done();
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk is in a dirty state at mount time, it's normal that
		// the values don't match
		INFORM(("volume reports %Ld used blocks, correct is %Ld\n",
			volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	allocator->fLock.Unlock();
	return B_OK;
}
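
// The scan above is a single run-length pass over the bitmap words: a set
// bit closes the current free range, a clear bit either opens or extends
// one. A worked example (illustrative only, not used by this file):
#if 0
static void
scan_example()
{
	// bits of 0x26, LSB first: 0 1 1 0 0 1 0 0
	uint32 word = 0x26;
	int32 numBits = 8;
	// free ranges found: start 0/length 1, start 3/length 2, and
	// start 6/length 2 - the last one is flushed by the AddFreeRange()
	// call after the loop.
}
#endif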


status_t
BlockAllocator::AllocateBlocks(Transaction &transaction, int32 group,
	uint16 start, uint16 maximum, uint16 minimum, block_run &run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n",
		group, start, maximum, minimum));

	AllocationBlock cached(fVolume);
	Locker lock(fLock);

	// The first scan through all allocation groups will look for the
	// wanted maximum of blocks; the second scan will just try to
	// satisfy the minimal requirement
	uint16 numBlocks = maximum;
	uint32 bitsPerFullBlock = fVolume->BlockSize() << 3;

	for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) {
		group = group % fNumGroups;

		if (start >= fGroups[group].NumBits() || fGroups[group].IsFull())
			continue;

		if (i >= fNumGroups) {
			// If the minimum is the same as the maximum, it's not necessary
			// to search the allocation groups a second time
			if (maximum == minimum)
				return B_DEVICE_FULL;

			numBlocks = minimum;
		}

		// Skip the group if the wanted number of blocks is larger than the
		// largest free range in it
		// ToDo: disabled because fLargest is currently not maintained after
		// the first allocation
		//if (numBlocks > fGroups[group].fLargest)
		//	continue;

		if (start < fGroups[group].fFirstFree)
			start = fGroups[group].fFirstFree;

		// There may be more than one block per allocation group - and
		// we iterate through them to find a place for the allocation.
		// (one allocation can't exceed one allocation group)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 range = 0, rangeStart = 0;

		for (; block < fGroups[group].NumBlocks(); block++) {
			if (cached.SetTo(fGroups[group], block) < B_OK)
				RETURN_ERROR(B_ERROR);

			T(Block("alloc-in", fGroups[group].Start() + block, cached.Block(),
				fVolume->BlockSize(), group, rangeStart));

			// find a range large enough to hold the allocation
			for (uint32 bit = start % bitsPerFullBlock;
					bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (range == 0) {
						// start new range
						rangeStart = block * bitsPerFullBlock + bit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++range >= maximum)
						break;
				} else if (i >= fNumGroups && range >= minimum) {
					// we have found a range large enough for the required
					// minimum (second pass)
					break;
				} else {
					// end of a range
					range = 0;
				}
			}

			// ToDo: we could also remember a "largest free block that fits
			//	the minimal requirement" in the group, and use that - this
			//	would avoid the need for a second run

			// if we found a suitable range, mark the blocks as in use, and
			// write the updated block bitmap back to disk
			if (range >= numBlocks) {
				// adjust allocation size
				if (numBlocks < maximum)
					numBlocks = range;

				if (fGroups[group].Allocate(transaction, rangeStart,
						numBlocks) < B_OK)
					RETURN_ERROR(B_IO_ERROR);

				run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group);
				run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart);
				run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks);

				fVolume->SuperBlock().used_blocks =
					HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks);
					// We are not writing back the disk's super block - it's
					// either done by the journaling code, or when the disk
					// is unmounted.
					// If the value is not correct at mount time, it will be
					// fixed anyway.

				T(Allocate(run));
				T(Block("alloc-out", block, cached.Block(),
					fVolume->BlockSize(), group, rangeStart));
				return B_OK;
			}

			// start from the beginning of the next block
			start = 0;
		}
	}
	return B_DEVICE_FULL;
}
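
// AllocateBlocks() thus implements a best-effort policy: try to find
// `maximum` contiguous blocks near the preferred group first, and fall
// back to anything of at least `minimum` blocks in a second pass. A hedged
// usage sketch (hypothetical caller, not part of this file):
#if 0
status_t
example_allocate(BlockAllocator &allocator, Transaction &transaction)
{
	block_run run;
	// Prefer group 5; take 64 contiguous blocks if possible, but accept
	// any free range of 8 or more blocks before giving up.
	status_t status = allocator.AllocateBlocks(transaction, 5, 0, 64, 8, run);
	if (status != B_OK)
		return status;	// B_DEVICE_FULL if not even 8 blocks were found
	return B_OK;
}
#endif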


status_t
BlockAllocator::AllocateForInode(Transaction &transaction,
	const block_run *parent, mode_t type, block_run &run)
{
	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic
	// Giampaolo's "Practical File System Design", and see how well they work

	// files go into the same allocation group as their parent,
	// sub-directories will be inserted 8 allocation groups after the one
	// of their parent
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}


status_t
BlockAllocator::Allocate(Transaction &transaction, Inode *inode,
	off_t numBlocks, block_run &run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].NumBits())
		numBlocks = fGroups[0].NumBits();

	// since block_run.length is uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// ToDo: if we drop compatibility, couldn't we do this any better?
	// There are basically three possibilities:
	// a) since a length of zero doesn't have any sense, take that for 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum amount of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	// c) have BlockRun::Length() return (length + 1)
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// are there already allocated blocks? (then just try to allocate near
	// the last one)
	if (inode->Size() > 0) {
		const data_stream &data = inode->Node().data;
		// ToDo: we currently don't handle the case where the data stream
		// has already grown into the indirect ranges
		if (data.max_double_indirect_range == 0
			&& data.max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data.direct[last + 1].IsZero())
					break;

			group = data.direct[last].AllocationGroup();
			start = data.direct[last].Start() + data.direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go in the same allocation
		// group as the inode is in but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction &transaction, block_run run)
{
	Locker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start,
		length));
	T(Free(run));

	// We don't use Volume::IsValidBlockRun() here, because we can check
	// better against the group size (the last group may have a different
	// length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].NumBits()
		|| uint32(start + length) > fGroups[group].NumBits()
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%d, %u, %u)\n",
			(int)group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the
	// drive
	if (group == 0
		&& start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%d, %u, %u)\n",
			(int)group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	if (fGroups[group].Free(transaction, start, length) < B_OK)
		RETURN_ERROR(B_IO_ERROR);

#ifdef DEBUG
	if (CheckBlockRun(run, NULL, NULL, false) < B_OK) {
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were "
			"just freed)\n"));
	}
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


size_t
BlockAllocator::BitmapSize() const
{
	return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
}


//	#pragma mark -
//	Functions to check the validity of the bitmap - they are used from
//	the "chkbfs" command


bool
BlockAllocator::_IsValidCheckControl(check_control *control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}


status_t
BlockAllocator::StartChecking(check_control *control)
{
	if (!_IsValidCheckControl(control))
		return B_BAD_VALUE;

	status_t status = fLock.Lock();
	if (status < B_OK)
		return status;

	size_t size = BitmapSize();
	fCheckBitmap = (uint32 *)malloc(size);
	if (fCheckBitmap == NULL) {
		fLock.Unlock();
		return B_NO_MEMORY;
	}

	check_cookie *cookie = new(std::nothrow) check_cookie();
	if (cookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		fLock.Unlock();

		return B_NO_MEMORY;
	}

	// initialize bitmap
	memset(fCheckBitmap, 0, size);
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length();
			block-- > 0;) {
		_SetCheckBitmapAt(block);
	}

	cookie->stack.Push(fVolume->Root());
	cookie->stack.Push(fVolume->Indices());
	cookie->iterator = NULL;
	control->cookie = cookie;

	fCheckCookie = cookie;
		// to be able to restore nicely if "chkbfs" exited abnormally

	// ToDo: check reserved area in bitmap!

	return B_OK;
}


status_t
BlockAllocator::StopChecking(check_control *control)
{
	check_cookie *cookie;
	if (control == NULL)
		cookie = fCheckCookie;
	else
		cookie = (check_cookie *)control->cookie;

	if (cookie == NULL)
		return B_ERROR;

	if (cookie->iterator != NULL) {
		delete cookie->iterator;
		cookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));
	}

	// if CheckNextNode() could run through completely, we can
	// fix any damage to the bitmap
	if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
		// calculate the number of used blocks in the check bitmap
		size_t size = BitmapSize();
		off_t usedBlocks = 0LL;

		// ToDo: update the allocation groups used blocks info
		for (uint32 i = size >> 2; i-- > 0;) {
			uint32 compare = 1;
			for (int16 j = 0; j < 32; j++, compare <<= 1) {
				if (compare & fCheckBitmap[i])
					usedBlocks++;
			}
		}

		control->stats.freed = fVolume->UsedBlocks() - usedBlocks
			+ control->stats.missing;
		if (control->stats.freed < 0)
			control->stats.freed = 0;

		// Should we fix errors? Were there any errors we can fix?
		if (control->flags & BFS_FIX_BITMAP_ERRORS
			&& (control->stats.freed != 0 || control->stats.missing != 0)) {
			// If so, write the check bitmap back over the original one. We
			// use transactions here to play safe - in fact, several smaller
			// transactions, so that we don't blow the maximum log size on
			// large disks; this doesn't need to be atomic anyway.
			fVolume->SuperBlock().used_blocks
				= HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

			int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
			int32 blockSize = fVolume->BlockSize();

			for (int32 i = 0; i < blocksInBitmap; i += 512) {
				Transaction transaction(fVolume, 1 + i);

				int32 blocksToWrite = 512;
				if (blocksToWrite + i > blocksInBitmap)
					blocksToWrite = blocksInBitmap - i;

				status_t status = transaction.WriteBlocks(1 + i,
					(uint8 *)fCheckBitmap + i * blockSize, blocksToWrite);
				if (status < B_OK) {
					FATAL(("error writing bitmap: %s\n", strerror(status)));
					break;
				}
				transaction.Done();
			}
		}
	} else
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	fCheckCookie = NULL;
	delete cookie;
	fLock.Unlock();

	return B_OK;
}


status_t
BlockAllocator::CheckNextNode(check_control *control)
{
	if (!_IsValidCheckControl(control))
		return B_BAD_VALUE;

	check_cookie *cookie = (check_cookie *)control->cookie;

	while (true) {
		if (cookie->iterator == NULL) {
			if (!cookie->stack.Pop(&cookie->current)) {
				// no more runs on the stack, we are obviously finished!
				control->status = B_ENTRY_NOT_FOUND;
				return B_ENTRY_NOT_FOUND;
			}

			// get iterator for the next directory

			Vnode vnode(fVolume, cookie->current);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("check: Could not open inode at %Ld\n",
					fVolume->ToBlock(cookie->current)));
				continue;
			}

			if (!inode->IsContainer()) {
				FATAL(("check: inode at %Ld should have been a directory\n",
					fVolume->ToBlock(cookie->current)));
				continue;
			}

			BPlusTree *tree;
			if (inode->GetTree(&tree) != B_OK) {
				FATAL(("check: could not open b+tree from inode at %Ld\n",
					fVolume->ToBlock(cookie->current)));
				continue;
			}

			cookie->parent = inode;
			cookie->parent_mode = inode->Mode();

			cookie->iterator = new(std::nothrow) TreeIterator(tree);
			if (cookie->iterator == NULL)
				RETURN_ERROR(B_NO_MEMORY);

			// the inode must stay locked in memory until the iterator is freed
			vnode.Keep();

			// check the inode of the directory
			control->errors = 0;
			control->status = CheckInode(inode, control);

			if (inode->GetName(control->name) < B_OK)
				strcpy(control->name, "(node has no name)");

			control->inode = inode->ID();
			control->mode = inode->Mode();

			return B_OK;
		}

		char name[B_FILE_NAME_LENGTH];
		uint16 length;
		ino_t id;

		status_t status = cookie->iterator->GetNextEntry(name, &length,
			B_FILE_NAME_LENGTH, &id);
		if (status == B_ENTRY_NOT_FOUND) {
			// there are no more entries in this iterator, free it and go on
			delete cookie->iterator;
			cookie->iterator = NULL;

			// unlock the directory's inode from memory
			put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));

			continue;
		} else if (status == B_OK) {
			// ignore "." and ".." entries
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;

			// fill in the control data as soon as we have them
			strlcpy(control->name, name, B_FILE_NAME_LENGTH);
			control->inode = id;
			control->errors = 0;

			Vnode vnode(fVolume, id);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("Could not open inode ID %Ld!\n", id));
				control->errors |= BFS_COULD_NOT_OPEN;
				control->status = B_ERROR;
				return B_OK;
			}

			// check if the inode's name is the same as in the b+tree
			if (inode->IsRegularNode()) {
				SimpleLocker locker(inode->SmallDataLock());
				NodeGetter node(fVolume, inode);

				const char *localName = inode->Name(node.Node());
				if (localName == NULL || strcmp(localName, name)) {
					control->errors |= BFS_NAMES_DONT_MATCH;
					FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name,
						localName));
				}
			}

			control->mode = inode->Mode();

			// Check for the correct mode of the node (if the mode of the
			// file doesn't fit its parent, there is a serious problem)
			if (((cookie->parent_mode & S_ATTR_DIR) != 0
					&& !inode->IsAttribute())
				|| ((cookie->parent_mode & S_INDEX_DIR) != 0
					&& !inode->IsIndex())
				|| ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR
						| S_INDEX_DIR)) == S_DIRECTORY
					&& (inode->Mode() & (S_ATTR | S_ATTR_DIR
						| S_INDEX_DIR)) != 0)) {
				FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n",
					inode->BlockNumber(), inode->Mode(), cookie->parent_mode,
					cookie->parent->BlockNumber()));

				// if we are allowed to fix errors, we should remove the file
				if (control->flags & BFS_REMOVE_WRONG_TYPES
					&& control->flags & BFS_FIX_BITMAP_ERRORS) {
					// it's safe to start a transaction, because Inode::Remove()
					// won't touch the block bitmap (which we hold the lock for)
					// if we set the INODE_DONT_FREE_SPACE flag - since we fix
					// the bitmap anyway
					Transaction transaction(fVolume,
						cookie->parent->BlockNumber());

					inode->Node().flags
						|= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE);
					status = cookie->parent->Remove(transaction, name, NULL,
						inode->IsContainer());
					if (status == B_OK)
						transaction.Done();
				} else
					status = B_ERROR;

				control->errors |= BFS_WRONG_TYPE;
				control->status = status;
				return B_OK;
			}

			// If the inode has an attribute directory, push it on the stack
			if (!inode->Attributes().IsZero())
				cookie->stack.Push(inode->Attributes());

			// push the directory on the stack so that it will be scanned later
			if (inode->IsContainer() && !inode->IsIndex())
				cookie->stack.Push(inode->BlockRun());
			else {
				// check it now
				control->status = CheckInode(inode, control);

				return B_OK;
			}
		}
	}
	// is never reached
}
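
// CheckNextNode() walks the whole hierarchy iteratively: directories still
// to visit are kept on an explicit stack, while a single B+tree iterator
// walks the entries of the current directory. A hedged sketch of the same
// traversal pattern in isolation (hypothetical visit logic, not from this
// file):
#if 0
static void
depth_first_walk(Stack<block_run> &stack)
{
	block_run current;
	while (stack.Pop(&current)) {
		// visit(current);
		// for each sub-directory (and attribute directory) of current:
		//	stack.Push(child);
		// plain entries are checked immediately instead of being pushed
	}
}
#endif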


bool
BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32bit resolution
	if (index >= size / 4)
		return false;

	return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index])
		& (1UL << (block & 0x1f));
}


void
BlockAllocator::_SetCheckBitmapAt(off_t block)
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32bit resolution
	if (index >= size / 4)
		return;

	fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}


status_t
BlockAllocator::CheckBlockRun(block_run run, const char *type,
	check_control *control, bool allocated)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length())
				> fGroups[run.AllocationGroup()].fNumBits
		|| run.Length() == 0) {
		PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type,
			run.AllocationGroup(), run.Start(), run.Length()));
		if (control == NULL)
			return B_BAD_DATA;

		control->errors |= BFS_INVALID_BLOCK_RUN;
		return B_OK;
	}

	uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.Start() / bitsPerBlock;
	uint32 pos = run.Start() % bitsPerBlock;
	int32 length = 0;
	off_t firstMissing = -1, firstSet = -1;
	off_t firstGroupBlock = (off_t)run.AllocationGroup()
		<< fVolume->AllocationGroupShift();

	AllocationBlock cached(fVolume);

	for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
		if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		if (pos >= cached.NumBlockBits()) {
			// something very strange has happened...
			RETURN_ERROR(B_ERROR);
		}

		while (length < run.Length() && pos < cached.NumBlockBits()) {
			if (cached.IsUsed(pos) != allocated) {
				if (control == NULL) {
					PRINT(("%s: block_run(%ld, %u, %u) is only partially "
						"allocated (pos = %lu, length = %ld)!\n", type,
						run.AllocationGroup(), run.Start(), run.Length(),
						pos, length));
					return B_BAD_DATA;
				}
				if (firstMissing == -1) {
					firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
					control->errors |= BFS_MISSING_BLOCKS;
				}
				control->stats.missing++;
			} else if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are "
					"%sallocated!\n", type, run.AllocationGroup(), run.Start(),
					run.Length(), firstMissing,
					firstGroupBlock + pos + block * bitsPerBlock - 1,
					allocated ? "not " : ""));
				firstMissing = -1;
			}

			if (fCheckBitmap != NULL) {
				// Set the block in the check bitmap as well, but have a look
				// if it is already allocated first
				uint32 offset = pos + block * bitsPerBlock;
				if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
					if (firstSet == -1) {
						firstSet = firstGroupBlock + offset;
						control->errors |= BFS_BLOCKS_ALREADY_SET;
					}
					control->stats.already_set++;
				} else {
					if (firstSet != -1) {
						FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld "
							"are already set!\n", type,
							(int)run.AllocationGroup(), run.Start(),
							run.Length(), firstSet,
							firstGroupBlock + offset - 1));
						firstSet = -1;
					}
					_SetCheckBitmapAt(firstGroupBlock + offset);
				}
			}
			length++;
			pos++;
		}

		if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
			if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not "
					"allocated!\n", type, run.AllocationGroup(), run.Start(),
					run.Length(), firstMissing,
					firstGroupBlock + pos + block * bitsPerBlock - 1));
			}
			if (firstSet != -1) {
				FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld are "
					"already set!\n", type, (int)run.AllocationGroup(),
					run.Start(), run.Length(), firstSet,
					firstGroupBlock + pos + block * bitsPerBlock - 1));
			}
		}
	}

	return B_OK;
}


status_t
BlockAllocator::CheckInode(Inode *inode, check_control *control)
{
	if (control != NULL && fCheckBitmap == NULL)
		return B_NO_INIT;
	if (inode == NULL)
		return B_BAD_VALUE;

	status_t status = CheckBlockRun(inode->BlockRun(), "inode", control);
	if (status < B_OK)
		return status;

	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
		// short symlinks don't have a data stream
		if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH)
			return B_BAD_DATA;

		return B_OK;
	}

	data_stream *data = &inode->Node().data;

	// check the direct range

	if (data->max_direct_range) {
		for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
			if (data->direct[i].IsZero())
				break;

			status = CheckBlockRun(data->direct[i], "direct", control);
			if (status < B_OK)
				return status;
		}
	}

	CachedBlock cached(fVolume);

	// check the indirect range

	if (data->max_indirect_range) {
		status = CheckBlockRun(data->indirect, "indirect", control);
		if (status < B_OK)
			return status;

		off_t block = fVolume->ToBlock(data->indirect);

		for (int32 i = 0; i < data->indirect.Length(); i++) {
			block_run *runs = (block_run *)cached.SetTo(block + i);
			if (runs == NULL)
				RETURN_ERROR(B_IO_ERROR);

			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
			int32 index = 0;
			for (; index < runsPerBlock; index++) {
				if (runs[index].IsZero())
					break;

				status = CheckBlockRun(runs[index], "indirect->run", control);
				if (status < B_OK)
					return status;
			}
			if (index < runsPerBlock)
				break;
		}
	}

	// check the double indirect range

	if (data->max_double_indirect_range) {
		status = CheckBlockRun(data->double_indirect, "double indirect",
			control);
		if (status < B_OK)
			return status;

		int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
		int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT;

		CachedBlock cachedDirect(fVolume);
		int32 maxIndirectIndex = (data->double_indirect.Length()
			<< fVolume->BlockShift()) / sizeof(block_run);

		for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex;
				indirectIndex++) {
			// get the indirect array block
			block_run *array = (block_run *)cached.SetTo(
				fVolume->ToBlock(data->double_indirect)
					+ indirectIndex / runsPerBlock);
			if (array == NULL)
				return B_IO_ERROR;

			block_run indirect = array[indirectIndex % runsPerBlock];
			// are we finished yet?
			if (indirect.IsZero())
				return B_OK;

			status = CheckBlockRun(indirect, "double indirect->runs", control);
			if (status < B_OK)
				return status;

			int32 maxIndex = (indirect.Length() << fVolume->BlockShift())
				/ sizeof(block_run);

			for (int32 index = 0; index < maxIndex; ) {
				block_run *runs = (block_run *)cachedDirect.SetTo(
					fVolume->ToBlock(indirect) + index / runsPerBlock);
				if (runs == NULL)
					return B_IO_ERROR;

				do {
					// are we finished yet?
					if (runs[index % runsPerBlock].IsZero())
						return B_OK;

					status = CheckBlockRun(runs[index % runsPerBlock],
						"double indirect->runs->run", control);
					if (status < B_OK)
						return status;
				} while ((++index % runsPerBlock) != 0);
					// advance to the next cached block once this one is done
			}
		}
	}

	return B_OK;
}
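
// A hedged sketch of the double indirect geometry walked above
// (illustrative numbers: 2048-byte blocks, sizeof(block_run) == 8; the
// actual values depend on the volume):
#if 0
static void
geometry_example()
{
	int32 blockSize = 2048;
	int32 runsPerBlock = blockSize / 8;		// 256 block_runs per block
	int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT;
		// one indirect run of the double indirect array spans
		// 1 << ARRAY_BLOCKS_SHIFT blocks, i.e. runsPerArray data runs
}
#endif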


//	#pragma mark - debugger commands


#ifdef BFS_DEBUGGER_COMMANDS


void
BlockAllocator::Dump()
{
	kprintf("allocation groups: %ld\n", fNumGroups);
	kprintf("blocks per group: %ld\n", fBlocksPerGroup);

	for (int32 i = 0; i < fNumGroups; i++) {
		AllocationGroup& group = fGroups[i];

		kprintf("[%3ld] num bits:      %lu\n", i, group.NumBits());
		kprintf("      num blocks:    %lu\n", group.NumBlocks());
		kprintf("      start:         %ld\n", group.Start());
		kprintf("      first free:    %ld\n", group.fFirstFree);
		kprintf("      largest:       %ld\n", group.fLargest);
		kprintf("      largest first: %ld\n", group.fLargestFirst);
		kprintf("      free bits:     %ld\n", group.fFreeBits);
	}
}


int
dump_block_allocator(int argc, char **argv)
{
	if (argc != 2 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <ptr-to-volume>\n", argv[0]);
		return 0;
	}

	Volume *volume = (Volume *)parse_expression(argv[1]);
	BlockAllocator &allocator = volume->Allocator();

	allocator.Dump();
	return 0;
}


#endif	// BFS_DEBUGGER_COMMANDS
