xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 93a78ecaa45114d68952d08c4778f073515102f2)
1 /*
2  * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "block_cache_private.h"
8 
9 #include <KernelExport.h>
10 #include <fs_cache.h>
11 
12 #include <block_cache.h>
13 #include <lock.h>
14 #include <vm_low_memory.h>
15 #include <util/kernel_cpp.h>
16 #include <util/DoublyLinkedList.h>
17 #include <util/AutoLock.h>
18 #include <util/khash.h>
19 
20 #include <unistd.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 
26 // TODO: this is a naive but growing implementation to test the API:
27 //	1) block reading/writing is not at all optimized for speed, it will
28 //	   just read and write single blocks.
29 //	2) the locking could be improved; getting a block should not need to
30 //	   wait for blocks to be written
31 //	3) dirty blocks are only written back if asked for
32 // TODO: the retrieval/copy of the original data could be delayed until the
33 //		new data must be written, i.e. in low memory situations.
34 
35 //#define TRACE_BLOCK_CACHE
36 #ifdef TRACE_BLOCK_CACHE
37 #	define TRACE(x)	dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 #define DEBUG_BLOCK_CACHE
43 
44 // This macro is used for fatal situations that are acceptable in a running
45 // system, like out of memory situations; it should only panic when debugging.
46 #define FATAL(x) panic x
47 
48 struct cache_transaction {
49 	cache_transaction();
50 
51 	cache_transaction *next;
52 	int32			id;
53 	int32			num_blocks;
54 	int32			sub_num_blocks;
55 	cached_block	*first_block;
56 	block_list		blocks;
57 	transaction_notification_hook notification_hook;
58 	void			*notification_data;
59 	bool			open;
60 	bool			has_sub_transaction;
61 };
62 
63 static status_t write_cached_block(block_cache *cache, cached_block *block,
64 	bool deleteTransaction = true);
65 
66 
67 #ifdef DEBUG_BLOCK_CACHE
68 static DoublyLinkedList<block_cache> sCaches;
69 static mutex sCachesLock;
70 #endif
71 
72 
73 //	#pragma mark - private transaction
74 
75 
76 cache_transaction::cache_transaction()
77 {
78 	num_blocks = 0;
79 	sub_num_blocks = 0;
80 	first_block = NULL;
81 	notification_hook = NULL;
82 	notification_data = NULL;
83 	open = true;
	has_sub_transaction = false;
84 }
85 
86 
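// The two functions below are the khash callbacks for the transaction hash
// table. The compare function receives an element and a key; the hash
// function receives either an element or a key (whichever is non-NULL) and
// must hash both consistently. cached_block::Compare()/Hash() further down
// follow the same convention for the block hash table.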
87 static int
88 transaction_compare(void *_transaction, const void *_id)
89 {
90 	cache_transaction *transaction = (cache_transaction *)_transaction;
91 	const int32 *id = (const int32 *)_id;
92 
93 	return transaction->id - *id;
94 }
95 
96 
97 static uint32
98 transaction_hash(void *_transaction, const void *_id, uint32 range)
99 {
100 	cache_transaction *transaction = (cache_transaction *)_transaction;
101 	const int32 *id = (const int32 *)_id;
102 
103 	if (transaction != NULL)
104 		return transaction->id % range;
105 
106 	return (uint32)*id % range;
107 }
108 
109 
110 static void
111 delete_transaction(block_cache *cache, cache_transaction *transaction)
112 {
113 	if (cache->last_transaction == transaction)
114 		cache->last_transaction = NULL;
115 
116 	delete transaction;
117 }
118 
119 
120 static cache_transaction *
121 lookup_transaction(block_cache *cache, int32 id)
122 {
123 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
124 }
125 
126 
127 //	#pragma mark - cached_block
128 
129 
130 /* static */
131 int
132 cached_block::Compare(void *_cacheEntry, const void *_block)
133 {
134 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
135 	const off_t *block = (const off_t *)_block;
136 
137 	return cacheEntry->block_number - *block;
138 }
139 
140 
141 
142 /* static */
143 uint32
144 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
145 {
146 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
147 	const off_t *block = (const off_t *)_block;
148 
149 	if (cacheEntry != NULL)
150 		return cacheEntry->block_number % range;
151 
152 	return (uint64)*block % range;
153 }
154 
155 
156 //	#pragma mark - block_cache
157 
158 
159 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
160 		bool readOnly)
161 	:
162 	hash(NULL),
163 	fd(_fd),
164 	max_blocks(numBlocks),
165 	block_size(blockSize),
166 	next_transaction_id(1),
167 	last_transaction(NULL),
168 	transaction_hash(NULL),
169 	ranges_hash(NULL),
170 	read_only(readOnly)
171 {
172 #ifdef DEBUG_BLOCK_CACHE
173 	mutex_lock(&sCachesLock);
174 	sCaches.Add(this);
175 	mutex_unlock(&sCachesLock);
176 #endif
177 
178 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
179 	if (hash == NULL)
180 		return;
181 
182 	transaction_hash = hash_init(16, 0, &transaction_compare,
183 		&::transaction_hash);
184 	if (transaction_hash == NULL)
185 		return;
186 
187 	ranges_hash = hash_init(16, 0, &block_range::Compare, &block_range::Hash);
188 	if (ranges_hash == NULL)
189 		return;
190 
191 	if (benaphore_init(&lock, "block cache") < B_OK)
192 		return;
193 
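	// A block range is carved into chunks of at least one page each; the
	// masks below have one bit per chunk in a range (range_mask) and one bit
	// per block in a chunk (chunk_mask). For example, with 1 KB blocks and
	// 4 KB pages a chunk spans four blocks, so chunk_mask has four bits set.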
194 	chunk_size = max_c(blockSize, B_PAGE_SIZE);
195 	chunks_per_range = kBlockRangeSize / chunk_size;
196 	range_mask = (1UL << chunks_per_range) - 1;
197 	chunk_mask = (1UL << (chunk_size / blockSize)) - 1;
198 
199 	register_low_memory_handler(&block_cache::LowMemoryHandler, this, 0);
200 }
201 
202 
203 block_cache::~block_cache()
204 {
205 #ifdef DEBUG_BLOCK_CACHE
206 	mutex_lock(&sCachesLock);
207 	sCaches.Remove(this);
208 	mutex_unlock(&sCachesLock);
209 #endif
210 
211 	unregister_low_memory_handler(&block_cache::LowMemoryHandler, this);
212 
213 	benaphore_destroy(&lock);
214 
215 	hash_uninit(ranges_hash);
216 	hash_uninit(transaction_hash);
217 	hash_uninit(hash);
218 }
219 
220 
221 status_t
222 block_cache::InitCheck()
223 {
224 	if (lock.sem < B_OK)
225 		return lock.sem;
226 
227 	if (hash == NULL || transaction_hash == NULL || ranges_hash == NULL)
228 		return B_NO_MEMORY;
229 
230 	return B_OK;
231 }
232 
233 
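/*!	Returns a block range that still has free space, i.e. one from the
	free_ranges list. If that list is empty, a new range is allocated; should
	that fail, unused blocks are evicted (first only rarely accessed ones,
	then any of them) in the hope that this frees up a range. Returns NULL
	if all of that fails.
*/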
234 block_range *
235 block_cache::GetFreeRange()
236 {
237 	if (!free_ranges.IsEmpty())
238 		return free_ranges.First();
239 
240 	// we need to allocate a new range
241 	block_range *range;
242 	if (block_range::New(this, &range) != B_OK) {
243 		RemoveUnusedBlocks(2, 50);
244 
245 		if (!free_ranges.IsEmpty())
246 			return free_ranges.First();
247 
248 		RemoveUnusedBlocks(LONG_MAX, 50);
249 
250 		if (!free_ranges.IsEmpty())
251 			return free_ranges.First();
252 
253 		// TODO: We also need to free ranges from other caches to get a free one
254 		// (if not, an active volume might have stolen all free ranges already)
255 		return NULL;
256 	}
257 
258 	return range;
259 }
260 
261 
262 block_range *
263 block_cache::GetRange(void *address)
264 {
265 	return (block_range *)hash_lookup(ranges_hash, address);
266 }
267 
268 
269 void
270 block_cache::Free(void *address)
271 {
272 	if (address == NULL)
273 		return;
274 
275 	block_range *range = GetRange(address);
276 	if (range == NULL)
277 		panic("no range for address %p\n", address);
278 
279 	ASSERT(range != NULL);
280 	range->Free(this, address);
281 
282 	if (range->Unused(this))
283 		block_range::Delete(this, range);
284 }
285 
286 
287 void *
288 block_cache::Allocate()
289 {
290 	block_range *range = GetFreeRange();
291 	if (range == NULL)
292 		return NULL;
293 
294 	return range->Allocate(this);
295 }
296 
297 
298 void
299 block_cache::FreeBlock(cached_block *block)
300 {
301 	block_range *range = GetRange(block->current_data);
302 	ASSERT(range != NULL);
303 	range->Free(this, block);
304 
305 	if (block->original_data != NULL || block->parent_data != NULL) {
306 		panic("block_cache::FreeBlock(): %p, %p\n", block->original_data,
307 			block->parent_data);
308 	}
309 
310 #ifdef DEBUG_CHANGED
311 	Free(block->compare);
312 #endif
313 
314 	if (range->Unused(this))
315 		block_range::Delete(this, range);
316 
317 	delete block;
318 }
319 
320 
321 /*! Allocates a new block for \a blockNumber, ready for use */
322 cached_block *
323 block_cache::NewBlock(off_t blockNumber)
324 {
325 	cached_block *block = new(nothrow) cached_block;
326 	if (block == NULL) {
327 		FATAL(("could not allocate block!\n"));
328 		return NULL;
329 	}
330 
331 	block_range *range = GetFreeRange();
332 	if (range == NULL) {
333 		FATAL(("could not get range!\n"));
334 		delete block;
335 		return NULL;
336 	}
337 
338 	range->Allocate(this, block);
339 
340 	block->block_number = blockNumber;
341 	block->ref_count = 0;
342 	block->accessed = 0;
343 	block->transaction_next = NULL;
344 	block->transaction = block->previous_transaction = NULL;
345 	block->original_data = NULL;
346 	block->parent_data = NULL;
347 	block->is_dirty = false;
348 	block->unused = false;
349 #ifdef DEBUG_CHANGED
350 	block->compare = NULL;
351 #endif
352 
353 	return block;
354 }
355 
356 
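/*!	Evicts up to \a count blocks from the unused list, but only those that
	have been accessed at most \a maxAccessed times. Dirty blocks are written
	back before they are freed.
*/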
357 void
358 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
359 {
360 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
361 
362 	for (block_list::Iterator it = unused_blocks.GetIterator();
363 		 cached_block *block = it.Next();) {
364 
365 		if (maxAccessed < block->accessed)
366 			continue;
367 
368 		TRACE(("  remove block %Ld, accessed %ld times\n",
369 			block->block_number, block->accessed));
370 
371 		// this can only happen if no transactions are used
372 		if (block->is_dirty)
373 			write_cached_block(this, block, false);
374 
375 		// remove block from lists
376 		it.Remove();
377 		hash_remove(hash, block);
378 
379 		FreeBlock(block);
380 
381 		if (--count <= 0)
382 			break;
383 	}
384 }
385 
386 
387 void
388 block_cache::LowMemoryHandler(void *data, int32 level)
389 {
390 	block_cache *cache = (block_cache *)data;
391 	BenaphoreLocker locker(&cache->lock);
392 
393 	if (!locker.IsLocked()) {
394 		// If our block_cache is being deleted, this handler may have been
395 		// called just before that deletion went through; in that case,
396 		// acquiring its lock fails, and we simply bail out.
397 		return;
398 	}
399 
400 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
401 
402 	// free some blocks according to the low memory state
403 	// (if there is enough memory left, we don't free any)
404 
405 	int32 free = 1;
406 	int32 accessed = 1;
407 	switch (vm_low_memory_state()) {
408 		case B_NO_LOW_MEMORY:
409 			return;
410 		case B_LOW_MEMORY_NOTE:
411 			free = 50;
412 			accessed = 2;
413 			break;
414 		case B_LOW_MEMORY_WARNING:
415 			free = 200;
416 			accessed = 10;
417 			break;
418 		case B_LOW_MEMORY_CRITICAL:
419 			free = LONG_MAX;
420 			accessed = LONG_MAX;
421 			break;
422 	}
423 
424 	cache->RemoveUnusedBlocks(accessed, free);
425 }
426 
427 
428 //	#pragma mark - private block functions
429 
430 
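/*!	Releases one reference to \a block. If this was the last reference and
	the block no longer belongs to a transaction, the block is moved to the
	list of unused blocks. Depending on the current low memory state, a few
	unused blocks may be evicted right away.
*/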
431 static void
432 put_cached_block(block_cache *cache, cached_block *block)
433 {
434 #ifdef DEBUG_CHANGED
435 	if (!block->is_dirty && block->compare != NULL
436 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
437 		dprintf("new block:\n");
438 		dump_block((const char *)block->current_data, 256, "  ");
439 		dprintf("unchanged block:\n");
440 		dump_block((const char *)block->compare, 256, "  ");
441 		write_cached_block(cache, block);
442 		panic("block_cache: supposed to be clean block was changed!\n");
443 
444 		cache->Free(block->compare);
445 		block->compare = NULL;
446 	}
447 #endif
448 
449 	if (block->ref_count < 1) {
450 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
451 		return;
452 	}
453 
454 	if (--block->ref_count == 0
455 		&& block->transaction == NULL
456 		&& block->previous_transaction == NULL) {
457 		// put this block in the list of unused blocks
458 		block->unused = true;
459 		if (block->original_data != NULL || block->parent_data != NULL)
460 			panic("put_cached_block(): %p (%Ld): %p, %p\n", block, block->block_number, block->original_data, block->parent_data);
461 		cache->unused_blocks.Add(block);
462 //		block->current_data = cache->allocator->Release(block->current_data);
463 	}
464 
465 	// free some blocks according to the low memory state
466 	// (if there is enough memory left, we don't free any)
467 
468 	int32 free = 1;
469 	switch (vm_low_memory_state()) {
470 		case B_NO_LOW_MEMORY:
471 			return;
472 		case B_LOW_MEMORY_NOTE:
473 			free = 1;
474 			break;
475 		case B_LOW_MEMORY_WARNING:
476 			free = 5;
477 			break;
478 		case B_LOW_MEMORY_CRITICAL:
479 			free = 20;
480 			break;
481 	}
482 
483 	cache->RemoveUnusedBlocks(LONG_MAX, free);
484 }
485 
486 
487 static void
488 put_cached_block(block_cache *cache, off_t blockNumber)
489 {
490 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
491 		panic("put_cached_block: invalid block number %lld (max %lld)",
492 			blockNumber, cache->max_blocks - 1);
493 	}
494 
495 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
496 	if (block != NULL)
497 		put_cached_block(cache, block);
498 }
499 
500 
501 /*!
502 	Retrieves the block \a blockNumber from the hash table, if it's already
503 	there, or reads it from the disk.
504 
505 	\param _allocated tells you whether or not a new block has been allocated
506 		to satisfy your request.
507 	\param readBlock if \c false, the block will not be read in case it was
508 		not already in the cache. The block you retrieve may contain random
509 		data.
510 */
511 static cached_block *
512 get_cached_block(block_cache *cache, off_t blockNumber, bool *_allocated,
513 	bool readBlock = true)
514 {
515 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
516 		panic("get_cached_block: invalid block number %lld (max %lld)",
517 			blockNumber, cache->max_blocks - 1);
518 		return NULL;
519 	}
520 
521 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
522 		&blockNumber);
523 	*_allocated = false;
524 
525 	if (block == NULL) {
526 		// read block into cache
527 		block = cache->NewBlock(blockNumber);
528 		if (block == NULL)
529 			return NULL;
530 
531 		hash_insert(cache->hash, block);
532 		*_allocated = true;
533 	} else {
534 		// TODO: currently, the data is always mapped in
535 /*
536 		if (block->ref_count == 0 && block->current_data != NULL) {
537 			// see if the old block can be resurrected
538 			block->current_data = cache->allocator->Acquire(block->current_data);
539 		}
540 
541 		if (block->current_data == NULL) {
542 			// there is no block yet, but we need one
543 			block->current_data = cache->allocator->Get();
544 			if (block->current_data == NULL)
545 				return NULL;
546 
547 			*_allocated = true;
548 		}
549 */
550 	}
551 
552 	if (*_allocated && readBlock) {
553 		int32 blockSize = cache->block_size;
554 
555 		if (read_pos(cache->fd, blockNumber * blockSize, block->current_data,
556 				blockSize) < blockSize) {
557 			hash_remove(cache->hash, block);
558 			cache->FreeBlock(block);
559 			FATAL(("could not read block %Ld\n", blockNumber));
560 			return NULL;
561 		}
562 	}
563 
564 	if (block->unused) {
565 		//TRACE(("remove block %Ld from unused\n", blockNumber));
566 		block->unused = false;
567 		cache->unused_blocks.Remove(block);
568 	}
569 
570 	block->ref_count++;
571 	block->accessed++;
572 
573 	return block;
574 }
575 
576 
577 /*!
578 	Returns the writable block data for the requested blockNumber.
579 	If \a cleared is true, the block is not read from disk; an empty block
580 	is returned.
581 
582 	This is the only method to insert a block into a transaction. It makes
583 	sure that the previous block contents are preserved in that case.
584 */
585 static void *
586 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base,
587 	off_t length, int32 transactionID, bool cleared)
588 {
589 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
590 		blockNumber, transactionID));
591 
592 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
593 		panic("get_writable_cached_block: invalid block number %lld (max %lld)",
594 			blockNumber, cache->max_blocks - 1);
595 	}
596 
597 	bool allocated;
598 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
599 		!cleared);
600 	if (block == NULL)
601 		return NULL;
602 
603 	// if there is no transaction support, we just return the current block
604 	if (transactionID == -1) {
605 		if (cleared)
606 			memset(block->current_data, 0, cache->block_size);
607 
608 		block->is_dirty = true;
609 			// mark the block as dirty
610 
611 		return block->current_data;
612 	}
613 
614 	if (block->transaction != NULL && block->transaction->id != transactionID) {
615 		// ToDo: we have to wait here until the other transaction is done.
616 		//	Maybe we should even panic, since we can't prevent any deadlocks.
617 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
618 		put_cached_block(cache, block);
619 		return NULL;
620 	}
621 	if (block->transaction == NULL && transactionID != -1) {
622 		// get new transaction
623 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
624 		if (transaction == NULL) {
625 			panic("get_writable_cached_block(): invalid transaction %ld!\n",
626 				transactionID);
627 			put_cached_block(cache, block);
628 			return NULL;
629 		}
630 		if (!transaction->open) {
631 			panic("get_writable_cached_block(): transaction already done!\n");
632 			put_cached_block(cache, block);
633 			return NULL;
634 		}
635 
636 		block->transaction = transaction;
637 
638 		// attach the block to the transaction block list
639 		block->transaction_next = transaction->first_block;
640 		transaction->first_block = block;
641 		transaction->num_blocks++;
642 	}
643 
644 	if (!(allocated && cleared) && block->original_data == NULL) {
645 		// we already have data, so we need to preserve it
646 		block->original_data = cache->Allocate();
647 		if (block->original_data == NULL) {
648 			FATAL(("could not allocate original_data\n"));
649 			put_cached_block(cache, block);
650 			return NULL;
651 		}
652 
653 		memcpy(block->original_data, block->current_data, cache->block_size);
654 	}
655 	if (block->parent_data == block->current_data) {
656 		// remember any previous contents for the parent transaction
657 		block->parent_data = cache->Allocate();
658 		if (block->parent_data == NULL) {
659 			// TODO: maybe we should just continue the current transaction in this case...
660 			FATAL(("could not allocate parent\n"));
661 			put_cached_block(cache, block);
662 			return NULL;
663 		}
664 
665 		memcpy(block->parent_data, block->current_data, cache->block_size);
666 		block->transaction->sub_num_blocks++;
667 	}
668 
669 	if (cleared)
670 		memset(block->current_data, 0, cache->block_size);
671 
672 	block->is_dirty = true;
673 
674 	return block->current_data;
675 }
676 
677 
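/*!	Writes \a block back to disk. If the block still has changes pending from
	a previous (already ended) transaction and has been modified again since
	then, the preserved original_data is written rather than the newer, not
	yet committed current contents. Once the last block of that previous
	transaction has been written back, its notification hook is called, and
	the transaction is deleted if \a deleteTransaction is \c true.
*/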
678 static status_t
679 write_cached_block(block_cache *cache, cached_block *block,
680 	bool deleteTransaction)
681 {
682 	cache_transaction *previous = block->previous_transaction;
683 	int32 blockSize = cache->block_size;
684 
685 	void *data = previous && block->original_data
686 		? block->original_data : block->current_data;
687 		// we first need to write back changes from previous transactions
688 
689 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
690 
691 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize,
692 		data, blockSize);
693 
694 	if (written < blockSize) {
695 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
696 			strerror(errno)));
697 		return B_IO_ERROR;
698 	}
699 
700 	if (data == block->current_data)
701 		block->is_dirty = false;
702 
703 	if (previous != NULL) {
704 		previous->blocks.Remove(block);
705 		block->previous_transaction = NULL;
706 
707 		// Has the previous transaction been finished with that write?
708 		if (--previous->num_blocks == 0) {
709 			TRACE(("cache transaction %ld finished!\n", previous->id));
710 
711 			if (previous->notification_hook != NULL) {
712 				previous->notification_hook(previous->id,
713 					previous->notification_data);
714 			}
715 
716 			if (deleteTransaction) {
717 				hash_remove(cache->transaction_hash, previous);
718 				delete_transaction(cache, previous);
719 			}
720 		}
721 	}
722 
723 	return B_OK;
724 }
725 
726 
727 #ifdef DEBUG_BLOCK_CACHE
728 static int
729 dump_cache(int argc, char **argv)
730 {
731 	if (argc < 2) {
732 		kprintf("usage: %s [-b] <address>\n", argv[0]);
733 		return 0;
734 	}
735 
736 	bool showBlocks = false;
737 	int32 i = 1;
738 	if (!strcmp(argv[1], "-b")) {
739 		showBlocks = true;
740 		i++;
741 	}
742 
743 	block_cache *cache = (struct block_cache *)strtoul(argv[i], NULL, 0);
744 	if (cache == NULL) {
745 		kprintf("invalid cache address\n");
746 		return 0;
747 	}
748 
749 	kprintf("BLOCK CACHE: %p\n", cache);
750 
751 	kprintf(" fd:         %d\n", cache->fd);
752 	kprintf(" max_blocks: %Ld\n", cache->max_blocks);
753 	kprintf(" block_size: %lu\n", cache->block_size);
754 	kprintf(" next_transaction_id: %ld\n", cache->next_transaction_id);
755 	kprintf(" chunks_per_range: %lu\n", cache->chunks_per_range);
756 	kprintf(" chunks_size: %lu\n", cache->chunk_size);
757 	kprintf(" range_mask: %lu\n", cache->range_mask);
758 	kprintf(" chunks_mask: %lu\n", cache->chunk_mask);
759 
760 	if (showBlocks) {
761 		kprintf(" blocks:\n");
762 		kprintf("address  block no. current  original parent    refs access flags transact prev. trans\n");
763 	}
764 
765 	uint32 count = 0;
766 	hash_iterator iterator;
767 	hash_open(cache->hash, &iterator);
768 	cached_block *block;
769 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
770 		if (showBlocks) {
771 			kprintf("%08lx %9Ld %08lx %08lx %08lx %5ld %6ld %c%c%c%c%c %08lx %08lx\n",
772 				(addr_t)block, block->block_number, (addr_t)block->current_data,
773 				(addr_t)block->original_data, (addr_t)block->parent_data,
774 				block->ref_count, block->accessed, block->busy ? 'B' : '-',
775 				block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
776 				block->unused ? 'U' : '-', block->unmapped ? 'M' : '-',
777 				(addr_t)block->transaction, (addr_t)block->previous_transaction);
778 		}
779 		count++;
780 	}
781 
782 	kprintf(" %ld blocks.\n", count);
783 
784 	hash_close(cache->hash, &iterator, false);
785 	return 0;
786 }
787 
788 
789 static int
790 dump_caches(int argc, char **argv)
791 {
792 	kprintf("Block caches:\n");
793 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
794 	while (i.HasNext()) {
795 		kprintf("  %p\n", i.Next());
796 	}
797 
798 	return 0;
799 }
800 #endif	// DEBUG_BLOCK_CACHE
801 
802 
803 extern "C" status_t
804 block_cache_init(void)
805 {
806 #ifdef DEBUG_BLOCK_CACHE
807 	mutex_init(&sCachesLock, "block caches");
808 	new (&sCaches) DoublyLinkedList<block_cache>;
809 		// manually call constructor
810 
811 	add_debugger_command("block_caches", &dump_caches, "dumps all block caches");
812 	add_debugger_command("block_cache", &dump_cache, "dumps a specific block cache");
813 #endif
814 
815 	return init_block_allocator();
816 }
817 
818 
819 //	#pragma mark - public transaction API
820 
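/*	A typical use of this API by a file system could look like the following
	sketch (illustrative only - "cache" would come from block_cache_create(),
	"blockNumber" is whatever block the file system wants to change, and all
	error checking is omitted):

		int32 id = cache_start_transaction(cache);

		void *block = block_cache_get_writable(cache, blockNumber, id);
		// ... modify the block contents ...
		block_cache_put(cache, blockNumber);

		cache_end_transaction(cache, id, NULL, NULL);
		cache_sync_transaction(cache, id);
			// writes back the blocks of the now ended transaction
*/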
821 
822 extern "C" int32
823 cache_start_transaction(void *_cache)
824 {
825 	block_cache *cache = (block_cache *)_cache;
826 	BenaphoreLocker locker(&cache->lock);
827 
828 	if (cache->last_transaction && cache->last_transaction->open) {
829 		panic("last transaction (%ld) still open!\n",
830 			cache->last_transaction->id);
831 	}
832 
833 	cache_transaction *transaction = new(nothrow) cache_transaction;
834 	if (transaction == NULL)
835 		return B_NO_MEMORY;
836 
837 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
838 	cache->last_transaction = transaction;
839 
840 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
841 
842 	hash_insert(cache->transaction_hash, transaction);
843 
844 	return transaction->id;
845 }
846 
847 
848 extern "C" status_t
849 cache_sync_transaction(void *_cache, int32 id)
850 {
851 	block_cache *cache = (block_cache *)_cache;
852 	BenaphoreLocker locker(&cache->lock);
853 	status_t status = B_ENTRY_NOT_FOUND;
854 
855 	hash_iterator iterator;
856 	hash_open(cache->transaction_hash, &iterator);
857 
858 	cache_transaction *transaction;
859 	while ((transaction = (cache_transaction *)hash_next(
860 			cache->transaction_hash, &iterator)) != NULL) {
861 		// close all earlier transactions which haven't been closed yet
862 		// write back all already ended transactions up to and including \a id
863 		if (transaction->id <= id && !transaction->open) {
864 			// write back all of their remaining dirty blocks
865 			while (transaction->num_blocks > 0) {
866 				status = write_cached_block(cache, transaction->blocks.Head(),
867 					false);
868 				if (status != B_OK)
869 					return status;
870 			}
871 
872 			hash_remove_current(cache->transaction_hash, &iterator);
873 			delete_transaction(cache, transaction);
874 		}
875 	}
876 
877 	hash_close(cache->transaction_hash, &iterator, false);
878 	return B_OK;
879 }
880 
881 
882 extern "C" status_t
883 cache_end_transaction(void *_cache, int32 id,
884 	transaction_notification_hook hook, void *data)
885 {
886 	block_cache *cache = (block_cache *)_cache;
887 	BenaphoreLocker locker(&cache->lock);
888 
889 	TRACE(("cache_end_transaction(id = %ld)\n", id));
890 
891 	cache_transaction *transaction = lookup_transaction(cache, id);
892 	if (transaction == NULL) {
893 		panic("cache_end_transaction(): invalid transaction ID\n");
894 		return B_BAD_VALUE;
895 	}
896 
897 	transaction->notification_hook = hook;
898 	transaction->notification_data = data;
899 
900 	// iterate through all blocks and free the unchanged original contents
901 
902 	cached_block *block = transaction->first_block, *next;
903 	for (; block != NULL; block = next) {
904 		next = block->transaction_next;
905 
906 		if (block->previous_transaction != NULL) {
907 			// need to write back pending changes
908 			write_cached_block(cache, block);
909 		}
910 
911 		if (block->original_data != NULL) {
912 			cache->Free(block->original_data);
913 			block->original_data = NULL;
914 		}
915 		if (transaction->has_sub_transaction) {
916 			if (block->parent_data != block->current_data)
917 				cache->Free(block->parent_data);
918 			block->parent_data = NULL;
919 		}
920 
921 		// move the block to the previous transaction list
922 		transaction->blocks.Add(block);
923 
924 		block->previous_transaction = transaction;
925 		block->transaction_next = NULL;
926 		block->transaction = NULL;
927 	}
928 
929 	transaction->open = false;
930 
931 	return B_OK;
932 }
933 
934 
935 extern "C" status_t
936 cache_abort_transaction(void *_cache, int32 id)
937 {
938 	block_cache *cache = (block_cache *)_cache;
939 	BenaphoreLocker locker(&cache->lock);
940 
941 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
942 
943 	cache_transaction *transaction = lookup_transaction(cache, id);
944 	if (transaction == NULL) {
945 		panic("cache_abort_transaction(): invalid transaction ID\n");
946 		return B_BAD_VALUE;
947 	}
948 
949 	// iterate through all blocks and restore their original contents
950 
951 	cached_block *block = transaction->first_block, *next;
952 	for (; block != NULL; block = next) {
953 		next = block->transaction_next;
954 
955 		if (block->original_data != NULL) {
956 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
957 				transaction->id, block->block_number));
958 			memcpy(block->current_data, block->original_data, cache->block_size);
959 			cache->Free(block->original_data);
960 			block->original_data = NULL;
961 		}
962 		if (transaction->has_sub_transaction) {
963 			if (block->parent_data != block->current_data)
964 				cache->Free(block->parent_data);
965 			block->parent_data = NULL;
966 		}
967 
968 		block->transaction_next = NULL;
969 		block->transaction = NULL;
970 	}
971 
972 	hash_remove(cache->transaction_hash, transaction);
973 	delete_transaction(cache, transaction);
974 	return B_OK;
975 }
976 
977 
978 /*!
979 	Acknowledges the current parent transaction, and continues its sub
980 	transaction as a new transaction of its own.
981 	The ID of that new transaction is returned.
982 */
983 extern "C" int32
984 cache_detach_sub_transaction(void *_cache, int32 id,
985 	transaction_notification_hook hook, void *data)
986 {
987 	block_cache *cache = (block_cache *)_cache;
988 	BenaphoreLocker locker(&cache->lock);
989 
990 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
991 
992 	cache_transaction *transaction = lookup_transaction(cache, id);
993 	if (transaction == NULL) {
994 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
995 		return B_BAD_VALUE;
996 	}
997 	if (!transaction->has_sub_transaction)
998 		return B_BAD_VALUE;
999 
1000 	// create a new transaction for the sub transaction
1001 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
1002 	if (newTransaction == NULL)
1003 		return B_NO_MEMORY;
1004 
1005 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
1006 
1007 	transaction->notification_hook = hook;
1008 	transaction->notification_data = data;
1009 
1010 	// iterate through all blocks and free the unchanged original contents
1011 
1012 	cached_block *block = transaction->first_block, *next, *last = NULL;
1013 	for (; block != NULL; block = next) {
1014 		next = block->transaction_next;
1015 
1016 		if (block->previous_transaction != NULL) {
1017 			// need to write back pending changes
1018 			write_cached_block(cache, block);
1019 		}
1020 
1021 		if (block->original_data != NULL && block->parent_data != NULL
1022 			&& block->parent_data != block->current_data) {
1023 			// free the original data if the parent data of the transaction
1024 			// will be made current - but keep it otherwise
1025 			cache->Free(block->original_data);
1026 			block->original_data = NULL;
1027 		}
1028 		if (block->parent_data != NULL
1029 			&& block->parent_data != block->current_data) {
1030 			// we need to move this block over to the new transaction
1031 			block->original_data = block->parent_data;
1032 			if (last == NULL)
1033 				newTransaction->first_block = block;
1034 			else
1035 				last->transaction_next = block;
1036 
1037 			last = block;
1038 		}
1039 		block->parent_data = NULL;
1040 
1041 		// move the block to the previous transaction list
1042 		transaction->blocks.Add(block);
1043 
1044 		block->previous_transaction = transaction;
1045 		block->transaction_next = NULL;
1046 		block->transaction = newTransaction;
1047 	}
1048 
1049 	transaction->open = false;
1050 
1051 	hash_insert(cache->transaction_hash, newTransaction);
1052 	cache->last_transaction = newTransaction;
1053 
1054 	return newTransaction->id;
1055 }
1056 
1057 
1058 extern "C" status_t
1059 cache_abort_sub_transaction(void *_cache, int32 id)
1060 {
1061 	block_cache *cache = (block_cache *)_cache;
1062 	BenaphoreLocker locker(&cache->lock);
1063 
1064 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
1065 
1066 	cache_transaction *transaction = lookup_transaction(cache, id);
1067 	if (transaction == NULL) {
1068 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
1069 		return B_BAD_VALUE;
1070 	}
1071 	if (!transaction->has_sub_transaction)
1072 		return B_BAD_VALUE;
1073 
1074 	// revert all changes back to the version of the parent
1075 
1076 	cached_block *block = transaction->first_block, *next;
1077 	for (; block != NULL; block = next) {
1078 		next = block->transaction_next;
1079 
1080 		if (block->parent_data == NULL) {
1081 			if (block->original_data != NULL) {
1082 				// the parent transaction didn't change the block, but the sub
1083 				// transaction did - we need to revert from the original data
1084 				memcpy(block->current_data, block->original_data,
1085 					cache->block_size);
1086 			}
1087 		} else if (block->parent_data != block->current_data) {
1088 			// the block has been changed and must be restored
1089 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
1090 				transaction->id, block->block_number));
1091 			memcpy(block->current_data, block->parent_data, cache->block_size);
1092 			cache->Free(block->parent_data);
1093 		}
1094 
1095 		block->parent_data = NULL;
1096 	}
1097 
1098 	// all subsequent changes will go into the main transaction
1099 	transaction->has_sub_transaction = false;
1100 	return B_OK;
1101 }
1102 
1103 
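/*!	Starts a sub transaction within the transaction \a id. All changes made
	from this point on can later either be continued as a transaction of
	their own (cache_detach_sub_transaction()) or be reverted without
	touching the parent's changes (cache_abort_sub_transaction()).

	An illustrative sketch (error handling omitted, "cache" and "id" assumed
	to be valid):

		cache_start_sub_transaction(cache, id);
		// ... change blocks via block_cache_get_writable(cache, ..., id) ...
		int32 newID = cache_detach_sub_transaction(cache, id, NULL, NULL);
		// the parent transaction is now ended and will be written back,
		// while the sub transaction's changes live on as transaction newID
*/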
1104 extern "C" status_t
1105 cache_start_sub_transaction(void *_cache, int32 id)
1106 {
1107 	block_cache *cache = (block_cache *)_cache;
1108 	BenaphoreLocker locker(&cache->lock);
1109 
1110 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
1111 
1112 	cache_transaction *transaction = lookup_transaction(cache, id);
1113 	if (transaction == NULL) {
1114 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
1115 		return B_BAD_VALUE;
1116 	}
1117 
1118 	// move all changed blocks up to the parent
1119 
1120 	cached_block *block = transaction->first_block, *next;
1121 	for (; block != NULL; block = next) {
1122 		next = block->transaction_next;
1123 
1124 		if (transaction->has_sub_transaction
1125 			&& block->parent_data != NULL
1126 			&& block->parent_data != block->current_data) {
1127 			// there already is an older sub transaction - we acknowledge
1128 			// its changes and move its blocks up to the parent
1129 			cache->Free(block->parent_data);
1130 		}
1131 
1132 		// we "allocate" the parent data lazily - that is, we don't copy the
1133 		// data (or allocate memory for it) until we actually need to
1134 		block->parent_data = block->current_data;
1135 	}
1136 
1137 	// all subsequent changes will go into the sub transaction
1138 	transaction->has_sub_transaction = true;
1139 	transaction->sub_num_blocks = 0;
1140 
1141 	return B_OK;
1142 }
1143 
1144 
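/*!	Iterates over the blocks of transaction \a id; \a *_cookie must be
	initialized to 0 before the first call. A minimal sketch of its use
	(assuming "cache" and "id" are valid):

		uint32 cookie = 0;
		off_t blockNumber;
		void *data;
		while (cache_next_block_in_transaction(cache, id, &cookie,
				&blockNumber, &data, NULL) == B_OK) {
			// ... inspect blockNumber and data ...
		}
*/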
1145 extern "C" status_t
1146 cache_next_block_in_transaction(void *_cache, int32 id, uint32 *_cookie,
1147 	off_t *_blockNumber, void **_data, void **_unchangedData)
1148 {
1149 	cached_block *block = (cached_block *)*_cookie;
1150 	block_cache *cache = (block_cache *)_cache;
1151 
1152 	BenaphoreLocker locker(&cache->lock);
1153 
1154 	cache_transaction *transaction = lookup_transaction(cache, id);
1155 	if (transaction == NULL)
1156 		return B_BAD_VALUE;
1157 
1158 	if (block == NULL)
1159 		block = transaction->first_block;
1160 	else
1161 		block = block->transaction_next;
1162 
1163 	if (block == NULL)
1164 		return B_ENTRY_NOT_FOUND;
1165 
1166 	if (_blockNumber)
1167 		*_blockNumber = block->block_number;
1168 	if (_data)
1169 		*_data = block->current_data;
1170 	if (_unchangedData)
1171 		*_unchangedData = block->original_data;
1172 
1173 	*_cookie = (uint32)block;
1174 	return B_OK;
1175 }
1176 
1177 
1178 extern "C" int32
1179 cache_blocks_in_transaction(void *_cache, int32 id)
1180 {
1181 	block_cache *cache = (block_cache *)_cache;
1182 	BenaphoreLocker locker(&cache->lock);
1183 
1184 	cache_transaction *transaction = lookup_transaction(cache, id);
1185 	if (transaction == NULL)
1186 		return B_BAD_VALUE;
1187 
1188 	return transaction->num_blocks;
1189 }
1190 
1191 
1192 extern "C" int32
1193 cache_blocks_in_sub_transaction(void *_cache, int32 id)
1194 {
1195 	block_cache *cache = (block_cache *)_cache;
1196 	BenaphoreLocker locker(&cache->lock);
1197 
1198 	cache_transaction *transaction = lookup_transaction(cache, id);
1199 	if (transaction == NULL)
1200 		return B_BAD_VALUE;
1201 
1202 	return transaction->sub_num_blocks;
1203 }
1204 
1205 
1206 //	#pragma mark - public block cache API
1208 
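/*	Read-only access to a cached block is a simple get/put pair - a minimal
	sketch, assuming "cache" was obtained from block_cache_create():

		const void *data = block_cache_get(cache, blockNumber);
		if (data != NULL) {
			// ... read from the block ...
			block_cache_put(cache, blockNumber);
		}
*/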
1209 
1210 extern "C" void
1211 block_cache_delete(void *_cache, bool allowWrites)
1212 {
1213 	block_cache *cache = (block_cache *)_cache;
1214 
1215 	if (allowWrites)
1216 		block_cache_sync(cache);
1217 
1218 	BenaphoreLocker locker(&cache->lock);
1219 
1220 	// free all blocks
1221 
1222 	uint32 cookie = 0;
1223 	cached_block *block;
1224 	while ((block = (cached_block *)hash_remove_first(cache->hash,
1225 			&cookie)) != NULL) {
1226 		cache->FreeBlock(block);
1227 	}
1228 
1229 	// free all transactions (they will all be aborted)
1230 
1231 	cookie = 0;
1232 	cache_transaction *transaction;
1233 	while ((transaction = (cache_transaction *)hash_remove_first(
1234 			cache->transaction_hash, &cookie)) != NULL) {
1235 		delete transaction;
1236 	}
1237 
1238 	delete cache;
1239 }
1240 
1241 
1242 extern "C" void *
1243 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
1244 {
1245 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
1246 		readOnly);
1247 	if (cache == NULL)
1248 		return NULL;
1249 
1250 	if (cache->InitCheck() != B_OK) {
1251 		delete cache;
1252 		return NULL;
1253 	}
1254 
1255 	return cache;
1256 }
1257 
1258 
1259 extern "C" status_t
1260 block_cache_sync(void *_cache)
1261 {
1262 	block_cache *cache = (block_cache *)_cache;
1263 
1264 	// sync all dirty blocks that belong to a completed transaction,
1265 	// or to no transaction at all
1266 
1267 	BenaphoreLocker locker(&cache->lock);
1268 	hash_iterator iterator;
1269 	hash_open(cache->hash, &iterator);
1270 
1271 	cached_block *block;
1272 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1273 		if (block->previous_transaction != NULL
1274 			|| (block->transaction == NULL && block->is_dirty)) {
1275 			status_t status = write_cached_block(cache, block);
1276 			if (status != B_OK)
1277 				return status;
1278 		}
1279 	}
1280 
1281 	hash_close(cache->hash, &iterator, false);
1282 	return B_OK;
1283 }
1284 
1285 
1286 extern "C" status_t
1287 block_cache_sync_etc(void *_cache, off_t blockNumber, size_t numBlocks)
1288 {
1289 	block_cache *cache = (block_cache *)_cache;
1290 
1291 	// sync all dirty blocks that belong to a completed transaction,
1292 	// or to no transaction at all
1293 
1294 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1295 		panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
1296 			blockNumber, cache->max_blocks - 1);
1297 		return B_BAD_VALUE;
1298 	}
1299 
1300 	BenaphoreLocker locker(&cache->lock);
1301 
1302 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
1303 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1304 			&blockNumber);
1305 		if (block == NULL)
1306 			continue;
1307 		if (block->previous_transaction != NULL
1308 			|| (block->transaction == NULL && block->is_dirty)) {
1309 			status_t status = write_cached_block(cache, block);
1310 			if (status != B_OK)
1311 				return status;
1312 		}
1313 	}
1314 
1315 	return B_OK;
1316 }
1317 
1318 
1319 extern "C" status_t
1320 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
1321 {
1322 	block_cache *cache = (block_cache *)_cache;
1323 	BenaphoreLocker locker(&cache->lock);
1324 
1325 	if (cache->read_only)
1326 		panic("tried to make block writable on a read-only cache!");
1327 
1328 	// ToDo: this can be done better!
1329 	void *block = get_writable_cached_block(cache, blockNumber,
1330 		blockNumber, 1, transaction, false);
1331 	if (block != NULL) {
1332 		put_cached_block(cache, blockNumber);
1333 		return B_OK;
1334 	}
1335 
1336 	return B_ERROR;
1337 }
1338 
1339 
1340 extern "C" void *
1341 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base,
1342 	off_t length, int32 transaction)
1343 {
1344 	block_cache *cache = (block_cache *)_cache;
1345 	BenaphoreLocker locker(&cache->lock);
1346 
1347 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1348 		blockNumber, transaction));
1349 	if (cache->read_only)
1350 		panic("tried to get writable block on a read-only cache!");
1351 
1352 	return get_writable_cached_block(cache, blockNumber, base, length,
1353 		transaction, false);
1354 }
1355 
1356 
1357 extern "C" void *
1358 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
1359 {
1360 	return block_cache_get_writable_etc(_cache, blockNumber,
1361 		blockNumber, 1, transaction);
1362 }
1363 
1364 
1365 extern "C" void *
1366 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
1367 {
1368 	block_cache *cache = (block_cache *)_cache;
1369 	BenaphoreLocker locker(&cache->lock);
1370 
1371 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1372 		blockNumber, transaction));
1373 	if (cache->read_only)
1374 		panic("tried to get empty writable block on a read-only cache!");
1375 
1376 	return get_writable_cached_block(cache, blockNumber, blockNumber, 1,
1377 		transaction, true);
1378 }
1379 
1380 
1381 extern "C" const void *
1382 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
1383 {
1384 	block_cache *cache = (block_cache *)_cache;
1385 	BenaphoreLocker locker(&cache->lock);
1386 	bool allocated;
1387 
1388 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1389 	if (block == NULL)
1390 		return NULL;
1391 
1392 #ifdef DEBUG_CHANGED
1393 	if (block->compare == NULL)
1394 		block->compare = cache->Allocate();
1395 	if (block->compare != NULL)
1396 		memcpy(block->compare, block->current_data, cache->block_size);
1397 #endif
1398 	return block->current_data;
1399 }
1400 
1401 
1402 extern "C" const void *
1403 block_cache_get(void *_cache, off_t blockNumber)
1404 {
1405 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1406 }
1407 
1408 
1409 /*!
1410 	Changes the internal status of a writable block to \a dirty. This can be
1411 	helpful in case you realize you don't need to change that block anymore
1412 	for whatever reason.
1413 
1414 	Note, you must only use this function on blocks that were acquired
1415 	Note that you may only use this function on blocks that were acquired
1416 */
1417 extern "C" status_t
1418 block_cache_set_dirty(void *_cache, off_t blockNumber, bool dirty,
1419 	int32 transaction)
1420 {
1421 	// TODO: not yet implemented
1422 	if (dirty)
1423 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
1424 
1425 	return B_OK;
1426 }
1427 
1428 
1429 extern "C" void
1430 block_cache_put(void *_cache, off_t blockNumber)
1431 {
1432 	block_cache *cache = (block_cache *)_cache;
1433 	BenaphoreLocker locker(&cache->lock);
1434 
1435 	put_cached_block(cache, blockNumber);
1436 }
1437 
1438