xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 1b8f7f13a3dc70e0e903cb94248220b40b732204)
1 /*
2  * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "block_cache_private.h"
8 
9 #include <KernelExport.h>
10 #include <fs_cache.h>
11 
12 #include <block_cache.h>
13 #include <lock.h>
14 #include <vm_low_memory.h>
15 #include <util/kernel_cpp.h>
16 #include <util/DoublyLinkedList.h>
17 #include <util/AutoLock.h>
18 #include <util/khash.h>
19 
20 #include <unistd.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 
26 // TODO: this is a naive but growing implementation to test the API:
27 //	1) block reading/writing is not at all optimized for speed; it will
28 //	   just read and write single blocks.
29 //	2) the locking could be improved; getting a block should not need to
30 //	   wait for blocks to be written
31 //	3) dirty blocks are only written back if asked for
32 // TODO: the retrieval/copy of the original data could be delayed until the
33 //		new data must be written, i.e. in low memory situations.
34 
35 //#define TRACE_BLOCK_CACHE
36 #ifdef TRACE_BLOCK_CACHE
37 #	define TRACE(x)	dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 #define DEBUG_BLOCK_CACHE
43 
44 // This macro is used for fatal situations that are acceptable in a running
45 // system, like out of memory situations; it should only panic in debug builds.
46 #define FATAL(x) panic x
47 
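// A cache_transaction groups the blocks changed by one atomic file system
// operation. The transaction hash table links entries through "next",
// "first_block" heads the chain of blocks (via their transaction_next
// pointers) while the transaction is open, and "blocks" holds the blocks of
// an already ended transaction that still have to be written back.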
48 struct cache_transaction {
49 	cache_transaction();
50 
51 	cache_transaction *next;
52 	int32			id;
53 	int32			num_blocks;
54 	int32			sub_num_blocks;
55 	cached_block	*first_block;
56 	block_list		blocks;
57 	transaction_notification_hook notification_hook;
58 	void			*notification_data;
59 	bool			open;
60 	bool			has_sub_transaction;
61 };
62 
63 static status_t write_cached_block(block_cache *cache, cached_block *block,
64 	bool deleteTransaction = true);
65 
66 
67 #ifdef DEBUG_BLOCK_CACHE
68 static DoublyLinkedList<block_cache> sCaches;
69 static mutex sCachesLock;
70 #endif
71 
72 
73 //	#pragma mark - private transaction
74 
75 
76 cache_transaction::cache_transaction()
77 {
78 	num_blocks = 0;
79 	sub_num_blocks = 0;
80 	first_block = NULL;
81 	notification_hook = NULL;
82 	notification_data = NULL;
83 	open = true;
	has_sub_transaction = false;
84 }
85 
86 
87 static int
88 transaction_compare(void *_transaction, const void *_id)
89 {
90 	cache_transaction *transaction = (cache_transaction *)_transaction;
91 	const int32 *id = (const int32 *)_id;
92 
93 	return transaction->id - *id;
94 }
95 
96 
97 static uint32
98 transaction_hash(void *_transaction, const void *_id, uint32 range)
99 {
100 	cache_transaction *transaction = (cache_transaction *)_transaction;
101 	const int32 *id = (const int32 *)_id;
102 
103 	if (transaction != NULL)
104 		return transaction->id % range;
105 
106 	return (uint32)*id % range;
107 }
108 
109 
110 static void
111 delete_transaction(block_cache *cache, cache_transaction *transaction)
112 {
113 	if (cache->last_transaction == transaction)
114 		cache->last_transaction = NULL;
115 
116 	delete transaction;
117 }
118 
119 
120 static cache_transaction *
121 lookup_transaction(block_cache *cache, int32 id)
122 {
123 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
124 }
125 
126 
127 //	#pragma mark - cached_block
128 
129 
130 /* static */
131 int
132 cached_block::Compare(void *_cacheEntry, const void *_block)
133 {
134 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
135 	const off_t *block = (const off_t *)_block;
136 
137 	// avoid truncating the 64 bit difference to the int return type
	if (cacheEntry->block_number == *block)
		return 0;
	return cacheEntry->block_number > *block ? 1 : -1;
138 }
139 
140 
141 
142 /* static */
143 uint32
144 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
145 {
146 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
147 	const off_t *block = (const off_t *)_block;
148 
149 	if (cacheEntry != NULL)
150 		return cacheEntry->block_number % range;
151 
152 	return (uint64)*block % range;
153 }
154 
155 
156 //	#pragma mark - block_cache
157 
158 
159 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
160 		bool readOnly)
161 	:
162 	hash(NULL),
163 	fd(_fd),
164 	max_blocks(numBlocks),
165 	block_size(blockSize),
166 	next_transaction_id(1),
167 	last_transaction(NULL),
168 	transaction_hash(NULL),
169 	ranges_hash(NULL),
170 	read_only(readOnly)
171 {
172 #ifdef DEBUG_BLOCK_CACHE
173 	mutex_lock(&sCachesLock);
174 	sCaches.Add(this);
175 	mutex_unlock(&sCachesLock);
176 #endif
177 
178 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
179 	if (hash == NULL)
180 		return;
181 
182 	transaction_hash = hash_init(16, 0, &transaction_compare,
183 		&::transaction_hash);
184 	if (transaction_hash == NULL)
185 		return;
186 
187 	ranges_hash = hash_init(16, 0, &block_range::Compare, &block_range::Hash);
188 	if (ranges_hash == NULL)
189 		return;
190 
191 	if (benaphore_init(&lock, "block cache") < B_OK)
192 		return;
193 
194 	chunk_size = max_c(blockSize, B_PAGE_SIZE);
195 	chunks_per_range = kBlockRangeSize / chunk_size;
196 	range_mask = (1UL << chunks_per_range) - 1;
197 	chunk_mask = (1UL << (chunk_size / blockSize)) - 1;
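	// For example, assuming a 2048 byte block size on a system with 4096
	// byte pages: chunk_size becomes 4096 and chunk_mask covers the two
	// blocks per chunk (0x3); chunks_per_range depends on kBlockRangeSize
	// as defined in block_cache_private.h.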
198 
199 	register_low_memory_handler(&block_cache::LowMemoryHandler, this, 0);
200 }
201 
202 
203 block_cache::~block_cache()
204 {
205 #ifdef DEBUG_BLOCK_CACHE
206 	mutex_lock(&sCachesLock);
207 	sCaches.Remove(this);
208 	mutex_unlock(&sCachesLock);
209 #endif
210 
211 	unregister_low_memory_handler(&block_cache::LowMemoryHandler, this);
212 
213 	benaphore_destroy(&lock);
214 
215 	hash_uninit(ranges_hash);
216 	hash_uninit(transaction_hash);
217 	hash_uninit(hash);
218 }
219 
220 
221 status_t
222 block_cache::InitCheck()
223 {
224 	if (lock.sem < B_OK)
225 		return lock.sem;
226 
227 	if (hash == NULL || transaction_hash == NULL || ranges_hash == NULL)
228 		return B_NO_MEMORY;
229 
230 	return B_OK;
231 }
232 
233 
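/*!	Returns a block range that still has free chunks, allocating a new range
	if necessary. If that allocation fails, unused blocks are evicted (first
	only rarely accessed ones, then all of them) to make room before giving
	up and returning \c NULL.
*/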
234 block_range *
235 block_cache::GetFreeRange()
236 {
237 	if (!free_ranges.IsEmpty())
238 		return free_ranges.First();
239 
240 	// we need to allocate a new range
241 	block_range *range;
242 	if (block_range::New(this, &range) != B_OK) {
243 		RemoveUnusedBlocks(2, 50);
244 
245 		if (!free_ranges.IsEmpty())
246 			return free_ranges.First();
247 
248 		RemoveUnusedBlocks(LONG_MAX, 50);
249 
250 		if (!free_ranges.IsEmpty())
251 			return free_ranges.First();
252 
253 		// TODO: We also need to free ranges from other caches to get a free one
254 		// (if not, an active volume might have stolen all free ranges already)
255 		return NULL;
256 	}
257 
258 	return range;
259 }
260 
261 
262 block_range *
263 block_cache::GetRange(void *address)
264 {
265 	return (block_range *)hash_lookup(ranges_hash, address);
266 }
267 
268 
269 void
270 block_cache::Free(void *address)
271 {
272 	if (address == NULL)
273 		return;
274 
275 	block_range *range = GetRange(address);
276 	if (range == NULL)
277 		panic("no range for address %p\n", address);
278 
279 	ASSERT(range != NULL);
280 	range->Free(this, address);
281 
282 	if (range->Unused(this))
283 		block_range::Delete(this, range);
284 }
285 
286 
287 void *
288 block_cache::Allocate()
289 {
290 	block_range *range = GetFreeRange();
291 	if (range == NULL)
292 		return NULL;
293 
294 	return range->Allocate(this);
295 }
296 
297 
298 void
299 block_cache::FreeBlock(cached_block *block)
300 {
301 	block_range *range = GetRange(block->current_data);
302 	ASSERT(range != NULL);
303 	range->Free(this, block);
304 
305 	if (block->original_data != NULL || block->parent_data != NULL) {
306 		panic("block_cache::FreeBlock(): %p, %p\n", block->original_data,
307 			block->parent_data);
308 	}
309 
310 #ifdef DEBUG_CHANGED
311 	Free(block->compare);
312 #endif
313 
314 	if (range->Unused(this))
315 		block_range::Delete(this, range);
316 
317 	delete block;
318 }
319 
320 
321 /*! Allocates a new block for \a blockNumber, ready for use */
322 cached_block *
323 block_cache::NewBlock(off_t blockNumber)
324 {
325 	cached_block *block = new(nothrow) cached_block;
326 	if (block == NULL) {
327 		FATAL(("could not allocate block!\n"));
328 		return NULL;
329 	}
330 
331 	block_range *range = GetFreeRange();
332 	if (range == NULL) {
333 		FATAL(("could not get range!\n"));
334 		delete block;
335 		return NULL;
336 	}
337 
338 	range->Allocate(this, block);
339 
340 	block->block_number = blockNumber;
341 	block->ref_count = 0;
342 	block->accessed = 0;
343 	block->transaction_next = NULL;
344 	block->transaction = block->previous_transaction = NULL;
345 	block->original_data = NULL;
346 	block->parent_data = NULL;
347 	block->is_dirty = false;
348 	block->unused = false;
349 #ifdef DEBUG_CHANGED
350 	block->compare = NULL;
351 #endif
352 
353 	return block;
354 }
355 
356 
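/*!	Evicts up to \a count blocks from the unused list, skipping blocks that
	have been accessed more often than \a maxAccessed. Dirty blocks (only
	possible if transactions are not used) are written back before they are
	freed.
*/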
357 void
358 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
359 {
360 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
361 
362 	for (block_list::Iterator it = unused_blocks.GetIterator();
363 		 cached_block *block = it.Next();) {
364 
365 		if (maxAccessed < block->accessed)
366 			continue;
367 
368 		TRACE(("  remove block %Ld, accessed %ld times\n",
369 			block->block_number, block->accessed));
370 
371 		// this can only happen if no transactions are used
372 		if (block->is_dirty)
373 			write_cached_block(this, block, false);
374 
375 		// remove block from lists
376 		it.Remove();
377 		hash_remove(hash, block);
378 
379 		FreeBlock(block);
380 
381 		if (--count <= 0)
382 			break;
383 	}
384 }
385 
386 
387 void
388 block_cache::LowMemoryHandler(void *data, int32 level)
389 {
390 	block_cache *cache = (block_cache *)data;
391 	BenaphoreLocker locker(&cache->lock);
392 
393 	if (!locker.IsLocked()) {
394 		// If our block_cache is in the process of being deleted, this
395 		// handler may have been invoked just before the deletion went
396 		// through; in that case, acquiring its lock can fail.
397 		return;
398 	}
399 
400 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
401 
402 	// free some blocks according to the low memory state
403 	// (if there is enough memory left, we don't free any)
404 
405 	int32 free = 1;
406 	int32 accessed = 1;
407 	switch (vm_low_memory_state()) {
408 		case B_NO_LOW_MEMORY:
409 			return;
410 		case B_LOW_MEMORY_NOTE:
411 			free = 50;
412 			accessed = 2;
413 			break;
414 		case B_LOW_MEMORY_WARNING:
415 			free = 200;
416 			accessed = 10;
417 			break;
418 		case B_LOW_MEMORY_CRITICAL:
419 			free = LONG_MAX;
420 			accessed = LONG_MAX;
421 			break;
422 	}
423 
424 	cache->RemoveUnusedBlocks(accessed, free);
425 }
426 
427 
428 //	#pragma mark - private block functions
429 
430 
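/*!	Releases one reference to \a block. If this was the last reference and
	the block is not part of any transaction, it is moved to the unused list.
	Depending on the current low memory state, some unused blocks may be
	freed right away.
*/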
431 static void
432 put_cached_block(block_cache *cache, cached_block *block)
433 {
434 #ifdef DEBUG_CHANGED
435 	if (!block->is_dirty && block->compare != NULL
436 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
437 		dprintf("new block:\n");
438 		dump_block((const char *)block->current_data, 256, "  ");
439 		dprintf("unchanged block:\n");
440 		dump_block((const char *)block->compare, 256, "  ");
441 		write_cached_block(cache, block);
442 		panic("block_cache: supposed to be clean block was changed!\n");
443 
444 		cache->Free(block->compare);
445 		block->compare = NULL;
446 	}
447 #endif
448 
449 	if (block->ref_count < 1) {
450 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
451 		return;
452 	}
453 
454 	if (--block->ref_count == 0
455 		&& block->transaction == NULL
456 		&& block->previous_transaction == NULL) {
457 		// put this block in the list of unused blocks
458 		block->unused = true;
459 		if (block->original_data != NULL || block->parent_data != NULL) {
460 			panic("put_cached_block(): %p (%Ld): %p, %p\n", block,
				block->block_number, block->original_data, block->parent_data);
		}
461 		cache->unused_blocks.Add(block);
462 //		block->current_data = cache->allocator->Release(block->current_data);
463 	}
464 
465 	// free some blocks according to the low memory state
466 	// (if there is enough memory left, we don't free any)
467 
468 	int32 free = 1;
469 	switch (vm_low_memory_state()) {
470 		case B_NO_LOW_MEMORY:
471 			return;
472 		case B_LOW_MEMORY_NOTE:
473 			free = 1;
474 			break;
475 		case B_LOW_MEMORY_WARNING:
476 			free = 5;
477 			break;
478 		case B_LOW_MEMORY_CRITICAL:
479 			free = 20;
480 			break;
481 	}
482 
483 	cache->RemoveUnusedBlocks(LONG_MAX, free);
484 }
485 
486 
487 static void
488 put_cached_block(block_cache *cache, off_t blockNumber)
489 {
490 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
491 		panic("put_cached_block: invalid block number %lld (max %lld)",
492 			blockNumber, cache->max_blocks - 1);
493 	}
494 
495 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
496 	if (block != NULL)
497 		put_cached_block(cache, block);
498 }
499 
500 
501 /*!
502 	Retrieves the block \a blockNumber from the hash table, if it's already
503 	there, or reads it from the disk.
504 
505 	\param _allocated tells you whether or not a new block has been allocated
506 		to satisfy your request.
507 	\param readBlock if \c false, the block will not be read in case it was
508 		not already in the cache. The block you retrieve may contain random
509 		data.
510 */
511 static cached_block *
512 get_cached_block(block_cache *cache, off_t blockNumber, bool *_allocated,
513 	bool readBlock = true)
514 {
515 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
516 		panic("get_cached_block: invalid block number %lld (max %lld)",
517 			blockNumber, cache->max_blocks - 1);
518 		return NULL;
519 	}
520 
521 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
522 		&blockNumber);
523 	*_allocated = false;
524 
525 	if (block == NULL) {
526 		// read block into cache
527 		block = cache->NewBlock(blockNumber);
528 		if (block == NULL)
529 			return NULL;
530 
531 		hash_insert(cache->hash, block);
532 		*_allocated = true;
533 	} else {
534 		// TODO: currently, the data is always mapped in
535 /*
536 		if (block->ref_count == 0 && block->current_data != NULL) {
537 			// see if the old block can be resurrected
538 			block->current_data = cache->allocator->Acquire(block->current_data);
539 		}
540 
541 		if (block->current_data == NULL) {
542 			// there is no block yet, but we need one
543 			block->current_data = cache->allocator->Get();
544 			if (block->current_data == NULL)
545 				return NULL;
546 
547 			*_allocated = true;
548 		}
549 */
550 	}
551 
552 	if (*_allocated && readBlock) {
553 		int32 blockSize = cache->block_size;
554 
555 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
556 			block->current_data, blockSize);
557 		if (bytesRead < blockSize) {
558 			hash_remove(cache->hash, block);
559 			cache->FreeBlock(block);
560 			FATAL(("could not read block %Ld: bytesRead: %ld, error: %s\n",
561 				blockNumber, bytesRead, strerror(errno)));
562 			return NULL;
563 		}
564 	}
565 
566 	if (block->unused) {
567 		//TRACE(("remove block %Ld from unused\n", blockNumber));
568 		block->unused = false;
569 		cache->unused_blocks.Remove(block);
570 	}
571 
572 	block->ref_count++;
573 	block->accessed++;
574 
575 	return block;
576 }
577 
578 
579 /*!
580 	Returns the writable block data for the requested blockNumber.
581 	If \a cleared is true, the block is not read from disk; an empty block
582 	is returned.
583 
584 	This is the only method to insert a block into a transaction. It makes
585 	sure that the previous block contents are preserved in that case.
586 */
587 static void *
588 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base,
589 	off_t length, int32 transactionID, bool cleared)
590 {
591 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
592 		blockNumber, transactionID));
593 
594 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
595 		panic("get_writable_cached_block: invalid block number %lld (max %lld)",
596 			blockNumber, cache->max_blocks - 1);
597 	}
598 
599 	bool allocated;
600 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
601 		!cleared);
602 	if (block == NULL)
603 		return NULL;
604 
605 	// if there is no transaction support, we just return the current block
606 	if (transactionID == -1) {
607 		if (cleared)
608 			memset(block->current_data, 0, cache->block_size);
609 
610 		block->is_dirty = true;
611 			// mark the block as dirty
612 
613 		return block->current_data;
614 	}
615 
616 	if (block->transaction != NULL && block->transaction->id != transactionID) {
617 		// ToDo: we have to wait here until the other transaction is done.
618 		//	Maybe we should even panic, since we can't prevent any deadlocks.
619 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
620 		put_cached_block(cache, block);
621 		return NULL;
622 	}
623 	if (block->transaction == NULL && transactionID != -1) {
624 		// get new transaction
625 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
626 		if (transaction == NULL) {
627 			panic("get_writable_cached_block(): invalid transaction %ld!\n",
628 				transactionID);
629 			put_cached_block(cache, block);
630 			return NULL;
631 		}
632 		if (!transaction->open) {
633 			panic("get_writable_cached_block(): transaction already done!\n");
634 			put_cached_block(cache, block);
635 			return NULL;
636 		}
637 
638 		block->transaction = transaction;
639 
640 		// attach the block to the transaction block list
641 		block->transaction_next = transaction->first_block;
642 		transaction->first_block = block;
643 		transaction->num_blocks++;
644 	}
645 
646 	if (!(allocated && cleared) && block->original_data == NULL) {
647 		// we already have data, so we need to preserve it
648 		block->original_data = cache->Allocate();
649 		if (block->original_data == NULL) {
650 			FATAL(("could not allocate original_data\n"));
651 			put_cached_block(cache, block);
652 			return NULL;
653 		}
654 
655 		memcpy(block->original_data, block->current_data, cache->block_size);
656 	}
657 	if (block->parent_data == block->current_data) {
658 		// remember any previous contents for the parent transaction
659 		block->parent_data = cache->Allocate();
660 		if (block->parent_data == NULL) {
661 			// TODO: maybe we should just continue the current transaction in this case...
662 			FATAL(("could not allocate parent\n"));
663 			put_cached_block(cache, block);
664 			return NULL;
665 		}
666 
667 		memcpy(block->parent_data, block->current_data, cache->block_size);
668 		block->transaction->sub_num_blocks++;
669 	}
670 
671 	if (cleared)
672 		memset(block->current_data, 0, cache->block_size);
673 
674 	block->is_dirty = true;
675 
676 	return block->current_data;
677 }
678 
679 
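/*!	Writes \a block back to disk. If the block still carries changes from a
	previous (already ended) transaction, the preserved original contents
	(the state committed by that earlier transaction) are written instead of
	the current data. Once the last block of a previous transaction has been
	written back, its notification hook is called, and the transaction is
	deleted if \a deleteTransaction is \c true.
*/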
680 static status_t
681 write_cached_block(block_cache *cache, cached_block *block,
682 	bool deleteTransaction)
683 {
684 	cache_transaction *previous = block->previous_transaction;
685 	int32 blockSize = cache->block_size;
686 
687 	void *data = previous && block->original_data
688 		? block->original_data : block->current_data;
689 		// we first need to write back changes from previous transactions
690 
691 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
692 
693 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize,
694 		data, blockSize);
695 
696 	if (written < blockSize) {
697 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
698 			strerror(errno)));
699 		return B_IO_ERROR;
700 	}
701 
702 	if (data == block->current_data)
703 		block->is_dirty = false;
704 
705 	if (previous != NULL) {
706 		previous->blocks.Remove(block);
707 		block->previous_transaction = NULL;
708 
709 		// Has the previous transaction been finished with that write?
710 		if (--previous->num_blocks == 0) {
711 			TRACE(("cache transaction %ld finished!\n", previous->id));
712 
713 			if (previous->notification_hook != NULL) {
714 				previous->notification_hook(previous->id,
715 					previous->notification_data);
716 			}
717 
718 			if (deleteTransaction) {
719 				hash_remove(cache->transaction_hash, previous);
720 				delete_transaction(cache, previous);
721 			}
722 		}
723 	}
724 
725 	return B_OK;
726 }
727 
728 
729 #ifdef DEBUG_BLOCK_CACHE
730 static int
731 dump_cache(int argc, char **argv)
732 {
733 	if (argc < 2) {
734 		kprintf("usage: %s [-b] <address>\n", argv[0]);
735 		return 0;
736 	}
737 
738 	bool showBlocks = false;
739 	int32 i = 1;
740 	if (!strcmp(argv[1], "-b")) {
741 		showBlocks = true;
742 		i++;
743 	}
744 
745 	block_cache *cache = (struct block_cache *)strtoul(argv[i], NULL, 0);
746 	if (cache == NULL) {
747 		kprintf("invalid cache address\n");
748 		return 0;
749 	}
750 
751 	kprintf("BLOCK CACHE: %p\n", cache);
752 
753 	kprintf(" fd:         %d\n", cache->fd);
754 	kprintf(" max_blocks: %Ld\n", cache->max_blocks);
755 	kprintf(" block_size: %lu\n", cache->block_size);
756 	kprintf(" next_transaction_id: %ld\n", cache->next_transaction_id);
757 	kprintf(" chunks_per_range: %lu\n", cache->chunks_per_range);
758 	kprintf(" chunk_size: %lu\n", cache->chunk_size);
759 	kprintf(" range_mask: %lu\n", cache->range_mask);
760 	kprintf(" chunk_mask: %lu\n", cache->chunk_mask);
761 
762 	if (showBlocks) {
763 		kprintf(" blocks:\n");
764 		kprintf("address  block no. current  original parent    refs access flags transact prev. trans\n");
765 	}
766 
767 	uint32 count = 0;
768 	hash_iterator iterator;
769 	hash_open(cache->hash, &iterator);
770 	cached_block *block;
771 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
772 		if (showBlocks) {
773 			kprintf("%08lx %9Ld %08lx %08lx %08lx %5ld %6ld %c%c%c%c%c %08lx %08lx\n",
774 				(addr_t)block, block->block_number, (addr_t)block->current_data,
775 				(addr_t)block->original_data, (addr_t)block->parent_data,
776 				block->ref_count, block->accessed, block->busy ? 'B' : '-',
777 				block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
778 				block->unused ? 'U' : '-', block->unmapped ? 'M' : '-',
779 				(addr_t)block->transaction, (addr_t)block->previous_transaction);
780 		}
781 		count++;
782 	}
783 
784 	kprintf(" %ld blocks.\n", count);
785 
786 	hash_close(cache->hash, &iterator, false);
787 	return 0;
788 }
789 
790 
791 static int
792 dump_caches(int argc, char **argv)
793 {
794 	kprintf("Block caches:\n");
795 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
796 	while (i.HasNext()) {
797 		kprintf("  %p\n", i.Next());
798 	}
799 
800 	return 0;
801 }
802 #endif	// DEBUG_BLOCK_CACHE
803 
804 
805 extern "C" status_t
806 block_cache_init(void)
807 {
808 #ifdef DEBUG_BLOCK_CACHE
809 	mutex_init(&sCachesLock, "block caches");
810 	new (&sCaches) DoublyLinkedList<block_cache>;
811 		// manually call constructor
812 
813 	add_debugger_command("block_caches", &dump_caches, "dumps all block caches");
814 	add_debugger_command("block_cache", &dump_cache, "dumps a specific block cache");
815 #endif
816 
817 	return init_block_allocator();
818 }
819 
820 
821 //	#pragma mark - public transaction API
822 
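/*!	Illustrative usage sketch (not compiled): a file system would typically
	wrap the blocks it modifies in a transaction, using a cache handle
	obtained from block_cache_create() and an arbitrary block number:

		int32 id = cache_start_transaction(cache);
		void *data = block_cache_get_writable(cache, blockNumber, id);
		// ... modify the block contents ...
		block_cache_put(cache, blockNumber);
		cache_end_transaction(cache, id, NULL, NULL);
		// later, once the changes are known to be safely recorded:
		cache_sync_transaction(cache, id);
*/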
823 
824 extern "C" int32
825 cache_start_transaction(void *_cache)
826 {
827 	block_cache *cache = (block_cache *)_cache;
828 	BenaphoreLocker locker(&cache->lock);
829 
830 	if (cache->last_transaction && cache->last_transaction->open) {
831 		panic("last transaction (%ld) still open!\n",
832 			cache->last_transaction->id);
833 	}
834 
835 	cache_transaction *transaction = new(nothrow) cache_transaction;
836 	if (transaction == NULL)
837 		return B_NO_MEMORY;
838 
839 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
840 	cache->last_transaction = transaction;
841 
842 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
843 
844 	hash_insert(cache->transaction_hash, transaction);
845 
846 	return transaction->id;
847 }
848 
849 
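/*!	Writes back all blocks that belong to transactions with an ID less than
	or equal to \a id and that have already been ended; those transactions
	are removed afterwards. Open transactions are left untouched.
*/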
850 extern "C" status_t
851 cache_sync_transaction(void *_cache, int32 id)
852 {
853 	block_cache *cache = (block_cache *)_cache;
854 	BenaphoreLocker locker(&cache->lock);
855 	status_t status = B_ENTRY_NOT_FOUND;
856 
857 	hash_iterator iterator;
858 	hash_open(cache->transaction_hash, &iterator);
859 
860 	cache_transaction *transaction;
861 	while ((transaction = (cache_transaction *)hash_next(
862 			cache->transaction_hash, &iterator)) != NULL) {
863 		// finish all earlier transactions that have already been ended
864 
865 		if (transaction->id <= id && !transaction->open) {
866 			// write back all of their remaining dirty blocks
867 			while (transaction->num_blocks > 0) {
868 				status = write_cached_block(cache, transaction->blocks.Head(),
869 					false);
870 				if (status != B_OK)
871 					return status;
872 			}
873 
874 			hash_remove_current(cache->transaction_hash, &iterator);
875 			delete_transaction(cache, transaction);
876 		}
877 	}
878 
879 	hash_close(cache->transaction_hash, &iterator, false);
880 	return B_OK;
881 }
882 
883 
884 extern "C" status_t
885 cache_end_transaction(void *_cache, int32 id,
886 	transaction_notification_hook hook, void *data)
887 {
888 	block_cache *cache = (block_cache *)_cache;
889 	BenaphoreLocker locker(&cache->lock);
890 
891 	TRACE(("cache_end_transaction(id = %ld)\n", id));
892 
893 	cache_transaction *transaction = lookup_transaction(cache, id);
894 	if (transaction == NULL) {
895 		panic("cache_end_transaction(): invalid transaction ID\n");
896 		return B_BAD_VALUE;
897 	}
898 
899 	transaction->notification_hook = hook;
900 	transaction->notification_data = data;
901 
902 	// iterate through all blocks and free the unchanged original contents
903 
904 	cached_block *block = transaction->first_block, *next;
905 	for (; block != NULL; block = next) {
906 		next = block->transaction_next;
907 
908 		if (block->previous_transaction != NULL) {
909 			// need to write back pending changes
910 			write_cached_block(cache, block);
911 		}
912 
913 		if (block->original_data != NULL) {
914 			cache->Free(block->original_data);
915 			block->original_data = NULL;
916 		}
917 		if (transaction->has_sub_transaction) {
918 			if (block->parent_data != block->current_data)
919 				cache->Free(block->parent_data);
920 			block->parent_data = NULL;
921 		}
922 
923 		// move the block to the previous transaction list
924 		transaction->blocks.Add(block);
925 
926 		block->previous_transaction = transaction;
927 		block->transaction_next = NULL;
928 		block->transaction = NULL;
929 	}
930 
931 	transaction->open = false;
932 
933 	return B_OK;
934 }
935 
936 
937 extern "C" status_t
938 cache_abort_transaction(void *_cache, int32 id)
939 {
940 	block_cache *cache = (block_cache *)_cache;
941 	BenaphoreLocker locker(&cache->lock);
942 
943 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
944 
945 	cache_transaction *transaction = lookup_transaction(cache, id);
946 	if (transaction == NULL) {
947 		panic("cache_abort_transaction(): invalid transaction ID\n");
948 		return B_BAD_VALUE;
949 	}
950 
951 	// iterate through all blocks and restore their original contents
952 
953 	cached_block *block = transaction->first_block, *next;
954 	for (; block != NULL; block = next) {
955 		next = block->transaction_next;
956 
957 		if (block->original_data != NULL) {
958 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
959 				transaction->id, block->block_number));
960 			memcpy(block->current_data, block->original_data, cache->block_size);
961 			cache->Free(block->original_data);
962 			block->original_data = NULL;
963 		}
964 		if (transaction->has_sub_transaction) {
965 			if (block->parent_data != block->current_data)
966 				cache->Free(block->parent_data);
967 			block->parent_data = NULL;
968 		}
969 
970 		block->transaction_next = NULL;
971 		block->transaction = NULL;
972 	}
973 
974 	hash_remove(cache->transaction_hash, transaction);
975 	delete_transaction(cache, transaction);
976 	return B_OK;
977 }
978 
979 
980 /*!
981 	Acknowledges the current parent transaction, and starts a new transaction
982 	from its sub transaction.
983 	The new transaction also gets a new transaction ID.
984 */
985 extern "C" int32
986 cache_detach_sub_transaction(void *_cache, int32 id,
987 	transaction_notification_hook hook, void *data)
988 {
989 	block_cache *cache = (block_cache *)_cache;
990 	BenaphoreLocker locker(&cache->lock);
991 
992 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
993 
994 	cache_transaction *transaction = lookup_transaction(cache, id);
995 	if (transaction == NULL) {
996 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
997 		return B_BAD_VALUE;
998 	}
999 	if (!transaction->has_sub_transaction)
1000 		return B_BAD_VALUE;
1001 
1002 	// create a new transaction for the sub transaction
1003 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
1004 	if (newTransaction == NULL)
1005 		return B_NO_MEMORY;
1006 
1007 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
1008 
1009 	transaction->notification_hook = hook;
1010 	transaction->notification_data = data;
1011 
1012 	// iterate through all blocks and free the unchanged original contents
1013 
1014 	cached_block *block = transaction->first_block, *next, *last = NULL;
1015 	for (; block != NULL; block = next) {
1016 		next = block->transaction_next;
1017 
1018 		if (block->previous_transaction != NULL) {
1019 			// need to write back pending changes
1020 			write_cached_block(cache, block);
1021 		}
1022 
1023 		if (block->original_data != NULL && block->parent_data != NULL
1024 			&& block->parent_data != block->current_data) {
1025 			// free the original data if the parent data of the transaction
1026 			// will be made current - but keep them otherwise
1027 			cache->Free(block->original_data);
1028 			block->original_data = NULL;
1029 		}
1030 		if (block->parent_data != NULL
1031 			&& block->parent_data != block->current_data) {
1032 			// we need to move this block over to the new transaction
1033 			block->original_data = block->parent_data;
1034 			if (last == NULL)
1035 				newTransaction->first_block = block;
1036 			else
1037 				last->transaction_next = block;
1038 
1039 			last = block;
1040 		}
1041 		block->parent_data = NULL;
1042 
1043 		// move the block to the previous transaction list
1044 		transaction->blocks.Add(block);
1045 
1046 		block->previous_transaction = transaction;
1047 		block->transaction_next = NULL;
1048 		block->transaction = newTransaction;
1049 	}
1050 
1051 	transaction->open = false;
1052 
1053 	hash_insert(cache->transaction_hash, newTransaction);
1054 	cache->last_transaction = newTransaction;
1055 
1056 	return newTransaction->id;
1057 }
1058 
1059 
1060 extern "C" status_t
1061 cache_abort_sub_transaction(void *_cache, int32 id)
1062 {
1063 	block_cache *cache = (block_cache *)_cache;
1064 	BenaphoreLocker locker(&cache->lock);
1065 
1066 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
1067 
1068 	cache_transaction *transaction = lookup_transaction(cache, id);
1069 	if (transaction == NULL) {
1070 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
1071 		return B_BAD_VALUE;
1072 	}
1073 	if (!transaction->has_sub_transaction)
1074 		return B_BAD_VALUE;
1075 
1076 	// revert all changes back to the version of the parent
1077 
1078 	cached_block *block = transaction->first_block, *next;
1079 	for (; block != NULL; block = next) {
1080 		next = block->transaction_next;
1081 
1082 		if (block->parent_data == NULL) {
1083 			if (block->original_data != NULL) {
1084 				// the parent transaction didn't change the block, but the sub
1085 				// transaction did - we need to revert from the original data
1086 				memcpy(block->current_data, block->original_data,
1087 					cache->block_size);
1088 			}
1089 		} else if (block->parent_data != block->current_data) {
1090 			// the block has been changed and must be restored
1091 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
1092 				transaction->id, block->block_number));
1093 			memcpy(block->current_data, block->parent_data, cache->block_size);
1094 			cache->Free(block->parent_data);
1095 		}
1096 
1097 		block->parent_data = NULL;
1098 	}
1099 
1100 	// all subsequent changes will go into the main transaction
1101 	transaction->has_sub_transaction = false;
1102 	return B_OK;
1103 }
1104 
1105 
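/*!	Starts a sub transaction within the transaction \a id. The changes made
	so far are acknowledged as belonging to the parent: for every block the
	parent data is set lazily (no copy is made until the block is modified
	again), and the blocks of an older sub transaction are moved up to the
	parent. All subsequent changes go into the sub transaction.
*/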
1106 extern "C" status_t
1107 cache_start_sub_transaction(void *_cache, int32 id)
1108 {
1109 	block_cache *cache = (block_cache *)_cache;
1110 	BenaphoreLocker locker(&cache->lock);
1111 
1112 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
1113 
1114 	cache_transaction *transaction = lookup_transaction(cache, id);
1115 	if (transaction == NULL) {
1116 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
1117 		return B_BAD_VALUE;
1118 	}
1119 
1120 	// move all changed blocks up to the parent
1121 
1122 	cached_block *block = transaction->first_block, *next;
1123 	for (; block != NULL; block = next) {
1124 		next = block->transaction_next;
1125 
1126 		if (transaction->has_sub_transaction
1127 			&& block->parent_data != NULL
1128 			&& block->parent_data != block->current_data) {
1129 			// there already is an older sub transaction - we acknowledge
1130 			// its changes and move its blocks up to the parent
1131 			cache->Free(block->parent_data);
1132 		}
1133 
1134 		// we "allocate" the parent data lazily, that means, we don't copy
1135 		// the data (and allocate memory for it) until we need to
1136 		block->parent_data = block->current_data;
1137 	}
1138 
1139 	// all subsequent changes will go into the sub transaction
1140 	transaction->has_sub_transaction = true;
1141 	transaction->sub_num_blocks = 0;
1142 
1143 	return B_OK;
1144 }
1145 
1146 
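/*!	Iterates over the blocks that are part of the transaction \a id.
	Initialize \a *_cookie to zero before the first call; the block number,
	current contents, and preserved original contents are returned for each
	block until B_ENTRY_NOT_FOUND is returned.
*/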
1147 extern "C" status_t
1148 cache_next_block_in_transaction(void *_cache, int32 id, uint32 *_cookie,
1149 	off_t *_blockNumber, void **_data, void **_unchangedData)
1150 {
1151 	cached_block *block = (cached_block *)*_cookie;
1152 	block_cache *cache = (block_cache *)_cache;
1153 
1154 	BenaphoreLocker locker(&cache->lock);
1155 
1156 	cache_transaction *transaction = lookup_transaction(cache, id);
1157 	if (transaction == NULL)
1158 		return B_BAD_VALUE;
1159 
1160 	if (block == NULL)
1161 		block = transaction->first_block;
1162 	else
1163 		block = block->transaction_next;
1164 
1165 	if (block == NULL)
1166 		return B_ENTRY_NOT_FOUND;
1167 
1168 	if (_blockNumber)
1169 		*_blockNumber = block->block_number;
1170 	if (_data)
1171 		*_data = block->current_data;
1172 	if (_unchangedData)
1173 		*_unchangedData = block->original_data;
1174 
1175 	*_cookie = (uint32)block;
1176 	return B_OK;
1177 }
1178 
1179 
1180 extern "C" int32
1181 cache_blocks_in_transaction(void *_cache, int32 id)
1182 {
1183 	block_cache *cache = (block_cache *)_cache;
1184 	BenaphoreLocker locker(&cache->lock);
1185 
1186 	cache_transaction *transaction = lookup_transaction(cache, id);
1187 	if (transaction == NULL)
1188 		return B_BAD_VALUE;
1189 
1190 	return transaction->num_blocks;
1191 }
1192 
1193 
1194 extern "C" int32
1195 cache_blocks_in_sub_transaction(void *_cache, int32 id)
1196 {
1197 	block_cache *cache = (block_cache *)_cache;
1198 	BenaphoreLocker locker(&cache->lock);
1199 
1200 	cache_transaction *transaction = lookup_transaction(cache, id);
1201 	if (transaction == NULL)
1202 		return B_BAD_VALUE;
1203 
1204 	return transaction->sub_num_blocks;
1205 }
1206 
1207 
1208 //	#pragma mark - public block cache API
1209 //	public interface
1210 
1211 
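/*!	Deletes the block cache. If \a allowWrites is \c true, all dirty blocks
	that can be written (see block_cache_sync()) are flushed first; all
	remaining blocks are then freed, and any leftover transactions are simply
	deleted.
*/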
1212 extern "C" void
1213 block_cache_delete(void *_cache, bool allowWrites)
1214 {
1215 	block_cache *cache = (block_cache *)_cache;
1216 
1217 	if (allowWrites)
1218 		block_cache_sync(cache);
1219 
1220 	BenaphoreLocker locker(&cache->lock);
1221 
1222 	// free all blocks
1223 
1224 	uint32 cookie = 0;
1225 	cached_block *block;
1226 	while ((block = (cached_block *)hash_remove_first(cache->hash,
1227 			&cookie)) != NULL) {
1228 		cache->FreeBlock(block);
1229 	}
1230 
1231 	// free all transactions (they will all be aborted)
1232 
1233 	cookie = 0;
1234 	cache_transaction *transaction;
1235 	while ((transaction = (cache_transaction *)hash_remove_first(
1236 			cache->transaction_hash, &cookie)) != NULL) {
1237 		delete transaction;
1238 	}
1239 
1240 	delete cache;
1241 }
1242 
1243 
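/*!	Creates a new block cache for the device opened at \a fd with
	\a numBlocks blocks of \a blockSize bytes each. The returned opaque
	handle is passed to the other cache functions and has to be released
	with block_cache_delete(); \c NULL is returned if the cache could not be
	initialized.
*/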
1244 extern "C" void *
1245 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
1246 {
1247 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
1248 		readOnly);
1249 	if (cache == NULL)
1250 		return NULL;
1251 
1252 	if (cache->InitCheck() != B_OK) {
1253 		delete cache;
1254 		return NULL;
1255 	}
1256 
1257 	return cache;
1258 }
1259 
1260 
1261 extern "C" status_t
1262 block_cache_sync(void *_cache)
1263 {
1264 	block_cache *cache = (block_cache *)_cache;
1265 
1266 	// we only sync dirty blocks to disk that either belong to a completed
1267 	// transaction or to no transaction at all
1268 
1269 	BenaphoreLocker locker(&cache->lock);
1270 	hash_iterator iterator;
1271 	hash_open(cache->hash, &iterator);
1272 
1273 	cached_block *block;
1274 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1275 		if (block->previous_transaction != NULL
1276 			|| (block->transaction == NULL && block->is_dirty)) {
1277 			status_t status = write_cached_block(cache, block);
1278 			if (status != B_OK)
1279 				return status;
1280 		}
1281 	}
1282 
1283 	hash_close(cache->hash, &iterator, false);
1284 	return B_OK;
1285 }
1286 
1287 
1288 extern "C" status_t
1289 block_cache_sync_etc(void *_cache, off_t blockNumber, size_t numBlocks)
1290 {
1291 	block_cache *cache = (block_cache *)_cache;
1292 
1293 	// we only sync dirty blocks to disk that either belong to a completed
1294 	// transaction or to no transaction at all
1295 
1296 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1297 		panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
1298 			blockNumber, cache->max_blocks - 1);
1299 		return B_BAD_VALUE;
1300 	}
1301 
1302 	BenaphoreLocker locker(&cache->lock);
1303 
1304 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
1305 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1306 			&blockNumber);
1307 		if (block == NULL)
1308 			continue;
1309 		if (block->previous_transaction != NULL
1310 			|| (block->transaction == NULL && block->is_dirty)) {
1311 			status_t status = write_cached_block(cache, block);
1312 			if (status != B_OK)
1313 				return status;
1314 		}
1315 	}
1316 
1317 	return B_OK;
1318 }
1319 
1320 
1321 extern "C" status_t
1322 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
1323 {
1324 	block_cache *cache = (block_cache *)_cache;
1325 	BenaphoreLocker locker(&cache->lock);
1326 
1327 	if (cache->read_only)
1328 		panic("tried to make block writable on a read-only cache!");
1329 
1330 	// ToDo: this can be done better!
1331 	void *block = get_writable_cached_block(cache, blockNumber,
1332 		blockNumber, 1, transaction, false);
1333 	if (block != NULL) {
1334 		put_cached_block(cache, blockNumber);
1335 		return B_OK;
1336 	}
1337 
1338 	return B_ERROR;
1339 }
1340 
1341 
1342 extern "C" void *
1343 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base,
1344 	off_t length, int32 transaction)
1345 {
1346 	block_cache *cache = (block_cache *)_cache;
1347 	BenaphoreLocker locker(&cache->lock);
1348 
1349 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1350 		blockNumber, transaction));
1351 	if (cache->read_only)
1352 		panic("tried to get writable block on a read-only cache!");
1353 
1354 	return get_writable_cached_block(cache, blockNumber, base, length,
1355 		transaction, false);
1356 }
1357 
1358 
1359 extern "C" void *
1360 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
1361 {
1362 	return block_cache_get_writable_etc(_cache, blockNumber,
1363 		blockNumber, 1, transaction);
1364 }
1365 
1366 
1367 extern "C" void *
1368 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
1369 {
1370 	block_cache *cache = (block_cache *)_cache;
1371 	BenaphoreLocker locker(&cache->lock);
1372 
1373 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1374 		blockNumber, transaction));
1375 	if (cache->read_only)
1376 		panic("tried to get empty writable block on a read-only cache!");
1377 
1378 	return get_writable_cached_block(cache, blockNumber,
1379 		blockNumber, 1, transaction, true);
1380 }
1381 
1382 
1383 extern "C" const void *
1384 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
1385 {
1386 	block_cache *cache = (block_cache *)_cache;
1387 	BenaphoreLocker locker(&cache->lock);
1388 	bool allocated;
1389 
1390 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1391 	if (block == NULL)
1392 		return NULL;
1393 
1394 #ifdef DEBUG_CHANGED
1395 	if (block->compare == NULL)
1396 		block->compare = cache->Allocate();
1397 	if (block->compare != NULL)
1398 		memcpy(block->compare, block->current_data, cache->block_size);
1399 #endif
1400 	return block->current_data;
1401 }
1402 
1403 
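/*!	Returns a read-only pointer to the current contents of \a blockNumber,
	reading the block from disk if it is not cached yet. Each successful call
	adds a reference to the block which has to be released with
	block_cache_put() when you are done with the data.
*/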
1404 extern "C" const void *
1405 block_cache_get(void *_cache, off_t blockNumber)
1406 {
1407 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1408 }
1409 
1410 
1411 /*!
1412 	Changes the internal status of a writable block to \a dirty. This can be
1413 	helpful in case you realize you don't need to change that block anymore
1414 	for whatever reason.
1415 
1416 	Note, you must only use this function on blocks that were acquired
1417 	writable!
1418 */
1419 extern "C" status_t
1420 block_cache_set_dirty(void *_cache, off_t blockNumber, bool dirty,
1421 	int32 transaction)
1422 {
1423 	block_cache *cache = (block_cache *)_cache;
1424 	BenaphoreLocker locker(&cache->lock);
1425 
1426 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
1427 		&blockNumber);
1428 	if (block == NULL)
1429 		return B_BAD_VALUE;
1430 	if (block->is_dirty == dirty) {
1431 		// there is nothing to do for us
1432 		return B_OK;
1433 	}
1434 
1435 	// TODO: not yet implemented
1436 	if (dirty)
1437 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
1438 
1439 	return B_OK;
1440 }
1441 
1442 
1443 extern "C" void
1444 block_cache_put(void *_cache, off_t blockNumber)
1445 {
1446 	block_cache *cache = (block_cache *)_cache;
1447 	BenaphoreLocker locker(&cache->lock);
1448 
1449 	put_cached_block(cache, blockNumber);
1450 }
1451 
1452