xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 5115ca085884f7b604a3d607688f0ca20fb7cf57)
1 /*
2  * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "block_cache_private.h"
8 
9 #include <KernelExport.h>
10 #include <fs_cache.h>
11 
12 #include <block_cache.h>
13 #include <lock.h>
14 #include <vm_low_memory.h>
15 #include <util/kernel_cpp.h>
16 #include <util/DoublyLinkedList.h>
17 #include <util/AutoLock.h>
18 #include <util/khash.h>
19 
20 #include <unistd.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 
26 // TODO: this is a naive but growing implementation to test the API:
27 //	1) block reading/writing is not at all optimized for speed, it will
28 //	   just read and write single blocks.
29 //	2) the locking could be improved; getting a block should not need to
30 //	   wait for blocks to be written
31 //	3) dirty blocks are only written back if asked for
32 // TODO: the retrieval/copy of the original data could be delayed until the
33 //		new data must be written, ie. in low memory situations.
34 
35 //#define TRACE_BLOCK_CACHE
36 #ifdef TRACE_BLOCK_CACHE
37 #	define TRACE(x)	dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 // This macro is used for fatal situations that are acceptable in a running
43 // system, like out of memory situations - should only panic for debugging.
44 #define FATAL(x) panic x
45 
46 struct cache_transaction {
47 	cache_transaction();
48 
49 	cache_transaction *next;
50 	int32			id;
51 	int32			num_blocks;
52 	int32			sub_num_blocks;
53 	cached_block	*first_block;
54 	block_list		blocks;
55 	transaction_notification_hook notification_hook;
56 	void			*notification_data;
57 	bool			open;
58 	bool			has_sub_transaction;
59 };
60 
61 static status_t write_cached_block(block_cache *cache, cached_block *block,
62 	bool deleteTransaction = true);
63 
64 
65 //	#pragma mark - private transaction
66 
67 
68 cache_transaction::cache_transaction()
69 {
70 	num_blocks = 0;
71 	sub_num_blocks = 0;
72 	first_block = NULL;
73 	notification_hook = NULL;
74 	notification_data = NULL;
75 	open = true;
76 }
77 
78 
79 static int
80 transaction_compare(void *_transaction, const void *_id)
81 {
82 	cache_transaction *transaction = (cache_transaction *)_transaction;
83 	const int32 *id = (const int32 *)_id;
84 
85 	return transaction->id - *id;
86 }
87 
88 
89 static uint32
90 transaction_hash(void *_transaction, const void *_id, uint32 range)
91 {
92 	cache_transaction *transaction = (cache_transaction *)_transaction;
93 	const int32 *id = (const int32 *)_id;
94 
95 	if (transaction != NULL)
96 		return transaction->id % range;
97 
98 	return (uint32)*id % range;
99 }
100 
101 
102 static void
103 delete_transaction(block_cache *cache, cache_transaction *transaction)
104 {
105 	if (cache->last_transaction == transaction)
106 		cache->last_transaction = NULL;
107 
108 	delete transaction;
109 }
110 
111 
112 static cache_transaction *
113 lookup_transaction(block_cache *cache, int32 id)
114 {
115 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
116 }
117 
118 
119 //	#pragma mark - cached_block
120 
121 
122 /* static */
123 int
124 cached_block::Compare(void *_cacheEntry, const void *_block)
125 {
126 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
127 	const off_t *block = (const off_t *)_block;
128 
129 	return cacheEntry->block_number - *block;
130 }
131 
132 
133 
134 /* static */
135 uint32
136 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
137 {
138 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
139 	const off_t *block = (const off_t *)_block;
140 
141 	if (cacheEntry != NULL)
142 		return cacheEntry->block_number % range;
143 
144 	return (uint64)*block % range;
145 }
146 
147 
148 //	#pragma mark - block_cache
149 
150 
151 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
152 		bool readOnly)
153 	:
154 	hash(NULL),
155 	fd(_fd),
156 	max_blocks(numBlocks),
157 	block_size(blockSize),
158 	next_transaction_id(1),
159 	last_transaction(NULL),
160 	transaction_hash(NULL),
161 	ranges_hash(NULL),
162 	read_only(readOnly)
163 {
164 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
165 	if (hash == NULL)
166 		return;
167 
168 	transaction_hash = hash_init(16, 0, &transaction_compare,
169 		&::transaction_hash);
170 	if (transaction_hash == NULL)
171 		return;
172 
173 	ranges_hash = hash_init(16, 0, &block_range::Compare, &block_range::Hash);
174 	if (ranges_hash == NULL)
175 		return;
176 
177 	if (benaphore_init(&lock, "block cache") < B_OK)
178 		return;
179 
180 	chunk_size = max_c(blockSize, B_PAGE_SIZE);
181 	chunks_per_range = kBlockRangeSize / chunk_size;
182 	range_mask = (1UL << chunks_per_range) - 1;
183 	chunk_mask = (1UL << (chunk_size / blockSize)) - 1;
184 
185 	register_low_memory_handler(&block_cache::LowMemoryHandler, this, 0);
186 }
187 
188 
189 block_cache::~block_cache()
190 {
191 	unregister_low_memory_handler(&block_cache::LowMemoryHandler, this);
192 
193 	benaphore_destroy(&lock);
194 
195 	hash_uninit(ranges_hash);
196 	hash_uninit(transaction_hash);
197 	hash_uninit(hash);
198 }
199 
200 
201 status_t
202 block_cache::InitCheck()
203 {
204 	if (lock.sem < B_OK)
205 		return lock.sem;
206 
207 	if (hash == NULL || transaction_hash == NULL || ranges_hash == NULL)
208 		return B_NO_MEMORY;
209 
210 	return B_OK;
211 }
212 
213 
214 block_range *
215 block_cache::GetFreeRange()
216 {
217 	if (!free_ranges.IsEmpty())
218 		return free_ranges.First();
219 
220 	// we need to allocate a new range
221 	block_range *range;
222 	if (block_range::New(this, &range) != B_OK) {
223 		RemoveUnusedBlocks(2, 50);
224 
225 		if (!free_ranges.IsEmpty())
226 			return free_ranges.First();
227 
228 		RemoveUnusedBlocks(LONG_MAX, 50);
229 
230 		if (!free_ranges.IsEmpty())
231 			return free_ranges.First();
232 
233 		// TODO: We also need to free ranges from other caches to get a free one
234 		// (if not, an active volume might have stolen all free ranges already)
235 		return NULL;
236 	}
237 
238 	return range;
239 }
240 
241 
242 block_range *
243 block_cache::GetRange(void *address)
244 {
245 	return (block_range *)hash_lookup(ranges_hash, address);
246 }
247 
248 
249 void
250 block_cache::Free(void *address)
251 {
252 	if (address == NULL)
253 		return;
254 
255 	block_range *range = GetRange(address);
256 	if (range == NULL)
257 		panic("no range for address %p\n", address);
258 
259 	ASSERT(range != NULL);
260 	range->Free(this, address);
261 
262 	if (range->Unused(this))
263 		block_range::Delete(this, range);
264 }
265 
266 
267 void *
268 block_cache::Allocate()
269 {
270 	block_range *range = GetFreeRange();
271 	if (range == NULL)
272 		return NULL;
273 
274 	return range->Allocate(this);
275 }
276 
277 
278 void
279 block_cache::FreeBlock(cached_block *block)
280 {
281 	block_range *range = GetRange(block->current_data);
282 	ASSERT(range != NULL);
283 	range->Free(this, block);
284 
285 	if (block->original_data != NULL || block->parent_data != NULL) {
286 		panic("block_cache::FreeBlock(): %p, %p\n", block->original_data,
287 			block->parent_data);
288 	}
289 
290 #ifdef DEBUG_CHANGED
291 	Free(block->compare);
292 #endif
293 
294 	if (range->Unused(this))
295 		block_range::Delete(this, range);
296 
297 	delete block;
298 }
299 
300 
301 /*! Allocates a new block for \a blockNumber, ready for use */
302 cached_block *
303 block_cache::NewBlock(off_t blockNumber)
304 {
305 	cached_block *block = new(nothrow) cached_block;
306 	if (block == NULL) {
307 		FATAL(("could not allocate block!\n"));
308 		return NULL;
309 	}
310 
311 	block_range *range = GetFreeRange();
312 	if (range == NULL) {
313 		FATAL(("could not get range!\n"));
314 		delete block;
315 		return NULL;
316 	}
317 
318 	range->Allocate(this, block);
319 
320 	block->block_number = blockNumber;
321 	block->ref_count = 0;
322 	block->accessed = 0;
323 	block->transaction_next = NULL;
324 	block->transaction = block->previous_transaction = NULL;
325 	block->original_data = NULL;
326 	block->parent_data = NULL;
327 	block->is_dirty = false;
328 	block->unused = false;
329 #ifdef DEBUG_CHANGED
330 	block->compare = NULL;
331 #endif
332 
333 	return block;
334 }
335 
336 
337 void
338 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
339 {
340 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
341 
342 	for (block_list::Iterator it = unused_blocks.GetIterator();
343 		 cached_block *block = it.Next();) {
344 
345 		if (maxAccessed < block->accessed)
346 			continue;
347 
348 		TRACE(("  remove block %Ld, accessed %ld times\n",
349 			block->block_number, block->accessed));
350 
351 		// this can only happen if no transactions are used
352 		if (block->is_dirty)
353 			write_cached_block(this, block, false);
354 
355 		// remove block from lists
356 		it.Remove();
357 		hash_remove(hash, block);
358 
359 		FreeBlock(block);
360 
361 		if (--count <= 0)
362 			break;
363 	}
364 }
365 
366 
367 void
368 block_cache::LowMemoryHandler(void *data, int32 level)
369 {
370 	block_cache *cache = (block_cache *)data;
371 	BenaphoreLocker locker(&cache->lock);
372 
373 	if (!locker.IsLocked()) {
374 		// If our block_cache were deleted, it could be that we had
375 		// been called before that deletion went through, therefore,
376 		// acquiring its lock might fail.
377 		return;
378 	}
379 
380 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
381 
382 	// free some blocks according to the low memory state
383 	// (if there is enough memory left, we don't free any)
384 
385 	int32 free = 1;
386 	int32 accessed = 1;
387 	switch (vm_low_memory_state()) {
388 		case B_NO_LOW_MEMORY:
389 			return;
390 		case B_LOW_MEMORY_NOTE:
391 			free = 50;
392 			accessed = 2;
393 			break;
394 		case B_LOW_MEMORY_WARNING:
395 			free = 200;
396 			accessed = 10;
397 			break;
398 		case B_LOW_MEMORY_CRITICAL:
399 			free = LONG_MAX;
400 			accessed = LONG_MAX;
401 			break;
402 	}
403 
404 	cache->RemoveUnusedBlocks(accessed, free);
405 }
406 
407 
408 //	#pragma mark - private block functions
409 
410 
411 static void
412 put_cached_block(block_cache *cache, cached_block *block)
413 {
414 #ifdef DEBUG_CHANGED
415 	if (!block->is_dirty && block->compare != NULL
416 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
417 		dprintf("new block:\n");
418 		dump_block((const char *)block->current_data, 256, "  ");
419 		dprintf("unchanged block:\n");
420 		dump_block((const char *)block->compare, 256, "  ");
421 		write_cached_block(cache, block);
422 		panic("block_cache: supposed to be clean block was changed!\n");
423 
424 		cache->Free(block->compare);
425 		block->compare = NULL;
426 	}
427 #endif
428 
429 	if (--block->ref_count == 0
430 		&& block->transaction == NULL
431 		&& block->previous_transaction == NULL) {
432 		// put this block in the list of unused blocks
433 		block->unused = true;
434 		cache->unused_blocks.Add(block);
435 //		block->current_data = cache->allocator->Release(block->current_data);
436 	}
437 
438 	// free some blocks according to the low memory state
439 	// (if there is enough memory left, we don't free any)
440 
441 	int32 free = 1;
442 	switch (vm_low_memory_state()) {
443 		case B_NO_LOW_MEMORY:
444 			return;
445 		case B_LOW_MEMORY_NOTE:
446 			free = 1;
447 			break;
448 		case B_LOW_MEMORY_WARNING:
449 			free = 5;
450 			break;
451 		case B_LOW_MEMORY_CRITICAL:
452 			free = 20;
453 			break;
454 	}
455 
456 	cache->RemoveUnusedBlocks(LONG_MAX, free);
457 }
458 
459 
460 static void
461 put_cached_block(block_cache *cache, off_t blockNumber)
462 {
463 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
464 		panic("put_cached_block: invalid block number %lld (max %lld)",
465 			blockNumber, cache->max_blocks - 1);
466 	}
467 
468 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
469 	if (block != NULL)
470 		put_cached_block(cache, block);
471 }
472 
473 
474 /*!
475 	Retrieves the block \a blockNumber from the hash table, if it's already
476 	there, or reads it from the disk.
477 
478 	\param _allocated tells you wether or not a new block has been allocated
479 		to satisfy your request.
480 	\param readBlock if \c false, the block will not be read in case it was
481 		not already in the cache. The block you retrieve may contain random
482 		data.
483 */
484 static cached_block *
485 get_cached_block(block_cache *cache, off_t blockNumber, bool *_allocated,
486 	bool readBlock = true)
487 {
488 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
489 		panic("get_cached_block: invalid block number %lld (max %lld)",
490 			blockNumber, cache->max_blocks - 1);
491 		return NULL;
492 	}
493 
494 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
495 		&blockNumber);
496 	*_allocated = false;
497 
498 	if (block == NULL) {
499 		// read block into cache
500 		block = cache->NewBlock(blockNumber);
501 		if (block == NULL)
502 			return NULL;
503 
504 		hash_insert(cache->hash, block);
505 		*_allocated = true;
506 	} else {
507 		// TODO: currently, the data is always mapped in
508 /*
509 		if (block->ref_count == 0 && block->current_data != NULL) {
510 			// see if the old block can be resurrected
511 			block->current_data = cache->allocator->Acquire(block->current_data);
512 		}
513 
514 		if (block->current_data == NULL) {
515 			// there is no block yet, but we need one
516 			block->current_data = cache->allocator->Get();
517 			if (block->current_data == NULL)
518 				return NULL;
519 
520 			*_allocated = true;
521 		}
522 */
523 	}
524 
525 	if (*_allocated && readBlock) {
526 		int32 blockSize = cache->block_size;
527 
528 		if (read_pos(cache->fd, blockNumber * blockSize, block->current_data,
529 				blockSize) < blockSize) {
530 			hash_remove(cache->hash, block);
531 			cache->FreeBlock(block);
532 			FATAL(("could not read block %Ld\n", blockNumber));
533 			return NULL;
534 		}
535 	}
536 
537 	if (block->unused) {
538 		//TRACE(("remove block %Ld from unused\n", blockNumber));
539 		block->unused = false;
540 		cache->unused_blocks.Remove(block);
541 	}
542 
543 	block->ref_count++;
544 	block->accessed++;
545 
546 	return block;
547 }
548 
549 
550 /*!
551 	Returns the writable block data for the requested blockNumber.
552 	If \a cleared is true, the block is not read from disk; an empty block
553 	is returned.
554 
555 	This is the only method to insert a block into a transaction. It makes
556 	sure that the previous block contents are preserved in that case.
557 */
558 static void *
559 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base,
560 	off_t length, int32 transactionID, bool cleared)
561 {
562 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
563 		blockNumber, transactionID));
564 
565 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
566 		panic("get_writable_cached_block: invalid block number %lld (max %lld)",
567 			blockNumber, cache->max_blocks - 1);
568 	}
569 
570 	bool allocated;
571 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
572 		!cleared);
573 	if (block == NULL)
574 		return NULL;
575 
576 	// if there is no transaction support, we just return the current block
577 	if (transactionID == -1) {
578 		if (cleared)
579 			memset(block->current_data, 0, cache->block_size);
580 
581 		block->is_dirty = true;
582 			// mark the block as dirty
583 
584 		return block->current_data;
585 	}
586 
587 	if (block->transaction != NULL && block->transaction->id != transactionID) {
588 		// ToDo: we have to wait here until the other transaction is done.
589 		//	Maybe we should even panic, since we can't prevent any deadlocks.
590 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
591 		put_cached_block(cache, block);
592 		return NULL;
593 	}
594 	if (block->transaction == NULL && transactionID != -1) {
595 		// get new transaction
596 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
597 		if (transaction == NULL) {
598 			panic("get_writable_cached_block(): invalid transaction %ld!\n",
599 				transactionID);
600 			put_cached_block(cache, block);
601 			return NULL;
602 		}
603 		if (!transaction->open) {
604 			panic("get_writable_cached_block(): transaction already done!\n");
605 			put_cached_block(cache, block);
606 			return NULL;
607 		}
608 
609 		block->transaction = transaction;
610 
611 		// attach the block to the transaction block list
612 		block->transaction_next = transaction->first_block;
613 		transaction->first_block = block;
614 		transaction->num_blocks++;
615 	}
616 
617 	if (!(allocated && cleared) && block->original_data == NULL) {
618 		// we already have data, so we need to preserve it
619 		block->original_data = cache->Allocate();
620 		if (block->original_data == NULL) {
621 			FATAL(("could not allocate original_data\n"));
622 			put_cached_block(cache, block);
623 			return NULL;
624 		}
625 
626 		memcpy(block->original_data, block->current_data, cache->block_size);
627 	}
628 	if (block->parent_data == block->current_data) {
629 		// remember any previous contents for the parent transaction
630 		block->parent_data = cache->Allocate();
631 		if (block->parent_data == NULL) {
632 			// TODO: maybe we should just continue the current transaction in this case...
633 			FATAL(("could not allocate parent\n"));
634 			put_cached_block(cache, block);
635 			return NULL;
636 		}
637 
638 		memcpy(block->parent_data, block->current_data, cache->block_size);
639 		block->transaction->sub_num_blocks++;
640 	}
641 
642 	if (cleared)
643 		memset(block->current_data, 0, cache->block_size);
644 
645 	block->is_dirty = true;
646 
647 	return block->current_data;
648 }
649 
650 
651 static status_t
652 write_cached_block(block_cache *cache, cached_block *block,
653 	bool deleteTransaction)
654 {
655 	cache_transaction *previous = block->previous_transaction;
656 	int32 blockSize = cache->block_size;
657 
658 	void *data = previous && block->original_data
659 		? block->original_data : block->current_data;
660 		// we first need to write back changes from previous transactions
661 
662 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
663 
664 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize,
665 		data, blockSize);
666 
667 	if (written < blockSize) {
668 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
669 			strerror(errno)));
670 		return B_IO_ERROR;
671 	}
672 
673 	if (data == block->current_data)
674 		block->is_dirty = false;
675 
676 	if (previous != NULL) {
677 		previous->blocks.Remove(block);
678 		block->previous_transaction = NULL;
679 
680 		// Has the previous transation been finished with that write?
681 		if (--previous->num_blocks == 0) {
682 			TRACE(("cache transaction %ld finished!\n", previous->id));
683 
684 			if (previous->notification_hook != NULL) {
685 				previous->notification_hook(previous->id,
686 					previous->notification_data);
687 			}
688 
689 			if (deleteTransaction) {
690 				hash_remove(cache->transaction_hash, previous);
691 				delete_transaction(cache, previous);
692 			}
693 		}
694 	}
695 
696 	return B_OK;
697 }
698 
699 
700 extern "C" status_t
701 block_cache_init(void)
702 {
703 	return init_block_allocator();
704 }
705 
706 
707 //	#pragma mark - public transaction API
708 
709 
710 extern "C" int32
711 cache_start_transaction(void *_cache)
712 {
713 	block_cache *cache = (block_cache *)_cache;
714 	BenaphoreLocker locker(&cache->lock);
715 
716 	if (cache->last_transaction && cache->last_transaction->open) {
717 		panic("last transaction (%ld) still open!\n",
718 			cache->last_transaction->id);
719 	}
720 
721 	cache_transaction *transaction = new(nothrow) cache_transaction;
722 	if (transaction == NULL)
723 		return B_NO_MEMORY;
724 
725 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
726 	cache->last_transaction = transaction;
727 
728 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
729 
730 	hash_insert(cache->transaction_hash, transaction);
731 
732 	return transaction->id;
733 }
734 
735 
736 extern "C" status_t
737 cache_sync_transaction(void *_cache, int32 id)
738 {
739 	block_cache *cache = (block_cache *)_cache;
740 	BenaphoreLocker locker(&cache->lock);
741 	status_t status = B_ENTRY_NOT_FOUND;
742 
743 	hash_iterator iterator;
744 	hash_open(cache->transaction_hash, &iterator);
745 
746 	cache_transaction *transaction;
747 	while ((transaction = (cache_transaction *)hash_next(
748 			cache->transaction_hash, &iterator)) != NULL) {
749 		// close all earlier transactions which haven't been closed yet
750 
751 		if (transaction->id <= id && !transaction->open) {
752 			// write back all of their remaining dirty blocks
753 			while (transaction->num_blocks > 0) {
754 				status = write_cached_block(cache, transaction->blocks.Head(),
755 					false);
756 				if (status != B_OK)
757 					return status;
758 			}
759 
760 			hash_remove_current(cache->transaction_hash, &iterator);
761 			delete_transaction(cache, transaction);
762 		}
763 	}
764 
765 	hash_close(cache->transaction_hash, &iterator, false);
766 	return B_OK;
767 }
768 
769 
770 extern "C" status_t
771 cache_end_transaction(void *_cache, int32 id,
772 	transaction_notification_hook hook, void *data)
773 {
774 	block_cache *cache = (block_cache *)_cache;
775 	BenaphoreLocker locker(&cache->lock);
776 
777 	TRACE(("cache_end_transaction(id = %ld)\n", id));
778 
779 	cache_transaction *transaction = lookup_transaction(cache, id);
780 	if (transaction == NULL) {
781 		panic("cache_end_transaction(): invalid transaction ID\n");
782 		return B_BAD_VALUE;
783 	}
784 
785 	transaction->notification_hook = hook;
786 	transaction->notification_data = data;
787 
788 	// iterate through all blocks and free the unchanged original contents
789 
790 	cached_block *block = transaction->first_block, *next;
791 	for (; block != NULL; block = next) {
792 		next = block->transaction_next;
793 
794 		if (block->previous_transaction != NULL) {
795 			// need to write back pending changes
796 			write_cached_block(cache, block);
797 		}
798 
799 		if (block->original_data != NULL) {
800 			cache->Free(block->original_data);
801 			block->original_data = NULL;
802 		}
803 		if (transaction->has_sub_transaction) {
804 			if (block->parent_data != block->current_data)
805 				cache->Free(block->parent_data);
806 			block->parent_data = NULL;
807 		}
808 
809 		// move the block to the previous transaction list
810 		transaction->blocks.Add(block);
811 
812 		block->previous_transaction = transaction;
813 		block->transaction_next = NULL;
814 		block->transaction = NULL;
815 	}
816 
817 	transaction->open = false;
818 
819 	return B_OK;
820 }
821 
822 
823 extern "C" status_t
824 cache_abort_transaction(void *_cache, int32 id)
825 {
826 	block_cache *cache = (block_cache *)_cache;
827 	BenaphoreLocker locker(&cache->lock);
828 
829 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
830 
831 	cache_transaction *transaction = lookup_transaction(cache, id);
832 	if (transaction == NULL) {
833 		panic("cache_abort_transaction(): invalid transaction ID\n");
834 		return B_BAD_VALUE;
835 	}
836 
837 	// iterate through all blocks and restore their original contents
838 
839 	cached_block *block = transaction->first_block, *next;
840 	for (; block != NULL; block = next) {
841 		next = block->transaction_next;
842 
843 		if (block->original_data != NULL) {
844 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
845 				transaction->id, block->block_number));
846 			memcpy(block->current_data, block->original_data, cache->block_size);
847 			cache->Free(block->original_data);
848 			block->original_data = NULL;
849 		}
850 		if (transaction->has_sub_transaction) {
851 			if (block->parent_data != block->current_data)
852 				cache->Free(block->parent_data);
853 			block->parent_data = NULL;
854 		}
855 
856 		block->transaction_next = NULL;
857 		block->transaction = NULL;
858 	}
859 
860 	hash_remove(cache->transaction_hash, transaction);
861 	delete_transaction(cache, transaction);
862 	return B_OK;
863 }
864 
865 
866 /*!
867 	Acknowledges the current parent transaction, and starts a new transaction
868 	from its sub transaction.
869 	The new transaction also gets a new transaction ID.
870 */
871 extern "C" int32
872 cache_detach_sub_transaction(void *_cache, int32 id,
873 	transaction_notification_hook hook, void *data)
874 {
875 	block_cache *cache = (block_cache *)_cache;
876 	BenaphoreLocker locker(&cache->lock);
877 
878 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
879 
880 	cache_transaction *transaction = lookup_transaction(cache, id);
881 	if (transaction == NULL) {
882 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
883 		return B_BAD_VALUE;
884 	}
885 	if (!transaction->has_sub_transaction)
886 		return B_BAD_VALUE;
887 
888 	// create a new transaction for the sub transaction
889 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
890 	if (transaction == NULL)
891 		return B_NO_MEMORY;
892 
893 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
894 
895 	transaction->notification_hook = hook;
896 	transaction->notification_data = data;
897 
898 	// iterate through all blocks and free the unchanged original contents
899 
900 	cached_block *block = transaction->first_block, *next, *last = NULL;
901 	for (; block != NULL; block = next) {
902 		next = block->transaction_next;
903 
904 		if (block->previous_transaction != NULL) {
905 			// need to write back pending changes
906 			write_cached_block(cache, block);
907 		}
908 
909 		if (block->original_data != NULL && block->parent_data != NULL
910 			&& block->parent_data != block->current_data) {
911 			// free the original data if the parent data of the transaction
912 			// will be made current - but keep them otherwise
913 			cache->Free(block->original_data);
914 			block->original_data = NULL;
915 		}
916 		if (block->parent_data != NULL
917 			&& block->parent_data != block->current_data) {
918 			// we need to move this block over to the new transaction
919 			block->original_data = block->parent_data;
920 			if (last == NULL)
921 				newTransaction->first_block = block;
922 			else
923 				last->transaction_next = block;
924 
925 			last = block;
926 		}
927 		block->parent_data = NULL;
928 
929 		// move the block to the previous transaction list
930 		transaction->blocks.Add(block);
931 
932 		block->previous_transaction = transaction;
933 		block->transaction_next = NULL;
934 		block->transaction = newTransaction;
935 	}
936 
937 	transaction->open = false;
938 
939 	hash_insert(cache->transaction_hash, newTransaction);
940 	cache->last_transaction = newTransaction;
941 
942 	return B_OK;
943 }
944 
945 
946 extern "C" status_t
947 cache_abort_sub_transaction(void *_cache, int32 id)
948 {
949 	block_cache *cache = (block_cache *)_cache;
950 	BenaphoreLocker locker(&cache->lock);
951 
952 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
953 
954 	cache_transaction *transaction = lookup_transaction(cache, id);
955 	if (transaction == NULL) {
956 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
957 		return B_BAD_VALUE;
958 	}
959 	if (!transaction->has_sub_transaction)
960 		return B_BAD_VALUE;
961 
962 	// revert all changes back to the version of the parent
963 
964 	cached_block *block = transaction->first_block, *next;
965 	for (; block != NULL; block = next) {
966 		next = block->transaction_next;
967 
968 		if (block->parent_data == NULL) {
969 			if (block->original_data != NULL) {
970 				// the parent transaction didn't change the block, but the sub
971 				// transaction did - we need to revert from the original data
972 				memcpy(block->current_data, block->original_data,
973 					cache->block_size);
974 			}
975 		} else if (block->parent_data != block->current_data) {
976 			// the block has been changed and must be restored
977 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
978 				transaction->id, block->block_number));
979 			memcpy(block->current_data, block->parent_data, cache->block_size);
980 			cache->Free(block->parent_data);
981 		}
982 
983 		block->parent_data = NULL;
984 	}
985 
986 	// all subsequent changes will go into the main transaction
987 	transaction->has_sub_transaction = false;
988 	return B_OK;
989 }
990 
991 
992 extern "C" status_t
993 cache_start_sub_transaction(void *_cache, int32 id)
994 {
995 	block_cache *cache = (block_cache *)_cache;
996 	BenaphoreLocker locker(&cache->lock);
997 
998 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
999 
1000 	cache_transaction *transaction = lookup_transaction(cache, id);
1001 	if (transaction == NULL) {
1002 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
1003 		return B_BAD_VALUE;
1004 	}
1005 
1006 	// move all changed blocks up to the parent
1007 
1008 	cached_block *block = transaction->first_block, *next;
1009 	for (; block != NULL; block = next) {
1010 		next = block->transaction_next;
1011 
1012 		if (transaction->has_sub_transaction
1013 			&& block->parent_data != NULL
1014 			&& block->parent_data != block->current_data) {
1015 			// there already is an older sub transaction - we acknowledge
1016 			// its changes and move its blocks up to the parent
1017 			cache->Free(block->parent_data);
1018 		}
1019 
1020 		// we "allocate" the parent data lazily, that means, we don't copy
1021 		// the data (and allocate memory for it) until we need to
1022 		block->parent_data = block->current_data;
1023 	}
1024 
1025 	// all subsequent changes will go into the sub transaction
1026 	transaction->has_sub_transaction = true;
1027 	transaction->sub_num_blocks = 0;
1028 
1029 	return B_OK;
1030 }
1031 
1032 
1033 extern "C" status_t
1034 cache_next_block_in_transaction(void *_cache, int32 id, uint32 *_cookie,
1035 	off_t *_blockNumber, void **_data, void **_unchangedData)
1036 {
1037 	cached_block *block = (cached_block *)*_cookie;
1038 	block_cache *cache = (block_cache *)_cache;
1039 
1040 	BenaphoreLocker locker(&cache->lock);
1041 
1042 	cache_transaction *transaction = lookup_transaction(cache, id);
1043 	if (transaction == NULL)
1044 		return B_BAD_VALUE;
1045 
1046 	if (block == NULL)
1047 		block = transaction->first_block;
1048 	else
1049 		block = block->transaction_next;
1050 
1051 	if (block == NULL)
1052 		return B_ENTRY_NOT_FOUND;
1053 
1054 	if (_blockNumber)
1055 		*_blockNumber = block->block_number;
1056 	if (_data)
1057 		*_data = block->current_data;
1058 	if (_unchangedData)
1059 		*_unchangedData = block->original_data;
1060 
1061 	*_cookie = (uint32)block;
1062 	return B_OK;
1063 }
1064 
1065 
1066 extern "C" int32
1067 cache_blocks_in_transaction(void *_cache, int32 id)
1068 {
1069 	block_cache *cache = (block_cache *)_cache;
1070 	BenaphoreLocker locker(&cache->lock);
1071 
1072 	cache_transaction *transaction = lookup_transaction(cache, id);
1073 	if (transaction == NULL)
1074 		return B_BAD_VALUE;
1075 
1076 	return transaction->num_blocks;
1077 }
1078 
1079 
1080 extern "C" int32
1081 cache_blocks_in_sub_transaction(void *_cache, int32 id)
1082 {
1083 	block_cache *cache = (block_cache *)_cache;
1084 	BenaphoreLocker locker(&cache->lock);
1085 
1086 	cache_transaction *transaction = lookup_transaction(cache, id);
1087 	if (transaction == NULL)
1088 		return B_BAD_VALUE;
1089 
1090 	return transaction->sub_num_blocks;
1091 }
1092 
1093 
1094 //	#pragma mark - public block cache API
1095 //	public interface
1096 
1097 
1098 extern "C" void
1099 block_cache_delete(void *_cache, bool allowWrites)
1100 {
1101 	block_cache *cache = (block_cache *)_cache;
1102 
1103 	if (allowWrites)
1104 		block_cache_sync(cache);
1105 
1106 	BenaphoreLocker locker(&cache->lock);
1107 
1108 	// free all blocks
1109 
1110 	uint32 cookie = 0;
1111 	cached_block *block;
1112 	while ((block = (cached_block *)hash_remove_first(cache->hash,
1113 			&cookie)) != NULL) {
1114 		cache->FreeBlock(block);
1115 	}
1116 
1117 	// free all transactions (they will all be aborted)
1118 
1119 	cookie = 0;
1120 	cache_transaction *transaction;
1121 	while ((transaction = (cache_transaction *)hash_remove_first(
1122 			cache->transaction_hash, &cookie)) != NULL) {
1123 		delete transaction;
1124 	}
1125 
1126 	delete cache;
1127 }
1128 
1129 
1130 extern "C" void *
1131 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
1132 {
1133 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
1134 		readOnly);
1135 	if (cache == NULL)
1136 		return NULL;
1137 
1138 	if (cache->InitCheck() != B_OK) {
1139 		delete cache;
1140 		return NULL;
1141 	}
1142 
1143 	return cache;
1144 }
1145 
1146 
1147 extern "C" status_t
1148 block_cache_sync(void *_cache)
1149 {
1150 	block_cache *cache = (block_cache *)_cache;
1151 
1152 	// we will sync all dirty blocks to disk that have a completed
1153 	// transaction or no transaction only
1154 
1155 	BenaphoreLocker locker(&cache->lock);
1156 	hash_iterator iterator;
1157 	hash_open(cache->hash, &iterator);
1158 
1159 	cached_block *block;
1160 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1161 		if (block->previous_transaction != NULL
1162 			|| (block->transaction == NULL && block->is_dirty)) {
1163 			status_t status = write_cached_block(cache, block);
1164 			if (status != B_OK)
1165 				return status;
1166 		}
1167 	}
1168 
1169 	hash_close(cache->hash, &iterator, false);
1170 	return B_OK;
1171 }
1172 
1173 
1174 extern "C" status_t
1175 block_cache_sync_etc(void *_cache, off_t blockNumber, size_t numBlocks)
1176 {
1177 	block_cache *cache = (block_cache *)_cache;
1178 
1179 	// we will sync all dirty blocks to disk that have a completed
1180 	// transaction or no transaction only
1181 
1182 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1183 		panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
1184 			blockNumber, cache->max_blocks - 1);
1185 		return B_BAD_VALUE;
1186 	}
1187 
1188 	BenaphoreLocker locker(&cache->lock);
1189 
1190 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
1191 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1192 			&blockNumber);
1193 		if (block == NULL)
1194 			continue;
1195 		if (block->previous_transaction != NULL
1196 			|| (block->transaction == NULL && block->is_dirty)) {
1197 			status_t status = write_cached_block(cache, block);
1198 			if (status != B_OK)
1199 				return status;
1200 		}
1201 	}
1202 
1203 	return B_OK;
1204 }
1205 
1206 
1207 extern "C" status_t
1208 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
1209 {
1210 	block_cache *cache = (block_cache *)_cache;
1211 	BenaphoreLocker locker(&cache->lock);
1212 
1213 	if (cache->read_only)
1214 		panic("tried to make block writable on a read-only cache!");
1215 
1216 	// ToDo: this can be done better!
1217 	void *block = get_writable_cached_block(cache, blockNumber,
1218 		blockNumber, 1, transaction, false);
1219 	if (block != NULL) {
1220 		put_cached_block((block_cache *)_cache, blockNumber);
1221 		return B_OK;
1222 	}
1223 
1224 	return B_ERROR;
1225 }
1226 
1227 
1228 extern "C" void *
1229 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base,
1230 	off_t length, int32 transaction)
1231 {
1232 	block_cache *cache = (block_cache *)_cache;
1233 	BenaphoreLocker locker(&cache->lock);
1234 
1235 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1236 		blockNumber, transaction));
1237 	if (cache->read_only)
1238 		panic("tried to get writable block on a read-only cache!");
1239 
1240 	return get_writable_cached_block(cache, blockNumber, base, length,
1241 		transaction, false);
1242 }
1243 
1244 
1245 extern "C" void *
1246 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
1247 {
1248 	return block_cache_get_writable_etc(_cache, blockNumber,
1249 		blockNumber, 1, transaction);
1250 }
1251 
1252 
1253 extern "C" void *
1254 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
1255 {
1256 	block_cache *cache = (block_cache *)_cache;
1257 	BenaphoreLocker locker(&cache->lock);
1258 
1259 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1260 		blockNumber, transaction));
1261 	if (cache->read_only)
1262 		panic("tried to get empty writable block on a read-only cache!");
1263 
1264 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1265 				blockNumber, 1, transaction, true);
1266 }
1267 
1268 
1269 extern "C" const void *
1270 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
1271 {
1272 	block_cache *cache = (block_cache *)_cache;
1273 	BenaphoreLocker locker(&cache->lock);
1274 	bool allocated;
1275 
1276 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1277 	if (block == NULL)
1278 		return NULL;
1279 
1280 #ifdef DEBUG_CHANGED
1281 	if (block->compare == NULL)
1282 		block->compare = cache->Allocate();
1283 	if (block->compare != NULL)
1284 		memcpy(block->compare, block->current_data, cache->block_size);
1285 #endif
1286 	return block->current_data;
1287 }
1288 
1289 
1290 extern "C" const void *
1291 block_cache_get(void *_cache, off_t blockNumber)
1292 {
1293 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1294 }
1295 
1296 
1297 /*!
1298 	Changes the internal status of a writable block to \a dirty. This can be
1299 	helpful in case you realize you don't need to change that block anymore
1300 	for whatever reason.
1301 
1302 	Note, you must only use this function on blocks that were acquired
1303 	writable!
1304 */
1305 extern "C" status_t
1306 block_cache_set_dirty(void *_cache, off_t blockNumber, bool dirty,
1307 	int32 transaction)
1308 {
1309 	// TODO: not yet implemented
1310 	if (dirty)
1311 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
1312 
1313 	return B_OK;
1314 }
1315 
1316 
1317 extern "C" void
1318 block_cache_put(void *_cache, off_t blockNumber)
1319 {
1320 	block_cache *cache = (block_cache *)_cache;
1321 	BenaphoreLocker locker(&cache->lock);
1322 
1323 	put_cached_block(cache, blockNumber);
1324 }
1325 
1326