xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 9ecf9d1c1d4888d341a6eac72112c72d1ae3a4cb)
1 /*
2  * Copyright 2004-2006, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "block_cache_private.h"
8 
9 #include <KernelExport.h>
10 #include <fs_cache.h>
11 
12 #include <block_cache.h>
13 #include <lock.h>
14 #include <vm_low_memory.h>
15 #include <util/kernel_cpp.h>
16 #include <util/DoublyLinkedList.h>
17 #include <util/AutoLock.h>
18 #include <util/khash.h>
19 
20 #include <unistd.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 
26 // ToDo: this is a naive but growing implementation to test the API:
27 //	1) block reading/writing is not at all optimized for speed, it will
28 //	   just read and write single blocks.
29 //	2) the locking could be improved; getting a block should not need to
30 //	   wait for blocks to be written
31 //	3) dirty blocks are only written back if asked for
32 // TODO: the retrieval/copy of the original data could be delayed until the
33 //		new data must be written, ie. in low memory situations.
34 
35 //#define TRACE_BLOCK_CACHE
36 #ifdef TRACE_BLOCK_CACHE
37 #	define TRACE(x)	dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 // This macro is used for fatal situations that are acceptable in a running
43 // system, like out of memory situations - should only panic for debugging.
44 #define FATAL(x) panic x
45 
46 struct cache_transaction {
47 	cache_transaction();
48 
49 	cache_transaction *next;
50 	int32			id;
51 	int32			num_blocks;
52 	int32			sub_num_blocks;
53 	cached_block	*first_block;
54 	block_list		blocks;
55 	transaction_notification_hook notification_hook;
56 	void			*notification_data;
57 	bool			open;
58 	bool			has_sub_transaction;
59 };
60 
61 static status_t write_cached_block(block_cache *cache, cached_block *block,
62 	bool deleteTransaction = true);
63 
64 
65 //	#pragma mark - private transaction
66 
67 
68 cache_transaction::cache_transaction()
69 {
70 	num_blocks = 0;
71 	sub_num_blocks = 0;
72 	first_block = NULL;
73 	notification_hook = NULL;
74 	notification_data = NULL;
75 	open = true;
76 }
77 
78 
79 static int
80 transaction_compare(void *_transaction, const void *_id)
81 {
82 	cache_transaction *transaction = (cache_transaction *)_transaction;
83 	const int32 *id = (const int32 *)_id;
84 
85 	return transaction->id - *id;
86 }
87 
88 
89 static uint32
90 transaction_hash(void *_transaction, const void *_id, uint32 range)
91 {
92 	cache_transaction *transaction = (cache_transaction *)_transaction;
93 	const int32 *id = (const int32 *)_id;
94 
95 	if (transaction != NULL)
96 		return transaction->id % range;
97 
98 	return (uint32)*id % range;
99 }
100 
101 
102 static void
103 delete_transaction(block_cache *cache, cache_transaction *transaction)
104 {
105 	hash_remove(cache->transaction_hash, transaction);
106 	if (cache->last_transaction == transaction)
107 		cache->last_transaction = NULL;
108 
109 	delete transaction;
110 }
111 
112 
113 static cache_transaction *
114 lookup_transaction(block_cache *cache, int32 id)
115 {
116 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
117 }
118 
119 
120 //	#pragma mark - cached_block
121 
122 
123 /* static */
124 int
125 cached_block::Compare(void *_cacheEntry, const void *_block)
126 {
127 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
128 	const off_t *block = (const off_t *)_block;
129 
130 	return cacheEntry->block_number - *block;
131 }
132 
133 
134 
135 /* static */
136 uint32
137 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
138 {
139 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
140 	const off_t *block = (const off_t *)_block;
141 
142 	if (cacheEntry != NULL)
143 		return cacheEntry->block_number % range;
144 
145 	return (uint64)*block % range;
146 }
147 
148 
149 //	#pragma mark - block_cache
150 
151 
152 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize)
153 	:
154 	hash(NULL),
155 	fd(_fd),
156 	max_blocks(numBlocks),
157 	block_size(blockSize),
158 	next_transaction_id(1),
159 	last_transaction(NULL),
160 	transaction_hash(NULL),
161 	ranges_hash(NULL)
162 {
163 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
164 	if (hash == NULL)
165 		return;
166 
167 	transaction_hash = hash_init(16, 0, &transaction_compare, &::transaction_hash);
168 	if (transaction_hash == NULL)
169 		return;
170 
171 	ranges_hash = hash_init(16, 0, &block_range::Compare, &block_range::Hash);
172 	if (ranges_hash == NULL)
173 		return;
174 
175 	if (benaphore_init(&lock, "block cache") < B_OK)
176 		return;
177 
178 	chunk_size = max_c(blockSize, B_PAGE_SIZE);
179 	chunks_per_range = kBlockRangeSize / chunk_size;
180 	range_mask = (1UL << chunks_per_range) - 1;
181 	chunk_mask = (1UL << (chunk_size / blockSize)) - 1;
182 
183 	register_low_memory_handler(&block_cache::LowMemoryHandler, this, 0);
184 }
185 
186 
187 block_cache::~block_cache()
188 {
189 	unregister_low_memory_handler(&block_cache::LowMemoryHandler, this);
190 
191 	benaphore_destroy(&lock);
192 
193 	hash_uninit(ranges_hash);
194 	hash_uninit(transaction_hash);
195 	hash_uninit(hash);
196 }
197 
198 
199 status_t
200 block_cache::InitCheck()
201 {
202 	if (lock.sem < B_OK)
203 		return lock.sem;
204 
205 	if (hash == NULL || transaction_hash == NULL || ranges_hash == NULL)
206 		return B_NO_MEMORY;
207 
208 	return B_OK;
209 }
210 
211 
212 block_range *
213 block_cache::GetFreeRange()
214 {
215 	if (!free_ranges.IsEmpty())
216 		return free_ranges.First();
217 
218 	// we need to allocate a new range
219 	block_range *range;
220 	if (block_range::New(this, &range) != B_OK) {
221 		RemoveUnusedBlocks(2, 50);
222 
223 		if (!free_ranges.IsEmpty())
224 			return free_ranges.First();
225 
226 		RemoveUnusedBlocks(LONG_MAX, 50);
227 
228 		if (!free_ranges.IsEmpty())
229 			return free_ranges.First();
230 
231 		// TODO: We also need to free ranges from other caches to get a free one
232 		// (if not, an active volume might have stolen all free ranges already)
233 		return NULL;
234 	}
235 
236 	return range;
237 }
238 
239 
240 block_range *
241 block_cache::GetRange(void *address)
242 {
243 	return (block_range *)hash_lookup(ranges_hash, address);
244 }
245 
246 
247 void
248 block_cache::Free(void *address)
249 {
250 	if (address == NULL)
251 		return;
252 
253 	block_range *range = GetRange(address);
254 	if (range == NULL)
255 		panic("no range for address %p\n", address);
256 
257 	ASSERT(range != NULL);
258 	range->Free(this, address);
259 
260 	if (range->Unused(this))
261 		block_range::Delete(this, range);
262 }
263 
264 
265 void *
266 block_cache::Allocate()
267 {
268 	block_range *range = GetFreeRange();
269 	if (range == NULL)
270 		return NULL;
271 
272 	return range->Allocate(this);
273 }
274 
275 
276 void
277 block_cache::FreeBlock(cached_block *block)
278 {
279 	block_range *range = GetRange(block->current_data);
280 	ASSERT(range != NULL);
281 	range->Free(this, block);
282 
283 	if (block->original_data != NULL || block->parent_data != NULL)
284 		panic("block_cache::FreeBlock(): %p, %p\n", block->original_data, block->parent_data);
285 
286 #ifdef DEBUG_CHANGED
287 	Free(block->compare);
288 #endif
289 
290 	if (range->Unused(this))
291 		block_range::Delete(this, range);
292 
293 	delete block;
294 }
295 
296 
297 cached_block *
298 block_cache::NewBlock(off_t blockNumber)
299 {
300 	cached_block *block = new cached_block;
301 	if (block == NULL) {
302 		FATAL(("could not allocate block!\n"));
303 		return NULL;
304 	}
305 
306 	block_range *range = GetFreeRange();
307 	if (range == NULL) {
308 		FATAL(("could not get range!\n"));
309 		delete block;
310 		return NULL;
311 	}
312 
313 	range->Allocate(this, block);
314 
315 	block->block_number = blockNumber;
316 	block->ref_count = 0;
317 	block->accessed = 0;
318 	block->transaction_next = NULL;
319 	block->transaction = block->previous_transaction = NULL;
320 	block->original_data = NULL;
321 	block->parent_data = NULL;
322 	block->is_dirty = false;
323 	block->unused = false;
324 #ifdef DEBUG_CHANGED
325 	block->compare = NULL;
326 #endif
327 
328 	hash_insert(hash, block);
329 
330 	return block;
331 }
332 
333 
334 void
335 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
336 {
337 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
338 
339 	cached_block *next = NULL;
340 	for (cached_block *block = unused_blocks.First(); block != NULL; block = next) {
341 		next = block->next;
342 
343 		if (maxAccessed < block->accessed)
344 			continue;
345 
346 		TRACE(("  remove block %Ld, accessed %ld times\n",
347 			block->block_number, block->accessed));
348 
349 		// this can only happen if no transactions are used
350 		if (block->is_dirty)
351 			write_cached_block(this, block, false);
352 
353 		// remove block from lists
354 		unused_blocks.Remove(block);
355 		hash_remove(hash, block);
356 
357 		FreeBlock(block);
358 
359 		if (--count <= 0)
360 			break;
361 	}
362 }
363 
364 
365 void
366 block_cache::LowMemoryHandler(void *data, int32 level)
367 {
368 	block_cache *cache = (block_cache *)data;
369 	BenaphoreLocker locker(&cache->lock);
370 
371 	if (!locker.IsLocked()) {
372 		// If our block_cache were deleted, it could be that we had
373 		// been called before that deletion went through, therefore,
374 		// acquiring its lock might fail.
375 		return;
376 	}
377 
378 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
379 
380 	// free some blocks according to the low memory state
381 	// (if there is enough memory left, we don't free any)
382 
383 	int32 free = 1;
384 	int32 accessed = 1;
385 	switch (vm_low_memory_state()) {
386 		case B_NO_LOW_MEMORY:
387 			return;
388 		case B_LOW_MEMORY_NOTE:
389 			free = 50;
390 			accessed = 2;
391 			break;
392 		case B_LOW_MEMORY_WARNING:
393 			free = 200;
394 			accessed = 10;
395 			break;
396 		case B_LOW_MEMORY_CRITICAL:
397 			free = LONG_MAX;
398 			accessed = LONG_MAX;
399 			break;
400 	}
401 
402 	cache->RemoveUnusedBlocks(accessed, free);
403 }
404 
405 
406 //	#pragma mark -
407 
408 
409 static void
410 put_cached_block(block_cache *cache, cached_block *block)
411 {
412 #ifdef DEBUG_CHANGED
413 	if (!block->is_dirty && block->compare != NULL && memcmp(block->current_data, block->compare, cache->block_size)) {
414 		dprintf("new block:\n");
415 		dump_block((const char *)block->current_data, 256, "  ");
416 		dprintf("unchanged block:\n");
417 		dump_block((const char *)block->compare, 256, "  ");
418 		write_cached_block(cache, block);
419 		panic("block_cache: supposed to be clean block was changed!\n");
420 
421 		cache->Free(block->compare);
422 		block->compare = NULL;
423 	}
424 #endif
425 
426 	if (--block->ref_count == 0
427 		&& block->transaction == NULL
428 		&& block->previous_transaction == NULL) {
429 		// put this block in the list of unused blocks
430 		block->unused = true;
431 		cache->unused_blocks.Add(block);
432 //		block->current_data = cache->allocator->Release(block->current_data);
433 	}
434 
435 	// free some blocks according to the low memory state
436 	// (if there is enough memory left, we don't free any)
437 
438 	int32 free = 1;
439 	switch (vm_low_memory_state()) {
440 		case B_NO_LOW_MEMORY:
441 			return;
442 		case B_LOW_MEMORY_NOTE:
443 			free = 1;
444 			break;
445 		case B_LOW_MEMORY_WARNING:
446 			free = 5;
447 			break;
448 		case B_LOW_MEMORY_CRITICAL:
449 			free = 20;
450 			break;
451 	}
452 
453 	cache->RemoveUnusedBlocks(LONG_MAX, free);
454 }
455 
456 
457 static void
458 put_cached_block(block_cache *cache, off_t blockNumber)
459 {
460 	if (blockNumber < 0 || blockNumber >= cache->max_blocks)
461 		panic("put_cached_block: invalid block number %lld (max %lld)", blockNumber, cache->max_blocks - 1);
462 
463 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
464 	if (block != NULL)
465 		put_cached_block(cache, block);
466 }
467 
468 
469 static cached_block *
470 get_cached_block(block_cache *cache, off_t blockNumber, bool *allocated, bool readBlock = true)
471 {
472 	if (blockNumber < 0 || blockNumber >= cache->max_blocks)
473 		panic("get_cached_block: invalid block number %lld (max %lld)", blockNumber, cache->max_blocks - 1);
474 
475 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
476 	*allocated = false;
477 
478 	if (block == NULL) {
479 		// read block into cache
480 		block = cache->NewBlock(blockNumber);
481 		if (block == NULL)
482 			return NULL;
483 
484 		*allocated = true;
485 	} else {
486 /*
487 		if (block->ref_count == 0 && block->current_data != NULL) {
488 			// see if the old block can be resurrected
489 			block->current_data = cache->allocator->Acquire(block->current_data);
490 		}
491 
492 		if (block->current_data == NULL) {
493 			// there is no block yet, but we need one
494 			block->current_data = cache->allocator->Get();
495 			if (block->current_data == NULL)
496 				return NULL;
497 
498 			*allocated = true;
499 		}
500 */
501 	}
502 
503 	if (*allocated && readBlock) {
504 		int32 blockSize = cache->block_size;
505 
506 		if (read_pos(cache->fd, blockNumber * blockSize, block->current_data, blockSize) < blockSize) {
507 			cache->FreeBlock(block);
508 			FATAL(("could not read block %Ld\n", blockNumber));
509 			return NULL;
510 		}
511 	}
512 
513 	if (block->unused) {
514 		//TRACE(("remove block %Ld from unused\n", blockNumber));
515 		block->unused = false;
516 		cache->unused_blocks.Remove(block);
517 	}
518 
519 	block->ref_count++;
520 	block->accessed++;
521 
522 	return block;
523 }
524 
525 
526 /**	Returns the writable block data for the requested blockNumber.
527  *	If \a cleared is true, the block is not read from disk; an empty block
528  *	is returned.
529  *	This is the only method to insert a block into a transaction. It makes
530  *	sure that the previous block contents are preserved in that case.
531  */
532 
533 static void *
534 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base, off_t length,
535 	int32 transactionID, bool cleared)
536 {
537 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n", blockNumber, transactionID));
538 
539 	if (blockNumber < 0 || blockNumber >= cache->max_blocks)
540 		panic("get_writable_cached_block: invalid block number %lld (max %lld)", blockNumber, cache->max_blocks - 1);
541 
542 	bool allocated;
543 	cached_block *block = get_cached_block(cache, blockNumber, &allocated, !cleared);
544 	if (block == NULL)
545 		return NULL;
546 
547 	// if there is no transaction support, we just return the current block
548 	if (transactionID == -1) {
549 		if (cleared)
550 			memset(block->current_data, 0, cache->block_size);
551 
552 		block->is_dirty = true;
553 			// mark the block as dirty
554 
555 		return block->current_data;
556 	}
557 
558 	if (block->transaction != NULL && block->transaction->id != transactionID) {
559 		// ToDo: we have to wait here until the other transaction is done.
560 		//	Maybe we should even panic, since we can't prevent any deadlocks.
561 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
562 		put_cached_block(cache, block);
563 		return NULL;
564 	}
565 	if (block->transaction == NULL && transactionID != -1) {
566 		// get new transaction
567 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
568 		if (transaction == NULL) {
569 			panic("get_writable_cached_block(): invalid transaction %ld!\n", transactionID);
570 			put_cached_block(cache, block);
571 			return NULL;
572 		}
573 		if (!transaction->open) {
574 			panic("get_writable_cached_block(): transaction already done!\n");
575 			put_cached_block(cache, block);
576 			return NULL;
577 		}
578 
579 		block->transaction = transaction;
580 
581 		// attach the block to the transaction block list
582 		block->transaction_next = transaction->first_block;
583 		transaction->first_block = block;
584 		transaction->num_blocks++;
585 	}
586 
587 	if (!(allocated && cleared) && block->original_data == NULL) {
588 		// we already have data, so we need to preserve it
589 		block->original_data = cache->Allocate();
590 		if (block->original_data == NULL) {
591 			FATAL(("could not allocate original_data\n"));
592 			put_cached_block(cache, block);
593 			return NULL;
594 		}
595 
596 		memcpy(block->original_data, block->current_data, cache->block_size);
597 	}
598 	if (block->parent_data == block->current_data) {
599 		// remember any previous contents for the parent transaction
600 		block->parent_data = cache->Allocate();
601 		if (block->parent_data == NULL) {
602 			// ToDo: maybe we should just continue the current transaction in this case...
603 			FATAL(("could not allocate parent\n"));
604 			put_cached_block(cache, block);
605 			return NULL;
606 		}
607 
608 		memcpy(block->parent_data, block->current_data, cache->block_size);
609 		block->transaction->sub_num_blocks++;
610 	}
611 
612 	if (cleared)
613 		memset(block->current_data, 0, cache->block_size);
614 
615 	block->is_dirty = true;
616 
617 	return block->current_data;
618 }
619 
620 
621 static status_t
622 write_cached_block(block_cache *cache, cached_block *block, bool deleteTransaction)
623 {
624 	cache_transaction *previous = block->previous_transaction;
625 	int32 blockSize = cache->block_size;
626 
627 	void *data = previous && block->original_data ? block->original_data : block->current_data;
628 		// we first need to write back changes from previous transactions
629 
630 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
631 
632 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize, data, blockSize);
633 
634 	if (written < blockSize) {
635 		FATAL(("could not write back block %Ld (%s)\n", block->block_number, strerror(errno)));
636 		return B_IO_ERROR;
637 	}
638 
639 	if (data == block->current_data)
640 		block->is_dirty = false;
641 
642 	if (previous != NULL) {
643 		previous->blocks.Remove(block);
644 		block->previous_transaction = NULL;
645 
646 		// Has the previous transation been finished with that write?
647 		if (--previous->num_blocks == 0) {
648 			TRACE(("cache transaction %ld finished!\n", previous->id));
649 
650 			if (previous->notification_hook != NULL)
651 				previous->notification_hook(previous->id, previous->notification_data);
652 
653 			if (deleteTransaction)
654 				delete_transaction(cache, previous);
655 		}
656 	}
657 
658 	return B_OK;
659 }
660 
661 
662 extern "C" status_t
663 block_cache_init(void)
664 {
665 	return init_block_allocator();
666 }
667 
668 
669 //	#pragma mark - public transaction API
670 
671 
672 extern "C" int32
673 cache_start_transaction(void *_cache)
674 {
675 	block_cache *cache = (block_cache *)_cache;
676 	BenaphoreLocker locker(&cache->lock);
677 
678 	if (cache->last_transaction && cache->last_transaction->open)
679 		panic("last transaction (%ld) still open!\n", cache->last_transaction->id);
680 
681 	cache_transaction *transaction = new cache_transaction;
682 	if (transaction == NULL)
683 		return B_NO_MEMORY;
684 
685 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
686 	cache->last_transaction = transaction;
687 
688 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
689 
690 	hash_insert(cache->transaction_hash, transaction);
691 
692 	return transaction->id;
693 }
694 
695 
696 extern "C" status_t
697 cache_sync_transaction(void *_cache, int32 id)
698 {
699 	block_cache *cache = (block_cache *)_cache;
700 	BenaphoreLocker locker(&cache->lock);
701 	status_t status = B_ENTRY_NOT_FOUND;
702 
703 	hash_iterator iterator;
704 	hash_open(cache->transaction_hash, &iterator);
705 
706 	cache_transaction *transaction;
707 	while ((transaction = (cache_transaction *)hash_next(cache->transaction_hash, &iterator)) != NULL) {
708 		// ToDo: fix hash interface to make this easier
709 
710 		if (transaction->id <= id && !transaction->open) {
711 			while (transaction->num_blocks > 0) {
712 				status = write_cached_block(cache, transaction->blocks.Head(), false);
713 				if (status != B_OK)
714 					return status;
715 			}
716 			delete_transaction(cache, transaction);
717 			hash_rewind(cache->transaction_hash, &iterator);
718 		}
719 	}
720 
721 	hash_close(cache->transaction_hash, &iterator, false);
722 	return B_OK;
723 }
724 
725 
726 extern "C" status_t
727 cache_end_transaction(void *_cache, int32 id, transaction_notification_hook hook, void *data)
728 {
729 	block_cache *cache = (block_cache *)_cache;
730 	BenaphoreLocker locker(&cache->lock);
731 
732 	TRACE(("cache_end_transaction(id = %ld)\n", id));
733 
734 	cache_transaction *transaction = lookup_transaction(cache, id);
735 	if (transaction == NULL) {
736 		panic("cache_end_transaction(): invalid transaction ID\n");
737 		return B_BAD_VALUE;
738 	}
739 
740 	transaction->notification_hook = hook;
741 	transaction->notification_data = data;
742 
743 	// iterate through all blocks and free the unchanged original contents
744 
745 	cached_block *block = transaction->first_block, *next;
746 	for (; block != NULL; block = next) {
747 		next = block->transaction_next;
748 
749 		if (block->previous_transaction != NULL) {
750 			// need to write back pending changes
751 			write_cached_block(cache, block);
752 		}
753 
754 		if (block->original_data != NULL) {
755 			cache->Free(block->original_data);
756 			block->original_data = NULL;
757 		}
758 		if (transaction->has_sub_transaction) {
759 			if (block->parent_data != block->current_data)
760 				cache->Free(block->parent_data);
761 			block->parent_data = NULL;
762 		}
763 
764 		// move the block to the previous transaction list
765 		transaction->blocks.Add(block);
766 
767 		block->previous_transaction = transaction;
768 		block->transaction_next = NULL;
769 		block->transaction = NULL;
770 	}
771 
772 	transaction->open = false;
773 
774 	return B_OK;
775 }
776 
777 
778 extern "C" status_t
779 cache_abort_transaction(void *_cache, int32 id)
780 {
781 	block_cache *cache = (block_cache *)_cache;
782 	BenaphoreLocker locker(&cache->lock);
783 
784 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
785 
786 	cache_transaction *transaction = lookup_transaction(cache, id);
787 	if (transaction == NULL) {
788 		panic("cache_abort_transaction(): invalid transaction ID\n");
789 		return B_BAD_VALUE;
790 	}
791 
792 	// iterate through all blocks and restore their original contents
793 
794 	cached_block *block = transaction->first_block, *next;
795 	for (; block != NULL; block = next) {
796 		next = block->transaction_next;
797 
798 		if (block->original_data != NULL) {
799 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
800 				transaction->id, block->block_number));
801 			memcpy(block->current_data, block->original_data, cache->block_size);
802 			cache->Free(block->original_data);
803 			block->original_data = NULL;
804 		}
805 		if (transaction->has_sub_transaction) {
806 			if (block->parent_data != block->current_data)
807 				cache->Free(block->parent_data);
808 			block->parent_data = NULL;
809 		}
810 
811 		block->transaction_next = NULL;
812 		block->transaction = NULL;
813 	}
814 
815 	delete_transaction(cache, transaction);
816 	return B_OK;
817 }
818 
819 
820 /**	Acknowledges the current parent transaction, and starts a new transaction
821  *	from its sub transaction.
822  *	The new transaction also gets a new transaction ID.
823  */
824 
825 extern "C" int32
826 cache_detach_sub_transaction(void *_cache, int32 id,
827 	transaction_notification_hook hook, void *data)
828 {
829 	block_cache *cache = (block_cache *)_cache;
830 	BenaphoreLocker locker(&cache->lock);
831 
832 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
833 
834 	cache_transaction *transaction = lookup_transaction(cache, id);
835 	if (transaction == NULL) {
836 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
837 		return B_BAD_VALUE;
838 	}
839 	if (!transaction->has_sub_transaction)
840 		return B_BAD_VALUE;
841 
842 	// create a new transaction for the sub transaction
843 	cache_transaction *newTransaction = new cache_transaction;
844 	if (transaction == NULL)
845 		return B_NO_MEMORY;
846 
847 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
848 
849 	transaction->notification_hook = hook;
850 	transaction->notification_data = data;
851 
852 	// iterate through all blocks and free the unchanged original contents
853 
854 	cached_block *block = transaction->first_block, *next, *last = NULL;
855 	for (; block != NULL; block = next) {
856 		next = block->transaction_next;
857 
858 		if (block->previous_transaction != NULL) {
859 			// need to write back pending changes
860 			write_cached_block(cache, block);
861 		}
862 
863 		if (block->original_data != NULL && block->parent_data != NULL
864 			&& block->parent_data != block->current_data) {
865 			// free the original data if the parent data of the transaction
866 			// will be made current - but keep them otherwise
867 			cache->Free(block->original_data);
868 			block->original_data = NULL;
869 		}
870 		if (block->parent_data != NULL && block->parent_data != block->current_data) {
871 			// we need to move this block over to the new transaction
872 			block->original_data = block->parent_data;
873 			if (last == NULL)
874 				newTransaction->first_block = block;
875 			else
876 				last->transaction_next = block;
877 
878 			last = block;
879 		}
880 		block->parent_data = NULL;
881 
882 		// move the block to the previous transaction list
883 		transaction->blocks.Add(block);
884 
885 		block->previous_transaction = transaction;
886 		block->transaction_next = NULL;
887 		block->transaction = newTransaction;
888 	}
889 
890 	transaction->open = false;
891 
892 	hash_insert(cache->transaction_hash, newTransaction);
893 	cache->last_transaction = newTransaction;
894 
895 	return B_OK;
896 }
897 
898 
899 extern "C" status_t
900 cache_abort_sub_transaction(void *_cache, int32 id)
901 {
902 	block_cache *cache = (block_cache *)_cache;
903 	BenaphoreLocker locker(&cache->lock);
904 
905 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
906 
907 	cache_transaction *transaction = lookup_transaction(cache, id);
908 	if (transaction == NULL) {
909 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
910 		return B_BAD_VALUE;
911 	}
912 	if (!transaction->has_sub_transaction)
913 		return B_BAD_VALUE;
914 
915 	// revert all changes back to the version of the parent
916 
917 	cached_block *block = transaction->first_block, *next;
918 	for (; block != NULL; block = next) {
919 		next = block->transaction_next;
920 
921 		if (block->parent_data == NULL) {
922 			if (block->original_data != NULL) {
923 				// the parent transaction didn't change the block, but the sub
924 				// transaction did - we need to revert from the original data
925 				memcpy(block->current_data, block->original_data, cache->block_size);
926 			}
927 		} else if (block->parent_data != block->current_data) {
928 			// the block has been changed and must be restored
929 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
930 				transaction->id, block->block_number));
931 			memcpy(block->current_data, block->parent_data, cache->block_size);
932 			cache->Free(block->parent_data);
933 		}
934 
935 		block->parent_data = NULL;
936 	}
937 
938 	// all subsequent changes will go into the main transaction
939 	transaction->has_sub_transaction = false;
940 	return B_OK;
941 }
942 
943 
944 extern "C" status_t
945 cache_start_sub_transaction(void *_cache, int32 id)
946 {
947 	block_cache *cache = (block_cache *)_cache;
948 	BenaphoreLocker locker(&cache->lock);
949 
950 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
951 
952 	cache_transaction *transaction = lookup_transaction(cache, id);
953 	if (transaction == NULL) {
954 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
955 		return B_BAD_VALUE;
956 	}
957 
958 	// move all changed blocks up to the parent
959 
960 	cached_block *block = transaction->first_block, *next;
961 	for (; block != NULL; block = next) {
962 		next = block->transaction_next;
963 
964 		if (transaction->has_sub_transaction
965 			&& block->parent_data != NULL
966 			&& block->parent_data != block->current_data) {
967 			// there already is an older sub transaction - we acknowledge
968 			// its changes and move its blocks up to the parent
969 			cache->Free(block->parent_data);
970 		}
971 
972 		// we "allocate" the parent data lazily, that means, we don't copy
973 		// the data (and allocate memory for it) until we need to
974 		block->parent_data = block->current_data;
975 	}
976 
977 	// all subsequent changes will go into the sub transaction
978 	transaction->has_sub_transaction = true;
979 	transaction->sub_num_blocks = 0;
980 
981 	return B_OK;
982 }
983 
984 
985 extern "C" status_t
986 cache_next_block_in_transaction(void *_cache, int32 id, uint32 *_cookie, off_t *_blockNumber,
987 	void **_data, void **_unchangedData)
988 {
989 	cached_block *block = (cached_block *)*_cookie;
990 	block_cache *cache = (block_cache *)_cache;
991 
992 	BenaphoreLocker locker(&cache->lock);
993 
994 	cache_transaction *transaction = lookup_transaction(cache, id);
995 	if (transaction == NULL)
996 		return B_BAD_VALUE;
997 
998 	if (block == NULL)
999 		block = transaction->first_block;
1000 	else
1001 		block = block->transaction_next;
1002 
1003 	if (block == NULL)
1004 		return B_ENTRY_NOT_FOUND;
1005 
1006 	if (_blockNumber)
1007 		*_blockNumber = block->block_number;
1008 	if (_data)
1009 		*_data = block->current_data;
1010 	if (_unchangedData)
1011 		*_unchangedData = block->original_data;
1012 
1013 	*_cookie = (uint32)block;
1014 	return B_OK;
1015 }
1016 
1017 
1018 extern "C" int32
1019 cache_blocks_in_transaction(void *_cache, int32 id)
1020 {
1021 	block_cache *cache = (block_cache *)_cache;
1022 	BenaphoreLocker locker(&cache->lock);
1023 
1024 	cache_transaction *transaction = lookup_transaction(cache, id);
1025 	if (transaction == NULL)
1026 		return B_BAD_VALUE;
1027 
1028 	return transaction->num_blocks;
1029 }
1030 
1031 
1032 extern "C" int32
1033 cache_blocks_in_sub_transaction(void *_cache, int32 id)
1034 {
1035 	block_cache *cache = (block_cache *)_cache;
1036 	BenaphoreLocker locker(&cache->lock);
1037 
1038 	cache_transaction *transaction = lookup_transaction(cache, id);
1039 	if (transaction == NULL)
1040 		return B_BAD_VALUE;
1041 
1042 	return transaction->sub_num_blocks;
1043 }
1044 
1045 
1046 //	#pragma mark - public block cache API
1047 //	public interface
1048 
1049 
1050 extern "C" void
1051 block_cache_delete(void *_cache, bool allowWrites)
1052 {
1053 	block_cache *cache = (block_cache *)_cache;
1054 
1055 	if (allowWrites)
1056 		block_cache_sync(cache);
1057 
1058 	BenaphoreLocker locker(&cache->lock);
1059 
1060 	// free all blocks
1061 
1062 	uint32 cookie = 0;
1063 	cached_block *block;
1064 	while ((block = (cached_block *)hash_remove_first(cache->hash, &cookie)) != NULL) {
1065 		cache->FreeBlock(block);
1066 	}
1067 
1068 	// free all transactions (they will all be aborted)
1069 
1070 	cookie = 0;
1071 	cache_transaction *transaction;
1072 	while ((transaction = (cache_transaction *)hash_remove_first(cache->transaction_hash, &cookie)) != NULL) {
1073 		delete transaction;
1074 	}
1075 
1076 	delete cache;
1077 }
1078 
1079 
1080 extern "C" void *
1081 block_cache_create(int fd, off_t numBlocks, size_t blockSize)
1082 {
1083 	block_cache *cache = new block_cache(fd, numBlocks, blockSize);
1084 	if (cache == NULL)
1085 		return NULL;
1086 
1087 	if (cache->InitCheck() != B_OK) {
1088 		delete cache;
1089 		return NULL;
1090 	}
1091 
1092 	return cache;
1093 }
1094 
1095 
1096 extern "C" status_t
1097 block_cache_sync(void *_cache)
1098 {
1099 	block_cache *cache = (block_cache *)_cache;
1100 
1101 	// we will sync all dirty blocks to disk that have a completed
1102 	// transaction or no transaction only
1103 
1104 	BenaphoreLocker locker(&cache->lock);
1105 	hash_iterator iterator;
1106 	hash_open(cache->hash, &iterator);
1107 
1108 	cached_block *block;
1109 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1110 		if (block->previous_transaction != NULL
1111 			|| (block->transaction == NULL && block->is_dirty)) {
1112 			status_t status = write_cached_block(cache, block);
1113 			if (status != B_OK)
1114 				return status;
1115 		}
1116 	}
1117 
1118 	hash_close(cache->hash, &iterator, false);
1119 	return B_OK;
1120 }
1121 
1122 
1123 extern "C" status_t
1124 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
1125 {
1126 	block_cache *cache = (block_cache *)_cache;
1127 	BenaphoreLocker locker(&cache->lock);
1128 
1129 	// ToDo: this can be done better!
1130 	void *block = get_writable_cached_block(cache, blockNumber,
1131 		blockNumber, 1, transaction, false);
1132 	if (block != NULL) {
1133 		put_cached_block((block_cache *)_cache, blockNumber);
1134 		return B_OK;
1135 	}
1136 
1137 	return B_ERROR;
1138 }
1139 
1140 
1141 extern "C" void *
1142 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base,
1143 	off_t length, int32 transaction)
1144 {
1145 	block_cache *cache = (block_cache *)_cache;
1146 	BenaphoreLocker locker(&cache->lock);
1147 
1148 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1149 		blockNumber, transaction));
1150 
1151 	return get_writable_cached_block(cache, blockNumber, base, length,
1152 		transaction, false);
1153 }
1154 
1155 
1156 extern "C" void *
1157 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
1158 {
1159 	return block_cache_get_writable_etc(_cache, blockNumber,
1160 		blockNumber, 1, transaction);
1161 }
1162 
1163 
1164 extern "C" void *
1165 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
1166 {
1167 	block_cache *cache = (block_cache *)_cache;
1168 	BenaphoreLocker locker(&cache->lock);
1169 
1170 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1171 		blockNumber, transaction));
1172 
1173 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1174 				blockNumber, 1, transaction, true);
1175 }
1176 
1177 
1178 extern "C" const void *
1179 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
1180 {
1181 	block_cache *cache = (block_cache *)_cache;
1182 	BenaphoreLocker locker(&cache->lock);
1183 	bool allocated;
1184 
1185 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1186 	if (block == NULL)
1187 		return NULL;
1188 
1189 #ifdef DEBUG_CHANGED
1190 	if (block->compare == NULL)
1191 		block->compare = cache->Allocate();
1192 	if (block->compare != NULL)
1193 		memcpy(block->compare, block->current_data, cache->block_size);
1194 #endif
1195 	return block->current_data;
1196 }
1197 
1198 
1199 extern "C" const void *
1200 block_cache_get(void *_cache, off_t blockNumber)
1201 {
1202 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1203 }
1204 
1205 
1206 extern "C" status_t
1207 block_cache_set_dirty(void *_cache, off_t blockNumber, bool isDirty, int32 transaction)
1208 {
1209 	// not yet implemented
1210 	// Note, you must only use this function on blocks that were acquired writable!
1211 	if (isDirty)
1212 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
1213 
1214 	return B_OK;
1215 }
1216 
1217 
1218 extern "C" void
1219 block_cache_put(void *_cache, off_t blockNumber)
1220 {
1221 	block_cache *cache = (block_cache *)_cache;
1222 	BenaphoreLocker locker(&cache->lock);
1223 
1224 	put_cached_block(cache, blockNumber);
1225 }
1226 
1227