xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision a4f6a81235ca2522c01f532de13cad9b729d4029)
1 /*
2  * Copyright 2004-2006, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "block_cache_private.h"
8 
9 #include <KernelExport.h>
10 #include <fs_cache.h>
11 
12 #include <block_cache.h>
13 #include <lock.h>
14 #include <vm_low_memory.h>
15 #include <util/kernel_cpp.h>
16 #include <util/DoublyLinkedList.h>
17 #include <util/AutoLock.h>
18 #include <util/khash.h>
19 
20 #include <unistd.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 
26 // ToDo: this is a naive but growing implementation to test the API:
27 //	1) block reading/writing is not at all optimized for speed, it will
28 //	   just read and write single blocks.
29 //	2) the locking could be improved; getting a block should not need to
30 //	   wait for blocks to be written
31 //	3) dirty blocks are only written back if asked for
32 // TODO: the retrieval/copy of the original data could be delayed until the
33 //		new data must be written, ie. in low memory situations.
34 
35 //#define TRACE_BLOCK_CACHE
36 #ifdef TRACE_BLOCK_CACHE
37 #	define TRACE(x)	dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 // This macro is used for fatal situations that are acceptable in a running
43 // system, like out of memory situations - should only panic for debugging.
44 #define FATAL(x) panic x
45 
46 struct cache_transaction {
47 	cache_transaction();
48 
49 	cache_transaction *next;
50 	int32			id;
51 	int32			num_blocks;
52 	int32			sub_num_blocks;
53 	cached_block	*first_block;
54 	block_list		blocks;
55 	transaction_notification_hook notification_hook;
56 	void			*notification_data;
57 	bool			open;
58 	bool			has_sub_transaction;
59 };
60 
61 static status_t write_cached_block(block_cache *cache, cached_block *block,
62 	bool deleteTransaction = true);
63 
64 
65 //	#pragma mark - private transaction
66 
67 
68 cache_transaction::cache_transaction()
69 {
70 	num_blocks = 0;
71 	sub_num_blocks = 0;
72 	first_block = NULL;
73 	notification_hook = NULL;
74 	notification_data = NULL;
75 	open = true;
76 }
77 
78 
79 static int
80 transaction_compare(void *_transaction, const void *_id)
81 {
82 	cache_transaction *transaction = (cache_transaction *)_transaction;
83 	const int32 *id = (const int32 *)_id;
84 
85 	return transaction->id - *id;
86 }
87 
88 
89 static uint32
90 transaction_hash(void *_transaction, const void *_id, uint32 range)
91 {
92 	cache_transaction *transaction = (cache_transaction *)_transaction;
93 	const int32 *id = (const int32 *)_id;
94 
95 	if (transaction != NULL)
96 		return transaction->id % range;
97 
98 	return (uint32)*id % range;
99 }
100 
101 
102 static void
103 delete_transaction(block_cache *cache, cache_transaction *transaction)
104 {
105 	hash_remove(cache->transaction_hash, transaction);
106 	if (cache->last_transaction == transaction)
107 		cache->last_transaction = NULL;
108 
109 	delete transaction;
110 }
111 
112 
113 static cache_transaction *
114 lookup_transaction(block_cache *cache, int32 id)
115 {
116 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
117 }
118 
119 
120 //	#pragma mark - cached_block
121 
122 
123 /* static */
124 int
125 cached_block::Compare(void *_cacheEntry, const void *_block)
126 {
127 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
128 	const off_t *block = (const off_t *)_block;
129 
130 	return cacheEntry->block_number - *block;
131 }
132 
133 
134 
135 /* static */
136 uint32
137 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
138 {
139 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
140 	const off_t *block = (const off_t *)_block;
141 
142 	if (cacheEntry != NULL)
143 		return cacheEntry->block_number % range;
144 
145 	return (uint64)*block % range;
146 }
147 
148 
149 //	#pragma mark - block_cache
150 
151 
152 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize)
153 	:
154 	hash(NULL),
155 	fd(_fd),
156 	max_blocks(numBlocks),
157 	block_size(blockSize),
158 	next_transaction_id(1),
159 	last_transaction(NULL),
160 	transaction_hash(NULL),
161 	ranges_hash(NULL)
162 {
163 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
164 	if (hash == NULL)
165 		return;
166 
167 	transaction_hash = hash_init(16, 0, &transaction_compare, &::transaction_hash);
168 	if (transaction_hash == NULL)
169 		return;
170 
171 	ranges_hash = hash_init(16, 0, &block_range::Compare, &block_range::Hash);
172 	if (ranges_hash == NULL)
173 		return;
174 
175 	if (benaphore_init(&lock, "block cache") < B_OK)
176 		return;
177 
178 	chunk_size = max_c(blockSize, B_PAGE_SIZE);
179 	chunks_per_range = kBlockRangeSize / chunk_size;
180 	range_mask = (1UL << chunks_per_range) - 1;
181 	chunk_mask = (1UL << (chunk_size / blockSize)) - 1;
182 
183 	register_low_memory_handler(&block_cache::LowMemoryHandler, this, 0);
184 }
185 
186 
187 block_cache::~block_cache()
188 {
189 	unregister_low_memory_handler(&block_cache::LowMemoryHandler, this);
190 
191 	benaphore_destroy(&lock);
192 
193 	hash_uninit(ranges_hash);
194 	hash_uninit(transaction_hash);
195 	hash_uninit(hash);
196 }
197 
198 
199 status_t
200 block_cache::InitCheck()
201 {
202 	if (lock.sem < B_OK)
203 		return lock.sem;
204 
205 	if (hash == NULL || transaction_hash == NULL || ranges_hash == NULL)
206 		return B_NO_MEMORY;
207 
208 	return B_OK;
209 }
210 
211 
212 block_range *
213 block_cache::GetFreeRange()
214 {
215 	if (!free_ranges.IsEmpty())
216 		return free_ranges.First();
217 
218 	// we need to allocate a new range
219 	block_range *range;
220 	if (block_range::New(this, &range) != B_OK) {
221 		// ToDo: free up space in existing ranges
222 		// We may also need to free ranges from other caches to get a free one
223 		// (if not, an active volume might have stolen all free ranges already)
224 		return NULL;
225 	}
226 
227 	return range;
228 }
229 
230 
231 block_range *
232 block_cache::GetRange(void *address)
233 {
234 	return (block_range *)hash_lookup(ranges_hash, address);
235 }
236 
237 
238 void
239 block_cache::Free(void *address)
240 {
241 	if (address == NULL)
242 		return;
243 
244 	block_range *range = GetRange(address);
245 	if (range == NULL)
246 		panic("no range for address %p\n", address);
247 
248 	ASSERT(range != NULL);
249 	range->Free(this, address);
250 
251 	if (range->Unused(this))
252 		block_range::Delete(this, range);
253 }
254 
255 
256 void *
257 block_cache::Allocate()
258 {
259 	block_range *range = GetFreeRange();
260 	if (range == NULL)
261 		return NULL;
262 
263 	return range->Allocate(this);
264 }
265 
266 
267 void
268 block_cache::FreeBlock(cached_block *block)
269 {
270 	block_range *range = GetRange(block->current_data);
271 	ASSERT(range != NULL);
272 	range->Free(this, block);
273 
274 	if (block->original_data != NULL || block->parent_data != NULL)
275 		panic("block_cache::FreeBlock(): %p, %p\n", block->original_data, block->parent_data);
276 
277 #ifdef DEBUG_CHANGED
278 	Free(block->compare);
279 #endif
280 
281 	if (range->Unused(this))
282 		block_range::Delete(this, range);
283 
284 	delete block;
285 }
286 
287 
288 cached_block *
289 block_cache::NewBlock(off_t blockNumber)
290 {
291 	cached_block *block = new cached_block;
292 	if (block == NULL) {
293 		FATAL(("could not allocate block!\n"));
294 		return NULL;
295 	}
296 
297 	block_range *range = GetFreeRange();
298 	if (range == NULL) {
299 		FATAL(("could not get range!\n"));
300 		delete block;
301 		return NULL;
302 	}
303 
304 	range->Allocate(this, block);
305 
306 	block->block_number = blockNumber;
307 	block->ref_count = 0;
308 	block->accessed = 0;
309 	block->transaction_next = NULL;
310 	block->transaction = block->previous_transaction = NULL;
311 	block->original_data = NULL;
312 	block->parent_data = NULL;
313 	block->is_dirty = false;
314 	block->unused = false;
315 #ifdef DEBUG_CHANGED
316 	block->compare = NULL;
317 #endif
318 
319 	hash_insert(hash, block);
320 
321 	return block;
322 }
323 
324 
325 void
326 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
327 {
328 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
329 
330 	cached_block *next = NULL;
331 	for (cached_block *block = unused_blocks.First(); block != NULL; block = next) {
332 		next = block->next;
333 
334 		if (maxAccessed < block->accessed)
335 			continue;
336 
337 		TRACE(("  remove block %Ld, accessed %ld times\n",
338 			block->block_number, block->accessed));
339 
340 		// this can only happen if no transactions are used
341 		if (block->is_dirty)
342 			write_cached_block(this, block, false);
343 
344 		// remove block from lists
345 		unused_blocks.Remove(block);
346 		hash_remove(hash, block);
347 
348 		FreeBlock(block);
349 
350 		if (--count <= 0)
351 			break;
352 	}
353 }
354 
355 
356 void
357 block_cache::LowMemoryHandler(void *data, int32 level)
358 {
359 	block_cache *cache = (block_cache *)data;
360 	BenaphoreLocker locker(&cache->lock);
361 
362 	if (!locker.IsLocked()) {
363 		// If our block_cache were deleted, it could be that we had
364 		// been called before that deletion went through, therefore,
365 		// acquiring its lock might fail.
366 		return;
367 	}
368 
369 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
370 
371 	// free some blocks according to the low memory state
372 	// (if there is enough memory left, we don't free any)
373 
374 	int32 free = 1;
375 	int32 accessed = 1;
376 	switch (vm_low_memory_state()) {
377 		case B_NO_LOW_MEMORY:
378 			return;
379 		case B_LOW_MEMORY_NOTE:
380 			free = 10;
381 			accessed = 2;
382 			break;
383 		case B_LOW_MEMORY_WARNING:
384 			free = 50;
385 			accessed = 10;
386 			break;
387 		case B_LOW_MEMORY_CRITICAL:
388 			free = LONG_MAX;
389 			accessed = LONG_MAX;
390 			break;
391 	}
392 
393 	cache->RemoveUnusedBlocks(accessed, free);
394 }
395 
396 
397 //	#pragma mark -
398 
399 
400 #ifdef DEBUG_CHANGED
401 
402 #define DUMPED_BLOCK_SIZE 16
403 
404 void
405 dumpBlock(const char *buffer, int size, const char *prefix)
406 {
407 	int i;
408 
409 	for (i = 0; i < size;) {
410 		int start = i;
411 
412 		dprintf(prefix);
413 		for (; i < start+DUMPED_BLOCK_SIZE; i++) {
414 			if (!(i % 4))
415 				dprintf(" ");
416 
417 			if (i >= size)
418 				dprintf("  ");
419 			else
420 				dprintf("%02x", *(unsigned char *)(buffer + i));
421 		}
422 		dprintf("  ");
423 
424 		for (i = start; i < start + DUMPED_BLOCK_SIZE; i++) {
425 			if (i < size) {
426 				char c = buffer[i];
427 
428 				if (c < 30)
429 					dprintf(".");
430 				else
431 					dprintf("%c", c);
432 			} else
433 				break;
434 		}
435 		dprintf("\n");
436 	}
437 }
438 #endif
439 
440 
441 static void
442 put_cached_block(block_cache *cache, cached_block *block)
443 {
444 #ifdef DEBUG_CHANGED
445 	if (!block->is_dirty && block->compare != NULL && memcmp(block->current_data, block->compare, cache->block_size)) {
446 		dprintf("new block:\n");
447 		dumpBlock((const char *)block->current_data, 256, "  ");
448 		dprintf("unchanged block:\n");
449 		dumpBlock((const char *)block->compare, 256, "  ");
450 		write_cached_block(cache, block);
451 		panic("block_cache: supposed to be clean block was changed!\n");
452 
453 		cache->Free(block->compare);
454 		block->compare = NULL;
455 	}
456 #endif
457 
458 	if (--block->ref_count == 0
459 		&& block->transaction == NULL
460 		&& block->previous_transaction == NULL) {
461 		// put this block in the list of unused blocks
462 		block->unused = true;
463 		cache->unused_blocks.Add(block);
464 //		block->current_data = cache->allocator->Release(block->current_data);
465 	}
466 
467 	// free some blocks according to the low memory state
468 	// (if there is enough memory left, we don't free any)
469 
470 	int32 free = 1;
471 	switch (vm_low_memory_state()) {
472 		case B_NO_LOW_MEMORY:
473 			return;
474 		case B_LOW_MEMORY_NOTE:
475 			free = 1;
476 			break;
477 		case B_LOW_MEMORY_WARNING:
478 			free = 5;
479 			break;
480 		case B_LOW_MEMORY_CRITICAL:
481 			free = 20;
482 			break;
483 	}
484 
485 	cache->RemoveUnusedBlocks(LONG_MAX, free);
486 }
487 
488 
489 static void
490 put_cached_block(block_cache *cache, off_t blockNumber)
491 {
492 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
493 	if (block != NULL)
494 		put_cached_block(cache, block);
495 }
496 
497 
498 static cached_block *
499 get_cached_block(block_cache *cache, off_t blockNumber, bool &allocated, bool readBlock = true)
500 {
501 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
502 	allocated = false;
503 
504 	if (block == NULL) {
505 		// read block into cache
506 		block = cache->NewBlock(blockNumber);
507 		if (block == NULL)
508 			return NULL;
509 
510 		allocated = true;
511 	} else {
512 /*
513 		if (block->ref_count == 0 && block->current_data != NULL) {
514 			// see if the old block can be resurrected
515 			block->current_data = cache->allocator->Acquire(block->current_data);
516 		}
517 
518 		if (block->current_data == NULL) {
519 			// there is no block yet, but we need one
520 			block->current_data = cache->allocator->Get();
521 			if (block->current_data == NULL)
522 				return NULL;
523 
524 			allocated = true;
525 		}
526 */
527 	}
528 
529 	if (allocated && readBlock) {
530 		int32 blockSize = cache->block_size;
531 
532 		if (read_pos(cache->fd, blockNumber * blockSize, block->current_data, blockSize) < blockSize) {
533 			cache->FreeBlock(block);
534 			FATAL(("could not read block %Ld\n", blockNumber));
535 			return NULL;
536 		}
537 	}
538 
539 	if (block->unused) {
540 		//TRACE(("remove block %Ld from unused\n", blockNumber));
541 		block->unused = false;
542 		cache->unused_blocks.Remove(block);
543 	}
544 
545 	block->ref_count++;
546 	block->accessed++;
547 
548 	return block;
549 }
550 
551 
552 /**	Returns the writable block data for the requested blockNumber.
553  *	If \a cleared is true, the block is not read from disk; an empty block
554  *	is returned.
555  *	This is the only method to insert a block into a transaction. It makes
556  *	sure that the previous block contents are preserved in that case.
557  */
558 
559 static void *
560 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base, off_t length,
561 	int32 transactionID, bool cleared)
562 {
563 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n", blockNumber, transactionID));
564 
565 	bool allocated;
566 	cached_block *block = get_cached_block(cache, blockNumber, allocated, !cleared);
567 	if (block == NULL)
568 		return NULL;
569 
570 	// if there is no transaction support, we just return the current block
571 	if (transactionID == -1) {
572 		if (cleared)
573 			memset(block->current_data, 0, cache->block_size);
574 
575 		block->is_dirty = true;
576 			// mark the block as dirty
577 
578 		return block->current_data;
579 	}
580 
581 	if (block->transaction != NULL && block->transaction->id != transactionID) {
582 		// ToDo: we have to wait here until the other transaction is done.
583 		//	Maybe we should even panic, since we can't prevent any deadlocks.
584 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
585 		put_cached_block(cache, block);
586 		return NULL;
587 	}
588 	if (block->transaction == NULL && transactionID != -1) {
589 		// get new transaction
590 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
591 		if (transaction == NULL) {
592 			panic("get_writable_cached_block(): invalid transaction %ld!\n", transactionID);
593 			put_cached_block(cache, block);
594 			return NULL;
595 		}
596 		if (!transaction->open) {
597 			panic("get_writable_cached_block(): transaction already done!\n");
598 			put_cached_block(cache, block);
599 			return NULL;
600 		}
601 
602 		block->transaction = transaction;
603 
604 		// attach the block to the transaction block list
605 		block->transaction_next = transaction->first_block;
606 		transaction->first_block = block;
607 		transaction->num_blocks++;
608 	}
609 
610 	if (!(allocated && cleared) && block->original_data == NULL) {
611 		// we already have data, so we need to preserve it
612 		block->original_data = cache->Allocate();
613 		if (block->original_data == NULL) {
614 			FATAL(("could not allocate original_data\n"));
615 			put_cached_block(cache, block);
616 			return NULL;
617 		}
618 
619 		memcpy(block->original_data, block->current_data, cache->block_size);
620 	}
621 	if (block->parent_data == block->current_data) {
622 		// remember any previous contents for the parent transaction
623 		block->parent_data = cache->Allocate();
624 		if (block->parent_data == NULL) {
625 			// ToDo: maybe we should just continue the current transaction in this case...
626 			FATAL(("could not allocate parent\n"));
627 			put_cached_block(cache, block);
628 			return NULL;
629 		}
630 
631 		memcpy(block->parent_data, block->current_data, cache->block_size);
632 		block->transaction->sub_num_blocks++;
633 	}
634 
635 	if (cleared)
636 		memset(block->current_data, 0, cache->block_size);
637 
638 	block->is_dirty = true;
639 
640 	return block->current_data;
641 }
642 
643 
644 static status_t
645 write_cached_block(block_cache *cache, cached_block *block, bool deleteTransaction)
646 {
647 	cache_transaction *previous = block->previous_transaction;
648 	int32 blockSize = cache->block_size;
649 
650 	void *data = previous && block->original_data ? block->original_data : block->current_data;
651 		// we first need to write back changes from previous transactions
652 
653 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
654 
655 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize, data, blockSize);
656 
657 	if (written < blockSize) {
658 		FATAL(("could not write back block %Ld (%s)\n", block->block_number, strerror(errno)));
659 		return B_IO_ERROR;
660 	}
661 
662 	if (data == block->current_data)
663 		block->is_dirty = false;
664 
665 	if (previous != NULL) {
666 		previous->blocks.Remove(block);
667 		block->previous_transaction = NULL;
668 
669 		// Has the previous transation been finished with that write?
670 		if (--previous->num_blocks == 0) {
671 			TRACE(("cache transaction %ld finished!\n", previous->id));
672 
673 			if (previous->notification_hook != NULL)
674 				previous->notification_hook(previous->id, previous->notification_data);
675 
676 			if (deleteTransaction)
677 				delete_transaction(cache, previous);
678 		}
679 	}
680 
681 	return B_OK;
682 }
683 
684 
685 extern "C" status_t
686 block_cache_init(void)
687 {
688 	return init_block_allocator();
689 }
690 
691 
692 //	#pragma mark - public transaction API
693 
694 
695 extern "C" int32
696 cache_start_transaction(void *_cache)
697 {
698 	block_cache *cache = (block_cache *)_cache;
699 	BenaphoreLocker locker(&cache->lock);
700 
701 	if (cache->last_transaction && cache->last_transaction->open)
702 		panic("last transaction (%ld) still open!\n", cache->last_transaction->id);
703 
704 	cache_transaction *transaction = new cache_transaction;
705 	if (transaction == NULL)
706 		return B_NO_MEMORY;
707 
708 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
709 	cache->last_transaction = transaction;
710 
711 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
712 
713 	hash_insert(cache->transaction_hash, transaction);
714 
715 	return transaction->id;
716 }
717 
718 
719 extern "C" status_t
720 cache_sync_transaction(void *_cache, int32 id)
721 {
722 	block_cache *cache = (block_cache *)_cache;
723 	BenaphoreLocker locker(&cache->lock);
724 	status_t status = B_ENTRY_NOT_FOUND;
725 
726 	hash_iterator iterator;
727 	hash_open(cache->transaction_hash, &iterator);
728 
729 	cache_transaction *transaction;
730 	while ((transaction = (cache_transaction *)hash_next(cache->transaction_hash, &iterator)) != NULL) {
731 		// ToDo: fix hash interface to make this easier
732 
733 		if (transaction->id <= id && !transaction->open) {
734 			while (transaction->num_blocks > 0) {
735 				status = write_cached_block(cache, transaction->blocks.Head(), false);
736 				if (status != B_OK)
737 					return status;
738 			}
739 			delete_transaction(cache, transaction);
740 			hash_rewind(cache->transaction_hash, &iterator);
741 		}
742 	}
743 
744 	hash_close(cache->transaction_hash, &iterator, false);
745 	return B_OK;
746 }
747 
748 
749 extern "C" status_t
750 cache_end_transaction(void *_cache, int32 id, transaction_notification_hook hook, void *data)
751 {
752 	block_cache *cache = (block_cache *)_cache;
753 	BenaphoreLocker locker(&cache->lock);
754 
755 	TRACE(("cache_end_transaction(id = %ld)\n", id));
756 
757 	cache_transaction *transaction = lookup_transaction(cache, id);
758 	if (transaction == NULL) {
759 		panic("cache_end_transaction(): invalid transaction ID\n");
760 		return B_BAD_VALUE;
761 	}
762 
763 	transaction->notification_hook = hook;
764 	transaction->notification_data = data;
765 
766 	// iterate through all blocks and free the unchanged original contents
767 
768 	cached_block *block = transaction->first_block, *next;
769 	for (; block != NULL; block = next) {
770 		next = block->transaction_next;
771 
772 		if (block->previous_transaction != NULL) {
773 			// need to write back pending changes
774 			write_cached_block(cache, block);
775 		}
776 
777 		if (block->original_data != NULL) {
778 			cache->Free(block->original_data);
779 			block->original_data = NULL;
780 		}
781 		if (transaction->has_sub_transaction) {
782 			if (block->parent_data != block->current_data)
783 				cache->Free(block->parent_data);
784 			block->parent_data = NULL;
785 		}
786 
787 		// move the block to the previous transaction list
788 		transaction->blocks.Add(block);
789 
790 		block->previous_transaction = transaction;
791 		block->transaction_next = NULL;
792 		block->transaction = NULL;
793 	}
794 
795 	transaction->open = false;
796 
797 	return B_OK;
798 }
799 
800 
801 extern "C" status_t
802 cache_abort_transaction(void *_cache, int32 id)
803 {
804 	block_cache *cache = (block_cache *)_cache;
805 	BenaphoreLocker locker(&cache->lock);
806 
807 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
808 
809 	cache_transaction *transaction = lookup_transaction(cache, id);
810 	if (transaction == NULL) {
811 		panic("cache_abort_transaction(): invalid transaction ID\n");
812 		return B_BAD_VALUE;
813 	}
814 
815 	// iterate through all blocks and restore their original contents
816 
817 	cached_block *block = transaction->first_block, *next;
818 	for (; block != NULL; block = next) {
819 		next = block->transaction_next;
820 
821 		if (block->original_data != NULL) {
822 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
823 				transaction->id, block->block_number));
824 			memcpy(block->current_data, block->original_data, cache->block_size);
825 			cache->Free(block->original_data);
826 			block->original_data = NULL;
827 		}
828 		if (transaction->has_sub_transaction) {
829 			if (block->parent_data != block->current_data)
830 				cache->Free(block->parent_data);
831 			block->parent_data = NULL;
832 		}
833 
834 		block->transaction_next = NULL;
835 		block->transaction = NULL;
836 	}
837 
838 	delete_transaction(cache, transaction);
839 	return B_OK;
840 }
841 
842 
843 /**	Acknowledges the current parent transaction, and starts a new transaction
844  *	from its sub transaction.
845  *	The new transaction also gets a new transaction ID.
846  */
847 
848 extern "C" int32
849 cache_detach_sub_transaction(void *_cache, int32 id,
850 	transaction_notification_hook hook, void *data)
851 {
852 	block_cache *cache = (block_cache *)_cache;
853 	BenaphoreLocker locker(&cache->lock);
854 
855 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
856 
857 	cache_transaction *transaction = lookup_transaction(cache, id);
858 	if (transaction == NULL) {
859 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
860 		return B_BAD_VALUE;
861 	}
862 	if (!transaction->has_sub_transaction)
863 		return B_BAD_VALUE;
864 
865 	// create a new transaction for the sub transaction
866 	cache_transaction *newTransaction = new cache_transaction;
867 	if (transaction == NULL)
868 		return B_NO_MEMORY;
869 
870 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
871 
872 	transaction->notification_hook = hook;
873 	transaction->notification_data = data;
874 
875 	// iterate through all blocks and free the unchanged original contents
876 
877 	cached_block *block = transaction->first_block, *next, *last = NULL;
878 	for (; block != NULL; block = next) {
879 		next = block->transaction_next;
880 
881 		if (block->previous_transaction != NULL) {
882 			// need to write back pending changes
883 			write_cached_block(cache, block);
884 		}
885 
886 		if (block->original_data != NULL && block->parent_data != NULL
887 			&& block->parent_data != block->current_data) {
888 			// free the original data if the parent data of the transaction
889 			// will be made current - but keep them otherwise
890 			cache->Free(block->original_data);
891 			block->original_data = NULL;
892 		}
893 		if (block->parent_data != NULL && block->parent_data != block->current_data) {
894 			// we need to move this block over to the new transaction
895 			block->original_data = block->parent_data;
896 			if (last == NULL)
897 				newTransaction->first_block = block;
898 			else
899 				last->transaction_next = block;
900 
901 			last = block;
902 		}
903 		block->parent_data = NULL;
904 
905 		// move the block to the previous transaction list
906 		transaction->blocks.Add(block);
907 
908 		block->previous_transaction = transaction;
909 		block->transaction_next = NULL;
910 		block->transaction = newTransaction;
911 	}
912 
913 	transaction->open = false;
914 
915 	hash_insert(cache->transaction_hash, newTransaction);
916 	cache->last_transaction = newTransaction;
917 
918 	return B_OK;
919 }
920 
921 
922 extern "C" status_t
923 cache_abort_sub_transaction(void *_cache, int32 id)
924 {
925 	block_cache *cache = (block_cache *)_cache;
926 	BenaphoreLocker locker(&cache->lock);
927 
928 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
929 
930 	cache_transaction *transaction = lookup_transaction(cache, id);
931 	if (transaction == NULL) {
932 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
933 		return B_BAD_VALUE;
934 	}
935 	if (!transaction->has_sub_transaction)
936 		return B_BAD_VALUE;
937 
938 	// revert all changes back to the version of the parent
939 
940 	cached_block *block = transaction->first_block, *next;
941 	for (; block != NULL; block = next) {
942 		next = block->transaction_next;
943 
944 		if (block->parent_data == NULL) {
945 			if (block->original_data != NULL) {
946 				// the parent transaction didn't change the block, but the sub
947 				// transaction did - we need to revert from the original data
948 				memcpy(block->current_data, block->original_data, cache->block_size);
949 			}
950 		} else if (block->parent_data != block->current_data) {
951 			// the block has been changed and must be restored
952 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
953 				transaction->id, block->block_number));
954 			memcpy(block->current_data, block->parent_data, cache->block_size);
955 			cache->Free(block->parent_data);
956 		}
957 
958 		block->parent_data = NULL;
959 	}
960 
961 	// all subsequent changes will go into the main transaction
962 	transaction->has_sub_transaction = false;
963 	return B_OK;
964 }
965 
966 
967 extern "C" status_t
968 cache_start_sub_transaction(void *_cache, int32 id)
969 {
970 	block_cache *cache = (block_cache *)_cache;
971 	BenaphoreLocker locker(&cache->lock);
972 
973 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
974 
975 	cache_transaction *transaction = lookup_transaction(cache, id);
976 	if (transaction == NULL) {
977 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
978 		return B_BAD_VALUE;
979 	}
980 
981 	// move all changed blocks up to the parent
982 
983 	cached_block *block = transaction->first_block, *next;
984 	for (; block != NULL; block = next) {
985 		next = block->transaction_next;
986 
987 		if (transaction->has_sub_transaction
988 			&& block->parent_data != NULL
989 			&& block->parent_data != block->current_data) {
990 			// there already is an older sub transaction - we acknowledge
991 			// its changes and move its blocks up to the parent
992 			cache->Free(block->parent_data);
993 		}
994 
995 		// we "allocate" the parent data lazily, that means, we don't copy
996 		// the data (and allocate memory for it) until we need to
997 		block->parent_data = block->current_data;
998 	}
999 
1000 	// all subsequent changes will go into the sub transaction
1001 	transaction->has_sub_transaction = true;
1002 	transaction->sub_num_blocks = 0;
1003 
1004 	return B_OK;
1005 }
1006 
1007 
1008 extern "C" status_t
1009 cache_next_block_in_transaction(void *_cache, int32 id, uint32 *_cookie, off_t *_blockNumber,
1010 	void **_data, void **_unchangedData)
1011 {
1012 	cached_block *block = (cached_block *)*_cookie;
1013 	block_cache *cache = (block_cache *)_cache;
1014 
1015 	BenaphoreLocker locker(&cache->lock);
1016 
1017 	cache_transaction *transaction = lookup_transaction(cache, id);
1018 	if (transaction == NULL)
1019 		return B_BAD_VALUE;
1020 
1021 	if (block == NULL)
1022 		block = transaction->first_block;
1023 	else
1024 		block = block->transaction_next;
1025 
1026 	if (block == NULL)
1027 		return B_ENTRY_NOT_FOUND;
1028 
1029 	if (_blockNumber)
1030 		*_blockNumber = block->block_number;
1031 	if (_data)
1032 		*_data = block->current_data;
1033 	if (_unchangedData)
1034 		*_unchangedData = block->original_data;
1035 
1036 	*_cookie = (uint32)block;
1037 	return B_OK;
1038 }
1039 
1040 
1041 extern "C" int32
1042 cache_blocks_in_transaction(void *_cache, int32 id)
1043 {
1044 	block_cache *cache = (block_cache *)_cache;
1045 	BenaphoreLocker locker(&cache->lock);
1046 
1047 	cache_transaction *transaction = lookup_transaction(cache, id);
1048 	if (transaction == NULL)
1049 		return B_BAD_VALUE;
1050 
1051 	return transaction->num_blocks;
1052 }
1053 
1054 
1055 extern "C" int32
1056 cache_blocks_in_sub_transaction(void *_cache, int32 id)
1057 {
1058 	block_cache *cache = (block_cache *)_cache;
1059 	BenaphoreLocker locker(&cache->lock);
1060 
1061 	cache_transaction *transaction = lookup_transaction(cache, id);
1062 	if (transaction == NULL)
1063 		return B_BAD_VALUE;
1064 
1065 	return transaction->sub_num_blocks;
1066 }
1067 
1068 
1069 //	#pragma mark - public block cache API
1070 //	public interface
1071 
1072 
1073 extern "C" void
1074 block_cache_delete(void *_cache, bool allowWrites)
1075 {
1076 	block_cache *cache = (block_cache *)_cache;
1077 
1078 	if (allowWrites)
1079 		block_cache_sync(cache);
1080 
1081 	BenaphoreLocker locker(&cache->lock);
1082 
1083 	// free all blocks
1084 
1085 	uint32 cookie = 0;
1086 	cached_block *block;
1087 	while ((block = (cached_block *)hash_remove_first(cache->hash, &cookie)) != NULL) {
1088 		cache->FreeBlock(block);
1089 	}
1090 
1091 	// free all transactions (they will all be aborted)
1092 
1093 	cookie = 0;
1094 	cache_transaction *transaction;
1095 	while ((transaction = (cache_transaction *)hash_remove_first(cache->transaction_hash, &cookie)) != NULL) {
1096 		delete transaction;
1097 	}
1098 
1099 	delete cache;
1100 }
1101 
1102 
1103 extern "C" void *
1104 block_cache_create(int fd, off_t numBlocks, size_t blockSize)
1105 {
1106 	block_cache *cache = new block_cache(fd, numBlocks, blockSize);
1107 	if (cache == NULL)
1108 		return NULL;
1109 
1110 	if (cache->InitCheck() != B_OK) {
1111 		delete cache;
1112 		return NULL;
1113 	}
1114 
1115 	return cache;
1116 }
1117 
1118 
1119 extern "C" status_t
1120 block_cache_sync(void *_cache)
1121 {
1122 	block_cache *cache = (block_cache *)_cache;
1123 
1124 	// we will sync all dirty blocks to disk that have a completed
1125 	// transaction or no transaction only
1126 
1127 	BenaphoreLocker locker(&cache->lock);
1128 	hash_iterator iterator;
1129 	hash_open(cache->hash, &iterator);
1130 
1131 	cached_block *block;
1132 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1133 		if (block->previous_transaction != NULL
1134 			|| (block->transaction == NULL && block->is_dirty)) {
1135 			status_t status = write_cached_block(cache, block);
1136 			if (status != B_OK)
1137 				return status;
1138 		}
1139 	}
1140 
1141 	hash_close(cache->hash, &iterator, false);
1142 	return B_OK;
1143 }
1144 
1145 
1146 extern "C" status_t
1147 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
1148 {
1149 	block_cache *cache = (block_cache *)_cache;
1150 	BenaphoreLocker locker(&cache->lock);
1151 
1152 	// ToDo: this can be done better!
1153 	void *block = get_writable_cached_block(cache, blockNumber,
1154 		blockNumber, 1, transaction, false);
1155 	if (block != NULL) {
1156 		put_cached_block((block_cache *)_cache, blockNumber);
1157 		return B_OK;
1158 	}
1159 
1160 	return B_ERROR;
1161 }
1162 
1163 
1164 extern "C" void *
1165 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base,
1166 	off_t length, int32 transaction)
1167 {
1168 	block_cache *cache = (block_cache *)_cache;
1169 	BenaphoreLocker locker(&cache->lock);
1170 
1171 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1172 		blockNumber, transaction));
1173 
1174 	return get_writable_cached_block(cache, blockNumber, base, length,
1175 		transaction, false);
1176 }
1177 
1178 
1179 extern "C" void *
1180 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
1181 {
1182 	return block_cache_get_writable_etc(_cache, blockNumber,
1183 		blockNumber, 1, transaction);
1184 }
1185 
1186 
1187 extern "C" void *
1188 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
1189 {
1190 	block_cache *cache = (block_cache *)_cache;
1191 	BenaphoreLocker locker(&cache->lock);
1192 
1193 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1194 		blockNumber, transaction));
1195 
1196 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1197 				blockNumber, 1, transaction, true);
1198 }
1199 
1200 
1201 extern "C" const void *
1202 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
1203 {
1204 	block_cache *cache = (block_cache *)_cache;
1205 	BenaphoreLocker locker(&cache->lock);
1206 	bool allocated;
1207 
1208 	cached_block *block = get_cached_block(cache, blockNumber, allocated);
1209 	if (block == NULL)
1210 		return NULL;
1211 
1212 #ifdef DEBUG_CHANGED
1213 	if (block->compare == NULL)
1214 		block->compare = cache->Allocate();
1215 	if (block->compare != NULL)
1216 		memcpy(block->compare, block->current_data, cache->block_size);
1217 #endif
1218 	return block->current_data;
1219 }
1220 
1221 
1222 extern "C" const void *
1223 block_cache_get(void *_cache, off_t blockNumber)
1224 {
1225 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1226 }
1227 
1228 
1229 extern "C" status_t
1230 block_cache_set_dirty(void *_cache, off_t blockNumber, bool isDirty, int32 transaction)
1231 {
1232 	// not yet implemented
1233 	// Note, you must only use this function on blocks that were acquired writable!
1234 	if (isDirty)
1235 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
1236 
1237 	return B_OK;
1238 }
1239 
1240 
1241 extern "C" void
1242 block_cache_put(void *_cache, off_t blockNumber)
1243 {
1244 	block_cache *cache = (block_cache *)_cache;
1245 	BenaphoreLocker locker(&cache->lock);
1246 
1247 	put_cached_block(cache, blockNumber);
1248 }
1249 
1250