xref: /haiku/src/tools/fs_shell/block_cache.cpp (revision 4b3b81da9e459443d75329cfd08bc9a57ad02653)
1 /*
2  * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 #include "block_cache_priv.h"
7 
8 #include <new>
9 
10 #include <stdlib.h>
11 
12 #include "fssh_atomic.h"
13 #include "fssh_errno.h"
14 #include "fssh_fs_cache.h"
15 #include "fssh_kernel_export.h"
16 #include "fssh_string.h"
17 #include "fssh_unistd.h"
18 #include "hash.h"
19 #include "vfs.h"
20 
21 // TODO: this is a naive but growing implementation to test the API:
22 //	1) block reading/writing is not at all optimized for speed, it will
23 //	   just read and write single blocks.
24 //	2) the locking could be improved; getting a block should not need to
25 //	   wait for blocks to be written
26 //	3) dirty blocks are only written back if asked for
27 // TODO: the retrieval/copy of the original data could be delayed until the
28 //		new data must be written, ie. in low memory situations.
29 
30 //#define TRACE_BLOCK_CACHE
31 #ifdef TRACE_BLOCK_CACHE
32 #	define TRACE(x)	fssh_dprintf x
33 #else
34 #	define TRACE(x) ;
35 #endif
36 
37 using std::nothrow;
38 
39 // This macro is used for fatal situations that are acceptable in a running
40 // system, like out of memory situations - should only panic for debugging.
41 #define FATAL(x) fssh_panic x
42 
43 
44 namespace FSShell {
45 
46 static const int32_t kMaxBlockCount = 1024;
47 
48 struct cache_hook : DoublyLinkedListLinkImpl<cache_hook> {
49 	fssh_transaction_notification_hook	hook;
50 	void								*data;
51 };
52 
53 typedef DoublyLinkedList<cache_hook> HookList;
54 
55 struct cache_transaction {
56 	cache_transaction();
57 
58 	cache_transaction *next;
59 	int32_t			id;
60 	int32_t			num_blocks;
61 	int32_t			main_num_blocks;
62 	int32_t			sub_num_blocks;
63 	cached_block	*first_block;
64 	block_list		blocks;
65 	fssh_transaction_notification_hook notification_hook;
66 	void			*notification_data;
67 	HookList		listeners;
68 	bool			open;
69 	bool			has_sub_transaction;
70 };
71 
72 static fssh_status_t write_cached_block(block_cache *cache, cached_block *block,
73 	bool deleteTransaction = true);
74 
75 
76 //	#pragma mark - private transaction
77 
78 
79 cache_transaction::cache_transaction()
80 {
81 	num_blocks = 0;
82 	main_num_blocks = 0;
83 	sub_num_blocks = 0;
84 	first_block = NULL;
85 	notification_hook = NULL;
86 	notification_data = NULL;
87 	open = true;
88 }
89 
90 
91 static int
92 transaction_compare(void *_transaction, const void *_id)
93 {
94 	cache_transaction *transaction = (cache_transaction *)_transaction;
95 	const int32_t *id = (const int32_t *)_id;
96 
97 	return transaction->id - *id;
98 }
99 
100 
101 static uint32_t
102 transaction_hash(void *_transaction, const void *_id, uint32_t range)
103 {
104 	cache_transaction *transaction = (cache_transaction *)_transaction;
105 	const int32_t *id = (const int32_t *)_id;
106 
107 	if (transaction != NULL)
108 		return transaction->id % range;
109 
110 	return (uint32_t)*id % range;
111 }
112 
113 
114 /*!	Notifies all listeners of this transaction, and removes them
115 	afterwards.
116 */
117 static void
118 notify_transaction_listeners(cache_transaction *transaction, int32_t event)
119 {
120 	HookList::Iterator iterator = transaction->listeners.GetIterator();
121 	while (iterator.HasNext()) {
122 		cache_hook *hook = iterator.Next();
123 
124 		hook->hook(transaction->id, event, hook->data);
125 
126 		iterator.Remove();
127 		delete hook;
128 	}
129 }
130 
131 
132 static void
133 delete_transaction(block_cache *cache, cache_transaction *transaction)
134 {
135 	if (cache->last_transaction == transaction)
136 		cache->last_transaction = NULL;
137 
138 	delete transaction;
139 }
140 
141 
142 static cache_transaction *
143 lookup_transaction(block_cache *cache, int32_t id)
144 {
145 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
146 }
147 
148 
149 //	#pragma mark - cached_block
150 
151 
152 /* static */
153 int
154 cached_block::Compare(void *_cacheEntry, const void *_block)
155 {
156 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
157 	const fssh_off_t *block = (const fssh_off_t *)_block;
158 
159 	return cacheEntry->block_number - *block;
160 }
161 
162 
163 
164 /* static */
165 uint32_t
166 cached_block::Hash(void *_cacheEntry, const void *_block, uint32_t range)
167 {
168 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
169 	const fssh_off_t *block = (const fssh_off_t *)_block;
170 
171 	if (cacheEntry != NULL)
172 		return cacheEntry->block_number % range;
173 
174 	return (uint64_t)*block % range;
175 }
176 
177 
178 //	#pragma mark - block_cache
179 
180 
181 block_cache::block_cache(int _fd, fssh_off_t numBlocks, fssh_size_t blockSize,
182 	bool readOnly)
183 	:
184 	hash(NULL),
185 	fd(_fd),
186 	max_blocks(numBlocks),
187 	block_size(blockSize),
188 	allocated_block_count(0),
189 	next_transaction_id(1),
190 	last_transaction(NULL),
191 	transaction_hash(NULL),
192 	read_only(readOnly)
193 {
194 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
195 	if (hash == NULL)
196 		return;
197 
198 	transaction_hash = hash_init(16, 0, &transaction_compare,
199 		&FSShell::transaction_hash);
200 	if (transaction_hash == NULL)
201 		return;
202 
203 	if (benaphore_init(&lock, "block cache") < FSSH_B_OK)
204 		return;
205 }
206 
207 
208 block_cache::~block_cache()
209 {
210 	benaphore_destroy(&lock);
211 
212 	hash_uninit(transaction_hash);
213 	hash_uninit(hash);
214 }
215 
216 
217 fssh_status_t
218 block_cache::InitCheck()
219 {
220 	if (lock.sem < FSSH_B_OK)
221 		return lock.sem;
222 
223 	if (hash == NULL || transaction_hash == NULL)
224 		return FSSH_B_NO_MEMORY;
225 
226 	return FSSH_B_OK;
227 }
228 
229 
230 void
231 block_cache::Free(void *address)
232 {
233 	if (address == NULL)
234 		return;
235 
236 	free(address);
237 }
238 
239 
240 void *
241 block_cache::Allocate()
242 {
243 	return malloc(block_size);
244 }
245 
246 
247 void
248 block_cache::FreeBlock(cached_block *block)
249 {
250 	Free(block->current_data);
251 
252 	if (block->original_data != NULL || block->parent_data != NULL) {
253 		fssh_panic("block_cache::FreeBlock(): %Ld, original %p, parent %p\n",
254 			block->block_number, block->original_data, block->parent_data);
255 	}
256 
257 #ifdef DEBUG_CHANGED
258 	Free(block->compare);
259 #endif
260 
261 	delete block;
262 }
263 
264 
265 /*! Allocates a new block for \a blockNumber, ready for use */
266 cached_block *
267 block_cache::NewBlock(fssh_off_t blockNumber)
268 {
269 	cached_block *block = new(nothrow) cached_block;
270 	if (block == NULL) {
271 		FATAL(("could not allocate block!\n"));
272 		return NULL;
273 	}
274 
275 	// if we hit the limit of blocks to cache¸ try to free one or more
276 	if (allocated_block_count >= kMaxBlockCount) {
277 		RemoveUnusedBlocks(LONG_MAX,
278 			allocated_block_count - kMaxBlockCount + 1);
279 	}
280 
281 	block->current_data = Allocate();
282 	if (!block->current_data) {
283 		FATAL(("could not allocate block data!\n"));
284 		delete block;
285 		return NULL;
286 	}
287 
288 	block->block_number = blockNumber;
289 	block->ref_count = 0;
290 	block->accessed = 0;
291 	block->transaction_next = NULL;
292 	block->transaction = block->previous_transaction = NULL;
293 	block->original_data = NULL;
294 	block->parent_data = NULL;
295 	block->is_dirty = false;
296 	block->unused = false;
297 #ifdef DEBUG_CHANGED
298 	block->compare = NULL;
299 #endif
300 
301 	allocated_block_count++;
302 
303 	return block;
304 }
305 
306 
307 void
308 block_cache::RemoveUnusedBlocks(int32_t maxAccessed, int32_t count)
309 {
310 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
311 
312 	for (block_list::Iterator it = unused_blocks.GetIterator();
313 		 cached_block *block = it.Next();) {
314 
315 		if (maxAccessed < block->accessed)
316 			continue;
317 
318 		TRACE(("  remove block %Ld, accessed %ld times\n",
319 			block->block_number, block->accessed));
320 
321 		// this can only happen if no transactions are used
322 		if (block->is_dirty)
323 			write_cached_block(this, block, false);
324 
325 		// remove block from lists
326 		it.Remove();
327 		hash_remove(hash, block);
328 
329 		FreeBlock(block);
330 
331 		if (--count <= 0)
332 			break;
333 	}
334 }
335 
336 
337 //	#pragma mark - private block functions
338 
339 
340 static void
341 put_cached_block(block_cache *cache, cached_block *block)
342 {
343 #ifdef DEBUG_CHANGED
344 	if (!block->is_dirty && block->compare != NULL
345 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
346 		fssh_dprintf("new block:\n");
347 		fssh_dump_block((const char *)block->current_data, 256, "  ");
348 		fssh_dprintf("unchanged block:\n");
349 		fssh_dump_block((const char *)block->compare, 256, "  ");
350 		write_cached_block(cache, block);
351 		fssh_panic("block_cache: supposed to be clean block was changed!\n");
352 
353 		cache->Free(block->compare);
354 		block->compare = NULL;
355 	}
356 #endif
357 
358 	if (--block->ref_count == 0
359 		&& block->transaction == NULL
360 		&& block->previous_transaction == NULL) {
361 		// put this block in the list of unused blocks
362 		block->unused = true;
363 		cache->unused_blocks.Add(block);
364 	}
365 
366 	if (cache->allocated_block_count > kMaxBlockCount) {
367 		cache->RemoveUnusedBlocks(LONG_MAX,
368 			cache->allocated_block_count - kMaxBlockCount);
369 	}
370 }
371 
372 
373 static void
374 put_cached_block(block_cache *cache, fssh_off_t blockNumber)
375 {
376 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
377 		fssh_panic("put_cached_block: invalid block number %lld (max %lld)",
378 			blockNumber, cache->max_blocks - 1);
379 	}
380 
381 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
382 	if (block != NULL)
383 		put_cached_block(cache, block);
384 }
385 
386 
387 /*!
388 	Retrieves the block \a blockNumber from the hash table, if it's already
389 	there, or reads it from the disk.
390 
391 	\param _allocated tells you wether or not a new block has been allocated
392 		to satisfy your request.
393 	\param readBlock if \c false, the block will not be read in case it was
394 		not already in the cache. The block you retrieve may contain random
395 		data.
396 */
397 static cached_block *
398 get_cached_block(block_cache *cache, fssh_off_t blockNumber, bool *_allocated,
399 	bool readBlock = true)
400 {
401 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
402 		fssh_panic("get_cached_block: invalid block number %lld (max %lld)",
403 			blockNumber, cache->max_blocks - 1);
404 		return NULL;
405 	}
406 
407 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
408 		&blockNumber);
409 	*_allocated = false;
410 
411 	if (block == NULL) {
412 		// read block into cache
413 		block = cache->NewBlock(blockNumber);
414 		if (block == NULL)
415 			return NULL;
416 
417 		hash_insert(cache->hash, block);
418 		*_allocated = true;
419 	}
420 
421 	if (*_allocated && readBlock) {
422 		int32_t blockSize = cache->block_size;
423 
424 		if (fssh_read_pos(cache->fd, blockNumber * blockSize, block->current_data,
425 				blockSize) < blockSize) {
426 			hash_remove(cache->hash, block);
427 			cache->FreeBlock(block);
428 			FATAL(("could not read block %Ld\n", blockNumber));
429 			return NULL;
430 		}
431 	}
432 
433 	if (block->unused) {
434 		//TRACE(("remove block %Ld from unused\n", blockNumber));
435 		block->unused = false;
436 		cache->unused_blocks.Remove(block);
437 	}
438 
439 	block->ref_count++;
440 	block->accessed++;
441 
442 	return block;
443 }
444 
445 
446 /*!
447 	Returns the writable block data for the requested blockNumber.
448 	If \a cleared is true, the block is not read from disk; an empty block
449 	is returned.
450 
451 	This is the only method to insert a block into a transaction. It makes
452 	sure that the previous block contents are preserved in that case.
453 */
454 static void *
455 get_writable_cached_block(block_cache *cache, fssh_off_t blockNumber, fssh_off_t base,
456 	fssh_off_t length, int32_t transactionID, bool cleared)
457 {
458 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %d)\n",
459 		blockNumber, transactionID));
460 
461 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
462 		fssh_panic("get_writable_cached_block: invalid block number %lld (max %lld)",
463 			blockNumber, cache->max_blocks - 1);
464 	}
465 
466 	bool allocated;
467 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
468 		!cleared);
469 	if (block == NULL)
470 		return NULL;
471 
472 	// if there is no transaction support, we just return the current block
473 	if (transactionID == -1) {
474 		if (cleared)
475 			fssh_memset(block->current_data, 0, cache->block_size);
476 
477 		block->is_dirty = true;
478 			// mark the block as dirty
479 
480 		return block->current_data;
481 	}
482 
483 	cache_transaction *transaction = block->transaction;
484 
485 	if (transaction != NULL && transaction->id != transactionID) {
486 		// ToDo: we have to wait here until the other transaction is done.
487 		//	Maybe we should even panic, since we can't prevent any deadlocks.
488 		fssh_panic("get_writable_cached_block(): asked to get busy writable block (transaction %d)\n", (int)transaction->id);
489 		put_cached_block(cache, block);
490 		return NULL;
491 	}
492 	if (transaction == NULL && transactionID != -1) {
493 		// get new transaction
494 		transaction = lookup_transaction(cache, transactionID);
495 		if (transaction == NULL) {
496 			fssh_panic("get_writable_cached_block(): invalid transaction %d!\n",
497 				(int)transactionID);
498 			put_cached_block(cache, block);
499 			return NULL;
500 		}
501 		if (!transaction->open) {
502 			fssh_panic("get_writable_cached_block(): transaction already done!\n");
503 			put_cached_block(cache, block);
504 			return NULL;
505 		}
506 
507 		block->transaction = transaction;
508 
509 		// attach the block to the transaction block list
510 		block->transaction_next = transaction->first_block;
511 		transaction->first_block = block;
512 		transaction->num_blocks++;
513 	}
514 
515 	bool wasUnchanged = block->original_data == NULL
516 		|| block->previous_transaction != NULL;
517 
518 	if (!(allocated && cleared) && block->original_data == NULL) {
519 		// we already have data, so we need to preserve it
520 		block->original_data = cache->Allocate();
521 		if (block->original_data == NULL) {
522 			FATAL(("could not allocate original_data\n"));
523 			put_cached_block(cache, block);
524 			return NULL;
525 		}
526 
527 		fssh_memcpy(block->original_data, block->current_data, cache->block_size);
528 	}
529 	if (block->parent_data == block->current_data) {
530 		// remember any previous contents for the parent transaction
531 		block->parent_data = cache->Allocate();
532 		if (block->parent_data == NULL) {
533 			// TODO: maybe we should just continue the current transaction in this case...
534 			FATAL(("could not allocate parent\n"));
535 			put_cached_block(cache, block);
536 			return NULL;
537 		}
538 
539 		fssh_memcpy(block->parent_data, block->current_data, cache->block_size);
540 		transaction->sub_num_blocks++;
541 	} else if (transaction != NULL && transaction->has_sub_transaction
542 		&& block->parent_data == NULL && wasUnchanged)
543 		transaction->sub_num_blocks++;
544 
545 	if (cleared)
546 		fssh_memset(block->current_data, 0, cache->block_size);
547 
548 	block->is_dirty = true;
549 
550 	return block->current_data;
551 }
552 
553 
554 static fssh_status_t
555 write_cached_block(block_cache *cache, cached_block *block,
556 	bool deleteTransaction)
557 {
558 	cache_transaction *previous = block->previous_transaction;
559 	int32_t blockSize = cache->block_size;
560 
561 	void *data = previous && block->original_data
562 		? block->original_data : block->current_data;
563 		// we first need to write back changes from previous transactions
564 
565 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
566 
567 	fssh_ssize_t written = fssh_write_pos(cache->fd, block->block_number * blockSize,
568 		data, blockSize);
569 
570 	if (written < blockSize) {
571 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
572 			fssh_strerror(fssh_get_errno())));
573 		return FSSH_B_IO_ERROR;
574 	}
575 
576 	if (data == block->current_data)
577 		block->is_dirty = false;
578 
579 	if (previous != NULL) {
580 		previous->blocks.Remove(block);
581 		block->previous_transaction = NULL;
582 
583 		if (block->original_data != NULL && block->transaction == NULL) {
584 			// This block is not part of a transaction, so it does not need
585 			// its original pointer anymore.
586 			cache->Free(block->original_data);
587 			block->original_data = NULL;
588 		}
589 
590 		// Has the previous transation been finished with that write?
591 		if (--previous->num_blocks == 0) {
592 			TRACE(("cache transaction %ld finished!\n", previous->id));
593 
594 			if (previous->notification_hook != NULL) {
595 				previous->notification_hook(previous->id,
596 					FSSH_TRANSACTION_WRITTEN, previous->notification_data);
597 			}
598 
599 			if (deleteTransaction) {
600 				hash_remove(cache->transaction_hash, previous);
601 				delete_transaction(cache, previous);
602 			}
603 		}
604 	}
605 
606 	return FSSH_B_OK;
607 }
608 
609 
610 fssh_status_t
611 block_cache_init()
612 {
613 	return FSSH_B_OK;
614 }
615 
616 
617 }	// namespace FSShell
618 
619 
620 //	#pragma mark - public transaction API
621 
622 
623 using namespace FSShell;
624 
625 
626 int32_t
627 fssh_cache_start_transaction(void *_cache)
628 {
629 	block_cache *cache = (block_cache *)_cache;
630 	BenaphoreLocker locker(&cache->lock);
631 
632 	if (cache->last_transaction && cache->last_transaction->open) {
633 		fssh_panic("last transaction (%d) still open!\n",
634 			(int)cache->last_transaction->id);
635 	}
636 
637 	cache_transaction *transaction = new(nothrow) cache_transaction;
638 	if (transaction == NULL)
639 		return FSSH_B_NO_MEMORY;
640 
641 	transaction->id = fssh_atomic_add(&cache->next_transaction_id, 1);
642 	cache->last_transaction = transaction;
643 
644 	TRACE(("cache_start_transaction(): id %d started\n", transaction->id));
645 
646 	hash_insert(cache->transaction_hash, transaction);
647 
648 	return transaction->id;
649 }
650 
651 
652 fssh_status_t
653 fssh_cache_sync_transaction(void *_cache, int32_t id)
654 {
655 	block_cache *cache = (block_cache *)_cache;
656 	BenaphoreLocker locker(&cache->lock);
657 	fssh_status_t status = FSSH_B_ENTRY_NOT_FOUND;
658 
659 	TRACE(("cache_sync_transaction(id %d)\n", id));
660 
661 	hash_iterator iterator;
662 	hash_open(cache->transaction_hash, &iterator);
663 
664 	cache_transaction *transaction;
665 	while ((transaction = (cache_transaction *)hash_next(
666 			cache->transaction_hash, &iterator)) != NULL) {
667 		// close all earlier transactions which haven't been closed yet
668 
669 		if (transaction->id <= id && !transaction->open) {
670 			// write back all of their remaining dirty blocks
671 			while (transaction->num_blocks > 0) {
672 				status = write_cached_block(cache, transaction->blocks.Head(),
673 					false);
674 				if (status != FSSH_B_OK)
675 					return status;
676 			}
677 
678 			hash_remove_current(cache->transaction_hash, &iterator);
679 			delete_transaction(cache, transaction);
680 		}
681 	}
682 
683 	hash_close(cache->transaction_hash, &iterator, false);
684 	return FSSH_B_OK;
685 }
686 
687 
688 fssh_status_t
689 fssh_cache_end_transaction(void *_cache, int32_t id,
690 	fssh_transaction_notification_hook hook, void *data)
691 {
692 	block_cache *cache = (block_cache *)_cache;
693 	BenaphoreLocker locker(&cache->lock);
694 
695 	TRACE(("cache_end_transaction(id = %d)\n", id));
696 
697 	cache_transaction *transaction = lookup_transaction(cache, id);
698 	if (transaction == NULL) {
699 		fssh_panic("cache_end_transaction(): invalid transaction ID\n");
700 		return FSSH_B_BAD_VALUE;
701 	}
702 
703 	transaction->notification_hook = hook;
704 	transaction->notification_data = data;
705 
706 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ENDED);
707 
708 	// iterate through all blocks and free the unchanged original contents
709 
710 	cached_block *block = transaction->first_block, *next;
711 	for (; block != NULL; block = next) {
712 		next = block->transaction_next;
713 
714 		if (block->previous_transaction != NULL) {
715 			// need to write back pending changes
716 			write_cached_block(cache, block);
717 		}
718 
719 		if (block->original_data != NULL) {
720 			cache->Free(block->original_data);
721 			block->original_data = NULL;
722 		}
723 		if (transaction->has_sub_transaction) {
724 			if (block->parent_data != block->current_data)
725 				cache->Free(block->parent_data);
726 			block->parent_data = NULL;
727 		}
728 
729 		// move the block to the previous transaction list
730 		transaction->blocks.Add(block);
731 
732 		block->previous_transaction = transaction;
733 		block->transaction_next = NULL;
734 		block->transaction = NULL;
735 	}
736 
737 	transaction->open = false;
738 
739 	return FSSH_B_OK;
740 }
741 
742 
743 fssh_status_t
744 fssh_cache_abort_transaction(void *_cache, int32_t id)
745 {
746 	block_cache *cache = (block_cache *)_cache;
747 	BenaphoreLocker locker(&cache->lock);
748 
749 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
750 
751 	cache_transaction *transaction = lookup_transaction(cache, id);
752 	if (transaction == NULL) {
753 		fssh_panic("cache_abort_transaction(): invalid transaction ID\n");
754 		return FSSH_B_BAD_VALUE;
755 	}
756 
757 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ABORTED);
758 
759 	// iterate through all blocks and restore their original contents
760 
761 	cached_block *block = transaction->first_block, *next;
762 	for (; block != NULL; block = next) {
763 		next = block->transaction_next;
764 
765 		if (block->original_data != NULL) {
766 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
767 				transaction->id, block->block_number));
768 			fssh_memcpy(block->current_data, block->original_data, cache->block_size);
769 			cache->Free(block->original_data);
770 			block->original_data = NULL;
771 		}
772 		if (transaction->has_sub_transaction) {
773 			if (block->parent_data != block->current_data)
774 				cache->Free(block->parent_data);
775 			block->parent_data = NULL;
776 		}
777 
778 		block->transaction_next = NULL;
779 		block->transaction = NULL;
780 	}
781 
782 	hash_remove(cache->transaction_hash, transaction);
783 	delete_transaction(cache, transaction);
784 	return FSSH_B_OK;
785 }
786 
787 
788 /*!	Acknowledges the current parent transaction, and starts a new transaction
789 	from its sub transaction.
790 	The new transaction also gets a new transaction ID.
791 */
792 int32_t
793 fssh_cache_detach_sub_transaction(void *_cache, int32_t id,
794 	fssh_transaction_notification_hook hook, void *data)
795 {
796 	block_cache *cache = (block_cache *)_cache;
797 	BenaphoreLocker locker(&cache->lock);
798 
799 	TRACE(("cache_detach_sub_transaction(id = %d)\n", id));
800 
801 	cache_transaction *transaction = lookup_transaction(cache, id);
802 	if (transaction == NULL) {
803 		fssh_panic("cache_detach_sub_transaction(): invalid transaction ID\n");
804 		return FSSH_B_BAD_VALUE;
805 	}
806 	if (!transaction->has_sub_transaction)
807 		return FSSH_B_BAD_VALUE;
808 
809 	// create a new transaction for the sub transaction
810 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
811 	if (transaction == NULL)
812 		return FSSH_B_NO_MEMORY;
813 
814 	newTransaction->id = fssh_atomic_add(&cache->next_transaction_id, 1);
815 
816 	transaction->notification_hook = hook;
817 	transaction->notification_data = data;
818 
819 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ENDED);
820 
821 	// iterate through all blocks and free the unchanged original contents
822 
823 	cached_block *block = transaction->first_block, *next, *last = NULL;
824 	for (; block != NULL; block = next) {
825 		next = block->transaction_next;
826 
827 		if (block->previous_transaction != NULL) {
828 			// need to write back pending changes
829 			write_cached_block(cache, block);
830 		}
831 
832 		if (block->original_data != NULL && block->parent_data != NULL
833 			&& block->parent_data != block->current_data) {
834 			// free the original data if the parent data of the transaction
835 			// will be made current - but keep them otherwise
836 			cache->Free(block->original_data);
837 			block->original_data = NULL;
838 		}
839 		if (block->parent_data == NULL
840 			|| block->parent_data != block->current_data) {
841 			// we need to move this block over to the new transaction
842 			block->original_data = block->parent_data;
843 			if (last == NULL)
844 				newTransaction->first_block = block;
845 			else
846 				last->transaction_next = block;
847 
848 			block->transaction = newTransaction;
849 			last = block;
850 		} else
851 			block->transaction = NULL;
852 
853 		if (block->parent_data != NULL) {
854 			// move the block to the previous transaction list
855 			transaction->blocks.Add(block);
856 			block->parent_data = NULL;
857 		}
858 
859 		block->previous_transaction = transaction;
860 		block->transaction_next = NULL;
861 	}
862 
863 	newTransaction->num_blocks = transaction->sub_num_blocks;
864 
865 	transaction->open = false;
866 	transaction->has_sub_transaction = false;
867 	transaction->num_blocks = transaction->main_num_blocks;
868 	transaction->sub_num_blocks = 0;
869 
870 	hash_insert(cache->transaction_hash, newTransaction);
871 	cache->last_transaction = newTransaction;
872 
873 	return newTransaction->id;
874 }
875 
876 
877 fssh_status_t
878 fssh_cache_abort_sub_transaction(void *_cache, int32_t id)
879 {
880 	block_cache *cache = (block_cache *)_cache;
881 	BenaphoreLocker locker(&cache->lock);
882 
883 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
884 
885 	cache_transaction *transaction = lookup_transaction(cache, id);
886 	if (transaction == NULL) {
887 		fssh_panic("cache_abort_sub_transaction(): invalid transaction ID\n");
888 		return FSSH_B_BAD_VALUE;
889 	}
890 	if (!transaction->has_sub_transaction)
891 		return FSSH_B_BAD_VALUE;
892 
893 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ABORTED);
894 
895 	// revert all changes back to the version of the parent
896 
897 	cached_block *block = transaction->first_block, *next;
898 	for (; block != NULL; block = next) {
899 		next = block->transaction_next;
900 
901 		if (block->parent_data == NULL) {
902 			if (block->original_data != NULL) {
903 				// the parent transaction didn't change the block, but the sub
904 				// transaction did - we need to revert from the original data
905 				fssh_memcpy(block->current_data, block->original_data,
906 					cache->block_size);
907 			}
908 		} else if (block->parent_data != block->current_data) {
909 			// the block has been changed and must be restored
910 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
911 				transaction->id, block->block_number));
912 			fssh_memcpy(block->current_data, block->parent_data,
913 				cache->block_size);
914 			cache->Free(block->parent_data);
915 		}
916 
917 		block->parent_data = NULL;
918 	}
919 
920 	// all subsequent changes will go into the main transaction
921 	transaction->has_sub_transaction = false;
922 	transaction->sub_num_blocks = 0;
923 
924 	return FSSH_B_OK;
925 }
926 
927 
928 fssh_status_t
929 fssh_cache_start_sub_transaction(void *_cache, int32_t id)
930 {
931 	block_cache *cache = (block_cache *)_cache;
932 	BenaphoreLocker locker(&cache->lock);
933 
934 	TRACE(("cache_start_sub_transaction(id = %d)\n", id));
935 
936 	cache_transaction *transaction = lookup_transaction(cache, id);
937 	if (transaction == NULL) {
938 		fssh_panic("cache_start_sub_transaction(): invalid transaction ID %d\n", (int)id);
939 		return FSSH_B_BAD_VALUE;
940 	}
941 
942 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ENDED);
943 
944 	// move all changed blocks up to the parent
945 
946 	cached_block *block = transaction->first_block, *next;
947 	for (; block != NULL; block = next) {
948 		next = block->transaction_next;
949 
950 		if (transaction->has_sub_transaction
951 			&& block->parent_data != NULL
952 			&& block->parent_data != block->current_data) {
953 			// there already is an older sub transaction - we acknowledge
954 			// its changes and move its blocks up to the parent
955 			cache->Free(block->parent_data);
956 		}
957 
958 		// we "allocate" the parent data lazily, that means, we don't copy
959 		// the data (and allocate memory for it) until we need to
960 		block->parent_data = block->current_data;
961 	}
962 
963 	// all subsequent changes will go into the sub transaction
964 	transaction->has_sub_transaction = true;
965 	transaction->main_num_blocks = transaction->num_blocks;
966 	transaction->sub_num_blocks = 0;
967 
968 	return FSSH_B_OK;
969 }
970 
971 
972 /*!	Adds a transaction listener that gets notified when the transaction
973 	is ended or aborted.
974 	The listener gets automatically removed in this case.
975 */
976 fssh_status_t
977 fssh_cache_add_transaction_listener(void *_cache, int32_t id, int32_t events,
978 	fssh_transaction_notification_hook hookFunction, void *data)
979 {
980 // TODO: this is currently not used in a critical context in BFS
981 #if 0
982 	block_cache *cache = (block_cache *)_cache;
983 
984 	cache_hook *hook = new(std::nothrow) cache_hook;
985 	if (hook == NULL)
986 		return FSSH_B_NO_MEMORY;
987 
988 	BenaphoreLocker locker(&cache->lock);
989 
990 	cache_transaction *transaction = lookup_transaction(cache, id);
991 	if (transaction == NULL) {
992 		delete hook;
993 		return FSSH_B_BAD_VALUE;
994 	}
995 
996 	hook->hook = hookFunction;
997 	hook->data = data;
998 
999 	transaction->listeners.Add(hook);
1000 #endif
1001 	return FSSH_B_OK;
1002 }
1003 
1004 
1005 fssh_status_t
1006 fssh_cache_remove_transaction_listener(void *_cache, int32_t id,
1007 	fssh_transaction_notification_hook hookFunction, void *data)
1008 {
1009 	block_cache *cache = (block_cache *)_cache;
1010 
1011 	BenaphoreLocker locker(&cache->lock);
1012 
1013 	cache_transaction *transaction = lookup_transaction(cache, id);
1014 	if (transaction == NULL)
1015 		return FSSH_B_BAD_VALUE;
1016 
1017 	HookList::Iterator iterator = transaction->listeners.GetIterator();
1018 	while (iterator.HasNext()) {
1019 		cache_hook *hook = iterator.Next();
1020 		if (hook->data == data && hook->hook == hookFunction) {
1021 			iterator.Remove();
1022 			delete hook;
1023 			return FSSH_B_OK;
1024 		}
1025 	}
1026 
1027 	return FSSH_B_ENTRY_NOT_FOUND;
1028 }
1029 
1030 
1031 fssh_status_t
1032 fssh_cache_next_block_in_transaction(void *_cache, int32_t id, bool mainOnly,
1033 	long *_cookie, fssh_off_t *_blockNumber, void **_data,
1034 	void **_unchangedData)
1035 {
1036 	cached_block *block = (cached_block *)*_cookie;
1037 	block_cache *cache = (block_cache *)_cache;
1038 
1039 	BenaphoreLocker locker(&cache->lock);
1040 
1041 	cache_transaction *transaction = lookup_transaction(cache, id);
1042 	if (transaction == NULL || !transaction->open)
1043 		return FSSH_B_BAD_VALUE;
1044 
1045 	if (block == NULL)
1046 		block = transaction->first_block;
1047 	else
1048 		block = block->transaction_next;
1049 
1050 	if (mainOnly && transaction->has_sub_transaction) {
1051 		// find next block that the parent changed
1052 		while (block != NULL && block->parent_data == NULL)
1053 			block = block->transaction_next;
1054 	}
1055 
1056 	if (block == NULL)
1057 		return FSSH_B_ENTRY_NOT_FOUND;
1058 
1059 	if (_blockNumber)
1060 		*_blockNumber = block->block_number;
1061 	if (_data)
1062 		*_data = mainOnly ? block->parent_data : block->current_data;
1063 	if (_unchangedData)
1064 		*_unchangedData = block->original_data;
1065 
1066 	*_cookie = (fssh_addr_t)block;
1067 	return FSSH_B_OK;
1068 }
1069 
1070 
1071 int32_t
1072 fssh_cache_blocks_in_transaction(void *_cache, int32_t id)
1073 {
1074 	block_cache *cache = (block_cache *)_cache;
1075 	BenaphoreLocker locker(&cache->lock);
1076 
1077 	cache_transaction *transaction = lookup_transaction(cache, id);
1078 	if (transaction == NULL)
1079 		return FSSH_B_BAD_VALUE;
1080 
1081 	return transaction->num_blocks;
1082 }
1083 
1084 
1085 int32_t
1086 fssh_cache_blocks_in_main_transaction(void *_cache, int32_t id)
1087 {
1088 	block_cache *cache = (block_cache *)_cache;
1089 	BenaphoreLocker locker(&cache->lock);
1090 
1091 	cache_transaction *transaction = lookup_transaction(cache, id);
1092 	if (transaction == NULL)
1093 		return FSSH_B_BAD_VALUE;
1094 
1095 	return transaction->main_num_blocks;
1096 }
1097 
1098 
1099 int32_t
1100 fssh_cache_blocks_in_sub_transaction(void *_cache, int32_t id)
1101 {
1102 	block_cache *cache = (block_cache *)_cache;
1103 	BenaphoreLocker locker(&cache->lock);
1104 
1105 	cache_transaction *transaction = lookup_transaction(cache, id);
1106 	if (transaction == NULL)
1107 		return FSSH_B_BAD_VALUE;
1108 
1109 	return transaction->sub_num_blocks;
1110 }
1111 
1112 
1113 //	#pragma mark - public block cache API
1114 //	public interface
1115 
1116 
1117 void
1118 fssh_block_cache_delete(void *_cache, bool allowWrites)
1119 {
1120 	block_cache *cache = (block_cache *)_cache;
1121 
1122 	if (allowWrites)
1123 		fssh_block_cache_sync(cache);
1124 
1125 	BenaphoreLocker locker(&cache->lock);
1126 
1127 	// free all blocks
1128 
1129 	uint32_t cookie = 0;
1130 	cached_block *block;
1131 	while ((block = (cached_block *)hash_remove_first(cache->hash,
1132 			&cookie)) != NULL) {
1133 		cache->FreeBlock(block);
1134 	}
1135 
1136 	// free all transactions (they will all be aborted)
1137 
1138 	cookie = 0;
1139 	cache_transaction *transaction;
1140 	while ((transaction = (cache_transaction *)hash_remove_first(
1141 			cache->transaction_hash, &cookie)) != NULL) {
1142 		delete transaction;
1143 	}
1144 
1145 	delete cache;
1146 }
1147 
1148 
1149 void *
1150 fssh_block_cache_create(int fd, fssh_off_t numBlocks, fssh_size_t blockSize, bool readOnly)
1151 {
1152 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
1153 		readOnly);
1154 	if (cache == NULL)
1155 		return NULL;
1156 
1157 	if (cache->InitCheck() != FSSH_B_OK) {
1158 		delete cache;
1159 		return NULL;
1160 	}
1161 
1162 	return cache;
1163 }
1164 
1165 
1166 fssh_status_t
1167 fssh_block_cache_sync(void *_cache)
1168 {
1169 	block_cache *cache = (block_cache *)_cache;
1170 
1171 	// we will sync all dirty blocks to disk that have a completed
1172 	// transaction or no transaction only
1173 
1174 	BenaphoreLocker locker(&cache->lock);
1175 	hash_iterator iterator;
1176 	hash_open(cache->hash, &iterator);
1177 
1178 	cached_block *block;
1179 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1180 		if (block->previous_transaction != NULL
1181 			|| (block->transaction == NULL && block->is_dirty)) {
1182 			fssh_status_t status = write_cached_block(cache, block);
1183 			if (status != FSSH_B_OK)
1184 				return status;
1185 		}
1186 	}
1187 
1188 	hash_close(cache->hash, &iterator, false);
1189 	return FSSH_B_OK;
1190 }
1191 
1192 
1193 fssh_status_t
1194 fssh_block_cache_sync_etc(void *_cache, fssh_off_t blockNumber,
1195 	fssh_size_t numBlocks)
1196 {
1197 	block_cache *cache = (block_cache *)_cache;
1198 
1199 	// we will sync all dirty blocks to disk that have a completed
1200 	// transaction or no transaction only
1201 
1202 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1203 		fssh_panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
1204 			blockNumber, cache->max_blocks - 1);
1205 		return FSSH_B_BAD_VALUE;
1206 	}
1207 
1208 	BenaphoreLocker locker(&cache->lock);
1209 
1210 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
1211 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1212 			&blockNumber);
1213 		if (block == NULL)
1214 			continue;
1215 		if (block->previous_transaction != NULL
1216 			|| (block->transaction == NULL && block->is_dirty)) {
1217 			fssh_status_t status = write_cached_block(cache, block);
1218 			if (status != FSSH_B_OK)
1219 				return status;
1220 		}
1221 	}
1222 
1223 	return FSSH_B_OK;
1224 }
1225 
1226 
1227 fssh_status_t
1228 fssh_block_cache_make_writable(void *_cache, fssh_off_t blockNumber, int32_t transaction)
1229 {
1230 	block_cache *cache = (block_cache *)_cache;
1231 	BenaphoreLocker locker(&cache->lock);
1232 
1233 	if (cache->read_only)
1234 		fssh_panic("tried to make block writable on a read-only cache!");
1235 
1236 	// ToDo: this can be done better!
1237 	void *block = get_writable_cached_block(cache, blockNumber,
1238 		blockNumber, 1, transaction, false);
1239 	if (block != NULL) {
1240 		put_cached_block((block_cache *)_cache, blockNumber);
1241 		return FSSH_B_OK;
1242 	}
1243 
1244 	return FSSH_B_ERROR;
1245 }
1246 
1247 
1248 void *
1249 fssh_block_cache_get_writable_etc(void *_cache, fssh_off_t blockNumber, fssh_off_t base,
1250 	fssh_off_t length, int32_t transaction)
1251 {
1252 	block_cache *cache = (block_cache *)_cache;
1253 	BenaphoreLocker locker(&cache->lock);
1254 
1255 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1256 		blockNumber, transaction));
1257 	if (cache->read_only)
1258 		fssh_panic("tried to get writable block on a read-only cache!");
1259 
1260 	return get_writable_cached_block(cache, blockNumber, base, length,
1261 		transaction, false);
1262 }
1263 
1264 
1265 void *
1266 fssh_block_cache_get_writable(void *_cache, fssh_off_t blockNumber,
1267 	int32_t transaction)
1268 {
1269 	return fssh_block_cache_get_writable_etc(_cache, blockNumber,
1270 		blockNumber, 1, transaction);
1271 }
1272 
1273 
1274 void *
1275 fssh_block_cache_get_empty(void *_cache, fssh_off_t blockNumber,
1276 	int32_t transaction)
1277 {
1278 	block_cache *cache = (block_cache *)_cache;
1279 	BenaphoreLocker locker(&cache->lock);
1280 
1281 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1282 		blockNumber, transaction));
1283 	if (cache->read_only)
1284 		fssh_panic("tried to get empty writable block on a read-only cache!");
1285 
1286 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1287 		blockNumber, 1, transaction, true);
1288 }
1289 
1290 
1291 const void *
1292 fssh_block_cache_get_etc(void *_cache, fssh_off_t blockNumber, fssh_off_t base,
1293 	fssh_off_t length)
1294 {
1295 	block_cache *cache = (block_cache *)_cache;
1296 	BenaphoreLocker locker(&cache->lock);
1297 	bool allocated;
1298 
1299 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1300 	if (block == NULL)
1301 		return NULL;
1302 
1303 #ifdef DEBUG_CHANGED
1304 	if (block->compare == NULL)
1305 		block->compare = cache->Allocate();
1306 	if (block->compare != NULL)
1307 		memcpy(block->compare, block->current_data, cache->block_size);
1308 #endif
1309 	return block->current_data;
1310 }
1311 
1312 
1313 const void *
1314 fssh_block_cache_get(void *_cache, fssh_off_t blockNumber)
1315 {
1316 	return fssh_block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1317 }
1318 
1319 
1320 /*!
1321 	Changes the internal status of a writable block to \a dirty. This can be
1322 	helpful in case you realize you don't need to change that block anymore
1323 	for whatever reason.
1324 
1325 	Note, you must only use this function on blocks that were acquired
1326 	writable!
1327 */
1328 fssh_status_t
1329 fssh_block_cache_set_dirty(void *_cache, fssh_off_t blockNumber, bool dirty,
1330 	int32_t transaction)
1331 {
1332 	// TODO: not yet implemented
1333 	if (dirty)
1334 		fssh_panic("block_cache_set_dirty(): not yet implemented that way!\n");
1335 
1336 	return FSSH_B_OK;
1337 }
1338 
1339 
1340 void
1341 fssh_block_cache_put(void *_cache, fssh_off_t blockNumber)
1342 {
1343 	block_cache *cache = (block_cache *)_cache;
1344 	BenaphoreLocker locker(&cache->lock);
1345 
1346 	put_cached_block(cache, blockNumber);
1347 }
1348 
1349