xref: /haiku/src/tools/fs_shell/block_cache.cpp (revision 746cac055adc6ac3308c7bc2d29040fb95689cc9)
1 /*
2  * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 #include <new>
7 #include <stdlib.h>
8 
9 #include "DoublyLinkedList.h"
10 #include "fssh_atomic.h"
11 #include "fssh_errno.h"
12 #include "fssh_fs_cache.h"
13 #include "fssh_kernel_export.h"
14 #include "fssh_lock.h"
15 #include "fssh_string.h"
16 #include "fssh_unistd.h"
17 #include "hash.h"
18 #include "vfs.h"
19 
20 // TODO: this is a naive but growing implementation to test the API:
21 //	1) block reading/writing is not at all optimized for speed, it will
22 //	   just read and write single blocks.
23 //	2) the locking could be improved; getting a block should not need to
24 //	   wait for blocks to be written
25 //	3) dirty blocks are only written back if asked for
26 // TODO: the retrieval/copy of the original data could be delayed until the
27 //		new data must be written, ie. in low memory situations.
28 
29 //#define TRACE_BLOCK_CACHE
30 #ifdef TRACE_BLOCK_CACHE
31 #	define TRACE(x)	fssh_dprintf x
32 #else
33 #	define TRACE(x) ;
34 #endif
35 
36 using std::nothrow;
37 
38 // This macro is used for fatal situations that are acceptable in a running
39 // system, like out of memory situations - should only panic for debugging.
40 #define FATAL(x) fssh_panic x
41 
42 
43 namespace FSShell {
44 
45 struct hash_table;
46 struct vm_page;
47 
48 
49 //#define DEBUG_CHANGED
50 #undef DEBUG_CHANGED
51 
52 
53 struct cache_transaction;
54 struct cached_block;
55 struct block_cache;
56 typedef DoublyLinkedListLink<cached_block> block_link;
57 
58 
59 struct cached_block {
60 	cached_block	*next;			// next in hash
61 	cached_block	*transaction_next;
62 	block_link		link;
63 	fssh_off_t		block_number;
64 	void			*current_data;
65 	void			*original_data;
66 	void			*parent_data;
67 #ifdef DEBUG_CHANGED
68 	void			*compare;
69 #endif
70 	int32_t			ref_count;
71 	int32_t			accessed;
72 	bool			busy : 1;
73 	bool			is_writing : 1;
74 	bool			is_dirty : 1;
75 	bool			unused : 1;
76 	bool			unmapped : 1;
77 	cache_transaction *transaction;
78 	cache_transaction *previous_transaction;
79 
80 	static int Compare(void *_cacheEntry, const void *_block);
81 	static uint32_t Hash(void *_cacheEntry, const void *_block, uint32_t range);
82 };
83 
84 typedef DoublyLinkedList<cached_block,
85 	DoublyLinkedListMemberGetLink<cached_block,
86 		&cached_block::link> > block_list;
87 
88 struct block_cache {
89 	hash_table	*hash;
90 	fssh_mutex	lock;
91 	int			fd;
92 	fssh_off_t	max_blocks;
93 	fssh_size_t	block_size;
94 	int32_t		allocated_block_count;
95 	int32_t		next_transaction_id;
96 	cache_transaction *last_transaction;
97 	hash_table	*transaction_hash;
98 
99 	block_list	unused_blocks;
100 
101 	bool		read_only;
102 
103 	block_cache(int fd, fssh_off_t numBlocks, fssh_size_t blockSize, bool readOnly);
104 	~block_cache();
105 
106 	fssh_status_t InitCheck();
107 
108 	void RemoveUnusedBlocks(int32_t maxAccessed = LONG_MAX, int32_t count = LONG_MAX);
109 	void FreeBlock(cached_block *block);
110 	cached_block *NewBlock(fssh_off_t blockNumber);
111 	void Free(void *address);
112 	void *Allocate();
113 
114 	static void LowMemoryHandler(void *data, int32_t level);
115 };
116 
117 static const int32_t kMaxBlockCount = 1024;
118 
119 struct cache_hook : DoublyLinkedListLinkImpl<cache_hook> {
120 	fssh_transaction_notification_hook	hook;
121 	void								*data;
122 };
123 
124 typedef DoublyLinkedList<cache_hook> HookList;
125 
126 struct cache_transaction {
127 	cache_transaction();
128 
129 	cache_transaction *next;
130 	int32_t			id;
131 	int32_t			num_blocks;
132 	int32_t			main_num_blocks;
133 	int32_t			sub_num_blocks;
134 	cached_block	*first_block;
135 	block_list		blocks;
136 	fssh_transaction_notification_hook notification_hook;
137 	void			*notification_data;
138 	HookList		listeners;
139 	bool			open;
140 	bool			has_sub_transaction;
141 };
142 
143 static fssh_status_t write_cached_block(block_cache *cache, cached_block *block,
144 	bool deleteTransaction = true);
145 
146 
147 //	#pragma mark - private transaction
148 
149 
150 cache_transaction::cache_transaction()
151 {
152 	num_blocks = 0;
153 	main_num_blocks = 0;
154 	sub_num_blocks = 0;
155 	first_block = NULL;
156 	notification_hook = NULL;
157 	notification_data = NULL;
158 	open = true;
159 }
160 
161 
162 static int
163 transaction_compare(void *_transaction, const void *_id)
164 {
165 	cache_transaction *transaction = (cache_transaction *)_transaction;
166 	const int32_t *id = (const int32_t *)_id;
167 
168 	return transaction->id - *id;
169 }
170 
171 
172 static uint32_t
173 transaction_hash(void *_transaction, const void *_id, uint32_t range)
174 {
175 	cache_transaction *transaction = (cache_transaction *)_transaction;
176 	const int32_t *id = (const int32_t *)_id;
177 
178 	if (transaction != NULL)
179 		return transaction->id % range;
180 
181 	return (uint32_t)*id % range;
182 }
183 
184 
185 /*!	Notifies all listeners of this transaction, and removes them
186 	afterwards.
187 */
188 static void
189 notify_transaction_listeners(cache_transaction *transaction, int32_t event)
190 {
191 	HookList::Iterator iterator = transaction->listeners.GetIterator();
192 	while (iterator.HasNext()) {
193 		cache_hook *hook = iterator.Next();
194 
195 		hook->hook(transaction->id, event, hook->data);
196 
197 		iterator.Remove();
198 		delete hook;
199 	}
200 }
201 
202 
203 static void
204 delete_transaction(block_cache *cache, cache_transaction *transaction)
205 {
206 	if (cache->last_transaction == transaction)
207 		cache->last_transaction = NULL;
208 
209 	delete transaction;
210 }
211 
212 
213 static cache_transaction *
214 lookup_transaction(block_cache *cache, int32_t id)
215 {
216 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
217 }
218 
219 
220 //	#pragma mark - cached_block
221 
222 
223 /* static */
224 int
225 cached_block::Compare(void *_cacheEntry, const void *_block)
226 {
227 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
228 	const fssh_off_t *block = (const fssh_off_t *)_block;
229 
230 	return cacheEntry->block_number - *block;
231 }
232 
233 
234 
235 /* static */
236 uint32_t
237 cached_block::Hash(void *_cacheEntry, const void *_block, uint32_t range)
238 {
239 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
240 	const fssh_off_t *block = (const fssh_off_t *)_block;
241 
242 	if (cacheEntry != NULL)
243 		return cacheEntry->block_number % range;
244 
245 	return (uint64_t)*block % range;
246 }
247 
248 
249 //	#pragma mark - block_cache
250 
251 
252 block_cache::block_cache(int _fd, fssh_off_t numBlocks, fssh_size_t blockSize,
253 	bool readOnly)
254 	:
255 	hash(NULL),
256 	fd(_fd),
257 	max_blocks(numBlocks),
258 	block_size(blockSize),
259 	allocated_block_count(0),
260 	next_transaction_id(1),
261 	last_transaction(NULL),
262 	transaction_hash(NULL),
263 	read_only(readOnly)
264 {
265 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
266 	if (hash == NULL)
267 		return;
268 
269 	transaction_hash = hash_init(16, 0, &transaction_compare,
270 		&FSShell::transaction_hash);
271 	if (transaction_hash == NULL)
272 		return;
273 
274 	fssh_mutex_init(&lock, "block cache");
275 }
276 
277 
278 block_cache::~block_cache()
279 {
280 	fssh_mutex_destroy(&lock);
281 
282 	hash_uninit(transaction_hash);
283 	hash_uninit(hash);
284 }
285 
286 
287 fssh_status_t
288 block_cache::InitCheck()
289 {
290 	if (lock.sem < FSSH_B_OK)
291 		return lock.sem;
292 
293 	if (hash == NULL || transaction_hash == NULL)
294 		return FSSH_B_NO_MEMORY;
295 
296 	return FSSH_B_OK;
297 }
298 
299 
300 void
301 block_cache::Free(void *address)
302 {
303 	if (address == NULL)
304 		return;
305 
306 	free(address);
307 }
308 
309 
310 void *
311 block_cache::Allocate()
312 {
313 	return malloc(block_size);
314 }
315 
316 
317 void
318 block_cache::FreeBlock(cached_block *block)
319 {
320 	Free(block->current_data);
321 
322 	if (block->original_data != NULL || block->parent_data != NULL) {
323 		fssh_panic("block_cache::FreeBlock(): %Ld, original %p, parent %p\n",
324 			block->block_number, block->original_data, block->parent_data);
325 	}
326 
327 #ifdef DEBUG_CHANGED
328 	Free(block->compare);
329 #endif
330 
331 	delete block;
332 }
333 
334 
335 /*! Allocates a new block for \a blockNumber, ready for use */
336 cached_block *
337 block_cache::NewBlock(fssh_off_t blockNumber)
338 {
339 	cached_block *block = new(nothrow) cached_block;
340 	if (block == NULL) {
341 		FATAL(("could not allocate block!\n"));
342 		return NULL;
343 	}
344 
345 	// if we hit the limit of blocks to cache¸ try to free one or more
346 	if (allocated_block_count >= kMaxBlockCount) {
347 		RemoveUnusedBlocks(LONG_MAX,
348 			allocated_block_count - kMaxBlockCount + 1);
349 	}
350 
351 	block->current_data = Allocate();
352 	if (!block->current_data) {
353 		FATAL(("could not allocate block data!\n"));
354 		delete block;
355 		return NULL;
356 	}
357 
358 	block->block_number = blockNumber;
359 	block->ref_count = 0;
360 	block->accessed = 0;
361 	block->transaction_next = NULL;
362 	block->transaction = block->previous_transaction = NULL;
363 	block->original_data = NULL;
364 	block->parent_data = NULL;
365 	block->is_dirty = false;
366 	block->unused = false;
367 #ifdef DEBUG_CHANGED
368 	block->compare = NULL;
369 #endif
370 
371 	allocated_block_count++;
372 
373 	return block;
374 }
375 
376 
377 void
378 block_cache::RemoveUnusedBlocks(int32_t maxAccessed, int32_t count)
379 {
380 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
381 
382 	for (block_list::Iterator it = unused_blocks.GetIterator();
383 		 cached_block *block = it.Next();) {
384 
385 		if (maxAccessed < block->accessed)
386 			continue;
387 
388 		TRACE(("  remove block %Ld, accessed %ld times\n",
389 			block->block_number, block->accessed));
390 
391 		// this can only happen if no transactions are used
392 		if (block->is_dirty)
393 			write_cached_block(this, block, false);
394 
395 		// remove block from lists
396 		it.Remove();
397 		hash_remove(hash, block);
398 
399 		FreeBlock(block);
400 
401 		if (--count <= 0)
402 			break;
403 	}
404 }
405 
406 
407 //	#pragma mark - private block functions
408 
409 
410 static void
411 put_cached_block(block_cache *cache, cached_block *block)
412 {
413 #ifdef DEBUG_CHANGED
414 	if (!block->is_dirty && block->compare != NULL
415 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
416 		fssh_dprintf("new block:\n");
417 		fssh_dump_block((const char *)block->current_data, 256, "  ");
418 		fssh_dprintf("unchanged block:\n");
419 		fssh_dump_block((const char *)block->compare, 256, "  ");
420 		write_cached_block(cache, block);
421 		fssh_panic("block_cache: supposed to be clean block was changed!\n");
422 
423 		cache->Free(block->compare);
424 		block->compare = NULL;
425 	}
426 #endif
427 
428 	if (--block->ref_count == 0
429 		&& block->transaction == NULL
430 		&& block->previous_transaction == NULL) {
431 		// put this block in the list of unused blocks
432 		block->unused = true;
433 		cache->unused_blocks.Add(block);
434 	}
435 
436 	if (cache->allocated_block_count > kMaxBlockCount) {
437 		cache->RemoveUnusedBlocks(LONG_MAX,
438 			cache->allocated_block_count - kMaxBlockCount);
439 	}
440 }
441 
442 
443 static void
444 put_cached_block(block_cache *cache, fssh_off_t blockNumber)
445 {
446 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
447 		fssh_panic("put_cached_block: invalid block number %lld (max %lld)",
448 			blockNumber, cache->max_blocks - 1);
449 	}
450 
451 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
452 	if (block != NULL)
453 		put_cached_block(cache, block);
454 }
455 
456 
457 /*!
458 	Retrieves the block \a blockNumber from the hash table, if it's already
459 	there, or reads it from the disk.
460 
461 	\param _allocated tells you wether or not a new block has been allocated
462 		to satisfy your request.
463 	\param readBlock if \c false, the block will not be read in case it was
464 		not already in the cache. The block you retrieve may contain random
465 		data.
466 */
467 static cached_block *
468 get_cached_block(block_cache *cache, fssh_off_t blockNumber, bool *_allocated,
469 	bool readBlock = true)
470 {
471 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
472 		fssh_panic("get_cached_block: invalid block number %lld (max %lld)",
473 			blockNumber, cache->max_blocks - 1);
474 		return NULL;
475 	}
476 
477 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
478 		&blockNumber);
479 	*_allocated = false;
480 
481 	if (block == NULL) {
482 		// read block into cache
483 		block = cache->NewBlock(blockNumber);
484 		if (block == NULL)
485 			return NULL;
486 
487 		hash_insert(cache->hash, block);
488 		*_allocated = true;
489 	}
490 
491 	if (*_allocated && readBlock) {
492 		int32_t blockSize = cache->block_size;
493 
494 		if (fssh_read_pos(cache->fd, blockNumber * blockSize, block->current_data,
495 				blockSize) < blockSize) {
496 			hash_remove(cache->hash, block);
497 			cache->FreeBlock(block);
498 			FATAL(("could not read block %Ld\n", blockNumber));
499 			return NULL;
500 		}
501 	}
502 
503 	if (block->unused) {
504 		//TRACE(("remove block %Ld from unused\n", blockNumber));
505 		block->unused = false;
506 		cache->unused_blocks.Remove(block);
507 	}
508 
509 	block->ref_count++;
510 	block->accessed++;
511 
512 	return block;
513 }
514 
515 
516 /*!
517 	Returns the writable block data for the requested blockNumber.
518 	If \a cleared is true, the block is not read from disk; an empty block
519 	is returned.
520 
521 	This is the only method to insert a block into a transaction. It makes
522 	sure that the previous block contents are preserved in that case.
523 */
524 static void *
525 get_writable_cached_block(block_cache *cache, fssh_off_t blockNumber, fssh_off_t base,
526 	fssh_off_t length, int32_t transactionID, bool cleared)
527 {
528 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %d)\n",
529 		blockNumber, transactionID));
530 
531 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
532 		fssh_panic("get_writable_cached_block: invalid block number %lld (max %lld)",
533 			blockNumber, cache->max_blocks - 1);
534 	}
535 
536 	bool allocated;
537 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
538 		!cleared);
539 	if (block == NULL)
540 		return NULL;
541 
542 	// if there is no transaction support, we just return the current block
543 	if (transactionID == -1) {
544 		if (cleared)
545 			fssh_memset(block->current_data, 0, cache->block_size);
546 
547 		block->is_dirty = true;
548 			// mark the block as dirty
549 
550 		return block->current_data;
551 	}
552 
553 	cache_transaction *transaction = block->transaction;
554 
555 	if (transaction != NULL && transaction->id != transactionID) {
556 		// ToDo: we have to wait here until the other transaction is done.
557 		//	Maybe we should even panic, since we can't prevent any deadlocks.
558 		fssh_panic("get_writable_cached_block(): asked to get busy writable block (transaction %d)\n", (int)transaction->id);
559 		put_cached_block(cache, block);
560 		return NULL;
561 	}
562 	if (transaction == NULL && transactionID != -1) {
563 		// get new transaction
564 		transaction = lookup_transaction(cache, transactionID);
565 		if (transaction == NULL) {
566 			fssh_panic("get_writable_cached_block(): invalid transaction %d!\n",
567 				(int)transactionID);
568 			put_cached_block(cache, block);
569 			return NULL;
570 		}
571 		if (!transaction->open) {
572 			fssh_panic("get_writable_cached_block(): transaction already done!\n");
573 			put_cached_block(cache, block);
574 			return NULL;
575 		}
576 
577 		block->transaction = transaction;
578 
579 		// attach the block to the transaction block list
580 		block->transaction_next = transaction->first_block;
581 		transaction->first_block = block;
582 		transaction->num_blocks++;
583 	}
584 
585 	bool wasUnchanged = block->original_data == NULL
586 		|| block->previous_transaction != NULL;
587 
588 	if (!(allocated && cleared) && block->original_data == NULL) {
589 		// we already have data, so we need to preserve it
590 		block->original_data = cache->Allocate();
591 		if (block->original_data == NULL) {
592 			FATAL(("could not allocate original_data\n"));
593 			put_cached_block(cache, block);
594 			return NULL;
595 		}
596 
597 		fssh_memcpy(block->original_data, block->current_data, cache->block_size);
598 	}
599 	if (block->parent_data == block->current_data) {
600 		// remember any previous contents for the parent transaction
601 		block->parent_data = cache->Allocate();
602 		if (block->parent_data == NULL) {
603 			// TODO: maybe we should just continue the current transaction in this case...
604 			FATAL(("could not allocate parent\n"));
605 			put_cached_block(cache, block);
606 			return NULL;
607 		}
608 
609 		fssh_memcpy(block->parent_data, block->current_data, cache->block_size);
610 		transaction->sub_num_blocks++;
611 	} else if (transaction != NULL && transaction->has_sub_transaction
612 		&& block->parent_data == NULL && wasUnchanged)
613 		transaction->sub_num_blocks++;
614 
615 	if (cleared)
616 		fssh_memset(block->current_data, 0, cache->block_size);
617 
618 	block->is_dirty = true;
619 
620 	return block->current_data;
621 }
622 
623 
624 static fssh_status_t
625 write_cached_block(block_cache *cache, cached_block *block,
626 	bool deleteTransaction)
627 {
628 	cache_transaction *previous = block->previous_transaction;
629 	int32_t blockSize = cache->block_size;
630 
631 	void *data = previous && block->original_data
632 		? block->original_data : block->current_data;
633 		// we first need to write back changes from previous transactions
634 
635 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
636 
637 	fssh_ssize_t written = fssh_write_pos(cache->fd, block->block_number * blockSize,
638 		data, blockSize);
639 
640 	if (written < blockSize) {
641 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
642 			fssh_strerror(fssh_get_errno())));
643 		return FSSH_B_IO_ERROR;
644 	}
645 
646 	if (data == block->current_data)
647 		block->is_dirty = false;
648 
649 	if (previous != NULL) {
650 		previous->blocks.Remove(block);
651 		block->previous_transaction = NULL;
652 
653 		if (block->original_data != NULL && block->transaction == NULL) {
654 			// This block is not part of a transaction, so it does not need
655 			// its original pointer anymore.
656 			cache->Free(block->original_data);
657 			block->original_data = NULL;
658 		}
659 
660 		// Has the previous transation been finished with that write?
661 		if (--previous->num_blocks == 0) {
662 			TRACE(("cache transaction %ld finished!\n", previous->id));
663 
664 			if (previous->notification_hook != NULL) {
665 				previous->notification_hook(previous->id,
666 					FSSH_TRANSACTION_WRITTEN, previous->notification_data);
667 			}
668 
669 			if (deleteTransaction) {
670 				hash_remove(cache->transaction_hash, previous);
671 				delete_transaction(cache, previous);
672 			}
673 		}
674 	}
675 
676 	return FSSH_B_OK;
677 }
678 
679 
680 fssh_status_t
681 block_cache_init()
682 {
683 	return FSSH_B_OK;
684 }
685 
686 
687 }	// namespace FSShell
688 
689 
690 //	#pragma mark - public transaction API
691 
692 
693 using namespace FSShell;
694 
695 
696 int32_t
697 fssh_cache_start_transaction(void *_cache)
698 {
699 	block_cache *cache = (block_cache *)_cache;
700 	MutexLocker locker(&cache->lock);
701 
702 	if (cache->last_transaction && cache->last_transaction->open) {
703 		fssh_panic("last transaction (%d) still open!\n",
704 			(int)cache->last_transaction->id);
705 	}
706 
707 	cache_transaction *transaction = new(nothrow) cache_transaction;
708 	if (transaction == NULL)
709 		return FSSH_B_NO_MEMORY;
710 
711 	transaction->id = fssh_atomic_add(&cache->next_transaction_id, 1);
712 	cache->last_transaction = transaction;
713 
714 	TRACE(("cache_start_transaction(): id %d started\n", transaction->id));
715 
716 	hash_insert(cache->transaction_hash, transaction);
717 
718 	return transaction->id;
719 }
720 
721 
722 fssh_status_t
723 fssh_cache_sync_transaction(void *_cache, int32_t id)
724 {
725 	block_cache *cache = (block_cache *)_cache;
726 	MutexLocker locker(&cache->lock);
727 	fssh_status_t status = FSSH_B_ENTRY_NOT_FOUND;
728 
729 	TRACE(("cache_sync_transaction(id %d)\n", id));
730 
731 	hash_iterator iterator;
732 	hash_open(cache->transaction_hash, &iterator);
733 
734 	cache_transaction *transaction;
735 	while ((transaction = (cache_transaction *)hash_next(
736 			cache->transaction_hash, &iterator)) != NULL) {
737 		// close all earlier transactions which haven't been closed yet
738 
739 		if (transaction->id <= id && !transaction->open) {
740 			// write back all of their remaining dirty blocks
741 			while (transaction->num_blocks > 0) {
742 				status = write_cached_block(cache, transaction->blocks.Head(),
743 					false);
744 				if (status != FSSH_B_OK)
745 					return status;
746 			}
747 
748 			hash_remove_current(cache->transaction_hash, &iterator);
749 			delete_transaction(cache, transaction);
750 		}
751 	}
752 
753 	hash_close(cache->transaction_hash, &iterator, false);
754 	return FSSH_B_OK;
755 }
756 
757 
758 fssh_status_t
759 fssh_cache_end_transaction(void *_cache, int32_t id,
760 	fssh_transaction_notification_hook hook, void *data)
761 {
762 	block_cache *cache = (block_cache *)_cache;
763 	MutexLocker locker(&cache->lock);
764 
765 	TRACE(("cache_end_transaction(id = %d)\n", id));
766 
767 	cache_transaction *transaction = lookup_transaction(cache, id);
768 	if (transaction == NULL) {
769 		fssh_panic("cache_end_transaction(): invalid transaction ID\n");
770 		return FSSH_B_BAD_VALUE;
771 	}
772 
773 	transaction->notification_hook = hook;
774 	transaction->notification_data = data;
775 
776 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ENDED);
777 
778 	// iterate through all blocks and free the unchanged original contents
779 
780 	cached_block *block = transaction->first_block, *next;
781 	for (; block != NULL; block = next) {
782 		next = block->transaction_next;
783 
784 		if (block->previous_transaction != NULL) {
785 			// need to write back pending changes
786 			write_cached_block(cache, block);
787 		}
788 
789 		if (block->original_data != NULL) {
790 			cache->Free(block->original_data);
791 			block->original_data = NULL;
792 		}
793 		if (transaction->has_sub_transaction) {
794 			if (block->parent_data != block->current_data)
795 				cache->Free(block->parent_data);
796 			block->parent_data = NULL;
797 		}
798 
799 		// move the block to the previous transaction list
800 		transaction->blocks.Add(block);
801 
802 		block->previous_transaction = transaction;
803 		block->transaction_next = NULL;
804 		block->transaction = NULL;
805 	}
806 
807 	transaction->open = false;
808 
809 	return FSSH_B_OK;
810 }
811 
812 
813 fssh_status_t
814 fssh_cache_abort_transaction(void *_cache, int32_t id)
815 {
816 	block_cache *cache = (block_cache *)_cache;
817 	MutexLocker locker(&cache->lock);
818 
819 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
820 
821 	cache_transaction *transaction = lookup_transaction(cache, id);
822 	if (transaction == NULL) {
823 		fssh_panic("cache_abort_transaction(): invalid transaction ID\n");
824 		return FSSH_B_BAD_VALUE;
825 	}
826 
827 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ABORTED);
828 
829 	// iterate through all blocks and restore their original contents
830 
831 	cached_block *block = transaction->first_block, *next;
832 	for (; block != NULL; block = next) {
833 		next = block->transaction_next;
834 
835 		if (block->original_data != NULL) {
836 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
837 				transaction->id, block->block_number));
838 			fssh_memcpy(block->current_data, block->original_data, cache->block_size);
839 			cache->Free(block->original_data);
840 			block->original_data = NULL;
841 		}
842 		if (transaction->has_sub_transaction) {
843 			if (block->parent_data != block->current_data)
844 				cache->Free(block->parent_data);
845 			block->parent_data = NULL;
846 		}
847 
848 		block->transaction_next = NULL;
849 		block->transaction = NULL;
850 	}
851 
852 	hash_remove(cache->transaction_hash, transaction);
853 	delete_transaction(cache, transaction);
854 	return FSSH_B_OK;
855 }
856 
857 
858 /*!	Acknowledges the current parent transaction, and starts a new transaction
859 	from its sub transaction.
860 	The new transaction also gets a new transaction ID.
861 */
862 int32_t
863 fssh_cache_detach_sub_transaction(void *_cache, int32_t id,
864 	fssh_transaction_notification_hook hook, void *data)
865 {
866 	block_cache *cache = (block_cache *)_cache;
867 	MutexLocker locker(&cache->lock);
868 
869 	TRACE(("cache_detach_sub_transaction(id = %d)\n", id));
870 
871 	cache_transaction *transaction = lookup_transaction(cache, id);
872 	if (transaction == NULL) {
873 		fssh_panic("cache_detach_sub_transaction(): invalid transaction ID\n");
874 		return FSSH_B_BAD_VALUE;
875 	}
876 	if (!transaction->has_sub_transaction)
877 		return FSSH_B_BAD_VALUE;
878 
879 	// create a new transaction for the sub transaction
880 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
881 	if (transaction == NULL)
882 		return FSSH_B_NO_MEMORY;
883 
884 	newTransaction->id = fssh_atomic_add(&cache->next_transaction_id, 1);
885 
886 	transaction->notification_hook = hook;
887 	transaction->notification_data = data;
888 
889 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ENDED);
890 
891 	// iterate through all blocks and free the unchanged original contents
892 
893 	cached_block *block = transaction->first_block, *next, *last = NULL;
894 	for (; block != NULL; block = next) {
895 		next = block->transaction_next;
896 
897 		if (block->previous_transaction != NULL) {
898 			// need to write back pending changes
899 			write_cached_block(cache, block);
900 		}
901 
902 		if (block->original_data != NULL && block->parent_data != NULL
903 			&& block->parent_data != block->current_data) {
904 			// free the original data if the parent data of the transaction
905 			// will be made current - but keep them otherwise
906 			cache->Free(block->original_data);
907 			block->original_data = NULL;
908 		}
909 		if (block->parent_data == NULL
910 			|| block->parent_data != block->current_data) {
911 			// we need to move this block over to the new transaction
912 			block->original_data = block->parent_data;
913 			if (last == NULL)
914 				newTransaction->first_block = block;
915 			else
916 				last->transaction_next = block;
917 
918 			block->transaction = newTransaction;
919 			last = block;
920 		} else
921 			block->transaction = NULL;
922 
923 		if (block->parent_data != NULL) {
924 			// move the block to the previous transaction list
925 			transaction->blocks.Add(block);
926 			block->parent_data = NULL;
927 		}
928 
929 		block->previous_transaction = transaction;
930 		block->transaction_next = NULL;
931 	}
932 
933 	newTransaction->num_blocks = transaction->sub_num_blocks;
934 
935 	transaction->open = false;
936 	transaction->has_sub_transaction = false;
937 	transaction->num_blocks = transaction->main_num_blocks;
938 	transaction->sub_num_blocks = 0;
939 
940 	hash_insert(cache->transaction_hash, newTransaction);
941 	cache->last_transaction = newTransaction;
942 
943 	return newTransaction->id;
944 }
945 
946 
947 fssh_status_t
948 fssh_cache_abort_sub_transaction(void *_cache, int32_t id)
949 {
950 	block_cache *cache = (block_cache *)_cache;
951 	MutexLocker locker(&cache->lock);
952 
953 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
954 
955 	cache_transaction *transaction = lookup_transaction(cache, id);
956 	if (transaction == NULL) {
957 		fssh_panic("cache_abort_sub_transaction(): invalid transaction ID\n");
958 		return FSSH_B_BAD_VALUE;
959 	}
960 	if (!transaction->has_sub_transaction)
961 		return FSSH_B_BAD_VALUE;
962 
963 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ABORTED);
964 
965 	// revert all changes back to the version of the parent
966 
967 	cached_block *block = transaction->first_block, *next;
968 	for (; block != NULL; block = next) {
969 		next = block->transaction_next;
970 
971 		if (block->parent_data == NULL) {
972 			if (block->original_data != NULL) {
973 				// the parent transaction didn't change the block, but the sub
974 				// transaction did - we need to revert from the original data
975 				fssh_memcpy(block->current_data, block->original_data,
976 					cache->block_size);
977 			}
978 		} else if (block->parent_data != block->current_data) {
979 			// the block has been changed and must be restored
980 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
981 				transaction->id, block->block_number));
982 			fssh_memcpy(block->current_data, block->parent_data,
983 				cache->block_size);
984 			cache->Free(block->parent_data);
985 		}
986 
987 		block->parent_data = NULL;
988 	}
989 
990 	// all subsequent changes will go into the main transaction
991 	transaction->has_sub_transaction = false;
992 	transaction->sub_num_blocks = 0;
993 
994 	return FSSH_B_OK;
995 }
996 
997 
998 fssh_status_t
999 fssh_cache_start_sub_transaction(void *_cache, int32_t id)
1000 {
1001 	block_cache *cache = (block_cache *)_cache;
1002 	MutexLocker locker(&cache->lock);
1003 
1004 	TRACE(("cache_start_sub_transaction(id = %d)\n", id));
1005 
1006 	cache_transaction *transaction = lookup_transaction(cache, id);
1007 	if (transaction == NULL) {
1008 		fssh_panic("cache_start_sub_transaction(): invalid transaction ID %d\n", (int)id);
1009 		return FSSH_B_BAD_VALUE;
1010 	}
1011 
1012 	notify_transaction_listeners(transaction, FSSH_TRANSACTION_ENDED);
1013 
1014 	// move all changed blocks up to the parent
1015 
1016 	cached_block *block = transaction->first_block, *next;
1017 	for (; block != NULL; block = next) {
1018 		next = block->transaction_next;
1019 
1020 		if (transaction->has_sub_transaction
1021 			&& block->parent_data != NULL
1022 			&& block->parent_data != block->current_data) {
1023 			// there already is an older sub transaction - we acknowledge
1024 			// its changes and move its blocks up to the parent
1025 			cache->Free(block->parent_data);
1026 		}
1027 
1028 		// we "allocate" the parent data lazily, that means, we don't copy
1029 		// the data (and allocate memory for it) until we need to
1030 		block->parent_data = block->current_data;
1031 	}
1032 
1033 	// all subsequent changes will go into the sub transaction
1034 	transaction->has_sub_transaction = true;
1035 	transaction->main_num_blocks = transaction->num_blocks;
1036 	transaction->sub_num_blocks = 0;
1037 
1038 	return FSSH_B_OK;
1039 }
1040 
1041 
1042 /*!	Adds a transaction listener that gets notified when the transaction
1043 	is ended or aborted.
1044 	The listener gets automatically removed in this case.
1045 */
1046 fssh_status_t
1047 fssh_cache_add_transaction_listener(void *_cache, int32_t id, int32_t events,
1048 	fssh_transaction_notification_hook hookFunction, void *data)
1049 {
1050 // TODO: this is currently not used in a critical context in BFS
1051 #if 0
1052 	block_cache *cache = (block_cache *)_cache;
1053 
1054 	cache_hook *hook = new(std::nothrow) cache_hook;
1055 	if (hook == NULL)
1056 		return FSSH_B_NO_MEMORY;
1057 
1058 	MutexLocker locker(&cache->lock);
1059 
1060 	cache_transaction *transaction = lookup_transaction(cache, id);
1061 	if (transaction == NULL) {
1062 		delete hook;
1063 		return FSSH_B_BAD_VALUE;
1064 	}
1065 
1066 	hook->hook = hookFunction;
1067 	hook->data = data;
1068 
1069 	transaction->listeners.Add(hook);
1070 #endif
1071 	return FSSH_B_OK;
1072 }
1073 
1074 
1075 fssh_status_t
1076 fssh_cache_remove_transaction_listener(void *_cache, int32_t id,
1077 	fssh_transaction_notification_hook hookFunction, void *data)
1078 {
1079 	block_cache *cache = (block_cache *)_cache;
1080 
1081 	MutexLocker locker(&cache->lock);
1082 
1083 	cache_transaction *transaction = lookup_transaction(cache, id);
1084 	if (transaction == NULL)
1085 		return FSSH_B_BAD_VALUE;
1086 
1087 	HookList::Iterator iterator = transaction->listeners.GetIterator();
1088 	while (iterator.HasNext()) {
1089 		cache_hook *hook = iterator.Next();
1090 		if (hook->data == data && hook->hook == hookFunction) {
1091 			iterator.Remove();
1092 			delete hook;
1093 			return FSSH_B_OK;
1094 		}
1095 	}
1096 
1097 	return FSSH_B_ENTRY_NOT_FOUND;
1098 }
1099 
1100 
1101 fssh_status_t
1102 fssh_cache_next_block_in_transaction(void *_cache, int32_t id, bool mainOnly,
1103 	long *_cookie, fssh_off_t *_blockNumber, void **_data,
1104 	void **_unchangedData)
1105 {
1106 	cached_block *block = (cached_block *)*_cookie;
1107 	block_cache *cache = (block_cache *)_cache;
1108 
1109 	MutexLocker locker(&cache->lock);
1110 
1111 	cache_transaction *transaction = lookup_transaction(cache, id);
1112 	if (transaction == NULL || !transaction->open)
1113 		return FSSH_B_BAD_VALUE;
1114 
1115 	if (block == NULL)
1116 		block = transaction->first_block;
1117 	else
1118 		block = block->transaction_next;
1119 
1120 	if (mainOnly && transaction->has_sub_transaction) {
1121 		// find next block that the parent changed
1122 		while (block != NULL && block->parent_data == NULL)
1123 			block = block->transaction_next;
1124 	}
1125 
1126 	if (block == NULL)
1127 		return FSSH_B_ENTRY_NOT_FOUND;
1128 
1129 	if (_blockNumber)
1130 		*_blockNumber = block->block_number;
1131 	if (_data)
1132 		*_data = mainOnly ? block->parent_data : block->current_data;
1133 	if (_unchangedData)
1134 		*_unchangedData = block->original_data;
1135 
1136 	*_cookie = (fssh_addr_t)block;
1137 	return FSSH_B_OK;
1138 }
1139 
1140 
1141 int32_t
1142 fssh_cache_blocks_in_transaction(void *_cache, int32_t id)
1143 {
1144 	block_cache *cache = (block_cache *)_cache;
1145 	MutexLocker locker(&cache->lock);
1146 
1147 	cache_transaction *transaction = lookup_transaction(cache, id);
1148 	if (transaction == NULL)
1149 		return FSSH_B_BAD_VALUE;
1150 
1151 	return transaction->num_blocks;
1152 }
1153 
1154 
1155 int32_t
1156 fssh_cache_blocks_in_main_transaction(void *_cache, int32_t id)
1157 {
1158 	block_cache *cache = (block_cache *)_cache;
1159 	MutexLocker locker(&cache->lock);
1160 
1161 	cache_transaction *transaction = lookup_transaction(cache, id);
1162 	if (transaction == NULL)
1163 		return FSSH_B_BAD_VALUE;
1164 
1165 	return transaction->main_num_blocks;
1166 }
1167 
1168 
1169 int32_t
1170 fssh_cache_blocks_in_sub_transaction(void *_cache, int32_t id)
1171 {
1172 	block_cache *cache = (block_cache *)_cache;
1173 	MutexLocker locker(&cache->lock);
1174 
1175 	cache_transaction *transaction = lookup_transaction(cache, id);
1176 	if (transaction == NULL)
1177 		return FSSH_B_BAD_VALUE;
1178 
1179 	return transaction->sub_num_blocks;
1180 }
1181 
1182 
1183 //	#pragma mark - public block cache API
1184 //	public interface
1185 
1186 
1187 void
1188 fssh_block_cache_delete(void *_cache, bool allowWrites)
1189 {
1190 	block_cache *cache = (block_cache *)_cache;
1191 
1192 	if (allowWrites)
1193 		fssh_block_cache_sync(cache);
1194 
1195 	fssh_mutex_lock(&cache->lock);
1196 
1197 	// free all blocks
1198 
1199 	uint32_t cookie = 0;
1200 	cached_block *block;
1201 	while ((block = (cached_block *)hash_remove_first(cache->hash,
1202 			&cookie)) != NULL) {
1203 		cache->FreeBlock(block);
1204 	}
1205 
1206 	// free all transactions (they will all be aborted)
1207 
1208 	cookie = 0;
1209 	cache_transaction *transaction;
1210 	while ((transaction = (cache_transaction *)hash_remove_first(
1211 			cache->transaction_hash, &cookie)) != NULL) {
1212 		delete transaction;
1213 	}
1214 
1215 	delete cache;
1216 }
1217 
1218 
1219 void *
1220 fssh_block_cache_create(int fd, fssh_off_t numBlocks, fssh_size_t blockSize, bool readOnly)
1221 {
1222 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
1223 		readOnly);
1224 	if (cache == NULL)
1225 		return NULL;
1226 
1227 	if (cache->InitCheck() != FSSH_B_OK) {
1228 		delete cache;
1229 		return NULL;
1230 	}
1231 
1232 	return cache;
1233 }
1234 
1235 
1236 fssh_status_t
1237 fssh_block_cache_sync(void *_cache)
1238 {
1239 	block_cache *cache = (block_cache *)_cache;
1240 
1241 	// we will sync all dirty blocks to disk that have a completed
1242 	// transaction or no transaction only
1243 
1244 	MutexLocker locker(&cache->lock);
1245 	hash_iterator iterator;
1246 	hash_open(cache->hash, &iterator);
1247 
1248 	cached_block *block;
1249 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1250 		if (block->previous_transaction != NULL
1251 			|| (block->transaction == NULL && block->is_dirty)) {
1252 			fssh_status_t status = write_cached_block(cache, block);
1253 			if (status != FSSH_B_OK)
1254 				return status;
1255 		}
1256 	}
1257 
1258 	hash_close(cache->hash, &iterator, false);
1259 	return FSSH_B_OK;
1260 }
1261 
1262 
1263 fssh_status_t
1264 fssh_block_cache_sync_etc(void *_cache, fssh_off_t blockNumber,
1265 	fssh_size_t numBlocks)
1266 {
1267 	block_cache *cache = (block_cache *)_cache;
1268 
1269 	// we will sync all dirty blocks to disk that have a completed
1270 	// transaction or no transaction only
1271 
1272 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1273 		fssh_panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
1274 			blockNumber, cache->max_blocks - 1);
1275 		return FSSH_B_BAD_VALUE;
1276 	}
1277 
1278 	MutexLocker locker(&cache->lock);
1279 
1280 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
1281 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1282 			&blockNumber);
1283 		if (block == NULL)
1284 			continue;
1285 		if (block->previous_transaction != NULL
1286 			|| (block->transaction == NULL && block->is_dirty)) {
1287 			fssh_status_t status = write_cached_block(cache, block);
1288 			if (status != FSSH_B_OK)
1289 				return status;
1290 		}
1291 	}
1292 
1293 	return FSSH_B_OK;
1294 }
1295 
1296 
1297 fssh_status_t
1298 fssh_block_cache_make_writable(void *_cache, fssh_off_t blockNumber, int32_t transaction)
1299 {
1300 	block_cache *cache = (block_cache *)_cache;
1301 	MutexLocker locker(&cache->lock);
1302 
1303 	if (cache->read_only)
1304 		fssh_panic("tried to make block writable on a read-only cache!");
1305 
1306 	// ToDo: this can be done better!
1307 	void *block = get_writable_cached_block(cache, blockNumber,
1308 		blockNumber, 1, transaction, false);
1309 	if (block != NULL) {
1310 		put_cached_block((block_cache *)_cache, blockNumber);
1311 		return FSSH_B_OK;
1312 	}
1313 
1314 	return FSSH_B_ERROR;
1315 }
1316 
1317 
1318 void *
1319 fssh_block_cache_get_writable_etc(void *_cache, fssh_off_t blockNumber, fssh_off_t base,
1320 	fssh_off_t length, int32_t transaction)
1321 {
1322 	block_cache *cache = (block_cache *)_cache;
1323 	MutexLocker locker(&cache->lock);
1324 
1325 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1326 		blockNumber, transaction));
1327 	if (cache->read_only)
1328 		fssh_panic("tried to get writable block on a read-only cache!");
1329 
1330 	return get_writable_cached_block(cache, blockNumber, base, length,
1331 		transaction, false);
1332 }
1333 
1334 
1335 void *
1336 fssh_block_cache_get_writable(void *_cache, fssh_off_t blockNumber,
1337 	int32_t transaction)
1338 {
1339 	return fssh_block_cache_get_writable_etc(_cache, blockNumber,
1340 		blockNumber, 1, transaction);
1341 }
1342 
1343 
1344 void *
1345 fssh_block_cache_get_empty(void *_cache, fssh_off_t blockNumber,
1346 	int32_t transaction)
1347 {
1348 	block_cache *cache = (block_cache *)_cache;
1349 	MutexLocker locker(&cache->lock);
1350 
1351 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1352 		blockNumber, transaction));
1353 	if (cache->read_only)
1354 		fssh_panic("tried to get empty writable block on a read-only cache!");
1355 
1356 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1357 		blockNumber, 1, transaction, true);
1358 }
1359 
1360 
1361 const void *
1362 fssh_block_cache_get_etc(void *_cache, fssh_off_t blockNumber, fssh_off_t base,
1363 	fssh_off_t length)
1364 {
1365 	block_cache *cache = (block_cache *)_cache;
1366 	MutexLocker locker(&cache->lock);
1367 	bool allocated;
1368 
1369 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1370 	if (block == NULL)
1371 		return NULL;
1372 
1373 #ifdef DEBUG_CHANGED
1374 	if (block->compare == NULL)
1375 		block->compare = cache->Allocate();
1376 	if (block->compare != NULL)
1377 		memcpy(block->compare, block->current_data, cache->block_size);
1378 #endif
1379 	return block->current_data;
1380 }
1381 
1382 
1383 const void *
1384 fssh_block_cache_get(void *_cache, fssh_off_t blockNumber)
1385 {
1386 	return fssh_block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1387 }
1388 
1389 
1390 /*!
1391 	Changes the internal status of a writable block to \a dirty. This can be
1392 	helpful in case you realize you don't need to change that block anymore
1393 	for whatever reason.
1394 
1395 	Note, you must only use this function on blocks that were acquired
1396 	writable!
1397 */
1398 fssh_status_t
1399 fssh_block_cache_set_dirty(void *_cache, fssh_off_t blockNumber, bool dirty,
1400 	int32_t transaction)
1401 {
1402 	// TODO: not yet implemented
1403 	if (dirty)
1404 		fssh_panic("block_cache_set_dirty(): not yet implemented that way!\n");
1405 
1406 	return FSSH_B_OK;
1407 }
1408 
1409 
1410 void
1411 fssh_block_cache_put(void *_cache, fssh_off_t blockNumber)
1412 {
1413 	block_cache *cache = (block_cache *)_cache;
1414 	MutexLocker locker(&cache->lock);
1415 
1416 	put_cached_block(cache, blockNumber);
1417 }
1418 
1419