xref: /haiku/src/tools/fs_shell/block_cache.cpp (revision b028e77473189065f2baefc6f5e10d451cf591e2)
1 /*
2  * Copyright 2004-2006, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 #include "block_cache_priv.h"
7 
8 #include <new>
9 
10 #include <stdlib.h>
11 
12 #include "fssh_atomic.h"
13 #include "fssh_errno.h"
14 #include "fssh_fs_cache.h"
15 #include "fssh_kernel_export.h"
16 #include "fssh_string.h"
17 #include "fssh_unistd.h"
18 #include "hash.h"
19 #include "vfs.h"
20 
21 // TODO: this is a naive but growing implementation to test the API:
22 //	1) block reading/writing is not at all optimized for speed, it will
23 //	   just read and write single blocks.
24 //	2) the locking could be improved; getting a block should not need to
25 //	   wait for blocks to be written
26 //	3) dirty blocks are only written back if asked for
27 // TODO: the retrieval/copy of the original data could be delayed until the
28 //		new data must be written, ie. in low memory situations.
29 
30 //#define TRACE_BLOCK_CACHE
31 #ifdef TRACE_BLOCK_CACHE
32 #	define TRACE(x)	fssh_dprintf x
33 #else
34 #	define TRACE(x) ;
35 #endif
36 
37 using std::nothrow;
38 
39 // This macro is used for fatal situations that are acceptable in a running
40 // system, like out of memory situations - should only panic for debugging.
41 #define FATAL(x) fssh_panic x
42 
43 
44 namespace FSShell {
45 
46 static const int32_t kMaxBlockCount = 1024;
47 
48 
49 struct cache_transaction {
50 	cache_transaction();
51 
52 	cache_transaction *next;
53 	int32_t			id;
54 	int32_t			num_blocks;
55 	int32_t			sub_num_blocks;
56 	cached_block	*first_block;
57 	block_list		blocks;
58 	fssh_transaction_notification_hook notification_hook;
59 	void			*notification_data;
60 	bool			open;
61 	bool			has_sub_transaction;
62 };
63 
64 static fssh_status_t write_cached_block(block_cache *cache, cached_block *block,
65 	bool deleteTransaction = true);
66 
67 
68 //	#pragma mark - private transaction
69 
70 
71 cache_transaction::cache_transaction()
72 {
73 	num_blocks = 0;
74 	sub_num_blocks = 0;
75 	first_block = NULL;
76 	notification_hook = NULL;
77 	notification_data = NULL;
78 	open = true;
79 }
80 
81 
82 static int
83 transaction_compare(void *_transaction, const void *_id)
84 {
85 	cache_transaction *transaction = (cache_transaction *)_transaction;
86 	const int32_t *id = (const int32_t *)_id;
87 
88 	return transaction->id - *id;
89 }
90 
91 
92 static uint32_t
93 transaction_hash(void *_transaction, const void *_id, uint32_t range)
94 {
95 	cache_transaction *transaction = (cache_transaction *)_transaction;
96 	const int32_t *id = (const int32_t *)_id;
97 
98 	if (transaction != NULL)
99 		return transaction->id % range;
100 
101 	return (uint32_t)*id % range;
102 }
103 
104 
105 static void
106 delete_transaction(block_cache *cache, cache_transaction *transaction)
107 {
108 	if (cache->last_transaction == transaction)
109 		cache->last_transaction = NULL;
110 
111 	delete transaction;
112 }
113 
114 
115 static cache_transaction *
116 lookup_transaction(block_cache *cache, int32_t id)
117 {
118 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
119 }
120 
121 
122 //	#pragma mark - cached_block
123 
124 
125 /* static */
126 int
127 cached_block::Compare(void *_cacheEntry, const void *_block)
128 {
129 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
130 	const fssh_off_t *block = (const fssh_off_t *)_block;
131 
132 	return cacheEntry->block_number - *block;
133 }
134 
135 
136 
137 /* static */
138 uint32_t
139 cached_block::Hash(void *_cacheEntry, const void *_block, uint32_t range)
140 {
141 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
142 	const fssh_off_t *block = (const fssh_off_t *)_block;
143 
144 	if (cacheEntry != NULL)
145 		return cacheEntry->block_number % range;
146 
147 	return (uint64_t)*block % range;
148 }
149 
150 
151 //	#pragma mark - block_cache
152 
153 
154 block_cache::block_cache(int _fd, fssh_off_t numBlocks, fssh_size_t blockSize,
155 	bool readOnly)
156 	:
157 	hash(NULL),
158 	fd(_fd),
159 	max_blocks(numBlocks),
160 	block_size(blockSize),
161 	allocated_block_count(0),
162 	next_transaction_id(1),
163 	last_transaction(NULL),
164 	transaction_hash(NULL),
165 	read_only(readOnly)
166 {
167 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
168 	if (hash == NULL)
169 		return;
170 
171 	transaction_hash = hash_init(16, 0, &transaction_compare,
172 		&FSShell::transaction_hash);
173 	if (transaction_hash == NULL)
174 		return;
175 
176 	if (benaphore_init(&lock, "block cache") < FSSH_B_OK)
177 		return;
178 }
179 
180 
181 block_cache::~block_cache()
182 {
183 	benaphore_destroy(&lock);
184 
185 	hash_uninit(transaction_hash);
186 	hash_uninit(hash);
187 }
188 
189 
190 fssh_status_t
191 block_cache::InitCheck()
192 {
193 	if (lock.sem < FSSH_B_OK)
194 		return lock.sem;
195 
196 	if (hash == NULL || transaction_hash == NULL)
197 		return FSSH_B_NO_MEMORY;
198 
199 	return FSSH_B_OK;
200 }
201 
202 
203 void
204 block_cache::Free(void *address)
205 {
206 	if (address == NULL)
207 		return;
208 
209 	free(address);
210 }
211 
212 
213 void *
214 block_cache::Allocate()
215 {
216 	return malloc(block_size);
217 }
218 
219 
220 void
221 block_cache::FreeBlock(cached_block *block)
222 {
223 	Free(block->current_data);
224 	block->current_data = NULL;
225 
226 	if (block->original_data != NULL || block->parent_data != NULL) {
227 		fssh_panic("block_cache::FreeBlock(): %p, %p\n", block->original_data,
228 			block->parent_data);
229 	}
230 
231 #ifdef DEBUG_CHANGED
232 	Free(block->compare);
233 #endif
234 
235 	delete block;
236 }
237 
238 
239 /*! Allocates a new block for \a blockNumber, ready for use */
240 cached_block *
241 block_cache::NewBlock(fssh_off_t blockNumber)
242 {
243 	cached_block *block = new(nothrow) cached_block;
244 	if (block == NULL) {
245 		FATAL(("could not allocate block!\n"));
246 		return NULL;
247 	}
248 
249 	// if we hit the limit of blocks to cache¸ try to free one or more
250 	if (allocated_block_count >= kMaxBlockCount) {
251 		RemoveUnusedBlocks(LONG_MAX,
252 			allocated_block_count - kMaxBlockCount + 1);
253 	}
254 
255 	block->current_data = Allocate();
256 	if (!block->current_data) {
257 		FATAL(("could not allocate block data!\n"));
258 		delete block;
259 		return NULL;
260 	}
261 
262 	block->block_number = blockNumber;
263 	block->ref_count = 0;
264 	block->accessed = 0;
265 	block->transaction_next = NULL;
266 	block->transaction = block->previous_transaction = NULL;
267 	block->original_data = NULL;
268 	block->parent_data = NULL;
269 	block->is_dirty = false;
270 	block->unused = false;
271 #ifdef DEBUG_CHANGED
272 	block->compare = NULL;
273 #endif
274 
275 	allocated_block_count++;
276 
277 	return block;
278 }
279 
280 
281 void
282 block_cache::RemoveUnusedBlocks(int32_t maxAccessed, int32_t count)
283 {
284 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
285 
286 	for (block_list::Iterator it = unused_blocks.GetIterator();
287 		 cached_block *block = it.Next();) {
288 
289 		if (maxAccessed < block->accessed)
290 			continue;
291 
292 		TRACE(("  remove block %Ld, accessed %ld times\n",
293 			block->block_number, block->accessed));
294 
295 		// this can only happen if no transactions are used
296 		if (block->is_dirty)
297 			write_cached_block(this, block, false);
298 
299 		// remove block from lists
300 		it.Remove();
301 		hash_remove(hash, block);
302 
303 		FreeBlock(block);
304 
305 		if (--count <= 0)
306 			break;
307 	}
308 }
309 
310 
311 //	#pragma mark - private block functions
312 
313 
314 static void
315 put_cached_block(block_cache *cache, cached_block *block)
316 {
317 #ifdef DEBUG_CHANGED
318 	if (!block->is_dirty && block->compare != NULL
319 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
320 		fssh_dprintf("new block:\n");
321 		fssh_dump_block((const char *)block->current_data, 256, "  ");
322 		fssh_dprintf("unchanged block:\n");
323 		fssh_dump_block((const char *)block->compare, 256, "  ");
324 		write_cached_block(cache, block);
325 		fssh_panic("block_cache: supposed to be clean block was changed!\n");
326 
327 		cache->Free(block->compare);
328 		block->compare = NULL;
329 	}
330 #endif
331 
332 	if (--block->ref_count == 0
333 		&& block->transaction == NULL
334 		&& block->previous_transaction == NULL) {
335 		// put this block in the list of unused blocks
336 		block->unused = true;
337 		cache->unused_blocks.Add(block);
338 	}
339 
340 	if (cache->allocated_block_count > kMaxBlockCount) {
341 		cache->RemoveUnusedBlocks(LONG_MAX,
342 			cache->allocated_block_count - kMaxBlockCount);
343 	}
344 }
345 
346 
347 static void
348 put_cached_block(block_cache *cache, fssh_off_t blockNumber)
349 {
350 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
351 		fssh_panic("put_cached_block: invalid block number %lld (max %lld)",
352 			blockNumber, cache->max_blocks - 1);
353 	}
354 
355 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
356 	if (block != NULL)
357 		put_cached_block(cache, block);
358 }
359 
360 
361 /*!
362 	Retrieves the block \a blockNumber from the hash table, if it's already
363 	there, or reads it from the disk.
364 
365 	\param _allocated tells you wether or not a new block has been allocated
366 		to satisfy your request.
367 	\param readBlock if \c false, the block will not be read in case it was
368 		not already in the cache. The block you retrieve may contain random
369 		data.
370 */
371 static cached_block *
372 get_cached_block(block_cache *cache, fssh_off_t blockNumber, bool *_allocated,
373 	bool readBlock = true)
374 {
375 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
376 		fssh_panic("get_cached_block: invalid block number %lld (max %lld)",
377 			blockNumber, cache->max_blocks - 1);
378 		return NULL;
379 	}
380 
381 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
382 		&blockNumber);
383 	*_allocated = false;
384 
385 	if (block == NULL) {
386 		// read block into cache
387 		block = cache->NewBlock(blockNumber);
388 		if (block == NULL)
389 			return NULL;
390 
391 		hash_insert(cache->hash, block);
392 		*_allocated = true;
393 	} else {
394 		// TODO: currently, the data is always mapped in
395 /*
396 		if (block->ref_count == 0 && block->current_data != NULL) {
397 			// see if the old block can be resurrected
398 			block->current_data = cache->allocator->Acquire(block->current_data);
399 		}
400 
401 		if (block->current_data == NULL) {
402 			// there is no block yet, but we need one
403 			block->current_data = cache->allocator->Get();
404 			if (block->current_data == NULL)
405 				return NULL;
406 
407 			*_allocated = true;
408 		}
409 */
410 	}
411 
412 	if (*_allocated && readBlock) {
413 		int32_t blockSize = cache->block_size;
414 
415 		if (fssh_read_pos(cache->fd, blockNumber * blockSize, block->current_data,
416 				blockSize) < blockSize) {
417 			hash_remove(cache->hash, block);
418 			cache->FreeBlock(block);
419 			FATAL(("could not read block %Ld\n", blockNumber));
420 			return NULL;
421 		}
422 	}
423 
424 	if (block->unused) {
425 		//TRACE(("remove block %Ld from unused\n", blockNumber));
426 		block->unused = false;
427 		cache->unused_blocks.Remove(block);
428 	}
429 
430 	block->ref_count++;
431 	block->accessed++;
432 
433 	return block;
434 }
435 
436 
437 /*!
438 	Returns the writable block data for the requested blockNumber.
439 	If \a cleared is true, the block is not read from disk; an empty block
440 	is returned.
441 
442 	This is the only method to insert a block into a transaction. It makes
443 	sure that the previous block contents are preserved in that case.
444 */
445 static void *
446 get_writable_cached_block(block_cache *cache, fssh_off_t blockNumber, fssh_off_t base,
447 	fssh_off_t length, int32_t transactionID, bool cleared)
448 {
449 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
450 		blockNumber, transactionID));
451 
452 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
453 		fssh_panic("get_writable_cached_block: invalid block number %lld (max %lld)",
454 			blockNumber, cache->max_blocks - 1);
455 	}
456 
457 	bool allocated;
458 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
459 		!cleared);
460 	if (block == NULL)
461 		return NULL;
462 
463 	// if there is no transaction support, we just return the current block
464 	if (transactionID == -1) {
465 		if (cleared)
466 			fssh_memset(block->current_data, 0, cache->block_size);
467 
468 		block->is_dirty = true;
469 			// mark the block as dirty
470 
471 		return block->current_data;
472 	}
473 
474 	if (block->transaction != NULL && block->transaction->id != transactionID) {
475 		// ToDo: we have to wait here until the other transaction is done.
476 		//	Maybe we should even panic, since we can't prevent any deadlocks.
477 		fssh_panic("get_writable_cached_block(): asked to get busy writable block (transaction %d)\n", (int)block->transaction->id);
478 		put_cached_block(cache, block);
479 		return NULL;
480 	}
481 	if (block->transaction == NULL && transactionID != -1) {
482 		// get new transaction
483 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
484 		if (transaction == NULL) {
485 			fssh_panic("get_writable_cached_block(): invalid transaction %d!\n",
486 				(int)transactionID);
487 			put_cached_block(cache, block);
488 			return NULL;
489 		}
490 		if (!transaction->open) {
491 			fssh_panic("get_writable_cached_block(): transaction already done!\n");
492 			put_cached_block(cache, block);
493 			return NULL;
494 		}
495 
496 		block->transaction = transaction;
497 
498 		// attach the block to the transaction block list
499 		block->transaction_next = transaction->first_block;
500 		transaction->first_block = block;
501 		transaction->num_blocks++;
502 	}
503 
504 	if (!(allocated && cleared) && block->original_data == NULL) {
505 		// we already have data, so we need to preserve it
506 		block->original_data = cache->Allocate();
507 		if (block->original_data == NULL) {
508 			FATAL(("could not allocate original_data\n"));
509 			put_cached_block(cache, block);
510 			return NULL;
511 		}
512 
513 		fssh_memcpy(block->original_data, block->current_data, cache->block_size);
514 	}
515 	if (block->parent_data == block->current_data) {
516 		// remember any previous contents for the parent transaction
517 		block->parent_data = cache->Allocate();
518 		if (block->parent_data == NULL) {
519 			// TODO: maybe we should just continue the current transaction in this case...
520 			FATAL(("could not allocate parent\n"));
521 			put_cached_block(cache, block);
522 			return NULL;
523 		}
524 
525 		fssh_memcpy(block->parent_data, block->current_data, cache->block_size);
526 		block->transaction->sub_num_blocks++;
527 	}
528 
529 	if (cleared)
530 		fssh_memset(block->current_data, 0, cache->block_size);
531 
532 	block->is_dirty = true;
533 
534 	return block->current_data;
535 }
536 
537 
538 static fssh_status_t
539 write_cached_block(block_cache *cache, cached_block *block,
540 	bool deleteTransaction)
541 {
542 	cache_transaction *previous = block->previous_transaction;
543 	int32_t blockSize = cache->block_size;
544 
545 	void *data = previous && block->original_data
546 		? block->original_data : block->current_data;
547 		// we first need to write back changes from previous transactions
548 
549 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
550 
551 	fssh_ssize_t written = fssh_write_pos(cache->fd, block->block_number * blockSize,
552 		data, blockSize);
553 
554 	if (written < blockSize) {
555 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
556 			fssh_strerror(fssh_get_errno())));
557 		return FSSH_B_IO_ERROR;
558 	}
559 
560 	if (data == block->current_data)
561 		block->is_dirty = false;
562 
563 	if (previous != NULL) {
564 		previous->blocks.Remove(block);
565 		block->previous_transaction = NULL;
566 
567 		// Has the previous transation been finished with that write?
568 		if (--previous->num_blocks == 0) {
569 			TRACE(("cache transaction %ld finished!\n", previous->id));
570 
571 			if (previous->notification_hook != NULL) {
572 				previous->notification_hook(previous->id,
573 					previous->notification_data);
574 			}
575 
576 			if (deleteTransaction) {
577 				hash_remove(cache->transaction_hash, previous);
578 				delete_transaction(cache, previous);
579 			}
580 		}
581 	}
582 
583 	return FSSH_B_OK;
584 }
585 
586 
587 fssh_status_t
588 block_cache_init()
589 {
590 	return FSSH_B_OK;
591 }
592 
593 
594 }	// namespace FSShell
595 
596 
597 //	#pragma mark - public transaction API
598 
599 
600 using namespace FSShell;
601 
602 
603 int32_t
604 fssh_cache_start_transaction(void *_cache)
605 {
606 	block_cache *cache = (block_cache *)_cache;
607 	BenaphoreLocker locker(&cache->lock);
608 
609 	if (cache->last_transaction && cache->last_transaction->open) {
610 		fssh_panic("last transaction (%d) still open!\n",
611 			(int)cache->last_transaction->id);
612 	}
613 
614 	cache_transaction *transaction = new(nothrow) cache_transaction;
615 	if (transaction == NULL)
616 		return FSSH_B_NO_MEMORY;
617 
618 	transaction->id = fssh_atomic_add(&cache->next_transaction_id, 1);
619 	cache->last_transaction = transaction;
620 
621 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
622 
623 	hash_insert(cache->transaction_hash, transaction);
624 
625 	return transaction->id;
626 }
627 
628 
629 fssh_status_t
630 fssh_cache_sync_transaction(void *_cache, int32_t id)
631 {
632 	block_cache *cache = (block_cache *)_cache;
633 	BenaphoreLocker locker(&cache->lock);
634 	fssh_status_t status = FSSH_B_ENTRY_NOT_FOUND;
635 
636 	hash_iterator iterator;
637 	hash_open(cache->transaction_hash, &iterator);
638 
639 	cache_transaction *transaction;
640 	while ((transaction = (cache_transaction *)hash_next(
641 			cache->transaction_hash, &iterator)) != NULL) {
642 		// close all earlier transactions which haven't been closed yet
643 
644 		if (transaction->id <= id && !transaction->open) {
645 			// write back all of their remaining dirty blocks
646 			while (transaction->num_blocks > 0) {
647 				status = write_cached_block(cache, transaction->blocks.Head(),
648 					false);
649 				if (status != FSSH_B_OK)
650 					return status;
651 			}
652 
653 			hash_remove_current(cache->transaction_hash, &iterator);
654 			delete_transaction(cache, transaction);
655 		}
656 	}
657 
658 	hash_close(cache->transaction_hash, &iterator, false);
659 	return FSSH_B_OK;
660 }
661 
662 
663 fssh_status_t
664 fssh_cache_end_transaction(void *_cache, int32_t id,
665 	fssh_transaction_notification_hook hook, void *data)
666 {
667 	block_cache *cache = (block_cache *)_cache;
668 	BenaphoreLocker locker(&cache->lock);
669 
670 	TRACE(("cache_end_transaction(id = %ld)\n", id));
671 
672 	cache_transaction *transaction = lookup_transaction(cache, id);
673 	if (transaction == NULL) {
674 		fssh_panic("cache_end_transaction(): invalid transaction ID\n");
675 		return FSSH_B_BAD_VALUE;
676 	}
677 
678 	transaction->notification_hook = hook;
679 	transaction->notification_data = data;
680 
681 	// iterate through all blocks and free the unchanged original contents
682 
683 	cached_block *block = transaction->first_block, *next;
684 	for (; block != NULL; block = next) {
685 		next = block->transaction_next;
686 
687 		if (block->previous_transaction != NULL) {
688 			// need to write back pending changes
689 			write_cached_block(cache, block);
690 		}
691 
692 		if (block->original_data != NULL) {
693 			cache->Free(block->original_data);
694 			block->original_data = NULL;
695 		}
696 		if (transaction->has_sub_transaction) {
697 			if (block->parent_data != block->current_data)
698 				cache->Free(block->parent_data);
699 			block->parent_data = NULL;
700 		}
701 
702 		// move the block to the previous transaction list
703 		transaction->blocks.Add(block);
704 
705 		block->previous_transaction = transaction;
706 		block->transaction_next = NULL;
707 		block->transaction = NULL;
708 	}
709 
710 	transaction->open = false;
711 
712 	return FSSH_B_OK;
713 }
714 
715 
716 fssh_status_t
717 fssh_cache_abort_transaction(void *_cache, int32_t id)
718 {
719 	block_cache *cache = (block_cache *)_cache;
720 	BenaphoreLocker locker(&cache->lock);
721 
722 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
723 
724 	cache_transaction *transaction = lookup_transaction(cache, id);
725 	if (transaction == NULL) {
726 		fssh_panic("cache_abort_transaction(): invalid transaction ID\n");
727 		return FSSH_B_BAD_VALUE;
728 	}
729 
730 	// iterate through all blocks and restore their original contents
731 
732 	cached_block *block = transaction->first_block, *next;
733 	for (; block != NULL; block = next) {
734 		next = block->transaction_next;
735 
736 		if (block->original_data != NULL) {
737 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
738 				transaction->id, block->block_number));
739 			fssh_memcpy(block->current_data, block->original_data, cache->block_size);
740 			cache->Free(block->original_data);
741 			block->original_data = NULL;
742 		}
743 		if (transaction->has_sub_transaction) {
744 			if (block->parent_data != block->current_data)
745 				cache->Free(block->parent_data);
746 			block->parent_data = NULL;
747 		}
748 
749 		block->transaction_next = NULL;
750 		block->transaction = NULL;
751 	}
752 
753 	hash_remove(cache->transaction_hash, transaction);
754 	delete_transaction(cache, transaction);
755 	return FSSH_B_OK;
756 }
757 
758 
759 /*!
760 	Acknowledges the current parent transaction, and starts a new transaction
761 	from its sub transaction.
762 	The new transaction also gets a new transaction ID.
763 */
764 int32_t
765 fssh_cache_detach_sub_transaction(void *_cache, int32_t id,
766 	fssh_transaction_notification_hook hook, void *data)
767 {
768 	block_cache *cache = (block_cache *)_cache;
769 	BenaphoreLocker locker(&cache->lock);
770 
771 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
772 
773 	cache_transaction *transaction = lookup_transaction(cache, id);
774 	if (transaction == NULL) {
775 		fssh_panic("cache_detach_sub_transaction(): invalid transaction ID\n");
776 		return FSSH_B_BAD_VALUE;
777 	}
778 	if (!transaction->has_sub_transaction)
779 		return FSSH_B_BAD_VALUE;
780 
781 	// create a new transaction for the sub transaction
782 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
783 	if (transaction == NULL)
784 		return FSSH_B_NO_MEMORY;
785 
786 	newTransaction->id = fssh_atomic_add(&cache->next_transaction_id, 1);
787 
788 	transaction->notification_hook = hook;
789 	transaction->notification_data = data;
790 
791 	// iterate through all blocks and free the unchanged original contents
792 
793 	cached_block *block = transaction->first_block, *next, *last = NULL;
794 	for (; block != NULL; block = next) {
795 		next = block->transaction_next;
796 
797 		if (block->previous_transaction != NULL) {
798 			// need to write back pending changes
799 			write_cached_block(cache, block);
800 		}
801 
802 		if (block->original_data != NULL && block->parent_data != NULL
803 			&& block->parent_data != block->current_data) {
804 			// free the original data if the parent data of the transaction
805 			// will be made current - but keep them otherwise
806 			cache->Free(block->original_data);
807 			block->original_data = NULL;
808 		}
809 		if (block->parent_data != NULL
810 			&& block->parent_data != block->current_data) {
811 			// we need to move this block over to the new transaction
812 			block->original_data = block->parent_data;
813 			if (last == NULL)
814 				newTransaction->first_block = block;
815 			else
816 				last->transaction_next = block;
817 
818 			last = block;
819 		}
820 		block->parent_data = NULL;
821 
822 		// move the block to the previous transaction list
823 		transaction->blocks.Add(block);
824 
825 		block->previous_transaction = transaction;
826 		block->transaction_next = NULL;
827 		block->transaction = newTransaction;
828 	}
829 
830 	transaction->open = false;
831 
832 	hash_insert(cache->transaction_hash, newTransaction);
833 	cache->last_transaction = newTransaction;
834 
835 	return FSSH_B_OK;
836 }
837 
838 
839 fssh_status_t
840 fssh_cache_abort_sub_transaction(void *_cache, int32_t id)
841 {
842 	block_cache *cache = (block_cache *)_cache;
843 	BenaphoreLocker locker(&cache->lock);
844 
845 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
846 
847 	cache_transaction *transaction = lookup_transaction(cache, id);
848 	if (transaction == NULL) {
849 		fssh_panic("cache_abort_sub_transaction(): invalid transaction ID\n");
850 		return FSSH_B_BAD_VALUE;
851 	}
852 	if (!transaction->has_sub_transaction)
853 		return FSSH_B_BAD_VALUE;
854 
855 	// revert all changes back to the version of the parent
856 
857 	cached_block *block = transaction->first_block, *next;
858 	for (; block != NULL; block = next) {
859 		next = block->transaction_next;
860 
861 		if (block->parent_data == NULL) {
862 			if (block->original_data != NULL) {
863 				// the parent transaction didn't change the block, but the sub
864 				// transaction did - we need to revert from the original data
865 				fssh_memcpy(block->current_data, block->original_data,
866 					cache->block_size);
867 			}
868 		} else if (block->parent_data != block->current_data) {
869 			// the block has been changed and must be restored
870 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
871 				transaction->id, block->block_number));
872 			fssh_memcpy(block->current_data, block->parent_data, cache->block_size);
873 			cache->Free(block->parent_data);
874 		}
875 
876 		block->parent_data = NULL;
877 	}
878 
879 	// all subsequent changes will go into the main transaction
880 	transaction->has_sub_transaction = false;
881 	return FSSH_B_OK;
882 }
883 
884 
885 fssh_status_t
886 fssh_cache_start_sub_transaction(void *_cache, int32_t id)
887 {
888 	block_cache *cache = (block_cache *)_cache;
889 	BenaphoreLocker locker(&cache->lock);
890 
891 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
892 
893 	cache_transaction *transaction = lookup_transaction(cache, id);
894 	if (transaction == NULL) {
895 		fssh_panic("cache_start_sub_transaction(): invalid transaction ID %d\n", (int)id);
896 		return FSSH_B_BAD_VALUE;
897 	}
898 
899 	// move all changed blocks up to the parent
900 
901 	cached_block *block = transaction->first_block, *next;
902 	for (; block != NULL; block = next) {
903 		next = block->transaction_next;
904 
905 		if (transaction->has_sub_transaction
906 			&& block->parent_data != NULL
907 			&& block->parent_data != block->current_data) {
908 			// there already is an older sub transaction - we acknowledge
909 			// its changes and move its blocks up to the parent
910 			cache->Free(block->parent_data);
911 		}
912 
913 		// we "allocate" the parent data lazily, that means, we don't copy
914 		// the data (and allocate memory for it) until we need to
915 		block->parent_data = block->current_data;
916 	}
917 
918 	// all subsequent changes will go into the sub transaction
919 	transaction->has_sub_transaction = true;
920 	transaction->sub_num_blocks = 0;
921 
922 	return FSSH_B_OK;
923 }
924 
925 
926 fssh_status_t
927 fssh_cache_next_block_in_transaction(void *_cache, int32_t id, uint32_t *_cookie,
928 	fssh_off_t *_blockNumber, void **_data, void **_unchangedData)
929 {
930 	cached_block *block = (cached_block *)*_cookie;
931 	block_cache *cache = (block_cache *)_cache;
932 
933 	BenaphoreLocker locker(&cache->lock);
934 
935 	cache_transaction *transaction = lookup_transaction(cache, id);
936 	if (transaction == NULL)
937 		return FSSH_B_BAD_VALUE;
938 
939 	if (block == NULL)
940 		block = transaction->first_block;
941 	else
942 		block = block->transaction_next;
943 
944 	if (block == NULL)
945 		return FSSH_B_ENTRY_NOT_FOUND;
946 
947 	if (_blockNumber)
948 		*_blockNumber = block->block_number;
949 	if (_data)
950 		*_data = block->current_data;
951 	if (_unchangedData)
952 		*_unchangedData = block->original_data;
953 
954 	*_cookie = (fssh_addr_t)block;
955 	return FSSH_B_OK;
956 }
957 
958 
959 int32_t
960 fssh_cache_blocks_in_transaction(void *_cache, int32_t id)
961 {
962 	block_cache *cache = (block_cache *)_cache;
963 	BenaphoreLocker locker(&cache->lock);
964 
965 	cache_transaction *transaction = lookup_transaction(cache, id);
966 	if (transaction == NULL)
967 		return FSSH_B_BAD_VALUE;
968 
969 	return transaction->num_blocks;
970 }
971 
972 
973 int32_t
974 fssh_cache_blocks_in_sub_transaction(void *_cache, int32_t id)
975 {
976 	block_cache *cache = (block_cache *)_cache;
977 	BenaphoreLocker locker(&cache->lock);
978 
979 	cache_transaction *transaction = lookup_transaction(cache, id);
980 	if (transaction == NULL)
981 		return FSSH_B_BAD_VALUE;
982 
983 	return transaction->sub_num_blocks;
984 }
985 
986 
987 //	#pragma mark - public block cache API
988 //	public interface
989 
990 
991 void
992 fssh_block_cache_delete(void *_cache, bool allowWrites)
993 {
994 	block_cache *cache = (block_cache *)_cache;
995 
996 	if (allowWrites)
997 		fssh_block_cache_sync(cache);
998 
999 	BenaphoreLocker locker(&cache->lock);
1000 
1001 	// free all blocks
1002 
1003 	uint32_t cookie = 0;
1004 	cached_block *block;
1005 	while ((block = (cached_block *)hash_remove_first(cache->hash,
1006 			&cookie)) != NULL) {
1007 		cache->FreeBlock(block);
1008 	}
1009 
1010 	// free all transactions (they will all be aborted)
1011 
1012 	cookie = 0;
1013 	cache_transaction *transaction;
1014 	while ((transaction = (cache_transaction *)hash_remove_first(
1015 			cache->transaction_hash, &cookie)) != NULL) {
1016 		delete transaction;
1017 	}
1018 
1019 	delete cache;
1020 }
1021 
1022 
1023 void *
1024 fssh_block_cache_create(int fd, fssh_off_t numBlocks, fssh_size_t blockSize, bool readOnly)
1025 {
1026 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
1027 		readOnly);
1028 	if (cache == NULL)
1029 		return NULL;
1030 
1031 	if (cache->InitCheck() != FSSH_B_OK) {
1032 		delete cache;
1033 		return NULL;
1034 	}
1035 
1036 	return cache;
1037 }
1038 
1039 
1040 fssh_status_t
1041 fssh_block_cache_sync(void *_cache)
1042 {
1043 	block_cache *cache = (block_cache *)_cache;
1044 
1045 	// we will sync all dirty blocks to disk that have a completed
1046 	// transaction or no transaction only
1047 
1048 	BenaphoreLocker locker(&cache->lock);
1049 	hash_iterator iterator;
1050 	hash_open(cache->hash, &iterator);
1051 
1052 	cached_block *block;
1053 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1054 		if (block->previous_transaction != NULL
1055 			|| (block->transaction == NULL && block->is_dirty)) {
1056 			fssh_status_t status = write_cached_block(cache, block);
1057 			if (status != FSSH_B_OK)
1058 				return status;
1059 		}
1060 	}
1061 
1062 	hash_close(cache->hash, &iterator, false);
1063 	return FSSH_B_OK;
1064 }
1065 
1066 
1067 fssh_status_t
1068 fssh_block_cache_sync_etc(void *_cache, fssh_off_t blockNumber,
1069 	fssh_size_t numBlocks)
1070 {
1071 	block_cache *cache = (block_cache *)_cache;
1072 
1073 	// we will sync all dirty blocks to disk that have a completed
1074 	// transaction or no transaction only
1075 
1076 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1077 		fssh_panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
1078 			blockNumber, cache->max_blocks - 1);
1079 		return FSSH_B_BAD_VALUE;
1080 	}
1081 
1082 	BenaphoreLocker locker(&cache->lock);
1083 
1084 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
1085 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1086 			&blockNumber);
1087 		if (block == NULL)
1088 			continue;
1089 		if (block->previous_transaction != NULL
1090 			|| (block->transaction == NULL && block->is_dirty)) {
1091 			fssh_status_t status = write_cached_block(cache, block);
1092 			if (status != FSSH_B_OK)
1093 				return status;
1094 		}
1095 	}
1096 
1097 	return FSSH_B_OK;
1098 }
1099 
1100 
1101 fssh_status_t
1102 fssh_block_cache_make_writable(void *_cache, fssh_off_t blockNumber, int32_t transaction)
1103 {
1104 	block_cache *cache = (block_cache *)_cache;
1105 	BenaphoreLocker locker(&cache->lock);
1106 
1107 	if (cache->read_only)
1108 		fssh_panic("tried to make block writable on a read-only cache!");
1109 
1110 	// ToDo: this can be done better!
1111 	void *block = get_writable_cached_block(cache, blockNumber,
1112 		blockNumber, 1, transaction, false);
1113 	if (block != NULL) {
1114 		put_cached_block((block_cache *)_cache, blockNumber);
1115 		return FSSH_B_OK;
1116 	}
1117 
1118 	return FSSH_B_ERROR;
1119 }
1120 
1121 
1122 void *
1123 fssh_block_cache_get_writable_etc(void *_cache, fssh_off_t blockNumber, fssh_off_t base,
1124 	fssh_off_t length, int32_t transaction)
1125 {
1126 	block_cache *cache = (block_cache *)_cache;
1127 	BenaphoreLocker locker(&cache->lock);
1128 
1129 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
1130 		blockNumber, transaction));
1131 	if (cache->read_only)
1132 		fssh_panic("tried to get writable block on a read-only cache!");
1133 
1134 	return get_writable_cached_block(cache, blockNumber, base, length,
1135 		transaction, false);
1136 }
1137 
1138 
1139 void *
1140 fssh_block_cache_get_writable(void *_cache, fssh_off_t blockNumber, int32_t transaction)
1141 {
1142 	return fssh_block_cache_get_writable_etc(_cache, blockNumber,
1143 		blockNumber, 1, transaction);
1144 }
1145 
1146 
1147 void *
1148 fssh_block_cache_get_empty(void *_cache, fssh_off_t blockNumber, int32_t transaction)
1149 {
1150 	block_cache *cache = (block_cache *)_cache;
1151 	BenaphoreLocker locker(&cache->lock);
1152 
1153 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
1154 		blockNumber, transaction));
1155 	if (cache->read_only)
1156 		fssh_panic("tried to get empty writable block on a read-only cache!");
1157 
1158 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1159 				blockNumber, 1, transaction, true);
1160 }
1161 
1162 
1163 const void *
1164 fssh_block_cache_get_etc(void *_cache, fssh_off_t blockNumber, fssh_off_t base, fssh_off_t length)
1165 {
1166 	block_cache *cache = (block_cache *)_cache;
1167 	BenaphoreLocker locker(&cache->lock);
1168 	bool allocated;
1169 
1170 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
1171 	if (block == NULL)
1172 		return NULL;
1173 
1174 #ifdef DEBUG_CHANGED
1175 	if (block->compare == NULL)
1176 		block->compare = cache->Allocate();
1177 	if (block->compare != NULL)
1178 		memcpy(block->compare, block->current_data, cache->block_size);
1179 #endif
1180 	return block->current_data;
1181 }
1182 
1183 
1184 const void *
1185 fssh_block_cache_get(void *_cache, fssh_off_t blockNumber)
1186 {
1187 	return fssh_block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1188 }
1189 
1190 
1191 /*!
1192 	Changes the internal status of a writable block to \a dirty. This can be
1193 	helpful in case you realize you don't need to change that block anymore
1194 	for whatever reason.
1195 
1196 	Note, you must only use this function on blocks that were acquired
1197 	writable!
1198 */
1199 fssh_status_t
1200 fssh_block_cache_set_dirty(void *_cache, fssh_off_t blockNumber, bool dirty,
1201 	int32_t transaction)
1202 {
1203 	// TODO: not yet implemented
1204 	if (dirty)
1205 		fssh_panic("block_cache_set_dirty(): not yet implemented that way!\n");
1206 
1207 	return FSSH_B_OK;
1208 }
1209 
1210 
1211 void
1212 fssh_block_cache_put(void *_cache, fssh_off_t blockNumber)
1213 {
1214 	block_cache *cache = (block_cache *)_cache;
1215 	BenaphoreLocker locker(&cache->lock);
1216 
1217 	put_cached_block(cache, blockNumber);
1218 }
1219 
1220