xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 93aeb8c3bc3f13cb1f282e3e749258a23790d947)
1 /*
2  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "block_cache_private.h"
8 
9 #include <KernelExport.h>
10 #include <fs_cache.h>
11 
12 #include <block_cache.h>
13 #include <lock.h>
14 #include <util/kernel_cpp.h>
15 #include <util/DoublyLinkedList.h>
16 #include <util/AutoLock.h>
17 #include <util/khash.h>
18 
19 #include <unistd.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <errno.h>
23 
24 
25 // ToDo: this is a naive but growing implementation to test the API:
26 //	1) block reading/writing is not at all optimized for speed, it will
27 //	   just read and write single blocks.
28 //	2) the locking could be improved; getting a block should not need to
29 //	   wait for blocks to be written
30 //	3) dirty blocks are only written back if asked for
31 //	4) blocks are never removed yet
32 
33 //#define TRACE_BLOCK_CACHE
34 #ifdef TRACE_BLOCK_CACHE
35 #	define TRACE(x)	dprintf x
36 #else
37 #	define TRACE(x) ;
38 #endif
39 
40 
41 struct cache_transaction {
42 	cache_transaction();
43 
44 	cache_transaction *next;
45 	int32			id;
46 	int32			num_blocks;
47 	int32			sub_num_blocks;
48 	cached_block	*first_block;
49 	block_list		blocks;
50 	transaction_notification_hook notification_hook;
51 	void			*notification_data;
52 	bool			open;
53 	bool			has_sub_transaction;
54 };
55 
56 static status_t write_cached_block(block_cache *cache, cached_block *block, bool deleteTransaction = true);
57 
58 
59 //	#pragma mark - private transaction
60 
61 
62 cache_transaction::cache_transaction()
63 {
64 	num_blocks = 0;
65 	sub_num_blocks = 0;
66 	first_block = NULL;
67 	notification_hook = NULL;
68 	notification_data = NULL;
69 	open = true;
70 }
71 
72 
73 static int
74 transaction_compare(void *_transaction, const void *_id)
75 {
76 	cache_transaction *transaction = (cache_transaction *)_transaction;
77 	const int32 *id = (const int32 *)_id;
78 
79 	return transaction->id - *id;
80 }
81 
82 
83 static uint32
84 transaction_hash(void *_transaction, const void *_id, uint32 range)
85 {
86 	cache_transaction *transaction = (cache_transaction *)_transaction;
87 	const int32 *id = (const int32 *)_id;
88 
89 	if (transaction != NULL)
90 		return transaction->id % range;
91 
92 	return (uint32)*id % range;
93 }
94 
95 
96 static void
97 delete_transaction(block_cache *cache, cache_transaction *transaction)
98 {
99 	hash_remove(cache->transaction_hash, transaction);
100 	if (cache->last_transaction == transaction)
101 		cache->last_transaction = NULL;
102 
103 	delete transaction;
104 }
105 
106 
107 static cache_transaction *
108 lookup_transaction(block_cache *cache, int32 id)
109 {
110 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
111 }
112 
113 
114 //	#pragma mark - private block_cache
115 
116 
117 /* static */
118 int
119 cached_block::Compare(void *_cacheEntry, const void *_block)
120 {
121 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
122 	const off_t *block = (const off_t *)_block;
123 
124 	return cacheEntry->block_number - *block;
125 }
126 
127 
128 
129 /* static */
130 uint32
131 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
132 {
133 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
134 	const off_t *block = (const off_t *)_block;
135 
136 	if (cacheEntry != NULL)
137 		return cacheEntry->block_number % range;
138 
139 	return (uint64)*block % range;
140 }
141 
142 
143 //	#pragma mark -
144 
145 
146 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize)
147 	:
148 	hash(NULL),
149 	fd(_fd),
150 	max_blocks(numBlocks),
151 	block_size(blockSize),
152 	next_transaction_id(1),
153 	last_transaction(NULL),
154 	transaction_hash(NULL),
155 	ranges_hash(NULL),
156 	free_ranges(NULL)
157 {
158 	hash = hash_init(32, 0, &cached_block::Compare, &cached_block::Hash);
159 	if (hash == NULL)
160 		return;
161 
162 	transaction_hash = hash_init(16, 0, &transaction_compare, &::transaction_hash);
163 	if (transaction_hash == NULL)
164 		return;
165 
166 	ranges_hash = hash_init(16, 0, &block_range::Compare, &block_range::Hash);
167 	if (ranges_hash == NULL)
168 		return;
169 
170 	if (benaphore_init(&lock, "block cache") < B_OK)
171 		return;
172 
173 	chunk_size = max_c(blockSize, B_PAGE_SIZE);
174 	chunks_per_range = kBlockRangeSize / chunk_size;
175 	range_mask = (1UL << chunks_per_range) - 1;
176 	chunk_mask = (1UL << (chunk_size / blockSize)) - 1;
177 }
178 
179 
180 block_cache::~block_cache()
181 {
182 	benaphore_destroy(&lock);
183 
184 	hash_uninit(ranges_hash);
185 	hash_uninit(transaction_hash);
186 	hash_uninit(hash);
187 }
188 
189 
190 status_t
191 block_cache::InitCheck()
192 {
193 	if (lock.sem < B_OK)
194 		return lock.sem;
195 
196 	if (hash == NULL || transaction_hash == NULL || ranges_hash == NULL)
197 		return B_NO_MEMORY;
198 
199 	return B_OK;
200 }
201 
202 
203 block_range *
204 block_cache::GetFreeRange()
205 {
206 	if (free_ranges != NULL)
207 		return free_ranges;
208 
209 	// we need to allocate a new range
210 	block_range *range;
211 	if (block_range::NewBlockRange(this, &range) != B_OK) {
212 		// ToDo: free up space in existing ranges
213 		// We may also need to free ranges from other caches to get a free one
214 		// (if not, an active volume might have stolen all free ranges already)
215 		return NULL;
216 	}
217 
218 	hash_insert(ranges_hash, range);
219 	return range;
220 }
221 
222 
223 block_range *
224 block_cache::GetRange(void *address)
225 {
226 	return (block_range *)hash_lookup(ranges_hash, address);
227 }
228 
229 
230 void
231 block_cache::Free(void *address)
232 {
233 	if (address == NULL)
234 		return;
235 
236 	block_range *range = GetRange(address);
237 	ASSERT(range != NULL);
238 	range->Free(this, address);
239 }
240 
241 
242 void *
243 block_cache::Allocate()
244 {
245 	block_range *range = GetFreeRange();
246 	if (range == NULL)
247 		return NULL;
248 
249 	return range->Allocate(this);
250 }
251 
252 
253 void
254 block_cache::FreeBlock(cached_block *block)
255 {
256 	block_range *range = GetRange(block->data);
257 	ASSERT(range != NULL);
258 	range->Free(this, block);
259 
260 	Free(block->original);
261 	Free(block->parent_data);
262 #ifdef DEBUG_CHANGED
263 	Free(block->compare);
264 #endif
265 
266 	free(block);
267 }
268 
269 
270 cached_block *
271 block_cache::NewBlock(off_t blockNumber)
272 {
273 	cached_block *block = (cached_block *)malloc(sizeof(cached_block));
274 	if (block == NULL)
275 		return NULL;
276 
277 	block_range *range = GetFreeRange();
278 	if (range == NULL) {
279 		free(block);
280 		return NULL;
281 	}
282 
283 	range->Allocate(this, block);
284 
285 	block->block_number = blockNumber;
286 	block->lock = 0;
287 	block->transaction_next = NULL;
288 	block->transaction = block->previous_transaction = NULL;
289 	block->original = NULL;
290 	block->parent_data = NULL;
291 	block->is_dirty = false;
292 #ifdef DEBUG_CHANGED
293 	block->compare = NULL;
294 #endif
295 
296 	hash_insert(hash, block);
297 
298 	return block;
299 }
300 
301 
302 #ifdef DEBUG_CHANGED
303 
304 #define DUMPED_BLOCK_SIZE 16
305 
306 void
307 dumpBlock(const char *buffer, int size, const char *prefix)
308 {
309 	int i;
310 
311 	for (i = 0; i < size;) {
312 		int start = i;
313 
314 		dprintf(prefix);
315 		for (; i < start+DUMPED_BLOCK_SIZE; i++) {
316 			if (!(i % 4))
317 				dprintf(" ");
318 
319 			if (i >= size)
320 				dprintf("  ");
321 			else
322 				dprintf("%02x", *(unsigned char *)(buffer + i));
323 		}
324 		dprintf("  ");
325 
326 		for (i = start; i < start + DUMPED_BLOCK_SIZE; i++) {
327 			if (i < size) {
328 				char c = buffer[i];
329 
330 				if (c < 30)
331 					dprintf(".");
332 				else
333 					dprintf("%c", c);
334 			} else
335 				break;
336 		}
337 		dprintf("\n");
338 	}
339 }
340 #endif
341 
342 
343 static void
344 put_cached_block(block_cache *cache, cached_block *block)
345 {
346 #ifdef DEBUG_CHANGED
347 	if (!block->is_dirty && block->compare != NULL && memcmp(block->data, block->compare, cache->block_size)) {
348 		dprintf("new block:\n");
349 		dumpBlock((const char *)block->data, 256, "  ");
350 		dprintf("unchanged block:\n");
351 		dumpBlock((const char *)block->compare, 256, "  ");
352 		write_cached_block(cache, block);
353 		panic("block_cache: supposed to be clean block was changed!\n");
354 
355 		cache->Free(block->compare);
356 		block->compare = NULL;
357 	}
358 #endif
359 
360 	if (--block->lock == 0)
361 		;
362 //		block->data = cache->allocator->Release(block->data);
363 }
364 
365 
366 static void
367 put_cached_block(block_cache *cache, off_t blockNumber)
368 {
369 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
370 	if (block != NULL)
371 		put_cached_block(cache, block);
372 }
373 
374 
375 static cached_block *
376 get_cached_block(block_cache *cache, off_t blockNumber, bool &allocated, bool readBlock = true)
377 {
378 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
379 	allocated = false;
380 
381 	if (block == NULL) {
382 		// read block into cache
383 		block = cache->NewBlock(blockNumber);
384 		if (block == NULL)
385 			return NULL;
386 
387 		allocated = true;
388 	} else {
389 /*
390 		if (block->lock == 0 && block->data != NULL) {
391 			// see if the old block can be resurrected
392 			block->data = cache->allocator->Acquire(block->data);
393 		}
394 
395 		if (block->data == NULL) {
396 			// there is no block yet, but we need one
397 			block->data = cache->allocator->Get();
398 			if (block->data == NULL)
399 				return NULL;
400 
401 			allocated = true;
402 		}
403 */
404 	}
405 
406 	if (allocated && readBlock) {
407 		int32 blockSize = cache->block_size;
408 
409 		if (read_pos(cache->fd, blockNumber * blockSize, block->data, blockSize) < blockSize) {
410 			cache->FreeBlock(block);
411 			return NULL;
412 		}
413 	}
414 
415 	block->lock++;
416 	return block;
417 }
418 
419 
420 static void *
421 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base, off_t length,
422 	int32 transactionID, bool cleared)
423 {
424 	BenaphoreLocker locker(&cache->lock);
425 
426 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n", blockNumber, transactionID));
427 
428 	bool allocated;
429 	cached_block *block = get_cached_block(cache, blockNumber, allocated, !cleared);
430 	if (block == NULL)
431 		return NULL;
432 
433 	// if there is no transaction support, we just return the current block
434 	if (transactionID == -1) {
435 		if (cleared)
436 			memset(block->data, 0, cache->block_size);
437 
438 		block->is_dirty = true;
439 			// mark the block as dirty
440 
441 		return block->data;
442 	}
443 
444 	if (block->transaction != NULL && block->transaction->id != transactionID) {
445 		// ToDo: we have to wait here until the other transaction is done.
446 		//	Maybe we should even panic, since we can't prevent any deadlocks.
447 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
448 		put_cached_block(cache, block);
449 		return NULL;
450 	}
451 	if (block->transaction == NULL && transactionID != -1) {
452 		// get new transaction
453 		cache_transaction *transaction = lookup_transaction(cache, transactionID);
454 		if (transaction == NULL) {
455 			panic("get_writable_cached_block(): invalid transaction %ld!\n", transactionID);
456 			put_cached_block(cache, block);
457 			return NULL;
458 		}
459 		if (!transaction->open) {
460 			panic("get_writable_cached_block(): transaction already done!\n");
461 			put_cached_block(cache, block);
462 			return NULL;
463 		}
464 
465 		block->transaction = transaction;
466 
467 		// attach the block to the transaction block list
468 		block->transaction_next = transaction->first_block;
469 		transaction->first_block = block;
470 		transaction->num_blocks++;
471 	}
472 
473 	if (!(allocated && cleared) && block->original == NULL) {
474 		// we already have data, so we need to save it
475 		block->original = cache->Allocate();
476 		if (block->original == NULL) {
477 			put_cached_block(cache, block);
478 			return NULL;
479 		}
480 
481 		memcpy(block->original, block->data, cache->block_size);
482 	}
483 	if (block->parent_data == block->data) {
484 		// remember any previous contents for the parent transaction
485 		block->parent_data = cache->Allocate();
486 		if (block->parent_data == NULL) {
487 			// ToDo: maybe we should just continue the current transaction in this case...
488 			put_cached_block(cache, block);
489 			return NULL;
490 		}
491 
492 		memcpy(block->parent_data, block->data, cache->block_size);
493 		block->transaction->sub_num_blocks++;
494 	}
495 
496 	if (cleared)
497 		memset(block->data, 0, cache->block_size);
498 
499 	block->is_dirty = true;
500 
501 	return block->data;
502 }
503 
504 
505 static status_t
506 write_cached_block(block_cache *cache, cached_block *block, bool deleteTransaction)
507 {
508 	cache_transaction *previous = block->previous_transaction;
509 	int32 blockSize = cache->block_size;
510 
511 	void *data = previous && block->original ? block->original : block->data;
512 		// we first need to write back changes from previous transactions
513 
514 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
515 
516 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize, data, blockSize);
517 
518 	if (written < blockSize) {
519 		dprintf("could not write back block %Ld (%s)\n", block->block_number, strerror(errno));
520 		return B_IO_ERROR;
521 	}
522 
523 	if (data == block->data)
524 		block->is_dirty = false;
525 
526 	if (previous != NULL) {
527 		previous->blocks.Remove(block);
528 		block->previous_transaction = NULL;
529 
530 		// Has the previous transation been finished with that write?
531 		if (--previous->num_blocks == 0) {
532 			TRACE(("cache transaction %ld finished!\n", previous->id));
533 
534 			if (previous->notification_hook != NULL)
535 				previous->notification_hook(previous->id, previous->notification_data);
536 
537 			if (deleteTransaction)
538 				delete_transaction(cache, previous);
539 		}
540 	}
541 
542 	return B_OK;
543 }
544 
545 
546 extern "C" status_t
547 block_cache_init(void)
548 {
549 	return init_block_allocator();
550 }
551 
552 
553 //	#pragma mark - public transaction
554 
555 
556 extern "C" int32
557 cache_start_transaction(void *_cache)
558 {
559 	block_cache *cache = (block_cache *)_cache;
560 
561 	if (cache->last_transaction && cache->last_transaction->open)
562 		panic("last transaction (%ld) still open!\n", cache->last_transaction->id);
563 
564 	cache_transaction *transaction = new cache_transaction;
565 	if (transaction == NULL)
566 		return B_NO_MEMORY;
567 
568 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
569 	cache->last_transaction = transaction;
570 
571 	TRACE(("cache_transaction_start(): id %ld started\n", transaction->id));
572 
573 	BenaphoreLocker locker(&cache->lock);
574 	hash_insert(cache->transaction_hash, transaction);
575 
576 	return transaction->id;
577 }
578 
579 
580 extern "C" status_t
581 cache_sync_transaction(void *_cache, int32 id)
582 {
583 	block_cache *cache = (block_cache *)_cache;
584 	BenaphoreLocker locker(&cache->lock);
585 	status_t status = B_ENTRY_NOT_FOUND;
586 
587 	hash_iterator iterator;
588 	hash_open(cache->transaction_hash, &iterator);
589 
590 	cache_transaction *transaction;
591 	while ((transaction = (cache_transaction *)hash_next(cache->transaction_hash, &iterator)) != NULL) {
592 		// ToDo: fix hash interface to make this easier
593 
594 		if (transaction->id <= id && !transaction->open) {
595 			while (transaction->num_blocks > 0) {
596 				status = write_cached_block(cache, transaction->blocks.Head(), false);
597 				if (status != B_OK)
598 					return status;
599 			}
600 			delete_transaction(cache, transaction);
601 			hash_rewind(cache->transaction_hash, &iterator);
602 		}
603 	}
604 
605 	hash_close(cache->transaction_hash, &iterator, false);
606 	return B_OK;
607 }
608 
609 
610 extern "C" status_t
611 cache_end_transaction(void *_cache, int32 id, transaction_notification_hook hook, void *data)
612 {
613 	block_cache *cache = (block_cache *)_cache;
614 	BenaphoreLocker locker(&cache->lock);
615 
616 	TRACE(("cache_end_transaction(id = %ld)\n", id));
617 
618 	cache_transaction *transaction = lookup_transaction(cache, id);
619 	if (transaction == NULL) {
620 		panic("cache_end_transaction(): invalid transaction ID\n");
621 		return B_BAD_VALUE;
622 	}
623 
624 	transaction->notification_hook = hook;
625 	transaction->notification_data = data;
626 
627 	// iterate through all blocks and free the unchanged original contents
628 
629 	cached_block *block = transaction->first_block, *next;
630 	for (; block != NULL; block = next) {
631 		next = block->transaction_next;
632 
633 		if (block->previous_transaction != NULL) {
634 			// need to write back pending changes
635 			write_cached_block(cache, block);
636 		}
637 
638 		if (block->original != NULL) {
639 			cache->Free(block->original);
640 			block->original = NULL;
641 		}
642 		if (transaction->has_sub_transaction) {
643 			if (block->parent_data != block->data)
644 				cache->Free(block->parent_data);
645 			block->parent_data = NULL;
646 		}
647 
648 		// move the block to the previous transaction list
649 		transaction->blocks.Add(block);
650 
651 		block->previous_transaction = transaction;
652 		block->transaction_next = NULL;
653 		block->transaction = NULL;
654 	}
655 
656 	transaction->open = false;
657 
658 	return B_OK;
659 }
660 
661 
662 extern "C" status_t
663 cache_abort_transaction(void *_cache, int32 id)
664 {
665 	block_cache *cache = (block_cache *)_cache;
666 	BenaphoreLocker locker(&cache->lock);
667 
668 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
669 
670 	cache_transaction *transaction = lookup_transaction(cache, id);
671 	if (transaction == NULL) {
672 		panic("cache_abort_transaction(): invalid transaction ID\n");
673 		return B_BAD_VALUE;
674 	}
675 
676 	// iterate through all blocks and restore their original contents
677 
678 	cached_block *block = transaction->first_block, *next;
679 	for (; block != NULL; block = next) {
680 		next = block->transaction_next;
681 
682 		if (block->original != NULL) {
683 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
684 				transaction->id, block->block_number));
685 			memcpy(block->data, block->original, cache->block_size);
686 			cache->Free(block->original);
687 			block->original = NULL;
688 		}
689 		if (transaction->has_sub_transaction && block->parent_data != block->data) {
690 			cache->Free(block->parent_data);
691 			block->parent_data = NULL;
692 		}
693 
694 		block->transaction_next = NULL;
695 		block->transaction = NULL;
696 	}
697 
698 	delete_transaction(cache, transaction);
699 	return B_OK;
700 }
701 
702 
703 extern "C" int32
704 cache_detach_sub_transaction(void *_cache, int32 id,
705 	transaction_notification_hook hook, void *data)
706 {
707 	block_cache *cache = (block_cache *)_cache;
708 	BenaphoreLocker locker(&cache->lock);
709 
710 	TRACE(("cache_end_transaction(id = %ld)\n", id));
711 
712 	cache_transaction *transaction = lookup_transaction(cache, id);
713 	if (transaction == NULL) {
714 		panic("cache_end_transaction(): invalid transaction ID\n");
715 		return B_BAD_VALUE;
716 	}
717 	if (!transaction->has_sub_transaction)
718 		return B_BAD_VALUE;
719 
720 	// create a new transaction for the sub transaction
721 	cache_transaction *newTransaction = new cache_transaction;
722 	if (transaction == NULL)
723 		return B_NO_MEMORY;
724 
725 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
726 
727 	transaction->notification_hook = hook;
728 	transaction->notification_data = data;
729 
730 	// iterate through all blocks and free the unchanged original contents
731 
732 	cached_block *block = transaction->first_block, *next, *last = NULL;
733 	for (; block != NULL; block = next) {
734 		next = block->transaction_next;
735 
736 		if (block->previous_transaction != NULL) {
737 			// need to write back pending changes
738 			write_cached_block(cache, block);
739 		}
740 
741 		if (block->original != NULL) {
742 			cache->Free(block->original);
743 			block->original = NULL;
744 		}
745 		if (block->parent_data != block->data) {
746 			// we need to move this block over to the new transaction
747 			block->original = block->parent_data;
748 			if (last == NULL)
749 				newTransaction->first_block = block;
750 			else
751 				last->transaction_next = block;
752 
753 			last = block;
754 		}
755 		block->parent_data = NULL;
756 
757 		// move the block to the previous transaction list
758 		transaction->blocks.Add(block);
759 
760 		block->previous_transaction = transaction;
761 		block->transaction_next = NULL;
762 		block->transaction = newTransaction;
763 	}
764 
765 	transaction->open = false;
766 
767 	hash_insert(cache->transaction_hash, newTransaction);
768 	cache->last_transaction = newTransaction;
769 
770 	return B_OK;
771 }
772 
773 
774 extern "C" status_t
775 cache_abort_sub_transaction(void *_cache, int32 id)
776 {
777 	block_cache *cache = (block_cache *)_cache;
778 	BenaphoreLocker locker(&cache->lock);
779 
780 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
781 
782 	cache_transaction *transaction = lookup_transaction(cache, id);
783 	if (transaction == NULL) {
784 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
785 		return B_BAD_VALUE;
786 	}
787 	if (!transaction->has_sub_transaction)
788 		return B_BAD_VALUE;
789 
790 	// revert all changes back to the version of the parent
791 
792 	cached_block *block = transaction->first_block, *next;
793 	for (; block != NULL; block = next) {
794 		next = block->transaction_next;
795 
796 		if (block->parent_data != block->data) {
797 			// the block has been changed and must be restored
798 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
799 				transaction->id, block->block_number));
800 			memcpy(block->data, block->parent_data, cache->block_size);
801 			cache->Free(block->parent_data);
802 		}
803 
804 		block->parent_data = NULL;
805 	}
806 
807 	// all subsequent changes will go into the main transaction
808 	transaction->has_sub_transaction = false;
809 	return B_OK;
810 }
811 
812 
813 extern "C" status_t
814 cache_start_sub_transaction(void *_cache, int32 id)
815 {
816 	block_cache *cache = (block_cache *)_cache;
817 	BenaphoreLocker locker(&cache->lock);
818 
819 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
820 
821 	cache_transaction *transaction = lookup_transaction(cache, id);
822 	if (transaction == NULL) {
823 		panic("cache_start_sub_transaction(): invalid transaction ID\n");
824 		return B_BAD_VALUE;
825 	}
826 
827 	// move all changed blocks up to the parent
828 
829 	cached_block *block = transaction->first_block, *next;
830 	for (; block != NULL; block = next) {
831 		next = block->transaction_next;
832 
833 		if (transaction->has_sub_transaction && block->parent_data != block->data) {
834 			// there already is an older sub transaction - we acknowledge
835 			// its changes and move its blocks up to the parent
836 			cache->Free(block->parent_data);
837 		}
838 
839 		// we "allocate" the parent data lazily, that means, we don't copy
840 		// the data (and allocate memory for it) until we need to
841 		block->parent_data = block->data;
842 	}
843 
844 	// all subsequent changes will go into the sub transaction
845 	transaction->has_sub_transaction = true;
846 	transaction->sub_num_blocks = 0;
847 
848 	return B_OK;
849 }
850 
851 
852 extern "C" status_t
853 cache_next_block_in_transaction(void *_cache, int32 id, uint32 *_cookie, off_t *_blockNumber,
854 	void **_data, void **_unchangedData)
855 {
856 	cached_block *block = (cached_block *)*_cookie;
857 	block_cache *cache = (block_cache *)_cache;
858 
859 	BenaphoreLocker locker(&cache->lock);
860 
861 	cache_transaction *transaction = lookup_transaction(cache, id);
862 	if (transaction == NULL)
863 		return B_BAD_VALUE;
864 
865 	if (block == NULL)
866 		block = transaction->first_block;
867 	else
868 		block = block->transaction_next;
869 
870 	if (block == NULL)
871 		return B_ENTRY_NOT_FOUND;
872 
873 	if (_blockNumber)
874 		*_blockNumber = block->block_number;
875 	if (_data)
876 		*_data = block->data;
877 	if (_unchangedData)
878 		*_unchangedData = block->original;
879 
880 	*_cookie = (uint32)block;
881 	return B_OK;
882 }
883 
884 
885 extern "C" int32
886 cache_blocks_in_transaction(void *_cache, int32 id)
887 {
888 	block_cache *cache = (block_cache *)_cache;
889 	BenaphoreLocker locker(&cache->lock);
890 
891 	cache_transaction *transaction = lookup_transaction(cache, id);
892 	if (transaction == NULL)
893 		return B_BAD_VALUE;
894 
895 	return transaction->num_blocks;
896 }
897 
898 
899 extern "C" int32
900 cache_blocks_in_sub_transaction(void *_cache, int32 id)
901 {
902 	block_cache *cache = (block_cache *)_cache;
903 	BenaphoreLocker locker(&cache->lock);
904 
905 	cache_transaction *transaction = lookup_transaction(cache, id);
906 	if (transaction == NULL)
907 		return B_BAD_VALUE;
908 
909 	return transaction->sub_num_blocks;
910 }
911 
912 
913 //	#pragma mark - public block cache
914 //	public interface
915 
916 
917 extern "C" void
918 block_cache_delete(void *_cache, bool allowWrites)
919 {
920 	block_cache *cache = (block_cache *)_cache;
921 
922 	if (allowWrites)
923 		block_cache_sync(cache);
924 
925 	// free all blocks
926 
927 	uint32 cookie = 0;
928 	cached_block *block;
929 	while ((block = (cached_block *)hash_remove_first(cache->hash, &cookie)) != NULL) {
930 		cache->FreeBlock(block);
931 	}
932 
933 	// free all transactions (they will all be aborted)
934 
935 	cookie = 0;
936 	cache_transaction *transaction;
937 	while ((transaction = (cache_transaction *)hash_remove_first(cache->transaction_hash, &cookie)) != NULL) {
938 		delete transaction;
939 	}
940 
941 	delete cache;
942 }
943 
944 
945 extern "C" void *
946 block_cache_create(int fd, off_t numBlocks, size_t blockSize)
947 {
948 	block_cache *cache = new block_cache(fd, numBlocks, blockSize);
949 	if (cache == NULL)
950 		return NULL;
951 
952 	if (cache->InitCheck() != B_OK) {
953 		delete cache;
954 		return NULL;
955 	}
956 
957 	return cache;
958 }
959 
960 
961 extern "C" status_t
962 block_cache_sync(void *_cache)
963 {
964 	block_cache *cache = (block_cache *)_cache;
965 
966 	// we will sync all dirty blocks to disk that have a completed
967 	// transaction or no transaction only
968 
969 	BenaphoreLocker locker(&cache->lock);
970 	hash_iterator iterator;
971 	hash_open(cache->hash, &iterator);
972 
973 	cached_block *block;
974 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
975 		if (block->previous_transaction != NULL
976 			|| (block->transaction == NULL && block->is_dirty)) {
977 			status_t status = write_cached_block(cache, block);
978 			if (status != B_OK)
979 				return status;
980 		}
981 	}
982 
983 	hash_close(cache->hash, &iterator, false);
984 	return B_OK;
985 }
986 
987 
988 extern "C" status_t
989 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
990 {
991 	// ToDo: this can be done better!
992 	void *block = block_cache_get_writable_etc(_cache, blockNumber, blockNumber, 1, transaction);
993 	if (block != NULL) {
994 		put_cached_block((block_cache *)_cache, blockNumber);
995 		return B_OK;
996 	}
997 
998 	return B_ERROR;
999 }
1000 
1001 
1002 extern "C" void *
1003 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base, off_t length,
1004 	int32 transaction)
1005 {
1006 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n", blockNumber, transaction));
1007 
1008 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1009 				base, length, transaction, false);
1010 }
1011 
1012 
1013 extern "C" void *
1014 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
1015 {
1016 	return block_cache_get_writable_etc(_cache, blockNumber, blockNumber, 1, transaction);
1017 }
1018 
1019 
1020 extern "C" void *
1021 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
1022 {
1023 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n", blockNumber, transaction));
1024 
1025 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
1026 				blockNumber, 1, transaction, true);
1027 }
1028 
1029 
1030 extern "C" const void *
1031 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
1032 {
1033 	block_cache *cache = (block_cache *)_cache;
1034 	BenaphoreLocker locker(&cache->lock);
1035 	bool allocated;
1036 
1037 	cached_block *block = get_cached_block(cache, blockNumber, allocated);
1038 	if (block == NULL)
1039 		return NULL;
1040 
1041 #ifdef DEBUG_CHANGED
1042 	if (block->compare == NULL)
1043 		block->compare = cache->Allocate();
1044 	if (block->compare != NULL)
1045 		memcpy(block->compare, block->data, cache->block_size);
1046 #endif
1047 	return block->data;
1048 }
1049 
1050 
1051 extern "C" const void *
1052 block_cache_get(void *_cache, off_t blockNumber)
1053 {
1054 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
1055 }
1056 
1057 
1058 extern "C" status_t
1059 block_cache_set_dirty(void *_cache, off_t blockNumber, bool isDirty, int32 transaction)
1060 {
1061 	// not yet implemented
1062 	// Note, you must only use this function on blocks that were acquired writable!
1063 	if (isDirty)
1064 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
1065 
1066 	return B_OK;
1067 }
1068 
1069 
1070 extern "C" void
1071 block_cache_put(void *_cache, off_t blockNumber)
1072 {
1073 	block_cache *cache = (block_cache *)_cache;
1074 	BenaphoreLocker locker(&cache->lock);
1075 
1076 	put_cached_block(cache, blockNumber);
1077 }
1078 
1079