xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 03187b607b2b5eec7ee059f1ead09bdba14991fb)
1 /*
2  * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <signal.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <errno.h>
14 
15 #include <KernelExport.h>
16 #include <fs_cache.h>
17 
18 #include <condition_variable.h>
19 #include <lock.h>
20 #include <low_resource_manager.h>
21 #include <slab/Slab.h>
22 #include <tracing.h>
23 #include <util/kernel_cpp.h>
24 #include <util/DoublyLinkedList.h>
25 #include <util/AutoLock.h>
26 #include <util/khash.h>
27 
28 #include "kernel_debug_config.h"
29 
30 
31 // TODO: this is a naive but growing implementation to test the API:
32 //	1) block reading/writing is not at all optimized for speed, it will
33 //	   just read and write single blocks.
34 //	2) the locking could be improved; getting a block should not need to
35 //	   wait for blocks to be written
36 // TODO: the retrieval/copy of the original data could be delayed until the
37 //		new data must be written, ie. in low memory situations.
38 
39 //#define TRACE_BLOCK_CACHE
40 #ifdef TRACE_BLOCK_CACHE
41 #	define TRACE(x)	dprintf x
42 #else
43 #	define TRACE(x) ;
44 #endif
45 
46 // This macro is used for fatal situations that are acceptable in a running
47 // system, like out of memory situations - should only panic for debugging.
48 #define FATAL(x) panic x
49 
50 static const bigtime_t kTransactionIdleTime = 2000000LL;
51 	// a transaction is considered idle after 2 seconds of inactivity
52 
53 
54 struct cache_transaction;
55 struct cached_block;
56 struct block_cache;
57 typedef DoublyLinkedListLink<cached_block> block_link;
58 
59 struct cached_block {
60 	cached_block*	next;			// next in hash
61 	cached_block*	transaction_next;
62 	block_link		link;
63 	off_t			block_number;
64 	void*			current_data;
65 	void*			original_data;
66 	void*			parent_data;
67 #if BLOCK_CACHE_DEBUG_CHANGED
68 	void*			compare;
69 #endif
70 	int32			ref_count;
71 	int32			accessed;
72 	bool			busy : 1;
73 	bool			is_writing : 1;
74 	bool			is_dirty : 1;
75 	bool			unused : 1;
76 	bool			discard : 1;
77 	cache_transaction* transaction;
78 	cache_transaction* previous_transaction;
79 
80 	static int Compare(void* _cacheEntry, const void* _block);
81 	static uint32 Hash(void* _cacheEntry, const void* _block, uint32 range);
82 };
83 
84 typedef DoublyLinkedList<cached_block,
85 	DoublyLinkedListMemberGetLink<cached_block,
86 		&cached_block::link> > block_list;
87 
88 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
89 	int32			transaction_id;
90 	int32			events_pending;
91 	int32			events;
92 	transaction_notification_hook hook;
93 	void*			data;
94 	bool			delete_after_event;
95 };
96 
97 typedef DoublyLinkedList<cache_notification> NotificationList;
98 
99 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
100 	hash_table*		hash;
101 	mutex			lock;
102 	int				fd;
103 	off_t			max_blocks;
104 	size_t			block_size;
105 	int32			next_transaction_id;
106 	cache_transaction* last_transaction;
107 	hash_table*		transaction_hash;
108 
109 	object_cache*	buffer_cache;
110 	block_list		unused_blocks;
111 
112 	uint32			num_dirty_blocks;
113 	bool			read_only;
114 
115 	NotificationList pending_notifications;
116 	ConditionVariable condition_variable;
117 
118 					block_cache(int fd, off_t numBlocks, size_t blockSize,
119 						bool readOnly);
120 					~block_cache();
121 
122 	status_t		Init();
123 
124 	void			Free(void* buffer);
125 	void*			Allocate();
126 	void			RemoveUnusedBlocks(int32 maxAccessed = LONG_MAX,
127 						int32 count = LONG_MAX);
128 	void			RemoveBlock(cached_block* block);
129 	void			DiscardBlock(cached_block* block);
130 	void			FreeBlock(cached_block* block);
131 	cached_block*	NewBlock(off_t blockNumber);
132 
133 
134 private:
135 	static void		_LowMemoryHandler(void* data, uint32 resources,
136 						int32 level);
137 	cached_block*	_GetUnusedBlock();
138 };
139 
140 struct cache_listener;
141 typedef DoublyLinkedListLink<cache_listener> listener_link;
142 
143 struct cache_listener : cache_notification {
144 	listener_link	link;
145 };
146 
147 typedef DoublyLinkedList<cache_listener,
148 	DoublyLinkedListMemberGetLink<cache_listener,
149 		&cache_listener::link> > ListenerList;
150 
151 struct cache_transaction {
152 	cache_transaction();
153 
154 	cache_transaction* next;
155 	int32			id;
156 	int32			num_blocks;
157 	int32			main_num_blocks;
158 	int32			sub_num_blocks;
159 	cached_block*	first_block;
160 	block_list		blocks;
161 	ListenerList	listeners;
162 	bool			open;
163 	bool			has_sub_transaction;
164 	bigtime_t		last_used;
165 };
166 
167 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
168 namespace BlockTracing {
169 
170 class Action : public AbstractTraceEntry {
171 public:
172 	Action(block_cache* cache, cached_block* block)
173 		:
174 		fCache(cache),
175 		fBlockNumber(block->block_number),
176 		fIsDirty(block->is_dirty),
177 		fHasOriginal(block->original_data != NULL),
178 		fHasParent(block->parent_data != NULL),
179 		fTransactionID(-1),
180 		fPreviousID(-1)
181 	{
182 		if (block->transaction != NULL)
183 			fTransactionID = block->transaction->id;
184 		if (block->previous_transaction != NULL)
185 			fPreviousID = block->previous_transaction->id;
186 	}
187 
188 	virtual void AddDump(TraceOutput& out)
189 	{
190 		out.Print("block cache %p, %s %Ld, %c%c%c transaction %ld "
191 			"(previous id %ld)\n", fCache, _Action(), fBlockNumber,
192 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
193 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
194 	}
195 
196 	virtual const char* _Action() const = 0;
197 
198 private:
199 	block_cache*		fCache;
200 	uint64				fBlockNumber;
201 	bool				fIsDirty;
202 	bool				fHasOriginal;
203 	bool				fHasParent;
204 	int32				fTransactionID;
205 	int32				fPreviousID;
206 };
207 
208 class Get : public Action {
209 public:
210 	Get(block_cache* cache, cached_block* block)
211 		:
212 		Action(cache, block)
213 	{
214 		Initialized();
215 	}
216 
217 	virtual const char* _Action() const { return "get"; }
218 };
219 
220 class Put : public Action {
221 public:
222 	Put(block_cache* cache, cached_block* block)
223 		:
224 		Action(cache, block)
225 	{
226 		Initialized();
227 	}
228 
229 	virtual const char* _Action() const { return "put"; }
230 };
231 
232 class Read : public Action {
233 public:
234 	Read(block_cache* cache, cached_block* block)
235 		:
236 		Action(cache, block)
237 	{
238 		Initialized();
239 	}
240 
241 	virtual const char* _Action() const { return "read"; }
242 };
243 
244 class Write : public Action {
245 public:
246 	Write(block_cache* cache, cached_block* block)
247 		:
248 		Action(cache, block)
249 	{
250 		Initialized();
251 	}
252 
253 	virtual const char* _Action() const { return "write"; }
254 };
255 
256 class Flush : public Action {
257 public:
258 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
259 		:
260 		Action(cache, block),
261 		fGetUnused(getUnused)
262 	{
263 		Initialized();
264 	}
265 
266 	virtual const char* _Action() const
267 		{ return fGetUnused ? "get-unused" : "flush"; }
268 
269 private:
270 	bool	fGetUnused;
271 };
272 
273 class Error : public AbstractTraceEntry {
274 public:
275 	Error(block_cache* cache, uint64 blockNumber, const char* message,
276 			status_t status = B_OK)
277 		:
278 		fCache(cache),
279 		fBlockNumber(blockNumber),
280 		fMessage(message),
281 		fStatus(status)
282 	{
283 		Initialized();
284 	}
285 
286 	virtual void AddDump(TraceOutput& out)
287 	{
288 		out.Print("block cache %p, error %Ld, %s%s%s",
289 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
290 			fStatus != B_OK ? strerror(fStatus) : "");
291 	}
292 
293 private:
294 	block_cache*	fCache;
295 	uint64			fBlockNumber;
296 	const char*		fMessage;
297 	status_t		fStatus;
298 };
299 
300 #if BLOCK_CACHE_BLOCK_TRACING >= 2
301 class BlockData : public AbstractTraceEntry {
302 public:
303 	enum {
304 		kCurrent	= 0x01,
305 		kParent		= 0x02,
306 		kOriginal	= 0x04
307 	};
308 
309 	BlockData(block_cache* cache, cached_block* block, const char* message)
310 		:
311 		fCache(cache),
312 		fSize(cache->block_size),
313 		fBlockNumber(block->block_number),
314 		fMessage(message)
315 	{
316 		_Allocate(fCurrent, block->current_data);
317 		_Allocate(fParent, block->parent_data);
318 		_Allocate(fOriginal, block->original_data);
319 
320 #if KTRACE_PRINTF_STACK_TRACE
321 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
322 			false);
323 #endif
324 
325 		Initialized();
326 	}
327 
328 	virtual void AddDump(TraceOutput& out)
329 	{
330 		out.Print("block cache %p, block %Ld, data %c%c%c: %s",
331 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
332 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
333 			fMessage);
334 	}
335 
336 #if KTRACE_PRINTF_STACK_TRACE
337 	virtual void DumpStackTrace(TraceOutput& out)
338 	{
339 		out.PrintStackTrace(fStackTrace);
340 	}
341 #endif
342 
343 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
344 	{
345 		if ((which & kCurrent) != 0)
346 			DumpBlock(kCurrent, offset, size);
347 		if ((which & kParent) != 0)
348 			DumpBlock(kParent, offset, size);
349 		if ((which & kOriginal) != 0)
350 			DumpBlock(kOriginal, offset, size);
351 	}
352 
353 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
354 	{
355 		if (offset > fSize) {
356 			kprintf("invalid offset (block size %lu)\n", fSize);
357 			return;
358 		}
359 		if (offset + size > fSize)
360 			size = fSize - offset;
361 
362 		const char* label;
363 		uint8* data;
364 
365 		if ((which & kCurrent) != 0) {
366 			label = "current";
367 			data = fCurrent;
368 		} else if ((which & kParent) != 0) {
369 			label = "parent";
370 			data = fParent;
371 		} else if ((which & kOriginal) != 0) {
372 			label = "original";
373 			data = fOriginal;
374 		} else
375 			return;
376 
377 		kprintf("%s: offset %lu, %lu bytes\n", label, offset, size);
378 
379 		static const uint32 kBlockSize = 16;
380 		data += offset;
381 
382 		for (uint32 i = 0; i < size;) {
383 			int start = i;
384 
385 			kprintf("  %04lx ", i);
386 			for (; i < start + kBlockSize; i++) {
387 				if (!(i % 4))
388 					kprintf(" ");
389 
390 				if (i >= size)
391 					kprintf("  ");
392 				else
393 					kprintf("%02x", data[i]);
394 			}
395 
396 			kprintf("\n");
397 		}
398 	}
399 
400 private:
401 	void _Allocate(uint8*& target, void* source)
402 	{
403 		if (source == NULL) {
404 			target = NULL;
405 			return;
406 		}
407 
408 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
409 	}
410 
411 	block_cache*	fCache;
412 	uint32			fSize;
413 	uint64			fBlockNumber;
414 	const char*		fMessage;
415 	uint8*			fCurrent;
416 	uint8*			fParent;
417 	uint8*			fOriginal;
418 #if KTRACE_PRINTF_STACK_TRACE
419 	tracing_stack_trace* fStackTrace;
420 #endif
421 };
422 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
423 
424 }	// namespace BlockTracing
425 
426 #	define TB(x) new(std::nothrow) BlockTracing::x;
427 #else
428 #	define TB(x) ;
429 #endif
430 
431 #if BLOCK_CACHE_BLOCK_TRACING >= 2
432 #	define TB2(x) new(std::nothrow) BlockTracing::x;
433 #else
434 #	define TB2(x) ;
435 #endif
436 
437 
438 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
439 namespace TransactionTracing {
440 
441 class Action : public AbstractTraceEntry {
442 public:
443 	Action(const char* label, block_cache* cache,
444 			cache_transaction* transaction)
445 		:
446 		fCache(cache),
447 		fTransaction(transaction),
448 		fID(transaction->id),
449 		fSub(transaction->has_sub_transaction),
450 		fNumBlocks(transaction->num_blocks),
451 		fSubNumBlocks(transaction->sub_num_blocks)
452 	{
453 		strlcpy(fLabel, label, sizeof(fLabel));
454 		Initialized();
455 	}
456 
457 	virtual void AddDump(TraceOutput& out)
458 	{
459 		out.Print("block cache %p, %s transaction %p (id %ld)%s"
460 			", %ld/%ld blocks", fCache, fLabel, fTransaction, fID,
461 			fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
462 	}
463 
464 private:
465 	char				fLabel[12];
466 	block_cache*		fCache;
467 	cache_transaction*	fTransaction;
468 	int32				fID;
469 	bool				fSub;
470 	int32				fNumBlocks;
471 	int32				fSubNumBlocks;
472 };
473 
474 class Detach : public AbstractTraceEntry {
475 public:
476 	Detach(block_cache* cache, cache_transaction* transaction,
477 			cache_transaction* newTransaction)
478 		:
479 		fCache(cache),
480 		fTransaction(transaction),
481 		fID(transaction->id),
482 		fSub(transaction->has_sub_transaction),
483 		fNewTransaction(newTransaction),
484 		fNewID(newTransaction->id)
485 	{
486 		Initialized();
487 	}
488 
489 	virtual void AddDump(TraceOutput& out)
490 	{
491 		out.Print("block cache %p, detach transaction %p (id %ld)"
492 			"from transaction %p (id %ld)%s",
493 			fCache, fNewTransaction, fNewID, fTransaction, fID,
494 			fSub ? " sub" : "");
495 	}
496 
497 private:
498 	block_cache*		fCache;
499 	cache_transaction*	fTransaction;
500 	int32				fID;
501 	bool				fSub;
502 	cache_transaction*	fNewTransaction;
503 	int32				fNewID;
504 };
505 
506 class Abort : public AbstractTraceEntry {
507 public:
508 	Abort(block_cache* cache, cache_transaction* transaction)
509 		:
510 		fCache(cache),
511 		fTransaction(transaction),
512 		fID(transaction->id),
513 		fNumBlocks(0)
514 	{
515 		bool isSub = transaction->has_sub_transaction;
516 		fNumBlocks = isSub ? transaction->sub_num_blocks
517 			: transaction->num_blocks;
518 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
519 		if (fBlocks != NULL) {
520 			cached_block* block = transaction->first_block;
521 			for (int32 i = 0; block != NULL && i < fNumBlocks;
522 					block = block->transaction_next) {
523 				fBlocks[i++] = block->block_number;
524 			}
525 		} else
526 			fNumBlocks = 0;
527 
528 #if KTRACE_PRINTF_STACK_TRACE
529 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
530 			false);
531 #endif
532 
533 		Initialized();
534 	}
535 
536 	virtual void AddDump(TraceOutput& out)
537 	{
538 		out.Print("block cache %p, abort transaction "
539 			"%p (id %ld), blocks", fCache, fTransaction, fID);
540 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
541 			out.Print(" %Ld", fBlocks[i]);
542 	}
543 
544 #if KTRACE_PRINTF_STACK_TRACE
545 	virtual void DumpStackTrace(TraceOutput& out)
546 	{
547 		out.PrintStackTrace(fStackTrace);
548 	}
549 #endif
550 
551 private:
552 	block_cache*		fCache;
553 	cache_transaction*	fTransaction;
554 	int32				fID;
555 	off_t*				fBlocks;
556 	int32				fNumBlocks;
557 #if KTRACE_PRINTF_STACK_TRACE
558 	tracing_stack_trace* fStackTrace;
559 #endif
560 };
561 
562 }	// namespace TransactionTracing
563 
564 #	define T(x) new(std::nothrow) TransactionTracing::x;
565 #else
566 #	define T(x) ;
567 #endif
568 
569 
570 static status_t write_cached_block(block_cache* cache, cached_block* block,
571 	bool deleteTransaction = true);
572 
573 
574 static DoublyLinkedList<block_cache> sCaches;
575 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
576 static mutex sCachesMemoryUseLock
577 	= MUTEX_INITIALIZER("block caches memory use");
578 static size_t sUsedMemory;
579 static sem_id sEventSemaphore;
580 static mutex sNotificationsLock
581 	= MUTEX_INITIALIZER("block cache notifications");
582 static thread_id sNotifierWriterThread;
583 static DoublyLinkedListLink<block_cache> sMarkCache;
584 	// TODO: this only works if the link is the first entry of block_cache
585 static object_cache* sBlockCache;
586 
587 
588 //	#pragma mark - notifications/listener
589 
590 
591 /*!	Checks wether or not this is an event that closes a transaction. */
592 static inline bool
593 is_closing_event(int32 event)
594 {
595 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
596 }
597 
598 
599 static inline bool
600 is_written_event(int32 event)
601 {
602 	return (event & TRANSACTION_WRITTEN) != 0;
603 }
604 
605 
606 /*!	From the specified \a notification, it will remove the lowest pending
607 	event, and return that one in \a _event.
608 	If there is no pending event anymore, it will return \c false.
609 */
610 static bool
611 get_next_pending_event(cache_notification* notification, int32* _event)
612 {
613 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
614 		int32 pending = atomic_and(&notification->events_pending,
615 			~eventMask);
616 
617 		bool more = (pending & ~eventMask) != 0;
618 
619 		if ((pending & eventMask) != 0) {
620 			*_event = eventMask;
621 			return more;
622 		}
623 	}
624 
625 	return false;
626 }
627 
628 
629 static void
630 flush_pending_notifications(block_cache* cache)
631 {
632 	ASSERT_LOCKED_MUTEX(&sCachesLock);
633 
634 	while (true) {
635 		MutexLocker locker(sNotificationsLock);
636 
637 		cache_notification* notification = cache->pending_notifications.Head();
638 		if (notification == NULL)
639 			return;
640 
641 		bool deleteAfterEvent = false;
642 		int32 event = -1;
643 		if (!get_next_pending_event(notification, &event)) {
644 			// remove the notification if this was the last pending event
645 			cache->pending_notifications.Remove(notification);
646 			deleteAfterEvent = notification->delete_after_event;
647 		}
648 
649 		if (event >= 0) {
650 			// Notify listener, we need to copy the notification, as it might
651 			// be removed when we unlock the list.
652 			cache_notification copy = *notification;
653 			locker.Unlock();
654 
655 			copy.hook(copy.transaction_id, event, copy.data);
656 
657 			locker.Lock();
658 		}
659 
660 		if (deleteAfterEvent)
661 			delete notification;
662 	}
663 }
664 
665 
666 /*!	Flushes all pending notifications by calling the appropriate hook
667 	functions.
668 	Must not be called with a cache lock held.
669 */
670 static void
671 flush_pending_notifications()
672 {
673 	MutexLocker _(sCachesLock);
674 
675 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
676 	while (iterator.HasNext()) {
677 		block_cache* cache = iterator.Next();
678 
679 		flush_pending_notifications(cache);
680 	}
681 }
682 
683 
684 /*!	Initializes the \a notification as specified. */
685 static void
686 set_notification(cache_transaction* transaction,
687 	cache_notification &notification, int32 events,
688 	transaction_notification_hook hook, void* data)
689 {
690 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
691 	notification.events_pending = 0;
692 	notification.events = events;
693 	notification.hook = hook;
694 	notification.data = data;
695 	notification.delete_after_event = false;
696 }
697 
698 
699 /*!	Makes sure the notification is deleted. It either deletes it directly,
700 	when possible, or marks it for deletion if the notification is pending.
701 */
702 static void
703 delete_notification(cache_notification* notification)
704 {
705 	MutexLocker locker(sNotificationsLock);
706 
707 	if (notification->events_pending != 0)
708 		notification->delete_after_event = true;
709 	else
710 		delete notification;
711 }
712 
713 
714 /*!	Adds the notification to the pending notifications list, or, if it's
715 	already part of it, updates its events_pending field.
716 	Also marks the notification to be deleted if \a deleteNotification
717 	is \c true.
718 	Triggers the notifier thread to run.
719 */
720 static void
721 add_notification(block_cache* cache, cache_notification* notification,
722 	int32 event, bool deleteNotification)
723 {
724 	if (notification->hook == NULL)
725 		return;
726 
727 	int32 pending = atomic_or(&notification->events_pending, event);
728 	if (pending == 0) {
729 		// not yet part of the notification list
730 		MutexLocker locker(sNotificationsLock);
731 		if (deleteNotification)
732 			notification->delete_after_event = true;
733 		cache->pending_notifications.Add(notification);
734 	} else if (deleteNotification) {
735 		// we might need to delete it ourselves if we're late
736 		delete_notification(notification);
737 	}
738 
739 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
740 		// We're probably still holding some locks that makes rescheduling
741 		// not a good idea at this point.
742 }
743 
744 
745 /*!	Notifies all interested listeners of this transaction about the \a event.
746 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
747 	TRANSACTION_ABORTED), all listeners except those listening to
748 	TRANSACTION_WRITTEN will be removed.
749 */
750 static void
751 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
752 	int32 event)
753 {
754 	T(Action("notify", cache, transaction));
755 
756 	bool isClosing = is_closing_event(event);
757 	bool isWritten = is_written_event(event);
758 
759 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
760 	while (iterator.HasNext()) {
761 		cache_listener* listener = iterator.Next();
762 
763 		bool remove = (isClosing && !is_written_event(listener->events))
764 			|| (isWritten && is_written_event(listener->events));
765 		if (remove)
766 			iterator.Remove();
767 
768 		if ((listener->events & event) != 0)
769 			add_notification(cache, listener, event, remove);
770 		else if (remove)
771 			delete_notification(listener);
772 	}
773 }
774 
775 
776 /*!	Removes and deletes all listeners that are still monitoring this
777 	transaction.
778 */
779 static void
780 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
781 {
782 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
783 	while (iterator.HasNext()) {
784 		cache_listener* listener = iterator.Next();
785 		iterator.Remove();
786 
787 		delete_notification(listener);
788 	}
789 }
790 
791 
792 static status_t
793 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
794 	int32 events, transaction_notification_hook hookFunction, void* data)
795 {
796 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
797 	while (iterator.HasNext()) {
798 		cache_listener* listener = iterator.Next();
799 
800 		if (listener->data == data && listener->hook == hookFunction) {
801 			// this listener already exists, just update it
802 			listener->events |= events;
803 			return B_OK;
804 		}
805 	}
806 
807 	cache_listener* listener = new(std::nothrow) cache_listener;
808 	if (listener == NULL)
809 		return B_NO_MEMORY;
810 
811 	set_notification(transaction, *listener, events, hookFunction, data);
812 	transaction->listeners.Add(listener);
813 	return B_OK;
814 }
815 
816 
817 //	#pragma mark - private transaction
818 
819 
820 cache_transaction::cache_transaction()
821 {
822 	num_blocks = 0;
823 	main_num_blocks = 0;
824 	sub_num_blocks = 0;
825 	first_block = NULL;
826 	open = true;
827 	has_sub_transaction = false;
828 	last_used = system_time();
829 }
830 
831 
832 static int
833 transaction_compare(void* _transaction, const void* _id)
834 {
835 	cache_transaction* transaction = (cache_transaction*)_transaction;
836 	const int32* id = (const int32*)_id;
837 
838 	return transaction->id - *id;
839 }
840 
841 
842 static uint32
843 transaction_hash(void* _transaction, const void* _id, uint32 range)
844 {
845 	cache_transaction* transaction = (cache_transaction*)_transaction;
846 	const int32* id = (const int32*)_id;
847 
848 	if (transaction != NULL)
849 		return transaction->id % range;
850 
851 	return (uint32)*id % range;
852 }
853 
854 
855 static void
856 delete_transaction(block_cache* cache, cache_transaction* transaction)
857 {
858 	if (cache->last_transaction == transaction)
859 		cache->last_transaction = NULL;
860 
861 	remove_transaction_listeners(cache, transaction);
862 	delete transaction;
863 }
864 
865 
866 static cache_transaction*
867 lookup_transaction(block_cache* cache, int32 id)
868 {
869 	return (cache_transaction*)hash_lookup(cache->transaction_hash, &id);
870 }
871 
872 
873 //	#pragma mark - cached_block
874 
875 
876 int
877 compare_blocks(const void* _blockA, const void* _blockB)
878 {
879 	cached_block* blockA = *(cached_block**)_blockA;
880 	cached_block* blockB = *(cached_block**)_blockB;
881 
882 	off_t diff = blockA->block_number - blockB->block_number;
883 	if (diff > 0)
884 		return 1;
885 
886 	return diff < 0 ? -1 : 0;
887 }
888 
889 
890 /*static*/ int
891 cached_block::Compare(void* _cacheEntry, const void* _block)
892 {
893 	cached_block* cacheEntry = (cached_block*)_cacheEntry;
894 	const off_t* block = (const off_t*)_block;
895 
896 	off_t diff = cacheEntry->block_number - *block;
897 	if (diff > 0)
898 		return 1;
899 
900 	return diff < 0 ? -1 : 0;
901 }
902 
903 
904 
905 /*static*/ uint32
906 cached_block::Hash(void* _cacheEntry, const void* _block, uint32 range)
907 {
908 	cached_block* cacheEntry = (cached_block*)_cacheEntry;
909 	const off_t* block = (const off_t*)_block;
910 
911 	if (cacheEntry != NULL)
912 		return cacheEntry->block_number % range;
913 
914 	return (uint64)*block % range;
915 }
916 
917 
918 //	#pragma mark - block_cache
919 
920 
921 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
922 		bool readOnly)
923 	:
924 	hash(NULL),
925 	fd(_fd),
926 	max_blocks(numBlocks),
927 	block_size(blockSize),
928 	next_transaction_id(1),
929 	last_transaction(NULL),
930 	transaction_hash(NULL),
931 	buffer_cache(NULL),
932 	num_dirty_blocks(0),
933 	read_only(readOnly)
934 {
935 }
936 
937 
938 /*! Should be called with the cache's lock held. */
939 block_cache::~block_cache()
940 {
941 	unregister_low_resource_handler(&_LowMemoryHandler, this);
942 
943 	hash_uninit(transaction_hash);
944 	hash_uninit(hash);
945 
946 	delete_object_cache(buffer_cache);
947 
948 	mutex_destroy(&lock);
949 }
950 
951 
952 status_t
953 block_cache::Init()
954 {
955 	condition_variable.Init(this, "cache transaction sync");
956 	mutex_init(&lock, "block cache");
957 
958 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
959 		8, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
960 	if (buffer_cache == NULL)
961 		return B_NO_MEMORY;
962 
963 	cached_block dummyBlock;
964 	hash = hash_init(1024, offset_of_member(dummyBlock, next),
965 		&cached_block::Compare, &cached_block::Hash);
966 	if (hash == NULL)
967 		return B_NO_MEMORY;
968 
969 	cache_transaction dummyTransaction;
970 	transaction_hash = hash_init(16, offset_of_member(dummyTransaction, next),
971 		&transaction_compare, &::transaction_hash);
972 	if (transaction_hash == NULL)
973 		return B_NO_MEMORY;
974 
975 	return register_low_resource_handler(&_LowMemoryHandler, this,
976 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
977 }
978 
979 
980 void
981 block_cache::Free(void* buffer)
982 {
983 	if (buffer != NULL)
984 		object_cache_free(buffer_cache, buffer);
985 }
986 
987 
988 void*
989 block_cache::Allocate()
990 {
991 	return object_cache_alloc(buffer_cache, 0);
992 }
993 
994 
995 void
996 block_cache::FreeBlock(cached_block* block)
997 {
998 	Free(block->current_data);
999 
1000 	if (block->original_data != NULL || block->parent_data != NULL) {
1001 		panic("block_cache::FreeBlock(): %Ld, original %p, parent %p\n",
1002 			block->block_number, block->original_data, block->parent_data);
1003 	}
1004 
1005 #if BLOCK_CACHE_DEBUG_CHANGED
1006 	Free(block->compare);
1007 #endif
1008 
1009 	object_cache_free(sBlockCache, block);
1010 }
1011 
1012 
1013 /*! Allocates a new block for \a blockNumber, ready for use */
1014 cached_block*
1015 block_cache::NewBlock(off_t blockNumber)
1016 {
1017 	cached_block* block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1018 	if (block == NULL) {
1019 		TB(Error(this, blockNumber, "allocation failed"));
1020 		dprintf("block allocation failed, unused list is %sempty.\n",
1021 			unused_blocks.IsEmpty() ? "" : "not ");
1022 
1023 		// allocation failed, try to reuse an unused block
1024 		block = _GetUnusedBlock();
1025 		if (block == NULL) {
1026 			TB(Error(this, blockNumber, "get unused failed"));
1027 			FATAL(("could not allocate block!\n"));
1028 			return NULL;
1029 		}
1030 	}
1031 
1032 	block->current_data = Allocate();
1033 	if (block->current_data == NULL) {
1034 		object_cache_free(sBlockCache, block);
1035 		return NULL;
1036 	}
1037 
1038 	block->block_number = blockNumber;
1039 	block->ref_count = 0;
1040 	block->accessed = 0;
1041 	block->transaction_next = NULL;
1042 	block->transaction = block->previous_transaction = NULL;
1043 	block->original_data = NULL;
1044 	block->parent_data = NULL;
1045 	block->is_dirty = false;
1046 	block->unused = false;
1047 	block->discard = false;
1048 #if BLOCK_CACHE_DEBUG_CHANGED
1049 	block->compare = NULL;
1050 #endif
1051 
1052 	return block;
1053 }
1054 
1055 
1056 void
1057 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
1058 {
1059 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
1060 
1061 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1062 			cached_block* block = iterator.Next();) {
1063 		if (maxAccessed < block->accessed)
1064 			continue;
1065 
1066 		TB(Flush(this, block));
1067 		TRACE(("  remove block %Ld, accessed %ld times\n",
1068 			block->block_number, block->accessed));
1069 
1070 		// this can only happen if no transactions are used
1071 		if (block->is_dirty && !block->discard)
1072 			write_cached_block(this, block, false);
1073 
1074 		// remove block from lists
1075 		iterator.Remove();
1076 		RemoveBlock(block);
1077 
1078 		if (--count <= 0)
1079 			break;
1080 	}
1081 }
1082 
1083 
1084 void
1085 block_cache::RemoveBlock(cached_block* block)
1086 {
1087 	hash_remove(hash, block);
1088 	FreeBlock(block);
1089 }
1090 
1091 
1092 /*!	Discards the block from a transaction (this method must not be called
1093 	for blocks not part of a transaction).
1094 */
1095 void
1096 block_cache::DiscardBlock(cached_block* block)
1097 {
1098 	ASSERT(block->discard);
1099 
1100 	if (block->parent_data != NULL && block->parent_data != block->current_data)
1101 		Free(block->parent_data);
1102 
1103 	block->parent_data = NULL;
1104 
1105 	if (block->original_data != NULL) {
1106 		Free(block->original_data);
1107 		block->original_data = NULL;
1108 	}
1109 
1110 	RemoveBlock(block);
1111 }
1112 
1113 
1114 void
1115 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1116 {
1117 	block_cache* cache = (block_cache*)data;
1118 	MutexLocker locker(&cache->lock);
1119 
1120 	if (!locker.IsLocked()) {
1121 		// If our block_cache were deleted, it could be that we had
1122 		// been called before that deletion went through, therefore,
1123 		// acquiring its lock might fail.
1124 		return;
1125 	}
1126 
1127 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
1128 
1129 	// free some blocks according to the low memory state
1130 	// (if there is enough memory left, we don't free any)
1131 
1132 	int32 free = 1;
1133 	int32 accessed = 1;
1134 	switch (low_resource_state(
1135 			B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)) {
1136 		case B_NO_LOW_RESOURCE:
1137 			return;
1138 		case B_LOW_RESOURCE_NOTE:
1139 			free = 50;
1140 			accessed = 2;
1141 			break;
1142 		case B_LOW_RESOURCE_WARNING:
1143 			free = 200;
1144 			accessed = 10;
1145 			break;
1146 		case B_LOW_RESOURCE_CRITICAL:
1147 			free = LONG_MAX;
1148 			accessed = LONG_MAX;
1149 			break;
1150 	}
1151 
1152 	cache->RemoveUnusedBlocks(accessed, free);
1153 }
1154 
1155 
1156 cached_block*
1157 block_cache::_GetUnusedBlock()
1158 {
1159 	TRACE(("block_cache: get unused block\n"));
1160 
1161 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1162 			cached_block* block = iterator.Next();) {
1163 		TB(Flush(this, block, true));
1164 		// this can only happen if no transactions are used
1165 		if (block->is_dirty)
1166 			write_cached_block(this, block, false);
1167 
1168 		// remove block from lists
1169 		iterator.Remove();
1170 		hash_remove(hash, block);
1171 
1172 		// TODO: see if parent/compare data is handled correctly here!
1173 		if (block->parent_data != NULL
1174 			&& block->parent_data != block->original_data)
1175 			Free(block->parent_data);
1176 		if (block->original_data != NULL)
1177 			Free(block->original_data);
1178 
1179 		return block;
1180 	}
1181 
1182 	return NULL;
1183 }
1184 
1185 
1186 //	#pragma mark - private block functions
1187 
1188 
1189 /*!	Removes a reference from the specified \a block. If this was the last
1190 	reference, the block is moved into the unused list.
1191 	In low memory situations, it will also free some blocks from that list,
1192 	but not necessarily the \a block it just released.
1193 */
1194 static void
1195 put_cached_block(block_cache* cache, cached_block* block)
1196 {
1197 #if BLOCK_CACHE_DEBUG_CHANGED
1198 	if (!block->is_dirty && block->compare != NULL
1199 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1200 		dprintf("new block:\n");
1201 		dump_block((const char*)block->current_data, 256, "  ");
1202 		dprintf("unchanged block:\n");
1203 		dump_block((const char*)block->compare, 256, "  ");
1204 		write_cached_block(cache, block);
1205 		panic("block_cache: supposed to be clean block was changed!\n");
1206 
1207 		cache->Free(block->compare);
1208 		block->compare = NULL;
1209 	}
1210 #endif
1211 	TB(Put(cache, block));
1212 
1213 	if (block->ref_count < 1) {
1214 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1215 		return;
1216 	}
1217 
1218 	if (--block->ref_count == 0
1219 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1220 		// This block is not used anymore, and not part of any transaction
1221 		if (block->discard) {
1222 			cache->RemoveBlock(block);
1223 		} else {
1224 			// put this block in the list of unused blocks
1225 			block->unused = true;
1226 			ASSERT(block->original_data == NULL
1227 				&& block->parent_data == NULL);
1228 			cache->unused_blocks.Add(block);
1229 		}
1230 	}
1231 
1232 	// Free some blocks according to the low memory state
1233 	// (if there is enough memory left, we don't free any)
1234 
1235 	int32 free = 1;
1236 	switch (low_resource_state(
1237 			B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)) {
1238 		case B_NO_LOW_RESOURCE:
1239 			return;
1240 		case B_LOW_RESOURCE_NOTE:
1241 			free = 1;
1242 			break;
1243 		case B_LOW_RESOURCE_WARNING:
1244 			free = 5;
1245 			break;
1246 		case B_LOW_RESOURCE_CRITICAL:
1247 			free = 20;
1248 			break;
1249 	}
1250 
1251 	cache->RemoveUnusedBlocks(LONG_MAX, free);
1252 }
1253 
1254 
1255 static void
1256 put_cached_block(block_cache* cache, off_t blockNumber)
1257 {
1258 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1259 		panic("put_cached_block: invalid block number %lld (max %lld)",
1260 			blockNumber, cache->max_blocks - 1);
1261 	}
1262 
1263 	cached_block* block = (cached_block*)hash_lookup(cache->hash, &blockNumber);
1264 	if (block != NULL)
1265 		put_cached_block(cache, block);
1266 	else {
1267 		TB(Error(cache, blockNumber, "put unknown"));
1268 	}
1269 }
1270 
1271 
1272 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1273 	there, or reads it from the disk.
1274 
1275 	\param _allocated tells you wether or not a new block has been allocated
1276 		to satisfy your request.
1277 	\param readBlock if \c false, the block will not be read in case it was
1278 		not already in the cache. The block you retrieve may contain random
1279 		data.
1280 */
1281 static cached_block*
1282 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1283 	bool readBlock = true)
1284 {
1285 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1286 		panic("get_cached_block: invalid block number %lld (max %lld)",
1287 			blockNumber, cache->max_blocks - 1);
1288 		return NULL;
1289 	}
1290 
1291 	cached_block* block = (cached_block*)hash_lookup(cache->hash,
1292 		&blockNumber);
1293 	*_allocated = false;
1294 
1295 	if (block == NULL) {
1296 		// read block into cache
1297 		block = cache->NewBlock(blockNumber);
1298 		if (block == NULL)
1299 			return NULL;
1300 
1301 		hash_insert_grow(cache->hash, block);
1302 		*_allocated = true;
1303 	}
1304 
1305 	if (*_allocated && readBlock) {
1306 		int32 blockSize = cache->block_size;
1307 
1308 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1309 			block->current_data, blockSize);
1310 		if (bytesRead < blockSize) {
1311 			cache->RemoveBlock(block);
1312 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1313 
1314 			FATAL(("could not read block %Ld: bytesRead: %ld, error: %s\n",
1315 				blockNumber, bytesRead, strerror(errno)));
1316 			return NULL;
1317 		}
1318 		TB(Read(cache, block));
1319 	}
1320 
1321 	if (block->unused) {
1322 		//TRACE(("remove block %Ld from unused\n", blockNumber));
1323 		block->unused = false;
1324 		cache->unused_blocks.Remove(block);
1325 	}
1326 
1327 	block->ref_count++;
1328 	block->accessed++;
1329 
1330 	return block;
1331 }
1332 
1333 
1334 /*!	Returns the writable block data for the requested blockNumber.
1335 	If \a cleared is true, the block is not read from disk; an empty block
1336 	is returned.
1337 
1338 	This is the only method to insert a block into a transaction. It makes
1339 	sure that the previous block contents are preserved in that case.
1340 */
1341 static void*
1342 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1343 	off_t length, int32 transactionID, bool cleared)
1344 {
1345 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
1346 		blockNumber, transactionID));
1347 
1348 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1349 		panic("get_writable_cached_block: invalid block number %lld (max %lld)",
1350 			blockNumber, cache->max_blocks - 1);
1351 	}
1352 
1353 	bool allocated;
1354 	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1355 		!cleared);
1356 	if (block == NULL)
1357 		return NULL;
1358 
1359 	block->discard = false;
1360 
1361 	// if there is no transaction support, we just return the current block
1362 	if (transactionID == -1) {
1363 		if (cleared)
1364 			memset(block->current_data, 0, cache->block_size);
1365 
1366 		if (!block->is_dirty) {
1367 			cache->num_dirty_blocks++;
1368 			block->is_dirty = true;
1369 				// mark the block as dirty
1370 		}
1371 
1372 		TB(Get(cache, block));
1373 		return block->current_data;
1374 	}
1375 
1376 	cache_transaction* transaction = block->transaction;
1377 
1378 	if (transaction != NULL && transaction->id != transactionID) {
1379 		// TODO: we have to wait here until the other transaction is done.
1380 		//	Maybe we should even panic, since we can't prevent any deadlocks.
1381 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
1382 		put_cached_block(cache, block);
1383 		return NULL;
1384 	}
1385 	if (transaction == NULL && transactionID != -1) {
1386 		// get new transaction
1387 		transaction = lookup_transaction(cache, transactionID);
1388 		if (transaction == NULL) {
1389 			panic("get_writable_cached_block(): invalid transaction %ld!\n",
1390 				transactionID);
1391 			put_cached_block(cache, block);
1392 			return NULL;
1393 		}
1394 		if (!transaction->open) {
1395 			panic("get_writable_cached_block(): transaction already done!\n");
1396 			put_cached_block(cache, block);
1397 			return NULL;
1398 		}
1399 
1400 		block->transaction = transaction;
1401 
1402 		// attach the block to the transaction block list
1403 		block->transaction_next = transaction->first_block;
1404 		transaction->first_block = block;
1405 		transaction->num_blocks++;
1406 	}
1407 	if (transaction != NULL)
1408 		transaction->last_used = system_time();
1409 
1410 	bool wasUnchanged = block->original_data == NULL
1411 		|| block->previous_transaction != NULL;
1412 
1413 	if (!(allocated && cleared) && block->original_data == NULL) {
1414 		// we already have data, so we need to preserve it
1415 		block->original_data = cache->Allocate();
1416 		if (block->original_data == NULL) {
1417 			TB(Error(cache, blockNumber, "allocate original failed"));
1418 			FATAL(("could not allocate original_data\n"));
1419 			put_cached_block(cache, block);
1420 			return NULL;
1421 		}
1422 
1423 		memcpy(block->original_data, block->current_data, cache->block_size);
1424 	}
1425 	if (block->parent_data == block->current_data) {
1426 		// remember any previous contents for the parent transaction
1427 		block->parent_data = cache->Allocate();
1428 		if (block->parent_data == NULL) {
1429 			// TODO: maybe we should just continue the current transaction in this case...
1430 			TB(Error(cache, blockNumber, "allocate parent failed"));
1431 			FATAL(("could not allocate parent\n"));
1432 			put_cached_block(cache, block);
1433 			return NULL;
1434 		}
1435 
1436 		memcpy(block->parent_data, block->current_data, cache->block_size);
1437 		transaction->sub_num_blocks++;
1438 	} else if (transaction != NULL && transaction->has_sub_transaction
1439 		&& block->parent_data == NULL && wasUnchanged)
1440 		transaction->sub_num_blocks++;
1441 
1442 	if (cleared)
1443 		memset(block->current_data, 0, cache->block_size);
1444 
1445 	block->is_dirty = true;
1446 	TB(Get(cache, block));
1447 	TB2(BlockData(cache, block, "get writable"));
1448 
1449 	return block->current_data;
1450 }
1451 
1452 
1453 /*!	Writes the specified \a block back to disk. It will always only write back
1454 	the oldest change of the block if it is part of more than one transaction.
1455 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1456 	delete transactions when they are no longer used, and \a deleteTransaction
1457 	is \c true.
1458 */
1459 static status_t
1460 write_cached_block(block_cache* cache, cached_block* block,
1461 	bool deleteTransaction)
1462 {
1463 	cache_transaction* previous = block->previous_transaction;
1464 	int32 blockSize = cache->block_size;
1465 
1466 	void* data = previous && block->original_data
1467 		? block->original_data : block->current_data;
1468 		// we first need to write back changes from previous transactions
1469 
1470 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
1471 	TB(Write(cache, block));
1472 	TB2(BlockData(cache, block, "before write"));
1473 
1474 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize,
1475 		data, blockSize);
1476 
1477 	if (written < blockSize) {
1478 		TB(Error(cache, block->block_number, "write failed", written));
1479 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
1480 			strerror(errno)));
1481 		return B_IO_ERROR;
1482 	}
1483 
1484 	if (cache->num_dirty_blocks > 0)
1485 		cache->num_dirty_blocks--;
1486 
1487 	if (data == block->current_data)
1488 		block->is_dirty = false;
1489 
1490 	if (previous != NULL) {
1491 		previous->blocks.Remove(block);
1492 		block->previous_transaction = NULL;
1493 
1494 		if (block->original_data != NULL && block->transaction == NULL) {
1495 			// This block is not part of a transaction, so it does not need
1496 			// its original pointer anymore.
1497 			cache->Free(block->original_data);
1498 			block->original_data = NULL;
1499 		}
1500 
1501 		// Has the previous transation been finished with that write?
1502 		if (--previous->num_blocks == 0) {
1503 			TRACE(("cache transaction %ld finished!\n", previous->id));
1504 			T(Action("written", cache, previous));
1505 
1506 			notify_transaction_listeners(cache, previous, TRANSACTION_WRITTEN);
1507 
1508 			if (deleteTransaction) {
1509 				hash_remove(cache->transaction_hash, previous);
1510 				delete_transaction(cache, previous);
1511 			}
1512 		}
1513 	}
1514 	if (block->transaction == NULL && block->ref_count == 0) {
1515 		// the block is no longer used
1516 		block->unused = true;
1517 		cache->unused_blocks.Add(block);
1518 	}
1519 
1520 	TB2(BlockData(cache, block, "after write"));
1521 	return B_OK;
1522 }
1523 
1524 
1525 #if DEBUG_BLOCK_CACHE
1526 
1527 static void
1528 dump_block(cached_block* block)
1529 {
1530 	kprintf("%08lx %9Ld %08lx %08lx %08lx %5ld %6ld %c%c%c%c%c %08lx %08lx\n",
1531 		(addr_t)block, block->block_number,
1532 		(addr_t)block->current_data, (addr_t)block->original_data,
1533 		(addr_t)block->parent_data, block->ref_count, block->accessed,
1534 		block->busy ? 'B' : '-', block->is_writing ? 'W' : '-',
1535 		block->is_dirty ? 'D' : '-', block->unused ? 'U' : '-',
1536 		block->discard ? 'D' : '-',
1537 		(addr_t)block->transaction,
1538 		(addr_t)block->previous_transaction);
1539 }
1540 
1541 
1542 static int
1543 dump_cache(int argc, char** argv)
1544 {
1545 	bool showTransactions = false;
1546 	bool showBlocks = false;
1547 	int32 i = 1;
1548 	while (argv[i] != NULL && argv[i][0] == '-') {
1549 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
1550 			switch (arg[0]) {
1551 				case 'b':
1552 					showBlocks = true;
1553 					break;
1554 				case 't':
1555 					showTransactions = true;
1556 					break;
1557 				default:
1558 					print_debugger_command_usage(argv[0]);
1559 					return 0;
1560 			}
1561 		}
1562 		i++;
1563 	}
1564 
1565 	if (i >= argc) {
1566 		print_debugger_command_usage(argv[0]);
1567 		return 0;
1568 	}
1569 
1570 	block_cache* cache = (struct block_cache*)parse_expression(argv[i]);
1571 	if (cache == NULL) {
1572 		kprintf("invalid cache address\n");
1573 		return 0;
1574 	}
1575 
1576 	off_t blockNumber = -1;
1577 	if (i + 1 < argc) {
1578 		blockNumber = parse_expression(argv[i + 1]);
1579 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
1580 			&blockNumber);
1581 		if (block != NULL) {
1582 			kprintf("BLOCK %p\n", block);
1583 			kprintf(" current data:  %p\n", block->current_data);
1584 			kprintf(" original data: %p\n", block->original_data);
1585 			kprintf(" parent data:   %p\n", block->parent_data);
1586 			kprintf(" ref_count:     %ld\n", block->ref_count);
1587 			kprintf(" accessed:      %ld\n", block->accessed);
1588 			kprintf(" flags:        ");
1589 			if (block->is_writing)
1590 				kprintf(" is-writing");
1591 			if (block->is_dirty)
1592 				kprintf(" is-dirty");
1593 			if (block->unused)
1594 				kprintf(" unused");
1595 			if (block->discard)
1596 				kprintf(" discard");
1597 			kprintf("\n");
1598 			if (block->transaction != NULL) {
1599 				kprintf(" transaction:   %p (%ld)\n", block->transaction,
1600 					block->transaction->id);
1601 				if (block->transaction_next != NULL) {
1602 					kprintf(" next in transaction: %Ld\n",
1603 						block->transaction_next->block_number);
1604 				}
1605 			}
1606 			if (block->previous_transaction != NULL) {
1607 				kprintf(" previous transaction: %p (%ld)\n",
1608 					block->previous_transaction,
1609 					block->previous_transaction->id);
1610 			}
1611 
1612 			set_debug_variable("_current", (addr_t)block->current_data);
1613 			set_debug_variable("_original", (addr_t)block->original_data);
1614 			set_debug_variable("_parent", (addr_t)block->parent_data);
1615 		} else
1616 			kprintf("block %Ld not found\n", blockNumber);
1617 		return 0;
1618 	}
1619 
1620 	kprintf("BLOCK CACHE: %p\n", cache);
1621 
1622 	kprintf(" fd:         %d\n", cache->fd);
1623 	kprintf(" max_blocks: %Ld\n", cache->max_blocks);
1624 	kprintf(" block_size: %lu\n", cache->block_size);
1625 	kprintf(" next_transaction_id: %ld\n", cache->next_transaction_id);
1626 
1627 	if (!cache->pending_notifications.IsEmpty()) {
1628 		kprintf(" pending notifications:\n");
1629 
1630 		NotificationList::Iterator iterator
1631 			= cache->pending_notifications.GetIterator();
1632 		while (iterator.HasNext()) {
1633 			cache_notification* notification = iterator.Next();
1634 
1635 			kprintf("  %p %5lx %p - %p\n", notification,
1636 				notification->events_pending, notification->hook,
1637 				notification->data);
1638 		}
1639 	}
1640 
1641 	if (showTransactions) {
1642 		kprintf(" transactions:\n");
1643 		kprintf("address       id state  blocks  main   sub\n");
1644 
1645 		hash_iterator iterator;
1646 		hash_open(cache->transaction_hash, &iterator);
1647 
1648 		cache_transaction* transaction;
1649 		while ((transaction = (cache_transaction*)hash_next(
1650 				cache->transaction_hash, &iterator)) != NULL) {
1651 			kprintf("%p %5ld %-7s %5ld %5ld %5ld\n", transaction,
1652 				transaction->id, transaction->open ? "open" : "closed",
1653 				transaction->num_blocks, transaction->main_num_blocks,
1654 				transaction->sub_num_blocks);
1655 		}
1656 	}
1657 
1658 	if (showBlocks) {
1659 		kprintf(" blocks:\n");
1660 		kprintf("address  block no. current  original parent    refs access "
1661 			"flags transact prev. trans\n");
1662 	}
1663 
1664 	uint32 referenced = 0;
1665 	uint32 count = 0;
1666 	uint32 dirty = 0;
1667 	uint32 discarded = 0;
1668 	hash_iterator iterator;
1669 	hash_open(cache->hash, &iterator);
1670 	cached_block* block;
1671 	while ((block = (cached_block*)hash_next(cache->hash, &iterator)) != NULL) {
1672 		if (showBlocks)
1673 			dump_block(block);
1674 
1675 		if (block->is_dirty)
1676 			dirty++;
1677 		if (block->discard)
1678 			discarded++;
1679 		if (block->ref_count)
1680 			referenced++;
1681 		count++;
1682 	}
1683 
1684 	kprintf(" %ld blocks total, %ld dirty, %ld discarded, %ld referenced, %ld "
1685 		"in unused.\n", count, dirty, discarded, referenced,
1686 		cache->unused_blocks.Count());
1687 
1688 	hash_close(cache->hash, &iterator, false);
1689 	return 0;
1690 }
1691 
1692 
1693 static int
1694 dump_transaction(int argc, char** argv)
1695 {
1696 	bool showBlocks = false;
1697 	int i = 1;
1698 	if (argc > 1 && !strcmp(argv[1], "-b")) {
1699 		showBlocks = true;
1700 		i++;
1701 	}
1702 
1703 	if (argc - i < 1 || argc - i > 2) {
1704 		print_debugger_command_usage(argv[0]);
1705 		return 0;
1706 	}
1707 
1708 	cache_transaction* transaction = NULL;
1709 
1710 	if (argc - i == 1) {
1711 		transaction = (cache_transaction*)parse_expression(argv[i]);
1712 	} else {
1713 		block_cache* cache = (block_cache*)parse_expression(argv[i]);
1714 		int32 id = parse_expression(argv[i + 1]);
1715 		transaction = lookup_transaction(cache, id);
1716 		if (transaction == NULL) {
1717 			kprintf("No transaction with ID %ld found.\n", id);
1718 			return 0;
1719 		}
1720 	}
1721 
1722 	kprintf("TRANSACTION %p\n", transaction);
1723 
1724 	kprintf(" id:             %ld\n", transaction->id);
1725 	kprintf(" num block:      %ld\n", transaction->num_blocks);
1726 	kprintf(" main num block: %ld\n", transaction->main_num_blocks);
1727 	kprintf(" sub num block:  %ld\n", transaction->sub_num_blocks);
1728 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
1729 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
1730 	kprintf(" idle:           %Ld secs\n",
1731 		(system_time() - transaction->last_used) / 1000000);
1732 
1733 	kprintf(" listeners:\n");
1734 
1735 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
1736 	while (iterator.HasNext()) {
1737 		cache_listener* listener = iterator.Next();
1738 
1739 		kprintf("  %p %5lx %p - %p\n", listener, listener->events_pending,
1740 			listener->hook, listener->data);
1741 	}
1742 
1743 	if (!showBlocks)
1744 		return 0;
1745 
1746 	kprintf(" blocks:\n");
1747 	kprintf("address  block no. current  original parent    refs access "
1748 		"flags transact prev. trans\n");
1749 
1750 	cached_block* block = transaction->first_block;
1751 	while (block != NULL) {
1752 		dump_block(block);
1753 		block = block->transaction_next;
1754 	}
1755 
1756 	kprintf("--\n");
1757 
1758 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1759 	while (blockIterator.HasNext()) {
1760 		block = blockIterator.Next();
1761 		dump_block(block);
1762 	}
1763 
1764 	return 0;
1765 }
1766 
1767 
1768 static int
1769 dump_caches(int argc, char** argv)
1770 {
1771 	kprintf("Block caches:\n");
1772 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
1773 	while (i.HasNext()) {
1774 		block_cache* cache = i.Next();
1775 		if (cache == (block_cache*)&sMarkCache)
1776 			continue;
1777 
1778 		kprintf("  %p\n", cache);
1779 	}
1780 
1781 	return 0;
1782 }
1783 
1784 
1785 #if BLOCK_CACHE_BLOCK_TRACING >= 2
1786 static int
1787 dump_block_data(int argc, char** argv)
1788 {
1789 	using namespace BlockTracing;
1790 
1791 	// Determine which blocks to show
1792 
1793 	bool printStackTrace = true;
1794 	uint32 which = 0;
1795 	int32 i = 1;
1796 	while (i < argc && argv[i][0] == '-') {
1797 		char* arg = &argv[i][1];
1798 		while (arg[0]) {
1799 			switch (arg[0]) {
1800 				case 'c':
1801 					which |= BlockData::kCurrent;
1802 					break;
1803 				case 'p':
1804 					which |= BlockData::kParent;
1805 					break;
1806 				case 'o':
1807 					which |= BlockData::kOriginal;
1808 					break;
1809 
1810 				default:
1811 					kprintf("invalid block specifier (only o/c/p are "
1812 						"allowed).\n");
1813 					return 0;
1814 			}
1815 			arg++;
1816 		}
1817 
1818 		i++;
1819 	}
1820 	if (which == 0)
1821 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
1822 
1823 	if (i == argc) {
1824 		print_debugger_command_usage(argv[0]);
1825 		return 0;
1826 	}
1827 
1828 	// Get the range of blocks to print
1829 
1830 	int64 from = parse_expression(argv[i]);
1831 	int64 to = from;
1832 	if (argc > i + 1)
1833 		to = parse_expression(argv[i + 1]);
1834 	if (to < from)
1835 		to = from;
1836 
1837 	uint32 offset = 0;
1838 	uint32 size = LONG_MAX;
1839 	if (argc > i + 2)
1840 		offset = parse_expression(argv[i + 2]);
1841 	if (argc > i + 3)
1842 		size = parse_expression(argv[i + 3]);
1843 
1844 	TraceEntryIterator iterator;
1845 	iterator.MoveTo(from - 1);
1846 
1847 	static char sBuffer[1024];
1848 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
1849 
1850 	while (TraceEntry* entry = iterator.Next()) {
1851 		int32 index = iterator.Index();
1852 		if (index > to)
1853 			break;
1854 
1855 		Action* action = dynamic_cast<Action*>(entry);
1856 		if (action != NULL) {
1857 			out.Clear();
1858 			out.DumpEntry(action);
1859 			continue;
1860 		}
1861 
1862 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
1863 		if (blockData == NULL)
1864 			continue;
1865 
1866 		out.Clear();
1867 
1868 		const char* dump = out.DumpEntry(entry);
1869 		int length = strlen(dump);
1870 		if (length > 0 && dump[length - 1] == '\n')
1871 			length--;
1872 
1873 		kprintf("%5ld. %.*s\n", index, length, dump);
1874 
1875 		if (printStackTrace) {
1876 			out.Clear();
1877 			entry->DumpStackTrace(out);
1878 			if (out.Size() > 0)
1879 				kputs(out.Buffer());
1880 		}
1881 
1882 		blockData->DumpBlocks(which, offset, size);
1883 	}
1884 
1885 	return 0;
1886 }
1887 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
1888 
1889 #endif	// DEBUG_BLOCK_CACHE
1890 
1891 
1892 /*!	Traverses through the block_cache list, and returns one cache after the
1893 	other. The cache returned is automatically locked when you get it, and
1894 	unlocked with the next call to this function. Ignores caches that are in
1895 	deletion state.
1896 	Returns \c NULL when the end of the list is reached.
1897 */
1898 static block_cache*
1899 get_next_locked_block_cache(block_cache* last)
1900 {
1901 	MutexLocker _(sCachesLock);
1902 
1903 	block_cache* cache;
1904 	if (last != NULL) {
1905 		mutex_unlock(&last->lock);
1906 
1907 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
1908 		sCaches.Remove((block_cache*)&sMarkCache);
1909 	} else
1910 		cache = sCaches.Head();
1911 
1912 	if (cache != NULL) {
1913 		mutex_lock(&cache->lock);
1914 		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
1915 	}
1916 
1917 	return cache;
1918 }
1919 
1920 
1921 /*!	Background thread that continuously checks for pending notifications of
1922 	all caches.
1923 	Every two seconds, it will also write back up to 64 blocks per cache.
1924 */
1925 static status_t
1926 block_notifier_and_writer(void* /*data*/)
1927 {
1928 	const bigtime_t kTimeout = 2000000LL;
1929 	bigtime_t timeout = kTimeout;
1930 
1931 	while (true) {
1932 		bigtime_t start = system_time();
1933 
1934 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
1935 			B_RELATIVE_TIMEOUT, timeout);
1936 		if (status == B_OK) {
1937 			flush_pending_notifications();
1938 			timeout -= system_time() - start;
1939 			continue;
1940 		}
1941 
1942 		// write 64 blocks of each block_cache every two seconds
1943 		// TODO: change this once we have an I/O scheduler
1944 		timeout = kTimeout;
1945 		size_t usedMemory = 0;
1946 
1947 		block_cache* cache = NULL;
1948 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
1949 			const uint32 kMaxCount = 64;
1950 			cached_block* blocks[kMaxCount];
1951 			uint32 count = 0;
1952 
1953 			size_t cacheUsedMemory;
1954 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
1955 			usedMemory += cacheUsedMemory;
1956 
1957 			if (cache->num_dirty_blocks) {
1958 				// This cache is not using transactions, we'll scan the blocks
1959 				// directly
1960 				hash_iterator iterator;
1961 				hash_open(cache->hash, &iterator);
1962 
1963 				cached_block* block;
1964 				while (count < kMaxCount
1965 					&& (block = (cached_block*)hash_next(cache->hash,
1966 							&iterator)) != NULL) {
1967 					if (block->is_dirty)
1968 						blocks[count++] = block;
1969 				}
1970 
1971 				hash_close(cache->hash, &iterator, false);
1972 			} else {
1973 				hash_iterator iterator;
1974 				hash_open(cache->transaction_hash, &iterator);
1975 
1976 				cache_transaction* transaction;
1977 				while ((transaction = (cache_transaction*)hash_next(
1978 						cache->transaction_hash, &iterator)) != NULL
1979 					&& count < kMaxCount) {
1980 					if (transaction->open) {
1981 						if (system_time() > transaction->last_used
1982 								+ kTransactionIdleTime) {
1983 							// Transaction is open but idle
1984 							notify_transaction_listeners(cache, transaction,
1985 								TRANSACTION_IDLE);
1986 						}
1987 						continue;
1988 					}
1989 
1990 					// sort blocks to speed up writing them back
1991 					// TODO: ideally, this should be handled by the I/O scheduler
1992 					block_list::Iterator iterator
1993 						= transaction->blocks.GetIterator();
1994 
1995 					for (; count < kMaxCount && iterator.HasNext(); count++) {
1996 						blocks[count] = iterator.Next();
1997 					}
1998 				}
1999 
2000 				hash_close(cache->transaction_hash, &iterator, false);
2001 			}
2002 
2003 			qsort(blocks, count, sizeof(void*), &compare_blocks);
2004 
2005 			for (uint32 i = 0; i < count; i++) {
2006 				if (write_cached_block(cache, blocks[i], true) != B_OK)
2007 					break;
2008 			}
2009 		}
2010 
2011 		MutexLocker _(sCachesMemoryUseLock);
2012 		sUsedMemory = usedMemory;
2013 	}
2014 
2015 	// never can get here
2016 	return B_OK;
2017 }
2018 
2019 
2020 /*!	Notify function for wait_for_notifications(). */
2021 static void
2022 notify_sync(int32 transactionID, int32 event, void* _cache)
2023 {
2024 	block_cache* cache = (block_cache*)_cache;
2025 
2026 	cache->condition_variable.NotifyOne();
2027 }
2028 
2029 
2030 /*!	Must be called with the sCachesLock held. */
2031 static bool
2032 is_valid_cache(block_cache* cache)
2033 {
2034 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2035 
2036 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2037 	while (iterator.HasNext()) {
2038 		if (cache == iterator.Next())
2039 			return true;
2040 	}
2041 
2042 	return false;
2043 }
2044 
2045 
2046 /*!	Waits until all pending notifications are carried out.
2047 	Safe to be called from the block writer/notifier thread.
2048 	You must not hold the \a cache lock when calling this function.
2049 */
2050 static void
2051 wait_for_notifications(block_cache* cache)
2052 {
2053 	MutexLocker locker(sCachesLock);
2054 
2055 	if (find_thread(NULL) == sNotifierWriterThread) {
2056 		// We're the notifier thread, don't wait, but flush all pending
2057 		// notifications directly.
2058 		if (is_valid_cache(cache))
2059 			flush_pending_notifications(cache);
2060 		return;
2061 	}
2062 
2063 	// add sync notification
2064 	cache_notification notification;
2065 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2066 		cache);
2067 
2068 	ConditionVariableEntry entry;
2069 	cache->condition_variable.Add(&entry);
2070 
2071 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2072 	locker.Unlock();
2073 
2074 	// wait for notification hook to be called
2075 	entry.Wait();
2076 
2077 	ASSERT(notification.GetDoublyLinkedListLink()->next == NULL
2078 		&& notification.GetDoublyLinkedListLink()->previous == NULL
2079 		&& cache->pending_notifications.Head() != &notification);
2080 }
2081 
2082 
2083 status_t
2084 block_cache_init(void)
2085 {
2086 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2087 		8, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2088 	if (sBlockCache == NULL)
2089 		return B_NO_MEMORY;
2090 
2091 	new (&sCaches) DoublyLinkedList<block_cache>;
2092 		// manually call constructor
2093 
2094 	sEventSemaphore = create_sem(0, "block cache event");
2095 	if (sEventSemaphore < B_OK)
2096 		return sEventSemaphore;
2097 
2098 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2099 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2100 	if (sNotifierWriterThread >= B_OK)
2101 		send_signal_etc(sNotifierWriterThread, SIGCONT, B_DO_NOT_RESCHEDULE);
2102 
2103 #if DEBUG_BLOCK_CACHE
2104 	add_debugger_command_etc("block_caches", &dump_caches,
2105 		"dumps all block caches", "\n", 0);
2106 	add_debugger_command_etc("block_cache", &dump_cache,
2107 		"dumps a specific block cache",
2108 		"[-bt] <cache-address> [block-number]\n"
2109 		"  -t lists the transactions\n"
2110 		"  -b lists all blocks\n", 0);
2111 	add_debugger_command_etc("transaction", &dump_transaction,
2112 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2113 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2114 		"  -b lists all blocks that are part of this transaction\n", 0);
2115 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2116 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2117 		"dumps the data blocks logged for the actions",
2118 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2119 		"If no data specifier is used, all blocks are shown by default.\n"
2120 		" -c       the current data is shown, if available.\n"
2121 		" -p       the parent data is shown, if available.\n"
2122 		" -o       the original data is shown, if available.\n"
2123 		" <from>   first index of tracing entries to show.\n"
2124 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2125 		" <offset> the offset of the block data.\n"
2126 		" <from>   the size of the block data that is dumped\n", 0);
2127 #	endif
2128 #endif	// DEBUG_BLOCK_CACHE
2129 
2130 	return B_OK;
2131 }
2132 
2133 
2134 size_t
2135 block_cache_used_memory(void)
2136 {
2137 	MutexLocker _(sCachesMemoryUseLock);
2138 	return sUsedMemory;
2139 }
2140 
2141 
2142 //	#pragma mark - public transaction API
2143 
2144 
2145 int32
2146 cache_start_transaction(void* _cache)
2147 {
2148 	block_cache* cache = (block_cache*)_cache;
2149 	MutexLocker locker(&cache->lock);
2150 
2151 	if (cache->last_transaction && cache->last_transaction->open) {
2152 		panic("last transaction (%ld) still open!\n",
2153 			cache->last_transaction->id);
2154 	}
2155 
2156 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2157 	if (transaction == NULL)
2158 		return B_NO_MEMORY;
2159 
2160 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2161 	cache->last_transaction = transaction;
2162 
2163 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
2164 	T(Action("start", cache, transaction));
2165 
2166 	hash_insert_grow(cache->transaction_hash, transaction);
2167 
2168 	return transaction->id;
2169 }
2170 
2171 
2172 status_t
2173 cache_sync_transaction(void* _cache, int32 id)
2174 {
2175 	block_cache* cache = (block_cache*)_cache;
2176 	MutexLocker locker(&cache->lock);
2177 	status_t status = B_ENTRY_NOT_FOUND;
2178 
2179 	TRACE(("cache_sync_transaction(id %ld)\n", id));
2180 
2181 	hash_iterator iterator;
2182 	hash_open(cache->transaction_hash, &iterator);
2183 
2184 	cache_transaction* transaction;
2185 	while ((transaction = (cache_transaction*)hash_next(
2186 			cache->transaction_hash, &iterator)) != NULL) {
2187 		// close all earlier transactions which haven't been closed yet
2188 
2189 		if (transaction->id <= id && !transaction->open) {
2190 			// write back all of their remaining dirty blocks
2191 			T(Action("sync", cache, transaction));
2192 			while (transaction->num_blocks > 0) {
2193 				// sort blocks to speed up writing them back
2194 				// TODO: this should be handled by the I/O scheduler
2195 				block_list::Iterator iterator
2196 					= transaction->blocks.GetIterator();
2197 				uint32 maxCount = transaction->num_blocks;
2198 				cached_block* buffer[16];
2199 				cached_block** blocks = (cached_block**)malloc(maxCount
2200 					* sizeof(void*));
2201 				if (blocks == NULL) {
2202 					maxCount = 16;
2203 					blocks = buffer;
2204 				}
2205 
2206 				uint32 count = 0;
2207 				for (; count < maxCount && iterator.HasNext(); count++) {
2208 					blocks[count] = iterator.Next();
2209 				}
2210 				qsort(blocks, count, sizeof(void*), &compare_blocks);
2211 
2212 				for (uint32 i = 0; i < count; i++) {
2213 					status = write_cached_block(cache, blocks[i], false);
2214 					if (status != B_OK)
2215 						break;
2216 				}
2217 
2218 				if (blocks != buffer)
2219 					free(blocks);
2220 
2221 				if (status != B_OK)
2222 					return status;
2223 			}
2224 
2225 			hash_remove_current(cache->transaction_hash, &iterator);
2226 			delete_transaction(cache, transaction);
2227 		}
2228 	}
2229 
2230 	hash_close(cache->transaction_hash, &iterator, false);
2231 	locker.Unlock();
2232 
2233 	wait_for_notifications(cache);
2234 		// make sure that all pending TRANSACTION_WRITTEN notifications
2235 		// are handled after we return
2236 	return B_OK;
2237 }
2238 
2239 
2240 status_t
2241 cache_end_transaction(void* _cache, int32 id,
2242 	transaction_notification_hook hook, void* data)
2243 {
2244 	block_cache* cache = (block_cache*)_cache;
2245 	MutexLocker locker(&cache->lock);
2246 
2247 	TRACE(("cache_end_transaction(id = %ld)\n", id));
2248 
2249 	cache_transaction* transaction = lookup_transaction(cache, id);
2250 	if (transaction == NULL) {
2251 		panic("cache_end_transaction(): invalid transaction ID\n");
2252 		return B_BAD_VALUE;
2253 	}
2254 
2255 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2256 
2257 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2258 			data) != B_OK) {
2259 		return B_NO_MEMORY;
2260 	}
2261 
2262 	T(Action("end", cache, transaction));
2263 
2264 	// iterate through all blocks and free the unchanged original contents
2265 
2266 	cached_block* block = transaction->first_block;
2267 	cached_block* next;
2268 	for (; block != NULL; block = next) {
2269 		next = block->transaction_next;
2270 
2271 		if (block->previous_transaction != NULL) {
2272 			// need to write back pending changes
2273 			write_cached_block(cache, block);
2274 		}
2275 		if (block->discard) {
2276 			// This block has been discarded in the transaction
2277 			cache->DiscardBlock(block);
2278 			transaction->num_blocks--;
2279 			continue;
2280 		}
2281 
2282 		if (block->original_data != NULL) {
2283 			cache->Free(block->original_data);
2284 			block->original_data = NULL;
2285 		}
2286 		if (transaction->has_sub_transaction) {
2287 			if (block->parent_data != block->current_data)
2288 				cache->Free(block->parent_data);
2289 			block->parent_data = NULL;
2290 		}
2291 
2292 		// move the block to the previous transaction list
2293 		transaction->blocks.Add(block);
2294 
2295 		block->previous_transaction = transaction;
2296 		block->transaction_next = NULL;
2297 		block->transaction = NULL;
2298 	}
2299 
2300 	transaction->open = false;
2301 	return B_OK;
2302 }
2303 
2304 
2305 status_t
2306 cache_abort_transaction(void* _cache, int32 id)
2307 {
2308 	block_cache* cache = (block_cache*)_cache;
2309 	MutexLocker locker(&cache->lock);
2310 
2311 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
2312 
2313 	cache_transaction* transaction = lookup_transaction(cache, id);
2314 	if (transaction == NULL) {
2315 		panic("cache_abort_transaction(): invalid transaction ID\n");
2316 		return B_BAD_VALUE;
2317 	}
2318 
2319 	T(Abort(cache, transaction));
2320 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2321 
2322 	// iterate through all blocks and restore their original contents
2323 
2324 	cached_block* block = transaction->first_block;
2325 	cached_block* next;
2326 	for (; block != NULL; block = next) {
2327 		next = block->transaction_next;
2328 
2329 		if (block->original_data != NULL) {
2330 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
2331 				transaction->id, block->block_number));
2332 			memcpy(block->current_data, block->original_data, cache->block_size);
2333 			cache->Free(block->original_data);
2334 			block->original_data = NULL;
2335 		}
2336 		if (transaction->has_sub_transaction) {
2337 			if (block->parent_data != block->current_data)
2338 				cache->Free(block->parent_data);
2339 			block->parent_data = NULL;
2340 		}
2341 
2342 		block->transaction_next = NULL;
2343 		block->transaction = NULL;
2344 		block->discard = false;
2345 	}
2346 
2347 	hash_remove(cache->transaction_hash, transaction);
2348 	delete_transaction(cache, transaction);
2349 	return B_OK;
2350 }
2351 
2352 
2353 /*!	Acknowledges the current parent transaction, and starts a new transaction
2354 	from its sub transaction.
2355 	The new transaction also gets a new transaction ID.
2356 */
2357 int32
2358 cache_detach_sub_transaction(void* _cache, int32 id,
2359 	transaction_notification_hook hook, void* data)
2360 {
2361 	block_cache* cache = (block_cache*)_cache;
2362 	MutexLocker locker(&cache->lock);
2363 
2364 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
2365 
2366 	cache_transaction* transaction = lookup_transaction(cache, id);
2367 	if (transaction == NULL) {
2368 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2369 		return B_BAD_VALUE;
2370 	}
2371 	if (!transaction->has_sub_transaction)
2372 		return B_BAD_VALUE;
2373 
2374 	// create a new transaction for the sub transaction
2375 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
2376 	if (newTransaction == NULL)
2377 		return B_NO_MEMORY;
2378 
2379 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2380 	T(Detach(cache, transaction, newTransaction));
2381 
2382 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2383 
2384 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2385 			data) != B_OK) {
2386 		delete newTransaction;
2387 		return B_NO_MEMORY;
2388 	}
2389 
2390 	// iterate through all blocks and free the unchanged original contents
2391 
2392 	cached_block* block = transaction->first_block;
2393 	cached_block* last = NULL;
2394 	cached_block* next;
2395 	for (; block != NULL; block = next) {
2396 		next = block->transaction_next;
2397 
2398 		if (block->previous_transaction != NULL) {
2399 			// need to write back pending changes
2400 			write_cached_block(cache, block);
2401 		}
2402 		if (block->discard) {
2403 			cache->DiscardBlock(block);
2404 			transaction->main_num_blocks--;
2405 			continue;
2406 		}
2407 
2408 		if (block->original_data != NULL && block->parent_data != NULL) {
2409 			// free the original data if the parent data of the transaction
2410 			// will be made current - but keep them otherwise
2411 			cache->Free(block->original_data);
2412 			block->original_data = NULL;
2413 		}
2414 		if (block->parent_data == NULL
2415 			|| block->parent_data != block->current_data) {
2416 			// we need to move this block over to the new transaction
2417 			block->original_data = block->parent_data;
2418 			if (last == NULL)
2419 				newTransaction->first_block = block;
2420 			else
2421 				last->transaction_next = block;
2422 
2423 			block->transaction = newTransaction;
2424 			last = block;
2425 		} else
2426 			block->transaction = NULL;
2427 
2428 		if (block->parent_data != NULL) {
2429 			// move the block to the previous transaction list
2430 			transaction->blocks.Add(block);
2431 			block->previous_transaction = transaction;
2432 			block->parent_data = NULL;
2433 		}
2434 
2435 		block->transaction_next = NULL;
2436 	}
2437 
2438 	newTransaction->num_blocks = transaction->sub_num_blocks;
2439 
2440 	transaction->open = false;
2441 	transaction->has_sub_transaction = false;
2442 	transaction->num_blocks = transaction->main_num_blocks;
2443 	transaction->sub_num_blocks = 0;
2444 
2445 	hash_insert_grow(cache->transaction_hash, newTransaction);
2446 	cache->last_transaction = newTransaction;
2447 
2448 	return newTransaction->id;
2449 }
2450 
2451 
2452 status_t
2453 cache_abort_sub_transaction(void* _cache, int32 id)
2454 {
2455 	block_cache* cache = (block_cache*)_cache;
2456 	MutexLocker locker(&cache->lock);
2457 
2458 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
2459 
2460 	cache_transaction* transaction = lookup_transaction(cache, id);
2461 	if (transaction == NULL) {
2462 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
2463 		return B_BAD_VALUE;
2464 	}
2465 	if (!transaction->has_sub_transaction)
2466 		return B_BAD_VALUE;
2467 
2468 	T(Abort(cache, transaction));
2469 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2470 
2471 	// revert all changes back to the version of the parent
2472 
2473 	cached_block* block = transaction->first_block;
2474 	cached_block* next;
2475 	for (; block != NULL; block = next) {
2476 		next = block->transaction_next;
2477 
2478 		if (block->parent_data == NULL) {
2479 			if (block->original_data != NULL) {
2480 				// the parent transaction didn't change the block, but the sub
2481 				// transaction did - we need to revert from the original data
2482 				memcpy(block->current_data, block->original_data,
2483 					cache->block_size);
2484 			}
2485 		} else if (block->parent_data != block->current_data) {
2486 			// the block has been changed and must be restored
2487 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
2488 				transaction->id, block->block_number));
2489 			memcpy(block->current_data, block->parent_data, cache->block_size);
2490 			cache->Free(block->parent_data);
2491 		}
2492 
2493 		block->parent_data = NULL;
2494 		block->discard = false;
2495 	}
2496 
2497 	// all subsequent changes will go into the main transaction
2498 	transaction->has_sub_transaction = false;
2499 	transaction->sub_num_blocks = 0;
2500 
2501 	return B_OK;
2502 }
2503 
2504 
2505 status_t
2506 cache_start_sub_transaction(void* _cache, int32 id)
2507 {
2508 	block_cache* cache = (block_cache*)_cache;
2509 	MutexLocker locker(&cache->lock);
2510 
2511 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
2512 
2513 	cache_transaction* transaction = lookup_transaction(cache, id);
2514 	if (transaction == NULL) {
2515 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
2516 		return B_BAD_VALUE;
2517 	}
2518 
2519 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2520 
2521 	// move all changed blocks up to the parent
2522 
2523 	cached_block* block = transaction->first_block;
2524 	cached_block* last = NULL;
2525 	cached_block* next;
2526 	for (; block != NULL; block = next) {
2527 		next = block->transaction_next;
2528 
2529 		if (block->discard) {
2530 			// This block has been discarded in the parent transaction
2531 			if (last != NULL)
2532 				last->transaction_next = next;
2533 			else
2534 				transaction->first_block = next;
2535 
2536 			cache->DiscardBlock(block);
2537 			transaction->num_blocks--;
2538 			continue;
2539 		}
2540 		if (transaction->has_sub_transaction
2541 			&& block->parent_data != NULL
2542 			&& block->parent_data != block->current_data) {
2543 			// there already is an older sub transaction - we acknowledge
2544 			// its changes and move its blocks up to the parent
2545 			cache->Free(block->parent_data);
2546 		}
2547 
2548 		// we "allocate" the parent data lazily, that means, we don't copy
2549 		// the data (and allocate memory for it) until we need to
2550 		block->parent_data = block->current_data;
2551 		last = block;
2552 	}
2553 
2554 	// all subsequent changes will go into the sub transaction
2555 	transaction->has_sub_transaction = true;
2556 	transaction->main_num_blocks = transaction->num_blocks;
2557 	transaction->sub_num_blocks = 0;
2558 	T(Action("start-sub", cache, transaction));
2559 
2560 	return B_OK;
2561 }
2562 
2563 
2564 /*!	Adds a transaction listener that gets notified when the transaction
2565 	is ended, aborted, written, or idle as specified by \a events.
2566 	The listener gets automatically removed when the transaction ends.
2567 */
2568 status_t
2569 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
2570 	transaction_notification_hook hook, void* data)
2571 {
2572 	block_cache* cache = (block_cache*)_cache;
2573 
2574 	MutexLocker locker(&cache->lock);
2575 
2576 	cache_transaction* transaction = lookup_transaction(cache, id);
2577 	if (transaction == NULL)
2578 		return B_BAD_VALUE;
2579 
2580 	return add_transaction_listener(cache, transaction, events, hook, data);
2581 }
2582 
2583 
2584 status_t
2585 cache_remove_transaction_listener(void* _cache, int32 id,
2586 	transaction_notification_hook hookFunction, void* data)
2587 {
2588 	block_cache* cache = (block_cache*)_cache;
2589 
2590 	MutexLocker locker(&cache->lock);
2591 
2592 	cache_transaction* transaction = lookup_transaction(cache, id);
2593 	if (transaction == NULL)
2594 		return B_BAD_VALUE;
2595 
2596 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2597 	while (iterator.HasNext()) {
2598 		cache_listener* listener = iterator.Next();
2599 		if (listener->data == data && listener->hook == hookFunction) {
2600 			iterator.Remove();
2601 
2602 			if (listener->events_pending != 0) {
2603 				MutexLocker _(sNotificationsLock);
2604 				if (listener->events_pending != 0)
2605 					cache->pending_notifications.Remove(listener);
2606 			}
2607 			delete listener;
2608 			return B_OK;
2609 		}
2610 	}
2611 
2612 	return B_ENTRY_NOT_FOUND;
2613 }
2614 
2615 
2616 status_t
2617 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
2618 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
2619 {
2620 	cached_block* block = (cached_block*)*_cookie;
2621 	block_cache* cache = (block_cache*)_cache;
2622 
2623 	MutexLocker locker(&cache->lock);
2624 
2625 	cache_transaction* transaction = lookup_transaction(cache, id);
2626 	if (transaction == NULL || !transaction->open)
2627 		return B_BAD_VALUE;
2628 
2629 	if (block == NULL)
2630 		block = transaction->first_block;
2631 	else
2632 		block = block->transaction_next;
2633 
2634 	if (transaction->has_sub_transaction) {
2635 		if (mainOnly) {
2636 			// find next block that the parent changed
2637 			while (block != NULL && block->parent_data == NULL)
2638 				block = block->transaction_next;
2639 		} else {
2640 			// find next non-discarded block
2641 			while (block != NULL && block->discard)
2642 				block = block->transaction_next;
2643 		}
2644 	}
2645 
2646 	if (block == NULL)
2647 		return B_ENTRY_NOT_FOUND;
2648 
2649 	if (_blockNumber)
2650 		*_blockNumber = block->block_number;
2651 	if (_data)
2652 		*_data = mainOnly ? block->parent_data : block->current_data;
2653 	if (_unchangedData)
2654 		*_unchangedData = block->original_data;
2655 
2656 	*_cookie = (addr_t)block;
2657 	return B_OK;
2658 }
2659 
2660 
2661 int32
2662 cache_blocks_in_transaction(void* _cache, int32 id)
2663 {
2664 	block_cache* cache = (block_cache*)_cache;
2665 	MutexLocker locker(&cache->lock);
2666 
2667 	cache_transaction* transaction = lookup_transaction(cache, id);
2668 	if (transaction == NULL)
2669 		return B_BAD_VALUE;
2670 
2671 	return transaction->num_blocks;
2672 }
2673 
2674 
2675 /*!	Returns the number of blocks that are part of the main transaction. If this
2676 	transaction does not have a sub transaction yet, this is the same value as
2677 	cache_blocks_in_transaction() would return.
2678 */
2679 int32
2680 cache_blocks_in_main_transaction(void* _cache, int32 id)
2681 {
2682 	block_cache* cache = (block_cache*)_cache;
2683 	MutexLocker locker(&cache->lock);
2684 
2685 	cache_transaction* transaction = lookup_transaction(cache, id);
2686 	if (transaction == NULL)
2687 		return B_BAD_VALUE;
2688 
2689 	if (transaction->has_sub_transaction)
2690 		return transaction->main_num_blocks;
2691 
2692 	return transaction->num_blocks;
2693 }
2694 
2695 
2696 int32
2697 cache_blocks_in_sub_transaction(void* _cache, int32 id)
2698 {
2699 	block_cache* cache = (block_cache*)_cache;
2700 	MutexLocker locker(&cache->lock);
2701 
2702 	cache_transaction* transaction = lookup_transaction(cache, id);
2703 	if (transaction == NULL)
2704 		return B_BAD_VALUE;
2705 
2706 	return transaction->sub_num_blocks;
2707 }
2708 
2709 
2710 //	#pragma mark - public block cache API
2711 
2712 
2713 void
2714 block_cache_delete(void* _cache, bool allowWrites)
2715 {
2716 	block_cache* cache = (block_cache*)_cache;
2717 
2718 	if (allowWrites)
2719 		block_cache_sync(cache);
2720 
2721 	mutex_lock(&sCachesLock);
2722 	sCaches.Remove(cache);
2723 	mutex_unlock(&sCachesLock);
2724 
2725 	mutex_lock(&cache->lock);
2726 
2727 	// free all blocks
2728 
2729 	uint32 cookie = 0;
2730 	cached_block* block;
2731 	while ((block = (cached_block*)hash_remove_first(cache->hash,
2732 			&cookie)) != NULL) {
2733 		cache->FreeBlock(block);
2734 	}
2735 
2736 	// free all transactions (they will all be aborted)
2737 
2738 	cookie = 0;
2739 	cache_transaction* transaction;
2740 	while ((transaction = (cache_transaction*)hash_remove_first(
2741 			cache->transaction_hash, &cookie)) != NULL) {
2742 		delete transaction;
2743 	}
2744 
2745 	delete cache;
2746 }
2747 
2748 
2749 void*
2750 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
2751 {
2752 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
2753 		readOnly);
2754 	if (cache == NULL)
2755 		return NULL;
2756 
2757 	if (cache->Init() != B_OK) {
2758 		delete cache;
2759 		return NULL;
2760 	}
2761 
2762 	MutexLocker _(sCachesLock);
2763 	sCaches.Add(cache);
2764 
2765 	return cache;
2766 }
2767 
2768 
2769 status_t
2770 block_cache_sync(void* _cache)
2771 {
2772 	block_cache* cache = (block_cache*)_cache;
2773 
2774 	// we will sync all dirty blocks to disk that have a completed
2775 	// transaction or no transaction only
2776 
2777 	MutexLocker locker(&cache->lock);
2778 	hash_iterator iterator;
2779 	hash_open(cache->hash, &iterator);
2780 
2781 	cached_block* block;
2782 	while ((block = (cached_block*)hash_next(cache->hash, &iterator)) != NULL) {
2783 		if (block->previous_transaction != NULL
2784 			|| (block->transaction == NULL && block->is_dirty)) {
2785 			status_t status = write_cached_block(cache, block);
2786 			if (status != B_OK)
2787 				return status;
2788 		}
2789 	}
2790 
2791 	hash_close(cache->hash, &iterator, false);
2792 	locker.Unlock();
2793 
2794 	wait_for_notifications(cache);
2795 		// make sure that all pending TRANSACTION_WRITTEN notifications
2796 		// are handled after we return
2797 	return B_OK;
2798 }
2799 
2800 
2801 status_t
2802 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
2803 {
2804 	block_cache* cache = (block_cache*)_cache;
2805 
2806 	// we will sync all dirty blocks to disk that have a completed
2807 	// transaction or no transaction only
2808 
2809 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
2810 		panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
2811 			blockNumber, cache->max_blocks - 1);
2812 		return B_BAD_VALUE;
2813 	}
2814 
2815 	MutexLocker locker(&cache->lock);
2816 
2817 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
2818 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
2819 			&blockNumber);
2820 		if (block == NULL)
2821 			continue;
2822 
2823 		if (block->previous_transaction != NULL
2824 			|| (block->transaction == NULL && block->is_dirty)) {
2825 			status_t status = write_cached_block(cache, block);
2826 			if (status != B_OK)
2827 				return status;
2828 		}
2829 	}
2830 
2831 	locker.Unlock();
2832 
2833 	wait_for_notifications(cache);
2834 		// make sure that all pending TRANSACTION_WRITTEN notifications
2835 		// are handled after we return
2836 	return B_OK;
2837 }
2838 
2839 
2840 void
2841 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
2842 {
2843 	block_cache* cache = (block_cache*)_cache;
2844 	MutexLocker locker(&cache->lock);
2845 
2846 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
2847 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
2848 			&blockNumber);
2849 		if (block == NULL)
2850 			continue;
2851 
2852 		if (block->previous_transaction != NULL)
2853 			write_cached_block(cache, block);
2854 
2855 		if (block->unused) {
2856 			cache->unused_blocks.Remove(block);
2857 			cache->RemoveBlock(block);
2858 		} else {
2859 			if (block->transaction != NULL && block->parent_data != NULL
2860 				&& block->parent_data != block->current_data) {
2861 				panic("Discarded block %Ld has already been changed in this "
2862 					"transaction!", blockNumber);
2863 			}
2864 
2865 			// mark it as discarded (in the current transaction only, if any)
2866 			block->discard = true;
2867 		}
2868 	}
2869 }
2870 
2871 
2872 status_t
2873 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
2874 {
2875 	block_cache* cache = (block_cache*)_cache;
2876 	MutexLocker locker(&cache->lock);
2877 
2878 	if (cache->read_only)
2879 		panic("tried to make block writable on a read-only cache!");
2880 
2881 	// TODO: this can be done better!
2882 	void* block = get_writable_cached_block(cache, blockNumber,
2883 		blockNumber, 1, transaction, false);
2884 	if (block != NULL) {
2885 		put_cached_block((block_cache*)_cache, blockNumber);
2886 		return B_OK;
2887 	}
2888 
2889 	return B_ERROR;
2890 }
2891 
2892 
2893 void*
2894 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
2895 	off_t length, int32 transaction)
2896 {
2897 	block_cache* cache = (block_cache*)_cache;
2898 	MutexLocker locker(&cache->lock);
2899 
2900 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
2901 		blockNumber, transaction));
2902 	if (cache->read_only)
2903 		panic("tried to get writable block on a read-only cache!");
2904 
2905 	return get_writable_cached_block(cache, blockNumber, base, length,
2906 		transaction, false);
2907 }
2908 
2909 
2910 void*
2911 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
2912 {
2913 	return block_cache_get_writable_etc(_cache, blockNumber,
2914 		blockNumber, 1, transaction);
2915 }
2916 
2917 
2918 void*
2919 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
2920 {
2921 	block_cache* cache = (block_cache*)_cache;
2922 	MutexLocker locker(&cache->lock);
2923 
2924 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
2925 		blockNumber, transaction));
2926 	if (cache->read_only)
2927 		panic("tried to get empty writable block on a read-only cache!");
2928 
2929 	return get_writable_cached_block((block_cache*)_cache, blockNumber,
2930 		blockNumber, 1, transaction, true);
2931 }
2932 
2933 
2934 const void*
2935 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
2936 {
2937 	block_cache* cache = (block_cache*)_cache;
2938 	MutexLocker locker(&cache->lock);
2939 	bool allocated;
2940 
2941 	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
2942 	if (block == NULL)
2943 		return NULL;
2944 
2945 #if BLOCK_CACHE_DEBUG_CHANGED
2946 	if (block->compare == NULL)
2947 		block->compare = cache->Allocate();
2948 	if (block->compare != NULL)
2949 		memcpy(block->compare, block->current_data, cache->block_size);
2950 #endif
2951 	TB(Get(cache, block));
2952 
2953 	return block->current_data;
2954 }
2955 
2956 
2957 const void*
2958 block_cache_get(void* _cache, off_t blockNumber)
2959 {
2960 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
2961 }
2962 
2963 
2964 /*!	Changes the internal status of a writable block to \a dirty. This can be
2965 	helpful in case you realize you don't need to change that block anymore
2966 	for whatever reason.
2967 
2968 	Note, you must only use this function on blocks that were acquired
2969 	writable!
2970 */
2971 status_t
2972 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
2973 	int32 transaction)
2974 {
2975 	block_cache* cache = (block_cache*)_cache;
2976 	MutexLocker locker(&cache->lock);
2977 
2978 	cached_block* block = (cached_block*)hash_lookup(cache->hash,
2979 		&blockNumber);
2980 	if (block == NULL)
2981 		return B_BAD_VALUE;
2982 	if (block->is_dirty == dirty) {
2983 		// there is nothing to do for us
2984 		return B_OK;
2985 	}
2986 
2987 	// TODO: not yet implemented
2988 	if (dirty)
2989 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
2990 
2991 	return B_OK;
2992 }
2993 
2994 
2995 void
2996 block_cache_put(void* _cache, off_t blockNumber)
2997 {
2998 	block_cache* cache = (block_cache*)_cache;
2999 	MutexLocker locker(&cache->lock);
3000 
3001 	put_cached_block(cache, blockNumber);
3002 }
3003 
3004