xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 746cac055adc6ac3308c7bc2d29040fb95689cc9)
1 /*
2  * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <signal.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <errno.h>
14 
15 #include <KernelExport.h>
16 #include <fs_cache.h>
17 
18 #include <condition_variable.h>
19 #include <lock.h>
20 #include <low_resource_manager.h>
21 #include <slab/Slab.h>
22 #include <tracing.h>
23 #include <util/kernel_cpp.h>
24 #include <util/DoublyLinkedList.h>
25 #include <util/AutoLock.h>
26 #include <util/khash.h>
27 
28 
29 // TODO: this is a naive but growing implementation to test the API:
30 //	1) block reading/writing is not at all optimized for speed, it will
31 //	   just read and write single blocks.
32 //	2) the locking could be improved; getting a block should not need to
33 //	   wait for blocks to be written
34 // TODO: the retrieval/copy of the original data could be delayed until the
35 //		new data must be written, ie. in low memory situations.
36 
37 //#define TRACE_BLOCK_CACHE
38 #ifdef TRACE_BLOCK_CACHE
39 #	define TRACE(x)	dprintf x
40 #else
41 #	define TRACE(x) ;
42 #endif
43 
44 #define DEBUG_BLOCK_CACHE
45 #define DEBUG_CHANGED
46 
47 // This macro is used for fatal situations that are acceptable in a running
48 // system, like out of memory situations - should only panic for debugging.
49 #define FATAL(x) panic x
50 
51 
52 static const bigtime_t kTransactionIdleTime = 2000000LL;
53 	// a transaction is considered idle after 2 seconds of inactivity
54 
55 
56 struct cache_transaction;
57 struct cached_block;
58 struct block_cache;
59 typedef DoublyLinkedListLink<cached_block> block_link;
60 
61 struct cached_block {
62 	cached_block	*next;			// next in hash
63 	cached_block	*transaction_next;
64 	block_link		link;
65 	off_t			block_number;
66 	void			*current_data;
67 	void			*original_data;
68 	void			*parent_data;
69 #ifdef DEBUG_CHANGED
70 	void			*compare;
71 #endif
72 	int32			ref_count;
73 	int32			accessed;
74 	bool			busy : 1;
75 	bool			is_writing : 1;
76 	bool			is_dirty : 1;
77 	bool			unused : 1;
78 	cache_transaction *transaction;
79 	cache_transaction *previous_transaction;
80 
81 	static int Compare(void *_cacheEntry, const void *_block);
82 	static uint32 Hash(void *_cacheEntry, const void *_block, uint32 range);
83 };
84 
85 typedef DoublyLinkedList<cached_block,
86 	DoublyLinkedListMemberGetLink<cached_block,
87 		&cached_block::link> > block_list;
88 
89 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
90 	int32			transaction_id;
91 	int32			events_pending;
92 	int32			events;
93 	transaction_notification_hook hook;
94 	void			*data;
95 	bool			delete_after_event;
96 };
97 
98 typedef DoublyLinkedList<cache_notification> NotificationList;
99 
100 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
101 	hash_table		*hash;
102 	mutex			lock;
103 	int				fd;
104 	off_t			max_blocks;
105 	size_t			block_size;
106 	int32			next_transaction_id;
107 	cache_transaction *last_transaction;
108 	hash_table		*transaction_hash;
109 
110 	object_cache	*buffer_cache;
111 	block_list		unused_blocks;
112 
113 	uint32			num_dirty_blocks;
114 	bool			read_only;
115 
116 	NotificationList pending_notifications;
117 	ConditionVariable condition_variable;
118 
119 	block_cache(int fd, off_t numBlocks, size_t blockSize, bool readOnly);
120 	~block_cache();
121 
122 	status_t Init();
123 
124 	void RemoveUnusedBlocks(int32 maxAccessed = LONG_MAX,
125 		int32 count = LONG_MAX);
126 	void FreeBlock(cached_block *block);
127 	cached_block *NewBlock(off_t blockNumber);
128 	void Free(void *buffer);
129 	void *Allocate();
130 
131 	static void LowMemoryHandler(void *data, uint32 resources, int32 level);
132 
133 private:
134 	cached_block *_GetUnusedBlock();
135 };
136 
137 struct cache_listener;
138 typedef DoublyLinkedListLink<cache_listener> listener_link;
139 
140 struct cache_listener : cache_notification {
141 	listener_link	link;
142 };
143 
144 typedef DoublyLinkedList<cache_listener,
145 	DoublyLinkedListMemberGetLink<cache_listener,
146 		&cache_listener::link> > ListenerList;
147 
148 struct cache_transaction {
149 	cache_transaction();
150 
151 	cache_transaction *next;
152 	int32			id;
153 	int32			num_blocks;
154 	int32			main_num_blocks;
155 	int32			sub_num_blocks;
156 	cached_block	*first_block;
157 	block_list		blocks;
158 	ListenerList	listeners;
159 	bool			open;
160 	bool			has_sub_transaction;
161 	bigtime_t		last_used;
162 };
163 
164 #if BLOCK_CACHE_BLOCK_TRACING
165 namespace BlockTracing {
166 
167 class Action : public AbstractTraceEntry {
168 public:
169 	Action(block_cache *cache, cached_block* block)
170 		:
171 		fCache(cache),
172 		fBlockNumber(block->block_number),
173 		fIsDirty(block->is_dirty),
174 		fHasOriginal(block->original_data != NULL),
175 		fHasParent(block->parent_data != NULL),
176 		fTransactionID(-1),
177 		fPreviousID(-1)
178 	{
179 		if (block->transaction != NULL)
180 			fTransactionID = block->transaction->id;
181 		if (block->previous_transaction != NULL)
182 			fPreviousID = block->previous_transaction->id;
183 	}
184 
185 	virtual void AddDump(TraceOutput& out)
186 	{
187 		out.Print("block cache %p, %s %Ld, %c%c%c transaction %ld "
188 			"(previous id %ld)\n", fCache, _Action(), fBlockNumber,
189 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
190 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
191 	}
192 
193 	virtual const char* _Action() const = 0;
194 
195 private:
196 	block_cache			*fCache;
197 	uint64				fBlockNumber;
198 	bool				fIsDirty;
199 	bool				fHasOriginal;
200 	bool				fHasParent;
201 	int32				fTransactionID;
202 	int32				fPreviousID;
203 };
204 
205 class Get : public Action {
206 public:
207 	Get(block_cache *cache, cached_block* block)
208 		: Action(cache, block)
209 	{
210 		Initialized();
211 	}
212 
213 	virtual const char* _Action() const { return "get"; }
214 };
215 
216 class Put : public Action {
217 public:
218 	Put(block_cache *cache, cached_block* block)
219 		: Action(cache, block)
220 	{
221 		Initialized();
222 	}
223 
224 	virtual const char* _Action() const { return "put"; }
225 };
226 
227 class Read : public Action {
228 public:
229 	Read(block_cache *cache, cached_block* block)
230 		: Action(cache, block)
231 	{
232 		Initialized();
233 	}
234 
235 	virtual const char* _Action() const { return "read"; }
236 };
237 
238 class Write : public Action {
239 public:
240 	Write(block_cache *cache, cached_block* block)
241 		: Action(cache, block)
242 	{
243 		Initialized();
244 	}
245 
246 	virtual const char* _Action() const { return "write"; }
247 };
248 
249 class Flush : public Action {
250 public:
251 	Flush(block_cache *cache, cached_block* block, bool getUnused = false)
252 		: Action(cache, block),
253 		fGetUnused(getUnused)
254 	{
255 		Initialized();
256 	}
257 
258 	virtual const char* _Action() const
259 		{ return fGetUnused ? "get-unused" : "flush"; }
260 
261 private:
262 	bool	fGetUnused;
263 };
264 
265 class Error : public AbstractTraceEntry {
266 public:
267 	Error(block_cache *cache, uint64 blockNumber, const char* message,
268 			status_t status = B_OK)
269 		:
270 		fCache(cache),
271 		fBlockNumber(blockNumber),
272 		fMessage(message),
273 		fStatus(status)
274 	{
275 		Initialized();
276 	}
277 
278 	virtual void AddDump(TraceOutput& out)
279 	{
280 		out.Print("block cache %p, error %Ld, %s%s%s",
281 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
282 			fStatus != B_OK ? strerror(fStatus) : "");
283 	}
284 
285 private:
286 	block_cache*	fCache;
287 	uint64			fBlockNumber;
288 	const char*		fMessage;
289 	status_t		fStatus;
290 };
291 
292 }	// namespace BlockTracing
293 
294 #	define TB(x) new(std::nothrow) BlockTracing::x;
295 #else
296 #	define TB(x) ;
297 #endif
298 
299 #if BLOCK_CACHE_TRANSACTION_TRACING
300 namespace TransactionTracing {
301 
302 class Action : public AbstractTraceEntry {
303 	public:
304 		Action(const char *label, block_cache *cache,
305 				cache_transaction *transaction)
306 			:
307 			fCache(cache),
308 			fTransaction(transaction),
309 			fID(transaction->id),
310 			fSub(transaction->has_sub_transaction),
311 			fNumBlocks(transaction->num_blocks),
312 			fSubNumBlocks(transaction->sub_num_blocks)
313 		{
314 			strlcpy(fLabel, label, sizeof(fLabel));
315 			Initialized();
316 		}
317 
318 		virtual void AddDump(TraceOutput& out)
319 		{
320 			out.Print("block cache %p, %s transaction %p (id %ld)%s"
321 				", %ld/%ld blocks", fCache, fLabel, fTransaction, fID,
322 				fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
323 		}
324 
325 	private:
326 		char				fLabel[12];
327 		block_cache			*fCache;
328 		cache_transaction	*fTransaction;
329 		int32				fID;
330 		bool				fSub;
331 		int32				fNumBlocks;
332 		int32				fSubNumBlocks;
333 };
334 
335 class Detach : public AbstractTraceEntry {
336 	public:
337 		Detach(block_cache *cache, cache_transaction *transaction,
338 				cache_transaction *newTransaction)
339 			:
340 			fCache(cache),
341 			fTransaction(transaction),
342 			fID(transaction->id),
343 			fSub(transaction->has_sub_transaction),
344 			fNewTransaction(newTransaction),
345 			fNewID(newTransaction->id)
346 		{
347 			Initialized();
348 		}
349 
350 		virtual void AddDump(TraceOutput& out)
351 		{
352 			out.Print("block cache %p, detach transaction %p (id %ld)"
353 				"from transaction %p (id %ld)%s",
354 				fCache, fNewTransaction, fNewID, fTransaction, fID,
355 				fSub ? " sub" : "");
356 		}
357 
358 	private:
359 		block_cache			*fCache;
360 		cache_transaction	*fTransaction;
361 		int32				fID;
362 		bool				fSub;
363 		cache_transaction	*fNewTransaction;
364 		int32				fNewID;
365 };
366 
367 class Abort : public AbstractTraceEntry {
368 	public:
369 		Abort(block_cache *cache, cache_transaction *transaction)
370 			:
371 			fCache(cache),
372 			fTransaction(transaction),
373 			fID(transaction->id),
374 			fNumBlocks(0)
375 		{
376 			bool isSub = transaction->has_sub_transaction;
377 			fNumBlocks = isSub ? transaction->sub_num_blocks
378 				: transaction->num_blocks;
379 			fBlocks = (off_t *)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
380 			if (fBlocks != NULL) {
381 				cached_block *block = transaction->first_block;
382 				for (int32 i = 0; block != NULL && i < fNumBlocks;
383 						block = block->transaction_next) {
384 					fBlocks[i++] = block->block_number;
385 				}
386 			} else
387 				fNumBlocks = 0;
388 			Initialized();
389 		}
390 
391 		virtual void AddDump(TraceOutput& out)
392 		{
393 			out.Print("block cache %p, abort transaction "
394 				"%p (id %ld), blocks", fCache, fTransaction, fID);
395 			for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
396 				out.Print(" %Ld", fBlocks[i]);
397 		}
398 
399 	private:
400 		block_cache			*fCache;
401 		cache_transaction	*fTransaction;
402 		int32				fID;
403 		off_t				*fBlocks;
404 		int32				fNumBlocks;
405 };
406 
407 }	// namespace TransactionTracing
408 
409 #	define T(x) new(std::nothrow) TransactionTracing::x;
410 #else
411 #	define T(x) ;
412 #endif
413 
414 
415 static status_t write_cached_block(block_cache *cache, cached_block *block,
416 	bool deleteTransaction = true);
417 
418 
419 static DoublyLinkedList<block_cache> sCaches;
420 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
421 static sem_id sEventSemaphore;
422 static mutex sNotificationsLock = MUTEX_INITIALIZER("block cache notifications");
423 static thread_id sNotifierWriterThread;
424 static DoublyLinkedListLink<block_cache> sMarkCache;
425 	// TODO: this only works if the link is the first entry of block_cache
426 static object_cache *sBlockCache;
427 
428 
429 //	#pragma mark - notifications/listener
430 
431 
432 /*!	Checks wether or not this is an event that closes a transaction. */
433 static inline bool
434 is_closing_event(int32 event)
435 {
436 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
437 }
438 
439 
440 static inline bool
441 is_written_event(int32 event)
442 {
443 	return (event & TRANSACTION_WRITTEN) != 0;
444 }
445 
446 
447 /*!	From the specified \a notification, it will remove the lowest pending
448 	event, and return that one in \a _event.
449 	If there is no pending event anymore, it will return \c false.
450 */
451 static bool
452 get_next_pending_event(cache_notification *notification, int32 *_event)
453 {
454 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
455 		int32 pending = atomic_and(&notification->events_pending,
456 			~eventMask);
457 
458 		bool more = (pending & ~eventMask) != 0;
459 
460 		if ((pending & eventMask) != 0) {
461 			*_event = eventMask;
462 			return more;
463 		}
464 	}
465 
466 	return false;
467 }
468 
469 
470 /*!	Initializes the \a notification as specified. */
471 static void
472 set_notification(cache_transaction *transaction,
473 	cache_notification &notification, int32 events,
474 	transaction_notification_hook hook, void *data)
475 {
476 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
477 	notification.events_pending = 0;
478 	notification.events = events;
479 	notification.hook = hook;
480 	notification.data = data;
481 	notification.delete_after_event = false;
482 }
483 
484 
485 /*!	Makes sure the notification is deleted. It either deletes it directly,
486 	when possible, or marks it for deletion if the notification is pending.
487 */
488 static void
489 delete_notification(cache_notification *notification)
490 {
491 	MutexLocker locker(sNotificationsLock);
492 
493 	if (notification->events_pending != 0)
494 		notification->delete_after_event = true;
495 	else
496 		delete notification;
497 }
498 
499 
500 /*!	Adds the notification to the pending notifications list, or, if it's
501 	already part of it, updates its events_pending field.
502 	Also marks the notification to be deleted if \a deleteNotification
503 	is \c true.
504 	Triggers the notifier thread to run.
505 */
506 static void
507 add_notification(block_cache *cache, cache_notification *notification,
508 	int32 event, bool deleteNotification)
509 {
510 	if (notification->hook == NULL)
511 		return;
512 
513 	int32 pending = atomic_or(&notification->events_pending, event);
514 	if (pending == 0) {
515 		// not yet part of the notification list
516 		MutexLocker locker(sNotificationsLock);
517 		if (deleteNotification)
518 			notification->delete_after_event = true;
519 		cache->pending_notifications.Add(notification);
520 	} else if (deleteNotification) {
521 		// we might need to delete it ourselves if we're late
522 		delete_notification(notification);
523 	}
524 
525 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
526 		// We're probably still holding some locks that makes rescheduling
527 		// not a good idea at this point.
528 }
529 
530 
531 /*!	Notifies all interested listeners of this transaction about the \a event.
532 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
533 	TRANSACTION_ABORTED), all listeners except those listening to
534 	TRANSACTION_WRITTEN will be removed.
535 */
536 static void
537 notify_transaction_listeners(block_cache *cache, cache_transaction *transaction,
538 	int32 event)
539 {
540 	T(Action("notify", cache, transaction));
541 
542 	bool isClosing = is_closing_event(event);
543 	bool isWritten = is_written_event(event);
544 
545 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
546 	while (iterator.HasNext()) {
547 		cache_listener *listener = iterator.Next();
548 
549 		bool remove = isClosing && !is_written_event(listener->events)
550 			|| isWritten && is_written_event(listener->events);
551 		if (remove)
552 			iterator.Remove();
553 
554 		if ((listener->events & event) != 0)
555 			add_notification(cache, listener, event, remove);
556 		else if (remove)
557 			delete_notification(listener);
558 	}
559 }
560 
561 
562 static void
563 flush_pending_notifications(block_cache *cache)
564 {
565 	while (true) {
566 		MutexLocker locker(sNotificationsLock);
567 
568 		cache_notification *notification = cache->pending_notifications.Head();
569 		if (notification == NULL)
570 			return;
571 
572 		bool deleteAfterEvent = false;
573 		int32 event = -1;
574 		if (!get_next_pending_event(notification, &event)) {
575 			// remove the notification if this was the last pending event
576 			cache->pending_notifications.Remove(notification);
577 			deleteAfterEvent = notification->delete_after_event;
578 		}
579 
580 		if (event >= 0) {
581 			// Notify listener, we need to copy the notification, as it might
582 			// be removed when we unlock the list.
583 			cache_notification copy = *notification;
584 			locker.Unlock();
585 
586 			copy.hook(copy.transaction_id, event, copy.data);
587 
588 			locker.Lock();
589 		}
590 
591 		if (deleteAfterEvent)
592 			delete notification;
593 	}
594 }
595 
596 
597 /*!	Flushes all pending notifications by calling the appropriate hook
598 	functions.
599 	Must not be called with a cache lock held.
600 */
601 static void
602 flush_pending_notifications()
603 {
604 	MutexLocker _(sCachesLock);
605 
606 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
607 	while (iterator.HasNext()) {
608 		block_cache *cache = iterator.Next();
609 
610 		flush_pending_notifications(cache);
611 	}
612 }
613 
614 
615 /*!	Removes and deletes all listeners that are still monitoring this
616 	transaction.
617 */
618 static void
619 remove_transaction_listeners(block_cache *cache, cache_transaction *transaction)
620 {
621 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
622 	while (iterator.HasNext()) {
623 		cache_listener *listener = iterator.Next();
624 		iterator.Remove();
625 
626 		delete_notification(listener);
627 	}
628 }
629 
630 
631 static status_t
632 add_transaction_listener(block_cache *cache, cache_transaction *transaction,
633 	int32 events, transaction_notification_hook hookFunction, void *data)
634 {
635 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
636 	while (iterator.HasNext()) {
637 		cache_listener *listener = iterator.Next();
638 
639 		if (listener->data == data && listener->hook == hookFunction) {
640 			// this listener already exists, just update it
641 			listener->events |= events;
642 			return B_OK;
643 		}
644 	}
645 
646 	cache_listener *listener = new(std::nothrow) cache_listener;
647 	if (listener == NULL)
648 		return B_NO_MEMORY;
649 
650 	set_notification(transaction, *listener, events, hookFunction, data);
651 	transaction->listeners.Add(listener);
652 	return B_OK;
653 }
654 
655 
656 //	#pragma mark - private transaction
657 
658 
659 cache_transaction::cache_transaction()
660 {
661 	num_blocks = 0;
662 	main_num_blocks = 0;
663 	sub_num_blocks = 0;
664 	first_block = NULL;
665 	open = true;
666 	last_used = system_time();
667 }
668 
669 
670 static int
671 transaction_compare(void *_transaction, const void *_id)
672 {
673 	cache_transaction *transaction = (cache_transaction *)_transaction;
674 	const int32 *id = (const int32 *)_id;
675 
676 	return transaction->id - *id;
677 }
678 
679 
680 static uint32
681 transaction_hash(void *_transaction, const void *_id, uint32 range)
682 {
683 	cache_transaction *transaction = (cache_transaction *)_transaction;
684 	const int32 *id = (const int32 *)_id;
685 
686 	if (transaction != NULL)
687 		return transaction->id % range;
688 
689 	return (uint32)*id % range;
690 }
691 
692 
693 static void
694 delete_transaction(block_cache *cache, cache_transaction *transaction)
695 {
696 	if (cache->last_transaction == transaction)
697 		cache->last_transaction = NULL;
698 
699 	remove_transaction_listeners(cache, transaction);
700 	delete transaction;
701 }
702 
703 
704 static cache_transaction *
705 lookup_transaction(block_cache *cache, int32 id)
706 {
707 	return (cache_transaction *)hash_lookup(cache->transaction_hash, &id);
708 }
709 
710 
711 //	#pragma mark - cached_block
712 
713 
714 int
715 compare_blocks(const void *_blockA, const void *_blockB)
716 {
717 	cached_block *blockA = *(cached_block **)_blockA;
718 	cached_block *blockB = *(cached_block **)_blockB;
719 
720 	off_t diff = blockA->block_number - blockB->block_number;
721 	if (diff > 0)
722 		return 1;
723 
724 	return diff < 0 ? -1 : 0;
725 }
726 
727 
728 /*static*/ int
729 cached_block::Compare(void *_cacheEntry, const void *_block)
730 {
731 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
732 	const off_t *block = (const off_t *)_block;
733 
734 	off_t diff = cacheEntry->block_number - *block;
735 	if (diff > 0)
736 		return 1;
737 
738 	return diff < 0 ? -1 : 0;
739 }
740 
741 
742 
743 /*static*/ uint32
744 cached_block::Hash(void *_cacheEntry, const void *_block, uint32 range)
745 {
746 	cached_block *cacheEntry = (cached_block *)_cacheEntry;
747 	const off_t *block = (const off_t *)_block;
748 
749 	if (cacheEntry != NULL)
750 		return cacheEntry->block_number % range;
751 
752 	return (uint64)*block % range;
753 }
754 
755 
756 //	#pragma mark - block_cache
757 
758 
759 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
760 		bool readOnly)
761 	:
762 	hash(NULL),
763 	fd(_fd),
764 	max_blocks(numBlocks),
765 	block_size(blockSize),
766 	next_transaction_id(1),
767 	last_transaction(NULL),
768 	transaction_hash(NULL),
769 	buffer_cache(NULL),
770 	num_dirty_blocks(0),
771 	read_only(readOnly)
772 {
773 }
774 
775 
776 /*! Should be called with the cache's lock held. */
777 block_cache::~block_cache()
778 {
779 	unregister_low_resource_handler(&block_cache::LowMemoryHandler, this);
780 
781 	hash_uninit(transaction_hash);
782 	hash_uninit(hash);
783 
784 	delete_object_cache(buffer_cache);
785 
786 	mutex_destroy(&lock);
787 }
788 
789 
790 status_t
791 block_cache::Init()
792 {
793 	condition_variable.Init(this, "cache transaction sync");
794 	mutex_init(&lock, "block cache");
795 
796 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
797 		8, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
798 	if (buffer_cache == NULL)
799 		return B_NO_MEMORY;
800 
801 	hash = hash_init(1024, offsetof(cached_block, next), &cached_block::Compare,
802 		&cached_block::Hash);
803 	if (hash == NULL)
804 		return B_NO_MEMORY;
805 
806 	transaction_hash = hash_init(16, offsetof(cache_transaction, next),
807 		&transaction_compare, &::transaction_hash);
808 	if (transaction_hash == NULL)
809 		return B_NO_MEMORY;
810 
811 	return register_low_resource_handler(&block_cache::LowMemoryHandler, this,
812 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
813 }
814 
815 
816 void
817 block_cache::Free(void *buffer)
818 {
819 	if (buffer != NULL)
820 		object_cache_free(buffer_cache, buffer);
821 }
822 
823 
824 void *
825 block_cache::Allocate()
826 {
827 	return object_cache_alloc(buffer_cache, 0);
828 }
829 
830 
831 void
832 block_cache::FreeBlock(cached_block *block)
833 {
834 	Free(block->current_data);
835 
836 	if (block->original_data != NULL || block->parent_data != NULL) {
837 		panic("block_cache::FreeBlock(): %Ld, original %p, parent %p\n",
838 			block->block_number, block->original_data, block->parent_data);
839 	}
840 
841 #ifdef DEBUG_CHANGED
842 	Free(block->compare);
843 #endif
844 
845 	object_cache_free(sBlockCache, block);
846 }
847 
848 
849 cached_block *
850 block_cache::_GetUnusedBlock()
851 {
852 	TRACE(("block_cache: get unused block\n"));
853 
854 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
855 			cached_block *block = iterator.Next();) {
856 		TB(Flush(this, block, true));
857 		// this can only happen if no transactions are used
858 		if (block->is_dirty)
859 			write_cached_block(this, block, false);
860 
861 		// remove block from lists
862 		iterator.Remove();
863 		hash_remove(hash, block);
864 
865 		// TODO: see if parent/compare data is handled correctly here!
866 		if (block->parent_data != NULL
867 			&& block->parent_data != block->original_data)
868 			Free(block->parent_data);
869 		if (block->original_data != NULL)
870 			Free(block->original_data);
871 
872 		return block;
873 	}
874 
875 	return NULL;
876 }
877 
878 
879 /*! Allocates a new block for \a blockNumber, ready for use */
880 cached_block *
881 block_cache::NewBlock(off_t blockNumber)
882 {
883 	cached_block *block = (cached_block *)object_cache_alloc(sBlockCache, 0);
884 	if (block == NULL) {
885 		TB(Error(this, blockNumber, "allocation failed"));
886 		dprintf("block allocation failed, unused list is %sempty.\n",
887 			unused_blocks.IsEmpty() ? "" : "not ");
888 
889 		// allocation failed, try to reuse an unused block
890 		block = _GetUnusedBlock();
891 		if (block == NULL) {
892 			TB(Error(this, blockNumber, "get unused failed"));
893 			FATAL(("could not allocate block!\n"));
894 			return NULL;
895 		}
896 	}
897 
898 	block->current_data = Allocate();
899 	if (block->current_data == NULL) {
900 		object_cache_free(sBlockCache, block);
901 		return NULL;
902 	}
903 
904 	block->block_number = blockNumber;
905 	block->ref_count = 0;
906 	block->accessed = 0;
907 	block->transaction_next = NULL;
908 	block->transaction = block->previous_transaction = NULL;
909 	block->original_data = NULL;
910 	block->parent_data = NULL;
911 	block->is_dirty = false;
912 	block->unused = false;
913 #ifdef DEBUG_CHANGED
914 	block->compare = NULL;
915 #endif
916 
917 	return block;
918 }
919 
920 
921 void
922 block_cache::RemoveUnusedBlocks(int32 maxAccessed, int32 count)
923 {
924 	TRACE(("block_cache: remove up to %ld unused blocks\n", count));
925 
926 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
927 			cached_block *block = iterator.Next();) {
928 		if (maxAccessed < block->accessed)
929 			continue;
930 
931 		TB(Flush(this, block));
932 		TRACE(("  remove block %Ld, accessed %ld times\n",
933 			block->block_number, block->accessed));
934 
935 		// this can only happen if no transactions are used
936 		if (block->is_dirty)
937 			write_cached_block(this, block, false);
938 
939 		// remove block from lists
940 		iterator.Remove();
941 		hash_remove(hash, block);
942 
943 		FreeBlock(block);
944 
945 		if (--count <= 0)
946 			break;
947 	}
948 }
949 
950 
951 void
952 block_cache::LowMemoryHandler(void *data, uint32 resources, int32 level)
953 {
954 	block_cache *cache = (block_cache *)data;
955 	MutexLocker locker(&cache->lock);
956 
957 	if (!locker.IsLocked()) {
958 		// If our block_cache were deleted, it could be that we had
959 		// been called before that deletion went through, therefore,
960 		// acquiring its lock might fail.
961 		return;
962 	}
963 
964 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
965 
966 	// free some blocks according to the low memory state
967 	// (if there is enough memory left, we don't free any)
968 
969 	int32 free = 1;
970 	int32 accessed = 1;
971 	switch (low_resource_state(
972 			B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)) {
973 		case B_NO_LOW_RESOURCE:
974 			return;
975 		case B_LOW_RESOURCE_NOTE:
976 			free = 50;
977 			accessed = 2;
978 			break;
979 		case B_LOW_RESOURCE_WARNING:
980 			free = 200;
981 			accessed = 10;
982 			break;
983 		case B_LOW_RESOURCE_CRITICAL:
984 			free = LONG_MAX;
985 			accessed = LONG_MAX;
986 			break;
987 	}
988 
989 	cache->RemoveUnusedBlocks(accessed, free);
990 }
991 
992 
993 //	#pragma mark - private block functions
994 
995 
996 /*!	Removes a reference from the specified \a block. If this was the last
997 	reference, the block is moved into the unused list.
998 	In low memory situations, it will also free some blocks from that list,
999 	but not necessarily the \a block it just released.
1000 */
1001 static void
1002 put_cached_block(block_cache *cache, cached_block *block)
1003 {
1004 #ifdef DEBUG_CHANGED
1005 	if (!block->is_dirty && block->compare != NULL
1006 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1007 		dprintf("new block:\n");
1008 		dump_block((const char *)block->current_data, 256, "  ");
1009 		dprintf("unchanged block:\n");
1010 		dump_block((const char *)block->compare, 256, "  ");
1011 		write_cached_block(cache, block);
1012 		panic("block_cache: supposed to be clean block was changed!\n");
1013 
1014 		cache->Free(block->compare);
1015 		block->compare = NULL;
1016 	}
1017 #endif
1018 	TB(Put(cache, block));
1019 
1020 	if (block->ref_count < 1) {
1021 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1022 		return;
1023 	}
1024 
1025 	if (--block->ref_count == 0
1026 		&& block->transaction == NULL
1027 		&& block->previous_transaction == NULL) {
1028 		// put this block in the list of unused blocks
1029 		block->unused = true;
1030 if (block->original_data != NULL || block->parent_data != NULL)
1031 	panic("put_cached_block(): %p (%Ld): %p, %p\n", block, block->block_number, block->original_data, block->parent_data);
1032 		cache->unused_blocks.Add(block);
1033 //		block->current_data = cache->allocator->Release(block->current_data);
1034 	}
1035 
1036 	// free some blocks according to the low memory state
1037 	// (if there is enough memory left, we don't free any)
1038 
1039 	int32 free = 1;
1040 	switch (low_resource_state(
1041 			B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)) {
1042 		case B_NO_LOW_RESOURCE:
1043 			return;
1044 		case B_LOW_RESOURCE_NOTE:
1045 			free = 1;
1046 			break;
1047 		case B_LOW_RESOURCE_WARNING:
1048 			free = 5;
1049 			break;
1050 		case B_LOW_RESOURCE_CRITICAL:
1051 			free = 20;
1052 			break;
1053 	}
1054 
1055 	cache->RemoveUnusedBlocks(LONG_MAX, free);
1056 }
1057 
1058 
1059 static void
1060 put_cached_block(block_cache *cache, off_t blockNumber)
1061 {
1062 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1063 		panic("put_cached_block: invalid block number %lld (max %lld)",
1064 			blockNumber, cache->max_blocks - 1);
1065 	}
1066 
1067 	cached_block *block = (cached_block *)hash_lookup(cache->hash, &blockNumber);
1068 	if (block != NULL)
1069 		put_cached_block(cache, block);
1070 	else {
1071 		TB(Error(cache, blockNumber, "put unknown"));
1072 	}
1073 }
1074 
1075 
1076 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1077 	there, or reads it from the disk.
1078 
1079 	\param _allocated tells you wether or not a new block has been allocated
1080 		to satisfy your request.
1081 	\param readBlock if \c false, the block will not be read in case it was
1082 		not already in the cache. The block you retrieve may contain random
1083 		data.
1084 */
1085 static cached_block *
1086 get_cached_block(block_cache *cache, off_t blockNumber, bool *_allocated,
1087 	bool readBlock = true)
1088 {
1089 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1090 		panic("get_cached_block: invalid block number %lld (max %lld)",
1091 			blockNumber, cache->max_blocks - 1);
1092 		return NULL;
1093 	}
1094 
1095 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
1096 		&blockNumber);
1097 	*_allocated = false;
1098 
1099 	if (block == NULL) {
1100 		// read block into cache
1101 		block = cache->NewBlock(blockNumber);
1102 		if (block == NULL)
1103 			return NULL;
1104 
1105 		hash_insert_grow(cache->hash, block);
1106 		*_allocated = true;
1107 	}
1108 
1109 	if (*_allocated && readBlock) {
1110 		int32 blockSize = cache->block_size;
1111 
1112 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1113 			block->current_data, blockSize);
1114 		if (bytesRead < blockSize) {
1115 			hash_remove(cache->hash, block);
1116 			cache->FreeBlock(block);
1117 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1118 
1119 			FATAL(("could not read block %Ld: bytesRead: %ld, error: %s\n",
1120 				blockNumber, bytesRead, strerror(errno)));
1121 			return NULL;
1122 		}
1123 		TB(Read(cache, block));
1124 	}
1125 
1126 	if (block->unused) {
1127 		//TRACE(("remove block %Ld from unused\n", blockNumber));
1128 		block->unused = false;
1129 		cache->unused_blocks.Remove(block);
1130 	}
1131 
1132 	block->ref_count++;
1133 	block->accessed++;
1134 
1135 	return block;
1136 }
1137 
1138 
1139 /*!	Returns the writable block data for the requested blockNumber.
1140 	If \a cleared is true, the block is not read from disk; an empty block
1141 	is returned.
1142 
1143 	This is the only method to insert a block into a transaction. It makes
1144 	sure that the previous block contents are preserved in that case.
1145 */
1146 static void *
1147 get_writable_cached_block(block_cache *cache, off_t blockNumber, off_t base,
1148 	off_t length, int32 transactionID, bool cleared)
1149 {
1150 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
1151 		blockNumber, transactionID));
1152 
1153 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1154 		panic("get_writable_cached_block: invalid block number %lld (max %lld)",
1155 			blockNumber, cache->max_blocks - 1);
1156 	}
1157 
1158 	bool allocated;
1159 	cached_block *block = get_cached_block(cache, blockNumber, &allocated,
1160 		!cleared);
1161 	if (block == NULL)
1162 		return NULL;
1163 
1164 	// if there is no transaction support, we just return the current block
1165 	if (transactionID == -1) {
1166 		if (cleared)
1167 			memset(block->current_data, 0, cache->block_size);
1168 
1169 		if (!block->is_dirty)
1170 			cache->num_dirty_blocks++;
1171 
1172 		block->is_dirty = true;
1173 			// mark the block as dirty
1174 
1175 		TB(Get(cache, block));
1176 		return block->current_data;
1177 	}
1178 
1179 	cache_transaction *transaction = block->transaction;
1180 
1181 	if (transaction != NULL && transaction->id != transactionID) {
1182 		// ToDo: we have to wait here until the other transaction is done.
1183 		//	Maybe we should even panic, since we can't prevent any deadlocks.
1184 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
1185 		put_cached_block(cache, block);
1186 		return NULL;
1187 	}
1188 	if (transaction == NULL && transactionID != -1) {
1189 		// get new transaction
1190 		transaction = lookup_transaction(cache, transactionID);
1191 		if (transaction == NULL) {
1192 			panic("get_writable_cached_block(): invalid transaction %ld!\n",
1193 				transactionID);
1194 			put_cached_block(cache, block);
1195 			return NULL;
1196 		}
1197 		if (!transaction->open) {
1198 			panic("get_writable_cached_block(): transaction already done!\n");
1199 			put_cached_block(cache, block);
1200 			return NULL;
1201 		}
1202 
1203 		block->transaction = transaction;
1204 
1205 		// attach the block to the transaction block list
1206 		block->transaction_next = transaction->first_block;
1207 		transaction->first_block = block;
1208 		transaction->num_blocks++;
1209 	}
1210 	if (transaction != NULL)
1211 		transaction->last_used = system_time();
1212 
1213 	bool wasUnchanged = block->original_data == NULL
1214 		|| block->previous_transaction != NULL;
1215 
1216 	if (!(allocated && cleared) && block->original_data == NULL) {
1217 		// we already have data, so we need to preserve it
1218 		block->original_data = cache->Allocate();
1219 		if (block->original_data == NULL) {
1220 			TB(Error(cache, blockNumber, "allocate original failed"));
1221 			FATAL(("could not allocate original_data\n"));
1222 			put_cached_block(cache, block);
1223 			return NULL;
1224 		}
1225 
1226 		memcpy(block->original_data, block->current_data, cache->block_size);
1227 	}
1228 	if (block->parent_data == block->current_data) {
1229 		// remember any previous contents for the parent transaction
1230 		block->parent_data = cache->Allocate();
1231 		if (block->parent_data == NULL) {
1232 			// TODO: maybe we should just continue the current transaction in this case...
1233 			TB(Error(cache, blockNumber, "allocate parent failed"));
1234 			FATAL(("could not allocate parent\n"));
1235 			put_cached_block(cache, block);
1236 			return NULL;
1237 		}
1238 
1239 		memcpy(block->parent_data, block->current_data, cache->block_size);
1240 		transaction->sub_num_blocks++;
1241 	} else if (transaction != NULL && transaction->has_sub_transaction
1242 		&& block->parent_data == NULL && wasUnchanged)
1243 		transaction->sub_num_blocks++;
1244 
1245 	if (cleared)
1246 		memset(block->current_data, 0, cache->block_size);
1247 
1248 	block->is_dirty = true;
1249 	TB(Get(cache, block));
1250 
1251 	return block->current_data;
1252 }
1253 
1254 
1255 /*!	Writes the specified \a block back to disk. It will always only write back
1256 	the oldest change of the block if it is part of more than one transaction.
1257 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1258 	delete transactions when they are no longer used, and \a deleteTransaction
1259 	is \c true.
1260 */
1261 static status_t
1262 write_cached_block(block_cache *cache, cached_block *block,
1263 	bool deleteTransaction)
1264 {
1265 	cache_transaction *previous = block->previous_transaction;
1266 	int32 blockSize = cache->block_size;
1267 
1268 	void *data = previous && block->original_data
1269 		? block->original_data : block->current_data;
1270 		// we first need to write back changes from previous transactions
1271 
1272 	TRACE(("write_cached_block(block %Ld)\n", block->block_number));
1273 	TB(Write(cache, block));
1274 
1275 	ssize_t written = write_pos(cache->fd, block->block_number * blockSize,
1276 		data, blockSize);
1277 
1278 	if (written < blockSize) {
1279 		TB(Error(cache, block->block_number, "write failed", written));
1280 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
1281 			strerror(errno)));
1282 		return B_IO_ERROR;
1283 	}
1284 
1285 	if (cache->num_dirty_blocks > 0)
1286 		cache->num_dirty_blocks--;
1287 
1288 	if (data == block->current_data)
1289 		block->is_dirty = false;
1290 
1291 	if (previous != NULL) {
1292 		previous->blocks.Remove(block);
1293 		block->previous_transaction = NULL;
1294 
1295 		if (block->original_data != NULL && block->transaction == NULL) {
1296 			// This block is not part of a transaction, so it does not need
1297 			// its original pointer anymore.
1298 			cache->Free(block->original_data);
1299 			block->original_data = NULL;
1300 		}
1301 
1302 		// Has the previous transation been finished with that write?
1303 		if (--previous->num_blocks == 0) {
1304 			TRACE(("cache transaction %ld finished!\n", previous->id));
1305 			T(Action("written", cache, previous));
1306 
1307 			notify_transaction_listeners(cache, previous, TRANSACTION_WRITTEN);
1308 
1309 			if (deleteTransaction) {
1310 				hash_remove(cache->transaction_hash, previous);
1311 				delete_transaction(cache, previous);
1312 			}
1313 		}
1314 	}
1315 	if (block->transaction == NULL && block->ref_count == 0) {
1316 		// the block is no longer used
1317 		block->unused = true;
1318 		cache->unused_blocks.Add(block);
1319 	}
1320 
1321 	return B_OK;
1322 }
1323 
1324 
1325 #ifdef DEBUG_BLOCK_CACHE
1326 
1327 static void
1328 dump_block(cached_block *block)
1329 {
1330 	kprintf("%08lx %9Ld %08lx %08lx %08lx %5ld %6ld %c%c%c%c  %08lx "
1331 		"%08lx\n", (addr_t)block, block->block_number,
1332 		(addr_t)block->current_data, (addr_t)block->original_data,
1333 		(addr_t)block->parent_data, block->ref_count, block->accessed,
1334 		block->busy ? 'B' : '-', block->is_writing ? 'W' : '-',
1335 		block->is_dirty ? 'B' : '-', block->unused ? 'U' : '-',
1336 		(addr_t)block->transaction,
1337 		(addr_t)block->previous_transaction);
1338 }
1339 
1340 
1341 static int
1342 dump_cache(int argc, char **argv)
1343 {
1344 	bool showTransactions = false;
1345 	bool showBlocks = false;
1346 	int32 i = 1;
1347 	while (argv[i] != NULL && argv[i][0] == '-') {
1348 		for (char *arg = &argv[i][1]; arg[0]; arg++) {
1349 			switch (arg[0]) {
1350 				case 'b':
1351 					showBlocks = true;
1352 					break;
1353 				case 't':
1354 					showTransactions = true;
1355 					break;
1356 				default:
1357 					print_debugger_command_usage(argv[0]);
1358 					return 0;
1359 			}
1360 		}
1361 		i++;
1362 	}
1363 
1364 	if (i >= argc) {
1365 		print_debugger_command_usage(argv[0]);
1366 		return 0;
1367 	}
1368 
1369 	block_cache *cache = (struct block_cache *)parse_expression(argv[i]);
1370 	if (cache == NULL) {
1371 		kprintf("invalid cache address\n");
1372 		return 0;
1373 	}
1374 
1375 	off_t blockNumber = -1;
1376 	if (i + 1 < argc) {
1377 		blockNumber = parse_expression(argv[i + 1]);
1378 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
1379 			&blockNumber);
1380 		if (block != NULL) {
1381 			kprintf("BLOCK %p\n", block);
1382 			kprintf(" current data:  %p\n", block->current_data);
1383 			kprintf(" original data: %p\n", block->original_data);
1384 			kprintf(" parent data:   %p\n", block->parent_data);
1385 			kprintf(" ref_count:     %ld\n", block->ref_count);
1386 			kprintf(" accessed:      %ld\n", block->accessed);
1387 			kprintf(" flags:        ");
1388 			if (block->is_writing)
1389 				kprintf(" is-writing");
1390 			if (block->is_dirty)
1391 				kprintf(" is-dirty");
1392 			if (block->unused)
1393 				kprintf(" unused");
1394 			kprintf("\n");
1395 			if (block->transaction != NULL) {
1396 				kprintf(" transaction:   %p (%ld)\n", block->transaction,
1397 					block->transaction->id);
1398 				if (block->transaction_next != NULL) {
1399 					kprintf(" next in transaction: %Ld\n",
1400 						block->transaction_next->block_number);
1401 				}
1402 			}
1403 			if (block->previous_transaction != NULL) {
1404 				kprintf(" previous transaction: %p (%ld)\n",
1405 					block->previous_transaction,
1406 					block->previous_transaction->id);
1407 			}
1408 
1409 			set_debug_variable("_current", (addr_t)block->current_data);
1410 			set_debug_variable("_original", (addr_t)block->original_data);
1411 			set_debug_variable("_parent", (addr_t)block->parent_data);
1412 		} else
1413 			kprintf("block %Ld not found\n", blockNumber);
1414 		return 0;
1415 	}
1416 
1417 	kprintf("BLOCK CACHE: %p\n", cache);
1418 
1419 	kprintf(" fd:         %d\n", cache->fd);
1420 	kprintf(" max_blocks: %Ld\n", cache->max_blocks);
1421 	kprintf(" block_size: %lu\n", cache->block_size);
1422 	kprintf(" next_transaction_id: %ld\n", cache->next_transaction_id);
1423 
1424 	if (!cache->pending_notifications.IsEmpty()) {
1425 		kprintf(" pending notifications:\n");
1426 
1427 		NotificationList::Iterator iterator
1428 			= cache->pending_notifications.GetIterator();
1429 		while (iterator.HasNext()) {
1430 			cache_notification *notification = iterator.Next();
1431 
1432 			kprintf("  %p %5lx %p - %p\n", notification,
1433 				notification->events_pending, notification->hook,
1434 				notification->data);
1435 		}
1436 	}
1437 
1438 	if (showTransactions) {
1439 		kprintf(" transactions:\n");
1440 		kprintf("address       id state  blocks  main   sub\n");
1441 
1442 		hash_iterator iterator;
1443 		hash_open(cache->transaction_hash, &iterator);
1444 
1445 		cache_transaction *transaction;
1446 		while ((transaction = (cache_transaction *)hash_next(
1447 				cache->transaction_hash, &iterator)) != NULL) {
1448 			kprintf("%p %5ld %-7s %5ld %5ld %5ld\n", transaction,
1449 				transaction->id, transaction->open ? "open" : "closed",
1450 				transaction->num_blocks, transaction->main_num_blocks,
1451 				transaction->sub_num_blocks);
1452 		}
1453 	}
1454 
1455 	if (showBlocks) {
1456 		kprintf(" blocks:\n");
1457 		kprintf("address  block no. current  original parent    refs access "
1458 			"flags transact prev. trans\n");
1459 	}
1460 
1461 	uint32 referenced = 0;
1462 	uint32 count = 0;
1463 	uint32 dirty = 0;
1464 	hash_iterator iterator;
1465 	hash_open(cache->hash, &iterator);
1466 	cached_block *block;
1467 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
1468 		if (showBlocks)
1469 			dump_block(block);
1470 
1471 		if (block->is_dirty)
1472 			dirty++;
1473 		if (block->ref_count)
1474 			referenced++;
1475 		count++;
1476 	}
1477 
1478 	kprintf(" %ld blocks total, %ld dirty, %ld referenced, %ld in unused.\n", count, dirty,
1479 		referenced, cache->unused_blocks.Size());
1480 
1481 	hash_close(cache->hash, &iterator, false);
1482 	return 0;
1483 }
1484 
1485 
1486 static int
1487 dump_transaction(int argc, char **argv)
1488 {
1489 	bool showBlocks = false;
1490 	int i = 1;
1491 	if (argc > 1 && !strcmp(argv[1], "-b")) {
1492 		showBlocks = true;
1493 		i++;
1494 	}
1495 
1496 	if (argc - i < 1 || argc - i > 2) {
1497 		print_debugger_command_usage(argv[0]);
1498 		return 0;
1499 	}
1500 
1501 	cache_transaction *transaction = NULL;
1502 
1503 	if (argc - i == 1) {
1504 		transaction = (cache_transaction *)parse_expression(argv[i]);
1505 	} else {
1506 		block_cache *cache = (block_cache *)parse_expression(argv[i]);
1507 		int32 id = parse_expression(argv[i + 1]);
1508 		transaction = lookup_transaction(cache, id);
1509 		if (transaction == NULL) {
1510 			kprintf("No transaction with ID %ld found.\n", id);
1511 			return 0;
1512 		}
1513 	}
1514 
1515 	kprintf("TRANSACTION %p\n", transaction);
1516 
1517 	kprintf(" id:             %ld\n", transaction->id);
1518 	kprintf(" num block:      %ld\n", transaction->num_blocks);
1519 	kprintf(" main num block: %ld\n", transaction->main_num_blocks);
1520 	kprintf(" sub num block:  %ld\n", transaction->sub_num_blocks);
1521 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
1522 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
1523 	kprintf(" idle:           %Ld secs\n",
1524 		(system_time() - transaction->last_used) / 1000000);
1525 
1526 	kprintf(" listeners:\n");
1527 
1528 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
1529 	while (iterator.HasNext()) {
1530 		cache_listener *listener = iterator.Next();
1531 
1532 		kprintf("  %p %5lx %p - %p\n", listener, listener->events_pending,
1533 			listener->hook, listener->data);
1534 	}
1535 
1536 	if (!showBlocks)
1537 		return 0;
1538 
1539 	kprintf(" blocks:\n");
1540 	kprintf("address  block no. current  original parent    refs access "
1541 		"flags transact prev. trans\n");
1542 
1543 	cached_block *block = transaction->first_block;
1544 	while (block != NULL) {
1545 		dump_block(block);
1546 		block = block->transaction_next;
1547 	}
1548 
1549 	kprintf("--\n");
1550 
1551 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1552 	while (blockIterator.HasNext()) {
1553 		block = blockIterator.Next();
1554 		dump_block(block);
1555 	}
1556 
1557 	return 0;
1558 }
1559 
1560 
1561 static int
1562 dump_caches(int argc, char **argv)
1563 {
1564 	kprintf("Block caches:\n");
1565 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
1566 	while (i.HasNext()) {
1567 		block_cache *cache = i.Next();
1568 		if (cache == (block_cache *)&sMarkCache)
1569 			continue;
1570 
1571 		kprintf("  %p\n", cache);
1572 	}
1573 
1574 	return 0;
1575 }
1576 
1577 #endif	// DEBUG_BLOCK_CACHE
1578 
1579 
1580 /*!	Traverses through the block_cache list, and returns one cache after the
1581 	other. The cache returned is automatically locked when you get it, and
1582 	unlocked with the next call to this function. Ignores caches that are in
1583 	deletion state.
1584 	Returns \c NULL when the end of the list is reached.
1585 */
1586 static block_cache *
1587 get_next_locked_block_cache(block_cache *last)
1588 {
1589 	MutexLocker _(sCachesLock);
1590 
1591 	block_cache *cache;
1592 	if (last != NULL) {
1593 		mutex_unlock(&last->lock);
1594 
1595 		cache = sCaches.GetNext((block_cache *)&sMarkCache);
1596 		sCaches.Remove((block_cache *)&sMarkCache);
1597 	} else
1598 		cache = sCaches.Head();
1599 
1600 	while (cache != NULL) {
1601 		cache = sCaches.GetNext(cache);
1602 		if (cache == NULL)
1603 			break;
1604 
1605 		mutex_lock(&cache->lock);
1606 		break;
1607 	}
1608 
1609 	if (cache != NULL)
1610 		sCaches.Insert(sCaches.GetNext(cache), (block_cache *)&sMarkCache);
1611 
1612 	return cache;
1613 }
1614 
1615 
1616 /*!	Background thread that continuously checks for pending notifications of
1617 	all caches.
1618 	Every two seconds, it will also write back up to 64 blocks per cache.
1619 */
1620 static status_t
1621 block_notifier_and_writer(void */*data*/)
1622 {
1623 	const bigtime_t kTimeout = 2000000LL;
1624 	bigtime_t timeout = kTimeout;
1625 
1626 	while (true) {
1627 		bigtime_t start = system_time();
1628 
1629 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
1630 			B_RELATIVE_TIMEOUT, timeout);
1631 		if (status == B_OK) {
1632 			flush_pending_notifications();
1633 			timeout -= system_time() - start;
1634 			continue;
1635 		}
1636 
1637 		// write 64 blocks of each block_cache every two seconds
1638 		// TODO: change this once we have an I/O scheduler
1639 		timeout = kTimeout;
1640 
1641 		block_cache *cache = NULL;
1642 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
1643 			const uint32 kMaxCount = 64;
1644 			cached_block *blocks[kMaxCount];
1645 			uint32 count = 0;
1646 
1647 			if (cache->num_dirty_blocks) {
1648 				// This cache is not using transactions, we'll scan the blocks
1649 				// directly
1650 				hash_iterator iterator;
1651 				hash_open(cache->hash, &iterator);
1652 
1653 				cached_block *block;
1654 				while (count < kMaxCount
1655 					&& (block = (cached_block *)hash_next(cache->hash,
1656 							&iterator)) != NULL) {
1657 					if (block->is_dirty)
1658 						blocks[count++] = block;
1659 				}
1660 
1661 				hash_close(cache->hash, &iterator, false);
1662 			} else {
1663 				hash_iterator iterator;
1664 				hash_open(cache->transaction_hash, &iterator);
1665 
1666 				cache_transaction *transaction;
1667 				while ((transaction = (cache_transaction *)hash_next(
1668 						cache->transaction_hash, &iterator)) != NULL
1669 					&& count < kMaxCount) {
1670 					if (transaction->open) {
1671 						if (system_time() > transaction->last_used
1672 								+ kTransactionIdleTime) {
1673 							// Transaction is open but idle
1674 							notify_transaction_listeners(cache, transaction,
1675 								TRANSACTION_IDLE);
1676 						}
1677 						continue;
1678 					}
1679 
1680 					// sort blocks to speed up writing them back
1681 					// TODO: ideally, this should be handled by the I/O scheduler
1682 					block_list::Iterator iterator
1683 						= transaction->blocks.GetIterator();
1684 
1685 					for (; count < kMaxCount && iterator.HasNext(); count++) {
1686 						blocks[count] = iterator.Next();
1687 					}
1688 				}
1689 
1690 				hash_close(cache->transaction_hash, &iterator, false);
1691 			}
1692 
1693 			qsort(blocks, count, sizeof(void *), &compare_blocks);
1694 
1695 			for (uint32 i = 0; i < count; i++) {
1696 				if (write_cached_block(cache, blocks[i], true) != B_OK)
1697 					break;
1698 			}
1699 		}
1700 	}
1701 }
1702 
1703 
1704 /*!	Notify function for wait_for_notifications(). */
1705 static void
1706 notify_sync(int32 transactionID, int32 event, void *_cache)
1707 {
1708 	block_cache *cache = (block_cache *)_cache;
1709 
1710 	cache->condition_variable.NotifyOne();
1711 }
1712 
1713 
1714 /*!	Waits until all pending notifications are carried out.
1715 	Safe to be called from the block writer/notifier thread.
1716 	You must not hold the \a cache lock when calling this function.
1717 */
1718 static void
1719 wait_for_notifications(block_cache *cache)
1720 {
1721 	if (find_thread(NULL) == sNotifierWriterThread) {
1722 		// We're the notifier thread, don't wait, but flush all pending
1723 		// notifications directly.
1724 		flush_pending_notifications(cache);
1725 		return;
1726 	}
1727 
1728 	// add sync notification
1729 	cache_notification notification;
1730 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
1731 		cache);
1732 
1733 	ConditionVariableEntry entry;
1734 	cache->condition_variable.Add(&entry);
1735 
1736 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
1737 
1738 	// wait for notification hook to be called
1739 	entry.Wait();
1740 }
1741 
1742 
1743 extern "C" status_t
1744 block_cache_init(void)
1745 {
1746 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
1747 		8, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1748 	if (sBlockCache == NULL)
1749 		return B_NO_MEMORY;
1750 
1751 	new (&sCaches) DoublyLinkedList<block_cache>;
1752 		// manually call constructor
1753 
1754 	sEventSemaphore = create_sem(0, "block cache event");
1755 	if (sEventSemaphore < B_OK)
1756 		return sEventSemaphore;
1757 
1758 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
1759 		"block notifier/writer", B_LOW_PRIORITY, NULL);
1760 	if (sNotifierWriterThread >= B_OK)
1761 		send_signal_etc(sNotifierWriterThread, SIGCONT, B_DO_NOT_RESCHEDULE);
1762 
1763 #ifdef DEBUG_BLOCK_CACHE
1764 	add_debugger_command_etc("block_caches", &dump_caches,
1765 		"dumps all block caches", "\n", 0);
1766 	add_debugger_command_etc("block_cache", &dump_cache,
1767 		"dumps a specific block cache",
1768 		"[-bt] <cache-address> [block-number]\n"
1769 		"  -t lists the transactions\n"
1770 		"  -b lists all blocks\n", 0);
1771 	add_debugger_command_etc("transaction", &dump_transaction,
1772 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
1773 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
1774 		"  -b lists all blocks that are part of this transaction\n", 0);
1775 #endif
1776 
1777 	return B_OK;
1778 }
1779 
1780 
1781 extern "C" size_t
1782 block_cache_used_memory()
1783 {
1784 	size_t usedMemory = 0;
1785 
1786 	MutexLocker _(sCachesLock);
1787 
1788 	DoublyLinkedList<block_cache>::Iterator it = sCaches.GetIterator();
1789 	while (block_cache* cache = it.Next()) {
1790 		if (cache == (block_cache*)&sMarkCache)
1791 			continue;
1792 
1793 		size_t cacheUsedMemory;
1794 		object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
1795 		usedMemory += cacheUsedMemory;
1796 	}
1797 
1798 	return usedMemory;
1799 }
1800 
1801 
1802 //	#pragma mark - public transaction API
1803 
1804 
1805 extern "C" int32
1806 cache_start_transaction(void *_cache)
1807 {
1808 	block_cache *cache = (block_cache *)_cache;
1809 	MutexLocker locker(&cache->lock);
1810 
1811 	if (cache->last_transaction && cache->last_transaction->open) {
1812 		panic("last transaction (%ld) still open!\n",
1813 			cache->last_transaction->id);
1814 	}
1815 
1816 	cache_transaction *transaction = new(nothrow) cache_transaction;
1817 	if (transaction == NULL)
1818 		return B_NO_MEMORY;
1819 
1820 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
1821 	cache->last_transaction = transaction;
1822 
1823 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
1824 	T(Action("start", cache, transaction));
1825 
1826 	hash_insert_grow(cache->transaction_hash, transaction);
1827 
1828 	return transaction->id;
1829 }
1830 
1831 
1832 extern "C" status_t
1833 cache_sync_transaction(void *_cache, int32 id)
1834 {
1835 	block_cache *cache = (block_cache *)_cache;
1836 	MutexLocker locker(&cache->lock);
1837 	status_t status = B_ENTRY_NOT_FOUND;
1838 
1839 	TRACE(("cache_sync_transaction(id %ld)\n", id));
1840 
1841 	hash_iterator iterator;
1842 	hash_open(cache->transaction_hash, &iterator);
1843 
1844 	cache_transaction *transaction;
1845 	while ((transaction = (cache_transaction *)hash_next(
1846 			cache->transaction_hash, &iterator)) != NULL) {
1847 		// close all earlier transactions which haven't been closed yet
1848 
1849 		if (transaction->id <= id && !transaction->open) {
1850 			// write back all of their remaining dirty blocks
1851 			T(Action("sync", cache, transaction));
1852 			while (transaction->num_blocks > 0) {
1853 				// sort blocks to speed up writing them back
1854 				// TODO: ideally, this should be handled by the I/O scheduler
1855 				block_list::Iterator iterator = transaction->blocks.GetIterator();
1856 				uint32 maxCount = transaction->num_blocks;
1857 				cached_block *buffer[16];
1858 				cached_block **blocks = (cached_block **)malloc(maxCount
1859 					* sizeof(void *));
1860 				if (blocks == NULL) {
1861 					maxCount = 16;
1862 					blocks = buffer;
1863 				}
1864 
1865 				uint32 count = 0;
1866 				for (; count < maxCount && iterator.HasNext(); count++) {
1867 					blocks[count] = iterator.Next();
1868 				}
1869 				qsort(blocks, count, sizeof(void *), &compare_blocks);
1870 
1871 				for (uint32 i = 0; i < count; i++) {
1872 					status = write_cached_block(cache, blocks[i], false);
1873 					if (status != B_OK)
1874 						break;
1875 				}
1876 
1877 				if (blocks != buffer)
1878 					free(blocks);
1879 
1880 				if (status != B_OK)
1881 					return status;
1882 			}
1883 
1884 			hash_remove_current(cache->transaction_hash, &iterator);
1885 			delete_transaction(cache, transaction);
1886 		}
1887 	}
1888 
1889 	hash_close(cache->transaction_hash, &iterator, false);
1890 	locker.Unlock();
1891 
1892 	wait_for_notifications(cache);
1893 		// make sure that all pending TRANSACTION_WRITTEN notifications
1894 		// are handled after we return
1895 	return B_OK;
1896 }
1897 
1898 
1899 extern "C" status_t
1900 cache_end_transaction(void *_cache, int32 id,
1901 	transaction_notification_hook hook, void *data)
1902 {
1903 	block_cache *cache = (block_cache *)_cache;
1904 	MutexLocker locker(&cache->lock);
1905 
1906 	TRACE(("cache_end_transaction(id = %ld)\n", id));
1907 
1908 	cache_transaction *transaction = lookup_transaction(cache, id);
1909 	if (transaction == NULL) {
1910 		panic("cache_end_transaction(): invalid transaction ID\n");
1911 		return B_BAD_VALUE;
1912 	}
1913 
1914 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
1915 
1916 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
1917 			data) != B_OK) {
1918 		return B_NO_MEMORY;
1919 	}
1920 
1921 	T(Action("end", cache, transaction));
1922 
1923 	// iterate through all blocks and free the unchanged original contents
1924 
1925 	cached_block *block = transaction->first_block, *next;
1926 	for (; block != NULL; block = next) {
1927 		next = block->transaction_next;
1928 
1929 		if (block->previous_transaction != NULL) {
1930 			// need to write back pending changes
1931 			write_cached_block(cache, block);
1932 		}
1933 
1934 		if (block->original_data != NULL) {
1935 			cache->Free(block->original_data);
1936 			block->original_data = NULL;
1937 		}
1938 		if (transaction->has_sub_transaction) {
1939 			if (block->parent_data != block->current_data)
1940 				cache->Free(block->parent_data);
1941 			block->parent_data = NULL;
1942 		}
1943 
1944 		// move the block to the previous transaction list
1945 		transaction->blocks.Add(block);
1946 
1947 		block->previous_transaction = transaction;
1948 		block->transaction_next = NULL;
1949 		block->transaction = NULL;
1950 	}
1951 
1952 	transaction->open = false;
1953 	return B_OK;
1954 }
1955 
1956 
1957 extern "C" status_t
1958 cache_abort_transaction(void *_cache, int32 id)
1959 {
1960 	block_cache *cache = (block_cache *)_cache;
1961 	MutexLocker locker(&cache->lock);
1962 
1963 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
1964 
1965 	cache_transaction *transaction = lookup_transaction(cache, id);
1966 	if (transaction == NULL) {
1967 		panic("cache_abort_transaction(): invalid transaction ID\n");
1968 		return B_BAD_VALUE;
1969 	}
1970 
1971 	T(Abort(cache, transaction));
1972 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
1973 
1974 	// iterate through all blocks and restore their original contents
1975 
1976 	cached_block *block = transaction->first_block, *next;
1977 	for (; block != NULL; block = next) {
1978 		next = block->transaction_next;
1979 
1980 		if (block->original_data != NULL) {
1981 			TRACE(("cache_abort_transaction(id = %ld): restored contents of block %Ld\n",
1982 				transaction->id, block->block_number));
1983 			memcpy(block->current_data, block->original_data, cache->block_size);
1984 			cache->Free(block->original_data);
1985 			block->original_data = NULL;
1986 		}
1987 		if (transaction->has_sub_transaction) {
1988 			if (block->parent_data != block->current_data)
1989 				cache->Free(block->parent_data);
1990 			block->parent_data = NULL;
1991 		}
1992 
1993 		block->transaction_next = NULL;
1994 		block->transaction = NULL;
1995 	}
1996 
1997 	hash_remove(cache->transaction_hash, transaction);
1998 	delete_transaction(cache, transaction);
1999 	return B_OK;
2000 }
2001 
2002 
2003 /*!	Acknowledges the current parent transaction, and starts a new transaction
2004 	from its sub transaction.
2005 	The new transaction also gets a new transaction ID.
2006 */
2007 extern "C" int32
2008 cache_detach_sub_transaction(void *_cache, int32 id,
2009 	transaction_notification_hook hook, void *data)
2010 {
2011 	block_cache *cache = (block_cache *)_cache;
2012 	MutexLocker locker(&cache->lock);
2013 
2014 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
2015 
2016 	cache_transaction *transaction = lookup_transaction(cache, id);
2017 	if (transaction == NULL) {
2018 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2019 		return B_BAD_VALUE;
2020 	}
2021 	if (!transaction->has_sub_transaction)
2022 		return B_BAD_VALUE;
2023 
2024 	// create a new transaction for the sub transaction
2025 	cache_transaction *newTransaction = new(nothrow) cache_transaction;
2026 	if (transaction == NULL)
2027 		return B_NO_MEMORY;
2028 
2029 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2030 	T(Detach(cache, transaction, newTransaction));
2031 
2032 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2033 
2034 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2035 			data) != B_OK) {
2036 		delete newTransaction;
2037 		return B_NO_MEMORY;
2038 	}
2039 
2040 	// iterate through all blocks and free the unchanged original contents
2041 
2042 	cached_block *block = transaction->first_block, *next, *last = NULL;
2043 	for (; block != NULL; block = next) {
2044 		next = block->transaction_next;
2045 
2046 		if (block->previous_transaction != NULL) {
2047 			// need to write back pending changes
2048 			write_cached_block(cache, block);
2049 		}
2050 
2051 		if (block->original_data != NULL && block->parent_data != NULL
2052 			&& block->parent_data != block->current_data) {
2053 			// free the original data if the parent data of the transaction
2054 			// will be made current - but keep them otherwise
2055 			cache->Free(block->original_data);
2056 			block->original_data = NULL;
2057 		}
2058 		if (block->parent_data == NULL
2059 			|| block->parent_data != block->current_data) {
2060 			// we need to move this block over to the new transaction
2061 			block->original_data = block->parent_data;
2062 			if (last == NULL)
2063 				newTransaction->first_block = block;
2064 			else
2065 				last->transaction_next = block;
2066 
2067 			block->transaction = newTransaction;
2068 			last = block;
2069 		} else
2070 			block->transaction = NULL;
2071 
2072 		if (block->parent_data != NULL) {
2073 			// move the block to the previous transaction list
2074 			transaction->blocks.Add(block);
2075 			block->parent_data = NULL;
2076 		}
2077 
2078 		block->previous_transaction = transaction;
2079 		block->transaction_next = NULL;
2080 	}
2081 
2082 	newTransaction->num_blocks = transaction->sub_num_blocks;
2083 
2084 	transaction->open = false;
2085 	transaction->has_sub_transaction = false;
2086 	transaction->num_blocks = transaction->main_num_blocks;
2087 	transaction->sub_num_blocks = 0;
2088 
2089 	hash_insert_grow(cache->transaction_hash, newTransaction);
2090 	cache->last_transaction = newTransaction;
2091 
2092 	return newTransaction->id;
2093 }
2094 
2095 
2096 extern "C" status_t
2097 cache_abort_sub_transaction(void *_cache, int32 id)
2098 {
2099 	block_cache *cache = (block_cache *)_cache;
2100 	MutexLocker locker(&cache->lock);
2101 
2102 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
2103 
2104 	cache_transaction *transaction = lookup_transaction(cache, id);
2105 	if (transaction == NULL) {
2106 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
2107 		return B_BAD_VALUE;
2108 	}
2109 	if (!transaction->has_sub_transaction)
2110 		return B_BAD_VALUE;
2111 
2112 	T(Abort(cache, transaction));
2113 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2114 
2115 	// revert all changes back to the version of the parent
2116 
2117 	cached_block *block = transaction->first_block, *next;
2118 	for (; block != NULL; block = next) {
2119 		next = block->transaction_next;
2120 
2121 		if (block->parent_data == NULL) {
2122 			if (block->original_data != NULL) {
2123 				// the parent transaction didn't change the block, but the sub
2124 				// transaction did - we need to revert from the original data
2125 				memcpy(block->current_data, block->original_data,
2126 					cache->block_size);
2127 			}
2128 		} else if (block->parent_data != block->current_data) {
2129 			// the block has been changed and must be restored
2130 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents of block %Ld\n",
2131 				transaction->id, block->block_number));
2132 			memcpy(block->current_data, block->parent_data, cache->block_size);
2133 			cache->Free(block->parent_data);
2134 		}
2135 
2136 		block->parent_data = NULL;
2137 	}
2138 
2139 	// all subsequent changes will go into the main transaction
2140 	transaction->has_sub_transaction = false;
2141 	transaction->sub_num_blocks = 0;
2142 
2143 	return B_OK;
2144 }
2145 
2146 
2147 extern "C" status_t
2148 cache_start_sub_transaction(void *_cache, int32 id)
2149 {
2150 	block_cache *cache = (block_cache *)_cache;
2151 	MutexLocker locker(&cache->lock);
2152 
2153 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
2154 
2155 	cache_transaction *transaction = lookup_transaction(cache, id);
2156 	if (transaction == NULL) {
2157 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n", id);
2158 		return B_BAD_VALUE;
2159 	}
2160 
2161 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2162 
2163 	// move all changed blocks up to the parent
2164 
2165 	cached_block *block = transaction->first_block, *next;
2166 	for (; block != NULL; block = next) {
2167 		next = block->transaction_next;
2168 
2169 		if (transaction->has_sub_transaction
2170 			&& block->parent_data != NULL
2171 			&& block->parent_data != block->current_data) {
2172 			// there already is an older sub transaction - we acknowledge
2173 			// its changes and move its blocks up to the parent
2174 			cache->Free(block->parent_data);
2175 		}
2176 
2177 		// we "allocate" the parent data lazily, that means, we don't copy
2178 		// the data (and allocate memory for it) until we need to
2179 		block->parent_data = block->current_data;
2180 	}
2181 
2182 	// all subsequent changes will go into the sub transaction
2183 	transaction->has_sub_transaction = true;
2184 	transaction->main_num_blocks = transaction->num_blocks;
2185 	transaction->sub_num_blocks = 0;
2186 	T(Action("start-sub", cache, transaction));
2187 
2188 	return B_OK;
2189 }
2190 
2191 
2192 /*!	Adds a transaction listener that gets notified when the transaction
2193 	is ended, aborted, written, or idle as specified by \a events.
2194 	The listener gets automatically removed when the transaction ends.
2195 */
2196 status_t
2197 cache_add_transaction_listener(void *_cache, int32 id, int32 events,
2198 	transaction_notification_hook hook, void *data)
2199 {
2200 	block_cache *cache = (block_cache *)_cache;
2201 
2202 	MutexLocker locker(&cache->lock);
2203 
2204 	cache_transaction *transaction = lookup_transaction(cache, id);
2205 	if (transaction == NULL)
2206 		return B_BAD_VALUE;
2207 
2208 	return add_transaction_listener(cache, transaction, events, hook, data);
2209 }
2210 
2211 
2212 status_t
2213 cache_remove_transaction_listener(void *_cache, int32 id,
2214 	transaction_notification_hook hookFunction, void *data)
2215 {
2216 	block_cache *cache = (block_cache *)_cache;
2217 
2218 	MutexLocker locker(&cache->lock);
2219 
2220 	cache_transaction *transaction = lookup_transaction(cache, id);
2221 	if (transaction == NULL)
2222 		return B_BAD_VALUE;
2223 
2224 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2225 	while (iterator.HasNext()) {
2226 		cache_listener *listener = iterator.Next();
2227 		if (listener->data == data && listener->hook == hookFunction) {
2228 			iterator.Remove();
2229 
2230 			if (listener->events_pending != 0) {
2231 				MutexLocker _(sNotificationsLock);
2232 				if (listener->events_pending != 0)
2233 					cache->pending_notifications.Remove(listener);
2234 			}
2235 			delete listener;
2236 			return B_OK;
2237 		}
2238 	}
2239 
2240 	return B_ENTRY_NOT_FOUND;
2241 }
2242 
2243 
2244 extern "C" status_t
2245 cache_next_block_in_transaction(void *_cache, int32 id, bool mainOnly,
2246 	long *_cookie, off_t *_blockNumber, void **_data, void **_unchangedData)
2247 {
2248 	cached_block *block = (cached_block *)*_cookie;
2249 	block_cache *cache = (block_cache *)_cache;
2250 
2251 	MutexLocker locker(&cache->lock);
2252 
2253 	cache_transaction *transaction = lookup_transaction(cache, id);
2254 	if (transaction == NULL || !transaction->open)
2255 		return B_BAD_VALUE;
2256 
2257 	if (block == NULL)
2258 		block = transaction->first_block;
2259 	else
2260 		block = block->transaction_next;
2261 
2262 	if (mainOnly && transaction->has_sub_transaction) {
2263 		// find next block that the parent changed
2264 		while (block != NULL && block->parent_data == NULL)
2265 			block = block->transaction_next;
2266 	}
2267 
2268 	if (block == NULL)
2269 		return B_ENTRY_NOT_FOUND;
2270 
2271 	if (_blockNumber)
2272 		*_blockNumber = block->block_number;
2273 	if (_data)
2274 		*_data = mainOnly ? block->parent_data : block->current_data;
2275 	if (_unchangedData)
2276 		*_unchangedData = block->original_data;
2277 
2278 	*_cookie = (addr_t)block;
2279 	return B_OK;
2280 }
2281 
2282 
2283 extern "C" int32
2284 cache_blocks_in_transaction(void *_cache, int32 id)
2285 {
2286 	block_cache *cache = (block_cache *)_cache;
2287 	MutexLocker locker(&cache->lock);
2288 
2289 	cache_transaction *transaction = lookup_transaction(cache, id);
2290 	if (transaction == NULL)
2291 		return B_BAD_VALUE;
2292 
2293 	return transaction->num_blocks;
2294 }
2295 
2296 
2297 extern "C" int32
2298 cache_blocks_in_main_transaction(void *_cache, int32 id)
2299 {
2300 	block_cache *cache = (block_cache *)_cache;
2301 	MutexLocker locker(&cache->lock);
2302 
2303 	cache_transaction *transaction = lookup_transaction(cache, id);
2304 	if (transaction == NULL)
2305 		return B_BAD_VALUE;
2306 
2307 	return transaction->main_num_blocks;
2308 }
2309 
2310 
2311 extern "C" int32
2312 cache_blocks_in_sub_transaction(void *_cache, int32 id)
2313 {
2314 	block_cache *cache = (block_cache *)_cache;
2315 	MutexLocker locker(&cache->lock);
2316 
2317 	cache_transaction *transaction = lookup_transaction(cache, id);
2318 	if (transaction == NULL)
2319 		return B_BAD_VALUE;
2320 
2321 	return transaction->sub_num_blocks;
2322 }
2323 
2324 
2325 //	#pragma mark - public block cache API
2326 
2327 
2328 extern "C" void
2329 block_cache_delete(void *_cache, bool allowWrites)
2330 {
2331 	block_cache *cache = (block_cache *)_cache;
2332 
2333 	if (allowWrites)
2334 		block_cache_sync(cache);
2335 
2336 	mutex_lock(&sCachesLock);
2337 	sCaches.Remove(cache);
2338 	mutex_unlock(&sCachesLock);
2339 
2340 	mutex_lock(&cache->lock);
2341 
2342 	// free all blocks
2343 
2344 	uint32 cookie = 0;
2345 	cached_block *block;
2346 	while ((block = (cached_block *)hash_remove_first(cache->hash,
2347 			&cookie)) != NULL) {
2348 		cache->FreeBlock(block);
2349 	}
2350 
2351 	// free all transactions (they will all be aborted)
2352 
2353 	cookie = 0;
2354 	cache_transaction *transaction;
2355 	while ((transaction = (cache_transaction *)hash_remove_first(
2356 			cache->transaction_hash, &cookie)) != NULL) {
2357 		delete transaction;
2358 	}
2359 
2360 	delete cache;
2361 }
2362 
2363 
2364 extern "C" void *
2365 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
2366 {
2367 	block_cache *cache = new(nothrow) block_cache(fd, numBlocks, blockSize,
2368 		readOnly);
2369 	if (cache == NULL)
2370 		return NULL;
2371 
2372 	if (cache->Init() != B_OK) {
2373 		delete cache;
2374 		return NULL;
2375 	}
2376 
2377 	MutexLocker _(sCachesLock);
2378 	sCaches.Add(cache);
2379 
2380 	return cache;
2381 }
2382 
2383 
2384 extern "C" status_t
2385 block_cache_sync(void *_cache)
2386 {
2387 	block_cache *cache = (block_cache *)_cache;
2388 
2389 	// we will sync all dirty blocks to disk that have a completed
2390 	// transaction or no transaction only
2391 
2392 	MutexLocker locker(&cache->lock);
2393 	hash_iterator iterator;
2394 	hash_open(cache->hash, &iterator);
2395 
2396 	cached_block *block;
2397 	while ((block = (cached_block *)hash_next(cache->hash, &iterator)) != NULL) {
2398 		if (block->previous_transaction != NULL
2399 			|| (block->transaction == NULL && block->is_dirty)) {
2400 			status_t status = write_cached_block(cache, block);
2401 			if (status != B_OK)
2402 				return status;
2403 		}
2404 	}
2405 
2406 	hash_close(cache->hash, &iterator, false);
2407 	locker.Unlock();
2408 
2409 	wait_for_notifications(cache);
2410 		// make sure that all pending TRANSACTION_WRITTEN notifications
2411 		// are handled after we return
2412 	return B_OK;
2413 }
2414 
2415 
2416 extern "C" status_t
2417 block_cache_sync_etc(void *_cache, off_t blockNumber, size_t numBlocks)
2418 {
2419 	block_cache *cache = (block_cache *)_cache;
2420 
2421 	// we will sync all dirty blocks to disk that have a completed
2422 	// transaction or no transaction only
2423 
2424 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
2425 		panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
2426 			blockNumber, cache->max_blocks - 1);
2427 		return B_BAD_VALUE;
2428 	}
2429 
2430 	MutexLocker locker(&cache->lock);
2431 
2432 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
2433 		cached_block *block = (cached_block *)hash_lookup(cache->hash,
2434 			&blockNumber);
2435 		if (block == NULL)
2436 			continue;
2437 
2438 		// TODO: sort blocks!
2439 
2440 		if (block->previous_transaction != NULL
2441 			|| (block->transaction == NULL && block->is_dirty)) {
2442 			status_t status = write_cached_block(cache, block);
2443 			if (status != B_OK)
2444 				return status;
2445 		}
2446 	}
2447 
2448 	locker.Unlock();
2449 
2450 	wait_for_notifications(cache);
2451 		// make sure that all pending TRANSACTION_WRITTEN notifications
2452 		// are handled after we return
2453 	return B_OK;
2454 }
2455 
2456 
2457 extern "C" status_t
2458 block_cache_make_writable(void *_cache, off_t blockNumber, int32 transaction)
2459 {
2460 	block_cache *cache = (block_cache *)_cache;
2461 	MutexLocker locker(&cache->lock);
2462 
2463 	if (cache->read_only)
2464 		panic("tried to make block writable on a read-only cache!");
2465 
2466 	// ToDo: this can be done better!
2467 	void *block = get_writable_cached_block(cache, blockNumber,
2468 		blockNumber, 1, transaction, false);
2469 	if (block != NULL) {
2470 		put_cached_block((block_cache *)_cache, blockNumber);
2471 		return B_OK;
2472 	}
2473 
2474 	return B_ERROR;
2475 }
2476 
2477 
2478 extern "C" void *
2479 block_cache_get_writable_etc(void *_cache, off_t blockNumber, off_t base,
2480 	off_t length, int32 transaction)
2481 {
2482 	block_cache *cache = (block_cache *)_cache;
2483 	MutexLocker locker(&cache->lock);
2484 
2485 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
2486 		blockNumber, transaction));
2487 	if (cache->read_only)
2488 		panic("tried to get writable block on a read-only cache!");
2489 
2490 	return get_writable_cached_block(cache, blockNumber, base, length,
2491 		transaction, false);
2492 }
2493 
2494 
2495 extern "C" void *
2496 block_cache_get_writable(void *_cache, off_t blockNumber, int32 transaction)
2497 {
2498 	return block_cache_get_writable_etc(_cache, blockNumber,
2499 		blockNumber, 1, transaction);
2500 }
2501 
2502 
2503 extern "C" void *
2504 block_cache_get_empty(void *_cache, off_t blockNumber, int32 transaction)
2505 {
2506 	block_cache *cache = (block_cache *)_cache;
2507 	MutexLocker locker(&cache->lock);
2508 
2509 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
2510 		blockNumber, transaction));
2511 	if (cache->read_only)
2512 		panic("tried to get empty writable block on a read-only cache!");
2513 
2514 	return get_writable_cached_block((block_cache *)_cache, blockNumber,
2515 		blockNumber, 1, transaction, true);
2516 }
2517 
2518 
2519 extern "C" const void *
2520 block_cache_get_etc(void *_cache, off_t blockNumber, off_t base, off_t length)
2521 {
2522 	block_cache *cache = (block_cache *)_cache;
2523 	MutexLocker locker(&cache->lock);
2524 	bool allocated;
2525 
2526 	cached_block *block = get_cached_block(cache, blockNumber, &allocated);
2527 	if (block == NULL)
2528 		return NULL;
2529 
2530 #ifdef DEBUG_CHANGED
2531 	if (block->compare == NULL)
2532 		block->compare = cache->Allocate();
2533 	if (block->compare != NULL)
2534 		memcpy(block->compare, block->current_data, cache->block_size);
2535 #endif
2536 	TB(Get(cache, block));
2537 
2538 	return block->current_data;
2539 }
2540 
2541 
2542 extern "C" const void *
2543 block_cache_get(void *_cache, off_t blockNumber)
2544 {
2545 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
2546 }
2547 
2548 
2549 /*!	Changes the internal status of a writable block to \a dirty. This can be
2550 	helpful in case you realize you don't need to change that block anymore
2551 	for whatever reason.
2552 
2553 	Note, you must only use this function on blocks that were acquired
2554 	writable!
2555 */
2556 extern "C" status_t
2557 block_cache_set_dirty(void *_cache, off_t blockNumber, bool dirty,
2558 	int32 transaction)
2559 {
2560 	block_cache *cache = (block_cache *)_cache;
2561 	MutexLocker locker(&cache->lock);
2562 
2563 	cached_block *block = (cached_block *)hash_lookup(cache->hash,
2564 		&blockNumber);
2565 	if (block == NULL)
2566 		return B_BAD_VALUE;
2567 	if (block->is_dirty == dirty) {
2568 		// there is nothing to do for us
2569 		return B_OK;
2570 	}
2571 
2572 	// TODO: not yet implemented
2573 	if (dirty)
2574 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
2575 
2576 	return B_OK;
2577 }
2578 
2579 
2580 extern "C" void
2581 block_cache_put(void *_cache, off_t blockNumber)
2582 {
2583 	block_cache *cache = (block_cache *)_cache;
2584 	MutexLocker locker(&cache->lock);
2585 
2586 	put_cached_block(cache, blockNumber);
2587 }
2588 
2589