xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision a085e81e62d7a860f809b4fb7c7bf5654c396985)
1 /*
2  * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <KernelExport.h>
15 #include <fs_cache.h>
16 
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <util/khash.h>
26 #include <vm/vm_page.h>
27 
28 #include "kernel_debug_config.h"
29 
30 
31 // TODO: this is a naive but growing implementation to test the API:
32 //	block reading/writing is not at all optimized for speed, it will
33 //	just read and write single blocks.
34 // TODO: the retrieval/copy of the original data could be delayed until the
35 //		new data must be written, ie. in low memory situations.
36 
37 //#define TRACE_BLOCK_CACHE
38 #ifdef TRACE_BLOCK_CACHE
39 #	define TRACE(x)	dprintf x
40 #else
41 #	define TRACE(x) ;
42 #endif
43 
44 // This macro is used for fatal situations that are acceptable in a running
45 // system, like out of memory situations - should only panic for debugging.
46 #define FATAL(x) panic x
47 
48 static const bigtime_t kTransactionIdleTime = 2000000LL;
49 	// a transaction is considered idle after 2 seconds of inactivity
50 
51 
52 struct cache_transaction;
53 struct cached_block;
54 struct block_cache;
55 typedef DoublyLinkedListLink<cached_block> block_link;
56 
57 struct cached_block {
58 	cached_block*	next;			// next in hash
59 	cached_block*	transaction_next;
60 	block_link		link;
61 	off_t			block_number;
62 	void*			current_data;
63 		// The data that is seen by everyone using the API; this one is always
64 		// present.
65 	void*			original_data;
66 		// When in a transaction, this contains the original data from before
67 		// the transaction.
68 	void*			parent_data;
69 		// This is a lazily alloced buffer that represents the contents of the
70 		// block in the parent transaction. It may point to current_data if the
71 		// contents have been changed only in the parent transaction, or, if the
72 		// block has been changed in the current sub transaction already, to a
73 		// new block containing the contents changed in the parent transaction.
74 		// If this is NULL, the block has not been changed in the parent
75 		// transaction at all.
76 #if BLOCK_CACHE_DEBUG_CHANGED
77 	void*			compare;
78 #endif
79 	int32			ref_count;
80 	int32			last_accessed;
81 	bool			busy_reading : 1;
82 	bool			busy_writing : 1;
83 	bool			is_writing : 1;
84 		// Block has been checked out for writing without transactions, and
85 		// cannot be written back if set
86 	bool			is_dirty : 1;
87 	bool			unused : 1;
88 	bool			discard : 1;
89 	bool			busy_reading_waiters : 1;
90 	bool			busy_writing_waiters : 1;
91 	cache_transaction* transaction;
92 		// This is the current active transaction, if any, the block is
93 		// currently in (meaning was changed as a part of it).
94 	cache_transaction* previous_transaction;
95 		// This is set to the last transaction that was ended containing this
96 		// block. In this case, the block has not yet written back yet, and
97 		// the changed data is either in current_data, or original_data -- the
98 		// latter if the block is already being part of another transaction.
99 		// There can only be one previous transaction, so when the active
100 		// transaction ends, the changes of the previous transaction have to
101 		// be written back before that transaction becomes the next previous
102 		// transaction.
103 
104 	bool CanBeWritten() const;
105 	int32 LastAccess() const
106 		{ return system_time() / 1000000L - last_accessed; }
107 
108 	static int Compare(void* _cacheEntry, const void* _block);
109 	static uint32 Hash(void* _cacheEntry, const void* _block, uint32 range);
110 };
111 
112 typedef DoublyLinkedList<cached_block,
113 	DoublyLinkedListMemberGetLink<cached_block,
114 		&cached_block::link> > block_list;
115 
116 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
117 	int32			transaction_id;
118 	int32			events_pending;
119 	int32			events;
120 	transaction_notification_hook hook;
121 	void*			data;
122 	bool			delete_after_event;
123 };
124 
125 typedef DoublyLinkedList<cache_notification> NotificationList;
126 
127 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
128 	hash_table*		hash;
129 	mutex			lock;
130 	int				fd;
131 	off_t			max_blocks;
132 	size_t			block_size;
133 	int32			next_transaction_id;
134 	cache_transaction* last_transaction;
135 	hash_table*		transaction_hash;
136 
137 	object_cache*	buffer_cache;
138 	block_list		unused_blocks;
139 	uint32			unused_block_count;
140 
141 	ConditionVariable busy_reading_condition;
142 	uint32			busy_reading_count;
143 	bool			busy_reading_waiters;
144 
145 	ConditionVariable busy_writing_condition;
146 	uint32			busy_writing_count;
147 	bool			busy_writing_waiters;
148 
149 	uint32			num_dirty_blocks;
150 	bool			read_only;
151 
152 	NotificationList pending_notifications;
153 	ConditionVariable condition_variable;
154 
155 					block_cache(int fd, off_t numBlocks, size_t blockSize,
156 						bool readOnly);
157 					~block_cache();
158 
159 	status_t		Init();
160 
161 	void			Free(void* buffer);
162 	void*			Allocate();
163 	void			FreeBlock(cached_block* block);
164 	cached_block*	NewBlock(off_t blockNumber);
165 	void			FreeBlockParentData(cached_block* block);
166 
167 	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
168 	void			RemoveBlock(cached_block* block);
169 	void			DiscardBlock(cached_block* block);
170 
171 private:
172 	static void		_LowMemoryHandler(void* data, uint32 resources,
173 						int32 level);
174 	cached_block*	_GetUnusedBlock();
175 };
176 
177 struct cache_listener;
178 typedef DoublyLinkedListLink<cache_listener> listener_link;
179 
180 struct cache_listener : cache_notification {
181 	listener_link	link;
182 };
183 
184 typedef DoublyLinkedList<cache_listener,
185 	DoublyLinkedListMemberGetLink<cache_listener,
186 		&cache_listener::link> > ListenerList;
187 
188 
189 struct cache_transaction {
190 	cache_transaction();
191 
192 	cache_transaction* next;
193 	int32			id;
194 	int32			num_blocks;
195 	int32			main_num_blocks;
196 	int32			sub_num_blocks;
197 	cached_block*	first_block;
198 	block_list		blocks;
199 	ListenerList	listeners;
200 	bool			open;
201 	bool			has_sub_transaction;
202 	bigtime_t		last_used;
203 	int32			busy_writing_count;
204 };
205 
206 
207 class BlockWriter {
208 public:
209 								BlockWriter(block_cache* cache,
210 									size_t max = SIZE_MAX);
211 								~BlockWriter();
212 
213 			bool				Add(cached_block* block,
214 									hash_iterator* iterator = NULL);
215 			bool				Add(cache_transaction* transaction,
216 									hash_iterator* iterator,
217 									bool& hasLeftOvers);
218 
219 			status_t			Write(hash_iterator* iterator = NULL,
220 									bool canUnlock = true);
221 
222 			bool				DeletedTransaction() const
223 									{ return fDeletedTransaction; }
224 
225 	static	status_t			WriteBlock(block_cache* cache,
226 									cached_block* block);
227 
228 private:
229 			void*				_Data(cached_block* block) const;
230 			status_t			_WriteBlock(cached_block* block);
231 			void				_BlockDone(cached_block* block,
232 									hash_iterator* iterator);
233 			void				_UnmarkWriting(cached_block* block);
234 
235 	static	int					_CompareBlocks(const void* _blockA,
236 									const void* _blockB);
237 
238 private:
239 	static	const size_t		kBufferSize = 64;
240 
241 			block_cache*		fCache;
242 			cached_block*		fBuffer[kBufferSize];
243 			cached_block**		fBlocks;
244 			size_t				fCount;
245 			size_t				fTotal;
246 			size_t				fCapacity;
247 			size_t				fMax;
248 			status_t			fStatus;
249 			bool				fDeletedTransaction;
250 };
251 
252 
253 class TransactionLocking {
254 public:
255 	inline bool Lock(block_cache* cache)
256 	{
257 		mutex_lock(&cache->lock);
258 
259 		while (cache->busy_writing_count != 0) {
260 			// wait for all blocks to be written
261 			ConditionVariableEntry entry;
262 			cache->busy_writing_condition.Add(&entry);
263 			cache->busy_writing_waiters = true;
264 
265 			mutex_unlock(&cache->lock);
266 
267 			entry.Wait();
268 
269 			mutex_lock(&cache->lock);
270 		}
271 
272 		return true;
273 	}
274 
275 	inline void Unlock(block_cache* cache)
276 	{
277 		mutex_unlock(&cache->lock);
278 	}
279 };
280 
281 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
282 
283 
284 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
285 namespace BlockTracing {
286 
287 class Action : public AbstractTraceEntry {
288 public:
289 	Action(block_cache* cache, cached_block* block)
290 		:
291 		fCache(cache),
292 		fBlockNumber(block->block_number),
293 		fIsDirty(block->is_dirty),
294 		fHasOriginal(block->original_data != NULL),
295 		fHasParent(block->parent_data != NULL),
296 		fTransactionID(-1),
297 		fPreviousID(-1)
298 	{
299 		if (block->transaction != NULL)
300 			fTransactionID = block->transaction->id;
301 		if (block->previous_transaction != NULL)
302 			fPreviousID = block->previous_transaction->id;
303 	}
304 
305 	virtual void AddDump(TraceOutput& out)
306 	{
307 		out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
308 			" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
309 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
310 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
311 	}
312 
313 	virtual const char* _Action() const = 0;
314 
315 private:
316 	block_cache*		fCache;
317 	uint64				fBlockNumber;
318 	bool				fIsDirty;
319 	bool				fHasOriginal;
320 	bool				fHasParent;
321 	int32				fTransactionID;
322 	int32				fPreviousID;
323 };
324 
325 class Get : public Action {
326 public:
327 	Get(block_cache* cache, cached_block* block)
328 		:
329 		Action(cache, block)
330 	{
331 		Initialized();
332 	}
333 
334 	virtual const char* _Action() const { return "get"; }
335 };
336 
337 class Put : public Action {
338 public:
339 	Put(block_cache* cache, cached_block* block)
340 		:
341 		Action(cache, block)
342 	{
343 		Initialized();
344 	}
345 
346 	virtual const char* _Action() const { return "put"; }
347 };
348 
349 class Read : public Action {
350 public:
351 	Read(block_cache* cache, cached_block* block)
352 		:
353 		Action(cache, block)
354 	{
355 		Initialized();
356 	}
357 
358 	virtual const char* _Action() const { return "read"; }
359 };
360 
361 class Write : public Action {
362 public:
363 	Write(block_cache* cache, cached_block* block)
364 		:
365 		Action(cache, block)
366 	{
367 		Initialized();
368 	}
369 
370 	virtual const char* _Action() const { return "write"; }
371 };
372 
373 class Flush : public Action {
374 public:
375 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
376 		:
377 		Action(cache, block),
378 		fGetUnused(getUnused)
379 	{
380 		Initialized();
381 	}
382 
383 	virtual const char* _Action() const
384 		{ return fGetUnused ? "get-unused" : "flush"; }
385 
386 private:
387 	bool	fGetUnused;
388 };
389 
390 class Error : public AbstractTraceEntry {
391 public:
392 	Error(block_cache* cache, uint64 blockNumber, const char* message,
393 			status_t status = B_OK)
394 		:
395 		fCache(cache),
396 		fBlockNumber(blockNumber),
397 		fMessage(message),
398 		fStatus(status)
399 	{
400 		Initialized();
401 	}
402 
403 	virtual void AddDump(TraceOutput& out)
404 	{
405 		out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
406 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
407 			fStatus != B_OK ? strerror(fStatus) : "");
408 	}
409 
410 private:
411 	block_cache*	fCache;
412 	uint64			fBlockNumber;
413 	const char*		fMessage;
414 	status_t		fStatus;
415 };
416 
417 #if BLOCK_CACHE_BLOCK_TRACING >= 2
418 class BlockData : public AbstractTraceEntry {
419 public:
420 	enum {
421 		kCurrent	= 0x01,
422 		kParent		= 0x02,
423 		kOriginal	= 0x04
424 	};
425 
426 	BlockData(block_cache* cache, cached_block* block, const char* message)
427 		:
428 		fCache(cache),
429 		fSize(cache->block_size),
430 		fBlockNumber(block->block_number),
431 		fMessage(message)
432 	{
433 		_Allocate(fCurrent, block->current_data);
434 		_Allocate(fParent, block->parent_data);
435 		_Allocate(fOriginal, block->original_data);
436 
437 #if KTRACE_PRINTF_STACK_TRACE
438 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
439 			false);
440 #endif
441 
442 		Initialized();
443 	}
444 
445 	virtual void AddDump(TraceOutput& out)
446 	{
447 		out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
448 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
449 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
450 			fMessage);
451 	}
452 
453 #if KTRACE_PRINTF_STACK_TRACE
454 	virtual void DumpStackTrace(TraceOutput& out)
455 	{
456 		out.PrintStackTrace(fStackTrace);
457 	}
458 #endif
459 
460 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
461 	{
462 		if ((which & kCurrent) != 0)
463 			DumpBlock(kCurrent, offset, size);
464 		if ((which & kParent) != 0)
465 			DumpBlock(kParent, offset, size);
466 		if ((which & kOriginal) != 0)
467 			DumpBlock(kOriginal, offset, size);
468 	}
469 
470 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
471 	{
472 		if (offset > fSize) {
473 			kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
474 			return;
475 		}
476 		if (offset + size > fSize)
477 			size = fSize - offset;
478 
479 		const char* label;
480 		uint8* data;
481 
482 		if ((which & kCurrent) != 0) {
483 			label = "current";
484 			data = fCurrent;
485 		} else if ((which & kParent) != 0) {
486 			label = "parent";
487 			data = fParent;
488 		} else if ((which & kOriginal) != 0) {
489 			label = "original";
490 			data = fOriginal;
491 		} else
492 			return;
493 
494 		kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
495 
496 		static const uint32 kBlockSize = 16;
497 		data += offset;
498 
499 		for (uint32 i = 0; i < size;) {
500 			int start = i;
501 
502 			kprintf("  %04" B_PRIx32 " ", i);
503 			for (; i < start + kBlockSize; i++) {
504 				if (!(i % 4))
505 					kprintf(" ");
506 
507 				if (i >= size)
508 					kprintf("  ");
509 				else
510 					kprintf("%02x", data[i]);
511 			}
512 
513 			kprintf("\n");
514 		}
515 	}
516 
517 private:
518 	void _Allocate(uint8*& target, void* source)
519 	{
520 		if (source == NULL) {
521 			target = NULL;
522 			return;
523 		}
524 
525 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
526 	}
527 
528 	block_cache*	fCache;
529 	uint32			fSize;
530 	uint64			fBlockNumber;
531 	const char*		fMessage;
532 	uint8*			fCurrent;
533 	uint8*			fParent;
534 	uint8*			fOriginal;
535 #if KTRACE_PRINTF_STACK_TRACE
536 	tracing_stack_trace* fStackTrace;
537 #endif
538 };
539 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
540 
541 }	// namespace BlockTracing
542 
543 #	define TB(x) new(std::nothrow) BlockTracing::x;
544 #else
545 #	define TB(x) ;
546 #endif
547 
548 #if BLOCK_CACHE_BLOCK_TRACING >= 2
549 #	define TB2(x) new(std::nothrow) BlockTracing::x;
550 #else
551 #	define TB2(x) ;
552 #endif
553 
554 
555 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
556 namespace TransactionTracing {
557 
558 class Action : public AbstractTraceEntry {
559 public:
560 	Action(const char* label, block_cache* cache,
561 			cache_transaction* transaction)
562 		:
563 		fCache(cache),
564 		fTransaction(transaction),
565 		fID(transaction->id),
566 		fSub(transaction->has_sub_transaction),
567 		fNumBlocks(transaction->num_blocks),
568 		fSubNumBlocks(transaction->sub_num_blocks)
569 	{
570 		strlcpy(fLabel, label, sizeof(fLabel));
571 		Initialized();
572 	}
573 
574 	virtual void AddDump(TraceOutput& out)
575 	{
576 		out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
577 			", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
578 			fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
579 	}
580 
581 private:
582 	char				fLabel[12];
583 	block_cache*		fCache;
584 	cache_transaction*	fTransaction;
585 	int32				fID;
586 	bool				fSub;
587 	int32				fNumBlocks;
588 	int32				fSubNumBlocks;
589 };
590 
591 class Detach : public AbstractTraceEntry {
592 public:
593 	Detach(block_cache* cache, cache_transaction* transaction,
594 			cache_transaction* newTransaction)
595 		:
596 		fCache(cache),
597 		fTransaction(transaction),
598 		fID(transaction->id),
599 		fSub(transaction->has_sub_transaction),
600 		fNewTransaction(newTransaction),
601 		fNewID(newTransaction->id)
602 	{
603 		Initialized();
604 	}
605 
606 	virtual void AddDump(TraceOutput& out)
607 	{
608 		out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
609 			"from transaction %p (id %" B_PRId32 ")%s",
610 			fCache, fNewTransaction, fNewID, fTransaction, fID,
611 			fSub ? " sub" : "");
612 	}
613 
614 private:
615 	block_cache*		fCache;
616 	cache_transaction*	fTransaction;
617 	int32				fID;
618 	bool				fSub;
619 	cache_transaction*	fNewTransaction;
620 	int32				fNewID;
621 };
622 
623 class Abort : public AbstractTraceEntry {
624 public:
625 	Abort(block_cache* cache, cache_transaction* transaction)
626 		:
627 		fCache(cache),
628 		fTransaction(transaction),
629 		fID(transaction->id),
630 		fNumBlocks(0)
631 	{
632 		bool isSub = transaction->has_sub_transaction;
633 		fNumBlocks = isSub ? transaction->sub_num_blocks
634 			: transaction->num_blocks;
635 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
636 		if (fBlocks != NULL) {
637 			cached_block* block = transaction->first_block;
638 			for (int32 i = 0; block != NULL && i < fNumBlocks;
639 					block = block->transaction_next) {
640 				fBlocks[i++] = block->block_number;
641 			}
642 		} else
643 			fNumBlocks = 0;
644 
645 #if KTRACE_PRINTF_STACK_TRACE
646 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
647 			false);
648 #endif
649 
650 		Initialized();
651 	}
652 
653 	virtual void AddDump(TraceOutput& out)
654 	{
655 		out.Print("block cache %p, abort transaction "
656 			"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
657 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
658 			out.Print(" %" B_PRIdOFF, fBlocks[i]);
659 	}
660 
661 #if KTRACE_PRINTF_STACK_TRACE
662 	virtual void DumpStackTrace(TraceOutput& out)
663 	{
664 		out.PrintStackTrace(fStackTrace);
665 	}
666 #endif
667 
668 private:
669 	block_cache*		fCache;
670 	cache_transaction*	fTransaction;
671 	int32				fID;
672 	off_t*				fBlocks;
673 	int32				fNumBlocks;
674 #if KTRACE_PRINTF_STACK_TRACE
675 	tracing_stack_trace* fStackTrace;
676 #endif
677 };
678 
679 }	// namespace TransactionTracing
680 
681 #	define T(x) new(std::nothrow) TransactionTracing::x;
682 #else
683 #	define T(x) ;
684 #endif
685 
686 
687 static DoublyLinkedList<block_cache> sCaches;
688 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
689 static mutex sCachesMemoryUseLock
690 	= MUTEX_INITIALIZER("block caches memory use");
691 static size_t sUsedMemory;
692 static sem_id sEventSemaphore;
693 static mutex sNotificationsLock
694 	= MUTEX_INITIALIZER("block cache notifications");
695 static thread_id sNotifierWriterThread;
696 static DoublyLinkedListLink<block_cache> sMarkCache;
697 	// TODO: this only works if the link is the first entry of block_cache
698 static object_cache* sBlockCache;
699 
700 
701 //	#pragma mark - notifications/listener
702 
703 
704 /*!	Checks whether or not this is an event that closes a transaction. */
705 static inline bool
706 is_closing_event(int32 event)
707 {
708 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
709 }
710 
711 
712 static inline bool
713 is_written_event(int32 event)
714 {
715 	return (event & TRANSACTION_WRITTEN) != 0;
716 }
717 
718 
719 /*!	From the specified \a notification, it will remove the lowest pending
720 	event, and return that one in \a _event.
721 	If there is no pending event anymore, it will return \c false.
722 */
723 static bool
724 get_next_pending_event(cache_notification* notification, int32* _event)
725 {
726 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
727 		int32 pending = atomic_and(&notification->events_pending,
728 			~eventMask);
729 
730 		bool more = (pending & ~eventMask) != 0;
731 
732 		if ((pending & eventMask) != 0) {
733 			*_event = eventMask;
734 			return more;
735 		}
736 	}
737 
738 	return false;
739 }
740 
741 
742 static void
743 flush_pending_notifications(block_cache* cache)
744 {
745 	ASSERT_LOCKED_MUTEX(&sCachesLock);
746 
747 	while (true) {
748 		MutexLocker locker(sNotificationsLock);
749 
750 		cache_notification* notification = cache->pending_notifications.Head();
751 		if (notification == NULL)
752 			return;
753 
754 		bool deleteAfterEvent = false;
755 		int32 event = -1;
756 		if (!get_next_pending_event(notification, &event)) {
757 			// remove the notification if this was the last pending event
758 			cache->pending_notifications.Remove(notification);
759 			deleteAfterEvent = notification->delete_after_event;
760 		}
761 
762 		if (event >= 0) {
763 			// Notify listener, we need to copy the notification, as it might
764 			// be removed when we unlock the list.
765 			cache_notification copy = *notification;
766 			locker.Unlock();
767 
768 			copy.hook(copy.transaction_id, event, copy.data);
769 
770 			locker.Lock();
771 		}
772 
773 		if (deleteAfterEvent)
774 			delete notification;
775 	}
776 }
777 
778 
779 /*!	Flushes all pending notifications by calling the appropriate hook
780 	functions.
781 	Must not be called with a cache lock held.
782 */
783 static void
784 flush_pending_notifications()
785 {
786 	MutexLocker _(sCachesLock);
787 
788 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
789 	while (iterator.HasNext()) {
790 		block_cache* cache = iterator.Next();
791 
792 		flush_pending_notifications(cache);
793 	}
794 }
795 
796 
797 /*!	Initializes the \a notification as specified. */
798 static void
799 set_notification(cache_transaction* transaction,
800 	cache_notification &notification, int32 events,
801 	transaction_notification_hook hook, void* data)
802 {
803 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
804 	notification.events_pending = 0;
805 	notification.events = events;
806 	notification.hook = hook;
807 	notification.data = data;
808 	notification.delete_after_event = false;
809 }
810 
811 
812 /*!	Makes sure the notification is deleted. It either deletes it directly,
813 	when possible, or marks it for deletion if the notification is pending.
814 */
815 static void
816 delete_notification(cache_notification* notification)
817 {
818 	MutexLocker locker(sNotificationsLock);
819 
820 	if (notification->events_pending != 0)
821 		notification->delete_after_event = true;
822 	else
823 		delete notification;
824 }
825 
826 
827 /*!	Adds the notification to the pending notifications list, or, if it's
828 	already part of it, updates its events_pending field.
829 	Also marks the notification to be deleted if \a deleteNotification
830 	is \c true.
831 	Triggers the notifier thread to run.
832 */
833 static void
834 add_notification(block_cache* cache, cache_notification* notification,
835 	int32 event, bool deleteNotification)
836 {
837 	if (notification->hook == NULL)
838 		return;
839 
840 	int32 pending = atomic_or(&notification->events_pending, event);
841 	if (pending == 0) {
842 		// not yet part of the notification list
843 		MutexLocker locker(sNotificationsLock);
844 		if (deleteNotification)
845 			notification->delete_after_event = true;
846 		cache->pending_notifications.Add(notification);
847 	} else if (deleteNotification) {
848 		// we might need to delete it ourselves if we're late
849 		delete_notification(notification);
850 	}
851 
852 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
853 		// We're probably still holding some locks that makes rescheduling
854 		// not a good idea at this point.
855 }
856 
857 
858 /*!	Notifies all interested listeners of this transaction about the \a event.
859 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
860 	TRANSACTION_ABORTED), all listeners except those listening to
861 	TRANSACTION_WRITTEN will be removed.
862 */
863 static void
864 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
865 	int32 event)
866 {
867 	T(Action("notify", cache, transaction));
868 
869 	bool isClosing = is_closing_event(event);
870 	bool isWritten = is_written_event(event);
871 
872 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
873 	while (iterator.HasNext()) {
874 		cache_listener* listener = iterator.Next();
875 
876 		bool remove = (isClosing && !is_written_event(listener->events))
877 			|| (isWritten && is_written_event(listener->events));
878 		if (remove)
879 			iterator.Remove();
880 
881 		if ((listener->events & event) != 0)
882 			add_notification(cache, listener, event, remove);
883 		else if (remove)
884 			delete_notification(listener);
885 	}
886 }
887 
888 
889 /*!	Removes and deletes all listeners that are still monitoring this
890 	transaction.
891 */
892 static void
893 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
894 {
895 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
896 	while (iterator.HasNext()) {
897 		cache_listener* listener = iterator.Next();
898 		iterator.Remove();
899 
900 		delete_notification(listener);
901 	}
902 }
903 
904 
905 static status_t
906 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
907 	int32 events, transaction_notification_hook hookFunction, void* data)
908 {
909 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
910 	while (iterator.HasNext()) {
911 		cache_listener* listener = iterator.Next();
912 
913 		if (listener->data == data && listener->hook == hookFunction) {
914 			// this listener already exists, just update it
915 			listener->events |= events;
916 			return B_OK;
917 		}
918 	}
919 
920 	cache_listener* listener = new(std::nothrow) cache_listener;
921 	if (listener == NULL)
922 		return B_NO_MEMORY;
923 
924 	set_notification(transaction, *listener, events, hookFunction, data);
925 	transaction->listeners.Add(listener);
926 	return B_OK;
927 }
928 
929 
930 //	#pragma mark - private transaction
931 
932 
933 cache_transaction::cache_transaction()
934 {
935 	num_blocks = 0;
936 	main_num_blocks = 0;
937 	sub_num_blocks = 0;
938 	first_block = NULL;
939 	open = true;
940 	has_sub_transaction = false;
941 	last_used = system_time();
942 	busy_writing_count = 0;
943 }
944 
945 
946 static int
947 transaction_compare(void* _transaction, const void* _id)
948 {
949 	cache_transaction* transaction = (cache_transaction*)_transaction;
950 	const int32* id = (const int32*)_id;
951 
952 	return transaction->id - *id;
953 }
954 
955 
956 static uint32
957 transaction_hash(void* _transaction, const void* _id, uint32 range)
958 {
959 	cache_transaction* transaction = (cache_transaction*)_transaction;
960 	const int32* id = (const int32*)_id;
961 
962 	if (transaction != NULL)
963 		return transaction->id % range;
964 
965 	return (uint32)*id % range;
966 }
967 
968 
969 static void
970 delete_transaction(block_cache* cache, cache_transaction* transaction)
971 {
972 	if (cache->last_transaction == transaction)
973 		cache->last_transaction = NULL;
974 
975 	remove_transaction_listeners(cache, transaction);
976 	delete transaction;
977 }
978 
979 
980 static cache_transaction*
981 lookup_transaction(block_cache* cache, int32 id)
982 {
983 	return (cache_transaction*)hash_lookup(cache->transaction_hash, &id);
984 }
985 
986 
987 /*!	Writes back any changes made to blocks in \a transaction that are still
988 	part of a previous transacton.
989 */
990 static status_t
991 write_blocks_in_previous_transaction(block_cache* cache,
992 	cache_transaction* transaction)
993 {
994 	BlockWriter writer(cache);
995 
996 	cached_block* block = transaction->first_block;
997 	for (; block != NULL; block = block->transaction_next) {
998 		if (block->previous_transaction != NULL) {
999 			// need to write back pending changes
1000 			writer.Add(block);
1001 		}
1002 	}
1003 
1004 	return writer.Write();
1005 }
1006 
1007 
1008 //	#pragma mark - cached_block
1009 
1010 
1011 bool
1012 cached_block::CanBeWritten() const
1013 {
1014 	return !busy_writing && !busy_reading
1015 		&& (previous_transaction != NULL
1016 			|| (transaction == NULL && is_dirty && !is_writing));
1017 }
1018 
1019 
1020 /*static*/ int
1021 cached_block::Compare(void* _cacheEntry, const void* _block)
1022 {
1023 	cached_block* cacheEntry = (cached_block*)_cacheEntry;
1024 	const off_t* block = (const off_t*)_block;
1025 
1026 	off_t diff = cacheEntry->block_number - *block;
1027 	if (diff > 0)
1028 		return 1;
1029 
1030 	return diff < 0 ? -1 : 0;
1031 }
1032 
1033 
1034 /*static*/ uint32
1035 cached_block::Hash(void* _cacheEntry, const void* _block, uint32 range)
1036 {
1037 	cached_block* cacheEntry = (cached_block*)_cacheEntry;
1038 	const off_t* block = (const off_t*)_block;
1039 
1040 	if (cacheEntry != NULL)
1041 		return cacheEntry->block_number % range;
1042 
1043 	return (uint64)*block % range;
1044 }
1045 
1046 
1047 //	#pragma mark - BlockWriter
1048 
1049 
1050 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1051 	:
1052 	fCache(cache),
1053 	fBlocks(fBuffer),
1054 	fCount(0),
1055 	fTotal(0),
1056 	fCapacity(kBufferSize),
1057 	fMax(max),
1058 	fStatus(B_OK),
1059 	fDeletedTransaction(false)
1060 {
1061 }
1062 
1063 
1064 BlockWriter::~BlockWriter()
1065 {
1066 	if (fBlocks != fBuffer)
1067 		free(fBlocks);
1068 }
1069 
1070 
1071 /*!	Adds the specified block to the to be written array. If no more blocks can
1072 	be added, false is returned, otherwise true.
1073 */
1074 bool
1075 BlockWriter::Add(cached_block* block, hash_iterator* iterator)
1076 {
1077 	ASSERT(block->CanBeWritten());
1078 
1079 	if (fTotal == fMax)
1080 		return false;
1081 
1082 	if (fCount >= fCapacity) {
1083 		// Enlarge array if necessary
1084 		cached_block** newBlocks;
1085 		size_t newCapacity = max_c(256, fCapacity * 2);
1086 		if (fBlocks == fBuffer)
1087 			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1088 		else {
1089 			newBlocks = (cached_block**)realloc(fBlocks,
1090 				newCapacity * sizeof(void*));
1091 		}
1092 
1093 		if (newBlocks == NULL) {
1094 			// Allocating a larger array failed - we need to write back what
1095 			// we have synchronously now (this will also clear the array)
1096 			Write(iterator, false);
1097 		} else {
1098 			if (fBlocks == fBuffer)
1099 				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1100 
1101 			fBlocks = newBlocks;
1102 			fCapacity = newCapacity;
1103 		}
1104 	}
1105 
1106 	fBlocks[fCount++] = block;
1107 	fTotal++;
1108 	block->busy_writing = true;
1109 	fCache->busy_writing_count++;
1110 	if (block->previous_transaction != NULL)
1111 		block->previous_transaction->busy_writing_count++;
1112 
1113 	return true;
1114 }
1115 
1116 
1117 /*!	Adds all blocks of the specified transaction to the to be written array.
1118 	If no more blocks can be added, false is returned, otherwise true.
1119 */
1120 bool
1121 BlockWriter::Add(cache_transaction* transaction, hash_iterator* iterator,
1122 	bool& hasLeftOvers)
1123 {
1124 	ASSERT(!transaction->open);
1125 
1126 	if (transaction->busy_writing_count != 0) {
1127 		hasLeftOvers = true;
1128 		return true;
1129 	}
1130 
1131 	hasLeftOvers = false;
1132 
1133 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1134 	while (cached_block* block = blockIterator.Next()) {
1135 		if (!block->CanBeWritten()) {
1136 			// This block was already part of a previous transaction within this
1137 			// writer
1138 			hasLeftOvers = true;
1139 			continue;
1140 		}
1141 		if (!Add(block, iterator))
1142 			return false;
1143 
1144 		if (DeletedTransaction())
1145 			break;
1146 	}
1147 
1148 	return true;
1149 }
1150 
1151 
1152 /*! Cache must be locked when calling this method, but it will be unlocked
1153 	while the blocks are written back.
1154 */
1155 status_t
1156 BlockWriter::Write(hash_iterator* iterator, bool canUnlock)
1157 {
1158 	if (fCount == 0)
1159 		return B_OK;
1160 
1161 	if (canUnlock)
1162 		mutex_unlock(&fCache->lock);
1163 
1164 	// Sort blocks in their on-disk order
1165 	// TODO: ideally, this should be handled by the I/O scheduler
1166 
1167 	qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1168 	fDeletedTransaction = false;
1169 
1170 	for (uint32 i = 0; i < fCount; i++) {
1171 		status_t status = _WriteBlock(fBlocks[i]);
1172 		if (status != B_OK) {
1173 			// propagate to global error handling
1174 			if (fStatus == B_OK)
1175 				fStatus = status;
1176 
1177 			_UnmarkWriting(fBlocks[i]);
1178 			fBlocks[i] = NULL;
1179 				// This block will not be marked clean
1180 		}
1181 	}
1182 
1183 	if (canUnlock)
1184 		mutex_lock(&fCache->lock);
1185 
1186 	for (uint32 i = 0; i < fCount; i++)
1187 		_BlockDone(fBlocks[i], iterator);
1188 
1189 	fCount = 0;
1190 	return fStatus;
1191 }
1192 
1193 
1194 /*!	Writes the specified \a block back to disk. It will always only write back
1195 	the oldest change of the block if it is part of more than one transaction.
1196 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1197 	delete transactions when they are no longer used, and \a deleteTransaction
1198 	is \c true.
1199 */
1200 /*static*/ status_t
1201 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1202 {
1203 	BlockWriter writer(cache);
1204 
1205 	writer.Add(block);
1206 	return writer.Write();
1207 }
1208 
1209 
1210 void*
1211 BlockWriter::_Data(cached_block* block) const
1212 {
1213 	return block->previous_transaction != NULL && block->original_data != NULL
1214 		? block->original_data : block->current_data;
1215 		// We first need to write back changes from previous transactions
1216 }
1217 
1218 
1219 status_t
1220 BlockWriter::_WriteBlock(cached_block* block)
1221 {
1222 	ASSERT(block->busy_writing);
1223 
1224 	TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
1225 	TB(Write(fCache, block));
1226 	TB2(BlockData(fCache, block, "before write"));
1227 
1228 	size_t blockSize = fCache->block_size;
1229 
1230 	ssize_t written = write_pos(fCache->fd,
1231 		block->block_number * blockSize, _Data(block), blockSize);
1232 
1233 	if (written != (ssize_t)blockSize) {
1234 		TB(Error(fCache, block->block_number, "write failed", written));
1235 		FATAL(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number,
1236 			strerror(errno)));
1237 		if (written < 0)
1238 			return errno;
1239 
1240 		return B_IO_ERROR;
1241 	}
1242 
1243 	return B_OK;
1244 }
1245 
1246 
1247 void
1248 BlockWriter::_BlockDone(cached_block* block, hash_iterator* iterator)
1249 {
1250 	if (block == NULL) {
1251 		// An error occured when trying to write this block
1252 		return;
1253 	}
1254 
1255 	if (fCache->num_dirty_blocks > 0)
1256 		fCache->num_dirty_blocks--;
1257 
1258 	if (_Data(block) == block->current_data)
1259 		block->is_dirty = false;
1260 
1261 	_UnmarkWriting(block);
1262 
1263 	cache_transaction* previous = block->previous_transaction;
1264 	if (previous != NULL) {
1265 		previous->blocks.Remove(block);
1266 		block->previous_transaction = NULL;
1267 
1268 		if (block->original_data != NULL && block->transaction == NULL) {
1269 			// This block is not part of a transaction, so it does not need
1270 			// its original pointer anymore.
1271 			fCache->Free(block->original_data);
1272 			block->original_data = NULL;
1273 		}
1274 
1275 		// Has the previous transaction been finished with that write?
1276 		if (--previous->num_blocks == 0) {
1277 			TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1278 			T(Action("written", fCache, previous));
1279 
1280 			notify_transaction_listeners(fCache, previous,
1281 				TRANSACTION_WRITTEN);
1282 
1283 			if (iterator != NULL)
1284 				hash_remove_current(fCache->transaction_hash, iterator);
1285 			else
1286 				hash_remove(fCache->transaction_hash, previous);
1287 
1288 			delete_transaction(fCache, previous);
1289 			fDeletedTransaction = true;
1290 		}
1291 	}
1292 	if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1293 		// the block is no longer used
1294 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1295 		block->unused = true;
1296 		fCache->unused_blocks.Add(block);
1297 		fCache->unused_block_count++;
1298 	}
1299 
1300 	TB2(BlockData(fCache, block, "after write"));
1301 }
1302 
1303 
1304 void
1305 BlockWriter::_UnmarkWriting(cached_block* block)
1306 {
1307 	block->busy_writing = false;
1308 	if (block->previous_transaction != NULL)
1309 		block->previous_transaction->busy_writing_count--;
1310 	fCache->busy_writing_count--;
1311 
1312 	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1313 		|| block->busy_writing_waiters) {
1314 		fCache->busy_writing_waiters = false;
1315 		block->busy_writing_waiters = false;
1316 		fCache->busy_writing_condition.NotifyAll();
1317 	}
1318 }
1319 
1320 
1321 /*static*/ int
1322 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1323 {
1324 	cached_block* blockA = *(cached_block**)_blockA;
1325 	cached_block* blockB = *(cached_block**)_blockB;
1326 
1327 	off_t diff = blockA->block_number - blockB->block_number;
1328 	if (diff > 0)
1329 		return 1;
1330 
1331 	return diff < 0 ? -1 : 0;
1332 }
1333 
1334 
1335 //	#pragma mark - block_cache
1336 
1337 
1338 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1339 		bool readOnly)
1340 	:
1341 	hash(NULL),
1342 	fd(_fd),
1343 	max_blocks(numBlocks),
1344 	block_size(blockSize),
1345 	next_transaction_id(1),
1346 	last_transaction(NULL),
1347 	transaction_hash(NULL),
1348 	buffer_cache(NULL),
1349 	unused_block_count(0),
1350 	busy_reading_count(0),
1351 	busy_reading_waiters(false),
1352 	busy_writing_count(0),
1353 	busy_writing_waiters(0),
1354 	num_dirty_blocks(0),
1355 	read_only(readOnly)
1356 {
1357 }
1358 
1359 
1360 /*! Should be called with the cache's lock held. */
1361 block_cache::~block_cache()
1362 {
1363 	unregister_low_resource_handler(&_LowMemoryHandler, this);
1364 
1365 	hash_uninit(transaction_hash);
1366 	hash_uninit(hash);
1367 
1368 	delete_object_cache(buffer_cache);
1369 
1370 	mutex_destroy(&lock);
1371 }
1372 
1373 
1374 status_t
1375 block_cache::Init()
1376 {
1377 	busy_reading_condition.Init(this, "cache block busy_reading");
1378 	busy_writing_condition.Init(this, "cache block busy writing");
1379 	condition_variable.Init(this, "cache transaction sync");
1380 	mutex_init(&lock, "block cache");
1381 
1382 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1383 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1384 	if (buffer_cache == NULL)
1385 		return B_NO_MEMORY;
1386 
1387 	cached_block dummyBlock;
1388 	hash = hash_init(1024, offset_of_member(dummyBlock, next),
1389 		&cached_block::Compare, &cached_block::Hash);
1390 	if (hash == NULL)
1391 		return B_NO_MEMORY;
1392 
1393 	cache_transaction dummyTransaction;
1394 	transaction_hash = hash_init(16, offset_of_member(dummyTransaction, next),
1395 		&transaction_compare, &::transaction_hash);
1396 	if (transaction_hash == NULL)
1397 		return B_NO_MEMORY;
1398 
1399 	return register_low_resource_handler(&_LowMemoryHandler, this,
1400 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1401 			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1402 }
1403 
1404 
1405 void
1406 block_cache::Free(void* buffer)
1407 {
1408 	if (buffer != NULL)
1409 		object_cache_free(buffer_cache, buffer, 0);
1410 }
1411 
1412 
1413 void*
1414 block_cache::Allocate()
1415 {
1416 	void* block = object_cache_alloc(buffer_cache, 0);
1417 	if (block != NULL)
1418 		return block;
1419 
1420 	// recycle existing before allocating a new one
1421 	RemoveUnusedBlocks(100);
1422 
1423 	return object_cache_alloc(buffer_cache, 0);
1424 }
1425 
1426 
1427 void
1428 block_cache::FreeBlock(cached_block* block)
1429 {
1430 	Free(block->current_data);
1431 
1432 	if (block->original_data != NULL || block->parent_data != NULL) {
1433 		panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1434 			block->block_number, block->original_data, block->parent_data);
1435 	}
1436 
1437 #if BLOCK_CACHE_DEBUG_CHANGED
1438 	Free(block->compare);
1439 #endif
1440 
1441 	object_cache_free(sBlockCache, block, 0);
1442 }
1443 
1444 
1445 /*! Allocates a new block for \a blockNumber, ready for use */
1446 cached_block*
1447 block_cache::NewBlock(off_t blockNumber)
1448 {
1449 	cached_block* block = NULL;
1450 
1451 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1452 			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1453 		// recycle existing instead of allocating a new one
1454 		block = _GetUnusedBlock();
1455 	}
1456 	if (block == NULL) {
1457 		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1458 		if (block != NULL) {
1459 			block->current_data = Allocate();
1460 			if (block->current_data == NULL) {
1461 				object_cache_free(sBlockCache, block, 0);
1462 				return NULL;
1463 			}
1464 		} else {
1465 			TB(Error(this, blockNumber, "allocation failed"));
1466 			dprintf("block allocation failed, unused list is %sempty.\n",
1467 				unused_blocks.IsEmpty() ? "" : "not ");
1468 
1469 			// allocation failed, try to reuse an unused block
1470 			block = _GetUnusedBlock();
1471 			if (block == NULL) {
1472 				TB(Error(this, blockNumber, "get unused failed"));
1473 				FATAL(("could not allocate block!\n"));
1474 				return NULL;
1475 			}
1476 		}
1477 	}
1478 
1479 	block->block_number = blockNumber;
1480 	block->ref_count = 0;
1481 	block->last_accessed = 0;
1482 	block->transaction_next = NULL;
1483 	block->transaction = block->previous_transaction = NULL;
1484 	block->original_data = NULL;
1485 	block->parent_data = NULL;
1486 	block->busy_reading = false;
1487 	block->busy_writing = false;
1488 	block->is_writing = false;
1489 	block->is_dirty = false;
1490 	block->unused = false;
1491 	block->discard = false;
1492 	block->busy_reading_waiters = false;
1493 	block->busy_writing_waiters = false;
1494 #if BLOCK_CACHE_DEBUG_CHANGED
1495 	block->compare = NULL;
1496 #endif
1497 
1498 	return block;
1499 }
1500 
1501 
1502 void
1503 block_cache::FreeBlockParentData(cached_block* block)
1504 {
1505 	ASSERT(block->parent_data != NULL);
1506 	if (block->parent_data != block->current_data)
1507 		Free(block->parent_data);
1508 	block->parent_data = NULL;
1509 }
1510 
1511 
1512 void
1513 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1514 {
1515 	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1516 
1517 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1518 			cached_block* block = iterator.Next();) {
1519 		if (minSecondsOld >= block->LastAccess()) {
1520 			// The list is sorted by last access
1521 			break;
1522 		}
1523 		if (block->busy_reading || block->busy_writing)
1524 			continue;
1525 
1526 		TB(Flush(this, block));
1527 		TRACE(("  remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1528 			block->block_number, block->last_accessed));
1529 
1530 		// this can only happen if no transactions are used
1531 		if (block->is_dirty && !block->discard) {
1532 			if (block->busy_writing)
1533 				continue;
1534 
1535 			BlockWriter::WriteBlock(this, block);
1536 		}
1537 
1538 		// remove block from lists
1539 		iterator.Remove();
1540 		unused_block_count--;
1541 		RemoveBlock(block);
1542 
1543 		if (--count <= 0)
1544 			break;
1545 	}
1546 }
1547 
1548 
1549 void
1550 block_cache::RemoveBlock(cached_block* block)
1551 {
1552 	hash_remove(hash, block);
1553 	FreeBlock(block);
1554 }
1555 
1556 
1557 /*!	Discards the block from a transaction (this method must not be called
1558 	for blocks not part of a transaction).
1559 */
1560 void
1561 block_cache::DiscardBlock(cached_block* block)
1562 {
1563 	ASSERT(block->discard);
1564 	ASSERT(block->previous_transaction == NULL);
1565 
1566 	if (block->parent_data != NULL)
1567 		FreeBlockParentData(block);
1568 
1569 	if (block->original_data != NULL) {
1570 		Free(block->original_data);
1571 		block->original_data = NULL;
1572 	}
1573 
1574 	RemoveBlock(block);
1575 }
1576 
1577 
1578 void
1579 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1580 {
1581 	TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1582 
1583 	// free some blocks according to the low memory state
1584 	// (if there is enough memory left, we don't free any)
1585 
1586 	block_cache* cache = (block_cache*)data;
1587 	int32 free = 0;
1588 	int32 secondsOld = 0;
1589 	switch (level) {
1590 		case B_NO_LOW_RESOURCE:
1591 			return;
1592 		case B_LOW_RESOURCE_NOTE:
1593 			free = cache->unused_block_count / 8;
1594 			secondsOld = 120;
1595 			break;
1596 		case B_LOW_RESOURCE_WARNING:
1597 			free = cache->unused_block_count / 4;
1598 			secondsOld = 10;
1599 			break;
1600 		case B_LOW_RESOURCE_CRITICAL:
1601 			free = cache->unused_block_count / 2;
1602 			secondsOld = 0;
1603 			break;
1604 	}
1605 
1606 	MutexLocker locker(&cache->lock);
1607 
1608 	if (!locker.IsLocked()) {
1609 		// If our block_cache were deleted, it could be that we had
1610 		// been called before that deletion went through, therefore,
1611 		// acquiring its lock might fail.
1612 		return;
1613 	}
1614 
1615 #ifdef TRACE_BLOCK_CACHE
1616 	uint32 oldUnused = cache->unused_block_count;
1617 #endif
1618 
1619 	cache->RemoveUnusedBlocks(free, secondsOld);
1620 
1621 	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1622 		cache, oldUnused, cache->unused_block_count));
1623 }
1624 
1625 
1626 cached_block*
1627 block_cache::_GetUnusedBlock()
1628 {
1629 	TRACE(("block_cache: get unused block\n"));
1630 
1631 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1632 			cached_block* block = iterator.Next();) {
1633 		TB(Flush(this, block, true));
1634 		// this can only happen if no transactions are used
1635 		if (block->is_dirty && !block->busy_writing && !block->discard)
1636 			BlockWriter::WriteBlock(this, block);
1637 
1638 		// remove block from lists
1639 		iterator.Remove();
1640 		unused_block_count--;
1641 		hash_remove(hash, block);
1642 
1643 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1644 		block->unused = false;
1645 
1646 		// TODO: see if compare data is handled correctly here!
1647 #if BLOCK_CACHE_DEBUG_CHANGED
1648 		if (block->compare != NULL)
1649 			Free(block->compare);
1650 #endif
1651 		return block;
1652 	}
1653 
1654 	return NULL;
1655 }
1656 
1657 
1658 //	#pragma mark - private block functions
1659 
1660 
1661 /*!	Cache must be locked.
1662 */
1663 static void
1664 mark_block_busy_reading(block_cache* cache, cached_block* block)
1665 {
1666 	block->busy_reading = true;
1667 	cache->busy_reading_count++;
1668 }
1669 
1670 
1671 /*!	Cache must be locked.
1672 */
1673 static void
1674 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1675 {
1676 	block->busy_reading = false;
1677 	cache->busy_reading_count--;
1678 
1679 	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1680 		|| block->busy_reading_waiters) {
1681 		cache->busy_reading_waiters = false;
1682 		block->busy_reading_waiters = false;
1683 		cache->busy_reading_condition.NotifyAll();
1684 	}
1685 }
1686 
1687 
1688 /*!	Cache must be locked.
1689 */
1690 static void
1691 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1692 {
1693 	while (block->busy_reading) {
1694 		// wait for at least the specified block to be read in
1695 		ConditionVariableEntry entry;
1696 		cache->busy_reading_condition.Add(&entry);
1697 		block->busy_reading_waiters = true;
1698 
1699 		mutex_unlock(&cache->lock);
1700 
1701 		entry.Wait();
1702 
1703 		mutex_lock(&cache->lock);
1704 	}
1705 }
1706 
1707 
1708 /*!	Cache must be locked.
1709 */
1710 static void
1711 wait_for_busy_reading_blocks(block_cache* cache)
1712 {
1713 	while (cache->busy_reading_count != 0) {
1714 		// wait for all blocks to be read in
1715 		ConditionVariableEntry entry;
1716 		cache->busy_reading_condition.Add(&entry);
1717 		cache->busy_reading_waiters = true;
1718 
1719 		mutex_unlock(&cache->lock);
1720 
1721 		entry.Wait();
1722 
1723 		mutex_lock(&cache->lock);
1724 	}
1725 }
1726 
1727 
1728 /*!	Cache must be locked.
1729 */
1730 static void
1731 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1732 {
1733 	while (block->busy_writing) {
1734 		// wait for all blocks to be written back
1735 		ConditionVariableEntry entry;
1736 		cache->busy_writing_condition.Add(&entry);
1737 		block->busy_writing_waiters = true;
1738 
1739 		mutex_unlock(&cache->lock);
1740 
1741 		entry.Wait();
1742 
1743 		mutex_lock(&cache->lock);
1744 	}
1745 }
1746 
1747 
1748 /*!	Cache must be locked.
1749 */
1750 static void
1751 wait_for_busy_writing_blocks(block_cache* cache)
1752 {
1753 	while (cache->busy_writing_count != 0) {
1754 		// wait for all blocks to be written back
1755 		ConditionVariableEntry entry;
1756 		cache->busy_writing_condition.Add(&entry);
1757 		cache->busy_writing_waiters = true;
1758 
1759 		mutex_unlock(&cache->lock);
1760 
1761 		entry.Wait();
1762 
1763 		mutex_lock(&cache->lock);
1764 	}
1765 }
1766 
1767 
1768 /*!	Removes a reference from the specified \a block. If this was the last
1769 	reference, the block is moved into the unused list.
1770 	In low memory situations, it will also free some blocks from that list,
1771 	but not necessarily the \a block it just released.
1772 */
1773 static void
1774 put_cached_block(block_cache* cache, cached_block* block)
1775 {
1776 #if BLOCK_CACHE_DEBUG_CHANGED
1777 	if (!block->is_dirty && block->compare != NULL
1778 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1779 		dprintf("new block:\n");
1780 		dump_block((const char*)block->current_data, 256, "  ");
1781 		dprintf("unchanged block:\n");
1782 		dump_block((const char*)block->compare, 256, "  ");
1783 		BlockWriter::WriteBlock(cache, block);
1784 		panic("block_cache: supposed to be clean block was changed!\n");
1785 
1786 		cache->Free(block->compare);
1787 		block->compare = NULL;
1788 	}
1789 #endif
1790 	TB(Put(cache, block));
1791 
1792 	if (block->ref_count < 1) {
1793 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1794 		return;
1795 	}
1796 
1797 	if (--block->ref_count == 0
1798 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1799 		// This block is not used anymore, and not part of any transaction
1800 		block->is_writing = false;
1801 
1802 		if (block->discard) {
1803 			cache->RemoveBlock(block);
1804 		} else {
1805 			// put this block in the list of unused blocks
1806 			ASSERT(!block->unused);
1807 			block->unused = true;
1808 
1809 			ASSERT(block->original_data == NULL && block->parent_data == NULL);
1810 			cache->unused_blocks.Add(block);
1811 			cache->unused_block_count++;
1812 		}
1813 	}
1814 }
1815 
1816 
1817 static void
1818 put_cached_block(block_cache* cache, off_t blockNumber)
1819 {
1820 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1821 		panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1822 			blockNumber, cache->max_blocks - 1);
1823 	}
1824 
1825 	cached_block* block = (cached_block*)hash_lookup(cache->hash, &blockNumber);
1826 	if (block != NULL)
1827 		put_cached_block(cache, block);
1828 	else {
1829 		TB(Error(cache, blockNumber, "put unknown"));
1830 	}
1831 }
1832 
1833 
1834 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1835 	there, or reads it from the disk.
1836 	You need to have the cache locked when calling this function.
1837 
1838 	\param _allocated tells you whether or not a new block has been allocated
1839 		to satisfy your request.
1840 	\param readBlock if \c false, the block will not be read in case it was
1841 		not already in the cache. The block you retrieve may contain random
1842 		data. If \c true, the cache will be temporarily unlocked while the
1843 		block is read in.
1844 */
1845 static cached_block*
1846 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1847 	bool readBlock = true)
1848 {
1849 	ASSERT_LOCKED_MUTEX(&cache->lock);
1850 
1851 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1852 		panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1853 			blockNumber, cache->max_blocks - 1);
1854 		return NULL;
1855 	}
1856 
1857 retry:
1858 	cached_block* block = (cached_block*)hash_lookup(cache->hash,
1859 		&blockNumber);
1860 	*_allocated = false;
1861 
1862 	if (block == NULL) {
1863 		// put block into cache
1864 		block = cache->NewBlock(blockNumber);
1865 		if (block == NULL)
1866 			return NULL;
1867 
1868 		hash_insert_grow(cache->hash, block);
1869 		*_allocated = true;
1870 	} else if (block->busy_reading) {
1871 		// The block is currently busy_reading - wait and try again later
1872 		wait_for_busy_reading_block(cache, block);
1873 		goto retry;
1874 	}
1875 
1876 	if (block->unused) {
1877 		//TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
1878 		block->unused = false;
1879 		cache->unused_blocks.Remove(block);
1880 		cache->unused_block_count--;
1881 	}
1882 
1883 	if (*_allocated && readBlock) {
1884 		// read block into cache
1885 		int32 blockSize = cache->block_size;
1886 
1887 		mark_block_busy_reading(cache, block);
1888 		mutex_unlock(&cache->lock);
1889 
1890 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1891 			block->current_data, blockSize);
1892 
1893 		mutex_lock(&cache->lock);
1894 		if (bytesRead < blockSize) {
1895 			cache->RemoveBlock(block);
1896 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1897 
1898 			FATAL(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n",
1899 				blockNumber, bytesRead, strerror(errno)));
1900 			return NULL;
1901 		}
1902 		TB(Read(cache, block));
1903 
1904 		mark_block_unbusy_reading(cache, block);
1905 	}
1906 
1907 	block->ref_count++;
1908 	block->last_accessed = system_time() / 1000000L;
1909 
1910 	return block;
1911 }
1912 
1913 
1914 /*!	Returns the writable block data for the requested blockNumber.
1915 	If \a cleared is true, the block is not read from disk; an empty block
1916 	is returned.
1917 
1918 	This is the only method to insert a block into a transaction. It makes
1919 	sure that the previous block contents are preserved in that case.
1920 */
1921 static void*
1922 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1923 	off_t length, int32 transactionID, bool cleared)
1924 {
1925 	TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
1926 		blockNumber, transactionID));
1927 
1928 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1929 		panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1930 			blockNumber, cache->max_blocks - 1);
1931 	}
1932 
1933 	bool allocated;
1934 	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1935 		!cleared);
1936 	if (block == NULL)
1937 		return NULL;
1938 
1939 	if (block->busy_writing)
1940 		wait_for_busy_writing_block(cache, block);
1941 
1942 	block->discard = false;
1943 
1944 	// if there is no transaction support, we just return the current block
1945 	if (transactionID == -1) {
1946 		if (cleared) {
1947 			mark_block_busy_reading(cache, block);
1948 			mutex_unlock(&cache->lock);
1949 
1950 			memset(block->current_data, 0, cache->block_size);
1951 
1952 			mutex_lock(&cache->lock);
1953 			mark_block_unbusy_reading(cache, block);
1954 		}
1955 
1956 		block->is_writing = true;
1957 
1958 		if (!block->is_dirty) {
1959 			cache->num_dirty_blocks++;
1960 			block->is_dirty = true;
1961 				// mark the block as dirty
1962 		}
1963 
1964 		TB(Get(cache, block));
1965 		return block->current_data;
1966 	}
1967 
1968 	cache_transaction* transaction = block->transaction;
1969 
1970 	if (transaction != NULL && transaction->id != transactionID) {
1971 		// TODO: we have to wait here until the other transaction is done.
1972 		//	Maybe we should even panic, since we can't prevent any deadlocks.
1973 		panic("get_writable_cached_block(): asked to get busy writable block "
1974 			"(transaction %" B_PRId32 ")\n", block->transaction->id);
1975 		put_cached_block(cache, block);
1976 		return NULL;
1977 	}
1978 	if (transaction == NULL && transactionID != -1) {
1979 		// get new transaction
1980 		transaction = lookup_transaction(cache, transactionID);
1981 		if (transaction == NULL) {
1982 			panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
1983 				transactionID);
1984 			put_cached_block(cache, block);
1985 			return NULL;
1986 		}
1987 		if (!transaction->open) {
1988 			panic("get_writable_cached_block(): transaction already done!\n");
1989 			put_cached_block(cache, block);
1990 			return NULL;
1991 		}
1992 
1993 		block->transaction = transaction;
1994 
1995 		// attach the block to the transaction block list
1996 		block->transaction_next = transaction->first_block;
1997 		transaction->first_block = block;
1998 		transaction->num_blocks++;
1999 	}
2000 	if (transaction != NULL)
2001 		transaction->last_used = system_time();
2002 
2003 	bool wasUnchanged = block->original_data == NULL
2004 		|| block->previous_transaction != NULL;
2005 
2006 	if (!(allocated && cleared) && block->original_data == NULL) {
2007 		// we already have data, so we need to preserve it
2008 		block->original_data = cache->Allocate();
2009 		if (block->original_data == NULL) {
2010 			TB(Error(cache, blockNumber, "allocate original failed"));
2011 			FATAL(("could not allocate original_data\n"));
2012 			put_cached_block(cache, block);
2013 			return NULL;
2014 		}
2015 
2016 		mark_block_busy_reading(cache, block);
2017 		mutex_unlock(&cache->lock);
2018 
2019 		memcpy(block->original_data, block->current_data, cache->block_size);
2020 
2021 		mutex_lock(&cache->lock);
2022 		mark_block_unbusy_reading(cache, block);
2023 	}
2024 	if (block->parent_data == block->current_data) {
2025 		// remember any previous contents for the parent transaction
2026 		block->parent_data = cache->Allocate();
2027 		if (block->parent_data == NULL) {
2028 			// TODO: maybe we should just continue the current transaction in
2029 			// this case...
2030 			TB(Error(cache, blockNumber, "allocate parent failed"));
2031 			FATAL(("could not allocate parent\n"));
2032 			put_cached_block(cache, block);
2033 			return NULL;
2034 		}
2035 
2036 		mark_block_busy_reading(cache, block);
2037 		mutex_unlock(&cache->lock);
2038 
2039 		memcpy(block->parent_data, block->current_data, cache->block_size);
2040 
2041 		mutex_lock(&cache->lock);
2042 		mark_block_unbusy_reading(cache, block);
2043 
2044 		transaction->sub_num_blocks++;
2045 	} else if (transaction != NULL && transaction->has_sub_transaction
2046 		&& block->parent_data == NULL && wasUnchanged)
2047 		transaction->sub_num_blocks++;
2048 
2049 	if (cleared) {
2050 		mark_block_busy_reading(cache, block);
2051 		mutex_unlock(&cache->lock);
2052 
2053 		memset(block->current_data, 0, cache->block_size);
2054 
2055 		mutex_lock(&cache->lock);
2056 		mark_block_unbusy_reading(cache, block);
2057 	}
2058 
2059 	block->is_dirty = true;
2060 	TB(Get(cache, block));
2061 	TB2(BlockData(cache, block, "get writable"));
2062 
2063 	return block->current_data;
2064 }
2065 
2066 
2067 #if DEBUG_BLOCK_CACHE
2068 
2069 
2070 static void
2071 dump_block(cached_block* block)
2072 {
2073 	kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2074 		" %c%c%c%c%c%c %08lx %08lx\n",
2075 		(addr_t)block, block->block_number,
2076 		(addr_t)block->current_data, (addr_t)block->original_data,
2077 		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2078 		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2079 		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2080 		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2081 		(addr_t)block->transaction,
2082 		(addr_t)block->previous_transaction);
2083 }
2084 
2085 
2086 static void
2087 dump_block_long(cached_block* block)
2088 {
2089 	kprintf("BLOCK %p\n", block);
2090 	kprintf(" current data:  %p\n", block->current_data);
2091 	kprintf(" original data: %p\n", block->original_data);
2092 	kprintf(" parent data:   %p\n", block->parent_data);
2093 #if BLOCK_CACHE_DEBUG_CHANGED
2094 	kprintf(" compare data:  %p\n", block->compare);
2095 #endif
2096 	kprintf(" ref_count:     %" B_PRId32 "\n", block->ref_count);
2097 	kprintf(" accessed:      %" B_PRId32 "\n", block->LastAccess());
2098 	kprintf(" flags:        ");
2099 	if (block->busy_reading)
2100 		kprintf(" busy_reading");
2101 	if (block->busy_writing)
2102 		kprintf(" busy_writing");
2103 	if (block->is_writing)
2104 		kprintf(" is-writing");
2105 	if (block->is_dirty)
2106 		kprintf(" is-dirty");
2107 	if (block->unused)
2108 		kprintf(" unused");
2109 	if (block->discard)
2110 		kprintf(" discard");
2111 	kprintf("\n");
2112 	if (block->transaction != NULL) {
2113 		kprintf(" transaction:   %p (%" B_PRId32 ")\n", block->transaction,
2114 			block->transaction->id);
2115 		if (block->transaction_next != NULL) {
2116 			kprintf(" next in transaction: %" B_PRIdOFF "\n",
2117 				block->transaction_next->block_number);
2118 		}
2119 	}
2120 	if (block->previous_transaction != NULL) {
2121 		kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2122 			block->previous_transaction,
2123 			block->previous_transaction->id);
2124 	}
2125 
2126 	set_debug_variable("_current", (addr_t)block->current_data);
2127 	set_debug_variable("_original", (addr_t)block->original_data);
2128 	set_debug_variable("_parent", (addr_t)block->parent_data);
2129 }
2130 
2131 
2132 static int
2133 dump_cached_block(int argc, char** argv)
2134 {
2135 	if (argc != 2) {
2136 		kprintf("usage: %s <block-address>\n", argv[0]);
2137 		return 0;
2138 	}
2139 
2140 	dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2141 	return 0;
2142 }
2143 
2144 
2145 static int
2146 dump_cache(int argc, char** argv)
2147 {
2148 	bool showTransactions = false;
2149 	bool showBlocks = false;
2150 	int32 i = 1;
2151 	while (argv[i] != NULL && argv[i][0] == '-') {
2152 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
2153 			switch (arg[0]) {
2154 				case 'b':
2155 					showBlocks = true;
2156 					break;
2157 				case 't':
2158 					showTransactions = true;
2159 					break;
2160 				default:
2161 					print_debugger_command_usage(argv[0]);
2162 					return 0;
2163 			}
2164 		}
2165 		i++;
2166 	}
2167 
2168 	if (i >= argc) {
2169 		print_debugger_command_usage(argv[0]);
2170 		return 0;
2171 	}
2172 
2173 	block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2174 	if (cache == NULL) {
2175 		kprintf("invalid cache address\n");
2176 		return 0;
2177 	}
2178 
2179 	off_t blockNumber = -1;
2180 	if (i + 1 < argc) {
2181 		blockNumber = parse_expression(argv[i + 1]);
2182 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
2183 			&blockNumber);
2184 		if (block != NULL)
2185 			dump_block_long(block);
2186 		else
2187 			kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2188 		return 0;
2189 	}
2190 
2191 	kprintf("BLOCK CACHE: %p\n", cache);
2192 
2193 	kprintf(" fd:           %d\n", cache->fd);
2194 	kprintf(" max_blocks:   %" B_PRIdOFF "\n", cache->max_blocks);
2195 	kprintf(" block_size:   %zu\n", cache->block_size);
2196 	kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2197 	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2198 	kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2199 		cache->busy_reading_waiters ? "has" : "no");
2200 	kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2201 		cache->busy_writing_waiters ? "has" : "no");
2202 
2203 	if (!cache->pending_notifications.IsEmpty()) {
2204 		kprintf(" pending notifications:\n");
2205 
2206 		NotificationList::Iterator iterator
2207 			= cache->pending_notifications.GetIterator();
2208 		while (iterator.HasNext()) {
2209 			cache_notification* notification = iterator.Next();
2210 
2211 			kprintf("  %p %5" B_PRIx32 " %p - %p\n", notification,
2212 				notification->events_pending, notification->hook,
2213 				notification->data);
2214 		}
2215 	}
2216 
2217 	if (showTransactions) {
2218 		kprintf(" transactions:\n");
2219 		kprintf("address       id state  blocks  main   sub\n");
2220 
2221 		hash_iterator iterator;
2222 		hash_open(cache->transaction_hash, &iterator);
2223 
2224 		cache_transaction* transaction;
2225 		while ((transaction = (cache_transaction*)hash_next(
2226 				cache->transaction_hash, &iterator)) != NULL) {
2227 			kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" B_PRId32 "\n",
2228 				transaction, transaction->id, transaction->open ? "open" : "closed",
2229 				transaction->num_blocks, transaction->main_num_blocks,
2230 				transaction->sub_num_blocks);
2231 		}
2232 	}
2233 
2234 	if (showBlocks) {
2235 		kprintf(" blocks:\n");
2236 		kprintf("address  block no. current  original parent    refs access "
2237 			"flags transact prev. trans\n");
2238 	}
2239 
2240 	uint32 referenced = 0;
2241 	uint32 count = 0;
2242 	uint32 dirty = 0;
2243 	uint32 discarded = 0;
2244 	hash_iterator iterator;
2245 	hash_open(cache->hash, &iterator);
2246 	cached_block* block;
2247 	while ((block = (cached_block*)hash_next(cache->hash, &iterator)) != NULL) {
2248 		if (showBlocks)
2249 			dump_block(block);
2250 
2251 		if (block->is_dirty)
2252 			dirty++;
2253 		if (block->discard)
2254 			discarded++;
2255 		if (block->ref_count)
2256 			referenced++;
2257 		count++;
2258 	}
2259 
2260 	kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 " discarded"
2261 		", %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 " in unused.\n",
2262 		count, dirty, discarded, referenced, cache->busy_reading_count,
2263 		cache->unused_block_count);
2264 
2265 	hash_close(cache->hash, &iterator, false);
2266 	return 0;
2267 }
2268 
2269 
2270 static int
2271 dump_transaction(int argc, char** argv)
2272 {
2273 	bool showBlocks = false;
2274 	int i = 1;
2275 	if (argc > 1 && !strcmp(argv[1], "-b")) {
2276 		showBlocks = true;
2277 		i++;
2278 	}
2279 
2280 	if (argc - i < 1 || argc - i > 2) {
2281 		print_debugger_command_usage(argv[0]);
2282 		return 0;
2283 	}
2284 
2285 	cache_transaction* transaction = NULL;
2286 
2287 	if (argc - i == 1) {
2288 		transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2289 	} else {
2290 		block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2291 		int32 id = parse_expression(argv[i + 1]);
2292 		transaction = lookup_transaction(cache, id);
2293 		if (transaction == NULL) {
2294 			kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2295 			return 0;
2296 		}
2297 	}
2298 
2299 	kprintf("TRANSACTION %p\n", transaction);
2300 
2301 	kprintf(" id:             %" B_PRId32 "\n", transaction->id);
2302 	kprintf(" num block:      %" B_PRId32 "\n", transaction->num_blocks);
2303 	kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2304 	kprintf(" sub num block:  %" B_PRId32 "\n", transaction->sub_num_blocks);
2305 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
2306 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
2307 	kprintf(" idle:           %" B_PRId64 " secs\n",
2308 		(system_time() - transaction->last_used) / 1000000);
2309 
2310 	kprintf(" listeners:\n");
2311 
2312 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2313 	while (iterator.HasNext()) {
2314 		cache_listener* listener = iterator.Next();
2315 
2316 		kprintf("  %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2317 			listener->hook, listener->data);
2318 	}
2319 
2320 	if (!showBlocks)
2321 		return 0;
2322 
2323 	kprintf(" blocks:\n");
2324 	kprintf("address  block no. current  original parent    refs access "
2325 		"flags transact prev. trans\n");
2326 
2327 	cached_block* block = transaction->first_block;
2328 	while (block != NULL) {
2329 		dump_block(block);
2330 		block = block->transaction_next;
2331 	}
2332 
2333 	kprintf("--\n");
2334 
2335 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2336 	while (blockIterator.HasNext()) {
2337 		block = blockIterator.Next();
2338 		dump_block(block);
2339 	}
2340 
2341 	return 0;
2342 }
2343 
2344 
2345 static int
2346 dump_caches(int argc, char** argv)
2347 {
2348 	kprintf("Block caches:\n");
2349 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2350 	while (i.HasNext()) {
2351 		block_cache* cache = i.Next();
2352 		if (cache == (block_cache*)&sMarkCache)
2353 			continue;
2354 
2355 		kprintf("  %p\n", cache);
2356 	}
2357 
2358 	return 0;
2359 }
2360 
2361 
2362 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2363 static int
2364 dump_block_data(int argc, char** argv)
2365 {
2366 	using namespace BlockTracing;
2367 
2368 	// Determine which blocks to show
2369 
2370 	bool printStackTrace = true;
2371 	uint32 which = 0;
2372 	int32 i = 1;
2373 	while (i < argc && argv[i][0] == '-') {
2374 		char* arg = &argv[i][1];
2375 		while (arg[0]) {
2376 			switch (arg[0]) {
2377 				case 'c':
2378 					which |= BlockData::kCurrent;
2379 					break;
2380 				case 'p':
2381 					which |= BlockData::kParent;
2382 					break;
2383 				case 'o':
2384 					which |= BlockData::kOriginal;
2385 					break;
2386 
2387 				default:
2388 					kprintf("invalid block specifier (only o/c/p are "
2389 						"allowed).\n");
2390 					return 0;
2391 			}
2392 			arg++;
2393 		}
2394 
2395 		i++;
2396 	}
2397 	if (which == 0)
2398 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2399 
2400 	if (i == argc) {
2401 		print_debugger_command_usage(argv[0]);
2402 		return 0;
2403 	}
2404 
2405 	// Get the range of blocks to print
2406 
2407 	int64 from = parse_expression(argv[i]);
2408 	int64 to = from;
2409 	if (argc > i + 1)
2410 		to = parse_expression(argv[i + 1]);
2411 	if (to < from)
2412 		to = from;
2413 
2414 	uint32 offset = 0;
2415 	uint32 size = LONG_MAX;
2416 	if (argc > i + 2)
2417 		offset = parse_expression(argv[i + 2]);
2418 	if (argc > i + 3)
2419 		size = parse_expression(argv[i + 3]);
2420 
2421 	TraceEntryIterator iterator;
2422 	iterator.MoveTo(from - 1);
2423 
2424 	static char sBuffer[1024];
2425 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2426 
2427 	while (TraceEntry* entry = iterator.Next()) {
2428 		int32 index = iterator.Index();
2429 		if (index > to)
2430 			break;
2431 
2432 		Action* action = dynamic_cast<Action*>(entry);
2433 		if (action != NULL) {
2434 			out.Clear();
2435 			out.DumpEntry(action);
2436 			continue;
2437 		}
2438 
2439 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
2440 		if (blockData == NULL)
2441 			continue;
2442 
2443 		out.Clear();
2444 
2445 		const char* dump = out.DumpEntry(entry);
2446 		int length = strlen(dump);
2447 		if (length > 0 && dump[length - 1] == '\n')
2448 			length--;
2449 
2450 		kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2451 
2452 		if (printStackTrace) {
2453 			out.Clear();
2454 			entry->DumpStackTrace(out);
2455 			if (out.Size() > 0)
2456 				kputs(out.Buffer());
2457 		}
2458 
2459 		blockData->DumpBlocks(which, offset, size);
2460 	}
2461 
2462 	return 0;
2463 }
2464 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
2465 
2466 
2467 #endif	// DEBUG_BLOCK_CACHE
2468 
2469 
2470 /*!	Traverses through the block_cache list, and returns one cache after the
2471 	other. The cache returned is automatically locked when you get it, and
2472 	unlocked with the next call to this function. Ignores caches that are in
2473 	deletion state.
2474 	Returns \c NULL when the end of the list is reached.
2475 */
2476 static block_cache*
2477 get_next_locked_block_cache(block_cache* last)
2478 {
2479 	MutexLocker _(sCachesLock);
2480 
2481 	block_cache* cache;
2482 	if (last != NULL) {
2483 		mutex_unlock(&last->lock);
2484 
2485 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
2486 		sCaches.Remove((block_cache*)&sMarkCache);
2487 	} else
2488 		cache = sCaches.Head();
2489 
2490 	if (cache != NULL) {
2491 		mutex_lock(&cache->lock);
2492 		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2493 	}
2494 
2495 	return cache;
2496 }
2497 
2498 
2499 /*!	Background thread that continuously checks for pending notifications of
2500 	all caches.
2501 	Every two seconds, it will also write back up to 64 blocks per cache.
2502 */
2503 static status_t
2504 block_notifier_and_writer(void* /*data*/)
2505 {
2506 	const bigtime_t kTimeout = 2000000LL;
2507 	bigtime_t timeout = kTimeout;
2508 
2509 	while (true) {
2510 		bigtime_t start = system_time();
2511 
2512 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
2513 			B_RELATIVE_TIMEOUT, timeout);
2514 		if (status == B_OK) {
2515 			flush_pending_notifications();
2516 			timeout -= system_time() - start;
2517 			continue;
2518 		}
2519 
2520 		// write 64 blocks of each block_cache every two seconds
2521 		// TODO: change this once we have an I/O scheduler
2522 		timeout = kTimeout;
2523 		size_t usedMemory;
2524 		object_cache_get_usage(sBlockCache, &usedMemory);
2525 
2526 		block_cache* cache = NULL;
2527 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2528 			BlockWriter writer(cache, 64);
2529 
2530 			size_t cacheUsedMemory;
2531 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2532 			usedMemory += cacheUsedMemory;
2533 
2534 			if (cache->num_dirty_blocks) {
2535 				// This cache is not using transactions, we'll scan the blocks
2536 				// directly
2537 				hash_iterator iterator;
2538 				hash_open(cache->hash, &iterator);
2539 
2540 				cached_block* block;
2541 				while ((block = (cached_block*)hash_next(cache->hash, &iterator))
2542 						!= NULL) {
2543 					if (block->CanBeWritten() && !writer.Add(block))
2544 						break;
2545 				}
2546 
2547 				hash_close(cache->hash, &iterator, false);
2548 			} else {
2549 				hash_iterator iterator;
2550 				hash_open(cache->transaction_hash, &iterator);
2551 
2552 				cache_transaction* transaction;
2553 				while ((transaction = (cache_transaction*)hash_next(
2554 						cache->transaction_hash, &iterator)) != NULL) {
2555 					if (transaction->open) {
2556 						if (system_time() > transaction->last_used
2557 								+ kTransactionIdleTime) {
2558 							// Transaction is open but idle
2559 							notify_transaction_listeners(cache, transaction,
2560 								TRANSACTION_IDLE);
2561 						}
2562 						continue;
2563 					}
2564 
2565 					bool hasLeftOvers;
2566 						// we ignore this one
2567 					if (!writer.Add(transaction, &iterator, hasLeftOvers))
2568 						break;
2569 				}
2570 
2571 				hash_close(cache->transaction_hash, &iterator, false);
2572 			}
2573 
2574 			writer.Write();
2575 
2576 			if ((block_cache_used_memory() / B_PAGE_SIZE)
2577 					> vm_page_num_pages() / 2) {
2578 				// Try to reduce memory usage to half of the available
2579 				// RAM at maximum
2580 				cache->RemoveUnusedBlocks(1000, 10);
2581 			}
2582 		}
2583 
2584 		MutexLocker _(sCachesMemoryUseLock);
2585 		sUsedMemory = usedMemory;
2586 	}
2587 
2588 	// never can get here
2589 	return B_OK;
2590 }
2591 
2592 
2593 /*!	Notify function for wait_for_notifications(). */
2594 static void
2595 notify_sync(int32 transactionID, int32 event, void* _cache)
2596 {
2597 	block_cache* cache = (block_cache*)_cache;
2598 
2599 	cache->condition_variable.NotifyOne();
2600 }
2601 
2602 
2603 /*!	Must be called with the sCachesLock held. */
2604 static bool
2605 is_valid_cache(block_cache* cache)
2606 {
2607 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2608 
2609 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2610 	while (iterator.HasNext()) {
2611 		if (cache == iterator.Next())
2612 			return true;
2613 	}
2614 
2615 	return false;
2616 }
2617 
2618 
2619 /*!	Waits until all pending notifications are carried out.
2620 	Safe to be called from the block writer/notifier thread.
2621 	You must not hold the \a cache lock when calling this function.
2622 */
2623 static void
2624 wait_for_notifications(block_cache* cache)
2625 {
2626 	MutexLocker locker(sCachesLock);
2627 
2628 	if (find_thread(NULL) == sNotifierWriterThread) {
2629 		// We're the notifier thread, don't wait, but flush all pending
2630 		// notifications directly.
2631 		if (is_valid_cache(cache))
2632 			flush_pending_notifications(cache);
2633 		return;
2634 	}
2635 
2636 	// add sync notification
2637 	cache_notification notification;
2638 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2639 		cache);
2640 
2641 	ConditionVariableEntry entry;
2642 	cache->condition_variable.Add(&entry);
2643 
2644 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2645 	locker.Unlock();
2646 
2647 	// wait for notification hook to be called
2648 	entry.Wait();
2649 }
2650 
2651 
2652 status_t
2653 block_cache_init(void)
2654 {
2655 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2656 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2657 	if (sBlockCache == NULL)
2658 		return B_NO_MEMORY;
2659 
2660 	new (&sCaches) DoublyLinkedList<block_cache>;
2661 		// manually call constructor
2662 
2663 	sEventSemaphore = create_sem(0, "block cache event");
2664 	if (sEventSemaphore < B_OK)
2665 		return sEventSemaphore;
2666 
2667 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2668 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2669 	if (sNotifierWriterThread >= B_OK)
2670 		resume_thread(sNotifierWriterThread);
2671 
2672 #if DEBUG_BLOCK_CACHE
2673 	add_debugger_command_etc("block_caches", &dump_caches,
2674 		"dumps all block caches", "\n", 0);
2675 	add_debugger_command_etc("block_cache", &dump_cache,
2676 		"dumps a specific block cache",
2677 		"[-bt] <cache-address> [block-number]\n"
2678 		"  -t lists the transactions\n"
2679 		"  -b lists all blocks\n", 0);
2680 	add_debugger_command("cached_block", &dump_cached_block,
2681 		"dumps the specified cached block");
2682 	add_debugger_command_etc("transaction", &dump_transaction,
2683 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2684 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2685 		"  -b lists all blocks that are part of this transaction\n", 0);
2686 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2687 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2688 		"dumps the data blocks logged for the actions",
2689 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2690 		"If no data specifier is used, all blocks are shown by default.\n"
2691 		" -c       the current data is shown, if available.\n"
2692 		" -p       the parent data is shown, if available.\n"
2693 		" -o       the original data is shown, if available.\n"
2694 		" <from>   first index of tracing entries to show.\n"
2695 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2696 		" <offset> the offset of the block data.\n"
2697 		" <from>   the size of the block data that is dumped\n", 0);
2698 #	endif
2699 #endif	// DEBUG_BLOCK_CACHE
2700 
2701 	return B_OK;
2702 }
2703 
2704 
2705 size_t
2706 block_cache_used_memory(void)
2707 {
2708 	MutexLocker _(sCachesMemoryUseLock);
2709 	return sUsedMemory;
2710 }
2711 
2712 
2713 //	#pragma mark - public transaction API
2714 
2715 
2716 int32
2717 cache_start_transaction(void* _cache)
2718 {
2719 	block_cache* cache = (block_cache*)_cache;
2720 	TransactionLocker locker(cache);
2721 
2722 	if (cache->last_transaction && cache->last_transaction->open) {
2723 		panic("last transaction (%" B_PRId32 ") still open!\n",
2724 			cache->last_transaction->id);
2725 	}
2726 
2727 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2728 	if (transaction == NULL)
2729 		return B_NO_MEMORY;
2730 
2731 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2732 	cache->last_transaction = transaction;
2733 
2734 	TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
2735 	T(Action("start", cache, transaction));
2736 
2737 	hash_insert_grow(cache->transaction_hash, transaction);
2738 
2739 	return transaction->id;
2740 }
2741 
2742 
2743 status_t
2744 cache_sync_transaction(void* _cache, int32 id)
2745 {
2746 	block_cache* cache = (block_cache*)_cache;
2747 	bool hadBusy;
2748 
2749 	TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
2750 
2751 	do {
2752 		TransactionLocker locker(cache);
2753 		hadBusy = false;
2754 
2755 		BlockWriter writer(cache);
2756 		hash_iterator iterator;
2757 		hash_open(cache->transaction_hash, &iterator);
2758 
2759 		cache_transaction* transaction;
2760 		while ((transaction = (cache_transaction*)hash_next(
2761 				cache->transaction_hash, &iterator)) != NULL) {
2762 			// close all earlier transactions which haven't been closed yet
2763 
2764 			if (transaction->busy_writing_count != 0) {
2765 				hadBusy = true;
2766 				continue;
2767 			}
2768 			if (transaction->id <= id && !transaction->open) {
2769 				// write back all of their remaining dirty blocks
2770 				T(Action("sync", cache, transaction));
2771 
2772 				bool hasLeftOvers;
2773 				writer.Add(transaction, &iterator, hasLeftOvers);
2774 
2775 				if (hasLeftOvers) {
2776 					// This transaction contains blocks that a previous
2777 					// transaction is trying to write back in this write run
2778 					hadBusy = true;
2779 				}
2780 			}
2781 		}
2782 
2783 		hash_close(cache->transaction_hash, &iterator, false);
2784 
2785 		status_t status = writer.Write();
2786 		if (status != B_OK)
2787 			return status;
2788 	} while (hadBusy);
2789 
2790 	wait_for_notifications(cache);
2791 		// make sure that all pending TRANSACTION_WRITTEN notifications
2792 		// are handled after we return
2793 	return B_OK;
2794 }
2795 
2796 
2797 status_t
2798 cache_end_transaction(void* _cache, int32 id,
2799 	transaction_notification_hook hook, void* data)
2800 {
2801 	block_cache* cache = (block_cache*)_cache;
2802 	TransactionLocker locker(cache);
2803 
2804 	TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
2805 
2806 	cache_transaction* transaction = lookup_transaction(cache, id);
2807 	if (transaction == NULL) {
2808 		panic("cache_end_transaction(): invalid transaction ID\n");
2809 		return B_BAD_VALUE;
2810 	}
2811 
2812 	// Write back all pending transaction blocks
2813 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2814 	if (status != B_OK)
2815 		return status;
2816 
2817 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2818 
2819 	if (hook != NULL
2820 		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2821 			hook, data) != B_OK) {
2822 		return B_NO_MEMORY;
2823 	}
2824 
2825 	T(Action("end", cache, transaction));
2826 
2827 	// iterate through all blocks and free the unchanged original contents
2828 
2829 	cached_block* next;
2830 	for (cached_block* block = transaction->first_block; block != NULL;
2831 			block = next) {
2832 		next = block->transaction_next;
2833 		ASSERT(block->previous_transaction == NULL);
2834 
2835 		if (block->discard) {
2836 			// This block has been discarded in the transaction
2837 			cache->DiscardBlock(block);
2838 			transaction->num_blocks--;
2839 			continue;
2840 		}
2841 
2842 		if (block->original_data != NULL) {
2843 			cache->Free(block->original_data);
2844 			block->original_data = NULL;
2845 		}
2846 		if (block->parent_data != NULL) {
2847 			ASSERT(transaction->has_sub_transaction);
2848 			cache->FreeBlockParentData(block);
2849 		}
2850 
2851 		// move the block to the previous transaction list
2852 		transaction->blocks.Add(block);
2853 
2854 		block->previous_transaction = transaction;
2855 		block->transaction_next = NULL;
2856 		block->transaction = NULL;
2857 	}
2858 
2859 	transaction->open = false;
2860 	return B_OK;
2861 }
2862 
2863 
2864 status_t
2865 cache_abort_transaction(void* _cache, int32 id)
2866 {
2867 	block_cache* cache = (block_cache*)_cache;
2868 	TransactionLocker locker(cache);
2869 
2870 	TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
2871 
2872 	cache_transaction* transaction = lookup_transaction(cache, id);
2873 	if (transaction == NULL) {
2874 		panic("cache_abort_transaction(): invalid transaction ID\n");
2875 		return B_BAD_VALUE;
2876 	}
2877 
2878 	T(Abort(cache, transaction));
2879 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2880 
2881 	// iterate through all blocks and restore their original contents
2882 
2883 	cached_block* block = transaction->first_block;
2884 	cached_block* next;
2885 	for (; block != NULL; block = next) {
2886 		next = block->transaction_next;
2887 
2888 		if (block->original_data != NULL) {
2889 			TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
2890 				"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
2891 			memcpy(block->current_data, block->original_data,
2892 				cache->block_size);
2893 			cache->Free(block->original_data);
2894 			block->original_data = NULL;
2895 		}
2896 		if (transaction->has_sub_transaction && block->parent_data != NULL)
2897 			cache->FreeBlockParentData(block);
2898 
2899 		block->transaction_next = NULL;
2900 		block->transaction = NULL;
2901 		block->discard = false;
2902 		if (block->previous_transaction == NULL)
2903 			block->is_dirty = false;
2904 	}
2905 
2906 	hash_remove(cache->transaction_hash, transaction);
2907 	delete_transaction(cache, transaction);
2908 	return B_OK;
2909 }
2910 
2911 
2912 /*!	Acknowledges the current parent transaction, and starts a new transaction
2913 	from its sub transaction.
2914 	The new transaction also gets a new transaction ID.
2915 */
2916 int32
2917 cache_detach_sub_transaction(void* _cache, int32 id,
2918 	transaction_notification_hook hook, void* data)
2919 {
2920 	block_cache* cache = (block_cache*)_cache;
2921 	TransactionLocker locker(cache);
2922 
2923 	TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
2924 
2925 	cache_transaction* transaction = lookup_transaction(cache, id);
2926 	if (transaction == NULL) {
2927 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2928 		return B_BAD_VALUE;
2929 	}
2930 	if (!transaction->has_sub_transaction)
2931 		return B_BAD_VALUE;
2932 
2933 	// iterate through all blocks and free the unchanged original contents
2934 
2935 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2936 	if (status != B_OK)
2937 		return status;
2938 
2939 	// create a new transaction for the sub transaction
2940 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
2941 	if (newTransaction == NULL)
2942 		return B_NO_MEMORY;
2943 
2944 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2945 	T(Detach(cache, transaction, newTransaction));
2946 
2947 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2948 
2949 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2950 			data) != B_OK) {
2951 		delete newTransaction;
2952 		return B_NO_MEMORY;
2953 	}
2954 
2955 	cached_block* last = NULL;
2956 	cached_block* next;
2957 	for (cached_block* block = transaction->first_block; block != NULL;
2958 			block = next) {
2959 		next = block->transaction_next;
2960 		ASSERT(block->previous_transaction == NULL);
2961 
2962 		if (block->discard) {
2963 			cache->DiscardBlock(block);
2964 			transaction->main_num_blocks--;
2965 			continue;
2966 		}
2967 
2968 		if (block->parent_data != NULL) {
2969 			// The block changed in the parent - free the original data, since
2970 			// they will be replaced by what is in current.
2971 			ASSERT(block->original_data != NULL);
2972 			cache->Free(block->original_data);
2973 
2974 			if (block->parent_data != block->current_data) {
2975 				// The block had been changed in both transactions
2976 				block->original_data = block->parent_data;
2977 			} else {
2978 				// The block has only been changed in the parent
2979 				block->original_data = NULL;
2980 			}
2981 
2982 			// move the block to the previous transaction list
2983 			transaction->blocks.Add(block);
2984 			block->previous_transaction = transaction;
2985 			block->parent_data = NULL;
2986 		}
2987 
2988 		if (block->original_data != NULL) {
2989 			// This block had been changed in the current sub transaction,
2990 			// we need to move this block over to the new transaction.
2991 			ASSERT(block->parent_data == NULL);
2992 
2993 			if (last == NULL)
2994 				newTransaction->first_block = block;
2995 			else
2996 				last->transaction_next = block;
2997 
2998 			block->transaction = newTransaction;
2999 			last = block;
3000 		} else
3001 			block->transaction = NULL;
3002 
3003 		block->transaction_next = NULL;
3004 	}
3005 
3006 	newTransaction->num_blocks = transaction->sub_num_blocks;
3007 
3008 	transaction->open = false;
3009 	transaction->has_sub_transaction = false;
3010 	transaction->num_blocks = transaction->main_num_blocks;
3011 	transaction->sub_num_blocks = 0;
3012 
3013 	hash_insert_grow(cache->transaction_hash, newTransaction);
3014 	cache->last_transaction = newTransaction;
3015 
3016 	return newTransaction->id;
3017 }
3018 
3019 
3020 status_t
3021 cache_abort_sub_transaction(void* _cache, int32 id)
3022 {
3023 	block_cache* cache = (block_cache*)_cache;
3024 	TransactionLocker locker(cache);
3025 
3026 	TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3027 
3028 	cache_transaction* transaction = lookup_transaction(cache, id);
3029 	if (transaction == NULL) {
3030 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3031 		return B_BAD_VALUE;
3032 	}
3033 	if (!transaction->has_sub_transaction)
3034 		return B_BAD_VALUE;
3035 
3036 	T(Abort(cache, transaction));
3037 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3038 
3039 	// revert all changes back to the version of the parent
3040 
3041 	cached_block* block = transaction->first_block;
3042 	cached_block* last = NULL;
3043 	cached_block* next;
3044 	for (; block != NULL; block = next) {
3045 		next = block->transaction_next;
3046 
3047 		if (block->parent_data == NULL) {
3048 			// The parent transaction didn't change the block, but the sub
3049 			// transaction did - we need to revert to the original data.
3050 			// The block is no longer part of the transaction
3051 			if (block->original_data != NULL) {
3052 				// The block might not have original data if was empty
3053 				memcpy(block->current_data, block->original_data,
3054 					cache->block_size);
3055 			}
3056 
3057 			if (last != NULL)
3058 				last->transaction_next = next;
3059 			else
3060 				transaction->first_block = next;
3061 
3062 			block->transaction_next = NULL;
3063 			block->transaction = NULL;
3064 			transaction->num_blocks--;
3065 
3066 			if (block->previous_transaction == NULL) {
3067 				cache->Free(block->original_data);
3068 				block->original_data = NULL;
3069 				block->is_dirty = false;
3070 
3071 				if (block->ref_count == 0) {
3072 					// Move the block into the unused list if possible
3073 					block->unused = true;
3074 					cache->unused_blocks.Add(block);
3075 					cache->unused_block_count++;
3076 				}
3077 			}
3078 		} else {
3079 			if (block->parent_data != block->current_data) {
3080 				// The block has been changed and must be restored - the block
3081 				// is still dirty and part of the transaction
3082 				TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3083 					"restored contents of block %" B_PRIdOFF "\n",
3084 					transaction->id, block->block_number));
3085 				memcpy(block->current_data, block->parent_data,
3086 					cache->block_size);
3087 				cache->Free(block->parent_data);
3088 				// The block stays dirty
3089 			}
3090 			block->parent_data = NULL;
3091 			last = block;
3092 		}
3093 
3094 		block->discard = false;
3095 	}
3096 
3097 	// all subsequent changes will go into the main transaction
3098 	transaction->has_sub_transaction = false;
3099 	transaction->sub_num_blocks = 0;
3100 
3101 	return B_OK;
3102 }
3103 
3104 
3105 status_t
3106 cache_start_sub_transaction(void* _cache, int32 id)
3107 {
3108 	block_cache* cache = (block_cache*)_cache;
3109 	TransactionLocker locker(cache);
3110 
3111 	TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3112 
3113 	cache_transaction* transaction = lookup_transaction(cache, id);
3114 	if (transaction == NULL) {
3115 		panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3116 			id);
3117 		return B_BAD_VALUE;
3118 	}
3119 
3120 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3121 
3122 	// move all changed blocks up to the parent
3123 
3124 	cached_block* block = transaction->first_block;
3125 	cached_block* next;
3126 	for (; block != NULL; block = next) {
3127 		next = block->transaction_next;
3128 
3129 		if (block->parent_data != NULL) {
3130 			// There already is an older sub transaction - we acknowledge
3131 			// its changes and move its blocks up to the parent
3132 			ASSERT(transaction->has_sub_transaction);
3133 			cache->FreeBlockParentData(block);
3134 		}
3135 		if (block->discard) {
3136 			// This block has been discarded in the parent transaction.
3137 			// Just throw away any changes made in this transaction, so that
3138 			// it can still be reverted to its original contents if needed
3139 			ASSERT(block->previous_transaction == NULL);
3140 			if (block->original_data != NULL) {
3141 				memcpy(block->current_data, block->original_data,
3142 					cache->block_size);
3143 				block->original_data = NULL;
3144 			}
3145 			continue;
3146 		}
3147 
3148 		// we "allocate" the parent data lazily, that means, we don't copy
3149 		// the data (and allocate memory for it) until we need to
3150 		block->parent_data = block->current_data;
3151 	}
3152 
3153 	// all subsequent changes will go into the sub transaction
3154 	transaction->has_sub_transaction = true;
3155 	transaction->main_num_blocks = transaction->num_blocks;
3156 	transaction->sub_num_blocks = 0;
3157 	T(Action("start-sub", cache, transaction));
3158 
3159 	return B_OK;
3160 }
3161 
3162 
3163 /*!	Adds a transaction listener that gets notified when the transaction
3164 	is ended, aborted, written, or idle as specified by \a events.
3165 	The listener gets automatically removed when the transaction ends.
3166 */
3167 status_t
3168 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3169 	transaction_notification_hook hook, void* data)
3170 {
3171 	block_cache* cache = (block_cache*)_cache;
3172 	TransactionLocker locker(cache);
3173 
3174 	cache_transaction* transaction = lookup_transaction(cache, id);
3175 	if (transaction == NULL)
3176 		return B_BAD_VALUE;
3177 
3178 	return add_transaction_listener(cache, transaction, events, hook, data);
3179 }
3180 
3181 
3182 status_t
3183 cache_remove_transaction_listener(void* _cache, int32 id,
3184 	transaction_notification_hook hookFunction, void* data)
3185 {
3186 	block_cache* cache = (block_cache*)_cache;
3187 	TransactionLocker locker(cache);
3188 
3189 	cache_transaction* transaction = lookup_transaction(cache, id);
3190 	if (transaction == NULL)
3191 		return B_BAD_VALUE;
3192 
3193 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3194 	while (iterator.HasNext()) {
3195 		cache_listener* listener = iterator.Next();
3196 		if (listener->data == data && listener->hook == hookFunction) {
3197 			iterator.Remove();
3198 
3199 			if (listener->events_pending != 0) {
3200 				MutexLocker _(sNotificationsLock);
3201 				if (listener->events_pending != 0)
3202 					cache->pending_notifications.Remove(listener);
3203 			}
3204 			delete listener;
3205 			return B_OK;
3206 		}
3207 	}
3208 
3209 	return B_ENTRY_NOT_FOUND;
3210 }
3211 
3212 
3213 status_t
3214 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3215 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3216 {
3217 	cached_block* block = (cached_block*)*_cookie;
3218 	block_cache* cache = (block_cache*)_cache;
3219 	TransactionLocker locker(cache);
3220 
3221 	cache_transaction* transaction = lookup_transaction(cache, id);
3222 	if (transaction == NULL || !transaction->open)
3223 		return B_BAD_VALUE;
3224 
3225 	if (block == NULL)
3226 		block = transaction->first_block;
3227 	else
3228 		block = block->transaction_next;
3229 
3230 	if (transaction->has_sub_transaction) {
3231 		if (mainOnly) {
3232 			// find next block that the parent changed
3233 			while (block != NULL && block->parent_data == NULL)
3234 				block = block->transaction_next;
3235 		} else {
3236 			// find next non-discarded block
3237 			while (block != NULL && block->discard)
3238 				block = block->transaction_next;
3239 		}
3240 	}
3241 
3242 	if (block == NULL)
3243 		return B_ENTRY_NOT_FOUND;
3244 
3245 	if (_blockNumber)
3246 		*_blockNumber = block->block_number;
3247 	if (_data)
3248 		*_data = mainOnly ? block->parent_data : block->current_data;
3249 	if (_unchangedData)
3250 		*_unchangedData = block->original_data;
3251 
3252 	*_cookie = (addr_t)block;
3253 	return B_OK;
3254 }
3255 
3256 
3257 int32
3258 cache_blocks_in_transaction(void* _cache, int32 id)
3259 {
3260 	block_cache* cache = (block_cache*)_cache;
3261 	TransactionLocker locker(cache);
3262 
3263 	cache_transaction* transaction = lookup_transaction(cache, id);
3264 	if (transaction == NULL)
3265 		return B_BAD_VALUE;
3266 
3267 	return transaction->num_blocks;
3268 }
3269 
3270 
3271 /*!	Returns the number of blocks that are part of the main transaction. If this
3272 	transaction does not have a sub transaction yet, this is the same value as
3273 	cache_blocks_in_transaction() would return.
3274 */
3275 int32
3276 cache_blocks_in_main_transaction(void* _cache, int32 id)
3277 {
3278 	block_cache* cache = (block_cache*)_cache;
3279 	TransactionLocker locker(cache);
3280 
3281 	cache_transaction* transaction = lookup_transaction(cache, id);
3282 	if (transaction == NULL)
3283 		return B_BAD_VALUE;
3284 
3285 	if (transaction->has_sub_transaction)
3286 		return transaction->main_num_blocks;
3287 
3288 	return transaction->num_blocks;
3289 }
3290 
3291 
3292 int32
3293 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3294 {
3295 	block_cache* cache = (block_cache*)_cache;
3296 	TransactionLocker locker(cache);
3297 
3298 	cache_transaction* transaction = lookup_transaction(cache, id);
3299 	if (transaction == NULL)
3300 		return B_BAD_VALUE;
3301 
3302 	return transaction->sub_num_blocks;
3303 }
3304 
3305 
3306 //	#pragma mark - public block cache API
3307 
3308 
3309 void
3310 block_cache_delete(void* _cache, bool allowWrites)
3311 {
3312 	block_cache* cache = (block_cache*)_cache;
3313 
3314 	if (allowWrites)
3315 		block_cache_sync(cache);
3316 
3317 	mutex_lock(&sCachesLock);
3318 	sCaches.Remove(cache);
3319 	mutex_unlock(&sCachesLock);
3320 
3321 	mutex_lock(&cache->lock);
3322 
3323 	// wait for all blocks to become unbusy
3324 	wait_for_busy_reading_blocks(cache);
3325 	wait_for_busy_writing_blocks(cache);
3326 
3327 	// free all blocks
3328 
3329 	uint32 cookie = 0;
3330 	cached_block* block;
3331 	while ((block = (cached_block*)hash_remove_first(cache->hash,
3332 			&cookie)) != NULL) {
3333 		cache->FreeBlock(block);
3334 	}
3335 
3336 	// free all transactions (they will all be aborted)
3337 
3338 	cookie = 0;
3339 	cache_transaction* transaction;
3340 	while ((transaction = (cache_transaction*)hash_remove_first(
3341 			cache->transaction_hash, &cookie)) != NULL) {
3342 		delete transaction;
3343 	}
3344 
3345 	delete cache;
3346 }
3347 
3348 
3349 void*
3350 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3351 {
3352 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3353 		readOnly);
3354 	if (cache == NULL)
3355 		return NULL;
3356 
3357 	if (cache->Init() != B_OK) {
3358 		delete cache;
3359 		return NULL;
3360 	}
3361 
3362 	MutexLocker _(sCachesLock);
3363 	sCaches.Add(cache);
3364 
3365 	return cache;
3366 }
3367 
3368 
3369 status_t
3370 block_cache_sync(void* _cache)
3371 {
3372 	block_cache* cache = (block_cache*)_cache;
3373 
3374 	// We will sync all dirty blocks to disk that have a completed
3375 	// transaction or no transaction only
3376 
3377 	MutexLocker locker(&cache->lock);
3378 
3379 	BlockWriter writer(cache);
3380 	hash_iterator iterator;
3381 	hash_open(cache->hash, &iterator);
3382 
3383 	cached_block* block;
3384 	while ((block = (cached_block*)hash_next(cache->hash, &iterator)) != NULL) {
3385 		if (block->CanBeWritten())
3386 			writer.Add(block);
3387 	}
3388 
3389 	hash_close(cache->hash, &iterator, false);
3390 
3391 	status_t status = writer.Write();
3392 
3393 	locker.Unlock();
3394 
3395 	wait_for_notifications(cache);
3396 		// make sure that all pending TRANSACTION_WRITTEN notifications
3397 		// are handled after we return
3398 	return status;
3399 }
3400 
3401 
3402 status_t
3403 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3404 {
3405 	block_cache* cache = (block_cache*)_cache;
3406 
3407 	// We will sync all dirty blocks to disk that have a completed
3408 	// transaction or no transaction only
3409 
3410 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3411 		panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
3412 			blockNumber, cache->max_blocks - 1);
3413 		return B_BAD_VALUE;
3414 	}
3415 
3416 	MutexLocker locker(&cache->lock);
3417 	BlockWriter writer(cache);
3418 
3419 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
3420 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
3421 			&blockNumber);
3422 		if (block == NULL)
3423 			continue;
3424 
3425 		if (block->CanBeWritten())
3426 			writer.Add(block);
3427 	}
3428 
3429 	status_t status = writer.Write();
3430 
3431 	locker.Unlock();
3432 
3433 	wait_for_notifications(cache);
3434 		// make sure that all pending TRANSACTION_WRITTEN notifications
3435 		// are handled after we return
3436 	return status;
3437 }
3438 
3439 
3440 /*!	Discards a block from the current transaction or from the cache.
3441 	You have to call this function when you no longer use a block, ie. when it
3442 	might be reclaimed by the file cache in order to make sure they won't
3443 	interfere.
3444 */
3445 void
3446 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3447 {
3448 	// TODO: this could be a nice place to issue the ATA trim command
3449 	block_cache* cache = (block_cache*)_cache;
3450 	TransactionLocker locker(cache);
3451 
3452 	BlockWriter writer(cache);
3453 
3454 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3455 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
3456 			&blockNumber);
3457 		if (block != NULL && block->previous_transaction != NULL)
3458 			writer.Add(block);
3459 	}
3460 
3461 	writer.Write();
3462 		// TODO: this can fail, too!
3463 
3464 	blockNumber -= numBlocks;
3465 		// reset blockNumber to its original value
3466 
3467 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3468 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
3469 			&blockNumber);
3470 		if (block == NULL)
3471 			continue;
3472 
3473 		ASSERT(block->previous_transaction == NULL);
3474 
3475 		if (block->unused) {
3476 			cache->unused_blocks.Remove(block);
3477 			cache->unused_block_count--;
3478 			cache->RemoveBlock(block);
3479 		} else {
3480 			if (block->transaction != NULL && block->parent_data != NULL
3481 				&& block->parent_data != block->current_data) {
3482 				panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3483 					"transaction!", blockNumber);
3484 			}
3485 
3486 			// mark it as discarded (in the current transaction only, if any)
3487 			block->discard = true;
3488 		}
3489 	}
3490 }
3491 
3492 
3493 status_t
3494 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3495 {
3496 	block_cache* cache = (block_cache*)_cache;
3497 	MutexLocker locker(&cache->lock);
3498 
3499 	if (cache->read_only) {
3500 		panic("tried to make block writable on a read-only cache!");
3501 		return B_ERROR;
3502 	}
3503 
3504 	// TODO: this can be done better!
3505 	void* block = get_writable_cached_block(cache, blockNumber,
3506 		blockNumber, 1, transaction, false);
3507 	if (block != NULL) {
3508 		put_cached_block((block_cache*)_cache, blockNumber);
3509 		return B_OK;
3510 	}
3511 
3512 	return B_ERROR;
3513 }
3514 
3515 
3516 void*
3517 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3518 	off_t length, int32 transaction)
3519 {
3520 	block_cache* cache = (block_cache*)_cache;
3521 	MutexLocker locker(&cache->lock);
3522 
3523 	TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3524 		blockNumber, transaction));
3525 	if (cache->read_only)
3526 		panic("tried to get writable block on a read-only cache!");
3527 
3528 	return get_writable_cached_block(cache, blockNumber, base, length,
3529 		transaction, false);
3530 }
3531 
3532 
3533 void*
3534 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3535 {
3536 	return block_cache_get_writable_etc(_cache, blockNumber,
3537 		blockNumber, 1, transaction);
3538 }
3539 
3540 
3541 void*
3542 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3543 {
3544 	block_cache* cache = (block_cache*)_cache;
3545 	MutexLocker locker(&cache->lock);
3546 
3547 	TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3548 		blockNumber, transaction));
3549 	if (cache->read_only)
3550 		panic("tried to get empty writable block on a read-only cache!");
3551 
3552 	return get_writable_cached_block((block_cache*)_cache, blockNumber,
3553 		blockNumber, 1, transaction, true);
3554 }
3555 
3556 
3557 const void*
3558 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
3559 {
3560 	block_cache* cache = (block_cache*)_cache;
3561 	MutexLocker locker(&cache->lock);
3562 	bool allocated;
3563 
3564 	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
3565 	if (block == NULL)
3566 		return NULL;
3567 
3568 #if BLOCK_CACHE_DEBUG_CHANGED
3569 	if (block->compare == NULL)
3570 		block->compare = cache->Allocate();
3571 	if (block->compare != NULL)
3572 		memcpy(block->compare, block->current_data, cache->block_size);
3573 #endif
3574 	TB(Get(cache, block));
3575 
3576 	return block->current_data;
3577 }
3578 
3579 
3580 const void*
3581 block_cache_get(void* _cache, off_t blockNumber)
3582 {
3583 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
3584 }
3585 
3586 
3587 /*!	Changes the internal status of a writable block to \a dirty. This can be
3588 	helpful in case you realize you don't need to change that block anymore
3589 	for whatever reason.
3590 
3591 	Note, you must only use this function on blocks that were acquired
3592 	writable!
3593 */
3594 status_t
3595 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3596 	int32 transaction)
3597 {
3598 	block_cache* cache = (block_cache*)_cache;
3599 	MutexLocker locker(&cache->lock);
3600 
3601 	cached_block* block = (cached_block*)hash_lookup(cache->hash,
3602 		&blockNumber);
3603 	if (block == NULL)
3604 		return B_BAD_VALUE;
3605 	if (block->is_dirty == dirty) {
3606 		// there is nothing to do for us
3607 		return B_OK;
3608 	}
3609 
3610 	// TODO: not yet implemented
3611 	if (dirty)
3612 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
3613 
3614 	return B_OK;
3615 }
3616 
3617 
3618 void
3619 block_cache_put(void* _cache, off_t blockNumber)
3620 {
3621 	block_cache* cache = (block_cache*)_cache;
3622 	MutexLocker locker(&cache->lock);
3623 
3624 	put_cached_block(cache, blockNumber);
3625 }
3626 
3627