xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 97dfeb96704e5dbc5bec32ad7b21379d0125e031)
1 /*
2  * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <KernelExport.h>
15 #include <fs_cache.h>
16 
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <vm/vm_page.h>
26 
27 #include "kernel_debug_config.h"
28 
29 
30 // TODO: this is a naive but growing implementation to test the API:
31 //	block reading/writing is not at all optimized for speed, it will
32 //	just read and write single blocks.
33 // TODO: the retrieval/copy of the original data could be delayed until the
34 //		new data must be written, ie. in low memory situations.
35 
36 //#define TRACE_BLOCK_CACHE
37 #ifdef TRACE_BLOCK_CACHE
38 #	define TRACE(x)	dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 #define TRACE_ALWAYS(x) dprintf x
44 
45 // This macro is used for fatal situations that are acceptable in a running
46 // system, like out of memory situations - should only panic for debugging.
47 #define FATAL(x) panic x
48 
49 static const bigtime_t kTransactionIdleTime = 2000000LL;
50 	// a transaction is considered idle after 2 seconds of inactivity
51 
52 
53 namespace {
54 
55 struct cache_transaction;
56 struct cached_block;
57 struct block_cache;
58 typedef DoublyLinkedListLink<cached_block> block_link;
59 
60 struct cached_block {
61 	cached_block*	next;			// next in hash
62 	cached_block*	transaction_next;
63 	block_link		link;
64 	off_t			block_number;
65 	void*			current_data;
66 		// The data that is seen by everyone using the API; this one is always
67 		// present.
68 	void*			original_data;
69 		// When in a transaction, this contains the original data from before
70 		// the transaction.
71 	void*			parent_data;
72 		// This is a lazily alloced buffer that represents the contents of the
73 		// block in the parent transaction. It may point to current_data if the
74 		// contents have been changed only in the parent transaction, or, if the
75 		// block has been changed in the current sub transaction already, to a
76 		// new block containing the contents changed in the parent transaction.
77 		// If this is NULL, the block has not been changed in the parent
78 		// transaction at all.
79 #if BLOCK_CACHE_DEBUG_CHANGED
80 	void*			compare;
81 #endif
82 	int32			ref_count;
83 	int32			last_accessed;
84 	bool			busy_reading : 1;
85 	bool			busy_writing : 1;
86 	bool			is_writing : 1;
87 		// Block has been checked out for writing without transactions, and
88 		// cannot be written back if set
89 	bool			is_dirty : 1;
90 	bool			unused : 1;
91 	bool			discard : 1;
92 	bool			busy_reading_waiters : 1;
93 	bool			busy_writing_waiters : 1;
94 	cache_transaction* transaction;
95 		// This is the current active transaction, if any, the block is
96 		// currently in (meaning was changed as a part of it).
97 	cache_transaction* previous_transaction;
98 		// This is set to the last transaction that was ended containing this
99 		// block. In this case, the block has not yet written back yet, and
100 		// the changed data is either in current_data, or original_data -- the
101 		// latter if the block is already being part of another transaction.
102 		// There can only be one previous transaction, so when the active
103 		// transaction ends, the changes of the previous transaction have to
104 		// be written back before that transaction becomes the next previous
105 		// transaction.
106 
107 	bool CanBeWritten() const;
108 	int32 LastAccess() const
109 		{ return system_time() / 1000000L - last_accessed; }
110 };
111 
112 typedef DoublyLinkedList<cached_block,
113 	DoublyLinkedListMemberGetLink<cached_block,
114 		&cached_block::link> > block_list;
115 
116 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
117 	int32			transaction_id;
118 	int32			events_pending;
119 	int32			events;
120 	transaction_notification_hook hook;
121 	void*			data;
122 	bool			delete_after_event;
123 };
124 
125 typedef DoublyLinkedList<cache_notification> NotificationList;
126 
127 struct BlockHash {
128 	typedef off_t			KeyType;
129 	typedef	cached_block	ValueType;
130 
131 	size_t HashKey(KeyType key) const
132 	{
133 		return key;
134 	}
135 
136 	size_t Hash(ValueType* block) const
137 	{
138 		return block->block_number;
139 	}
140 
141 	bool Compare(KeyType key, ValueType* block) const
142 	{
143 		return block->block_number == key;
144 	}
145 
146 	ValueType*& GetLink(ValueType* value) const
147 	{
148 		return value->next;
149 	}
150 };
151 
152 typedef BOpenHashTable<BlockHash> BlockTable;
153 
154 
155 struct TransactionHash {
156 	typedef int32				KeyType;
157 	typedef	cache_transaction	ValueType;
158 
159 	size_t HashKey(KeyType key) const
160 	{
161 		return key;
162 	}
163 
164 	size_t Hash(ValueType* transaction) const;
165 	bool Compare(KeyType key, ValueType* transaction) const;
166 	ValueType*& GetLink(ValueType* value) const;
167 };
168 
169 typedef BOpenHashTable<TransactionHash> TransactionTable;
170 
171 
172 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
173 	BlockTable*		hash;
174 	mutex			lock;
175 	int				fd;
176 	off_t			max_blocks;
177 	size_t			block_size;
178 	int32			next_transaction_id;
179 	cache_transaction* last_transaction;
180 	TransactionTable* transaction_hash;
181 
182 	object_cache*	buffer_cache;
183 	block_list		unused_blocks;
184 	uint32			unused_block_count;
185 
186 	ConditionVariable busy_reading_condition;
187 	uint32			busy_reading_count;
188 	bool			busy_reading_waiters;
189 
190 	ConditionVariable busy_writing_condition;
191 	uint32			busy_writing_count;
192 	bool			busy_writing_waiters;
193 
194 	uint32			num_dirty_blocks;
195 	bool			read_only;
196 
197 	NotificationList pending_notifications;
198 	ConditionVariable condition_variable;
199 
200 					block_cache(int fd, off_t numBlocks, size_t blockSize,
201 						bool readOnly);
202 					~block_cache();
203 
204 	status_t		Init();
205 
206 	void			Free(void* buffer);
207 	void*			Allocate();
208 	void			FreeBlock(cached_block* block);
209 	cached_block*	NewBlock(off_t blockNumber);
210 	void			FreeBlockParentData(cached_block* block);
211 
212 	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
213 	void			RemoveBlock(cached_block* block);
214 	void			DiscardBlock(cached_block* block);
215 
216 private:
217 	static void		_LowMemoryHandler(void* data, uint32 resources,
218 						int32 level);
219 	cached_block*	_GetUnusedBlock();
220 };
221 
222 struct cache_listener;
223 typedef DoublyLinkedListLink<cache_listener> listener_link;
224 
225 struct cache_listener : cache_notification {
226 	listener_link	link;
227 };
228 
229 typedef DoublyLinkedList<cache_listener,
230 	DoublyLinkedListMemberGetLink<cache_listener,
231 		&cache_listener::link> > ListenerList;
232 
233 
234 struct cache_transaction {
235 	cache_transaction();
236 
237 	cache_transaction* next;
238 	int32			id;
239 	int32			num_blocks;
240 	int32			main_num_blocks;
241 	int32			sub_num_blocks;
242 	cached_block*	first_block;
243 	block_list		blocks;
244 	ListenerList	listeners;
245 	bool			open;
246 	bool			has_sub_transaction;
247 	bigtime_t		last_used;
248 	int32			busy_writing_count;
249 };
250 
251 
252 class BlockWriter {
253 public:
254 								BlockWriter(block_cache* cache,
255 									size_t max = SIZE_MAX);
256 								~BlockWriter();
257 
258 			bool				Add(cached_block* block,
259 									cache_transaction* transaction = NULL);
260 			bool				Add(cache_transaction* transaction,
261 									bool& hasLeftOvers);
262 
263 			status_t			Write(cache_transaction* transaction = NULL,
264 									bool canUnlock = true);
265 
266 			bool				DeletedTransaction() const
267 									{ return fDeletedTransaction; }
268 
269 	static	status_t			WriteBlock(block_cache* cache,
270 									cached_block* block);
271 
272 private:
273 			void*				_Data(cached_block* block) const;
274 			status_t			_WriteBlock(cached_block* block);
275 			void				_BlockDone(cached_block* block,
276 									cache_transaction* transaction);
277 			void				_UnmarkWriting(cached_block* block);
278 
279 	static	int					_CompareBlocks(const void* _blockA,
280 									const void* _blockB);
281 
282 private:
283 	static	const size_t		kBufferSize = 64;
284 
285 			block_cache*		fCache;
286 			cached_block*		fBuffer[kBufferSize];
287 			cached_block**		fBlocks;
288 			size_t				fCount;
289 			size_t				fTotal;
290 			size_t				fCapacity;
291 			size_t				fMax;
292 			status_t			fStatus;
293 			bool				fDeletedTransaction;
294 };
295 
296 
297 class TransactionLocking {
298 public:
299 	inline bool Lock(block_cache* cache)
300 	{
301 		mutex_lock(&cache->lock);
302 
303 		while (cache->busy_writing_count != 0) {
304 			// wait for all blocks to be written
305 			ConditionVariableEntry entry;
306 			cache->busy_writing_condition.Add(&entry);
307 			cache->busy_writing_waiters = true;
308 
309 			mutex_unlock(&cache->lock);
310 
311 			entry.Wait();
312 
313 			mutex_lock(&cache->lock);
314 		}
315 
316 		return true;
317 	}
318 
319 	inline void Unlock(block_cache* cache)
320 	{
321 		mutex_unlock(&cache->lock);
322 	}
323 };
324 
325 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
326 
327 } // namespace
328 
329 
330 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
331 namespace BlockTracing {
332 
333 class Action : public AbstractTraceEntry {
334 public:
335 	Action(block_cache* cache, cached_block* block)
336 		:
337 		fCache(cache),
338 		fBlockNumber(block->block_number),
339 		fIsDirty(block->is_dirty),
340 		fHasOriginal(block->original_data != NULL),
341 		fHasParent(block->parent_data != NULL),
342 		fTransactionID(-1),
343 		fPreviousID(-1)
344 	{
345 		if (block->transaction != NULL)
346 			fTransactionID = block->transaction->id;
347 		if (block->previous_transaction != NULL)
348 			fPreviousID = block->previous_transaction->id;
349 	}
350 
351 	virtual void AddDump(TraceOutput& out)
352 	{
353 		out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
354 			" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
355 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
356 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
357 	}
358 
359 	virtual const char* _Action() const = 0;
360 
361 private:
362 	block_cache*		fCache;
363 	uint64				fBlockNumber;
364 	bool				fIsDirty;
365 	bool				fHasOriginal;
366 	bool				fHasParent;
367 	int32				fTransactionID;
368 	int32				fPreviousID;
369 };
370 
371 class Get : public Action {
372 public:
373 	Get(block_cache* cache, cached_block* block)
374 		:
375 		Action(cache, block)
376 	{
377 		Initialized();
378 	}
379 
380 	virtual const char* _Action() const { return "get"; }
381 };
382 
383 class Put : public Action {
384 public:
385 	Put(block_cache* cache, cached_block* block)
386 		:
387 		Action(cache, block)
388 	{
389 		Initialized();
390 	}
391 
392 	virtual const char* _Action() const { return "put"; }
393 };
394 
395 class Read : public Action {
396 public:
397 	Read(block_cache* cache, cached_block* block)
398 		:
399 		Action(cache, block)
400 	{
401 		Initialized();
402 	}
403 
404 	virtual const char* _Action() const { return "read"; }
405 };
406 
407 class Write : public Action {
408 public:
409 	Write(block_cache* cache, cached_block* block)
410 		:
411 		Action(cache, block)
412 	{
413 		Initialized();
414 	}
415 
416 	virtual const char* _Action() const { return "write"; }
417 };
418 
419 class Flush : public Action {
420 public:
421 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
422 		:
423 		Action(cache, block),
424 		fGetUnused(getUnused)
425 	{
426 		Initialized();
427 	}
428 
429 	virtual const char* _Action() const
430 		{ return fGetUnused ? "get-unused" : "flush"; }
431 
432 private:
433 	bool	fGetUnused;
434 };
435 
436 class Error : public AbstractTraceEntry {
437 public:
438 	Error(block_cache* cache, uint64 blockNumber, const char* message,
439 			status_t status = B_OK)
440 		:
441 		fCache(cache),
442 		fBlockNumber(blockNumber),
443 		fMessage(message),
444 		fStatus(status)
445 	{
446 		Initialized();
447 	}
448 
449 	virtual void AddDump(TraceOutput& out)
450 	{
451 		out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
452 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
453 			fStatus != B_OK ? strerror(fStatus) : "");
454 	}
455 
456 private:
457 	block_cache*	fCache;
458 	uint64			fBlockNumber;
459 	const char*		fMessage;
460 	status_t		fStatus;
461 };
462 
463 #if BLOCK_CACHE_BLOCK_TRACING >= 2
464 class BlockData : public AbstractTraceEntry {
465 public:
466 	enum {
467 		kCurrent	= 0x01,
468 		kParent		= 0x02,
469 		kOriginal	= 0x04
470 	};
471 
472 	BlockData(block_cache* cache, cached_block* block, const char* message)
473 		:
474 		fCache(cache),
475 		fSize(cache->block_size),
476 		fBlockNumber(block->block_number),
477 		fMessage(message)
478 	{
479 		_Allocate(fCurrent, block->current_data);
480 		_Allocate(fParent, block->parent_data);
481 		_Allocate(fOriginal, block->original_data);
482 
483 #if KTRACE_PRINTF_STACK_TRACE
484 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
485 			false);
486 #endif
487 
488 		Initialized();
489 	}
490 
491 	virtual void AddDump(TraceOutput& out)
492 	{
493 		out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
494 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
495 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
496 			fMessage);
497 	}
498 
499 #if KTRACE_PRINTF_STACK_TRACE
500 	virtual void DumpStackTrace(TraceOutput& out)
501 	{
502 		out.PrintStackTrace(fStackTrace);
503 	}
504 #endif
505 
506 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
507 	{
508 		if ((which & kCurrent) != 0)
509 			DumpBlock(kCurrent, offset, size);
510 		if ((which & kParent) != 0)
511 			DumpBlock(kParent, offset, size);
512 		if ((which & kOriginal) != 0)
513 			DumpBlock(kOriginal, offset, size);
514 	}
515 
516 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
517 	{
518 		if (offset > fSize) {
519 			kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
520 			return;
521 		}
522 		if (offset + size > fSize)
523 			size = fSize - offset;
524 
525 		const char* label;
526 		uint8* data;
527 
528 		if ((which & kCurrent) != 0) {
529 			label = "current";
530 			data = fCurrent;
531 		} else if ((which & kParent) != 0) {
532 			label = "parent";
533 			data = fParent;
534 		} else if ((which & kOriginal) != 0) {
535 			label = "original";
536 			data = fOriginal;
537 		} else
538 			return;
539 
540 		kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
541 
542 		static const uint32 kBlockSize = 16;
543 		data += offset;
544 
545 		for (uint32 i = 0; i < size;) {
546 			int start = i;
547 
548 			kprintf("  %04" B_PRIx32 " ", i);
549 			for (; i < start + kBlockSize; i++) {
550 				if (!(i % 4))
551 					kprintf(" ");
552 
553 				if (i >= size)
554 					kprintf("  ");
555 				else
556 					kprintf("%02x", data[i]);
557 			}
558 
559 			kprintf("\n");
560 		}
561 	}
562 
563 private:
564 	void _Allocate(uint8*& target, void* source)
565 	{
566 		if (source == NULL) {
567 			target = NULL;
568 			return;
569 		}
570 
571 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
572 	}
573 
574 	block_cache*	fCache;
575 	uint32			fSize;
576 	uint64			fBlockNumber;
577 	const char*		fMessage;
578 	uint8*			fCurrent;
579 	uint8*			fParent;
580 	uint8*			fOriginal;
581 #if KTRACE_PRINTF_STACK_TRACE
582 	tracing_stack_trace* fStackTrace;
583 #endif
584 };
585 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
586 
587 }	// namespace BlockTracing
588 
589 #	define TB(x) new(std::nothrow) BlockTracing::x;
590 #else
591 #	define TB(x) ;
592 #endif
593 
594 #if BLOCK_CACHE_BLOCK_TRACING >= 2
595 #	define TB2(x) new(std::nothrow) BlockTracing::x;
596 #else
597 #	define TB2(x) ;
598 #endif
599 
600 
601 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
602 namespace TransactionTracing {
603 
604 class Action : public AbstractTraceEntry {
605 public:
606 	Action(const char* label, block_cache* cache,
607 			cache_transaction* transaction)
608 		:
609 		fCache(cache),
610 		fTransaction(transaction),
611 		fID(transaction->id),
612 		fSub(transaction->has_sub_transaction),
613 		fNumBlocks(transaction->num_blocks),
614 		fSubNumBlocks(transaction->sub_num_blocks)
615 	{
616 		strlcpy(fLabel, label, sizeof(fLabel));
617 		Initialized();
618 	}
619 
620 	virtual void AddDump(TraceOutput& out)
621 	{
622 		out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
623 			", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
624 			fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
625 	}
626 
627 private:
628 	char				fLabel[12];
629 	block_cache*		fCache;
630 	cache_transaction*	fTransaction;
631 	int32				fID;
632 	bool				fSub;
633 	int32				fNumBlocks;
634 	int32				fSubNumBlocks;
635 };
636 
637 class Detach : public AbstractTraceEntry {
638 public:
639 	Detach(block_cache* cache, cache_transaction* transaction,
640 			cache_transaction* newTransaction)
641 		:
642 		fCache(cache),
643 		fTransaction(transaction),
644 		fID(transaction->id),
645 		fSub(transaction->has_sub_transaction),
646 		fNewTransaction(newTransaction),
647 		fNewID(newTransaction->id)
648 	{
649 		Initialized();
650 	}
651 
652 	virtual void AddDump(TraceOutput& out)
653 	{
654 		out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
655 			"from transaction %p (id %" B_PRId32 ")%s",
656 			fCache, fNewTransaction, fNewID, fTransaction, fID,
657 			fSub ? " sub" : "");
658 	}
659 
660 private:
661 	block_cache*		fCache;
662 	cache_transaction*	fTransaction;
663 	int32				fID;
664 	bool				fSub;
665 	cache_transaction*	fNewTransaction;
666 	int32				fNewID;
667 };
668 
669 class Abort : public AbstractTraceEntry {
670 public:
671 	Abort(block_cache* cache, cache_transaction* transaction)
672 		:
673 		fCache(cache),
674 		fTransaction(transaction),
675 		fID(transaction->id),
676 		fNumBlocks(0)
677 	{
678 		bool isSub = transaction->has_sub_transaction;
679 		fNumBlocks = isSub ? transaction->sub_num_blocks
680 			: transaction->num_blocks;
681 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
682 		if (fBlocks != NULL) {
683 			cached_block* block = transaction->first_block;
684 			for (int32 i = 0; block != NULL && i < fNumBlocks;
685 					block = block->transaction_next) {
686 				fBlocks[i++] = block->block_number;
687 			}
688 		} else
689 			fNumBlocks = 0;
690 
691 #if KTRACE_PRINTF_STACK_TRACE
692 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
693 			false);
694 #endif
695 
696 		Initialized();
697 	}
698 
699 	virtual void AddDump(TraceOutput& out)
700 	{
701 		out.Print("block cache %p, abort transaction "
702 			"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
703 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
704 			out.Print(" %" B_PRIdOFF, fBlocks[i]);
705 	}
706 
707 #if KTRACE_PRINTF_STACK_TRACE
708 	virtual void DumpStackTrace(TraceOutput& out)
709 	{
710 		out.PrintStackTrace(fStackTrace);
711 	}
712 #endif
713 
714 private:
715 	block_cache*		fCache;
716 	cache_transaction*	fTransaction;
717 	int32				fID;
718 	off_t*				fBlocks;
719 	int32				fNumBlocks;
720 #if KTRACE_PRINTF_STACK_TRACE
721 	tracing_stack_trace* fStackTrace;
722 #endif
723 };
724 
725 }	// namespace TransactionTracing
726 
727 #	define T(x) new(std::nothrow) TransactionTracing::x;
728 #else
729 #	define T(x) ;
730 #endif
731 
732 
733 static DoublyLinkedList<block_cache> sCaches;
734 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
735 static mutex sCachesMemoryUseLock
736 	= MUTEX_INITIALIZER("block caches memory use");
737 static size_t sUsedMemory;
738 static sem_id sEventSemaphore;
739 static mutex sNotificationsLock
740 	= MUTEX_INITIALIZER("block cache notifications");
741 static thread_id sNotifierWriterThread;
742 static DoublyLinkedListLink<block_cache> sMarkCache;
743 	// TODO: this only works if the link is the first entry of block_cache
744 static object_cache* sBlockCache;
745 
746 
747 //	#pragma mark - notifications/listener
748 
749 
750 /*!	Checks whether or not this is an event that closes a transaction. */
751 static inline bool
752 is_closing_event(int32 event)
753 {
754 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
755 }
756 
757 
758 static inline bool
759 is_written_event(int32 event)
760 {
761 	return (event & TRANSACTION_WRITTEN) != 0;
762 }
763 
764 
765 /*!	From the specified \a notification, it will remove the lowest pending
766 	event, and return that one in \a _event.
767 	If there is no pending event anymore, it will return \c false.
768 */
769 static bool
770 get_next_pending_event(cache_notification* notification, int32* _event)
771 {
772 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
773 		int32 pending = atomic_and(&notification->events_pending,
774 			~eventMask);
775 
776 		bool more = (pending & ~eventMask) != 0;
777 
778 		if ((pending & eventMask) != 0) {
779 			*_event = eventMask;
780 			return more;
781 		}
782 	}
783 
784 	return false;
785 }
786 
787 
788 static void
789 flush_pending_notifications(block_cache* cache)
790 {
791 	ASSERT_LOCKED_MUTEX(&sCachesLock);
792 
793 	while (true) {
794 		MutexLocker locker(sNotificationsLock);
795 
796 		cache_notification* notification = cache->pending_notifications.Head();
797 		if (notification == NULL)
798 			return;
799 
800 		bool deleteAfterEvent = false;
801 		int32 event = -1;
802 		if (!get_next_pending_event(notification, &event)) {
803 			// remove the notification if this was the last pending event
804 			cache->pending_notifications.Remove(notification);
805 			deleteAfterEvent = notification->delete_after_event;
806 		}
807 
808 		if (event >= 0) {
809 			// Notify listener, we need to copy the notification, as it might
810 			// be removed when we unlock the list.
811 			cache_notification copy = *notification;
812 			locker.Unlock();
813 
814 			copy.hook(copy.transaction_id, event, copy.data);
815 
816 			locker.Lock();
817 		}
818 
819 		if (deleteAfterEvent)
820 			delete notification;
821 	}
822 }
823 
824 
825 /*!	Flushes all pending notifications by calling the appropriate hook
826 	functions.
827 	Must not be called with a cache lock held.
828 */
829 static void
830 flush_pending_notifications()
831 {
832 	MutexLocker _(sCachesLock);
833 
834 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
835 	while (iterator.HasNext()) {
836 		block_cache* cache = iterator.Next();
837 
838 		flush_pending_notifications(cache);
839 	}
840 }
841 
842 
843 /*!	Initializes the \a notification as specified. */
844 static void
845 set_notification(cache_transaction* transaction,
846 	cache_notification &notification, int32 events,
847 	transaction_notification_hook hook, void* data)
848 {
849 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
850 	notification.events_pending = 0;
851 	notification.events = events;
852 	notification.hook = hook;
853 	notification.data = data;
854 	notification.delete_after_event = false;
855 }
856 
857 
858 /*!	Makes sure the notification is deleted. It either deletes it directly,
859 	when possible, or marks it for deletion if the notification is pending.
860 */
861 static void
862 delete_notification(cache_notification* notification)
863 {
864 	MutexLocker locker(sNotificationsLock);
865 
866 	if (notification->events_pending != 0)
867 		notification->delete_after_event = true;
868 	else
869 		delete notification;
870 }
871 
872 
873 /*!	Adds the notification to the pending notifications list, or, if it's
874 	already part of it, updates its events_pending field.
875 	Also marks the notification to be deleted if \a deleteNotification
876 	is \c true.
877 	Triggers the notifier thread to run.
878 */
879 static void
880 add_notification(block_cache* cache, cache_notification* notification,
881 	int32 event, bool deleteNotification)
882 {
883 	if (notification->hook == NULL)
884 		return;
885 
886 	int32 pending = atomic_or(&notification->events_pending, event);
887 	if (pending == 0) {
888 		// not yet part of the notification list
889 		MutexLocker locker(sNotificationsLock);
890 		if (deleteNotification)
891 			notification->delete_after_event = true;
892 		cache->pending_notifications.Add(notification);
893 	} else if (deleteNotification) {
894 		// we might need to delete it ourselves if we're late
895 		delete_notification(notification);
896 	}
897 
898 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
899 		// We're probably still holding some locks that makes rescheduling
900 		// not a good idea at this point.
901 }
902 
903 
904 /*!	Notifies all interested listeners of this transaction about the \a event.
905 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
906 	TRANSACTION_ABORTED), all listeners except those listening to
907 	TRANSACTION_WRITTEN will be removed.
908 */
909 static void
910 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
911 	int32 event)
912 {
913 	T(Action("notify", cache, transaction));
914 
915 	bool isClosing = is_closing_event(event);
916 	bool isWritten = is_written_event(event);
917 
918 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
919 	while (iterator.HasNext()) {
920 		cache_listener* listener = iterator.Next();
921 
922 		bool remove = (isClosing && !is_written_event(listener->events))
923 			|| (isWritten && is_written_event(listener->events));
924 		if (remove)
925 			iterator.Remove();
926 
927 		if ((listener->events & event) != 0)
928 			add_notification(cache, listener, event, remove);
929 		else if (remove)
930 			delete_notification(listener);
931 	}
932 }
933 
934 
935 /*!	Removes and deletes all listeners that are still monitoring this
936 	transaction.
937 */
938 static void
939 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
940 {
941 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
942 	while (iterator.HasNext()) {
943 		cache_listener* listener = iterator.Next();
944 		iterator.Remove();
945 
946 		delete_notification(listener);
947 	}
948 }
949 
950 
951 static status_t
952 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
953 	int32 events, transaction_notification_hook hookFunction, void* data)
954 {
955 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
956 	while (iterator.HasNext()) {
957 		cache_listener* listener = iterator.Next();
958 
959 		if (listener->data == data && listener->hook == hookFunction) {
960 			// this listener already exists, just update it
961 			listener->events |= events;
962 			return B_OK;
963 		}
964 	}
965 
966 	cache_listener* listener = new(std::nothrow) cache_listener;
967 	if (listener == NULL)
968 		return B_NO_MEMORY;
969 
970 	set_notification(transaction, *listener, events, hookFunction, data);
971 	transaction->listeners.Add(listener);
972 	return B_OK;
973 }
974 
975 
976 //	#pragma mark - private transaction
977 
978 
979 cache_transaction::cache_transaction()
980 {
981 	num_blocks = 0;
982 	main_num_blocks = 0;
983 	sub_num_blocks = 0;
984 	first_block = NULL;
985 	open = true;
986 	has_sub_transaction = false;
987 	last_used = system_time();
988 	busy_writing_count = 0;
989 }
990 
991 
992 static void
993 delete_transaction(block_cache* cache, cache_transaction* transaction)
994 {
995 	if (cache->last_transaction == transaction)
996 		cache->last_transaction = NULL;
997 
998 	remove_transaction_listeners(cache, transaction);
999 	delete transaction;
1000 }
1001 
1002 
1003 static cache_transaction*
1004 lookup_transaction(block_cache* cache, int32 id)
1005 {
1006 	return cache->transaction_hash->Lookup(id);
1007 }
1008 
1009 
1010 size_t TransactionHash::Hash(cache_transaction* transaction) const
1011 {
1012 	return transaction->id;
1013 }
1014 
1015 
1016 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
1017 {
1018 	return transaction->id == key;
1019 }
1020 
1021 
1022 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
1023 {
1024 	return value->next;
1025 }
1026 
1027 
1028 /*!	Writes back any changes made to blocks in \a transaction that are still
1029 	part of a previous transacton.
1030 */
1031 static status_t
1032 write_blocks_in_previous_transaction(block_cache* cache,
1033 	cache_transaction* transaction)
1034 {
1035 	BlockWriter writer(cache);
1036 
1037 	cached_block* block = transaction->first_block;
1038 	for (; block != NULL; block = block->transaction_next) {
1039 		if (block->previous_transaction != NULL) {
1040 			// need to write back pending changes
1041 			writer.Add(block);
1042 		}
1043 	}
1044 
1045 	return writer.Write();
1046 }
1047 
1048 
1049 //	#pragma mark - cached_block
1050 
1051 
1052 bool
1053 cached_block::CanBeWritten() const
1054 {
1055 	return !busy_writing && !busy_reading
1056 		&& (previous_transaction != NULL
1057 			|| (transaction == NULL && is_dirty && !is_writing));
1058 }
1059 
1060 
1061 //	#pragma mark - BlockWriter
1062 
1063 
1064 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1065 	:
1066 	fCache(cache),
1067 	fBlocks(fBuffer),
1068 	fCount(0),
1069 	fTotal(0),
1070 	fCapacity(kBufferSize),
1071 	fMax(max),
1072 	fStatus(B_OK),
1073 	fDeletedTransaction(false)
1074 {
1075 }
1076 
1077 
1078 BlockWriter::~BlockWriter()
1079 {
1080 	if (fBlocks != fBuffer)
1081 		free(fBlocks);
1082 }
1083 
1084 
1085 /*!	Adds the specified block to the to be written array. If no more blocks can
1086 	be added, false is returned, otherwise true.
1087 */
1088 bool
1089 BlockWriter::Add(cached_block* block, cache_transaction* transaction)
1090 {
1091 	ASSERT(block->CanBeWritten());
1092 
1093 	if (fTotal == fMax)
1094 		return false;
1095 
1096 	if (fCount >= fCapacity) {
1097 		// Enlarge array if necessary
1098 		cached_block** newBlocks;
1099 		size_t newCapacity = max_c(256, fCapacity * 2);
1100 		if (fBlocks == fBuffer)
1101 			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1102 		else {
1103 			newBlocks = (cached_block**)realloc(fBlocks,
1104 				newCapacity * sizeof(void*));
1105 		}
1106 
1107 		if (newBlocks == NULL) {
1108 			// Allocating a larger array failed - we need to write back what
1109 			// we have synchronously now (this will also clear the array)
1110 			Write(transaction, false);
1111 		} else {
1112 			if (fBlocks == fBuffer)
1113 				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1114 
1115 			fBlocks = newBlocks;
1116 			fCapacity = newCapacity;
1117 		}
1118 	}
1119 
1120 	fBlocks[fCount++] = block;
1121 	fTotal++;
1122 	block->busy_writing = true;
1123 	fCache->busy_writing_count++;
1124 	if (block->previous_transaction != NULL)
1125 		block->previous_transaction->busy_writing_count++;
1126 
1127 	return true;
1128 }
1129 
1130 
1131 /*!	Adds all blocks of the specified transaction to the to be written array.
1132 	If no more blocks can be added, false is returned, otherwise true.
1133 */
1134 bool
1135 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
1136 {
1137 	ASSERT(!transaction->open);
1138 
1139 	if (transaction->busy_writing_count != 0) {
1140 		hasLeftOvers = true;
1141 		return true;
1142 	}
1143 
1144 	hasLeftOvers = false;
1145 
1146 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1147 	while (cached_block* block = blockIterator.Next()) {
1148 		if (!block->CanBeWritten()) {
1149 			// This block was already part of a previous transaction within this
1150 			// writer
1151 			hasLeftOvers = true;
1152 			continue;
1153 		}
1154 		if (!Add(block, transaction))
1155 			return false;
1156 
1157 		if (DeletedTransaction())
1158 			break;
1159 	}
1160 
1161 	return true;
1162 }
1163 
1164 
1165 /*! Cache must be locked when calling this method, but it will be unlocked
1166 	while the blocks are written back.
1167 */
1168 status_t
1169 BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
1170 {
1171 	if (fCount == 0)
1172 		return B_OK;
1173 
1174 	if (canUnlock)
1175 		mutex_unlock(&fCache->lock);
1176 
1177 	// Sort blocks in their on-disk order
1178 	// TODO: ideally, this should be handled by the I/O scheduler
1179 
1180 	qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1181 	fDeletedTransaction = false;
1182 
1183 	for (uint32 i = 0; i < fCount; i++) {
1184 		status_t status = _WriteBlock(fBlocks[i]);
1185 		if (status != B_OK) {
1186 			// propagate to global error handling
1187 			if (fStatus == B_OK)
1188 				fStatus = status;
1189 
1190 			_UnmarkWriting(fBlocks[i]);
1191 			fBlocks[i] = NULL;
1192 				// This block will not be marked clean
1193 		}
1194 	}
1195 
1196 	if (canUnlock)
1197 		mutex_lock(&fCache->lock);
1198 
1199 	for (uint32 i = 0; i < fCount; i++)
1200 		_BlockDone(fBlocks[i], transaction);
1201 
1202 	fCount = 0;
1203 	return fStatus;
1204 }
1205 
1206 
1207 /*!	Writes the specified \a block back to disk. It will always only write back
1208 	the oldest change of the block if it is part of more than one transaction.
1209 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1210 	delete transactions when they are no longer used, and \a deleteTransaction
1211 	is \c true.
1212 */
1213 /*static*/ status_t
1214 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1215 {
1216 	BlockWriter writer(cache);
1217 
1218 	writer.Add(block);
1219 	return writer.Write();
1220 }
1221 
1222 
1223 void*
1224 BlockWriter::_Data(cached_block* block) const
1225 {
1226 	return block->previous_transaction != NULL && block->original_data != NULL
1227 		? block->original_data : block->current_data;
1228 		// We first need to write back changes from previous transactions
1229 }
1230 
1231 
1232 status_t
1233 BlockWriter::_WriteBlock(cached_block* block)
1234 {
1235 	ASSERT(block->busy_writing);
1236 
1237 	TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
1238 	TB(Write(fCache, block));
1239 	TB2(BlockData(fCache, block, "before write"));
1240 
1241 	size_t blockSize = fCache->block_size;
1242 
1243 	ssize_t written = write_pos(fCache->fd,
1244 		block->block_number * blockSize, _Data(block), blockSize);
1245 
1246 	if (written != (ssize_t)blockSize) {
1247 		TB(Error(fCache, block->block_number, "write failed", written));
1248 		TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number,
1249 			strerror(errno)));
1250 		if (written < 0)
1251 			return errno;
1252 
1253 		return B_IO_ERROR;
1254 	}
1255 
1256 	return B_OK;
1257 }
1258 
1259 
1260 void
1261 BlockWriter::_BlockDone(cached_block* block,
1262 	cache_transaction* transaction)
1263 {
1264 	if (block == NULL) {
1265 		// An error occured when trying to write this block
1266 		return;
1267 	}
1268 
1269 	if (fCache->num_dirty_blocks > 0)
1270 		fCache->num_dirty_blocks--;
1271 
1272 	if (_Data(block) == block->current_data)
1273 		block->is_dirty = false;
1274 
1275 	_UnmarkWriting(block);
1276 
1277 	cache_transaction* previous = block->previous_transaction;
1278 	if (previous != NULL) {
1279 		previous->blocks.Remove(block);
1280 		block->previous_transaction = NULL;
1281 
1282 		if (block->original_data != NULL && block->transaction == NULL) {
1283 			// This block is not part of a transaction, so it does not need
1284 			// its original pointer anymore.
1285 			fCache->Free(block->original_data);
1286 			block->original_data = NULL;
1287 		}
1288 
1289 		// Has the previous transaction been finished with that write?
1290 		if (--previous->num_blocks == 0) {
1291 			TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1292 			T(Action("written", fCache, previous));
1293 
1294 			notify_transaction_listeners(fCache, previous,
1295 				TRANSACTION_WRITTEN);
1296 
1297 			if (transaction != NULL) {
1298 				// This function is called while iterating transaction_hash. We
1299 				// use RemoveUnchecked so the iterator is still valid. A regular
1300 				// Remove can trigger a resize of the hash table which would
1301 				// result in the linked items in the table changing order.
1302 				fCache->transaction_hash->RemoveUnchecked(transaction);
1303 			} else
1304 				fCache->transaction_hash->Remove(previous);
1305 
1306 			delete_transaction(fCache, previous);
1307 			fDeletedTransaction = true;
1308 		}
1309 	}
1310 	if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1311 		// the block is no longer used
1312 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1313 		block->unused = true;
1314 		fCache->unused_blocks.Add(block);
1315 		fCache->unused_block_count++;
1316 	}
1317 
1318 	TB2(BlockData(fCache, block, "after write"));
1319 }
1320 
1321 
1322 void
1323 BlockWriter::_UnmarkWriting(cached_block* block)
1324 {
1325 	block->busy_writing = false;
1326 	if (block->previous_transaction != NULL)
1327 		block->previous_transaction->busy_writing_count--;
1328 	fCache->busy_writing_count--;
1329 
1330 	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1331 		|| block->busy_writing_waiters) {
1332 		fCache->busy_writing_waiters = false;
1333 		block->busy_writing_waiters = false;
1334 		fCache->busy_writing_condition.NotifyAll();
1335 	}
1336 }
1337 
1338 
1339 /*static*/ int
1340 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1341 {
1342 	cached_block* blockA = *(cached_block**)_blockA;
1343 	cached_block* blockB = *(cached_block**)_blockB;
1344 
1345 	off_t diff = blockA->block_number - blockB->block_number;
1346 	if (diff > 0)
1347 		return 1;
1348 
1349 	return diff < 0 ? -1 : 0;
1350 }
1351 
1352 
1353 //	#pragma mark - block_cache
1354 
1355 
1356 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1357 		bool readOnly)
1358 	:
1359 	hash(NULL),
1360 	fd(_fd),
1361 	max_blocks(numBlocks),
1362 	block_size(blockSize),
1363 	next_transaction_id(1),
1364 	last_transaction(NULL),
1365 	transaction_hash(NULL),
1366 	buffer_cache(NULL),
1367 	unused_block_count(0),
1368 	busy_reading_count(0),
1369 	busy_reading_waiters(false),
1370 	busy_writing_count(0),
1371 	busy_writing_waiters(0),
1372 	num_dirty_blocks(0),
1373 	read_only(readOnly)
1374 {
1375 }
1376 
1377 
1378 /*! Should be called with the cache's lock held. */
1379 block_cache::~block_cache()
1380 {
1381 	unregister_low_resource_handler(&_LowMemoryHandler, this);
1382 
1383 	delete transaction_hash;
1384 	delete hash;
1385 
1386 	delete_object_cache(buffer_cache);
1387 
1388 	mutex_destroy(&lock);
1389 }
1390 
1391 
1392 status_t
1393 block_cache::Init()
1394 {
1395 	busy_reading_condition.Init(this, "cache block busy_reading");
1396 	busy_writing_condition.Init(this, "cache block busy writing");
1397 	condition_variable.Init(this, "cache transaction sync");
1398 	mutex_init(&lock, "block cache");
1399 
1400 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1401 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1402 	if (buffer_cache == NULL)
1403 		return B_NO_MEMORY;
1404 
1405 	hash = new BlockTable();
1406 	if (hash == NULL || hash->Init(1024) != B_OK)
1407 		return B_NO_MEMORY;
1408 
1409 	transaction_hash = new(std::nothrow) TransactionTable();
1410 	if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
1411 		return B_NO_MEMORY;
1412 
1413 	return register_low_resource_handler(&_LowMemoryHandler, this,
1414 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1415 			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1416 }
1417 
1418 
1419 void
1420 block_cache::Free(void* buffer)
1421 {
1422 	if (buffer != NULL)
1423 		object_cache_free(buffer_cache, buffer, 0);
1424 }
1425 
1426 
1427 void*
1428 block_cache::Allocate()
1429 {
1430 	void* block = object_cache_alloc(buffer_cache, 0);
1431 	if (block != NULL)
1432 		return block;
1433 
1434 	// recycle existing before allocating a new one
1435 	RemoveUnusedBlocks(100);
1436 
1437 	return object_cache_alloc(buffer_cache, 0);
1438 }
1439 
1440 
1441 void
1442 block_cache::FreeBlock(cached_block* block)
1443 {
1444 	Free(block->current_data);
1445 
1446 	if (block->original_data != NULL || block->parent_data != NULL) {
1447 		panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1448 			block->block_number, block->original_data, block->parent_data);
1449 	}
1450 
1451 #if BLOCK_CACHE_DEBUG_CHANGED
1452 	Free(block->compare);
1453 #endif
1454 
1455 	object_cache_free(sBlockCache, block, 0);
1456 }
1457 
1458 
1459 /*! Allocates a new block for \a blockNumber, ready for use */
1460 cached_block*
1461 block_cache::NewBlock(off_t blockNumber)
1462 {
1463 	cached_block* block = NULL;
1464 
1465 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1466 			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1467 		// recycle existing instead of allocating a new one
1468 		block = _GetUnusedBlock();
1469 	}
1470 	if (block == NULL) {
1471 		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1472 		if (block != NULL) {
1473 			block->current_data = Allocate();
1474 			if (block->current_data == NULL) {
1475 				object_cache_free(sBlockCache, block, 0);
1476 				return NULL;
1477 			}
1478 		} else {
1479 			TB(Error(this, blockNumber, "allocation failed"));
1480 			dprintf("block allocation failed, unused list is %sempty.\n",
1481 				unused_blocks.IsEmpty() ? "" : "not ");
1482 
1483 			// allocation failed, try to reuse an unused block
1484 			block = _GetUnusedBlock();
1485 			if (block == NULL) {
1486 				TB(Error(this, blockNumber, "get unused failed"));
1487 				FATAL(("could not allocate block!\n"));
1488 				return NULL;
1489 			}
1490 		}
1491 	}
1492 
1493 	block->block_number = blockNumber;
1494 	block->ref_count = 0;
1495 	block->last_accessed = 0;
1496 	block->transaction_next = NULL;
1497 	block->transaction = block->previous_transaction = NULL;
1498 	block->original_data = NULL;
1499 	block->parent_data = NULL;
1500 	block->busy_reading = false;
1501 	block->busy_writing = false;
1502 	block->is_writing = false;
1503 	block->is_dirty = false;
1504 	block->unused = false;
1505 	block->discard = false;
1506 	block->busy_reading_waiters = false;
1507 	block->busy_writing_waiters = false;
1508 #if BLOCK_CACHE_DEBUG_CHANGED
1509 	block->compare = NULL;
1510 #endif
1511 
1512 	return block;
1513 }
1514 
1515 
1516 void
1517 block_cache::FreeBlockParentData(cached_block* block)
1518 {
1519 	ASSERT(block->parent_data != NULL);
1520 	if (block->parent_data != block->current_data)
1521 		Free(block->parent_data);
1522 	block->parent_data = NULL;
1523 }
1524 
1525 
1526 void
1527 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1528 {
1529 	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1530 
1531 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1532 			cached_block* block = iterator.Next();) {
1533 		if (minSecondsOld >= block->LastAccess()) {
1534 			// The list is sorted by last access
1535 			break;
1536 		}
1537 		if (block->busy_reading || block->busy_writing)
1538 			continue;
1539 
1540 		TB(Flush(this, block));
1541 		TRACE(("  remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1542 			block->block_number, block->last_accessed));
1543 
1544 		// this can only happen if no transactions are used
1545 		if (block->is_dirty && !block->discard) {
1546 			if (block->busy_writing)
1547 				continue;
1548 
1549 			BlockWriter::WriteBlock(this, block);
1550 		}
1551 
1552 		// remove block from lists
1553 		iterator.Remove();
1554 		unused_block_count--;
1555 		RemoveBlock(block);
1556 
1557 		if (--count <= 0)
1558 			break;
1559 	}
1560 }
1561 
1562 
1563 void
1564 block_cache::RemoveBlock(cached_block* block)
1565 {
1566 	hash->Remove(block);
1567 	FreeBlock(block);
1568 }
1569 
1570 
1571 /*!	Discards the block from a transaction (this method must not be called
1572 	for blocks not part of a transaction).
1573 */
1574 void
1575 block_cache::DiscardBlock(cached_block* block)
1576 {
1577 	ASSERT(block->discard);
1578 	ASSERT(block->previous_transaction == NULL);
1579 
1580 	if (block->parent_data != NULL)
1581 		FreeBlockParentData(block);
1582 
1583 	if (block->original_data != NULL) {
1584 		Free(block->original_data);
1585 		block->original_data = NULL;
1586 	}
1587 
1588 	RemoveBlock(block);
1589 }
1590 
1591 
1592 void
1593 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1594 {
1595 	TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1596 
1597 	// free some blocks according to the low memory state
1598 	// (if there is enough memory left, we don't free any)
1599 
1600 	block_cache* cache = (block_cache*)data;
1601 	int32 free = 0;
1602 	int32 secondsOld = 0;
1603 	switch (level) {
1604 		case B_NO_LOW_RESOURCE:
1605 			return;
1606 		case B_LOW_RESOURCE_NOTE:
1607 			free = cache->unused_block_count / 8;
1608 			secondsOld = 120;
1609 			break;
1610 		case B_LOW_RESOURCE_WARNING:
1611 			free = cache->unused_block_count / 4;
1612 			secondsOld = 10;
1613 			break;
1614 		case B_LOW_RESOURCE_CRITICAL:
1615 			free = cache->unused_block_count / 2;
1616 			secondsOld = 0;
1617 			break;
1618 	}
1619 
1620 	MutexLocker locker(&cache->lock);
1621 
1622 	if (!locker.IsLocked()) {
1623 		// If our block_cache were deleted, it could be that we had
1624 		// been called before that deletion went through, therefore,
1625 		// acquiring its lock might fail.
1626 		return;
1627 	}
1628 
1629 #ifdef TRACE_BLOCK_CACHE
1630 	uint32 oldUnused = cache->unused_block_count;
1631 #endif
1632 
1633 	cache->RemoveUnusedBlocks(free, secondsOld);
1634 
1635 	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1636 		cache, oldUnused, cache->unused_block_count));
1637 }
1638 
1639 
1640 cached_block*
1641 block_cache::_GetUnusedBlock()
1642 {
1643 	TRACE(("block_cache: get unused block\n"));
1644 
1645 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1646 			cached_block* block = iterator.Next();) {
1647 		TB(Flush(this, block, true));
1648 		// this can only happen if no transactions are used
1649 		if (block->is_dirty && !block->busy_writing && !block->discard)
1650 			BlockWriter::WriteBlock(this, block);
1651 
1652 		// remove block from lists
1653 		iterator.Remove();
1654 		unused_block_count--;
1655 		hash->Remove(block);
1656 
1657 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1658 		block->unused = false;
1659 
1660 		// TODO: see if compare data is handled correctly here!
1661 #if BLOCK_CACHE_DEBUG_CHANGED
1662 		if (block->compare != NULL)
1663 			Free(block->compare);
1664 #endif
1665 		return block;
1666 	}
1667 
1668 	return NULL;
1669 }
1670 
1671 
1672 //	#pragma mark - private block functions
1673 
1674 
1675 /*!	Cache must be locked.
1676 */
1677 static void
1678 mark_block_busy_reading(block_cache* cache, cached_block* block)
1679 {
1680 	block->busy_reading = true;
1681 	cache->busy_reading_count++;
1682 }
1683 
1684 
1685 /*!	Cache must be locked.
1686 */
1687 static void
1688 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1689 {
1690 	block->busy_reading = false;
1691 	cache->busy_reading_count--;
1692 
1693 	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1694 		|| block->busy_reading_waiters) {
1695 		cache->busy_reading_waiters = false;
1696 		block->busy_reading_waiters = false;
1697 		cache->busy_reading_condition.NotifyAll();
1698 	}
1699 }
1700 
1701 
1702 /*!	Cache must be locked.
1703 */
1704 static void
1705 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1706 {
1707 	while (block->busy_reading) {
1708 		// wait for at least the specified block to be read in
1709 		ConditionVariableEntry entry;
1710 		cache->busy_reading_condition.Add(&entry);
1711 		block->busy_reading_waiters = true;
1712 
1713 		mutex_unlock(&cache->lock);
1714 
1715 		entry.Wait();
1716 
1717 		mutex_lock(&cache->lock);
1718 	}
1719 }
1720 
1721 
1722 /*!	Cache must be locked.
1723 */
1724 static void
1725 wait_for_busy_reading_blocks(block_cache* cache)
1726 {
1727 	while (cache->busy_reading_count != 0) {
1728 		// wait for all blocks to be read in
1729 		ConditionVariableEntry entry;
1730 		cache->busy_reading_condition.Add(&entry);
1731 		cache->busy_reading_waiters = true;
1732 
1733 		mutex_unlock(&cache->lock);
1734 
1735 		entry.Wait();
1736 
1737 		mutex_lock(&cache->lock);
1738 	}
1739 }
1740 
1741 
1742 /*!	Cache must be locked.
1743 */
1744 static void
1745 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1746 {
1747 	while (block->busy_writing) {
1748 		// wait for all blocks to be written back
1749 		ConditionVariableEntry entry;
1750 		cache->busy_writing_condition.Add(&entry);
1751 		block->busy_writing_waiters = true;
1752 
1753 		mutex_unlock(&cache->lock);
1754 
1755 		entry.Wait();
1756 
1757 		mutex_lock(&cache->lock);
1758 	}
1759 }
1760 
1761 
1762 /*!	Cache must be locked.
1763 */
1764 static void
1765 wait_for_busy_writing_blocks(block_cache* cache)
1766 {
1767 	while (cache->busy_writing_count != 0) {
1768 		// wait for all blocks to be written back
1769 		ConditionVariableEntry entry;
1770 		cache->busy_writing_condition.Add(&entry);
1771 		cache->busy_writing_waiters = true;
1772 
1773 		mutex_unlock(&cache->lock);
1774 
1775 		entry.Wait();
1776 
1777 		mutex_lock(&cache->lock);
1778 	}
1779 }
1780 
1781 
1782 /*!	Removes a reference from the specified \a block. If this was the last
1783 	reference, the block is moved into the unused list.
1784 	In low memory situations, it will also free some blocks from that list,
1785 	but not necessarily the \a block it just released.
1786 */
1787 static void
1788 put_cached_block(block_cache* cache, cached_block* block)
1789 {
1790 #if BLOCK_CACHE_DEBUG_CHANGED
1791 	if (!block->is_dirty && block->compare != NULL
1792 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1793 		dprintf("new block:\n");
1794 		dump_block((const char*)block->current_data, 256, "  ");
1795 		dprintf("unchanged block:\n");
1796 		dump_block((const char*)block->compare, 256, "  ");
1797 		BlockWriter::WriteBlock(cache, block);
1798 		panic("block_cache: supposed to be clean block was changed!\n");
1799 
1800 		cache->Free(block->compare);
1801 		block->compare = NULL;
1802 	}
1803 #endif
1804 	TB(Put(cache, block));
1805 
1806 	if (block->ref_count < 1) {
1807 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1808 		return;
1809 	}
1810 
1811 	if (--block->ref_count == 0
1812 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1813 		// This block is not used anymore, and not part of any transaction
1814 		block->is_writing = false;
1815 
1816 		if (block->discard) {
1817 			cache->RemoveBlock(block);
1818 		} else {
1819 			// put this block in the list of unused blocks
1820 			ASSERT(!block->unused);
1821 			block->unused = true;
1822 
1823 			ASSERT(block->original_data == NULL && block->parent_data == NULL);
1824 			cache->unused_blocks.Add(block);
1825 			cache->unused_block_count++;
1826 		}
1827 	}
1828 }
1829 
1830 
1831 static void
1832 put_cached_block(block_cache* cache, off_t blockNumber)
1833 {
1834 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1835 		panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1836 			blockNumber, cache->max_blocks - 1);
1837 	}
1838 
1839 	cached_block* block = cache->hash->Lookup(blockNumber);
1840 	if (block != NULL)
1841 		put_cached_block(cache, block);
1842 	else {
1843 		TB(Error(cache, blockNumber, "put unknown"));
1844 	}
1845 }
1846 
1847 
1848 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1849 	there, or reads it from the disk.
1850 	You need to have the cache locked when calling this function.
1851 
1852 	\param _allocated tells you whether or not a new block has been allocated
1853 		to satisfy your request.
1854 	\param readBlock if \c false, the block will not be read in case it was
1855 		not already in the cache. The block you retrieve may contain random
1856 		data. If \c true, the cache will be temporarily unlocked while the
1857 		block is read in.
1858 */
1859 static cached_block*
1860 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1861 	bool readBlock = true)
1862 {
1863 	ASSERT_LOCKED_MUTEX(&cache->lock);
1864 
1865 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1866 		panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1867 			blockNumber, cache->max_blocks - 1);
1868 		return NULL;
1869 	}
1870 
1871 retry:
1872 	cached_block* block = cache->hash->Lookup(blockNumber);
1873 	*_allocated = false;
1874 
1875 	if (block == NULL) {
1876 		// put block into cache
1877 		block = cache->NewBlock(blockNumber);
1878 		if (block == NULL)
1879 			return NULL;
1880 
1881 		cache->hash->Insert(block);
1882 		*_allocated = true;
1883 	} else if (block->busy_reading) {
1884 		// The block is currently busy_reading - wait and try again later
1885 		wait_for_busy_reading_block(cache, block);
1886 		goto retry;
1887 	}
1888 
1889 	if (block->unused) {
1890 		//TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
1891 		block->unused = false;
1892 		cache->unused_blocks.Remove(block);
1893 		cache->unused_block_count--;
1894 	}
1895 
1896 	if (*_allocated && readBlock) {
1897 		// read block into cache
1898 		int32 blockSize = cache->block_size;
1899 
1900 		mark_block_busy_reading(cache, block);
1901 		mutex_unlock(&cache->lock);
1902 
1903 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1904 			block->current_data, blockSize);
1905 
1906 		mutex_lock(&cache->lock);
1907 		if (bytesRead < blockSize) {
1908 			cache->RemoveBlock(block);
1909 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1910 
1911 			TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n",
1912 				blockNumber, bytesRead, strerror(errno)));
1913 			return NULL;
1914 		}
1915 		TB(Read(cache, block));
1916 
1917 		mark_block_unbusy_reading(cache, block);
1918 	}
1919 
1920 	block->ref_count++;
1921 	block->last_accessed = system_time() / 1000000L;
1922 
1923 	return block;
1924 }
1925 
1926 
1927 /*!	Returns the writable block data for the requested blockNumber.
1928 	If \a cleared is true, the block is not read from disk; an empty block
1929 	is returned.
1930 
1931 	This is the only method to insert a block into a transaction. It makes
1932 	sure that the previous block contents are preserved in that case.
1933 */
1934 static void*
1935 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1936 	off_t length, int32 transactionID, bool cleared)
1937 {
1938 	TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
1939 		blockNumber, transactionID));
1940 
1941 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1942 		panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1943 			blockNumber, cache->max_blocks - 1);
1944 	}
1945 
1946 	bool allocated;
1947 	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1948 		!cleared);
1949 	if (block == NULL)
1950 		return NULL;
1951 
1952 	if (block->busy_writing)
1953 		wait_for_busy_writing_block(cache, block);
1954 
1955 	block->discard = false;
1956 
1957 	// if there is no transaction support, we just return the current block
1958 	if (transactionID == -1) {
1959 		if (cleared) {
1960 			mark_block_busy_reading(cache, block);
1961 			mutex_unlock(&cache->lock);
1962 
1963 			memset(block->current_data, 0, cache->block_size);
1964 
1965 			mutex_lock(&cache->lock);
1966 			mark_block_unbusy_reading(cache, block);
1967 		}
1968 
1969 		block->is_writing = true;
1970 
1971 		if (!block->is_dirty) {
1972 			cache->num_dirty_blocks++;
1973 			block->is_dirty = true;
1974 				// mark the block as dirty
1975 		}
1976 
1977 		TB(Get(cache, block));
1978 		return block->current_data;
1979 	}
1980 
1981 	cache_transaction* transaction = block->transaction;
1982 
1983 	if (transaction != NULL && transaction->id != transactionID) {
1984 		// TODO: we have to wait here until the other transaction is done.
1985 		//	Maybe we should even panic, since we can't prevent any deadlocks.
1986 		panic("get_writable_cached_block(): asked to get busy writable block "
1987 			"(transaction %" B_PRId32 ")\n", block->transaction->id);
1988 		put_cached_block(cache, block);
1989 		return NULL;
1990 	}
1991 	if (transaction == NULL && transactionID != -1) {
1992 		// get new transaction
1993 		transaction = lookup_transaction(cache, transactionID);
1994 		if (transaction == NULL) {
1995 			panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
1996 				transactionID);
1997 			put_cached_block(cache, block);
1998 			return NULL;
1999 		}
2000 		if (!transaction->open) {
2001 			panic("get_writable_cached_block(): transaction already done!\n");
2002 			put_cached_block(cache, block);
2003 			return NULL;
2004 		}
2005 
2006 		block->transaction = transaction;
2007 
2008 		// attach the block to the transaction block list
2009 		block->transaction_next = transaction->first_block;
2010 		transaction->first_block = block;
2011 		transaction->num_blocks++;
2012 	}
2013 	if (transaction != NULL)
2014 		transaction->last_used = system_time();
2015 
2016 	bool wasUnchanged = block->original_data == NULL
2017 		|| block->previous_transaction != NULL;
2018 
2019 	if (!(allocated && cleared) && block->original_data == NULL) {
2020 		// we already have data, so we need to preserve it
2021 		block->original_data = cache->Allocate();
2022 		if (block->original_data == NULL) {
2023 			TB(Error(cache, blockNumber, "allocate original failed"));
2024 			FATAL(("could not allocate original_data\n"));
2025 			put_cached_block(cache, block);
2026 			return NULL;
2027 		}
2028 
2029 		mark_block_busy_reading(cache, block);
2030 		mutex_unlock(&cache->lock);
2031 
2032 		memcpy(block->original_data, block->current_data, cache->block_size);
2033 
2034 		mutex_lock(&cache->lock);
2035 		mark_block_unbusy_reading(cache, block);
2036 	}
2037 	if (block->parent_data == block->current_data) {
2038 		// remember any previous contents for the parent transaction
2039 		block->parent_data = cache->Allocate();
2040 		if (block->parent_data == NULL) {
2041 			// TODO: maybe we should just continue the current transaction in
2042 			// this case...
2043 			TB(Error(cache, blockNumber, "allocate parent failed"));
2044 			FATAL(("could not allocate parent\n"));
2045 			put_cached_block(cache, block);
2046 			return NULL;
2047 		}
2048 
2049 		mark_block_busy_reading(cache, block);
2050 		mutex_unlock(&cache->lock);
2051 
2052 		memcpy(block->parent_data, block->current_data, cache->block_size);
2053 
2054 		mutex_lock(&cache->lock);
2055 		mark_block_unbusy_reading(cache, block);
2056 
2057 		transaction->sub_num_blocks++;
2058 	} else if (transaction != NULL && transaction->has_sub_transaction
2059 		&& block->parent_data == NULL && wasUnchanged)
2060 		transaction->sub_num_blocks++;
2061 
2062 	if (cleared) {
2063 		mark_block_busy_reading(cache, block);
2064 		mutex_unlock(&cache->lock);
2065 
2066 		memset(block->current_data, 0, cache->block_size);
2067 
2068 		mutex_lock(&cache->lock);
2069 		mark_block_unbusy_reading(cache, block);
2070 	}
2071 
2072 	block->is_dirty = true;
2073 	TB(Get(cache, block));
2074 	TB2(BlockData(cache, block, "get writable"));
2075 
2076 	return block->current_data;
2077 }
2078 
2079 
2080 #if DEBUG_BLOCK_CACHE
2081 
2082 
2083 static void
2084 dump_block(cached_block* block)
2085 {
2086 	kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2087 		" %c%c%c%c%c%c %08lx %08lx\n",
2088 		(addr_t)block, block->block_number,
2089 		(addr_t)block->current_data, (addr_t)block->original_data,
2090 		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2091 		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2092 		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2093 		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2094 		(addr_t)block->transaction,
2095 		(addr_t)block->previous_transaction);
2096 }
2097 
2098 
2099 static void
2100 dump_block_long(cached_block* block)
2101 {
2102 	kprintf("BLOCK %p\n", block);
2103 	kprintf(" current data:  %p\n", block->current_data);
2104 	kprintf(" original data: %p\n", block->original_data);
2105 	kprintf(" parent data:   %p\n", block->parent_data);
2106 #if BLOCK_CACHE_DEBUG_CHANGED
2107 	kprintf(" compare data:  %p\n", block->compare);
2108 #endif
2109 	kprintf(" ref_count:     %" B_PRId32 "\n", block->ref_count);
2110 	kprintf(" accessed:      %" B_PRId32 "\n", block->LastAccess());
2111 	kprintf(" flags:        ");
2112 	if (block->busy_reading)
2113 		kprintf(" busy_reading");
2114 	if (block->busy_writing)
2115 		kprintf(" busy_writing");
2116 	if (block->is_writing)
2117 		kprintf(" is-writing");
2118 	if (block->is_dirty)
2119 		kprintf(" is-dirty");
2120 	if (block->unused)
2121 		kprintf(" unused");
2122 	if (block->discard)
2123 		kprintf(" discard");
2124 	kprintf("\n");
2125 	if (block->transaction != NULL) {
2126 		kprintf(" transaction:   %p (%" B_PRId32 ")\n", block->transaction,
2127 			block->transaction->id);
2128 		if (block->transaction_next != NULL) {
2129 			kprintf(" next in transaction: %" B_PRIdOFF "\n",
2130 				block->transaction_next->block_number);
2131 		}
2132 	}
2133 	if (block->previous_transaction != NULL) {
2134 		kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2135 			block->previous_transaction,
2136 			block->previous_transaction->id);
2137 	}
2138 
2139 	set_debug_variable("_current", (addr_t)block->current_data);
2140 	set_debug_variable("_original", (addr_t)block->original_data);
2141 	set_debug_variable("_parent", (addr_t)block->parent_data);
2142 }
2143 
2144 
2145 static int
2146 dump_cached_block(int argc, char** argv)
2147 {
2148 	if (argc != 2) {
2149 		kprintf("usage: %s <block-address>\n", argv[0]);
2150 		return 0;
2151 	}
2152 
2153 	dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2154 	return 0;
2155 }
2156 
2157 
2158 static int
2159 dump_cache(int argc, char** argv)
2160 {
2161 	bool showTransactions = false;
2162 	bool showBlocks = false;
2163 	int32 i = 1;
2164 	while (argv[i] != NULL && argv[i][0] == '-') {
2165 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
2166 			switch (arg[0]) {
2167 				case 'b':
2168 					showBlocks = true;
2169 					break;
2170 				case 't':
2171 					showTransactions = true;
2172 					break;
2173 				default:
2174 					print_debugger_command_usage(argv[0]);
2175 					return 0;
2176 			}
2177 		}
2178 		i++;
2179 	}
2180 
2181 	if (i >= argc) {
2182 		print_debugger_command_usage(argv[0]);
2183 		return 0;
2184 	}
2185 
2186 	block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2187 	if (cache == NULL) {
2188 		kprintf("invalid cache address\n");
2189 		return 0;
2190 	}
2191 
2192 	off_t blockNumber = -1;
2193 	if (i + 1 < argc) {
2194 		blockNumber = parse_expression(argv[i + 1]);
2195 		cached_block* block = cache->hash->Lookup(blockNumber);
2196 		if (block != NULL)
2197 			dump_block_long(block);
2198 		else
2199 			kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2200 		return 0;
2201 	}
2202 
2203 	kprintf("BLOCK CACHE: %p\n", cache);
2204 
2205 	kprintf(" fd:           %d\n", cache->fd);
2206 	kprintf(" max_blocks:   %" B_PRIdOFF "\n", cache->max_blocks);
2207 	kprintf(" block_size:   %zu\n", cache->block_size);
2208 	kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2209 	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2210 	kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2211 		cache->busy_reading_waiters ? "has" : "no");
2212 	kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2213 		cache->busy_writing_waiters ? "has" : "no");
2214 
2215 	if (!cache->pending_notifications.IsEmpty()) {
2216 		kprintf(" pending notifications:\n");
2217 
2218 		NotificationList::Iterator iterator
2219 			= cache->pending_notifications.GetIterator();
2220 		while (iterator.HasNext()) {
2221 			cache_notification* notification = iterator.Next();
2222 
2223 			kprintf("  %p %5" B_PRIx32 " %p - %p\n", notification,
2224 				notification->events_pending, notification->hook,
2225 				notification->data);
2226 		}
2227 	}
2228 
2229 	if (showTransactions) {
2230 		kprintf(" transactions:\n");
2231 		kprintf("address       id state  blocks  main   sub\n");
2232 
2233 		TransactionTable::Iterator iterator(cache->transaction_hash);
2234 
2235 		while (iterator.HasNext()) {
2236 			cache_transaction* transaction = iterator.Next();
2237 			kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
2238 				B_PRId32 "\n", transaction, transaction->id,
2239 				transaction->open ? "open" : "closed",
2240 				transaction->num_blocks, transaction->main_num_blocks,
2241 				transaction->sub_num_blocks);
2242 		}
2243 	}
2244 
2245 	if (showBlocks) {
2246 		kprintf(" blocks:\n");
2247 		kprintf("address  block no. current  original parent    refs access "
2248 			"flags transact prev. trans\n");
2249 	}
2250 
2251 	uint32 referenced = 0;
2252 	uint32 count = 0;
2253 	uint32 dirty = 0;
2254 	uint32 discarded = 0;
2255 	BlockTable::Iterator iterator(cache->hash);
2256 	while (iterator.HasNext()) {
2257 		cached_block* block = iterator.Next();
2258 		if (showBlocks)
2259 			dump_block(block);
2260 
2261 		if (block->is_dirty)
2262 			dirty++;
2263 		if (block->discard)
2264 			discarded++;
2265 		if (block->ref_count)
2266 			referenced++;
2267 		count++;
2268 	}
2269 
2270 	kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
2271 		" discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
2272 		" in unused.\n",
2273 		count, dirty, discarded, referenced, cache->busy_reading_count,
2274 		cache->unused_block_count);
2275 	return 0;
2276 }
2277 
2278 
2279 static int
2280 dump_transaction(int argc, char** argv)
2281 {
2282 	bool showBlocks = false;
2283 	int i = 1;
2284 	if (argc > 1 && !strcmp(argv[1], "-b")) {
2285 		showBlocks = true;
2286 		i++;
2287 	}
2288 
2289 	if (argc - i < 1 || argc - i > 2) {
2290 		print_debugger_command_usage(argv[0]);
2291 		return 0;
2292 	}
2293 
2294 	cache_transaction* transaction = NULL;
2295 
2296 	if (argc - i == 1) {
2297 		transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2298 	} else {
2299 		block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2300 		int32 id = parse_expression(argv[i + 1]);
2301 		transaction = lookup_transaction(cache, id);
2302 		if (transaction == NULL) {
2303 			kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2304 			return 0;
2305 		}
2306 	}
2307 
2308 	kprintf("TRANSACTION %p\n", transaction);
2309 
2310 	kprintf(" id:             %" B_PRId32 "\n", transaction->id);
2311 	kprintf(" num block:      %" B_PRId32 "\n", transaction->num_blocks);
2312 	kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2313 	kprintf(" sub num block:  %" B_PRId32 "\n", transaction->sub_num_blocks);
2314 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
2315 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
2316 	kprintf(" idle:           %" B_PRId64 " secs\n",
2317 		(system_time() - transaction->last_used) / 1000000);
2318 
2319 	kprintf(" listeners:\n");
2320 
2321 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2322 	while (iterator.HasNext()) {
2323 		cache_listener* listener = iterator.Next();
2324 
2325 		kprintf("  %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2326 			listener->hook, listener->data);
2327 	}
2328 
2329 	if (!showBlocks)
2330 		return 0;
2331 
2332 	kprintf(" blocks:\n");
2333 	kprintf("address  block no. current  original parent    refs access "
2334 		"flags transact prev. trans\n");
2335 
2336 	cached_block* block = transaction->first_block;
2337 	while (block != NULL) {
2338 		dump_block(block);
2339 		block = block->transaction_next;
2340 	}
2341 
2342 	kprintf("--\n");
2343 
2344 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2345 	while (blockIterator.HasNext()) {
2346 		block = blockIterator.Next();
2347 		dump_block(block);
2348 	}
2349 
2350 	return 0;
2351 }
2352 
2353 
2354 static int
2355 dump_caches(int argc, char** argv)
2356 {
2357 	kprintf("Block caches:\n");
2358 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2359 	while (i.HasNext()) {
2360 		block_cache* cache = i.Next();
2361 		if (cache == (block_cache*)&sMarkCache)
2362 			continue;
2363 
2364 		kprintf("  %p\n", cache);
2365 	}
2366 
2367 	return 0;
2368 }
2369 
2370 
2371 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2372 static int
2373 dump_block_data(int argc, char** argv)
2374 {
2375 	using namespace BlockTracing;
2376 
2377 	// Determine which blocks to show
2378 
2379 	bool printStackTrace = true;
2380 	uint32 which = 0;
2381 	int32 i = 1;
2382 	while (i < argc && argv[i][0] == '-') {
2383 		char* arg = &argv[i][1];
2384 		while (arg[0]) {
2385 			switch (arg[0]) {
2386 				case 'c':
2387 					which |= BlockData::kCurrent;
2388 					break;
2389 				case 'p':
2390 					which |= BlockData::kParent;
2391 					break;
2392 				case 'o':
2393 					which |= BlockData::kOriginal;
2394 					break;
2395 
2396 				default:
2397 					kprintf("invalid block specifier (only o/c/p are "
2398 						"allowed).\n");
2399 					return 0;
2400 			}
2401 			arg++;
2402 		}
2403 
2404 		i++;
2405 	}
2406 	if (which == 0)
2407 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2408 
2409 	if (i == argc) {
2410 		print_debugger_command_usage(argv[0]);
2411 		return 0;
2412 	}
2413 
2414 	// Get the range of blocks to print
2415 
2416 	int64 from = parse_expression(argv[i]);
2417 	int64 to = from;
2418 	if (argc > i + 1)
2419 		to = parse_expression(argv[i + 1]);
2420 	if (to < from)
2421 		to = from;
2422 
2423 	uint32 offset = 0;
2424 	uint32 size = LONG_MAX;
2425 	if (argc > i + 2)
2426 		offset = parse_expression(argv[i + 2]);
2427 	if (argc > i + 3)
2428 		size = parse_expression(argv[i + 3]);
2429 
2430 	TraceEntryIterator iterator;
2431 	iterator.MoveTo(from - 1);
2432 
2433 	static char sBuffer[1024];
2434 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2435 
2436 	while (TraceEntry* entry = iterator.Next()) {
2437 		int32 index = iterator.Index();
2438 		if (index > to)
2439 			break;
2440 
2441 		Action* action = dynamic_cast<Action*>(entry);
2442 		if (action != NULL) {
2443 			out.Clear();
2444 			out.DumpEntry(action);
2445 			continue;
2446 		}
2447 
2448 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
2449 		if (blockData == NULL)
2450 			continue;
2451 
2452 		out.Clear();
2453 
2454 		const char* dump = out.DumpEntry(entry);
2455 		int length = strlen(dump);
2456 		if (length > 0 && dump[length - 1] == '\n')
2457 			length--;
2458 
2459 		kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2460 
2461 		if (printStackTrace) {
2462 			out.Clear();
2463 			entry->DumpStackTrace(out);
2464 			if (out.Size() > 0)
2465 				kputs(out.Buffer());
2466 		}
2467 
2468 		blockData->DumpBlocks(which, offset, size);
2469 	}
2470 
2471 	return 0;
2472 }
2473 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
2474 
2475 
2476 #endif	// DEBUG_BLOCK_CACHE
2477 
2478 
2479 /*!	Traverses through the block_cache list, and returns one cache after the
2480 	other. The cache returned is automatically locked when you get it, and
2481 	unlocked with the next call to this function. Ignores caches that are in
2482 	deletion state.
2483 	Returns \c NULL when the end of the list is reached.
2484 */
2485 static block_cache*
2486 get_next_locked_block_cache(block_cache* last)
2487 {
2488 	MutexLocker _(sCachesLock);
2489 
2490 	block_cache* cache;
2491 	if (last != NULL) {
2492 		mutex_unlock(&last->lock);
2493 
2494 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
2495 		sCaches.Remove((block_cache*)&sMarkCache);
2496 	} else
2497 		cache = sCaches.Head();
2498 
2499 	if (cache != NULL) {
2500 		mutex_lock(&cache->lock);
2501 		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2502 	}
2503 
2504 	return cache;
2505 }
2506 
2507 
2508 /*!	Background thread that continuously checks for pending notifications of
2509 	all caches.
2510 	Every two seconds, it will also write back up to 64 blocks per cache.
2511 */
2512 static status_t
2513 block_notifier_and_writer(void* /*data*/)
2514 {
2515 	const bigtime_t kTimeout = 2000000LL;
2516 	bigtime_t timeout = kTimeout;
2517 
2518 	while (true) {
2519 		bigtime_t start = system_time();
2520 
2521 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
2522 			B_RELATIVE_TIMEOUT, timeout);
2523 		if (status == B_OK) {
2524 			flush_pending_notifications();
2525 			timeout -= system_time() - start;
2526 			continue;
2527 		}
2528 
2529 		// write 64 blocks of each block_cache every two seconds
2530 		// TODO: change this once we have an I/O scheduler
2531 		timeout = kTimeout;
2532 		size_t usedMemory;
2533 		object_cache_get_usage(sBlockCache, &usedMemory);
2534 
2535 		block_cache* cache = NULL;
2536 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2537 			BlockWriter writer(cache, 64);
2538 
2539 			size_t cacheUsedMemory;
2540 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2541 			usedMemory += cacheUsedMemory;
2542 
2543 			if (cache->num_dirty_blocks) {
2544 				// This cache is not using transactions, we'll scan the blocks
2545 				// directly
2546 				BlockTable::Iterator iterator(cache->hash);
2547 
2548 				while (iterator.HasNext()) {
2549 					cached_block* block = iterator.Next();
2550 					if (block->CanBeWritten() && !writer.Add(block))
2551 						break;
2552 				}
2553 
2554 			} else {
2555 				TransactionTable::Iterator iterator(cache->transaction_hash);
2556 
2557 				while (iterator.HasNext()) {
2558 					cache_transaction* transaction = iterator.Next();
2559 					if (transaction->open) {
2560 						if (system_time() > transaction->last_used
2561 								+ kTransactionIdleTime) {
2562 							// Transaction is open but idle
2563 							notify_transaction_listeners(cache, transaction,
2564 								TRANSACTION_IDLE);
2565 						}
2566 						continue;
2567 					}
2568 
2569 					bool hasLeftOvers;
2570 						// we ignore this one
2571 					if (!writer.Add(transaction, hasLeftOvers))
2572 						break;
2573 				}
2574 			}
2575 
2576 			writer.Write();
2577 
2578 			if ((block_cache_used_memory() / B_PAGE_SIZE)
2579 					> vm_page_num_pages() / 2) {
2580 				// Try to reduce memory usage to half of the available
2581 				// RAM at maximum
2582 				cache->RemoveUnusedBlocks(1000, 10);
2583 			}
2584 		}
2585 
2586 		MutexLocker _(sCachesMemoryUseLock);
2587 		sUsedMemory = usedMemory;
2588 	}
2589 
2590 	// never can get here
2591 	return B_OK;
2592 }
2593 
2594 
2595 /*!	Notify function for wait_for_notifications(). */
2596 static void
2597 notify_sync(int32 transactionID, int32 event, void* _cache)
2598 {
2599 	block_cache* cache = (block_cache*)_cache;
2600 
2601 	cache->condition_variable.NotifyOne();
2602 }
2603 
2604 
2605 /*!	Must be called with the sCachesLock held. */
2606 static bool
2607 is_valid_cache(block_cache* cache)
2608 {
2609 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2610 
2611 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2612 	while (iterator.HasNext()) {
2613 		if (cache == iterator.Next())
2614 			return true;
2615 	}
2616 
2617 	return false;
2618 }
2619 
2620 
2621 /*!	Waits until all pending notifications are carried out.
2622 	Safe to be called from the block writer/notifier thread.
2623 	You must not hold the \a cache lock when calling this function.
2624 */
2625 static void
2626 wait_for_notifications(block_cache* cache)
2627 {
2628 	MutexLocker locker(sCachesLock);
2629 
2630 	if (find_thread(NULL) == sNotifierWriterThread) {
2631 		// We're the notifier thread, don't wait, but flush all pending
2632 		// notifications directly.
2633 		if (is_valid_cache(cache))
2634 			flush_pending_notifications(cache);
2635 		return;
2636 	}
2637 
2638 	// add sync notification
2639 	cache_notification notification;
2640 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2641 		cache);
2642 
2643 	ConditionVariableEntry entry;
2644 	cache->condition_variable.Add(&entry);
2645 
2646 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2647 	locker.Unlock();
2648 
2649 	// wait for notification hook to be called
2650 	entry.Wait();
2651 }
2652 
2653 
2654 status_t
2655 block_cache_init(void)
2656 {
2657 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2658 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2659 	if (sBlockCache == NULL)
2660 		return B_NO_MEMORY;
2661 
2662 	new (&sCaches) DoublyLinkedList<block_cache>;
2663 		// manually call constructor
2664 
2665 	sEventSemaphore = create_sem(0, "block cache event");
2666 	if (sEventSemaphore < B_OK)
2667 		return sEventSemaphore;
2668 
2669 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2670 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2671 	if (sNotifierWriterThread >= B_OK)
2672 		resume_thread(sNotifierWriterThread);
2673 
2674 #if DEBUG_BLOCK_CACHE
2675 	add_debugger_command_etc("block_caches", &dump_caches,
2676 		"dumps all block caches", "\n", 0);
2677 	add_debugger_command_etc("block_cache", &dump_cache,
2678 		"dumps a specific block cache",
2679 		"[-bt] <cache-address> [block-number]\n"
2680 		"  -t lists the transactions\n"
2681 		"  -b lists all blocks\n", 0);
2682 	add_debugger_command("cached_block", &dump_cached_block,
2683 		"dumps the specified cached block");
2684 	add_debugger_command_etc("transaction", &dump_transaction,
2685 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2686 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2687 		"  -b lists all blocks that are part of this transaction\n", 0);
2688 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2689 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2690 		"dumps the data blocks logged for the actions",
2691 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2692 		"If no data specifier is used, all blocks are shown by default.\n"
2693 		" -c       the current data is shown, if available.\n"
2694 		" -p       the parent data is shown, if available.\n"
2695 		" -o       the original data is shown, if available.\n"
2696 		" <from>   first index of tracing entries to show.\n"
2697 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2698 		" <offset> the offset of the block data.\n"
2699 		" <from>   the size of the block data that is dumped\n", 0);
2700 #	endif
2701 #endif	// DEBUG_BLOCK_CACHE
2702 
2703 	return B_OK;
2704 }
2705 
2706 
2707 size_t
2708 block_cache_used_memory(void)
2709 {
2710 	MutexLocker _(sCachesMemoryUseLock);
2711 	return sUsedMemory;
2712 }
2713 
2714 
2715 //	#pragma mark - public transaction API
2716 
2717 
2718 int32
2719 cache_start_transaction(void* _cache)
2720 {
2721 	block_cache* cache = (block_cache*)_cache;
2722 	TransactionLocker locker(cache);
2723 
2724 	if (cache->last_transaction && cache->last_transaction->open) {
2725 		panic("last transaction (%" B_PRId32 ") still open!\n",
2726 			cache->last_transaction->id);
2727 	}
2728 
2729 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2730 	if (transaction == NULL)
2731 		return B_NO_MEMORY;
2732 
2733 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2734 	cache->last_transaction = transaction;
2735 
2736 	TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
2737 	T(Action("start", cache, transaction));
2738 
2739 	cache->transaction_hash->Insert(transaction);
2740 
2741 	return transaction->id;
2742 }
2743 
2744 
2745 status_t
2746 cache_sync_transaction(void* _cache, int32 id)
2747 {
2748 	block_cache* cache = (block_cache*)_cache;
2749 	bool hadBusy;
2750 
2751 	TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
2752 
2753 	do {
2754 		TransactionLocker locker(cache);
2755 		hadBusy = false;
2756 
2757 		BlockWriter writer(cache);
2758 		TransactionTable::Iterator iterator(cache->transaction_hash);
2759 
2760 		while (iterator.HasNext()) {
2761 			// close all earlier transactions which haven't been closed yet
2762 			cache_transaction* transaction = iterator.Next();
2763 
2764 			if (transaction->busy_writing_count != 0) {
2765 				hadBusy = true;
2766 				continue;
2767 			}
2768 			if (transaction->id <= id && !transaction->open) {
2769 				// write back all of their remaining dirty blocks
2770 				T(Action("sync", cache, transaction));
2771 
2772 				bool hasLeftOvers;
2773 				writer.Add(transaction, hasLeftOvers);
2774 
2775 				if (hasLeftOvers) {
2776 					// This transaction contains blocks that a previous
2777 					// transaction is trying to write back in this write run
2778 					hadBusy = true;
2779 				}
2780 			}
2781 		}
2782 
2783 		status_t status = writer.Write();
2784 		if (status != B_OK)
2785 			return status;
2786 	} while (hadBusy);
2787 
2788 	wait_for_notifications(cache);
2789 		// make sure that all pending TRANSACTION_WRITTEN notifications
2790 		// are handled after we return
2791 	return B_OK;
2792 }
2793 
2794 
2795 status_t
2796 cache_end_transaction(void* _cache, int32 id,
2797 	transaction_notification_hook hook, void* data)
2798 {
2799 	block_cache* cache = (block_cache*)_cache;
2800 	TransactionLocker locker(cache);
2801 
2802 	TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
2803 
2804 	cache_transaction* transaction = lookup_transaction(cache, id);
2805 	if (transaction == NULL) {
2806 		panic("cache_end_transaction(): invalid transaction ID\n");
2807 		return B_BAD_VALUE;
2808 	}
2809 
2810 	// Write back all pending transaction blocks
2811 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2812 	if (status != B_OK)
2813 		return status;
2814 
2815 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2816 
2817 	if (hook != NULL
2818 		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2819 			hook, data) != B_OK) {
2820 		return B_NO_MEMORY;
2821 	}
2822 
2823 	T(Action("end", cache, transaction));
2824 
2825 	// iterate through all blocks and free the unchanged original contents
2826 
2827 	cached_block* next;
2828 	for (cached_block* block = transaction->first_block; block != NULL;
2829 			block = next) {
2830 		next = block->transaction_next;
2831 		ASSERT(block->previous_transaction == NULL);
2832 
2833 		if (block->discard) {
2834 			// This block has been discarded in the transaction
2835 			cache->DiscardBlock(block);
2836 			transaction->num_blocks--;
2837 			continue;
2838 		}
2839 
2840 		if (block->original_data != NULL) {
2841 			cache->Free(block->original_data);
2842 			block->original_data = NULL;
2843 		}
2844 		if (block->parent_data != NULL) {
2845 			ASSERT(transaction->has_sub_transaction);
2846 			cache->FreeBlockParentData(block);
2847 		}
2848 
2849 		// move the block to the previous transaction list
2850 		transaction->blocks.Add(block);
2851 
2852 		block->previous_transaction = transaction;
2853 		block->transaction_next = NULL;
2854 		block->transaction = NULL;
2855 	}
2856 
2857 	transaction->open = false;
2858 	return B_OK;
2859 }
2860 
2861 
2862 status_t
2863 cache_abort_transaction(void* _cache, int32 id)
2864 {
2865 	block_cache* cache = (block_cache*)_cache;
2866 	TransactionLocker locker(cache);
2867 
2868 	TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
2869 
2870 	cache_transaction* transaction = lookup_transaction(cache, id);
2871 	if (transaction == NULL) {
2872 		panic("cache_abort_transaction(): invalid transaction ID\n");
2873 		return B_BAD_VALUE;
2874 	}
2875 
2876 	T(Abort(cache, transaction));
2877 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2878 
2879 	// iterate through all blocks and restore their original contents
2880 
2881 	cached_block* block = transaction->first_block;
2882 	cached_block* next;
2883 	for (; block != NULL; block = next) {
2884 		next = block->transaction_next;
2885 
2886 		if (block->original_data != NULL) {
2887 			TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
2888 				"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
2889 			memcpy(block->current_data, block->original_data,
2890 				cache->block_size);
2891 			cache->Free(block->original_data);
2892 			block->original_data = NULL;
2893 		}
2894 		if (transaction->has_sub_transaction && block->parent_data != NULL)
2895 			cache->FreeBlockParentData(block);
2896 
2897 		block->transaction_next = NULL;
2898 		block->transaction = NULL;
2899 		block->discard = false;
2900 		if (block->previous_transaction == NULL)
2901 			block->is_dirty = false;
2902 	}
2903 
2904 	cache->transaction_hash->Remove(transaction);
2905 	delete_transaction(cache, transaction);
2906 	return B_OK;
2907 }
2908 
2909 
2910 /*!	Acknowledges the current parent transaction, and starts a new transaction
2911 	from its sub transaction.
2912 	The new transaction also gets a new transaction ID.
2913 */
2914 int32
2915 cache_detach_sub_transaction(void* _cache, int32 id,
2916 	transaction_notification_hook hook, void* data)
2917 {
2918 	block_cache* cache = (block_cache*)_cache;
2919 	TransactionLocker locker(cache);
2920 
2921 	TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
2922 
2923 	cache_transaction* transaction = lookup_transaction(cache, id);
2924 	if (transaction == NULL) {
2925 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2926 		return B_BAD_VALUE;
2927 	}
2928 	if (!transaction->has_sub_transaction)
2929 		return B_BAD_VALUE;
2930 
2931 	// iterate through all blocks and free the unchanged original contents
2932 
2933 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2934 	if (status != B_OK)
2935 		return status;
2936 
2937 	// create a new transaction for the sub transaction
2938 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
2939 	if (newTransaction == NULL)
2940 		return B_NO_MEMORY;
2941 
2942 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2943 	T(Detach(cache, transaction, newTransaction));
2944 
2945 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2946 
2947 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2948 			data) != B_OK) {
2949 		delete newTransaction;
2950 		return B_NO_MEMORY;
2951 	}
2952 
2953 	cached_block* last = NULL;
2954 	cached_block* next;
2955 	for (cached_block* block = transaction->first_block; block != NULL;
2956 			block = next) {
2957 		next = block->transaction_next;
2958 		ASSERT(block->previous_transaction == NULL);
2959 
2960 		if (block->discard) {
2961 			cache->DiscardBlock(block);
2962 			transaction->main_num_blocks--;
2963 			continue;
2964 		}
2965 
2966 		if (block->parent_data != NULL) {
2967 			// The block changed in the parent - free the original data, since
2968 			// they will be replaced by what is in current.
2969 			ASSERT(block->original_data != NULL);
2970 			cache->Free(block->original_data);
2971 
2972 			if (block->parent_data != block->current_data) {
2973 				// The block had been changed in both transactions
2974 				block->original_data = block->parent_data;
2975 			} else {
2976 				// The block has only been changed in the parent
2977 				block->original_data = NULL;
2978 			}
2979 
2980 			// move the block to the previous transaction list
2981 			transaction->blocks.Add(block);
2982 			block->previous_transaction = transaction;
2983 			block->parent_data = NULL;
2984 		}
2985 
2986 		if (block->original_data != NULL) {
2987 			// This block had been changed in the current sub transaction,
2988 			// we need to move this block over to the new transaction.
2989 			ASSERT(block->parent_data == NULL);
2990 
2991 			if (last == NULL)
2992 				newTransaction->first_block = block;
2993 			else
2994 				last->transaction_next = block;
2995 
2996 			block->transaction = newTransaction;
2997 			last = block;
2998 		} else
2999 			block->transaction = NULL;
3000 
3001 		block->transaction_next = NULL;
3002 	}
3003 
3004 	newTransaction->num_blocks = transaction->sub_num_blocks;
3005 
3006 	transaction->open = false;
3007 	transaction->has_sub_transaction = false;
3008 	transaction->num_blocks = transaction->main_num_blocks;
3009 	transaction->sub_num_blocks = 0;
3010 
3011 	cache->transaction_hash->Insert(newTransaction);
3012 	cache->last_transaction = newTransaction;
3013 
3014 	return newTransaction->id;
3015 }
3016 
3017 
3018 status_t
3019 cache_abort_sub_transaction(void* _cache, int32 id)
3020 {
3021 	block_cache* cache = (block_cache*)_cache;
3022 	TransactionLocker locker(cache);
3023 
3024 	TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3025 
3026 	cache_transaction* transaction = lookup_transaction(cache, id);
3027 	if (transaction == NULL) {
3028 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3029 		return B_BAD_VALUE;
3030 	}
3031 	if (!transaction->has_sub_transaction)
3032 		return B_BAD_VALUE;
3033 
3034 	T(Abort(cache, transaction));
3035 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3036 
3037 	// revert all changes back to the version of the parent
3038 
3039 	cached_block* block = transaction->first_block;
3040 	cached_block* last = NULL;
3041 	cached_block* next;
3042 	for (; block != NULL; block = next) {
3043 		next = block->transaction_next;
3044 
3045 		if (block->parent_data == NULL) {
3046 			// The parent transaction didn't change the block, but the sub
3047 			// transaction did - we need to revert to the original data.
3048 			// The block is no longer part of the transaction
3049 			if (block->original_data != NULL) {
3050 				// The block might not have original data if was empty
3051 				memcpy(block->current_data, block->original_data,
3052 					cache->block_size);
3053 			}
3054 
3055 			if (last != NULL)
3056 				last->transaction_next = next;
3057 			else
3058 				transaction->first_block = next;
3059 
3060 			block->transaction_next = NULL;
3061 			block->transaction = NULL;
3062 			transaction->num_blocks--;
3063 
3064 			if (block->previous_transaction == NULL) {
3065 				cache->Free(block->original_data);
3066 				block->original_data = NULL;
3067 				block->is_dirty = false;
3068 
3069 				if (block->ref_count == 0) {
3070 					// Move the block into the unused list if possible
3071 					block->unused = true;
3072 					cache->unused_blocks.Add(block);
3073 					cache->unused_block_count++;
3074 				}
3075 			}
3076 		} else {
3077 			if (block->parent_data != block->current_data) {
3078 				// The block has been changed and must be restored - the block
3079 				// is still dirty and part of the transaction
3080 				TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3081 					"restored contents of block %" B_PRIdOFF "\n",
3082 					transaction->id, block->block_number));
3083 				memcpy(block->current_data, block->parent_data,
3084 					cache->block_size);
3085 				cache->Free(block->parent_data);
3086 				// The block stays dirty
3087 			}
3088 			block->parent_data = NULL;
3089 			last = block;
3090 		}
3091 
3092 		block->discard = false;
3093 	}
3094 
3095 	// all subsequent changes will go into the main transaction
3096 	transaction->has_sub_transaction = false;
3097 	transaction->sub_num_blocks = 0;
3098 
3099 	return B_OK;
3100 }
3101 
3102 
3103 status_t
3104 cache_start_sub_transaction(void* _cache, int32 id)
3105 {
3106 	block_cache* cache = (block_cache*)_cache;
3107 	TransactionLocker locker(cache);
3108 
3109 	TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3110 
3111 	cache_transaction* transaction = lookup_transaction(cache, id);
3112 	if (transaction == NULL) {
3113 		panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3114 			id);
3115 		return B_BAD_VALUE;
3116 	}
3117 
3118 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3119 
3120 	// move all changed blocks up to the parent
3121 
3122 	cached_block* block = transaction->first_block;
3123 	cached_block* next;
3124 	for (; block != NULL; block = next) {
3125 		next = block->transaction_next;
3126 
3127 		if (block->parent_data != NULL) {
3128 			// There already is an older sub transaction - we acknowledge
3129 			// its changes and move its blocks up to the parent
3130 			ASSERT(transaction->has_sub_transaction);
3131 			cache->FreeBlockParentData(block);
3132 		}
3133 		if (block->discard) {
3134 			// This block has been discarded in the parent transaction.
3135 			// Just throw away any changes made in this transaction, so that
3136 			// it can still be reverted to its original contents if needed
3137 			ASSERT(block->previous_transaction == NULL);
3138 			if (block->original_data != NULL) {
3139 				memcpy(block->current_data, block->original_data,
3140 					cache->block_size);
3141 				block->original_data = NULL;
3142 			}
3143 			continue;
3144 		}
3145 
3146 		// we "allocate" the parent data lazily, that means, we don't copy
3147 		// the data (and allocate memory for it) until we need to
3148 		block->parent_data = block->current_data;
3149 	}
3150 
3151 	// all subsequent changes will go into the sub transaction
3152 	transaction->has_sub_transaction = true;
3153 	transaction->main_num_blocks = transaction->num_blocks;
3154 	transaction->sub_num_blocks = 0;
3155 	T(Action("start-sub", cache, transaction));
3156 
3157 	return B_OK;
3158 }
3159 
3160 
3161 /*!	Adds a transaction listener that gets notified when the transaction
3162 	is ended, aborted, written, or idle as specified by \a events.
3163 	The listener gets automatically removed when the transaction ends.
3164 */
3165 status_t
3166 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3167 	transaction_notification_hook hook, void* data)
3168 {
3169 	block_cache* cache = (block_cache*)_cache;
3170 	TransactionLocker locker(cache);
3171 
3172 	cache_transaction* transaction = lookup_transaction(cache, id);
3173 	if (transaction == NULL)
3174 		return B_BAD_VALUE;
3175 
3176 	return add_transaction_listener(cache, transaction, events, hook, data);
3177 }
3178 
3179 
3180 status_t
3181 cache_remove_transaction_listener(void* _cache, int32 id,
3182 	transaction_notification_hook hookFunction, void* data)
3183 {
3184 	block_cache* cache = (block_cache*)_cache;
3185 	TransactionLocker locker(cache);
3186 
3187 	cache_transaction* transaction = lookup_transaction(cache, id);
3188 	if (transaction == NULL)
3189 		return B_BAD_VALUE;
3190 
3191 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3192 	while (iterator.HasNext()) {
3193 		cache_listener* listener = iterator.Next();
3194 		if (listener->data == data && listener->hook == hookFunction) {
3195 			iterator.Remove();
3196 
3197 			if (listener->events_pending != 0) {
3198 				MutexLocker _(sNotificationsLock);
3199 				if (listener->events_pending != 0)
3200 					cache->pending_notifications.Remove(listener);
3201 			}
3202 			delete listener;
3203 			return B_OK;
3204 		}
3205 	}
3206 
3207 	return B_ENTRY_NOT_FOUND;
3208 }
3209 
3210 
3211 status_t
3212 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3213 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3214 {
3215 	cached_block* block = (cached_block*)*_cookie;
3216 	block_cache* cache = (block_cache*)_cache;
3217 	TransactionLocker locker(cache);
3218 
3219 	cache_transaction* transaction = lookup_transaction(cache, id);
3220 	if (transaction == NULL || !transaction->open)
3221 		return B_BAD_VALUE;
3222 
3223 	if (block == NULL)
3224 		block = transaction->first_block;
3225 	else
3226 		block = block->transaction_next;
3227 
3228 	if (transaction->has_sub_transaction) {
3229 		if (mainOnly) {
3230 			// find next block that the parent changed
3231 			while (block != NULL && block->parent_data == NULL)
3232 				block = block->transaction_next;
3233 		} else {
3234 			// find next non-discarded block
3235 			while (block != NULL && block->discard)
3236 				block = block->transaction_next;
3237 		}
3238 	}
3239 
3240 	if (block == NULL)
3241 		return B_ENTRY_NOT_FOUND;
3242 
3243 	if (_blockNumber)
3244 		*_blockNumber = block->block_number;
3245 	if (_data)
3246 		*_data = mainOnly ? block->parent_data : block->current_data;
3247 	if (_unchangedData)
3248 		*_unchangedData = block->original_data;
3249 
3250 	*_cookie = (addr_t)block;
3251 	return B_OK;
3252 }
3253 
3254 
3255 int32
3256 cache_blocks_in_transaction(void* _cache, int32 id)
3257 {
3258 	block_cache* cache = (block_cache*)_cache;
3259 	TransactionLocker locker(cache);
3260 
3261 	cache_transaction* transaction = lookup_transaction(cache, id);
3262 	if (transaction == NULL)
3263 		return B_BAD_VALUE;
3264 
3265 	return transaction->num_blocks;
3266 }
3267 
3268 
3269 /*!	Returns the number of blocks that are part of the main transaction. If this
3270 	transaction does not have a sub transaction yet, this is the same value as
3271 	cache_blocks_in_transaction() would return.
3272 */
3273 int32
3274 cache_blocks_in_main_transaction(void* _cache, int32 id)
3275 {
3276 	block_cache* cache = (block_cache*)_cache;
3277 	TransactionLocker locker(cache);
3278 
3279 	cache_transaction* transaction = lookup_transaction(cache, id);
3280 	if (transaction == NULL)
3281 		return B_BAD_VALUE;
3282 
3283 	if (transaction->has_sub_transaction)
3284 		return transaction->main_num_blocks;
3285 
3286 	return transaction->num_blocks;
3287 }
3288 
3289 
3290 int32
3291 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3292 {
3293 	block_cache* cache = (block_cache*)_cache;
3294 	TransactionLocker locker(cache);
3295 
3296 	cache_transaction* transaction = lookup_transaction(cache, id);
3297 	if (transaction == NULL)
3298 		return B_BAD_VALUE;
3299 
3300 	return transaction->sub_num_blocks;
3301 }
3302 
3303 
3304 //	#pragma mark - public block cache API
3305 
3306 
3307 void
3308 block_cache_delete(void* _cache, bool allowWrites)
3309 {
3310 	block_cache* cache = (block_cache*)_cache;
3311 
3312 	if (allowWrites)
3313 		block_cache_sync(cache);
3314 
3315 	mutex_lock(&sCachesLock);
3316 	sCaches.Remove(cache);
3317 	mutex_unlock(&sCachesLock);
3318 
3319 	mutex_lock(&cache->lock);
3320 
3321 	// wait for all blocks to become unbusy
3322 	wait_for_busy_reading_blocks(cache);
3323 	wait_for_busy_writing_blocks(cache);
3324 
3325 	// free all blocks
3326 
3327 	cached_block* block = cache->hash->Clear(true);
3328 	while (block != NULL) {
3329 		cached_block* next = block->next;
3330 		cache->FreeBlock(block);
3331 		block = next;
3332 	}
3333 
3334 	// free all transactions (they will all be aborted)
3335 
3336 	cache_transaction* transaction = cache->transaction_hash->Clear(true);
3337 	while (transaction != NULL) {
3338 		cache_transaction* next = transaction->next;
3339 		delete transaction;
3340 		transaction = next;
3341 	}
3342 
3343 	delete cache;
3344 }
3345 
3346 
3347 void*
3348 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3349 {
3350 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3351 		readOnly);
3352 	if (cache == NULL)
3353 		return NULL;
3354 
3355 	if (cache->Init() != B_OK) {
3356 		delete cache;
3357 		return NULL;
3358 	}
3359 
3360 	MutexLocker _(sCachesLock);
3361 	sCaches.Add(cache);
3362 
3363 	return cache;
3364 }
3365 
3366 
3367 status_t
3368 block_cache_sync(void* _cache)
3369 {
3370 	block_cache* cache = (block_cache*)_cache;
3371 
3372 	// We will sync all dirty blocks to disk that have a completed
3373 	// transaction or no transaction only
3374 
3375 	MutexLocker locker(&cache->lock);
3376 
3377 	BlockWriter writer(cache);
3378 	BlockTable::Iterator iterator(cache->hash);
3379 
3380 	while (iterator.HasNext()) {
3381 		cached_block* block = iterator.Next();
3382 		if (block->CanBeWritten())
3383 			writer.Add(block);
3384 	}
3385 
3386 	status_t status = writer.Write();
3387 
3388 	locker.Unlock();
3389 
3390 	wait_for_notifications(cache);
3391 		// make sure that all pending TRANSACTION_WRITTEN notifications
3392 		// are handled after we return
3393 	return status;
3394 }
3395 
3396 
3397 status_t
3398 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3399 {
3400 	block_cache* cache = (block_cache*)_cache;
3401 
3402 	// We will sync all dirty blocks to disk that have a completed
3403 	// transaction or no transaction only
3404 
3405 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3406 		panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
3407 			" (max %" B_PRIdOFF ")",
3408 			blockNumber, cache->max_blocks - 1);
3409 		return B_BAD_VALUE;
3410 	}
3411 
3412 	MutexLocker locker(&cache->lock);
3413 	BlockWriter writer(cache);
3414 
3415 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
3416 		cached_block* block = cache->hash->Lookup(blockNumber);
3417 		if (block == NULL)
3418 			continue;
3419 
3420 		if (block->CanBeWritten())
3421 			writer.Add(block);
3422 	}
3423 
3424 	status_t status = writer.Write();
3425 
3426 	locker.Unlock();
3427 
3428 	wait_for_notifications(cache);
3429 		// make sure that all pending TRANSACTION_WRITTEN notifications
3430 		// are handled after we return
3431 	return status;
3432 }
3433 
3434 
3435 /*!	Discards a block from the current transaction or from the cache.
3436 	You have to call this function when you no longer use a block, ie. when it
3437 	might be reclaimed by the file cache in order to make sure they won't
3438 	interfere.
3439 */
3440 void
3441 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3442 {
3443 	// TODO: this could be a nice place to issue the ATA trim command
3444 	block_cache* cache = (block_cache*)_cache;
3445 	TransactionLocker locker(cache);
3446 
3447 	BlockWriter writer(cache);
3448 
3449 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3450 		cached_block* block = cache->hash->Lookup(blockNumber);
3451 		if (block != NULL && block->previous_transaction != NULL)
3452 			writer.Add(block);
3453 	}
3454 
3455 	writer.Write();
3456 		// TODO: this can fail, too!
3457 
3458 	blockNumber -= numBlocks;
3459 		// reset blockNumber to its original value
3460 
3461 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3462 		cached_block* block = cache->hash->Lookup(blockNumber);
3463 		if (block == NULL)
3464 			continue;
3465 
3466 		ASSERT(block->previous_transaction == NULL);
3467 
3468 		if (block->unused) {
3469 			cache->unused_blocks.Remove(block);
3470 			cache->unused_block_count--;
3471 			cache->RemoveBlock(block);
3472 		} else {
3473 			if (block->transaction != NULL && block->parent_data != NULL
3474 				&& block->parent_data != block->current_data) {
3475 				panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3476 					"transaction!", blockNumber);
3477 			}
3478 
3479 			// mark it as discarded (in the current transaction only, if any)
3480 			block->discard = true;
3481 		}
3482 	}
3483 }
3484 
3485 
3486 status_t
3487 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3488 {
3489 	block_cache* cache = (block_cache*)_cache;
3490 	MutexLocker locker(&cache->lock);
3491 
3492 	if (cache->read_only) {
3493 		panic("tried to make block writable on a read-only cache!");
3494 		return B_ERROR;
3495 	}
3496 
3497 	// TODO: this can be done better!
3498 	void* block = get_writable_cached_block(cache, blockNumber,
3499 		blockNumber, 1, transaction, false);
3500 	if (block != NULL) {
3501 		put_cached_block((block_cache*)_cache, blockNumber);
3502 		return B_OK;
3503 	}
3504 
3505 	return B_ERROR;
3506 }
3507 
3508 
3509 void*
3510 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3511 	off_t length, int32 transaction)
3512 {
3513 	block_cache* cache = (block_cache*)_cache;
3514 	MutexLocker locker(&cache->lock);
3515 
3516 	TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3517 		blockNumber, transaction));
3518 	if (cache->read_only)
3519 		panic("tried to get writable block on a read-only cache!");
3520 
3521 	return get_writable_cached_block(cache, blockNumber, base, length,
3522 		transaction, false);
3523 }
3524 
3525 
3526 void*
3527 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3528 {
3529 	return block_cache_get_writable_etc(_cache, blockNumber,
3530 		blockNumber, 1, transaction);
3531 }
3532 
3533 
3534 void*
3535 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3536 {
3537 	block_cache* cache = (block_cache*)_cache;
3538 	MutexLocker locker(&cache->lock);
3539 
3540 	TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3541 		blockNumber, transaction));
3542 	if (cache->read_only)
3543 		panic("tried to get empty writable block on a read-only cache!");
3544 
3545 	return get_writable_cached_block((block_cache*)_cache, blockNumber,
3546 		blockNumber, 1, transaction, true);
3547 }
3548 
3549 
3550 const void*
3551 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
3552 {
3553 	block_cache* cache = (block_cache*)_cache;
3554 	MutexLocker locker(&cache->lock);
3555 	bool allocated;
3556 
3557 	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
3558 	if (block == NULL)
3559 		return NULL;
3560 
3561 #if BLOCK_CACHE_DEBUG_CHANGED
3562 	if (block->compare == NULL)
3563 		block->compare = cache->Allocate();
3564 	if (block->compare != NULL)
3565 		memcpy(block->compare, block->current_data, cache->block_size);
3566 #endif
3567 	TB(Get(cache, block));
3568 
3569 	return block->current_data;
3570 }
3571 
3572 
3573 const void*
3574 block_cache_get(void* _cache, off_t blockNumber)
3575 {
3576 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
3577 }
3578 
3579 
3580 /*!	Changes the internal status of a writable block to \a dirty. This can be
3581 	helpful in case you realize you don't need to change that block anymore
3582 	for whatever reason.
3583 
3584 	Note, you must only use this function on blocks that were acquired
3585 	writable!
3586 */
3587 status_t
3588 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3589 	int32 transaction)
3590 {
3591 	block_cache* cache = (block_cache*)_cache;
3592 	MutexLocker locker(&cache->lock);
3593 
3594 	cached_block* block = cache->hash->Lookup(blockNumber);
3595 	if (block == NULL)
3596 		return B_BAD_VALUE;
3597 	if (block->is_dirty == dirty) {
3598 		// there is nothing to do for us
3599 		return B_OK;
3600 	}
3601 
3602 	// TODO: not yet implemented
3603 	if (dirty)
3604 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
3605 
3606 	return B_OK;
3607 }
3608 
3609 
3610 void
3611 block_cache_put(void* _cache, off_t blockNumber)
3612 {
3613 	block_cache* cache = (block_cache*)_cache;
3614 	MutexLocker locker(&cache->lock);
3615 
3616 	put_cached_block(cache, blockNumber);
3617 }
3618 
3619