xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision ca8ed5ea660fb6275799a3b7f138b201c41a667b)
1 /*
2  * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <KernelExport.h>
15 #include <fs_cache.h>
16 
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <vm/vm_page.h>
26 
27 #include "kernel_debug_config.h"
28 
29 
30 // TODO: this is a naive but growing implementation to test the API:
31 //	block reading/writing is not at all optimized for speed, it will
32 //	just read and write single blocks.
33 // TODO: the retrieval/copy of the original data could be delayed until the
34 //		new data must be written, ie. in low memory situations.
35 
36 //#define TRACE_BLOCK_CACHE
37 #ifdef TRACE_BLOCK_CACHE
38 #	define TRACE(x)	dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 #define TRACE_ALWAYS(x) dprintf x
44 
45 // This macro is used for fatal situations that are acceptable in a running
46 // system, like out of memory situations - should only panic for debugging.
47 #define FATAL(x) panic x
48 
49 static const bigtime_t kTransactionIdleTime = 2000000LL;
50 	// a transaction is considered idle after 2 seconds of inactivity
51 
52 
53 struct cache_transaction;
54 struct cached_block;
55 struct block_cache;
56 typedef DoublyLinkedListLink<cached_block> block_link;
57 
58 struct cached_block {
59 	cached_block*	next;			// next in hash
60 	cached_block*	transaction_next;
61 	block_link		link;
62 	off_t			block_number;
63 	void*			current_data;
64 		// The data that is seen by everyone using the API; this one is always
65 		// present.
66 	void*			original_data;
67 		// When in a transaction, this contains the original data from before
68 		// the transaction.
69 	void*			parent_data;
70 		// This is a lazily alloced buffer that represents the contents of the
71 		// block in the parent transaction. It may point to current_data if the
72 		// contents have been changed only in the parent transaction, or, if the
73 		// block has been changed in the current sub transaction already, to a
74 		// new block containing the contents changed in the parent transaction.
75 		// If this is NULL, the block has not been changed in the parent
76 		// transaction at all.
77 #if BLOCK_CACHE_DEBUG_CHANGED
78 	void*			compare;
79 #endif
80 	int32			ref_count;
81 	int32			last_accessed;
82 	bool			busy_reading : 1;
83 	bool			busy_writing : 1;
84 	bool			is_writing : 1;
85 		// Block has been checked out for writing without transactions, and
86 		// cannot be written back if set
87 	bool			is_dirty : 1;
88 	bool			unused : 1;
89 	bool			discard : 1;
90 	bool			busy_reading_waiters : 1;
91 	bool			busy_writing_waiters : 1;
92 	cache_transaction* transaction;
93 		// This is the current active transaction, if any, the block is
94 		// currently in (meaning was changed as a part of it).
95 	cache_transaction* previous_transaction;
96 		// This is set to the last transaction that was ended containing this
97 		// block. In this case, the block has not yet written back yet, and
98 		// the changed data is either in current_data, or original_data -- the
99 		// latter if the block is already being part of another transaction.
100 		// There can only be one previous transaction, so when the active
101 		// transaction ends, the changes of the previous transaction have to
102 		// be written back before that transaction becomes the next previous
103 		// transaction.
104 
105 	bool CanBeWritten() const;
106 	int32 LastAccess() const
107 		{ return system_time() / 1000000L - last_accessed; }
108 };
109 
110 typedef DoublyLinkedList<cached_block,
111 	DoublyLinkedListMemberGetLink<cached_block,
112 		&cached_block::link> > block_list;
113 
114 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
115 	int32			transaction_id;
116 	int32			events_pending;
117 	int32			events;
118 	transaction_notification_hook hook;
119 	void*			data;
120 	bool			delete_after_event;
121 };
122 
123 typedef DoublyLinkedList<cache_notification> NotificationList;
124 
125 struct BlockHash {
126 	typedef off_t			KeyType;
127 	typedef	cached_block	ValueType;
128 
129 	size_t HashKey(KeyType key) const
130 	{
131 		return key;
132 	}
133 
134 	size_t Hash(ValueType* block) const
135 	{
136 		return block->block_number;
137 	}
138 
139 	bool Compare(KeyType key, ValueType* block) const
140 	{
141 		return block->block_number == key;
142 	}
143 
144 	ValueType*& GetLink(ValueType* value) const
145 	{
146 		return value->next;
147 	}
148 };
149 
150 typedef BOpenHashTable<BlockHash> BlockTable;
151 
152 
153 struct TransactionHash {
154 	typedef int32				KeyType;
155 	typedef	cache_transaction	ValueType;
156 
157 	size_t HashKey(KeyType key) const
158 	{
159 		return key;
160 	}
161 
162 	size_t Hash(ValueType* transaction) const;
163 	bool Compare(KeyType key, ValueType* transaction) const;
164 	ValueType*& GetLink(ValueType* value) const;
165 };
166 
167 typedef BOpenHashTable<TransactionHash> TransactionTable;
168 
169 
170 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
171 	BlockTable*		hash;
172 	mutex			lock;
173 	int				fd;
174 	off_t			max_blocks;
175 	size_t			block_size;
176 	int32			next_transaction_id;
177 	cache_transaction* last_transaction;
178 	TransactionTable* transaction_hash;
179 
180 	object_cache*	buffer_cache;
181 	block_list		unused_blocks;
182 	uint32			unused_block_count;
183 
184 	ConditionVariable busy_reading_condition;
185 	uint32			busy_reading_count;
186 	bool			busy_reading_waiters;
187 
188 	ConditionVariable busy_writing_condition;
189 	uint32			busy_writing_count;
190 	bool			busy_writing_waiters;
191 
192 	uint32			num_dirty_blocks;
193 	bool			read_only;
194 
195 	NotificationList pending_notifications;
196 	ConditionVariable condition_variable;
197 
198 					block_cache(int fd, off_t numBlocks, size_t blockSize,
199 						bool readOnly);
200 					~block_cache();
201 
202 	status_t		Init();
203 
204 	void			Free(void* buffer);
205 	void*			Allocate();
206 	void			FreeBlock(cached_block* block);
207 	cached_block*	NewBlock(off_t blockNumber);
208 	void			FreeBlockParentData(cached_block* block);
209 
210 	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
211 	void			RemoveBlock(cached_block* block);
212 	void			DiscardBlock(cached_block* block);
213 
214 private:
215 	static void		_LowMemoryHandler(void* data, uint32 resources,
216 						int32 level);
217 	cached_block*	_GetUnusedBlock();
218 };
219 
220 struct cache_listener;
221 typedef DoublyLinkedListLink<cache_listener> listener_link;
222 
223 struct cache_listener : cache_notification {
224 	listener_link	link;
225 };
226 
227 typedef DoublyLinkedList<cache_listener,
228 	DoublyLinkedListMemberGetLink<cache_listener,
229 		&cache_listener::link> > ListenerList;
230 
231 
232 struct cache_transaction {
233 	cache_transaction();
234 
235 	cache_transaction* next;
236 	int32			id;
237 	int32			num_blocks;
238 	int32			main_num_blocks;
239 	int32			sub_num_blocks;
240 	cached_block*	first_block;
241 	block_list		blocks;
242 	ListenerList	listeners;
243 	bool			open;
244 	bool			has_sub_transaction;
245 	bigtime_t		last_used;
246 	int32			busy_writing_count;
247 };
248 
249 
250 class BlockWriter {
251 public:
252 								BlockWriter(block_cache* cache,
253 									size_t max = SIZE_MAX);
254 								~BlockWriter();
255 
256 			bool				Add(cached_block* block,
257 									cache_transaction* transaction = NULL);
258 			bool				Add(cache_transaction* transaction,
259 									bool& hasLeftOvers);
260 
261 			status_t			Write(cache_transaction* transaction = NULL,
262 									bool canUnlock = true);
263 
264 			bool				DeletedTransaction() const
265 									{ return fDeletedTransaction; }
266 
267 	static	status_t			WriteBlock(block_cache* cache,
268 									cached_block* block);
269 
270 private:
271 			void*				_Data(cached_block* block) const;
272 			status_t			_WriteBlock(cached_block* block);
273 			void				_BlockDone(cached_block* block,
274 									cache_transaction* transaction);
275 			void				_UnmarkWriting(cached_block* block);
276 
277 	static	int					_CompareBlocks(const void* _blockA,
278 									const void* _blockB);
279 
280 private:
281 	static	const size_t		kBufferSize = 64;
282 
283 			block_cache*		fCache;
284 			cached_block*		fBuffer[kBufferSize];
285 			cached_block**		fBlocks;
286 			size_t				fCount;
287 			size_t				fTotal;
288 			size_t				fCapacity;
289 			size_t				fMax;
290 			status_t			fStatus;
291 			bool				fDeletedTransaction;
292 };
293 
294 
295 class TransactionLocking {
296 public:
297 	inline bool Lock(block_cache* cache)
298 	{
299 		mutex_lock(&cache->lock);
300 
301 		while (cache->busy_writing_count != 0) {
302 			// wait for all blocks to be written
303 			ConditionVariableEntry entry;
304 			cache->busy_writing_condition.Add(&entry);
305 			cache->busy_writing_waiters = true;
306 
307 			mutex_unlock(&cache->lock);
308 
309 			entry.Wait();
310 
311 			mutex_lock(&cache->lock);
312 		}
313 
314 		return true;
315 	}
316 
317 	inline void Unlock(block_cache* cache)
318 	{
319 		mutex_unlock(&cache->lock);
320 	}
321 };
322 
323 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
324 
325 
326 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
327 namespace BlockTracing {
328 
329 class Action : public AbstractTraceEntry {
330 public:
331 	Action(block_cache* cache, cached_block* block)
332 		:
333 		fCache(cache),
334 		fBlockNumber(block->block_number),
335 		fIsDirty(block->is_dirty),
336 		fHasOriginal(block->original_data != NULL),
337 		fHasParent(block->parent_data != NULL),
338 		fTransactionID(-1),
339 		fPreviousID(-1)
340 	{
341 		if (block->transaction != NULL)
342 			fTransactionID = block->transaction->id;
343 		if (block->previous_transaction != NULL)
344 			fPreviousID = block->previous_transaction->id;
345 	}
346 
347 	virtual void AddDump(TraceOutput& out)
348 	{
349 		out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
350 			" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
351 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
352 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
353 	}
354 
355 	virtual const char* _Action() const = 0;
356 
357 private:
358 	block_cache*		fCache;
359 	uint64				fBlockNumber;
360 	bool				fIsDirty;
361 	bool				fHasOriginal;
362 	bool				fHasParent;
363 	int32				fTransactionID;
364 	int32				fPreviousID;
365 };
366 
367 class Get : public Action {
368 public:
369 	Get(block_cache* cache, cached_block* block)
370 		:
371 		Action(cache, block)
372 	{
373 		Initialized();
374 	}
375 
376 	virtual const char* _Action() const { return "get"; }
377 };
378 
379 class Put : public Action {
380 public:
381 	Put(block_cache* cache, cached_block* block)
382 		:
383 		Action(cache, block)
384 	{
385 		Initialized();
386 	}
387 
388 	virtual const char* _Action() const { return "put"; }
389 };
390 
391 class Read : public Action {
392 public:
393 	Read(block_cache* cache, cached_block* block)
394 		:
395 		Action(cache, block)
396 	{
397 		Initialized();
398 	}
399 
400 	virtual const char* _Action() const { return "read"; }
401 };
402 
403 class Write : public Action {
404 public:
405 	Write(block_cache* cache, cached_block* block)
406 		:
407 		Action(cache, block)
408 	{
409 		Initialized();
410 	}
411 
412 	virtual const char* _Action() const { return "write"; }
413 };
414 
415 class Flush : public Action {
416 public:
417 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
418 		:
419 		Action(cache, block),
420 		fGetUnused(getUnused)
421 	{
422 		Initialized();
423 	}
424 
425 	virtual const char* _Action() const
426 		{ return fGetUnused ? "get-unused" : "flush"; }
427 
428 private:
429 	bool	fGetUnused;
430 };
431 
432 class Error : public AbstractTraceEntry {
433 public:
434 	Error(block_cache* cache, uint64 blockNumber, const char* message,
435 			status_t status = B_OK)
436 		:
437 		fCache(cache),
438 		fBlockNumber(blockNumber),
439 		fMessage(message),
440 		fStatus(status)
441 	{
442 		Initialized();
443 	}
444 
445 	virtual void AddDump(TraceOutput& out)
446 	{
447 		out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
448 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
449 			fStatus != B_OK ? strerror(fStatus) : "");
450 	}
451 
452 private:
453 	block_cache*	fCache;
454 	uint64			fBlockNumber;
455 	const char*		fMessage;
456 	status_t		fStatus;
457 };
458 
459 #if BLOCK_CACHE_BLOCK_TRACING >= 2
460 class BlockData : public AbstractTraceEntry {
461 public:
462 	enum {
463 		kCurrent	= 0x01,
464 		kParent		= 0x02,
465 		kOriginal	= 0x04
466 	};
467 
468 	BlockData(block_cache* cache, cached_block* block, const char* message)
469 		:
470 		fCache(cache),
471 		fSize(cache->block_size),
472 		fBlockNumber(block->block_number),
473 		fMessage(message)
474 	{
475 		_Allocate(fCurrent, block->current_data);
476 		_Allocate(fParent, block->parent_data);
477 		_Allocate(fOriginal, block->original_data);
478 
479 #if KTRACE_PRINTF_STACK_TRACE
480 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
481 			false);
482 #endif
483 
484 		Initialized();
485 	}
486 
487 	virtual void AddDump(TraceOutput& out)
488 	{
489 		out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
490 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
491 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
492 			fMessage);
493 	}
494 
495 #if KTRACE_PRINTF_STACK_TRACE
496 	virtual void DumpStackTrace(TraceOutput& out)
497 	{
498 		out.PrintStackTrace(fStackTrace);
499 	}
500 #endif
501 
502 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
503 	{
504 		if ((which & kCurrent) != 0)
505 			DumpBlock(kCurrent, offset, size);
506 		if ((which & kParent) != 0)
507 			DumpBlock(kParent, offset, size);
508 		if ((which & kOriginal) != 0)
509 			DumpBlock(kOriginal, offset, size);
510 	}
511 
512 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
513 	{
514 		if (offset > fSize) {
515 			kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
516 			return;
517 		}
518 		if (offset + size > fSize)
519 			size = fSize - offset;
520 
521 		const char* label;
522 		uint8* data;
523 
524 		if ((which & kCurrent) != 0) {
525 			label = "current";
526 			data = fCurrent;
527 		} else if ((which & kParent) != 0) {
528 			label = "parent";
529 			data = fParent;
530 		} else if ((which & kOriginal) != 0) {
531 			label = "original";
532 			data = fOriginal;
533 		} else
534 			return;
535 
536 		kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
537 
538 		static const uint32 kBlockSize = 16;
539 		data += offset;
540 
541 		for (uint32 i = 0; i < size;) {
542 			int start = i;
543 
544 			kprintf("  %04" B_PRIx32 " ", i);
545 			for (; i < start + kBlockSize; i++) {
546 				if (!(i % 4))
547 					kprintf(" ");
548 
549 				if (i >= size)
550 					kprintf("  ");
551 				else
552 					kprintf("%02x", data[i]);
553 			}
554 
555 			kprintf("\n");
556 		}
557 	}
558 
559 private:
560 	void _Allocate(uint8*& target, void* source)
561 	{
562 		if (source == NULL) {
563 			target = NULL;
564 			return;
565 		}
566 
567 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
568 	}
569 
570 	block_cache*	fCache;
571 	uint32			fSize;
572 	uint64			fBlockNumber;
573 	const char*		fMessage;
574 	uint8*			fCurrent;
575 	uint8*			fParent;
576 	uint8*			fOriginal;
577 #if KTRACE_PRINTF_STACK_TRACE
578 	tracing_stack_trace* fStackTrace;
579 #endif
580 };
581 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
582 
583 }	// namespace BlockTracing
584 
585 #	define TB(x) new(std::nothrow) BlockTracing::x;
586 #else
587 #	define TB(x) ;
588 #endif
589 
590 #if BLOCK_CACHE_BLOCK_TRACING >= 2
591 #	define TB2(x) new(std::nothrow) BlockTracing::x;
592 #else
593 #	define TB2(x) ;
594 #endif
595 
596 
597 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
598 namespace TransactionTracing {
599 
600 class Action : public AbstractTraceEntry {
601 public:
602 	Action(const char* label, block_cache* cache,
603 			cache_transaction* transaction)
604 		:
605 		fCache(cache),
606 		fTransaction(transaction),
607 		fID(transaction->id),
608 		fSub(transaction->has_sub_transaction),
609 		fNumBlocks(transaction->num_blocks),
610 		fSubNumBlocks(transaction->sub_num_blocks)
611 	{
612 		strlcpy(fLabel, label, sizeof(fLabel));
613 		Initialized();
614 	}
615 
616 	virtual void AddDump(TraceOutput& out)
617 	{
618 		out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
619 			", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
620 			fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
621 	}
622 
623 private:
624 	char				fLabel[12];
625 	block_cache*		fCache;
626 	cache_transaction*	fTransaction;
627 	int32				fID;
628 	bool				fSub;
629 	int32				fNumBlocks;
630 	int32				fSubNumBlocks;
631 };
632 
633 class Detach : public AbstractTraceEntry {
634 public:
635 	Detach(block_cache* cache, cache_transaction* transaction,
636 			cache_transaction* newTransaction)
637 		:
638 		fCache(cache),
639 		fTransaction(transaction),
640 		fID(transaction->id),
641 		fSub(transaction->has_sub_transaction),
642 		fNewTransaction(newTransaction),
643 		fNewID(newTransaction->id)
644 	{
645 		Initialized();
646 	}
647 
648 	virtual void AddDump(TraceOutput& out)
649 	{
650 		out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
651 			"from transaction %p (id %" B_PRId32 ")%s",
652 			fCache, fNewTransaction, fNewID, fTransaction, fID,
653 			fSub ? " sub" : "");
654 	}
655 
656 private:
657 	block_cache*		fCache;
658 	cache_transaction*	fTransaction;
659 	int32				fID;
660 	bool				fSub;
661 	cache_transaction*	fNewTransaction;
662 	int32				fNewID;
663 };
664 
665 class Abort : public AbstractTraceEntry {
666 public:
667 	Abort(block_cache* cache, cache_transaction* transaction)
668 		:
669 		fCache(cache),
670 		fTransaction(transaction),
671 		fID(transaction->id),
672 		fNumBlocks(0)
673 	{
674 		bool isSub = transaction->has_sub_transaction;
675 		fNumBlocks = isSub ? transaction->sub_num_blocks
676 			: transaction->num_blocks;
677 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
678 		if (fBlocks != NULL) {
679 			cached_block* block = transaction->first_block;
680 			for (int32 i = 0; block != NULL && i < fNumBlocks;
681 					block = block->transaction_next) {
682 				fBlocks[i++] = block->block_number;
683 			}
684 		} else
685 			fNumBlocks = 0;
686 
687 #if KTRACE_PRINTF_STACK_TRACE
688 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
689 			false);
690 #endif
691 
692 		Initialized();
693 	}
694 
695 	virtual void AddDump(TraceOutput& out)
696 	{
697 		out.Print("block cache %p, abort transaction "
698 			"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
699 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
700 			out.Print(" %" B_PRIdOFF, fBlocks[i]);
701 	}
702 
703 #if KTRACE_PRINTF_STACK_TRACE
704 	virtual void DumpStackTrace(TraceOutput& out)
705 	{
706 		out.PrintStackTrace(fStackTrace);
707 	}
708 #endif
709 
710 private:
711 	block_cache*		fCache;
712 	cache_transaction*	fTransaction;
713 	int32				fID;
714 	off_t*				fBlocks;
715 	int32				fNumBlocks;
716 #if KTRACE_PRINTF_STACK_TRACE
717 	tracing_stack_trace* fStackTrace;
718 #endif
719 };
720 
721 }	// namespace TransactionTracing
722 
723 #	define T(x) new(std::nothrow) TransactionTracing::x;
724 #else
725 #	define T(x) ;
726 #endif
727 
728 
729 static DoublyLinkedList<block_cache> sCaches;
730 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
731 static mutex sCachesMemoryUseLock
732 	= MUTEX_INITIALIZER("block caches memory use");
733 static size_t sUsedMemory;
734 static sem_id sEventSemaphore;
735 static mutex sNotificationsLock
736 	= MUTEX_INITIALIZER("block cache notifications");
737 static thread_id sNotifierWriterThread;
738 static DoublyLinkedListLink<block_cache> sMarkCache;
739 	// TODO: this only works if the link is the first entry of block_cache
740 static object_cache* sBlockCache;
741 
742 
743 //	#pragma mark - notifications/listener
744 
745 
746 /*!	Checks whether or not this is an event that closes a transaction. */
747 static inline bool
748 is_closing_event(int32 event)
749 {
750 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
751 }
752 
753 
754 static inline bool
755 is_written_event(int32 event)
756 {
757 	return (event & TRANSACTION_WRITTEN) != 0;
758 }
759 
760 
761 /*!	From the specified \a notification, it will remove the lowest pending
762 	event, and return that one in \a _event.
763 	If there is no pending event anymore, it will return \c false.
764 */
765 static bool
766 get_next_pending_event(cache_notification* notification, int32* _event)
767 {
768 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
769 		int32 pending = atomic_and(&notification->events_pending,
770 			~eventMask);
771 
772 		bool more = (pending & ~eventMask) != 0;
773 
774 		if ((pending & eventMask) != 0) {
775 			*_event = eventMask;
776 			return more;
777 		}
778 	}
779 
780 	return false;
781 }
782 
783 
784 static void
785 flush_pending_notifications(block_cache* cache)
786 {
787 	ASSERT_LOCKED_MUTEX(&sCachesLock);
788 
789 	while (true) {
790 		MutexLocker locker(sNotificationsLock);
791 
792 		cache_notification* notification = cache->pending_notifications.Head();
793 		if (notification == NULL)
794 			return;
795 
796 		bool deleteAfterEvent = false;
797 		int32 event = -1;
798 		if (!get_next_pending_event(notification, &event)) {
799 			// remove the notification if this was the last pending event
800 			cache->pending_notifications.Remove(notification);
801 			deleteAfterEvent = notification->delete_after_event;
802 		}
803 
804 		if (event >= 0) {
805 			// Notify listener, we need to copy the notification, as it might
806 			// be removed when we unlock the list.
807 			cache_notification copy = *notification;
808 			locker.Unlock();
809 
810 			copy.hook(copy.transaction_id, event, copy.data);
811 
812 			locker.Lock();
813 		}
814 
815 		if (deleteAfterEvent)
816 			delete notification;
817 	}
818 }
819 
820 
821 /*!	Flushes all pending notifications by calling the appropriate hook
822 	functions.
823 	Must not be called with a cache lock held.
824 */
825 static void
826 flush_pending_notifications()
827 {
828 	MutexLocker _(sCachesLock);
829 
830 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
831 	while (iterator.HasNext()) {
832 		block_cache* cache = iterator.Next();
833 
834 		flush_pending_notifications(cache);
835 	}
836 }
837 
838 
839 /*!	Initializes the \a notification as specified. */
840 static void
841 set_notification(cache_transaction* transaction,
842 	cache_notification &notification, int32 events,
843 	transaction_notification_hook hook, void* data)
844 {
845 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
846 	notification.events_pending = 0;
847 	notification.events = events;
848 	notification.hook = hook;
849 	notification.data = data;
850 	notification.delete_after_event = false;
851 }
852 
853 
854 /*!	Makes sure the notification is deleted. It either deletes it directly,
855 	when possible, or marks it for deletion if the notification is pending.
856 */
857 static void
858 delete_notification(cache_notification* notification)
859 {
860 	MutexLocker locker(sNotificationsLock);
861 
862 	if (notification->events_pending != 0)
863 		notification->delete_after_event = true;
864 	else
865 		delete notification;
866 }
867 
868 
869 /*!	Adds the notification to the pending notifications list, or, if it's
870 	already part of it, updates its events_pending field.
871 	Also marks the notification to be deleted if \a deleteNotification
872 	is \c true.
873 	Triggers the notifier thread to run.
874 */
875 static void
876 add_notification(block_cache* cache, cache_notification* notification,
877 	int32 event, bool deleteNotification)
878 {
879 	if (notification->hook == NULL)
880 		return;
881 
882 	int32 pending = atomic_or(&notification->events_pending, event);
883 	if (pending == 0) {
884 		// not yet part of the notification list
885 		MutexLocker locker(sNotificationsLock);
886 		if (deleteNotification)
887 			notification->delete_after_event = true;
888 		cache->pending_notifications.Add(notification);
889 	} else if (deleteNotification) {
890 		// we might need to delete it ourselves if we're late
891 		delete_notification(notification);
892 	}
893 
894 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
895 		// We're probably still holding some locks that makes rescheduling
896 		// not a good idea at this point.
897 }
898 
899 
900 /*!	Notifies all interested listeners of this transaction about the \a event.
901 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
902 	TRANSACTION_ABORTED), all listeners except those listening to
903 	TRANSACTION_WRITTEN will be removed.
904 */
905 static void
906 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
907 	int32 event)
908 {
909 	T(Action("notify", cache, transaction));
910 
911 	bool isClosing = is_closing_event(event);
912 	bool isWritten = is_written_event(event);
913 
914 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
915 	while (iterator.HasNext()) {
916 		cache_listener* listener = iterator.Next();
917 
918 		bool remove = (isClosing && !is_written_event(listener->events))
919 			|| (isWritten && is_written_event(listener->events));
920 		if (remove)
921 			iterator.Remove();
922 
923 		if ((listener->events & event) != 0)
924 			add_notification(cache, listener, event, remove);
925 		else if (remove)
926 			delete_notification(listener);
927 	}
928 }
929 
930 
931 /*!	Removes and deletes all listeners that are still monitoring this
932 	transaction.
933 */
934 static void
935 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
936 {
937 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
938 	while (iterator.HasNext()) {
939 		cache_listener* listener = iterator.Next();
940 		iterator.Remove();
941 
942 		delete_notification(listener);
943 	}
944 }
945 
946 
947 static status_t
948 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
949 	int32 events, transaction_notification_hook hookFunction, void* data)
950 {
951 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
952 	while (iterator.HasNext()) {
953 		cache_listener* listener = iterator.Next();
954 
955 		if (listener->data == data && listener->hook == hookFunction) {
956 			// this listener already exists, just update it
957 			listener->events |= events;
958 			return B_OK;
959 		}
960 	}
961 
962 	cache_listener* listener = new(std::nothrow) cache_listener;
963 	if (listener == NULL)
964 		return B_NO_MEMORY;
965 
966 	set_notification(transaction, *listener, events, hookFunction, data);
967 	transaction->listeners.Add(listener);
968 	return B_OK;
969 }
970 
971 
972 //	#pragma mark - private transaction
973 
974 
975 cache_transaction::cache_transaction()
976 {
977 	num_blocks = 0;
978 	main_num_blocks = 0;
979 	sub_num_blocks = 0;
980 	first_block = NULL;
981 	open = true;
982 	has_sub_transaction = false;
983 	last_used = system_time();
984 	busy_writing_count = 0;
985 }
986 
987 
988 static void
989 delete_transaction(block_cache* cache, cache_transaction* transaction)
990 {
991 	if (cache->last_transaction == transaction)
992 		cache->last_transaction = NULL;
993 
994 	remove_transaction_listeners(cache, transaction);
995 	delete transaction;
996 }
997 
998 
999 static cache_transaction*
1000 lookup_transaction(block_cache* cache, int32 id)
1001 {
1002 	return cache->transaction_hash->Lookup(id);
1003 }
1004 
1005 
1006 size_t TransactionHash::Hash(cache_transaction* transaction) const
1007 {
1008 	return transaction->id;
1009 }
1010 
1011 
1012 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
1013 {
1014 	return transaction->id == key;
1015 }
1016 
1017 
1018 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
1019 {
1020 	return value->next;
1021 }
1022 
1023 
1024 /*!	Writes back any changes made to blocks in \a transaction that are still
1025 	part of a previous transacton.
1026 */
1027 static status_t
1028 write_blocks_in_previous_transaction(block_cache* cache,
1029 	cache_transaction* transaction)
1030 {
1031 	BlockWriter writer(cache);
1032 
1033 	cached_block* block = transaction->first_block;
1034 	for (; block != NULL; block = block->transaction_next) {
1035 		if (block->previous_transaction != NULL) {
1036 			// need to write back pending changes
1037 			writer.Add(block);
1038 		}
1039 	}
1040 
1041 	return writer.Write();
1042 }
1043 
1044 
1045 //	#pragma mark - cached_block
1046 
1047 
1048 bool
1049 cached_block::CanBeWritten() const
1050 {
1051 	return !busy_writing && !busy_reading
1052 		&& (previous_transaction != NULL
1053 			|| (transaction == NULL && is_dirty && !is_writing));
1054 }
1055 
1056 
1057 //	#pragma mark - BlockWriter
1058 
1059 
1060 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1061 	:
1062 	fCache(cache),
1063 	fBlocks(fBuffer),
1064 	fCount(0),
1065 	fTotal(0),
1066 	fCapacity(kBufferSize),
1067 	fMax(max),
1068 	fStatus(B_OK),
1069 	fDeletedTransaction(false)
1070 {
1071 }
1072 
1073 
1074 BlockWriter::~BlockWriter()
1075 {
1076 	if (fBlocks != fBuffer)
1077 		free(fBlocks);
1078 }
1079 
1080 
1081 /*!	Adds the specified block to the to be written array. If no more blocks can
1082 	be added, false is returned, otherwise true.
1083 */
1084 bool
1085 BlockWriter::Add(cached_block* block, cache_transaction* transaction)
1086 {
1087 	ASSERT(block->CanBeWritten());
1088 
1089 	if (fTotal == fMax)
1090 		return false;
1091 
1092 	if (fCount >= fCapacity) {
1093 		// Enlarge array if necessary
1094 		cached_block** newBlocks;
1095 		size_t newCapacity = max_c(256, fCapacity * 2);
1096 		if (fBlocks == fBuffer)
1097 			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1098 		else {
1099 			newBlocks = (cached_block**)realloc(fBlocks,
1100 				newCapacity * sizeof(void*));
1101 		}
1102 
1103 		if (newBlocks == NULL) {
1104 			// Allocating a larger array failed - we need to write back what
1105 			// we have synchronously now (this will also clear the array)
1106 			Write(transaction, false);
1107 		} else {
1108 			if (fBlocks == fBuffer)
1109 				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1110 
1111 			fBlocks = newBlocks;
1112 			fCapacity = newCapacity;
1113 		}
1114 	}
1115 
1116 	fBlocks[fCount++] = block;
1117 	fTotal++;
1118 	block->busy_writing = true;
1119 	fCache->busy_writing_count++;
1120 	if (block->previous_transaction != NULL)
1121 		block->previous_transaction->busy_writing_count++;
1122 
1123 	return true;
1124 }
1125 
1126 
1127 /*!	Adds all blocks of the specified transaction to the to be written array.
1128 	If no more blocks can be added, false is returned, otherwise true.
1129 */
1130 bool
1131 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
1132 {
1133 	ASSERT(!transaction->open);
1134 
1135 	if (transaction->busy_writing_count != 0) {
1136 		hasLeftOvers = true;
1137 		return true;
1138 	}
1139 
1140 	hasLeftOvers = false;
1141 
1142 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1143 	while (cached_block* block = blockIterator.Next()) {
1144 		if (!block->CanBeWritten()) {
1145 			// This block was already part of a previous transaction within this
1146 			// writer
1147 			hasLeftOvers = true;
1148 			continue;
1149 		}
1150 		if (!Add(block, transaction))
1151 			return false;
1152 
1153 		if (DeletedTransaction())
1154 			break;
1155 	}
1156 
1157 	return true;
1158 }
1159 
1160 
1161 /*! Cache must be locked when calling this method, but it will be unlocked
1162 	while the blocks are written back.
1163 */
1164 status_t
1165 BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
1166 {
1167 	if (fCount == 0)
1168 		return B_OK;
1169 
1170 	if (canUnlock)
1171 		mutex_unlock(&fCache->lock);
1172 
1173 	// Sort blocks in their on-disk order
1174 	// TODO: ideally, this should be handled by the I/O scheduler
1175 
1176 	qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1177 	fDeletedTransaction = false;
1178 
1179 	for (uint32 i = 0; i < fCount; i++) {
1180 		status_t status = _WriteBlock(fBlocks[i]);
1181 		if (status != B_OK) {
1182 			// propagate to global error handling
1183 			if (fStatus == B_OK)
1184 				fStatus = status;
1185 
1186 			_UnmarkWriting(fBlocks[i]);
1187 			fBlocks[i] = NULL;
1188 				// This block will not be marked clean
1189 		}
1190 	}
1191 
1192 	if (canUnlock)
1193 		mutex_lock(&fCache->lock);
1194 
1195 	for (uint32 i = 0; i < fCount; i++)
1196 		_BlockDone(fBlocks[i], transaction);
1197 
1198 	fCount = 0;
1199 	return fStatus;
1200 }
1201 
1202 
1203 /*!	Writes the specified \a block back to disk. It will always only write back
1204 	the oldest change of the block if it is part of more than one transaction.
1205 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1206 	delete transactions when they are no longer used, and \a deleteTransaction
1207 	is \c true.
1208 */
1209 /*static*/ status_t
1210 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1211 {
1212 	BlockWriter writer(cache);
1213 
1214 	writer.Add(block);
1215 	return writer.Write();
1216 }
1217 
1218 
1219 void*
1220 BlockWriter::_Data(cached_block* block) const
1221 {
1222 	return block->previous_transaction != NULL && block->original_data != NULL
1223 		? block->original_data : block->current_data;
1224 		// We first need to write back changes from previous transactions
1225 }
1226 
1227 
1228 status_t
1229 BlockWriter::_WriteBlock(cached_block* block)
1230 {
1231 	ASSERT(block->busy_writing);
1232 
1233 	TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
1234 	TB(Write(fCache, block));
1235 	TB2(BlockData(fCache, block, "before write"));
1236 
1237 	size_t blockSize = fCache->block_size;
1238 
1239 	ssize_t written = write_pos(fCache->fd,
1240 		block->block_number * blockSize, _Data(block), blockSize);
1241 
1242 	if (written != (ssize_t)blockSize) {
1243 		TB(Error(fCache, block->block_number, "write failed", written));
1244 		TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number,
1245 			strerror(errno)));
1246 		if (written < 0)
1247 			return errno;
1248 
1249 		return B_IO_ERROR;
1250 	}
1251 
1252 	return B_OK;
1253 }
1254 
1255 
1256 void
1257 BlockWriter::_BlockDone(cached_block* block,
1258 	cache_transaction* transaction)
1259 {
1260 	if (block == NULL) {
1261 		// An error occured when trying to write this block
1262 		return;
1263 	}
1264 
1265 	if (fCache->num_dirty_blocks > 0)
1266 		fCache->num_dirty_blocks--;
1267 
1268 	if (_Data(block) == block->current_data)
1269 		block->is_dirty = false;
1270 
1271 	_UnmarkWriting(block);
1272 
1273 	cache_transaction* previous = block->previous_transaction;
1274 	if (previous != NULL) {
1275 		previous->blocks.Remove(block);
1276 		block->previous_transaction = NULL;
1277 
1278 		if (block->original_data != NULL && block->transaction == NULL) {
1279 			// This block is not part of a transaction, so it does not need
1280 			// its original pointer anymore.
1281 			fCache->Free(block->original_data);
1282 			block->original_data = NULL;
1283 		}
1284 
1285 		// Has the previous transaction been finished with that write?
1286 		if (--previous->num_blocks == 0) {
1287 			TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1288 			T(Action("written", fCache, previous));
1289 
1290 			notify_transaction_listeners(fCache, previous,
1291 				TRANSACTION_WRITTEN);
1292 
1293 			if (transaction != NULL) {
1294 				// This function is called while iterating transaction_hash. We
1295 				// use RemoveUnchecked so the iterator is still valid. A regular
1296 				// Remove can trigger a resize of the hash table which would
1297 				// result in the linked items in the table changing order.
1298 				fCache->transaction_hash->RemoveUnchecked(transaction);
1299 			} else
1300 				fCache->transaction_hash->Remove(previous);
1301 
1302 			delete_transaction(fCache, previous);
1303 			fDeletedTransaction = true;
1304 		}
1305 	}
1306 	if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1307 		// the block is no longer used
1308 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1309 		block->unused = true;
1310 		fCache->unused_blocks.Add(block);
1311 		fCache->unused_block_count++;
1312 	}
1313 
1314 	TB2(BlockData(fCache, block, "after write"));
1315 }
1316 
1317 
1318 void
1319 BlockWriter::_UnmarkWriting(cached_block* block)
1320 {
1321 	block->busy_writing = false;
1322 	if (block->previous_transaction != NULL)
1323 		block->previous_transaction->busy_writing_count--;
1324 	fCache->busy_writing_count--;
1325 
1326 	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1327 		|| block->busy_writing_waiters) {
1328 		fCache->busy_writing_waiters = false;
1329 		block->busy_writing_waiters = false;
1330 		fCache->busy_writing_condition.NotifyAll();
1331 	}
1332 }
1333 
1334 
1335 /*static*/ int
1336 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1337 {
1338 	cached_block* blockA = *(cached_block**)_blockA;
1339 	cached_block* blockB = *(cached_block**)_blockB;
1340 
1341 	off_t diff = blockA->block_number - blockB->block_number;
1342 	if (diff > 0)
1343 		return 1;
1344 
1345 	return diff < 0 ? -1 : 0;
1346 }
1347 
1348 
1349 //	#pragma mark - block_cache
1350 
1351 
1352 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1353 		bool readOnly)
1354 	:
1355 	hash(NULL),
1356 	fd(_fd),
1357 	max_blocks(numBlocks),
1358 	block_size(blockSize),
1359 	next_transaction_id(1),
1360 	last_transaction(NULL),
1361 	transaction_hash(NULL),
1362 	buffer_cache(NULL),
1363 	unused_block_count(0),
1364 	busy_reading_count(0),
1365 	busy_reading_waiters(false),
1366 	busy_writing_count(0),
1367 	busy_writing_waiters(0),
1368 	num_dirty_blocks(0),
1369 	read_only(readOnly)
1370 {
1371 }
1372 
1373 
1374 /*! Should be called with the cache's lock held. */
1375 block_cache::~block_cache()
1376 {
1377 	unregister_low_resource_handler(&_LowMemoryHandler, this);
1378 
1379 	delete transaction_hash;
1380 	delete hash;
1381 
1382 	delete_object_cache(buffer_cache);
1383 
1384 	mutex_destroy(&lock);
1385 }
1386 
1387 
1388 status_t
1389 block_cache::Init()
1390 {
1391 	busy_reading_condition.Init(this, "cache block busy_reading");
1392 	busy_writing_condition.Init(this, "cache block busy writing");
1393 	condition_variable.Init(this, "cache transaction sync");
1394 	mutex_init(&lock, "block cache");
1395 
1396 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1397 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1398 	if (buffer_cache == NULL)
1399 		return B_NO_MEMORY;
1400 
1401 	hash = new BlockTable();
1402 	if (hash == NULL || hash->Init(1024) != B_OK)
1403 		return B_NO_MEMORY;
1404 
1405 	transaction_hash = new(std::nothrow) TransactionTable();
1406 	if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
1407 		return B_NO_MEMORY;
1408 
1409 	return register_low_resource_handler(&_LowMemoryHandler, this,
1410 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1411 			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1412 }
1413 
1414 
1415 void
1416 block_cache::Free(void* buffer)
1417 {
1418 	if (buffer != NULL)
1419 		object_cache_free(buffer_cache, buffer, 0);
1420 }
1421 
1422 
1423 void*
1424 block_cache::Allocate()
1425 {
1426 	void* block = object_cache_alloc(buffer_cache, 0);
1427 	if (block != NULL)
1428 		return block;
1429 
1430 	// recycle existing before allocating a new one
1431 	RemoveUnusedBlocks(100);
1432 
1433 	return object_cache_alloc(buffer_cache, 0);
1434 }
1435 
1436 
1437 void
1438 block_cache::FreeBlock(cached_block* block)
1439 {
1440 	Free(block->current_data);
1441 
1442 	if (block->original_data != NULL || block->parent_data != NULL) {
1443 		panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1444 			block->block_number, block->original_data, block->parent_data);
1445 	}
1446 
1447 #if BLOCK_CACHE_DEBUG_CHANGED
1448 	Free(block->compare);
1449 #endif
1450 
1451 	object_cache_free(sBlockCache, block, 0);
1452 }
1453 
1454 
1455 /*! Allocates a new block for \a blockNumber, ready for use */
1456 cached_block*
1457 block_cache::NewBlock(off_t blockNumber)
1458 {
1459 	cached_block* block = NULL;
1460 
1461 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1462 			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1463 		// recycle existing instead of allocating a new one
1464 		block = _GetUnusedBlock();
1465 	}
1466 	if (block == NULL) {
1467 		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1468 		if (block != NULL) {
1469 			block->current_data = Allocate();
1470 			if (block->current_data == NULL) {
1471 				object_cache_free(sBlockCache, block, 0);
1472 				return NULL;
1473 			}
1474 		} else {
1475 			TB(Error(this, blockNumber, "allocation failed"));
1476 			dprintf("block allocation failed, unused list is %sempty.\n",
1477 				unused_blocks.IsEmpty() ? "" : "not ");
1478 
1479 			// allocation failed, try to reuse an unused block
1480 			block = _GetUnusedBlock();
1481 			if (block == NULL) {
1482 				TB(Error(this, blockNumber, "get unused failed"));
1483 				FATAL(("could not allocate block!\n"));
1484 				return NULL;
1485 			}
1486 		}
1487 	}
1488 
1489 	block->block_number = blockNumber;
1490 	block->ref_count = 0;
1491 	block->last_accessed = 0;
1492 	block->transaction_next = NULL;
1493 	block->transaction = block->previous_transaction = NULL;
1494 	block->original_data = NULL;
1495 	block->parent_data = NULL;
1496 	block->busy_reading = false;
1497 	block->busy_writing = false;
1498 	block->is_writing = false;
1499 	block->is_dirty = false;
1500 	block->unused = false;
1501 	block->discard = false;
1502 	block->busy_reading_waiters = false;
1503 	block->busy_writing_waiters = false;
1504 #if BLOCK_CACHE_DEBUG_CHANGED
1505 	block->compare = NULL;
1506 #endif
1507 
1508 	return block;
1509 }
1510 
1511 
1512 void
1513 block_cache::FreeBlockParentData(cached_block* block)
1514 {
1515 	ASSERT(block->parent_data != NULL);
1516 	if (block->parent_data != block->current_data)
1517 		Free(block->parent_data);
1518 	block->parent_data = NULL;
1519 }
1520 
1521 
1522 void
1523 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1524 {
1525 	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1526 
1527 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1528 			cached_block* block = iterator.Next();) {
1529 		if (minSecondsOld >= block->LastAccess()) {
1530 			// The list is sorted by last access
1531 			break;
1532 		}
1533 		if (block->busy_reading || block->busy_writing)
1534 			continue;
1535 
1536 		TB(Flush(this, block));
1537 		TRACE(("  remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1538 			block->block_number, block->last_accessed));
1539 
1540 		// this can only happen if no transactions are used
1541 		if (block->is_dirty && !block->discard) {
1542 			if (block->busy_writing)
1543 				continue;
1544 
1545 			BlockWriter::WriteBlock(this, block);
1546 		}
1547 
1548 		// remove block from lists
1549 		iterator.Remove();
1550 		unused_block_count--;
1551 		RemoveBlock(block);
1552 
1553 		if (--count <= 0)
1554 			break;
1555 	}
1556 }
1557 
1558 
1559 void
1560 block_cache::RemoveBlock(cached_block* block)
1561 {
1562 	hash->Remove(block);
1563 	FreeBlock(block);
1564 }
1565 
1566 
1567 /*!	Discards the block from a transaction (this method must not be called
1568 	for blocks not part of a transaction).
1569 */
1570 void
1571 block_cache::DiscardBlock(cached_block* block)
1572 {
1573 	ASSERT(block->discard);
1574 	ASSERT(block->previous_transaction == NULL);
1575 
1576 	if (block->parent_data != NULL)
1577 		FreeBlockParentData(block);
1578 
1579 	if (block->original_data != NULL) {
1580 		Free(block->original_data);
1581 		block->original_data = NULL;
1582 	}
1583 
1584 	RemoveBlock(block);
1585 }
1586 
1587 
1588 void
1589 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1590 {
1591 	TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1592 
1593 	// free some blocks according to the low memory state
1594 	// (if there is enough memory left, we don't free any)
1595 
1596 	block_cache* cache = (block_cache*)data;
1597 	int32 free = 0;
1598 	int32 secondsOld = 0;
1599 	switch (level) {
1600 		case B_NO_LOW_RESOURCE:
1601 			return;
1602 		case B_LOW_RESOURCE_NOTE:
1603 			free = cache->unused_block_count / 8;
1604 			secondsOld = 120;
1605 			break;
1606 		case B_LOW_RESOURCE_WARNING:
1607 			free = cache->unused_block_count / 4;
1608 			secondsOld = 10;
1609 			break;
1610 		case B_LOW_RESOURCE_CRITICAL:
1611 			free = cache->unused_block_count / 2;
1612 			secondsOld = 0;
1613 			break;
1614 	}
1615 
1616 	MutexLocker locker(&cache->lock);
1617 
1618 	if (!locker.IsLocked()) {
1619 		// If our block_cache were deleted, it could be that we had
1620 		// been called before that deletion went through, therefore,
1621 		// acquiring its lock might fail.
1622 		return;
1623 	}
1624 
1625 #ifdef TRACE_BLOCK_CACHE
1626 	uint32 oldUnused = cache->unused_block_count;
1627 #endif
1628 
1629 	cache->RemoveUnusedBlocks(free, secondsOld);
1630 
1631 	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1632 		cache, oldUnused, cache->unused_block_count));
1633 }
1634 
1635 
1636 cached_block*
1637 block_cache::_GetUnusedBlock()
1638 {
1639 	TRACE(("block_cache: get unused block\n"));
1640 
1641 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1642 			cached_block* block = iterator.Next();) {
1643 		TB(Flush(this, block, true));
1644 		// this can only happen if no transactions are used
1645 		if (block->is_dirty && !block->busy_writing && !block->discard)
1646 			BlockWriter::WriteBlock(this, block);
1647 
1648 		// remove block from lists
1649 		iterator.Remove();
1650 		unused_block_count--;
1651 		hash->Remove(block);
1652 
1653 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1654 		block->unused = false;
1655 
1656 		// TODO: see if compare data is handled correctly here!
1657 #if BLOCK_CACHE_DEBUG_CHANGED
1658 		if (block->compare != NULL)
1659 			Free(block->compare);
1660 #endif
1661 		return block;
1662 	}
1663 
1664 	return NULL;
1665 }
1666 
1667 
1668 //	#pragma mark - private block functions
1669 
1670 
1671 /*!	Cache must be locked.
1672 */
1673 static void
1674 mark_block_busy_reading(block_cache* cache, cached_block* block)
1675 {
1676 	block->busy_reading = true;
1677 	cache->busy_reading_count++;
1678 }
1679 
1680 
1681 /*!	Cache must be locked.
1682 */
1683 static void
1684 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1685 {
1686 	block->busy_reading = false;
1687 	cache->busy_reading_count--;
1688 
1689 	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1690 		|| block->busy_reading_waiters) {
1691 		cache->busy_reading_waiters = false;
1692 		block->busy_reading_waiters = false;
1693 		cache->busy_reading_condition.NotifyAll();
1694 	}
1695 }
1696 
1697 
1698 /*!	Cache must be locked.
1699 */
1700 static void
1701 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1702 {
1703 	while (block->busy_reading) {
1704 		// wait for at least the specified block to be read in
1705 		ConditionVariableEntry entry;
1706 		cache->busy_reading_condition.Add(&entry);
1707 		block->busy_reading_waiters = true;
1708 
1709 		mutex_unlock(&cache->lock);
1710 
1711 		entry.Wait();
1712 
1713 		mutex_lock(&cache->lock);
1714 	}
1715 }
1716 
1717 
1718 /*!	Cache must be locked.
1719 */
1720 static void
1721 wait_for_busy_reading_blocks(block_cache* cache)
1722 {
1723 	while (cache->busy_reading_count != 0) {
1724 		// wait for all blocks to be read in
1725 		ConditionVariableEntry entry;
1726 		cache->busy_reading_condition.Add(&entry);
1727 		cache->busy_reading_waiters = true;
1728 
1729 		mutex_unlock(&cache->lock);
1730 
1731 		entry.Wait();
1732 
1733 		mutex_lock(&cache->lock);
1734 	}
1735 }
1736 
1737 
1738 /*!	Cache must be locked.
1739 */
1740 static void
1741 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1742 {
1743 	while (block->busy_writing) {
1744 		// wait for all blocks to be written back
1745 		ConditionVariableEntry entry;
1746 		cache->busy_writing_condition.Add(&entry);
1747 		block->busy_writing_waiters = true;
1748 
1749 		mutex_unlock(&cache->lock);
1750 
1751 		entry.Wait();
1752 
1753 		mutex_lock(&cache->lock);
1754 	}
1755 }
1756 
1757 
1758 /*!	Cache must be locked.
1759 */
1760 static void
1761 wait_for_busy_writing_blocks(block_cache* cache)
1762 {
1763 	while (cache->busy_writing_count != 0) {
1764 		// wait for all blocks to be written back
1765 		ConditionVariableEntry entry;
1766 		cache->busy_writing_condition.Add(&entry);
1767 		cache->busy_writing_waiters = true;
1768 
1769 		mutex_unlock(&cache->lock);
1770 
1771 		entry.Wait();
1772 
1773 		mutex_lock(&cache->lock);
1774 	}
1775 }
1776 
1777 
1778 /*!	Removes a reference from the specified \a block. If this was the last
1779 	reference, the block is moved into the unused list.
1780 	In low memory situations, it will also free some blocks from that list,
1781 	but not necessarily the \a block it just released.
1782 */
1783 static void
1784 put_cached_block(block_cache* cache, cached_block* block)
1785 {
1786 #if BLOCK_CACHE_DEBUG_CHANGED
1787 	if (!block->is_dirty && block->compare != NULL
1788 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1789 		dprintf("new block:\n");
1790 		dump_block((const char*)block->current_data, 256, "  ");
1791 		dprintf("unchanged block:\n");
1792 		dump_block((const char*)block->compare, 256, "  ");
1793 		BlockWriter::WriteBlock(cache, block);
1794 		panic("block_cache: supposed to be clean block was changed!\n");
1795 
1796 		cache->Free(block->compare);
1797 		block->compare = NULL;
1798 	}
1799 #endif
1800 	TB(Put(cache, block));
1801 
1802 	if (block->ref_count < 1) {
1803 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1804 		return;
1805 	}
1806 
1807 	if (--block->ref_count == 0
1808 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1809 		// This block is not used anymore, and not part of any transaction
1810 		block->is_writing = false;
1811 
1812 		if (block->discard) {
1813 			cache->RemoveBlock(block);
1814 		} else {
1815 			// put this block in the list of unused blocks
1816 			ASSERT(!block->unused);
1817 			block->unused = true;
1818 
1819 			ASSERT(block->original_data == NULL && block->parent_data == NULL);
1820 			cache->unused_blocks.Add(block);
1821 			cache->unused_block_count++;
1822 		}
1823 	}
1824 }
1825 
1826 
1827 static void
1828 put_cached_block(block_cache* cache, off_t blockNumber)
1829 {
1830 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1831 		panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1832 			blockNumber, cache->max_blocks - 1);
1833 	}
1834 
1835 	cached_block* block = cache->hash->Lookup(blockNumber);
1836 	if (block != NULL)
1837 		put_cached_block(cache, block);
1838 	else {
1839 		TB(Error(cache, blockNumber, "put unknown"));
1840 	}
1841 }
1842 
1843 
1844 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1845 	there, or reads it from the disk.
1846 	You need to have the cache locked when calling this function.
1847 
1848 	\param _allocated tells you whether or not a new block has been allocated
1849 		to satisfy your request.
1850 	\param readBlock if \c false, the block will not be read in case it was
1851 		not already in the cache. The block you retrieve may contain random
1852 		data. If \c true, the cache will be temporarily unlocked while the
1853 		block is read in.
1854 */
1855 static cached_block*
1856 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1857 	bool readBlock = true)
1858 {
1859 	ASSERT_LOCKED_MUTEX(&cache->lock);
1860 
1861 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1862 		panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1863 			blockNumber, cache->max_blocks - 1);
1864 		return NULL;
1865 	}
1866 
1867 retry:
1868 	cached_block* block = cache->hash->Lookup(blockNumber);
1869 	*_allocated = false;
1870 
1871 	if (block == NULL) {
1872 		// put block into cache
1873 		block = cache->NewBlock(blockNumber);
1874 		if (block == NULL)
1875 			return NULL;
1876 
1877 		cache->hash->Insert(block);
1878 		*_allocated = true;
1879 	} else if (block->busy_reading) {
1880 		// The block is currently busy_reading - wait and try again later
1881 		wait_for_busy_reading_block(cache, block);
1882 		goto retry;
1883 	}
1884 
1885 	if (block->unused) {
1886 		//TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
1887 		block->unused = false;
1888 		cache->unused_blocks.Remove(block);
1889 		cache->unused_block_count--;
1890 	}
1891 
1892 	if (*_allocated && readBlock) {
1893 		// read block into cache
1894 		int32 blockSize = cache->block_size;
1895 
1896 		mark_block_busy_reading(cache, block);
1897 		mutex_unlock(&cache->lock);
1898 
1899 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1900 			block->current_data, blockSize);
1901 
1902 		mutex_lock(&cache->lock);
1903 		if (bytesRead < blockSize) {
1904 			cache->RemoveBlock(block);
1905 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1906 
1907 			TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n",
1908 				blockNumber, bytesRead, strerror(errno)));
1909 			return NULL;
1910 		}
1911 		TB(Read(cache, block));
1912 
1913 		mark_block_unbusy_reading(cache, block);
1914 	}
1915 
1916 	block->ref_count++;
1917 	block->last_accessed = system_time() / 1000000L;
1918 
1919 	return block;
1920 }
1921 
1922 
1923 /*!	Returns the writable block data for the requested blockNumber.
1924 	If \a cleared is true, the block is not read from disk; an empty block
1925 	is returned.
1926 
1927 	This is the only method to insert a block into a transaction. It makes
1928 	sure that the previous block contents are preserved in that case.
1929 */
1930 static void*
1931 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1932 	off_t length, int32 transactionID, bool cleared)
1933 {
1934 	TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
1935 		blockNumber, transactionID));
1936 
1937 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1938 		panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1939 			blockNumber, cache->max_blocks - 1);
1940 	}
1941 
1942 	bool allocated;
1943 	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1944 		!cleared);
1945 	if (block == NULL)
1946 		return NULL;
1947 
1948 	if (block->busy_writing)
1949 		wait_for_busy_writing_block(cache, block);
1950 
1951 	block->discard = false;
1952 
1953 	// if there is no transaction support, we just return the current block
1954 	if (transactionID == -1) {
1955 		if (cleared) {
1956 			mark_block_busy_reading(cache, block);
1957 			mutex_unlock(&cache->lock);
1958 
1959 			memset(block->current_data, 0, cache->block_size);
1960 
1961 			mutex_lock(&cache->lock);
1962 			mark_block_unbusy_reading(cache, block);
1963 		}
1964 
1965 		block->is_writing = true;
1966 
1967 		if (!block->is_dirty) {
1968 			cache->num_dirty_blocks++;
1969 			block->is_dirty = true;
1970 				// mark the block as dirty
1971 		}
1972 
1973 		TB(Get(cache, block));
1974 		return block->current_data;
1975 	}
1976 
1977 	cache_transaction* transaction = block->transaction;
1978 
1979 	if (transaction != NULL && transaction->id != transactionID) {
1980 		// TODO: we have to wait here until the other transaction is done.
1981 		//	Maybe we should even panic, since we can't prevent any deadlocks.
1982 		panic("get_writable_cached_block(): asked to get busy writable block "
1983 			"(transaction %" B_PRId32 ")\n", block->transaction->id);
1984 		put_cached_block(cache, block);
1985 		return NULL;
1986 	}
1987 	if (transaction == NULL && transactionID != -1) {
1988 		// get new transaction
1989 		transaction = lookup_transaction(cache, transactionID);
1990 		if (transaction == NULL) {
1991 			panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
1992 				transactionID);
1993 			put_cached_block(cache, block);
1994 			return NULL;
1995 		}
1996 		if (!transaction->open) {
1997 			panic("get_writable_cached_block(): transaction already done!\n");
1998 			put_cached_block(cache, block);
1999 			return NULL;
2000 		}
2001 
2002 		block->transaction = transaction;
2003 
2004 		// attach the block to the transaction block list
2005 		block->transaction_next = transaction->first_block;
2006 		transaction->first_block = block;
2007 		transaction->num_blocks++;
2008 	}
2009 	if (transaction != NULL)
2010 		transaction->last_used = system_time();
2011 
2012 	bool wasUnchanged = block->original_data == NULL
2013 		|| block->previous_transaction != NULL;
2014 
2015 	if (!(allocated && cleared) && block->original_data == NULL) {
2016 		// we already have data, so we need to preserve it
2017 		block->original_data = cache->Allocate();
2018 		if (block->original_data == NULL) {
2019 			TB(Error(cache, blockNumber, "allocate original failed"));
2020 			FATAL(("could not allocate original_data\n"));
2021 			put_cached_block(cache, block);
2022 			return NULL;
2023 		}
2024 
2025 		mark_block_busy_reading(cache, block);
2026 		mutex_unlock(&cache->lock);
2027 
2028 		memcpy(block->original_data, block->current_data, cache->block_size);
2029 
2030 		mutex_lock(&cache->lock);
2031 		mark_block_unbusy_reading(cache, block);
2032 	}
2033 	if (block->parent_data == block->current_data) {
2034 		// remember any previous contents for the parent transaction
2035 		block->parent_data = cache->Allocate();
2036 		if (block->parent_data == NULL) {
2037 			// TODO: maybe we should just continue the current transaction in
2038 			// this case...
2039 			TB(Error(cache, blockNumber, "allocate parent failed"));
2040 			FATAL(("could not allocate parent\n"));
2041 			put_cached_block(cache, block);
2042 			return NULL;
2043 		}
2044 
2045 		mark_block_busy_reading(cache, block);
2046 		mutex_unlock(&cache->lock);
2047 
2048 		memcpy(block->parent_data, block->current_data, cache->block_size);
2049 
2050 		mutex_lock(&cache->lock);
2051 		mark_block_unbusy_reading(cache, block);
2052 
2053 		transaction->sub_num_blocks++;
2054 	} else if (transaction != NULL && transaction->has_sub_transaction
2055 		&& block->parent_data == NULL && wasUnchanged)
2056 		transaction->sub_num_blocks++;
2057 
2058 	if (cleared) {
2059 		mark_block_busy_reading(cache, block);
2060 		mutex_unlock(&cache->lock);
2061 
2062 		memset(block->current_data, 0, cache->block_size);
2063 
2064 		mutex_lock(&cache->lock);
2065 		mark_block_unbusy_reading(cache, block);
2066 	}
2067 
2068 	block->is_dirty = true;
2069 	TB(Get(cache, block));
2070 	TB2(BlockData(cache, block, "get writable"));
2071 
2072 	return block->current_data;
2073 }
2074 
2075 
2076 #if DEBUG_BLOCK_CACHE
2077 
2078 
2079 static void
2080 dump_block(cached_block* block)
2081 {
2082 	kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2083 		" %c%c%c%c%c%c %08lx %08lx\n",
2084 		(addr_t)block, block->block_number,
2085 		(addr_t)block->current_data, (addr_t)block->original_data,
2086 		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2087 		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2088 		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2089 		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2090 		(addr_t)block->transaction,
2091 		(addr_t)block->previous_transaction);
2092 }
2093 
2094 
2095 static void
2096 dump_block_long(cached_block* block)
2097 {
2098 	kprintf("BLOCK %p\n", block);
2099 	kprintf(" current data:  %p\n", block->current_data);
2100 	kprintf(" original data: %p\n", block->original_data);
2101 	kprintf(" parent data:   %p\n", block->parent_data);
2102 #if BLOCK_CACHE_DEBUG_CHANGED
2103 	kprintf(" compare data:  %p\n", block->compare);
2104 #endif
2105 	kprintf(" ref_count:     %" B_PRId32 "\n", block->ref_count);
2106 	kprintf(" accessed:      %" B_PRId32 "\n", block->LastAccess());
2107 	kprintf(" flags:        ");
2108 	if (block->busy_reading)
2109 		kprintf(" busy_reading");
2110 	if (block->busy_writing)
2111 		kprintf(" busy_writing");
2112 	if (block->is_writing)
2113 		kprintf(" is-writing");
2114 	if (block->is_dirty)
2115 		kprintf(" is-dirty");
2116 	if (block->unused)
2117 		kprintf(" unused");
2118 	if (block->discard)
2119 		kprintf(" discard");
2120 	kprintf("\n");
2121 	if (block->transaction != NULL) {
2122 		kprintf(" transaction:   %p (%" B_PRId32 ")\n", block->transaction,
2123 			block->transaction->id);
2124 		if (block->transaction_next != NULL) {
2125 			kprintf(" next in transaction: %" B_PRIdOFF "\n",
2126 				block->transaction_next->block_number);
2127 		}
2128 	}
2129 	if (block->previous_transaction != NULL) {
2130 		kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2131 			block->previous_transaction,
2132 			block->previous_transaction->id);
2133 	}
2134 
2135 	set_debug_variable("_current", (addr_t)block->current_data);
2136 	set_debug_variable("_original", (addr_t)block->original_data);
2137 	set_debug_variable("_parent", (addr_t)block->parent_data);
2138 }
2139 
2140 
2141 static int
2142 dump_cached_block(int argc, char** argv)
2143 {
2144 	if (argc != 2) {
2145 		kprintf("usage: %s <block-address>\n", argv[0]);
2146 		return 0;
2147 	}
2148 
2149 	dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2150 	return 0;
2151 }
2152 
2153 
2154 static int
2155 dump_cache(int argc, char** argv)
2156 {
2157 	bool showTransactions = false;
2158 	bool showBlocks = false;
2159 	int32 i = 1;
2160 	while (argv[i] != NULL && argv[i][0] == '-') {
2161 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
2162 			switch (arg[0]) {
2163 				case 'b':
2164 					showBlocks = true;
2165 					break;
2166 				case 't':
2167 					showTransactions = true;
2168 					break;
2169 				default:
2170 					print_debugger_command_usage(argv[0]);
2171 					return 0;
2172 			}
2173 		}
2174 		i++;
2175 	}
2176 
2177 	if (i >= argc) {
2178 		print_debugger_command_usage(argv[0]);
2179 		return 0;
2180 	}
2181 
2182 	block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2183 	if (cache == NULL) {
2184 		kprintf("invalid cache address\n");
2185 		return 0;
2186 	}
2187 
2188 	off_t blockNumber = -1;
2189 	if (i + 1 < argc) {
2190 		blockNumber = parse_expression(argv[i + 1]);
2191 		cached_block* block = cache->hash->Lookup(blockNumber);
2192 		if (block != NULL)
2193 			dump_block_long(block);
2194 		else
2195 			kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2196 		return 0;
2197 	}
2198 
2199 	kprintf("BLOCK CACHE: %p\n", cache);
2200 
2201 	kprintf(" fd:           %d\n", cache->fd);
2202 	kprintf(" max_blocks:   %" B_PRIdOFF "\n", cache->max_blocks);
2203 	kprintf(" block_size:   %zu\n", cache->block_size);
2204 	kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2205 	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2206 	kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2207 		cache->busy_reading_waiters ? "has" : "no");
2208 	kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2209 		cache->busy_writing_waiters ? "has" : "no");
2210 
2211 	if (!cache->pending_notifications.IsEmpty()) {
2212 		kprintf(" pending notifications:\n");
2213 
2214 		NotificationList::Iterator iterator
2215 			= cache->pending_notifications.GetIterator();
2216 		while (iterator.HasNext()) {
2217 			cache_notification* notification = iterator.Next();
2218 
2219 			kprintf("  %p %5" B_PRIx32 " %p - %p\n", notification,
2220 				notification->events_pending, notification->hook,
2221 				notification->data);
2222 		}
2223 	}
2224 
2225 	if (showTransactions) {
2226 		kprintf(" transactions:\n");
2227 		kprintf("address       id state  blocks  main   sub\n");
2228 
2229 		TransactionTable::Iterator iterator(cache->transaction_hash);
2230 
2231 		while (iterator.HasNext()) {
2232 			cache_transaction* transaction = iterator.Next();
2233 			kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
2234 				B_PRId32 "\n", transaction, transaction->id,
2235 				transaction->open ? "open" : "closed",
2236 				transaction->num_blocks, transaction->main_num_blocks,
2237 				transaction->sub_num_blocks);
2238 		}
2239 	}
2240 
2241 	if (showBlocks) {
2242 		kprintf(" blocks:\n");
2243 		kprintf("address  block no. current  original parent    refs access "
2244 			"flags transact prev. trans\n");
2245 	}
2246 
2247 	uint32 referenced = 0;
2248 	uint32 count = 0;
2249 	uint32 dirty = 0;
2250 	uint32 discarded = 0;
2251 	BlockTable::Iterator iterator(cache->hash);
2252 	while (iterator.HasNext()) {
2253 		cached_block* block = iterator.Next();
2254 		if (showBlocks)
2255 			dump_block(block);
2256 
2257 		if (block->is_dirty)
2258 			dirty++;
2259 		if (block->discard)
2260 			discarded++;
2261 		if (block->ref_count)
2262 			referenced++;
2263 		count++;
2264 	}
2265 
2266 	kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
2267 		" discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
2268 		" in unused.\n",
2269 		count, dirty, discarded, referenced, cache->busy_reading_count,
2270 		cache->unused_block_count);
2271 	return 0;
2272 }
2273 
2274 
2275 static int
2276 dump_transaction(int argc, char** argv)
2277 {
2278 	bool showBlocks = false;
2279 	int i = 1;
2280 	if (argc > 1 && !strcmp(argv[1], "-b")) {
2281 		showBlocks = true;
2282 		i++;
2283 	}
2284 
2285 	if (argc - i < 1 || argc - i > 2) {
2286 		print_debugger_command_usage(argv[0]);
2287 		return 0;
2288 	}
2289 
2290 	cache_transaction* transaction = NULL;
2291 
2292 	if (argc - i == 1) {
2293 		transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2294 	} else {
2295 		block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2296 		int32 id = parse_expression(argv[i + 1]);
2297 		transaction = lookup_transaction(cache, id);
2298 		if (transaction == NULL) {
2299 			kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2300 			return 0;
2301 		}
2302 	}
2303 
2304 	kprintf("TRANSACTION %p\n", transaction);
2305 
2306 	kprintf(" id:             %" B_PRId32 "\n", transaction->id);
2307 	kprintf(" num block:      %" B_PRId32 "\n", transaction->num_blocks);
2308 	kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2309 	kprintf(" sub num block:  %" B_PRId32 "\n", transaction->sub_num_blocks);
2310 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
2311 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
2312 	kprintf(" idle:           %" B_PRId64 " secs\n",
2313 		(system_time() - transaction->last_used) / 1000000);
2314 
2315 	kprintf(" listeners:\n");
2316 
2317 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2318 	while (iterator.HasNext()) {
2319 		cache_listener* listener = iterator.Next();
2320 
2321 		kprintf("  %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2322 			listener->hook, listener->data);
2323 	}
2324 
2325 	if (!showBlocks)
2326 		return 0;
2327 
2328 	kprintf(" blocks:\n");
2329 	kprintf("address  block no. current  original parent    refs access "
2330 		"flags transact prev. trans\n");
2331 
2332 	cached_block* block = transaction->first_block;
2333 	while (block != NULL) {
2334 		dump_block(block);
2335 		block = block->transaction_next;
2336 	}
2337 
2338 	kprintf("--\n");
2339 
2340 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2341 	while (blockIterator.HasNext()) {
2342 		block = blockIterator.Next();
2343 		dump_block(block);
2344 	}
2345 
2346 	return 0;
2347 }
2348 
2349 
2350 static int
2351 dump_caches(int argc, char** argv)
2352 {
2353 	kprintf("Block caches:\n");
2354 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2355 	while (i.HasNext()) {
2356 		block_cache* cache = i.Next();
2357 		if (cache == (block_cache*)&sMarkCache)
2358 			continue;
2359 
2360 		kprintf("  %p\n", cache);
2361 	}
2362 
2363 	return 0;
2364 }
2365 
2366 
2367 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2368 static int
2369 dump_block_data(int argc, char** argv)
2370 {
2371 	using namespace BlockTracing;
2372 
2373 	// Determine which blocks to show
2374 
2375 	bool printStackTrace = true;
2376 	uint32 which = 0;
2377 	int32 i = 1;
2378 	while (i < argc && argv[i][0] == '-') {
2379 		char* arg = &argv[i][1];
2380 		while (arg[0]) {
2381 			switch (arg[0]) {
2382 				case 'c':
2383 					which |= BlockData::kCurrent;
2384 					break;
2385 				case 'p':
2386 					which |= BlockData::kParent;
2387 					break;
2388 				case 'o':
2389 					which |= BlockData::kOriginal;
2390 					break;
2391 
2392 				default:
2393 					kprintf("invalid block specifier (only o/c/p are "
2394 						"allowed).\n");
2395 					return 0;
2396 			}
2397 			arg++;
2398 		}
2399 
2400 		i++;
2401 	}
2402 	if (which == 0)
2403 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2404 
2405 	if (i == argc) {
2406 		print_debugger_command_usage(argv[0]);
2407 		return 0;
2408 	}
2409 
2410 	// Get the range of blocks to print
2411 
2412 	int64 from = parse_expression(argv[i]);
2413 	int64 to = from;
2414 	if (argc > i + 1)
2415 		to = parse_expression(argv[i + 1]);
2416 	if (to < from)
2417 		to = from;
2418 
2419 	uint32 offset = 0;
2420 	uint32 size = LONG_MAX;
2421 	if (argc > i + 2)
2422 		offset = parse_expression(argv[i + 2]);
2423 	if (argc > i + 3)
2424 		size = parse_expression(argv[i + 3]);
2425 
2426 	TraceEntryIterator iterator;
2427 	iterator.MoveTo(from - 1);
2428 
2429 	static char sBuffer[1024];
2430 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2431 
2432 	while (TraceEntry* entry = iterator.Next()) {
2433 		int32 index = iterator.Index();
2434 		if (index > to)
2435 			break;
2436 
2437 		Action* action = dynamic_cast<Action*>(entry);
2438 		if (action != NULL) {
2439 			out.Clear();
2440 			out.DumpEntry(action);
2441 			continue;
2442 		}
2443 
2444 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
2445 		if (blockData == NULL)
2446 			continue;
2447 
2448 		out.Clear();
2449 
2450 		const char* dump = out.DumpEntry(entry);
2451 		int length = strlen(dump);
2452 		if (length > 0 && dump[length - 1] == '\n')
2453 			length--;
2454 
2455 		kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2456 
2457 		if (printStackTrace) {
2458 			out.Clear();
2459 			entry->DumpStackTrace(out);
2460 			if (out.Size() > 0)
2461 				kputs(out.Buffer());
2462 		}
2463 
2464 		blockData->DumpBlocks(which, offset, size);
2465 	}
2466 
2467 	return 0;
2468 }
2469 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
2470 
2471 
2472 #endif	// DEBUG_BLOCK_CACHE
2473 
2474 
2475 /*!	Traverses through the block_cache list, and returns one cache after the
2476 	other. The cache returned is automatically locked when you get it, and
2477 	unlocked with the next call to this function. Ignores caches that are in
2478 	deletion state.
2479 	Returns \c NULL when the end of the list is reached.
2480 */
2481 static block_cache*
2482 get_next_locked_block_cache(block_cache* last)
2483 {
2484 	MutexLocker _(sCachesLock);
2485 
2486 	block_cache* cache;
2487 	if (last != NULL) {
2488 		mutex_unlock(&last->lock);
2489 
2490 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
2491 		sCaches.Remove((block_cache*)&sMarkCache);
2492 	} else
2493 		cache = sCaches.Head();
2494 
2495 	if (cache != NULL) {
2496 		mutex_lock(&cache->lock);
2497 		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2498 	}
2499 
2500 	return cache;
2501 }
2502 
2503 
2504 /*!	Background thread that continuously checks for pending notifications of
2505 	all caches.
2506 	Every two seconds, it will also write back up to 64 blocks per cache.
2507 */
2508 static status_t
2509 block_notifier_and_writer(void* /*data*/)
2510 {
2511 	const bigtime_t kTimeout = 2000000LL;
2512 	bigtime_t timeout = kTimeout;
2513 
2514 	while (true) {
2515 		bigtime_t start = system_time();
2516 
2517 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
2518 			B_RELATIVE_TIMEOUT, timeout);
2519 		if (status == B_OK) {
2520 			flush_pending_notifications();
2521 			timeout -= system_time() - start;
2522 			continue;
2523 		}
2524 
2525 		// write 64 blocks of each block_cache every two seconds
2526 		// TODO: change this once we have an I/O scheduler
2527 		timeout = kTimeout;
2528 		size_t usedMemory;
2529 		object_cache_get_usage(sBlockCache, &usedMemory);
2530 
2531 		block_cache* cache = NULL;
2532 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2533 			BlockWriter writer(cache, 64);
2534 
2535 			size_t cacheUsedMemory;
2536 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2537 			usedMemory += cacheUsedMemory;
2538 
2539 			if (cache->num_dirty_blocks) {
2540 				// This cache is not using transactions, we'll scan the blocks
2541 				// directly
2542 				BlockTable::Iterator iterator(cache->hash);
2543 
2544 				while (iterator.HasNext()) {
2545 					cached_block* block = iterator.Next();
2546 					if (block->CanBeWritten() && !writer.Add(block))
2547 						break;
2548 				}
2549 
2550 			} else {
2551 				TransactionTable::Iterator iterator(cache->transaction_hash);
2552 
2553 				while (iterator.HasNext()) {
2554 					cache_transaction* transaction = iterator.Next();
2555 					if (transaction->open) {
2556 						if (system_time() > transaction->last_used
2557 								+ kTransactionIdleTime) {
2558 							// Transaction is open but idle
2559 							notify_transaction_listeners(cache, transaction,
2560 								TRANSACTION_IDLE);
2561 						}
2562 						continue;
2563 					}
2564 
2565 					bool hasLeftOvers;
2566 						// we ignore this one
2567 					if (!writer.Add(transaction, hasLeftOvers))
2568 						break;
2569 				}
2570 			}
2571 
2572 			writer.Write();
2573 
2574 			if ((block_cache_used_memory() / B_PAGE_SIZE)
2575 					> vm_page_num_pages() / 2) {
2576 				// Try to reduce memory usage to half of the available
2577 				// RAM at maximum
2578 				cache->RemoveUnusedBlocks(1000, 10);
2579 			}
2580 		}
2581 
2582 		MutexLocker _(sCachesMemoryUseLock);
2583 		sUsedMemory = usedMemory;
2584 	}
2585 
2586 	// never can get here
2587 	return B_OK;
2588 }
2589 
2590 
2591 /*!	Notify function for wait_for_notifications(). */
2592 static void
2593 notify_sync(int32 transactionID, int32 event, void* _cache)
2594 {
2595 	block_cache* cache = (block_cache*)_cache;
2596 
2597 	cache->condition_variable.NotifyOne();
2598 }
2599 
2600 
2601 /*!	Must be called with the sCachesLock held. */
2602 static bool
2603 is_valid_cache(block_cache* cache)
2604 {
2605 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2606 
2607 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2608 	while (iterator.HasNext()) {
2609 		if (cache == iterator.Next())
2610 			return true;
2611 	}
2612 
2613 	return false;
2614 }
2615 
2616 
2617 /*!	Waits until all pending notifications are carried out.
2618 	Safe to be called from the block writer/notifier thread.
2619 	You must not hold the \a cache lock when calling this function.
2620 */
2621 static void
2622 wait_for_notifications(block_cache* cache)
2623 {
2624 	MutexLocker locker(sCachesLock);
2625 
2626 	if (find_thread(NULL) == sNotifierWriterThread) {
2627 		// We're the notifier thread, don't wait, but flush all pending
2628 		// notifications directly.
2629 		if (is_valid_cache(cache))
2630 			flush_pending_notifications(cache);
2631 		return;
2632 	}
2633 
2634 	// add sync notification
2635 	cache_notification notification;
2636 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2637 		cache);
2638 
2639 	ConditionVariableEntry entry;
2640 	cache->condition_variable.Add(&entry);
2641 
2642 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2643 	locker.Unlock();
2644 
2645 	// wait for notification hook to be called
2646 	entry.Wait();
2647 }
2648 
2649 
2650 status_t
2651 block_cache_init(void)
2652 {
2653 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2654 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2655 	if (sBlockCache == NULL)
2656 		return B_NO_MEMORY;
2657 
2658 	new (&sCaches) DoublyLinkedList<block_cache>;
2659 		// manually call constructor
2660 
2661 	sEventSemaphore = create_sem(0, "block cache event");
2662 	if (sEventSemaphore < B_OK)
2663 		return sEventSemaphore;
2664 
2665 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2666 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2667 	if (sNotifierWriterThread >= B_OK)
2668 		resume_thread(sNotifierWriterThread);
2669 
2670 #if DEBUG_BLOCK_CACHE
2671 	add_debugger_command_etc("block_caches", &dump_caches,
2672 		"dumps all block caches", "\n", 0);
2673 	add_debugger_command_etc("block_cache", &dump_cache,
2674 		"dumps a specific block cache",
2675 		"[-bt] <cache-address> [block-number]\n"
2676 		"  -t lists the transactions\n"
2677 		"  -b lists all blocks\n", 0);
2678 	add_debugger_command("cached_block", &dump_cached_block,
2679 		"dumps the specified cached block");
2680 	add_debugger_command_etc("transaction", &dump_transaction,
2681 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2682 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2683 		"  -b lists all blocks that are part of this transaction\n", 0);
2684 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2685 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2686 		"dumps the data blocks logged for the actions",
2687 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2688 		"If no data specifier is used, all blocks are shown by default.\n"
2689 		" -c       the current data is shown, if available.\n"
2690 		" -p       the parent data is shown, if available.\n"
2691 		" -o       the original data is shown, if available.\n"
2692 		" <from>   first index of tracing entries to show.\n"
2693 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2694 		" <offset> the offset of the block data.\n"
2695 		" <from>   the size of the block data that is dumped\n", 0);
2696 #	endif
2697 #endif	// DEBUG_BLOCK_CACHE
2698 
2699 	return B_OK;
2700 }
2701 
2702 
2703 size_t
2704 block_cache_used_memory(void)
2705 {
2706 	MutexLocker _(sCachesMemoryUseLock);
2707 	return sUsedMemory;
2708 }
2709 
2710 
2711 //	#pragma mark - public transaction API
2712 
2713 
2714 int32
2715 cache_start_transaction(void* _cache)
2716 {
2717 	block_cache* cache = (block_cache*)_cache;
2718 	TransactionLocker locker(cache);
2719 
2720 	if (cache->last_transaction && cache->last_transaction->open) {
2721 		panic("last transaction (%" B_PRId32 ") still open!\n",
2722 			cache->last_transaction->id);
2723 	}
2724 
2725 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2726 	if (transaction == NULL)
2727 		return B_NO_MEMORY;
2728 
2729 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2730 	cache->last_transaction = transaction;
2731 
2732 	TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
2733 	T(Action("start", cache, transaction));
2734 
2735 	cache->transaction_hash->Insert(transaction);
2736 
2737 	return transaction->id;
2738 }
2739 
2740 
2741 status_t
2742 cache_sync_transaction(void* _cache, int32 id)
2743 {
2744 	block_cache* cache = (block_cache*)_cache;
2745 	bool hadBusy;
2746 
2747 	TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
2748 
2749 	do {
2750 		TransactionLocker locker(cache);
2751 		hadBusy = false;
2752 
2753 		BlockWriter writer(cache);
2754 		TransactionTable::Iterator iterator(cache->transaction_hash);
2755 
2756 		while (iterator.HasNext()) {
2757 			// close all earlier transactions which haven't been closed yet
2758 			cache_transaction* transaction = iterator.Next();
2759 
2760 			if (transaction->busy_writing_count != 0) {
2761 				hadBusy = true;
2762 				continue;
2763 			}
2764 			if (transaction->id <= id && !transaction->open) {
2765 				// write back all of their remaining dirty blocks
2766 				T(Action("sync", cache, transaction));
2767 
2768 				bool hasLeftOvers;
2769 				writer.Add(transaction, hasLeftOvers);
2770 
2771 				if (hasLeftOvers) {
2772 					// This transaction contains blocks that a previous
2773 					// transaction is trying to write back in this write run
2774 					hadBusy = true;
2775 				}
2776 			}
2777 		}
2778 
2779 		status_t status = writer.Write();
2780 		if (status != B_OK)
2781 			return status;
2782 	} while (hadBusy);
2783 
2784 	wait_for_notifications(cache);
2785 		// make sure that all pending TRANSACTION_WRITTEN notifications
2786 		// are handled after we return
2787 	return B_OK;
2788 }
2789 
2790 
2791 status_t
2792 cache_end_transaction(void* _cache, int32 id,
2793 	transaction_notification_hook hook, void* data)
2794 {
2795 	block_cache* cache = (block_cache*)_cache;
2796 	TransactionLocker locker(cache);
2797 
2798 	TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
2799 
2800 	cache_transaction* transaction = lookup_transaction(cache, id);
2801 	if (transaction == NULL) {
2802 		panic("cache_end_transaction(): invalid transaction ID\n");
2803 		return B_BAD_VALUE;
2804 	}
2805 
2806 	// Write back all pending transaction blocks
2807 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2808 	if (status != B_OK)
2809 		return status;
2810 
2811 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2812 
2813 	if (hook != NULL
2814 		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2815 			hook, data) != B_OK) {
2816 		return B_NO_MEMORY;
2817 	}
2818 
2819 	T(Action("end", cache, transaction));
2820 
2821 	// iterate through all blocks and free the unchanged original contents
2822 
2823 	cached_block* next;
2824 	for (cached_block* block = transaction->first_block; block != NULL;
2825 			block = next) {
2826 		next = block->transaction_next;
2827 		ASSERT(block->previous_transaction == NULL);
2828 
2829 		if (block->discard) {
2830 			// This block has been discarded in the transaction
2831 			cache->DiscardBlock(block);
2832 			transaction->num_blocks--;
2833 			continue;
2834 		}
2835 
2836 		if (block->original_data != NULL) {
2837 			cache->Free(block->original_data);
2838 			block->original_data = NULL;
2839 		}
2840 		if (block->parent_data != NULL) {
2841 			ASSERT(transaction->has_sub_transaction);
2842 			cache->FreeBlockParentData(block);
2843 		}
2844 
2845 		// move the block to the previous transaction list
2846 		transaction->blocks.Add(block);
2847 
2848 		block->previous_transaction = transaction;
2849 		block->transaction_next = NULL;
2850 		block->transaction = NULL;
2851 	}
2852 
2853 	transaction->open = false;
2854 	return B_OK;
2855 }
2856 
2857 
2858 status_t
2859 cache_abort_transaction(void* _cache, int32 id)
2860 {
2861 	block_cache* cache = (block_cache*)_cache;
2862 	TransactionLocker locker(cache);
2863 
2864 	TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
2865 
2866 	cache_transaction* transaction = lookup_transaction(cache, id);
2867 	if (transaction == NULL) {
2868 		panic("cache_abort_transaction(): invalid transaction ID\n");
2869 		return B_BAD_VALUE;
2870 	}
2871 
2872 	T(Abort(cache, transaction));
2873 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2874 
2875 	// iterate through all blocks and restore their original contents
2876 
2877 	cached_block* block = transaction->first_block;
2878 	cached_block* next;
2879 	for (; block != NULL; block = next) {
2880 		next = block->transaction_next;
2881 
2882 		if (block->original_data != NULL) {
2883 			TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
2884 				"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
2885 			memcpy(block->current_data, block->original_data,
2886 				cache->block_size);
2887 			cache->Free(block->original_data);
2888 			block->original_data = NULL;
2889 		}
2890 		if (transaction->has_sub_transaction && block->parent_data != NULL)
2891 			cache->FreeBlockParentData(block);
2892 
2893 		block->transaction_next = NULL;
2894 		block->transaction = NULL;
2895 		block->discard = false;
2896 		if (block->previous_transaction == NULL)
2897 			block->is_dirty = false;
2898 	}
2899 
2900 	cache->transaction_hash->Remove(transaction);
2901 	delete_transaction(cache, transaction);
2902 	return B_OK;
2903 }
2904 
2905 
2906 /*!	Acknowledges the current parent transaction, and starts a new transaction
2907 	from its sub transaction.
2908 	The new transaction also gets a new transaction ID.
2909 */
2910 int32
2911 cache_detach_sub_transaction(void* _cache, int32 id,
2912 	transaction_notification_hook hook, void* data)
2913 {
2914 	block_cache* cache = (block_cache*)_cache;
2915 	TransactionLocker locker(cache);
2916 
2917 	TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
2918 
2919 	cache_transaction* transaction = lookup_transaction(cache, id);
2920 	if (transaction == NULL) {
2921 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2922 		return B_BAD_VALUE;
2923 	}
2924 	if (!transaction->has_sub_transaction)
2925 		return B_BAD_VALUE;
2926 
2927 	// iterate through all blocks and free the unchanged original contents
2928 
2929 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2930 	if (status != B_OK)
2931 		return status;
2932 
2933 	// create a new transaction for the sub transaction
2934 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
2935 	if (newTransaction == NULL)
2936 		return B_NO_MEMORY;
2937 
2938 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2939 	T(Detach(cache, transaction, newTransaction));
2940 
2941 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2942 
2943 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2944 			data) != B_OK) {
2945 		delete newTransaction;
2946 		return B_NO_MEMORY;
2947 	}
2948 
2949 	cached_block* last = NULL;
2950 	cached_block* next;
2951 	for (cached_block* block = transaction->first_block; block != NULL;
2952 			block = next) {
2953 		next = block->transaction_next;
2954 		ASSERT(block->previous_transaction == NULL);
2955 
2956 		if (block->discard) {
2957 			cache->DiscardBlock(block);
2958 			transaction->main_num_blocks--;
2959 			continue;
2960 		}
2961 
2962 		if (block->parent_data != NULL) {
2963 			// The block changed in the parent - free the original data, since
2964 			// they will be replaced by what is in current.
2965 			ASSERT(block->original_data != NULL);
2966 			cache->Free(block->original_data);
2967 
2968 			if (block->parent_data != block->current_data) {
2969 				// The block had been changed in both transactions
2970 				block->original_data = block->parent_data;
2971 			} else {
2972 				// The block has only been changed in the parent
2973 				block->original_data = NULL;
2974 			}
2975 
2976 			// move the block to the previous transaction list
2977 			transaction->blocks.Add(block);
2978 			block->previous_transaction = transaction;
2979 			block->parent_data = NULL;
2980 		}
2981 
2982 		if (block->original_data != NULL) {
2983 			// This block had been changed in the current sub transaction,
2984 			// we need to move this block over to the new transaction.
2985 			ASSERT(block->parent_data == NULL);
2986 
2987 			if (last == NULL)
2988 				newTransaction->first_block = block;
2989 			else
2990 				last->transaction_next = block;
2991 
2992 			block->transaction = newTransaction;
2993 			last = block;
2994 		} else
2995 			block->transaction = NULL;
2996 
2997 		block->transaction_next = NULL;
2998 	}
2999 
3000 	newTransaction->num_blocks = transaction->sub_num_blocks;
3001 
3002 	transaction->open = false;
3003 	transaction->has_sub_transaction = false;
3004 	transaction->num_blocks = transaction->main_num_blocks;
3005 	transaction->sub_num_blocks = 0;
3006 
3007 	cache->transaction_hash->Insert(newTransaction);
3008 	cache->last_transaction = newTransaction;
3009 
3010 	return newTransaction->id;
3011 }
3012 
3013 
3014 status_t
3015 cache_abort_sub_transaction(void* _cache, int32 id)
3016 {
3017 	block_cache* cache = (block_cache*)_cache;
3018 	TransactionLocker locker(cache);
3019 
3020 	TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3021 
3022 	cache_transaction* transaction = lookup_transaction(cache, id);
3023 	if (transaction == NULL) {
3024 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3025 		return B_BAD_VALUE;
3026 	}
3027 	if (!transaction->has_sub_transaction)
3028 		return B_BAD_VALUE;
3029 
3030 	T(Abort(cache, transaction));
3031 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3032 
3033 	// revert all changes back to the version of the parent
3034 
3035 	cached_block* block = transaction->first_block;
3036 	cached_block* last = NULL;
3037 	cached_block* next;
3038 	for (; block != NULL; block = next) {
3039 		next = block->transaction_next;
3040 
3041 		if (block->parent_data == NULL) {
3042 			// The parent transaction didn't change the block, but the sub
3043 			// transaction did - we need to revert to the original data.
3044 			// The block is no longer part of the transaction
3045 			if (block->original_data != NULL) {
3046 				// The block might not have original data if was empty
3047 				memcpy(block->current_data, block->original_data,
3048 					cache->block_size);
3049 			}
3050 
3051 			if (last != NULL)
3052 				last->transaction_next = next;
3053 			else
3054 				transaction->first_block = next;
3055 
3056 			block->transaction_next = NULL;
3057 			block->transaction = NULL;
3058 			transaction->num_blocks--;
3059 
3060 			if (block->previous_transaction == NULL) {
3061 				cache->Free(block->original_data);
3062 				block->original_data = NULL;
3063 				block->is_dirty = false;
3064 
3065 				if (block->ref_count == 0) {
3066 					// Move the block into the unused list if possible
3067 					block->unused = true;
3068 					cache->unused_blocks.Add(block);
3069 					cache->unused_block_count++;
3070 				}
3071 			}
3072 		} else {
3073 			if (block->parent_data != block->current_data) {
3074 				// The block has been changed and must be restored - the block
3075 				// is still dirty and part of the transaction
3076 				TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3077 					"restored contents of block %" B_PRIdOFF "\n",
3078 					transaction->id, block->block_number));
3079 				memcpy(block->current_data, block->parent_data,
3080 					cache->block_size);
3081 				cache->Free(block->parent_data);
3082 				// The block stays dirty
3083 			}
3084 			block->parent_data = NULL;
3085 			last = block;
3086 		}
3087 
3088 		block->discard = false;
3089 	}
3090 
3091 	// all subsequent changes will go into the main transaction
3092 	transaction->has_sub_transaction = false;
3093 	transaction->sub_num_blocks = 0;
3094 
3095 	return B_OK;
3096 }
3097 
3098 
3099 status_t
3100 cache_start_sub_transaction(void* _cache, int32 id)
3101 {
3102 	block_cache* cache = (block_cache*)_cache;
3103 	TransactionLocker locker(cache);
3104 
3105 	TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3106 
3107 	cache_transaction* transaction = lookup_transaction(cache, id);
3108 	if (transaction == NULL) {
3109 		panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3110 			id);
3111 		return B_BAD_VALUE;
3112 	}
3113 
3114 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3115 
3116 	// move all changed blocks up to the parent
3117 
3118 	cached_block* block = transaction->first_block;
3119 	cached_block* next;
3120 	for (; block != NULL; block = next) {
3121 		next = block->transaction_next;
3122 
3123 		if (block->parent_data != NULL) {
3124 			// There already is an older sub transaction - we acknowledge
3125 			// its changes and move its blocks up to the parent
3126 			ASSERT(transaction->has_sub_transaction);
3127 			cache->FreeBlockParentData(block);
3128 		}
3129 		if (block->discard) {
3130 			// This block has been discarded in the parent transaction.
3131 			// Just throw away any changes made in this transaction, so that
3132 			// it can still be reverted to its original contents if needed
3133 			ASSERT(block->previous_transaction == NULL);
3134 			if (block->original_data != NULL) {
3135 				memcpy(block->current_data, block->original_data,
3136 					cache->block_size);
3137 				block->original_data = NULL;
3138 			}
3139 			continue;
3140 		}
3141 
3142 		// we "allocate" the parent data lazily, that means, we don't copy
3143 		// the data (and allocate memory for it) until we need to
3144 		block->parent_data = block->current_data;
3145 	}
3146 
3147 	// all subsequent changes will go into the sub transaction
3148 	transaction->has_sub_transaction = true;
3149 	transaction->main_num_blocks = transaction->num_blocks;
3150 	transaction->sub_num_blocks = 0;
3151 	T(Action("start-sub", cache, transaction));
3152 
3153 	return B_OK;
3154 }
3155 
3156 
3157 /*!	Adds a transaction listener that gets notified when the transaction
3158 	is ended, aborted, written, or idle as specified by \a events.
3159 	The listener gets automatically removed when the transaction ends.
3160 */
3161 status_t
3162 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3163 	transaction_notification_hook hook, void* data)
3164 {
3165 	block_cache* cache = (block_cache*)_cache;
3166 	TransactionLocker locker(cache);
3167 
3168 	cache_transaction* transaction = lookup_transaction(cache, id);
3169 	if (transaction == NULL)
3170 		return B_BAD_VALUE;
3171 
3172 	return add_transaction_listener(cache, transaction, events, hook, data);
3173 }
3174 
3175 
3176 status_t
3177 cache_remove_transaction_listener(void* _cache, int32 id,
3178 	transaction_notification_hook hookFunction, void* data)
3179 {
3180 	block_cache* cache = (block_cache*)_cache;
3181 	TransactionLocker locker(cache);
3182 
3183 	cache_transaction* transaction = lookup_transaction(cache, id);
3184 	if (transaction == NULL)
3185 		return B_BAD_VALUE;
3186 
3187 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3188 	while (iterator.HasNext()) {
3189 		cache_listener* listener = iterator.Next();
3190 		if (listener->data == data && listener->hook == hookFunction) {
3191 			iterator.Remove();
3192 
3193 			if (listener->events_pending != 0) {
3194 				MutexLocker _(sNotificationsLock);
3195 				if (listener->events_pending != 0)
3196 					cache->pending_notifications.Remove(listener);
3197 			}
3198 			delete listener;
3199 			return B_OK;
3200 		}
3201 	}
3202 
3203 	return B_ENTRY_NOT_FOUND;
3204 }
3205 
3206 
3207 status_t
3208 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3209 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3210 {
3211 	cached_block* block = (cached_block*)*_cookie;
3212 	block_cache* cache = (block_cache*)_cache;
3213 	TransactionLocker locker(cache);
3214 
3215 	cache_transaction* transaction = lookup_transaction(cache, id);
3216 	if (transaction == NULL || !transaction->open)
3217 		return B_BAD_VALUE;
3218 
3219 	if (block == NULL)
3220 		block = transaction->first_block;
3221 	else
3222 		block = block->transaction_next;
3223 
3224 	if (transaction->has_sub_transaction) {
3225 		if (mainOnly) {
3226 			// find next block that the parent changed
3227 			while (block != NULL && block->parent_data == NULL)
3228 				block = block->transaction_next;
3229 		} else {
3230 			// find next non-discarded block
3231 			while (block != NULL && block->discard)
3232 				block = block->transaction_next;
3233 		}
3234 	}
3235 
3236 	if (block == NULL)
3237 		return B_ENTRY_NOT_FOUND;
3238 
3239 	if (_blockNumber)
3240 		*_blockNumber = block->block_number;
3241 	if (_data)
3242 		*_data = mainOnly ? block->parent_data : block->current_data;
3243 	if (_unchangedData)
3244 		*_unchangedData = block->original_data;
3245 
3246 	*_cookie = (addr_t)block;
3247 	return B_OK;
3248 }
3249 
3250 
3251 int32
3252 cache_blocks_in_transaction(void* _cache, int32 id)
3253 {
3254 	block_cache* cache = (block_cache*)_cache;
3255 	TransactionLocker locker(cache);
3256 
3257 	cache_transaction* transaction = lookup_transaction(cache, id);
3258 	if (transaction == NULL)
3259 		return B_BAD_VALUE;
3260 
3261 	return transaction->num_blocks;
3262 }
3263 
3264 
3265 /*!	Returns the number of blocks that are part of the main transaction. If this
3266 	transaction does not have a sub transaction yet, this is the same value as
3267 	cache_blocks_in_transaction() would return.
3268 */
3269 int32
3270 cache_blocks_in_main_transaction(void* _cache, int32 id)
3271 {
3272 	block_cache* cache = (block_cache*)_cache;
3273 	TransactionLocker locker(cache);
3274 
3275 	cache_transaction* transaction = lookup_transaction(cache, id);
3276 	if (transaction == NULL)
3277 		return B_BAD_VALUE;
3278 
3279 	if (transaction->has_sub_transaction)
3280 		return transaction->main_num_blocks;
3281 
3282 	return transaction->num_blocks;
3283 }
3284 
3285 
3286 int32
3287 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3288 {
3289 	block_cache* cache = (block_cache*)_cache;
3290 	TransactionLocker locker(cache);
3291 
3292 	cache_transaction* transaction = lookup_transaction(cache, id);
3293 	if (transaction == NULL)
3294 		return B_BAD_VALUE;
3295 
3296 	return transaction->sub_num_blocks;
3297 }
3298 
3299 
3300 //	#pragma mark - public block cache API
3301 
3302 
3303 void
3304 block_cache_delete(void* _cache, bool allowWrites)
3305 {
3306 	block_cache* cache = (block_cache*)_cache;
3307 
3308 	if (allowWrites)
3309 		block_cache_sync(cache);
3310 
3311 	mutex_lock(&sCachesLock);
3312 	sCaches.Remove(cache);
3313 	mutex_unlock(&sCachesLock);
3314 
3315 	mutex_lock(&cache->lock);
3316 
3317 	// wait for all blocks to become unbusy
3318 	wait_for_busy_reading_blocks(cache);
3319 	wait_for_busy_writing_blocks(cache);
3320 
3321 	// free all blocks
3322 
3323 	cached_block* block = cache->hash->Clear(true);
3324 	while (block != NULL) {
3325 		cached_block* next = block->next;
3326 		cache->FreeBlock(block);
3327 		block = next;
3328 	}
3329 
3330 	// free all transactions (they will all be aborted)
3331 
3332 	cache_transaction* transaction = cache->transaction_hash->Clear(true);
3333 	while (transaction != NULL) {
3334 		cache_transaction* next = transaction->next;
3335 		delete transaction;
3336 		transaction = next;
3337 	}
3338 
3339 	delete cache;
3340 }
3341 
3342 
3343 void*
3344 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3345 {
3346 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3347 		readOnly);
3348 	if (cache == NULL)
3349 		return NULL;
3350 
3351 	if (cache->Init() != B_OK) {
3352 		delete cache;
3353 		return NULL;
3354 	}
3355 
3356 	MutexLocker _(sCachesLock);
3357 	sCaches.Add(cache);
3358 
3359 	return cache;
3360 }
3361 
3362 
3363 status_t
3364 block_cache_sync(void* _cache)
3365 {
3366 	block_cache* cache = (block_cache*)_cache;
3367 
3368 	// We will sync all dirty blocks to disk that have a completed
3369 	// transaction or no transaction only
3370 
3371 	MutexLocker locker(&cache->lock);
3372 
3373 	BlockWriter writer(cache);
3374 	BlockTable::Iterator iterator(cache->hash);
3375 
3376 	while (iterator.HasNext()) {
3377 		cached_block* block = iterator.Next();
3378 		if (block->CanBeWritten())
3379 			writer.Add(block);
3380 	}
3381 
3382 	status_t status = writer.Write();
3383 
3384 	locker.Unlock();
3385 
3386 	wait_for_notifications(cache);
3387 		// make sure that all pending TRANSACTION_WRITTEN notifications
3388 		// are handled after we return
3389 	return status;
3390 }
3391 
3392 
3393 status_t
3394 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3395 {
3396 	block_cache* cache = (block_cache*)_cache;
3397 
3398 	// We will sync all dirty blocks to disk that have a completed
3399 	// transaction or no transaction only
3400 
3401 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3402 		panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
3403 			" (max %" B_PRIdOFF ")",
3404 			blockNumber, cache->max_blocks - 1);
3405 		return B_BAD_VALUE;
3406 	}
3407 
3408 	MutexLocker locker(&cache->lock);
3409 	BlockWriter writer(cache);
3410 
3411 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
3412 		cached_block* block = cache->hash->Lookup(blockNumber);
3413 		if (block == NULL)
3414 			continue;
3415 
3416 		if (block->CanBeWritten())
3417 			writer.Add(block);
3418 	}
3419 
3420 	status_t status = writer.Write();
3421 
3422 	locker.Unlock();
3423 
3424 	wait_for_notifications(cache);
3425 		// make sure that all pending TRANSACTION_WRITTEN notifications
3426 		// are handled after we return
3427 	return status;
3428 }
3429 
3430 
3431 /*!	Discards a block from the current transaction or from the cache.
3432 	You have to call this function when you no longer use a block, ie. when it
3433 	might be reclaimed by the file cache in order to make sure they won't
3434 	interfere.
3435 */
3436 void
3437 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3438 {
3439 	// TODO: this could be a nice place to issue the ATA trim command
3440 	block_cache* cache = (block_cache*)_cache;
3441 	TransactionLocker locker(cache);
3442 
3443 	BlockWriter writer(cache);
3444 
3445 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3446 		cached_block* block = cache->hash->Lookup(blockNumber);
3447 		if (block != NULL && block->previous_transaction != NULL)
3448 			writer.Add(block);
3449 	}
3450 
3451 	writer.Write();
3452 		// TODO: this can fail, too!
3453 
3454 	blockNumber -= numBlocks;
3455 		// reset blockNumber to its original value
3456 
3457 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3458 		cached_block* block = cache->hash->Lookup(blockNumber);
3459 		if (block == NULL)
3460 			continue;
3461 
3462 		ASSERT(block->previous_transaction == NULL);
3463 
3464 		if (block->unused) {
3465 			cache->unused_blocks.Remove(block);
3466 			cache->unused_block_count--;
3467 			cache->RemoveBlock(block);
3468 		} else {
3469 			if (block->transaction != NULL && block->parent_data != NULL
3470 				&& block->parent_data != block->current_data) {
3471 				panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3472 					"transaction!", blockNumber);
3473 			}
3474 
3475 			// mark it as discarded (in the current transaction only, if any)
3476 			block->discard = true;
3477 		}
3478 	}
3479 }
3480 
3481 
3482 status_t
3483 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3484 {
3485 	block_cache* cache = (block_cache*)_cache;
3486 	MutexLocker locker(&cache->lock);
3487 
3488 	if (cache->read_only) {
3489 		panic("tried to make block writable on a read-only cache!");
3490 		return B_ERROR;
3491 	}
3492 
3493 	// TODO: this can be done better!
3494 	void* block = get_writable_cached_block(cache, blockNumber,
3495 		blockNumber, 1, transaction, false);
3496 	if (block != NULL) {
3497 		put_cached_block((block_cache*)_cache, blockNumber);
3498 		return B_OK;
3499 	}
3500 
3501 	return B_ERROR;
3502 }
3503 
3504 
3505 void*
3506 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3507 	off_t length, int32 transaction)
3508 {
3509 	block_cache* cache = (block_cache*)_cache;
3510 	MutexLocker locker(&cache->lock);
3511 
3512 	TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3513 		blockNumber, transaction));
3514 	if (cache->read_only)
3515 		panic("tried to get writable block on a read-only cache!");
3516 
3517 	return get_writable_cached_block(cache, blockNumber, base, length,
3518 		transaction, false);
3519 }
3520 
3521 
3522 void*
3523 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3524 {
3525 	return block_cache_get_writable_etc(_cache, blockNumber,
3526 		blockNumber, 1, transaction);
3527 }
3528 
3529 
3530 void*
3531 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3532 {
3533 	block_cache* cache = (block_cache*)_cache;
3534 	MutexLocker locker(&cache->lock);
3535 
3536 	TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3537 		blockNumber, transaction));
3538 	if (cache->read_only)
3539 		panic("tried to get empty writable block on a read-only cache!");
3540 
3541 	return get_writable_cached_block((block_cache*)_cache, blockNumber,
3542 		blockNumber, 1, transaction, true);
3543 }
3544 
3545 
3546 const void*
3547 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
3548 {
3549 	block_cache* cache = (block_cache*)_cache;
3550 	MutexLocker locker(&cache->lock);
3551 	bool allocated;
3552 
3553 	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
3554 	if (block == NULL)
3555 		return NULL;
3556 
3557 #if BLOCK_CACHE_DEBUG_CHANGED
3558 	if (block->compare == NULL)
3559 		block->compare = cache->Allocate();
3560 	if (block->compare != NULL)
3561 		memcpy(block->compare, block->current_data, cache->block_size);
3562 #endif
3563 	TB(Get(cache, block));
3564 
3565 	return block->current_data;
3566 }
3567 
3568 
3569 const void*
3570 block_cache_get(void* _cache, off_t blockNumber)
3571 {
3572 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
3573 }
3574 
3575 
3576 /*!	Changes the internal status of a writable block to \a dirty. This can be
3577 	helpful in case you realize you don't need to change that block anymore
3578 	for whatever reason.
3579 
3580 	Note, you must only use this function on blocks that were acquired
3581 	writable!
3582 */
3583 status_t
3584 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3585 	int32 transaction)
3586 {
3587 	block_cache* cache = (block_cache*)_cache;
3588 	MutexLocker locker(&cache->lock);
3589 
3590 	cached_block* block = cache->hash->Lookup(blockNumber);
3591 	if (block == NULL)
3592 		return B_BAD_VALUE;
3593 	if (block->is_dirty == dirty) {
3594 		// there is nothing to do for us
3595 		return B_OK;
3596 	}
3597 
3598 	// TODO: not yet implemented
3599 	if (dirty)
3600 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
3601 
3602 	return B_OK;
3603 }
3604 
3605 
3606 void
3607 block_cache_put(void* _cache, off_t blockNumber)
3608 {
3609 	block_cache* cache = (block_cache*)_cache;
3610 	MutexLocker locker(&cache->lock);
3611 
3612 	put_cached_block(cache, blockNumber);
3613 }
3614 
3615