xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 495060760727dd782c9f8a90db71e5d727f19748)
1 /*
2  * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <KernelExport.h>
15 #include <fs_cache.h>
16 
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <vm/vm_page.h>
26 
27 #include "kernel_debug_config.h"
28 
29 
30 // TODO: this is a naive but growing implementation to test the API:
31 //	block reading/writing is not at all optimized for speed, it will
32 //	just read and write single blocks.
33 // TODO: the retrieval/copy of the original data could be delayed until the
34 //		new data must be written, ie. in low memory situations.
35 
36 //#define TRACE_BLOCK_CACHE
37 #ifdef TRACE_BLOCK_CACHE
38 #	define TRACE(x)	dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 #define TRACE_ALWAYS(x) dprintf x
44 
45 // This macro is used for fatal situations that are acceptable in a running
46 // system, like out of memory situations - should only panic for debugging.
47 #define FATAL(x) panic x
48 
49 static const bigtime_t kTransactionIdleTime = 2000000LL;
50 	// a transaction is considered idle after 2 seconds of inactivity
51 
52 
53 namespace {
54 
55 struct cache_transaction;
56 struct cached_block;
57 struct block_cache;
58 typedef DoublyLinkedListLink<cached_block> block_link;
59 
60 struct cached_block {
61 	cached_block*	next;			// next in hash
62 	cached_block*	transaction_next;
63 	block_link		link;
64 	off_t			block_number;
65 	void*			current_data;
66 		// The data that is seen by everyone using the API; this one is always
67 		// present.
68 	void*			original_data;
69 		// When in a transaction, this contains the original data from before
70 		// the transaction.
71 	void*			parent_data;
72 		// This is a lazily alloced buffer that represents the contents of the
73 		// block in the parent transaction. It may point to current_data if the
74 		// contents have been changed only in the parent transaction, or, if the
75 		// block has been changed in the current sub transaction already, to a
76 		// new block containing the contents changed in the parent transaction.
77 		// If this is NULL, the block has not been changed in the parent
78 		// transaction at all.
79 #if BLOCK_CACHE_DEBUG_CHANGED
80 	void*			compare;
81 #endif
82 	int32			ref_count;
83 	int32			last_accessed;
84 	bool			busy_reading : 1;
85 	bool			busy_writing : 1;
86 	bool			is_writing : 1;
87 		// Block has been checked out for writing without transactions, and
88 		// cannot be written back if set
89 	bool			is_dirty : 1;
90 	bool			unused : 1;
91 	bool			discard : 1;
92 	bool			busy_reading_waiters : 1;
93 	bool			busy_writing_waiters : 1;
94 	cache_transaction* transaction;
95 		// This is the current active transaction, if any, the block is
96 		// currently in (meaning was changed as a part of it).
97 	cache_transaction* previous_transaction;
98 		// This is set to the last transaction that was ended containing this
99 		// block. In this case, the block has not yet written back yet, and
100 		// the changed data is either in current_data, or original_data -- the
101 		// latter if the block is already being part of another transaction.
102 		// There can only be one previous transaction, so when the active
103 		// transaction ends, the changes of the previous transaction have to
104 		// be written back before that transaction becomes the next previous
105 		// transaction.
106 
107 	bool CanBeWritten() const;
108 	int32 LastAccess() const
109 		{ return system_time() / 1000000L - last_accessed; }
110 };
111 
112 typedef DoublyLinkedList<cached_block,
113 	DoublyLinkedListMemberGetLink<cached_block,
114 		&cached_block::link> > block_list;
115 
116 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
117 	static inline void* operator new(size_t size);
118 	static inline void operator delete(void* block);
119 
120 	int32			transaction_id;
121 	int32			events_pending;
122 	int32			events;
123 	transaction_notification_hook hook;
124 	void*			data;
125 	bool			delete_after_event;
126 };
127 
128 typedef DoublyLinkedList<cache_notification> NotificationList;
129 
130 static object_cache* sCacheNotificationCache;
131 
132 struct cache_listener;
133 typedef DoublyLinkedListLink<cache_listener> listener_link;
134 
135 struct cache_listener : cache_notification {
136 	listener_link	link;
137 };
138 
139 typedef DoublyLinkedList<cache_listener,
140 	DoublyLinkedListMemberGetLink<cache_listener,
141 		&cache_listener::link> > ListenerList;
142 
143 void*
144 cache_notification::operator new(size_t size)
145 {
146 	// We can't really know whether something is a cache_notification or a
147 	// cache_listener at runtime, so we just use one object_cache for both
148 	// with the size set to that of the (slightly larger) cache_listener.
149 	// In practice, the vast majority of cache_notifications are really
150 	// cache_listeners, so this is a more than acceptable trade-off.
151 	ASSERT(size <= sizeof(cache_listener));
152 	return object_cache_alloc(sCacheNotificationCache, 0);
153 }
154 
155 void
156 cache_notification::operator delete(void* block)
157 {
158 	object_cache_free(sCacheNotificationCache, block, 0);
159 }
160 
161 
162 struct BlockHash {
163 	typedef off_t			KeyType;
164 	typedef	cached_block	ValueType;
165 
166 	size_t HashKey(KeyType key) const
167 	{
168 		return key;
169 	}
170 
171 	size_t Hash(ValueType* block) const
172 	{
173 		return block->block_number;
174 	}
175 
176 	bool Compare(KeyType key, ValueType* block) const
177 	{
178 		return block->block_number == key;
179 	}
180 
181 	ValueType*& GetLink(ValueType* value) const
182 	{
183 		return value->next;
184 	}
185 };
186 
187 typedef BOpenHashTable<BlockHash> BlockTable;
188 
189 
190 struct TransactionHash {
191 	typedef int32				KeyType;
192 	typedef	cache_transaction	ValueType;
193 
194 	size_t HashKey(KeyType key) const
195 	{
196 		return key;
197 	}
198 
199 	size_t Hash(ValueType* transaction) const;
200 	bool Compare(KeyType key, ValueType* transaction) const;
201 	ValueType*& GetLink(ValueType* value) const;
202 };
203 
204 typedef BOpenHashTable<TransactionHash> TransactionTable;
205 
206 
207 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
208 	BlockTable*		hash;
209 	mutex			lock;
210 	int				fd;
211 	off_t			max_blocks;
212 	size_t			block_size;
213 	int32			next_transaction_id;
214 	cache_transaction* last_transaction;
215 	TransactionTable* transaction_hash;
216 
217 	object_cache*	buffer_cache;
218 	block_list		unused_blocks;
219 	uint32			unused_block_count;
220 
221 	ConditionVariable busy_reading_condition;
222 	uint32			busy_reading_count;
223 	bool			busy_reading_waiters;
224 
225 	ConditionVariable busy_writing_condition;
226 	uint32			busy_writing_count;
227 	bool			busy_writing_waiters;
228 
229 	bigtime_t		last_block_write;
230 	bigtime_t		last_block_write_duration;
231 
232 	uint32			num_dirty_blocks;
233 	bool			read_only;
234 
235 	NotificationList pending_notifications;
236 	ConditionVariable condition_variable;
237 
238 					block_cache(int fd, off_t numBlocks, size_t blockSize,
239 						bool readOnly);
240 					~block_cache();
241 
242 	status_t		Init();
243 
244 	void			Free(void* buffer);
245 	void*			Allocate();
246 	void			FreeBlock(cached_block* block);
247 	cached_block*	NewBlock(off_t blockNumber);
248 	void			FreeBlockParentData(cached_block* block);
249 
250 	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
251 	void			RemoveBlock(cached_block* block);
252 	void			DiscardBlock(cached_block* block);
253 
254 private:
255 	static void		_LowMemoryHandler(void* data, uint32 resources,
256 						int32 level);
257 	cached_block*	_GetUnusedBlock();
258 };
259 
260 struct cache_transaction {
261 	cache_transaction();
262 
263 	cache_transaction* next;
264 	int32			id;
265 	int32			num_blocks;
266 	int32			main_num_blocks;
267 	int32			sub_num_blocks;
268 	cached_block*	first_block;
269 	block_list		blocks;
270 	ListenerList	listeners;
271 	bool			open;
272 	bool			has_sub_transaction;
273 	bigtime_t		last_used;
274 	int32			busy_writing_count;
275 };
276 
277 
278 class BlockWriter {
279 public:
280 								BlockWriter(block_cache* cache,
281 									size_t max = SIZE_MAX);
282 								~BlockWriter();
283 
284 			bool				Add(cached_block* block,
285 									cache_transaction* transaction = NULL);
286 			bool				Add(cache_transaction* transaction,
287 									bool& hasLeftOvers);
288 
289 			status_t			Write(cache_transaction* transaction = NULL,
290 									bool canUnlock = true);
291 
292 			bool				DeletedTransaction() const
293 									{ return fDeletedTransaction; }
294 
295 	static	status_t			WriteBlock(block_cache* cache,
296 									cached_block* block);
297 
298 private:
299 			void*				_Data(cached_block* block) const;
300 			status_t			_WriteBlock(cached_block* block);
301 			void				_BlockDone(cached_block* block,
302 									cache_transaction* transaction);
303 			void				_UnmarkWriting(cached_block* block);
304 
305 	static	int					_CompareBlocks(const void* _blockA,
306 									const void* _blockB);
307 
308 private:
309 	static	const size_t		kBufferSize = 64;
310 
311 			block_cache*		fCache;
312 			cached_block*		fBuffer[kBufferSize];
313 			cached_block**		fBlocks;
314 			size_t				fCount;
315 			size_t				fTotal;
316 			size_t				fCapacity;
317 			size_t				fMax;
318 			status_t			fStatus;
319 			bool				fDeletedTransaction;
320 };
321 
322 
323 class TransactionLocking {
324 public:
325 	inline bool Lock(block_cache* cache)
326 	{
327 		mutex_lock(&cache->lock);
328 
329 		while (cache->busy_writing_count != 0) {
330 			// wait for all blocks to be written
331 			ConditionVariableEntry entry;
332 			cache->busy_writing_condition.Add(&entry);
333 			cache->busy_writing_waiters = true;
334 
335 			mutex_unlock(&cache->lock);
336 
337 			entry.Wait();
338 
339 			mutex_lock(&cache->lock);
340 		}
341 
342 		return true;
343 	}
344 
345 	inline void Unlock(block_cache* cache)
346 	{
347 		mutex_unlock(&cache->lock);
348 	}
349 };
350 
351 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
352 
353 } // namespace
354 
355 
356 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
357 namespace BlockTracing {
358 
359 class Action : public AbstractTraceEntry {
360 public:
361 	Action(block_cache* cache, cached_block* block)
362 		:
363 		fCache(cache),
364 		fBlockNumber(block->block_number),
365 		fIsDirty(block->is_dirty),
366 		fHasOriginal(block->original_data != NULL),
367 		fHasParent(block->parent_data != NULL),
368 		fTransactionID(-1),
369 		fPreviousID(-1)
370 	{
371 		if (block->transaction != NULL)
372 			fTransactionID = block->transaction->id;
373 		if (block->previous_transaction != NULL)
374 			fPreviousID = block->previous_transaction->id;
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
380 			" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
381 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
382 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
383 	}
384 
385 	virtual const char* _Action() const = 0;
386 
387 private:
388 	block_cache*		fCache;
389 	uint64				fBlockNumber;
390 	bool				fIsDirty;
391 	bool				fHasOriginal;
392 	bool				fHasParent;
393 	int32				fTransactionID;
394 	int32				fPreviousID;
395 };
396 
397 class Get : public Action {
398 public:
399 	Get(block_cache* cache, cached_block* block)
400 		:
401 		Action(cache, block)
402 	{
403 		Initialized();
404 	}
405 
406 	virtual const char* _Action() const { return "get"; }
407 };
408 
409 class Put : public Action {
410 public:
411 	Put(block_cache* cache, cached_block* block)
412 		:
413 		Action(cache, block)
414 	{
415 		Initialized();
416 	}
417 
418 	virtual const char* _Action() const { return "put"; }
419 };
420 
421 class Read : public Action {
422 public:
423 	Read(block_cache* cache, cached_block* block)
424 		:
425 		Action(cache, block)
426 	{
427 		Initialized();
428 	}
429 
430 	virtual const char* _Action() const { return "read"; }
431 };
432 
433 class Write : public Action {
434 public:
435 	Write(block_cache* cache, cached_block* block)
436 		:
437 		Action(cache, block)
438 	{
439 		Initialized();
440 	}
441 
442 	virtual const char* _Action() const { return "write"; }
443 };
444 
445 class Flush : public Action {
446 public:
447 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
448 		:
449 		Action(cache, block),
450 		fGetUnused(getUnused)
451 	{
452 		Initialized();
453 	}
454 
455 	virtual const char* _Action() const
456 		{ return fGetUnused ? "get-unused" : "flush"; }
457 
458 private:
459 	bool	fGetUnused;
460 };
461 
462 class Error : public AbstractTraceEntry {
463 public:
464 	Error(block_cache* cache, uint64 blockNumber, const char* message,
465 			status_t status = B_OK)
466 		:
467 		fCache(cache),
468 		fBlockNumber(blockNumber),
469 		fMessage(message),
470 		fStatus(status)
471 	{
472 		Initialized();
473 	}
474 
475 	virtual void AddDump(TraceOutput& out)
476 	{
477 		out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
478 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
479 			fStatus != B_OK ? strerror(fStatus) : "");
480 	}
481 
482 private:
483 	block_cache*	fCache;
484 	uint64			fBlockNumber;
485 	const char*		fMessage;
486 	status_t		fStatus;
487 };
488 
489 #if BLOCK_CACHE_BLOCK_TRACING >= 2
490 class BlockData : public AbstractTraceEntry {
491 public:
492 	enum {
493 		kCurrent	= 0x01,
494 		kParent		= 0x02,
495 		kOriginal	= 0x04
496 	};
497 
498 	BlockData(block_cache* cache, cached_block* block, const char* message)
499 		:
500 		fCache(cache),
501 		fSize(cache->block_size),
502 		fBlockNumber(block->block_number),
503 		fMessage(message)
504 	{
505 		_Allocate(fCurrent, block->current_data);
506 		_Allocate(fParent, block->parent_data);
507 		_Allocate(fOriginal, block->original_data);
508 
509 #if KTRACE_PRINTF_STACK_TRACE
510 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
511 			false);
512 #endif
513 
514 		Initialized();
515 	}
516 
517 	virtual void AddDump(TraceOutput& out)
518 	{
519 		out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
520 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
521 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
522 			fMessage);
523 	}
524 
525 #if KTRACE_PRINTF_STACK_TRACE
526 	virtual void DumpStackTrace(TraceOutput& out)
527 	{
528 		out.PrintStackTrace(fStackTrace);
529 	}
530 #endif
531 
532 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
533 	{
534 		if ((which & kCurrent) != 0)
535 			DumpBlock(kCurrent, offset, size);
536 		if ((which & kParent) != 0)
537 			DumpBlock(kParent, offset, size);
538 		if ((which & kOriginal) != 0)
539 			DumpBlock(kOriginal, offset, size);
540 	}
541 
542 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
543 	{
544 		if (offset > fSize) {
545 			kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
546 			return;
547 		}
548 		if (offset + size > fSize)
549 			size = fSize - offset;
550 
551 		const char* label;
552 		uint8* data;
553 
554 		if ((which & kCurrent) != 0) {
555 			label = "current";
556 			data = fCurrent;
557 		} else if ((which & kParent) != 0) {
558 			label = "parent";
559 			data = fParent;
560 		} else if ((which & kOriginal) != 0) {
561 			label = "original";
562 			data = fOriginal;
563 		} else
564 			return;
565 
566 		kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
567 
568 		static const uint32 kBlockSize = 16;
569 		data += offset;
570 
571 		for (uint32 i = 0; i < size;) {
572 			int start = i;
573 
574 			kprintf("  %04" B_PRIx32 " ", i);
575 			for (; i < start + kBlockSize; i++) {
576 				if (!(i % 4))
577 					kprintf(" ");
578 
579 				if (i >= size)
580 					kprintf("  ");
581 				else
582 					kprintf("%02x", data[i]);
583 			}
584 
585 			kprintf("\n");
586 		}
587 	}
588 
589 private:
590 	void _Allocate(uint8*& target, void* source)
591 	{
592 		if (source == NULL) {
593 			target = NULL;
594 			return;
595 		}
596 
597 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
598 	}
599 
600 	block_cache*	fCache;
601 	uint32			fSize;
602 	uint64			fBlockNumber;
603 	const char*		fMessage;
604 	uint8*			fCurrent;
605 	uint8*			fParent;
606 	uint8*			fOriginal;
607 #if KTRACE_PRINTF_STACK_TRACE
608 	tracing_stack_trace* fStackTrace;
609 #endif
610 };
611 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
612 
613 }	// namespace BlockTracing
614 
615 #	define TB(x) new(std::nothrow) BlockTracing::x;
616 #else
617 #	define TB(x) ;
618 #endif
619 
620 #if BLOCK_CACHE_BLOCK_TRACING >= 2
621 #	define TB2(x) new(std::nothrow) BlockTracing::x;
622 #else
623 #	define TB2(x) ;
624 #endif
625 
626 
627 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
628 namespace TransactionTracing {
629 
630 class Action : public AbstractTraceEntry {
631 public:
632 	Action(const char* label, block_cache* cache,
633 			cache_transaction* transaction)
634 		:
635 		fCache(cache),
636 		fTransaction(transaction),
637 		fID(transaction->id),
638 		fSub(transaction->has_sub_transaction),
639 		fNumBlocks(transaction->num_blocks),
640 		fSubNumBlocks(transaction->sub_num_blocks)
641 	{
642 		strlcpy(fLabel, label, sizeof(fLabel));
643 		Initialized();
644 	}
645 
646 	virtual void AddDump(TraceOutput& out)
647 	{
648 		out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
649 			", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
650 			fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
651 	}
652 
653 private:
654 	char				fLabel[12];
655 	block_cache*		fCache;
656 	cache_transaction*	fTransaction;
657 	int32				fID;
658 	bool				fSub;
659 	int32				fNumBlocks;
660 	int32				fSubNumBlocks;
661 };
662 
663 class Detach : public AbstractTraceEntry {
664 public:
665 	Detach(block_cache* cache, cache_transaction* transaction,
666 			cache_transaction* newTransaction)
667 		:
668 		fCache(cache),
669 		fTransaction(transaction),
670 		fID(transaction->id),
671 		fSub(transaction->has_sub_transaction),
672 		fNewTransaction(newTransaction),
673 		fNewID(newTransaction->id)
674 	{
675 		Initialized();
676 	}
677 
678 	virtual void AddDump(TraceOutput& out)
679 	{
680 		out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
681 			"from transaction %p (id %" B_PRId32 ")%s",
682 			fCache, fNewTransaction, fNewID, fTransaction, fID,
683 			fSub ? " sub" : "");
684 	}
685 
686 private:
687 	block_cache*		fCache;
688 	cache_transaction*	fTransaction;
689 	int32				fID;
690 	bool				fSub;
691 	cache_transaction*	fNewTransaction;
692 	int32				fNewID;
693 };
694 
695 class Abort : public AbstractTraceEntry {
696 public:
697 	Abort(block_cache* cache, cache_transaction* transaction)
698 		:
699 		fCache(cache),
700 		fTransaction(transaction),
701 		fID(transaction->id),
702 		fNumBlocks(0)
703 	{
704 		bool isSub = transaction->has_sub_transaction;
705 		fNumBlocks = isSub ? transaction->sub_num_blocks
706 			: transaction->num_blocks;
707 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
708 		if (fBlocks != NULL) {
709 			cached_block* block = transaction->first_block;
710 			for (int32 i = 0; block != NULL && i < fNumBlocks;
711 					block = block->transaction_next) {
712 				fBlocks[i++] = block->block_number;
713 			}
714 		} else
715 			fNumBlocks = 0;
716 
717 #if KTRACE_PRINTF_STACK_TRACE
718 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
719 			false);
720 #endif
721 
722 		Initialized();
723 	}
724 
725 	virtual void AddDump(TraceOutput& out)
726 	{
727 		out.Print("block cache %p, abort transaction "
728 			"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
729 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
730 			out.Print(" %" B_PRIdOFF, fBlocks[i]);
731 	}
732 
733 #if KTRACE_PRINTF_STACK_TRACE
734 	virtual void DumpStackTrace(TraceOutput& out)
735 	{
736 		out.PrintStackTrace(fStackTrace);
737 	}
738 #endif
739 
740 private:
741 	block_cache*		fCache;
742 	cache_transaction*	fTransaction;
743 	int32				fID;
744 	off_t*				fBlocks;
745 	int32				fNumBlocks;
746 #if KTRACE_PRINTF_STACK_TRACE
747 	tracing_stack_trace* fStackTrace;
748 #endif
749 };
750 
751 }	// namespace TransactionTracing
752 
753 #	define T(x) new(std::nothrow) TransactionTracing::x;
754 #else
755 #	define T(x) ;
756 #endif
757 
758 
759 static DoublyLinkedList<block_cache> sCaches;
760 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
761 static mutex sCachesMemoryUseLock
762 	= MUTEX_INITIALIZER("block caches memory use");
763 static size_t sUsedMemory;
764 static sem_id sEventSemaphore;
765 static mutex sNotificationsLock
766 	= MUTEX_INITIALIZER("block cache notifications");
767 static thread_id sNotifierWriterThread;
768 static DoublyLinkedListLink<block_cache> sMarkCache;
769 	// TODO: this only works if the link is the first entry of block_cache
770 static object_cache* sBlockCache;
771 
772 
773 //	#pragma mark - notifications/listener
774 
775 
776 /*!	Checks whether or not this is an event that closes a transaction. */
777 static inline bool
778 is_closing_event(int32 event)
779 {
780 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
781 }
782 
783 
784 static inline bool
785 is_written_event(int32 event)
786 {
787 	return (event & TRANSACTION_WRITTEN) != 0;
788 }
789 
790 
791 /*!	From the specified \a notification, it will remove the lowest pending
792 	event, and return that one in \a _event.
793 	If there is no pending event anymore, it will return \c false.
794 */
795 static bool
796 get_next_pending_event(cache_notification* notification, int32* _event)
797 {
798 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
799 		int32 pending = atomic_and(&notification->events_pending,
800 			~eventMask);
801 
802 		bool more = (pending & ~eventMask) != 0;
803 
804 		if ((pending & eventMask) != 0) {
805 			*_event = eventMask;
806 			return more;
807 		}
808 	}
809 
810 	return false;
811 }
812 
813 
814 static void
815 flush_pending_notifications(block_cache* cache)
816 {
817 	ASSERT_LOCKED_MUTEX(&sCachesLock);
818 
819 	while (true) {
820 		MutexLocker locker(sNotificationsLock);
821 
822 		cache_notification* notification = cache->pending_notifications.Head();
823 		if (notification == NULL)
824 			return;
825 
826 		bool deleteAfterEvent = false;
827 		int32 event = -1;
828 		if (!get_next_pending_event(notification, &event)) {
829 			// remove the notification if this was the last pending event
830 			cache->pending_notifications.Remove(notification);
831 			deleteAfterEvent = notification->delete_after_event;
832 		}
833 
834 		if (event >= 0) {
835 			// Notify listener, we need to copy the notification, as it might
836 			// be removed when we unlock the list.
837 			cache_notification copy = *notification;
838 			locker.Unlock();
839 
840 			copy.hook(copy.transaction_id, event, copy.data);
841 
842 			locker.Lock();
843 		}
844 
845 		if (deleteAfterEvent)
846 			delete notification;
847 	}
848 }
849 
850 
851 /*!	Flushes all pending notifications by calling the appropriate hook
852 	functions.
853 	Must not be called with a cache lock held.
854 */
855 static void
856 flush_pending_notifications()
857 {
858 	MutexLocker _(sCachesLock);
859 
860 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
861 	while (iterator.HasNext()) {
862 		block_cache* cache = iterator.Next();
863 
864 		flush_pending_notifications(cache);
865 	}
866 }
867 
868 
869 /*!	Initializes the \a notification as specified. */
870 static void
871 set_notification(cache_transaction* transaction,
872 	cache_notification &notification, int32 events,
873 	transaction_notification_hook hook, void* data)
874 {
875 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
876 	notification.events_pending = 0;
877 	notification.events = events;
878 	notification.hook = hook;
879 	notification.data = data;
880 	notification.delete_after_event = false;
881 }
882 
883 
884 /*!	Makes sure the notification is deleted. It either deletes it directly,
885 	when possible, or marks it for deletion if the notification is pending.
886 */
887 static void
888 delete_notification(cache_notification* notification)
889 {
890 	MutexLocker locker(sNotificationsLock);
891 
892 	if (notification->events_pending != 0)
893 		notification->delete_after_event = true;
894 	else
895 		delete notification;
896 }
897 
898 
899 /*!	Adds the notification to the pending notifications list, or, if it's
900 	already part of it, updates its events_pending field.
901 	Also marks the notification to be deleted if \a deleteNotification
902 	is \c true.
903 	Triggers the notifier thread to run.
904 */
905 static void
906 add_notification(block_cache* cache, cache_notification* notification,
907 	int32 event, bool deleteNotification)
908 {
909 	if (notification->hook == NULL)
910 		return;
911 
912 	int32 pending = atomic_or(&notification->events_pending, event);
913 	if (pending == 0) {
914 		// not yet part of the notification list
915 		MutexLocker locker(sNotificationsLock);
916 		if (deleteNotification)
917 			notification->delete_after_event = true;
918 		cache->pending_notifications.Add(notification);
919 	} else if (deleteNotification) {
920 		// we might need to delete it ourselves if we're late
921 		delete_notification(notification);
922 	}
923 
924 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
925 		// We're probably still holding some locks that makes rescheduling
926 		// not a good idea at this point.
927 }
928 
929 
930 /*!	Notifies all interested listeners of this transaction about the \a event.
931 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
932 	TRANSACTION_ABORTED), all listeners except those listening to
933 	TRANSACTION_WRITTEN will be removed.
934 */
935 static void
936 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
937 	int32 event)
938 {
939 	T(Action("notify", cache, transaction));
940 
941 	bool isClosing = is_closing_event(event);
942 	bool isWritten = is_written_event(event);
943 
944 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
945 	while (iterator.HasNext()) {
946 		cache_listener* listener = iterator.Next();
947 
948 		bool remove = (isClosing && !is_written_event(listener->events))
949 			|| (isWritten && is_written_event(listener->events));
950 		if (remove)
951 			iterator.Remove();
952 
953 		if ((listener->events & event) != 0)
954 			add_notification(cache, listener, event, remove);
955 		else if (remove)
956 			delete_notification(listener);
957 	}
958 }
959 
960 
961 /*!	Removes and deletes all listeners that are still monitoring this
962 	transaction.
963 */
964 static void
965 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
966 {
967 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
968 	while (iterator.HasNext()) {
969 		cache_listener* listener = iterator.Next();
970 		iterator.Remove();
971 
972 		delete_notification(listener);
973 	}
974 }
975 
976 
977 static status_t
978 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
979 	int32 events, transaction_notification_hook hookFunction, void* data)
980 {
981 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
982 	while (iterator.HasNext()) {
983 		cache_listener* listener = iterator.Next();
984 
985 		if (listener->data == data && listener->hook == hookFunction) {
986 			// this listener already exists, just update it
987 			listener->events |= events;
988 			return B_OK;
989 		}
990 	}
991 
992 	cache_listener* listener = new cache_listener;
993 	if (listener == NULL)
994 		return B_NO_MEMORY;
995 
996 	set_notification(transaction, *listener, events, hookFunction, data);
997 	transaction->listeners.Add(listener);
998 	return B_OK;
999 }
1000 
1001 
1002 //	#pragma mark - private transaction
1003 
1004 
1005 cache_transaction::cache_transaction()
1006 {
1007 	num_blocks = 0;
1008 	main_num_blocks = 0;
1009 	sub_num_blocks = 0;
1010 	first_block = NULL;
1011 	open = true;
1012 	has_sub_transaction = false;
1013 	last_used = system_time();
1014 	busy_writing_count = 0;
1015 }
1016 
1017 
1018 static void
1019 delete_transaction(block_cache* cache, cache_transaction* transaction)
1020 {
1021 	if (cache->last_transaction == transaction)
1022 		cache->last_transaction = NULL;
1023 
1024 	remove_transaction_listeners(cache, transaction);
1025 	delete transaction;
1026 }
1027 
1028 
1029 static cache_transaction*
1030 lookup_transaction(block_cache* cache, int32 id)
1031 {
1032 	return cache->transaction_hash->Lookup(id);
1033 }
1034 
1035 
1036 size_t TransactionHash::Hash(cache_transaction* transaction) const
1037 {
1038 	return transaction->id;
1039 }
1040 
1041 
1042 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
1043 {
1044 	return transaction->id == key;
1045 }
1046 
1047 
1048 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
1049 {
1050 	return value->next;
1051 }
1052 
1053 
1054 /*!	Writes back any changes made to blocks in \a transaction that are still
1055 	part of a previous transacton.
1056 */
1057 static status_t
1058 write_blocks_in_previous_transaction(block_cache* cache,
1059 	cache_transaction* transaction)
1060 {
1061 	BlockWriter writer(cache);
1062 
1063 	cached_block* block = transaction->first_block;
1064 	for (; block != NULL; block = block->transaction_next) {
1065 		if (block->previous_transaction != NULL) {
1066 			// need to write back pending changes
1067 			writer.Add(block);
1068 		}
1069 	}
1070 
1071 	return writer.Write();
1072 }
1073 
1074 
1075 //	#pragma mark - cached_block
1076 
1077 
1078 bool
1079 cached_block::CanBeWritten() const
1080 {
1081 	return !busy_writing && !busy_reading
1082 		&& (previous_transaction != NULL
1083 			|| (transaction == NULL && is_dirty && !is_writing));
1084 }
1085 
1086 
1087 //	#pragma mark - BlockWriter
1088 
1089 
1090 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1091 	:
1092 	fCache(cache),
1093 	fBlocks(fBuffer),
1094 	fCount(0),
1095 	fTotal(0),
1096 	fCapacity(kBufferSize),
1097 	fMax(max),
1098 	fStatus(B_OK),
1099 	fDeletedTransaction(false)
1100 {
1101 }
1102 
1103 
1104 BlockWriter::~BlockWriter()
1105 {
1106 	if (fBlocks != fBuffer)
1107 		free(fBlocks);
1108 }
1109 
1110 
1111 /*!	Adds the specified block to the to be written array. If no more blocks can
1112 	be added, false is returned, otherwise true.
1113 */
1114 bool
1115 BlockWriter::Add(cached_block* block, cache_transaction* transaction)
1116 {
1117 	ASSERT(block->CanBeWritten());
1118 
1119 	if (fTotal == fMax)
1120 		return false;
1121 
1122 	if (fCount >= fCapacity) {
1123 		// Enlarge array if necessary
1124 		cached_block** newBlocks;
1125 		size_t newCapacity = max_c(256, fCapacity * 2);
1126 		if (fBlocks == fBuffer)
1127 			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1128 		else {
1129 			newBlocks = (cached_block**)realloc(fBlocks,
1130 				newCapacity * sizeof(void*));
1131 		}
1132 
1133 		if (newBlocks == NULL) {
1134 			// Allocating a larger array failed - we need to write back what
1135 			// we have synchronously now (this will also clear the array)
1136 			Write(transaction, false);
1137 		} else {
1138 			if (fBlocks == fBuffer)
1139 				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1140 
1141 			fBlocks = newBlocks;
1142 			fCapacity = newCapacity;
1143 		}
1144 	}
1145 
1146 	fBlocks[fCount++] = block;
1147 	fTotal++;
1148 	block->busy_writing = true;
1149 	fCache->busy_writing_count++;
1150 	if (block->previous_transaction != NULL)
1151 		block->previous_transaction->busy_writing_count++;
1152 
1153 	return true;
1154 }
1155 
1156 
1157 /*!	Adds all blocks of the specified transaction to the to be written array.
1158 	If no more blocks can be added, false is returned, otherwise true.
1159 */
1160 bool
1161 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
1162 {
1163 	ASSERT(!transaction->open);
1164 
1165 	if (transaction->busy_writing_count != 0) {
1166 		hasLeftOvers = true;
1167 		return true;
1168 	}
1169 
1170 	hasLeftOvers = false;
1171 
1172 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1173 	while (cached_block* block = blockIterator.Next()) {
1174 		if (!block->CanBeWritten()) {
1175 			// This block was already part of a previous transaction within this
1176 			// writer
1177 			hasLeftOvers = true;
1178 			continue;
1179 		}
1180 		if (!Add(block, transaction))
1181 			return false;
1182 
1183 		if (DeletedTransaction())
1184 			break;
1185 	}
1186 
1187 	return true;
1188 }
1189 
1190 
1191 /*! Cache must be locked when calling this method, but it will be unlocked
1192 	while the blocks are written back.
1193 */
1194 status_t
1195 BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
1196 {
1197 	if (fCount == 0)
1198 		return B_OK;
1199 
1200 	if (canUnlock)
1201 		mutex_unlock(&fCache->lock);
1202 
1203 	// Sort blocks in their on-disk order
1204 	// TODO: ideally, this should be handled by the I/O scheduler
1205 
1206 	qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1207 	fDeletedTransaction = false;
1208 
1209 	bigtime_t start = system_time();
1210 
1211 	for (uint32 i = 0; i < fCount; i++) {
1212 		status_t status = _WriteBlock(fBlocks[i]);
1213 		if (status != B_OK) {
1214 			// propagate to global error handling
1215 			if (fStatus == B_OK)
1216 				fStatus = status;
1217 
1218 			_UnmarkWriting(fBlocks[i]);
1219 			fBlocks[i] = NULL;
1220 				// This block will not be marked clean
1221 		}
1222 	}
1223 
1224 	bigtime_t finish = system_time();
1225 
1226 	if (canUnlock)
1227 		mutex_lock(&fCache->lock);
1228 
1229 	if (fStatus == B_OK && fCount >= 8) {
1230 		fCache->last_block_write = finish;
1231 		fCache->last_block_write_duration = (fCache->last_block_write - start)
1232 			/ fCount;
1233 	}
1234 
1235 	for (uint32 i = 0; i < fCount; i++)
1236 		_BlockDone(fBlocks[i], transaction);
1237 
1238 	fCount = 0;
1239 	return fStatus;
1240 }
1241 
1242 
1243 /*!	Writes the specified \a block back to disk. It will always only write back
1244 	the oldest change of the block if it is part of more than one transaction.
1245 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1246 	delete transactions when they are no longer used, and \a deleteTransaction
1247 	is \c true.
1248 */
1249 /*static*/ status_t
1250 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1251 {
1252 	BlockWriter writer(cache);
1253 
1254 	writer.Add(block);
1255 	return writer.Write();
1256 }
1257 
1258 
1259 void*
1260 BlockWriter::_Data(cached_block* block) const
1261 {
1262 	return block->previous_transaction != NULL && block->original_data != NULL
1263 		? block->original_data : block->current_data;
1264 		// We first need to write back changes from previous transactions
1265 }
1266 
1267 
1268 status_t
1269 BlockWriter::_WriteBlock(cached_block* block)
1270 {
1271 	ASSERT(block->busy_writing);
1272 
1273 	TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
1274 	TB(Write(fCache, block));
1275 	TB2(BlockData(fCache, block, "before write"));
1276 
1277 	size_t blockSize = fCache->block_size;
1278 
1279 	ssize_t written = write_pos(fCache->fd,
1280 		block->block_number * blockSize, _Data(block), blockSize);
1281 
1282 	if (written != (ssize_t)blockSize) {
1283 		TB(Error(fCache, block->block_number, "write failed", written));
1284 		TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number,
1285 			strerror(errno)));
1286 		if (written < 0)
1287 			return errno;
1288 
1289 		return B_IO_ERROR;
1290 	}
1291 
1292 	return B_OK;
1293 }
1294 
1295 
1296 void
1297 BlockWriter::_BlockDone(cached_block* block,
1298 	cache_transaction* transaction)
1299 {
1300 	if (block == NULL) {
1301 		// An error occured when trying to write this block
1302 		return;
1303 	}
1304 
1305 	if (fCache->num_dirty_blocks > 0)
1306 		fCache->num_dirty_blocks--;
1307 
1308 	if (_Data(block) == block->current_data)
1309 		block->is_dirty = false;
1310 
1311 	_UnmarkWriting(block);
1312 
1313 	cache_transaction* previous = block->previous_transaction;
1314 	if (previous != NULL) {
1315 		previous->blocks.Remove(block);
1316 		block->previous_transaction = NULL;
1317 
1318 		if (block->original_data != NULL && block->transaction == NULL) {
1319 			// This block is not part of a transaction, so it does not need
1320 			// its original pointer anymore.
1321 			fCache->Free(block->original_data);
1322 			block->original_data = NULL;
1323 		}
1324 
1325 		// Has the previous transaction been finished with that write?
1326 		if (--previous->num_blocks == 0) {
1327 			TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1328 			T(Action("written", fCache, previous));
1329 
1330 			notify_transaction_listeners(fCache, previous,
1331 				TRANSACTION_WRITTEN);
1332 
1333 			if (transaction != NULL) {
1334 				// This function is called while iterating transaction_hash. We
1335 				// use RemoveUnchecked so the iterator is still valid. A regular
1336 				// Remove can trigger a resize of the hash table which would
1337 				// result in the linked items in the table changing order.
1338 				fCache->transaction_hash->RemoveUnchecked(transaction);
1339 			} else
1340 				fCache->transaction_hash->Remove(previous);
1341 
1342 			delete_transaction(fCache, previous);
1343 			fDeletedTransaction = true;
1344 		}
1345 	}
1346 	if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1347 		// the block is no longer used
1348 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1349 		block->unused = true;
1350 		fCache->unused_blocks.Add(block);
1351 		fCache->unused_block_count++;
1352 	}
1353 
1354 	TB2(BlockData(fCache, block, "after write"));
1355 }
1356 
1357 
1358 void
1359 BlockWriter::_UnmarkWriting(cached_block* block)
1360 {
1361 	block->busy_writing = false;
1362 	if (block->previous_transaction != NULL)
1363 		block->previous_transaction->busy_writing_count--;
1364 	fCache->busy_writing_count--;
1365 
1366 	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1367 		|| block->busy_writing_waiters) {
1368 		fCache->busy_writing_waiters = false;
1369 		block->busy_writing_waiters = false;
1370 		fCache->busy_writing_condition.NotifyAll();
1371 	}
1372 }
1373 
1374 
1375 /*static*/ int
1376 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1377 {
1378 	cached_block* blockA = *(cached_block**)_blockA;
1379 	cached_block* blockB = *(cached_block**)_blockB;
1380 
1381 	off_t diff = blockA->block_number - blockB->block_number;
1382 	if (diff > 0)
1383 		return 1;
1384 
1385 	return diff < 0 ? -1 : 0;
1386 }
1387 
1388 
1389 //	#pragma mark - block_cache
1390 
1391 
1392 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1393 		bool readOnly)
1394 	:
1395 	hash(NULL),
1396 	fd(_fd),
1397 	max_blocks(numBlocks),
1398 	block_size(blockSize),
1399 	next_transaction_id(1),
1400 	last_transaction(NULL),
1401 	transaction_hash(NULL),
1402 	buffer_cache(NULL),
1403 	unused_block_count(0),
1404 	busy_reading_count(0),
1405 	busy_reading_waiters(false),
1406 	busy_writing_count(0),
1407 	busy_writing_waiters(0),
1408 	last_block_write(0),
1409 	last_block_write_duration(0),
1410 	num_dirty_blocks(0),
1411 	read_only(readOnly)
1412 {
1413 }
1414 
1415 
1416 /*! Should be called with the cache's lock held. */
1417 block_cache::~block_cache()
1418 {
1419 	unregister_low_resource_handler(&_LowMemoryHandler, this);
1420 
1421 	delete transaction_hash;
1422 	delete hash;
1423 
1424 	delete_object_cache(buffer_cache);
1425 
1426 	mutex_destroy(&lock);
1427 }
1428 
1429 
1430 status_t
1431 block_cache::Init()
1432 {
1433 	busy_reading_condition.Init(this, "cache block busy_reading");
1434 	busy_writing_condition.Init(this, "cache block busy writing");
1435 	condition_variable.Init(this, "cache transaction sync");
1436 	mutex_init(&lock, "block cache");
1437 
1438 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1439 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1440 	if (buffer_cache == NULL)
1441 		return B_NO_MEMORY;
1442 
1443 	hash = new BlockTable();
1444 	if (hash == NULL || hash->Init(1024) != B_OK)
1445 		return B_NO_MEMORY;
1446 
1447 	transaction_hash = new(std::nothrow) TransactionTable();
1448 	if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
1449 		return B_NO_MEMORY;
1450 
1451 	return register_low_resource_handler(&_LowMemoryHandler, this,
1452 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1453 			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1454 }
1455 
1456 
1457 void
1458 block_cache::Free(void* buffer)
1459 {
1460 	if (buffer != NULL)
1461 		object_cache_free(buffer_cache, buffer, 0);
1462 }
1463 
1464 
1465 void*
1466 block_cache::Allocate()
1467 {
1468 	void* block = object_cache_alloc(buffer_cache, 0);
1469 	if (block != NULL)
1470 		return block;
1471 
1472 	// recycle existing before allocating a new one
1473 	RemoveUnusedBlocks(100);
1474 
1475 	return object_cache_alloc(buffer_cache, 0);
1476 }
1477 
1478 
1479 void
1480 block_cache::FreeBlock(cached_block* block)
1481 {
1482 	Free(block->current_data);
1483 
1484 	if (block->original_data != NULL || block->parent_data != NULL) {
1485 		panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1486 			block->block_number, block->original_data, block->parent_data);
1487 	}
1488 
1489 #if BLOCK_CACHE_DEBUG_CHANGED
1490 	Free(block->compare);
1491 #endif
1492 
1493 	object_cache_free(sBlockCache, block, 0);
1494 }
1495 
1496 
1497 /*! Allocates a new block for \a blockNumber, ready for use */
1498 cached_block*
1499 block_cache::NewBlock(off_t blockNumber)
1500 {
1501 	cached_block* block = NULL;
1502 
1503 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1504 			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1505 		// recycle existing instead of allocating a new one
1506 		block = _GetUnusedBlock();
1507 	}
1508 	if (block == NULL) {
1509 		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1510 		if (block != NULL) {
1511 			block->current_data = Allocate();
1512 			if (block->current_data == NULL) {
1513 				object_cache_free(sBlockCache, block, 0);
1514 				return NULL;
1515 			}
1516 		} else {
1517 			TB(Error(this, blockNumber, "allocation failed"));
1518 			dprintf("block allocation failed, unused list is %sempty.\n",
1519 				unused_blocks.IsEmpty() ? "" : "not ");
1520 
1521 			// allocation failed, try to reuse an unused block
1522 			block = _GetUnusedBlock();
1523 			if (block == NULL) {
1524 				TB(Error(this, blockNumber, "get unused failed"));
1525 				FATAL(("could not allocate block!\n"));
1526 				return NULL;
1527 			}
1528 		}
1529 	}
1530 
1531 	block->block_number = blockNumber;
1532 	block->ref_count = 0;
1533 	block->last_accessed = 0;
1534 	block->transaction_next = NULL;
1535 	block->transaction = block->previous_transaction = NULL;
1536 	block->original_data = NULL;
1537 	block->parent_data = NULL;
1538 	block->busy_reading = false;
1539 	block->busy_writing = false;
1540 	block->is_writing = false;
1541 	block->is_dirty = false;
1542 	block->unused = false;
1543 	block->discard = false;
1544 	block->busy_reading_waiters = false;
1545 	block->busy_writing_waiters = false;
1546 #if BLOCK_CACHE_DEBUG_CHANGED
1547 	block->compare = NULL;
1548 #endif
1549 
1550 	return block;
1551 }
1552 
1553 
1554 void
1555 block_cache::FreeBlockParentData(cached_block* block)
1556 {
1557 	ASSERT(block->parent_data != NULL);
1558 	if (block->parent_data != block->current_data)
1559 		Free(block->parent_data);
1560 	block->parent_data = NULL;
1561 }
1562 
1563 
1564 void
1565 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1566 {
1567 	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1568 
1569 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1570 			cached_block* block = iterator.Next();) {
1571 		if (minSecondsOld >= block->LastAccess()) {
1572 			// The list is sorted by last access
1573 			break;
1574 		}
1575 		if (block->busy_reading || block->busy_writing)
1576 			continue;
1577 
1578 		TB(Flush(this, block));
1579 		TRACE(("  remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1580 			block->block_number, block->last_accessed));
1581 
1582 		// this can only happen if no transactions are used
1583 		if (block->is_dirty && !block->discard) {
1584 			if (block->busy_writing)
1585 				continue;
1586 
1587 			BlockWriter::WriteBlock(this, block);
1588 		}
1589 
1590 		// remove block from lists
1591 		iterator.Remove();
1592 		unused_block_count--;
1593 		RemoveBlock(block);
1594 
1595 		if (--count <= 0)
1596 			break;
1597 	}
1598 }
1599 
1600 
1601 void
1602 block_cache::RemoveBlock(cached_block* block)
1603 {
1604 	hash->Remove(block);
1605 	FreeBlock(block);
1606 }
1607 
1608 
1609 /*!	Discards the block from a transaction (this method must not be called
1610 	for blocks not part of a transaction).
1611 */
1612 void
1613 block_cache::DiscardBlock(cached_block* block)
1614 {
1615 	ASSERT(block->discard);
1616 	ASSERT(block->previous_transaction == NULL);
1617 
1618 	if (block->parent_data != NULL)
1619 		FreeBlockParentData(block);
1620 
1621 	if (block->original_data != NULL) {
1622 		Free(block->original_data);
1623 		block->original_data = NULL;
1624 	}
1625 
1626 	RemoveBlock(block);
1627 }
1628 
1629 
1630 void
1631 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1632 {
1633 	TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1634 
1635 	// free some blocks according to the low memory state
1636 	// (if there is enough memory left, we don't free any)
1637 
1638 	block_cache* cache = (block_cache*)data;
1639 	if (cache->unused_block_count <= 1)
1640 		return;
1641 
1642 	int32 free = 0;
1643 	int32 secondsOld = 0;
1644 	switch (level) {
1645 		case B_NO_LOW_RESOURCE:
1646 			return;
1647 		case B_LOW_RESOURCE_NOTE:
1648 			free = cache->unused_block_count / 4;
1649 			secondsOld = 120;
1650 			break;
1651 		case B_LOW_RESOURCE_WARNING:
1652 			free = cache->unused_block_count / 2;
1653 			secondsOld = 10;
1654 			break;
1655 		case B_LOW_RESOURCE_CRITICAL:
1656 			free = cache->unused_block_count - 1;
1657 			secondsOld = 0;
1658 			break;
1659 	}
1660 
1661 	MutexLocker locker(&cache->lock);
1662 
1663 	if (!locker.IsLocked()) {
1664 		// If our block_cache were deleted, it could be that we had
1665 		// been called before that deletion went through, therefore,
1666 		// acquiring its lock might fail.
1667 		return;
1668 	}
1669 
1670 #ifdef TRACE_BLOCK_CACHE
1671 	uint32 oldUnused = cache->unused_block_count;
1672 #endif
1673 
1674 	cache->RemoveUnusedBlocks(free, secondsOld);
1675 
1676 	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1677 		cache, oldUnused, cache->unused_block_count));
1678 }
1679 
1680 
1681 cached_block*
1682 block_cache::_GetUnusedBlock()
1683 {
1684 	TRACE(("block_cache: get unused block\n"));
1685 
1686 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1687 			cached_block* block = iterator.Next();) {
1688 		TB(Flush(this, block, true));
1689 		// this can only happen if no transactions are used
1690 		if (block->is_dirty && !block->busy_writing && !block->discard)
1691 			BlockWriter::WriteBlock(this, block);
1692 
1693 		// remove block from lists
1694 		iterator.Remove();
1695 		unused_block_count--;
1696 		hash->Remove(block);
1697 
1698 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1699 		block->unused = false;
1700 
1701 		// TODO: see if compare data is handled correctly here!
1702 #if BLOCK_CACHE_DEBUG_CHANGED
1703 		if (block->compare != NULL)
1704 			Free(block->compare);
1705 #endif
1706 		return block;
1707 	}
1708 
1709 	return NULL;
1710 }
1711 
1712 
1713 //	#pragma mark - private block functions
1714 
1715 
1716 /*!	Cache must be locked.
1717 */
1718 static void
1719 mark_block_busy_reading(block_cache* cache, cached_block* block)
1720 {
1721 	block->busy_reading = true;
1722 	cache->busy_reading_count++;
1723 }
1724 
1725 
1726 /*!	Cache must be locked.
1727 */
1728 static void
1729 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1730 {
1731 	block->busy_reading = false;
1732 	cache->busy_reading_count--;
1733 
1734 	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1735 		|| block->busy_reading_waiters) {
1736 		cache->busy_reading_waiters = false;
1737 		block->busy_reading_waiters = false;
1738 		cache->busy_reading_condition.NotifyAll();
1739 	}
1740 }
1741 
1742 
1743 /*!	Cache must be locked.
1744 */
1745 static void
1746 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1747 {
1748 	while (block->busy_reading) {
1749 		// wait for at least the specified block to be read in
1750 		ConditionVariableEntry entry;
1751 		cache->busy_reading_condition.Add(&entry);
1752 		block->busy_reading_waiters = true;
1753 
1754 		mutex_unlock(&cache->lock);
1755 
1756 		entry.Wait();
1757 
1758 		mutex_lock(&cache->lock);
1759 	}
1760 }
1761 
1762 
1763 /*!	Cache must be locked.
1764 */
1765 static void
1766 wait_for_busy_reading_blocks(block_cache* cache)
1767 {
1768 	while (cache->busy_reading_count != 0) {
1769 		// wait for all blocks to be read in
1770 		ConditionVariableEntry entry;
1771 		cache->busy_reading_condition.Add(&entry);
1772 		cache->busy_reading_waiters = true;
1773 
1774 		mutex_unlock(&cache->lock);
1775 
1776 		entry.Wait();
1777 
1778 		mutex_lock(&cache->lock);
1779 	}
1780 }
1781 
1782 
1783 /*!	Cache must be locked.
1784 */
1785 static void
1786 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1787 {
1788 	while (block->busy_writing) {
1789 		// wait for all blocks to be written back
1790 		ConditionVariableEntry entry;
1791 		cache->busy_writing_condition.Add(&entry);
1792 		block->busy_writing_waiters = true;
1793 
1794 		mutex_unlock(&cache->lock);
1795 
1796 		entry.Wait();
1797 
1798 		mutex_lock(&cache->lock);
1799 	}
1800 }
1801 
1802 
1803 /*!	Cache must be locked.
1804 */
1805 static void
1806 wait_for_busy_writing_blocks(block_cache* cache)
1807 {
1808 	while (cache->busy_writing_count != 0) {
1809 		// wait for all blocks to be written back
1810 		ConditionVariableEntry entry;
1811 		cache->busy_writing_condition.Add(&entry);
1812 		cache->busy_writing_waiters = true;
1813 
1814 		mutex_unlock(&cache->lock);
1815 
1816 		entry.Wait();
1817 
1818 		mutex_lock(&cache->lock);
1819 	}
1820 }
1821 
1822 
1823 /*!	Removes a reference from the specified \a block. If this was the last
1824 	reference, the block is moved into the unused list.
1825 	In low memory situations, it will also free some blocks from that list,
1826 	but not necessarily the \a block it just released.
1827 */
1828 static void
1829 put_cached_block(block_cache* cache, cached_block* block)
1830 {
1831 #if BLOCK_CACHE_DEBUG_CHANGED
1832 	if (!block->is_dirty && block->compare != NULL
1833 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1834 		dprintf("new block:\n");
1835 		dump_block((const char*)block->current_data, 256, "  ");
1836 		dprintf("unchanged block:\n");
1837 		dump_block((const char*)block->compare, 256, "  ");
1838 		BlockWriter::WriteBlock(cache, block);
1839 		panic("block_cache: supposed to be clean block was changed!\n");
1840 
1841 		cache->Free(block->compare);
1842 		block->compare = NULL;
1843 	}
1844 #endif
1845 	TB(Put(cache, block));
1846 
1847 	if (block->ref_count < 1) {
1848 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1849 		return;
1850 	}
1851 
1852 	if (--block->ref_count == 0
1853 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1854 		// This block is not used anymore, and not part of any transaction
1855 		block->is_writing = false;
1856 
1857 		if (block->discard) {
1858 			cache->RemoveBlock(block);
1859 		} else {
1860 			// put this block in the list of unused blocks
1861 			ASSERT(!block->unused);
1862 			block->unused = true;
1863 
1864 			ASSERT(block->original_data == NULL && block->parent_data == NULL);
1865 			cache->unused_blocks.Add(block);
1866 			cache->unused_block_count++;
1867 		}
1868 	}
1869 }
1870 
1871 
1872 static void
1873 put_cached_block(block_cache* cache, off_t blockNumber)
1874 {
1875 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1876 		panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1877 			blockNumber, cache->max_blocks - 1);
1878 	}
1879 
1880 	cached_block* block = cache->hash->Lookup(blockNumber);
1881 	if (block != NULL)
1882 		put_cached_block(cache, block);
1883 	else {
1884 		TB(Error(cache, blockNumber, "put unknown"));
1885 	}
1886 }
1887 
1888 
1889 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1890 	there, or reads it from the disk.
1891 	You need to have the cache locked when calling this function.
1892 
1893 	\param _allocated tells you whether or not a new block has been allocated
1894 		to satisfy your request.
1895 	\param readBlock if \c false, the block will not be read in case it was
1896 		not already in the cache. The block you retrieve may contain random
1897 		data. If \c true, the cache will be temporarily unlocked while the
1898 		block is read in.
1899 */
1900 static cached_block*
1901 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1902 	bool readBlock = true)
1903 {
1904 	ASSERT_LOCKED_MUTEX(&cache->lock);
1905 
1906 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1907 		panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1908 			blockNumber, cache->max_blocks - 1);
1909 		return NULL;
1910 	}
1911 
1912 retry:
1913 	cached_block* block = cache->hash->Lookup(blockNumber);
1914 	*_allocated = false;
1915 
1916 	if (block == NULL) {
1917 		// put block into cache
1918 		block = cache->NewBlock(blockNumber);
1919 		if (block == NULL)
1920 			return NULL;
1921 
1922 		cache->hash->Insert(block);
1923 		*_allocated = true;
1924 	} else if (block->busy_reading) {
1925 		// The block is currently busy_reading - wait and try again later
1926 		wait_for_busy_reading_block(cache, block);
1927 		goto retry;
1928 	}
1929 
1930 	if (block->unused) {
1931 		//TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
1932 		block->unused = false;
1933 		cache->unused_blocks.Remove(block);
1934 		cache->unused_block_count--;
1935 	}
1936 
1937 	if (*_allocated && readBlock) {
1938 		// read block into cache
1939 		int32 blockSize = cache->block_size;
1940 
1941 		mark_block_busy_reading(cache, block);
1942 		mutex_unlock(&cache->lock);
1943 
1944 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1945 			block->current_data, blockSize);
1946 
1947 		mutex_lock(&cache->lock);
1948 		if (bytesRead < blockSize) {
1949 			cache->RemoveBlock(block);
1950 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1951 
1952 			TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n",
1953 				blockNumber, bytesRead, strerror(errno)));
1954 			return NULL;
1955 		}
1956 		TB(Read(cache, block));
1957 
1958 		mark_block_unbusy_reading(cache, block);
1959 	}
1960 
1961 	block->ref_count++;
1962 	block->last_accessed = system_time() / 1000000L;
1963 
1964 	return block;
1965 }
1966 
1967 
1968 /*!	Returns the writable block data for the requested blockNumber.
1969 	If \a cleared is true, the block is not read from disk; an empty block
1970 	is returned.
1971 
1972 	This is the only method to insert a block into a transaction. It makes
1973 	sure that the previous block contents are preserved in that case.
1974 */
1975 static void*
1976 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1977 	off_t length, int32 transactionID, bool cleared)
1978 {
1979 	TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
1980 		blockNumber, transactionID));
1981 
1982 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1983 		panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1984 			blockNumber, cache->max_blocks - 1);
1985 	}
1986 
1987 	bool allocated;
1988 	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1989 		!cleared);
1990 	if (block == NULL)
1991 		return NULL;
1992 
1993 	if (block->busy_writing)
1994 		wait_for_busy_writing_block(cache, block);
1995 
1996 	block->discard = false;
1997 
1998 	// if there is no transaction support, we just return the current block
1999 	if (transactionID == -1) {
2000 		if (cleared) {
2001 			mark_block_busy_reading(cache, block);
2002 			mutex_unlock(&cache->lock);
2003 
2004 			memset(block->current_data, 0, cache->block_size);
2005 
2006 			mutex_lock(&cache->lock);
2007 			mark_block_unbusy_reading(cache, block);
2008 		}
2009 
2010 		block->is_writing = true;
2011 
2012 		if (!block->is_dirty) {
2013 			cache->num_dirty_blocks++;
2014 			block->is_dirty = true;
2015 				// mark the block as dirty
2016 		}
2017 
2018 		TB(Get(cache, block));
2019 		return block->current_data;
2020 	}
2021 
2022 	cache_transaction* transaction = block->transaction;
2023 
2024 	if (transaction != NULL && transaction->id != transactionID) {
2025 		// TODO: we have to wait here until the other transaction is done.
2026 		//	Maybe we should even panic, since we can't prevent any deadlocks.
2027 		panic("get_writable_cached_block(): asked to get busy writable block "
2028 			"(transaction %" B_PRId32 ")\n", block->transaction->id);
2029 		put_cached_block(cache, block);
2030 		return NULL;
2031 	}
2032 	if (transaction == NULL && transactionID != -1) {
2033 		// get new transaction
2034 		transaction = lookup_transaction(cache, transactionID);
2035 		if (transaction == NULL) {
2036 			panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
2037 				transactionID);
2038 			put_cached_block(cache, block);
2039 			return NULL;
2040 		}
2041 		if (!transaction->open) {
2042 			panic("get_writable_cached_block(): transaction already done!\n");
2043 			put_cached_block(cache, block);
2044 			return NULL;
2045 		}
2046 
2047 		block->transaction = transaction;
2048 
2049 		// attach the block to the transaction block list
2050 		block->transaction_next = transaction->first_block;
2051 		transaction->first_block = block;
2052 		transaction->num_blocks++;
2053 	}
2054 	if (transaction != NULL)
2055 		transaction->last_used = system_time();
2056 
2057 	bool wasUnchanged = block->original_data == NULL
2058 		|| block->previous_transaction != NULL;
2059 
2060 	if (!(allocated && cleared) && block->original_data == NULL) {
2061 		// we already have data, so we need to preserve it
2062 		block->original_data = cache->Allocate();
2063 		if (block->original_data == NULL) {
2064 			TB(Error(cache, blockNumber, "allocate original failed"));
2065 			FATAL(("could not allocate original_data\n"));
2066 			put_cached_block(cache, block);
2067 			return NULL;
2068 		}
2069 
2070 		mark_block_busy_reading(cache, block);
2071 		mutex_unlock(&cache->lock);
2072 
2073 		memcpy(block->original_data, block->current_data, cache->block_size);
2074 
2075 		mutex_lock(&cache->lock);
2076 		mark_block_unbusy_reading(cache, block);
2077 	}
2078 	if (block->parent_data == block->current_data) {
2079 		// remember any previous contents for the parent transaction
2080 		block->parent_data = cache->Allocate();
2081 		if (block->parent_data == NULL) {
2082 			// TODO: maybe we should just continue the current transaction in
2083 			// this case...
2084 			TB(Error(cache, blockNumber, "allocate parent failed"));
2085 			FATAL(("could not allocate parent\n"));
2086 			put_cached_block(cache, block);
2087 			return NULL;
2088 		}
2089 
2090 		mark_block_busy_reading(cache, block);
2091 		mutex_unlock(&cache->lock);
2092 
2093 		memcpy(block->parent_data, block->current_data, cache->block_size);
2094 
2095 		mutex_lock(&cache->lock);
2096 		mark_block_unbusy_reading(cache, block);
2097 
2098 		transaction->sub_num_blocks++;
2099 	} else if (transaction != NULL && transaction->has_sub_transaction
2100 		&& block->parent_data == NULL && wasUnchanged)
2101 		transaction->sub_num_blocks++;
2102 
2103 	if (cleared) {
2104 		mark_block_busy_reading(cache, block);
2105 		mutex_unlock(&cache->lock);
2106 
2107 		memset(block->current_data, 0, cache->block_size);
2108 
2109 		mutex_lock(&cache->lock);
2110 		mark_block_unbusy_reading(cache, block);
2111 	}
2112 
2113 	block->is_dirty = true;
2114 	TB(Get(cache, block));
2115 	TB2(BlockData(cache, block, "get writable"));
2116 
2117 	return block->current_data;
2118 }
2119 
2120 
2121 #if DEBUG_BLOCK_CACHE
2122 
2123 
2124 static void
2125 dump_block(cached_block* block)
2126 {
2127 	kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2128 		" %c%c%c%c%c%c %08lx %08lx\n",
2129 		(addr_t)block, block->block_number,
2130 		(addr_t)block->current_data, (addr_t)block->original_data,
2131 		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2132 		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2133 		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2134 		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2135 		(addr_t)block->transaction,
2136 		(addr_t)block->previous_transaction);
2137 }
2138 
2139 
2140 static void
2141 dump_block_long(cached_block* block)
2142 {
2143 	kprintf("BLOCK %p\n", block);
2144 	kprintf(" current data:  %p\n", block->current_data);
2145 	kprintf(" original data: %p\n", block->original_data);
2146 	kprintf(" parent data:   %p\n", block->parent_data);
2147 #if BLOCK_CACHE_DEBUG_CHANGED
2148 	kprintf(" compare data:  %p\n", block->compare);
2149 #endif
2150 	kprintf(" ref_count:     %" B_PRId32 "\n", block->ref_count);
2151 	kprintf(" accessed:      %" B_PRId32 "\n", block->LastAccess());
2152 	kprintf(" flags:        ");
2153 	if (block->busy_reading)
2154 		kprintf(" busy_reading");
2155 	if (block->busy_writing)
2156 		kprintf(" busy_writing");
2157 	if (block->is_writing)
2158 		kprintf(" is-writing");
2159 	if (block->is_dirty)
2160 		kprintf(" is-dirty");
2161 	if (block->unused)
2162 		kprintf(" unused");
2163 	if (block->discard)
2164 		kprintf(" discard");
2165 	kprintf("\n");
2166 	if (block->transaction != NULL) {
2167 		kprintf(" transaction:   %p (%" B_PRId32 ")\n", block->transaction,
2168 			block->transaction->id);
2169 		if (block->transaction_next != NULL) {
2170 			kprintf(" next in transaction: %" B_PRIdOFF "\n",
2171 				block->transaction_next->block_number);
2172 		}
2173 	}
2174 	if (block->previous_transaction != NULL) {
2175 		kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2176 			block->previous_transaction,
2177 			block->previous_transaction->id);
2178 	}
2179 
2180 	set_debug_variable("_current", (addr_t)block->current_data);
2181 	set_debug_variable("_original", (addr_t)block->original_data);
2182 	set_debug_variable("_parent", (addr_t)block->parent_data);
2183 }
2184 
2185 
2186 static int
2187 dump_cached_block(int argc, char** argv)
2188 {
2189 	if (argc != 2) {
2190 		kprintf("usage: %s <block-address>\n", argv[0]);
2191 		return 0;
2192 	}
2193 
2194 	dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2195 	return 0;
2196 }
2197 
2198 
2199 static int
2200 dump_cache(int argc, char** argv)
2201 {
2202 	bool showTransactions = false;
2203 	bool showBlocks = false;
2204 	int32 i = 1;
2205 	while (argv[i] != NULL && argv[i][0] == '-') {
2206 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
2207 			switch (arg[0]) {
2208 				case 'b':
2209 					showBlocks = true;
2210 					break;
2211 				case 't':
2212 					showTransactions = true;
2213 					break;
2214 				default:
2215 					print_debugger_command_usage(argv[0]);
2216 					return 0;
2217 			}
2218 		}
2219 		i++;
2220 	}
2221 
2222 	if (i >= argc) {
2223 		print_debugger_command_usage(argv[0]);
2224 		return 0;
2225 	}
2226 
2227 	block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2228 	if (cache == NULL) {
2229 		kprintf("invalid cache address\n");
2230 		return 0;
2231 	}
2232 
2233 	off_t blockNumber = -1;
2234 	if (i + 1 < argc) {
2235 		blockNumber = parse_expression(argv[i + 1]);
2236 		cached_block* block = cache->hash->Lookup(blockNumber);
2237 		if (block != NULL)
2238 			dump_block_long(block);
2239 		else
2240 			kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2241 		return 0;
2242 	}
2243 
2244 	kprintf("BLOCK CACHE: %p\n", cache);
2245 
2246 	kprintf(" fd:           %d\n", cache->fd);
2247 	kprintf(" max_blocks:   %" B_PRIdOFF "\n", cache->max_blocks);
2248 	kprintf(" block_size:   %zu\n", cache->block_size);
2249 	kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2250 	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2251 	kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2252 		cache->busy_reading_waiters ? "has" : "no");
2253 	kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2254 		cache->busy_writing_waiters ? "has" : "no");
2255 
2256 	if (!cache->pending_notifications.IsEmpty()) {
2257 		kprintf(" pending notifications:\n");
2258 
2259 		NotificationList::Iterator iterator
2260 			= cache->pending_notifications.GetIterator();
2261 		while (iterator.HasNext()) {
2262 			cache_notification* notification = iterator.Next();
2263 
2264 			kprintf("  %p %5" B_PRIx32 " %p - %p\n", notification,
2265 				notification->events_pending, notification->hook,
2266 				notification->data);
2267 		}
2268 	}
2269 
2270 	if (showTransactions) {
2271 		kprintf(" transactions:\n");
2272 		kprintf("address       id state  blocks  main   sub\n");
2273 
2274 		TransactionTable::Iterator iterator(cache->transaction_hash);
2275 
2276 		while (iterator.HasNext()) {
2277 			cache_transaction* transaction = iterator.Next();
2278 			kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
2279 				B_PRId32 "\n", transaction, transaction->id,
2280 				transaction->open ? "open" : "closed",
2281 				transaction->num_blocks, transaction->main_num_blocks,
2282 				transaction->sub_num_blocks);
2283 		}
2284 	}
2285 
2286 	if (showBlocks) {
2287 		kprintf(" blocks:\n");
2288 		kprintf("address  block no. current  original parent    refs access "
2289 			"flags transact prev. trans\n");
2290 	}
2291 
2292 	uint32 referenced = 0;
2293 	uint32 count = 0;
2294 	uint32 dirty = 0;
2295 	uint32 discarded = 0;
2296 	BlockTable::Iterator iterator(cache->hash);
2297 	while (iterator.HasNext()) {
2298 		cached_block* block = iterator.Next();
2299 		if (showBlocks)
2300 			dump_block(block);
2301 
2302 		if (block->is_dirty)
2303 			dirty++;
2304 		if (block->discard)
2305 			discarded++;
2306 		if (block->ref_count)
2307 			referenced++;
2308 		count++;
2309 	}
2310 
2311 	kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
2312 		" discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
2313 		" in unused.\n",
2314 		count, dirty, discarded, referenced, cache->busy_reading_count,
2315 		cache->unused_block_count);
2316 	return 0;
2317 }
2318 
2319 
2320 static int
2321 dump_transaction(int argc, char** argv)
2322 {
2323 	bool showBlocks = false;
2324 	int i = 1;
2325 	if (argc > 1 && !strcmp(argv[1], "-b")) {
2326 		showBlocks = true;
2327 		i++;
2328 	}
2329 
2330 	if (argc - i < 1 || argc - i > 2) {
2331 		print_debugger_command_usage(argv[0]);
2332 		return 0;
2333 	}
2334 
2335 	cache_transaction* transaction = NULL;
2336 
2337 	if (argc - i == 1) {
2338 		transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2339 	} else {
2340 		block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2341 		int32 id = parse_expression(argv[i + 1]);
2342 		transaction = lookup_transaction(cache, id);
2343 		if (transaction == NULL) {
2344 			kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2345 			return 0;
2346 		}
2347 	}
2348 
2349 	kprintf("TRANSACTION %p\n", transaction);
2350 
2351 	kprintf(" id:             %" B_PRId32 "\n", transaction->id);
2352 	kprintf(" num block:      %" B_PRId32 "\n", transaction->num_blocks);
2353 	kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2354 	kprintf(" sub num block:  %" B_PRId32 "\n", transaction->sub_num_blocks);
2355 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
2356 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
2357 	kprintf(" idle:           %" B_PRId64 " secs\n",
2358 		(system_time() - transaction->last_used) / 1000000);
2359 
2360 	kprintf(" listeners:\n");
2361 
2362 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2363 	while (iterator.HasNext()) {
2364 		cache_listener* listener = iterator.Next();
2365 
2366 		kprintf("  %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2367 			listener->hook, listener->data);
2368 	}
2369 
2370 	if (!showBlocks)
2371 		return 0;
2372 
2373 	kprintf(" blocks:\n");
2374 	kprintf("address  block no. current  original parent    refs access "
2375 		"flags transact prev. trans\n");
2376 
2377 	cached_block* block = transaction->first_block;
2378 	while (block != NULL) {
2379 		dump_block(block);
2380 		block = block->transaction_next;
2381 	}
2382 
2383 	kprintf("--\n");
2384 
2385 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2386 	while (blockIterator.HasNext()) {
2387 		block = blockIterator.Next();
2388 		dump_block(block);
2389 	}
2390 
2391 	return 0;
2392 }
2393 
2394 
2395 static int
2396 dump_caches(int argc, char** argv)
2397 {
2398 	kprintf("Block caches:\n");
2399 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2400 	while (i.HasNext()) {
2401 		block_cache* cache = i.Next();
2402 		if (cache == (block_cache*)&sMarkCache)
2403 			continue;
2404 
2405 		kprintf("  %p\n", cache);
2406 	}
2407 
2408 	return 0;
2409 }
2410 
2411 
2412 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2413 static int
2414 dump_block_data(int argc, char** argv)
2415 {
2416 	using namespace BlockTracing;
2417 
2418 	// Determine which blocks to show
2419 
2420 	bool printStackTrace = true;
2421 	uint32 which = 0;
2422 	int32 i = 1;
2423 	while (i < argc && argv[i][0] == '-') {
2424 		char* arg = &argv[i][1];
2425 		while (arg[0]) {
2426 			switch (arg[0]) {
2427 				case 'c':
2428 					which |= BlockData::kCurrent;
2429 					break;
2430 				case 'p':
2431 					which |= BlockData::kParent;
2432 					break;
2433 				case 'o':
2434 					which |= BlockData::kOriginal;
2435 					break;
2436 
2437 				default:
2438 					kprintf("invalid block specifier (only o/c/p are "
2439 						"allowed).\n");
2440 					return 0;
2441 			}
2442 			arg++;
2443 		}
2444 
2445 		i++;
2446 	}
2447 	if (which == 0)
2448 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2449 
2450 	if (i == argc) {
2451 		print_debugger_command_usage(argv[0]);
2452 		return 0;
2453 	}
2454 
2455 	// Get the range of blocks to print
2456 
2457 	int64 from = parse_expression(argv[i]);
2458 	int64 to = from;
2459 	if (argc > i + 1)
2460 		to = parse_expression(argv[i + 1]);
2461 	if (to < from)
2462 		to = from;
2463 
2464 	uint32 offset = 0;
2465 	uint32 size = LONG_MAX;
2466 	if (argc > i + 2)
2467 		offset = parse_expression(argv[i + 2]);
2468 	if (argc > i + 3)
2469 		size = parse_expression(argv[i + 3]);
2470 
2471 	TraceEntryIterator iterator;
2472 	iterator.MoveTo(from - 1);
2473 
2474 	static char sBuffer[1024];
2475 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2476 
2477 	while (TraceEntry* entry = iterator.Next()) {
2478 		int32 index = iterator.Index();
2479 		if (index > to)
2480 			break;
2481 
2482 		Action* action = dynamic_cast<Action*>(entry);
2483 		if (action != NULL) {
2484 			out.Clear();
2485 			out.DumpEntry(action);
2486 			continue;
2487 		}
2488 
2489 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
2490 		if (blockData == NULL)
2491 			continue;
2492 
2493 		out.Clear();
2494 
2495 		const char* dump = out.DumpEntry(entry);
2496 		int length = strlen(dump);
2497 		if (length > 0 && dump[length - 1] == '\n')
2498 			length--;
2499 
2500 		kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2501 
2502 		if (printStackTrace) {
2503 			out.Clear();
2504 			entry->DumpStackTrace(out);
2505 			if (out.Size() > 0)
2506 				kputs(out.Buffer());
2507 		}
2508 
2509 		blockData->DumpBlocks(which, offset, size);
2510 	}
2511 
2512 	return 0;
2513 }
2514 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
2515 
2516 
2517 #endif	// DEBUG_BLOCK_CACHE
2518 
2519 
2520 /*!	Traverses through the block_cache list, and returns one cache after the
2521 	other. The cache returned is automatically locked when you get it, and
2522 	unlocked with the next call to this function. Ignores caches that are in
2523 	deletion state.
2524 	Returns \c NULL when the end of the list is reached.
2525 */
2526 static block_cache*
2527 get_next_locked_block_cache(block_cache* last)
2528 {
2529 	MutexLocker _(sCachesLock);
2530 
2531 	block_cache* cache;
2532 	if (last != NULL) {
2533 		mutex_unlock(&last->lock);
2534 
2535 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
2536 		sCaches.Remove((block_cache*)&sMarkCache);
2537 	} else
2538 		cache = sCaches.Head();
2539 
2540 	if (cache != NULL) {
2541 		mutex_lock(&cache->lock);
2542 		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2543 	}
2544 
2545 	return cache;
2546 }
2547 
2548 
2549 /*!	Background thread that continuously checks for pending notifications of
2550 	all caches.
2551 	Every two seconds, it will also write back up to 64 blocks per cache.
2552 */
2553 static status_t
2554 block_notifier_and_writer(void* /*data*/)
2555 {
2556 	const bigtime_t kDefaultTimeout = 2000000LL;
2557 	bigtime_t timeout = kDefaultTimeout;
2558 
2559 	while (true) {
2560 		bigtime_t start = system_time();
2561 
2562 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
2563 			B_RELATIVE_TIMEOUT, timeout);
2564 		if (status == B_OK) {
2565 			flush_pending_notifications();
2566 			timeout -= system_time() - start;
2567 			continue;
2568 		}
2569 
2570 		// Write 64 blocks of each block_cache roughly every 2 seconds,
2571 		// potentially more or less depending on congestion and drive speeds
2572 		// (usually much less.) We do not want to queue everything at once
2573 		// because a future transaction might then get held up waiting for
2574 		// a specific block to be written.
2575 		timeout = kDefaultTimeout;
2576 		size_t usedMemory;
2577 		object_cache_get_usage(sBlockCache, &usedMemory);
2578 
2579 		block_cache* cache = NULL;
2580 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2581 			// Give some breathing room: wait 2x the length of the potential
2582 			// maximum block count-sized write between writes, and also skip
2583 			// if there are more than 16 blocks currently being written.
2584 			const bigtime_t next = cache->last_block_write
2585 					+ cache->last_block_write_duration * 2 * 64;
2586 			if (cache->busy_writing_count > 16 || system_time() < next) {
2587 				if (cache->last_block_write_duration > 0) {
2588 					timeout = min_c(timeout,
2589 						cache->last_block_write_duration * 2 * 64);
2590 				}
2591 				continue;
2592 			}
2593 
2594 			BlockWriter writer(cache, 64);
2595 			bool hasMoreBlocks = false;
2596 
2597 			size_t cacheUsedMemory;
2598 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2599 			usedMemory += cacheUsedMemory;
2600 
2601 			if (cache->num_dirty_blocks) {
2602 				// This cache is not using transactions, we'll scan the blocks
2603 				// directly
2604 				BlockTable::Iterator iterator(cache->hash);
2605 
2606 				while (iterator.HasNext()) {
2607 					cached_block* block = iterator.Next();
2608 					if (block->CanBeWritten() && !writer.Add(block)) {
2609 						hasMoreBlocks = true;
2610 						break;
2611 					}
2612 				}
2613 			} else {
2614 				TransactionTable::Iterator iterator(cache->transaction_hash);
2615 
2616 				while (iterator.HasNext()) {
2617 					cache_transaction* transaction = iterator.Next();
2618 					if (transaction->open) {
2619 						if (system_time() > transaction->last_used
2620 								+ kTransactionIdleTime) {
2621 							// Transaction is open but idle
2622 							notify_transaction_listeners(cache, transaction,
2623 								TRANSACTION_IDLE);
2624 						}
2625 						continue;
2626 					}
2627 
2628 					bool hasLeftOvers;
2629 						// we ignore this one
2630 					if (!writer.Add(transaction, hasLeftOvers)) {
2631 						hasMoreBlocks = true;
2632 						break;
2633 					}
2634 				}
2635 			}
2636 
2637 			writer.Write();
2638 
2639 			if (hasMoreBlocks && cache->last_block_write_duration > 0) {
2640 				// There are probably still more blocks that we could write, so
2641 				// see if we can decrease the timeout.
2642 				timeout = min_c(timeout,
2643 					cache->last_block_write_duration * 2 * 64);
2644 			}
2645 
2646 			if ((block_cache_used_memory() / B_PAGE_SIZE)
2647 					> vm_page_num_pages() / 2) {
2648 				// Try to reduce memory usage to half of the available
2649 				// RAM at maximum
2650 				cache->RemoveUnusedBlocks(1000, 10);
2651 			}
2652 		}
2653 
2654 		MutexLocker _(sCachesMemoryUseLock);
2655 		sUsedMemory = usedMemory;
2656 	}
2657 
2658 	// never can get here
2659 	return B_OK;
2660 }
2661 
2662 
2663 /*!	Notify function for wait_for_notifications(). */
2664 static void
2665 notify_sync(int32 transactionID, int32 event, void* _cache)
2666 {
2667 	block_cache* cache = (block_cache*)_cache;
2668 
2669 	cache->condition_variable.NotifyOne();
2670 }
2671 
2672 
2673 /*!	Must be called with the sCachesLock held. */
2674 static bool
2675 is_valid_cache(block_cache* cache)
2676 {
2677 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2678 
2679 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2680 	while (iterator.HasNext()) {
2681 		if (cache == iterator.Next())
2682 			return true;
2683 	}
2684 
2685 	return false;
2686 }
2687 
2688 
2689 /*!	Waits until all pending notifications are carried out.
2690 	Safe to be called from the block writer/notifier thread.
2691 	You must not hold the \a cache lock when calling this function.
2692 */
2693 static void
2694 wait_for_notifications(block_cache* cache)
2695 {
2696 	MutexLocker locker(sCachesLock);
2697 
2698 	if (find_thread(NULL) == sNotifierWriterThread) {
2699 		// We're the notifier thread, don't wait, but flush all pending
2700 		// notifications directly.
2701 		if (is_valid_cache(cache))
2702 			flush_pending_notifications(cache);
2703 		return;
2704 	}
2705 
2706 	// add sync notification
2707 	cache_notification notification;
2708 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2709 		cache);
2710 
2711 	ConditionVariableEntry entry;
2712 	cache->condition_variable.Add(&entry);
2713 
2714 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2715 	locker.Unlock();
2716 
2717 	// wait for notification hook to be called
2718 	entry.Wait();
2719 }
2720 
2721 
2722 status_t
2723 block_cache_init(void)
2724 {
2725 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2726 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2727 	if (sBlockCache == NULL)
2728 		return B_NO_MEMORY;
2729 
2730 	sCacheNotificationCache = create_object_cache("cache notifications",
2731 		sizeof(cache_listener), 8, NULL, NULL, NULL);
2732 	if (sCacheNotificationCache == NULL)
2733 		return B_NO_MEMORY;
2734 
2735 	new (&sCaches) DoublyLinkedList<block_cache>;
2736 		// manually call constructor
2737 
2738 	sEventSemaphore = create_sem(0, "block cache event");
2739 	if (sEventSemaphore < B_OK)
2740 		return sEventSemaphore;
2741 
2742 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2743 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2744 	if (sNotifierWriterThread >= B_OK)
2745 		resume_thread(sNotifierWriterThread);
2746 
2747 #if DEBUG_BLOCK_CACHE
2748 	add_debugger_command_etc("block_caches", &dump_caches,
2749 		"dumps all block caches", "\n", 0);
2750 	add_debugger_command_etc("block_cache", &dump_cache,
2751 		"dumps a specific block cache",
2752 		"[-bt] <cache-address> [block-number]\n"
2753 		"  -t lists the transactions\n"
2754 		"  -b lists all blocks\n", 0);
2755 	add_debugger_command("cached_block", &dump_cached_block,
2756 		"dumps the specified cached block");
2757 	add_debugger_command_etc("transaction", &dump_transaction,
2758 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2759 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2760 		"  -b lists all blocks that are part of this transaction\n", 0);
2761 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2762 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2763 		"dumps the data blocks logged for the actions",
2764 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2765 		"If no data specifier is used, all blocks are shown by default.\n"
2766 		" -c       the current data is shown, if available.\n"
2767 		" -p       the parent data is shown, if available.\n"
2768 		" -o       the original data is shown, if available.\n"
2769 		" <from>   first index of tracing entries to show.\n"
2770 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2771 		" <offset> the offset of the block data.\n"
2772 		" <from>   the size of the block data that is dumped\n", 0);
2773 #	endif
2774 #endif	// DEBUG_BLOCK_CACHE
2775 
2776 	return B_OK;
2777 }
2778 
2779 
2780 size_t
2781 block_cache_used_memory(void)
2782 {
2783 	MutexLocker _(sCachesMemoryUseLock);
2784 	return sUsedMemory;
2785 }
2786 
2787 
2788 //	#pragma mark - public transaction API
2789 
2790 
2791 int32
2792 cache_start_transaction(void* _cache)
2793 {
2794 	block_cache* cache = (block_cache*)_cache;
2795 	TransactionLocker locker(cache);
2796 
2797 	if (cache->last_transaction && cache->last_transaction->open) {
2798 		panic("last transaction (%" B_PRId32 ") still open!\n",
2799 			cache->last_transaction->id);
2800 	}
2801 
2802 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2803 	if (transaction == NULL)
2804 		return B_NO_MEMORY;
2805 
2806 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2807 	cache->last_transaction = transaction;
2808 
2809 	TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
2810 	T(Action("start", cache, transaction));
2811 
2812 	cache->transaction_hash->Insert(transaction);
2813 
2814 	return transaction->id;
2815 }
2816 
2817 
2818 status_t
2819 cache_sync_transaction(void* _cache, int32 id)
2820 {
2821 	block_cache* cache = (block_cache*)_cache;
2822 	bool hadBusy;
2823 
2824 	TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
2825 
2826 	do {
2827 		TransactionLocker locker(cache);
2828 		hadBusy = false;
2829 
2830 		BlockWriter writer(cache);
2831 		TransactionTable::Iterator iterator(cache->transaction_hash);
2832 
2833 		while (iterator.HasNext()) {
2834 			// close all earlier transactions which haven't been closed yet
2835 			cache_transaction* transaction = iterator.Next();
2836 
2837 			if (transaction->busy_writing_count != 0) {
2838 				hadBusy = true;
2839 				continue;
2840 			}
2841 			if (transaction->id <= id && !transaction->open) {
2842 				// write back all of their remaining dirty blocks
2843 				T(Action("sync", cache, transaction));
2844 
2845 				bool hasLeftOvers;
2846 				writer.Add(transaction, hasLeftOvers);
2847 
2848 				if (hasLeftOvers) {
2849 					// This transaction contains blocks that a previous
2850 					// transaction is trying to write back in this write run
2851 					hadBusy = true;
2852 				}
2853 			}
2854 		}
2855 
2856 		status_t status = writer.Write();
2857 		if (status != B_OK)
2858 			return status;
2859 	} while (hadBusy);
2860 
2861 	wait_for_notifications(cache);
2862 		// make sure that all pending TRANSACTION_WRITTEN notifications
2863 		// are handled after we return
2864 	return B_OK;
2865 }
2866 
2867 
2868 status_t
2869 cache_end_transaction(void* _cache, int32 id,
2870 	transaction_notification_hook hook, void* data)
2871 {
2872 	block_cache* cache = (block_cache*)_cache;
2873 	TransactionLocker locker(cache);
2874 
2875 	TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
2876 
2877 	cache_transaction* transaction = lookup_transaction(cache, id);
2878 	if (transaction == NULL) {
2879 		panic("cache_end_transaction(): invalid transaction ID\n");
2880 		return B_BAD_VALUE;
2881 	}
2882 
2883 	// Write back all pending transaction blocks
2884 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2885 	if (status != B_OK)
2886 		return status;
2887 
2888 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2889 
2890 	if (hook != NULL
2891 		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2892 			hook, data) != B_OK) {
2893 		return B_NO_MEMORY;
2894 	}
2895 
2896 	T(Action("end", cache, transaction));
2897 
2898 	// iterate through all blocks and free the unchanged original contents
2899 
2900 	cached_block* next;
2901 	for (cached_block* block = transaction->first_block; block != NULL;
2902 			block = next) {
2903 		next = block->transaction_next;
2904 		ASSERT(block->previous_transaction == NULL);
2905 
2906 		if (block->discard) {
2907 			// This block has been discarded in the transaction
2908 			cache->DiscardBlock(block);
2909 			transaction->num_blocks--;
2910 			continue;
2911 		}
2912 
2913 		if (block->original_data != NULL) {
2914 			cache->Free(block->original_data);
2915 			block->original_data = NULL;
2916 		}
2917 		if (block->parent_data != NULL) {
2918 			ASSERT(transaction->has_sub_transaction);
2919 			cache->FreeBlockParentData(block);
2920 		}
2921 
2922 		// move the block to the previous transaction list
2923 		transaction->blocks.Add(block);
2924 
2925 		block->previous_transaction = transaction;
2926 		block->transaction_next = NULL;
2927 		block->transaction = NULL;
2928 	}
2929 
2930 	transaction->open = false;
2931 	return B_OK;
2932 }
2933 
2934 
2935 status_t
2936 cache_abort_transaction(void* _cache, int32 id)
2937 {
2938 	block_cache* cache = (block_cache*)_cache;
2939 	TransactionLocker locker(cache);
2940 
2941 	TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
2942 
2943 	cache_transaction* transaction = lookup_transaction(cache, id);
2944 	if (transaction == NULL) {
2945 		panic("cache_abort_transaction(): invalid transaction ID\n");
2946 		return B_BAD_VALUE;
2947 	}
2948 
2949 	T(Abort(cache, transaction));
2950 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2951 
2952 	// iterate through all blocks and restore their original contents
2953 
2954 	cached_block* block = transaction->first_block;
2955 	cached_block* next;
2956 	for (; block != NULL; block = next) {
2957 		next = block->transaction_next;
2958 
2959 		if (block->original_data != NULL) {
2960 			TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
2961 				"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
2962 			memcpy(block->current_data, block->original_data,
2963 				cache->block_size);
2964 			cache->Free(block->original_data);
2965 			block->original_data = NULL;
2966 		}
2967 		if (transaction->has_sub_transaction && block->parent_data != NULL)
2968 			cache->FreeBlockParentData(block);
2969 
2970 		block->transaction_next = NULL;
2971 		block->transaction = NULL;
2972 		block->discard = false;
2973 		if (block->previous_transaction == NULL)
2974 			block->is_dirty = false;
2975 	}
2976 
2977 	cache->transaction_hash->Remove(transaction);
2978 	delete_transaction(cache, transaction);
2979 	return B_OK;
2980 }
2981 
2982 
2983 /*!	Acknowledges the current parent transaction, and starts a new transaction
2984 	from its sub transaction.
2985 	The new transaction also gets a new transaction ID.
2986 */
2987 int32
2988 cache_detach_sub_transaction(void* _cache, int32 id,
2989 	transaction_notification_hook hook, void* data)
2990 {
2991 	block_cache* cache = (block_cache*)_cache;
2992 	TransactionLocker locker(cache);
2993 
2994 	TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
2995 
2996 	cache_transaction* transaction = lookup_transaction(cache, id);
2997 	if (transaction == NULL) {
2998 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2999 		return B_BAD_VALUE;
3000 	}
3001 	if (!transaction->has_sub_transaction)
3002 		return B_BAD_VALUE;
3003 
3004 	// iterate through all blocks and free the unchanged original contents
3005 
3006 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
3007 	if (status != B_OK)
3008 		return status;
3009 
3010 	// create a new transaction for the sub transaction
3011 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
3012 	if (newTransaction == NULL)
3013 		return B_NO_MEMORY;
3014 
3015 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
3016 	T(Detach(cache, transaction, newTransaction));
3017 
3018 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3019 
3020 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
3021 			data) != B_OK) {
3022 		delete newTransaction;
3023 		return B_NO_MEMORY;
3024 	}
3025 
3026 	cached_block* last = NULL;
3027 	cached_block* next;
3028 	for (cached_block* block = transaction->first_block; block != NULL;
3029 			block = next) {
3030 		next = block->transaction_next;
3031 		ASSERT(block->previous_transaction == NULL);
3032 
3033 		if (block->discard) {
3034 			cache->DiscardBlock(block);
3035 			transaction->main_num_blocks--;
3036 			continue;
3037 		}
3038 
3039 		if (block->parent_data != NULL) {
3040 			// The block changed in the parent - free the original data, since
3041 			// they will be replaced by what is in current.
3042 			ASSERT(block->original_data != NULL);
3043 			cache->Free(block->original_data);
3044 
3045 			if (block->parent_data != block->current_data) {
3046 				// The block had been changed in both transactions
3047 				block->original_data = block->parent_data;
3048 			} else {
3049 				// The block has only been changed in the parent
3050 				block->original_data = NULL;
3051 			}
3052 
3053 			// move the block to the previous transaction list
3054 			transaction->blocks.Add(block);
3055 			block->previous_transaction = transaction;
3056 			block->parent_data = NULL;
3057 		}
3058 
3059 		if (block->original_data != NULL) {
3060 			// This block had been changed in the current sub transaction,
3061 			// we need to move this block over to the new transaction.
3062 			ASSERT(block->parent_data == NULL);
3063 
3064 			if (last == NULL)
3065 				newTransaction->first_block = block;
3066 			else
3067 				last->transaction_next = block;
3068 
3069 			block->transaction = newTransaction;
3070 			last = block;
3071 		} else
3072 			block->transaction = NULL;
3073 
3074 		block->transaction_next = NULL;
3075 	}
3076 
3077 	newTransaction->num_blocks = transaction->sub_num_blocks;
3078 
3079 	transaction->open = false;
3080 	transaction->has_sub_transaction = false;
3081 	transaction->num_blocks = transaction->main_num_blocks;
3082 	transaction->sub_num_blocks = 0;
3083 
3084 	cache->transaction_hash->Insert(newTransaction);
3085 	cache->last_transaction = newTransaction;
3086 
3087 	return newTransaction->id;
3088 }
3089 
3090 
3091 status_t
3092 cache_abort_sub_transaction(void* _cache, int32 id)
3093 {
3094 	block_cache* cache = (block_cache*)_cache;
3095 	TransactionLocker locker(cache);
3096 
3097 	TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3098 
3099 	cache_transaction* transaction = lookup_transaction(cache, id);
3100 	if (transaction == NULL) {
3101 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3102 		return B_BAD_VALUE;
3103 	}
3104 	if (!transaction->has_sub_transaction)
3105 		return B_BAD_VALUE;
3106 
3107 	T(Abort(cache, transaction));
3108 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3109 
3110 	// revert all changes back to the version of the parent
3111 
3112 	cached_block* block = transaction->first_block;
3113 	cached_block* last = NULL;
3114 	cached_block* next;
3115 	for (; block != NULL; block = next) {
3116 		next = block->transaction_next;
3117 
3118 		if (block->parent_data == NULL) {
3119 			// The parent transaction didn't change the block, but the sub
3120 			// transaction did - we need to revert to the original data.
3121 			// The block is no longer part of the transaction
3122 			if (block->original_data != NULL) {
3123 				// The block might not have original data if was empty
3124 				memcpy(block->current_data, block->original_data,
3125 					cache->block_size);
3126 			}
3127 
3128 			if (last != NULL)
3129 				last->transaction_next = next;
3130 			else
3131 				transaction->first_block = next;
3132 
3133 			block->transaction_next = NULL;
3134 			block->transaction = NULL;
3135 			transaction->num_blocks--;
3136 
3137 			if (block->previous_transaction == NULL) {
3138 				cache->Free(block->original_data);
3139 				block->original_data = NULL;
3140 				block->is_dirty = false;
3141 
3142 				if (block->ref_count == 0) {
3143 					// Move the block into the unused list if possible
3144 					block->unused = true;
3145 					cache->unused_blocks.Add(block);
3146 					cache->unused_block_count++;
3147 				}
3148 			}
3149 		} else {
3150 			if (block->parent_data != block->current_data) {
3151 				// The block has been changed and must be restored - the block
3152 				// is still dirty and part of the transaction
3153 				TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3154 					"restored contents of block %" B_PRIdOFF "\n",
3155 					transaction->id, block->block_number));
3156 				memcpy(block->current_data, block->parent_data,
3157 					cache->block_size);
3158 				cache->Free(block->parent_data);
3159 				// The block stays dirty
3160 			}
3161 			block->parent_data = NULL;
3162 			last = block;
3163 		}
3164 
3165 		block->discard = false;
3166 	}
3167 
3168 	// all subsequent changes will go into the main transaction
3169 	transaction->has_sub_transaction = false;
3170 	transaction->sub_num_blocks = 0;
3171 
3172 	return B_OK;
3173 }
3174 
3175 
3176 status_t
3177 cache_start_sub_transaction(void* _cache, int32 id)
3178 {
3179 	block_cache* cache = (block_cache*)_cache;
3180 	TransactionLocker locker(cache);
3181 
3182 	TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3183 
3184 	cache_transaction* transaction = lookup_transaction(cache, id);
3185 	if (transaction == NULL) {
3186 		panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3187 			id);
3188 		return B_BAD_VALUE;
3189 	}
3190 
3191 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3192 
3193 	// move all changed blocks up to the parent
3194 
3195 	cached_block* block = transaction->first_block;
3196 	cached_block* next;
3197 	for (; block != NULL; block = next) {
3198 		next = block->transaction_next;
3199 
3200 		if (block->parent_data != NULL) {
3201 			// There already is an older sub transaction - we acknowledge
3202 			// its changes and move its blocks up to the parent
3203 			ASSERT(transaction->has_sub_transaction);
3204 			cache->FreeBlockParentData(block);
3205 		}
3206 		if (block->discard) {
3207 			// This block has been discarded in the parent transaction.
3208 			// Just throw away any changes made in this transaction, so that
3209 			// it can still be reverted to its original contents if needed
3210 			ASSERT(block->previous_transaction == NULL);
3211 			if (block->original_data != NULL) {
3212 				memcpy(block->current_data, block->original_data,
3213 					cache->block_size);
3214 				block->original_data = NULL;
3215 			}
3216 			continue;
3217 		}
3218 
3219 		// we "allocate" the parent data lazily, that means, we don't copy
3220 		// the data (and allocate memory for it) until we need to
3221 		block->parent_data = block->current_data;
3222 	}
3223 
3224 	// all subsequent changes will go into the sub transaction
3225 	transaction->has_sub_transaction = true;
3226 	transaction->main_num_blocks = transaction->num_blocks;
3227 	transaction->sub_num_blocks = 0;
3228 	T(Action("start-sub", cache, transaction));
3229 
3230 	return B_OK;
3231 }
3232 
3233 
3234 /*!	Adds a transaction listener that gets notified when the transaction
3235 	is ended, aborted, written, or idle as specified by \a events.
3236 	The listener gets automatically removed when the transaction ends.
3237 */
3238 status_t
3239 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3240 	transaction_notification_hook hook, void* data)
3241 {
3242 	block_cache* cache = (block_cache*)_cache;
3243 	TransactionLocker locker(cache);
3244 
3245 	cache_transaction* transaction = lookup_transaction(cache, id);
3246 	if (transaction == NULL)
3247 		return B_BAD_VALUE;
3248 
3249 	return add_transaction_listener(cache, transaction, events, hook, data);
3250 }
3251 
3252 
3253 status_t
3254 cache_remove_transaction_listener(void* _cache, int32 id,
3255 	transaction_notification_hook hookFunction, void* data)
3256 {
3257 	block_cache* cache = (block_cache*)_cache;
3258 	TransactionLocker locker(cache);
3259 
3260 	cache_transaction* transaction = lookup_transaction(cache, id);
3261 	if (transaction == NULL)
3262 		return B_BAD_VALUE;
3263 
3264 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3265 	while (iterator.HasNext()) {
3266 		cache_listener* listener = iterator.Next();
3267 		if (listener->data == data && listener->hook == hookFunction) {
3268 			iterator.Remove();
3269 
3270 			if (listener->events_pending != 0) {
3271 				MutexLocker _(sNotificationsLock);
3272 				if (listener->events_pending != 0)
3273 					cache->pending_notifications.Remove(listener);
3274 			}
3275 			delete listener;
3276 			return B_OK;
3277 		}
3278 	}
3279 
3280 	return B_ENTRY_NOT_FOUND;
3281 }
3282 
3283 
3284 status_t
3285 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3286 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3287 {
3288 	cached_block* block = (cached_block*)*_cookie;
3289 	block_cache* cache = (block_cache*)_cache;
3290 	TransactionLocker locker(cache);
3291 
3292 	cache_transaction* transaction = lookup_transaction(cache, id);
3293 	if (transaction == NULL || !transaction->open)
3294 		return B_BAD_VALUE;
3295 
3296 	if (block == NULL)
3297 		block = transaction->first_block;
3298 	else
3299 		block = block->transaction_next;
3300 
3301 	if (transaction->has_sub_transaction) {
3302 		if (mainOnly) {
3303 			// find next block that the parent changed
3304 			while (block != NULL && block->parent_data == NULL)
3305 				block = block->transaction_next;
3306 		} else {
3307 			// find next non-discarded block
3308 			while (block != NULL && block->discard)
3309 				block = block->transaction_next;
3310 		}
3311 	}
3312 
3313 	if (block == NULL)
3314 		return B_ENTRY_NOT_FOUND;
3315 
3316 	if (_blockNumber)
3317 		*_blockNumber = block->block_number;
3318 	if (_data)
3319 		*_data = mainOnly ? block->parent_data : block->current_data;
3320 	if (_unchangedData)
3321 		*_unchangedData = block->original_data;
3322 
3323 	*_cookie = (addr_t)block;
3324 	return B_OK;
3325 }
3326 
3327 
3328 int32
3329 cache_blocks_in_transaction(void* _cache, int32 id)
3330 {
3331 	block_cache* cache = (block_cache*)_cache;
3332 	TransactionLocker locker(cache);
3333 
3334 	cache_transaction* transaction = lookup_transaction(cache, id);
3335 	if (transaction == NULL)
3336 		return B_BAD_VALUE;
3337 
3338 	return transaction->num_blocks;
3339 }
3340 
3341 
3342 /*!	Returns the number of blocks that are part of the main transaction. If this
3343 	transaction does not have a sub transaction yet, this is the same value as
3344 	cache_blocks_in_transaction() would return.
3345 */
3346 int32
3347 cache_blocks_in_main_transaction(void* _cache, int32 id)
3348 {
3349 	block_cache* cache = (block_cache*)_cache;
3350 	TransactionLocker locker(cache);
3351 
3352 	cache_transaction* transaction = lookup_transaction(cache, id);
3353 	if (transaction == NULL)
3354 		return B_BAD_VALUE;
3355 
3356 	if (transaction->has_sub_transaction)
3357 		return transaction->main_num_blocks;
3358 
3359 	return transaction->num_blocks;
3360 }
3361 
3362 
3363 int32
3364 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3365 {
3366 	block_cache* cache = (block_cache*)_cache;
3367 	TransactionLocker locker(cache);
3368 
3369 	cache_transaction* transaction = lookup_transaction(cache, id);
3370 	if (transaction == NULL)
3371 		return B_BAD_VALUE;
3372 
3373 	return transaction->sub_num_blocks;
3374 }
3375 
3376 
3377 /*!	Check if block is in transaction
3378 */
3379 bool
3380 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber)
3381 {
3382 	block_cache* cache = (block_cache*)_cache;
3383 	TransactionLocker locker(cache);
3384 
3385 	cached_block* block = cache->hash->Lookup(blockNumber);
3386 
3387 	return (block != NULL && block->transaction != NULL
3388 		&& block->transaction->id == id);
3389 }
3390 
3391 
3392 //	#pragma mark - public block cache API
3393 
3394 
3395 void
3396 block_cache_delete(void* _cache, bool allowWrites)
3397 {
3398 	block_cache* cache = (block_cache*)_cache;
3399 
3400 	if (allowWrites)
3401 		block_cache_sync(cache);
3402 
3403 	mutex_lock(&sCachesLock);
3404 	sCaches.Remove(cache);
3405 	mutex_unlock(&sCachesLock);
3406 
3407 	mutex_lock(&cache->lock);
3408 
3409 	// wait for all blocks to become unbusy
3410 	wait_for_busy_reading_blocks(cache);
3411 	wait_for_busy_writing_blocks(cache);
3412 
3413 	// free all blocks
3414 
3415 	cached_block* block = cache->hash->Clear(true);
3416 	while (block != NULL) {
3417 		cached_block* next = block->next;
3418 		cache->FreeBlock(block);
3419 		block = next;
3420 	}
3421 
3422 	// free all transactions (they will all be aborted)
3423 
3424 	cache_transaction* transaction = cache->transaction_hash->Clear(true);
3425 	while (transaction != NULL) {
3426 		cache_transaction* next = transaction->next;
3427 		delete transaction;
3428 		transaction = next;
3429 	}
3430 
3431 	delete cache;
3432 }
3433 
3434 
3435 void*
3436 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3437 {
3438 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3439 		readOnly);
3440 	if (cache == NULL)
3441 		return NULL;
3442 
3443 	if (cache->Init() != B_OK) {
3444 		delete cache;
3445 		return NULL;
3446 	}
3447 
3448 	MutexLocker _(sCachesLock);
3449 	sCaches.Add(cache);
3450 
3451 	return cache;
3452 }
3453 
3454 
3455 status_t
3456 block_cache_sync(void* _cache)
3457 {
3458 	block_cache* cache = (block_cache*)_cache;
3459 
3460 	// We will sync all dirty blocks to disk that have a completed
3461 	// transaction or no transaction only
3462 
3463 	MutexLocker locker(&cache->lock);
3464 
3465 	BlockWriter writer(cache);
3466 	BlockTable::Iterator iterator(cache->hash);
3467 
3468 	while (iterator.HasNext()) {
3469 		cached_block* block = iterator.Next();
3470 		if (block->CanBeWritten())
3471 			writer.Add(block);
3472 	}
3473 
3474 	status_t status = writer.Write();
3475 
3476 	locker.Unlock();
3477 
3478 	wait_for_notifications(cache);
3479 		// make sure that all pending TRANSACTION_WRITTEN notifications
3480 		// are handled after we return
3481 	return status;
3482 }
3483 
3484 
3485 status_t
3486 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3487 {
3488 	block_cache* cache = (block_cache*)_cache;
3489 
3490 	// We will sync all dirty blocks to disk that have a completed
3491 	// transaction or no transaction only
3492 
3493 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3494 		panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
3495 			" (max %" B_PRIdOFF ")",
3496 			blockNumber, cache->max_blocks - 1);
3497 		return B_BAD_VALUE;
3498 	}
3499 
3500 	MutexLocker locker(&cache->lock);
3501 	BlockWriter writer(cache);
3502 
3503 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
3504 		cached_block* block = cache->hash->Lookup(blockNumber);
3505 		if (block == NULL)
3506 			continue;
3507 
3508 		if (block->CanBeWritten())
3509 			writer.Add(block);
3510 	}
3511 
3512 	status_t status = writer.Write();
3513 
3514 	locker.Unlock();
3515 
3516 	wait_for_notifications(cache);
3517 		// make sure that all pending TRANSACTION_WRITTEN notifications
3518 		// are handled after we return
3519 	return status;
3520 }
3521 
3522 
3523 /*!	Discards a block from the current transaction or from the cache.
3524 	You have to call this function when you no longer use a block, ie. when it
3525 	might be reclaimed by the file cache in order to make sure they won't
3526 	interfere.
3527 */
3528 void
3529 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3530 {
3531 	// TODO: this could be a nice place to issue the ATA trim command
3532 	block_cache* cache = (block_cache*)_cache;
3533 	TransactionLocker locker(cache);
3534 
3535 	BlockWriter writer(cache);
3536 
3537 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3538 		cached_block* block = cache->hash->Lookup(blockNumber);
3539 		if (block != NULL && block->previous_transaction != NULL)
3540 			writer.Add(block);
3541 	}
3542 
3543 	writer.Write();
3544 		// TODO: this can fail, too!
3545 
3546 	blockNumber -= numBlocks;
3547 		// reset blockNumber to its original value
3548 
3549 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3550 		cached_block* block = cache->hash->Lookup(blockNumber);
3551 		if (block == NULL)
3552 			continue;
3553 
3554 		ASSERT(block->previous_transaction == NULL);
3555 
3556 		if (block->unused) {
3557 			cache->unused_blocks.Remove(block);
3558 			cache->unused_block_count--;
3559 			cache->RemoveBlock(block);
3560 		} else {
3561 			if (block->transaction != NULL && block->parent_data != NULL
3562 				&& block->parent_data != block->current_data) {
3563 				panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3564 					"transaction!", blockNumber);
3565 			}
3566 
3567 			// mark it as discarded (in the current transaction only, if any)
3568 			block->discard = true;
3569 		}
3570 	}
3571 }
3572 
3573 
3574 status_t
3575 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3576 {
3577 	block_cache* cache = (block_cache*)_cache;
3578 	MutexLocker locker(&cache->lock);
3579 
3580 	if (cache->read_only) {
3581 		panic("tried to make block writable on a read-only cache!");
3582 		return B_ERROR;
3583 	}
3584 
3585 	// TODO: this can be done better!
3586 	void* block = get_writable_cached_block(cache, blockNumber,
3587 		blockNumber, 1, transaction, false);
3588 	if (block != NULL) {
3589 		put_cached_block((block_cache*)_cache, blockNumber);
3590 		return B_OK;
3591 	}
3592 
3593 	return B_ERROR;
3594 }
3595 
3596 
3597 void*
3598 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3599 	off_t length, int32 transaction)
3600 {
3601 	block_cache* cache = (block_cache*)_cache;
3602 	MutexLocker locker(&cache->lock);
3603 
3604 	TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3605 		blockNumber, transaction));
3606 	if (cache->read_only)
3607 		panic("tried to get writable block on a read-only cache!");
3608 
3609 	return get_writable_cached_block(cache, blockNumber, base, length,
3610 		transaction, false);
3611 }
3612 
3613 
3614 void*
3615 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3616 {
3617 	return block_cache_get_writable_etc(_cache, blockNumber,
3618 		blockNumber, 1, transaction);
3619 }
3620 
3621 
3622 void*
3623 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3624 {
3625 	block_cache* cache = (block_cache*)_cache;
3626 	MutexLocker locker(&cache->lock);
3627 
3628 	TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3629 		blockNumber, transaction));
3630 	if (cache->read_only)
3631 		panic("tried to get empty writable block on a read-only cache!");
3632 
3633 	return get_writable_cached_block((block_cache*)_cache, blockNumber,
3634 		blockNumber, 1, transaction, true);
3635 }
3636 
3637 
3638 const void*
3639 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
3640 {
3641 	block_cache* cache = (block_cache*)_cache;
3642 	MutexLocker locker(&cache->lock);
3643 	bool allocated;
3644 
3645 	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
3646 	if (block == NULL)
3647 		return NULL;
3648 
3649 #if BLOCK_CACHE_DEBUG_CHANGED
3650 	if (block->compare == NULL)
3651 		block->compare = cache->Allocate();
3652 	if (block->compare != NULL)
3653 		memcpy(block->compare, block->current_data, cache->block_size);
3654 #endif
3655 	TB(Get(cache, block));
3656 
3657 	return block->current_data;
3658 }
3659 
3660 
3661 const void*
3662 block_cache_get(void* _cache, off_t blockNumber)
3663 {
3664 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
3665 }
3666 
3667 
3668 /*!	Changes the internal status of a writable block to \a dirty. This can be
3669 	helpful in case you realize you don't need to change that block anymore
3670 	for whatever reason.
3671 
3672 	Note, you must only use this function on blocks that were acquired
3673 	writable!
3674 */
3675 status_t
3676 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3677 	int32 transaction)
3678 {
3679 	block_cache* cache = (block_cache*)_cache;
3680 	MutexLocker locker(&cache->lock);
3681 
3682 	cached_block* block = cache->hash->Lookup(blockNumber);
3683 	if (block == NULL)
3684 		return B_BAD_VALUE;
3685 	if (block->is_dirty == dirty) {
3686 		// there is nothing to do for us
3687 		return B_OK;
3688 	}
3689 
3690 	// TODO: not yet implemented
3691 	if (dirty)
3692 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
3693 
3694 	return B_OK;
3695 }
3696 
3697 
3698 void
3699 block_cache_put(void* _cache, off_t blockNumber)
3700 {
3701 	block_cache* cache = (block_cache*)_cache;
3702 	MutexLocker locker(&cache->lock);
3703 
3704 	put_cached_block(cache, blockNumber);
3705 }
3706 
3707