xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision 445d4fd926c569e7b9ae28017da86280aaecbae2)
1 /*
2  * Copyright 2004-2020, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <KernelExport.h>
15 #include <fs_cache.h>
16 
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <vm/vm_page.h>
26 
27 #include "kernel_debug_config.h"
28 
29 
30 // TODO: this is a naive but growing implementation to test the API:
31 //	block reading/writing is not at all optimized for speed, it will
32 //	just read and write single blocks.
33 // TODO: the retrieval/copy of the original data could be delayed until the
34 //		new data must be written, ie. in low memory situations.
35 
36 #ifdef _KERNEL_MODE
37 #	define TRACE_ALWAYS(x...) dprintf(x)
38 #else
39 #	define TRACE_ALWAYS(x...) printf(x)
40 #endif
41 
42 //#define TRACE_BLOCK_CACHE
43 #ifdef TRACE_BLOCK_CACHE
44 #	define TRACE(x)	TRACE_ALWAYS(x)
45 #else
46 #	define TRACE(x) ;
47 #endif
48 
49 
50 // This macro is used for fatal situations that are acceptable in a running
51 // system, like out of memory situations - should only panic for debugging.
52 #define FATAL(x) panic x
53 
54 static const bigtime_t kTransactionIdleTime = 2000000LL;
55 	// a transaction is considered idle after 2 seconds of inactivity
56 
57 
58 namespace {
59 
60 struct cache_transaction;
61 struct cached_block;
62 struct block_cache;
63 typedef DoublyLinkedListLink<cached_block> block_link;
64 
65 struct cached_block {
66 	cached_block*	next;			// next in hash
67 	cached_block*	transaction_next;
68 	block_link		link;
69 	off_t			block_number;
70 	void*			current_data;
71 		// The data that is seen by everyone using the API; this one is always
72 		// present.
73 	void*			original_data;
74 		// When in a transaction, this contains the original data from before
75 		// the transaction.
76 	void*			parent_data;
77 		// This is a lazily alloced buffer that represents the contents of the
78 		// block in the parent transaction. It may point to current_data if the
79 		// contents have been changed only in the parent transaction, or, if the
80 		// block has been changed in the current sub transaction already, to a
81 		// new block containing the contents changed in the parent transaction.
82 		// If this is NULL, the block has not been changed in the parent
83 		// transaction at all.
84 #if BLOCK_CACHE_DEBUG_CHANGED
85 	void*			compare;
86 #endif
87 	int32			ref_count;
88 	int32			last_accessed;
89 	bool			busy_reading : 1;
90 	bool			busy_writing : 1;
91 	bool			is_writing : 1;
92 		// Block has been checked out for writing without transactions, and
93 		// cannot be written back if set
94 	bool			is_dirty : 1;
95 	bool			unused : 1;
96 	bool			discard : 1;
97 	bool			busy_reading_waiters : 1;
98 	bool			busy_writing_waiters : 1;
99 	cache_transaction* transaction;
100 		// This is the current active transaction, if any, the block is
101 		// currently in (meaning was changed as a part of it).
102 	cache_transaction* previous_transaction;
103 		// This is set to the last transaction that was ended containing this
104 		// block. In this case, the block has not yet written back yet, and
105 		// the changed data is either in current_data, or original_data -- the
106 		// latter if the block is already being part of another transaction.
107 		// There can only be one previous transaction, so when the active
108 		// transaction ends, the changes of the previous transaction have to
109 		// be written back before that transaction becomes the next previous
110 		// transaction.
111 
112 	bool CanBeWritten() const;
113 	int32 LastAccess() const
114 		{ return system_time() / 1000000L - last_accessed; }
115 };
116 
117 typedef DoublyLinkedList<cached_block,
118 	DoublyLinkedListMemberGetLink<cached_block,
119 		&cached_block::link> > block_list;
120 
121 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
122 	static inline void* operator new(size_t size);
123 	static inline void operator delete(void* block);
124 
125 	int32			transaction_id;
126 	int32			events_pending;
127 	int32			events;
128 	transaction_notification_hook hook;
129 	void*			data;
130 	bool			delete_after_event;
131 };
132 
133 typedef DoublyLinkedList<cache_notification> NotificationList;
134 
135 static object_cache* sCacheNotificationCache;
136 
137 struct cache_listener;
138 typedef DoublyLinkedListLink<cache_listener> listener_link;
139 
140 struct cache_listener : cache_notification {
141 	listener_link	link;
142 };
143 
144 typedef DoublyLinkedList<cache_listener,
145 	DoublyLinkedListMemberGetLink<cache_listener,
146 		&cache_listener::link> > ListenerList;
147 
148 void*
149 cache_notification::operator new(size_t size)
150 {
151 	// We can't really know whether something is a cache_notification or a
152 	// cache_listener at runtime, so we just use one object_cache for both
153 	// with the size set to that of the (slightly larger) cache_listener.
154 	// In practice, the vast majority of cache_notifications are really
155 	// cache_listeners, so this is a more than acceptable trade-off.
156 	ASSERT(size <= sizeof(cache_listener));
157 	return object_cache_alloc(sCacheNotificationCache, 0);
158 }
159 
160 void
161 cache_notification::operator delete(void* block)
162 {
163 	object_cache_free(sCacheNotificationCache, block, 0);
164 }
165 
166 
167 struct BlockHash {
168 	typedef off_t			KeyType;
169 	typedef	cached_block	ValueType;
170 
171 	size_t HashKey(KeyType key) const
172 	{
173 		return key;
174 	}
175 
176 	size_t Hash(ValueType* block) const
177 	{
178 		return block->block_number;
179 	}
180 
181 	bool Compare(KeyType key, ValueType* block) const
182 	{
183 		return block->block_number == key;
184 	}
185 
186 	ValueType*& GetLink(ValueType* value) const
187 	{
188 		return value->next;
189 	}
190 };
191 
192 typedef BOpenHashTable<BlockHash> BlockTable;
193 
194 
195 struct TransactionHash {
196 	typedef int32				KeyType;
197 	typedef	cache_transaction	ValueType;
198 
199 	size_t HashKey(KeyType key) const
200 	{
201 		return key;
202 	}
203 
204 	size_t Hash(ValueType* transaction) const;
205 	bool Compare(KeyType key, ValueType* transaction) const;
206 	ValueType*& GetLink(ValueType* value) const;
207 };
208 
209 typedef BOpenHashTable<TransactionHash> TransactionTable;
210 
211 
212 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
213 	BlockTable*		hash;
214 	mutex			lock;
215 	int				fd;
216 	off_t			max_blocks;
217 	size_t			block_size;
218 	int32			next_transaction_id;
219 	cache_transaction* last_transaction;
220 	TransactionTable* transaction_hash;
221 
222 	object_cache*	buffer_cache;
223 	block_list		unused_blocks;
224 	uint32			unused_block_count;
225 
226 	ConditionVariable busy_reading_condition;
227 	uint32			busy_reading_count;
228 	bool			busy_reading_waiters;
229 
230 	ConditionVariable busy_writing_condition;
231 	uint32			busy_writing_count;
232 	bool			busy_writing_waiters;
233 
234 	bigtime_t		last_block_write;
235 	bigtime_t		last_block_write_duration;
236 
237 	uint32			num_dirty_blocks;
238 	bool			read_only;
239 
240 	NotificationList pending_notifications;
241 	ConditionVariable condition_variable;
242 
243 					block_cache(int fd, off_t numBlocks, size_t blockSize,
244 						bool readOnly);
245 					~block_cache();
246 
247 	status_t		Init();
248 
249 	void			Free(void* buffer);
250 	void*			Allocate();
251 	void			FreeBlock(cached_block* block);
252 	cached_block*	NewBlock(off_t blockNumber);
253 	void			FreeBlockParentData(cached_block* block);
254 
255 	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
256 	void			RemoveBlock(cached_block* block);
257 	void			DiscardBlock(cached_block* block);
258 
259 private:
260 	static void		_LowMemoryHandler(void* data, uint32 resources,
261 						int32 level);
262 	cached_block*	_GetUnusedBlock();
263 };
264 
265 struct cache_transaction {
266 	cache_transaction();
267 
268 	cache_transaction* next;
269 	int32			id;
270 	int32			num_blocks;
271 	int32			main_num_blocks;
272 	int32			sub_num_blocks;
273 	cached_block*	first_block;
274 	block_list		blocks;
275 	ListenerList	listeners;
276 	bool			open;
277 	bool			has_sub_transaction;
278 	bigtime_t		last_used;
279 	int32			busy_writing_count;
280 };
281 
282 
283 class BlockWriter {
284 public:
285 								BlockWriter(block_cache* cache,
286 									size_t max = SIZE_MAX);
287 								~BlockWriter();
288 
289 			bool				Add(cached_block* block,
290 									cache_transaction* transaction = NULL);
291 			bool				Add(cache_transaction* transaction,
292 									bool& hasLeftOvers);
293 
294 			status_t			Write(cache_transaction* transaction = NULL,
295 									bool canUnlock = true);
296 
297 			bool				DeletedTransaction() const
298 									{ return fDeletedTransaction; }
299 
300 	static	status_t			WriteBlock(block_cache* cache,
301 									cached_block* block);
302 
303 private:
304 			void*				_Data(cached_block* block) const;
305 			status_t			_WriteBlock(cached_block* block);
306 			void				_BlockDone(cached_block* block,
307 									cache_transaction* transaction);
308 			void				_UnmarkWriting(cached_block* block);
309 
310 	static	int					_CompareBlocks(const void* _blockA,
311 									const void* _blockB);
312 
313 private:
314 	static	const size_t		kBufferSize = 64;
315 
316 			block_cache*		fCache;
317 			cached_block*		fBuffer[kBufferSize];
318 			cached_block**		fBlocks;
319 			size_t				fCount;
320 			size_t				fTotal;
321 			size_t				fCapacity;
322 			size_t				fMax;
323 			status_t			fStatus;
324 			bool				fDeletedTransaction;
325 };
326 
327 
328 class TransactionLocking {
329 public:
330 	inline bool Lock(block_cache* cache)
331 	{
332 		mutex_lock(&cache->lock);
333 
334 		while (cache->busy_writing_count != 0) {
335 			// wait for all blocks to be written
336 			ConditionVariableEntry entry;
337 			cache->busy_writing_condition.Add(&entry);
338 			cache->busy_writing_waiters = true;
339 
340 			mutex_unlock(&cache->lock);
341 
342 			entry.Wait();
343 
344 			mutex_lock(&cache->lock);
345 		}
346 
347 		return true;
348 	}
349 
350 	inline void Unlock(block_cache* cache)
351 	{
352 		mutex_unlock(&cache->lock);
353 	}
354 };
355 
356 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
357 
358 } // namespace
359 
360 
361 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
362 namespace BlockTracing {
363 
364 class Action : public AbstractTraceEntry {
365 public:
366 	Action(block_cache* cache, cached_block* block)
367 		:
368 		fCache(cache),
369 		fBlockNumber(block->block_number),
370 		fIsDirty(block->is_dirty),
371 		fHasOriginal(block->original_data != NULL),
372 		fHasParent(block->parent_data != NULL),
373 		fTransactionID(-1),
374 		fPreviousID(-1)
375 	{
376 		if (block->transaction != NULL)
377 			fTransactionID = block->transaction->id;
378 		if (block->previous_transaction != NULL)
379 			fPreviousID = block->previous_transaction->id;
380 	}
381 
382 	virtual void AddDump(TraceOutput& out)
383 	{
384 		out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
385 			" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
386 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
387 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
388 	}
389 
390 	virtual const char* _Action() const = 0;
391 
392 private:
393 	block_cache*		fCache;
394 	uint64				fBlockNumber;
395 	bool				fIsDirty;
396 	bool				fHasOriginal;
397 	bool				fHasParent;
398 	int32				fTransactionID;
399 	int32				fPreviousID;
400 };
401 
402 class Get : public Action {
403 public:
404 	Get(block_cache* cache, cached_block* block)
405 		:
406 		Action(cache, block)
407 	{
408 		Initialized();
409 	}
410 
411 	virtual const char* _Action() const { return "get"; }
412 };
413 
414 class Put : public Action {
415 public:
416 	Put(block_cache* cache, cached_block* block)
417 		:
418 		Action(cache, block)
419 	{
420 		Initialized();
421 	}
422 
423 	virtual const char* _Action() const { return "put"; }
424 };
425 
426 class Read : public Action {
427 public:
428 	Read(block_cache* cache, cached_block* block)
429 		:
430 		Action(cache, block)
431 	{
432 		Initialized();
433 	}
434 
435 	virtual const char* _Action() const { return "read"; }
436 };
437 
438 class Write : public Action {
439 public:
440 	Write(block_cache* cache, cached_block* block)
441 		:
442 		Action(cache, block)
443 	{
444 		Initialized();
445 	}
446 
447 	virtual const char* _Action() const { return "write"; }
448 };
449 
450 class Flush : public Action {
451 public:
452 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
453 		:
454 		Action(cache, block),
455 		fGetUnused(getUnused)
456 	{
457 		Initialized();
458 	}
459 
460 	virtual const char* _Action() const
461 		{ return fGetUnused ? "get-unused" : "flush"; }
462 
463 private:
464 	bool	fGetUnused;
465 };
466 
467 class Error : public AbstractTraceEntry {
468 public:
469 	Error(block_cache* cache, uint64 blockNumber, const char* message,
470 			status_t status = B_OK)
471 		:
472 		fCache(cache),
473 		fBlockNumber(blockNumber),
474 		fMessage(message),
475 		fStatus(status)
476 	{
477 		Initialized();
478 	}
479 
480 	virtual void AddDump(TraceOutput& out)
481 	{
482 		out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
483 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
484 			fStatus != B_OK ? strerror(fStatus) : "");
485 	}
486 
487 private:
488 	block_cache*	fCache;
489 	uint64			fBlockNumber;
490 	const char*		fMessage;
491 	status_t		fStatus;
492 };
493 
494 #if BLOCK_CACHE_BLOCK_TRACING >= 2
495 class BlockData : public AbstractTraceEntry {
496 public:
497 	enum {
498 		kCurrent	= 0x01,
499 		kParent		= 0x02,
500 		kOriginal	= 0x04
501 	};
502 
503 	BlockData(block_cache* cache, cached_block* block, const char* message)
504 		:
505 		fCache(cache),
506 		fSize(cache->block_size),
507 		fBlockNumber(block->block_number),
508 		fMessage(message)
509 	{
510 		_Allocate(fCurrent, block->current_data);
511 		_Allocate(fParent, block->parent_data);
512 		_Allocate(fOriginal, block->original_data);
513 
514 #if KTRACE_PRINTF_STACK_TRACE
515 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
516 			false);
517 #endif
518 
519 		Initialized();
520 	}
521 
522 	virtual void AddDump(TraceOutput& out)
523 	{
524 		out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
525 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
526 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
527 			fMessage);
528 	}
529 
530 #if KTRACE_PRINTF_STACK_TRACE
531 	virtual void DumpStackTrace(TraceOutput& out)
532 	{
533 		out.PrintStackTrace(fStackTrace);
534 	}
535 #endif
536 
537 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
538 	{
539 		if ((which & kCurrent) != 0)
540 			DumpBlock(kCurrent, offset, size);
541 		if ((which & kParent) != 0)
542 			DumpBlock(kParent, offset, size);
543 		if ((which & kOriginal) != 0)
544 			DumpBlock(kOriginal, offset, size);
545 	}
546 
547 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
548 	{
549 		if (offset > fSize) {
550 			kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
551 			return;
552 		}
553 		if (offset + size > fSize)
554 			size = fSize - offset;
555 
556 		const char* label;
557 		uint8* data;
558 
559 		if ((which & kCurrent) != 0) {
560 			label = "current";
561 			data = fCurrent;
562 		} else if ((which & kParent) != 0) {
563 			label = "parent";
564 			data = fParent;
565 		} else if ((which & kOriginal) != 0) {
566 			label = "original";
567 			data = fOriginal;
568 		} else
569 			return;
570 
571 		kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
572 
573 		static const uint32 kBlockSize = 16;
574 		data += offset;
575 
576 		for (uint32 i = 0; i < size;) {
577 			int start = i;
578 
579 			kprintf("  %04" B_PRIx32 " ", i);
580 			for (; i < start + kBlockSize; i++) {
581 				if (!(i % 4))
582 					kprintf(" ");
583 
584 				if (i >= size)
585 					kprintf("  ");
586 				else
587 					kprintf("%02x", data[i]);
588 			}
589 
590 			kprintf("\n");
591 		}
592 	}
593 
594 private:
595 	void _Allocate(uint8*& target, void* source)
596 	{
597 		if (source == NULL) {
598 			target = NULL;
599 			return;
600 		}
601 
602 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
603 	}
604 
605 	block_cache*	fCache;
606 	uint32			fSize;
607 	uint64			fBlockNumber;
608 	const char*		fMessage;
609 	uint8*			fCurrent;
610 	uint8*			fParent;
611 	uint8*			fOriginal;
612 #if KTRACE_PRINTF_STACK_TRACE
613 	tracing_stack_trace* fStackTrace;
614 #endif
615 };
616 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
617 
618 }	// namespace BlockTracing
619 
620 #	define TB(x) new(std::nothrow) BlockTracing::x;
621 #else
622 #	define TB(x) ;
623 #endif
624 
625 #if BLOCK_CACHE_BLOCK_TRACING >= 2
626 #	define TB2(x) new(std::nothrow) BlockTracing::x;
627 #else
628 #	define TB2(x) ;
629 #endif
630 
631 
632 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
633 namespace TransactionTracing {
634 
635 class Action : public AbstractTraceEntry {
636 public:
637 	Action(const char* label, block_cache* cache,
638 			cache_transaction* transaction)
639 		:
640 		fCache(cache),
641 		fTransaction(transaction),
642 		fID(transaction->id),
643 		fSub(transaction->has_sub_transaction),
644 		fNumBlocks(transaction->num_blocks),
645 		fSubNumBlocks(transaction->sub_num_blocks)
646 	{
647 		strlcpy(fLabel, label, sizeof(fLabel));
648 		Initialized();
649 	}
650 
651 	virtual void AddDump(TraceOutput& out)
652 	{
653 		out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
654 			", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
655 			fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
656 	}
657 
658 private:
659 	char				fLabel[12];
660 	block_cache*		fCache;
661 	cache_transaction*	fTransaction;
662 	int32				fID;
663 	bool				fSub;
664 	int32				fNumBlocks;
665 	int32				fSubNumBlocks;
666 };
667 
668 class Detach : public AbstractTraceEntry {
669 public:
670 	Detach(block_cache* cache, cache_transaction* transaction,
671 			cache_transaction* newTransaction)
672 		:
673 		fCache(cache),
674 		fTransaction(transaction),
675 		fID(transaction->id),
676 		fSub(transaction->has_sub_transaction),
677 		fNewTransaction(newTransaction),
678 		fNewID(newTransaction->id)
679 	{
680 		Initialized();
681 	}
682 
683 	virtual void AddDump(TraceOutput& out)
684 	{
685 		out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
686 			"from transaction %p (id %" B_PRId32 ")%s",
687 			fCache, fNewTransaction, fNewID, fTransaction, fID,
688 			fSub ? " sub" : "");
689 	}
690 
691 private:
692 	block_cache*		fCache;
693 	cache_transaction*	fTransaction;
694 	int32				fID;
695 	bool				fSub;
696 	cache_transaction*	fNewTransaction;
697 	int32				fNewID;
698 };
699 
700 class Abort : public AbstractTraceEntry {
701 public:
702 	Abort(block_cache* cache, cache_transaction* transaction)
703 		:
704 		fCache(cache),
705 		fTransaction(transaction),
706 		fID(transaction->id),
707 		fNumBlocks(0)
708 	{
709 		bool isSub = transaction->has_sub_transaction;
710 		fNumBlocks = isSub ? transaction->sub_num_blocks
711 			: transaction->num_blocks;
712 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
713 		if (fBlocks != NULL) {
714 			cached_block* block = transaction->first_block;
715 			for (int32 i = 0; block != NULL && i < fNumBlocks;
716 					block = block->transaction_next) {
717 				fBlocks[i++] = block->block_number;
718 			}
719 		} else
720 			fNumBlocks = 0;
721 
722 #if KTRACE_PRINTF_STACK_TRACE
723 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
724 			false);
725 #endif
726 
727 		Initialized();
728 	}
729 
730 	virtual void AddDump(TraceOutput& out)
731 	{
732 		out.Print("block cache %p, abort transaction "
733 			"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
734 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
735 			out.Print(" %" B_PRIdOFF, fBlocks[i]);
736 	}
737 
738 #if KTRACE_PRINTF_STACK_TRACE
739 	virtual void DumpStackTrace(TraceOutput& out)
740 	{
741 		out.PrintStackTrace(fStackTrace);
742 	}
743 #endif
744 
745 private:
746 	block_cache*		fCache;
747 	cache_transaction*	fTransaction;
748 	int32				fID;
749 	off_t*				fBlocks;
750 	int32				fNumBlocks;
751 #if KTRACE_PRINTF_STACK_TRACE
752 	tracing_stack_trace* fStackTrace;
753 #endif
754 };
755 
756 }	// namespace TransactionTracing
757 
758 #	define T(x) new(std::nothrow) TransactionTracing::x;
759 #else
760 #	define T(x) ;
761 #endif
762 
763 
764 static DoublyLinkedList<block_cache> sCaches;
765 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
766 static mutex sCachesMemoryUseLock
767 	= MUTEX_INITIALIZER("block caches memory use");
768 static size_t sUsedMemory;
769 static sem_id sEventSemaphore;
770 static mutex sNotificationsLock
771 	= MUTEX_INITIALIZER("block cache notifications");
772 static thread_id sNotifierWriterThread;
773 static DoublyLinkedListLink<block_cache> sMarkCache;
774 	// TODO: this only works if the link is the first entry of block_cache
775 static object_cache* sBlockCache;
776 
777 
778 //	#pragma mark - notifications/listener
779 
780 
781 /*!	Checks whether or not this is an event that closes a transaction. */
782 static inline bool
783 is_closing_event(int32 event)
784 {
785 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
786 }
787 
788 
789 static inline bool
790 is_written_event(int32 event)
791 {
792 	return (event & TRANSACTION_WRITTEN) != 0;
793 }
794 
795 
796 /*!	From the specified \a notification, it will remove the lowest pending
797 	event, and return that one in \a _event.
798 	If there is no pending event anymore, it will return \c false.
799 */
800 static bool
801 get_next_pending_event(cache_notification* notification, int32* _event)
802 {
803 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
804 		int32 pending = atomic_and(&notification->events_pending,
805 			~eventMask);
806 
807 		bool more = (pending & ~eventMask) != 0;
808 
809 		if ((pending & eventMask) != 0) {
810 			*_event = eventMask;
811 			return more;
812 		}
813 	}
814 
815 	return false;
816 }
817 
818 
819 static void
820 flush_pending_notifications(block_cache* cache)
821 {
822 	ASSERT_LOCKED_MUTEX(&sCachesLock);
823 
824 	while (true) {
825 		MutexLocker locker(sNotificationsLock);
826 
827 		cache_notification* notification = cache->pending_notifications.Head();
828 		if (notification == NULL)
829 			return;
830 
831 		bool deleteAfterEvent = false;
832 		int32 event = -1;
833 		if (!get_next_pending_event(notification, &event)) {
834 			// remove the notification if this was the last pending event
835 			cache->pending_notifications.Remove(notification);
836 			deleteAfterEvent = notification->delete_after_event;
837 		}
838 
839 		if (event >= 0) {
840 			// Notify listener, we need to copy the notification, as it might
841 			// be removed when we unlock the list.
842 			cache_notification copy = *notification;
843 			locker.Unlock();
844 
845 			copy.hook(copy.transaction_id, event, copy.data);
846 
847 			locker.Lock();
848 		}
849 
850 		if (deleteAfterEvent)
851 			delete notification;
852 	}
853 }
854 
855 
856 /*!	Flushes all pending notifications by calling the appropriate hook
857 	functions.
858 	Must not be called with a cache lock held.
859 */
860 static void
861 flush_pending_notifications()
862 {
863 	MutexLocker _(sCachesLock);
864 
865 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
866 	while (iterator.HasNext()) {
867 		block_cache* cache = iterator.Next();
868 
869 		flush_pending_notifications(cache);
870 	}
871 }
872 
873 
874 /*!	Initializes the \a notification as specified. */
875 static void
876 set_notification(cache_transaction* transaction,
877 	cache_notification &notification, int32 events,
878 	transaction_notification_hook hook, void* data)
879 {
880 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
881 	notification.events_pending = 0;
882 	notification.events = events;
883 	notification.hook = hook;
884 	notification.data = data;
885 	notification.delete_after_event = false;
886 }
887 
888 
889 /*!	Makes sure the notification is deleted. It either deletes it directly,
890 	when possible, or marks it for deletion if the notification is pending.
891 */
892 static void
893 delete_notification(cache_notification* notification)
894 {
895 	MutexLocker locker(sNotificationsLock);
896 
897 	if (notification->events_pending != 0)
898 		notification->delete_after_event = true;
899 	else
900 		delete notification;
901 }
902 
903 
904 /*!	Adds the notification to the pending notifications list, or, if it's
905 	already part of it, updates its events_pending field.
906 	Also marks the notification to be deleted if \a deleteNotification
907 	is \c true.
908 	Triggers the notifier thread to run.
909 */
910 static void
911 add_notification(block_cache* cache, cache_notification* notification,
912 	int32 event, bool deleteNotification)
913 {
914 	if (notification->hook == NULL)
915 		return;
916 
917 	int32 pending = atomic_or(&notification->events_pending, event);
918 	if (pending == 0) {
919 		// not yet part of the notification list
920 		MutexLocker locker(sNotificationsLock);
921 		if (deleteNotification)
922 			notification->delete_after_event = true;
923 		cache->pending_notifications.Add(notification);
924 	} else if (deleteNotification) {
925 		// we might need to delete it ourselves if we're late
926 		delete_notification(notification);
927 	}
928 
929 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
930 		// We're probably still holding some locks that makes rescheduling
931 		// not a good idea at this point.
932 }
933 
934 
935 /*!	Notifies all interested listeners of this transaction about the \a event.
936 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
937 	TRANSACTION_ABORTED), all listeners except those listening to
938 	TRANSACTION_WRITTEN will be removed.
939 */
940 static void
941 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
942 	int32 event)
943 {
944 	T(Action("notify", cache, transaction));
945 
946 	bool isClosing = is_closing_event(event);
947 	bool isWritten = is_written_event(event);
948 
949 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
950 	while (iterator.HasNext()) {
951 		cache_listener* listener = iterator.Next();
952 
953 		bool remove = (isClosing && !is_written_event(listener->events))
954 			|| (isWritten && is_written_event(listener->events));
955 		if (remove)
956 			iterator.Remove();
957 
958 		if ((listener->events & event) != 0)
959 			add_notification(cache, listener, event, remove);
960 		else if (remove)
961 			delete_notification(listener);
962 	}
963 }
964 
965 
966 /*!	Removes and deletes all listeners that are still monitoring this
967 	transaction.
968 */
969 static void
970 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
971 {
972 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
973 	while (iterator.HasNext()) {
974 		cache_listener* listener = iterator.Next();
975 		iterator.Remove();
976 
977 		delete_notification(listener);
978 	}
979 }
980 
981 
982 static status_t
983 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
984 	int32 events, transaction_notification_hook hookFunction, void* data)
985 {
986 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
987 	while (iterator.HasNext()) {
988 		cache_listener* listener = iterator.Next();
989 
990 		if (listener->data == data && listener->hook == hookFunction) {
991 			// this listener already exists, just update it
992 			listener->events |= events;
993 			return B_OK;
994 		}
995 	}
996 
997 	cache_listener* listener = new cache_listener;
998 	if (listener == NULL)
999 		return B_NO_MEMORY;
1000 
1001 	set_notification(transaction, *listener, events, hookFunction, data);
1002 	transaction->listeners.Add(listener);
1003 	return B_OK;
1004 }
1005 
1006 
1007 //	#pragma mark - private transaction
1008 
1009 
1010 cache_transaction::cache_transaction()
1011 {
1012 	num_blocks = 0;
1013 	main_num_blocks = 0;
1014 	sub_num_blocks = 0;
1015 	first_block = NULL;
1016 	open = true;
1017 	has_sub_transaction = false;
1018 	last_used = system_time();
1019 	busy_writing_count = 0;
1020 }
1021 
1022 
1023 static void
1024 delete_transaction(block_cache* cache, cache_transaction* transaction)
1025 {
1026 	if (cache->last_transaction == transaction)
1027 		cache->last_transaction = NULL;
1028 
1029 	remove_transaction_listeners(cache, transaction);
1030 	delete transaction;
1031 }
1032 
1033 
1034 static cache_transaction*
1035 lookup_transaction(block_cache* cache, int32 id)
1036 {
1037 	return cache->transaction_hash->Lookup(id);
1038 }
1039 
1040 
1041 size_t TransactionHash::Hash(cache_transaction* transaction) const
1042 {
1043 	return transaction->id;
1044 }
1045 
1046 
1047 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
1048 {
1049 	return transaction->id == key;
1050 }
1051 
1052 
1053 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
1054 {
1055 	return value->next;
1056 }
1057 
1058 
1059 /*!	Writes back any changes made to blocks in \a transaction that are still
1060 	part of a previous transacton.
1061 */
1062 static status_t
1063 write_blocks_in_previous_transaction(block_cache* cache,
1064 	cache_transaction* transaction)
1065 {
1066 	BlockWriter writer(cache);
1067 
1068 	cached_block* block = transaction->first_block;
1069 	for (; block != NULL; block = block->transaction_next) {
1070 		if (block->previous_transaction != NULL) {
1071 			// need to write back pending changes
1072 			writer.Add(block);
1073 		}
1074 	}
1075 
1076 	return writer.Write();
1077 }
1078 
1079 
1080 //	#pragma mark - cached_block
1081 
1082 
1083 bool
1084 cached_block::CanBeWritten() const
1085 {
1086 	return !busy_writing && !busy_reading
1087 		&& (previous_transaction != NULL
1088 			|| (transaction == NULL && is_dirty && !is_writing));
1089 }
1090 
1091 
1092 //	#pragma mark - BlockWriter
1093 
1094 
1095 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1096 	:
1097 	fCache(cache),
1098 	fBlocks(fBuffer),
1099 	fCount(0),
1100 	fTotal(0),
1101 	fCapacity(kBufferSize),
1102 	fMax(max),
1103 	fStatus(B_OK),
1104 	fDeletedTransaction(false)
1105 {
1106 }
1107 
1108 
1109 BlockWriter::~BlockWriter()
1110 {
1111 	if (fBlocks != fBuffer)
1112 		free(fBlocks);
1113 }
1114 
1115 
1116 /*!	Adds the specified block to the to be written array. If no more blocks can
1117 	be added, false is returned, otherwise true.
1118 */
1119 bool
1120 BlockWriter::Add(cached_block* block, cache_transaction* transaction)
1121 {
1122 	ASSERT(block->CanBeWritten());
1123 
1124 	if (fTotal == fMax)
1125 		return false;
1126 
1127 	if (fCount >= fCapacity) {
1128 		// Enlarge array if necessary
1129 		cached_block** newBlocks;
1130 		size_t newCapacity = max_c(256, fCapacity * 2);
1131 		if (fBlocks == fBuffer)
1132 			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1133 		else {
1134 			newBlocks = (cached_block**)realloc(fBlocks,
1135 				newCapacity * sizeof(void*));
1136 		}
1137 
1138 		if (newBlocks == NULL) {
1139 			// Allocating a larger array failed - we need to write back what
1140 			// we have synchronously now (this will also clear the array)
1141 			Write(transaction, false);
1142 		} else {
1143 			if (fBlocks == fBuffer)
1144 				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1145 
1146 			fBlocks = newBlocks;
1147 			fCapacity = newCapacity;
1148 		}
1149 	}
1150 
1151 	fBlocks[fCount++] = block;
1152 	fTotal++;
1153 	block->busy_writing = true;
1154 	fCache->busy_writing_count++;
1155 	if (block->previous_transaction != NULL)
1156 		block->previous_transaction->busy_writing_count++;
1157 
1158 	return true;
1159 }
1160 
1161 
1162 /*!	Adds all blocks of the specified transaction to the to be written array.
1163 	If no more blocks can be added, false is returned, otherwise true.
1164 */
1165 bool
1166 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
1167 {
1168 	ASSERT(!transaction->open);
1169 
1170 	if (transaction->busy_writing_count != 0) {
1171 		hasLeftOvers = true;
1172 		return true;
1173 	}
1174 
1175 	hasLeftOvers = false;
1176 
1177 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1178 	while (cached_block* block = blockIterator.Next()) {
1179 		if (!block->CanBeWritten()) {
1180 			// This block was already part of a previous transaction within this
1181 			// writer
1182 			hasLeftOvers = true;
1183 			continue;
1184 		}
1185 		if (!Add(block, transaction))
1186 			return false;
1187 
1188 		if (DeletedTransaction())
1189 			break;
1190 	}
1191 
1192 	return true;
1193 }
1194 
1195 
1196 /*! Cache must be locked when calling this method, but it will be unlocked
1197 	while the blocks are written back.
1198 */
1199 status_t
1200 BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
1201 {
1202 	if (fCount == 0)
1203 		return B_OK;
1204 
1205 	if (canUnlock)
1206 		mutex_unlock(&fCache->lock);
1207 
1208 	// Sort blocks in their on-disk order
1209 	// TODO: ideally, this should be handled by the I/O scheduler
1210 
1211 	qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1212 	fDeletedTransaction = false;
1213 
1214 	bigtime_t start = system_time();
1215 
1216 	for (uint32 i = 0; i < fCount; i++) {
1217 		status_t status = _WriteBlock(fBlocks[i]);
1218 		if (status != B_OK) {
1219 			// propagate to global error handling
1220 			if (fStatus == B_OK)
1221 				fStatus = status;
1222 
1223 			_UnmarkWriting(fBlocks[i]);
1224 			fBlocks[i] = NULL;
1225 				// This block will not be marked clean
1226 		}
1227 	}
1228 
1229 	bigtime_t finish = system_time();
1230 
1231 	if (canUnlock)
1232 		mutex_lock(&fCache->lock);
1233 
1234 	if (fStatus == B_OK && fCount >= 8) {
1235 		fCache->last_block_write = finish;
1236 		fCache->last_block_write_duration = (fCache->last_block_write - start)
1237 			/ fCount;
1238 	}
1239 
1240 	for (uint32 i = 0; i < fCount; i++)
1241 		_BlockDone(fBlocks[i], transaction);
1242 
1243 	fCount = 0;
1244 	return fStatus;
1245 }
1246 
1247 
1248 /*!	Writes the specified \a block back to disk. It will always only write back
1249 	the oldest change of the block if it is part of more than one transaction.
1250 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1251 	delete transactions when they are no longer used, and \a deleteTransaction
1252 	is \c true.
1253 */
1254 /*static*/ status_t
1255 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1256 {
1257 	BlockWriter writer(cache);
1258 
1259 	writer.Add(block);
1260 	return writer.Write();
1261 }
1262 
1263 
1264 void*
1265 BlockWriter::_Data(cached_block* block) const
1266 {
1267 	return block->previous_transaction != NULL && block->original_data != NULL
1268 		? block->original_data : block->current_data;
1269 		// We first need to write back changes from previous transactions
1270 }
1271 
1272 
1273 status_t
1274 BlockWriter::_WriteBlock(cached_block* block)
1275 {
1276 	ASSERT(block->busy_writing);
1277 
1278 	TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
1279 	TB(Write(fCache, block));
1280 	TB2(BlockData(fCache, block, "before write"));
1281 
1282 	size_t blockSize = fCache->block_size;
1283 
1284 	ssize_t written = write_pos(fCache->fd,
1285 		block->block_number * blockSize, _Data(block), blockSize);
1286 
1287 	if (written != (ssize_t)blockSize) {
1288 		TB(Error(fCache, block->block_number, "write failed", written));
1289 		TRACE_ALWAYS("could not write back block %" B_PRIdOFF " (%s)\n",
1290 			block->block_number, strerror(errno));
1291 		if (written < 0)
1292 			return errno;
1293 
1294 		return B_IO_ERROR;
1295 	}
1296 
1297 	return B_OK;
1298 }
1299 
1300 
1301 void
1302 BlockWriter::_BlockDone(cached_block* block,
1303 	cache_transaction* transaction)
1304 {
1305 	if (block == NULL) {
1306 		// An error occured when trying to write this block
1307 		return;
1308 	}
1309 
1310 	if (fCache->num_dirty_blocks > 0)
1311 		fCache->num_dirty_blocks--;
1312 
1313 	if (_Data(block) == block->current_data)
1314 		block->is_dirty = false;
1315 
1316 	_UnmarkWriting(block);
1317 
1318 	cache_transaction* previous = block->previous_transaction;
1319 	if (previous != NULL) {
1320 		previous->blocks.Remove(block);
1321 		block->previous_transaction = NULL;
1322 
1323 		if (block->original_data != NULL && block->transaction == NULL) {
1324 			// This block is not part of a transaction, so it does not need
1325 			// its original pointer anymore.
1326 			fCache->Free(block->original_data);
1327 			block->original_data = NULL;
1328 		}
1329 
1330 		// Has the previous transaction been finished with that write?
1331 		if (--previous->num_blocks == 0) {
1332 			TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1333 			T(Action("written", fCache, previous));
1334 
1335 			notify_transaction_listeners(fCache, previous,
1336 				TRANSACTION_WRITTEN);
1337 
1338 			if (transaction != NULL) {
1339 				// This function is called while iterating transaction_hash. We
1340 				// use RemoveUnchecked so the iterator is still valid. A regular
1341 				// Remove can trigger a resize of the hash table which would
1342 				// result in the linked items in the table changing order.
1343 				fCache->transaction_hash->RemoveUnchecked(transaction);
1344 			} else
1345 				fCache->transaction_hash->Remove(previous);
1346 
1347 			delete_transaction(fCache, previous);
1348 			fDeletedTransaction = true;
1349 		}
1350 	}
1351 	if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1352 		// the block is no longer used
1353 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1354 		block->unused = true;
1355 		fCache->unused_blocks.Add(block);
1356 		fCache->unused_block_count++;
1357 	}
1358 
1359 	TB2(BlockData(fCache, block, "after write"));
1360 }
1361 
1362 
1363 void
1364 BlockWriter::_UnmarkWriting(cached_block* block)
1365 {
1366 	block->busy_writing = false;
1367 	if (block->previous_transaction != NULL)
1368 		block->previous_transaction->busy_writing_count--;
1369 	fCache->busy_writing_count--;
1370 
1371 	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1372 		|| block->busy_writing_waiters) {
1373 		fCache->busy_writing_waiters = false;
1374 		block->busy_writing_waiters = false;
1375 		fCache->busy_writing_condition.NotifyAll();
1376 	}
1377 }
1378 
1379 
1380 /*static*/ int
1381 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1382 {
1383 	cached_block* blockA = *(cached_block**)_blockA;
1384 	cached_block* blockB = *(cached_block**)_blockB;
1385 
1386 	off_t diff = blockA->block_number - blockB->block_number;
1387 	if (diff > 0)
1388 		return 1;
1389 
1390 	return diff < 0 ? -1 : 0;
1391 }
1392 
1393 
1394 //	#pragma mark - block_cache
1395 
1396 
1397 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1398 		bool readOnly)
1399 	:
1400 	hash(NULL),
1401 	fd(_fd),
1402 	max_blocks(numBlocks),
1403 	block_size(blockSize),
1404 	next_transaction_id(1),
1405 	last_transaction(NULL),
1406 	transaction_hash(NULL),
1407 	buffer_cache(NULL),
1408 	unused_block_count(0),
1409 	busy_reading_count(0),
1410 	busy_reading_waiters(false),
1411 	busy_writing_count(0),
1412 	busy_writing_waiters(0),
1413 	last_block_write(0),
1414 	last_block_write_duration(0),
1415 	num_dirty_blocks(0),
1416 	read_only(readOnly)
1417 {
1418 }
1419 
1420 
1421 /*! Should be called with the cache's lock held. */
1422 block_cache::~block_cache()
1423 {
1424 	unregister_low_resource_handler(&_LowMemoryHandler, this);
1425 
1426 	delete transaction_hash;
1427 	delete hash;
1428 
1429 	delete_object_cache(buffer_cache);
1430 
1431 	mutex_destroy(&lock);
1432 }
1433 
1434 
1435 status_t
1436 block_cache::Init()
1437 {
1438 	busy_reading_condition.Init(this, "cache block busy_reading");
1439 	busy_writing_condition.Init(this, "cache block busy writing");
1440 	condition_variable.Init(this, "cache transaction sync");
1441 	mutex_init(&lock, "block cache");
1442 
1443 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1444 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1445 	if (buffer_cache == NULL)
1446 		return B_NO_MEMORY;
1447 
1448 	hash = new BlockTable();
1449 	if (hash == NULL || hash->Init(1024) != B_OK)
1450 		return B_NO_MEMORY;
1451 
1452 	transaction_hash = new(std::nothrow) TransactionTable();
1453 	if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
1454 		return B_NO_MEMORY;
1455 
1456 	return register_low_resource_handler(&_LowMemoryHandler, this,
1457 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1458 			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1459 }
1460 
1461 
1462 void
1463 block_cache::Free(void* buffer)
1464 {
1465 	if (buffer != NULL)
1466 		object_cache_free(buffer_cache, buffer, 0);
1467 }
1468 
1469 
1470 void*
1471 block_cache::Allocate()
1472 {
1473 	void* block = object_cache_alloc(buffer_cache, 0);
1474 	if (block != NULL)
1475 		return block;
1476 
1477 	// recycle existing before allocating a new one
1478 	RemoveUnusedBlocks(100);
1479 
1480 	return object_cache_alloc(buffer_cache, 0);
1481 }
1482 
1483 
1484 void
1485 block_cache::FreeBlock(cached_block* block)
1486 {
1487 	Free(block->current_data);
1488 
1489 	if (block->original_data != NULL || block->parent_data != NULL) {
1490 		panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1491 			block->block_number, block->original_data, block->parent_data);
1492 	}
1493 
1494 #if BLOCK_CACHE_DEBUG_CHANGED
1495 	Free(block->compare);
1496 #endif
1497 
1498 	object_cache_free(sBlockCache, block, 0);
1499 }
1500 
1501 
1502 /*! Allocates a new block for \a blockNumber, ready for use */
1503 cached_block*
1504 block_cache::NewBlock(off_t blockNumber)
1505 {
1506 	cached_block* block = NULL;
1507 
1508 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1509 			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1510 		// recycle existing instead of allocating a new one
1511 		block = _GetUnusedBlock();
1512 	}
1513 	if (block == NULL) {
1514 		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1515 		if (block != NULL) {
1516 			block->current_data = Allocate();
1517 			if (block->current_data == NULL) {
1518 				object_cache_free(sBlockCache, block, 0);
1519 				return NULL;
1520 			}
1521 		} else {
1522 			TB(Error(this, blockNumber, "allocation failed"));
1523 			TRACE_ALWAYS("block allocation failed, unused list is %sempty.\n",
1524 				unused_blocks.IsEmpty() ? "" : "not ");
1525 
1526 			// allocation failed, try to reuse an unused block
1527 			block = _GetUnusedBlock();
1528 			if (block == NULL) {
1529 				TB(Error(this, blockNumber, "get unused failed"));
1530 				FATAL(("could not allocate block!\n"));
1531 				return NULL;
1532 			}
1533 		}
1534 	}
1535 
1536 	block->block_number = blockNumber;
1537 	block->ref_count = 0;
1538 	block->last_accessed = 0;
1539 	block->transaction_next = NULL;
1540 	block->transaction = block->previous_transaction = NULL;
1541 	block->original_data = NULL;
1542 	block->parent_data = NULL;
1543 	block->busy_reading = false;
1544 	block->busy_writing = false;
1545 	block->is_writing = false;
1546 	block->is_dirty = false;
1547 	block->unused = false;
1548 	block->discard = false;
1549 	block->busy_reading_waiters = false;
1550 	block->busy_writing_waiters = false;
1551 #if BLOCK_CACHE_DEBUG_CHANGED
1552 	block->compare = NULL;
1553 #endif
1554 
1555 	return block;
1556 }
1557 
1558 
1559 void
1560 block_cache::FreeBlockParentData(cached_block* block)
1561 {
1562 	ASSERT(block->parent_data != NULL);
1563 	if (block->parent_data != block->current_data)
1564 		Free(block->parent_data);
1565 	block->parent_data = NULL;
1566 }
1567 
1568 
1569 void
1570 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1571 {
1572 	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1573 
1574 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1575 			cached_block* block = iterator.Next();) {
1576 		if (minSecondsOld >= block->LastAccess()) {
1577 			// The list is sorted by last access
1578 			break;
1579 		}
1580 		if (block->busy_reading || block->busy_writing)
1581 			continue;
1582 
1583 		TB(Flush(this, block));
1584 		TRACE(("  remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1585 			block->block_number, block->last_accessed));
1586 
1587 		// this can only happen if no transactions are used
1588 		if (block->is_dirty && !block->discard) {
1589 			if (block->busy_writing)
1590 				continue;
1591 
1592 			BlockWriter::WriteBlock(this, block);
1593 		}
1594 
1595 		// remove block from lists
1596 		iterator.Remove();
1597 		unused_block_count--;
1598 		RemoveBlock(block);
1599 
1600 		if (--count <= 0)
1601 			break;
1602 	}
1603 }
1604 
1605 
1606 void
1607 block_cache::RemoveBlock(cached_block* block)
1608 {
1609 	hash->Remove(block);
1610 	FreeBlock(block);
1611 }
1612 
1613 
1614 /*!	Discards the block from a transaction (this method must not be called
1615 	for blocks not part of a transaction).
1616 */
1617 void
1618 block_cache::DiscardBlock(cached_block* block)
1619 {
1620 	ASSERT(block->discard);
1621 	ASSERT(block->previous_transaction == NULL);
1622 
1623 	if (block->parent_data != NULL)
1624 		FreeBlockParentData(block);
1625 
1626 	if (block->original_data != NULL) {
1627 		Free(block->original_data);
1628 		block->original_data = NULL;
1629 	}
1630 
1631 	RemoveBlock(block);
1632 }
1633 
1634 
1635 void
1636 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1637 {
1638 	TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1639 
1640 	// free some blocks according to the low memory state
1641 	// (if there is enough memory left, we don't free any)
1642 
1643 	block_cache* cache = (block_cache*)data;
1644 	if (cache->unused_block_count <= 1)
1645 		return;
1646 
1647 	int32 free = 0;
1648 	int32 secondsOld = 0;
1649 	switch (level) {
1650 		case B_NO_LOW_RESOURCE:
1651 			return;
1652 		case B_LOW_RESOURCE_NOTE:
1653 			free = cache->unused_block_count / 4;
1654 			secondsOld = 120;
1655 			break;
1656 		case B_LOW_RESOURCE_WARNING:
1657 			free = cache->unused_block_count / 2;
1658 			secondsOld = 10;
1659 			break;
1660 		case B_LOW_RESOURCE_CRITICAL:
1661 			free = cache->unused_block_count - 1;
1662 			secondsOld = 0;
1663 			break;
1664 	}
1665 
1666 	MutexLocker locker(&cache->lock);
1667 
1668 	if (!locker.IsLocked()) {
1669 		// If our block_cache were deleted, it could be that we had
1670 		// been called before that deletion went through, therefore,
1671 		// acquiring its lock might fail.
1672 		return;
1673 	}
1674 
1675 #ifdef TRACE_BLOCK_CACHE
1676 	uint32 oldUnused = cache->unused_block_count;
1677 #endif
1678 
1679 	cache->RemoveUnusedBlocks(free, secondsOld);
1680 
1681 	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1682 		cache, oldUnused, cache->unused_block_count));
1683 }
1684 
1685 
1686 cached_block*
1687 block_cache::_GetUnusedBlock()
1688 {
1689 	TRACE(("block_cache: get unused block\n"));
1690 
1691 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1692 			cached_block* block = iterator.Next();) {
1693 		TB(Flush(this, block, true));
1694 		// this can only happen if no transactions are used
1695 		if (block->is_dirty && !block->busy_writing && !block->discard)
1696 			BlockWriter::WriteBlock(this, block);
1697 
1698 		// remove block from lists
1699 		iterator.Remove();
1700 		unused_block_count--;
1701 		hash->Remove(block);
1702 
1703 		ASSERT(block->original_data == NULL && block->parent_data == NULL);
1704 		block->unused = false;
1705 
1706 		// TODO: see if compare data is handled correctly here!
1707 #if BLOCK_CACHE_DEBUG_CHANGED
1708 		if (block->compare != NULL)
1709 			Free(block->compare);
1710 #endif
1711 		return block;
1712 	}
1713 
1714 	return NULL;
1715 }
1716 
1717 
1718 //	#pragma mark - private block functions
1719 
1720 
1721 /*!	Cache must be locked.
1722 */
1723 static void
1724 mark_block_busy_reading(block_cache* cache, cached_block* block)
1725 {
1726 	block->busy_reading = true;
1727 	cache->busy_reading_count++;
1728 }
1729 
1730 
1731 /*!	Cache must be locked.
1732 */
1733 static void
1734 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1735 {
1736 	block->busy_reading = false;
1737 	cache->busy_reading_count--;
1738 
1739 	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1740 		|| block->busy_reading_waiters) {
1741 		cache->busy_reading_waiters = false;
1742 		block->busy_reading_waiters = false;
1743 		cache->busy_reading_condition.NotifyAll();
1744 	}
1745 }
1746 
1747 
1748 /*!	Cache must be locked.
1749 */
1750 static void
1751 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1752 {
1753 	while (block->busy_reading) {
1754 		// wait for at least the specified block to be read in
1755 		ConditionVariableEntry entry;
1756 		cache->busy_reading_condition.Add(&entry);
1757 		block->busy_reading_waiters = true;
1758 
1759 		mutex_unlock(&cache->lock);
1760 
1761 		entry.Wait();
1762 
1763 		mutex_lock(&cache->lock);
1764 	}
1765 }
1766 
1767 
1768 /*!	Cache must be locked.
1769 */
1770 static void
1771 wait_for_busy_reading_blocks(block_cache* cache)
1772 {
1773 	while (cache->busy_reading_count != 0) {
1774 		// wait for all blocks to be read in
1775 		ConditionVariableEntry entry;
1776 		cache->busy_reading_condition.Add(&entry);
1777 		cache->busy_reading_waiters = true;
1778 
1779 		mutex_unlock(&cache->lock);
1780 
1781 		entry.Wait();
1782 
1783 		mutex_lock(&cache->lock);
1784 	}
1785 }
1786 
1787 
1788 /*!	Cache must be locked.
1789 */
1790 static void
1791 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1792 {
1793 	while (block->busy_writing) {
1794 		// wait for all blocks to be written back
1795 		ConditionVariableEntry entry;
1796 		cache->busy_writing_condition.Add(&entry);
1797 		block->busy_writing_waiters = true;
1798 
1799 		mutex_unlock(&cache->lock);
1800 
1801 		entry.Wait();
1802 
1803 		mutex_lock(&cache->lock);
1804 	}
1805 }
1806 
1807 
1808 /*!	Cache must be locked.
1809 */
1810 static void
1811 wait_for_busy_writing_blocks(block_cache* cache)
1812 {
1813 	while (cache->busy_writing_count != 0) {
1814 		// wait for all blocks to be written back
1815 		ConditionVariableEntry entry;
1816 		cache->busy_writing_condition.Add(&entry);
1817 		cache->busy_writing_waiters = true;
1818 
1819 		mutex_unlock(&cache->lock);
1820 
1821 		entry.Wait();
1822 
1823 		mutex_lock(&cache->lock);
1824 	}
1825 }
1826 
1827 
1828 /*!	Removes a reference from the specified \a block. If this was the last
1829 	reference, the block is moved into the unused list.
1830 	In low memory situations, it will also free some blocks from that list,
1831 	but not necessarily the \a block it just released.
1832 */
1833 static void
1834 put_cached_block(block_cache* cache, cached_block* block)
1835 {
1836 #if BLOCK_CACHE_DEBUG_CHANGED
1837 	if (!block->is_dirty && block->compare != NULL
1838 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1839 		TRACE_ALWAYS("new block:\n");
1840 		dump_block((const char*)block->current_data, 256, "  ");
1841 		TRACE_ALWAYS("unchanged block:\n");
1842 		dump_block((const char*)block->compare, 256, "  ");
1843 		BlockWriter::WriteBlock(cache, block);
1844 		panic("block_cache: supposed to be clean block was changed!\n");
1845 
1846 		cache->Free(block->compare);
1847 		block->compare = NULL;
1848 	}
1849 #endif
1850 	TB(Put(cache, block));
1851 
1852 	if (block->ref_count < 1) {
1853 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1854 		return;
1855 	}
1856 
1857 	if (--block->ref_count == 0
1858 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1859 		// This block is not used anymore, and not part of any transaction
1860 		block->is_writing = false;
1861 
1862 		if (block->discard) {
1863 			cache->RemoveBlock(block);
1864 		} else {
1865 			// put this block in the list of unused blocks
1866 			ASSERT(!block->unused);
1867 			block->unused = true;
1868 
1869 			ASSERT(block->original_data == NULL && block->parent_data == NULL);
1870 			cache->unused_blocks.Add(block);
1871 			cache->unused_block_count++;
1872 		}
1873 	}
1874 }
1875 
1876 
1877 static void
1878 put_cached_block(block_cache* cache, off_t blockNumber)
1879 {
1880 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1881 		panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1882 			blockNumber, cache->max_blocks - 1);
1883 	}
1884 
1885 	cached_block* block = cache->hash->Lookup(blockNumber);
1886 	if (block != NULL)
1887 		put_cached_block(cache, block);
1888 	else {
1889 		TB(Error(cache, blockNumber, "put unknown"));
1890 	}
1891 }
1892 
1893 
1894 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1895 	there, or reads it from the disk.
1896 	You need to have the cache locked when calling this function.
1897 
1898 	\param _allocated tells you whether or not a new block has been allocated
1899 		to satisfy your request.
1900 	\param readBlock if \c false, the block will not be read in case it was
1901 		not already in the cache. The block you retrieve may contain random
1902 		data. If \c true, the cache will be temporarily unlocked while the
1903 		block is read in.
1904 */
1905 static status_t
1906 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1907 	bool readBlock, cached_block** _block)
1908 {
1909 	ASSERT_LOCKED_MUTEX(&cache->lock);
1910 
1911 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1912 		panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1913 			blockNumber, cache->max_blocks - 1);
1914 		return B_BAD_VALUE;
1915 	}
1916 
1917 retry:
1918 	cached_block* block = cache->hash->Lookup(blockNumber);
1919 	*_allocated = false;
1920 
1921 	if (block == NULL) {
1922 		// put block into cache
1923 		block = cache->NewBlock(blockNumber);
1924 		if (block == NULL)
1925 			return B_NO_MEMORY;
1926 
1927 		cache->hash->Insert(block);
1928 		*_allocated = true;
1929 	} else if (block->busy_reading) {
1930 		// The block is currently busy_reading - wait and try again later
1931 		wait_for_busy_reading_block(cache, block);
1932 		goto retry;
1933 	}
1934 
1935 	if (block->unused) {
1936 		//TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
1937 		block->unused = false;
1938 		cache->unused_blocks.Remove(block);
1939 		cache->unused_block_count--;
1940 	}
1941 
1942 	if (*_allocated && readBlock) {
1943 		// read block into cache
1944 		int32 blockSize = cache->block_size;
1945 
1946 		mark_block_busy_reading(cache, block);
1947 		mutex_unlock(&cache->lock);
1948 
1949 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1950 			block->current_data, blockSize);
1951 
1952 		mutex_lock(&cache->lock);
1953 		if (bytesRead < blockSize) {
1954 			cache->RemoveBlock(block);
1955 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1956 
1957 			TRACE_ALWAYS("could not read block %" B_PRIdOFF ": bytesRead: %zd,"
1958 				" error: %s\n", blockNumber, bytesRead, strerror(errno));
1959 			return errno;
1960 		}
1961 		TB(Read(cache, block));
1962 
1963 		mark_block_unbusy_reading(cache, block);
1964 	}
1965 
1966 	block->ref_count++;
1967 	block->last_accessed = system_time() / 1000000L;
1968 
1969 	*_block = block;
1970 	return B_OK;
1971 }
1972 
1973 
1974 /*!	Returns the writable block data for the requested blockNumber.
1975 	If \a cleared is true, the block is not read from disk; an empty block
1976 	is returned.
1977 
1978 	This is the only method to insert a block into a transaction. It makes
1979 	sure that the previous block contents are preserved in that case.
1980 */
1981 static status_t
1982 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1983 	off_t length, int32 transactionID, bool cleared, void** _block)
1984 {
1985 	TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
1986 		blockNumber, transactionID));
1987 
1988 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1989 		panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1990 			blockNumber, cache->max_blocks - 1);
1991 		return B_BAD_VALUE;
1992 	}
1993 
1994 	bool allocated;
1995 	cached_block* block;
1996 	status_t status = get_cached_block(cache, blockNumber, &allocated,
1997 		!cleared, &block);
1998 	if (status != B_OK)
1999 		return status;
2000 
2001 	if (block->busy_writing)
2002 		wait_for_busy_writing_block(cache, block);
2003 
2004 	block->discard = false;
2005 
2006 	// if there is no transaction support, we just return the current block
2007 	if (transactionID == -1) {
2008 		if (cleared) {
2009 			mark_block_busy_reading(cache, block);
2010 			mutex_unlock(&cache->lock);
2011 
2012 			memset(block->current_data, 0, cache->block_size);
2013 
2014 			mutex_lock(&cache->lock);
2015 			mark_block_unbusy_reading(cache, block);
2016 		}
2017 
2018 		block->is_writing = true;
2019 
2020 		if (!block->is_dirty) {
2021 			cache->num_dirty_blocks++;
2022 			block->is_dirty = true;
2023 				// mark the block as dirty
2024 		}
2025 
2026 		TB(Get(cache, block));
2027 		*_block = block->current_data;
2028 		return B_OK;
2029 	}
2030 
2031 	cache_transaction* transaction = block->transaction;
2032 
2033 	if (transaction != NULL && transaction->id != transactionID) {
2034 		// TODO: we have to wait here until the other transaction is done.
2035 		//	Maybe we should even panic, since we can't prevent any deadlocks.
2036 		panic("get_writable_cached_block(): asked to get busy writable block "
2037 			"(transaction %" B_PRId32 ")\n", block->transaction->id);
2038 		put_cached_block(cache, block);
2039 		return B_BAD_VALUE;
2040 	}
2041 	if (transaction == NULL && transactionID != -1) {
2042 		// get new transaction
2043 		transaction = lookup_transaction(cache, transactionID);
2044 		if (transaction == NULL) {
2045 			panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
2046 				transactionID);
2047 			put_cached_block(cache, block);
2048 			return B_BAD_VALUE;
2049 		}
2050 		if (!transaction->open) {
2051 			panic("get_writable_cached_block(): transaction already done!\n");
2052 			put_cached_block(cache, block);
2053 			return B_BAD_VALUE;
2054 		}
2055 
2056 		block->transaction = transaction;
2057 
2058 		// attach the block to the transaction block list
2059 		block->transaction_next = transaction->first_block;
2060 		transaction->first_block = block;
2061 		transaction->num_blocks++;
2062 	}
2063 	if (transaction != NULL)
2064 		transaction->last_used = system_time();
2065 
2066 	bool wasUnchanged = block->original_data == NULL
2067 		|| block->previous_transaction != NULL;
2068 
2069 	if (!(allocated && cleared) && block->original_data == NULL) {
2070 		// we already have data, so we need to preserve it
2071 		block->original_data = cache->Allocate();
2072 		if (block->original_data == NULL) {
2073 			TB(Error(cache, blockNumber, "allocate original failed"));
2074 			FATAL(("could not allocate original_data\n"));
2075 			put_cached_block(cache, block);
2076 			return B_NO_MEMORY;
2077 		}
2078 
2079 		mark_block_busy_reading(cache, block);
2080 		mutex_unlock(&cache->lock);
2081 
2082 		memcpy(block->original_data, block->current_data, cache->block_size);
2083 
2084 		mutex_lock(&cache->lock);
2085 		mark_block_unbusy_reading(cache, block);
2086 	}
2087 	if (block->parent_data == block->current_data) {
2088 		// remember any previous contents for the parent transaction
2089 		block->parent_data = cache->Allocate();
2090 		if (block->parent_data == NULL) {
2091 			// TODO: maybe we should just continue the current transaction in
2092 			// this case...
2093 			TB(Error(cache, blockNumber, "allocate parent failed"));
2094 			FATAL(("could not allocate parent\n"));
2095 			put_cached_block(cache, block);
2096 			return B_NO_MEMORY;
2097 		}
2098 
2099 		mark_block_busy_reading(cache, block);
2100 		mutex_unlock(&cache->lock);
2101 
2102 		memcpy(block->parent_data, block->current_data, cache->block_size);
2103 
2104 		mutex_lock(&cache->lock);
2105 		mark_block_unbusy_reading(cache, block);
2106 
2107 		transaction->sub_num_blocks++;
2108 	} else if (transaction != NULL && transaction->has_sub_transaction
2109 		&& block->parent_data == NULL && wasUnchanged)
2110 		transaction->sub_num_blocks++;
2111 
2112 	if (cleared) {
2113 		mark_block_busy_reading(cache, block);
2114 		mutex_unlock(&cache->lock);
2115 
2116 		memset(block->current_data, 0, cache->block_size);
2117 
2118 		mutex_lock(&cache->lock);
2119 		mark_block_unbusy_reading(cache, block);
2120 	}
2121 
2122 	block->is_dirty = true;
2123 	TB(Get(cache, block));
2124 	TB2(BlockData(cache, block, "get writable"));
2125 
2126 	*_block = block->current_data;
2127 	return B_OK;
2128 }
2129 
2130 
2131 #if DEBUG_BLOCK_CACHE
2132 
2133 
2134 static void
2135 dump_block(cached_block* block)
2136 {
2137 	kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2138 		" %c%c%c%c%c%c %08lx %08lx\n",
2139 		(addr_t)block, block->block_number,
2140 		(addr_t)block->current_data, (addr_t)block->original_data,
2141 		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2142 		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2143 		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2144 		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2145 		(addr_t)block->transaction,
2146 		(addr_t)block->previous_transaction);
2147 }
2148 
2149 
2150 static void
2151 dump_block_long(cached_block* block)
2152 {
2153 	kprintf("BLOCK %p\n", block);
2154 	kprintf(" current data:  %p\n", block->current_data);
2155 	kprintf(" original data: %p\n", block->original_data);
2156 	kprintf(" parent data:   %p\n", block->parent_data);
2157 #if BLOCK_CACHE_DEBUG_CHANGED
2158 	kprintf(" compare data:  %p\n", block->compare);
2159 #endif
2160 	kprintf(" ref_count:     %" B_PRId32 "\n", block->ref_count);
2161 	kprintf(" accessed:      %" B_PRId32 "\n", block->LastAccess());
2162 	kprintf(" flags:        ");
2163 	if (block->busy_reading)
2164 		kprintf(" busy_reading");
2165 	if (block->busy_writing)
2166 		kprintf(" busy_writing");
2167 	if (block->is_writing)
2168 		kprintf(" is-writing");
2169 	if (block->is_dirty)
2170 		kprintf(" is-dirty");
2171 	if (block->unused)
2172 		kprintf(" unused");
2173 	if (block->discard)
2174 		kprintf(" discard");
2175 	kprintf("\n");
2176 	if (block->transaction != NULL) {
2177 		kprintf(" transaction:   %p (%" B_PRId32 ")\n", block->transaction,
2178 			block->transaction->id);
2179 		if (block->transaction_next != NULL) {
2180 			kprintf(" next in transaction: %" B_PRIdOFF "\n",
2181 				block->transaction_next->block_number);
2182 		}
2183 	}
2184 	if (block->previous_transaction != NULL) {
2185 		kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2186 			block->previous_transaction,
2187 			block->previous_transaction->id);
2188 	}
2189 
2190 	set_debug_variable("_current", (addr_t)block->current_data);
2191 	set_debug_variable("_original", (addr_t)block->original_data);
2192 	set_debug_variable("_parent", (addr_t)block->parent_data);
2193 }
2194 
2195 
2196 static int
2197 dump_cached_block(int argc, char** argv)
2198 {
2199 	if (argc != 2) {
2200 		kprintf("usage: %s <block-address>\n", argv[0]);
2201 		return 0;
2202 	}
2203 
2204 	dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2205 	return 0;
2206 }
2207 
2208 
2209 static int
2210 dump_cache(int argc, char** argv)
2211 {
2212 	bool showTransactions = false;
2213 	bool showBlocks = false;
2214 	int32 i = 1;
2215 	while (argv[i] != NULL && argv[i][0] == '-') {
2216 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
2217 			switch (arg[0]) {
2218 				case 'b':
2219 					showBlocks = true;
2220 					break;
2221 				case 't':
2222 					showTransactions = true;
2223 					break;
2224 				default:
2225 					print_debugger_command_usage(argv[0]);
2226 					return 0;
2227 			}
2228 		}
2229 		i++;
2230 	}
2231 
2232 	if (i >= argc) {
2233 		print_debugger_command_usage(argv[0]);
2234 		return 0;
2235 	}
2236 
2237 	block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2238 	if (cache == NULL) {
2239 		kprintf("invalid cache address\n");
2240 		return 0;
2241 	}
2242 
2243 	off_t blockNumber = -1;
2244 	if (i + 1 < argc) {
2245 		blockNumber = parse_expression(argv[i + 1]);
2246 		cached_block* block = cache->hash->Lookup(blockNumber);
2247 		if (block != NULL)
2248 			dump_block_long(block);
2249 		else
2250 			kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2251 		return 0;
2252 	}
2253 
2254 	kprintf("BLOCK CACHE: %p\n", cache);
2255 
2256 	kprintf(" fd:           %d\n", cache->fd);
2257 	kprintf(" max_blocks:   %" B_PRIdOFF "\n", cache->max_blocks);
2258 	kprintf(" block_size:   %zu\n", cache->block_size);
2259 	kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2260 	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2261 	kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2262 		cache->busy_reading_waiters ? "has" : "no");
2263 	kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2264 		cache->busy_writing_waiters ? "has" : "no");
2265 
2266 	if (!cache->pending_notifications.IsEmpty()) {
2267 		kprintf(" pending notifications:\n");
2268 
2269 		NotificationList::Iterator iterator
2270 			= cache->pending_notifications.GetIterator();
2271 		while (iterator.HasNext()) {
2272 			cache_notification* notification = iterator.Next();
2273 
2274 			kprintf("  %p %5" B_PRIx32 " %p - %p\n", notification,
2275 				notification->events_pending, notification->hook,
2276 				notification->data);
2277 		}
2278 	}
2279 
2280 	if (showTransactions) {
2281 		kprintf(" transactions:\n");
2282 		kprintf("address       id state  blocks  main   sub\n");
2283 
2284 		TransactionTable::Iterator iterator(cache->transaction_hash);
2285 
2286 		while (iterator.HasNext()) {
2287 			cache_transaction* transaction = iterator.Next();
2288 			kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
2289 				B_PRId32 "\n", transaction, transaction->id,
2290 				transaction->open ? "open" : "closed",
2291 				transaction->num_blocks, transaction->main_num_blocks,
2292 				transaction->sub_num_blocks);
2293 		}
2294 	}
2295 
2296 	if (showBlocks) {
2297 		kprintf(" blocks:\n");
2298 		kprintf("address  block no. current  original parent    refs access "
2299 			"flags transact prev. trans\n");
2300 	}
2301 
2302 	uint32 referenced = 0;
2303 	uint32 count = 0;
2304 	uint32 dirty = 0;
2305 	uint32 discarded = 0;
2306 	BlockTable::Iterator iterator(cache->hash);
2307 	while (iterator.HasNext()) {
2308 		cached_block* block = iterator.Next();
2309 		if (showBlocks)
2310 			dump_block(block);
2311 
2312 		if (block->is_dirty)
2313 			dirty++;
2314 		if (block->discard)
2315 			discarded++;
2316 		if (block->ref_count)
2317 			referenced++;
2318 		count++;
2319 	}
2320 
2321 	kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
2322 		" discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
2323 		" in unused.\n",
2324 		count, dirty, discarded, referenced, cache->busy_reading_count,
2325 		cache->unused_block_count);
2326 	return 0;
2327 }
2328 
2329 
2330 static int
2331 dump_transaction(int argc, char** argv)
2332 {
2333 	bool showBlocks = false;
2334 	int i = 1;
2335 	if (argc > 1 && !strcmp(argv[1], "-b")) {
2336 		showBlocks = true;
2337 		i++;
2338 	}
2339 
2340 	if (argc - i < 1 || argc - i > 2) {
2341 		print_debugger_command_usage(argv[0]);
2342 		return 0;
2343 	}
2344 
2345 	cache_transaction* transaction = NULL;
2346 
2347 	if (argc - i == 1) {
2348 		transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2349 	} else {
2350 		block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2351 		int32 id = parse_expression(argv[i + 1]);
2352 		transaction = lookup_transaction(cache, id);
2353 		if (transaction == NULL) {
2354 			kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2355 			return 0;
2356 		}
2357 	}
2358 
2359 	kprintf("TRANSACTION %p\n", transaction);
2360 
2361 	kprintf(" id:             %" B_PRId32 "\n", transaction->id);
2362 	kprintf(" num block:      %" B_PRId32 "\n", transaction->num_blocks);
2363 	kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2364 	kprintf(" sub num block:  %" B_PRId32 "\n", transaction->sub_num_blocks);
2365 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
2366 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
2367 	kprintf(" idle:           %" B_PRId64 " secs\n",
2368 		(system_time() - transaction->last_used) / 1000000);
2369 
2370 	kprintf(" listeners:\n");
2371 
2372 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2373 	while (iterator.HasNext()) {
2374 		cache_listener* listener = iterator.Next();
2375 
2376 		kprintf("  %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2377 			listener->hook, listener->data);
2378 	}
2379 
2380 	if (!showBlocks)
2381 		return 0;
2382 
2383 	kprintf(" blocks:\n");
2384 	kprintf("address  block no. current  original parent    refs access "
2385 		"flags transact prev. trans\n");
2386 
2387 	cached_block* block = transaction->first_block;
2388 	while (block != NULL) {
2389 		dump_block(block);
2390 		block = block->transaction_next;
2391 	}
2392 
2393 	kprintf("--\n");
2394 
2395 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2396 	while (blockIterator.HasNext()) {
2397 		block = blockIterator.Next();
2398 		dump_block(block);
2399 	}
2400 
2401 	return 0;
2402 }
2403 
2404 
2405 static int
2406 dump_caches(int argc, char** argv)
2407 {
2408 	kprintf("Block caches:\n");
2409 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2410 	while (i.HasNext()) {
2411 		block_cache* cache = i.Next();
2412 		if (cache == (block_cache*)&sMarkCache)
2413 			continue;
2414 
2415 		kprintf("  %p\n", cache);
2416 	}
2417 
2418 	return 0;
2419 }
2420 
2421 
2422 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2423 static int
2424 dump_block_data(int argc, char** argv)
2425 {
2426 	using namespace BlockTracing;
2427 
2428 	// Determine which blocks to show
2429 
2430 	bool printStackTrace = true;
2431 	uint32 which = 0;
2432 	int32 i = 1;
2433 	while (i < argc && argv[i][0] == '-') {
2434 		char* arg = &argv[i][1];
2435 		while (arg[0]) {
2436 			switch (arg[0]) {
2437 				case 'c':
2438 					which |= BlockData::kCurrent;
2439 					break;
2440 				case 'p':
2441 					which |= BlockData::kParent;
2442 					break;
2443 				case 'o':
2444 					which |= BlockData::kOriginal;
2445 					break;
2446 
2447 				default:
2448 					kprintf("invalid block specifier (only o/c/p are "
2449 						"allowed).\n");
2450 					return 0;
2451 			}
2452 			arg++;
2453 		}
2454 
2455 		i++;
2456 	}
2457 	if (which == 0)
2458 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2459 
2460 	if (i == argc) {
2461 		print_debugger_command_usage(argv[0]);
2462 		return 0;
2463 	}
2464 
2465 	// Get the range of blocks to print
2466 
2467 	int64 from = parse_expression(argv[i]);
2468 	int64 to = from;
2469 	if (argc > i + 1)
2470 		to = parse_expression(argv[i + 1]);
2471 	if (to < from)
2472 		to = from;
2473 
2474 	uint32 offset = 0;
2475 	uint32 size = LONG_MAX;
2476 	if (argc > i + 2)
2477 		offset = parse_expression(argv[i + 2]);
2478 	if (argc > i + 3)
2479 		size = parse_expression(argv[i + 3]);
2480 
2481 	TraceEntryIterator iterator;
2482 	iterator.MoveTo(from - 1);
2483 
2484 	static char sBuffer[1024];
2485 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2486 
2487 	while (TraceEntry* entry = iterator.Next()) {
2488 		int32 index = iterator.Index();
2489 		if (index > to)
2490 			break;
2491 
2492 		Action* action = dynamic_cast<Action*>(entry);
2493 		if (action != NULL) {
2494 			out.Clear();
2495 			out.DumpEntry(action);
2496 			continue;
2497 		}
2498 
2499 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
2500 		if (blockData == NULL)
2501 			continue;
2502 
2503 		out.Clear();
2504 
2505 		const char* dump = out.DumpEntry(entry);
2506 		int length = strlen(dump);
2507 		if (length > 0 && dump[length - 1] == '\n')
2508 			length--;
2509 
2510 		kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2511 
2512 		if (printStackTrace) {
2513 			out.Clear();
2514 			entry->DumpStackTrace(out);
2515 			if (out.Size() > 0)
2516 				kputs(out.Buffer());
2517 		}
2518 
2519 		blockData->DumpBlocks(which, offset, size);
2520 	}
2521 
2522 	return 0;
2523 }
2524 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
2525 
2526 
2527 #endif	// DEBUG_BLOCK_CACHE
2528 
2529 
2530 /*!	Traverses through the block_cache list, and returns one cache after the
2531 	other. The cache returned is automatically locked when you get it, and
2532 	unlocked with the next call to this function. Ignores caches that are in
2533 	deletion state.
2534 	Returns \c NULL when the end of the list is reached.
2535 */
2536 static block_cache*
2537 get_next_locked_block_cache(block_cache* last)
2538 {
2539 	MutexLocker _(sCachesLock);
2540 
2541 	block_cache* cache;
2542 	if (last != NULL) {
2543 		mutex_unlock(&last->lock);
2544 
2545 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
2546 		sCaches.Remove((block_cache*)&sMarkCache);
2547 	} else
2548 		cache = sCaches.Head();
2549 
2550 	if (cache != NULL) {
2551 		mutex_lock(&cache->lock);
2552 		sCaches.InsertBefore(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2553 	}
2554 
2555 	return cache;
2556 }
2557 
2558 
2559 /*!	Background thread that continuously checks for pending notifications of
2560 	all caches.
2561 	Every two seconds, it will also write back up to 64 blocks per cache.
2562 */
2563 static status_t
2564 block_notifier_and_writer(void* /*data*/)
2565 {
2566 	const bigtime_t kDefaultTimeout = 2000000LL;
2567 	bigtime_t timeout = kDefaultTimeout;
2568 
2569 	while (true) {
2570 		bigtime_t start = system_time();
2571 
2572 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
2573 			B_RELATIVE_TIMEOUT, timeout);
2574 		if (status == B_OK) {
2575 			flush_pending_notifications();
2576 			timeout -= system_time() - start;
2577 			continue;
2578 		}
2579 
2580 		// Write 64 blocks of each block_cache roughly every 2 seconds,
2581 		// potentially more or less depending on congestion and drive speeds
2582 		// (usually much less.) We do not want to queue everything at once
2583 		// because a future transaction might then get held up waiting for
2584 		// a specific block to be written.
2585 		timeout = kDefaultTimeout;
2586 		size_t usedMemory;
2587 		object_cache_get_usage(sBlockCache, &usedMemory);
2588 
2589 		block_cache* cache = NULL;
2590 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2591 			// Give some breathing room: wait 2x the length of the potential
2592 			// maximum block count-sized write between writes, and also skip
2593 			// if there are more than 16 blocks currently being written.
2594 			const bigtime_t next = cache->last_block_write
2595 					+ cache->last_block_write_duration * 2 * 64;
2596 			if (cache->busy_writing_count > 16 || system_time() < next) {
2597 				if (cache->last_block_write_duration > 0) {
2598 					timeout = min_c(timeout,
2599 						cache->last_block_write_duration * 2 * 64);
2600 				}
2601 				continue;
2602 			}
2603 
2604 			BlockWriter writer(cache, 64);
2605 			bool hasMoreBlocks = false;
2606 
2607 			size_t cacheUsedMemory;
2608 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2609 			usedMemory += cacheUsedMemory;
2610 
2611 			if (cache->num_dirty_blocks) {
2612 				// This cache is not using transactions, we'll scan the blocks
2613 				// directly
2614 				BlockTable::Iterator iterator(cache->hash);
2615 
2616 				while (iterator.HasNext()) {
2617 					cached_block* block = iterator.Next();
2618 					if (block->CanBeWritten() && !writer.Add(block)) {
2619 						hasMoreBlocks = true;
2620 						break;
2621 					}
2622 				}
2623 			} else {
2624 				TransactionTable::Iterator iterator(cache->transaction_hash);
2625 
2626 				while (iterator.HasNext()) {
2627 					cache_transaction* transaction = iterator.Next();
2628 					if (transaction->open) {
2629 						if (system_time() > transaction->last_used
2630 								+ kTransactionIdleTime) {
2631 							// Transaction is open but idle
2632 							notify_transaction_listeners(cache, transaction,
2633 								TRANSACTION_IDLE);
2634 						}
2635 						continue;
2636 					}
2637 
2638 					bool hasLeftOvers;
2639 						// we ignore this one
2640 					if (!writer.Add(transaction, hasLeftOvers)) {
2641 						hasMoreBlocks = true;
2642 						break;
2643 					}
2644 				}
2645 			}
2646 
2647 			writer.Write();
2648 
2649 			if (hasMoreBlocks && cache->last_block_write_duration > 0) {
2650 				// There are probably still more blocks that we could write, so
2651 				// see if we can decrease the timeout.
2652 				timeout = min_c(timeout,
2653 					cache->last_block_write_duration * 2 * 64);
2654 			}
2655 
2656 			if ((block_cache_used_memory() / B_PAGE_SIZE)
2657 					> vm_page_num_pages() / 2) {
2658 				// Try to reduce memory usage to half of the available
2659 				// RAM at maximum
2660 				cache->RemoveUnusedBlocks(1000, 10);
2661 			}
2662 		}
2663 
2664 		MutexLocker _(sCachesMemoryUseLock);
2665 		sUsedMemory = usedMemory;
2666 	}
2667 
2668 	// never can get here
2669 	return B_OK;
2670 }
2671 
2672 
2673 /*!	Notify function for wait_for_notifications(). */
2674 static void
2675 notify_sync(int32 transactionID, int32 event, void* _cache)
2676 {
2677 	block_cache* cache = (block_cache*)_cache;
2678 
2679 	cache->condition_variable.NotifyOne();
2680 }
2681 
2682 
2683 /*!	Must be called with the sCachesLock held. */
2684 static bool
2685 is_valid_cache(block_cache* cache)
2686 {
2687 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2688 
2689 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2690 	while (iterator.HasNext()) {
2691 		if (cache == iterator.Next())
2692 			return true;
2693 	}
2694 
2695 	return false;
2696 }
2697 
2698 
2699 /*!	Waits until all pending notifications are carried out.
2700 	Safe to be called from the block writer/notifier thread.
2701 	You must not hold the \a cache lock when calling this function.
2702 */
2703 static void
2704 wait_for_notifications(block_cache* cache)
2705 {
2706 	MutexLocker locker(sCachesLock);
2707 
2708 	if (find_thread(NULL) == sNotifierWriterThread) {
2709 		// We're the notifier thread, don't wait, but flush all pending
2710 		// notifications directly.
2711 		if (is_valid_cache(cache))
2712 			flush_pending_notifications(cache);
2713 		return;
2714 	}
2715 
2716 	// add sync notification
2717 	cache_notification notification;
2718 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2719 		cache);
2720 
2721 	ConditionVariableEntry entry;
2722 	cache->condition_variable.Add(&entry);
2723 
2724 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2725 	locker.Unlock();
2726 
2727 	// wait for notification hook to be called
2728 	entry.Wait();
2729 }
2730 
2731 
2732 status_t
2733 block_cache_init(void)
2734 {
2735 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2736 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2737 	if (sBlockCache == NULL)
2738 		return B_NO_MEMORY;
2739 
2740 	sCacheNotificationCache = create_object_cache("cache notifications",
2741 		sizeof(cache_listener), 8, NULL, NULL, NULL);
2742 	if (sCacheNotificationCache == NULL)
2743 		return B_NO_MEMORY;
2744 
2745 	new (&sCaches) DoublyLinkedList<block_cache>;
2746 		// manually call constructor
2747 
2748 	sEventSemaphore = create_sem(0, "block cache event");
2749 	if (sEventSemaphore < B_OK)
2750 		return sEventSemaphore;
2751 
2752 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2753 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2754 	if (sNotifierWriterThread >= B_OK)
2755 		resume_thread(sNotifierWriterThread);
2756 
2757 #if DEBUG_BLOCK_CACHE
2758 	add_debugger_command_etc("block_caches", &dump_caches,
2759 		"dumps all block caches", "\n", 0);
2760 	add_debugger_command_etc("block_cache", &dump_cache,
2761 		"dumps a specific block cache",
2762 		"[-bt] <cache-address> [block-number]\n"
2763 		"  -t lists the transactions\n"
2764 		"  -b lists all blocks\n", 0);
2765 	add_debugger_command("cached_block", &dump_cached_block,
2766 		"dumps the specified cached block");
2767 	add_debugger_command_etc("transaction", &dump_transaction,
2768 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2769 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2770 		"  -b lists all blocks that are part of this transaction\n", 0);
2771 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2772 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2773 		"dumps the data blocks logged for the actions",
2774 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2775 		"If no data specifier is used, all blocks are shown by default.\n"
2776 		" -c       the current data is shown, if available.\n"
2777 		" -p       the parent data is shown, if available.\n"
2778 		" -o       the original data is shown, if available.\n"
2779 		" <from>   first index of tracing entries to show.\n"
2780 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2781 		" <offset> the offset of the block data.\n"
2782 		" <from>   the size of the block data that is dumped\n", 0);
2783 #	endif
2784 #endif	// DEBUG_BLOCK_CACHE
2785 
2786 	return B_OK;
2787 }
2788 
2789 
2790 size_t
2791 block_cache_used_memory(void)
2792 {
2793 	MutexLocker _(sCachesMemoryUseLock);
2794 	return sUsedMemory;
2795 }
2796 
2797 
2798 //	#pragma mark - public transaction API
2799 
2800 
2801 int32
2802 cache_start_transaction(void* _cache)
2803 {
2804 	block_cache* cache = (block_cache*)_cache;
2805 	TransactionLocker locker(cache);
2806 
2807 	if (cache->last_transaction && cache->last_transaction->open) {
2808 		panic("last transaction (%" B_PRId32 ") still open!\n",
2809 			cache->last_transaction->id);
2810 	}
2811 
2812 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2813 	if (transaction == NULL)
2814 		return B_NO_MEMORY;
2815 
2816 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2817 	cache->last_transaction = transaction;
2818 
2819 	TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
2820 	T(Action("start", cache, transaction));
2821 
2822 	cache->transaction_hash->Insert(transaction);
2823 
2824 	return transaction->id;
2825 }
2826 
2827 
2828 status_t
2829 cache_sync_transaction(void* _cache, int32 id)
2830 {
2831 	block_cache* cache = (block_cache*)_cache;
2832 	bool hadBusy;
2833 
2834 	TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
2835 
2836 	do {
2837 		TransactionLocker locker(cache);
2838 		hadBusy = false;
2839 
2840 		BlockWriter writer(cache);
2841 		TransactionTable::Iterator iterator(cache->transaction_hash);
2842 
2843 		while (iterator.HasNext()) {
2844 			// close all earlier transactions which haven't been closed yet
2845 			cache_transaction* transaction = iterator.Next();
2846 
2847 			if (transaction->busy_writing_count != 0) {
2848 				hadBusy = true;
2849 				continue;
2850 			}
2851 			if (transaction->id <= id && !transaction->open) {
2852 				// write back all of their remaining dirty blocks
2853 				T(Action("sync", cache, transaction));
2854 
2855 				bool hasLeftOvers;
2856 				writer.Add(transaction, hasLeftOvers);
2857 
2858 				if (hasLeftOvers) {
2859 					// This transaction contains blocks that a previous
2860 					// transaction is trying to write back in this write run
2861 					hadBusy = true;
2862 				}
2863 			}
2864 		}
2865 
2866 		status_t status = writer.Write();
2867 		if (status != B_OK)
2868 			return status;
2869 	} while (hadBusy);
2870 
2871 	wait_for_notifications(cache);
2872 		// make sure that all pending TRANSACTION_WRITTEN notifications
2873 		// are handled after we return
2874 	return B_OK;
2875 }
2876 
2877 
2878 status_t
2879 cache_end_transaction(void* _cache, int32 id,
2880 	transaction_notification_hook hook, void* data)
2881 {
2882 	block_cache* cache = (block_cache*)_cache;
2883 	TransactionLocker locker(cache);
2884 
2885 	TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
2886 
2887 	cache_transaction* transaction = lookup_transaction(cache, id);
2888 	if (transaction == NULL) {
2889 		panic("cache_end_transaction(): invalid transaction ID\n");
2890 		return B_BAD_VALUE;
2891 	}
2892 
2893 	// Write back all pending transaction blocks
2894 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
2895 	if (status != B_OK)
2896 		return status;
2897 
2898 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2899 
2900 	if (hook != NULL
2901 		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2902 			hook, data) != B_OK) {
2903 		return B_NO_MEMORY;
2904 	}
2905 
2906 	T(Action("end", cache, transaction));
2907 
2908 	// iterate through all blocks and free the unchanged original contents
2909 
2910 	cached_block* next;
2911 	for (cached_block* block = transaction->first_block; block != NULL;
2912 			block = next) {
2913 		next = block->transaction_next;
2914 		ASSERT(block->previous_transaction == NULL);
2915 
2916 		if (block->discard) {
2917 			// This block has been discarded in the transaction
2918 			cache->DiscardBlock(block);
2919 			transaction->num_blocks--;
2920 			continue;
2921 		}
2922 
2923 		if (block->original_data != NULL) {
2924 			cache->Free(block->original_data);
2925 			block->original_data = NULL;
2926 		}
2927 		if (block->parent_data != NULL) {
2928 			ASSERT(transaction->has_sub_transaction);
2929 			cache->FreeBlockParentData(block);
2930 		}
2931 
2932 		// move the block to the previous transaction list
2933 		transaction->blocks.Add(block);
2934 
2935 		block->previous_transaction = transaction;
2936 		block->transaction_next = NULL;
2937 		block->transaction = NULL;
2938 	}
2939 
2940 	transaction->open = false;
2941 	return B_OK;
2942 }
2943 
2944 
2945 status_t
2946 cache_abort_transaction(void* _cache, int32 id)
2947 {
2948 	block_cache* cache = (block_cache*)_cache;
2949 	TransactionLocker locker(cache);
2950 
2951 	TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
2952 
2953 	cache_transaction* transaction = lookup_transaction(cache, id);
2954 	if (transaction == NULL) {
2955 		panic("cache_abort_transaction(): invalid transaction ID\n");
2956 		return B_BAD_VALUE;
2957 	}
2958 
2959 	T(Abort(cache, transaction));
2960 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2961 
2962 	// iterate through all blocks and restore their original contents
2963 
2964 	cached_block* block = transaction->first_block;
2965 	cached_block* next;
2966 	for (; block != NULL; block = next) {
2967 		next = block->transaction_next;
2968 
2969 		if (block->original_data != NULL) {
2970 			TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
2971 				"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
2972 			memcpy(block->current_data, block->original_data,
2973 				cache->block_size);
2974 			cache->Free(block->original_data);
2975 			block->original_data = NULL;
2976 		}
2977 		if (transaction->has_sub_transaction && block->parent_data != NULL)
2978 			cache->FreeBlockParentData(block);
2979 
2980 		block->transaction_next = NULL;
2981 		block->transaction = NULL;
2982 		block->discard = false;
2983 		if (block->previous_transaction == NULL)
2984 			block->is_dirty = false;
2985 	}
2986 
2987 	cache->transaction_hash->Remove(transaction);
2988 	delete_transaction(cache, transaction);
2989 	return B_OK;
2990 }
2991 
2992 
2993 /*!	Acknowledges the current parent transaction, and starts a new transaction
2994 	from its sub transaction.
2995 	The new transaction also gets a new transaction ID.
2996 */
2997 int32
2998 cache_detach_sub_transaction(void* _cache, int32 id,
2999 	transaction_notification_hook hook, void* data)
3000 {
3001 	block_cache* cache = (block_cache*)_cache;
3002 	TransactionLocker locker(cache);
3003 
3004 	TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
3005 
3006 	cache_transaction* transaction = lookup_transaction(cache, id);
3007 	if (transaction == NULL) {
3008 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
3009 		return B_BAD_VALUE;
3010 	}
3011 	if (!transaction->has_sub_transaction)
3012 		return B_BAD_VALUE;
3013 
3014 	// iterate through all blocks and free the unchanged original contents
3015 
3016 	status_t status = write_blocks_in_previous_transaction(cache, transaction);
3017 	if (status != B_OK)
3018 		return status;
3019 
3020 	// create a new transaction for the sub transaction
3021 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
3022 	if (newTransaction == NULL)
3023 		return B_NO_MEMORY;
3024 
3025 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
3026 	T(Detach(cache, transaction, newTransaction));
3027 
3028 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3029 
3030 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
3031 			data) != B_OK) {
3032 		delete newTransaction;
3033 		return B_NO_MEMORY;
3034 	}
3035 
3036 	cached_block* last = NULL;
3037 	cached_block* next;
3038 	for (cached_block* block = transaction->first_block; block != NULL;
3039 			block = next) {
3040 		next = block->transaction_next;
3041 		ASSERT(block->previous_transaction == NULL);
3042 
3043 		if (block->discard) {
3044 			cache->DiscardBlock(block);
3045 			transaction->main_num_blocks--;
3046 			continue;
3047 		}
3048 
3049 		if (block->parent_data != NULL) {
3050 			// The block changed in the parent - free the original data, since
3051 			// they will be replaced by what is in current.
3052 			ASSERT(block->original_data != NULL);
3053 			cache->Free(block->original_data);
3054 
3055 			if (block->parent_data != block->current_data) {
3056 				// The block had been changed in both transactions
3057 				block->original_data = block->parent_data;
3058 			} else {
3059 				// The block has only been changed in the parent
3060 				block->original_data = NULL;
3061 			}
3062 			block->parent_data = NULL;
3063 
3064 			// move the block to the previous transaction list
3065 			transaction->blocks.Add(block);
3066 			block->previous_transaction = transaction;
3067 		}
3068 
3069 		if (block->original_data != NULL) {
3070 			// This block had been changed in the current sub transaction,
3071 			// we need to move this block over to the new transaction.
3072 			ASSERT(block->parent_data == NULL);
3073 
3074 			if (last == NULL)
3075 				newTransaction->first_block = block;
3076 			else
3077 				last->transaction_next = block;
3078 
3079 			block->transaction = newTransaction;
3080 			last = block;
3081 		} else
3082 			block->transaction = NULL;
3083 
3084 		block->transaction_next = NULL;
3085 	}
3086 
3087 	newTransaction->num_blocks = transaction->sub_num_blocks;
3088 
3089 	transaction->open = false;
3090 	transaction->has_sub_transaction = false;
3091 	transaction->num_blocks = transaction->main_num_blocks;
3092 	transaction->sub_num_blocks = 0;
3093 
3094 	cache->transaction_hash->Insert(newTransaction);
3095 	cache->last_transaction = newTransaction;
3096 
3097 	return newTransaction->id;
3098 }
3099 
3100 
3101 status_t
3102 cache_abort_sub_transaction(void* _cache, int32 id)
3103 {
3104 	block_cache* cache = (block_cache*)_cache;
3105 	TransactionLocker locker(cache);
3106 
3107 	TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3108 
3109 	cache_transaction* transaction = lookup_transaction(cache, id);
3110 	if (transaction == NULL) {
3111 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3112 		return B_BAD_VALUE;
3113 	}
3114 	if (!transaction->has_sub_transaction)
3115 		return B_BAD_VALUE;
3116 
3117 	T(Abort(cache, transaction));
3118 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3119 
3120 	// revert all changes back to the version of the parent
3121 
3122 	cached_block* block = transaction->first_block;
3123 	cached_block* last = NULL;
3124 	cached_block* next;
3125 	for (; block != NULL; block = next) {
3126 		next = block->transaction_next;
3127 
3128 		if (block->parent_data == NULL) {
3129 			// The parent transaction didn't change the block, but the sub
3130 			// transaction did - we need to revert to the original data.
3131 			// The block is no longer part of the transaction
3132 			if (block->original_data != NULL) {
3133 				// The block might not have original data if was empty
3134 				memcpy(block->current_data, block->original_data,
3135 					cache->block_size);
3136 			}
3137 
3138 			if (last != NULL)
3139 				last->transaction_next = next;
3140 			else
3141 				transaction->first_block = next;
3142 
3143 			block->transaction_next = NULL;
3144 			block->transaction = NULL;
3145 			transaction->num_blocks--;
3146 
3147 			if (block->previous_transaction == NULL) {
3148 				cache->Free(block->original_data);
3149 				block->original_data = NULL;
3150 				block->is_dirty = false;
3151 
3152 				if (block->ref_count == 0) {
3153 					// Move the block into the unused list if possible
3154 					block->unused = true;
3155 					cache->unused_blocks.Add(block);
3156 					cache->unused_block_count++;
3157 				}
3158 			}
3159 		} else {
3160 			if (block->parent_data != block->current_data) {
3161 				// The block has been changed and must be restored - the block
3162 				// is still dirty and part of the transaction
3163 				TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3164 					"restored contents of block %" B_PRIdOFF "\n",
3165 					transaction->id, block->block_number));
3166 				memcpy(block->current_data, block->parent_data,
3167 					cache->block_size);
3168 				cache->Free(block->parent_data);
3169 				// The block stays dirty
3170 			}
3171 			block->parent_data = NULL;
3172 			last = block;
3173 		}
3174 
3175 		block->discard = false;
3176 	}
3177 
3178 	// all subsequent changes will go into the main transaction
3179 	transaction->has_sub_transaction = false;
3180 	transaction->sub_num_blocks = 0;
3181 
3182 	return B_OK;
3183 }
3184 
3185 
3186 status_t
3187 cache_start_sub_transaction(void* _cache, int32 id)
3188 {
3189 	block_cache* cache = (block_cache*)_cache;
3190 	TransactionLocker locker(cache);
3191 
3192 	TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3193 
3194 	cache_transaction* transaction = lookup_transaction(cache, id);
3195 	if (transaction == NULL) {
3196 		panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3197 			id);
3198 		return B_BAD_VALUE;
3199 	}
3200 
3201 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3202 
3203 	// move all changed blocks up to the parent
3204 
3205 	cached_block* block = transaction->first_block;
3206 	cached_block* next;
3207 	for (; block != NULL; block = next) {
3208 		next = block->transaction_next;
3209 
3210 		if (block->parent_data != NULL) {
3211 			// There already is an older sub transaction - we acknowledge
3212 			// its changes and move its blocks up to the parent
3213 			ASSERT(transaction->has_sub_transaction);
3214 			cache->FreeBlockParentData(block);
3215 		}
3216 		if (block->discard) {
3217 			// This block has been discarded in the parent transaction.
3218 			// Just throw away any changes made in this transaction, so that
3219 			// it can still be reverted to its original contents if needed
3220 			ASSERT(block->previous_transaction == NULL);
3221 			if (block->original_data != NULL) {
3222 				memcpy(block->current_data, block->original_data,
3223 					cache->block_size);
3224 
3225 				cache->Free(block->original_data);
3226 				block->original_data = NULL;
3227 			}
3228 			continue;
3229 		}
3230 
3231 		// we "allocate" the parent data lazily, that means, we don't copy
3232 		// the data (and allocate memory for it) until we need to
3233 		block->parent_data = block->current_data;
3234 	}
3235 
3236 	// all subsequent changes will go into the sub transaction
3237 	transaction->has_sub_transaction = true;
3238 	transaction->main_num_blocks = transaction->num_blocks;
3239 	transaction->sub_num_blocks = 0;
3240 	T(Action("start-sub", cache, transaction));
3241 
3242 	return B_OK;
3243 }
3244 
3245 
3246 /*!	Adds a transaction listener that gets notified when the transaction
3247 	is ended, aborted, written, or idle as specified by \a events.
3248 	The listener gets automatically removed when the transaction ends.
3249 */
3250 status_t
3251 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3252 	transaction_notification_hook hook, void* data)
3253 {
3254 	block_cache* cache = (block_cache*)_cache;
3255 	TransactionLocker locker(cache);
3256 
3257 	cache_transaction* transaction = lookup_transaction(cache, id);
3258 	if (transaction == NULL)
3259 		return B_BAD_VALUE;
3260 
3261 	return add_transaction_listener(cache, transaction, events, hook, data);
3262 }
3263 
3264 
3265 status_t
3266 cache_remove_transaction_listener(void* _cache, int32 id,
3267 	transaction_notification_hook hookFunction, void* data)
3268 {
3269 	block_cache* cache = (block_cache*)_cache;
3270 	TransactionLocker locker(cache);
3271 
3272 	cache_transaction* transaction = lookup_transaction(cache, id);
3273 	if (transaction == NULL)
3274 		return B_BAD_VALUE;
3275 
3276 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3277 	while (iterator.HasNext()) {
3278 		cache_listener* listener = iterator.Next();
3279 		if (listener->data == data && listener->hook == hookFunction) {
3280 			iterator.Remove();
3281 
3282 			if (listener->events_pending != 0) {
3283 				MutexLocker _(sNotificationsLock);
3284 				if (listener->events_pending != 0)
3285 					cache->pending_notifications.Remove(listener);
3286 			}
3287 			delete listener;
3288 			return B_OK;
3289 		}
3290 	}
3291 
3292 	return B_ENTRY_NOT_FOUND;
3293 }
3294 
3295 
3296 status_t
3297 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3298 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3299 {
3300 	cached_block* block = (cached_block*)*_cookie;
3301 	block_cache* cache = (block_cache*)_cache;
3302 	TransactionLocker locker(cache);
3303 
3304 	cache_transaction* transaction = lookup_transaction(cache, id);
3305 	if (transaction == NULL || !transaction->open)
3306 		return B_BAD_VALUE;
3307 
3308 	if (block == NULL)
3309 		block = transaction->first_block;
3310 	else
3311 		block = block->transaction_next;
3312 
3313 	if (transaction->has_sub_transaction) {
3314 		if (mainOnly) {
3315 			// find next block that the parent changed
3316 			while (block != NULL && block->parent_data == NULL)
3317 				block = block->transaction_next;
3318 		} else {
3319 			// find next non-discarded block
3320 			while (block != NULL && block->discard)
3321 				block = block->transaction_next;
3322 		}
3323 	}
3324 
3325 	if (block == NULL)
3326 		return B_ENTRY_NOT_FOUND;
3327 
3328 	if (_blockNumber)
3329 		*_blockNumber = block->block_number;
3330 	if (_data)
3331 		*_data = mainOnly ? block->parent_data : block->current_data;
3332 	if (_unchangedData)
3333 		*_unchangedData = block->original_data;
3334 
3335 	*_cookie = (addr_t)block;
3336 	return B_OK;
3337 }
3338 
3339 
3340 int32
3341 cache_blocks_in_transaction(void* _cache, int32 id)
3342 {
3343 	block_cache* cache = (block_cache*)_cache;
3344 	TransactionLocker locker(cache);
3345 
3346 	cache_transaction* transaction = lookup_transaction(cache, id);
3347 	if (transaction == NULL)
3348 		return B_BAD_VALUE;
3349 
3350 	return transaction->num_blocks;
3351 }
3352 
3353 
3354 /*!	Returns the number of blocks that are part of the main transaction. If this
3355 	transaction does not have a sub transaction yet, this is the same value as
3356 	cache_blocks_in_transaction() would return.
3357 */
3358 int32
3359 cache_blocks_in_main_transaction(void* _cache, int32 id)
3360 {
3361 	block_cache* cache = (block_cache*)_cache;
3362 	TransactionLocker locker(cache);
3363 
3364 	cache_transaction* transaction = lookup_transaction(cache, id);
3365 	if (transaction == NULL)
3366 		return B_BAD_VALUE;
3367 
3368 	if (transaction->has_sub_transaction)
3369 		return transaction->main_num_blocks;
3370 
3371 	return transaction->num_blocks;
3372 }
3373 
3374 
3375 int32
3376 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3377 {
3378 	block_cache* cache = (block_cache*)_cache;
3379 	TransactionLocker locker(cache);
3380 
3381 	cache_transaction* transaction = lookup_transaction(cache, id);
3382 	if (transaction == NULL)
3383 		return B_BAD_VALUE;
3384 
3385 	return transaction->sub_num_blocks;
3386 }
3387 
3388 
3389 /*!	Check if block is in transaction
3390 */
3391 bool
3392 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber)
3393 {
3394 	block_cache* cache = (block_cache*)_cache;
3395 	TransactionLocker locker(cache);
3396 
3397 	cached_block* block = cache->hash->Lookup(blockNumber);
3398 
3399 	return (block != NULL && block->transaction != NULL
3400 		&& block->transaction->id == id);
3401 }
3402 
3403 
3404 //	#pragma mark - public block cache API
3405 
3406 
3407 void
3408 block_cache_delete(void* _cache, bool allowWrites)
3409 {
3410 	block_cache* cache = (block_cache*)_cache;
3411 
3412 	if (allowWrites)
3413 		block_cache_sync(cache);
3414 
3415 	mutex_lock(&sCachesLock);
3416 	sCaches.Remove(cache);
3417 	mutex_unlock(&sCachesLock);
3418 
3419 	mutex_lock(&cache->lock);
3420 
3421 	// wait for all blocks to become unbusy
3422 	wait_for_busy_reading_blocks(cache);
3423 	wait_for_busy_writing_blocks(cache);
3424 
3425 	// free all blocks
3426 
3427 	cached_block* block = cache->hash->Clear(true);
3428 	while (block != NULL) {
3429 		cached_block* next = block->next;
3430 		cache->FreeBlock(block);
3431 		block = next;
3432 	}
3433 
3434 	// free all transactions (they will all be aborted)
3435 
3436 	cache_transaction* transaction = cache->transaction_hash->Clear(true);
3437 	while (transaction != NULL) {
3438 		cache_transaction* next = transaction->next;
3439 		delete transaction;
3440 		transaction = next;
3441 	}
3442 
3443 	delete cache;
3444 }
3445 
3446 
3447 void*
3448 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3449 {
3450 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3451 		readOnly);
3452 	if (cache == NULL)
3453 		return NULL;
3454 
3455 	if (cache->Init() != B_OK) {
3456 		delete cache;
3457 		return NULL;
3458 	}
3459 
3460 	MutexLocker _(sCachesLock);
3461 	sCaches.Add(cache);
3462 
3463 	return cache;
3464 }
3465 
3466 
3467 status_t
3468 block_cache_sync(void* _cache)
3469 {
3470 	block_cache* cache = (block_cache*)_cache;
3471 
3472 	// We will sync all dirty blocks to disk that have a completed
3473 	// transaction or no transaction only
3474 
3475 	MutexLocker locker(&cache->lock);
3476 
3477 	BlockWriter writer(cache);
3478 	BlockTable::Iterator iterator(cache->hash);
3479 
3480 	while (iterator.HasNext()) {
3481 		cached_block* block = iterator.Next();
3482 		if (block->CanBeWritten())
3483 			writer.Add(block);
3484 	}
3485 
3486 	status_t status = writer.Write();
3487 
3488 	locker.Unlock();
3489 
3490 	wait_for_notifications(cache);
3491 		// make sure that all pending TRANSACTION_WRITTEN notifications
3492 		// are handled after we return
3493 	return status;
3494 }
3495 
3496 
3497 status_t
3498 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3499 {
3500 	block_cache* cache = (block_cache*)_cache;
3501 
3502 	// We will sync all dirty blocks to disk that have a completed
3503 	// transaction or no transaction only
3504 
3505 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3506 		panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
3507 			" (max %" B_PRIdOFF ")",
3508 			blockNumber, cache->max_blocks - 1);
3509 		return B_BAD_VALUE;
3510 	}
3511 
3512 	MutexLocker locker(&cache->lock);
3513 	BlockWriter writer(cache);
3514 
3515 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
3516 		cached_block* block = cache->hash->Lookup(blockNumber);
3517 		if (block == NULL)
3518 			continue;
3519 
3520 		if (block->CanBeWritten())
3521 			writer.Add(block);
3522 	}
3523 
3524 	status_t status = writer.Write();
3525 
3526 	locker.Unlock();
3527 
3528 	wait_for_notifications(cache);
3529 		// make sure that all pending TRANSACTION_WRITTEN notifications
3530 		// are handled after we return
3531 	return status;
3532 }
3533 
3534 
3535 /*!	Discards a block from the current transaction or from the cache.
3536 	You have to call this function when you no longer use a block, ie. when it
3537 	might be reclaimed by the file cache in order to make sure they won't
3538 	interfere.
3539 */
3540 void
3541 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3542 {
3543 	// TODO: this could be a nice place to issue the ATA trim command
3544 	block_cache* cache = (block_cache*)_cache;
3545 	TransactionLocker locker(cache);
3546 
3547 	BlockWriter writer(cache);
3548 
3549 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3550 		cached_block* block = cache->hash->Lookup(blockNumber);
3551 		if (block != NULL && block->previous_transaction != NULL)
3552 			writer.Add(block);
3553 	}
3554 
3555 	writer.Write();
3556 		// TODO: this can fail, too!
3557 
3558 	blockNumber -= numBlocks;
3559 		// reset blockNumber to its original value
3560 
3561 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3562 		cached_block* block = cache->hash->Lookup(blockNumber);
3563 		if (block == NULL)
3564 			continue;
3565 
3566 		ASSERT(block->previous_transaction == NULL);
3567 
3568 		if (block->unused) {
3569 			cache->unused_blocks.Remove(block);
3570 			cache->unused_block_count--;
3571 			cache->RemoveBlock(block);
3572 		} else {
3573 			if (block->transaction != NULL && block->parent_data != NULL
3574 				&& block->parent_data != block->current_data) {
3575 				panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3576 					"transaction!", blockNumber);
3577 			}
3578 
3579 			// mark it as discarded (in the current transaction only, if any)
3580 			block->discard = true;
3581 		}
3582 	}
3583 }
3584 
3585 
3586 status_t
3587 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3588 {
3589 	block_cache* cache = (block_cache*)_cache;
3590 	MutexLocker locker(&cache->lock);
3591 
3592 	if (cache->read_only) {
3593 		panic("tried to make block writable on a read-only cache!");
3594 		return B_ERROR;
3595 	}
3596 
3597 	// TODO: this can be done better!
3598 	void* block;
3599 	status_t status = get_writable_cached_block(cache, blockNumber,
3600 		blockNumber, 1, transaction, false, &block);
3601 	if (status == B_OK) {
3602 		put_cached_block((block_cache*)_cache, blockNumber);
3603 		return B_OK;
3604 	}
3605 
3606 	return status;
3607 }
3608 
3609 
3610 status_t
3611 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3612 	off_t length, int32 transaction, void** _block)
3613 {
3614 	block_cache* cache = (block_cache*)_cache;
3615 	MutexLocker locker(&cache->lock);
3616 
3617 	TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3618 		blockNumber, transaction));
3619 	if (cache->read_only)
3620 		panic("tried to get writable block on a read-only cache!");
3621 
3622 	return get_writable_cached_block(cache, blockNumber, base, length,
3623 		transaction, false, _block);
3624 }
3625 
3626 
3627 void*
3628 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3629 {
3630 	void* block;
3631 	if (block_cache_get_writable_etc(_cache, blockNumber,
3632 			blockNumber, 1, transaction, &block) == B_OK)
3633 		return block;
3634 
3635 	return NULL;
3636 }
3637 
3638 
3639 void*
3640 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3641 {
3642 	block_cache* cache = (block_cache*)_cache;
3643 	MutexLocker locker(&cache->lock);
3644 
3645 	TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3646 		blockNumber, transaction));
3647 	if (cache->read_only)
3648 		panic("tried to get empty writable block on a read-only cache!");
3649 
3650 	void* block;
3651 	if (get_writable_cached_block((block_cache*)_cache, blockNumber,
3652 			blockNumber, 1, transaction, true, &block) == B_OK)
3653 		return block;
3654 
3655 	return NULL;
3656 }
3657 
3658 
3659 status_t
3660 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length,
3661 	const void** _block)
3662 {
3663 	block_cache* cache = (block_cache*)_cache;
3664 	MutexLocker locker(&cache->lock);
3665 	bool allocated;
3666 
3667 	cached_block* block;
3668 	status_t status = get_cached_block(cache, blockNumber, &allocated, true,
3669 		&block);
3670 	if (status != B_OK)
3671 		return status;
3672 
3673 #if BLOCK_CACHE_DEBUG_CHANGED
3674 	if (block->compare == NULL)
3675 		block->compare = cache->Allocate();
3676 	if (block->compare != NULL)
3677 		memcpy(block->compare, block->current_data, cache->block_size);
3678 #endif
3679 	TB(Get(cache, block));
3680 
3681 	*_block = block->current_data;
3682 	return B_OK;
3683 }
3684 
3685 
3686 const void*
3687 block_cache_get(void* _cache, off_t blockNumber)
3688 {
3689 	const void* block;
3690 	if (block_cache_get_etc(_cache, blockNumber, blockNumber, 1, &block)
3691 			== B_OK)
3692 		return block;
3693 
3694 	return NULL;
3695 }
3696 
3697 
3698 /*!	Changes the internal status of a writable block to \a dirty. This can be
3699 	helpful in case you realize you don't need to change that block anymore
3700 	for whatever reason.
3701 
3702 	Note, you must only use this function on blocks that were acquired
3703 	writable!
3704 */
3705 status_t
3706 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3707 	int32 transaction)
3708 {
3709 	block_cache* cache = (block_cache*)_cache;
3710 	MutexLocker locker(&cache->lock);
3711 
3712 	cached_block* block = cache->hash->Lookup(blockNumber);
3713 	if (block == NULL)
3714 		return B_BAD_VALUE;
3715 	if (block->is_dirty == dirty) {
3716 		// there is nothing to do for us
3717 		return B_OK;
3718 	}
3719 
3720 	// TODO: not yet implemented
3721 	if (dirty)
3722 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
3723 
3724 	return B_OK;
3725 }
3726 
3727 
3728 void
3729 block_cache_put(void* _cache, off_t blockNumber)
3730 {
3731 	block_cache* cache = (block_cache*)_cache;
3732 	MutexLocker locker(&cache->lock);
3733 
3734 	put_cached_block(cache, blockNumber);
3735 }
3736 
3737