xref: /haiku/src/system/kernel/cache/block_cache.cpp (revision a906d0a031e721e2f2ec9d95274103e74a3a774f)
1 /*
2  * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <block_cache.h>
8 
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <KernelExport.h>
15 #include <fs_cache.h>
16 
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <util/khash.h>
26 #include <vm/vm_page.h>
27 
28 #include "kernel_debug_config.h"
29 
30 
31 // TODO: this is a naive but growing implementation to test the API:
32 //	block reading/writing is not at all optimized for speed, it will
33 //	just read and write single blocks.
34 // TODO: the retrieval/copy of the original data could be delayed until the
35 //		new data must be written, ie. in low memory situations.
36 
37 //#define TRACE_BLOCK_CACHE
38 #ifdef TRACE_BLOCK_CACHE
39 #	define TRACE(x)	dprintf x
40 #else
41 #	define TRACE(x) ;
42 #endif
43 
44 // This macro is used for fatal situations that are acceptable in a running
45 // system, like out of memory situations - should only panic for debugging.
46 #define FATAL(x) panic x
47 
48 static const bigtime_t kTransactionIdleTime = 2000000LL;
49 	// a transaction is considered idle after 2 seconds of inactivity
50 
51 
52 struct cache_transaction;
53 struct cached_block;
54 struct block_cache;
55 typedef DoublyLinkedListLink<cached_block> block_link;
56 
57 struct cached_block {
58 	cached_block*	next;			// next in hash
59 	cached_block*	transaction_next;
60 	block_link		link;
61 	off_t			block_number;
62 	void*			current_data;
63 		// The data that is seen by everyone using the API; this one is always
64 		// present.
65 	void*			original_data;
66 		// When in a transaction, this contains the original data from before
67 		// the transaction.
68 	void*			parent_data;
69 		// This is a lazily alloced buffer that represents the contents of the
70 		// block in the parent transaction. It may point to current_data if the
71 		// contents have been changed only in the parent transaction, or, if the
72 		// block has been changed in the current sub transaction already, to a
73 		// new block containing the contents changed in the parent transaction.
74 		// If this is NULL, the block has not been changed in the parent
75 		// transaction at all.
76 #if BLOCK_CACHE_DEBUG_CHANGED
77 	void*			compare;
78 #endif
79 	int32			ref_count;
80 	int32			last_accessed;
81 	bool			busy_reading : 1;
82 	bool			busy_writing : 1;
83 	bool			is_writing : 1;
84 		// Block has been checked out for writing without transactions, and
85 		// cannot be written back if set
86 	bool			is_dirty : 1;
87 	bool			unused : 1;
88 	bool			discard : 1;
89 	bool			busy_reading_waiters : 1;
90 	bool			busy_writing_waiters : 1;
91 	cache_transaction* transaction;
92 	cache_transaction* previous_transaction;
93 
94 	bool CanBeWritten() const;
95 	int32 LastAccess() const
96 		{ return system_time() / 1000000L - last_accessed; }
97 
98 	static int Compare(void* _cacheEntry, const void* _block);
99 	static uint32 Hash(void* _cacheEntry, const void* _block, uint32 range);
100 };
101 
102 typedef DoublyLinkedList<cached_block,
103 	DoublyLinkedListMemberGetLink<cached_block,
104 		&cached_block::link> > block_list;
105 
106 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
107 	int32			transaction_id;
108 	int32			events_pending;
109 	int32			events;
110 	transaction_notification_hook hook;
111 	void*			data;
112 	bool			delete_after_event;
113 };
114 
115 typedef DoublyLinkedList<cache_notification> NotificationList;
116 
117 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
118 	hash_table*		hash;
119 	mutex			lock;
120 	int				fd;
121 	off_t			max_blocks;
122 	size_t			block_size;
123 	int32			next_transaction_id;
124 	cache_transaction* last_transaction;
125 	hash_table*		transaction_hash;
126 
127 	object_cache*	buffer_cache;
128 	block_list		unused_blocks;
129 	uint32			unused_block_count;
130 
131 	ConditionVariable busy_reading_condition;
132 	uint32			busy_reading_count;
133 	bool			busy_reading_waiters;
134 
135 	ConditionVariable busy_writing_condition;
136 	uint32			busy_writing_count;
137 	bool			busy_writing_waiters;
138 
139 	uint32			num_dirty_blocks;
140 	bool			read_only;
141 
142 	NotificationList pending_notifications;
143 	ConditionVariable condition_variable;
144 
145 					block_cache(int fd, off_t numBlocks, size_t blockSize,
146 						bool readOnly);
147 					~block_cache();
148 
149 	status_t		Init();
150 
151 	void			Free(void* buffer);
152 	void*			Allocate();
153 	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
154 	void			RemoveBlock(cached_block* block);
155 	void			DiscardBlock(cached_block* block);
156 	void			FreeBlock(cached_block* block);
157 	cached_block*	NewBlock(off_t blockNumber);
158 
159 
160 private:
161 	static void		_LowMemoryHandler(void* data, uint32 resources,
162 						int32 level);
163 	cached_block*	_GetUnusedBlock();
164 };
165 
166 struct cache_listener;
167 typedef DoublyLinkedListLink<cache_listener> listener_link;
168 
169 struct cache_listener : cache_notification {
170 	listener_link	link;
171 };
172 
173 typedef DoublyLinkedList<cache_listener,
174 	DoublyLinkedListMemberGetLink<cache_listener,
175 		&cache_listener::link> > ListenerList;
176 
177 
178 struct cache_transaction {
179 	cache_transaction();
180 
181 	cache_transaction* next;
182 	int32			id;
183 	int32			num_blocks;
184 	int32			main_num_blocks;
185 	int32			sub_num_blocks;
186 	cached_block*	first_block;
187 	block_list		blocks;
188 	ListenerList	listeners;
189 	bool			open;
190 	bool			has_sub_transaction;
191 	bigtime_t		last_used;
192 	int32			busy_writing_count;
193 };
194 
195 
196 class BlockWriter {
197 public:
198 								BlockWriter(block_cache* cache,
199 									size_t max = SIZE_MAX);
200 								~BlockWriter();
201 
202 			bool				Add(cached_block* block,
203 									hash_iterator* iterator = NULL);
204 			bool				Add(cache_transaction* transaction,
205 									hash_iterator* iterator,
206 									bool& hasLeftOvers);
207 
208 			status_t			Write(hash_iterator* iterator = NULL,
209 									bool canUnlock = true);
210 
211 			bool				DeletedTransaction() const
212 									{ return fDeletedTransaction; }
213 
214 	static	status_t			WriteBlock(block_cache* cache,
215 									cached_block* block);
216 
217 private:
218 			void*				_Data(cached_block* block) const;
219 			status_t			_WriteBlock(cached_block* block);
220 			void				_BlockDone(cached_block* block,
221 									hash_iterator* iterator);
222 			void				_UnmarkWriting(cached_block* block);
223 
224 private:
225 	static	const size_t		kBufferSize = 64;
226 
227 			block_cache*		fCache;
228 			cached_block*		fBuffer[kBufferSize];
229 			cached_block**		fBlocks;
230 			size_t				fCount;
231 			size_t				fTotal;
232 			size_t				fCapacity;
233 			size_t				fMax;
234 			status_t			fStatus;
235 			bool				fDeletedTransaction;
236 };
237 
238 
239 class TransactionLocking {
240 public:
241 	inline bool Lock(block_cache* cache)
242 	{
243 		mutex_lock(&cache->lock);
244 
245 		while (cache->busy_writing_count != 0) {
246 			// wait for all blocks to be written
247 			ConditionVariableEntry entry;
248 			cache->busy_writing_condition.Add(&entry);
249 			cache->busy_writing_waiters = true;
250 
251 			mutex_unlock(&cache->lock);
252 
253 			entry.Wait();
254 
255 			mutex_lock(&cache->lock);
256 		}
257 
258 		return true;
259 	}
260 
261 	inline void Unlock(block_cache* cache)
262 	{
263 		mutex_unlock(&cache->lock);
264 	}
265 };
266 
267 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
268 
269 
270 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
271 namespace BlockTracing {
272 
273 class Action : public AbstractTraceEntry {
274 public:
275 	Action(block_cache* cache, cached_block* block)
276 		:
277 		fCache(cache),
278 		fBlockNumber(block->block_number),
279 		fIsDirty(block->is_dirty),
280 		fHasOriginal(block->original_data != NULL),
281 		fHasParent(block->parent_data != NULL),
282 		fTransactionID(-1),
283 		fPreviousID(-1)
284 	{
285 		if (block->transaction != NULL)
286 			fTransactionID = block->transaction->id;
287 		if (block->previous_transaction != NULL)
288 			fPreviousID = block->previous_transaction->id;
289 	}
290 
291 	virtual void AddDump(TraceOutput& out)
292 	{
293 		out.Print("block cache %p, %s %Ld, %c%c%c transaction %ld "
294 			"(previous id %ld)\n", fCache, _Action(), fBlockNumber,
295 			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
296 			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
297 	}
298 
299 	virtual const char* _Action() const = 0;
300 
301 private:
302 	block_cache*		fCache;
303 	uint64				fBlockNumber;
304 	bool				fIsDirty;
305 	bool				fHasOriginal;
306 	bool				fHasParent;
307 	int32				fTransactionID;
308 	int32				fPreviousID;
309 };
310 
311 class Get : public Action {
312 public:
313 	Get(block_cache* cache, cached_block* block)
314 		:
315 		Action(cache, block)
316 	{
317 		Initialized();
318 	}
319 
320 	virtual const char* _Action() const { return "get"; }
321 };
322 
323 class Put : public Action {
324 public:
325 	Put(block_cache* cache, cached_block* block)
326 		:
327 		Action(cache, block)
328 	{
329 		Initialized();
330 	}
331 
332 	virtual const char* _Action() const { return "put"; }
333 };
334 
335 class Read : public Action {
336 public:
337 	Read(block_cache* cache, cached_block* block)
338 		:
339 		Action(cache, block)
340 	{
341 		Initialized();
342 	}
343 
344 	virtual const char* _Action() const { return "read"; }
345 };
346 
347 class Write : public Action {
348 public:
349 	Write(block_cache* cache, cached_block* block)
350 		:
351 		Action(cache, block)
352 	{
353 		Initialized();
354 	}
355 
356 	virtual const char* _Action() const { return "write"; }
357 };
358 
359 class Flush : public Action {
360 public:
361 	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
362 		:
363 		Action(cache, block),
364 		fGetUnused(getUnused)
365 	{
366 		Initialized();
367 	}
368 
369 	virtual const char* _Action() const
370 		{ return fGetUnused ? "get-unused" : "flush"; }
371 
372 private:
373 	bool	fGetUnused;
374 };
375 
376 class Error : public AbstractTraceEntry {
377 public:
378 	Error(block_cache* cache, uint64 blockNumber, const char* message,
379 			status_t status = B_OK)
380 		:
381 		fCache(cache),
382 		fBlockNumber(blockNumber),
383 		fMessage(message),
384 		fStatus(status)
385 	{
386 		Initialized();
387 	}
388 
389 	virtual void AddDump(TraceOutput& out)
390 	{
391 		out.Print("block cache %p, error %Ld, %s%s%s",
392 			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
393 			fStatus != B_OK ? strerror(fStatus) : "");
394 	}
395 
396 private:
397 	block_cache*	fCache;
398 	uint64			fBlockNumber;
399 	const char*		fMessage;
400 	status_t		fStatus;
401 };
402 
403 #if BLOCK_CACHE_BLOCK_TRACING >= 2
404 class BlockData : public AbstractTraceEntry {
405 public:
406 	enum {
407 		kCurrent	= 0x01,
408 		kParent		= 0x02,
409 		kOriginal	= 0x04
410 	};
411 
412 	BlockData(block_cache* cache, cached_block* block, const char* message)
413 		:
414 		fCache(cache),
415 		fSize(cache->block_size),
416 		fBlockNumber(block->block_number),
417 		fMessage(message)
418 	{
419 		_Allocate(fCurrent, block->current_data);
420 		_Allocate(fParent, block->parent_data);
421 		_Allocate(fOriginal, block->original_data);
422 
423 #if KTRACE_PRINTF_STACK_TRACE
424 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
425 			false);
426 #endif
427 
428 		Initialized();
429 	}
430 
431 	virtual void AddDump(TraceOutput& out)
432 	{
433 		out.Print("block cache %p, block %Ld, data %c%c%c: %s",
434 			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
435 			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
436 			fMessage);
437 	}
438 
439 #if KTRACE_PRINTF_STACK_TRACE
440 	virtual void DumpStackTrace(TraceOutput& out)
441 	{
442 		out.PrintStackTrace(fStackTrace);
443 	}
444 #endif
445 
446 	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
447 	{
448 		if ((which & kCurrent) != 0)
449 			DumpBlock(kCurrent, offset, size);
450 		if ((which & kParent) != 0)
451 			DumpBlock(kParent, offset, size);
452 		if ((which & kOriginal) != 0)
453 			DumpBlock(kOriginal, offset, size);
454 	}
455 
456 	void DumpBlock(uint32 which, uint32 offset, uint32 size)
457 	{
458 		if (offset > fSize) {
459 			kprintf("invalid offset (block size %lu)\n", fSize);
460 			return;
461 		}
462 		if (offset + size > fSize)
463 			size = fSize - offset;
464 
465 		const char* label;
466 		uint8* data;
467 
468 		if ((which & kCurrent) != 0) {
469 			label = "current";
470 			data = fCurrent;
471 		} else if ((which & kParent) != 0) {
472 			label = "parent";
473 			data = fParent;
474 		} else if ((which & kOriginal) != 0) {
475 			label = "original";
476 			data = fOriginal;
477 		} else
478 			return;
479 
480 		kprintf("%s: offset %lu, %lu bytes\n", label, offset, size);
481 
482 		static const uint32 kBlockSize = 16;
483 		data += offset;
484 
485 		for (uint32 i = 0; i < size;) {
486 			int start = i;
487 
488 			kprintf("  %04lx ", i);
489 			for (; i < start + kBlockSize; i++) {
490 				if (!(i % 4))
491 					kprintf(" ");
492 
493 				if (i >= size)
494 					kprintf("  ");
495 				else
496 					kprintf("%02x", data[i]);
497 			}
498 
499 			kprintf("\n");
500 		}
501 	}
502 
503 private:
504 	void _Allocate(uint8*& target, void* source)
505 	{
506 		if (source == NULL) {
507 			target = NULL;
508 			return;
509 		}
510 
511 		target = alloc_tracing_buffer_memcpy(source, fSize, false);
512 	}
513 
514 	block_cache*	fCache;
515 	uint32			fSize;
516 	uint64			fBlockNumber;
517 	const char*		fMessage;
518 	uint8*			fCurrent;
519 	uint8*			fParent;
520 	uint8*			fOriginal;
521 #if KTRACE_PRINTF_STACK_TRACE
522 	tracing_stack_trace* fStackTrace;
523 #endif
524 };
525 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
526 
527 }	// namespace BlockTracing
528 
529 #	define TB(x) new(std::nothrow) BlockTracing::x;
530 #else
531 #	define TB(x) ;
532 #endif
533 
534 #if BLOCK_CACHE_BLOCK_TRACING >= 2
535 #	define TB2(x) new(std::nothrow) BlockTracing::x;
536 #else
537 #	define TB2(x) ;
538 #endif
539 
540 
541 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
542 namespace TransactionTracing {
543 
544 class Action : public AbstractTraceEntry {
545 public:
546 	Action(const char* label, block_cache* cache,
547 			cache_transaction* transaction)
548 		:
549 		fCache(cache),
550 		fTransaction(transaction),
551 		fID(transaction->id),
552 		fSub(transaction->has_sub_transaction),
553 		fNumBlocks(transaction->num_blocks),
554 		fSubNumBlocks(transaction->sub_num_blocks)
555 	{
556 		strlcpy(fLabel, label, sizeof(fLabel));
557 		Initialized();
558 	}
559 
560 	virtual void AddDump(TraceOutput& out)
561 	{
562 		out.Print("block cache %p, %s transaction %p (id %ld)%s"
563 			", %ld/%ld blocks", fCache, fLabel, fTransaction, fID,
564 			fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
565 	}
566 
567 private:
568 	char				fLabel[12];
569 	block_cache*		fCache;
570 	cache_transaction*	fTransaction;
571 	int32				fID;
572 	bool				fSub;
573 	int32				fNumBlocks;
574 	int32				fSubNumBlocks;
575 };
576 
577 class Detach : public AbstractTraceEntry {
578 public:
579 	Detach(block_cache* cache, cache_transaction* transaction,
580 			cache_transaction* newTransaction)
581 		:
582 		fCache(cache),
583 		fTransaction(transaction),
584 		fID(transaction->id),
585 		fSub(transaction->has_sub_transaction),
586 		fNewTransaction(newTransaction),
587 		fNewID(newTransaction->id)
588 	{
589 		Initialized();
590 	}
591 
592 	virtual void AddDump(TraceOutput& out)
593 	{
594 		out.Print("block cache %p, detach transaction %p (id %ld)"
595 			"from transaction %p (id %ld)%s",
596 			fCache, fNewTransaction, fNewID, fTransaction, fID,
597 			fSub ? " sub" : "");
598 	}
599 
600 private:
601 	block_cache*		fCache;
602 	cache_transaction*	fTransaction;
603 	int32				fID;
604 	bool				fSub;
605 	cache_transaction*	fNewTransaction;
606 	int32				fNewID;
607 };
608 
609 class Abort : public AbstractTraceEntry {
610 public:
611 	Abort(block_cache* cache, cache_transaction* transaction)
612 		:
613 		fCache(cache),
614 		fTransaction(transaction),
615 		fID(transaction->id),
616 		fNumBlocks(0)
617 	{
618 		bool isSub = transaction->has_sub_transaction;
619 		fNumBlocks = isSub ? transaction->sub_num_blocks
620 			: transaction->num_blocks;
621 		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
622 		if (fBlocks != NULL) {
623 			cached_block* block = transaction->first_block;
624 			for (int32 i = 0; block != NULL && i < fNumBlocks;
625 					block = block->transaction_next) {
626 				fBlocks[i++] = block->block_number;
627 			}
628 		} else
629 			fNumBlocks = 0;
630 
631 #if KTRACE_PRINTF_STACK_TRACE
632 		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
633 			false);
634 #endif
635 
636 		Initialized();
637 	}
638 
639 	virtual void AddDump(TraceOutput& out)
640 	{
641 		out.Print("block cache %p, abort transaction "
642 			"%p (id %ld), blocks", fCache, fTransaction, fID);
643 		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
644 			out.Print(" %Ld", fBlocks[i]);
645 	}
646 
647 #if KTRACE_PRINTF_STACK_TRACE
648 	virtual void DumpStackTrace(TraceOutput& out)
649 	{
650 		out.PrintStackTrace(fStackTrace);
651 	}
652 #endif
653 
654 private:
655 	block_cache*		fCache;
656 	cache_transaction*	fTransaction;
657 	int32				fID;
658 	off_t*				fBlocks;
659 	int32				fNumBlocks;
660 #if KTRACE_PRINTF_STACK_TRACE
661 	tracing_stack_trace* fStackTrace;
662 #endif
663 };
664 
665 }	// namespace TransactionTracing
666 
667 #	define T(x) new(std::nothrow) TransactionTracing::x;
668 #else
669 #	define T(x) ;
670 #endif
671 
672 
673 static DoublyLinkedList<block_cache> sCaches;
674 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
675 static mutex sCachesMemoryUseLock
676 	= MUTEX_INITIALIZER("block caches memory use");
677 static size_t sUsedMemory;
678 static sem_id sEventSemaphore;
679 static mutex sNotificationsLock
680 	= MUTEX_INITIALIZER("block cache notifications");
681 static thread_id sNotifierWriterThread;
682 static DoublyLinkedListLink<block_cache> sMarkCache;
683 	// TODO: this only works if the link is the first entry of block_cache
684 static object_cache* sBlockCache;
685 
686 
687 //	#pragma mark - notifications/listener
688 
689 
690 /*!	Checks wether or not this is an event that closes a transaction. */
691 static inline bool
692 is_closing_event(int32 event)
693 {
694 	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
695 }
696 
697 
698 static inline bool
699 is_written_event(int32 event)
700 {
701 	return (event & TRANSACTION_WRITTEN) != 0;
702 }
703 
704 
705 /*!	From the specified \a notification, it will remove the lowest pending
706 	event, and return that one in \a _event.
707 	If there is no pending event anymore, it will return \c false.
708 */
709 static bool
710 get_next_pending_event(cache_notification* notification, int32* _event)
711 {
712 	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
713 		int32 pending = atomic_and(&notification->events_pending,
714 			~eventMask);
715 
716 		bool more = (pending & ~eventMask) != 0;
717 
718 		if ((pending & eventMask) != 0) {
719 			*_event = eventMask;
720 			return more;
721 		}
722 	}
723 
724 	return false;
725 }
726 
727 
728 static void
729 flush_pending_notifications(block_cache* cache)
730 {
731 	ASSERT_LOCKED_MUTEX(&sCachesLock);
732 
733 	while (true) {
734 		MutexLocker locker(sNotificationsLock);
735 
736 		cache_notification* notification = cache->pending_notifications.Head();
737 		if (notification == NULL)
738 			return;
739 
740 		bool deleteAfterEvent = false;
741 		int32 event = -1;
742 		if (!get_next_pending_event(notification, &event)) {
743 			// remove the notification if this was the last pending event
744 			cache->pending_notifications.Remove(notification);
745 			deleteAfterEvent = notification->delete_after_event;
746 		}
747 
748 		if (event >= 0) {
749 			// Notify listener, we need to copy the notification, as it might
750 			// be removed when we unlock the list.
751 			cache_notification copy = *notification;
752 			locker.Unlock();
753 
754 			copy.hook(copy.transaction_id, event, copy.data);
755 
756 			locker.Lock();
757 		}
758 
759 		if (deleteAfterEvent)
760 			delete notification;
761 	}
762 }
763 
764 
765 /*!	Flushes all pending notifications by calling the appropriate hook
766 	functions.
767 	Must not be called with a cache lock held.
768 */
769 static void
770 flush_pending_notifications()
771 {
772 	MutexLocker _(sCachesLock);
773 
774 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
775 	while (iterator.HasNext()) {
776 		block_cache* cache = iterator.Next();
777 
778 		flush_pending_notifications(cache);
779 	}
780 }
781 
782 
783 /*!	Initializes the \a notification as specified. */
784 static void
785 set_notification(cache_transaction* transaction,
786 	cache_notification &notification, int32 events,
787 	transaction_notification_hook hook, void* data)
788 {
789 	notification.transaction_id = transaction != NULL ? transaction->id : -1;
790 	notification.events_pending = 0;
791 	notification.events = events;
792 	notification.hook = hook;
793 	notification.data = data;
794 	notification.delete_after_event = false;
795 }
796 
797 
798 /*!	Makes sure the notification is deleted. It either deletes it directly,
799 	when possible, or marks it for deletion if the notification is pending.
800 */
801 static void
802 delete_notification(cache_notification* notification)
803 {
804 	MutexLocker locker(sNotificationsLock);
805 
806 	if (notification->events_pending != 0)
807 		notification->delete_after_event = true;
808 	else
809 		delete notification;
810 }
811 
812 
813 /*!	Adds the notification to the pending notifications list, or, if it's
814 	already part of it, updates its events_pending field.
815 	Also marks the notification to be deleted if \a deleteNotification
816 	is \c true.
817 	Triggers the notifier thread to run.
818 */
819 static void
820 add_notification(block_cache* cache, cache_notification* notification,
821 	int32 event, bool deleteNotification)
822 {
823 	if (notification->hook == NULL)
824 		return;
825 
826 	int32 pending = atomic_or(&notification->events_pending, event);
827 	if (pending == 0) {
828 		// not yet part of the notification list
829 		MutexLocker locker(sNotificationsLock);
830 		if (deleteNotification)
831 			notification->delete_after_event = true;
832 		cache->pending_notifications.Add(notification);
833 	} else if (deleteNotification) {
834 		// we might need to delete it ourselves if we're late
835 		delete_notification(notification);
836 	}
837 
838 	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
839 		// We're probably still holding some locks that makes rescheduling
840 		// not a good idea at this point.
841 }
842 
843 
844 /*!	Notifies all interested listeners of this transaction about the \a event.
845 	If \a event is a closing event (ie. TRANSACTION_ENDED, and
846 	TRANSACTION_ABORTED), all listeners except those listening to
847 	TRANSACTION_WRITTEN will be removed.
848 */
849 static void
850 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
851 	int32 event)
852 {
853 	T(Action("notify", cache, transaction));
854 
855 	bool isClosing = is_closing_event(event);
856 	bool isWritten = is_written_event(event);
857 
858 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
859 	while (iterator.HasNext()) {
860 		cache_listener* listener = iterator.Next();
861 
862 		bool remove = (isClosing && !is_written_event(listener->events))
863 			|| (isWritten && is_written_event(listener->events));
864 		if (remove)
865 			iterator.Remove();
866 
867 		if ((listener->events & event) != 0)
868 			add_notification(cache, listener, event, remove);
869 		else if (remove)
870 			delete_notification(listener);
871 	}
872 }
873 
874 
875 /*!	Removes and deletes all listeners that are still monitoring this
876 	transaction.
877 */
878 static void
879 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
880 {
881 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
882 	while (iterator.HasNext()) {
883 		cache_listener* listener = iterator.Next();
884 		iterator.Remove();
885 
886 		delete_notification(listener);
887 	}
888 }
889 
890 
891 static status_t
892 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
893 	int32 events, transaction_notification_hook hookFunction, void* data)
894 {
895 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
896 	while (iterator.HasNext()) {
897 		cache_listener* listener = iterator.Next();
898 
899 		if (listener->data == data && listener->hook == hookFunction) {
900 			// this listener already exists, just update it
901 			listener->events |= events;
902 			return B_OK;
903 		}
904 	}
905 
906 	cache_listener* listener = new(std::nothrow) cache_listener;
907 	if (listener == NULL)
908 		return B_NO_MEMORY;
909 
910 	set_notification(transaction, *listener, events, hookFunction, data);
911 	transaction->listeners.Add(listener);
912 	return B_OK;
913 }
914 
915 
916 //	#pragma mark - private transaction
917 
918 
919 cache_transaction::cache_transaction()
920 {
921 	num_blocks = 0;
922 	main_num_blocks = 0;
923 	sub_num_blocks = 0;
924 	first_block = NULL;
925 	open = true;
926 	has_sub_transaction = false;
927 	last_used = system_time();
928 	busy_writing_count = 0;
929 }
930 
931 
932 static int
933 transaction_compare(void* _transaction, const void* _id)
934 {
935 	cache_transaction* transaction = (cache_transaction*)_transaction;
936 	const int32* id = (const int32*)_id;
937 
938 	return transaction->id - *id;
939 }
940 
941 
942 static uint32
943 transaction_hash(void* _transaction, const void* _id, uint32 range)
944 {
945 	cache_transaction* transaction = (cache_transaction*)_transaction;
946 	const int32* id = (const int32*)_id;
947 
948 	if (transaction != NULL)
949 		return transaction->id % range;
950 
951 	return (uint32)*id % range;
952 }
953 
954 
955 static void
956 delete_transaction(block_cache* cache, cache_transaction* transaction)
957 {
958 	if (cache->last_transaction == transaction)
959 		cache->last_transaction = NULL;
960 
961 	remove_transaction_listeners(cache, transaction);
962 	delete transaction;
963 }
964 
965 
966 static cache_transaction*
967 lookup_transaction(block_cache* cache, int32 id)
968 {
969 	return (cache_transaction*)hash_lookup(cache->transaction_hash, &id);
970 }
971 
972 
973 //	#pragma mark - cached_block
974 
975 
976 int
977 compare_blocks(const void* _blockA, const void* _blockB)
978 {
979 	cached_block* blockA = *(cached_block**)_blockA;
980 	cached_block* blockB = *(cached_block**)_blockB;
981 
982 	off_t diff = blockA->block_number - blockB->block_number;
983 	if (diff > 0)
984 		return 1;
985 
986 	return diff < 0 ? -1 : 0;
987 }
988 
989 
990 bool
991 cached_block::CanBeWritten() const
992 {
993 	return !busy_writing && !busy_reading
994 		&& (previous_transaction != NULL
995 			|| (transaction == NULL && is_dirty && !is_writing));
996 }
997 
998 
999 /*static*/ int
1000 cached_block::Compare(void* _cacheEntry, const void* _block)
1001 {
1002 	cached_block* cacheEntry = (cached_block*)_cacheEntry;
1003 	const off_t* block = (const off_t*)_block;
1004 
1005 	off_t diff = cacheEntry->block_number - *block;
1006 	if (diff > 0)
1007 		return 1;
1008 
1009 	return diff < 0 ? -1 : 0;
1010 }
1011 
1012 
1013 /*static*/ uint32
1014 cached_block::Hash(void* _cacheEntry, const void* _block, uint32 range)
1015 {
1016 	cached_block* cacheEntry = (cached_block*)_cacheEntry;
1017 	const off_t* block = (const off_t*)_block;
1018 
1019 	if (cacheEntry != NULL)
1020 		return cacheEntry->block_number % range;
1021 
1022 	return (uint64)*block % range;
1023 }
1024 
1025 
1026 //	#pragma mark - BlockWriter
1027 
1028 
1029 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1030 	:
1031 	fCache(cache),
1032 	fBlocks(fBuffer),
1033 	fCount(0),
1034 	fTotal(0),
1035 	fCapacity(kBufferSize),
1036 	fMax(max),
1037 	fStatus(B_OK),
1038 	fDeletedTransaction(false)
1039 {
1040 }
1041 
1042 
1043 BlockWriter::~BlockWriter()
1044 {
1045 	if (fBlocks != fBuffer)
1046 		free(fBlocks);
1047 }
1048 
1049 
1050 /*!	Adds the specified block to the to be written array. If no more blocks can
1051 	be added, false is returned, otherwise true.
1052 */
1053 bool
1054 BlockWriter::Add(cached_block* block, hash_iterator* iterator)
1055 {
1056 	ASSERT(block->CanBeWritten());
1057 
1058 	if (fTotal == fMax)
1059 		return false;
1060 
1061 	if (fCount >= fCapacity) {
1062 		// Enlarge array if necessary
1063 		cached_block** newBlocks;
1064 		size_t newCapacity = max_c(256, fCapacity * 2);
1065 		if (fBlocks == fBuffer)
1066 			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1067 		else {
1068 			newBlocks = (cached_block**)realloc(fBlocks,
1069 				newCapacity * sizeof(void*));
1070 		}
1071 
1072 		if (newBlocks == NULL) {
1073 			// Allocating a larger array failed - we need to write back what
1074 			// we have synchronously now (this will also clear the array)
1075 			Write(iterator, false);
1076 		} else {
1077 			if (fBlocks == fBuffer)
1078 				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1079 
1080 			fBlocks = newBlocks;
1081 			fCapacity = newCapacity;
1082 		}
1083 	}
1084 
1085 	fBlocks[fCount++] = block;
1086 	fTotal++;
1087 	block->busy_writing = true;
1088 	fCache->busy_writing_count++;
1089 	if (block->previous_transaction != NULL)
1090 		block->previous_transaction->busy_writing_count++;
1091 
1092 	return true;
1093 }
1094 
1095 
1096 /*!	Adds all blocks of the specified transaction to the to be written array.
1097 	If no more blocks can be added, false is returned, otherwise true.
1098 */
1099 bool
1100 BlockWriter::Add(cache_transaction* transaction, hash_iterator* iterator,
1101 	bool& hasLeftOvers)
1102 {
1103 	ASSERT(!transaction->open);
1104 
1105 	if (transaction->busy_writing_count != 0) {
1106 		hasLeftOvers = true;
1107 		return true;
1108 	}
1109 
1110 	hasLeftOvers = false;
1111 
1112 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1113 	while (cached_block* block = blockIterator.Next()) {
1114 		if (!block->CanBeWritten()) {
1115 			// This block was already part of a previous transaction within this
1116 			// writer
1117 			hasLeftOvers = true;
1118 			continue;
1119 		}
1120 		if (!Add(block, iterator))
1121 			return false;
1122 
1123 		if (DeletedTransaction())
1124 			break;
1125 	}
1126 
1127 	return true;
1128 }
1129 
1130 
1131 /*! Cache must be locked when calling this method, but it will be unlocked
1132 	while the blocks are written back.
1133 */
1134 status_t
1135 BlockWriter::Write(hash_iterator* iterator, bool canUnlock)
1136 {
1137 	if (fCount == 0)
1138 		return B_OK;
1139 
1140 	if (canUnlock)
1141 		mutex_unlock(&fCache->lock);
1142 
1143 	// Sort blocks in their on-disk order
1144 	// TODO: ideally, this should be handled by the I/O scheduler
1145 
1146 	qsort(fBlocks, fCount, sizeof(void*), &compare_blocks);
1147 	fDeletedTransaction = false;
1148 
1149 	for (uint32 i = 0; i < fCount; i++) {
1150 		status_t status = _WriteBlock(fBlocks[i]);
1151 		if (status != B_OK) {
1152 			// propagate to global error handling
1153 			if (fStatus == B_OK)
1154 				fStatus = status;
1155 
1156 			_UnmarkWriting(fBlocks[i]);
1157 			fBlocks[i] = NULL;
1158 				// This block will not be marked clean
1159 		}
1160 	}
1161 
1162 	if (canUnlock)
1163 		mutex_lock(&fCache->lock);
1164 
1165 	for (uint32 i = 0; i < fCount; i++)
1166 		_BlockDone(fBlocks[i], iterator);
1167 
1168 	fCount = 0;
1169 	return fStatus;
1170 }
1171 
1172 
1173 /*!	Writes the specified \a block back to disk. It will always only write back
1174 	the oldest change of the block if it is part of more than one transaction.
1175 	It will automatically send out TRANSACTION_WRITTEN notices, as well as
1176 	delete transactions when they are no longer used, and \a deleteTransaction
1177 	is \c true.
1178 */
1179 /*static*/ status_t
1180 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1181 {
1182 	BlockWriter writer(cache);
1183 
1184 	writer.Add(block);
1185 	return writer.Write();
1186 }
1187 
1188 
1189 void*
1190 BlockWriter::_Data(cached_block* block) const
1191 {
1192 	return block->previous_transaction && block->original_data
1193 		? block->original_data : block->current_data;
1194 		// We first need to write back changes from previous transactions
1195 }
1196 
1197 
1198 status_t
1199 BlockWriter::_WriteBlock(cached_block* block)
1200 {
1201 	ASSERT(block->busy_writing);
1202 
1203 	TRACE(("BlockWriter::_WriteBlock(block %Ld)\n", block->block_number));
1204 	TB(Write(fCache, block));
1205 	TB2(BlockData(fCache, block, "before write"));
1206 
1207 	size_t blockSize = fCache->block_size;
1208 
1209 	ssize_t written = write_pos(fCache->fd,
1210 		block->block_number * blockSize, _Data(block), blockSize);
1211 
1212 	if (written != (ssize_t)blockSize) {
1213 		TB(Error(fCache, block->block_number, "write failed", written));
1214 		FATAL(("could not write back block %Ld (%s)\n", block->block_number,
1215 			strerror(errno)));
1216 		if (written < 0)
1217 			return errno;
1218 
1219 		return B_IO_ERROR;
1220 	}
1221 
1222 	return B_OK;
1223 }
1224 
1225 
1226 void
1227 BlockWriter::_BlockDone(cached_block* block, hash_iterator* iterator)
1228 {
1229 	if (block == NULL) {
1230 		// An error occured when trying to write this block
1231 		return;
1232 	}
1233 
1234 	if (fCache->num_dirty_blocks > 0)
1235 		fCache->num_dirty_blocks--;
1236 
1237 	if (_Data(block) == block->current_data)
1238 		block->is_dirty = false;
1239 
1240 	_UnmarkWriting(block);
1241 
1242 	cache_transaction* previous = block->previous_transaction;
1243 	if (previous != NULL) {
1244 		previous->blocks.Remove(block);
1245 		block->previous_transaction = NULL;
1246 
1247 		if (block->original_data != NULL && block->transaction == NULL) {
1248 			// This block is not part of a transaction, so it does not need
1249 			// its original pointer anymore.
1250 			fCache->Free(block->original_data);
1251 			block->original_data = NULL;
1252 		}
1253 
1254 		// Has the previous transation been finished with that write?
1255 		if (--previous->num_blocks == 0) {
1256 			TRACE(("cache transaction %ld finished!\n", previous->id));
1257 			T(Action("written", fCache, previous));
1258 
1259 			notify_transaction_listeners(fCache, previous,
1260 				TRANSACTION_WRITTEN);
1261 
1262 			if (iterator != NULL)
1263 				hash_remove_current(fCache->transaction_hash, iterator);
1264 			else
1265 				hash_remove(fCache->transaction_hash, previous);
1266 
1267 			delete_transaction(fCache, previous);
1268 			fDeletedTransaction = true;
1269 		}
1270 	}
1271 	if (block->transaction == NULL && block->ref_count == 0) {
1272 		// the block is no longer used
1273 		block->unused = true;
1274 		fCache->unused_blocks.Add(block);
1275 		fCache->unused_block_count++;
1276 	}
1277 
1278 	TB2(BlockData(fCache, block, "after write"));
1279 }
1280 
1281 
1282 void
1283 BlockWriter::_UnmarkWriting(cached_block* block)
1284 {
1285 	block->busy_writing = false;
1286 	if (block->previous_transaction != NULL)
1287 		block->previous_transaction->busy_writing_count--;
1288 	fCache->busy_writing_count--;
1289 
1290 	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1291 		|| block->busy_writing_waiters) {
1292 		fCache->busy_writing_waiters = false;
1293 		block->busy_writing_waiters = false;
1294 		fCache->busy_writing_condition.NotifyAll();
1295 	}
1296 }
1297 
1298 
1299 //	#pragma mark - block_cache
1300 
1301 
1302 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1303 		bool readOnly)
1304 	:
1305 	hash(NULL),
1306 	fd(_fd),
1307 	max_blocks(numBlocks),
1308 	block_size(blockSize),
1309 	next_transaction_id(1),
1310 	last_transaction(NULL),
1311 	transaction_hash(NULL),
1312 	buffer_cache(NULL),
1313 	unused_block_count(0),
1314 	busy_reading_count(0),
1315 	busy_reading_waiters(false),
1316 	busy_writing_count(0),
1317 	busy_writing_waiters(0),
1318 	num_dirty_blocks(0),
1319 	read_only(readOnly)
1320 {
1321 }
1322 
1323 
1324 /*! Should be called with the cache's lock held. */
1325 block_cache::~block_cache()
1326 {
1327 	unregister_low_resource_handler(&_LowMemoryHandler, this);
1328 
1329 	hash_uninit(transaction_hash);
1330 	hash_uninit(hash);
1331 
1332 	delete_object_cache(buffer_cache);
1333 
1334 	mutex_destroy(&lock);
1335 }
1336 
1337 
1338 status_t
1339 block_cache::Init()
1340 {
1341 	busy_reading_condition.Init(this, "cache block busy_reading");
1342 	busy_writing_condition.Init(this, "cache block busy writing");
1343 	condition_variable.Init(this, "cache transaction sync");
1344 	mutex_init(&lock, "block cache");
1345 
1346 	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1347 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1348 	if (buffer_cache == NULL)
1349 		return B_NO_MEMORY;
1350 
1351 	cached_block dummyBlock;
1352 	hash = hash_init(1024, offset_of_member(dummyBlock, next),
1353 		&cached_block::Compare, &cached_block::Hash);
1354 	if (hash == NULL)
1355 		return B_NO_MEMORY;
1356 
1357 	cache_transaction dummyTransaction;
1358 	transaction_hash = hash_init(16, offset_of_member(dummyTransaction, next),
1359 		&transaction_compare, &::transaction_hash);
1360 	if (transaction_hash == NULL)
1361 		return B_NO_MEMORY;
1362 
1363 	return register_low_resource_handler(&_LowMemoryHandler, this,
1364 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1365 			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1366 }
1367 
1368 
1369 void
1370 block_cache::Free(void* buffer)
1371 {
1372 	if (buffer != NULL)
1373 		object_cache_free(buffer_cache, buffer, 0);
1374 }
1375 
1376 
1377 void*
1378 block_cache::Allocate()
1379 {
1380 	void* block = object_cache_alloc(buffer_cache, 0);
1381 	if (block != NULL)
1382 		return block;
1383 
1384 	// recycle existing before allocating a new one
1385 	RemoveUnusedBlocks(100);
1386 
1387 	return object_cache_alloc(buffer_cache, 0);
1388 }
1389 
1390 
1391 void
1392 block_cache::FreeBlock(cached_block* block)
1393 {
1394 	Free(block->current_data);
1395 
1396 	if (block->original_data != NULL || block->parent_data != NULL) {
1397 		panic("block_cache::FreeBlock(): %Ld, original %p, parent %p\n",
1398 			block->block_number, block->original_data, block->parent_data);
1399 	}
1400 
1401 #if BLOCK_CACHE_DEBUG_CHANGED
1402 	Free(block->compare);
1403 #endif
1404 
1405 	object_cache_free(sBlockCache, block, 0);
1406 }
1407 
1408 
1409 /*! Allocates a new block for \a blockNumber, ready for use */
1410 cached_block*
1411 block_cache::NewBlock(off_t blockNumber)
1412 {
1413 	cached_block* block = NULL;
1414 
1415 	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1416 			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1417 		// recycle existing instead of allocating a new one
1418 		block = _GetUnusedBlock();
1419 	}
1420 	if (block == NULL) {
1421 		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1422 		if (block != NULL) {
1423 			block->current_data = Allocate();
1424 			if (block->current_data == NULL) {
1425 				object_cache_free(sBlockCache, block, 0);
1426 				return NULL;
1427 			}
1428 		} else {
1429 			TB(Error(this, blockNumber, "allocation failed"));
1430 			dprintf("block allocation failed, unused list is %sempty.\n",
1431 				unused_blocks.IsEmpty() ? "" : "not ");
1432 
1433 			// allocation failed, try to reuse an unused block
1434 			block = _GetUnusedBlock();
1435 			if (block == NULL) {
1436 				TB(Error(this, blockNumber, "get unused failed"));
1437 				FATAL(("could not allocate block!\n"));
1438 				return NULL;
1439 			}
1440 		}
1441 	}
1442 
1443 	block->block_number = blockNumber;
1444 	block->ref_count = 0;
1445 	block->last_accessed = 0;
1446 	block->transaction_next = NULL;
1447 	block->transaction = block->previous_transaction = NULL;
1448 	block->original_data = NULL;
1449 	block->parent_data = NULL;
1450 	block->busy_reading = false;
1451 	block->busy_writing = false;
1452 	block->is_writing = false;
1453 	block->is_dirty = false;
1454 	block->unused = false;
1455 	block->discard = false;
1456 	block->busy_reading_waiters = false;
1457 	block->busy_writing_waiters = false;
1458 #if BLOCK_CACHE_DEBUG_CHANGED
1459 	block->compare = NULL;
1460 #endif
1461 
1462 	return block;
1463 }
1464 
1465 
1466 void
1467 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1468 {
1469 	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1470 
1471 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1472 			cached_block* block = iterator.Next();) {
1473 		if (minSecondsOld >= block->LastAccess()) {
1474 			// The list is sorted by last access
1475 			break;
1476 		}
1477 		if (block->busy_reading || block->busy_writing)
1478 			continue;
1479 
1480 		TB(Flush(this, block));
1481 		TRACE(("  remove block %Ld, last accessed %" B_PRId32 "\n",
1482 			block->block_number, block->last_accessed));
1483 
1484 		// this can only happen if no transactions are used
1485 		if (block->is_dirty && !block->discard) {
1486 			if (block->busy_writing)
1487 				continue;
1488 
1489 			BlockWriter::WriteBlock(this, block);
1490 		}
1491 
1492 		// remove block from lists
1493 		iterator.Remove();
1494 		unused_block_count--;
1495 		RemoveBlock(block);
1496 
1497 		if (--count <= 0)
1498 			break;
1499 	}
1500 }
1501 
1502 
1503 void
1504 block_cache::RemoveBlock(cached_block* block)
1505 {
1506 	hash_remove(hash, block);
1507 	FreeBlock(block);
1508 }
1509 
1510 
1511 /*!	Discards the block from a transaction (this method must not be called
1512 	for blocks not part of a transaction).
1513 */
1514 void
1515 block_cache::DiscardBlock(cached_block* block)
1516 {
1517 	ASSERT(block->discard);
1518 
1519 	if (block->parent_data != NULL && block->parent_data != block->current_data)
1520 		Free(block->parent_data);
1521 
1522 	block->parent_data = NULL;
1523 
1524 	if (block->original_data != NULL) {
1525 		Free(block->original_data);
1526 		block->original_data = NULL;
1527 	}
1528 
1529 	RemoveBlock(block);
1530 }
1531 
1532 
1533 void
1534 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1535 {
1536 	TRACE(("block_cache: low memory handler called with level %ld\n", level));
1537 
1538 	// free some blocks according to the low memory state
1539 	// (if there is enough memory left, we don't free any)
1540 
1541 	block_cache* cache = (block_cache*)data;
1542 	int32 free = 0;
1543 	int32 secondsOld = 0;
1544 	switch (level) {
1545 		case B_NO_LOW_RESOURCE:
1546 			return;
1547 		case B_LOW_RESOURCE_NOTE:
1548 			free = cache->unused_block_count / 8;
1549 			secondsOld = 120;
1550 			break;
1551 		case B_LOW_RESOURCE_WARNING:
1552 			free = cache->unused_block_count / 4;
1553 			secondsOld = 10;
1554 			break;
1555 		case B_LOW_RESOURCE_CRITICAL:
1556 			free = cache->unused_block_count / 2;
1557 			secondsOld = 0;
1558 			break;
1559 	}
1560 
1561 	MutexLocker locker(&cache->lock);
1562 
1563 	if (!locker.IsLocked()) {
1564 		// If our block_cache were deleted, it could be that we had
1565 		// been called before that deletion went through, therefore,
1566 		// acquiring its lock might fail.
1567 		return;
1568 	}
1569 
1570 #ifdef TRACE_BLOCK_CACHE
1571 	uint32 oldUnused = cache->unused_block_count;
1572 #endif
1573 
1574 	cache->RemoveUnusedBlocks(free, secondsOld);
1575 
1576 	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %lu -> %lu\n", cache,
1577 		oldUnused, cache->unused_block_count));
1578 }
1579 
1580 
1581 cached_block*
1582 block_cache::_GetUnusedBlock()
1583 {
1584 	TRACE(("block_cache: get unused block\n"));
1585 
1586 	for (block_list::Iterator iterator = unused_blocks.GetIterator();
1587 			cached_block* block = iterator.Next();) {
1588 		TB(Flush(this, block, true));
1589 		// this can only happen if no transactions are used
1590 		if (block->is_dirty && !block->busy_writing && !block->discard)
1591 			BlockWriter::WriteBlock(this, block);
1592 
1593 		// remove block from lists
1594 		iterator.Remove();
1595 		unused_block_count--;
1596 		hash_remove(hash, block);
1597 
1598 		// TODO: see if parent/compare data is handled correctly here!
1599 		if (block->parent_data != NULL
1600 			&& block->parent_data != block->original_data)
1601 			Free(block->parent_data);
1602 		if (block->original_data != NULL)
1603 			Free(block->original_data);
1604 
1605 #if BLOCK_CACHE_DEBUG_CHANGED
1606 		if (block->compare != NULL)
1607 			Free(block->compare);
1608 #endif
1609 		return block;
1610 	}
1611 
1612 	return NULL;
1613 }
1614 
1615 
1616 //	#pragma mark - private block functions
1617 
1618 
1619 /*!	Cache must be locked.
1620 */
1621 static void
1622 mark_block_busy_reading(block_cache* cache, cached_block* block)
1623 {
1624 	block->busy_reading = true;
1625 	cache->busy_reading_count++;
1626 }
1627 
1628 
1629 /*!	Cache must be locked.
1630 */
1631 static void
1632 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1633 {
1634 	block->busy_reading = false;
1635 	cache->busy_reading_count--;
1636 
1637 	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1638 		|| block->busy_reading_waiters) {
1639 		cache->busy_reading_waiters = false;
1640 		block->busy_reading_waiters = false;
1641 		cache->busy_reading_condition.NotifyAll();
1642 	}
1643 }
1644 
1645 
1646 /*!	Cache must be locked.
1647 */
1648 static void
1649 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1650 {
1651 	while (block->busy_reading) {
1652 		// wait for at least the specified block to be read in
1653 		ConditionVariableEntry entry;
1654 		cache->busy_reading_condition.Add(&entry);
1655 		block->busy_reading_waiters = true;
1656 
1657 		mutex_unlock(&cache->lock);
1658 
1659 		entry.Wait();
1660 
1661 		mutex_lock(&cache->lock);
1662 	}
1663 }
1664 
1665 
1666 /*!	Cache must be locked.
1667 */
1668 static void
1669 wait_for_busy_reading_blocks(block_cache* cache)
1670 {
1671 	while (cache->busy_reading_count != 0) {
1672 		// wait for all blocks to be read in
1673 		ConditionVariableEntry entry;
1674 		cache->busy_reading_condition.Add(&entry);
1675 		cache->busy_reading_waiters = true;
1676 
1677 		mutex_unlock(&cache->lock);
1678 
1679 		entry.Wait();
1680 
1681 		mutex_lock(&cache->lock);
1682 	}
1683 }
1684 
1685 
1686 /*!	Cache must be locked.
1687 */
1688 static void
1689 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1690 {
1691 	while (block->busy_writing) {
1692 		// wait for all blocks to be written back
1693 		ConditionVariableEntry entry;
1694 		cache->busy_writing_condition.Add(&entry);
1695 		block->busy_writing_waiters = true;
1696 
1697 		mutex_unlock(&cache->lock);
1698 
1699 		entry.Wait();
1700 
1701 		mutex_lock(&cache->lock);
1702 	}
1703 }
1704 
1705 
1706 /*!	Cache must be locked.
1707 */
1708 static void
1709 wait_for_busy_writing_blocks(block_cache* cache)
1710 {
1711 	while (cache->busy_writing_count != 0) {
1712 		// wait for all blocks to be written back
1713 		ConditionVariableEntry entry;
1714 		cache->busy_writing_condition.Add(&entry);
1715 		cache->busy_writing_waiters = true;
1716 
1717 		mutex_unlock(&cache->lock);
1718 
1719 		entry.Wait();
1720 
1721 		mutex_lock(&cache->lock);
1722 	}
1723 }
1724 
1725 
1726 /*!	Removes a reference from the specified \a block. If this was the last
1727 	reference, the block is moved into the unused list.
1728 	In low memory situations, it will also free some blocks from that list,
1729 	but not necessarily the \a block it just released.
1730 */
1731 static void
1732 put_cached_block(block_cache* cache, cached_block* block)
1733 {
1734 #if BLOCK_CACHE_DEBUG_CHANGED
1735 	if (!block->is_dirty && block->compare != NULL
1736 		&& memcmp(block->current_data, block->compare, cache->block_size)) {
1737 		dprintf("new block:\n");
1738 		dump_block((const char*)block->current_data, 256, "  ");
1739 		dprintf("unchanged block:\n");
1740 		dump_block((const char*)block->compare, 256, "  ");
1741 		BlockWriter::WriteBlock(cache, block);
1742 		panic("block_cache: supposed to be clean block was changed!\n");
1743 
1744 		cache->Free(block->compare);
1745 		block->compare = NULL;
1746 	}
1747 #endif
1748 	TB(Put(cache, block));
1749 
1750 	if (block->ref_count < 1) {
1751 		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1752 		return;
1753 	}
1754 
1755 	if (--block->ref_count == 0
1756 		&& block->transaction == NULL && block->previous_transaction == NULL) {
1757 		// This block is not used anymore, and not part of any transaction
1758 		block->is_writing = false;
1759 
1760 		if (block->discard) {
1761 			cache->RemoveBlock(block);
1762 		} else {
1763 			// put this block in the list of unused blocks
1764 			block->unused = true;
1765 			ASSERT(block->original_data == NULL
1766 				&& block->parent_data == NULL);
1767 			cache->unused_blocks.Add(block);
1768 			cache->unused_block_count++;
1769 		}
1770 	}
1771 }
1772 
1773 
1774 static void
1775 put_cached_block(block_cache* cache, off_t blockNumber)
1776 {
1777 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1778 		panic("put_cached_block: invalid block number %lld (max %lld)",
1779 			blockNumber, cache->max_blocks - 1);
1780 	}
1781 
1782 	cached_block* block = (cached_block*)hash_lookup(cache->hash, &blockNumber);
1783 	if (block != NULL)
1784 		put_cached_block(cache, block);
1785 	else {
1786 		TB(Error(cache, blockNumber, "put unknown"));
1787 	}
1788 }
1789 
1790 
1791 /*!	Retrieves the block \a blockNumber from the hash table, if it's already
1792 	there, or reads it from the disk.
1793 	You need to have the cache locked when calling this function.
1794 
1795 	\param _allocated tells you wether or not a new block has been allocated
1796 		to satisfy your request.
1797 	\param readBlock if \c false, the block will not be read in case it was
1798 		not already in the cache. The block you retrieve may contain random
1799 		data. If \c true, the cache will be temporarily unlocked while the
1800 		block is read in.
1801 */
1802 static cached_block*
1803 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1804 	bool readBlock = true)
1805 {
1806 	ASSERT_LOCKED_MUTEX(&cache->lock);
1807 
1808 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1809 		panic("get_cached_block: invalid block number %lld (max %lld)",
1810 			blockNumber, cache->max_blocks - 1);
1811 		return NULL;
1812 	}
1813 
1814 retry:
1815 	cached_block* block = (cached_block*)hash_lookup(cache->hash,
1816 		&blockNumber);
1817 	*_allocated = false;
1818 
1819 	if (block == NULL) {
1820 		// put block into cache
1821 		block = cache->NewBlock(blockNumber);
1822 		if (block == NULL)
1823 			return NULL;
1824 
1825 		hash_insert_grow(cache->hash, block);
1826 		*_allocated = true;
1827 	} else if (block->busy_reading) {
1828 		// The block is currently busy_reading - wait and try again later
1829 		wait_for_busy_reading_block(cache, block);
1830 		goto retry;
1831 	}
1832 
1833 	if (block->unused) {
1834 		//TRACE(("remove block %Ld from unused\n", blockNumber));
1835 		block->unused = false;
1836 		cache->unused_blocks.Remove(block);
1837 		cache->unused_block_count--;
1838 	}
1839 
1840 	if (*_allocated && readBlock) {
1841 		// read block into cache
1842 		int32 blockSize = cache->block_size;
1843 
1844 		mark_block_busy_reading(cache, block);
1845 		mutex_unlock(&cache->lock);
1846 
1847 		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1848 			block->current_data, blockSize);
1849 
1850 		mutex_lock(&cache->lock);
1851 		if (bytesRead < blockSize) {
1852 			cache->RemoveBlock(block);
1853 			TB(Error(cache, blockNumber, "read failed", bytesRead));
1854 
1855 			FATAL(("could not read block %Ld: bytesRead: %ld, error: %s\n",
1856 				blockNumber, bytesRead, strerror(errno)));
1857 			return NULL;
1858 		}
1859 		TB(Read(cache, block));
1860 
1861 		mark_block_unbusy_reading(cache, block);
1862 	}
1863 
1864 	block->ref_count++;
1865 	block->last_accessed = system_time() / 1000000L;
1866 
1867 	return block;
1868 }
1869 
1870 
1871 /*!	Returns the writable block data for the requested blockNumber.
1872 	If \a cleared is true, the block is not read from disk; an empty block
1873 	is returned.
1874 
1875 	This is the only method to insert a block into a transaction. It makes
1876 	sure that the previous block contents are preserved in that case.
1877 */
1878 static void*
1879 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1880 	off_t length, int32 transactionID, bool cleared)
1881 {
1882 	TRACE(("get_writable_cached_block(blockNumber = %Ld, transaction = %ld)\n",
1883 		blockNumber, transactionID));
1884 
1885 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1886 		panic("get_writable_cached_block: invalid block number %lld (max %lld)",
1887 			blockNumber, cache->max_blocks - 1);
1888 	}
1889 
1890 	bool allocated;
1891 	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1892 		!cleared);
1893 	if (block == NULL)
1894 		return NULL;
1895 
1896 	if (block->busy_writing)
1897 		wait_for_busy_writing_block(cache, block);
1898 
1899 	block->discard = false;
1900 
1901 	// if there is no transaction support, we just return the current block
1902 	if (transactionID == -1) {
1903 		if (cleared) {
1904 			mark_block_busy_reading(cache, block);
1905 			mutex_unlock(&cache->lock);
1906 
1907 			memset(block->current_data, 0, cache->block_size);
1908 
1909 			mutex_lock(&cache->lock);
1910 			mark_block_unbusy_reading(cache, block);
1911 		}
1912 
1913 		block->is_writing = true;
1914 
1915 		if (!block->is_dirty) {
1916 			cache->num_dirty_blocks++;
1917 			block->is_dirty = true;
1918 				// mark the block as dirty
1919 		}
1920 
1921 		TB(Get(cache, block));
1922 		return block->current_data;
1923 	}
1924 
1925 	cache_transaction* transaction = block->transaction;
1926 
1927 	if (transaction != NULL && transaction->id != transactionID) {
1928 		// TODO: we have to wait here until the other transaction is done.
1929 		//	Maybe we should even panic, since we can't prevent any deadlocks.
1930 		panic("get_writable_cached_block(): asked to get busy writable block (transaction %ld)\n", block->transaction->id);
1931 		put_cached_block(cache, block);
1932 		return NULL;
1933 	}
1934 	if (transaction == NULL && transactionID != -1) {
1935 		// get new transaction
1936 		transaction = lookup_transaction(cache, transactionID);
1937 		if (transaction == NULL) {
1938 			panic("get_writable_cached_block(): invalid transaction %ld!\n",
1939 				transactionID);
1940 			put_cached_block(cache, block);
1941 			return NULL;
1942 		}
1943 		if (!transaction->open) {
1944 			panic("get_writable_cached_block(): transaction already done!\n");
1945 			put_cached_block(cache, block);
1946 			return NULL;
1947 		}
1948 
1949 		block->transaction = transaction;
1950 
1951 		// attach the block to the transaction block list
1952 		block->transaction_next = transaction->first_block;
1953 		transaction->first_block = block;
1954 		transaction->num_blocks++;
1955 	}
1956 	if (transaction != NULL)
1957 		transaction->last_used = system_time();
1958 
1959 	bool wasUnchanged = block->original_data == NULL
1960 		|| block->previous_transaction != NULL;
1961 
1962 	if (!(allocated && cleared) && block->original_data == NULL) {
1963 		// we already have data, so we need to preserve it
1964 		block->original_data = cache->Allocate();
1965 		if (block->original_data == NULL) {
1966 			TB(Error(cache, blockNumber, "allocate original failed"));
1967 			FATAL(("could not allocate original_data\n"));
1968 			put_cached_block(cache, block);
1969 			return NULL;
1970 		}
1971 
1972 		mark_block_busy_reading(cache, block);
1973 		mutex_unlock(&cache->lock);
1974 
1975 		memcpy(block->original_data, block->current_data, cache->block_size);
1976 
1977 		mutex_lock(&cache->lock);
1978 		mark_block_unbusy_reading(cache, block);
1979 	}
1980 	if (block->parent_data == block->current_data) {
1981 		// remember any previous contents for the parent transaction
1982 		block->parent_data = cache->Allocate();
1983 		if (block->parent_data == NULL) {
1984 			// TODO: maybe we should just continue the current transaction in this case...
1985 			TB(Error(cache, blockNumber, "allocate parent failed"));
1986 			FATAL(("could not allocate parent\n"));
1987 			put_cached_block(cache, block);
1988 			return NULL;
1989 		}
1990 
1991 		mark_block_busy_reading(cache, block);
1992 		mutex_unlock(&cache->lock);
1993 
1994 		memcpy(block->parent_data, block->current_data, cache->block_size);
1995 
1996 		mutex_lock(&cache->lock);
1997 		mark_block_unbusy_reading(cache, block);
1998 
1999 		transaction->sub_num_blocks++;
2000 	} else if (transaction != NULL && transaction->has_sub_transaction
2001 		&& block->parent_data == NULL && wasUnchanged)
2002 		transaction->sub_num_blocks++;
2003 
2004 	if (cleared) {
2005 		mark_block_busy_reading(cache, block);
2006 		mutex_unlock(&cache->lock);
2007 
2008 		memset(block->current_data, 0, cache->block_size);
2009 
2010 		mutex_lock(&cache->lock);
2011 		mark_block_unbusy_reading(cache, block);
2012 	}
2013 
2014 	block->is_dirty = true;
2015 	TB(Get(cache, block));
2016 	TB2(BlockData(cache, block, "get writable"));
2017 
2018 	return block->current_data;
2019 }
2020 
2021 
2022 #if DEBUG_BLOCK_CACHE
2023 
2024 
2025 static void
2026 dump_block(cached_block* block)
2027 {
2028 	kprintf("%08lx %9Ld %08lx %08lx %08lx %5ld %6ld %c%c%c%c%c%c %08lx %08lx\n",
2029 		(addr_t)block, block->block_number,
2030 		(addr_t)block->current_data, (addr_t)block->original_data,
2031 		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2032 		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2033 		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2034 		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2035 		(addr_t)block->transaction,
2036 		(addr_t)block->previous_transaction);
2037 }
2038 
2039 
2040 static void
2041 dump_block_long(cached_block* block)
2042 {
2043 	kprintf("BLOCK %p\n", block);
2044 	kprintf(" current data:  %p\n", block->current_data);
2045 	kprintf(" original data: %p\n", block->original_data);
2046 	kprintf(" parent data:   %p\n", block->parent_data);
2047 #if BLOCK_CACHE_DEBUG_CHANGED
2048 	kprintf(" compare data:  %p\n", block->compare);
2049 #endif
2050 	kprintf(" ref_count:     %ld\n", block->ref_count);
2051 	kprintf(" accessed:      %ld\n", block->LastAccess());
2052 	kprintf(" flags:        ");
2053 	if (block->busy_reading)
2054 		kprintf(" busy_reading");
2055 	if (block->busy_writing)
2056 		kprintf(" busy_writing");
2057 	if (block->is_writing)
2058 		kprintf(" is-writing");
2059 	if (block->is_dirty)
2060 		kprintf(" is-dirty");
2061 	if (block->unused)
2062 		kprintf(" unused");
2063 	if (block->discard)
2064 		kprintf(" discard");
2065 	kprintf("\n");
2066 	if (block->transaction != NULL) {
2067 		kprintf(" transaction:   %p (%ld)\n", block->transaction,
2068 			block->transaction->id);
2069 		if (block->transaction_next != NULL) {
2070 			kprintf(" next in transaction: %Ld\n",
2071 				block->transaction_next->block_number);
2072 		}
2073 	}
2074 	if (block->previous_transaction != NULL) {
2075 		kprintf(" previous transaction: %p (%ld)\n",
2076 			block->previous_transaction,
2077 			block->previous_transaction->id);
2078 	}
2079 
2080 	set_debug_variable("_current", (addr_t)block->current_data);
2081 	set_debug_variable("_original", (addr_t)block->original_data);
2082 	set_debug_variable("_parent", (addr_t)block->parent_data);
2083 }
2084 
2085 
2086 static int
2087 dump_cached_block(int argc, char** argv)
2088 {
2089 	if (argc != 2) {
2090 		kprintf("usage: %s <block-address>\n", argv[0]);
2091 		return 0;
2092 	}
2093 
2094 	dump_block_long((struct cached_block*)parse_expression(argv[1]));
2095 	return 0;
2096 }
2097 
2098 
2099 static int
2100 dump_cache(int argc, char** argv)
2101 {
2102 	bool showTransactions = false;
2103 	bool showBlocks = false;
2104 	int32 i = 1;
2105 	while (argv[i] != NULL && argv[i][0] == '-') {
2106 		for (char* arg = &argv[i][1]; arg[0]; arg++) {
2107 			switch (arg[0]) {
2108 				case 'b':
2109 					showBlocks = true;
2110 					break;
2111 				case 't':
2112 					showTransactions = true;
2113 					break;
2114 				default:
2115 					print_debugger_command_usage(argv[0]);
2116 					return 0;
2117 			}
2118 		}
2119 		i++;
2120 	}
2121 
2122 	if (i >= argc) {
2123 		print_debugger_command_usage(argv[0]);
2124 		return 0;
2125 	}
2126 
2127 	block_cache* cache = (struct block_cache*)parse_expression(argv[i]);
2128 	if (cache == NULL) {
2129 		kprintf("invalid cache address\n");
2130 		return 0;
2131 	}
2132 
2133 	off_t blockNumber = -1;
2134 	if (i + 1 < argc) {
2135 		blockNumber = parse_expression(argv[i + 1]);
2136 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
2137 			&blockNumber);
2138 		if (block != NULL)
2139 			dump_block_long(block);
2140 		else
2141 			kprintf("block %Ld not found\n", blockNumber);
2142 		return 0;
2143 	}
2144 
2145 	kprintf("BLOCK CACHE: %p\n", cache);
2146 
2147 	kprintf(" fd:           %d\n", cache->fd);
2148 	kprintf(" max_blocks:   %Ld\n", cache->max_blocks);
2149 	kprintf(" block_size:   %lu\n", cache->block_size);
2150 	kprintf(" next_transaction_id: %ld\n", cache->next_transaction_id);
2151 	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2152 	kprintf(" busy_reading: %lu, %s waiters\n", cache->busy_reading_count,
2153 		cache->busy_reading_waiters ? "has" : "no");
2154 	kprintf(" busy_writing: %lu, %s waiters\n", cache->busy_writing_count,
2155 		cache->busy_writing_waiters ? "has" : "no");
2156 
2157 	if (!cache->pending_notifications.IsEmpty()) {
2158 		kprintf(" pending notifications:\n");
2159 
2160 		NotificationList::Iterator iterator
2161 			= cache->pending_notifications.GetIterator();
2162 		while (iterator.HasNext()) {
2163 			cache_notification* notification = iterator.Next();
2164 
2165 			kprintf("  %p %5lx %p - %p\n", notification,
2166 				notification->events_pending, notification->hook,
2167 				notification->data);
2168 		}
2169 	}
2170 
2171 	if (showTransactions) {
2172 		kprintf(" transactions:\n");
2173 		kprintf("address       id state  blocks  main   sub\n");
2174 
2175 		hash_iterator iterator;
2176 		hash_open(cache->transaction_hash, &iterator);
2177 
2178 		cache_transaction* transaction;
2179 		while ((transaction = (cache_transaction*)hash_next(
2180 				cache->transaction_hash, &iterator)) != NULL) {
2181 			kprintf("%p %5ld %-7s %5ld %5ld %5ld\n", transaction,
2182 				transaction->id, transaction->open ? "open" : "closed",
2183 				transaction->num_blocks, transaction->main_num_blocks,
2184 				transaction->sub_num_blocks);
2185 		}
2186 	}
2187 
2188 	if (showBlocks) {
2189 		kprintf(" blocks:\n");
2190 		kprintf("address  block no. current  original parent    refs access "
2191 			"flags transact prev. trans\n");
2192 	}
2193 
2194 	uint32 referenced = 0;
2195 	uint32 count = 0;
2196 	uint32 dirty = 0;
2197 	uint32 discarded = 0;
2198 	hash_iterator iterator;
2199 	hash_open(cache->hash, &iterator);
2200 	cached_block* block;
2201 	while ((block = (cached_block*)hash_next(cache->hash, &iterator)) != NULL) {
2202 		if (showBlocks)
2203 			dump_block(block);
2204 
2205 		if (block->is_dirty)
2206 			dirty++;
2207 		if (block->discard)
2208 			discarded++;
2209 		if (block->ref_count)
2210 			referenced++;
2211 		count++;
2212 	}
2213 
2214 	kprintf(" %ld blocks total, %ld dirty, %ld discarded, %ld referenced, %ld "
2215 		"busy, %" B_PRIu32 " in unused.\n", count, dirty, discarded, referenced,
2216 		cache->busy_reading_count, cache->unused_block_count);
2217 
2218 	hash_close(cache->hash, &iterator, false);
2219 	return 0;
2220 }
2221 
2222 
2223 static int
2224 dump_transaction(int argc, char** argv)
2225 {
2226 	bool showBlocks = false;
2227 	int i = 1;
2228 	if (argc > 1 && !strcmp(argv[1], "-b")) {
2229 		showBlocks = true;
2230 		i++;
2231 	}
2232 
2233 	if (argc - i < 1 || argc - i > 2) {
2234 		print_debugger_command_usage(argv[0]);
2235 		return 0;
2236 	}
2237 
2238 	cache_transaction* transaction = NULL;
2239 
2240 	if (argc - i == 1) {
2241 		transaction = (cache_transaction*)parse_expression(argv[i]);
2242 	} else {
2243 		block_cache* cache = (block_cache*)parse_expression(argv[i]);
2244 		int32 id = parse_expression(argv[i + 1]);
2245 		transaction = lookup_transaction(cache, id);
2246 		if (transaction == NULL) {
2247 			kprintf("No transaction with ID %ld found.\n", id);
2248 			return 0;
2249 		}
2250 	}
2251 
2252 	kprintf("TRANSACTION %p\n", transaction);
2253 
2254 	kprintf(" id:             %ld\n", transaction->id);
2255 	kprintf(" num block:      %ld\n", transaction->num_blocks);
2256 	kprintf(" main num block: %ld\n", transaction->main_num_blocks);
2257 	kprintf(" sub num block:  %ld\n", transaction->sub_num_blocks);
2258 	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
2259 	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
2260 	kprintf(" idle:           %Ld secs\n",
2261 		(system_time() - transaction->last_used) / 1000000);
2262 
2263 	kprintf(" listeners:\n");
2264 
2265 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2266 	while (iterator.HasNext()) {
2267 		cache_listener* listener = iterator.Next();
2268 
2269 		kprintf("  %p %5lx %p - %p\n", listener, listener->events_pending,
2270 			listener->hook, listener->data);
2271 	}
2272 
2273 	if (!showBlocks)
2274 		return 0;
2275 
2276 	kprintf(" blocks:\n");
2277 	kprintf("address  block no. current  original parent    refs access "
2278 		"flags transact prev. trans\n");
2279 
2280 	cached_block* block = transaction->first_block;
2281 	while (block != NULL) {
2282 		dump_block(block);
2283 		block = block->transaction_next;
2284 	}
2285 
2286 	kprintf("--\n");
2287 
2288 	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2289 	while (blockIterator.HasNext()) {
2290 		block = blockIterator.Next();
2291 		dump_block(block);
2292 	}
2293 
2294 	return 0;
2295 }
2296 
2297 
2298 static int
2299 dump_caches(int argc, char** argv)
2300 {
2301 	kprintf("Block caches:\n");
2302 	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2303 	while (i.HasNext()) {
2304 		block_cache* cache = i.Next();
2305 		if (cache == (block_cache*)&sMarkCache)
2306 			continue;
2307 
2308 		kprintf("  %p\n", cache);
2309 	}
2310 
2311 	return 0;
2312 }
2313 
2314 
2315 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2316 static int
2317 dump_block_data(int argc, char** argv)
2318 {
2319 	using namespace BlockTracing;
2320 
2321 	// Determine which blocks to show
2322 
2323 	bool printStackTrace = true;
2324 	uint32 which = 0;
2325 	int32 i = 1;
2326 	while (i < argc && argv[i][0] == '-') {
2327 		char* arg = &argv[i][1];
2328 		while (arg[0]) {
2329 			switch (arg[0]) {
2330 				case 'c':
2331 					which |= BlockData::kCurrent;
2332 					break;
2333 				case 'p':
2334 					which |= BlockData::kParent;
2335 					break;
2336 				case 'o':
2337 					which |= BlockData::kOriginal;
2338 					break;
2339 
2340 				default:
2341 					kprintf("invalid block specifier (only o/c/p are "
2342 						"allowed).\n");
2343 					return 0;
2344 			}
2345 			arg++;
2346 		}
2347 
2348 		i++;
2349 	}
2350 	if (which == 0)
2351 		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2352 
2353 	if (i == argc) {
2354 		print_debugger_command_usage(argv[0]);
2355 		return 0;
2356 	}
2357 
2358 	// Get the range of blocks to print
2359 
2360 	int64 from = parse_expression(argv[i]);
2361 	int64 to = from;
2362 	if (argc > i + 1)
2363 		to = parse_expression(argv[i + 1]);
2364 	if (to < from)
2365 		to = from;
2366 
2367 	uint32 offset = 0;
2368 	uint32 size = LONG_MAX;
2369 	if (argc > i + 2)
2370 		offset = parse_expression(argv[i + 2]);
2371 	if (argc > i + 3)
2372 		size = parse_expression(argv[i + 3]);
2373 
2374 	TraceEntryIterator iterator;
2375 	iterator.MoveTo(from - 1);
2376 
2377 	static char sBuffer[1024];
2378 	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2379 
2380 	while (TraceEntry* entry = iterator.Next()) {
2381 		int32 index = iterator.Index();
2382 		if (index > to)
2383 			break;
2384 
2385 		Action* action = dynamic_cast<Action*>(entry);
2386 		if (action != NULL) {
2387 			out.Clear();
2388 			out.DumpEntry(action);
2389 			continue;
2390 		}
2391 
2392 		BlockData* blockData = dynamic_cast<BlockData*>(entry);
2393 		if (blockData == NULL)
2394 			continue;
2395 
2396 		out.Clear();
2397 
2398 		const char* dump = out.DumpEntry(entry);
2399 		int length = strlen(dump);
2400 		if (length > 0 && dump[length - 1] == '\n')
2401 			length--;
2402 
2403 		kprintf("%5ld. %.*s\n", index, length, dump);
2404 
2405 		if (printStackTrace) {
2406 			out.Clear();
2407 			entry->DumpStackTrace(out);
2408 			if (out.Size() > 0)
2409 				kputs(out.Buffer());
2410 		}
2411 
2412 		blockData->DumpBlocks(which, offset, size);
2413 	}
2414 
2415 	return 0;
2416 }
2417 #endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
2418 
2419 
2420 #endif	// DEBUG_BLOCK_CACHE
2421 
2422 
2423 /*!	Traverses through the block_cache list, and returns one cache after the
2424 	other. The cache returned is automatically locked when you get it, and
2425 	unlocked with the next call to this function. Ignores caches that are in
2426 	deletion state.
2427 	Returns \c NULL when the end of the list is reached.
2428 */
2429 static block_cache*
2430 get_next_locked_block_cache(block_cache* last)
2431 {
2432 	MutexLocker _(sCachesLock);
2433 
2434 	block_cache* cache;
2435 	if (last != NULL) {
2436 		mutex_unlock(&last->lock);
2437 
2438 		cache = sCaches.GetNext((block_cache*)&sMarkCache);
2439 		sCaches.Remove((block_cache*)&sMarkCache);
2440 	} else
2441 		cache = sCaches.Head();
2442 
2443 	if (cache != NULL) {
2444 		mutex_lock(&cache->lock);
2445 		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2446 	}
2447 
2448 	return cache;
2449 }
2450 
2451 
2452 /*!	Background thread that continuously checks for pending notifications of
2453 	all caches.
2454 	Every two seconds, it will also write back up to 64 blocks per cache.
2455 */
2456 static status_t
2457 block_notifier_and_writer(void* /*data*/)
2458 {
2459 	const bigtime_t kTimeout = 2000000LL;
2460 	bigtime_t timeout = kTimeout;
2461 
2462 	while (true) {
2463 		bigtime_t start = system_time();
2464 
2465 		status_t status = acquire_sem_etc(sEventSemaphore, 1,
2466 			B_RELATIVE_TIMEOUT, timeout);
2467 		if (status == B_OK) {
2468 			flush_pending_notifications();
2469 			timeout -= system_time() - start;
2470 			continue;
2471 		}
2472 
2473 		// write 64 blocks of each block_cache every two seconds
2474 		// TODO: change this once we have an I/O scheduler
2475 		timeout = kTimeout;
2476 		size_t usedMemory;
2477 		object_cache_get_usage(sBlockCache, &usedMemory);
2478 
2479 		block_cache* cache = NULL;
2480 		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2481 			BlockWriter writer(cache, 64);
2482 
2483 			size_t cacheUsedMemory;
2484 			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2485 			usedMemory += cacheUsedMemory;
2486 
2487 			if (cache->num_dirty_blocks) {
2488 				// This cache is not using transactions, we'll scan the blocks
2489 				// directly
2490 				hash_iterator iterator;
2491 				hash_open(cache->hash, &iterator);
2492 
2493 				cached_block* block;
2494 				while ((block = (cached_block*)hash_next(cache->hash, &iterator))
2495 						!= NULL) {
2496 					if (block->CanBeWritten() && !writer.Add(block))
2497 						break;
2498 				}
2499 
2500 				hash_close(cache->hash, &iterator, false);
2501 			} else {
2502 				hash_iterator iterator;
2503 				hash_open(cache->transaction_hash, &iterator);
2504 
2505 				cache_transaction* transaction;
2506 				while ((transaction = (cache_transaction*)hash_next(
2507 						cache->transaction_hash, &iterator)) != NULL) {
2508 					if (transaction->open) {
2509 						if (system_time() > transaction->last_used
2510 								+ kTransactionIdleTime) {
2511 							// Transaction is open but idle
2512 							notify_transaction_listeners(cache, transaction,
2513 								TRANSACTION_IDLE);
2514 						}
2515 						continue;
2516 					}
2517 
2518 					bool hasLeftOvers;
2519 						// we ignore this one
2520 					if (!writer.Add(transaction, &iterator, hasLeftOvers))
2521 						break;
2522 				}
2523 
2524 				hash_close(cache->transaction_hash, &iterator, false);
2525 			}
2526 
2527 			writer.Write();
2528 
2529 			if ((block_cache_used_memory() / B_PAGE_SIZE)
2530 					> vm_page_num_pages() / 2) {
2531 				// Try to reduce memory usage to half of the available
2532 				// RAM at maximum
2533 				cache->RemoveUnusedBlocks(1000, 10);
2534 			}
2535 		}
2536 
2537 		MutexLocker _(sCachesMemoryUseLock);
2538 		sUsedMemory = usedMemory;
2539 	}
2540 
2541 	// never can get here
2542 	return B_OK;
2543 }
2544 
2545 
2546 /*!	Notify function for wait_for_notifications(). */
2547 static void
2548 notify_sync(int32 transactionID, int32 event, void* _cache)
2549 {
2550 	block_cache* cache = (block_cache*)_cache;
2551 
2552 	cache->condition_variable.NotifyOne();
2553 }
2554 
2555 
2556 /*!	Must be called with the sCachesLock held. */
2557 static bool
2558 is_valid_cache(block_cache* cache)
2559 {
2560 	ASSERT_LOCKED_MUTEX(&sCachesLock);
2561 
2562 	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2563 	while (iterator.HasNext()) {
2564 		if (cache == iterator.Next())
2565 			return true;
2566 	}
2567 
2568 	return false;
2569 }
2570 
2571 
2572 /*!	Waits until all pending notifications are carried out.
2573 	Safe to be called from the block writer/notifier thread.
2574 	You must not hold the \a cache lock when calling this function.
2575 */
2576 static void
2577 wait_for_notifications(block_cache* cache)
2578 {
2579 	MutexLocker locker(sCachesLock);
2580 
2581 	if (find_thread(NULL) == sNotifierWriterThread) {
2582 		// We're the notifier thread, don't wait, but flush all pending
2583 		// notifications directly.
2584 		if (is_valid_cache(cache))
2585 			flush_pending_notifications(cache);
2586 		return;
2587 	}
2588 
2589 	// add sync notification
2590 	cache_notification notification;
2591 	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2592 		cache);
2593 
2594 	ConditionVariableEntry entry;
2595 	cache->condition_variable.Add(&entry);
2596 
2597 	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2598 	locker.Unlock();
2599 
2600 	// wait for notification hook to be called
2601 	entry.Wait();
2602 }
2603 
2604 
2605 status_t
2606 block_cache_init(void)
2607 {
2608 	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2609 		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2610 	if (sBlockCache == NULL)
2611 		return B_NO_MEMORY;
2612 
2613 	new (&sCaches) DoublyLinkedList<block_cache>;
2614 		// manually call constructor
2615 
2616 	sEventSemaphore = create_sem(0, "block cache event");
2617 	if (sEventSemaphore < B_OK)
2618 		return sEventSemaphore;
2619 
2620 	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2621 		"block notifier/writer", B_LOW_PRIORITY, NULL);
2622 	if (sNotifierWriterThread >= B_OK)
2623 		resume_thread(sNotifierWriterThread);
2624 
2625 #if DEBUG_BLOCK_CACHE
2626 	add_debugger_command_etc("block_caches", &dump_caches,
2627 		"dumps all block caches", "\n", 0);
2628 	add_debugger_command_etc("block_cache", &dump_cache,
2629 		"dumps a specific block cache",
2630 		"[-bt] <cache-address> [block-number]\n"
2631 		"  -t lists the transactions\n"
2632 		"  -b lists all blocks\n", 0);
2633 	add_debugger_command("cached_block", &dump_cached_block,
2634 		"dumps the specified cached block");
2635 	add_debugger_command_etc("transaction", &dump_transaction,
2636 		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2637 		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2638 		"  -b lists all blocks that are part of this transaction\n", 0);
2639 #	if BLOCK_CACHE_BLOCK_TRACING >= 2
2640 	add_debugger_command_etc("block_cache_data", &dump_block_data,
2641 		"dumps the data blocks logged for the actions",
2642 		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2643 		"If no data specifier is used, all blocks are shown by default.\n"
2644 		" -c       the current data is shown, if available.\n"
2645 		" -p       the parent data is shown, if available.\n"
2646 		" -o       the original data is shown, if available.\n"
2647 		" <from>   first index of tracing entries to show.\n"
2648 		" <to>     if given, the last entry. If not, only <from> is shown.\n"
2649 		" <offset> the offset of the block data.\n"
2650 		" <from>   the size of the block data that is dumped\n", 0);
2651 #	endif
2652 #endif	// DEBUG_BLOCK_CACHE
2653 
2654 	return B_OK;
2655 }
2656 
2657 
2658 size_t
2659 block_cache_used_memory(void)
2660 {
2661 	MutexLocker _(sCachesMemoryUseLock);
2662 	return sUsedMemory;
2663 }
2664 
2665 
2666 //	#pragma mark - public transaction API
2667 
2668 
2669 int32
2670 cache_start_transaction(void* _cache)
2671 {
2672 	block_cache* cache = (block_cache*)_cache;
2673 	TransactionLocker locker(cache);
2674 
2675 	if (cache->last_transaction && cache->last_transaction->open) {
2676 		panic("last transaction (%ld) still open!\n",
2677 			cache->last_transaction->id);
2678 	}
2679 
2680 	cache_transaction* transaction = new(std::nothrow) cache_transaction;
2681 	if (transaction == NULL)
2682 		return B_NO_MEMORY;
2683 
2684 	transaction->id = atomic_add(&cache->next_transaction_id, 1);
2685 	cache->last_transaction = transaction;
2686 
2687 	TRACE(("cache_start_transaction(): id %ld started\n", transaction->id));
2688 	T(Action("start", cache, transaction));
2689 
2690 	hash_insert_grow(cache->transaction_hash, transaction);
2691 
2692 	return transaction->id;
2693 }
2694 
2695 
2696 status_t
2697 cache_sync_transaction(void* _cache, int32 id)
2698 {
2699 	block_cache* cache = (block_cache*)_cache;
2700 	bool hadBusy;
2701 
2702 	TRACE(("cache_sync_transaction(id %ld)\n", id));
2703 
2704 	do {
2705 		TransactionLocker locker(cache);
2706 		hadBusy = false;
2707 
2708 		BlockWriter writer(cache);
2709 		hash_iterator iterator;
2710 		hash_open(cache->transaction_hash, &iterator);
2711 
2712 		cache_transaction* transaction;
2713 		while ((transaction = (cache_transaction*)hash_next(
2714 				cache->transaction_hash, &iterator)) != NULL) {
2715 			// close all earlier transactions which haven't been closed yet
2716 
2717 			if (transaction->busy_writing_count != 0) {
2718 				hadBusy = true;
2719 				continue;
2720 			}
2721 			if (transaction->id <= id && !transaction->open) {
2722 				// write back all of their remaining dirty blocks
2723 				T(Action("sync", cache, transaction));
2724 
2725 				bool hasLeftOvers;
2726 				writer.Add(transaction, &iterator, hasLeftOvers);
2727 
2728 				if (hasLeftOvers) {
2729 					// This transaction contains blocks that a previous
2730 					// transaction is trying to write back in this write run
2731 					hadBusy = true;
2732 				}
2733 			}
2734 		}
2735 
2736 		hash_close(cache->transaction_hash, &iterator, false);
2737 
2738 		status_t status = writer.Write();
2739 		if (status != B_OK)
2740 			return status;
2741 	} while (hadBusy);
2742 
2743 	wait_for_notifications(cache);
2744 		// make sure that all pending TRANSACTION_WRITTEN notifications
2745 		// are handled after we return
2746 	return B_OK;
2747 }
2748 
2749 
2750 status_t
2751 cache_end_transaction(void* _cache, int32 id,
2752 	transaction_notification_hook hook, void* data)
2753 {
2754 	block_cache* cache = (block_cache*)_cache;
2755 	TransactionLocker locker(cache);
2756 
2757 	TRACE(("cache_end_transaction(id = %ld)\n", id));
2758 
2759 	cache_transaction* transaction = lookup_transaction(cache, id);
2760 	if (transaction == NULL) {
2761 		panic("cache_end_transaction(): invalid transaction ID\n");
2762 		return B_BAD_VALUE;
2763 	}
2764 
2765 	// Write back all pending transaction blocks
2766 
2767 	BlockWriter writer(cache);
2768 	cached_block* block = transaction->first_block;
2769 	for (; block != NULL; block = block->transaction_next) {
2770 		if (block->previous_transaction != NULL) {
2771 			// need to write back pending changes
2772 			writer.Add(block);
2773 		}
2774 	}
2775 
2776 	status_t status = writer.Write();
2777 	if (status != B_OK)
2778 		return status;
2779 
2780 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2781 
2782 	if (hook != NULL
2783 		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2784 			hook, data) != B_OK) {
2785 		return B_NO_MEMORY;
2786 	}
2787 
2788 	T(Action("end", cache, transaction));
2789 
2790 	// iterate through all blocks and free the unchanged original contents
2791 
2792 	cached_block* next;
2793 	for (block = transaction->first_block; block != NULL; block = next) {
2794 		next = block->transaction_next;
2795 		ASSERT(block->previous_transaction == NULL);
2796 
2797 		if (block->discard) {
2798 			// This block has been discarded in the transaction
2799 			cache->DiscardBlock(block);
2800 			transaction->num_blocks--;
2801 			continue;
2802 		}
2803 
2804 		if (block->original_data != NULL) {
2805 			cache->Free(block->original_data);
2806 			block->original_data = NULL;
2807 		}
2808 		if (transaction->has_sub_transaction) {
2809 			if (block->parent_data != block->current_data)
2810 				cache->Free(block->parent_data);
2811 			block->parent_data = NULL;
2812 		}
2813 
2814 		// move the block to the previous transaction list
2815 		transaction->blocks.Add(block);
2816 
2817 		block->previous_transaction = transaction;
2818 		block->transaction_next = NULL;
2819 		block->transaction = NULL;
2820 	}
2821 
2822 	transaction->open = false;
2823 	return B_OK;
2824 }
2825 
2826 
2827 status_t
2828 cache_abort_transaction(void* _cache, int32 id)
2829 {
2830 	block_cache* cache = (block_cache*)_cache;
2831 	TransactionLocker locker(cache);
2832 
2833 	TRACE(("cache_abort_transaction(id = %ld)\n", id));
2834 
2835 	cache_transaction* transaction = lookup_transaction(cache, id);
2836 	if (transaction == NULL) {
2837 		panic("cache_abort_transaction(): invalid transaction ID\n");
2838 		return B_BAD_VALUE;
2839 	}
2840 
2841 	T(Abort(cache, transaction));
2842 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2843 
2844 	// iterate through all blocks and restore their original contents
2845 
2846 	cached_block* block = transaction->first_block;
2847 	cached_block* next;
2848 	for (; block != NULL; block = next) {
2849 		next = block->transaction_next;
2850 
2851 		if (block->original_data != NULL) {
2852 			TRACE(("cache_abort_transaction(id = %ld): restored contents of "
2853 				"block %Ld\n", transaction->id, block->block_number));
2854 			memcpy(block->current_data, block->original_data,
2855 				cache->block_size);
2856 			cache->Free(block->original_data);
2857 			block->original_data = NULL;
2858 		}
2859 		if (transaction->has_sub_transaction) {
2860 			if (block->parent_data != block->current_data)
2861 				cache->Free(block->parent_data);
2862 			block->parent_data = NULL;
2863 		}
2864 
2865 		block->transaction_next = NULL;
2866 		block->transaction = NULL;
2867 		block->discard = false;
2868 	}
2869 
2870 	hash_remove(cache->transaction_hash, transaction);
2871 	delete_transaction(cache, transaction);
2872 	return B_OK;
2873 }
2874 
2875 
2876 /*!	Acknowledges the current parent transaction, and starts a new transaction
2877 	from its sub transaction.
2878 	The new transaction also gets a new transaction ID.
2879 */
2880 int32
2881 cache_detach_sub_transaction(void* _cache, int32 id,
2882 	transaction_notification_hook hook, void* data)
2883 {
2884 	block_cache* cache = (block_cache*)_cache;
2885 	TransactionLocker locker(cache);
2886 
2887 	TRACE(("cache_detach_sub_transaction(id = %ld)\n", id));
2888 
2889 	cache_transaction* transaction = lookup_transaction(cache, id);
2890 	if (transaction == NULL) {
2891 		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2892 		return B_BAD_VALUE;
2893 	}
2894 	if (!transaction->has_sub_transaction)
2895 		return B_BAD_VALUE;
2896 
2897 	// iterate through all blocks and free the unchanged original contents
2898 
2899 	BlockWriter writer(cache);
2900 	cached_block* block = transaction->first_block;
2901 	for (; block != NULL; block = block->transaction_next) {
2902 		if (block->previous_transaction != NULL) {
2903 			// need to write back pending changes
2904 			writer.Add(block);
2905 		}
2906 	}
2907 
2908 	status_t status = writer.Write();
2909 	if (status != B_OK)
2910 		return status;
2911 
2912 	// create a new transaction for the sub transaction
2913 	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
2914 	if (newTransaction == NULL)
2915 		return B_NO_MEMORY;
2916 
2917 	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2918 	T(Detach(cache, transaction, newTransaction));
2919 
2920 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2921 
2922 	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2923 			data) != B_OK) {
2924 		delete newTransaction;
2925 		return B_NO_MEMORY;
2926 	}
2927 
2928 	cached_block* last = NULL;
2929 	cached_block* next;
2930 	for (block = transaction->first_block; block != NULL; block = next) {
2931 		next = block->transaction_next;
2932 		ASSERT(block->previous_transaction == NULL);
2933 
2934 		if (block->discard) {
2935 			cache->DiscardBlock(block);
2936 			transaction->main_num_blocks--;
2937 			continue;
2938 		}
2939 
2940 		if (block->parent_data != NULL) {
2941 			// The block changed in the parent - free the original data, since
2942 			// they will be replaced by what is in current.
2943 			ASSERT(block->original_data != NULL);
2944 			cache->Free(block->original_data);
2945 
2946 			if (block->parent_data != block->current_data) {
2947 				// The block had been changed in both transactions
2948 				block->original_data = block->parent_data;
2949 			} else {
2950 				// The block has only been changed in the parent
2951 				block->original_data = NULL;
2952 			}
2953 
2954 			// move the block to the previous transaction list
2955 			transaction->blocks.Add(block);
2956 			block->previous_transaction = transaction;
2957 			block->parent_data = NULL;
2958 		}
2959 
2960 		if (block->original_data != NULL) {
2961 			// This block had been changed in the current sub transaction,
2962 			// we need to move this block over to the new transaction.
2963 			ASSERT(block->parent_data == NULL);
2964 
2965 			if (last == NULL)
2966 				newTransaction->first_block = block;
2967 			else
2968 				last->transaction_next = block;
2969 
2970 			block->transaction = newTransaction;
2971 			last = block;
2972 		} else
2973 			block->transaction = NULL;
2974 
2975 		block->transaction_next = NULL;
2976 	}
2977 
2978 	newTransaction->num_blocks = transaction->sub_num_blocks;
2979 
2980 	transaction->open = false;
2981 	transaction->has_sub_transaction = false;
2982 	transaction->num_blocks = transaction->main_num_blocks;
2983 	transaction->sub_num_blocks = 0;
2984 
2985 	hash_insert_grow(cache->transaction_hash, newTransaction);
2986 	cache->last_transaction = newTransaction;
2987 
2988 	return newTransaction->id;
2989 }
2990 
2991 
2992 status_t
2993 cache_abort_sub_transaction(void* _cache, int32 id)
2994 {
2995 	block_cache* cache = (block_cache*)_cache;
2996 	TransactionLocker locker(cache);
2997 
2998 	TRACE(("cache_abort_sub_transaction(id = %ld)\n", id));
2999 
3000 	cache_transaction* transaction = lookup_transaction(cache, id);
3001 	if (transaction == NULL) {
3002 		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3003 		return B_BAD_VALUE;
3004 	}
3005 	if (!transaction->has_sub_transaction)
3006 		return B_BAD_VALUE;
3007 
3008 	T(Abort(cache, transaction));
3009 	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3010 
3011 	// revert all changes back to the version of the parent
3012 
3013 	cached_block* block = transaction->first_block;
3014 	cached_block* next;
3015 	for (; block != NULL; block = next) {
3016 		next = block->transaction_next;
3017 
3018 		if (block->parent_data == NULL) {
3019 			// the parent transaction didn't change the block, but the sub
3020 			// transaction did - we need to revert from the original data
3021 			ASSERT(block->original_data != NULL);
3022 			memcpy(block->current_data, block->original_data,
3023 				cache->block_size);
3024 		} else if (block->parent_data != block->current_data) {
3025 			// the block has been changed and must be restored
3026 			TRACE(("cache_abort_sub_transaction(id = %ld): restored contents "
3027 				"of block %Ld\n", transaction->id, block->block_number));
3028 			memcpy(block->current_data, block->parent_data, cache->block_size);
3029 			cache->Free(block->parent_data);
3030 		}
3031 
3032 		block->parent_data = NULL;
3033 		block->discard = false;
3034 	}
3035 
3036 	// all subsequent changes will go into the main transaction
3037 	transaction->has_sub_transaction = false;
3038 	transaction->sub_num_blocks = 0;
3039 
3040 	return B_OK;
3041 }
3042 
3043 
3044 status_t
3045 cache_start_sub_transaction(void* _cache, int32 id)
3046 {
3047 	block_cache* cache = (block_cache*)_cache;
3048 	TransactionLocker locker(cache);
3049 
3050 	TRACE(("cache_start_sub_transaction(id = %ld)\n", id));
3051 
3052 	cache_transaction* transaction = lookup_transaction(cache, id);
3053 	if (transaction == NULL) {
3054 		panic("cache_start_sub_transaction(): invalid transaction ID %ld\n",
3055 			id);
3056 		return B_BAD_VALUE;
3057 	}
3058 
3059 	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3060 
3061 	// move all changed blocks up to the parent
3062 
3063 	cached_block* block = transaction->first_block;
3064 	cached_block* last = NULL;
3065 	cached_block* next;
3066 	for (; block != NULL; block = next) {
3067 		next = block->transaction_next;
3068 
3069 		if (block->discard) {
3070 			// This block has been discarded in the parent transaction
3071 			// TODO: this is wrong: since the parent transaction is not
3072 			// finished here (just extended), this block cannot be reverted
3073 			// anymore in case the parent transaction is aborted!!!
3074 			if (last != NULL)
3075 				last->transaction_next = next;
3076 			else
3077 				transaction->first_block = next;
3078 
3079 			cache->DiscardBlock(block);
3080 			transaction->num_blocks--;
3081 			continue;
3082 		}
3083 
3084 		if (transaction->has_sub_transaction
3085 			&& block->parent_data != NULL
3086 			&& block->parent_data != block->current_data) {
3087 			// there already is an older sub transaction - we acknowledge
3088 			// its changes and move its blocks up to the parent
3089 			cache->Free(block->parent_data);
3090 		}
3091 
3092 		// we "allocate" the parent data lazily, that means, we don't copy
3093 		// the data (and allocate memory for it) until we need to
3094 		block->parent_data = block->current_data;
3095 		last = block;
3096 	}
3097 
3098 	// all subsequent changes will go into the sub transaction
3099 	transaction->has_sub_transaction = true;
3100 	transaction->main_num_blocks = transaction->num_blocks;
3101 	transaction->sub_num_blocks = 0;
3102 	T(Action("start-sub", cache, transaction));
3103 
3104 	return B_OK;
3105 }
3106 
3107 
3108 /*!	Adds a transaction listener that gets notified when the transaction
3109 	is ended, aborted, written, or idle as specified by \a events.
3110 	The listener gets automatically removed when the transaction ends.
3111 */
3112 status_t
3113 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3114 	transaction_notification_hook hook, void* data)
3115 {
3116 	block_cache* cache = (block_cache*)_cache;
3117 	TransactionLocker locker(cache);
3118 
3119 	cache_transaction* transaction = lookup_transaction(cache, id);
3120 	if (transaction == NULL)
3121 		return B_BAD_VALUE;
3122 
3123 	return add_transaction_listener(cache, transaction, events, hook, data);
3124 }
3125 
3126 
3127 status_t
3128 cache_remove_transaction_listener(void* _cache, int32 id,
3129 	transaction_notification_hook hookFunction, void* data)
3130 {
3131 	block_cache* cache = (block_cache*)_cache;
3132 	TransactionLocker locker(cache);
3133 
3134 	cache_transaction* transaction = lookup_transaction(cache, id);
3135 	if (transaction == NULL)
3136 		return B_BAD_VALUE;
3137 
3138 	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3139 	while (iterator.HasNext()) {
3140 		cache_listener* listener = iterator.Next();
3141 		if (listener->data == data && listener->hook == hookFunction) {
3142 			iterator.Remove();
3143 
3144 			if (listener->events_pending != 0) {
3145 				MutexLocker _(sNotificationsLock);
3146 				if (listener->events_pending != 0)
3147 					cache->pending_notifications.Remove(listener);
3148 			}
3149 			delete listener;
3150 			return B_OK;
3151 		}
3152 	}
3153 
3154 	return B_ENTRY_NOT_FOUND;
3155 }
3156 
3157 
3158 status_t
3159 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3160 	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3161 {
3162 	cached_block* block = (cached_block*)*_cookie;
3163 	block_cache* cache = (block_cache*)_cache;
3164 	TransactionLocker locker(cache);
3165 
3166 	cache_transaction* transaction = lookup_transaction(cache, id);
3167 	if (transaction == NULL || !transaction->open)
3168 		return B_BAD_VALUE;
3169 
3170 	if (block == NULL)
3171 		block = transaction->first_block;
3172 	else
3173 		block = block->transaction_next;
3174 
3175 	if (transaction->has_sub_transaction) {
3176 		if (mainOnly) {
3177 			// find next block that the parent changed
3178 			while (block != NULL && block->parent_data == NULL)
3179 				block = block->transaction_next;
3180 		} else {
3181 			// find next non-discarded block
3182 			while (block != NULL && block->discard)
3183 				block = block->transaction_next;
3184 		}
3185 	}
3186 
3187 	if (block == NULL)
3188 		return B_ENTRY_NOT_FOUND;
3189 
3190 	if (_blockNumber)
3191 		*_blockNumber = block->block_number;
3192 	if (_data)
3193 		*_data = mainOnly ? block->parent_data : block->current_data;
3194 	if (_unchangedData)
3195 		*_unchangedData = block->original_data;
3196 
3197 	*_cookie = (addr_t)block;
3198 	return B_OK;
3199 }
3200 
3201 
3202 int32
3203 cache_blocks_in_transaction(void* _cache, int32 id)
3204 {
3205 	block_cache* cache = (block_cache*)_cache;
3206 	TransactionLocker locker(cache);
3207 
3208 	cache_transaction* transaction = lookup_transaction(cache, id);
3209 	if (transaction == NULL)
3210 		return B_BAD_VALUE;
3211 
3212 	return transaction->num_blocks;
3213 }
3214 
3215 
3216 /*!	Returns the number of blocks that are part of the main transaction. If this
3217 	transaction does not have a sub transaction yet, this is the same value as
3218 	cache_blocks_in_transaction() would return.
3219 */
3220 int32
3221 cache_blocks_in_main_transaction(void* _cache, int32 id)
3222 {
3223 	block_cache* cache = (block_cache*)_cache;
3224 	TransactionLocker locker(cache);
3225 
3226 	cache_transaction* transaction = lookup_transaction(cache, id);
3227 	if (transaction == NULL)
3228 		return B_BAD_VALUE;
3229 
3230 	if (transaction->has_sub_transaction)
3231 		return transaction->main_num_blocks;
3232 
3233 	return transaction->num_blocks;
3234 }
3235 
3236 
3237 int32
3238 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3239 {
3240 	block_cache* cache = (block_cache*)_cache;
3241 	TransactionLocker locker(cache);
3242 
3243 	cache_transaction* transaction = lookup_transaction(cache, id);
3244 	if (transaction == NULL)
3245 		return B_BAD_VALUE;
3246 
3247 	return transaction->sub_num_blocks;
3248 }
3249 
3250 
3251 //	#pragma mark - public block cache API
3252 
3253 
3254 void
3255 block_cache_delete(void* _cache, bool allowWrites)
3256 {
3257 	block_cache* cache = (block_cache*)_cache;
3258 
3259 	if (allowWrites)
3260 		block_cache_sync(cache);
3261 
3262 	mutex_lock(&sCachesLock);
3263 	sCaches.Remove(cache);
3264 	mutex_unlock(&sCachesLock);
3265 
3266 	mutex_lock(&cache->lock);
3267 
3268 	// wait for all blocks to become unbusy
3269 	wait_for_busy_reading_blocks(cache);
3270 	wait_for_busy_writing_blocks(cache);
3271 
3272 	// free all blocks
3273 
3274 	uint32 cookie = 0;
3275 	cached_block* block;
3276 	while ((block = (cached_block*)hash_remove_first(cache->hash,
3277 			&cookie)) != NULL) {
3278 		cache->FreeBlock(block);
3279 	}
3280 
3281 	// free all transactions (they will all be aborted)
3282 
3283 	cookie = 0;
3284 	cache_transaction* transaction;
3285 	while ((transaction = (cache_transaction*)hash_remove_first(
3286 			cache->transaction_hash, &cookie)) != NULL) {
3287 		delete transaction;
3288 	}
3289 
3290 	delete cache;
3291 }
3292 
3293 
3294 void*
3295 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3296 {
3297 	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3298 		readOnly);
3299 	if (cache == NULL)
3300 		return NULL;
3301 
3302 	if (cache->Init() != B_OK) {
3303 		delete cache;
3304 		return NULL;
3305 	}
3306 
3307 	MutexLocker _(sCachesLock);
3308 	sCaches.Add(cache);
3309 
3310 	return cache;
3311 }
3312 
3313 
3314 status_t
3315 block_cache_sync(void* _cache)
3316 {
3317 	block_cache* cache = (block_cache*)_cache;
3318 
3319 	// We will sync all dirty blocks to disk that have a completed
3320 	// transaction or no transaction only
3321 
3322 	MutexLocker locker(&cache->lock);
3323 
3324 	BlockWriter writer(cache);
3325 	hash_iterator iterator;
3326 	hash_open(cache->hash, &iterator);
3327 
3328 	cached_block* block;
3329 	while ((block = (cached_block*)hash_next(cache->hash, &iterator)) != NULL) {
3330 		if (block->CanBeWritten())
3331 			writer.Add(block);
3332 	}
3333 
3334 	hash_close(cache->hash, &iterator, false);
3335 
3336 	status_t status = writer.Write();
3337 
3338 	locker.Unlock();
3339 
3340 	wait_for_notifications(cache);
3341 		// make sure that all pending TRANSACTION_WRITTEN notifications
3342 		// are handled after we return
3343 	return status;
3344 }
3345 
3346 
3347 status_t
3348 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3349 {
3350 	block_cache* cache = (block_cache*)_cache;
3351 
3352 	// We will sync all dirty blocks to disk that have a completed
3353 	// transaction or no transaction only
3354 
3355 	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3356 		panic("block_cache_sync_etc: invalid block number %Ld (max %Ld)",
3357 			blockNumber, cache->max_blocks - 1);
3358 		return B_BAD_VALUE;
3359 	}
3360 
3361 	MutexLocker locker(&cache->lock);
3362 	BlockWriter writer(cache);
3363 
3364 	for (; numBlocks > 0; numBlocks--, blockNumber++) {
3365 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
3366 			&blockNumber);
3367 		if (block == NULL)
3368 			continue;
3369 
3370 		if (block->CanBeWritten())
3371 			writer.Add(block);
3372 	}
3373 
3374 	status_t status = writer.Write();
3375 
3376 	locker.Unlock();
3377 
3378 	wait_for_notifications(cache);
3379 		// make sure that all pending TRANSACTION_WRITTEN notifications
3380 		// are handled after we return
3381 	return status;
3382 }
3383 
3384 
3385 /*!	Discards a block from the current transaction or from the cache.
3386 	You have to call this function when you no longer use a block, ie. when it
3387 	might be reclaimed by the file cache in order to make sure they won't
3388 	interfere.
3389 */
3390 void
3391 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3392 {
3393 	// TODO: this could be a nice place to issue the ATA trim command
3394 	block_cache* cache = (block_cache*)_cache;
3395 	TransactionLocker locker(cache);
3396 
3397 	BlockWriter writer(cache);
3398 
3399 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3400 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
3401 			&blockNumber);
3402 		if (block != NULL && block->previous_transaction != NULL)
3403 			writer.Add(block);
3404 	}
3405 
3406 	writer.Write();
3407 		// TODO: this can fail, too!
3408 
3409 	blockNumber -= numBlocks;
3410 		// reset blockNumber to its original value
3411 
3412 	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3413 		cached_block* block = (cached_block*)hash_lookup(cache->hash,
3414 			&blockNumber);
3415 		if (block == NULL)
3416 			continue;
3417 
3418 		ASSERT(block->previous_transaction == NULL);
3419 
3420 		if (block->unused) {
3421 			cache->unused_blocks.Remove(block);
3422 			cache->unused_block_count--;
3423 			cache->RemoveBlock(block);
3424 		} else {
3425 			if (block->transaction != NULL && block->parent_data != NULL
3426 				&& block->parent_data != block->current_data) {
3427 				panic("Discarded block %Ld has already been changed in this "
3428 					"transaction!", blockNumber);
3429 			}
3430 
3431 			// mark it as discarded (in the current transaction only, if any)
3432 			block->discard = true;
3433 		}
3434 	}
3435 }
3436 
3437 
3438 status_t
3439 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3440 {
3441 	block_cache* cache = (block_cache*)_cache;
3442 	MutexLocker locker(&cache->lock);
3443 
3444 	if (cache->read_only) {
3445 		panic("tried to make block writable on a read-only cache!");
3446 		return B_ERROR;
3447 	}
3448 
3449 	// TODO: this can be done better!
3450 	void* block = get_writable_cached_block(cache, blockNumber,
3451 		blockNumber, 1, transaction, false);
3452 	if (block != NULL) {
3453 		put_cached_block((block_cache*)_cache, blockNumber);
3454 		return B_OK;
3455 	}
3456 
3457 	return B_ERROR;
3458 }
3459 
3460 
3461 void*
3462 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3463 	off_t length, int32 transaction)
3464 {
3465 	block_cache* cache = (block_cache*)_cache;
3466 	MutexLocker locker(&cache->lock);
3467 
3468 	TRACE(("block_cache_get_writable_etc(block = %Ld, transaction = %ld)\n",
3469 		blockNumber, transaction));
3470 	if (cache->read_only)
3471 		panic("tried to get writable block on a read-only cache!");
3472 
3473 	return get_writable_cached_block(cache, blockNumber, base, length,
3474 		transaction, false);
3475 }
3476 
3477 
3478 void*
3479 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3480 {
3481 	return block_cache_get_writable_etc(_cache, blockNumber,
3482 		blockNumber, 1, transaction);
3483 }
3484 
3485 
3486 void*
3487 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3488 {
3489 	block_cache* cache = (block_cache*)_cache;
3490 	MutexLocker locker(&cache->lock);
3491 
3492 	TRACE(("block_cache_get_empty(block = %Ld, transaction = %ld)\n",
3493 		blockNumber, transaction));
3494 	if (cache->read_only)
3495 		panic("tried to get empty writable block on a read-only cache!");
3496 
3497 	return get_writable_cached_block((block_cache*)_cache, blockNumber,
3498 		blockNumber, 1, transaction, true);
3499 }
3500 
3501 
3502 const void*
3503 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
3504 {
3505 	block_cache* cache = (block_cache*)_cache;
3506 	MutexLocker locker(&cache->lock);
3507 	bool allocated;
3508 
3509 	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
3510 	if (block == NULL)
3511 		return NULL;
3512 
3513 #if BLOCK_CACHE_DEBUG_CHANGED
3514 	if (block->compare == NULL)
3515 		block->compare = cache->Allocate();
3516 	if (block->compare != NULL)
3517 		memcpy(block->compare, block->current_data, cache->block_size);
3518 #endif
3519 	TB(Get(cache, block));
3520 
3521 	return block->current_data;
3522 }
3523 
3524 
3525 const void*
3526 block_cache_get(void* _cache, off_t blockNumber)
3527 {
3528 	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
3529 }
3530 
3531 
3532 /*!	Changes the internal status of a writable block to \a dirty. This can be
3533 	helpful in case you realize you don't need to change that block anymore
3534 	for whatever reason.
3535 
3536 	Note, you must only use this function on blocks that were acquired
3537 	writable!
3538 */
3539 status_t
3540 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3541 	int32 transaction)
3542 {
3543 	block_cache* cache = (block_cache*)_cache;
3544 	MutexLocker locker(&cache->lock);
3545 
3546 	cached_block* block = (cached_block*)hash_lookup(cache->hash,
3547 		&blockNumber);
3548 	if (block == NULL)
3549 		return B_BAD_VALUE;
3550 	if (block->is_dirty == dirty) {
3551 		// there is nothing to do for us
3552 		return B_OK;
3553 	}
3554 
3555 	// TODO: not yet implemented
3556 	if (dirty)
3557 		panic("block_cache_set_dirty(): not yet implemented that way!\n");
3558 
3559 	return B_OK;
3560 }
3561 
3562 
3563 void
3564 block_cache_put(void* _cache, off_t blockNumber)
3565 {
3566 	block_cache* cache = (block_cache*)_cache;
3567 	MutexLocker locker(&cache->lock);
3568 
3569 	put_cached_block(cache, blockNumber);
3570 }
3571 
3572