1 /*
2 * Copyright 2004-2020, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7 #include <block_cache.h>
8
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13 #include <sys/uio.h>
14
15 #include <KernelExport.h>
16 #include <fs_cache.h>
17
18 #include <condition_variable.h>
19 #include <lock.h>
20 #include <low_resource_manager.h>
21 #include <slab/Slab.h>
22 #include <tracing.h>
23 #include <util/kernel_cpp.h>
24 #include <util/DoublyLinkedList.h>
25 #include <util/AutoLock.h>
26 #include <StackOrHeapArray.h>
27 #include <vm/vm_page.h>
28
29 #ifndef BUILDING_USERLAND_FS_SERVER
30 #include "IORequest.h"
31 #endif // !BUILDING_USERLAND_FS_SERVER
32 #include "kernel_debug_config.h"
33
34
35 // TODO: this is a naive but growing implementation to test the API:
36 // block reading/writing is not at all optimized for speed, it will
37 // just read and write single blocks.
38 // TODO: the retrieval/copy of the original data could be delayed until the
39 // new data must be written, ie. in low memory situations.
40
41 #ifdef _KERNEL_MODE
42 # define TRACE_ALWAYS(x...) dprintf(x)
43 #else
44 # define TRACE_ALWAYS(x...) printf(x)
45 #endif
46
47 //#define TRACE_BLOCK_CACHE
48 #ifdef TRACE_BLOCK_CACHE
49 # define TRACE(x) TRACE_ALWAYS(x)
50 #else
51 # define TRACE(x) ;
52 #endif
53
54
55 // This macro is used for fatal situations that are acceptable in a running
56 // system, like out of memory situations - should only panic for debugging.
57 #define FATAL(x) panic x
58
59 static const bigtime_t kTransactionIdleTime = 2000000LL;
60 // a transaction is considered idle after 2 seconds of inactivity
61
62
63 namespace {
64
65 struct cache_transaction;
66 struct cached_block;
67 struct block_cache;
68 typedef DoublyLinkedListLink<cached_block> block_link;
69
70 struct cached_block {
71 cached_block* next; // next in hash
72 cached_block* transaction_next;
73 block_link link;
74 off_t block_number;
75 void* current_data;
76 // The data that is seen by everyone using the API; this one is always
77 // present.
78 void* original_data;
79 // When in a transaction, this contains the original data from before
80 // the transaction.
81 void* parent_data;
82 // This is a lazily alloced buffer that represents the contents of the
83 // block in the parent transaction. It may point to current_data if the
84 // contents have been changed only in the parent transaction, or, if the
85 // block has been changed in the current sub transaction already, to a
86 // new block containing the contents changed in the parent transaction.
87 // If this is NULL, the block has not been changed in the parent
88 // transaction at all.
89 #if BLOCK_CACHE_DEBUG_CHANGED
90 void* compare;
91 #endif
92 int32 ref_count;
93 int32 last_accessed;
94 bool busy_reading : 1;
95 bool busy_writing : 1;
96 bool is_writing : 1;
97 // Block has been checked out for writing without transactions, and
98 // cannot be written back if set
99 bool is_dirty : 1;
100 bool unused : 1;
101 bool discard : 1;
102 bool busy_reading_waiters : 1;
103 bool busy_writing_waiters : 1;
104 cache_transaction* transaction;
105 // This is the current active transaction, if any, the block is
106 // currently in (meaning was changed as a part of it).
107 cache_transaction* previous_transaction;
108 // This is set to the last transaction that was ended containing this
109 // block. In this case, the block has not yet written back yet, and
110 // the changed data is either in current_data, or original_data -- the
111 // latter if the block is already being part of another transaction.
112 // There can only be one previous transaction, so when the active
113 // transaction ends, the changes of the previous transaction have to
114 // be written back before that transaction becomes the next previous
115 // transaction.
116
117 bool CanBeWritten() const;
LastAccess__anon7a06ab4d0111::cached_block118 int32 LastAccess() const
119 { return system_time() / 1000000L - last_accessed; }
120 };
121
122 typedef DoublyLinkedList<cached_block,
123 DoublyLinkedListMemberGetLink<cached_block,
124 &cached_block::link> > block_list;
125
126 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
127 static inline void* operator new(size_t size);
128 static inline void operator delete(void* block);
129
130 int32 transaction_id;
131 int32 events_pending;
132 int32 events;
133 transaction_notification_hook hook;
134 void* data;
135 bool delete_after_event;
136 };
137
138 typedef DoublyLinkedList<cache_notification> NotificationList;
139
140 static object_cache* sCacheNotificationCache;
141
142 struct cache_listener;
143 typedef DoublyLinkedListLink<cache_listener> listener_link;
144
145 struct cache_listener : cache_notification {
146 listener_link link;
147 };
148
149 typedef DoublyLinkedList<cache_listener,
150 DoublyLinkedListMemberGetLink<cache_listener,
151 &cache_listener::link> > ListenerList;
152
153 void*
operator new(size_t size)154 cache_notification::operator new(size_t size)
155 {
156 // We can't really know whether something is a cache_notification or a
157 // cache_listener at runtime, so we just use one object_cache for both
158 // with the size set to that of the (slightly larger) cache_listener.
159 // In practice, the vast majority of cache_notifications are really
160 // cache_listeners, so this is a more than acceptable trade-off.
161 ASSERT(size <= sizeof(cache_listener));
162 return object_cache_alloc(sCacheNotificationCache, 0);
163 }
164
165 void
operator delete(void * block)166 cache_notification::operator delete(void* block)
167 {
168 object_cache_free(sCacheNotificationCache, block, 0);
169 }
170
171
172 struct BlockHash {
173 typedef off_t KeyType;
174 typedef cached_block ValueType;
175
HashKey__anon7a06ab4d0111::BlockHash176 size_t HashKey(KeyType key) const
177 {
178 return key;
179 }
180
Hash__anon7a06ab4d0111::BlockHash181 size_t Hash(ValueType* block) const
182 {
183 return block->block_number;
184 }
185
Compare__anon7a06ab4d0111::BlockHash186 bool Compare(KeyType key, ValueType* block) const
187 {
188 return block->block_number == key;
189 }
190
GetLink__anon7a06ab4d0111::BlockHash191 ValueType*& GetLink(ValueType* value) const
192 {
193 return value->next;
194 }
195 };
196
197 typedef BOpenHashTable<BlockHash> BlockTable;
198
199
200 struct TransactionHash {
201 typedef int32 KeyType;
202 typedef cache_transaction ValueType;
203
HashKey__anon7a06ab4d0111::TransactionHash204 size_t HashKey(KeyType key) const
205 {
206 return key;
207 }
208
209 size_t Hash(ValueType* transaction) const;
210 bool Compare(KeyType key, ValueType* transaction) const;
211 ValueType*& GetLink(ValueType* value) const;
212 };
213
214 typedef BOpenHashTable<TransactionHash> TransactionTable;
215
216
217 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
218 BlockTable* hash;
219 mutex lock;
220 const int fd;
221 off_t max_blocks;
222 const size_t block_size;
223 int32 next_transaction_id;
224 cache_transaction* last_transaction;
225 TransactionTable* transaction_hash;
226
227 object_cache* buffer_cache;
228 block_list unused_blocks;
229 uint32 unused_block_count;
230
231 ConditionVariable busy_reading_condition;
232 uint32 busy_reading_count;
233 bool busy_reading_waiters;
234
235 ConditionVariable busy_writing_condition;
236 uint32 busy_writing_count;
237 bool busy_writing_waiters;
238
239 bigtime_t last_block_write;
240 bigtime_t last_block_write_duration;
241
242 uint32 num_dirty_blocks;
243 const bool read_only;
244
245 NotificationList pending_notifications;
246 ConditionVariable condition_variable;
247
248 block_cache(int fd, off_t numBlocks, size_t blockSize,
249 bool readOnly);
250 ~block_cache();
251
252 status_t Init();
253
254 void Free(void* buffer);
255 void* Allocate();
256 void FreeBlock(cached_block* block);
257 cached_block* NewBlock(off_t blockNumber);
258 void FreeBlockParentData(cached_block* block);
259
260 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
261 void RemoveBlock(cached_block* block);
262 void DiscardBlock(cached_block* block);
263
264 private:
265 static void _LowMemoryHandler(void* data, uint32 resources,
266 int32 level);
267 cached_block* _GetUnusedBlock();
268 };
269
270 struct cache_transaction {
271 cache_transaction();
272
273 cache_transaction* next;
274 int32 id;
275 int32 num_blocks;
276 int32 main_num_blocks;
277 int32 sub_num_blocks;
278 cached_block* first_block;
279 block_list blocks;
280 ListenerList listeners;
281 bool open;
282 bool has_sub_transaction;
283 bigtime_t last_used;
284 int32 busy_writing_count;
285 };
286
287
288 class BlockWriter {
289 public:
290 BlockWriter(block_cache* cache,
291 size_t max = SIZE_MAX);
292 ~BlockWriter();
293
294 bool Add(cached_block* block,
295 cache_transaction* transaction = NULL);
296 bool Add(cache_transaction* transaction,
297 bool& hasLeftOvers);
298
299 status_t Write(cache_transaction* transaction = NULL,
300 bool canUnlock = true);
301
DeletedTransaction() const302 bool DeletedTransaction() const
303 { return fDeletedTransaction; }
304
305 static status_t WriteBlock(block_cache* cache,
306 cached_block* block);
307
308 private:
309 void* _Data(cached_block* block) const;
310 status_t _WriteBlocks(cached_block** blocks, uint32 count);
311 void _BlockDone(cached_block* block,
312 cache_transaction* transaction);
313 void _UnmarkWriting(cached_block* block);
314
315 static int _CompareBlocks(const void* _blockA,
316 const void* _blockB);
317
318 private:
319 static const size_t kBufferSize = 64;
320
321 block_cache* fCache;
322 cached_block* fBuffer[kBufferSize];
323 cached_block** fBlocks;
324 size_t fCount;
325 size_t fTotal;
326 size_t fCapacity;
327 size_t fMax;
328 status_t fStatus;
329 bool fDeletedTransaction;
330 };
331
332
333 #ifndef BUILDING_USERLAND_FS_SERVER
334 class BlockPrefetcher {
335 public:
336 BlockPrefetcher(block_cache* cache, off_t fBlockNumber,
337 size_t numBlocks);
338 ~BlockPrefetcher();
339
340 status_t Allocate();
341 status_t ReadAsync(MutexLocker& cacheLocker);
342
NumAllocated()343 size_t NumAllocated() { return fNumAllocated; }
344
345 private:
346 static void _IOFinishedCallback(void* cookie, io_request* request,
347 status_t status, bool partialTransfer,
348 generic_size_t bytesTransferred);
349 void _IOFinished(status_t status, generic_size_t bytesTransferred);
350
351 void _RemoveAllocated(size_t unbusyCount, size_t removeCount);
352
353 private:
354 block_cache* fCache;
355 off_t fBlockNumber;
356 size_t fNumRequested;
357 size_t fNumAllocated;
358 cached_block** fBlocks;
359 generic_io_vec* fDestVecs;
360 };
361 #endif // !BUILDING_USERLAND_FS_SERVER
362
363
364 class TransactionLocking {
365 public:
Lock(block_cache * cache)366 inline bool Lock(block_cache* cache)
367 {
368 mutex_lock(&cache->lock);
369
370 while (cache->busy_writing_count != 0) {
371 // wait for all blocks to be written
372 ConditionVariableEntry entry;
373 cache->busy_writing_condition.Add(&entry);
374 cache->busy_writing_waiters = true;
375
376 mutex_unlock(&cache->lock);
377
378 entry.Wait();
379
380 mutex_lock(&cache->lock);
381 }
382
383 return true;
384 }
385
Unlock(block_cache * cache)386 inline void Unlock(block_cache* cache)
387 {
388 mutex_unlock(&cache->lock);
389 }
390 };
391
392 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
393
394 } // namespace
395
396
397 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
398 namespace BlockTracing {
399
400 class Action : public AbstractTraceEntry {
401 public:
Action(block_cache * cache,cached_block * block)402 Action(block_cache* cache, cached_block* block)
403 :
404 fCache(cache),
405 fBlockNumber(block->block_number),
406 fIsDirty(block->is_dirty),
407 fHasOriginal(block->original_data != NULL),
408 fHasParent(block->parent_data != NULL),
409 fTransactionID(-1),
410 fPreviousID(-1)
411 {
412 if (block->transaction != NULL)
413 fTransactionID = block->transaction->id;
414 if (block->previous_transaction != NULL)
415 fPreviousID = block->previous_transaction->id;
416 }
417
AddDump(TraceOutput & out)418 virtual void AddDump(TraceOutput& out)
419 {
420 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
421 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
422 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
423 fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
424 }
425
426 virtual const char* _Action() const = 0;
427
428 private:
429 block_cache* fCache;
430 uint64 fBlockNumber;
431 bool fIsDirty;
432 bool fHasOriginal;
433 bool fHasParent;
434 int32 fTransactionID;
435 int32 fPreviousID;
436 };
437
438 class Get : public Action {
439 public:
Get(block_cache * cache,cached_block * block)440 Get(block_cache* cache, cached_block* block)
441 :
442 Action(cache, block)
443 {
444 Initialized();
445 }
446
_Action() const447 virtual const char* _Action() const { return "get"; }
448 };
449
450 class Put : public Action {
451 public:
Put(block_cache * cache,cached_block * block)452 Put(block_cache* cache, cached_block* block)
453 :
454 Action(cache, block)
455 {
456 Initialized();
457 }
458
_Action() const459 virtual const char* _Action() const { return "put"; }
460 };
461
462 class Read : public Action {
463 public:
Read(block_cache * cache,cached_block * block)464 Read(block_cache* cache, cached_block* block)
465 :
466 Action(cache, block)
467 {
468 Initialized();
469 }
470
_Action() const471 virtual const char* _Action() const { return "read"; }
472 };
473
474 class Write : public Action {
475 public:
Write(block_cache * cache,cached_block * block)476 Write(block_cache* cache, cached_block* block)
477 :
478 Action(cache, block)
479 {
480 Initialized();
481 }
482
_Action() const483 virtual const char* _Action() const { return "write"; }
484 };
485
486 class Flush : public Action {
487 public:
Flush(block_cache * cache,cached_block * block,bool getUnused=false)488 Flush(block_cache* cache, cached_block* block, bool getUnused = false)
489 :
490 Action(cache, block),
491 fGetUnused(getUnused)
492 {
493 Initialized();
494 }
495
_Action() const496 virtual const char* _Action() const
497 { return fGetUnused ? "get-unused" : "flush"; }
498
499 private:
500 bool fGetUnused;
501 };
502
503 class Error : public AbstractTraceEntry {
504 public:
Error(block_cache * cache,uint64 blockNumber,const char * message,status_t status=B_OK)505 Error(block_cache* cache, uint64 blockNumber, const char* message,
506 status_t status = B_OK)
507 :
508 fCache(cache),
509 fBlockNumber(blockNumber),
510 fMessage(message),
511 fStatus(status)
512 {
513 Initialized();
514 }
515
AddDump(TraceOutput & out)516 virtual void AddDump(TraceOutput& out)
517 {
518 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
519 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
520 fStatus != B_OK ? strerror(fStatus) : "");
521 }
522
523 private:
524 block_cache* fCache;
525 uint64 fBlockNumber;
526 const char* fMessage;
527 status_t fStatus;
528 };
529
530 #if BLOCK_CACHE_BLOCK_TRACING >= 2
531 class BlockData : public AbstractTraceEntry {
532 public:
533 enum {
534 kCurrent = 0x01,
535 kParent = 0x02,
536 kOriginal = 0x04
537 };
538
BlockData(block_cache * cache,cached_block * block,const char * message)539 BlockData(block_cache* cache, cached_block* block, const char* message)
540 :
541 fCache(cache),
542 fSize(cache->block_size),
543 fBlockNumber(block->block_number),
544 fMessage(message)
545 {
546 _Allocate(fCurrent, block->current_data);
547 _Allocate(fParent, block->parent_data);
548 _Allocate(fOriginal, block->original_data);
549
550 #if KTRACE_PRINTF_STACK_TRACE
551 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
552 false);
553 #endif
554
555 Initialized();
556 }
557
AddDump(TraceOutput & out)558 virtual void AddDump(TraceOutput& out)
559 {
560 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
561 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
562 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
563 fMessage);
564 }
565
566 #if KTRACE_PRINTF_STACK_TRACE
DumpStackTrace(TraceOutput & out)567 virtual void DumpStackTrace(TraceOutput& out)
568 {
569 out.PrintStackTrace(fStackTrace);
570 }
571 #endif
572
DumpBlocks(uint32 which,uint32 offset,uint32 size)573 void DumpBlocks(uint32 which, uint32 offset, uint32 size)
574 {
575 if ((which & kCurrent) != 0)
576 DumpBlock(kCurrent, offset, size);
577 if ((which & kParent) != 0)
578 DumpBlock(kParent, offset, size);
579 if ((which & kOriginal) != 0)
580 DumpBlock(kOriginal, offset, size);
581 }
582
DumpBlock(uint32 which,uint32 offset,uint32 size)583 void DumpBlock(uint32 which, uint32 offset, uint32 size)
584 {
585 if (offset > fSize) {
586 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
587 return;
588 }
589 if (offset + size > fSize)
590 size = fSize - offset;
591
592 const char* label;
593 uint8* data;
594
595 if ((which & kCurrent) != 0) {
596 label = "current";
597 data = fCurrent;
598 } else if ((which & kParent) != 0) {
599 label = "parent";
600 data = fParent;
601 } else if ((which & kOriginal) != 0) {
602 label = "original";
603 data = fOriginal;
604 } else
605 return;
606
607 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
608
609 static const uint32 kBlockSize = 16;
610 data += offset;
611
612 for (uint32 i = 0; i < size;) {
613 int start = i;
614
615 kprintf(" %04" B_PRIx32 " ", i);
616 for (; i < start + kBlockSize; i++) {
617 if (!(i % 4))
618 kprintf(" ");
619
620 if (i >= size)
621 kprintf(" ");
622 else
623 kprintf("%02x", data[i]);
624 }
625
626 kprintf("\n");
627 }
628 }
629
630 private:
_Allocate(uint8 * & target,void * source)631 void _Allocate(uint8*& target, void* source)
632 {
633 if (source == NULL) {
634 target = NULL;
635 return;
636 }
637
638 target = alloc_tracing_buffer_memcpy(source, fSize, false);
639 }
640
641 block_cache* fCache;
642 uint32 fSize;
643 uint64 fBlockNumber;
644 const char* fMessage;
645 uint8* fCurrent;
646 uint8* fParent;
647 uint8* fOriginal;
648 #if KTRACE_PRINTF_STACK_TRACE
649 tracing_stack_trace* fStackTrace;
650 #endif
651 };
652 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2
653
654 } // namespace BlockTracing
655
656 # define TB(x) new(std::nothrow) BlockTracing::x;
657 #else
658 # define TB(x) ;
659 #endif
660
661 #if BLOCK_CACHE_BLOCK_TRACING >= 2
662 # define TB2(x) new(std::nothrow) BlockTracing::x;
663 #else
664 # define TB2(x) ;
665 #endif
666
667
668 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
669 namespace TransactionTracing {
670
671 class Action : public AbstractTraceEntry {
672 public:
Action(const char * label,block_cache * cache,cache_transaction * transaction)673 Action(const char* label, block_cache* cache,
674 cache_transaction* transaction)
675 :
676 fCache(cache),
677 fTransaction(transaction),
678 fID(transaction->id),
679 fSub(transaction->has_sub_transaction),
680 fNumBlocks(transaction->num_blocks),
681 fSubNumBlocks(transaction->sub_num_blocks)
682 {
683 strlcpy(fLabel, label, sizeof(fLabel));
684 Initialized();
685 }
686
AddDump(TraceOutput & out)687 virtual void AddDump(TraceOutput& out)
688 {
689 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
690 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
691 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
692 }
693
694 private:
695 char fLabel[12];
696 block_cache* fCache;
697 cache_transaction* fTransaction;
698 int32 fID;
699 bool fSub;
700 int32 fNumBlocks;
701 int32 fSubNumBlocks;
702 };
703
704 class Detach : public AbstractTraceEntry {
705 public:
Detach(block_cache * cache,cache_transaction * transaction,cache_transaction * newTransaction)706 Detach(block_cache* cache, cache_transaction* transaction,
707 cache_transaction* newTransaction)
708 :
709 fCache(cache),
710 fTransaction(transaction),
711 fID(transaction->id),
712 fSub(transaction->has_sub_transaction),
713 fNewTransaction(newTransaction),
714 fNewID(newTransaction->id)
715 {
716 Initialized();
717 }
718
AddDump(TraceOutput & out)719 virtual void AddDump(TraceOutput& out)
720 {
721 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
722 "from transaction %p (id %" B_PRId32 ")%s",
723 fCache, fNewTransaction, fNewID, fTransaction, fID,
724 fSub ? " sub" : "");
725 }
726
727 private:
728 block_cache* fCache;
729 cache_transaction* fTransaction;
730 int32 fID;
731 bool fSub;
732 cache_transaction* fNewTransaction;
733 int32 fNewID;
734 };
735
736 class Abort : public AbstractTraceEntry {
737 public:
Abort(block_cache * cache,cache_transaction * transaction)738 Abort(block_cache* cache, cache_transaction* transaction)
739 :
740 fCache(cache),
741 fTransaction(transaction),
742 fID(transaction->id),
743 fNumBlocks(0)
744 {
745 bool isSub = transaction->has_sub_transaction;
746 fNumBlocks = isSub ? transaction->sub_num_blocks
747 : transaction->num_blocks;
748 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
749 if (fBlocks != NULL) {
750 cached_block* block = transaction->first_block;
751 for (int32 i = 0; block != NULL && i < fNumBlocks;
752 block = block->transaction_next) {
753 fBlocks[i++] = block->block_number;
754 }
755 } else
756 fNumBlocks = 0;
757
758 #if KTRACE_PRINTF_STACK_TRACE
759 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
760 false);
761 #endif
762
763 Initialized();
764 }
765
AddDump(TraceOutput & out)766 virtual void AddDump(TraceOutput& out)
767 {
768 out.Print("block cache %p, abort transaction "
769 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
770 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
771 out.Print(" %" B_PRIdOFF, fBlocks[i]);
772 }
773
774 #if KTRACE_PRINTF_STACK_TRACE
DumpStackTrace(TraceOutput & out)775 virtual void DumpStackTrace(TraceOutput& out)
776 {
777 out.PrintStackTrace(fStackTrace);
778 }
779 #endif
780
781 private:
782 block_cache* fCache;
783 cache_transaction* fTransaction;
784 int32 fID;
785 off_t* fBlocks;
786 int32 fNumBlocks;
787 #if KTRACE_PRINTF_STACK_TRACE
788 tracing_stack_trace* fStackTrace;
789 #endif
790 };
791
792 } // namespace TransactionTracing
793
794 # define T(x) new(std::nothrow) TransactionTracing::x;
795 #else
796 # define T(x) ;
797 #endif
798
799
800 static DoublyLinkedList<block_cache> sCaches;
801 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
802 static mutex sCachesMemoryUseLock
803 = MUTEX_INITIALIZER("block caches memory use");
804 static size_t sUsedMemory;
805 static sem_id sEventSemaphore;
806 static mutex sNotificationsLock
807 = MUTEX_INITIALIZER("block cache notifications");
808 static thread_id sNotifierWriterThread;
809 static DoublyLinkedListLink<block_cache> sMarkCache;
810 // TODO: this only works if the link is the first entry of block_cache
811 static object_cache* sBlockCache;
812
813
814 static void mark_block_busy_reading(block_cache* cache, cached_block* block);
815 static void mark_block_unbusy_reading(block_cache* cache, cached_block* block);
816
817
818 // #pragma mark - notifications/listener
819
820
821 /*! Checks whether or not this is an event that closes a transaction. */
822 static inline bool
is_closing_event(int32 event)823 is_closing_event(int32 event)
824 {
825 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
826 }
827
828
829 static inline bool
is_written_event(int32 event)830 is_written_event(int32 event)
831 {
832 return (event & TRANSACTION_WRITTEN) != 0;
833 }
834
835
836 /*! From the specified \a notification, it will remove the lowest pending
837 event, and return that one in \a _event.
838 If there is no pending event anymore, it will return \c false.
839 */
840 static bool
get_next_pending_event(cache_notification * notification,int32 * _event)841 get_next_pending_event(cache_notification* notification, int32* _event)
842 {
843 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
844 int32 pending = atomic_and(¬ification->events_pending,
845 ~eventMask);
846
847 bool more = (pending & ~eventMask) != 0;
848
849 if ((pending & eventMask) != 0) {
850 *_event = eventMask;
851 return more;
852 }
853 }
854
855 return false;
856 }
857
858
859 static void
flush_pending_notifications(block_cache * cache)860 flush_pending_notifications(block_cache* cache)
861 {
862 ASSERT_LOCKED_MUTEX(&sCachesLock);
863
864 while (true) {
865 MutexLocker locker(sNotificationsLock);
866
867 cache_notification* notification = cache->pending_notifications.Head();
868 if (notification == NULL)
869 return;
870
871 bool deleteAfterEvent = false;
872 int32 event = -1;
873 if (!get_next_pending_event(notification, &event)) {
874 // remove the notification if this was the last pending event
875 cache->pending_notifications.Remove(notification);
876 deleteAfterEvent = notification->delete_after_event;
877 }
878
879 if (event >= 0) {
880 // Notify listener, we need to copy the notification, as it might
881 // be removed when we unlock the list.
882 cache_notification copy = *notification;
883 locker.Unlock();
884
885 copy.hook(copy.transaction_id, event, copy.data);
886
887 locker.Lock();
888 }
889
890 if (deleteAfterEvent)
891 delete notification;
892 }
893 }
894
895
896 /*! Flushes all pending notifications by calling the appropriate hook
897 functions.
898 Must not be called with a cache lock held.
899 */
900 static void
flush_pending_notifications()901 flush_pending_notifications()
902 {
903 MutexLocker _(sCachesLock);
904
905 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
906 while (iterator.HasNext()) {
907 block_cache* cache = iterator.Next();
908
909 flush_pending_notifications(cache);
910 }
911 }
912
913
914 /*! Initializes the \a notification as specified. */
915 static void
set_notification(cache_transaction * transaction,cache_notification & notification,int32 events,transaction_notification_hook hook,void * data)916 set_notification(cache_transaction* transaction,
917 cache_notification ¬ification, int32 events,
918 transaction_notification_hook hook, void* data)
919 {
920 notification.transaction_id = transaction != NULL ? transaction->id : -1;
921 notification.events_pending = 0;
922 notification.events = events;
923 notification.hook = hook;
924 notification.data = data;
925 notification.delete_after_event = false;
926 }
927
928
929 /*! Makes sure the notification is deleted. It either deletes it directly,
930 when possible, or marks it for deletion if the notification is pending.
931 */
932 static void
delete_notification(cache_notification * notification)933 delete_notification(cache_notification* notification)
934 {
935 MutexLocker locker(sNotificationsLock);
936
937 if (notification->events_pending != 0)
938 notification->delete_after_event = true;
939 else
940 delete notification;
941 }
942
943
944 /*! Adds the notification to the pending notifications list, or, if it's
945 already part of it, updates its events_pending field.
946 Also marks the notification to be deleted if \a deleteNotification
947 is \c true.
948 Triggers the notifier thread to run.
949 */
950 static void
add_notification(block_cache * cache,cache_notification * notification,int32 event,bool deleteNotification)951 add_notification(block_cache* cache, cache_notification* notification,
952 int32 event, bool deleteNotification)
953 {
954 if (notification->hook == NULL)
955 return;
956
957 int32 pending = atomic_or(¬ification->events_pending, event);
958 if (pending == 0) {
959 // not yet part of the notification list
960 MutexLocker locker(sNotificationsLock);
961 if (deleteNotification)
962 notification->delete_after_event = true;
963 cache->pending_notifications.Add(notification);
964 } else if (deleteNotification) {
965 // we might need to delete it ourselves if we're late
966 delete_notification(notification);
967 }
968
969 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
970 // We're probably still holding some locks that makes rescheduling
971 // not a good idea at this point.
972 }
973
974
975 /*! Notifies all interested listeners of this transaction about the \a event.
976 If \a event is a closing event (ie. TRANSACTION_ENDED, and
977 TRANSACTION_ABORTED), all listeners except those listening to
978 TRANSACTION_WRITTEN will be removed.
979 */
980 static void
notify_transaction_listeners(block_cache * cache,cache_transaction * transaction,int32 event)981 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
982 int32 event)
983 {
984 T(Action("notify", cache, transaction));
985
986 bool isClosing = is_closing_event(event);
987 bool isWritten = is_written_event(event);
988
989 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
990 while (iterator.HasNext()) {
991 cache_listener* listener = iterator.Next();
992
993 bool remove = (isClosing && !is_written_event(listener->events))
994 || (isWritten && is_written_event(listener->events));
995 if (remove)
996 iterator.Remove();
997
998 if ((listener->events & event) != 0)
999 add_notification(cache, listener, event, remove);
1000 else if (remove)
1001 delete_notification(listener);
1002 }
1003 }
1004
1005
1006 /*! Removes and deletes all listeners that are still monitoring this
1007 transaction.
1008 */
1009 static void
remove_transaction_listeners(block_cache * cache,cache_transaction * transaction)1010 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
1011 {
1012 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
1013 while (iterator.HasNext()) {
1014 cache_listener* listener = iterator.Next();
1015 iterator.Remove();
1016
1017 delete_notification(listener);
1018 }
1019 }
1020
1021
1022 static status_t
add_transaction_listener(block_cache * cache,cache_transaction * transaction,int32 events,transaction_notification_hook hookFunction,void * data)1023 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
1024 int32 events, transaction_notification_hook hookFunction, void* data)
1025 {
1026 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
1027 while (iterator.HasNext()) {
1028 cache_listener* listener = iterator.Next();
1029
1030 if (listener->data == data && listener->hook == hookFunction) {
1031 // this listener already exists, just update it
1032 listener->events |= events;
1033 return B_OK;
1034 }
1035 }
1036
1037 cache_listener* listener = new cache_listener;
1038 if (listener == NULL)
1039 return B_NO_MEMORY;
1040
1041 set_notification(transaction, *listener, events, hookFunction, data);
1042 transaction->listeners.Add(listener);
1043 return B_OK;
1044 }
1045
1046
1047 // #pragma mark - private transaction
1048
1049
cache_transaction()1050 cache_transaction::cache_transaction()
1051 {
1052 num_blocks = 0;
1053 main_num_blocks = 0;
1054 sub_num_blocks = 0;
1055 first_block = NULL;
1056 open = true;
1057 has_sub_transaction = false;
1058 last_used = system_time();
1059 busy_writing_count = 0;
1060 }
1061
1062
1063 static void
delete_transaction(block_cache * cache,cache_transaction * transaction)1064 delete_transaction(block_cache* cache, cache_transaction* transaction)
1065 {
1066 if (cache->last_transaction == transaction)
1067 cache->last_transaction = NULL;
1068
1069 remove_transaction_listeners(cache, transaction);
1070 delete transaction;
1071 }
1072
1073
1074 static cache_transaction*
lookup_transaction(block_cache * cache,int32 id)1075 lookup_transaction(block_cache* cache, int32 id)
1076 {
1077 return cache->transaction_hash->Lookup(id);
1078 }
1079
1080
Hash(cache_transaction * transaction) const1081 size_t TransactionHash::Hash(cache_transaction* transaction) const
1082 {
1083 return transaction->id;
1084 }
1085
1086
Compare(int32 key,cache_transaction * transaction) const1087 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
1088 {
1089 return transaction->id == key;
1090 }
1091
1092
GetLink(cache_transaction * value) const1093 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
1094 {
1095 return value->next;
1096 }
1097
1098
1099 /*! Writes back any changes made to blocks in \a transaction that are still
1100 part of a previous transacton.
1101 */
1102 static status_t
write_blocks_in_previous_transaction(block_cache * cache,cache_transaction * transaction)1103 write_blocks_in_previous_transaction(block_cache* cache,
1104 cache_transaction* transaction)
1105 {
1106 BlockWriter writer(cache);
1107
1108 cached_block* block = transaction->first_block;
1109 for (; block != NULL; block = block->transaction_next) {
1110 if (block->previous_transaction != NULL) {
1111 // need to write back pending changes
1112 writer.Add(block);
1113 }
1114 }
1115
1116 return writer.Write();
1117 }
1118
1119
1120 // #pragma mark - cached_block
1121
1122
1123 bool
CanBeWritten() const1124 cached_block::CanBeWritten() const
1125 {
1126 return !busy_writing && !busy_reading
1127 && (previous_transaction != NULL
1128 || (transaction == NULL && is_dirty && !is_writing));
1129 }
1130
1131
1132 // #pragma mark - BlockWriter
1133
1134
BlockWriter(block_cache * cache,size_t max)1135 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1136 :
1137 fCache(cache),
1138 fBlocks(fBuffer),
1139 fCount(0),
1140 fTotal(0),
1141 fCapacity(kBufferSize),
1142 fMax(max),
1143 fStatus(B_OK),
1144 fDeletedTransaction(false)
1145 {
1146 }
1147
1148
~BlockWriter()1149 BlockWriter::~BlockWriter()
1150 {
1151 if (fBlocks != fBuffer)
1152 free(fBlocks);
1153 }
1154
1155
1156 /*! Adds the specified block to the to be written array. If no more blocks can
1157 be added, false is returned, otherwise true.
1158 */
1159 bool
Add(cached_block * block,cache_transaction * transaction)1160 BlockWriter::Add(cached_block* block, cache_transaction* transaction)
1161 {
1162 ASSERT(block->CanBeWritten());
1163
1164 if (fTotal == fMax)
1165 return false;
1166
1167 if (fCount >= fCapacity) {
1168 // Enlarge array if necessary
1169 cached_block** newBlocks;
1170 size_t newCapacity = max_c(256, fCapacity * 2);
1171 if (fBlocks == fBuffer)
1172 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1173 else {
1174 newBlocks = (cached_block**)realloc(fBlocks,
1175 newCapacity * sizeof(void*));
1176 }
1177
1178 if (newBlocks == NULL) {
1179 // Allocating a larger array failed - we need to write back what
1180 // we have synchronously now (this will also clear the array)
1181 Write(transaction, false);
1182 } else {
1183 if (fBlocks == fBuffer)
1184 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1185
1186 fBlocks = newBlocks;
1187 fCapacity = newCapacity;
1188 }
1189 }
1190
1191 fBlocks[fCount++] = block;
1192 fTotal++;
1193 block->busy_writing = true;
1194 fCache->busy_writing_count++;
1195 if (block->previous_transaction != NULL)
1196 block->previous_transaction->busy_writing_count++;
1197
1198 return true;
1199 }
1200
1201
1202 /*! Adds all blocks of the specified transaction to the to be written array.
1203 If no more blocks can be added, false is returned, otherwise true.
1204 */
1205 bool
Add(cache_transaction * transaction,bool & hasLeftOvers)1206 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
1207 {
1208 ASSERT(!transaction->open);
1209
1210 if (transaction->busy_writing_count != 0) {
1211 hasLeftOvers = true;
1212 return true;
1213 }
1214
1215 hasLeftOvers = false;
1216
1217 block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1218 while (cached_block* block = blockIterator.Next()) {
1219 if (!block->CanBeWritten()) {
1220 // This block was already part of a previous transaction within this
1221 // writer
1222 hasLeftOvers = true;
1223 continue;
1224 }
1225 if (!Add(block, transaction))
1226 return false;
1227
1228 if (DeletedTransaction())
1229 break;
1230 }
1231
1232 return true;
1233 }
1234
1235
1236 /*! Cache must be locked when calling this method, but it will be unlocked
1237 while the blocks are written back.
1238 */
1239 status_t
Write(cache_transaction * transaction,bool canUnlock)1240 BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
1241 {
1242 if (fCount == 0)
1243 return B_OK;
1244
1245 if (canUnlock)
1246 mutex_unlock(&fCache->lock);
1247
1248 // Sort blocks in their on-disk order, so we can merge consecutive writes.
1249 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1250 fDeletedTransaction = false;
1251
1252 bigtime_t start = system_time();
1253
1254 for (uint32 i = 0; i < fCount; i++) {
1255 uint32 blocks = 1;
1256 for (; (i + blocks) < fCount && blocks < IOV_MAX; blocks++) {
1257 const uint32 j = i + blocks;
1258 if (fBlocks[j]->block_number != (fBlocks[j - 1]->block_number + 1))
1259 break;
1260 }
1261
1262 status_t status = _WriteBlocks(fBlocks + i, blocks);
1263 if (status != B_OK) {
1264 // propagate to global error handling
1265 if (fStatus == B_OK)
1266 fStatus = status;
1267
1268 for (uint32 j = i; j < (i + blocks); j++) {
1269 _UnmarkWriting(fBlocks[j]);
1270 fBlocks[j] = NULL;
1271 // This block will not be marked clean
1272 }
1273 }
1274
1275 i += (blocks - 1);
1276 }
1277
1278 bigtime_t finish = system_time();
1279
1280 if (canUnlock)
1281 mutex_lock(&fCache->lock);
1282
1283 if (fStatus == B_OK && fCount >= 8) {
1284 fCache->last_block_write = finish;
1285 fCache->last_block_write_duration = (fCache->last_block_write - start)
1286 / fCount;
1287 }
1288
1289 for (uint32 i = 0; i < fCount; i++)
1290 _BlockDone(fBlocks[i], transaction);
1291
1292 fCount = 0;
1293 return fStatus;
1294 }
1295
1296
1297 /*! Writes the specified \a block back to disk. It will always only write back
1298 the oldest change of the block if it is part of more than one transaction.
1299 It will automatically send out TRANSACTION_WRITTEN notices, as well as
1300 delete transactions when they are no longer used, and \a deleteTransaction
1301 is \c true.
1302 */
1303 /*static*/ status_t
WriteBlock(block_cache * cache,cached_block * block)1304 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1305 {
1306 BlockWriter writer(cache);
1307
1308 writer.Add(block);
1309 return writer.Write();
1310 }
1311
1312
1313 void*
_Data(cached_block * block) const1314 BlockWriter::_Data(cached_block* block) const
1315 {
1316 return block->previous_transaction != NULL && block->original_data != NULL
1317 ? block->original_data : block->current_data;
1318 // We first need to write back changes from previous transactions
1319 }
1320
1321
1322 status_t
_WriteBlocks(cached_block ** blocks,uint32 count)1323 BlockWriter::_WriteBlocks(cached_block** blocks, uint32 count)
1324 {
1325 const size_t blockSize = fCache->block_size;
1326
1327 BStackOrHeapArray<iovec, 8> vecs(count);
1328 for (uint32 i = 0; i < count; i++) {
1329 cached_block* block = blocks[i];
1330 ASSERT(block->busy_writing);
1331 ASSERT(i == 0 || block->block_number == (blocks[i - 1]->block_number + 1));
1332
1333 TRACE(("BlockWriter::_WriteBlocks(block %" B_PRIdOFF ", count %" B_PRIu32 ")\n",
1334 block->block_number, count));
1335 TB(Write(fCache, block));
1336 TB2(BlockData(fCache, block, "before write"));
1337
1338 vecs[i].iov_base = _Data(block);
1339 vecs[i].iov_len = blockSize;
1340 }
1341
1342 ssize_t written = writev_pos(fCache->fd,
1343 blocks[0]->block_number * blockSize, vecs, count);
1344
1345 if (written != (ssize_t)(blockSize * count)) {
1346 TB(Error(fCache, block->block_number, "write failed", written));
1347 TRACE_ALWAYS("could not write back %" B_PRIu32 " blocks (start block %" B_PRIdOFF "): %s\n",
1348 count, blocks[0]->block_number, strerror(errno));
1349 if (written < 0)
1350 return errno;
1351
1352 return B_IO_ERROR;
1353 }
1354
1355 return B_OK;
1356 }
1357
1358
1359 void
_BlockDone(cached_block * block,cache_transaction * transaction)1360 BlockWriter::_BlockDone(cached_block* block,
1361 cache_transaction* transaction)
1362 {
1363 if (block == NULL) {
1364 // An error occured when trying to write this block
1365 return;
1366 }
1367
1368 if (fCache->num_dirty_blocks > 0)
1369 fCache->num_dirty_blocks--;
1370
1371 if (_Data(block) == block->current_data)
1372 block->is_dirty = false;
1373
1374 _UnmarkWriting(block);
1375
1376 cache_transaction* previous = block->previous_transaction;
1377 if (previous != NULL) {
1378 previous->blocks.Remove(block);
1379 block->previous_transaction = NULL;
1380
1381 if (block->original_data != NULL && block->transaction == NULL) {
1382 // This block is not part of a transaction, so it does not need
1383 // its original pointer anymore.
1384 fCache->Free(block->original_data);
1385 block->original_data = NULL;
1386 }
1387
1388 // Has the previous transaction been finished with that write?
1389 if (--previous->num_blocks == 0) {
1390 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1391 T(Action("written", fCache, previous));
1392
1393 notify_transaction_listeners(fCache, previous,
1394 TRANSACTION_WRITTEN);
1395
1396 if (transaction != NULL) {
1397 // This function is called while iterating transaction_hash. We
1398 // use RemoveUnchecked so the iterator is still valid. A regular
1399 // Remove can trigger a resize of the hash table which would
1400 // result in the linked items in the table changing order.
1401 fCache->transaction_hash->RemoveUnchecked(transaction);
1402 } else
1403 fCache->transaction_hash->Remove(previous);
1404
1405 delete_transaction(fCache, previous);
1406 fDeletedTransaction = true;
1407 }
1408 }
1409 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1410 // the block is no longer used
1411 ASSERT(block->original_data == NULL && block->parent_data == NULL);
1412 block->unused = true;
1413 fCache->unused_blocks.Add(block);
1414 fCache->unused_block_count++;
1415 }
1416
1417 TB2(BlockData(fCache, block, "after write"));
1418 }
1419
1420
1421 void
_UnmarkWriting(cached_block * block)1422 BlockWriter::_UnmarkWriting(cached_block* block)
1423 {
1424 block->busy_writing = false;
1425 if (block->previous_transaction != NULL)
1426 block->previous_transaction->busy_writing_count--;
1427 fCache->busy_writing_count--;
1428
1429 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1430 || block->busy_writing_waiters) {
1431 fCache->busy_writing_waiters = false;
1432 block->busy_writing_waiters = false;
1433 fCache->busy_writing_condition.NotifyAll();
1434 }
1435 }
1436
1437
1438 /*static*/ int
_CompareBlocks(const void * _blockA,const void * _blockB)1439 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1440 {
1441 cached_block* blockA = *(cached_block**)_blockA;
1442 cached_block* blockB = *(cached_block**)_blockB;
1443
1444 off_t diff = blockA->block_number - blockB->block_number;
1445 if (diff > 0)
1446 return 1;
1447
1448 return diff < 0 ? -1 : 0;
1449 }
1450
1451
1452 #ifndef BUILDING_USERLAND_FS_SERVER
1453 // #pragma mark - BlockPrefetcher
1454
1455
BlockPrefetcher(block_cache * cache,off_t blockNumber,size_t numBlocks)1456 BlockPrefetcher::BlockPrefetcher(block_cache* cache, off_t blockNumber, size_t numBlocks)
1457 :
1458 fCache(cache),
1459 fBlockNumber(blockNumber),
1460 fNumRequested(numBlocks),
1461 fNumAllocated(0)
1462 {
1463 fBlocks = new cached_block*[numBlocks];
1464 fDestVecs = new generic_io_vec[numBlocks];
1465 }
1466
1467
~BlockPrefetcher()1468 BlockPrefetcher::~BlockPrefetcher()
1469 {
1470 delete[] fBlocks;
1471 delete[] fDestVecs;
1472 }
1473
1474
1475 /*! Allocates cached_block objects in preparation for prefetching.
1476 @return If an error is returned, then no blocks have been allocated.
1477 @post Blocks have been constructed (including allocating the current_data member)
1478 but current_data is uninitialized.
1479 */
1480 status_t
Allocate()1481 BlockPrefetcher::Allocate()
1482 {
1483 TRACE(("BlockPrefetcher::Allocate: looking up %" B_PRIuSIZE " blocks, starting with %"
1484 B_PRIdOFF "\n", fNumBlocks, fBlockNumber));
1485
1486 ASSERT_LOCKED_MUTEX(&fCache->lock);
1487
1488 size_t finalNumBlocks = fNumRequested;
1489
1490 // determine whether any requested blocks are already cached
1491 for (size_t i = 0; i < fNumRequested; ++i) {
1492 off_t blockNumIter = fBlockNumber + i;
1493 if (blockNumIter < 0 || blockNumIter >= fCache->max_blocks) {
1494 panic("BlockPrefetcher::Allocate: invalid block number %" B_PRIdOFF " (max %"
1495 B_PRIdOFF ")", blockNumIter, fCache->max_blocks - 1);
1496 return B_BAD_VALUE;
1497 }
1498 cached_block* block = fCache->hash->Lookup(blockNumIter);
1499 if (block != NULL) {
1500 // truncate the request
1501 TRACE(("BlockPrefetcher::Allocate: found an existing block (%" B_PRIdOFF ")\n",
1502 blockNumIter));
1503 fBlocks[i] = NULL;
1504 finalNumBlocks = i;
1505 break;
1506 }
1507 }
1508
1509 // allocate the blocks
1510 for (size_t i = 0; i < finalNumBlocks; ++i) {
1511 cached_block* block = fCache->NewBlock(fBlockNumber + i);
1512 if (block == NULL) {
1513 _RemoveAllocated(0, i);
1514 return B_NO_MEMORY;
1515 }
1516 fCache->hash->Insert(block);
1517
1518 block->unused = true;
1519 fCache->unused_blocks.Add(block);
1520 fCache->unused_block_count++;
1521
1522 fBlocks[i] = block;
1523 }
1524
1525 fNumAllocated = finalNumBlocks;
1526
1527 return B_OK;
1528 }
1529
1530
1531 /*! Schedules reads from disk to cache.
1532 \post The calling object will eventually be deleted by IOFinishedCallback.
1533 */
1534 status_t
ReadAsync(MutexLocker & cacheLocker)1535 BlockPrefetcher::ReadAsync(MutexLocker& cacheLocker)
1536 {
1537 TRACE(("BlockPrefetcher::Read: reading %" B_PRIuSIZE " blocks\n", fNumAllocated));
1538
1539 size_t blockSize = fCache->block_size;
1540 generic_io_vec* vecs = fDestVecs;
1541 for (size_t i = 0; i < fNumAllocated; ++i) {
1542 vecs[i].base = reinterpret_cast<generic_addr_t>(fBlocks[i]->current_data);
1543 vecs[i].length = blockSize;
1544 mark_block_busy_reading(fCache, fBlocks[i]);
1545 }
1546
1547 IORequest* request = new IORequest;
1548 status_t status = request->Init(fBlockNumber * blockSize, vecs, fNumAllocated,
1549 fNumAllocated * blockSize, false, B_DELETE_IO_REQUEST);
1550 if (status != B_OK) {
1551 TB(Error(fCache, fBlockNumber, "IORequest::Init starting here failed", status));
1552 TRACE_ALWAYS("BlockPrefetcher::Read: failed to initialize IO request for %" B_PRIuSIZE
1553 " blocks starting with %" B_PRIdOFF ": %s\n",
1554 fNumAllocated, fBlockNumber, strerror(status));
1555
1556 _RemoveAllocated(fNumAllocated, fNumAllocated);
1557 delete request;
1558 return status;
1559 }
1560
1561 request->SetFinishedCallback(_IOFinishedCallback, this);
1562
1563 // do_fd_io() may invoke callbacks directly, so we need to have unlocked the cache.
1564 cacheLocker.Unlock();
1565
1566 return do_fd_io(fCache->fd, request);
1567 }
1568
1569
1570 /*static*/ void
_IOFinishedCallback(void * cookie,io_request * request,status_t status,bool partialTransfer,generic_size_t bytesTransferred)1571 BlockPrefetcher::_IOFinishedCallback(void* cookie, io_request* request, status_t status,
1572 bool partialTransfer, generic_size_t bytesTransferred)
1573 {
1574 TRACE(("BlockPrefetcher::_IOFinishedCallback: status %s, partial %d\n",
1575 strerror(status), partialTransfer));
1576 ((BlockPrefetcher*)cookie)->_IOFinished(status, bytesTransferred);
1577 }
1578
1579
1580 void
_IOFinished(status_t status,generic_size_t bytesTransferred)1581 BlockPrefetcher::_IOFinished(status_t status, generic_size_t bytesTransferred)
1582 {
1583 MutexLocker locker(&fCache->lock);
1584
1585 if (bytesTransferred < (fNumAllocated * fCache->block_size)) {
1586 _RemoveAllocated(fNumAllocated, fNumAllocated);
1587
1588 TB(Error(cache, fBlockNumber, "prefetch starting here failed", status));
1589 TRACE_ALWAYS("BlockPrefetcher::_IOFinished: transferred only %" B_PRIuGENADDR
1590 " bytes in attempt to read %" B_PRIuSIZE " blocks (start block %" B_PRIdOFF "): %s\n",
1591 bytesTransferred, fNumAllocated, fBlockNumber, strerror(status));
1592 } else {
1593 for (size_t i = 0; i < fNumAllocated; i++) {
1594 TB(Read(cache, fBlockNumber + i));
1595 mark_block_unbusy_reading(fCache, fBlocks[i]);
1596 fBlocks[i]->last_accessed = system_time() / 1000000L;
1597 }
1598 }
1599
1600 delete this;
1601 }
1602
1603
1604 /*! Cleans up blocks that were allocated for prefetching when an in-progress prefetch
1605 is cancelled.
1606 */
1607 void
_RemoveAllocated(size_t unbusyCount,size_t removeCount)1608 BlockPrefetcher::_RemoveAllocated(size_t unbusyCount, size_t removeCount)
1609 {
1610 TRACE(("BlockPrefetcher::_RemoveAllocated: unbusy %" B_PRIuSIZE " and remove %" B_PRIuSIZE
1611 " starting with %" B_PRIdOFF "\n", unbusyCount, removeCount, (*fBlocks)->block_number));
1612
1613 ASSERT_LOCKED_MUTEX(&fCache->lock);
1614
1615 for (size_t i = 0; i < unbusyCount; ++i)
1616 mark_block_unbusy_reading(fCache, fBlocks[i]);
1617
1618 for (size_t i = 0; i < removeCount; ++i) {
1619 ASSERT(fBlocks[i]->is_dirty == false && fBlocks[i]->unused == true);
1620
1621 fCache->unused_blocks.Remove(fBlocks[i]);
1622 fCache->unused_block_count--;
1623
1624 fCache->RemoveBlock(fBlocks[i]);
1625 fBlocks[i] = NULL;
1626 }
1627
1628 fNumAllocated = 0;
1629
1630 return;
1631 }
1632 #endif // !BUILDING_USERLAND_FS_SERVER
1633
1634
1635 // #pragma mark - block_cache
1636
1637
block_cache(int _fd,off_t numBlocks,size_t blockSize,bool readOnly)1638 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1639 bool readOnly)
1640 :
1641 hash(NULL),
1642 fd(_fd),
1643 max_blocks(numBlocks),
1644 block_size(blockSize),
1645 next_transaction_id(1),
1646 last_transaction(NULL),
1647 transaction_hash(NULL),
1648 buffer_cache(NULL),
1649 unused_block_count(0),
1650 busy_reading_count(0),
1651 busy_reading_waiters(false),
1652 busy_writing_count(0),
1653 busy_writing_waiters(0),
1654 last_block_write(0),
1655 last_block_write_duration(0),
1656 num_dirty_blocks(0),
1657 read_only(readOnly)
1658 {
1659 }
1660
1661
1662 /*! Should be called with the cache's lock held. */
~block_cache()1663 block_cache::~block_cache()
1664 {
1665 unregister_low_resource_handler(&_LowMemoryHandler, this);
1666
1667 delete transaction_hash;
1668 delete hash;
1669
1670 delete_object_cache(buffer_cache);
1671
1672 mutex_destroy(&lock);
1673 }
1674
1675
1676 status_t
Init()1677 block_cache::Init()
1678 {
1679 busy_reading_condition.Init(this, "cache block busy_reading");
1680 busy_writing_condition.Init(this, "cache block busy writing");
1681 condition_variable.Init(this, "cache transaction sync");
1682 mutex_init(&lock, "block cache");
1683
1684 buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1685 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1686 if (buffer_cache == NULL)
1687 return B_NO_MEMORY;
1688
1689 hash = new BlockTable();
1690 if (hash == NULL || hash->Init(1024) != B_OK)
1691 return B_NO_MEMORY;
1692
1693 transaction_hash = new(std::nothrow) TransactionTable();
1694 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
1695 return B_NO_MEMORY;
1696
1697 return register_low_resource_handler(&_LowMemoryHandler, this,
1698 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1699 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1700 }
1701
1702
1703 void
Free(void * buffer)1704 block_cache::Free(void* buffer)
1705 {
1706 if (buffer != NULL)
1707 object_cache_free(buffer_cache, buffer, 0);
1708 }
1709
1710
1711 void*
Allocate()1712 block_cache::Allocate()
1713 {
1714 void* block = object_cache_alloc(buffer_cache, 0);
1715 if (block != NULL)
1716 return block;
1717
1718 // recycle existing before allocating a new one
1719 RemoveUnusedBlocks(100);
1720
1721 return object_cache_alloc(buffer_cache, 0);
1722 }
1723
1724
1725 void
FreeBlock(cached_block * block)1726 block_cache::FreeBlock(cached_block* block)
1727 {
1728 Free(block->current_data);
1729
1730 if (block->original_data != NULL || block->parent_data != NULL) {
1731 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1732 block->block_number, block->original_data, block->parent_data);
1733 }
1734
1735 #if BLOCK_CACHE_DEBUG_CHANGED
1736 Free(block->compare);
1737 #endif
1738
1739 object_cache_free(sBlockCache, block, 0);
1740 }
1741
1742
1743 /*! Allocates a new block for \a blockNumber, ready for use */
1744 cached_block*
NewBlock(off_t blockNumber)1745 block_cache::NewBlock(off_t blockNumber)
1746 {
1747 cached_block* block = NULL;
1748
1749 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1750 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1751 // recycle existing instead of allocating a new one
1752 block = _GetUnusedBlock();
1753 }
1754 if (block == NULL) {
1755 block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1756 if (block != NULL) {
1757 block->current_data = Allocate();
1758 if (block->current_data == NULL) {
1759 object_cache_free(sBlockCache, block, 0);
1760 return NULL;
1761 }
1762 } else {
1763 TB(Error(this, blockNumber, "allocation failed"));
1764 TRACE_ALWAYS("block allocation failed, unused list is %sempty.\n",
1765 unused_blocks.IsEmpty() ? "" : "not ");
1766
1767 // allocation failed, try to reuse an unused block
1768 block = _GetUnusedBlock();
1769 if (block == NULL) {
1770 TB(Error(this, blockNumber, "get unused failed"));
1771 FATAL(("could not allocate block!\n"));
1772 return NULL;
1773 }
1774 }
1775 }
1776
1777 block->block_number = blockNumber;
1778 block->ref_count = 0;
1779 block->last_accessed = 0;
1780 block->transaction_next = NULL;
1781 block->transaction = block->previous_transaction = NULL;
1782 block->original_data = NULL;
1783 block->parent_data = NULL;
1784 block->busy_reading = false;
1785 block->busy_writing = false;
1786 block->is_writing = false;
1787 block->is_dirty = false;
1788 block->unused = false;
1789 block->discard = false;
1790 block->busy_reading_waiters = false;
1791 block->busy_writing_waiters = false;
1792 #if BLOCK_CACHE_DEBUG_CHANGED
1793 block->compare = NULL;
1794 #endif
1795
1796 return block;
1797 }
1798
1799
1800 void
FreeBlockParentData(cached_block * block)1801 block_cache::FreeBlockParentData(cached_block* block)
1802 {
1803 ASSERT(block->parent_data != NULL);
1804 if (block->parent_data != block->current_data)
1805 Free(block->parent_data);
1806 block->parent_data = NULL;
1807 }
1808
1809
1810 void
RemoveUnusedBlocks(int32 count,int32 minSecondsOld)1811 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1812 {
1813 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1814
1815 for (block_list::Iterator iterator = unused_blocks.GetIterator();
1816 cached_block* block = iterator.Next();) {
1817 if (minSecondsOld >= block->LastAccess()) {
1818 // The list is sorted by last access
1819 break;
1820 }
1821 if (block->busy_reading || block->busy_writing)
1822 continue;
1823
1824 TB(Flush(this, block));
1825 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1826 block->block_number, block->last_accessed));
1827
1828 // this can only happen if no transactions are used
1829 if (block->is_dirty && !block->discard) {
1830 if (block->busy_writing)
1831 continue;
1832
1833 BlockWriter::WriteBlock(this, block);
1834 }
1835
1836 // remove block from lists
1837 iterator.Remove();
1838 unused_block_count--;
1839 RemoveBlock(block);
1840
1841 if (--count <= 0)
1842 break;
1843 }
1844 }
1845
1846
1847 void
RemoveBlock(cached_block * block)1848 block_cache::RemoveBlock(cached_block* block)
1849 {
1850 hash->Remove(block);
1851 FreeBlock(block);
1852 }
1853
1854
1855 /*! Discards the block from a transaction (this method must not be called
1856 for blocks not part of a transaction).
1857 */
1858 void
DiscardBlock(cached_block * block)1859 block_cache::DiscardBlock(cached_block* block)
1860 {
1861 ASSERT(block->discard);
1862 ASSERT(block->previous_transaction == NULL);
1863
1864 if (block->parent_data != NULL)
1865 FreeBlockParentData(block);
1866
1867 if (block->original_data != NULL) {
1868 Free(block->original_data);
1869 block->original_data = NULL;
1870 }
1871
1872 RemoveBlock(block);
1873 }
1874
1875
1876 void
_LowMemoryHandler(void * data,uint32 resources,int32 level)1877 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1878 {
1879 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1880
1881 // free some blocks according to the low memory state
1882 // (if there is enough memory left, we don't free any)
1883
1884 block_cache* cache = (block_cache*)data;
1885 if (cache->unused_block_count <= 1)
1886 return;
1887
1888 int32 free = 0;
1889 int32 secondsOld = 0;
1890 switch (level) {
1891 case B_NO_LOW_RESOURCE:
1892 return;
1893 case B_LOW_RESOURCE_NOTE:
1894 free = cache->unused_block_count / 4;
1895 secondsOld = 120;
1896 break;
1897 case B_LOW_RESOURCE_WARNING:
1898 free = cache->unused_block_count / 2;
1899 secondsOld = 10;
1900 break;
1901 case B_LOW_RESOURCE_CRITICAL:
1902 free = cache->unused_block_count - 1;
1903 secondsOld = 0;
1904 break;
1905 }
1906
1907 MutexLocker locker(&cache->lock);
1908
1909 if (!locker.IsLocked()) {
1910 // If our block_cache were deleted, it could be that we had
1911 // been called before that deletion went through, therefore,
1912 // acquiring its lock might fail.
1913 return;
1914 }
1915
1916 #ifdef TRACE_BLOCK_CACHE
1917 uint32 oldUnused = cache->unused_block_count;
1918 #endif
1919
1920 cache->RemoveUnusedBlocks(free, secondsOld);
1921
1922 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1923 cache, oldUnused, cache->unused_block_count));
1924 }
1925
1926
1927 cached_block*
_GetUnusedBlock()1928 block_cache::_GetUnusedBlock()
1929 {
1930 TRACE(("block_cache: get unused block\n"));
1931
1932 for (block_list::Iterator iterator = unused_blocks.GetIterator();
1933 cached_block* block = iterator.Next();) {
1934 TB(Flush(this, block, true));
1935 // this can only happen if no transactions are used
1936 if (block->is_dirty && !block->busy_writing && !block->discard)
1937 BlockWriter::WriteBlock(this, block);
1938
1939 // remove block from lists
1940 iterator.Remove();
1941 unused_block_count--;
1942 hash->Remove(block);
1943
1944 ASSERT(block->original_data == NULL && block->parent_data == NULL);
1945 block->unused = false;
1946
1947 // TODO: see if compare data is handled correctly here!
1948 #if BLOCK_CACHE_DEBUG_CHANGED
1949 if (block->compare != NULL)
1950 Free(block->compare);
1951 #endif
1952 return block;
1953 }
1954
1955 return NULL;
1956 }
1957
1958
1959 // #pragma mark - private block functions
1960
1961
1962 /*! Cache must be locked.
1963 */
1964 static void
mark_block_busy_reading(block_cache * cache,cached_block * block)1965 mark_block_busy_reading(block_cache* cache, cached_block* block)
1966 {
1967 block->busy_reading = true;
1968 cache->busy_reading_count++;
1969 }
1970
1971
1972 /*! Cache must be locked.
1973 */
1974 static void
mark_block_unbusy_reading(block_cache * cache,cached_block * block)1975 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1976 {
1977 block->busy_reading = false;
1978 cache->busy_reading_count--;
1979
1980 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1981 || block->busy_reading_waiters) {
1982 cache->busy_reading_waiters = false;
1983 block->busy_reading_waiters = false;
1984 cache->busy_reading_condition.NotifyAll();
1985 }
1986 }
1987
1988
1989 /*! Cache must be locked.
1990 */
1991 static void
wait_for_busy_reading_block(block_cache * cache,cached_block * block)1992 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1993 {
1994 while (block->busy_reading) {
1995 // wait for at least the specified block to be read in
1996 ConditionVariableEntry entry;
1997 cache->busy_reading_condition.Add(&entry);
1998 block->busy_reading_waiters = true;
1999
2000 mutex_unlock(&cache->lock);
2001 entry.Wait();
2002 mutex_lock(&cache->lock);
2003 }
2004 }
2005
2006
2007 /*! Cache must be locked.
2008 */
2009 static void
wait_for_busy_reading_blocks(block_cache * cache)2010 wait_for_busy_reading_blocks(block_cache* cache)
2011 {
2012 while (cache->busy_reading_count != 0) {
2013 // wait for all blocks to be read in
2014 ConditionVariableEntry entry;
2015 cache->busy_reading_condition.Add(&entry);
2016 cache->busy_reading_waiters = true;
2017
2018 mutex_unlock(&cache->lock);
2019 entry.Wait();
2020 mutex_lock(&cache->lock);
2021 }
2022 }
2023
2024
2025 /*! Cache must be locked.
2026 */
2027 static void
wait_for_busy_writing_block(block_cache * cache,cached_block * block)2028 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
2029 {
2030 while (block->busy_writing) {
2031 // wait for all blocks to be written back
2032 ConditionVariableEntry entry;
2033 cache->busy_writing_condition.Add(&entry);
2034 block->busy_writing_waiters = true;
2035
2036 mutex_unlock(&cache->lock);
2037 entry.Wait();
2038 mutex_lock(&cache->lock);
2039 }
2040 }
2041
2042
2043 /*! Cache must be locked.
2044 */
2045 static void
wait_for_busy_writing_blocks(block_cache * cache)2046 wait_for_busy_writing_blocks(block_cache* cache)
2047 {
2048 while (cache->busy_writing_count != 0) {
2049 // wait for all blocks to be written back
2050 ConditionVariableEntry entry;
2051 cache->busy_writing_condition.Add(&entry);
2052 cache->busy_writing_waiters = true;
2053
2054 mutex_unlock(&cache->lock);
2055 entry.Wait();
2056 mutex_lock(&cache->lock);
2057 }
2058 }
2059
2060
2061 /*! Removes a reference from the specified \a block. If this was the last
2062 reference, the block is moved into the unused list.
2063 In low memory situations, it will also free some blocks from that list,
2064 but not necessarily the \a block it just released.
2065 */
2066 static void
put_cached_block(block_cache * cache,cached_block * block)2067 put_cached_block(block_cache* cache, cached_block* block)
2068 {
2069 #if BLOCK_CACHE_DEBUG_CHANGED
2070 if (!block->is_dirty && block->compare != NULL
2071 && memcmp(block->current_data, block->compare, cache->block_size)) {
2072 TRACE_ALWAYS("new block:\n");
2073 dump_block((const char*)block->current_data, 256, " ");
2074 TRACE_ALWAYS("unchanged block:\n");
2075 dump_block((const char*)block->compare, 256, " ");
2076 BlockWriter::WriteBlock(cache, block);
2077 panic("block_cache: supposed to be clean block was changed!\n");
2078
2079 cache->Free(block->compare);
2080 block->compare = NULL;
2081 }
2082 #endif
2083 TB(Put(cache, block));
2084
2085 if (block->ref_count < 1) {
2086 panic("Invalid ref_count for block %p, cache %p\n", block, cache);
2087 return;
2088 }
2089
2090 if (--block->ref_count == 0
2091 && block->transaction == NULL && block->previous_transaction == NULL) {
2092 // This block is not used anymore, and not part of any transaction
2093 block->is_writing = false;
2094
2095 if (block->discard) {
2096 cache->RemoveBlock(block);
2097 } else {
2098 // put this block in the list of unused blocks
2099 ASSERT(!block->unused);
2100 block->unused = true;
2101
2102 ASSERT(block->original_data == NULL && block->parent_data == NULL);
2103 cache->unused_blocks.Add(block);
2104 cache->unused_block_count++;
2105 }
2106 }
2107 }
2108
2109
2110 static void
put_cached_block(block_cache * cache,off_t blockNumber)2111 put_cached_block(block_cache* cache, off_t blockNumber)
2112 {
2113 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
2114 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
2115 blockNumber, cache->max_blocks - 1);
2116 }
2117
2118 cached_block* block = cache->hash->Lookup(blockNumber);
2119 if (block != NULL)
2120 put_cached_block(cache, block);
2121 else {
2122 TB(Error(cache, blockNumber, "put unknown"));
2123 }
2124 }
2125
2126
2127 /*! Retrieves the block \a blockNumber from the hash table, if it's already
2128 there, or reads it from the disk.
2129 You need to have the cache locked when calling this function.
2130
2131 \param _allocated tells you whether or not a new block has been allocated
2132 to satisfy your request.
2133 \param readBlock if \c false, the block will not be read in case it was
2134 not already in the cache. The block you retrieve may contain random
2135 data. If \c true, the cache will be temporarily unlocked while the
2136 block is read in.
2137 */
2138 static status_t
get_cached_block(block_cache * cache,off_t blockNumber,bool * _allocated,bool readBlock,cached_block ** _block)2139 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
2140 bool readBlock, cached_block** _block)
2141 {
2142 ASSERT_LOCKED_MUTEX(&cache->lock);
2143
2144 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
2145 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
2146 blockNumber, cache->max_blocks - 1);
2147 return B_BAD_VALUE;
2148 }
2149
2150 retry:
2151 cached_block* block = cache->hash->Lookup(blockNumber);
2152 *_allocated = false;
2153
2154 if (block == NULL) {
2155 // put block into cache
2156 block = cache->NewBlock(blockNumber);
2157 if (block == NULL)
2158 return B_NO_MEMORY;
2159
2160 cache->hash->Insert(block);
2161 *_allocated = true;
2162 } else if (block->busy_reading) {
2163 // The block is currently busy_reading - wait and try again later
2164 wait_for_busy_reading_block(cache, block);
2165
2166 // The block may have been deleted or replaced in the meantime,
2167 // so we must look it up in the hash again after waiting.
2168 goto retry;
2169 }
2170
2171 if (block->unused) {
2172 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
2173 block->unused = false;
2174 cache->unused_blocks.Remove(block);
2175 cache->unused_block_count--;
2176 }
2177
2178 if (*_allocated && readBlock) {
2179 // read block into cache
2180 int32 blockSize = cache->block_size;
2181
2182 mark_block_busy_reading(cache, block);
2183 mutex_unlock(&cache->lock);
2184
2185 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
2186 block->current_data, blockSize);
2187
2188 mutex_lock(&cache->lock);
2189 if (bytesRead < blockSize) {
2190 cache->RemoveBlock(block);
2191 TB(Error(cache, blockNumber, "read failed", bytesRead));
2192
2193 TRACE_ALWAYS("could not read block %" B_PRIdOFF ": bytesRead: %zd,"
2194 " error: %s\n", blockNumber, bytesRead, strerror(errno));
2195 return errno;
2196 }
2197 TB(Read(cache, block));
2198
2199 mark_block_unbusy_reading(cache, block);
2200 }
2201
2202 block->ref_count++;
2203 block->last_accessed = system_time() / 1000000L;
2204
2205 *_block = block;
2206 return B_OK;
2207 }
2208
2209
2210 /*! Returns the writable block data for the requested blockNumber.
2211 If \a cleared is true, the block is not read from disk; an empty block
2212 is returned.
2213
2214 This is the only method to insert a block into a transaction. It makes
2215 sure that the previous block contents are preserved in that case.
2216 */
2217 static status_t
get_writable_cached_block(block_cache * cache,off_t blockNumber,int32 transactionID,bool cleared,void ** _block)2218 get_writable_cached_block(block_cache* cache, off_t blockNumber,
2219 int32 transactionID, bool cleared, void** _block)
2220 {
2221 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
2222 blockNumber, transactionID));
2223
2224 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
2225 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
2226 blockNumber, cache->max_blocks - 1);
2227 return B_BAD_VALUE;
2228 }
2229
2230 bool allocated;
2231 cached_block* block;
2232 status_t status = get_cached_block(cache, blockNumber, &allocated,
2233 !cleared, &block);
2234 if (status != B_OK)
2235 return status;
2236
2237 if (block->busy_writing)
2238 wait_for_busy_writing_block(cache, block);
2239
2240 block->discard = false;
2241
2242 // if there is no transaction support, we just return the current block
2243 if (transactionID == -1) {
2244 if (cleared) {
2245 mark_block_busy_reading(cache, block);
2246 mutex_unlock(&cache->lock);
2247
2248 memset(block->current_data, 0, cache->block_size);
2249
2250 mutex_lock(&cache->lock);
2251 mark_block_unbusy_reading(cache, block);
2252 }
2253
2254 block->is_writing = true;
2255
2256 if (!block->is_dirty) {
2257 cache->num_dirty_blocks++;
2258 block->is_dirty = true;
2259 // mark the block as dirty
2260 }
2261
2262 TB(Get(cache, block));
2263 *_block = block->current_data;
2264 return B_OK;
2265 }
2266
2267 cache_transaction* transaction = block->transaction;
2268
2269 if (transaction != NULL && transaction->id != transactionID) {
2270 // TODO: we have to wait here until the other transaction is done.
2271 // Maybe we should even panic, since we can't prevent any deadlocks.
2272 panic("get_writable_cached_block(): asked to get busy writable block "
2273 "(transaction %" B_PRId32 ")\n", block->transaction->id);
2274 put_cached_block(cache, block);
2275 return B_BAD_VALUE;
2276 }
2277 if (transaction == NULL && transactionID != -1) {
2278 // get new transaction
2279 transaction = lookup_transaction(cache, transactionID);
2280 if (transaction == NULL) {
2281 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
2282 transactionID);
2283 put_cached_block(cache, block);
2284 return B_BAD_VALUE;
2285 }
2286 if (!transaction->open) {
2287 panic("get_writable_cached_block(): transaction already done!\n");
2288 put_cached_block(cache, block);
2289 return B_BAD_VALUE;
2290 }
2291
2292 block->transaction = transaction;
2293
2294 // attach the block to the transaction block list
2295 block->transaction_next = transaction->first_block;
2296 transaction->first_block = block;
2297 transaction->num_blocks++;
2298 }
2299 if (transaction != NULL)
2300 transaction->last_used = system_time();
2301
2302 bool wasUnchanged = block->original_data == NULL
2303 || block->previous_transaction != NULL;
2304
2305 if (!(allocated && cleared) && block->original_data == NULL) {
2306 // we already have data, so we need to preserve it
2307 block->original_data = cache->Allocate();
2308 if (block->original_data == NULL) {
2309 TB(Error(cache, blockNumber, "allocate original failed"));
2310 FATAL(("could not allocate original_data\n"));
2311 put_cached_block(cache, block);
2312 return B_NO_MEMORY;
2313 }
2314
2315 mark_block_busy_reading(cache, block);
2316 mutex_unlock(&cache->lock);
2317
2318 memcpy(block->original_data, block->current_data, cache->block_size);
2319
2320 mutex_lock(&cache->lock);
2321 mark_block_unbusy_reading(cache, block);
2322 }
2323 if (block->parent_data == block->current_data) {
2324 // remember any previous contents for the parent transaction
2325 block->parent_data = cache->Allocate();
2326 if (block->parent_data == NULL) {
2327 // TODO: maybe we should just continue the current transaction in
2328 // this case...
2329 TB(Error(cache, blockNumber, "allocate parent failed"));
2330 FATAL(("could not allocate parent\n"));
2331 put_cached_block(cache, block);
2332 return B_NO_MEMORY;
2333 }
2334
2335 mark_block_busy_reading(cache, block);
2336 mutex_unlock(&cache->lock);
2337
2338 memcpy(block->parent_data, block->current_data, cache->block_size);
2339
2340 mutex_lock(&cache->lock);
2341 mark_block_unbusy_reading(cache, block);
2342
2343 transaction->sub_num_blocks++;
2344 } else if (transaction != NULL && transaction->has_sub_transaction
2345 && block->parent_data == NULL && wasUnchanged)
2346 transaction->sub_num_blocks++;
2347
2348 if (cleared) {
2349 mark_block_busy_reading(cache, block);
2350 mutex_unlock(&cache->lock);
2351
2352 memset(block->current_data, 0, cache->block_size);
2353
2354 mutex_lock(&cache->lock);
2355 mark_block_unbusy_reading(cache, block);
2356 }
2357
2358 block->is_dirty = true;
2359 TB(Get(cache, block));
2360 TB2(BlockData(cache, block, "get writable"));
2361
2362 *_block = block->current_data;
2363 return B_OK;
2364 }
2365
2366
2367 #if DEBUG_BLOCK_CACHE
2368
2369
2370 static void
dump_block(cached_block * block)2371 dump_block(cached_block* block)
2372 {
2373 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2374 " %c%c%c%c%c%c %08lx %08lx\n",
2375 (addr_t)block, block->block_number,
2376 (addr_t)block->current_data, (addr_t)block->original_data,
2377 (addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2378 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2379 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2380 block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2381 (addr_t)block->transaction,
2382 (addr_t)block->previous_transaction);
2383 }
2384
2385
2386 static void
dump_block_long(cached_block * block)2387 dump_block_long(cached_block* block)
2388 {
2389 kprintf("BLOCK %p\n", block);
2390 kprintf(" current data: %p\n", block->current_data);
2391 kprintf(" original data: %p\n", block->original_data);
2392 kprintf(" parent data: %p\n", block->parent_data);
2393 #if BLOCK_CACHE_DEBUG_CHANGED
2394 kprintf(" compare data: %p\n", block->compare);
2395 #endif
2396 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count);
2397 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess());
2398 kprintf(" flags: ");
2399 if (block->busy_reading)
2400 kprintf(" busy_reading");
2401 if (block->busy_writing)
2402 kprintf(" busy_writing");
2403 if (block->is_writing)
2404 kprintf(" is-writing");
2405 if (block->is_dirty)
2406 kprintf(" is-dirty");
2407 if (block->unused)
2408 kprintf(" unused");
2409 if (block->discard)
2410 kprintf(" discard");
2411 kprintf("\n");
2412 if (block->transaction != NULL) {
2413 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction,
2414 block->transaction->id);
2415 if (block->transaction_next != NULL) {
2416 kprintf(" next in transaction: %" B_PRIdOFF "\n",
2417 block->transaction_next->block_number);
2418 }
2419 }
2420 if (block->previous_transaction != NULL) {
2421 kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2422 block->previous_transaction,
2423 block->previous_transaction->id);
2424 }
2425
2426 set_debug_variable("_current", (addr_t)block->current_data);
2427 set_debug_variable("_original", (addr_t)block->original_data);
2428 set_debug_variable("_parent", (addr_t)block->parent_data);
2429 }
2430
2431
2432 static int
dump_cached_block(int argc,char ** argv)2433 dump_cached_block(int argc, char** argv)
2434 {
2435 if (argc != 2) {
2436 kprintf("usage: %s <block-address>\n", argv[0]);
2437 return 0;
2438 }
2439
2440 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2441 return 0;
2442 }
2443
2444
2445 static int
dump_cache(int argc,char ** argv)2446 dump_cache(int argc, char** argv)
2447 {
2448 bool showTransactions = false;
2449 bool showBlocks = false;
2450 int32 i = 1;
2451 while (argv[i] != NULL && argv[i][0] == '-') {
2452 for (char* arg = &argv[i][1]; arg[0]; arg++) {
2453 switch (arg[0]) {
2454 case 'b':
2455 showBlocks = true;
2456 break;
2457 case 't':
2458 showTransactions = true;
2459 break;
2460 default:
2461 print_debugger_command_usage(argv[0]);
2462 return 0;
2463 }
2464 }
2465 i++;
2466 }
2467
2468 if (i >= argc) {
2469 print_debugger_command_usage(argv[0]);
2470 return 0;
2471 }
2472
2473 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2474 if (cache == NULL) {
2475 kprintf("invalid cache address\n");
2476 return 0;
2477 }
2478
2479 off_t blockNumber = -1;
2480 if (i + 1 < argc) {
2481 blockNumber = parse_expression(argv[i + 1]);
2482 cached_block* block = cache->hash->Lookup(blockNumber);
2483 if (block != NULL)
2484 dump_block_long(block);
2485 else
2486 kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2487 return 0;
2488 }
2489
2490 kprintf("BLOCK CACHE: %p\n", cache);
2491
2492 kprintf(" fd: %d\n", cache->fd);
2493 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks);
2494 kprintf(" block_size: %zu\n", cache->block_size);
2495 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2496 kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2497 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2498 cache->busy_reading_waiters ? "has" : "no");
2499 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2500 cache->busy_writing_waiters ? "has" : "no");
2501
2502 if (!cache->pending_notifications.IsEmpty()) {
2503 kprintf(" pending notifications:\n");
2504
2505 NotificationList::Iterator iterator
2506 = cache->pending_notifications.GetIterator();
2507 while (iterator.HasNext()) {
2508 cache_notification* notification = iterator.Next();
2509
2510 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification,
2511 notification->events_pending, notification->hook,
2512 notification->data);
2513 }
2514 }
2515
2516 if (showTransactions) {
2517 kprintf(" transactions:\n");
2518 kprintf("address id state blocks main sub\n");
2519
2520 TransactionTable::Iterator iterator(cache->transaction_hash);
2521
2522 while (iterator.HasNext()) {
2523 cache_transaction* transaction = iterator.Next();
2524 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
2525 B_PRId32 "\n", transaction, transaction->id,
2526 transaction->open ? "open" : "closed",
2527 transaction->num_blocks, transaction->main_num_blocks,
2528 transaction->sub_num_blocks);
2529 }
2530 }
2531
2532 if (showBlocks) {
2533 kprintf(" blocks:\n");
2534 kprintf("address block no. current original parent refs access "
2535 "flags transact prev. trans\n");
2536 }
2537
2538 uint32 referenced = 0;
2539 uint32 count = 0;
2540 uint32 dirty = 0;
2541 uint32 discarded = 0;
2542 BlockTable::Iterator iterator(cache->hash);
2543 while (iterator.HasNext()) {
2544 cached_block* block = iterator.Next();
2545 if (showBlocks)
2546 dump_block(block);
2547
2548 if (block->is_dirty)
2549 dirty++;
2550 if (block->discard)
2551 discarded++;
2552 if (block->ref_count)
2553 referenced++;
2554 count++;
2555 }
2556
2557 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
2558 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
2559 " in unused.\n",
2560 count, dirty, discarded, referenced, cache->busy_reading_count,
2561 cache->unused_block_count);
2562 return 0;
2563 }
2564
2565
2566 static int
dump_transaction(int argc,char ** argv)2567 dump_transaction(int argc, char** argv)
2568 {
2569 bool showBlocks = false;
2570 int i = 1;
2571 if (argc > 1 && !strcmp(argv[1], "-b")) {
2572 showBlocks = true;
2573 i++;
2574 }
2575
2576 if (argc - i < 1 || argc - i > 2) {
2577 print_debugger_command_usage(argv[0]);
2578 return 0;
2579 }
2580
2581 cache_transaction* transaction = NULL;
2582
2583 if (argc - i == 1) {
2584 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2585 } else {
2586 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2587 int32 id = parse_expression(argv[i + 1]);
2588 transaction = lookup_transaction(cache, id);
2589 if (transaction == NULL) {
2590 kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2591 return 0;
2592 }
2593 }
2594
2595 kprintf("TRANSACTION %p\n", transaction);
2596
2597 kprintf(" id: %" B_PRId32 "\n", transaction->id);
2598 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks);
2599 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2600 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks);
2601 kprintf(" has sub: %d\n", transaction->has_sub_transaction);
2602 kprintf(" state: %s\n", transaction->open ? "open" : "closed");
2603 kprintf(" idle: %" B_PRId64 " secs\n",
2604 (system_time() - transaction->last_used) / 1000000);
2605
2606 kprintf(" listeners:\n");
2607
2608 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2609 while (iterator.HasNext()) {
2610 cache_listener* listener = iterator.Next();
2611
2612 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2613 listener->hook, listener->data);
2614 }
2615
2616 if (!showBlocks)
2617 return 0;
2618
2619 kprintf(" blocks:\n");
2620 kprintf("address block no. current original parent refs access "
2621 "flags transact prev. trans\n");
2622
2623 cached_block* block = transaction->first_block;
2624 while (block != NULL) {
2625 dump_block(block);
2626 block = block->transaction_next;
2627 }
2628
2629 kprintf("--\n");
2630
2631 block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2632 while (blockIterator.HasNext()) {
2633 block = blockIterator.Next();
2634 dump_block(block);
2635 }
2636
2637 return 0;
2638 }
2639
2640
2641 static int
dump_caches(int argc,char ** argv)2642 dump_caches(int argc, char** argv)
2643 {
2644 kprintf("Block caches:\n");
2645 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2646 while (i.HasNext()) {
2647 block_cache* cache = i.Next();
2648 if (cache == (block_cache*)&sMarkCache)
2649 continue;
2650
2651 kprintf(" %p\n", cache);
2652 }
2653
2654 return 0;
2655 }
2656
2657
2658 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2659 static int
dump_block_data(int argc,char ** argv)2660 dump_block_data(int argc, char** argv)
2661 {
2662 using namespace BlockTracing;
2663
2664 // Determine which blocks to show
2665
2666 bool printStackTrace = true;
2667 uint32 which = 0;
2668 int32 i = 1;
2669 while (i < argc && argv[i][0] == '-') {
2670 char* arg = &argv[i][1];
2671 while (arg[0]) {
2672 switch (arg[0]) {
2673 case 'c':
2674 which |= BlockData::kCurrent;
2675 break;
2676 case 'p':
2677 which |= BlockData::kParent;
2678 break;
2679 case 'o':
2680 which |= BlockData::kOriginal;
2681 break;
2682
2683 default:
2684 kprintf("invalid block specifier (only o/c/p are "
2685 "allowed).\n");
2686 return 0;
2687 }
2688 arg++;
2689 }
2690
2691 i++;
2692 }
2693 if (which == 0)
2694 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2695
2696 if (i == argc) {
2697 print_debugger_command_usage(argv[0]);
2698 return 0;
2699 }
2700
2701 // Get the range of blocks to print
2702
2703 int64 from = parse_expression(argv[i]);
2704 int64 to = from;
2705 if (argc > i + 1)
2706 to = parse_expression(argv[i + 1]);
2707 if (to < from)
2708 to = from;
2709
2710 uint32 offset = 0;
2711 uint32 size = LONG_MAX;
2712 if (argc > i + 2)
2713 offset = parse_expression(argv[i + 2]);
2714 if (argc > i + 3)
2715 size = parse_expression(argv[i + 3]);
2716
2717 TraceEntryIterator iterator;
2718 iterator.MoveTo(from - 1);
2719
2720 static char sBuffer[1024];
2721 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2722
2723 while (TraceEntry* entry = iterator.Next()) {
2724 int32 index = iterator.Index();
2725 if (index > to)
2726 break;
2727
2728 Action* action = dynamic_cast<Action*>(entry);
2729 if (action != NULL) {
2730 out.Clear();
2731 out.DumpEntry(action);
2732 continue;
2733 }
2734
2735 BlockData* blockData = dynamic_cast<BlockData*>(entry);
2736 if (blockData == NULL)
2737 continue;
2738
2739 out.Clear();
2740
2741 const char* dump = out.DumpEntry(entry);
2742 int length = strlen(dump);
2743 if (length > 0 && dump[length - 1] == '\n')
2744 length--;
2745
2746 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2747
2748 if (printStackTrace) {
2749 out.Clear();
2750 entry->DumpStackTrace(out);
2751 if (out.Size() > 0)
2752 kputs(out.Buffer());
2753 }
2754
2755 blockData->DumpBlocks(which, offset, size);
2756 }
2757
2758 return 0;
2759 }
2760 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2
2761
2762
2763 #endif // DEBUG_BLOCK_CACHE
2764
2765
2766 /*! Traverses through the block_cache list, and returns one cache after the
2767 other. The cache returned is automatically locked when you get it, and
2768 unlocked with the next call to this function. Ignores caches that are in
2769 deletion state.
2770 Returns \c NULL when the end of the list is reached.
2771 */
2772 static block_cache*
get_next_locked_block_cache(block_cache * last)2773 get_next_locked_block_cache(block_cache* last)
2774 {
2775 MutexLocker _(sCachesLock);
2776
2777 block_cache* cache;
2778 if (last != NULL) {
2779 mutex_unlock(&last->lock);
2780
2781 cache = sCaches.GetNext((block_cache*)&sMarkCache);
2782 sCaches.Remove((block_cache*)&sMarkCache);
2783 } else
2784 cache = sCaches.Head();
2785
2786 if (cache != NULL) {
2787 mutex_lock(&cache->lock);
2788 sCaches.InsertBefore(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2789 }
2790
2791 return cache;
2792 }
2793
2794
2795 /*! Background thread that continuously checks for pending notifications of
2796 all caches.
2797 Every two seconds, it will also write back up to 64 blocks per cache.
2798 */
2799 static status_t
block_notifier_and_writer(void *)2800 block_notifier_and_writer(void* /*data*/)
2801 {
2802 const bigtime_t kDefaultTimeout = 2000000LL;
2803 bigtime_t timeout = kDefaultTimeout;
2804
2805 while (true) {
2806 bigtime_t start = system_time();
2807
2808 status_t status = acquire_sem_etc(sEventSemaphore, 1,
2809 B_RELATIVE_TIMEOUT, timeout);
2810 if (status == B_OK) {
2811 flush_pending_notifications();
2812 timeout -= system_time() - start;
2813 continue;
2814 }
2815
2816 // Write 64 blocks of each block_cache roughly every 2 seconds,
2817 // potentially more or less depending on congestion and drive speeds
2818 // (usually much less.) We do not want to queue everything at once
2819 // because a future transaction might then get held up waiting for
2820 // a specific block to be written.
2821 timeout = kDefaultTimeout;
2822 size_t usedMemory;
2823 object_cache_get_usage(sBlockCache, &usedMemory);
2824
2825 block_cache* cache = NULL;
2826 while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2827 // Give some breathing room: wait 2x the length of the potential
2828 // maximum block count-sized write between writes, and also skip
2829 // if there are more than 16 blocks currently being written.
2830 const bigtime_t next = cache->last_block_write
2831 + cache->last_block_write_duration * 2 * 64;
2832 if (cache->busy_writing_count > 16 || system_time() < next) {
2833 if (cache->last_block_write_duration > 0) {
2834 timeout = min_c(timeout,
2835 cache->last_block_write_duration * 2 * 64);
2836 }
2837 continue;
2838 }
2839
2840 BlockWriter writer(cache, 64);
2841 bool hasMoreBlocks = false;
2842
2843 size_t cacheUsedMemory;
2844 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2845 usedMemory += cacheUsedMemory;
2846
2847 if (cache->num_dirty_blocks) {
2848 // This cache is not using transactions, we'll scan the blocks
2849 // directly
2850 BlockTable::Iterator iterator(cache->hash);
2851
2852 while (iterator.HasNext()) {
2853 cached_block* block = iterator.Next();
2854 if (block->CanBeWritten() && !writer.Add(block)) {
2855 hasMoreBlocks = true;
2856 break;
2857 }
2858 }
2859 } else {
2860 TransactionTable::Iterator iterator(cache->transaction_hash);
2861
2862 while (iterator.HasNext()) {
2863 cache_transaction* transaction = iterator.Next();
2864 if (transaction->open) {
2865 if (system_time() > transaction->last_used
2866 + kTransactionIdleTime) {
2867 // Transaction is open but idle
2868 notify_transaction_listeners(cache, transaction,
2869 TRANSACTION_IDLE);
2870 }
2871 continue;
2872 }
2873
2874 bool hasLeftOvers;
2875 // we ignore this one
2876 if (!writer.Add(transaction, hasLeftOvers)) {
2877 hasMoreBlocks = true;
2878 break;
2879 }
2880 }
2881 }
2882
2883 writer.Write();
2884
2885 if (hasMoreBlocks && cache->last_block_write_duration > 0) {
2886 // There are probably still more blocks that we could write, so
2887 // see if we can decrease the timeout.
2888 timeout = min_c(timeout,
2889 cache->last_block_write_duration * 2 * 64);
2890 }
2891
2892 if ((block_cache_used_memory() / B_PAGE_SIZE)
2893 > vm_page_num_pages() / 2) {
2894 // Try to reduce memory usage to half of the available
2895 // RAM at maximum
2896 cache->RemoveUnusedBlocks(1000, 10);
2897 }
2898 }
2899
2900 MutexLocker _(sCachesMemoryUseLock);
2901 sUsedMemory = usedMemory;
2902 }
2903
2904 // never can get here
2905 return B_OK;
2906 }
2907
2908
2909 /*! Notify function for wait_for_notifications(). */
2910 static void
notify_sync(int32 transactionID,int32 event,void * _cache)2911 notify_sync(int32 transactionID, int32 event, void* _cache)
2912 {
2913 block_cache* cache = (block_cache*)_cache;
2914
2915 cache->condition_variable.NotifyOne();
2916 }
2917
2918
2919 /*! Must be called with the sCachesLock held. */
2920 static bool
is_valid_cache(block_cache * cache)2921 is_valid_cache(block_cache* cache)
2922 {
2923 ASSERT_LOCKED_MUTEX(&sCachesLock);
2924
2925 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2926 while (iterator.HasNext()) {
2927 if (cache == iterator.Next())
2928 return true;
2929 }
2930
2931 return false;
2932 }
2933
2934
2935 /*! Waits until all pending notifications are carried out.
2936 Safe to be called from the block writer/notifier thread.
2937 You must not hold the \a cache lock when calling this function.
2938 */
2939 static void
wait_for_notifications(block_cache * cache)2940 wait_for_notifications(block_cache* cache)
2941 {
2942 MutexLocker locker(sCachesLock);
2943
2944 if (find_thread(NULL) == sNotifierWriterThread) {
2945 // We're the notifier thread, don't wait, but flush all pending
2946 // notifications directly.
2947 if (is_valid_cache(cache))
2948 flush_pending_notifications(cache);
2949 return;
2950 }
2951
2952 // add sync notification
2953 cache_notification notification;
2954 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2955 cache);
2956
2957 ConditionVariableEntry entry;
2958 cache->condition_variable.Add(&entry);
2959
2960 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false);
2961 locker.Unlock();
2962
2963 // wait for notification hook to be called
2964 entry.Wait();
2965 }
2966
2967
2968 status_t
block_cache_init(void)2969 block_cache_init(void)
2970 {
2971 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2972 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2973 if (sBlockCache == NULL)
2974 return B_NO_MEMORY;
2975
2976 sCacheNotificationCache = create_object_cache("cache notifications",
2977 sizeof(cache_listener), 8, NULL, NULL, NULL);
2978 if (sCacheNotificationCache == NULL)
2979 return B_NO_MEMORY;
2980
2981 new (&sCaches) DoublyLinkedList<block_cache>;
2982 // manually call constructor
2983
2984 sEventSemaphore = create_sem(0, "block cache event");
2985 if (sEventSemaphore < B_OK)
2986 return sEventSemaphore;
2987
2988 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2989 "block notifier/writer", B_LOW_PRIORITY, NULL);
2990 if (sNotifierWriterThread >= B_OK)
2991 resume_thread(sNotifierWriterThread);
2992
2993 #if DEBUG_BLOCK_CACHE
2994 add_debugger_command_etc("block_caches", &dump_caches,
2995 "dumps all block caches", "\n", 0);
2996 add_debugger_command_etc("block_cache", &dump_cache,
2997 "dumps a specific block cache",
2998 "[-bt] <cache-address> [block-number]\n"
2999 " -t lists the transactions\n"
3000 " -b lists all blocks\n", 0);
3001 add_debugger_command("cached_block", &dump_cached_block,
3002 "dumps the specified cached block");
3003 add_debugger_command_etc("transaction", &dump_transaction,
3004 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
3005 "Either use a block cache pointer and an ID or a pointer to the transaction.\n"
3006 " -b lists all blocks that are part of this transaction\n", 0);
3007 # if BLOCK_CACHE_BLOCK_TRACING >= 2
3008 add_debugger_command_etc("block_cache_data", &dump_block_data,
3009 "dumps the data blocks logged for the actions",
3010 "[-cpo] <from> [<to> [<offset> [<size>]]]\n"
3011 "If no data specifier is used, all blocks are shown by default.\n"
3012 " -c the current data is shown, if available.\n"
3013 " -p the parent data is shown, if available.\n"
3014 " -o the original data is shown, if available.\n"
3015 " <from> first index of tracing entries to show.\n"
3016 " <to> if given, the last entry. If not, only <from> is shown.\n"
3017 " <offset> the offset of the block data.\n"
3018 " <from> the size of the block data that is dumped\n", 0);
3019 # endif
3020 #endif // DEBUG_BLOCK_CACHE
3021
3022 return B_OK;
3023 }
3024
3025
3026 size_t
block_cache_used_memory(void)3027 block_cache_used_memory(void)
3028 {
3029 MutexLocker _(sCachesMemoryUseLock);
3030 return sUsedMemory;
3031 }
3032
3033
3034 // #pragma mark - public transaction API
3035
3036
3037 int32
cache_start_transaction(void * _cache)3038 cache_start_transaction(void* _cache)
3039 {
3040 block_cache* cache = (block_cache*)_cache;
3041 TransactionLocker locker(cache);
3042
3043 if (cache->last_transaction && cache->last_transaction->open) {
3044 panic("last transaction (%" B_PRId32 ") still open!\n",
3045 cache->last_transaction->id);
3046 }
3047
3048 cache_transaction* transaction = new(std::nothrow) cache_transaction;
3049 if (transaction == NULL)
3050 return B_NO_MEMORY;
3051
3052 transaction->id = atomic_add(&cache->next_transaction_id, 1);
3053 cache->last_transaction = transaction;
3054
3055 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
3056 T(Action("start", cache, transaction));
3057
3058 cache->transaction_hash->Insert(transaction);
3059
3060 return transaction->id;
3061 }
3062
3063
3064 status_t
cache_sync_transaction(void * _cache,int32 id)3065 cache_sync_transaction(void* _cache, int32 id)
3066 {
3067 block_cache* cache = (block_cache*)_cache;
3068 bool hadBusy;
3069
3070 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
3071
3072 do {
3073 TransactionLocker locker(cache);
3074 hadBusy = false;
3075
3076 BlockWriter writer(cache);
3077 TransactionTable::Iterator iterator(cache->transaction_hash);
3078
3079 while (iterator.HasNext()) {
3080 // close all earlier transactions which haven't been closed yet
3081 cache_transaction* transaction = iterator.Next();
3082
3083 if (transaction->busy_writing_count != 0) {
3084 hadBusy = true;
3085 continue;
3086 }
3087 if (transaction->id <= id && !transaction->open) {
3088 // write back all of their remaining dirty blocks
3089 T(Action("sync", cache, transaction));
3090
3091 bool hasLeftOvers;
3092 writer.Add(transaction, hasLeftOvers);
3093
3094 if (hasLeftOvers) {
3095 // This transaction contains blocks that a previous
3096 // transaction is trying to write back in this write run
3097 hadBusy = true;
3098 }
3099 }
3100 }
3101
3102 status_t status = writer.Write();
3103 if (status != B_OK)
3104 return status;
3105 } while (hadBusy);
3106
3107 wait_for_notifications(cache);
3108 // make sure that all pending TRANSACTION_WRITTEN notifications
3109 // are handled after we return
3110 return B_OK;
3111 }
3112
3113
3114 status_t
cache_end_transaction(void * _cache,int32 id,transaction_notification_hook hook,void * data)3115 cache_end_transaction(void* _cache, int32 id,
3116 transaction_notification_hook hook, void* data)
3117 {
3118 block_cache* cache = (block_cache*)_cache;
3119 TransactionLocker locker(cache);
3120
3121 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
3122
3123 cache_transaction* transaction = lookup_transaction(cache, id);
3124 if (transaction == NULL) {
3125 panic("cache_end_transaction(): invalid transaction ID\n");
3126 return B_BAD_VALUE;
3127 }
3128
3129 // Write back all pending transaction blocks
3130 status_t status = write_blocks_in_previous_transaction(cache, transaction);
3131 if (status != B_OK)
3132 return status;
3133
3134 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3135
3136 if (hook != NULL
3137 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
3138 hook, data) != B_OK) {
3139 return B_NO_MEMORY;
3140 }
3141
3142 T(Action("end", cache, transaction));
3143
3144 // iterate through all blocks and free the unchanged original contents
3145
3146 cached_block* next;
3147 for (cached_block* block = transaction->first_block; block != NULL;
3148 block = next) {
3149 next = block->transaction_next;
3150 ASSERT(block->previous_transaction == NULL);
3151
3152 if (block->discard) {
3153 // This block has been discarded in the transaction
3154 cache->DiscardBlock(block);
3155 transaction->num_blocks--;
3156 continue;
3157 }
3158
3159 if (block->original_data != NULL) {
3160 cache->Free(block->original_data);
3161 block->original_data = NULL;
3162 }
3163 if (block->parent_data != NULL) {
3164 ASSERT(transaction->has_sub_transaction);
3165 cache->FreeBlockParentData(block);
3166 }
3167
3168 // move the block to the previous transaction list
3169 transaction->blocks.Add(block);
3170
3171 block->previous_transaction = transaction;
3172 block->transaction_next = NULL;
3173 block->transaction = NULL;
3174 }
3175
3176 transaction->open = false;
3177 return B_OK;
3178 }
3179
3180
3181 status_t
cache_abort_transaction(void * _cache,int32 id)3182 cache_abort_transaction(void* _cache, int32 id)
3183 {
3184 block_cache* cache = (block_cache*)_cache;
3185 TransactionLocker locker(cache);
3186
3187 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
3188
3189 cache_transaction* transaction = lookup_transaction(cache, id);
3190 if (transaction == NULL) {
3191 panic("cache_abort_transaction(): invalid transaction ID\n");
3192 return B_BAD_VALUE;
3193 }
3194
3195 T(Abort(cache, transaction));
3196 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3197
3198 // iterate through all blocks and restore their original contents
3199
3200 cached_block* block = transaction->first_block;
3201 cached_block* next;
3202 for (; block != NULL; block = next) {
3203 next = block->transaction_next;
3204
3205 if (block->original_data != NULL) {
3206 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
3207 "block %" B_PRIdOFF "\n", transaction->id, block->block_number));
3208 memcpy(block->current_data, block->original_data,
3209 cache->block_size);
3210 cache->Free(block->original_data);
3211 block->original_data = NULL;
3212 }
3213 if (transaction->has_sub_transaction && block->parent_data != NULL)
3214 cache->FreeBlockParentData(block);
3215
3216 block->transaction_next = NULL;
3217 block->transaction = NULL;
3218 block->discard = false;
3219 if (block->previous_transaction == NULL)
3220 block->is_dirty = false;
3221 }
3222
3223 cache->transaction_hash->Remove(transaction);
3224 delete_transaction(cache, transaction);
3225 return B_OK;
3226 }
3227
3228
3229 /*! Acknowledges the current parent transaction, and starts a new transaction
3230 from its sub transaction.
3231 The new transaction also gets a new transaction ID.
3232 */
3233 int32
cache_detach_sub_transaction(void * _cache,int32 id,transaction_notification_hook hook,void * data)3234 cache_detach_sub_transaction(void* _cache, int32 id,
3235 transaction_notification_hook hook, void* data)
3236 {
3237 block_cache* cache = (block_cache*)_cache;
3238 TransactionLocker locker(cache);
3239
3240 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
3241
3242 cache_transaction* transaction = lookup_transaction(cache, id);
3243 if (transaction == NULL) {
3244 panic("cache_detach_sub_transaction(): invalid transaction ID\n");
3245 return B_BAD_VALUE;
3246 }
3247 if (!transaction->has_sub_transaction)
3248 return B_BAD_VALUE;
3249
3250 // iterate through all blocks and free the unchanged original contents
3251
3252 status_t status = write_blocks_in_previous_transaction(cache, transaction);
3253 if (status != B_OK)
3254 return status;
3255
3256 // create a new transaction for the sub transaction
3257 cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
3258 if (newTransaction == NULL)
3259 return B_NO_MEMORY;
3260
3261 newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
3262 T(Detach(cache, transaction, newTransaction));
3263
3264 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3265
3266 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
3267 data) != B_OK) {
3268 delete newTransaction;
3269 return B_NO_MEMORY;
3270 }
3271
3272 cached_block* last = NULL;
3273 cached_block* next;
3274 for (cached_block* block = transaction->first_block; block != NULL;
3275 block = next) {
3276 next = block->transaction_next;
3277 ASSERT(block->previous_transaction == NULL);
3278
3279 if (block->discard) {
3280 cache->DiscardBlock(block);
3281 transaction->main_num_blocks--;
3282 continue;
3283 }
3284
3285 if (block->parent_data != NULL) {
3286 // The block changed in the parent - free the original data, since
3287 // they will be replaced by what is in current.
3288 ASSERT(block->original_data != NULL);
3289 cache->Free(block->original_data);
3290
3291 if (block->parent_data != block->current_data) {
3292 // The block had been changed in both transactions
3293 block->original_data = block->parent_data;
3294 } else {
3295 // The block has only been changed in the parent
3296 block->original_data = NULL;
3297 }
3298 block->parent_data = NULL;
3299
3300 // move the block to the previous transaction list
3301 transaction->blocks.Add(block);
3302 block->previous_transaction = transaction;
3303 }
3304
3305 if (block->original_data != NULL) {
3306 // This block had been changed in the current sub transaction,
3307 // we need to move this block over to the new transaction.
3308 ASSERT(block->parent_data == NULL);
3309
3310 if (last == NULL)
3311 newTransaction->first_block = block;
3312 else
3313 last->transaction_next = block;
3314
3315 block->transaction = newTransaction;
3316 last = block;
3317 } else
3318 block->transaction = NULL;
3319
3320 block->transaction_next = NULL;
3321 }
3322
3323 newTransaction->num_blocks = transaction->sub_num_blocks;
3324
3325 transaction->open = false;
3326 transaction->has_sub_transaction = false;
3327 transaction->num_blocks = transaction->main_num_blocks;
3328 transaction->sub_num_blocks = 0;
3329
3330 cache->transaction_hash->Insert(newTransaction);
3331 cache->last_transaction = newTransaction;
3332
3333 return newTransaction->id;
3334 }
3335
3336
3337 status_t
cache_abort_sub_transaction(void * _cache,int32 id)3338 cache_abort_sub_transaction(void* _cache, int32 id)
3339 {
3340 block_cache* cache = (block_cache*)_cache;
3341 TransactionLocker locker(cache);
3342
3343 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3344
3345 cache_transaction* transaction = lookup_transaction(cache, id);
3346 if (transaction == NULL) {
3347 panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3348 return B_BAD_VALUE;
3349 }
3350 if (!transaction->has_sub_transaction)
3351 return B_BAD_VALUE;
3352
3353 T(Abort(cache, transaction));
3354 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3355
3356 // revert all changes back to the version of the parent
3357
3358 cached_block* block = transaction->first_block;
3359 cached_block* last = NULL;
3360 cached_block* next;
3361 for (; block != NULL; block = next) {
3362 next = block->transaction_next;
3363
3364 if (block->parent_data == NULL) {
3365 // The parent transaction didn't change the block, but the sub
3366 // transaction did - we need to revert to the original data.
3367 // The block is no longer part of the transaction
3368 if (block->original_data != NULL) {
3369 // The block might not have original data if was empty
3370 memcpy(block->current_data, block->original_data,
3371 cache->block_size);
3372 }
3373
3374 if (last != NULL)
3375 last->transaction_next = next;
3376 else
3377 transaction->first_block = next;
3378
3379 block->transaction_next = NULL;
3380 block->transaction = NULL;
3381 transaction->num_blocks--;
3382
3383 if (block->previous_transaction == NULL) {
3384 cache->Free(block->original_data);
3385 block->original_data = NULL;
3386 block->is_dirty = false;
3387
3388 if (block->ref_count == 0) {
3389 // Move the block into the unused list if possible
3390 block->unused = true;
3391 cache->unused_blocks.Add(block);
3392 cache->unused_block_count++;
3393 }
3394 }
3395 } else {
3396 if (block->parent_data != block->current_data) {
3397 // The block has been changed and must be restored - the block
3398 // is still dirty and part of the transaction
3399 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3400 "restored contents of block %" B_PRIdOFF "\n",
3401 transaction->id, block->block_number));
3402 memcpy(block->current_data, block->parent_data,
3403 cache->block_size);
3404 cache->Free(block->parent_data);
3405 // The block stays dirty
3406 }
3407 block->parent_data = NULL;
3408 last = block;
3409 }
3410
3411 block->discard = false;
3412 }
3413
3414 // all subsequent changes will go into the main transaction
3415 transaction->has_sub_transaction = false;
3416 transaction->sub_num_blocks = 0;
3417
3418 return B_OK;
3419 }
3420
3421
3422 status_t
cache_start_sub_transaction(void * _cache,int32 id)3423 cache_start_sub_transaction(void* _cache, int32 id)
3424 {
3425 block_cache* cache = (block_cache*)_cache;
3426 TransactionLocker locker(cache);
3427
3428 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3429
3430 cache_transaction* transaction = lookup_transaction(cache, id);
3431 if (transaction == NULL) {
3432 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3433 id);
3434 return B_BAD_VALUE;
3435 }
3436
3437 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3438
3439 // move all changed blocks up to the parent
3440
3441 cached_block* block = transaction->first_block;
3442 cached_block* next;
3443 for (; block != NULL; block = next) {
3444 next = block->transaction_next;
3445
3446 if (block->parent_data != NULL) {
3447 // There already is an older sub transaction - we acknowledge
3448 // its changes and move its blocks up to the parent
3449 ASSERT(transaction->has_sub_transaction);
3450 cache->FreeBlockParentData(block);
3451 }
3452 if (block->discard) {
3453 // This block has been discarded in the parent transaction.
3454 // Just throw away any changes made in this transaction, so that
3455 // it can still be reverted to its original contents if needed
3456 ASSERT(block->previous_transaction == NULL);
3457 if (block->original_data != NULL) {
3458 memcpy(block->current_data, block->original_data,
3459 cache->block_size);
3460
3461 cache->Free(block->original_data);
3462 block->original_data = NULL;
3463 }
3464 continue;
3465 }
3466
3467 // we "allocate" the parent data lazily, that means, we don't copy
3468 // the data (and allocate memory for it) until we need to
3469 block->parent_data = block->current_data;
3470 }
3471
3472 // all subsequent changes will go into the sub transaction
3473 transaction->has_sub_transaction = true;
3474 transaction->main_num_blocks = transaction->num_blocks;
3475 transaction->sub_num_blocks = 0;
3476 T(Action("start-sub", cache, transaction));
3477
3478 return B_OK;
3479 }
3480
3481
3482 /*! Adds a transaction listener that gets notified when the transaction
3483 is ended, aborted, written, or idle as specified by \a events.
3484 The listener gets automatically removed when the transaction ends.
3485 */
3486 status_t
cache_add_transaction_listener(void * _cache,int32 id,int32 events,transaction_notification_hook hook,void * data)3487 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3488 transaction_notification_hook hook, void* data)
3489 {
3490 block_cache* cache = (block_cache*)_cache;
3491 TransactionLocker locker(cache);
3492
3493 cache_transaction* transaction = lookup_transaction(cache, id);
3494 if (transaction == NULL)
3495 return B_BAD_VALUE;
3496
3497 return add_transaction_listener(cache, transaction, events, hook, data);
3498 }
3499
3500
3501 status_t
cache_remove_transaction_listener(void * _cache,int32 id,transaction_notification_hook hookFunction,void * data)3502 cache_remove_transaction_listener(void* _cache, int32 id,
3503 transaction_notification_hook hookFunction, void* data)
3504 {
3505 block_cache* cache = (block_cache*)_cache;
3506 TransactionLocker locker(cache);
3507
3508 cache_transaction* transaction = lookup_transaction(cache, id);
3509 if (transaction == NULL)
3510 return B_BAD_VALUE;
3511
3512 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3513 while (iterator.HasNext()) {
3514 cache_listener* listener = iterator.Next();
3515 if (listener->data == data && listener->hook == hookFunction) {
3516 iterator.Remove();
3517
3518 if (listener->events_pending != 0) {
3519 MutexLocker _(sNotificationsLock);
3520 if (listener->events_pending != 0)
3521 cache->pending_notifications.Remove(listener);
3522 }
3523 delete listener;
3524 return B_OK;
3525 }
3526 }
3527
3528 return B_ENTRY_NOT_FOUND;
3529 }
3530
3531
3532 status_t
cache_next_block_in_transaction(void * _cache,int32 id,bool mainOnly,long * _cookie,off_t * _blockNumber,void ** _data,void ** _unchangedData)3533 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3534 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3535 {
3536 cached_block* block = (cached_block*)*_cookie;
3537 block_cache* cache = (block_cache*)_cache;
3538 TransactionLocker locker(cache);
3539
3540 cache_transaction* transaction = lookup_transaction(cache, id);
3541 if (transaction == NULL || !transaction->open)
3542 return B_BAD_VALUE;
3543
3544 if (block == NULL)
3545 block = transaction->first_block;
3546 else
3547 block = block->transaction_next;
3548
3549 if (transaction->has_sub_transaction) {
3550 if (mainOnly) {
3551 // find next block that the parent changed
3552 while (block != NULL && block->parent_data == NULL)
3553 block = block->transaction_next;
3554 } else {
3555 // find next non-discarded block
3556 while (block != NULL && block->discard)
3557 block = block->transaction_next;
3558 }
3559 }
3560
3561 if (block == NULL)
3562 return B_ENTRY_NOT_FOUND;
3563
3564 if (_blockNumber)
3565 *_blockNumber = block->block_number;
3566 if (_data)
3567 *_data = mainOnly ? block->parent_data : block->current_data;
3568 if (_unchangedData)
3569 *_unchangedData = block->original_data;
3570
3571 *_cookie = (addr_t)block;
3572 return B_OK;
3573 }
3574
3575
3576 int32
cache_blocks_in_transaction(void * _cache,int32 id)3577 cache_blocks_in_transaction(void* _cache, int32 id)
3578 {
3579 block_cache* cache = (block_cache*)_cache;
3580 TransactionLocker locker(cache);
3581
3582 cache_transaction* transaction = lookup_transaction(cache, id);
3583 if (transaction == NULL)
3584 return B_BAD_VALUE;
3585
3586 return transaction->num_blocks;
3587 }
3588
3589
3590 /*! Returns the number of blocks that are part of the main transaction. If this
3591 transaction does not have a sub transaction yet, this is the same value as
3592 cache_blocks_in_transaction() would return.
3593 */
3594 int32
cache_blocks_in_main_transaction(void * _cache,int32 id)3595 cache_blocks_in_main_transaction(void* _cache, int32 id)
3596 {
3597 block_cache* cache = (block_cache*)_cache;
3598 TransactionLocker locker(cache);
3599
3600 cache_transaction* transaction = lookup_transaction(cache, id);
3601 if (transaction == NULL)
3602 return B_BAD_VALUE;
3603
3604 if (transaction->has_sub_transaction)
3605 return transaction->main_num_blocks;
3606
3607 return transaction->num_blocks;
3608 }
3609
3610
3611 int32
cache_blocks_in_sub_transaction(void * _cache,int32 id)3612 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3613 {
3614 block_cache* cache = (block_cache*)_cache;
3615 TransactionLocker locker(cache);
3616
3617 cache_transaction* transaction = lookup_transaction(cache, id);
3618 if (transaction == NULL)
3619 return B_BAD_VALUE;
3620
3621 return transaction->sub_num_blocks;
3622 }
3623
3624
3625 /*! Check if block is in transaction
3626 */
3627 bool
cache_has_block_in_transaction(void * _cache,int32 id,off_t blockNumber)3628 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber)
3629 {
3630 block_cache* cache = (block_cache*)_cache;
3631 TransactionLocker locker(cache);
3632
3633 cached_block* block = cache->hash->Lookup(blockNumber);
3634
3635 return (block != NULL && block->transaction != NULL
3636 && block->transaction->id == id);
3637 }
3638
3639
3640 // #pragma mark - public block cache API
3641
3642
3643 void
block_cache_delete(void * _cache,bool allowWrites)3644 block_cache_delete(void* _cache, bool allowWrites)
3645 {
3646 block_cache* cache = (block_cache*)_cache;
3647
3648 if (allowWrites)
3649 block_cache_sync(cache);
3650
3651 mutex_lock(&sCachesLock);
3652 sCaches.Remove(cache);
3653 mutex_unlock(&sCachesLock);
3654
3655 mutex_lock(&cache->lock);
3656
3657 // wait for all blocks to become unbusy
3658 wait_for_busy_reading_blocks(cache);
3659 wait_for_busy_writing_blocks(cache);
3660
3661 // free all blocks
3662
3663 cached_block* block = cache->hash->Clear(true);
3664 while (block != NULL) {
3665 cached_block* next = block->next;
3666 cache->FreeBlock(block);
3667 block = next;
3668 }
3669
3670 // free all transactions (they will all be aborted)
3671
3672 cache_transaction* transaction = cache->transaction_hash->Clear(true);
3673 while (transaction != NULL) {
3674 cache_transaction* next = transaction->next;
3675 delete transaction;
3676 transaction = next;
3677 }
3678
3679 delete cache;
3680 }
3681
3682
3683 void*
block_cache_create(int fd,off_t numBlocks,size_t blockSize,bool readOnly)3684 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3685 {
3686 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3687 readOnly);
3688 if (cache == NULL)
3689 return NULL;
3690
3691 if (cache->Init() != B_OK) {
3692 delete cache;
3693 return NULL;
3694 }
3695
3696 MutexLocker _(sCachesLock);
3697 sCaches.Add(cache);
3698
3699 return cache;
3700 }
3701
3702
3703 status_t
block_cache_sync(void * _cache)3704 block_cache_sync(void* _cache)
3705 {
3706 block_cache* cache = (block_cache*)_cache;
3707
3708 // We will sync all dirty blocks to disk that have a completed
3709 // transaction or no transaction only
3710
3711 MutexLocker locker(&cache->lock);
3712
3713 BlockWriter writer(cache);
3714 BlockTable::Iterator iterator(cache->hash);
3715
3716 while (iterator.HasNext()) {
3717 cached_block* block = iterator.Next();
3718 if (block->CanBeWritten())
3719 writer.Add(block);
3720 }
3721
3722 status_t status = writer.Write();
3723
3724 locker.Unlock();
3725
3726 wait_for_notifications(cache);
3727 // make sure that all pending TRANSACTION_WRITTEN notifications
3728 // are handled after we return
3729 return status;
3730 }
3731
3732
3733 status_t
block_cache_sync_etc(void * _cache,off_t blockNumber,size_t numBlocks)3734 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3735 {
3736 block_cache* cache = (block_cache*)_cache;
3737
3738 // We will sync all dirty blocks to disk that have a completed
3739 // transaction or no transaction only
3740
3741 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3742 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
3743 " (max %" B_PRIdOFF ")",
3744 blockNumber, cache->max_blocks - 1);
3745 return B_BAD_VALUE;
3746 }
3747
3748 MutexLocker locker(&cache->lock);
3749 BlockWriter writer(cache);
3750
3751 for (; numBlocks > 0; numBlocks--, blockNumber++) {
3752 cached_block* block = cache->hash->Lookup(blockNumber);
3753 if (block == NULL)
3754 continue;
3755
3756 if (block->CanBeWritten())
3757 writer.Add(block);
3758 }
3759
3760 status_t status = writer.Write();
3761
3762 locker.Unlock();
3763
3764 wait_for_notifications(cache);
3765 // make sure that all pending TRANSACTION_WRITTEN notifications
3766 // are handled after we return
3767 return status;
3768 }
3769
3770
3771 /*! Discards a block from the current transaction or from the cache.
3772 You have to call this function when you no longer use a block, ie. when it
3773 might be reclaimed by the file cache in order to make sure they won't
3774 interfere.
3775 */
3776 void
block_cache_discard(void * _cache,off_t blockNumber,size_t numBlocks)3777 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3778 {
3779 // TODO: this could be a nice place to issue the ATA trim command
3780 block_cache* cache = (block_cache*)_cache;
3781 TransactionLocker locker(cache);
3782
3783 BlockWriter writer(cache);
3784
3785 for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3786 cached_block* block = cache->hash->Lookup(blockNumber);
3787 if (block != NULL && block->previous_transaction != NULL)
3788 writer.Add(block);
3789 }
3790
3791 writer.Write();
3792 // TODO: this can fail, too!
3793
3794 blockNumber -= numBlocks;
3795 // reset blockNumber to its original value
3796
3797 for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3798 cached_block* block = cache->hash->Lookup(blockNumber);
3799 if (block == NULL)
3800 continue;
3801
3802 ASSERT(block->previous_transaction == NULL);
3803
3804 if (block->unused) {
3805 cache->unused_blocks.Remove(block);
3806 cache->unused_block_count--;
3807 cache->RemoveBlock(block);
3808 } else {
3809 if (block->transaction != NULL && block->parent_data != NULL
3810 && block->parent_data != block->current_data) {
3811 panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3812 "transaction!", blockNumber);
3813 }
3814
3815 // mark it as discarded (in the current transaction only, if any)
3816 block->discard = true;
3817 }
3818 }
3819 }
3820
3821
3822 status_t
block_cache_make_writable(void * _cache,off_t blockNumber,int32 transaction)3823 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3824 {
3825 block_cache* cache = (block_cache*)_cache;
3826 MutexLocker locker(&cache->lock);
3827
3828 if (cache->read_only) {
3829 panic("tried to make block writable on a read-only cache!");
3830 return B_ERROR;
3831 }
3832
3833 // TODO: this can be done better!
3834 void* block;
3835 status_t status = get_writable_cached_block(cache, blockNumber,
3836 transaction, false, &block);
3837 if (status == B_OK) {
3838 put_cached_block((block_cache*)_cache, blockNumber);
3839 return B_OK;
3840 }
3841
3842 return status;
3843 }
3844
3845
3846 status_t
block_cache_get_writable_etc(void * _cache,off_t blockNumber,int32 transaction,void ** _block)3847 block_cache_get_writable_etc(void* _cache, off_t blockNumber,
3848 int32 transaction, void** _block)
3849 {
3850 block_cache* cache = (block_cache*)_cache;
3851 MutexLocker locker(&cache->lock);
3852
3853 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3854 blockNumber, transaction));
3855 if (cache->read_only)
3856 panic("tried to get writable block on a read-only cache!");
3857
3858 return get_writable_cached_block(cache, blockNumber,
3859 transaction, false, _block);
3860 }
3861
3862
3863 void*
block_cache_get_writable(void * _cache,off_t blockNumber,int32 transaction)3864 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3865 {
3866 void* block;
3867 if (block_cache_get_writable_etc(_cache, blockNumber,
3868 transaction, &block) == B_OK)
3869 return block;
3870
3871 return NULL;
3872 }
3873
3874
3875 void*
block_cache_get_empty(void * _cache,off_t blockNumber,int32 transaction)3876 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3877 {
3878 block_cache* cache = (block_cache*)_cache;
3879 MutexLocker locker(&cache->lock);
3880
3881 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3882 blockNumber, transaction));
3883 if (cache->read_only)
3884 panic("tried to get empty writable block on a read-only cache!");
3885
3886 void* block;
3887 if (get_writable_cached_block((block_cache*)_cache, blockNumber,
3888 transaction, true, &block) == B_OK)
3889 return block;
3890
3891 return NULL;
3892 }
3893
3894
3895 status_t
block_cache_get_etc(void * _cache,off_t blockNumber,const void ** _block)3896 block_cache_get_etc(void* _cache, off_t blockNumber, const void** _block)
3897 {
3898 block_cache* cache = (block_cache*)_cache;
3899 MutexLocker locker(&cache->lock);
3900 bool allocated;
3901
3902 cached_block* block;
3903 status_t status = get_cached_block(cache, blockNumber, &allocated, true,
3904 &block);
3905 if (status != B_OK)
3906 return status;
3907
3908 #if BLOCK_CACHE_DEBUG_CHANGED
3909 if (block->compare == NULL)
3910 block->compare = cache->Allocate();
3911 if (block->compare != NULL)
3912 memcpy(block->compare, block->current_data, cache->block_size);
3913 #endif
3914 TB(Get(cache, block));
3915
3916 *_block = block->current_data;
3917 return B_OK;
3918 }
3919
3920
3921 const void*
block_cache_get(void * _cache,off_t blockNumber)3922 block_cache_get(void* _cache, off_t blockNumber)
3923 {
3924 const void* block;
3925 if (block_cache_get_etc(_cache, blockNumber, &block) == B_OK)
3926 return block;
3927
3928 return NULL;
3929 }
3930
3931
3932 /*! Changes the internal status of a writable block to \a dirty. This can be
3933 helpful in case you realize you don't need to change that block anymore
3934 for whatever reason.
3935
3936 Note, you must only use this function on blocks that were acquired
3937 writable!
3938 */
3939 status_t
block_cache_set_dirty(void * _cache,off_t blockNumber,bool dirty,int32 transaction)3940 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3941 int32 transaction)
3942 {
3943 block_cache* cache = (block_cache*)_cache;
3944 MutexLocker locker(&cache->lock);
3945
3946 cached_block* block = cache->hash->Lookup(blockNumber);
3947 if (block == NULL)
3948 return B_BAD_VALUE;
3949 if (block->is_dirty == dirty) {
3950 // there is nothing to do for us
3951 return B_OK;
3952 }
3953
3954 // TODO: not yet implemented
3955 if (dirty)
3956 panic("block_cache_set_dirty(): not yet implemented that way!\n");
3957
3958 return B_OK;
3959 }
3960
3961
3962 void
block_cache_put(void * _cache,off_t blockNumber)3963 block_cache_put(void* _cache, off_t blockNumber)
3964 {
3965 block_cache* cache = (block_cache*)_cache;
3966 MutexLocker locker(&cache->lock);
3967
3968 put_cached_block(cache, blockNumber);
3969 }
3970
3971
3972 /*! Allocates blocks and schedules them to be read from disk, but does not get references to the
3973 blocks.
3974 @param blockNumber The index of the first requested block.
3975 @param _numBlocks As input, the number of blocks requested. As output, the number of
3976 blocks actually scheduled. Prefetching will stop short if the requested range includes a
3977 block that is already cached.
3978 */
3979 status_t
block_cache_prefetch(void * _cache,off_t blockNumber,size_t * _numBlocks)3980 block_cache_prefetch(void* _cache, off_t blockNumber, size_t* _numBlocks)
3981 {
3982 #ifndef BUILDING_USERLAND_FS_SERVER
3983 TRACE(("block_cache_prefetch: fetching %" B_PRIuSIZE " blocks starting with %" B_PRIdOFF "\n",
3984 *_numBlocks, blockNumber));
3985
3986 block_cache* cache = reinterpret_cast<block_cache*>(_cache);
3987 MutexLocker locker(&cache->lock);
3988
3989 size_t numBlocks = *_numBlocks;
3990 *_numBlocks = 0;
3991
3992 BlockPrefetcher* blockPrefetcher = new BlockPrefetcher(cache, blockNumber, numBlocks);
3993
3994 status_t status = blockPrefetcher->Allocate();
3995 if (status != B_OK || blockPrefetcher->NumAllocated() == 0) {
3996 TRACE(("block_cache_prefetch returning early (%s): allocated %" B_PRIuSIZE "\n",
3997 strerror(status), blockPrefetcher->NumAllocated()));
3998 delete blockPrefetcher;
3999 return status;
4000 }
4001
4002 numBlocks = blockPrefetcher->NumAllocated();
4003
4004 status = blockPrefetcher->ReadAsync(locker);
4005
4006 if (status == B_OK)
4007 *_numBlocks = numBlocks;
4008
4009 return status;
4010 #else // BUILDING_USERLAND_FS_SERVER
4011 *_numBlocks = 0;
4012 return B_UNSUPPORTED;
4013 #endif // !BUILDING_USERLAND_FS_SERVER
4014 }
4015