1 /* 2 * Copyright 2004-2020, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <block_cache.h> 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <errno.h> 13 #include <sys/uio.h> 14 15 #include <KernelExport.h> 16 #include <fs_cache.h> 17 18 #include <condition_variable.h> 19 #include <lock.h> 20 #include <low_resource_manager.h> 21 #include <slab/Slab.h> 22 #include <tracing.h> 23 #include <util/kernel_cpp.h> 24 #include <util/DoublyLinkedList.h> 25 #include <util/AutoLock.h> 26 #include <StackOrHeapArray.h> 27 #include <vm/vm_page.h> 28 29 #ifndef BUILDING_USERLAND_FS_SERVER 30 #include "IORequest.h" 31 #endif // !BUILDING_USERLAND_FS_SERVER 32 #include "kernel_debug_config.h" 33 34 35 // TODO: this is a naive but growing implementation to test the API: 36 // block reading/writing is not at all optimized for speed, it will 37 // just read and write single blocks. 38 // TODO: the retrieval/copy of the original data could be delayed until the 39 // new data must be written, ie. in low memory situations. 40 41 #ifdef _KERNEL_MODE 42 # define TRACE_ALWAYS(x...) dprintf(x) 43 #else 44 # define TRACE_ALWAYS(x...) printf(x) 45 #endif 46 47 //#define TRACE_BLOCK_CACHE 48 #ifdef TRACE_BLOCK_CACHE 49 # define TRACE(x) TRACE_ALWAYS(x) 50 #else 51 # define TRACE(x) ; 52 #endif 53 54 55 // This macro is used for fatal situations that are acceptable in a running 56 // system, like out of memory situations - should only panic for debugging. 57 #define FATAL(x) panic x 58 59 static const bigtime_t kTransactionIdleTime = 2000000LL; 60 // a transaction is considered idle after 2 seconds of inactivity 61 62 63 namespace { 64 65 struct cache_transaction; 66 struct cached_block; 67 struct block_cache; 68 typedef DoublyLinkedListLink<cached_block> block_link; 69 70 struct cached_block { 71 cached_block* next; // next in hash 72 cached_block* transaction_next; 73 block_link link; 74 off_t block_number; 75 void* current_data; 76 // The data that is seen by everyone using the API; this one is always 77 // present. 78 void* original_data; 79 // When in a transaction, this contains the original data from before 80 // the transaction. 81 void* parent_data; 82 // This is a lazily alloced buffer that represents the contents of the 83 // block in the parent transaction. It may point to current_data if the 84 // contents have been changed only in the parent transaction, or, if the 85 // block has been changed in the current sub transaction already, to a 86 // new block containing the contents changed in the parent transaction. 87 // If this is NULL, the block has not been changed in the parent 88 // transaction at all. 89 #if BLOCK_CACHE_DEBUG_CHANGED 90 void* compare; 91 #endif 92 int32 ref_count; 93 int32 last_accessed; 94 bool busy_reading : 1; 95 bool busy_writing : 1; 96 bool is_writing : 1; 97 // Block has been checked out for writing without transactions, and 98 // cannot be written back if set 99 bool is_dirty : 1; 100 bool unused : 1; 101 bool discard : 1; 102 bool busy_reading_waiters : 1; 103 bool busy_writing_waiters : 1; 104 cache_transaction* transaction; 105 // This is the current active transaction, if any, the block is 106 // currently in (meaning was changed as a part of it). 107 cache_transaction* previous_transaction; 108 // This is set to the last transaction that was ended containing this 109 // block. In this case, the block has not yet written back yet, and 110 // the changed data is either in current_data, or original_data -- the 111 // latter if the block is already being part of another transaction. 112 // There can only be one previous transaction, so when the active 113 // transaction ends, the changes of the previous transaction have to 114 // be written back before that transaction becomes the next previous 115 // transaction. 116 117 bool CanBeWritten() const; 118 int32 LastAccess() const 119 { return system_time() / 1000000L - last_accessed; } 120 }; 121 122 typedef DoublyLinkedList<cached_block, 123 DoublyLinkedListMemberGetLink<cached_block, 124 &cached_block::link> > block_list; 125 126 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> { 127 static inline void* operator new(size_t size); 128 static inline void operator delete(void* block); 129 130 int32 transaction_id; 131 int32 events_pending; 132 int32 events; 133 transaction_notification_hook hook; 134 void* data; 135 bool delete_after_event; 136 }; 137 138 typedef DoublyLinkedList<cache_notification> NotificationList; 139 140 static object_cache* sCacheNotificationCache; 141 142 struct cache_listener; 143 typedef DoublyLinkedListLink<cache_listener> listener_link; 144 145 struct cache_listener : cache_notification { 146 listener_link link; 147 }; 148 149 typedef DoublyLinkedList<cache_listener, 150 DoublyLinkedListMemberGetLink<cache_listener, 151 &cache_listener::link> > ListenerList; 152 153 void* 154 cache_notification::operator new(size_t size) 155 { 156 // We can't really know whether something is a cache_notification or a 157 // cache_listener at runtime, so we just use one object_cache for both 158 // with the size set to that of the (slightly larger) cache_listener. 159 // In practice, the vast majority of cache_notifications are really 160 // cache_listeners, so this is a more than acceptable trade-off. 161 ASSERT(size <= sizeof(cache_listener)); 162 return object_cache_alloc(sCacheNotificationCache, 0); 163 } 164 165 void 166 cache_notification::operator delete(void* block) 167 { 168 object_cache_free(sCacheNotificationCache, block, 0); 169 } 170 171 172 struct BlockHash { 173 typedef off_t KeyType; 174 typedef cached_block ValueType; 175 176 size_t HashKey(KeyType key) const 177 { 178 return key; 179 } 180 181 size_t Hash(ValueType* block) const 182 { 183 return block->block_number; 184 } 185 186 bool Compare(KeyType key, ValueType* block) const 187 { 188 return block->block_number == key; 189 } 190 191 ValueType*& GetLink(ValueType* value) const 192 { 193 return value->next; 194 } 195 }; 196 197 typedef BOpenHashTable<BlockHash> BlockTable; 198 199 200 struct TransactionHash { 201 typedef int32 KeyType; 202 typedef cache_transaction ValueType; 203 204 size_t HashKey(KeyType key) const 205 { 206 return key; 207 } 208 209 size_t Hash(ValueType* transaction) const; 210 bool Compare(KeyType key, ValueType* transaction) const; 211 ValueType*& GetLink(ValueType* value) const; 212 }; 213 214 typedef BOpenHashTable<TransactionHash> TransactionTable; 215 216 217 struct block_cache : DoublyLinkedListLinkImpl<block_cache> { 218 BlockTable* hash; 219 mutex lock; 220 int fd; 221 off_t max_blocks; 222 size_t block_size; 223 int32 next_transaction_id; 224 cache_transaction* last_transaction; 225 TransactionTable* transaction_hash; 226 227 object_cache* buffer_cache; 228 block_list unused_blocks; 229 uint32 unused_block_count; 230 231 ConditionVariable busy_reading_condition; 232 uint32 busy_reading_count; 233 bool busy_reading_waiters; 234 235 ConditionVariable busy_writing_condition; 236 uint32 busy_writing_count; 237 bool busy_writing_waiters; 238 239 bigtime_t last_block_write; 240 bigtime_t last_block_write_duration; 241 242 uint32 num_dirty_blocks; 243 bool read_only; 244 245 NotificationList pending_notifications; 246 ConditionVariable condition_variable; 247 248 block_cache(int fd, off_t numBlocks, size_t blockSize, 249 bool readOnly); 250 ~block_cache(); 251 252 status_t Init(); 253 254 void Free(void* buffer); 255 void* Allocate(); 256 void FreeBlock(cached_block* block); 257 cached_block* NewBlock(off_t blockNumber); 258 void FreeBlockParentData(cached_block* block); 259 260 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0); 261 void RemoveBlock(cached_block* block); 262 void DiscardBlock(cached_block* block); 263 264 private: 265 static void _LowMemoryHandler(void* data, uint32 resources, 266 int32 level); 267 cached_block* _GetUnusedBlock(); 268 }; 269 270 struct cache_transaction { 271 cache_transaction(); 272 273 cache_transaction* next; 274 int32 id; 275 int32 num_blocks; 276 int32 main_num_blocks; 277 int32 sub_num_blocks; 278 cached_block* first_block; 279 block_list blocks; 280 ListenerList listeners; 281 bool open; 282 bool has_sub_transaction; 283 bigtime_t last_used; 284 int32 busy_writing_count; 285 }; 286 287 288 class BlockWriter { 289 public: 290 BlockWriter(block_cache* cache, 291 size_t max = SIZE_MAX); 292 ~BlockWriter(); 293 294 bool Add(cached_block* block, 295 cache_transaction* transaction = NULL); 296 bool Add(cache_transaction* transaction, 297 bool& hasLeftOvers); 298 299 status_t Write(cache_transaction* transaction = NULL, 300 bool canUnlock = true); 301 302 bool DeletedTransaction() const 303 { return fDeletedTransaction; } 304 305 static status_t WriteBlock(block_cache* cache, 306 cached_block* block); 307 308 private: 309 void* _Data(cached_block* block) const; 310 status_t _WriteBlocks(cached_block** blocks, uint32 count); 311 void _BlockDone(cached_block* block, 312 cache_transaction* transaction); 313 void _UnmarkWriting(cached_block* block); 314 315 static int _CompareBlocks(const void* _blockA, 316 const void* _blockB); 317 318 private: 319 static const size_t kBufferSize = 64; 320 321 block_cache* fCache; 322 cached_block* fBuffer[kBufferSize]; 323 cached_block** fBlocks; 324 size_t fCount; 325 size_t fTotal; 326 size_t fCapacity; 327 size_t fMax; 328 status_t fStatus; 329 bool fDeletedTransaction; 330 }; 331 332 333 #ifndef BUILDING_USERLAND_FS_SERVER 334 class BlockPrefetcher { 335 public: 336 BlockPrefetcher(block_cache* cache, off_t fBlockNumber, 337 size_t numBlocks); 338 ~BlockPrefetcher(); 339 340 status_t Allocate(); 341 status_t ReadAsync(); 342 343 static status_t IterativeIOGetVecsHook(void* cookie, io_request* request, 344 off_t offset, size_t size, struct file_io_vec* vecs, 345 size_t* _count); 346 static status_t IterativeIOFinishedHook(void* cookie, io_request* request, 347 status_t status, bool partialTransfer, 348 size_t bytesTransferred); 349 350 size_t NumAllocated() { return fNumAllocated; } 351 352 private: 353 void _RemoveAllocated(size_t unbusyCount, size_t removeCount); 354 355 private: 356 block_cache* fCache; 357 off_t fBlockNumber; 358 size_t fNumRequested; 359 size_t fNumAllocated; 360 cached_block** fBlocks; 361 generic_io_vec* fDestVecs; 362 }; 363 #endif // !BUILDING_USERLAND_FS_SERVER 364 365 366 class TransactionLocking { 367 public: 368 inline bool Lock(block_cache* cache) 369 { 370 mutex_lock(&cache->lock); 371 372 while (cache->busy_writing_count != 0) { 373 // wait for all blocks to be written 374 ConditionVariableEntry entry; 375 cache->busy_writing_condition.Add(&entry); 376 cache->busy_writing_waiters = true; 377 378 mutex_unlock(&cache->lock); 379 380 entry.Wait(); 381 382 mutex_lock(&cache->lock); 383 } 384 385 return true; 386 } 387 388 inline void Unlock(block_cache* cache) 389 { 390 mutex_unlock(&cache->lock); 391 } 392 }; 393 394 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker; 395 396 } // namespace 397 398 399 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 400 namespace BlockTracing { 401 402 class Action : public AbstractTraceEntry { 403 public: 404 Action(block_cache* cache, cached_block* block) 405 : 406 fCache(cache), 407 fBlockNumber(block->block_number), 408 fIsDirty(block->is_dirty), 409 fHasOriginal(block->original_data != NULL), 410 fHasParent(block->parent_data != NULL), 411 fTransactionID(-1), 412 fPreviousID(-1) 413 { 414 if (block->transaction != NULL) 415 fTransactionID = block->transaction->id; 416 if (block->previous_transaction != NULL) 417 fPreviousID = block->previous_transaction->id; 418 } 419 420 virtual void AddDump(TraceOutput& out) 421 { 422 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32 423 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber, 424 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-', 425 fHasParent ? 'p' : '-', fTransactionID, fPreviousID); 426 } 427 428 virtual const char* _Action() const = 0; 429 430 private: 431 block_cache* fCache; 432 uint64 fBlockNumber; 433 bool fIsDirty; 434 bool fHasOriginal; 435 bool fHasParent; 436 int32 fTransactionID; 437 int32 fPreviousID; 438 }; 439 440 class Get : public Action { 441 public: 442 Get(block_cache* cache, cached_block* block) 443 : 444 Action(cache, block) 445 { 446 Initialized(); 447 } 448 449 virtual const char* _Action() const { return "get"; } 450 }; 451 452 class Put : public Action { 453 public: 454 Put(block_cache* cache, cached_block* block) 455 : 456 Action(cache, block) 457 { 458 Initialized(); 459 } 460 461 virtual const char* _Action() const { return "put"; } 462 }; 463 464 class Read : public Action { 465 public: 466 Read(block_cache* cache, cached_block* block) 467 : 468 Action(cache, block) 469 { 470 Initialized(); 471 } 472 473 virtual const char* _Action() const { return "read"; } 474 }; 475 476 class Write : public Action { 477 public: 478 Write(block_cache* cache, cached_block* block) 479 : 480 Action(cache, block) 481 { 482 Initialized(); 483 } 484 485 virtual const char* _Action() const { return "write"; } 486 }; 487 488 class Flush : public Action { 489 public: 490 Flush(block_cache* cache, cached_block* block, bool getUnused = false) 491 : 492 Action(cache, block), 493 fGetUnused(getUnused) 494 { 495 Initialized(); 496 } 497 498 virtual const char* _Action() const 499 { return fGetUnused ? "get-unused" : "flush"; } 500 501 private: 502 bool fGetUnused; 503 }; 504 505 class Error : public AbstractTraceEntry { 506 public: 507 Error(block_cache* cache, uint64 blockNumber, const char* message, 508 status_t status = B_OK) 509 : 510 fCache(cache), 511 fBlockNumber(blockNumber), 512 fMessage(message), 513 fStatus(status) 514 { 515 Initialized(); 516 } 517 518 virtual void AddDump(TraceOutput& out) 519 { 520 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s", 521 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "", 522 fStatus != B_OK ? strerror(fStatus) : ""); 523 } 524 525 private: 526 block_cache* fCache; 527 uint64 fBlockNumber; 528 const char* fMessage; 529 status_t fStatus; 530 }; 531 532 #if BLOCK_CACHE_BLOCK_TRACING >= 2 533 class BlockData : public AbstractTraceEntry { 534 public: 535 enum { 536 kCurrent = 0x01, 537 kParent = 0x02, 538 kOriginal = 0x04 539 }; 540 541 BlockData(block_cache* cache, cached_block* block, const char* message) 542 : 543 fCache(cache), 544 fSize(cache->block_size), 545 fBlockNumber(block->block_number), 546 fMessage(message) 547 { 548 _Allocate(fCurrent, block->current_data); 549 _Allocate(fParent, block->parent_data); 550 _Allocate(fOriginal, block->original_data); 551 552 #if KTRACE_PRINTF_STACK_TRACE 553 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 554 false); 555 #endif 556 557 Initialized(); 558 } 559 560 virtual void AddDump(TraceOutput& out) 561 { 562 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s", 563 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-', 564 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-', 565 fMessage); 566 } 567 568 #if KTRACE_PRINTF_STACK_TRACE 569 virtual void DumpStackTrace(TraceOutput& out) 570 { 571 out.PrintStackTrace(fStackTrace); 572 } 573 #endif 574 575 void DumpBlocks(uint32 which, uint32 offset, uint32 size) 576 { 577 if ((which & kCurrent) != 0) 578 DumpBlock(kCurrent, offset, size); 579 if ((which & kParent) != 0) 580 DumpBlock(kParent, offset, size); 581 if ((which & kOriginal) != 0) 582 DumpBlock(kOriginal, offset, size); 583 } 584 585 void DumpBlock(uint32 which, uint32 offset, uint32 size) 586 { 587 if (offset > fSize) { 588 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize); 589 return; 590 } 591 if (offset + size > fSize) 592 size = fSize - offset; 593 594 const char* label; 595 uint8* data; 596 597 if ((which & kCurrent) != 0) { 598 label = "current"; 599 data = fCurrent; 600 } else if ((which & kParent) != 0) { 601 label = "parent"; 602 data = fParent; 603 } else if ((which & kOriginal) != 0) { 604 label = "original"; 605 data = fOriginal; 606 } else 607 return; 608 609 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size); 610 611 static const uint32 kBlockSize = 16; 612 data += offset; 613 614 for (uint32 i = 0; i < size;) { 615 int start = i; 616 617 kprintf(" %04" B_PRIx32 " ", i); 618 for (; i < start + kBlockSize; i++) { 619 if (!(i % 4)) 620 kprintf(" "); 621 622 if (i >= size) 623 kprintf(" "); 624 else 625 kprintf("%02x", data[i]); 626 } 627 628 kprintf("\n"); 629 } 630 } 631 632 private: 633 void _Allocate(uint8*& target, void* source) 634 { 635 if (source == NULL) { 636 target = NULL; 637 return; 638 } 639 640 target = alloc_tracing_buffer_memcpy(source, fSize, false); 641 } 642 643 block_cache* fCache; 644 uint32 fSize; 645 uint64 fBlockNumber; 646 const char* fMessage; 647 uint8* fCurrent; 648 uint8* fParent; 649 uint8* fOriginal; 650 #if KTRACE_PRINTF_STACK_TRACE 651 tracing_stack_trace* fStackTrace; 652 #endif 653 }; 654 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 655 656 } // namespace BlockTracing 657 658 # define TB(x) new(std::nothrow) BlockTracing::x; 659 #else 660 # define TB(x) ; 661 #endif 662 663 #if BLOCK_CACHE_BLOCK_TRACING >= 2 664 # define TB2(x) new(std::nothrow) BlockTracing::x; 665 #else 666 # define TB2(x) ; 667 #endif 668 669 670 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 671 namespace TransactionTracing { 672 673 class Action : public AbstractTraceEntry { 674 public: 675 Action(const char* label, block_cache* cache, 676 cache_transaction* transaction) 677 : 678 fCache(cache), 679 fTransaction(transaction), 680 fID(transaction->id), 681 fSub(transaction->has_sub_transaction), 682 fNumBlocks(transaction->num_blocks), 683 fSubNumBlocks(transaction->sub_num_blocks) 684 { 685 strlcpy(fLabel, label, sizeof(fLabel)); 686 Initialized(); 687 } 688 689 virtual void AddDump(TraceOutput& out) 690 { 691 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s" 692 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction, 693 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks); 694 } 695 696 private: 697 char fLabel[12]; 698 block_cache* fCache; 699 cache_transaction* fTransaction; 700 int32 fID; 701 bool fSub; 702 int32 fNumBlocks; 703 int32 fSubNumBlocks; 704 }; 705 706 class Detach : public AbstractTraceEntry { 707 public: 708 Detach(block_cache* cache, cache_transaction* transaction, 709 cache_transaction* newTransaction) 710 : 711 fCache(cache), 712 fTransaction(transaction), 713 fID(transaction->id), 714 fSub(transaction->has_sub_transaction), 715 fNewTransaction(newTransaction), 716 fNewID(newTransaction->id) 717 { 718 Initialized(); 719 } 720 721 virtual void AddDump(TraceOutput& out) 722 { 723 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")" 724 "from transaction %p (id %" B_PRId32 ")%s", 725 fCache, fNewTransaction, fNewID, fTransaction, fID, 726 fSub ? " sub" : ""); 727 } 728 729 private: 730 block_cache* fCache; 731 cache_transaction* fTransaction; 732 int32 fID; 733 bool fSub; 734 cache_transaction* fNewTransaction; 735 int32 fNewID; 736 }; 737 738 class Abort : public AbstractTraceEntry { 739 public: 740 Abort(block_cache* cache, cache_transaction* transaction) 741 : 742 fCache(cache), 743 fTransaction(transaction), 744 fID(transaction->id), 745 fNumBlocks(0) 746 { 747 bool isSub = transaction->has_sub_transaction; 748 fNumBlocks = isSub ? transaction->sub_num_blocks 749 : transaction->num_blocks; 750 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t)); 751 if (fBlocks != NULL) { 752 cached_block* block = transaction->first_block; 753 for (int32 i = 0; block != NULL && i < fNumBlocks; 754 block = block->transaction_next) { 755 fBlocks[i++] = block->block_number; 756 } 757 } else 758 fNumBlocks = 0; 759 760 #if KTRACE_PRINTF_STACK_TRACE 761 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 762 false); 763 #endif 764 765 Initialized(); 766 } 767 768 virtual void AddDump(TraceOutput& out) 769 { 770 out.Print("block cache %p, abort transaction " 771 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID); 772 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++) 773 out.Print(" %" B_PRIdOFF, fBlocks[i]); 774 } 775 776 #if KTRACE_PRINTF_STACK_TRACE 777 virtual void DumpStackTrace(TraceOutput& out) 778 { 779 out.PrintStackTrace(fStackTrace); 780 } 781 #endif 782 783 private: 784 block_cache* fCache; 785 cache_transaction* fTransaction; 786 int32 fID; 787 off_t* fBlocks; 788 int32 fNumBlocks; 789 #if KTRACE_PRINTF_STACK_TRACE 790 tracing_stack_trace* fStackTrace; 791 #endif 792 }; 793 794 } // namespace TransactionTracing 795 796 # define T(x) new(std::nothrow) TransactionTracing::x; 797 #else 798 # define T(x) ; 799 #endif 800 801 802 static DoublyLinkedList<block_cache> sCaches; 803 static mutex sCachesLock = MUTEX_INITIALIZER("block caches"); 804 static mutex sCachesMemoryUseLock 805 = MUTEX_INITIALIZER("block caches memory use"); 806 static size_t sUsedMemory; 807 static sem_id sEventSemaphore; 808 static mutex sNotificationsLock 809 = MUTEX_INITIALIZER("block cache notifications"); 810 static thread_id sNotifierWriterThread; 811 static DoublyLinkedListLink<block_cache> sMarkCache; 812 // TODO: this only works if the link is the first entry of block_cache 813 static object_cache* sBlockCache; 814 815 816 static void mark_block_busy_reading(block_cache* cache, cached_block* block); 817 static void mark_block_unbusy_reading(block_cache* cache, cached_block* block); 818 819 820 // #pragma mark - notifications/listener 821 822 823 /*! Checks whether or not this is an event that closes a transaction. */ 824 static inline bool 825 is_closing_event(int32 event) 826 { 827 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0; 828 } 829 830 831 static inline bool 832 is_written_event(int32 event) 833 { 834 return (event & TRANSACTION_WRITTEN) != 0; 835 } 836 837 838 /*! From the specified \a notification, it will remove the lowest pending 839 event, and return that one in \a _event. 840 If there is no pending event anymore, it will return \c false. 841 */ 842 static bool 843 get_next_pending_event(cache_notification* notification, int32* _event) 844 { 845 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) { 846 int32 pending = atomic_and(¬ification->events_pending, 847 ~eventMask); 848 849 bool more = (pending & ~eventMask) != 0; 850 851 if ((pending & eventMask) != 0) { 852 *_event = eventMask; 853 return more; 854 } 855 } 856 857 return false; 858 } 859 860 861 static void 862 flush_pending_notifications(block_cache* cache) 863 { 864 ASSERT_LOCKED_MUTEX(&sCachesLock); 865 866 while (true) { 867 MutexLocker locker(sNotificationsLock); 868 869 cache_notification* notification = cache->pending_notifications.Head(); 870 if (notification == NULL) 871 return; 872 873 bool deleteAfterEvent = false; 874 int32 event = -1; 875 if (!get_next_pending_event(notification, &event)) { 876 // remove the notification if this was the last pending event 877 cache->pending_notifications.Remove(notification); 878 deleteAfterEvent = notification->delete_after_event; 879 } 880 881 if (event >= 0) { 882 // Notify listener, we need to copy the notification, as it might 883 // be removed when we unlock the list. 884 cache_notification copy = *notification; 885 locker.Unlock(); 886 887 copy.hook(copy.transaction_id, event, copy.data); 888 889 locker.Lock(); 890 } 891 892 if (deleteAfterEvent) 893 delete notification; 894 } 895 } 896 897 898 /*! Flushes all pending notifications by calling the appropriate hook 899 functions. 900 Must not be called with a cache lock held. 901 */ 902 static void 903 flush_pending_notifications() 904 { 905 MutexLocker _(sCachesLock); 906 907 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 908 while (iterator.HasNext()) { 909 block_cache* cache = iterator.Next(); 910 911 flush_pending_notifications(cache); 912 } 913 } 914 915 916 /*! Initializes the \a notification as specified. */ 917 static void 918 set_notification(cache_transaction* transaction, 919 cache_notification ¬ification, int32 events, 920 transaction_notification_hook hook, void* data) 921 { 922 notification.transaction_id = transaction != NULL ? transaction->id : -1; 923 notification.events_pending = 0; 924 notification.events = events; 925 notification.hook = hook; 926 notification.data = data; 927 notification.delete_after_event = false; 928 } 929 930 931 /*! Makes sure the notification is deleted. It either deletes it directly, 932 when possible, or marks it for deletion if the notification is pending. 933 */ 934 static void 935 delete_notification(cache_notification* notification) 936 { 937 MutexLocker locker(sNotificationsLock); 938 939 if (notification->events_pending != 0) 940 notification->delete_after_event = true; 941 else 942 delete notification; 943 } 944 945 946 /*! Adds the notification to the pending notifications list, or, if it's 947 already part of it, updates its events_pending field. 948 Also marks the notification to be deleted if \a deleteNotification 949 is \c true. 950 Triggers the notifier thread to run. 951 */ 952 static void 953 add_notification(block_cache* cache, cache_notification* notification, 954 int32 event, bool deleteNotification) 955 { 956 if (notification->hook == NULL) 957 return; 958 959 int32 pending = atomic_or(¬ification->events_pending, event); 960 if (pending == 0) { 961 // not yet part of the notification list 962 MutexLocker locker(sNotificationsLock); 963 if (deleteNotification) 964 notification->delete_after_event = true; 965 cache->pending_notifications.Add(notification); 966 } else if (deleteNotification) { 967 // we might need to delete it ourselves if we're late 968 delete_notification(notification); 969 } 970 971 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE); 972 // We're probably still holding some locks that makes rescheduling 973 // not a good idea at this point. 974 } 975 976 977 /*! Notifies all interested listeners of this transaction about the \a event. 978 If \a event is a closing event (ie. TRANSACTION_ENDED, and 979 TRANSACTION_ABORTED), all listeners except those listening to 980 TRANSACTION_WRITTEN will be removed. 981 */ 982 static void 983 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction, 984 int32 event) 985 { 986 T(Action("notify", cache, transaction)); 987 988 bool isClosing = is_closing_event(event); 989 bool isWritten = is_written_event(event); 990 991 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 992 while (iterator.HasNext()) { 993 cache_listener* listener = iterator.Next(); 994 995 bool remove = (isClosing && !is_written_event(listener->events)) 996 || (isWritten && is_written_event(listener->events)); 997 if (remove) 998 iterator.Remove(); 999 1000 if ((listener->events & event) != 0) 1001 add_notification(cache, listener, event, remove); 1002 else if (remove) 1003 delete_notification(listener); 1004 } 1005 } 1006 1007 1008 /*! Removes and deletes all listeners that are still monitoring this 1009 transaction. 1010 */ 1011 static void 1012 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction) 1013 { 1014 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 1015 while (iterator.HasNext()) { 1016 cache_listener* listener = iterator.Next(); 1017 iterator.Remove(); 1018 1019 delete_notification(listener); 1020 } 1021 } 1022 1023 1024 static status_t 1025 add_transaction_listener(block_cache* cache, cache_transaction* transaction, 1026 int32 events, transaction_notification_hook hookFunction, void* data) 1027 { 1028 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 1029 while (iterator.HasNext()) { 1030 cache_listener* listener = iterator.Next(); 1031 1032 if (listener->data == data && listener->hook == hookFunction) { 1033 // this listener already exists, just update it 1034 listener->events |= events; 1035 return B_OK; 1036 } 1037 } 1038 1039 cache_listener* listener = new cache_listener; 1040 if (listener == NULL) 1041 return B_NO_MEMORY; 1042 1043 set_notification(transaction, *listener, events, hookFunction, data); 1044 transaction->listeners.Add(listener); 1045 return B_OK; 1046 } 1047 1048 1049 // #pragma mark - private transaction 1050 1051 1052 cache_transaction::cache_transaction() 1053 { 1054 num_blocks = 0; 1055 main_num_blocks = 0; 1056 sub_num_blocks = 0; 1057 first_block = NULL; 1058 open = true; 1059 has_sub_transaction = false; 1060 last_used = system_time(); 1061 busy_writing_count = 0; 1062 } 1063 1064 1065 static void 1066 delete_transaction(block_cache* cache, cache_transaction* transaction) 1067 { 1068 if (cache->last_transaction == transaction) 1069 cache->last_transaction = NULL; 1070 1071 remove_transaction_listeners(cache, transaction); 1072 delete transaction; 1073 } 1074 1075 1076 static cache_transaction* 1077 lookup_transaction(block_cache* cache, int32 id) 1078 { 1079 return cache->transaction_hash->Lookup(id); 1080 } 1081 1082 1083 size_t TransactionHash::Hash(cache_transaction* transaction) const 1084 { 1085 return transaction->id; 1086 } 1087 1088 1089 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const 1090 { 1091 return transaction->id == key; 1092 } 1093 1094 1095 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const 1096 { 1097 return value->next; 1098 } 1099 1100 1101 /*! Writes back any changes made to blocks in \a transaction that are still 1102 part of a previous transacton. 1103 */ 1104 static status_t 1105 write_blocks_in_previous_transaction(block_cache* cache, 1106 cache_transaction* transaction) 1107 { 1108 BlockWriter writer(cache); 1109 1110 cached_block* block = transaction->first_block; 1111 for (; block != NULL; block = block->transaction_next) { 1112 if (block->previous_transaction != NULL) { 1113 // need to write back pending changes 1114 writer.Add(block); 1115 } 1116 } 1117 1118 return writer.Write(); 1119 } 1120 1121 1122 // #pragma mark - cached_block 1123 1124 1125 bool 1126 cached_block::CanBeWritten() const 1127 { 1128 return !busy_writing && !busy_reading 1129 && (previous_transaction != NULL 1130 || (transaction == NULL && is_dirty && !is_writing)); 1131 } 1132 1133 1134 // #pragma mark - BlockWriter 1135 1136 1137 BlockWriter::BlockWriter(block_cache* cache, size_t max) 1138 : 1139 fCache(cache), 1140 fBlocks(fBuffer), 1141 fCount(0), 1142 fTotal(0), 1143 fCapacity(kBufferSize), 1144 fMax(max), 1145 fStatus(B_OK), 1146 fDeletedTransaction(false) 1147 { 1148 } 1149 1150 1151 BlockWriter::~BlockWriter() 1152 { 1153 if (fBlocks != fBuffer) 1154 free(fBlocks); 1155 } 1156 1157 1158 /*! Adds the specified block to the to be written array. If no more blocks can 1159 be added, false is returned, otherwise true. 1160 */ 1161 bool 1162 BlockWriter::Add(cached_block* block, cache_transaction* transaction) 1163 { 1164 ASSERT(block->CanBeWritten()); 1165 1166 if (fTotal == fMax) 1167 return false; 1168 1169 if (fCount >= fCapacity) { 1170 // Enlarge array if necessary 1171 cached_block** newBlocks; 1172 size_t newCapacity = max_c(256, fCapacity * 2); 1173 if (fBlocks == fBuffer) 1174 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*)); 1175 else { 1176 newBlocks = (cached_block**)realloc(fBlocks, 1177 newCapacity * sizeof(void*)); 1178 } 1179 1180 if (newBlocks == NULL) { 1181 // Allocating a larger array failed - we need to write back what 1182 // we have synchronously now (this will also clear the array) 1183 Write(transaction, false); 1184 } else { 1185 if (fBlocks == fBuffer) 1186 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*)); 1187 1188 fBlocks = newBlocks; 1189 fCapacity = newCapacity; 1190 } 1191 } 1192 1193 fBlocks[fCount++] = block; 1194 fTotal++; 1195 block->busy_writing = true; 1196 fCache->busy_writing_count++; 1197 if (block->previous_transaction != NULL) 1198 block->previous_transaction->busy_writing_count++; 1199 1200 return true; 1201 } 1202 1203 1204 /*! Adds all blocks of the specified transaction to the to be written array. 1205 If no more blocks can be added, false is returned, otherwise true. 1206 */ 1207 bool 1208 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers) 1209 { 1210 ASSERT(!transaction->open); 1211 1212 if (transaction->busy_writing_count != 0) { 1213 hasLeftOvers = true; 1214 return true; 1215 } 1216 1217 hasLeftOvers = false; 1218 1219 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 1220 while (cached_block* block = blockIterator.Next()) { 1221 if (!block->CanBeWritten()) { 1222 // This block was already part of a previous transaction within this 1223 // writer 1224 hasLeftOvers = true; 1225 continue; 1226 } 1227 if (!Add(block, transaction)) 1228 return false; 1229 1230 if (DeletedTransaction()) 1231 break; 1232 } 1233 1234 return true; 1235 } 1236 1237 1238 /*! Cache must be locked when calling this method, but it will be unlocked 1239 while the blocks are written back. 1240 */ 1241 status_t 1242 BlockWriter::Write(cache_transaction* transaction, bool canUnlock) 1243 { 1244 if (fCount == 0) 1245 return B_OK; 1246 1247 if (canUnlock) 1248 mutex_unlock(&fCache->lock); 1249 1250 // Sort blocks in their on-disk order, so we can merge consecutive writes. 1251 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks); 1252 fDeletedTransaction = false; 1253 1254 bigtime_t start = system_time(); 1255 1256 for (uint32 i = 0; i < fCount; i++) { 1257 uint32 blocks = 1; 1258 for (; (i + blocks) < fCount && blocks < IOV_MAX; blocks++) { 1259 const uint32 j = i + blocks; 1260 if (fBlocks[j]->block_number != (fBlocks[j - 1]->block_number + 1)) 1261 break; 1262 } 1263 1264 status_t status = _WriteBlocks(fBlocks + i, blocks); 1265 if (status != B_OK) { 1266 // propagate to global error handling 1267 if (fStatus == B_OK) 1268 fStatus = status; 1269 1270 for (uint32 j = i; j < (i + blocks); j++) { 1271 _UnmarkWriting(fBlocks[j]); 1272 fBlocks[j] = NULL; 1273 // This block will not be marked clean 1274 } 1275 } 1276 1277 i += (blocks - 1); 1278 } 1279 1280 bigtime_t finish = system_time(); 1281 1282 if (canUnlock) 1283 mutex_lock(&fCache->lock); 1284 1285 if (fStatus == B_OK && fCount >= 8) { 1286 fCache->last_block_write = finish; 1287 fCache->last_block_write_duration = (fCache->last_block_write - start) 1288 / fCount; 1289 } 1290 1291 for (uint32 i = 0; i < fCount; i++) 1292 _BlockDone(fBlocks[i], transaction); 1293 1294 fCount = 0; 1295 return fStatus; 1296 } 1297 1298 1299 /*! Writes the specified \a block back to disk. It will always only write back 1300 the oldest change of the block if it is part of more than one transaction. 1301 It will automatically send out TRANSACTION_WRITTEN notices, as well as 1302 delete transactions when they are no longer used, and \a deleteTransaction 1303 is \c true. 1304 */ 1305 /*static*/ status_t 1306 BlockWriter::WriteBlock(block_cache* cache, cached_block* block) 1307 { 1308 BlockWriter writer(cache); 1309 1310 writer.Add(block); 1311 return writer.Write(); 1312 } 1313 1314 1315 void* 1316 BlockWriter::_Data(cached_block* block) const 1317 { 1318 return block->previous_transaction != NULL && block->original_data != NULL 1319 ? block->original_data : block->current_data; 1320 // We first need to write back changes from previous transactions 1321 } 1322 1323 1324 status_t 1325 BlockWriter::_WriteBlocks(cached_block** blocks, uint32 count) 1326 { 1327 const size_t blockSize = fCache->block_size; 1328 1329 BStackOrHeapArray<iovec, 8> vecs(count); 1330 for (uint32 i = 0; i < count; i++) { 1331 cached_block* block = blocks[i]; 1332 ASSERT(block->busy_writing); 1333 ASSERT(i == 0 || block->block_number == (blocks[i - 1]->block_number + 1)); 1334 1335 TRACE(("BlockWriter::_WriteBlocks(block %" B_PRIdOFF ", count %" B_PRIu32 ")\n", 1336 block->block_number, count)); 1337 TB(Write(fCache, block)); 1338 TB2(BlockData(fCache, block, "before write")); 1339 1340 vecs[i].iov_base = _Data(block); 1341 vecs[i].iov_len = blockSize; 1342 } 1343 1344 ssize_t written = writev_pos(fCache->fd, 1345 blocks[0]->block_number * blockSize, vecs, count); 1346 1347 if (written != (ssize_t)(blockSize * count)) { 1348 TB(Error(fCache, block->block_number, "write failed", written)); 1349 TRACE_ALWAYS("could not write back %" B_PRIu32 " blocks (start block %" B_PRIdOFF "): %s\n", 1350 count, blocks[0]->block_number, strerror(errno)); 1351 if (written < 0) 1352 return errno; 1353 1354 return B_IO_ERROR; 1355 } 1356 1357 return B_OK; 1358 } 1359 1360 1361 void 1362 BlockWriter::_BlockDone(cached_block* block, 1363 cache_transaction* transaction) 1364 { 1365 if (block == NULL) { 1366 // An error occured when trying to write this block 1367 return; 1368 } 1369 1370 if (fCache->num_dirty_blocks > 0) 1371 fCache->num_dirty_blocks--; 1372 1373 if (_Data(block) == block->current_data) 1374 block->is_dirty = false; 1375 1376 _UnmarkWriting(block); 1377 1378 cache_transaction* previous = block->previous_transaction; 1379 if (previous != NULL) { 1380 previous->blocks.Remove(block); 1381 block->previous_transaction = NULL; 1382 1383 if (block->original_data != NULL && block->transaction == NULL) { 1384 // This block is not part of a transaction, so it does not need 1385 // its original pointer anymore. 1386 fCache->Free(block->original_data); 1387 block->original_data = NULL; 1388 } 1389 1390 // Has the previous transaction been finished with that write? 1391 if (--previous->num_blocks == 0) { 1392 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id)); 1393 T(Action("written", fCache, previous)); 1394 1395 notify_transaction_listeners(fCache, previous, 1396 TRANSACTION_WRITTEN); 1397 1398 if (transaction != NULL) { 1399 // This function is called while iterating transaction_hash. We 1400 // use RemoveUnchecked so the iterator is still valid. A regular 1401 // Remove can trigger a resize of the hash table which would 1402 // result in the linked items in the table changing order. 1403 fCache->transaction_hash->RemoveUnchecked(transaction); 1404 } else 1405 fCache->transaction_hash->Remove(previous); 1406 1407 delete_transaction(fCache, previous); 1408 fDeletedTransaction = true; 1409 } 1410 } 1411 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) { 1412 // the block is no longer used 1413 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1414 block->unused = true; 1415 fCache->unused_blocks.Add(block); 1416 fCache->unused_block_count++; 1417 } 1418 1419 TB2(BlockData(fCache, block, "after write")); 1420 } 1421 1422 1423 void 1424 BlockWriter::_UnmarkWriting(cached_block* block) 1425 { 1426 block->busy_writing = false; 1427 if (block->previous_transaction != NULL) 1428 block->previous_transaction->busy_writing_count--; 1429 fCache->busy_writing_count--; 1430 1431 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0) 1432 || block->busy_writing_waiters) { 1433 fCache->busy_writing_waiters = false; 1434 block->busy_writing_waiters = false; 1435 fCache->busy_writing_condition.NotifyAll(); 1436 } 1437 } 1438 1439 1440 /*static*/ int 1441 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB) 1442 { 1443 cached_block* blockA = *(cached_block**)_blockA; 1444 cached_block* blockB = *(cached_block**)_blockB; 1445 1446 off_t diff = blockA->block_number - blockB->block_number; 1447 if (diff > 0) 1448 return 1; 1449 1450 return diff < 0 ? -1 : 0; 1451 } 1452 1453 1454 #ifndef BUILDING_USERLAND_FS_SERVER 1455 // #pragma mark - BlockPrefetcher 1456 1457 1458 BlockPrefetcher::BlockPrefetcher(block_cache* cache, off_t blockNumber, size_t numBlocks) 1459 : 1460 fCache(cache), 1461 fBlockNumber(blockNumber), 1462 fNumRequested(numBlocks), 1463 fNumAllocated(0) 1464 { 1465 fBlocks = new cached_block*[numBlocks]; 1466 fDestVecs = new generic_io_vec[numBlocks]; 1467 } 1468 1469 1470 BlockPrefetcher::~BlockPrefetcher() 1471 { 1472 delete[] fBlocks; 1473 delete[] fDestVecs; 1474 } 1475 1476 1477 /*! Allocates cached_block objects in preparation for prefetching. 1478 @return If an error is returned, then no blocks have been allocated. 1479 @post Blocks have been constructed (including allocating the current_data member) 1480 but current_data is uninitialized. 1481 */ 1482 status_t 1483 BlockPrefetcher::Allocate() 1484 { 1485 TRACE(("BlockPrefetcher::Allocate: looking up %" B_PRIuSIZE " blocks, starting with %" 1486 B_PRIdOFF "\n", fNumBlocks, fBlockNumber)); 1487 1488 ASSERT_LOCKED_MUTEX(&fCache->lock); 1489 1490 size_t finalNumBlocks = fNumRequested; 1491 1492 // determine whether any requested blocks are already cached 1493 for (size_t i = 0; i < fNumRequested; ++i) { 1494 off_t blockNumIter = fBlockNumber + i; 1495 if (blockNumIter < 0 || blockNumIter >= fCache->max_blocks) { 1496 panic("BlockPrefetcher::Allocate: invalid block number %" B_PRIdOFF " (max %" 1497 B_PRIdOFF ")", blockNumIter, fCache->max_blocks - 1); 1498 return B_BAD_VALUE; 1499 } 1500 cached_block* block = fCache->hash->Lookup(blockNumIter); 1501 if (block != NULL) { 1502 // truncate the request 1503 TRACE(("BlockPrefetcher::Allocate: found an existing block (%" B_PRIdOFF ")\n", 1504 blockNumIter)); 1505 fBlocks[i] = NULL; 1506 finalNumBlocks = i; 1507 break; 1508 } 1509 } 1510 1511 // allocate the blocks 1512 for (size_t i = 0; i < finalNumBlocks; ++i) { 1513 cached_block* block = fCache->NewBlock(fBlockNumber + i); 1514 if (block == NULL) { 1515 _RemoveAllocated(0, i); 1516 return B_NO_MEMORY; 1517 } 1518 fCache->hash->Insert(block); 1519 1520 block->unused = true; 1521 fCache->unused_blocks.Add(block); 1522 fCache->unused_block_count++; 1523 1524 fBlocks[i] = block; 1525 } 1526 1527 fNumAllocated = finalNumBlocks; 1528 1529 return B_OK; 1530 } 1531 1532 1533 /*! Schedules reads from disk to cache. 1534 @return If an error is returned, then the previously allocated blocks have been cleaned up. 1535 @post The calling object will eventually be deleted by IterativeIOFinishedHook. 1536 */ 1537 status_t 1538 BlockPrefetcher::ReadAsync() 1539 { 1540 TRACE(("BlockPrefetcher::Read: reading %" B_PRIuSIZE " blocks\n", fNumAllocated)); 1541 1542 size_t blockSize = fCache->block_size; 1543 generic_io_vec* vecs = fDestVecs; 1544 for (size_t i = 0; i < fNumAllocated; ++i) { 1545 vecs[i].base = reinterpret_cast<generic_addr_t>(fBlocks[i]->current_data); 1546 vecs[i].length = blockSize; 1547 mark_block_busy_reading(fCache, fBlocks[i]); 1548 } 1549 1550 IORequest* request = new IORequest; 1551 status_t status = request->Init(fBlockNumber * blockSize, vecs, fNumAllocated, 1552 fNumAllocated * blockSize, false, B_DELETE_IO_REQUEST); 1553 if (status != B_OK) { 1554 TB(Error(fCache, fBlockNumber, "IORequest::Init starting here failed", status)); 1555 TRACE_ALWAYS("BlockPrefetcher::Read: failed to initialize IO request for %" B_PRIuSIZE 1556 " blocks starting with %" B_PRIdOFF ": %s\n", 1557 fNumAllocated, fBlockNumber, strerror(status)); 1558 _RemoveAllocated(fNumAllocated, fNumAllocated); 1559 delete request; 1560 return status; 1561 } 1562 1563 return do_iterative_fd_io(fCache->fd, request, IterativeIOGetVecsHook, IterativeIOFinishedHook, 1564 this); 1565 } 1566 1567 1568 /*static*/ status_t 1569 BlockPrefetcher::IterativeIOGetVecsHook(void* cookie, io_request* request, off_t offset, 1570 size_t size, struct file_io_vec* vecs, size_t* _count) 1571 { 1572 TRACE(("BlockPrefetcher::IterativeIOGetVecsHook: setting offset %" B_PRIdOFF " and length %" 1573 B_PRIuSIZE "\n", offset, size)); 1574 1575 if (*_count == 0) 1576 return B_OK; 1577 1578 vecs[0].offset = offset; 1579 // the requested offset was volume-relative to begin with 1580 vecs[0].length = size; 1581 // the request is always for a contiguous run of blocks 1582 *_count = 1; 1583 1584 return B_OK; 1585 } 1586 1587 1588 /*static*/ status_t 1589 BlockPrefetcher::IterativeIOFinishedHook(void* cookie, io_request* request, status_t status, 1590 bool partialTransfer, size_t bytesTransferred) 1591 { 1592 TRACE(("BlockPrefetcher::IterativeIOFinishedHook: status %s, partial %d\n", strerror(status), 1593 partialTransfer)); 1594 1595 BlockPrefetcher* blockPrefetcher = reinterpret_cast<BlockPrefetcher*>(cookie); 1596 block_cache* cache = blockPrefetcher->fCache; 1597 cached_block** blocks = blockPrefetcher->fBlocks; 1598 size_t blockSize = cache->block_size; 1599 off_t blockNumber = blockPrefetcher->fBlockNumber; 1600 size_t numBlocks = blockPrefetcher->fNumAllocated; 1601 1602 MutexLocker locker(&cache->lock); 1603 1604 if (bytesTransferred < numBlocks * blockSize) { 1605 blockPrefetcher->_RemoveAllocated(numBlocks, numBlocks); 1606 TB(Error(cache, blockNumber, "prefetch starting here failed", status)); 1607 TRACE_ALWAYS("prefetch_iterative_io_finished_hook: transferred only %" B_PRIuSIZE 1608 " bytes in attempt to read %" B_PRIuSIZE " blocks (start block %" B_PRIdOFF "): %s\n", 1609 bytesTransferred, numBlocks, blockNumber, strerror(status)); 1610 } else { 1611 for (size_t i = 0; i < numBlocks; ++i) { 1612 TB(Read(cache, blockNumber + i)); 1613 mark_block_unbusy_reading(cache, blocks[i]); 1614 blocks[i]->last_accessed = system_time() / 1000000L; 1615 } 1616 } 1617 1618 delete blockPrefetcher; 1619 1620 return status; 1621 } 1622 1623 1624 /*! Cleans up blocks that were allocated for prefetching when an in-progress prefetch 1625 is cancelled. 1626 */ 1627 void 1628 BlockPrefetcher::_RemoveAllocated(size_t unbusyCount, size_t removeCount) 1629 { 1630 TRACE(("BlockPrefetcher::_RemoveAllocated: unbusy %" B_PRIuSIZE " and remove %" B_PRIuSIZE 1631 " starting with %" B_PRIdOFF "\n", unbusyCount, removeCount, (*fBlocks)->block_number)); 1632 1633 ASSERT_LOCKED_MUTEX(&fCache->lock); 1634 1635 for (size_t i = 0; i < unbusyCount; ++i) 1636 mark_block_unbusy_reading(fCache, fBlocks[i]); 1637 1638 for (size_t i = 0; i < removeCount; ++i) { 1639 ASSERT(fBlocks[i]->is_dirty == false && fBlocks[i]->unused == true); 1640 1641 fCache->unused_blocks.Remove(fBlocks[i]); 1642 fCache->unused_block_count--; 1643 1644 fCache->RemoveBlock(fBlocks[i]); 1645 fBlocks[i] = NULL; 1646 } 1647 1648 fNumAllocated = 0; 1649 1650 return; 1651 } 1652 #endif // !BUILDING_USERLAND_FS_SERVER 1653 1654 1655 // #pragma mark - block_cache 1656 1657 1658 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize, 1659 bool readOnly) 1660 : 1661 hash(NULL), 1662 fd(_fd), 1663 max_blocks(numBlocks), 1664 block_size(blockSize), 1665 next_transaction_id(1), 1666 last_transaction(NULL), 1667 transaction_hash(NULL), 1668 buffer_cache(NULL), 1669 unused_block_count(0), 1670 busy_reading_count(0), 1671 busy_reading_waiters(false), 1672 busy_writing_count(0), 1673 busy_writing_waiters(0), 1674 last_block_write(0), 1675 last_block_write_duration(0), 1676 num_dirty_blocks(0), 1677 read_only(readOnly) 1678 { 1679 } 1680 1681 1682 /*! Should be called with the cache's lock held. */ 1683 block_cache::~block_cache() 1684 { 1685 unregister_low_resource_handler(&_LowMemoryHandler, this); 1686 1687 delete transaction_hash; 1688 delete hash; 1689 1690 delete_object_cache(buffer_cache); 1691 1692 mutex_destroy(&lock); 1693 } 1694 1695 1696 status_t 1697 block_cache::Init() 1698 { 1699 busy_reading_condition.Init(this, "cache block busy_reading"); 1700 busy_writing_condition.Init(this, "cache block busy writing"); 1701 condition_variable.Init(this, "cache transaction sync"); 1702 mutex_init(&lock, "block cache"); 1703 1704 buffer_cache = create_object_cache_etc("block cache buffers", block_size, 1705 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 1706 if (buffer_cache == NULL) 1707 return B_NO_MEMORY; 1708 1709 hash = new BlockTable(); 1710 if (hash == NULL || hash->Init(1024) != B_OK) 1711 return B_NO_MEMORY; 1712 1713 transaction_hash = new(std::nothrow) TransactionTable(); 1714 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK) 1715 return B_NO_MEMORY; 1716 1717 return register_low_resource_handler(&_LowMemoryHandler, this, 1718 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1719 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0); 1720 } 1721 1722 1723 void 1724 block_cache::Free(void* buffer) 1725 { 1726 if (buffer != NULL) 1727 object_cache_free(buffer_cache, buffer, 0); 1728 } 1729 1730 1731 void* 1732 block_cache::Allocate() 1733 { 1734 void* block = object_cache_alloc(buffer_cache, 0); 1735 if (block != NULL) 1736 return block; 1737 1738 // recycle existing before allocating a new one 1739 RemoveUnusedBlocks(100); 1740 1741 return object_cache_alloc(buffer_cache, 0); 1742 } 1743 1744 1745 void 1746 block_cache::FreeBlock(cached_block* block) 1747 { 1748 Free(block->current_data); 1749 1750 if (block->original_data != NULL || block->parent_data != NULL) { 1751 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n", 1752 block->block_number, block->original_data, block->parent_data); 1753 } 1754 1755 #if BLOCK_CACHE_DEBUG_CHANGED 1756 Free(block->compare); 1757 #endif 1758 1759 object_cache_free(sBlockCache, block, 0); 1760 } 1761 1762 1763 /*! Allocates a new block for \a blockNumber, ready for use */ 1764 cached_block* 1765 block_cache::NewBlock(off_t blockNumber) 1766 { 1767 cached_block* block = NULL; 1768 1769 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1770 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) { 1771 // recycle existing instead of allocating a new one 1772 block = _GetUnusedBlock(); 1773 } 1774 if (block == NULL) { 1775 block = (cached_block*)object_cache_alloc(sBlockCache, 0); 1776 if (block != NULL) { 1777 block->current_data = Allocate(); 1778 if (block->current_data == NULL) { 1779 object_cache_free(sBlockCache, block, 0); 1780 return NULL; 1781 } 1782 } else { 1783 TB(Error(this, blockNumber, "allocation failed")); 1784 TRACE_ALWAYS("block allocation failed, unused list is %sempty.\n", 1785 unused_blocks.IsEmpty() ? "" : "not "); 1786 1787 // allocation failed, try to reuse an unused block 1788 block = _GetUnusedBlock(); 1789 if (block == NULL) { 1790 TB(Error(this, blockNumber, "get unused failed")); 1791 FATAL(("could not allocate block!\n")); 1792 return NULL; 1793 } 1794 } 1795 } 1796 1797 block->block_number = blockNumber; 1798 block->ref_count = 0; 1799 block->last_accessed = 0; 1800 block->transaction_next = NULL; 1801 block->transaction = block->previous_transaction = NULL; 1802 block->original_data = NULL; 1803 block->parent_data = NULL; 1804 block->busy_reading = false; 1805 block->busy_writing = false; 1806 block->is_writing = false; 1807 block->is_dirty = false; 1808 block->unused = false; 1809 block->discard = false; 1810 block->busy_reading_waiters = false; 1811 block->busy_writing_waiters = false; 1812 #if BLOCK_CACHE_DEBUG_CHANGED 1813 block->compare = NULL; 1814 #endif 1815 1816 return block; 1817 } 1818 1819 1820 void 1821 block_cache::FreeBlockParentData(cached_block* block) 1822 { 1823 ASSERT(block->parent_data != NULL); 1824 if (block->parent_data != block->current_data) 1825 Free(block->parent_data); 1826 block->parent_data = NULL; 1827 } 1828 1829 1830 void 1831 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld) 1832 { 1833 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count)); 1834 1835 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1836 cached_block* block = iterator.Next();) { 1837 if (minSecondsOld >= block->LastAccess()) { 1838 // The list is sorted by last access 1839 break; 1840 } 1841 if (block->busy_reading || block->busy_writing) 1842 continue; 1843 1844 TB(Flush(this, block)); 1845 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n", 1846 block->block_number, block->last_accessed)); 1847 1848 // this can only happen if no transactions are used 1849 if (block->is_dirty && !block->discard) { 1850 if (block->busy_writing) 1851 continue; 1852 1853 BlockWriter::WriteBlock(this, block); 1854 } 1855 1856 // remove block from lists 1857 iterator.Remove(); 1858 unused_block_count--; 1859 RemoveBlock(block); 1860 1861 if (--count <= 0) 1862 break; 1863 } 1864 } 1865 1866 1867 void 1868 block_cache::RemoveBlock(cached_block* block) 1869 { 1870 hash->Remove(block); 1871 FreeBlock(block); 1872 } 1873 1874 1875 /*! Discards the block from a transaction (this method must not be called 1876 for blocks not part of a transaction). 1877 */ 1878 void 1879 block_cache::DiscardBlock(cached_block* block) 1880 { 1881 ASSERT(block->discard); 1882 ASSERT(block->previous_transaction == NULL); 1883 1884 if (block->parent_data != NULL) 1885 FreeBlockParentData(block); 1886 1887 if (block->original_data != NULL) { 1888 Free(block->original_data); 1889 block->original_data = NULL; 1890 } 1891 1892 RemoveBlock(block); 1893 } 1894 1895 1896 void 1897 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level) 1898 { 1899 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level)); 1900 1901 // free some blocks according to the low memory state 1902 // (if there is enough memory left, we don't free any) 1903 1904 block_cache* cache = (block_cache*)data; 1905 if (cache->unused_block_count <= 1) 1906 return; 1907 1908 int32 free = 0; 1909 int32 secondsOld = 0; 1910 switch (level) { 1911 case B_NO_LOW_RESOURCE: 1912 return; 1913 case B_LOW_RESOURCE_NOTE: 1914 free = cache->unused_block_count / 4; 1915 secondsOld = 120; 1916 break; 1917 case B_LOW_RESOURCE_WARNING: 1918 free = cache->unused_block_count / 2; 1919 secondsOld = 10; 1920 break; 1921 case B_LOW_RESOURCE_CRITICAL: 1922 free = cache->unused_block_count - 1; 1923 secondsOld = 0; 1924 break; 1925 } 1926 1927 MutexLocker locker(&cache->lock); 1928 1929 if (!locker.IsLocked()) { 1930 // If our block_cache were deleted, it could be that we had 1931 // been called before that deletion went through, therefore, 1932 // acquiring its lock might fail. 1933 return; 1934 } 1935 1936 #ifdef TRACE_BLOCK_CACHE 1937 uint32 oldUnused = cache->unused_block_count; 1938 #endif 1939 1940 cache->RemoveUnusedBlocks(free, secondsOld); 1941 1942 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n", 1943 cache, oldUnused, cache->unused_block_count)); 1944 } 1945 1946 1947 cached_block* 1948 block_cache::_GetUnusedBlock() 1949 { 1950 TRACE(("block_cache: get unused block\n")); 1951 1952 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1953 cached_block* block = iterator.Next();) { 1954 TB(Flush(this, block, true)); 1955 // this can only happen if no transactions are used 1956 if (block->is_dirty && !block->busy_writing && !block->discard) 1957 BlockWriter::WriteBlock(this, block); 1958 1959 // remove block from lists 1960 iterator.Remove(); 1961 unused_block_count--; 1962 hash->Remove(block); 1963 1964 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1965 block->unused = false; 1966 1967 // TODO: see if compare data is handled correctly here! 1968 #if BLOCK_CACHE_DEBUG_CHANGED 1969 if (block->compare != NULL) 1970 Free(block->compare); 1971 #endif 1972 return block; 1973 } 1974 1975 return NULL; 1976 } 1977 1978 1979 // #pragma mark - private block functions 1980 1981 1982 /*! Cache must be locked. 1983 */ 1984 static void 1985 mark_block_busy_reading(block_cache* cache, cached_block* block) 1986 { 1987 block->busy_reading = true; 1988 cache->busy_reading_count++; 1989 } 1990 1991 1992 /*! Cache must be locked. 1993 */ 1994 static void 1995 mark_block_unbusy_reading(block_cache* cache, cached_block* block) 1996 { 1997 block->busy_reading = false; 1998 cache->busy_reading_count--; 1999 2000 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0) 2001 || block->busy_reading_waiters) { 2002 cache->busy_reading_waiters = false; 2003 block->busy_reading_waiters = false; 2004 cache->busy_reading_condition.NotifyAll(); 2005 } 2006 } 2007 2008 2009 /*! Cache must be locked. 2010 */ 2011 static void 2012 wait_for_busy_reading_block(block_cache* cache, cached_block* block) 2013 { 2014 while (block->busy_reading) { 2015 // wait for at least the specified block to be read in 2016 ConditionVariableEntry entry; 2017 cache->busy_reading_condition.Add(&entry); 2018 block->busy_reading_waiters = true; 2019 2020 mutex_unlock(&cache->lock); 2021 2022 entry.Wait(); 2023 2024 mutex_lock(&cache->lock); 2025 } 2026 } 2027 2028 2029 /*! Cache must be locked. 2030 */ 2031 static void 2032 wait_for_busy_reading_blocks(block_cache* cache) 2033 { 2034 while (cache->busy_reading_count != 0) { 2035 // wait for all blocks to be read in 2036 ConditionVariableEntry entry; 2037 cache->busy_reading_condition.Add(&entry); 2038 cache->busy_reading_waiters = true; 2039 2040 mutex_unlock(&cache->lock); 2041 2042 entry.Wait(); 2043 2044 mutex_lock(&cache->lock); 2045 } 2046 } 2047 2048 2049 /*! Cache must be locked. 2050 */ 2051 static void 2052 wait_for_busy_writing_block(block_cache* cache, cached_block* block) 2053 { 2054 while (block->busy_writing) { 2055 // wait for all blocks to be written back 2056 ConditionVariableEntry entry; 2057 cache->busy_writing_condition.Add(&entry); 2058 block->busy_writing_waiters = true; 2059 2060 mutex_unlock(&cache->lock); 2061 2062 entry.Wait(); 2063 2064 mutex_lock(&cache->lock); 2065 } 2066 } 2067 2068 2069 /*! Cache must be locked. 2070 */ 2071 static void 2072 wait_for_busy_writing_blocks(block_cache* cache) 2073 { 2074 while (cache->busy_writing_count != 0) { 2075 // wait for all blocks to be written back 2076 ConditionVariableEntry entry; 2077 cache->busy_writing_condition.Add(&entry); 2078 cache->busy_writing_waiters = true; 2079 2080 mutex_unlock(&cache->lock); 2081 2082 entry.Wait(); 2083 2084 mutex_lock(&cache->lock); 2085 } 2086 } 2087 2088 2089 /*! Removes a reference from the specified \a block. If this was the last 2090 reference, the block is moved into the unused list. 2091 In low memory situations, it will also free some blocks from that list, 2092 but not necessarily the \a block it just released. 2093 */ 2094 static void 2095 put_cached_block(block_cache* cache, cached_block* block) 2096 { 2097 #if BLOCK_CACHE_DEBUG_CHANGED 2098 if (!block->is_dirty && block->compare != NULL 2099 && memcmp(block->current_data, block->compare, cache->block_size)) { 2100 TRACE_ALWAYS("new block:\n"); 2101 dump_block((const char*)block->current_data, 256, " "); 2102 TRACE_ALWAYS("unchanged block:\n"); 2103 dump_block((const char*)block->compare, 256, " "); 2104 BlockWriter::WriteBlock(cache, block); 2105 panic("block_cache: supposed to be clean block was changed!\n"); 2106 2107 cache->Free(block->compare); 2108 block->compare = NULL; 2109 } 2110 #endif 2111 TB(Put(cache, block)); 2112 2113 if (block->ref_count < 1) { 2114 panic("Invalid ref_count for block %p, cache %p\n", block, cache); 2115 return; 2116 } 2117 2118 if (--block->ref_count == 0 2119 && block->transaction == NULL && block->previous_transaction == NULL) { 2120 // This block is not used anymore, and not part of any transaction 2121 block->is_writing = false; 2122 2123 if (block->discard) { 2124 cache->RemoveBlock(block); 2125 } else { 2126 // put this block in the list of unused blocks 2127 ASSERT(!block->unused); 2128 block->unused = true; 2129 2130 ASSERT(block->original_data == NULL && block->parent_data == NULL); 2131 cache->unused_blocks.Add(block); 2132 cache->unused_block_count++; 2133 } 2134 } 2135 } 2136 2137 2138 static void 2139 put_cached_block(block_cache* cache, off_t blockNumber) 2140 { 2141 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 2142 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 2143 blockNumber, cache->max_blocks - 1); 2144 } 2145 2146 cached_block* block = cache->hash->Lookup(blockNumber); 2147 if (block != NULL) 2148 put_cached_block(cache, block); 2149 else { 2150 TB(Error(cache, blockNumber, "put unknown")); 2151 } 2152 } 2153 2154 2155 /*! Retrieves the block \a blockNumber from the hash table, if it's already 2156 there, or reads it from the disk. 2157 You need to have the cache locked when calling this function. 2158 2159 \param _allocated tells you whether or not a new block has been allocated 2160 to satisfy your request. 2161 \param readBlock if \c false, the block will not be read in case it was 2162 not already in the cache. The block you retrieve may contain random 2163 data. If \c true, the cache will be temporarily unlocked while the 2164 block is read in. 2165 */ 2166 static status_t 2167 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated, 2168 bool readBlock, cached_block** _block) 2169 { 2170 ASSERT_LOCKED_MUTEX(&cache->lock); 2171 2172 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 2173 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 2174 blockNumber, cache->max_blocks - 1); 2175 return B_BAD_VALUE; 2176 } 2177 2178 retry: 2179 cached_block* block = cache->hash->Lookup(blockNumber); 2180 *_allocated = false; 2181 2182 if (block == NULL) { 2183 // put block into cache 2184 block = cache->NewBlock(blockNumber); 2185 if (block == NULL) 2186 return B_NO_MEMORY; 2187 2188 cache->hash->Insert(block); 2189 *_allocated = true; 2190 } else if (block->busy_reading) { 2191 // The block is currently busy_reading - wait and try again later 2192 wait_for_busy_reading_block(cache, block); 2193 goto retry; 2194 } 2195 2196 if (block->unused) { 2197 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber)); 2198 block->unused = false; 2199 cache->unused_blocks.Remove(block); 2200 cache->unused_block_count--; 2201 } 2202 2203 if (*_allocated && readBlock) { 2204 // read block into cache 2205 int32 blockSize = cache->block_size; 2206 2207 mark_block_busy_reading(cache, block); 2208 mutex_unlock(&cache->lock); 2209 2210 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize, 2211 block->current_data, blockSize); 2212 2213 mutex_lock(&cache->lock); 2214 if (bytesRead < blockSize) { 2215 cache->RemoveBlock(block); 2216 TB(Error(cache, blockNumber, "read failed", bytesRead)); 2217 2218 TRACE_ALWAYS("could not read block %" B_PRIdOFF ": bytesRead: %zd," 2219 " error: %s\n", blockNumber, bytesRead, strerror(errno)); 2220 return errno; 2221 } 2222 TB(Read(cache, block)); 2223 2224 mark_block_unbusy_reading(cache, block); 2225 } 2226 2227 block->ref_count++; 2228 block->last_accessed = system_time() / 1000000L; 2229 2230 *_block = block; 2231 return B_OK; 2232 } 2233 2234 2235 /*! Returns the writable block data for the requested blockNumber. 2236 If \a cleared is true, the block is not read from disk; an empty block 2237 is returned. 2238 2239 This is the only method to insert a block into a transaction. It makes 2240 sure that the previous block contents are preserved in that case. 2241 */ 2242 static status_t 2243 get_writable_cached_block(block_cache* cache, off_t blockNumber, 2244 int32 transactionID, bool cleared, void** _block) 2245 { 2246 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 2247 blockNumber, transactionID)); 2248 2249 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 2250 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 2251 blockNumber, cache->max_blocks - 1); 2252 return B_BAD_VALUE; 2253 } 2254 2255 bool allocated; 2256 cached_block* block; 2257 status_t status = get_cached_block(cache, blockNumber, &allocated, 2258 !cleared, &block); 2259 if (status != B_OK) 2260 return status; 2261 2262 if (block->busy_writing) 2263 wait_for_busy_writing_block(cache, block); 2264 2265 block->discard = false; 2266 2267 // if there is no transaction support, we just return the current block 2268 if (transactionID == -1) { 2269 if (cleared) { 2270 mark_block_busy_reading(cache, block); 2271 mutex_unlock(&cache->lock); 2272 2273 memset(block->current_data, 0, cache->block_size); 2274 2275 mutex_lock(&cache->lock); 2276 mark_block_unbusy_reading(cache, block); 2277 } 2278 2279 block->is_writing = true; 2280 2281 if (!block->is_dirty) { 2282 cache->num_dirty_blocks++; 2283 block->is_dirty = true; 2284 // mark the block as dirty 2285 } 2286 2287 TB(Get(cache, block)); 2288 *_block = block->current_data; 2289 return B_OK; 2290 } 2291 2292 cache_transaction* transaction = block->transaction; 2293 2294 if (transaction != NULL && transaction->id != transactionID) { 2295 // TODO: we have to wait here until the other transaction is done. 2296 // Maybe we should even panic, since we can't prevent any deadlocks. 2297 panic("get_writable_cached_block(): asked to get busy writable block " 2298 "(transaction %" B_PRId32 ")\n", block->transaction->id); 2299 put_cached_block(cache, block); 2300 return B_BAD_VALUE; 2301 } 2302 if (transaction == NULL && transactionID != -1) { 2303 // get new transaction 2304 transaction = lookup_transaction(cache, transactionID); 2305 if (transaction == NULL) { 2306 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n", 2307 transactionID); 2308 put_cached_block(cache, block); 2309 return B_BAD_VALUE; 2310 } 2311 if (!transaction->open) { 2312 panic("get_writable_cached_block(): transaction already done!\n"); 2313 put_cached_block(cache, block); 2314 return B_BAD_VALUE; 2315 } 2316 2317 block->transaction = transaction; 2318 2319 // attach the block to the transaction block list 2320 block->transaction_next = transaction->first_block; 2321 transaction->first_block = block; 2322 transaction->num_blocks++; 2323 } 2324 if (transaction != NULL) 2325 transaction->last_used = system_time(); 2326 2327 bool wasUnchanged = block->original_data == NULL 2328 || block->previous_transaction != NULL; 2329 2330 if (!(allocated && cleared) && block->original_data == NULL) { 2331 // we already have data, so we need to preserve it 2332 block->original_data = cache->Allocate(); 2333 if (block->original_data == NULL) { 2334 TB(Error(cache, blockNumber, "allocate original failed")); 2335 FATAL(("could not allocate original_data\n")); 2336 put_cached_block(cache, block); 2337 return B_NO_MEMORY; 2338 } 2339 2340 mark_block_busy_reading(cache, block); 2341 mutex_unlock(&cache->lock); 2342 2343 memcpy(block->original_data, block->current_data, cache->block_size); 2344 2345 mutex_lock(&cache->lock); 2346 mark_block_unbusy_reading(cache, block); 2347 } 2348 if (block->parent_data == block->current_data) { 2349 // remember any previous contents for the parent transaction 2350 block->parent_data = cache->Allocate(); 2351 if (block->parent_data == NULL) { 2352 // TODO: maybe we should just continue the current transaction in 2353 // this case... 2354 TB(Error(cache, blockNumber, "allocate parent failed")); 2355 FATAL(("could not allocate parent\n")); 2356 put_cached_block(cache, block); 2357 return B_NO_MEMORY; 2358 } 2359 2360 mark_block_busy_reading(cache, block); 2361 mutex_unlock(&cache->lock); 2362 2363 memcpy(block->parent_data, block->current_data, cache->block_size); 2364 2365 mutex_lock(&cache->lock); 2366 mark_block_unbusy_reading(cache, block); 2367 2368 transaction->sub_num_blocks++; 2369 } else if (transaction != NULL && transaction->has_sub_transaction 2370 && block->parent_data == NULL && wasUnchanged) 2371 transaction->sub_num_blocks++; 2372 2373 if (cleared) { 2374 mark_block_busy_reading(cache, block); 2375 mutex_unlock(&cache->lock); 2376 2377 memset(block->current_data, 0, cache->block_size); 2378 2379 mutex_lock(&cache->lock); 2380 mark_block_unbusy_reading(cache, block); 2381 } 2382 2383 block->is_dirty = true; 2384 TB(Get(cache, block)); 2385 TB2(BlockData(cache, block, "get writable")); 2386 2387 *_block = block->current_data; 2388 return B_OK; 2389 } 2390 2391 2392 #if DEBUG_BLOCK_CACHE 2393 2394 2395 static void 2396 dump_block(cached_block* block) 2397 { 2398 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32 2399 " %c%c%c%c%c%c %08lx %08lx\n", 2400 (addr_t)block, block->block_number, 2401 (addr_t)block->current_data, (addr_t)block->original_data, 2402 (addr_t)block->parent_data, block->ref_count, block->LastAccess(), 2403 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-', 2404 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-', 2405 block->unused ? 'U' : '-', block->discard ? 'D' : '-', 2406 (addr_t)block->transaction, 2407 (addr_t)block->previous_transaction); 2408 } 2409 2410 2411 static void 2412 dump_block_long(cached_block* block) 2413 { 2414 kprintf("BLOCK %p\n", block); 2415 kprintf(" current data: %p\n", block->current_data); 2416 kprintf(" original data: %p\n", block->original_data); 2417 kprintf(" parent data: %p\n", block->parent_data); 2418 #if BLOCK_CACHE_DEBUG_CHANGED 2419 kprintf(" compare data: %p\n", block->compare); 2420 #endif 2421 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count); 2422 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess()); 2423 kprintf(" flags: "); 2424 if (block->busy_reading) 2425 kprintf(" busy_reading"); 2426 if (block->busy_writing) 2427 kprintf(" busy_writing"); 2428 if (block->is_writing) 2429 kprintf(" is-writing"); 2430 if (block->is_dirty) 2431 kprintf(" is-dirty"); 2432 if (block->unused) 2433 kprintf(" unused"); 2434 if (block->discard) 2435 kprintf(" discard"); 2436 kprintf("\n"); 2437 if (block->transaction != NULL) { 2438 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction, 2439 block->transaction->id); 2440 if (block->transaction_next != NULL) { 2441 kprintf(" next in transaction: %" B_PRIdOFF "\n", 2442 block->transaction_next->block_number); 2443 } 2444 } 2445 if (block->previous_transaction != NULL) { 2446 kprintf(" previous transaction: %p (%" B_PRId32 ")\n", 2447 block->previous_transaction, 2448 block->previous_transaction->id); 2449 } 2450 2451 set_debug_variable("_current", (addr_t)block->current_data); 2452 set_debug_variable("_original", (addr_t)block->original_data); 2453 set_debug_variable("_parent", (addr_t)block->parent_data); 2454 } 2455 2456 2457 static int 2458 dump_cached_block(int argc, char** argv) 2459 { 2460 if (argc != 2) { 2461 kprintf("usage: %s <block-address>\n", argv[0]); 2462 return 0; 2463 } 2464 2465 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1])); 2466 return 0; 2467 } 2468 2469 2470 static int 2471 dump_cache(int argc, char** argv) 2472 { 2473 bool showTransactions = false; 2474 bool showBlocks = false; 2475 int32 i = 1; 2476 while (argv[i] != NULL && argv[i][0] == '-') { 2477 for (char* arg = &argv[i][1]; arg[0]; arg++) { 2478 switch (arg[0]) { 2479 case 'b': 2480 showBlocks = true; 2481 break; 2482 case 't': 2483 showTransactions = true; 2484 break; 2485 default: 2486 print_debugger_command_usage(argv[0]); 2487 return 0; 2488 } 2489 } 2490 i++; 2491 } 2492 2493 if (i >= argc) { 2494 print_debugger_command_usage(argv[0]); 2495 return 0; 2496 } 2497 2498 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]); 2499 if (cache == NULL) { 2500 kprintf("invalid cache address\n"); 2501 return 0; 2502 } 2503 2504 off_t blockNumber = -1; 2505 if (i + 1 < argc) { 2506 blockNumber = parse_expression(argv[i + 1]); 2507 cached_block* block = cache->hash->Lookup(blockNumber); 2508 if (block != NULL) 2509 dump_block_long(block); 2510 else 2511 kprintf("block %" B_PRIdOFF " not found\n", blockNumber); 2512 return 0; 2513 } 2514 2515 kprintf("BLOCK CACHE: %p\n", cache); 2516 2517 kprintf(" fd: %d\n", cache->fd); 2518 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks); 2519 kprintf(" block_size: %zu\n", cache->block_size); 2520 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id); 2521 kprintf(" buffer_cache: %p\n", cache->buffer_cache); 2522 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count, 2523 cache->busy_reading_waiters ? "has" : "no"); 2524 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count, 2525 cache->busy_writing_waiters ? "has" : "no"); 2526 2527 if (!cache->pending_notifications.IsEmpty()) { 2528 kprintf(" pending notifications:\n"); 2529 2530 NotificationList::Iterator iterator 2531 = cache->pending_notifications.GetIterator(); 2532 while (iterator.HasNext()) { 2533 cache_notification* notification = iterator.Next(); 2534 2535 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification, 2536 notification->events_pending, notification->hook, 2537 notification->data); 2538 } 2539 } 2540 2541 if (showTransactions) { 2542 kprintf(" transactions:\n"); 2543 kprintf("address id state blocks main sub\n"); 2544 2545 TransactionTable::Iterator iterator(cache->transaction_hash); 2546 2547 while (iterator.HasNext()) { 2548 cache_transaction* transaction = iterator.Next(); 2549 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" 2550 B_PRId32 "\n", transaction, transaction->id, 2551 transaction->open ? "open" : "closed", 2552 transaction->num_blocks, transaction->main_num_blocks, 2553 transaction->sub_num_blocks); 2554 } 2555 } 2556 2557 if (showBlocks) { 2558 kprintf(" blocks:\n"); 2559 kprintf("address block no. current original parent refs access " 2560 "flags transact prev. trans\n"); 2561 } 2562 2563 uint32 referenced = 0; 2564 uint32 count = 0; 2565 uint32 dirty = 0; 2566 uint32 discarded = 0; 2567 BlockTable::Iterator iterator(cache->hash); 2568 while (iterator.HasNext()) { 2569 cached_block* block = iterator.Next(); 2570 if (showBlocks) 2571 dump_block(block); 2572 2573 if (block->is_dirty) 2574 dirty++; 2575 if (block->discard) 2576 discarded++; 2577 if (block->ref_count) 2578 referenced++; 2579 count++; 2580 } 2581 2582 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 2583 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 2584 " in unused.\n", 2585 count, dirty, discarded, referenced, cache->busy_reading_count, 2586 cache->unused_block_count); 2587 return 0; 2588 } 2589 2590 2591 static int 2592 dump_transaction(int argc, char** argv) 2593 { 2594 bool showBlocks = false; 2595 int i = 1; 2596 if (argc > 1 && !strcmp(argv[1], "-b")) { 2597 showBlocks = true; 2598 i++; 2599 } 2600 2601 if (argc - i < 1 || argc - i > 2) { 2602 print_debugger_command_usage(argv[0]); 2603 return 0; 2604 } 2605 2606 cache_transaction* transaction = NULL; 2607 2608 if (argc - i == 1) { 2609 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]); 2610 } else { 2611 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]); 2612 int32 id = parse_expression(argv[i + 1]); 2613 transaction = lookup_transaction(cache, id); 2614 if (transaction == NULL) { 2615 kprintf("No transaction with ID %" B_PRId32 " found.\n", id); 2616 return 0; 2617 } 2618 } 2619 2620 kprintf("TRANSACTION %p\n", transaction); 2621 2622 kprintf(" id: %" B_PRId32 "\n", transaction->id); 2623 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks); 2624 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks); 2625 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks); 2626 kprintf(" has sub: %d\n", transaction->has_sub_transaction); 2627 kprintf(" state: %s\n", transaction->open ? "open" : "closed"); 2628 kprintf(" idle: %" B_PRId64 " secs\n", 2629 (system_time() - transaction->last_used) / 1000000); 2630 2631 kprintf(" listeners:\n"); 2632 2633 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 2634 while (iterator.HasNext()) { 2635 cache_listener* listener = iterator.Next(); 2636 2637 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending, 2638 listener->hook, listener->data); 2639 } 2640 2641 if (!showBlocks) 2642 return 0; 2643 2644 kprintf(" blocks:\n"); 2645 kprintf("address block no. current original parent refs access " 2646 "flags transact prev. trans\n"); 2647 2648 cached_block* block = transaction->first_block; 2649 while (block != NULL) { 2650 dump_block(block); 2651 block = block->transaction_next; 2652 } 2653 2654 kprintf("--\n"); 2655 2656 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 2657 while (blockIterator.HasNext()) { 2658 block = blockIterator.Next(); 2659 dump_block(block); 2660 } 2661 2662 return 0; 2663 } 2664 2665 2666 static int 2667 dump_caches(int argc, char** argv) 2668 { 2669 kprintf("Block caches:\n"); 2670 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator(); 2671 while (i.HasNext()) { 2672 block_cache* cache = i.Next(); 2673 if (cache == (block_cache*)&sMarkCache) 2674 continue; 2675 2676 kprintf(" %p\n", cache); 2677 } 2678 2679 return 0; 2680 } 2681 2682 2683 #if BLOCK_CACHE_BLOCK_TRACING >= 2 2684 static int 2685 dump_block_data(int argc, char** argv) 2686 { 2687 using namespace BlockTracing; 2688 2689 // Determine which blocks to show 2690 2691 bool printStackTrace = true; 2692 uint32 which = 0; 2693 int32 i = 1; 2694 while (i < argc && argv[i][0] == '-') { 2695 char* arg = &argv[i][1]; 2696 while (arg[0]) { 2697 switch (arg[0]) { 2698 case 'c': 2699 which |= BlockData::kCurrent; 2700 break; 2701 case 'p': 2702 which |= BlockData::kParent; 2703 break; 2704 case 'o': 2705 which |= BlockData::kOriginal; 2706 break; 2707 2708 default: 2709 kprintf("invalid block specifier (only o/c/p are " 2710 "allowed).\n"); 2711 return 0; 2712 } 2713 arg++; 2714 } 2715 2716 i++; 2717 } 2718 if (which == 0) 2719 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal; 2720 2721 if (i == argc) { 2722 print_debugger_command_usage(argv[0]); 2723 return 0; 2724 } 2725 2726 // Get the range of blocks to print 2727 2728 int64 from = parse_expression(argv[i]); 2729 int64 to = from; 2730 if (argc > i + 1) 2731 to = parse_expression(argv[i + 1]); 2732 if (to < from) 2733 to = from; 2734 2735 uint32 offset = 0; 2736 uint32 size = LONG_MAX; 2737 if (argc > i + 2) 2738 offset = parse_expression(argv[i + 2]); 2739 if (argc > i + 3) 2740 size = parse_expression(argv[i + 3]); 2741 2742 TraceEntryIterator iterator; 2743 iterator.MoveTo(from - 1); 2744 2745 static char sBuffer[1024]; 2746 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID); 2747 2748 while (TraceEntry* entry = iterator.Next()) { 2749 int32 index = iterator.Index(); 2750 if (index > to) 2751 break; 2752 2753 Action* action = dynamic_cast<Action*>(entry); 2754 if (action != NULL) { 2755 out.Clear(); 2756 out.DumpEntry(action); 2757 continue; 2758 } 2759 2760 BlockData* blockData = dynamic_cast<BlockData*>(entry); 2761 if (blockData == NULL) 2762 continue; 2763 2764 out.Clear(); 2765 2766 const char* dump = out.DumpEntry(entry); 2767 int length = strlen(dump); 2768 if (length > 0 && dump[length - 1] == '\n') 2769 length--; 2770 2771 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump); 2772 2773 if (printStackTrace) { 2774 out.Clear(); 2775 entry->DumpStackTrace(out); 2776 if (out.Size() > 0) 2777 kputs(out.Buffer()); 2778 } 2779 2780 blockData->DumpBlocks(which, offset, size); 2781 } 2782 2783 return 0; 2784 } 2785 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 2786 2787 2788 #endif // DEBUG_BLOCK_CACHE 2789 2790 2791 /*! Traverses through the block_cache list, and returns one cache after the 2792 other. The cache returned is automatically locked when you get it, and 2793 unlocked with the next call to this function. Ignores caches that are in 2794 deletion state. 2795 Returns \c NULL when the end of the list is reached. 2796 */ 2797 static block_cache* 2798 get_next_locked_block_cache(block_cache* last) 2799 { 2800 MutexLocker _(sCachesLock); 2801 2802 block_cache* cache; 2803 if (last != NULL) { 2804 mutex_unlock(&last->lock); 2805 2806 cache = sCaches.GetNext((block_cache*)&sMarkCache); 2807 sCaches.Remove((block_cache*)&sMarkCache); 2808 } else 2809 cache = sCaches.Head(); 2810 2811 if (cache != NULL) { 2812 mutex_lock(&cache->lock); 2813 sCaches.InsertBefore(sCaches.GetNext(cache), (block_cache*)&sMarkCache); 2814 } 2815 2816 return cache; 2817 } 2818 2819 2820 /*! Background thread that continuously checks for pending notifications of 2821 all caches. 2822 Every two seconds, it will also write back up to 64 blocks per cache. 2823 */ 2824 static status_t 2825 block_notifier_and_writer(void* /*data*/) 2826 { 2827 const bigtime_t kDefaultTimeout = 2000000LL; 2828 bigtime_t timeout = kDefaultTimeout; 2829 2830 while (true) { 2831 bigtime_t start = system_time(); 2832 2833 status_t status = acquire_sem_etc(sEventSemaphore, 1, 2834 B_RELATIVE_TIMEOUT, timeout); 2835 if (status == B_OK) { 2836 flush_pending_notifications(); 2837 timeout -= system_time() - start; 2838 continue; 2839 } 2840 2841 // Write 64 blocks of each block_cache roughly every 2 seconds, 2842 // potentially more or less depending on congestion and drive speeds 2843 // (usually much less.) We do not want to queue everything at once 2844 // because a future transaction might then get held up waiting for 2845 // a specific block to be written. 2846 timeout = kDefaultTimeout; 2847 size_t usedMemory; 2848 object_cache_get_usage(sBlockCache, &usedMemory); 2849 2850 block_cache* cache = NULL; 2851 while ((cache = get_next_locked_block_cache(cache)) != NULL) { 2852 // Give some breathing room: wait 2x the length of the potential 2853 // maximum block count-sized write between writes, and also skip 2854 // if there are more than 16 blocks currently being written. 2855 const bigtime_t next = cache->last_block_write 2856 + cache->last_block_write_duration * 2 * 64; 2857 if (cache->busy_writing_count > 16 || system_time() < next) { 2858 if (cache->last_block_write_duration > 0) { 2859 timeout = min_c(timeout, 2860 cache->last_block_write_duration * 2 * 64); 2861 } 2862 continue; 2863 } 2864 2865 BlockWriter writer(cache, 64); 2866 bool hasMoreBlocks = false; 2867 2868 size_t cacheUsedMemory; 2869 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory); 2870 usedMemory += cacheUsedMemory; 2871 2872 if (cache->num_dirty_blocks) { 2873 // This cache is not using transactions, we'll scan the blocks 2874 // directly 2875 BlockTable::Iterator iterator(cache->hash); 2876 2877 while (iterator.HasNext()) { 2878 cached_block* block = iterator.Next(); 2879 if (block->CanBeWritten() && !writer.Add(block)) { 2880 hasMoreBlocks = true; 2881 break; 2882 } 2883 } 2884 } else { 2885 TransactionTable::Iterator iterator(cache->transaction_hash); 2886 2887 while (iterator.HasNext()) { 2888 cache_transaction* transaction = iterator.Next(); 2889 if (transaction->open) { 2890 if (system_time() > transaction->last_used 2891 + kTransactionIdleTime) { 2892 // Transaction is open but idle 2893 notify_transaction_listeners(cache, transaction, 2894 TRANSACTION_IDLE); 2895 } 2896 continue; 2897 } 2898 2899 bool hasLeftOvers; 2900 // we ignore this one 2901 if (!writer.Add(transaction, hasLeftOvers)) { 2902 hasMoreBlocks = true; 2903 break; 2904 } 2905 } 2906 } 2907 2908 writer.Write(); 2909 2910 if (hasMoreBlocks && cache->last_block_write_duration > 0) { 2911 // There are probably still more blocks that we could write, so 2912 // see if we can decrease the timeout. 2913 timeout = min_c(timeout, 2914 cache->last_block_write_duration * 2 * 64); 2915 } 2916 2917 if ((block_cache_used_memory() / B_PAGE_SIZE) 2918 > vm_page_num_pages() / 2) { 2919 // Try to reduce memory usage to half of the available 2920 // RAM at maximum 2921 cache->RemoveUnusedBlocks(1000, 10); 2922 } 2923 } 2924 2925 MutexLocker _(sCachesMemoryUseLock); 2926 sUsedMemory = usedMemory; 2927 } 2928 2929 // never can get here 2930 return B_OK; 2931 } 2932 2933 2934 /*! Notify function for wait_for_notifications(). */ 2935 static void 2936 notify_sync(int32 transactionID, int32 event, void* _cache) 2937 { 2938 block_cache* cache = (block_cache*)_cache; 2939 2940 cache->condition_variable.NotifyOne(); 2941 } 2942 2943 2944 /*! Must be called with the sCachesLock held. */ 2945 static bool 2946 is_valid_cache(block_cache* cache) 2947 { 2948 ASSERT_LOCKED_MUTEX(&sCachesLock); 2949 2950 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 2951 while (iterator.HasNext()) { 2952 if (cache == iterator.Next()) 2953 return true; 2954 } 2955 2956 return false; 2957 } 2958 2959 2960 /*! Waits until all pending notifications are carried out. 2961 Safe to be called from the block writer/notifier thread. 2962 You must not hold the \a cache lock when calling this function. 2963 */ 2964 static void 2965 wait_for_notifications(block_cache* cache) 2966 { 2967 MutexLocker locker(sCachesLock); 2968 2969 if (find_thread(NULL) == sNotifierWriterThread) { 2970 // We're the notifier thread, don't wait, but flush all pending 2971 // notifications directly. 2972 if (is_valid_cache(cache)) 2973 flush_pending_notifications(cache); 2974 return; 2975 } 2976 2977 // add sync notification 2978 cache_notification notification; 2979 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync, 2980 cache); 2981 2982 ConditionVariableEntry entry; 2983 cache->condition_variable.Add(&entry); 2984 2985 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false); 2986 locker.Unlock(); 2987 2988 // wait for notification hook to be called 2989 entry.Wait(); 2990 } 2991 2992 2993 status_t 2994 block_cache_init(void) 2995 { 2996 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block), 2997 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 2998 if (sBlockCache == NULL) 2999 return B_NO_MEMORY; 3000 3001 sCacheNotificationCache = create_object_cache("cache notifications", 3002 sizeof(cache_listener), 8, NULL, NULL, NULL); 3003 if (sCacheNotificationCache == NULL) 3004 return B_NO_MEMORY; 3005 3006 new (&sCaches) DoublyLinkedList<block_cache>; 3007 // manually call constructor 3008 3009 sEventSemaphore = create_sem(0, "block cache event"); 3010 if (sEventSemaphore < B_OK) 3011 return sEventSemaphore; 3012 3013 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer, 3014 "block notifier/writer", B_LOW_PRIORITY, NULL); 3015 if (sNotifierWriterThread >= B_OK) 3016 resume_thread(sNotifierWriterThread); 3017 3018 #if DEBUG_BLOCK_CACHE 3019 add_debugger_command_etc("block_caches", &dump_caches, 3020 "dumps all block caches", "\n", 0); 3021 add_debugger_command_etc("block_cache", &dump_cache, 3022 "dumps a specific block cache", 3023 "[-bt] <cache-address> [block-number]\n" 3024 " -t lists the transactions\n" 3025 " -b lists all blocks\n", 0); 3026 add_debugger_command("cached_block", &dump_cached_block, 3027 "dumps the specified cached block"); 3028 add_debugger_command_etc("transaction", &dump_transaction, 3029 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n" 3030 "Either use a block cache pointer and an ID or a pointer to the transaction.\n" 3031 " -b lists all blocks that are part of this transaction\n", 0); 3032 # if BLOCK_CACHE_BLOCK_TRACING >= 2 3033 add_debugger_command_etc("block_cache_data", &dump_block_data, 3034 "dumps the data blocks logged for the actions", 3035 "[-cpo] <from> [<to> [<offset> [<size>]]]\n" 3036 "If no data specifier is used, all blocks are shown by default.\n" 3037 " -c the current data is shown, if available.\n" 3038 " -p the parent data is shown, if available.\n" 3039 " -o the original data is shown, if available.\n" 3040 " <from> first index of tracing entries to show.\n" 3041 " <to> if given, the last entry. If not, only <from> is shown.\n" 3042 " <offset> the offset of the block data.\n" 3043 " <from> the size of the block data that is dumped\n", 0); 3044 # endif 3045 #endif // DEBUG_BLOCK_CACHE 3046 3047 return B_OK; 3048 } 3049 3050 3051 size_t 3052 block_cache_used_memory(void) 3053 { 3054 MutexLocker _(sCachesMemoryUseLock); 3055 return sUsedMemory; 3056 } 3057 3058 3059 // #pragma mark - public transaction API 3060 3061 3062 int32 3063 cache_start_transaction(void* _cache) 3064 { 3065 block_cache* cache = (block_cache*)_cache; 3066 TransactionLocker locker(cache); 3067 3068 if (cache->last_transaction && cache->last_transaction->open) { 3069 panic("last transaction (%" B_PRId32 ") still open!\n", 3070 cache->last_transaction->id); 3071 } 3072 3073 cache_transaction* transaction = new(std::nothrow) cache_transaction; 3074 if (transaction == NULL) 3075 return B_NO_MEMORY; 3076 3077 transaction->id = atomic_add(&cache->next_transaction_id, 1); 3078 cache->last_transaction = transaction; 3079 3080 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id)); 3081 T(Action("start", cache, transaction)); 3082 3083 cache->transaction_hash->Insert(transaction); 3084 3085 return transaction->id; 3086 } 3087 3088 3089 status_t 3090 cache_sync_transaction(void* _cache, int32 id) 3091 { 3092 block_cache* cache = (block_cache*)_cache; 3093 bool hadBusy; 3094 3095 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id)); 3096 3097 do { 3098 TransactionLocker locker(cache); 3099 hadBusy = false; 3100 3101 BlockWriter writer(cache); 3102 TransactionTable::Iterator iterator(cache->transaction_hash); 3103 3104 while (iterator.HasNext()) { 3105 // close all earlier transactions which haven't been closed yet 3106 cache_transaction* transaction = iterator.Next(); 3107 3108 if (transaction->busy_writing_count != 0) { 3109 hadBusy = true; 3110 continue; 3111 } 3112 if (transaction->id <= id && !transaction->open) { 3113 // write back all of their remaining dirty blocks 3114 T(Action("sync", cache, transaction)); 3115 3116 bool hasLeftOvers; 3117 writer.Add(transaction, hasLeftOvers); 3118 3119 if (hasLeftOvers) { 3120 // This transaction contains blocks that a previous 3121 // transaction is trying to write back in this write run 3122 hadBusy = true; 3123 } 3124 } 3125 } 3126 3127 status_t status = writer.Write(); 3128 if (status != B_OK) 3129 return status; 3130 } while (hadBusy); 3131 3132 wait_for_notifications(cache); 3133 // make sure that all pending TRANSACTION_WRITTEN notifications 3134 // are handled after we return 3135 return B_OK; 3136 } 3137 3138 3139 status_t 3140 cache_end_transaction(void* _cache, int32 id, 3141 transaction_notification_hook hook, void* data) 3142 { 3143 block_cache* cache = (block_cache*)_cache; 3144 TransactionLocker locker(cache); 3145 3146 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id)); 3147 3148 cache_transaction* transaction = lookup_transaction(cache, id); 3149 if (transaction == NULL) { 3150 panic("cache_end_transaction(): invalid transaction ID\n"); 3151 return B_BAD_VALUE; 3152 } 3153 3154 // Write back all pending transaction blocks 3155 status_t status = write_blocks_in_previous_transaction(cache, transaction); 3156 if (status != B_OK) 3157 return status; 3158 3159 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3160 3161 if (hook != NULL 3162 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, 3163 hook, data) != B_OK) { 3164 return B_NO_MEMORY; 3165 } 3166 3167 T(Action("end", cache, transaction)); 3168 3169 // iterate through all blocks and free the unchanged original contents 3170 3171 cached_block* next; 3172 for (cached_block* block = transaction->first_block; block != NULL; 3173 block = next) { 3174 next = block->transaction_next; 3175 ASSERT(block->previous_transaction == NULL); 3176 3177 if (block->discard) { 3178 // This block has been discarded in the transaction 3179 cache->DiscardBlock(block); 3180 transaction->num_blocks--; 3181 continue; 3182 } 3183 3184 if (block->original_data != NULL) { 3185 cache->Free(block->original_data); 3186 block->original_data = NULL; 3187 } 3188 if (block->parent_data != NULL) { 3189 ASSERT(transaction->has_sub_transaction); 3190 cache->FreeBlockParentData(block); 3191 } 3192 3193 // move the block to the previous transaction list 3194 transaction->blocks.Add(block); 3195 3196 block->previous_transaction = transaction; 3197 block->transaction_next = NULL; 3198 block->transaction = NULL; 3199 } 3200 3201 transaction->open = false; 3202 return B_OK; 3203 } 3204 3205 3206 status_t 3207 cache_abort_transaction(void* _cache, int32 id) 3208 { 3209 block_cache* cache = (block_cache*)_cache; 3210 TransactionLocker locker(cache); 3211 3212 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id)); 3213 3214 cache_transaction* transaction = lookup_transaction(cache, id); 3215 if (transaction == NULL) { 3216 panic("cache_abort_transaction(): invalid transaction ID\n"); 3217 return B_BAD_VALUE; 3218 } 3219 3220 T(Abort(cache, transaction)); 3221 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3222 3223 // iterate through all blocks and restore their original contents 3224 3225 cached_block* block = transaction->first_block; 3226 cached_block* next; 3227 for (; block != NULL; block = next) { 3228 next = block->transaction_next; 3229 3230 if (block->original_data != NULL) { 3231 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of " 3232 "block %" B_PRIdOFF "\n", transaction->id, block->block_number)); 3233 memcpy(block->current_data, block->original_data, 3234 cache->block_size); 3235 cache->Free(block->original_data); 3236 block->original_data = NULL; 3237 } 3238 if (transaction->has_sub_transaction && block->parent_data != NULL) 3239 cache->FreeBlockParentData(block); 3240 3241 block->transaction_next = NULL; 3242 block->transaction = NULL; 3243 block->discard = false; 3244 if (block->previous_transaction == NULL) 3245 block->is_dirty = false; 3246 } 3247 3248 cache->transaction_hash->Remove(transaction); 3249 delete_transaction(cache, transaction); 3250 return B_OK; 3251 } 3252 3253 3254 /*! Acknowledges the current parent transaction, and starts a new transaction 3255 from its sub transaction. 3256 The new transaction also gets a new transaction ID. 3257 */ 3258 int32 3259 cache_detach_sub_transaction(void* _cache, int32 id, 3260 transaction_notification_hook hook, void* data) 3261 { 3262 block_cache* cache = (block_cache*)_cache; 3263 TransactionLocker locker(cache); 3264 3265 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id)); 3266 3267 cache_transaction* transaction = lookup_transaction(cache, id); 3268 if (transaction == NULL) { 3269 panic("cache_detach_sub_transaction(): invalid transaction ID\n"); 3270 return B_BAD_VALUE; 3271 } 3272 if (!transaction->has_sub_transaction) 3273 return B_BAD_VALUE; 3274 3275 // iterate through all blocks and free the unchanged original contents 3276 3277 status_t status = write_blocks_in_previous_transaction(cache, transaction); 3278 if (status != B_OK) 3279 return status; 3280 3281 // create a new transaction for the sub transaction 3282 cache_transaction* newTransaction = new(std::nothrow) cache_transaction; 3283 if (newTransaction == NULL) 3284 return B_NO_MEMORY; 3285 3286 newTransaction->id = atomic_add(&cache->next_transaction_id, 1); 3287 T(Detach(cache, transaction, newTransaction)); 3288 3289 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3290 3291 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook, 3292 data) != B_OK) { 3293 delete newTransaction; 3294 return B_NO_MEMORY; 3295 } 3296 3297 cached_block* last = NULL; 3298 cached_block* next; 3299 for (cached_block* block = transaction->first_block; block != NULL; 3300 block = next) { 3301 next = block->transaction_next; 3302 ASSERT(block->previous_transaction == NULL); 3303 3304 if (block->discard) { 3305 cache->DiscardBlock(block); 3306 transaction->main_num_blocks--; 3307 continue; 3308 } 3309 3310 if (block->parent_data != NULL) { 3311 // The block changed in the parent - free the original data, since 3312 // they will be replaced by what is in current. 3313 ASSERT(block->original_data != NULL); 3314 cache->Free(block->original_data); 3315 3316 if (block->parent_data != block->current_data) { 3317 // The block had been changed in both transactions 3318 block->original_data = block->parent_data; 3319 } else { 3320 // The block has only been changed in the parent 3321 block->original_data = NULL; 3322 } 3323 block->parent_data = NULL; 3324 3325 // move the block to the previous transaction list 3326 transaction->blocks.Add(block); 3327 block->previous_transaction = transaction; 3328 } 3329 3330 if (block->original_data != NULL) { 3331 // This block had been changed in the current sub transaction, 3332 // we need to move this block over to the new transaction. 3333 ASSERT(block->parent_data == NULL); 3334 3335 if (last == NULL) 3336 newTransaction->first_block = block; 3337 else 3338 last->transaction_next = block; 3339 3340 block->transaction = newTransaction; 3341 last = block; 3342 } else 3343 block->transaction = NULL; 3344 3345 block->transaction_next = NULL; 3346 } 3347 3348 newTransaction->num_blocks = transaction->sub_num_blocks; 3349 3350 transaction->open = false; 3351 transaction->has_sub_transaction = false; 3352 transaction->num_blocks = transaction->main_num_blocks; 3353 transaction->sub_num_blocks = 0; 3354 3355 cache->transaction_hash->Insert(newTransaction); 3356 cache->last_transaction = newTransaction; 3357 3358 return newTransaction->id; 3359 } 3360 3361 3362 status_t 3363 cache_abort_sub_transaction(void* _cache, int32 id) 3364 { 3365 block_cache* cache = (block_cache*)_cache; 3366 TransactionLocker locker(cache); 3367 3368 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id)); 3369 3370 cache_transaction* transaction = lookup_transaction(cache, id); 3371 if (transaction == NULL) { 3372 panic("cache_abort_sub_transaction(): invalid transaction ID\n"); 3373 return B_BAD_VALUE; 3374 } 3375 if (!transaction->has_sub_transaction) 3376 return B_BAD_VALUE; 3377 3378 T(Abort(cache, transaction)); 3379 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3380 3381 // revert all changes back to the version of the parent 3382 3383 cached_block* block = transaction->first_block; 3384 cached_block* last = NULL; 3385 cached_block* next; 3386 for (; block != NULL; block = next) { 3387 next = block->transaction_next; 3388 3389 if (block->parent_data == NULL) { 3390 // The parent transaction didn't change the block, but the sub 3391 // transaction did - we need to revert to the original data. 3392 // The block is no longer part of the transaction 3393 if (block->original_data != NULL) { 3394 // The block might not have original data if was empty 3395 memcpy(block->current_data, block->original_data, 3396 cache->block_size); 3397 } 3398 3399 if (last != NULL) 3400 last->transaction_next = next; 3401 else 3402 transaction->first_block = next; 3403 3404 block->transaction_next = NULL; 3405 block->transaction = NULL; 3406 transaction->num_blocks--; 3407 3408 if (block->previous_transaction == NULL) { 3409 cache->Free(block->original_data); 3410 block->original_data = NULL; 3411 block->is_dirty = false; 3412 3413 if (block->ref_count == 0) { 3414 // Move the block into the unused list if possible 3415 block->unused = true; 3416 cache->unused_blocks.Add(block); 3417 cache->unused_block_count++; 3418 } 3419 } 3420 } else { 3421 if (block->parent_data != block->current_data) { 3422 // The block has been changed and must be restored - the block 3423 // is still dirty and part of the transaction 3424 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): " 3425 "restored contents of block %" B_PRIdOFF "\n", 3426 transaction->id, block->block_number)); 3427 memcpy(block->current_data, block->parent_data, 3428 cache->block_size); 3429 cache->Free(block->parent_data); 3430 // The block stays dirty 3431 } 3432 block->parent_data = NULL; 3433 last = block; 3434 } 3435 3436 block->discard = false; 3437 } 3438 3439 // all subsequent changes will go into the main transaction 3440 transaction->has_sub_transaction = false; 3441 transaction->sub_num_blocks = 0; 3442 3443 return B_OK; 3444 } 3445 3446 3447 status_t 3448 cache_start_sub_transaction(void* _cache, int32 id) 3449 { 3450 block_cache* cache = (block_cache*)_cache; 3451 TransactionLocker locker(cache); 3452 3453 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id)); 3454 3455 cache_transaction* transaction = lookup_transaction(cache, id); 3456 if (transaction == NULL) { 3457 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n", 3458 id); 3459 return B_BAD_VALUE; 3460 } 3461 3462 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3463 3464 // move all changed blocks up to the parent 3465 3466 cached_block* block = transaction->first_block; 3467 cached_block* next; 3468 for (; block != NULL; block = next) { 3469 next = block->transaction_next; 3470 3471 if (block->parent_data != NULL) { 3472 // There already is an older sub transaction - we acknowledge 3473 // its changes and move its blocks up to the parent 3474 ASSERT(transaction->has_sub_transaction); 3475 cache->FreeBlockParentData(block); 3476 } 3477 if (block->discard) { 3478 // This block has been discarded in the parent transaction. 3479 // Just throw away any changes made in this transaction, so that 3480 // it can still be reverted to its original contents if needed 3481 ASSERT(block->previous_transaction == NULL); 3482 if (block->original_data != NULL) { 3483 memcpy(block->current_data, block->original_data, 3484 cache->block_size); 3485 3486 cache->Free(block->original_data); 3487 block->original_data = NULL; 3488 } 3489 continue; 3490 } 3491 3492 // we "allocate" the parent data lazily, that means, we don't copy 3493 // the data (and allocate memory for it) until we need to 3494 block->parent_data = block->current_data; 3495 } 3496 3497 // all subsequent changes will go into the sub transaction 3498 transaction->has_sub_transaction = true; 3499 transaction->main_num_blocks = transaction->num_blocks; 3500 transaction->sub_num_blocks = 0; 3501 T(Action("start-sub", cache, transaction)); 3502 3503 return B_OK; 3504 } 3505 3506 3507 /*! Adds a transaction listener that gets notified when the transaction 3508 is ended, aborted, written, or idle as specified by \a events. 3509 The listener gets automatically removed when the transaction ends. 3510 */ 3511 status_t 3512 cache_add_transaction_listener(void* _cache, int32 id, int32 events, 3513 transaction_notification_hook hook, void* data) 3514 { 3515 block_cache* cache = (block_cache*)_cache; 3516 TransactionLocker locker(cache); 3517 3518 cache_transaction* transaction = lookup_transaction(cache, id); 3519 if (transaction == NULL) 3520 return B_BAD_VALUE; 3521 3522 return add_transaction_listener(cache, transaction, events, hook, data); 3523 } 3524 3525 3526 status_t 3527 cache_remove_transaction_listener(void* _cache, int32 id, 3528 transaction_notification_hook hookFunction, void* data) 3529 { 3530 block_cache* cache = (block_cache*)_cache; 3531 TransactionLocker locker(cache); 3532 3533 cache_transaction* transaction = lookup_transaction(cache, id); 3534 if (transaction == NULL) 3535 return B_BAD_VALUE; 3536 3537 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 3538 while (iterator.HasNext()) { 3539 cache_listener* listener = iterator.Next(); 3540 if (listener->data == data && listener->hook == hookFunction) { 3541 iterator.Remove(); 3542 3543 if (listener->events_pending != 0) { 3544 MutexLocker _(sNotificationsLock); 3545 if (listener->events_pending != 0) 3546 cache->pending_notifications.Remove(listener); 3547 } 3548 delete listener; 3549 return B_OK; 3550 } 3551 } 3552 3553 return B_ENTRY_NOT_FOUND; 3554 } 3555 3556 3557 status_t 3558 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly, 3559 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData) 3560 { 3561 cached_block* block = (cached_block*)*_cookie; 3562 block_cache* cache = (block_cache*)_cache; 3563 TransactionLocker locker(cache); 3564 3565 cache_transaction* transaction = lookup_transaction(cache, id); 3566 if (transaction == NULL || !transaction->open) 3567 return B_BAD_VALUE; 3568 3569 if (block == NULL) 3570 block = transaction->first_block; 3571 else 3572 block = block->transaction_next; 3573 3574 if (transaction->has_sub_transaction) { 3575 if (mainOnly) { 3576 // find next block that the parent changed 3577 while (block != NULL && block->parent_data == NULL) 3578 block = block->transaction_next; 3579 } else { 3580 // find next non-discarded block 3581 while (block != NULL && block->discard) 3582 block = block->transaction_next; 3583 } 3584 } 3585 3586 if (block == NULL) 3587 return B_ENTRY_NOT_FOUND; 3588 3589 if (_blockNumber) 3590 *_blockNumber = block->block_number; 3591 if (_data) 3592 *_data = mainOnly ? block->parent_data : block->current_data; 3593 if (_unchangedData) 3594 *_unchangedData = block->original_data; 3595 3596 *_cookie = (addr_t)block; 3597 return B_OK; 3598 } 3599 3600 3601 int32 3602 cache_blocks_in_transaction(void* _cache, int32 id) 3603 { 3604 block_cache* cache = (block_cache*)_cache; 3605 TransactionLocker locker(cache); 3606 3607 cache_transaction* transaction = lookup_transaction(cache, id); 3608 if (transaction == NULL) 3609 return B_BAD_VALUE; 3610 3611 return transaction->num_blocks; 3612 } 3613 3614 3615 /*! Returns the number of blocks that are part of the main transaction. If this 3616 transaction does not have a sub transaction yet, this is the same value as 3617 cache_blocks_in_transaction() would return. 3618 */ 3619 int32 3620 cache_blocks_in_main_transaction(void* _cache, int32 id) 3621 { 3622 block_cache* cache = (block_cache*)_cache; 3623 TransactionLocker locker(cache); 3624 3625 cache_transaction* transaction = lookup_transaction(cache, id); 3626 if (transaction == NULL) 3627 return B_BAD_VALUE; 3628 3629 if (transaction->has_sub_transaction) 3630 return transaction->main_num_blocks; 3631 3632 return transaction->num_blocks; 3633 } 3634 3635 3636 int32 3637 cache_blocks_in_sub_transaction(void* _cache, int32 id) 3638 { 3639 block_cache* cache = (block_cache*)_cache; 3640 TransactionLocker locker(cache); 3641 3642 cache_transaction* transaction = lookup_transaction(cache, id); 3643 if (transaction == NULL) 3644 return B_BAD_VALUE; 3645 3646 return transaction->sub_num_blocks; 3647 } 3648 3649 3650 /*! Check if block is in transaction 3651 */ 3652 bool 3653 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber) 3654 { 3655 block_cache* cache = (block_cache*)_cache; 3656 TransactionLocker locker(cache); 3657 3658 cached_block* block = cache->hash->Lookup(blockNumber); 3659 3660 return (block != NULL && block->transaction != NULL 3661 && block->transaction->id == id); 3662 } 3663 3664 3665 // #pragma mark - public block cache API 3666 3667 3668 void 3669 block_cache_delete(void* _cache, bool allowWrites) 3670 { 3671 block_cache* cache = (block_cache*)_cache; 3672 3673 if (allowWrites) 3674 block_cache_sync(cache); 3675 3676 mutex_lock(&sCachesLock); 3677 sCaches.Remove(cache); 3678 mutex_unlock(&sCachesLock); 3679 3680 mutex_lock(&cache->lock); 3681 3682 // wait for all blocks to become unbusy 3683 wait_for_busy_reading_blocks(cache); 3684 wait_for_busy_writing_blocks(cache); 3685 3686 // free all blocks 3687 3688 cached_block* block = cache->hash->Clear(true); 3689 while (block != NULL) { 3690 cached_block* next = block->next; 3691 cache->FreeBlock(block); 3692 block = next; 3693 } 3694 3695 // free all transactions (they will all be aborted) 3696 3697 cache_transaction* transaction = cache->transaction_hash->Clear(true); 3698 while (transaction != NULL) { 3699 cache_transaction* next = transaction->next; 3700 delete transaction; 3701 transaction = next; 3702 } 3703 3704 delete cache; 3705 } 3706 3707 3708 void* 3709 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly) 3710 { 3711 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize, 3712 readOnly); 3713 if (cache == NULL) 3714 return NULL; 3715 3716 if (cache->Init() != B_OK) { 3717 delete cache; 3718 return NULL; 3719 } 3720 3721 MutexLocker _(sCachesLock); 3722 sCaches.Add(cache); 3723 3724 return cache; 3725 } 3726 3727 3728 status_t 3729 block_cache_sync(void* _cache) 3730 { 3731 block_cache* cache = (block_cache*)_cache; 3732 3733 // We will sync all dirty blocks to disk that have a completed 3734 // transaction or no transaction only 3735 3736 MutexLocker locker(&cache->lock); 3737 3738 BlockWriter writer(cache); 3739 BlockTable::Iterator iterator(cache->hash); 3740 3741 while (iterator.HasNext()) { 3742 cached_block* block = iterator.Next(); 3743 if (block->CanBeWritten()) 3744 writer.Add(block); 3745 } 3746 3747 status_t status = writer.Write(); 3748 3749 locker.Unlock(); 3750 3751 wait_for_notifications(cache); 3752 // make sure that all pending TRANSACTION_WRITTEN notifications 3753 // are handled after we return 3754 return status; 3755 } 3756 3757 3758 status_t 3759 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks) 3760 { 3761 block_cache* cache = (block_cache*)_cache; 3762 3763 // We will sync all dirty blocks to disk that have a completed 3764 // transaction or no transaction only 3765 3766 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 3767 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF 3768 " (max %" B_PRIdOFF ")", 3769 blockNumber, cache->max_blocks - 1); 3770 return B_BAD_VALUE; 3771 } 3772 3773 MutexLocker locker(&cache->lock); 3774 BlockWriter writer(cache); 3775 3776 for (; numBlocks > 0; numBlocks--, blockNumber++) { 3777 cached_block* block = cache->hash->Lookup(blockNumber); 3778 if (block == NULL) 3779 continue; 3780 3781 if (block->CanBeWritten()) 3782 writer.Add(block); 3783 } 3784 3785 status_t status = writer.Write(); 3786 3787 locker.Unlock(); 3788 3789 wait_for_notifications(cache); 3790 // make sure that all pending TRANSACTION_WRITTEN notifications 3791 // are handled after we return 3792 return status; 3793 } 3794 3795 3796 /*! Discards a block from the current transaction or from the cache. 3797 You have to call this function when you no longer use a block, ie. when it 3798 might be reclaimed by the file cache in order to make sure they won't 3799 interfere. 3800 */ 3801 void 3802 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks) 3803 { 3804 // TODO: this could be a nice place to issue the ATA trim command 3805 block_cache* cache = (block_cache*)_cache; 3806 TransactionLocker locker(cache); 3807 3808 BlockWriter writer(cache); 3809 3810 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3811 cached_block* block = cache->hash->Lookup(blockNumber); 3812 if (block != NULL && block->previous_transaction != NULL) 3813 writer.Add(block); 3814 } 3815 3816 writer.Write(); 3817 // TODO: this can fail, too! 3818 3819 blockNumber -= numBlocks; 3820 // reset blockNumber to its original value 3821 3822 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3823 cached_block* block = cache->hash->Lookup(blockNumber); 3824 if (block == NULL) 3825 continue; 3826 3827 ASSERT(block->previous_transaction == NULL); 3828 3829 if (block->unused) { 3830 cache->unused_blocks.Remove(block); 3831 cache->unused_block_count--; 3832 cache->RemoveBlock(block); 3833 } else { 3834 if (block->transaction != NULL && block->parent_data != NULL 3835 && block->parent_data != block->current_data) { 3836 panic("Discarded block %" B_PRIdOFF " has already been changed in this " 3837 "transaction!", blockNumber); 3838 } 3839 3840 // mark it as discarded (in the current transaction only, if any) 3841 block->discard = true; 3842 } 3843 } 3844 } 3845 3846 3847 status_t 3848 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction) 3849 { 3850 block_cache* cache = (block_cache*)_cache; 3851 MutexLocker locker(&cache->lock); 3852 3853 if (cache->read_only) { 3854 panic("tried to make block writable on a read-only cache!"); 3855 return B_ERROR; 3856 } 3857 3858 // TODO: this can be done better! 3859 void* block; 3860 status_t status = get_writable_cached_block(cache, blockNumber, 3861 transaction, false, &block); 3862 if (status == B_OK) { 3863 put_cached_block((block_cache*)_cache, blockNumber); 3864 return B_OK; 3865 } 3866 3867 return status; 3868 } 3869 3870 3871 status_t 3872 block_cache_get_writable_etc(void* _cache, off_t blockNumber, 3873 int32 transaction, void** _block) 3874 { 3875 block_cache* cache = (block_cache*)_cache; 3876 MutexLocker locker(&cache->lock); 3877 3878 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3879 blockNumber, transaction)); 3880 if (cache->read_only) 3881 panic("tried to get writable block on a read-only cache!"); 3882 3883 return get_writable_cached_block(cache, blockNumber, 3884 transaction, false, _block); 3885 } 3886 3887 3888 void* 3889 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction) 3890 { 3891 void* block; 3892 if (block_cache_get_writable_etc(_cache, blockNumber, 3893 transaction, &block) == B_OK) 3894 return block; 3895 3896 return NULL; 3897 } 3898 3899 3900 void* 3901 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction) 3902 { 3903 block_cache* cache = (block_cache*)_cache; 3904 MutexLocker locker(&cache->lock); 3905 3906 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3907 blockNumber, transaction)); 3908 if (cache->read_only) 3909 panic("tried to get empty writable block on a read-only cache!"); 3910 3911 void* block; 3912 if (get_writable_cached_block((block_cache*)_cache, blockNumber, 3913 transaction, true, &block) == B_OK) 3914 return block; 3915 3916 return NULL; 3917 } 3918 3919 3920 status_t 3921 block_cache_get_etc(void* _cache, off_t blockNumber, const void** _block) 3922 { 3923 block_cache* cache = (block_cache*)_cache; 3924 MutexLocker locker(&cache->lock); 3925 bool allocated; 3926 3927 cached_block* block; 3928 status_t status = get_cached_block(cache, blockNumber, &allocated, true, 3929 &block); 3930 if (status != B_OK) 3931 return status; 3932 3933 #if BLOCK_CACHE_DEBUG_CHANGED 3934 if (block->compare == NULL) 3935 block->compare = cache->Allocate(); 3936 if (block->compare != NULL) 3937 memcpy(block->compare, block->current_data, cache->block_size); 3938 #endif 3939 TB(Get(cache, block)); 3940 3941 *_block = block->current_data; 3942 return B_OK; 3943 } 3944 3945 3946 const void* 3947 block_cache_get(void* _cache, off_t blockNumber) 3948 { 3949 const void* block; 3950 if (block_cache_get_etc(_cache, blockNumber, &block) == B_OK) 3951 return block; 3952 3953 return NULL; 3954 } 3955 3956 3957 /*! Changes the internal status of a writable block to \a dirty. This can be 3958 helpful in case you realize you don't need to change that block anymore 3959 for whatever reason. 3960 3961 Note, you must only use this function on blocks that were acquired 3962 writable! 3963 */ 3964 status_t 3965 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty, 3966 int32 transaction) 3967 { 3968 block_cache* cache = (block_cache*)_cache; 3969 MutexLocker locker(&cache->lock); 3970 3971 cached_block* block = cache->hash->Lookup(blockNumber); 3972 if (block == NULL) 3973 return B_BAD_VALUE; 3974 if (block->is_dirty == dirty) { 3975 // there is nothing to do for us 3976 return B_OK; 3977 } 3978 3979 // TODO: not yet implemented 3980 if (dirty) 3981 panic("block_cache_set_dirty(): not yet implemented that way!\n"); 3982 3983 return B_OK; 3984 } 3985 3986 3987 void 3988 block_cache_put(void* _cache, off_t blockNumber) 3989 { 3990 block_cache* cache = (block_cache*)_cache; 3991 MutexLocker locker(&cache->lock); 3992 3993 put_cached_block(cache, blockNumber); 3994 } 3995 3996 3997 /*! Allocates blocks and schedules them to be read from disk, but does not get references to the 3998 blocks. 3999 @param blockNumber The index of the first requested block. 4000 @param _numBlocks As input, the number of blocks requested. As output, the number of 4001 blocks actually scheduled. Prefetching will stop short if the requested range includes a 4002 block that is already cached. 4003 */ 4004 status_t 4005 block_cache_prefetch(void* _cache, off_t blockNumber, size_t* _numBlocks) 4006 { 4007 #ifndef BUILDING_USERLAND_FS_SERVER 4008 TRACE(("block_cache_prefetch: fetching %" B_PRIuSIZE " blocks starting with %" B_PRIdOFF "\n", 4009 *_numBlocks, blockNumber)); 4010 4011 block_cache* cache = reinterpret_cast<block_cache*>(_cache); 4012 MutexLocker locker(&cache->lock); 4013 4014 size_t numBlocks = *_numBlocks; 4015 *_numBlocks = 0; 4016 4017 BlockPrefetcher* blockPrefetcher = new BlockPrefetcher(cache, blockNumber, numBlocks); 4018 4019 status_t status = blockPrefetcher->Allocate(); 4020 if (status != B_OK || blockPrefetcher->NumAllocated() == 0) { 4021 TRACE(("block_cache_prefetch returning early (%s): allocated %" B_PRIuSIZE "\n", 4022 strerror(status), blockPrefetcher->NumAllocated())); 4023 delete blockPrefetcher; 4024 return status; 4025 } 4026 4027 numBlocks = blockPrefetcher->NumAllocated(); 4028 4029 status = blockPrefetcher->ReadAsync(); 4030 4031 if (status == B_OK) 4032 *_numBlocks = numBlocks; 4033 4034 return status; 4035 4036 #else // BUILDING_USERLAND_FS_SERVER 4037 *_numBlocks = 0; 4038 return B_UNSUPPORTED; 4039 #endif // !BUILDING_USERLAND_FS_SERVER 4040 } 4041