1 /* 2 * Copyright 2004-2020, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <block_cache.h> 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <errno.h> 13 #include <sys/uio.h> 14 15 #include <KernelExport.h> 16 #include <fs_cache.h> 17 18 #include <condition_variable.h> 19 #include <lock.h> 20 #include <low_resource_manager.h> 21 #include <slab/Slab.h> 22 #include <tracing.h> 23 #include <util/kernel_cpp.h> 24 #include <util/DoublyLinkedList.h> 25 #include <util/AutoLock.h> 26 #include <StackOrHeapArray.h> 27 #include <vm/vm_page.h> 28 29 #include "kernel_debug_config.h" 30 31 32 // TODO: this is a naive but growing implementation to test the API: 33 // block reading/writing is not at all optimized for speed, it will 34 // just read and write single blocks. 35 // TODO: the retrieval/copy of the original data could be delayed until the 36 // new data must be written, ie. in low memory situations. 37 38 #ifdef _KERNEL_MODE 39 # define TRACE_ALWAYS(x...) dprintf(x) 40 #else 41 # define TRACE_ALWAYS(x...) printf(x) 42 #endif 43 44 //#define TRACE_BLOCK_CACHE 45 #ifdef TRACE_BLOCK_CACHE 46 # define TRACE(x) TRACE_ALWAYS(x) 47 #else 48 # define TRACE(x) ; 49 #endif 50 51 52 // This macro is used for fatal situations that are acceptable in a running 53 // system, like out of memory situations - should only panic for debugging. 54 #define FATAL(x) panic x 55 56 static const bigtime_t kTransactionIdleTime = 2000000LL; 57 // a transaction is considered idle after 2 seconds of inactivity 58 59 60 namespace { 61 62 struct cache_transaction; 63 struct cached_block; 64 struct block_cache; 65 typedef DoublyLinkedListLink<cached_block> block_link; 66 67 struct cached_block { 68 cached_block* next; // next in hash 69 cached_block* transaction_next; 70 block_link link; 71 off_t block_number; 72 void* current_data; 73 // The data that is seen by everyone using the API; this one is always 74 // present. 75 void* original_data; 76 // When in a transaction, this contains the original data from before 77 // the transaction. 78 void* parent_data; 79 // This is a lazily alloced buffer that represents the contents of the 80 // block in the parent transaction. It may point to current_data if the 81 // contents have been changed only in the parent transaction, or, if the 82 // block has been changed in the current sub transaction already, to a 83 // new block containing the contents changed in the parent transaction. 84 // If this is NULL, the block has not been changed in the parent 85 // transaction at all. 86 #if BLOCK_CACHE_DEBUG_CHANGED 87 void* compare; 88 #endif 89 int32 ref_count; 90 int32 last_accessed; 91 bool busy_reading : 1; 92 bool busy_writing : 1; 93 bool is_writing : 1; 94 // Block has been checked out for writing without transactions, and 95 // cannot be written back if set 96 bool is_dirty : 1; 97 bool unused : 1; 98 bool discard : 1; 99 bool busy_reading_waiters : 1; 100 bool busy_writing_waiters : 1; 101 cache_transaction* transaction; 102 // This is the current active transaction, if any, the block is 103 // currently in (meaning was changed as a part of it). 104 cache_transaction* previous_transaction; 105 // This is set to the last transaction that was ended containing this 106 // block. In this case, the block has not yet written back yet, and 107 // the changed data is either in current_data, or original_data -- the 108 // latter if the block is already being part of another transaction. 109 // There can only be one previous transaction, so when the active 110 // transaction ends, the changes of the previous transaction have to 111 // be written back before that transaction becomes the next previous 112 // transaction. 113 114 bool CanBeWritten() const; 115 int32 LastAccess() const 116 { return system_time() / 1000000L - last_accessed; } 117 }; 118 119 typedef DoublyLinkedList<cached_block, 120 DoublyLinkedListMemberGetLink<cached_block, 121 &cached_block::link> > block_list; 122 123 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> { 124 static inline void* operator new(size_t size); 125 static inline void operator delete(void* block); 126 127 int32 transaction_id; 128 int32 events_pending; 129 int32 events; 130 transaction_notification_hook hook; 131 void* data; 132 bool delete_after_event; 133 }; 134 135 typedef DoublyLinkedList<cache_notification> NotificationList; 136 137 static object_cache* sCacheNotificationCache; 138 139 struct cache_listener; 140 typedef DoublyLinkedListLink<cache_listener> listener_link; 141 142 struct cache_listener : cache_notification { 143 listener_link link; 144 }; 145 146 typedef DoublyLinkedList<cache_listener, 147 DoublyLinkedListMemberGetLink<cache_listener, 148 &cache_listener::link> > ListenerList; 149 150 void* 151 cache_notification::operator new(size_t size) 152 { 153 // We can't really know whether something is a cache_notification or a 154 // cache_listener at runtime, so we just use one object_cache for both 155 // with the size set to that of the (slightly larger) cache_listener. 156 // In practice, the vast majority of cache_notifications are really 157 // cache_listeners, so this is a more than acceptable trade-off. 158 ASSERT(size <= sizeof(cache_listener)); 159 return object_cache_alloc(sCacheNotificationCache, 0); 160 } 161 162 void 163 cache_notification::operator delete(void* block) 164 { 165 object_cache_free(sCacheNotificationCache, block, 0); 166 } 167 168 169 struct BlockHash { 170 typedef off_t KeyType; 171 typedef cached_block ValueType; 172 173 size_t HashKey(KeyType key) const 174 { 175 return key; 176 } 177 178 size_t Hash(ValueType* block) const 179 { 180 return block->block_number; 181 } 182 183 bool Compare(KeyType key, ValueType* block) const 184 { 185 return block->block_number == key; 186 } 187 188 ValueType*& GetLink(ValueType* value) const 189 { 190 return value->next; 191 } 192 }; 193 194 typedef BOpenHashTable<BlockHash> BlockTable; 195 196 197 struct TransactionHash { 198 typedef int32 KeyType; 199 typedef cache_transaction ValueType; 200 201 size_t HashKey(KeyType key) const 202 { 203 return key; 204 } 205 206 size_t Hash(ValueType* transaction) const; 207 bool Compare(KeyType key, ValueType* transaction) const; 208 ValueType*& GetLink(ValueType* value) const; 209 }; 210 211 typedef BOpenHashTable<TransactionHash> TransactionTable; 212 213 214 struct block_cache : DoublyLinkedListLinkImpl<block_cache> { 215 BlockTable* hash; 216 mutex lock; 217 int fd; 218 off_t max_blocks; 219 size_t block_size; 220 int32 next_transaction_id; 221 cache_transaction* last_transaction; 222 TransactionTable* transaction_hash; 223 224 object_cache* buffer_cache; 225 block_list unused_blocks; 226 uint32 unused_block_count; 227 228 ConditionVariable busy_reading_condition; 229 uint32 busy_reading_count; 230 bool busy_reading_waiters; 231 232 ConditionVariable busy_writing_condition; 233 uint32 busy_writing_count; 234 bool busy_writing_waiters; 235 236 bigtime_t last_block_write; 237 bigtime_t last_block_write_duration; 238 239 uint32 num_dirty_blocks; 240 bool read_only; 241 242 NotificationList pending_notifications; 243 ConditionVariable condition_variable; 244 245 block_cache(int fd, off_t numBlocks, size_t blockSize, 246 bool readOnly); 247 ~block_cache(); 248 249 status_t Init(); 250 251 void Free(void* buffer); 252 void* Allocate(); 253 void FreeBlock(cached_block* block); 254 cached_block* NewBlock(off_t blockNumber); 255 void FreeBlockParentData(cached_block* block); 256 257 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0); 258 void RemoveBlock(cached_block* block); 259 void DiscardBlock(cached_block* block); 260 261 private: 262 static void _LowMemoryHandler(void* data, uint32 resources, 263 int32 level); 264 cached_block* _GetUnusedBlock(); 265 }; 266 267 struct cache_transaction { 268 cache_transaction(); 269 270 cache_transaction* next; 271 int32 id; 272 int32 num_blocks; 273 int32 main_num_blocks; 274 int32 sub_num_blocks; 275 cached_block* first_block; 276 block_list blocks; 277 ListenerList listeners; 278 bool open; 279 bool has_sub_transaction; 280 bigtime_t last_used; 281 int32 busy_writing_count; 282 }; 283 284 285 class BlockWriter { 286 public: 287 BlockWriter(block_cache* cache, 288 size_t max = SIZE_MAX); 289 ~BlockWriter(); 290 291 bool Add(cached_block* block, 292 cache_transaction* transaction = NULL); 293 bool Add(cache_transaction* transaction, 294 bool& hasLeftOvers); 295 296 status_t Write(cache_transaction* transaction = NULL, 297 bool canUnlock = true); 298 299 bool DeletedTransaction() const 300 { return fDeletedTransaction; } 301 302 static status_t WriteBlock(block_cache* cache, 303 cached_block* block); 304 305 private: 306 void* _Data(cached_block* block) const; 307 status_t _WriteBlocks(cached_block** blocks, uint32 count); 308 void _BlockDone(cached_block* block, 309 cache_transaction* transaction); 310 void _UnmarkWriting(cached_block* block); 311 312 static int _CompareBlocks(const void* _blockA, 313 const void* _blockB); 314 315 private: 316 static const size_t kBufferSize = 64; 317 318 block_cache* fCache; 319 cached_block* fBuffer[kBufferSize]; 320 cached_block** fBlocks; 321 size_t fCount; 322 size_t fTotal; 323 size_t fCapacity; 324 size_t fMax; 325 status_t fStatus; 326 bool fDeletedTransaction; 327 }; 328 329 330 class TransactionLocking { 331 public: 332 inline bool Lock(block_cache* cache) 333 { 334 mutex_lock(&cache->lock); 335 336 while (cache->busy_writing_count != 0) { 337 // wait for all blocks to be written 338 ConditionVariableEntry entry; 339 cache->busy_writing_condition.Add(&entry); 340 cache->busy_writing_waiters = true; 341 342 mutex_unlock(&cache->lock); 343 344 entry.Wait(); 345 346 mutex_lock(&cache->lock); 347 } 348 349 return true; 350 } 351 352 inline void Unlock(block_cache* cache) 353 { 354 mutex_unlock(&cache->lock); 355 } 356 }; 357 358 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker; 359 360 } // namespace 361 362 363 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 364 namespace BlockTracing { 365 366 class Action : public AbstractTraceEntry { 367 public: 368 Action(block_cache* cache, cached_block* block) 369 : 370 fCache(cache), 371 fBlockNumber(block->block_number), 372 fIsDirty(block->is_dirty), 373 fHasOriginal(block->original_data != NULL), 374 fHasParent(block->parent_data != NULL), 375 fTransactionID(-1), 376 fPreviousID(-1) 377 { 378 if (block->transaction != NULL) 379 fTransactionID = block->transaction->id; 380 if (block->previous_transaction != NULL) 381 fPreviousID = block->previous_transaction->id; 382 } 383 384 virtual void AddDump(TraceOutput& out) 385 { 386 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32 387 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber, 388 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-', 389 fHasParent ? 'p' : '-', fTransactionID, fPreviousID); 390 } 391 392 virtual const char* _Action() const = 0; 393 394 private: 395 block_cache* fCache; 396 uint64 fBlockNumber; 397 bool fIsDirty; 398 bool fHasOriginal; 399 bool fHasParent; 400 int32 fTransactionID; 401 int32 fPreviousID; 402 }; 403 404 class Get : public Action { 405 public: 406 Get(block_cache* cache, cached_block* block) 407 : 408 Action(cache, block) 409 { 410 Initialized(); 411 } 412 413 virtual const char* _Action() const { return "get"; } 414 }; 415 416 class Put : public Action { 417 public: 418 Put(block_cache* cache, cached_block* block) 419 : 420 Action(cache, block) 421 { 422 Initialized(); 423 } 424 425 virtual const char* _Action() const { return "put"; } 426 }; 427 428 class Read : public Action { 429 public: 430 Read(block_cache* cache, cached_block* block) 431 : 432 Action(cache, block) 433 { 434 Initialized(); 435 } 436 437 virtual const char* _Action() const { return "read"; } 438 }; 439 440 class Write : public Action { 441 public: 442 Write(block_cache* cache, cached_block* block) 443 : 444 Action(cache, block) 445 { 446 Initialized(); 447 } 448 449 virtual const char* _Action() const { return "write"; } 450 }; 451 452 class Flush : public Action { 453 public: 454 Flush(block_cache* cache, cached_block* block, bool getUnused = false) 455 : 456 Action(cache, block), 457 fGetUnused(getUnused) 458 { 459 Initialized(); 460 } 461 462 virtual const char* _Action() const 463 { return fGetUnused ? "get-unused" : "flush"; } 464 465 private: 466 bool fGetUnused; 467 }; 468 469 class Error : public AbstractTraceEntry { 470 public: 471 Error(block_cache* cache, uint64 blockNumber, const char* message, 472 status_t status = B_OK) 473 : 474 fCache(cache), 475 fBlockNumber(blockNumber), 476 fMessage(message), 477 fStatus(status) 478 { 479 Initialized(); 480 } 481 482 virtual void AddDump(TraceOutput& out) 483 { 484 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s", 485 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "", 486 fStatus != B_OK ? strerror(fStatus) : ""); 487 } 488 489 private: 490 block_cache* fCache; 491 uint64 fBlockNumber; 492 const char* fMessage; 493 status_t fStatus; 494 }; 495 496 #if BLOCK_CACHE_BLOCK_TRACING >= 2 497 class BlockData : public AbstractTraceEntry { 498 public: 499 enum { 500 kCurrent = 0x01, 501 kParent = 0x02, 502 kOriginal = 0x04 503 }; 504 505 BlockData(block_cache* cache, cached_block* block, const char* message) 506 : 507 fCache(cache), 508 fSize(cache->block_size), 509 fBlockNumber(block->block_number), 510 fMessage(message) 511 { 512 _Allocate(fCurrent, block->current_data); 513 _Allocate(fParent, block->parent_data); 514 _Allocate(fOriginal, block->original_data); 515 516 #if KTRACE_PRINTF_STACK_TRACE 517 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 518 false); 519 #endif 520 521 Initialized(); 522 } 523 524 virtual void AddDump(TraceOutput& out) 525 { 526 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s", 527 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-', 528 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-', 529 fMessage); 530 } 531 532 #if KTRACE_PRINTF_STACK_TRACE 533 virtual void DumpStackTrace(TraceOutput& out) 534 { 535 out.PrintStackTrace(fStackTrace); 536 } 537 #endif 538 539 void DumpBlocks(uint32 which, uint32 offset, uint32 size) 540 { 541 if ((which & kCurrent) != 0) 542 DumpBlock(kCurrent, offset, size); 543 if ((which & kParent) != 0) 544 DumpBlock(kParent, offset, size); 545 if ((which & kOriginal) != 0) 546 DumpBlock(kOriginal, offset, size); 547 } 548 549 void DumpBlock(uint32 which, uint32 offset, uint32 size) 550 { 551 if (offset > fSize) { 552 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize); 553 return; 554 } 555 if (offset + size > fSize) 556 size = fSize - offset; 557 558 const char* label; 559 uint8* data; 560 561 if ((which & kCurrent) != 0) { 562 label = "current"; 563 data = fCurrent; 564 } else if ((which & kParent) != 0) { 565 label = "parent"; 566 data = fParent; 567 } else if ((which & kOriginal) != 0) { 568 label = "original"; 569 data = fOriginal; 570 } else 571 return; 572 573 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size); 574 575 static const uint32 kBlockSize = 16; 576 data += offset; 577 578 for (uint32 i = 0; i < size;) { 579 int start = i; 580 581 kprintf(" %04" B_PRIx32 " ", i); 582 for (; i < start + kBlockSize; i++) { 583 if (!(i % 4)) 584 kprintf(" "); 585 586 if (i >= size) 587 kprintf(" "); 588 else 589 kprintf("%02x", data[i]); 590 } 591 592 kprintf("\n"); 593 } 594 } 595 596 private: 597 void _Allocate(uint8*& target, void* source) 598 { 599 if (source == NULL) { 600 target = NULL; 601 return; 602 } 603 604 target = alloc_tracing_buffer_memcpy(source, fSize, false); 605 } 606 607 block_cache* fCache; 608 uint32 fSize; 609 uint64 fBlockNumber; 610 const char* fMessage; 611 uint8* fCurrent; 612 uint8* fParent; 613 uint8* fOriginal; 614 #if KTRACE_PRINTF_STACK_TRACE 615 tracing_stack_trace* fStackTrace; 616 #endif 617 }; 618 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 619 620 } // namespace BlockTracing 621 622 # define TB(x) new(std::nothrow) BlockTracing::x; 623 #else 624 # define TB(x) ; 625 #endif 626 627 #if BLOCK_CACHE_BLOCK_TRACING >= 2 628 # define TB2(x) new(std::nothrow) BlockTracing::x; 629 #else 630 # define TB2(x) ; 631 #endif 632 633 634 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 635 namespace TransactionTracing { 636 637 class Action : public AbstractTraceEntry { 638 public: 639 Action(const char* label, block_cache* cache, 640 cache_transaction* transaction) 641 : 642 fCache(cache), 643 fTransaction(transaction), 644 fID(transaction->id), 645 fSub(transaction->has_sub_transaction), 646 fNumBlocks(transaction->num_blocks), 647 fSubNumBlocks(transaction->sub_num_blocks) 648 { 649 strlcpy(fLabel, label, sizeof(fLabel)); 650 Initialized(); 651 } 652 653 virtual void AddDump(TraceOutput& out) 654 { 655 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s" 656 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction, 657 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks); 658 } 659 660 private: 661 char fLabel[12]; 662 block_cache* fCache; 663 cache_transaction* fTransaction; 664 int32 fID; 665 bool fSub; 666 int32 fNumBlocks; 667 int32 fSubNumBlocks; 668 }; 669 670 class Detach : public AbstractTraceEntry { 671 public: 672 Detach(block_cache* cache, cache_transaction* transaction, 673 cache_transaction* newTransaction) 674 : 675 fCache(cache), 676 fTransaction(transaction), 677 fID(transaction->id), 678 fSub(transaction->has_sub_transaction), 679 fNewTransaction(newTransaction), 680 fNewID(newTransaction->id) 681 { 682 Initialized(); 683 } 684 685 virtual void AddDump(TraceOutput& out) 686 { 687 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")" 688 "from transaction %p (id %" B_PRId32 ")%s", 689 fCache, fNewTransaction, fNewID, fTransaction, fID, 690 fSub ? " sub" : ""); 691 } 692 693 private: 694 block_cache* fCache; 695 cache_transaction* fTransaction; 696 int32 fID; 697 bool fSub; 698 cache_transaction* fNewTransaction; 699 int32 fNewID; 700 }; 701 702 class Abort : public AbstractTraceEntry { 703 public: 704 Abort(block_cache* cache, cache_transaction* transaction) 705 : 706 fCache(cache), 707 fTransaction(transaction), 708 fID(transaction->id), 709 fNumBlocks(0) 710 { 711 bool isSub = transaction->has_sub_transaction; 712 fNumBlocks = isSub ? transaction->sub_num_blocks 713 : transaction->num_blocks; 714 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t)); 715 if (fBlocks != NULL) { 716 cached_block* block = transaction->first_block; 717 for (int32 i = 0; block != NULL && i < fNumBlocks; 718 block = block->transaction_next) { 719 fBlocks[i++] = block->block_number; 720 } 721 } else 722 fNumBlocks = 0; 723 724 #if KTRACE_PRINTF_STACK_TRACE 725 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 726 false); 727 #endif 728 729 Initialized(); 730 } 731 732 virtual void AddDump(TraceOutput& out) 733 { 734 out.Print("block cache %p, abort transaction " 735 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID); 736 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++) 737 out.Print(" %" B_PRIdOFF, fBlocks[i]); 738 } 739 740 #if KTRACE_PRINTF_STACK_TRACE 741 virtual void DumpStackTrace(TraceOutput& out) 742 { 743 out.PrintStackTrace(fStackTrace); 744 } 745 #endif 746 747 private: 748 block_cache* fCache; 749 cache_transaction* fTransaction; 750 int32 fID; 751 off_t* fBlocks; 752 int32 fNumBlocks; 753 #if KTRACE_PRINTF_STACK_TRACE 754 tracing_stack_trace* fStackTrace; 755 #endif 756 }; 757 758 } // namespace TransactionTracing 759 760 # define T(x) new(std::nothrow) TransactionTracing::x; 761 #else 762 # define T(x) ; 763 #endif 764 765 766 static DoublyLinkedList<block_cache> sCaches; 767 static mutex sCachesLock = MUTEX_INITIALIZER("block caches"); 768 static mutex sCachesMemoryUseLock 769 = MUTEX_INITIALIZER("block caches memory use"); 770 static size_t sUsedMemory; 771 static sem_id sEventSemaphore; 772 static mutex sNotificationsLock 773 = MUTEX_INITIALIZER("block cache notifications"); 774 static thread_id sNotifierWriterThread; 775 static DoublyLinkedListLink<block_cache> sMarkCache; 776 // TODO: this only works if the link is the first entry of block_cache 777 static object_cache* sBlockCache; 778 779 780 // #pragma mark - notifications/listener 781 782 783 /*! Checks whether or not this is an event that closes a transaction. */ 784 static inline bool 785 is_closing_event(int32 event) 786 { 787 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0; 788 } 789 790 791 static inline bool 792 is_written_event(int32 event) 793 { 794 return (event & TRANSACTION_WRITTEN) != 0; 795 } 796 797 798 /*! From the specified \a notification, it will remove the lowest pending 799 event, and return that one in \a _event. 800 If there is no pending event anymore, it will return \c false. 801 */ 802 static bool 803 get_next_pending_event(cache_notification* notification, int32* _event) 804 { 805 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) { 806 int32 pending = atomic_and(¬ification->events_pending, 807 ~eventMask); 808 809 bool more = (pending & ~eventMask) != 0; 810 811 if ((pending & eventMask) != 0) { 812 *_event = eventMask; 813 return more; 814 } 815 } 816 817 return false; 818 } 819 820 821 static void 822 flush_pending_notifications(block_cache* cache) 823 { 824 ASSERT_LOCKED_MUTEX(&sCachesLock); 825 826 while (true) { 827 MutexLocker locker(sNotificationsLock); 828 829 cache_notification* notification = cache->pending_notifications.Head(); 830 if (notification == NULL) 831 return; 832 833 bool deleteAfterEvent = false; 834 int32 event = -1; 835 if (!get_next_pending_event(notification, &event)) { 836 // remove the notification if this was the last pending event 837 cache->pending_notifications.Remove(notification); 838 deleteAfterEvent = notification->delete_after_event; 839 } 840 841 if (event >= 0) { 842 // Notify listener, we need to copy the notification, as it might 843 // be removed when we unlock the list. 844 cache_notification copy = *notification; 845 locker.Unlock(); 846 847 copy.hook(copy.transaction_id, event, copy.data); 848 849 locker.Lock(); 850 } 851 852 if (deleteAfterEvent) 853 delete notification; 854 } 855 } 856 857 858 /*! Flushes all pending notifications by calling the appropriate hook 859 functions. 860 Must not be called with a cache lock held. 861 */ 862 static void 863 flush_pending_notifications() 864 { 865 MutexLocker _(sCachesLock); 866 867 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 868 while (iterator.HasNext()) { 869 block_cache* cache = iterator.Next(); 870 871 flush_pending_notifications(cache); 872 } 873 } 874 875 876 /*! Initializes the \a notification as specified. */ 877 static void 878 set_notification(cache_transaction* transaction, 879 cache_notification ¬ification, int32 events, 880 transaction_notification_hook hook, void* data) 881 { 882 notification.transaction_id = transaction != NULL ? transaction->id : -1; 883 notification.events_pending = 0; 884 notification.events = events; 885 notification.hook = hook; 886 notification.data = data; 887 notification.delete_after_event = false; 888 } 889 890 891 /*! Makes sure the notification is deleted. It either deletes it directly, 892 when possible, or marks it for deletion if the notification is pending. 893 */ 894 static void 895 delete_notification(cache_notification* notification) 896 { 897 MutexLocker locker(sNotificationsLock); 898 899 if (notification->events_pending != 0) 900 notification->delete_after_event = true; 901 else 902 delete notification; 903 } 904 905 906 /*! Adds the notification to the pending notifications list, or, if it's 907 already part of it, updates its events_pending field. 908 Also marks the notification to be deleted if \a deleteNotification 909 is \c true. 910 Triggers the notifier thread to run. 911 */ 912 static void 913 add_notification(block_cache* cache, cache_notification* notification, 914 int32 event, bool deleteNotification) 915 { 916 if (notification->hook == NULL) 917 return; 918 919 int32 pending = atomic_or(¬ification->events_pending, event); 920 if (pending == 0) { 921 // not yet part of the notification list 922 MutexLocker locker(sNotificationsLock); 923 if (deleteNotification) 924 notification->delete_after_event = true; 925 cache->pending_notifications.Add(notification); 926 } else if (deleteNotification) { 927 // we might need to delete it ourselves if we're late 928 delete_notification(notification); 929 } 930 931 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE); 932 // We're probably still holding some locks that makes rescheduling 933 // not a good idea at this point. 934 } 935 936 937 /*! Notifies all interested listeners of this transaction about the \a event. 938 If \a event is a closing event (ie. TRANSACTION_ENDED, and 939 TRANSACTION_ABORTED), all listeners except those listening to 940 TRANSACTION_WRITTEN will be removed. 941 */ 942 static void 943 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction, 944 int32 event) 945 { 946 T(Action("notify", cache, transaction)); 947 948 bool isClosing = is_closing_event(event); 949 bool isWritten = is_written_event(event); 950 951 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 952 while (iterator.HasNext()) { 953 cache_listener* listener = iterator.Next(); 954 955 bool remove = (isClosing && !is_written_event(listener->events)) 956 || (isWritten && is_written_event(listener->events)); 957 if (remove) 958 iterator.Remove(); 959 960 if ((listener->events & event) != 0) 961 add_notification(cache, listener, event, remove); 962 else if (remove) 963 delete_notification(listener); 964 } 965 } 966 967 968 /*! Removes and deletes all listeners that are still monitoring this 969 transaction. 970 */ 971 static void 972 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction) 973 { 974 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 975 while (iterator.HasNext()) { 976 cache_listener* listener = iterator.Next(); 977 iterator.Remove(); 978 979 delete_notification(listener); 980 } 981 } 982 983 984 static status_t 985 add_transaction_listener(block_cache* cache, cache_transaction* transaction, 986 int32 events, transaction_notification_hook hookFunction, void* data) 987 { 988 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 989 while (iterator.HasNext()) { 990 cache_listener* listener = iterator.Next(); 991 992 if (listener->data == data && listener->hook == hookFunction) { 993 // this listener already exists, just update it 994 listener->events |= events; 995 return B_OK; 996 } 997 } 998 999 cache_listener* listener = new cache_listener; 1000 if (listener == NULL) 1001 return B_NO_MEMORY; 1002 1003 set_notification(transaction, *listener, events, hookFunction, data); 1004 transaction->listeners.Add(listener); 1005 return B_OK; 1006 } 1007 1008 1009 // #pragma mark - private transaction 1010 1011 1012 cache_transaction::cache_transaction() 1013 { 1014 num_blocks = 0; 1015 main_num_blocks = 0; 1016 sub_num_blocks = 0; 1017 first_block = NULL; 1018 open = true; 1019 has_sub_transaction = false; 1020 last_used = system_time(); 1021 busy_writing_count = 0; 1022 } 1023 1024 1025 static void 1026 delete_transaction(block_cache* cache, cache_transaction* transaction) 1027 { 1028 if (cache->last_transaction == transaction) 1029 cache->last_transaction = NULL; 1030 1031 remove_transaction_listeners(cache, transaction); 1032 delete transaction; 1033 } 1034 1035 1036 static cache_transaction* 1037 lookup_transaction(block_cache* cache, int32 id) 1038 { 1039 return cache->transaction_hash->Lookup(id); 1040 } 1041 1042 1043 size_t TransactionHash::Hash(cache_transaction* transaction) const 1044 { 1045 return transaction->id; 1046 } 1047 1048 1049 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const 1050 { 1051 return transaction->id == key; 1052 } 1053 1054 1055 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const 1056 { 1057 return value->next; 1058 } 1059 1060 1061 /*! Writes back any changes made to blocks in \a transaction that are still 1062 part of a previous transacton. 1063 */ 1064 static status_t 1065 write_blocks_in_previous_transaction(block_cache* cache, 1066 cache_transaction* transaction) 1067 { 1068 BlockWriter writer(cache); 1069 1070 cached_block* block = transaction->first_block; 1071 for (; block != NULL; block = block->transaction_next) { 1072 if (block->previous_transaction != NULL) { 1073 // need to write back pending changes 1074 writer.Add(block); 1075 } 1076 } 1077 1078 return writer.Write(); 1079 } 1080 1081 1082 // #pragma mark - cached_block 1083 1084 1085 bool 1086 cached_block::CanBeWritten() const 1087 { 1088 return !busy_writing && !busy_reading 1089 && (previous_transaction != NULL 1090 || (transaction == NULL && is_dirty && !is_writing)); 1091 } 1092 1093 1094 // #pragma mark - BlockWriter 1095 1096 1097 BlockWriter::BlockWriter(block_cache* cache, size_t max) 1098 : 1099 fCache(cache), 1100 fBlocks(fBuffer), 1101 fCount(0), 1102 fTotal(0), 1103 fCapacity(kBufferSize), 1104 fMax(max), 1105 fStatus(B_OK), 1106 fDeletedTransaction(false) 1107 { 1108 } 1109 1110 1111 BlockWriter::~BlockWriter() 1112 { 1113 if (fBlocks != fBuffer) 1114 free(fBlocks); 1115 } 1116 1117 1118 /*! Adds the specified block to the to be written array. If no more blocks can 1119 be added, false is returned, otherwise true. 1120 */ 1121 bool 1122 BlockWriter::Add(cached_block* block, cache_transaction* transaction) 1123 { 1124 ASSERT(block->CanBeWritten()); 1125 1126 if (fTotal == fMax) 1127 return false; 1128 1129 if (fCount >= fCapacity) { 1130 // Enlarge array if necessary 1131 cached_block** newBlocks; 1132 size_t newCapacity = max_c(256, fCapacity * 2); 1133 if (fBlocks == fBuffer) 1134 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*)); 1135 else { 1136 newBlocks = (cached_block**)realloc(fBlocks, 1137 newCapacity * sizeof(void*)); 1138 } 1139 1140 if (newBlocks == NULL) { 1141 // Allocating a larger array failed - we need to write back what 1142 // we have synchronously now (this will also clear the array) 1143 Write(transaction, false); 1144 } else { 1145 if (fBlocks == fBuffer) 1146 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*)); 1147 1148 fBlocks = newBlocks; 1149 fCapacity = newCapacity; 1150 } 1151 } 1152 1153 fBlocks[fCount++] = block; 1154 fTotal++; 1155 block->busy_writing = true; 1156 fCache->busy_writing_count++; 1157 if (block->previous_transaction != NULL) 1158 block->previous_transaction->busy_writing_count++; 1159 1160 return true; 1161 } 1162 1163 1164 /*! Adds all blocks of the specified transaction to the to be written array. 1165 If no more blocks can be added, false is returned, otherwise true. 1166 */ 1167 bool 1168 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers) 1169 { 1170 ASSERT(!transaction->open); 1171 1172 if (transaction->busy_writing_count != 0) { 1173 hasLeftOvers = true; 1174 return true; 1175 } 1176 1177 hasLeftOvers = false; 1178 1179 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 1180 while (cached_block* block = blockIterator.Next()) { 1181 if (!block->CanBeWritten()) { 1182 // This block was already part of a previous transaction within this 1183 // writer 1184 hasLeftOvers = true; 1185 continue; 1186 } 1187 if (!Add(block, transaction)) 1188 return false; 1189 1190 if (DeletedTransaction()) 1191 break; 1192 } 1193 1194 return true; 1195 } 1196 1197 1198 /*! Cache must be locked when calling this method, but it will be unlocked 1199 while the blocks are written back. 1200 */ 1201 status_t 1202 BlockWriter::Write(cache_transaction* transaction, bool canUnlock) 1203 { 1204 if (fCount == 0) 1205 return B_OK; 1206 1207 if (canUnlock) 1208 mutex_unlock(&fCache->lock); 1209 1210 // Sort blocks in their on-disk order, so we can merge consecutive writes. 1211 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks); 1212 fDeletedTransaction = false; 1213 1214 bigtime_t start = system_time(); 1215 1216 for (uint32 i = 0; i < fCount; i++) { 1217 uint32 blocks = 1; 1218 for (; (i + blocks) < fCount && blocks < IOV_MAX; blocks++) { 1219 const uint32 j = i + blocks; 1220 if (fBlocks[j]->block_number != (fBlocks[j - 1]->block_number + 1)) 1221 break; 1222 } 1223 1224 status_t status = _WriteBlocks(fBlocks + i, blocks); 1225 if (status != B_OK) { 1226 // propagate to global error handling 1227 if (fStatus == B_OK) 1228 fStatus = status; 1229 1230 for (uint32 j = i; j < (i + blocks); j++) { 1231 _UnmarkWriting(fBlocks[j]); 1232 fBlocks[j] = NULL; 1233 // This block will not be marked clean 1234 } 1235 } 1236 1237 i += (blocks - 1); 1238 } 1239 1240 bigtime_t finish = system_time(); 1241 1242 if (canUnlock) 1243 mutex_lock(&fCache->lock); 1244 1245 if (fStatus == B_OK && fCount >= 8) { 1246 fCache->last_block_write = finish; 1247 fCache->last_block_write_duration = (fCache->last_block_write - start) 1248 / fCount; 1249 } 1250 1251 for (uint32 i = 0; i < fCount; i++) 1252 _BlockDone(fBlocks[i], transaction); 1253 1254 fCount = 0; 1255 return fStatus; 1256 } 1257 1258 1259 /*! Writes the specified \a block back to disk. It will always only write back 1260 the oldest change of the block if it is part of more than one transaction. 1261 It will automatically send out TRANSACTION_WRITTEN notices, as well as 1262 delete transactions when they are no longer used, and \a deleteTransaction 1263 is \c true. 1264 */ 1265 /*static*/ status_t 1266 BlockWriter::WriteBlock(block_cache* cache, cached_block* block) 1267 { 1268 BlockWriter writer(cache); 1269 1270 writer.Add(block); 1271 return writer.Write(); 1272 } 1273 1274 1275 void* 1276 BlockWriter::_Data(cached_block* block) const 1277 { 1278 return block->previous_transaction != NULL && block->original_data != NULL 1279 ? block->original_data : block->current_data; 1280 // We first need to write back changes from previous transactions 1281 } 1282 1283 1284 status_t 1285 BlockWriter::_WriteBlocks(cached_block** blocks, uint32 count) 1286 { 1287 const size_t blockSize = fCache->block_size; 1288 1289 BStackOrHeapArray<iovec, 8> vecs(count); 1290 for (uint32 i = 0; i < count; i++) { 1291 cached_block* block = blocks[i]; 1292 ASSERT(block->busy_writing); 1293 ASSERT(i == 0 || block->block_number == (blocks[i - 1]->block_number + 1)); 1294 1295 TRACE(("BlockWriter::_WriteBlocks(block %" B_PRIdOFF ", count %" B_PRIu32 ")\n", 1296 block->block_number, count)); 1297 TB(Write(fCache, block)); 1298 TB2(BlockData(fCache, block, "before write")); 1299 1300 vecs[i].iov_base = _Data(block); 1301 vecs[i].iov_len = blockSize; 1302 } 1303 1304 ssize_t written = writev_pos(fCache->fd, 1305 blocks[0]->block_number * blockSize, vecs, count); 1306 1307 if (written != (ssize_t)(blockSize * count)) { 1308 TB(Error(fCache, block->block_number, "write failed", written)); 1309 TRACE_ALWAYS("could not write back %" B_PRIu32 " blocks (start block %" B_PRIdOFF "): %s\n", 1310 count, blocks[0]->block_number, strerror(errno)); 1311 if (written < 0) 1312 return errno; 1313 1314 return B_IO_ERROR; 1315 } 1316 1317 return B_OK; 1318 } 1319 1320 1321 void 1322 BlockWriter::_BlockDone(cached_block* block, 1323 cache_transaction* transaction) 1324 { 1325 if (block == NULL) { 1326 // An error occured when trying to write this block 1327 return; 1328 } 1329 1330 if (fCache->num_dirty_blocks > 0) 1331 fCache->num_dirty_blocks--; 1332 1333 if (_Data(block) == block->current_data) 1334 block->is_dirty = false; 1335 1336 _UnmarkWriting(block); 1337 1338 cache_transaction* previous = block->previous_transaction; 1339 if (previous != NULL) { 1340 previous->blocks.Remove(block); 1341 block->previous_transaction = NULL; 1342 1343 if (block->original_data != NULL && block->transaction == NULL) { 1344 // This block is not part of a transaction, so it does not need 1345 // its original pointer anymore. 1346 fCache->Free(block->original_data); 1347 block->original_data = NULL; 1348 } 1349 1350 // Has the previous transaction been finished with that write? 1351 if (--previous->num_blocks == 0) { 1352 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id)); 1353 T(Action("written", fCache, previous)); 1354 1355 notify_transaction_listeners(fCache, previous, 1356 TRANSACTION_WRITTEN); 1357 1358 if (transaction != NULL) { 1359 // This function is called while iterating transaction_hash. We 1360 // use RemoveUnchecked so the iterator is still valid. A regular 1361 // Remove can trigger a resize of the hash table which would 1362 // result in the linked items in the table changing order. 1363 fCache->transaction_hash->RemoveUnchecked(transaction); 1364 } else 1365 fCache->transaction_hash->Remove(previous); 1366 1367 delete_transaction(fCache, previous); 1368 fDeletedTransaction = true; 1369 } 1370 } 1371 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) { 1372 // the block is no longer used 1373 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1374 block->unused = true; 1375 fCache->unused_blocks.Add(block); 1376 fCache->unused_block_count++; 1377 } 1378 1379 TB2(BlockData(fCache, block, "after write")); 1380 } 1381 1382 1383 void 1384 BlockWriter::_UnmarkWriting(cached_block* block) 1385 { 1386 block->busy_writing = false; 1387 if (block->previous_transaction != NULL) 1388 block->previous_transaction->busy_writing_count--; 1389 fCache->busy_writing_count--; 1390 1391 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0) 1392 || block->busy_writing_waiters) { 1393 fCache->busy_writing_waiters = false; 1394 block->busy_writing_waiters = false; 1395 fCache->busy_writing_condition.NotifyAll(); 1396 } 1397 } 1398 1399 1400 /*static*/ int 1401 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB) 1402 { 1403 cached_block* blockA = *(cached_block**)_blockA; 1404 cached_block* blockB = *(cached_block**)_blockB; 1405 1406 off_t diff = blockA->block_number - blockB->block_number; 1407 if (diff > 0) 1408 return 1; 1409 1410 return diff < 0 ? -1 : 0; 1411 } 1412 1413 1414 // #pragma mark - block_cache 1415 1416 1417 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize, 1418 bool readOnly) 1419 : 1420 hash(NULL), 1421 fd(_fd), 1422 max_blocks(numBlocks), 1423 block_size(blockSize), 1424 next_transaction_id(1), 1425 last_transaction(NULL), 1426 transaction_hash(NULL), 1427 buffer_cache(NULL), 1428 unused_block_count(0), 1429 busy_reading_count(0), 1430 busy_reading_waiters(false), 1431 busy_writing_count(0), 1432 busy_writing_waiters(0), 1433 last_block_write(0), 1434 last_block_write_duration(0), 1435 num_dirty_blocks(0), 1436 read_only(readOnly) 1437 { 1438 } 1439 1440 1441 /*! Should be called with the cache's lock held. */ 1442 block_cache::~block_cache() 1443 { 1444 unregister_low_resource_handler(&_LowMemoryHandler, this); 1445 1446 delete transaction_hash; 1447 delete hash; 1448 1449 delete_object_cache(buffer_cache); 1450 1451 mutex_destroy(&lock); 1452 } 1453 1454 1455 status_t 1456 block_cache::Init() 1457 { 1458 busy_reading_condition.Init(this, "cache block busy_reading"); 1459 busy_writing_condition.Init(this, "cache block busy writing"); 1460 condition_variable.Init(this, "cache transaction sync"); 1461 mutex_init(&lock, "block cache"); 1462 1463 buffer_cache = create_object_cache_etc("block cache buffers", block_size, 1464 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 1465 if (buffer_cache == NULL) 1466 return B_NO_MEMORY; 1467 1468 hash = new BlockTable(); 1469 if (hash == NULL || hash->Init(1024) != B_OK) 1470 return B_NO_MEMORY; 1471 1472 transaction_hash = new(std::nothrow) TransactionTable(); 1473 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK) 1474 return B_NO_MEMORY; 1475 1476 return register_low_resource_handler(&_LowMemoryHandler, this, 1477 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1478 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0); 1479 } 1480 1481 1482 void 1483 block_cache::Free(void* buffer) 1484 { 1485 if (buffer != NULL) 1486 object_cache_free(buffer_cache, buffer, 0); 1487 } 1488 1489 1490 void* 1491 block_cache::Allocate() 1492 { 1493 void* block = object_cache_alloc(buffer_cache, 0); 1494 if (block != NULL) 1495 return block; 1496 1497 // recycle existing before allocating a new one 1498 RemoveUnusedBlocks(100); 1499 1500 return object_cache_alloc(buffer_cache, 0); 1501 } 1502 1503 1504 void 1505 block_cache::FreeBlock(cached_block* block) 1506 { 1507 Free(block->current_data); 1508 1509 if (block->original_data != NULL || block->parent_data != NULL) { 1510 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n", 1511 block->block_number, block->original_data, block->parent_data); 1512 } 1513 1514 #if BLOCK_CACHE_DEBUG_CHANGED 1515 Free(block->compare); 1516 #endif 1517 1518 object_cache_free(sBlockCache, block, 0); 1519 } 1520 1521 1522 /*! Allocates a new block for \a blockNumber, ready for use */ 1523 cached_block* 1524 block_cache::NewBlock(off_t blockNumber) 1525 { 1526 cached_block* block = NULL; 1527 1528 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1529 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) { 1530 // recycle existing instead of allocating a new one 1531 block = _GetUnusedBlock(); 1532 } 1533 if (block == NULL) { 1534 block = (cached_block*)object_cache_alloc(sBlockCache, 0); 1535 if (block != NULL) { 1536 block->current_data = Allocate(); 1537 if (block->current_data == NULL) { 1538 object_cache_free(sBlockCache, block, 0); 1539 return NULL; 1540 } 1541 } else { 1542 TB(Error(this, blockNumber, "allocation failed")); 1543 TRACE_ALWAYS("block allocation failed, unused list is %sempty.\n", 1544 unused_blocks.IsEmpty() ? "" : "not "); 1545 1546 // allocation failed, try to reuse an unused block 1547 block = _GetUnusedBlock(); 1548 if (block == NULL) { 1549 TB(Error(this, blockNumber, "get unused failed")); 1550 FATAL(("could not allocate block!\n")); 1551 return NULL; 1552 } 1553 } 1554 } 1555 1556 block->block_number = blockNumber; 1557 block->ref_count = 0; 1558 block->last_accessed = 0; 1559 block->transaction_next = NULL; 1560 block->transaction = block->previous_transaction = NULL; 1561 block->original_data = NULL; 1562 block->parent_data = NULL; 1563 block->busy_reading = false; 1564 block->busy_writing = false; 1565 block->is_writing = false; 1566 block->is_dirty = false; 1567 block->unused = false; 1568 block->discard = false; 1569 block->busy_reading_waiters = false; 1570 block->busy_writing_waiters = false; 1571 #if BLOCK_CACHE_DEBUG_CHANGED 1572 block->compare = NULL; 1573 #endif 1574 1575 return block; 1576 } 1577 1578 1579 void 1580 block_cache::FreeBlockParentData(cached_block* block) 1581 { 1582 ASSERT(block->parent_data != NULL); 1583 if (block->parent_data != block->current_data) 1584 Free(block->parent_data); 1585 block->parent_data = NULL; 1586 } 1587 1588 1589 void 1590 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld) 1591 { 1592 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count)); 1593 1594 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1595 cached_block* block = iterator.Next();) { 1596 if (minSecondsOld >= block->LastAccess()) { 1597 // The list is sorted by last access 1598 break; 1599 } 1600 if (block->busy_reading || block->busy_writing) 1601 continue; 1602 1603 TB(Flush(this, block)); 1604 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n", 1605 block->block_number, block->last_accessed)); 1606 1607 // this can only happen if no transactions are used 1608 if (block->is_dirty && !block->discard) { 1609 if (block->busy_writing) 1610 continue; 1611 1612 BlockWriter::WriteBlock(this, block); 1613 } 1614 1615 // remove block from lists 1616 iterator.Remove(); 1617 unused_block_count--; 1618 RemoveBlock(block); 1619 1620 if (--count <= 0) 1621 break; 1622 } 1623 } 1624 1625 1626 void 1627 block_cache::RemoveBlock(cached_block* block) 1628 { 1629 hash->Remove(block); 1630 FreeBlock(block); 1631 } 1632 1633 1634 /*! Discards the block from a transaction (this method must not be called 1635 for blocks not part of a transaction). 1636 */ 1637 void 1638 block_cache::DiscardBlock(cached_block* block) 1639 { 1640 ASSERT(block->discard); 1641 ASSERT(block->previous_transaction == NULL); 1642 1643 if (block->parent_data != NULL) 1644 FreeBlockParentData(block); 1645 1646 if (block->original_data != NULL) { 1647 Free(block->original_data); 1648 block->original_data = NULL; 1649 } 1650 1651 RemoveBlock(block); 1652 } 1653 1654 1655 void 1656 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level) 1657 { 1658 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level)); 1659 1660 // free some blocks according to the low memory state 1661 // (if there is enough memory left, we don't free any) 1662 1663 block_cache* cache = (block_cache*)data; 1664 if (cache->unused_block_count <= 1) 1665 return; 1666 1667 int32 free = 0; 1668 int32 secondsOld = 0; 1669 switch (level) { 1670 case B_NO_LOW_RESOURCE: 1671 return; 1672 case B_LOW_RESOURCE_NOTE: 1673 free = cache->unused_block_count / 4; 1674 secondsOld = 120; 1675 break; 1676 case B_LOW_RESOURCE_WARNING: 1677 free = cache->unused_block_count / 2; 1678 secondsOld = 10; 1679 break; 1680 case B_LOW_RESOURCE_CRITICAL: 1681 free = cache->unused_block_count - 1; 1682 secondsOld = 0; 1683 break; 1684 } 1685 1686 MutexLocker locker(&cache->lock); 1687 1688 if (!locker.IsLocked()) { 1689 // If our block_cache were deleted, it could be that we had 1690 // been called before that deletion went through, therefore, 1691 // acquiring its lock might fail. 1692 return; 1693 } 1694 1695 #ifdef TRACE_BLOCK_CACHE 1696 uint32 oldUnused = cache->unused_block_count; 1697 #endif 1698 1699 cache->RemoveUnusedBlocks(free, secondsOld); 1700 1701 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n", 1702 cache, oldUnused, cache->unused_block_count)); 1703 } 1704 1705 1706 cached_block* 1707 block_cache::_GetUnusedBlock() 1708 { 1709 TRACE(("block_cache: get unused block\n")); 1710 1711 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1712 cached_block* block = iterator.Next();) { 1713 TB(Flush(this, block, true)); 1714 // this can only happen if no transactions are used 1715 if (block->is_dirty && !block->busy_writing && !block->discard) 1716 BlockWriter::WriteBlock(this, block); 1717 1718 // remove block from lists 1719 iterator.Remove(); 1720 unused_block_count--; 1721 hash->Remove(block); 1722 1723 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1724 block->unused = false; 1725 1726 // TODO: see if compare data is handled correctly here! 1727 #if BLOCK_CACHE_DEBUG_CHANGED 1728 if (block->compare != NULL) 1729 Free(block->compare); 1730 #endif 1731 return block; 1732 } 1733 1734 return NULL; 1735 } 1736 1737 1738 // #pragma mark - private block functions 1739 1740 1741 /*! Cache must be locked. 1742 */ 1743 static void 1744 mark_block_busy_reading(block_cache* cache, cached_block* block) 1745 { 1746 block->busy_reading = true; 1747 cache->busy_reading_count++; 1748 } 1749 1750 1751 /*! Cache must be locked. 1752 */ 1753 static void 1754 mark_block_unbusy_reading(block_cache* cache, cached_block* block) 1755 { 1756 block->busy_reading = false; 1757 cache->busy_reading_count--; 1758 1759 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0) 1760 || block->busy_reading_waiters) { 1761 cache->busy_reading_waiters = false; 1762 block->busy_reading_waiters = false; 1763 cache->busy_reading_condition.NotifyAll(); 1764 } 1765 } 1766 1767 1768 /*! Cache must be locked. 1769 */ 1770 static void 1771 wait_for_busy_reading_block(block_cache* cache, cached_block* block) 1772 { 1773 while (block->busy_reading) { 1774 // wait for at least the specified block to be read in 1775 ConditionVariableEntry entry; 1776 cache->busy_reading_condition.Add(&entry); 1777 block->busy_reading_waiters = true; 1778 1779 mutex_unlock(&cache->lock); 1780 1781 entry.Wait(); 1782 1783 mutex_lock(&cache->lock); 1784 } 1785 } 1786 1787 1788 /*! Cache must be locked. 1789 */ 1790 static void 1791 wait_for_busy_reading_blocks(block_cache* cache) 1792 { 1793 while (cache->busy_reading_count != 0) { 1794 // wait for all blocks to be read in 1795 ConditionVariableEntry entry; 1796 cache->busy_reading_condition.Add(&entry); 1797 cache->busy_reading_waiters = true; 1798 1799 mutex_unlock(&cache->lock); 1800 1801 entry.Wait(); 1802 1803 mutex_lock(&cache->lock); 1804 } 1805 } 1806 1807 1808 /*! Cache must be locked. 1809 */ 1810 static void 1811 wait_for_busy_writing_block(block_cache* cache, cached_block* block) 1812 { 1813 while (block->busy_writing) { 1814 // wait for all blocks to be written back 1815 ConditionVariableEntry entry; 1816 cache->busy_writing_condition.Add(&entry); 1817 block->busy_writing_waiters = true; 1818 1819 mutex_unlock(&cache->lock); 1820 1821 entry.Wait(); 1822 1823 mutex_lock(&cache->lock); 1824 } 1825 } 1826 1827 1828 /*! Cache must be locked. 1829 */ 1830 static void 1831 wait_for_busy_writing_blocks(block_cache* cache) 1832 { 1833 while (cache->busy_writing_count != 0) { 1834 // wait for all blocks to be written back 1835 ConditionVariableEntry entry; 1836 cache->busy_writing_condition.Add(&entry); 1837 cache->busy_writing_waiters = true; 1838 1839 mutex_unlock(&cache->lock); 1840 1841 entry.Wait(); 1842 1843 mutex_lock(&cache->lock); 1844 } 1845 } 1846 1847 1848 /*! Removes a reference from the specified \a block. If this was the last 1849 reference, the block is moved into the unused list. 1850 In low memory situations, it will also free some blocks from that list, 1851 but not necessarily the \a block it just released. 1852 */ 1853 static void 1854 put_cached_block(block_cache* cache, cached_block* block) 1855 { 1856 #if BLOCK_CACHE_DEBUG_CHANGED 1857 if (!block->is_dirty && block->compare != NULL 1858 && memcmp(block->current_data, block->compare, cache->block_size)) { 1859 TRACE_ALWAYS("new block:\n"); 1860 dump_block((const char*)block->current_data, 256, " "); 1861 TRACE_ALWAYS("unchanged block:\n"); 1862 dump_block((const char*)block->compare, 256, " "); 1863 BlockWriter::WriteBlock(cache, block); 1864 panic("block_cache: supposed to be clean block was changed!\n"); 1865 1866 cache->Free(block->compare); 1867 block->compare = NULL; 1868 } 1869 #endif 1870 TB(Put(cache, block)); 1871 1872 if (block->ref_count < 1) { 1873 panic("Invalid ref_count for block %p, cache %p\n", block, cache); 1874 return; 1875 } 1876 1877 if (--block->ref_count == 0 1878 && block->transaction == NULL && block->previous_transaction == NULL) { 1879 // This block is not used anymore, and not part of any transaction 1880 block->is_writing = false; 1881 1882 if (block->discard) { 1883 cache->RemoveBlock(block); 1884 } else { 1885 // put this block in the list of unused blocks 1886 ASSERT(!block->unused); 1887 block->unused = true; 1888 1889 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1890 cache->unused_blocks.Add(block); 1891 cache->unused_block_count++; 1892 } 1893 } 1894 } 1895 1896 1897 static void 1898 put_cached_block(block_cache* cache, off_t blockNumber) 1899 { 1900 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1901 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1902 blockNumber, cache->max_blocks - 1); 1903 } 1904 1905 cached_block* block = cache->hash->Lookup(blockNumber); 1906 if (block != NULL) 1907 put_cached_block(cache, block); 1908 else { 1909 TB(Error(cache, blockNumber, "put unknown")); 1910 } 1911 } 1912 1913 1914 /*! Retrieves the block \a blockNumber from the hash table, if it's already 1915 there, or reads it from the disk. 1916 You need to have the cache locked when calling this function. 1917 1918 \param _allocated tells you whether or not a new block has been allocated 1919 to satisfy your request. 1920 \param readBlock if \c false, the block will not be read in case it was 1921 not already in the cache. The block you retrieve may contain random 1922 data. If \c true, the cache will be temporarily unlocked while the 1923 block is read in. 1924 */ 1925 static status_t 1926 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated, 1927 bool readBlock, cached_block** _block) 1928 { 1929 ASSERT_LOCKED_MUTEX(&cache->lock); 1930 1931 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1932 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1933 blockNumber, cache->max_blocks - 1); 1934 return B_BAD_VALUE; 1935 } 1936 1937 retry: 1938 cached_block* block = cache->hash->Lookup(blockNumber); 1939 *_allocated = false; 1940 1941 if (block == NULL) { 1942 // put block into cache 1943 block = cache->NewBlock(blockNumber); 1944 if (block == NULL) 1945 return B_NO_MEMORY; 1946 1947 cache->hash->Insert(block); 1948 *_allocated = true; 1949 } else if (block->busy_reading) { 1950 // The block is currently busy_reading - wait and try again later 1951 wait_for_busy_reading_block(cache, block); 1952 goto retry; 1953 } 1954 1955 if (block->unused) { 1956 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber)); 1957 block->unused = false; 1958 cache->unused_blocks.Remove(block); 1959 cache->unused_block_count--; 1960 } 1961 1962 if (*_allocated && readBlock) { 1963 // read block into cache 1964 int32 blockSize = cache->block_size; 1965 1966 mark_block_busy_reading(cache, block); 1967 mutex_unlock(&cache->lock); 1968 1969 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize, 1970 block->current_data, blockSize); 1971 1972 mutex_lock(&cache->lock); 1973 if (bytesRead < blockSize) { 1974 cache->RemoveBlock(block); 1975 TB(Error(cache, blockNumber, "read failed", bytesRead)); 1976 1977 TRACE_ALWAYS("could not read block %" B_PRIdOFF ": bytesRead: %zd," 1978 " error: %s\n", blockNumber, bytesRead, strerror(errno)); 1979 return errno; 1980 } 1981 TB(Read(cache, block)); 1982 1983 mark_block_unbusy_reading(cache, block); 1984 } 1985 1986 block->ref_count++; 1987 block->last_accessed = system_time() / 1000000L; 1988 1989 *_block = block; 1990 return B_OK; 1991 } 1992 1993 1994 /*! Returns the writable block data for the requested blockNumber. 1995 If \a cleared is true, the block is not read from disk; an empty block 1996 is returned. 1997 1998 This is the only method to insert a block into a transaction. It makes 1999 sure that the previous block contents are preserved in that case. 2000 */ 2001 static status_t 2002 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base, 2003 off_t length, int32 transactionID, bool cleared, void** _block) 2004 { 2005 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 2006 blockNumber, transactionID)); 2007 2008 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 2009 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 2010 blockNumber, cache->max_blocks - 1); 2011 return B_BAD_VALUE; 2012 } 2013 2014 bool allocated; 2015 cached_block* block; 2016 status_t status = get_cached_block(cache, blockNumber, &allocated, 2017 !cleared, &block); 2018 if (status != B_OK) 2019 return status; 2020 2021 if (block->busy_writing) 2022 wait_for_busy_writing_block(cache, block); 2023 2024 block->discard = false; 2025 2026 // if there is no transaction support, we just return the current block 2027 if (transactionID == -1) { 2028 if (cleared) { 2029 mark_block_busy_reading(cache, block); 2030 mutex_unlock(&cache->lock); 2031 2032 memset(block->current_data, 0, cache->block_size); 2033 2034 mutex_lock(&cache->lock); 2035 mark_block_unbusy_reading(cache, block); 2036 } 2037 2038 block->is_writing = true; 2039 2040 if (!block->is_dirty) { 2041 cache->num_dirty_blocks++; 2042 block->is_dirty = true; 2043 // mark the block as dirty 2044 } 2045 2046 TB(Get(cache, block)); 2047 *_block = block->current_data; 2048 return B_OK; 2049 } 2050 2051 cache_transaction* transaction = block->transaction; 2052 2053 if (transaction != NULL && transaction->id != transactionID) { 2054 // TODO: we have to wait here until the other transaction is done. 2055 // Maybe we should even panic, since we can't prevent any deadlocks. 2056 panic("get_writable_cached_block(): asked to get busy writable block " 2057 "(transaction %" B_PRId32 ")\n", block->transaction->id); 2058 put_cached_block(cache, block); 2059 return B_BAD_VALUE; 2060 } 2061 if (transaction == NULL && transactionID != -1) { 2062 // get new transaction 2063 transaction = lookup_transaction(cache, transactionID); 2064 if (transaction == NULL) { 2065 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n", 2066 transactionID); 2067 put_cached_block(cache, block); 2068 return B_BAD_VALUE; 2069 } 2070 if (!transaction->open) { 2071 panic("get_writable_cached_block(): transaction already done!\n"); 2072 put_cached_block(cache, block); 2073 return B_BAD_VALUE; 2074 } 2075 2076 block->transaction = transaction; 2077 2078 // attach the block to the transaction block list 2079 block->transaction_next = transaction->first_block; 2080 transaction->first_block = block; 2081 transaction->num_blocks++; 2082 } 2083 if (transaction != NULL) 2084 transaction->last_used = system_time(); 2085 2086 bool wasUnchanged = block->original_data == NULL 2087 || block->previous_transaction != NULL; 2088 2089 if (!(allocated && cleared) && block->original_data == NULL) { 2090 // we already have data, so we need to preserve it 2091 block->original_data = cache->Allocate(); 2092 if (block->original_data == NULL) { 2093 TB(Error(cache, blockNumber, "allocate original failed")); 2094 FATAL(("could not allocate original_data\n")); 2095 put_cached_block(cache, block); 2096 return B_NO_MEMORY; 2097 } 2098 2099 mark_block_busy_reading(cache, block); 2100 mutex_unlock(&cache->lock); 2101 2102 memcpy(block->original_data, block->current_data, cache->block_size); 2103 2104 mutex_lock(&cache->lock); 2105 mark_block_unbusy_reading(cache, block); 2106 } 2107 if (block->parent_data == block->current_data) { 2108 // remember any previous contents for the parent transaction 2109 block->parent_data = cache->Allocate(); 2110 if (block->parent_data == NULL) { 2111 // TODO: maybe we should just continue the current transaction in 2112 // this case... 2113 TB(Error(cache, blockNumber, "allocate parent failed")); 2114 FATAL(("could not allocate parent\n")); 2115 put_cached_block(cache, block); 2116 return B_NO_MEMORY; 2117 } 2118 2119 mark_block_busy_reading(cache, block); 2120 mutex_unlock(&cache->lock); 2121 2122 memcpy(block->parent_data, block->current_data, cache->block_size); 2123 2124 mutex_lock(&cache->lock); 2125 mark_block_unbusy_reading(cache, block); 2126 2127 transaction->sub_num_blocks++; 2128 } else if (transaction != NULL && transaction->has_sub_transaction 2129 && block->parent_data == NULL && wasUnchanged) 2130 transaction->sub_num_blocks++; 2131 2132 if (cleared) { 2133 mark_block_busy_reading(cache, block); 2134 mutex_unlock(&cache->lock); 2135 2136 memset(block->current_data, 0, cache->block_size); 2137 2138 mutex_lock(&cache->lock); 2139 mark_block_unbusy_reading(cache, block); 2140 } 2141 2142 block->is_dirty = true; 2143 TB(Get(cache, block)); 2144 TB2(BlockData(cache, block, "get writable")); 2145 2146 *_block = block->current_data; 2147 return B_OK; 2148 } 2149 2150 2151 #if DEBUG_BLOCK_CACHE 2152 2153 2154 static void 2155 dump_block(cached_block* block) 2156 { 2157 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32 2158 " %c%c%c%c%c%c %08lx %08lx\n", 2159 (addr_t)block, block->block_number, 2160 (addr_t)block->current_data, (addr_t)block->original_data, 2161 (addr_t)block->parent_data, block->ref_count, block->LastAccess(), 2162 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-', 2163 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-', 2164 block->unused ? 'U' : '-', block->discard ? 'D' : '-', 2165 (addr_t)block->transaction, 2166 (addr_t)block->previous_transaction); 2167 } 2168 2169 2170 static void 2171 dump_block_long(cached_block* block) 2172 { 2173 kprintf("BLOCK %p\n", block); 2174 kprintf(" current data: %p\n", block->current_data); 2175 kprintf(" original data: %p\n", block->original_data); 2176 kprintf(" parent data: %p\n", block->parent_data); 2177 #if BLOCK_CACHE_DEBUG_CHANGED 2178 kprintf(" compare data: %p\n", block->compare); 2179 #endif 2180 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count); 2181 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess()); 2182 kprintf(" flags: "); 2183 if (block->busy_reading) 2184 kprintf(" busy_reading"); 2185 if (block->busy_writing) 2186 kprintf(" busy_writing"); 2187 if (block->is_writing) 2188 kprintf(" is-writing"); 2189 if (block->is_dirty) 2190 kprintf(" is-dirty"); 2191 if (block->unused) 2192 kprintf(" unused"); 2193 if (block->discard) 2194 kprintf(" discard"); 2195 kprintf("\n"); 2196 if (block->transaction != NULL) { 2197 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction, 2198 block->transaction->id); 2199 if (block->transaction_next != NULL) { 2200 kprintf(" next in transaction: %" B_PRIdOFF "\n", 2201 block->transaction_next->block_number); 2202 } 2203 } 2204 if (block->previous_transaction != NULL) { 2205 kprintf(" previous transaction: %p (%" B_PRId32 ")\n", 2206 block->previous_transaction, 2207 block->previous_transaction->id); 2208 } 2209 2210 set_debug_variable("_current", (addr_t)block->current_data); 2211 set_debug_variable("_original", (addr_t)block->original_data); 2212 set_debug_variable("_parent", (addr_t)block->parent_data); 2213 } 2214 2215 2216 static int 2217 dump_cached_block(int argc, char** argv) 2218 { 2219 if (argc != 2) { 2220 kprintf("usage: %s <block-address>\n", argv[0]); 2221 return 0; 2222 } 2223 2224 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1])); 2225 return 0; 2226 } 2227 2228 2229 static int 2230 dump_cache(int argc, char** argv) 2231 { 2232 bool showTransactions = false; 2233 bool showBlocks = false; 2234 int32 i = 1; 2235 while (argv[i] != NULL && argv[i][0] == '-') { 2236 for (char* arg = &argv[i][1]; arg[0]; arg++) { 2237 switch (arg[0]) { 2238 case 'b': 2239 showBlocks = true; 2240 break; 2241 case 't': 2242 showTransactions = true; 2243 break; 2244 default: 2245 print_debugger_command_usage(argv[0]); 2246 return 0; 2247 } 2248 } 2249 i++; 2250 } 2251 2252 if (i >= argc) { 2253 print_debugger_command_usage(argv[0]); 2254 return 0; 2255 } 2256 2257 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]); 2258 if (cache == NULL) { 2259 kprintf("invalid cache address\n"); 2260 return 0; 2261 } 2262 2263 off_t blockNumber = -1; 2264 if (i + 1 < argc) { 2265 blockNumber = parse_expression(argv[i + 1]); 2266 cached_block* block = cache->hash->Lookup(blockNumber); 2267 if (block != NULL) 2268 dump_block_long(block); 2269 else 2270 kprintf("block %" B_PRIdOFF " not found\n", blockNumber); 2271 return 0; 2272 } 2273 2274 kprintf("BLOCK CACHE: %p\n", cache); 2275 2276 kprintf(" fd: %d\n", cache->fd); 2277 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks); 2278 kprintf(" block_size: %zu\n", cache->block_size); 2279 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id); 2280 kprintf(" buffer_cache: %p\n", cache->buffer_cache); 2281 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count, 2282 cache->busy_reading_waiters ? "has" : "no"); 2283 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count, 2284 cache->busy_writing_waiters ? "has" : "no"); 2285 2286 if (!cache->pending_notifications.IsEmpty()) { 2287 kprintf(" pending notifications:\n"); 2288 2289 NotificationList::Iterator iterator 2290 = cache->pending_notifications.GetIterator(); 2291 while (iterator.HasNext()) { 2292 cache_notification* notification = iterator.Next(); 2293 2294 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification, 2295 notification->events_pending, notification->hook, 2296 notification->data); 2297 } 2298 } 2299 2300 if (showTransactions) { 2301 kprintf(" transactions:\n"); 2302 kprintf("address id state blocks main sub\n"); 2303 2304 TransactionTable::Iterator iterator(cache->transaction_hash); 2305 2306 while (iterator.HasNext()) { 2307 cache_transaction* transaction = iterator.Next(); 2308 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" 2309 B_PRId32 "\n", transaction, transaction->id, 2310 transaction->open ? "open" : "closed", 2311 transaction->num_blocks, transaction->main_num_blocks, 2312 transaction->sub_num_blocks); 2313 } 2314 } 2315 2316 if (showBlocks) { 2317 kprintf(" blocks:\n"); 2318 kprintf("address block no. current original parent refs access " 2319 "flags transact prev. trans\n"); 2320 } 2321 2322 uint32 referenced = 0; 2323 uint32 count = 0; 2324 uint32 dirty = 0; 2325 uint32 discarded = 0; 2326 BlockTable::Iterator iterator(cache->hash); 2327 while (iterator.HasNext()) { 2328 cached_block* block = iterator.Next(); 2329 if (showBlocks) 2330 dump_block(block); 2331 2332 if (block->is_dirty) 2333 dirty++; 2334 if (block->discard) 2335 discarded++; 2336 if (block->ref_count) 2337 referenced++; 2338 count++; 2339 } 2340 2341 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 2342 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 2343 " in unused.\n", 2344 count, dirty, discarded, referenced, cache->busy_reading_count, 2345 cache->unused_block_count); 2346 return 0; 2347 } 2348 2349 2350 static int 2351 dump_transaction(int argc, char** argv) 2352 { 2353 bool showBlocks = false; 2354 int i = 1; 2355 if (argc > 1 && !strcmp(argv[1], "-b")) { 2356 showBlocks = true; 2357 i++; 2358 } 2359 2360 if (argc - i < 1 || argc - i > 2) { 2361 print_debugger_command_usage(argv[0]); 2362 return 0; 2363 } 2364 2365 cache_transaction* transaction = NULL; 2366 2367 if (argc - i == 1) { 2368 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]); 2369 } else { 2370 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]); 2371 int32 id = parse_expression(argv[i + 1]); 2372 transaction = lookup_transaction(cache, id); 2373 if (transaction == NULL) { 2374 kprintf("No transaction with ID %" B_PRId32 " found.\n", id); 2375 return 0; 2376 } 2377 } 2378 2379 kprintf("TRANSACTION %p\n", transaction); 2380 2381 kprintf(" id: %" B_PRId32 "\n", transaction->id); 2382 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks); 2383 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks); 2384 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks); 2385 kprintf(" has sub: %d\n", transaction->has_sub_transaction); 2386 kprintf(" state: %s\n", transaction->open ? "open" : "closed"); 2387 kprintf(" idle: %" B_PRId64 " secs\n", 2388 (system_time() - transaction->last_used) / 1000000); 2389 2390 kprintf(" listeners:\n"); 2391 2392 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 2393 while (iterator.HasNext()) { 2394 cache_listener* listener = iterator.Next(); 2395 2396 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending, 2397 listener->hook, listener->data); 2398 } 2399 2400 if (!showBlocks) 2401 return 0; 2402 2403 kprintf(" blocks:\n"); 2404 kprintf("address block no. current original parent refs access " 2405 "flags transact prev. trans\n"); 2406 2407 cached_block* block = transaction->first_block; 2408 while (block != NULL) { 2409 dump_block(block); 2410 block = block->transaction_next; 2411 } 2412 2413 kprintf("--\n"); 2414 2415 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 2416 while (blockIterator.HasNext()) { 2417 block = blockIterator.Next(); 2418 dump_block(block); 2419 } 2420 2421 return 0; 2422 } 2423 2424 2425 static int 2426 dump_caches(int argc, char** argv) 2427 { 2428 kprintf("Block caches:\n"); 2429 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator(); 2430 while (i.HasNext()) { 2431 block_cache* cache = i.Next(); 2432 if (cache == (block_cache*)&sMarkCache) 2433 continue; 2434 2435 kprintf(" %p\n", cache); 2436 } 2437 2438 return 0; 2439 } 2440 2441 2442 #if BLOCK_CACHE_BLOCK_TRACING >= 2 2443 static int 2444 dump_block_data(int argc, char** argv) 2445 { 2446 using namespace BlockTracing; 2447 2448 // Determine which blocks to show 2449 2450 bool printStackTrace = true; 2451 uint32 which = 0; 2452 int32 i = 1; 2453 while (i < argc && argv[i][0] == '-') { 2454 char* arg = &argv[i][1]; 2455 while (arg[0]) { 2456 switch (arg[0]) { 2457 case 'c': 2458 which |= BlockData::kCurrent; 2459 break; 2460 case 'p': 2461 which |= BlockData::kParent; 2462 break; 2463 case 'o': 2464 which |= BlockData::kOriginal; 2465 break; 2466 2467 default: 2468 kprintf("invalid block specifier (only o/c/p are " 2469 "allowed).\n"); 2470 return 0; 2471 } 2472 arg++; 2473 } 2474 2475 i++; 2476 } 2477 if (which == 0) 2478 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal; 2479 2480 if (i == argc) { 2481 print_debugger_command_usage(argv[0]); 2482 return 0; 2483 } 2484 2485 // Get the range of blocks to print 2486 2487 int64 from = parse_expression(argv[i]); 2488 int64 to = from; 2489 if (argc > i + 1) 2490 to = parse_expression(argv[i + 1]); 2491 if (to < from) 2492 to = from; 2493 2494 uint32 offset = 0; 2495 uint32 size = LONG_MAX; 2496 if (argc > i + 2) 2497 offset = parse_expression(argv[i + 2]); 2498 if (argc > i + 3) 2499 size = parse_expression(argv[i + 3]); 2500 2501 TraceEntryIterator iterator; 2502 iterator.MoveTo(from - 1); 2503 2504 static char sBuffer[1024]; 2505 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID); 2506 2507 while (TraceEntry* entry = iterator.Next()) { 2508 int32 index = iterator.Index(); 2509 if (index > to) 2510 break; 2511 2512 Action* action = dynamic_cast<Action*>(entry); 2513 if (action != NULL) { 2514 out.Clear(); 2515 out.DumpEntry(action); 2516 continue; 2517 } 2518 2519 BlockData* blockData = dynamic_cast<BlockData*>(entry); 2520 if (blockData == NULL) 2521 continue; 2522 2523 out.Clear(); 2524 2525 const char* dump = out.DumpEntry(entry); 2526 int length = strlen(dump); 2527 if (length > 0 && dump[length - 1] == '\n') 2528 length--; 2529 2530 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump); 2531 2532 if (printStackTrace) { 2533 out.Clear(); 2534 entry->DumpStackTrace(out); 2535 if (out.Size() > 0) 2536 kputs(out.Buffer()); 2537 } 2538 2539 blockData->DumpBlocks(which, offset, size); 2540 } 2541 2542 return 0; 2543 } 2544 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 2545 2546 2547 #endif // DEBUG_BLOCK_CACHE 2548 2549 2550 /*! Traverses through the block_cache list, and returns one cache after the 2551 other. The cache returned is automatically locked when you get it, and 2552 unlocked with the next call to this function. Ignores caches that are in 2553 deletion state. 2554 Returns \c NULL when the end of the list is reached. 2555 */ 2556 static block_cache* 2557 get_next_locked_block_cache(block_cache* last) 2558 { 2559 MutexLocker _(sCachesLock); 2560 2561 block_cache* cache; 2562 if (last != NULL) { 2563 mutex_unlock(&last->lock); 2564 2565 cache = sCaches.GetNext((block_cache*)&sMarkCache); 2566 sCaches.Remove((block_cache*)&sMarkCache); 2567 } else 2568 cache = sCaches.Head(); 2569 2570 if (cache != NULL) { 2571 mutex_lock(&cache->lock); 2572 sCaches.InsertBefore(sCaches.GetNext(cache), (block_cache*)&sMarkCache); 2573 } 2574 2575 return cache; 2576 } 2577 2578 2579 /*! Background thread that continuously checks for pending notifications of 2580 all caches. 2581 Every two seconds, it will also write back up to 64 blocks per cache. 2582 */ 2583 static status_t 2584 block_notifier_and_writer(void* /*data*/) 2585 { 2586 const bigtime_t kDefaultTimeout = 2000000LL; 2587 bigtime_t timeout = kDefaultTimeout; 2588 2589 while (true) { 2590 bigtime_t start = system_time(); 2591 2592 status_t status = acquire_sem_etc(sEventSemaphore, 1, 2593 B_RELATIVE_TIMEOUT, timeout); 2594 if (status == B_OK) { 2595 flush_pending_notifications(); 2596 timeout -= system_time() - start; 2597 continue; 2598 } 2599 2600 // Write 64 blocks of each block_cache roughly every 2 seconds, 2601 // potentially more or less depending on congestion and drive speeds 2602 // (usually much less.) We do not want to queue everything at once 2603 // because a future transaction might then get held up waiting for 2604 // a specific block to be written. 2605 timeout = kDefaultTimeout; 2606 size_t usedMemory; 2607 object_cache_get_usage(sBlockCache, &usedMemory); 2608 2609 block_cache* cache = NULL; 2610 while ((cache = get_next_locked_block_cache(cache)) != NULL) { 2611 // Give some breathing room: wait 2x the length of the potential 2612 // maximum block count-sized write between writes, and also skip 2613 // if there are more than 16 blocks currently being written. 2614 const bigtime_t next = cache->last_block_write 2615 + cache->last_block_write_duration * 2 * 64; 2616 if (cache->busy_writing_count > 16 || system_time() < next) { 2617 if (cache->last_block_write_duration > 0) { 2618 timeout = min_c(timeout, 2619 cache->last_block_write_duration * 2 * 64); 2620 } 2621 continue; 2622 } 2623 2624 BlockWriter writer(cache, 64); 2625 bool hasMoreBlocks = false; 2626 2627 size_t cacheUsedMemory; 2628 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory); 2629 usedMemory += cacheUsedMemory; 2630 2631 if (cache->num_dirty_blocks) { 2632 // This cache is not using transactions, we'll scan the blocks 2633 // directly 2634 BlockTable::Iterator iterator(cache->hash); 2635 2636 while (iterator.HasNext()) { 2637 cached_block* block = iterator.Next(); 2638 if (block->CanBeWritten() && !writer.Add(block)) { 2639 hasMoreBlocks = true; 2640 break; 2641 } 2642 } 2643 } else { 2644 TransactionTable::Iterator iterator(cache->transaction_hash); 2645 2646 while (iterator.HasNext()) { 2647 cache_transaction* transaction = iterator.Next(); 2648 if (transaction->open) { 2649 if (system_time() > transaction->last_used 2650 + kTransactionIdleTime) { 2651 // Transaction is open but idle 2652 notify_transaction_listeners(cache, transaction, 2653 TRANSACTION_IDLE); 2654 } 2655 continue; 2656 } 2657 2658 bool hasLeftOvers; 2659 // we ignore this one 2660 if (!writer.Add(transaction, hasLeftOvers)) { 2661 hasMoreBlocks = true; 2662 break; 2663 } 2664 } 2665 } 2666 2667 writer.Write(); 2668 2669 if (hasMoreBlocks && cache->last_block_write_duration > 0) { 2670 // There are probably still more blocks that we could write, so 2671 // see if we can decrease the timeout. 2672 timeout = min_c(timeout, 2673 cache->last_block_write_duration * 2 * 64); 2674 } 2675 2676 if ((block_cache_used_memory() / B_PAGE_SIZE) 2677 > vm_page_num_pages() / 2) { 2678 // Try to reduce memory usage to half of the available 2679 // RAM at maximum 2680 cache->RemoveUnusedBlocks(1000, 10); 2681 } 2682 } 2683 2684 MutexLocker _(sCachesMemoryUseLock); 2685 sUsedMemory = usedMemory; 2686 } 2687 2688 // never can get here 2689 return B_OK; 2690 } 2691 2692 2693 /*! Notify function for wait_for_notifications(). */ 2694 static void 2695 notify_sync(int32 transactionID, int32 event, void* _cache) 2696 { 2697 block_cache* cache = (block_cache*)_cache; 2698 2699 cache->condition_variable.NotifyOne(); 2700 } 2701 2702 2703 /*! Must be called with the sCachesLock held. */ 2704 static bool 2705 is_valid_cache(block_cache* cache) 2706 { 2707 ASSERT_LOCKED_MUTEX(&sCachesLock); 2708 2709 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 2710 while (iterator.HasNext()) { 2711 if (cache == iterator.Next()) 2712 return true; 2713 } 2714 2715 return false; 2716 } 2717 2718 2719 /*! Waits until all pending notifications are carried out. 2720 Safe to be called from the block writer/notifier thread. 2721 You must not hold the \a cache lock when calling this function. 2722 */ 2723 static void 2724 wait_for_notifications(block_cache* cache) 2725 { 2726 MutexLocker locker(sCachesLock); 2727 2728 if (find_thread(NULL) == sNotifierWriterThread) { 2729 // We're the notifier thread, don't wait, but flush all pending 2730 // notifications directly. 2731 if (is_valid_cache(cache)) 2732 flush_pending_notifications(cache); 2733 return; 2734 } 2735 2736 // add sync notification 2737 cache_notification notification; 2738 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync, 2739 cache); 2740 2741 ConditionVariableEntry entry; 2742 cache->condition_variable.Add(&entry); 2743 2744 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false); 2745 locker.Unlock(); 2746 2747 // wait for notification hook to be called 2748 entry.Wait(); 2749 } 2750 2751 2752 status_t 2753 block_cache_init(void) 2754 { 2755 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block), 2756 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 2757 if (sBlockCache == NULL) 2758 return B_NO_MEMORY; 2759 2760 sCacheNotificationCache = create_object_cache("cache notifications", 2761 sizeof(cache_listener), 8, NULL, NULL, NULL); 2762 if (sCacheNotificationCache == NULL) 2763 return B_NO_MEMORY; 2764 2765 new (&sCaches) DoublyLinkedList<block_cache>; 2766 // manually call constructor 2767 2768 sEventSemaphore = create_sem(0, "block cache event"); 2769 if (sEventSemaphore < B_OK) 2770 return sEventSemaphore; 2771 2772 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer, 2773 "block notifier/writer", B_LOW_PRIORITY, NULL); 2774 if (sNotifierWriterThread >= B_OK) 2775 resume_thread(sNotifierWriterThread); 2776 2777 #if DEBUG_BLOCK_CACHE 2778 add_debugger_command_etc("block_caches", &dump_caches, 2779 "dumps all block caches", "\n", 0); 2780 add_debugger_command_etc("block_cache", &dump_cache, 2781 "dumps a specific block cache", 2782 "[-bt] <cache-address> [block-number]\n" 2783 " -t lists the transactions\n" 2784 " -b lists all blocks\n", 0); 2785 add_debugger_command("cached_block", &dump_cached_block, 2786 "dumps the specified cached block"); 2787 add_debugger_command_etc("transaction", &dump_transaction, 2788 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n" 2789 "Either use a block cache pointer and an ID or a pointer to the transaction.\n" 2790 " -b lists all blocks that are part of this transaction\n", 0); 2791 # if BLOCK_CACHE_BLOCK_TRACING >= 2 2792 add_debugger_command_etc("block_cache_data", &dump_block_data, 2793 "dumps the data blocks logged for the actions", 2794 "[-cpo] <from> [<to> [<offset> [<size>]]]\n" 2795 "If no data specifier is used, all blocks are shown by default.\n" 2796 " -c the current data is shown, if available.\n" 2797 " -p the parent data is shown, if available.\n" 2798 " -o the original data is shown, if available.\n" 2799 " <from> first index of tracing entries to show.\n" 2800 " <to> if given, the last entry. If not, only <from> is shown.\n" 2801 " <offset> the offset of the block data.\n" 2802 " <from> the size of the block data that is dumped\n", 0); 2803 # endif 2804 #endif // DEBUG_BLOCK_CACHE 2805 2806 return B_OK; 2807 } 2808 2809 2810 size_t 2811 block_cache_used_memory(void) 2812 { 2813 MutexLocker _(sCachesMemoryUseLock); 2814 return sUsedMemory; 2815 } 2816 2817 2818 // #pragma mark - public transaction API 2819 2820 2821 int32 2822 cache_start_transaction(void* _cache) 2823 { 2824 block_cache* cache = (block_cache*)_cache; 2825 TransactionLocker locker(cache); 2826 2827 if (cache->last_transaction && cache->last_transaction->open) { 2828 panic("last transaction (%" B_PRId32 ") still open!\n", 2829 cache->last_transaction->id); 2830 } 2831 2832 cache_transaction* transaction = new(std::nothrow) cache_transaction; 2833 if (transaction == NULL) 2834 return B_NO_MEMORY; 2835 2836 transaction->id = atomic_add(&cache->next_transaction_id, 1); 2837 cache->last_transaction = transaction; 2838 2839 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id)); 2840 T(Action("start", cache, transaction)); 2841 2842 cache->transaction_hash->Insert(transaction); 2843 2844 return transaction->id; 2845 } 2846 2847 2848 status_t 2849 cache_sync_transaction(void* _cache, int32 id) 2850 { 2851 block_cache* cache = (block_cache*)_cache; 2852 bool hadBusy; 2853 2854 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id)); 2855 2856 do { 2857 TransactionLocker locker(cache); 2858 hadBusy = false; 2859 2860 BlockWriter writer(cache); 2861 TransactionTable::Iterator iterator(cache->transaction_hash); 2862 2863 while (iterator.HasNext()) { 2864 // close all earlier transactions which haven't been closed yet 2865 cache_transaction* transaction = iterator.Next(); 2866 2867 if (transaction->busy_writing_count != 0) { 2868 hadBusy = true; 2869 continue; 2870 } 2871 if (transaction->id <= id && !transaction->open) { 2872 // write back all of their remaining dirty blocks 2873 T(Action("sync", cache, transaction)); 2874 2875 bool hasLeftOvers; 2876 writer.Add(transaction, hasLeftOvers); 2877 2878 if (hasLeftOvers) { 2879 // This transaction contains blocks that a previous 2880 // transaction is trying to write back in this write run 2881 hadBusy = true; 2882 } 2883 } 2884 } 2885 2886 status_t status = writer.Write(); 2887 if (status != B_OK) 2888 return status; 2889 } while (hadBusy); 2890 2891 wait_for_notifications(cache); 2892 // make sure that all pending TRANSACTION_WRITTEN notifications 2893 // are handled after we return 2894 return B_OK; 2895 } 2896 2897 2898 status_t 2899 cache_end_transaction(void* _cache, int32 id, 2900 transaction_notification_hook hook, void* data) 2901 { 2902 block_cache* cache = (block_cache*)_cache; 2903 TransactionLocker locker(cache); 2904 2905 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id)); 2906 2907 cache_transaction* transaction = lookup_transaction(cache, id); 2908 if (transaction == NULL) { 2909 panic("cache_end_transaction(): invalid transaction ID\n"); 2910 return B_BAD_VALUE; 2911 } 2912 2913 // Write back all pending transaction blocks 2914 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2915 if (status != B_OK) 2916 return status; 2917 2918 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2919 2920 if (hook != NULL 2921 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, 2922 hook, data) != B_OK) { 2923 return B_NO_MEMORY; 2924 } 2925 2926 T(Action("end", cache, transaction)); 2927 2928 // iterate through all blocks and free the unchanged original contents 2929 2930 cached_block* next; 2931 for (cached_block* block = transaction->first_block; block != NULL; 2932 block = next) { 2933 next = block->transaction_next; 2934 ASSERT(block->previous_transaction == NULL); 2935 2936 if (block->discard) { 2937 // This block has been discarded in the transaction 2938 cache->DiscardBlock(block); 2939 transaction->num_blocks--; 2940 continue; 2941 } 2942 2943 if (block->original_data != NULL) { 2944 cache->Free(block->original_data); 2945 block->original_data = NULL; 2946 } 2947 if (block->parent_data != NULL) { 2948 ASSERT(transaction->has_sub_transaction); 2949 cache->FreeBlockParentData(block); 2950 } 2951 2952 // move the block to the previous transaction list 2953 transaction->blocks.Add(block); 2954 2955 block->previous_transaction = transaction; 2956 block->transaction_next = NULL; 2957 block->transaction = NULL; 2958 } 2959 2960 transaction->open = false; 2961 return B_OK; 2962 } 2963 2964 2965 status_t 2966 cache_abort_transaction(void* _cache, int32 id) 2967 { 2968 block_cache* cache = (block_cache*)_cache; 2969 TransactionLocker locker(cache); 2970 2971 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id)); 2972 2973 cache_transaction* transaction = lookup_transaction(cache, id); 2974 if (transaction == NULL) { 2975 panic("cache_abort_transaction(): invalid transaction ID\n"); 2976 return B_BAD_VALUE; 2977 } 2978 2979 T(Abort(cache, transaction)); 2980 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 2981 2982 // iterate through all blocks and restore their original contents 2983 2984 cached_block* block = transaction->first_block; 2985 cached_block* next; 2986 for (; block != NULL; block = next) { 2987 next = block->transaction_next; 2988 2989 if (block->original_data != NULL) { 2990 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of " 2991 "block %" B_PRIdOFF "\n", transaction->id, block->block_number)); 2992 memcpy(block->current_data, block->original_data, 2993 cache->block_size); 2994 cache->Free(block->original_data); 2995 block->original_data = NULL; 2996 } 2997 if (transaction->has_sub_transaction && block->parent_data != NULL) 2998 cache->FreeBlockParentData(block); 2999 3000 block->transaction_next = NULL; 3001 block->transaction = NULL; 3002 block->discard = false; 3003 if (block->previous_transaction == NULL) 3004 block->is_dirty = false; 3005 } 3006 3007 cache->transaction_hash->Remove(transaction); 3008 delete_transaction(cache, transaction); 3009 return B_OK; 3010 } 3011 3012 3013 /*! Acknowledges the current parent transaction, and starts a new transaction 3014 from its sub transaction. 3015 The new transaction also gets a new transaction ID. 3016 */ 3017 int32 3018 cache_detach_sub_transaction(void* _cache, int32 id, 3019 transaction_notification_hook hook, void* data) 3020 { 3021 block_cache* cache = (block_cache*)_cache; 3022 TransactionLocker locker(cache); 3023 3024 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id)); 3025 3026 cache_transaction* transaction = lookup_transaction(cache, id); 3027 if (transaction == NULL) { 3028 panic("cache_detach_sub_transaction(): invalid transaction ID\n"); 3029 return B_BAD_VALUE; 3030 } 3031 if (!transaction->has_sub_transaction) 3032 return B_BAD_VALUE; 3033 3034 // iterate through all blocks and free the unchanged original contents 3035 3036 status_t status = write_blocks_in_previous_transaction(cache, transaction); 3037 if (status != B_OK) 3038 return status; 3039 3040 // create a new transaction for the sub transaction 3041 cache_transaction* newTransaction = new(std::nothrow) cache_transaction; 3042 if (newTransaction == NULL) 3043 return B_NO_MEMORY; 3044 3045 newTransaction->id = atomic_add(&cache->next_transaction_id, 1); 3046 T(Detach(cache, transaction, newTransaction)); 3047 3048 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3049 3050 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook, 3051 data) != B_OK) { 3052 delete newTransaction; 3053 return B_NO_MEMORY; 3054 } 3055 3056 cached_block* last = NULL; 3057 cached_block* next; 3058 for (cached_block* block = transaction->first_block; block != NULL; 3059 block = next) { 3060 next = block->transaction_next; 3061 ASSERT(block->previous_transaction == NULL); 3062 3063 if (block->discard) { 3064 cache->DiscardBlock(block); 3065 transaction->main_num_blocks--; 3066 continue; 3067 } 3068 3069 if (block->parent_data != NULL) { 3070 // The block changed in the parent - free the original data, since 3071 // they will be replaced by what is in current. 3072 ASSERT(block->original_data != NULL); 3073 cache->Free(block->original_data); 3074 3075 if (block->parent_data != block->current_data) { 3076 // The block had been changed in both transactions 3077 block->original_data = block->parent_data; 3078 } else { 3079 // The block has only been changed in the parent 3080 block->original_data = NULL; 3081 } 3082 block->parent_data = NULL; 3083 3084 // move the block to the previous transaction list 3085 transaction->blocks.Add(block); 3086 block->previous_transaction = transaction; 3087 } 3088 3089 if (block->original_data != NULL) { 3090 // This block had been changed in the current sub transaction, 3091 // we need to move this block over to the new transaction. 3092 ASSERT(block->parent_data == NULL); 3093 3094 if (last == NULL) 3095 newTransaction->first_block = block; 3096 else 3097 last->transaction_next = block; 3098 3099 block->transaction = newTransaction; 3100 last = block; 3101 } else 3102 block->transaction = NULL; 3103 3104 block->transaction_next = NULL; 3105 } 3106 3107 newTransaction->num_blocks = transaction->sub_num_blocks; 3108 3109 transaction->open = false; 3110 transaction->has_sub_transaction = false; 3111 transaction->num_blocks = transaction->main_num_blocks; 3112 transaction->sub_num_blocks = 0; 3113 3114 cache->transaction_hash->Insert(newTransaction); 3115 cache->last_transaction = newTransaction; 3116 3117 return newTransaction->id; 3118 } 3119 3120 3121 status_t 3122 cache_abort_sub_transaction(void* _cache, int32 id) 3123 { 3124 block_cache* cache = (block_cache*)_cache; 3125 TransactionLocker locker(cache); 3126 3127 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id)); 3128 3129 cache_transaction* transaction = lookup_transaction(cache, id); 3130 if (transaction == NULL) { 3131 panic("cache_abort_sub_transaction(): invalid transaction ID\n"); 3132 return B_BAD_VALUE; 3133 } 3134 if (!transaction->has_sub_transaction) 3135 return B_BAD_VALUE; 3136 3137 T(Abort(cache, transaction)); 3138 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3139 3140 // revert all changes back to the version of the parent 3141 3142 cached_block* block = transaction->first_block; 3143 cached_block* last = NULL; 3144 cached_block* next; 3145 for (; block != NULL; block = next) { 3146 next = block->transaction_next; 3147 3148 if (block->parent_data == NULL) { 3149 // The parent transaction didn't change the block, but the sub 3150 // transaction did - we need to revert to the original data. 3151 // The block is no longer part of the transaction 3152 if (block->original_data != NULL) { 3153 // The block might not have original data if was empty 3154 memcpy(block->current_data, block->original_data, 3155 cache->block_size); 3156 } 3157 3158 if (last != NULL) 3159 last->transaction_next = next; 3160 else 3161 transaction->first_block = next; 3162 3163 block->transaction_next = NULL; 3164 block->transaction = NULL; 3165 transaction->num_blocks--; 3166 3167 if (block->previous_transaction == NULL) { 3168 cache->Free(block->original_data); 3169 block->original_data = NULL; 3170 block->is_dirty = false; 3171 3172 if (block->ref_count == 0) { 3173 // Move the block into the unused list if possible 3174 block->unused = true; 3175 cache->unused_blocks.Add(block); 3176 cache->unused_block_count++; 3177 } 3178 } 3179 } else { 3180 if (block->parent_data != block->current_data) { 3181 // The block has been changed and must be restored - the block 3182 // is still dirty and part of the transaction 3183 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): " 3184 "restored contents of block %" B_PRIdOFF "\n", 3185 transaction->id, block->block_number)); 3186 memcpy(block->current_data, block->parent_data, 3187 cache->block_size); 3188 cache->Free(block->parent_data); 3189 // The block stays dirty 3190 } 3191 block->parent_data = NULL; 3192 last = block; 3193 } 3194 3195 block->discard = false; 3196 } 3197 3198 // all subsequent changes will go into the main transaction 3199 transaction->has_sub_transaction = false; 3200 transaction->sub_num_blocks = 0; 3201 3202 return B_OK; 3203 } 3204 3205 3206 status_t 3207 cache_start_sub_transaction(void* _cache, int32 id) 3208 { 3209 block_cache* cache = (block_cache*)_cache; 3210 TransactionLocker locker(cache); 3211 3212 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id)); 3213 3214 cache_transaction* transaction = lookup_transaction(cache, id); 3215 if (transaction == NULL) { 3216 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n", 3217 id); 3218 return B_BAD_VALUE; 3219 } 3220 3221 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3222 3223 // move all changed blocks up to the parent 3224 3225 cached_block* block = transaction->first_block; 3226 cached_block* next; 3227 for (; block != NULL; block = next) { 3228 next = block->transaction_next; 3229 3230 if (block->parent_data != NULL) { 3231 // There already is an older sub transaction - we acknowledge 3232 // its changes and move its blocks up to the parent 3233 ASSERT(transaction->has_sub_transaction); 3234 cache->FreeBlockParentData(block); 3235 } 3236 if (block->discard) { 3237 // This block has been discarded in the parent transaction. 3238 // Just throw away any changes made in this transaction, so that 3239 // it can still be reverted to its original contents if needed 3240 ASSERT(block->previous_transaction == NULL); 3241 if (block->original_data != NULL) { 3242 memcpy(block->current_data, block->original_data, 3243 cache->block_size); 3244 3245 cache->Free(block->original_data); 3246 block->original_data = NULL; 3247 } 3248 continue; 3249 } 3250 3251 // we "allocate" the parent data lazily, that means, we don't copy 3252 // the data (and allocate memory for it) until we need to 3253 block->parent_data = block->current_data; 3254 } 3255 3256 // all subsequent changes will go into the sub transaction 3257 transaction->has_sub_transaction = true; 3258 transaction->main_num_blocks = transaction->num_blocks; 3259 transaction->sub_num_blocks = 0; 3260 T(Action("start-sub", cache, transaction)); 3261 3262 return B_OK; 3263 } 3264 3265 3266 /*! Adds a transaction listener that gets notified when the transaction 3267 is ended, aborted, written, or idle as specified by \a events. 3268 The listener gets automatically removed when the transaction ends. 3269 */ 3270 status_t 3271 cache_add_transaction_listener(void* _cache, int32 id, int32 events, 3272 transaction_notification_hook hook, void* data) 3273 { 3274 block_cache* cache = (block_cache*)_cache; 3275 TransactionLocker locker(cache); 3276 3277 cache_transaction* transaction = lookup_transaction(cache, id); 3278 if (transaction == NULL) 3279 return B_BAD_VALUE; 3280 3281 return add_transaction_listener(cache, transaction, events, hook, data); 3282 } 3283 3284 3285 status_t 3286 cache_remove_transaction_listener(void* _cache, int32 id, 3287 transaction_notification_hook hookFunction, void* data) 3288 { 3289 block_cache* cache = (block_cache*)_cache; 3290 TransactionLocker locker(cache); 3291 3292 cache_transaction* transaction = lookup_transaction(cache, id); 3293 if (transaction == NULL) 3294 return B_BAD_VALUE; 3295 3296 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 3297 while (iterator.HasNext()) { 3298 cache_listener* listener = iterator.Next(); 3299 if (listener->data == data && listener->hook == hookFunction) { 3300 iterator.Remove(); 3301 3302 if (listener->events_pending != 0) { 3303 MutexLocker _(sNotificationsLock); 3304 if (listener->events_pending != 0) 3305 cache->pending_notifications.Remove(listener); 3306 } 3307 delete listener; 3308 return B_OK; 3309 } 3310 } 3311 3312 return B_ENTRY_NOT_FOUND; 3313 } 3314 3315 3316 status_t 3317 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly, 3318 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData) 3319 { 3320 cached_block* block = (cached_block*)*_cookie; 3321 block_cache* cache = (block_cache*)_cache; 3322 TransactionLocker locker(cache); 3323 3324 cache_transaction* transaction = lookup_transaction(cache, id); 3325 if (transaction == NULL || !transaction->open) 3326 return B_BAD_VALUE; 3327 3328 if (block == NULL) 3329 block = transaction->first_block; 3330 else 3331 block = block->transaction_next; 3332 3333 if (transaction->has_sub_transaction) { 3334 if (mainOnly) { 3335 // find next block that the parent changed 3336 while (block != NULL && block->parent_data == NULL) 3337 block = block->transaction_next; 3338 } else { 3339 // find next non-discarded block 3340 while (block != NULL && block->discard) 3341 block = block->transaction_next; 3342 } 3343 } 3344 3345 if (block == NULL) 3346 return B_ENTRY_NOT_FOUND; 3347 3348 if (_blockNumber) 3349 *_blockNumber = block->block_number; 3350 if (_data) 3351 *_data = mainOnly ? block->parent_data : block->current_data; 3352 if (_unchangedData) 3353 *_unchangedData = block->original_data; 3354 3355 *_cookie = (addr_t)block; 3356 return B_OK; 3357 } 3358 3359 3360 int32 3361 cache_blocks_in_transaction(void* _cache, int32 id) 3362 { 3363 block_cache* cache = (block_cache*)_cache; 3364 TransactionLocker locker(cache); 3365 3366 cache_transaction* transaction = lookup_transaction(cache, id); 3367 if (transaction == NULL) 3368 return B_BAD_VALUE; 3369 3370 return transaction->num_blocks; 3371 } 3372 3373 3374 /*! Returns the number of blocks that are part of the main transaction. If this 3375 transaction does not have a sub transaction yet, this is the same value as 3376 cache_blocks_in_transaction() would return. 3377 */ 3378 int32 3379 cache_blocks_in_main_transaction(void* _cache, int32 id) 3380 { 3381 block_cache* cache = (block_cache*)_cache; 3382 TransactionLocker locker(cache); 3383 3384 cache_transaction* transaction = lookup_transaction(cache, id); 3385 if (transaction == NULL) 3386 return B_BAD_VALUE; 3387 3388 if (transaction->has_sub_transaction) 3389 return transaction->main_num_blocks; 3390 3391 return transaction->num_blocks; 3392 } 3393 3394 3395 int32 3396 cache_blocks_in_sub_transaction(void* _cache, int32 id) 3397 { 3398 block_cache* cache = (block_cache*)_cache; 3399 TransactionLocker locker(cache); 3400 3401 cache_transaction* transaction = lookup_transaction(cache, id); 3402 if (transaction == NULL) 3403 return B_BAD_VALUE; 3404 3405 return transaction->sub_num_blocks; 3406 } 3407 3408 3409 /*! Check if block is in transaction 3410 */ 3411 bool 3412 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber) 3413 { 3414 block_cache* cache = (block_cache*)_cache; 3415 TransactionLocker locker(cache); 3416 3417 cached_block* block = cache->hash->Lookup(blockNumber); 3418 3419 return (block != NULL && block->transaction != NULL 3420 && block->transaction->id == id); 3421 } 3422 3423 3424 // #pragma mark - public block cache API 3425 3426 3427 void 3428 block_cache_delete(void* _cache, bool allowWrites) 3429 { 3430 block_cache* cache = (block_cache*)_cache; 3431 3432 if (allowWrites) 3433 block_cache_sync(cache); 3434 3435 mutex_lock(&sCachesLock); 3436 sCaches.Remove(cache); 3437 mutex_unlock(&sCachesLock); 3438 3439 mutex_lock(&cache->lock); 3440 3441 // wait for all blocks to become unbusy 3442 wait_for_busy_reading_blocks(cache); 3443 wait_for_busy_writing_blocks(cache); 3444 3445 // free all blocks 3446 3447 cached_block* block = cache->hash->Clear(true); 3448 while (block != NULL) { 3449 cached_block* next = block->next; 3450 cache->FreeBlock(block); 3451 block = next; 3452 } 3453 3454 // free all transactions (they will all be aborted) 3455 3456 cache_transaction* transaction = cache->transaction_hash->Clear(true); 3457 while (transaction != NULL) { 3458 cache_transaction* next = transaction->next; 3459 delete transaction; 3460 transaction = next; 3461 } 3462 3463 delete cache; 3464 } 3465 3466 3467 void* 3468 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly) 3469 { 3470 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize, 3471 readOnly); 3472 if (cache == NULL) 3473 return NULL; 3474 3475 if (cache->Init() != B_OK) { 3476 delete cache; 3477 return NULL; 3478 } 3479 3480 MutexLocker _(sCachesLock); 3481 sCaches.Add(cache); 3482 3483 return cache; 3484 } 3485 3486 3487 status_t 3488 block_cache_sync(void* _cache) 3489 { 3490 block_cache* cache = (block_cache*)_cache; 3491 3492 // We will sync all dirty blocks to disk that have a completed 3493 // transaction or no transaction only 3494 3495 MutexLocker locker(&cache->lock); 3496 3497 BlockWriter writer(cache); 3498 BlockTable::Iterator iterator(cache->hash); 3499 3500 while (iterator.HasNext()) { 3501 cached_block* block = iterator.Next(); 3502 if (block->CanBeWritten()) 3503 writer.Add(block); 3504 } 3505 3506 status_t status = writer.Write(); 3507 3508 locker.Unlock(); 3509 3510 wait_for_notifications(cache); 3511 // make sure that all pending TRANSACTION_WRITTEN notifications 3512 // are handled after we return 3513 return status; 3514 } 3515 3516 3517 status_t 3518 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks) 3519 { 3520 block_cache* cache = (block_cache*)_cache; 3521 3522 // We will sync all dirty blocks to disk that have a completed 3523 // transaction or no transaction only 3524 3525 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 3526 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF 3527 " (max %" B_PRIdOFF ")", 3528 blockNumber, cache->max_blocks - 1); 3529 return B_BAD_VALUE; 3530 } 3531 3532 MutexLocker locker(&cache->lock); 3533 BlockWriter writer(cache); 3534 3535 for (; numBlocks > 0; numBlocks--, blockNumber++) { 3536 cached_block* block = cache->hash->Lookup(blockNumber); 3537 if (block == NULL) 3538 continue; 3539 3540 if (block->CanBeWritten()) 3541 writer.Add(block); 3542 } 3543 3544 status_t status = writer.Write(); 3545 3546 locker.Unlock(); 3547 3548 wait_for_notifications(cache); 3549 // make sure that all pending TRANSACTION_WRITTEN notifications 3550 // are handled after we return 3551 return status; 3552 } 3553 3554 3555 /*! Discards a block from the current transaction or from the cache. 3556 You have to call this function when you no longer use a block, ie. when it 3557 might be reclaimed by the file cache in order to make sure they won't 3558 interfere. 3559 */ 3560 void 3561 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks) 3562 { 3563 // TODO: this could be a nice place to issue the ATA trim command 3564 block_cache* cache = (block_cache*)_cache; 3565 TransactionLocker locker(cache); 3566 3567 BlockWriter writer(cache); 3568 3569 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3570 cached_block* block = cache->hash->Lookup(blockNumber); 3571 if (block != NULL && block->previous_transaction != NULL) 3572 writer.Add(block); 3573 } 3574 3575 writer.Write(); 3576 // TODO: this can fail, too! 3577 3578 blockNumber -= numBlocks; 3579 // reset blockNumber to its original value 3580 3581 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3582 cached_block* block = cache->hash->Lookup(blockNumber); 3583 if (block == NULL) 3584 continue; 3585 3586 ASSERT(block->previous_transaction == NULL); 3587 3588 if (block->unused) { 3589 cache->unused_blocks.Remove(block); 3590 cache->unused_block_count--; 3591 cache->RemoveBlock(block); 3592 } else { 3593 if (block->transaction != NULL && block->parent_data != NULL 3594 && block->parent_data != block->current_data) { 3595 panic("Discarded block %" B_PRIdOFF " has already been changed in this " 3596 "transaction!", blockNumber); 3597 } 3598 3599 // mark it as discarded (in the current transaction only, if any) 3600 block->discard = true; 3601 } 3602 } 3603 } 3604 3605 3606 status_t 3607 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction) 3608 { 3609 block_cache* cache = (block_cache*)_cache; 3610 MutexLocker locker(&cache->lock); 3611 3612 if (cache->read_only) { 3613 panic("tried to make block writable on a read-only cache!"); 3614 return B_ERROR; 3615 } 3616 3617 // TODO: this can be done better! 3618 void* block; 3619 status_t status = get_writable_cached_block(cache, blockNumber, 3620 blockNumber, 1, transaction, false, &block); 3621 if (status == B_OK) { 3622 put_cached_block((block_cache*)_cache, blockNumber); 3623 return B_OK; 3624 } 3625 3626 return status; 3627 } 3628 3629 3630 status_t 3631 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base, 3632 off_t length, int32 transaction, void** _block) 3633 { 3634 block_cache* cache = (block_cache*)_cache; 3635 MutexLocker locker(&cache->lock); 3636 3637 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3638 blockNumber, transaction)); 3639 if (cache->read_only) 3640 panic("tried to get writable block on a read-only cache!"); 3641 3642 return get_writable_cached_block(cache, blockNumber, base, length, 3643 transaction, false, _block); 3644 } 3645 3646 3647 void* 3648 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction) 3649 { 3650 void* block; 3651 if (block_cache_get_writable_etc(_cache, blockNumber, 3652 blockNumber, 1, transaction, &block) == B_OK) 3653 return block; 3654 3655 return NULL; 3656 } 3657 3658 3659 void* 3660 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction) 3661 { 3662 block_cache* cache = (block_cache*)_cache; 3663 MutexLocker locker(&cache->lock); 3664 3665 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3666 blockNumber, transaction)); 3667 if (cache->read_only) 3668 panic("tried to get empty writable block on a read-only cache!"); 3669 3670 void* block; 3671 if (get_writable_cached_block((block_cache*)_cache, blockNumber, 3672 blockNumber, 1, transaction, true, &block) == B_OK) 3673 return block; 3674 3675 return NULL; 3676 } 3677 3678 3679 status_t 3680 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length, 3681 const void** _block) 3682 { 3683 block_cache* cache = (block_cache*)_cache; 3684 MutexLocker locker(&cache->lock); 3685 bool allocated; 3686 3687 cached_block* block; 3688 status_t status = get_cached_block(cache, blockNumber, &allocated, true, 3689 &block); 3690 if (status != B_OK) 3691 return status; 3692 3693 #if BLOCK_CACHE_DEBUG_CHANGED 3694 if (block->compare == NULL) 3695 block->compare = cache->Allocate(); 3696 if (block->compare != NULL) 3697 memcpy(block->compare, block->current_data, cache->block_size); 3698 #endif 3699 TB(Get(cache, block)); 3700 3701 *_block = block->current_data; 3702 return B_OK; 3703 } 3704 3705 3706 const void* 3707 block_cache_get(void* _cache, off_t blockNumber) 3708 { 3709 const void* block; 3710 if (block_cache_get_etc(_cache, blockNumber, blockNumber, 1, &block) 3711 == B_OK) 3712 return block; 3713 3714 return NULL; 3715 } 3716 3717 3718 /*! Changes the internal status of a writable block to \a dirty. This can be 3719 helpful in case you realize you don't need to change that block anymore 3720 for whatever reason. 3721 3722 Note, you must only use this function on blocks that were acquired 3723 writable! 3724 */ 3725 status_t 3726 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty, 3727 int32 transaction) 3728 { 3729 block_cache* cache = (block_cache*)_cache; 3730 MutexLocker locker(&cache->lock); 3731 3732 cached_block* block = cache->hash->Lookup(blockNumber); 3733 if (block == NULL) 3734 return B_BAD_VALUE; 3735 if (block->is_dirty == dirty) { 3736 // there is nothing to do for us 3737 return B_OK; 3738 } 3739 3740 // TODO: not yet implemented 3741 if (dirty) 3742 panic("block_cache_set_dirty(): not yet implemented that way!\n"); 3743 3744 return B_OK; 3745 } 3746 3747 3748 void 3749 block_cache_put(void* _cache, off_t blockNumber) 3750 { 3751 block_cache* cache = (block_cache*)_cache; 3752 MutexLocker locker(&cache->lock); 3753 3754 put_cached_block(cache, blockNumber); 3755 } 3756 3757