1 /* 2 * Copyright 2004-2020, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <block_cache.h> 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <errno.h> 13 14 #include <KernelExport.h> 15 #include <fs_cache.h> 16 17 #include <condition_variable.h> 18 #include <lock.h> 19 #include <low_resource_manager.h> 20 #include <slab/Slab.h> 21 #include <tracing.h> 22 #include <util/kernel_cpp.h> 23 #include <util/DoublyLinkedList.h> 24 #include <util/AutoLock.h> 25 #include <vm/vm_page.h> 26 27 #include "kernel_debug_config.h" 28 29 30 // TODO: this is a naive but growing implementation to test the API: 31 // block reading/writing is not at all optimized for speed, it will 32 // just read and write single blocks. 33 // TODO: the retrieval/copy of the original data could be delayed until the 34 // new data must be written, ie. in low memory situations. 35 36 #ifdef _KERNEL_MODE 37 # define TRACE_ALWAYS(x...) dprintf(x) 38 #else 39 # define TRACE_ALWAYS(x...) printf(x) 40 #endif 41 42 //#define TRACE_BLOCK_CACHE 43 #ifdef TRACE_BLOCK_CACHE 44 # define TRACE(x) TRACE_ALWAYS(x) 45 #else 46 # define TRACE(x) ; 47 #endif 48 49 50 // This macro is used for fatal situations that are acceptable in a running 51 // system, like out of memory situations - should only panic for debugging. 52 #define FATAL(x) panic x 53 54 static const bigtime_t kTransactionIdleTime = 2000000LL; 55 // a transaction is considered idle after 2 seconds of inactivity 56 57 58 namespace { 59 60 struct cache_transaction; 61 struct cached_block; 62 struct block_cache; 63 typedef DoublyLinkedListLink<cached_block> block_link; 64 65 struct cached_block { 66 cached_block* next; // next in hash 67 cached_block* transaction_next; 68 block_link link; 69 off_t block_number; 70 void* current_data; 71 // The data that is seen by everyone using the API; this one is always 72 // present. 73 void* original_data; 74 // When in a transaction, this contains the original data from before 75 // the transaction. 76 void* parent_data; 77 // This is a lazily alloced buffer that represents the contents of the 78 // block in the parent transaction. It may point to current_data if the 79 // contents have been changed only in the parent transaction, or, if the 80 // block has been changed in the current sub transaction already, to a 81 // new block containing the contents changed in the parent transaction. 82 // If this is NULL, the block has not been changed in the parent 83 // transaction at all. 84 #if BLOCK_CACHE_DEBUG_CHANGED 85 void* compare; 86 #endif 87 int32 ref_count; 88 int32 last_accessed; 89 bool busy_reading : 1; 90 bool busy_writing : 1; 91 bool is_writing : 1; 92 // Block has been checked out for writing without transactions, and 93 // cannot be written back if set 94 bool is_dirty : 1; 95 bool unused : 1; 96 bool discard : 1; 97 bool busy_reading_waiters : 1; 98 bool busy_writing_waiters : 1; 99 cache_transaction* transaction; 100 // This is the current active transaction, if any, the block is 101 // currently in (meaning was changed as a part of it). 102 cache_transaction* previous_transaction; 103 // This is set to the last transaction that was ended containing this 104 // block. In this case, the block has not yet written back yet, and 105 // the changed data is either in current_data, or original_data -- the 106 // latter if the block is already being part of another transaction. 107 // There can only be one previous transaction, so when the active 108 // transaction ends, the changes of the previous transaction have to 109 // be written back before that transaction becomes the next previous 110 // transaction. 111 112 bool CanBeWritten() const; 113 int32 LastAccess() const 114 { return system_time() / 1000000L - last_accessed; } 115 }; 116 117 typedef DoublyLinkedList<cached_block, 118 DoublyLinkedListMemberGetLink<cached_block, 119 &cached_block::link> > block_list; 120 121 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> { 122 static inline void* operator new(size_t size); 123 static inline void operator delete(void* block); 124 125 int32 transaction_id; 126 int32 events_pending; 127 int32 events; 128 transaction_notification_hook hook; 129 void* data; 130 bool delete_after_event; 131 }; 132 133 typedef DoublyLinkedList<cache_notification> NotificationList; 134 135 static object_cache* sCacheNotificationCache; 136 137 struct cache_listener; 138 typedef DoublyLinkedListLink<cache_listener> listener_link; 139 140 struct cache_listener : cache_notification { 141 listener_link link; 142 }; 143 144 typedef DoublyLinkedList<cache_listener, 145 DoublyLinkedListMemberGetLink<cache_listener, 146 &cache_listener::link> > ListenerList; 147 148 void* 149 cache_notification::operator new(size_t size) 150 { 151 // We can't really know whether something is a cache_notification or a 152 // cache_listener at runtime, so we just use one object_cache for both 153 // with the size set to that of the (slightly larger) cache_listener. 154 // In practice, the vast majority of cache_notifications are really 155 // cache_listeners, so this is a more than acceptable trade-off. 156 ASSERT(size <= sizeof(cache_listener)); 157 return object_cache_alloc(sCacheNotificationCache, 0); 158 } 159 160 void 161 cache_notification::operator delete(void* block) 162 { 163 object_cache_free(sCacheNotificationCache, block, 0); 164 } 165 166 167 struct BlockHash { 168 typedef off_t KeyType; 169 typedef cached_block ValueType; 170 171 size_t HashKey(KeyType key) const 172 { 173 return key; 174 } 175 176 size_t Hash(ValueType* block) const 177 { 178 return block->block_number; 179 } 180 181 bool Compare(KeyType key, ValueType* block) const 182 { 183 return block->block_number == key; 184 } 185 186 ValueType*& GetLink(ValueType* value) const 187 { 188 return value->next; 189 } 190 }; 191 192 typedef BOpenHashTable<BlockHash> BlockTable; 193 194 195 struct TransactionHash { 196 typedef int32 KeyType; 197 typedef cache_transaction ValueType; 198 199 size_t HashKey(KeyType key) const 200 { 201 return key; 202 } 203 204 size_t Hash(ValueType* transaction) const; 205 bool Compare(KeyType key, ValueType* transaction) const; 206 ValueType*& GetLink(ValueType* value) const; 207 }; 208 209 typedef BOpenHashTable<TransactionHash> TransactionTable; 210 211 212 struct block_cache : DoublyLinkedListLinkImpl<block_cache> { 213 BlockTable* hash; 214 mutex lock; 215 int fd; 216 off_t max_blocks; 217 size_t block_size; 218 int32 next_transaction_id; 219 cache_transaction* last_transaction; 220 TransactionTable* transaction_hash; 221 222 object_cache* buffer_cache; 223 block_list unused_blocks; 224 uint32 unused_block_count; 225 226 ConditionVariable busy_reading_condition; 227 uint32 busy_reading_count; 228 bool busy_reading_waiters; 229 230 ConditionVariable busy_writing_condition; 231 uint32 busy_writing_count; 232 bool busy_writing_waiters; 233 234 bigtime_t last_block_write; 235 bigtime_t last_block_write_duration; 236 237 uint32 num_dirty_blocks; 238 bool read_only; 239 240 NotificationList pending_notifications; 241 ConditionVariable condition_variable; 242 243 block_cache(int fd, off_t numBlocks, size_t blockSize, 244 bool readOnly); 245 ~block_cache(); 246 247 status_t Init(); 248 249 void Free(void* buffer); 250 void* Allocate(); 251 void FreeBlock(cached_block* block); 252 cached_block* NewBlock(off_t blockNumber); 253 void FreeBlockParentData(cached_block* block); 254 255 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0); 256 void RemoveBlock(cached_block* block); 257 void DiscardBlock(cached_block* block); 258 259 private: 260 static void _LowMemoryHandler(void* data, uint32 resources, 261 int32 level); 262 cached_block* _GetUnusedBlock(); 263 }; 264 265 struct cache_transaction { 266 cache_transaction(); 267 268 cache_transaction* next; 269 int32 id; 270 int32 num_blocks; 271 int32 main_num_blocks; 272 int32 sub_num_blocks; 273 cached_block* first_block; 274 block_list blocks; 275 ListenerList listeners; 276 bool open; 277 bool has_sub_transaction; 278 bigtime_t last_used; 279 int32 busy_writing_count; 280 }; 281 282 283 class BlockWriter { 284 public: 285 BlockWriter(block_cache* cache, 286 size_t max = SIZE_MAX); 287 ~BlockWriter(); 288 289 bool Add(cached_block* block, 290 cache_transaction* transaction = NULL); 291 bool Add(cache_transaction* transaction, 292 bool& hasLeftOvers); 293 294 status_t Write(cache_transaction* transaction = NULL, 295 bool canUnlock = true); 296 297 bool DeletedTransaction() const 298 { return fDeletedTransaction; } 299 300 static status_t WriteBlock(block_cache* cache, 301 cached_block* block); 302 303 private: 304 void* _Data(cached_block* block) const; 305 status_t _WriteBlock(cached_block* block); 306 void _BlockDone(cached_block* block, 307 cache_transaction* transaction); 308 void _UnmarkWriting(cached_block* block); 309 310 static int _CompareBlocks(const void* _blockA, 311 const void* _blockB); 312 313 private: 314 static const size_t kBufferSize = 64; 315 316 block_cache* fCache; 317 cached_block* fBuffer[kBufferSize]; 318 cached_block** fBlocks; 319 size_t fCount; 320 size_t fTotal; 321 size_t fCapacity; 322 size_t fMax; 323 status_t fStatus; 324 bool fDeletedTransaction; 325 }; 326 327 328 class TransactionLocking { 329 public: 330 inline bool Lock(block_cache* cache) 331 { 332 mutex_lock(&cache->lock); 333 334 while (cache->busy_writing_count != 0) { 335 // wait for all blocks to be written 336 ConditionVariableEntry entry; 337 cache->busy_writing_condition.Add(&entry); 338 cache->busy_writing_waiters = true; 339 340 mutex_unlock(&cache->lock); 341 342 entry.Wait(); 343 344 mutex_lock(&cache->lock); 345 } 346 347 return true; 348 } 349 350 inline void Unlock(block_cache* cache) 351 { 352 mutex_unlock(&cache->lock); 353 } 354 }; 355 356 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker; 357 358 } // namespace 359 360 361 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 362 namespace BlockTracing { 363 364 class Action : public AbstractTraceEntry { 365 public: 366 Action(block_cache* cache, cached_block* block) 367 : 368 fCache(cache), 369 fBlockNumber(block->block_number), 370 fIsDirty(block->is_dirty), 371 fHasOriginal(block->original_data != NULL), 372 fHasParent(block->parent_data != NULL), 373 fTransactionID(-1), 374 fPreviousID(-1) 375 { 376 if (block->transaction != NULL) 377 fTransactionID = block->transaction->id; 378 if (block->previous_transaction != NULL) 379 fPreviousID = block->previous_transaction->id; 380 } 381 382 virtual void AddDump(TraceOutput& out) 383 { 384 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32 385 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber, 386 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-', 387 fHasParent ? 'p' : '-', fTransactionID, fPreviousID); 388 } 389 390 virtual const char* _Action() const = 0; 391 392 private: 393 block_cache* fCache; 394 uint64 fBlockNumber; 395 bool fIsDirty; 396 bool fHasOriginal; 397 bool fHasParent; 398 int32 fTransactionID; 399 int32 fPreviousID; 400 }; 401 402 class Get : public Action { 403 public: 404 Get(block_cache* cache, cached_block* block) 405 : 406 Action(cache, block) 407 { 408 Initialized(); 409 } 410 411 virtual const char* _Action() const { return "get"; } 412 }; 413 414 class Put : public Action { 415 public: 416 Put(block_cache* cache, cached_block* block) 417 : 418 Action(cache, block) 419 { 420 Initialized(); 421 } 422 423 virtual const char* _Action() const { return "put"; } 424 }; 425 426 class Read : public Action { 427 public: 428 Read(block_cache* cache, cached_block* block) 429 : 430 Action(cache, block) 431 { 432 Initialized(); 433 } 434 435 virtual const char* _Action() const { return "read"; } 436 }; 437 438 class Write : public Action { 439 public: 440 Write(block_cache* cache, cached_block* block) 441 : 442 Action(cache, block) 443 { 444 Initialized(); 445 } 446 447 virtual const char* _Action() const { return "write"; } 448 }; 449 450 class Flush : public Action { 451 public: 452 Flush(block_cache* cache, cached_block* block, bool getUnused = false) 453 : 454 Action(cache, block), 455 fGetUnused(getUnused) 456 { 457 Initialized(); 458 } 459 460 virtual const char* _Action() const 461 { return fGetUnused ? "get-unused" : "flush"; } 462 463 private: 464 bool fGetUnused; 465 }; 466 467 class Error : public AbstractTraceEntry { 468 public: 469 Error(block_cache* cache, uint64 blockNumber, const char* message, 470 status_t status = B_OK) 471 : 472 fCache(cache), 473 fBlockNumber(blockNumber), 474 fMessage(message), 475 fStatus(status) 476 { 477 Initialized(); 478 } 479 480 virtual void AddDump(TraceOutput& out) 481 { 482 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s", 483 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "", 484 fStatus != B_OK ? strerror(fStatus) : ""); 485 } 486 487 private: 488 block_cache* fCache; 489 uint64 fBlockNumber; 490 const char* fMessage; 491 status_t fStatus; 492 }; 493 494 #if BLOCK_CACHE_BLOCK_TRACING >= 2 495 class BlockData : public AbstractTraceEntry { 496 public: 497 enum { 498 kCurrent = 0x01, 499 kParent = 0x02, 500 kOriginal = 0x04 501 }; 502 503 BlockData(block_cache* cache, cached_block* block, const char* message) 504 : 505 fCache(cache), 506 fSize(cache->block_size), 507 fBlockNumber(block->block_number), 508 fMessage(message) 509 { 510 _Allocate(fCurrent, block->current_data); 511 _Allocate(fParent, block->parent_data); 512 _Allocate(fOriginal, block->original_data); 513 514 #if KTRACE_PRINTF_STACK_TRACE 515 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 516 false); 517 #endif 518 519 Initialized(); 520 } 521 522 virtual void AddDump(TraceOutput& out) 523 { 524 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s", 525 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-', 526 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-', 527 fMessage); 528 } 529 530 #if KTRACE_PRINTF_STACK_TRACE 531 virtual void DumpStackTrace(TraceOutput& out) 532 { 533 out.PrintStackTrace(fStackTrace); 534 } 535 #endif 536 537 void DumpBlocks(uint32 which, uint32 offset, uint32 size) 538 { 539 if ((which & kCurrent) != 0) 540 DumpBlock(kCurrent, offset, size); 541 if ((which & kParent) != 0) 542 DumpBlock(kParent, offset, size); 543 if ((which & kOriginal) != 0) 544 DumpBlock(kOriginal, offset, size); 545 } 546 547 void DumpBlock(uint32 which, uint32 offset, uint32 size) 548 { 549 if (offset > fSize) { 550 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize); 551 return; 552 } 553 if (offset + size > fSize) 554 size = fSize - offset; 555 556 const char* label; 557 uint8* data; 558 559 if ((which & kCurrent) != 0) { 560 label = "current"; 561 data = fCurrent; 562 } else if ((which & kParent) != 0) { 563 label = "parent"; 564 data = fParent; 565 } else if ((which & kOriginal) != 0) { 566 label = "original"; 567 data = fOriginal; 568 } else 569 return; 570 571 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size); 572 573 static const uint32 kBlockSize = 16; 574 data += offset; 575 576 for (uint32 i = 0; i < size;) { 577 int start = i; 578 579 kprintf(" %04" B_PRIx32 " ", i); 580 for (; i < start + kBlockSize; i++) { 581 if (!(i % 4)) 582 kprintf(" "); 583 584 if (i >= size) 585 kprintf(" "); 586 else 587 kprintf("%02x", data[i]); 588 } 589 590 kprintf("\n"); 591 } 592 } 593 594 private: 595 void _Allocate(uint8*& target, void* source) 596 { 597 if (source == NULL) { 598 target = NULL; 599 return; 600 } 601 602 target = alloc_tracing_buffer_memcpy(source, fSize, false); 603 } 604 605 block_cache* fCache; 606 uint32 fSize; 607 uint64 fBlockNumber; 608 const char* fMessage; 609 uint8* fCurrent; 610 uint8* fParent; 611 uint8* fOriginal; 612 #if KTRACE_PRINTF_STACK_TRACE 613 tracing_stack_trace* fStackTrace; 614 #endif 615 }; 616 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 617 618 } // namespace BlockTracing 619 620 # define TB(x) new(std::nothrow) BlockTracing::x; 621 #else 622 # define TB(x) ; 623 #endif 624 625 #if BLOCK_CACHE_BLOCK_TRACING >= 2 626 # define TB2(x) new(std::nothrow) BlockTracing::x; 627 #else 628 # define TB2(x) ; 629 #endif 630 631 632 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 633 namespace TransactionTracing { 634 635 class Action : public AbstractTraceEntry { 636 public: 637 Action(const char* label, block_cache* cache, 638 cache_transaction* transaction) 639 : 640 fCache(cache), 641 fTransaction(transaction), 642 fID(transaction->id), 643 fSub(transaction->has_sub_transaction), 644 fNumBlocks(transaction->num_blocks), 645 fSubNumBlocks(transaction->sub_num_blocks) 646 { 647 strlcpy(fLabel, label, sizeof(fLabel)); 648 Initialized(); 649 } 650 651 virtual void AddDump(TraceOutput& out) 652 { 653 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s" 654 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction, 655 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks); 656 } 657 658 private: 659 char fLabel[12]; 660 block_cache* fCache; 661 cache_transaction* fTransaction; 662 int32 fID; 663 bool fSub; 664 int32 fNumBlocks; 665 int32 fSubNumBlocks; 666 }; 667 668 class Detach : public AbstractTraceEntry { 669 public: 670 Detach(block_cache* cache, cache_transaction* transaction, 671 cache_transaction* newTransaction) 672 : 673 fCache(cache), 674 fTransaction(transaction), 675 fID(transaction->id), 676 fSub(transaction->has_sub_transaction), 677 fNewTransaction(newTransaction), 678 fNewID(newTransaction->id) 679 { 680 Initialized(); 681 } 682 683 virtual void AddDump(TraceOutput& out) 684 { 685 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")" 686 "from transaction %p (id %" B_PRId32 ")%s", 687 fCache, fNewTransaction, fNewID, fTransaction, fID, 688 fSub ? " sub" : ""); 689 } 690 691 private: 692 block_cache* fCache; 693 cache_transaction* fTransaction; 694 int32 fID; 695 bool fSub; 696 cache_transaction* fNewTransaction; 697 int32 fNewID; 698 }; 699 700 class Abort : public AbstractTraceEntry { 701 public: 702 Abort(block_cache* cache, cache_transaction* transaction) 703 : 704 fCache(cache), 705 fTransaction(transaction), 706 fID(transaction->id), 707 fNumBlocks(0) 708 { 709 bool isSub = transaction->has_sub_transaction; 710 fNumBlocks = isSub ? transaction->sub_num_blocks 711 : transaction->num_blocks; 712 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t)); 713 if (fBlocks != NULL) { 714 cached_block* block = transaction->first_block; 715 for (int32 i = 0; block != NULL && i < fNumBlocks; 716 block = block->transaction_next) { 717 fBlocks[i++] = block->block_number; 718 } 719 } else 720 fNumBlocks = 0; 721 722 #if KTRACE_PRINTF_STACK_TRACE 723 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 724 false); 725 #endif 726 727 Initialized(); 728 } 729 730 virtual void AddDump(TraceOutput& out) 731 { 732 out.Print("block cache %p, abort transaction " 733 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID); 734 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++) 735 out.Print(" %" B_PRIdOFF, fBlocks[i]); 736 } 737 738 #if KTRACE_PRINTF_STACK_TRACE 739 virtual void DumpStackTrace(TraceOutput& out) 740 { 741 out.PrintStackTrace(fStackTrace); 742 } 743 #endif 744 745 private: 746 block_cache* fCache; 747 cache_transaction* fTransaction; 748 int32 fID; 749 off_t* fBlocks; 750 int32 fNumBlocks; 751 #if KTRACE_PRINTF_STACK_TRACE 752 tracing_stack_trace* fStackTrace; 753 #endif 754 }; 755 756 } // namespace TransactionTracing 757 758 # define T(x) new(std::nothrow) TransactionTracing::x; 759 #else 760 # define T(x) ; 761 #endif 762 763 764 static DoublyLinkedList<block_cache> sCaches; 765 static mutex sCachesLock = MUTEX_INITIALIZER("block caches"); 766 static mutex sCachesMemoryUseLock 767 = MUTEX_INITIALIZER("block caches memory use"); 768 static size_t sUsedMemory; 769 static sem_id sEventSemaphore; 770 static mutex sNotificationsLock 771 = MUTEX_INITIALIZER("block cache notifications"); 772 static thread_id sNotifierWriterThread; 773 static DoublyLinkedListLink<block_cache> sMarkCache; 774 // TODO: this only works if the link is the first entry of block_cache 775 static object_cache* sBlockCache; 776 777 778 // #pragma mark - notifications/listener 779 780 781 /*! Checks whether or not this is an event that closes a transaction. */ 782 static inline bool 783 is_closing_event(int32 event) 784 { 785 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0; 786 } 787 788 789 static inline bool 790 is_written_event(int32 event) 791 { 792 return (event & TRANSACTION_WRITTEN) != 0; 793 } 794 795 796 /*! From the specified \a notification, it will remove the lowest pending 797 event, and return that one in \a _event. 798 If there is no pending event anymore, it will return \c false. 799 */ 800 static bool 801 get_next_pending_event(cache_notification* notification, int32* _event) 802 { 803 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) { 804 int32 pending = atomic_and(¬ification->events_pending, 805 ~eventMask); 806 807 bool more = (pending & ~eventMask) != 0; 808 809 if ((pending & eventMask) != 0) { 810 *_event = eventMask; 811 return more; 812 } 813 } 814 815 return false; 816 } 817 818 819 static void 820 flush_pending_notifications(block_cache* cache) 821 { 822 ASSERT_LOCKED_MUTEX(&sCachesLock); 823 824 while (true) { 825 MutexLocker locker(sNotificationsLock); 826 827 cache_notification* notification = cache->pending_notifications.Head(); 828 if (notification == NULL) 829 return; 830 831 bool deleteAfterEvent = false; 832 int32 event = -1; 833 if (!get_next_pending_event(notification, &event)) { 834 // remove the notification if this was the last pending event 835 cache->pending_notifications.Remove(notification); 836 deleteAfterEvent = notification->delete_after_event; 837 } 838 839 if (event >= 0) { 840 // Notify listener, we need to copy the notification, as it might 841 // be removed when we unlock the list. 842 cache_notification copy = *notification; 843 locker.Unlock(); 844 845 copy.hook(copy.transaction_id, event, copy.data); 846 847 locker.Lock(); 848 } 849 850 if (deleteAfterEvent) 851 delete notification; 852 } 853 } 854 855 856 /*! Flushes all pending notifications by calling the appropriate hook 857 functions. 858 Must not be called with a cache lock held. 859 */ 860 static void 861 flush_pending_notifications() 862 { 863 MutexLocker _(sCachesLock); 864 865 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 866 while (iterator.HasNext()) { 867 block_cache* cache = iterator.Next(); 868 869 flush_pending_notifications(cache); 870 } 871 } 872 873 874 /*! Initializes the \a notification as specified. */ 875 static void 876 set_notification(cache_transaction* transaction, 877 cache_notification ¬ification, int32 events, 878 transaction_notification_hook hook, void* data) 879 { 880 notification.transaction_id = transaction != NULL ? transaction->id : -1; 881 notification.events_pending = 0; 882 notification.events = events; 883 notification.hook = hook; 884 notification.data = data; 885 notification.delete_after_event = false; 886 } 887 888 889 /*! Makes sure the notification is deleted. It either deletes it directly, 890 when possible, or marks it for deletion if the notification is pending. 891 */ 892 static void 893 delete_notification(cache_notification* notification) 894 { 895 MutexLocker locker(sNotificationsLock); 896 897 if (notification->events_pending != 0) 898 notification->delete_after_event = true; 899 else 900 delete notification; 901 } 902 903 904 /*! Adds the notification to the pending notifications list, or, if it's 905 already part of it, updates its events_pending field. 906 Also marks the notification to be deleted if \a deleteNotification 907 is \c true. 908 Triggers the notifier thread to run. 909 */ 910 static void 911 add_notification(block_cache* cache, cache_notification* notification, 912 int32 event, bool deleteNotification) 913 { 914 if (notification->hook == NULL) 915 return; 916 917 int32 pending = atomic_or(¬ification->events_pending, event); 918 if (pending == 0) { 919 // not yet part of the notification list 920 MutexLocker locker(sNotificationsLock); 921 if (deleteNotification) 922 notification->delete_after_event = true; 923 cache->pending_notifications.Add(notification); 924 } else if (deleteNotification) { 925 // we might need to delete it ourselves if we're late 926 delete_notification(notification); 927 } 928 929 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE); 930 // We're probably still holding some locks that makes rescheduling 931 // not a good idea at this point. 932 } 933 934 935 /*! Notifies all interested listeners of this transaction about the \a event. 936 If \a event is a closing event (ie. TRANSACTION_ENDED, and 937 TRANSACTION_ABORTED), all listeners except those listening to 938 TRANSACTION_WRITTEN will be removed. 939 */ 940 static void 941 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction, 942 int32 event) 943 { 944 T(Action("notify", cache, transaction)); 945 946 bool isClosing = is_closing_event(event); 947 bool isWritten = is_written_event(event); 948 949 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 950 while (iterator.HasNext()) { 951 cache_listener* listener = iterator.Next(); 952 953 bool remove = (isClosing && !is_written_event(listener->events)) 954 || (isWritten && is_written_event(listener->events)); 955 if (remove) 956 iterator.Remove(); 957 958 if ((listener->events & event) != 0) 959 add_notification(cache, listener, event, remove); 960 else if (remove) 961 delete_notification(listener); 962 } 963 } 964 965 966 /*! Removes and deletes all listeners that are still monitoring this 967 transaction. 968 */ 969 static void 970 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction) 971 { 972 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 973 while (iterator.HasNext()) { 974 cache_listener* listener = iterator.Next(); 975 iterator.Remove(); 976 977 delete_notification(listener); 978 } 979 } 980 981 982 static status_t 983 add_transaction_listener(block_cache* cache, cache_transaction* transaction, 984 int32 events, transaction_notification_hook hookFunction, void* data) 985 { 986 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 987 while (iterator.HasNext()) { 988 cache_listener* listener = iterator.Next(); 989 990 if (listener->data == data && listener->hook == hookFunction) { 991 // this listener already exists, just update it 992 listener->events |= events; 993 return B_OK; 994 } 995 } 996 997 cache_listener* listener = new cache_listener; 998 if (listener == NULL) 999 return B_NO_MEMORY; 1000 1001 set_notification(transaction, *listener, events, hookFunction, data); 1002 transaction->listeners.Add(listener); 1003 return B_OK; 1004 } 1005 1006 1007 // #pragma mark - private transaction 1008 1009 1010 cache_transaction::cache_transaction() 1011 { 1012 num_blocks = 0; 1013 main_num_blocks = 0; 1014 sub_num_blocks = 0; 1015 first_block = NULL; 1016 open = true; 1017 has_sub_transaction = false; 1018 last_used = system_time(); 1019 busy_writing_count = 0; 1020 } 1021 1022 1023 static void 1024 delete_transaction(block_cache* cache, cache_transaction* transaction) 1025 { 1026 if (cache->last_transaction == transaction) 1027 cache->last_transaction = NULL; 1028 1029 remove_transaction_listeners(cache, transaction); 1030 delete transaction; 1031 } 1032 1033 1034 static cache_transaction* 1035 lookup_transaction(block_cache* cache, int32 id) 1036 { 1037 return cache->transaction_hash->Lookup(id); 1038 } 1039 1040 1041 size_t TransactionHash::Hash(cache_transaction* transaction) const 1042 { 1043 return transaction->id; 1044 } 1045 1046 1047 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const 1048 { 1049 return transaction->id == key; 1050 } 1051 1052 1053 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const 1054 { 1055 return value->next; 1056 } 1057 1058 1059 /*! Writes back any changes made to blocks in \a transaction that are still 1060 part of a previous transacton. 1061 */ 1062 static status_t 1063 write_blocks_in_previous_transaction(block_cache* cache, 1064 cache_transaction* transaction) 1065 { 1066 BlockWriter writer(cache); 1067 1068 cached_block* block = transaction->first_block; 1069 for (; block != NULL; block = block->transaction_next) { 1070 if (block->previous_transaction != NULL) { 1071 // need to write back pending changes 1072 writer.Add(block); 1073 } 1074 } 1075 1076 return writer.Write(); 1077 } 1078 1079 1080 // #pragma mark - cached_block 1081 1082 1083 bool 1084 cached_block::CanBeWritten() const 1085 { 1086 return !busy_writing && !busy_reading 1087 && (previous_transaction != NULL 1088 || (transaction == NULL && is_dirty && !is_writing)); 1089 } 1090 1091 1092 // #pragma mark - BlockWriter 1093 1094 1095 BlockWriter::BlockWriter(block_cache* cache, size_t max) 1096 : 1097 fCache(cache), 1098 fBlocks(fBuffer), 1099 fCount(0), 1100 fTotal(0), 1101 fCapacity(kBufferSize), 1102 fMax(max), 1103 fStatus(B_OK), 1104 fDeletedTransaction(false) 1105 { 1106 } 1107 1108 1109 BlockWriter::~BlockWriter() 1110 { 1111 if (fBlocks != fBuffer) 1112 free(fBlocks); 1113 } 1114 1115 1116 /*! Adds the specified block to the to be written array. If no more blocks can 1117 be added, false is returned, otherwise true. 1118 */ 1119 bool 1120 BlockWriter::Add(cached_block* block, cache_transaction* transaction) 1121 { 1122 ASSERT(block->CanBeWritten()); 1123 1124 if (fTotal == fMax) 1125 return false; 1126 1127 if (fCount >= fCapacity) { 1128 // Enlarge array if necessary 1129 cached_block** newBlocks; 1130 size_t newCapacity = max_c(256, fCapacity * 2); 1131 if (fBlocks == fBuffer) 1132 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*)); 1133 else { 1134 newBlocks = (cached_block**)realloc(fBlocks, 1135 newCapacity * sizeof(void*)); 1136 } 1137 1138 if (newBlocks == NULL) { 1139 // Allocating a larger array failed - we need to write back what 1140 // we have synchronously now (this will also clear the array) 1141 Write(transaction, false); 1142 } else { 1143 if (fBlocks == fBuffer) 1144 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*)); 1145 1146 fBlocks = newBlocks; 1147 fCapacity = newCapacity; 1148 } 1149 } 1150 1151 fBlocks[fCount++] = block; 1152 fTotal++; 1153 block->busy_writing = true; 1154 fCache->busy_writing_count++; 1155 if (block->previous_transaction != NULL) 1156 block->previous_transaction->busy_writing_count++; 1157 1158 return true; 1159 } 1160 1161 1162 /*! Adds all blocks of the specified transaction to the to be written array. 1163 If no more blocks can be added, false is returned, otherwise true. 1164 */ 1165 bool 1166 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers) 1167 { 1168 ASSERT(!transaction->open); 1169 1170 if (transaction->busy_writing_count != 0) { 1171 hasLeftOvers = true; 1172 return true; 1173 } 1174 1175 hasLeftOvers = false; 1176 1177 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 1178 while (cached_block* block = blockIterator.Next()) { 1179 if (!block->CanBeWritten()) { 1180 // This block was already part of a previous transaction within this 1181 // writer 1182 hasLeftOvers = true; 1183 continue; 1184 } 1185 if (!Add(block, transaction)) 1186 return false; 1187 1188 if (DeletedTransaction()) 1189 break; 1190 } 1191 1192 return true; 1193 } 1194 1195 1196 /*! Cache must be locked when calling this method, but it will be unlocked 1197 while the blocks are written back. 1198 */ 1199 status_t 1200 BlockWriter::Write(cache_transaction* transaction, bool canUnlock) 1201 { 1202 if (fCount == 0) 1203 return B_OK; 1204 1205 if (canUnlock) 1206 mutex_unlock(&fCache->lock); 1207 1208 // Sort blocks in their on-disk order 1209 // TODO: ideally, this should be handled by the I/O scheduler 1210 1211 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks); 1212 fDeletedTransaction = false; 1213 1214 bigtime_t start = system_time(); 1215 1216 for (uint32 i = 0; i < fCount; i++) { 1217 status_t status = _WriteBlock(fBlocks[i]); 1218 if (status != B_OK) { 1219 // propagate to global error handling 1220 if (fStatus == B_OK) 1221 fStatus = status; 1222 1223 _UnmarkWriting(fBlocks[i]); 1224 fBlocks[i] = NULL; 1225 // This block will not be marked clean 1226 } 1227 } 1228 1229 bigtime_t finish = system_time(); 1230 1231 if (canUnlock) 1232 mutex_lock(&fCache->lock); 1233 1234 if (fStatus == B_OK && fCount >= 8) { 1235 fCache->last_block_write = finish; 1236 fCache->last_block_write_duration = (fCache->last_block_write - start) 1237 / fCount; 1238 } 1239 1240 for (uint32 i = 0; i < fCount; i++) 1241 _BlockDone(fBlocks[i], transaction); 1242 1243 fCount = 0; 1244 return fStatus; 1245 } 1246 1247 1248 /*! Writes the specified \a block back to disk. It will always only write back 1249 the oldest change of the block if it is part of more than one transaction. 1250 It will automatically send out TRANSACTION_WRITTEN notices, as well as 1251 delete transactions when they are no longer used, and \a deleteTransaction 1252 is \c true. 1253 */ 1254 /*static*/ status_t 1255 BlockWriter::WriteBlock(block_cache* cache, cached_block* block) 1256 { 1257 BlockWriter writer(cache); 1258 1259 writer.Add(block); 1260 return writer.Write(); 1261 } 1262 1263 1264 void* 1265 BlockWriter::_Data(cached_block* block) const 1266 { 1267 return block->previous_transaction != NULL && block->original_data != NULL 1268 ? block->original_data : block->current_data; 1269 // We first need to write back changes from previous transactions 1270 } 1271 1272 1273 status_t 1274 BlockWriter::_WriteBlock(cached_block* block) 1275 { 1276 ASSERT(block->busy_writing); 1277 1278 TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number)); 1279 TB(Write(fCache, block)); 1280 TB2(BlockData(fCache, block, "before write")); 1281 1282 size_t blockSize = fCache->block_size; 1283 1284 ssize_t written = write_pos(fCache->fd, 1285 block->block_number * blockSize, _Data(block), blockSize); 1286 1287 if (written != (ssize_t)blockSize) { 1288 TB(Error(fCache, block->block_number, "write failed", written)); 1289 TRACE_ALWAYS("could not write back block %" B_PRIdOFF " (%s)\n", 1290 block->block_number, strerror(errno)); 1291 if (written < 0) 1292 return errno; 1293 1294 return B_IO_ERROR; 1295 } 1296 1297 return B_OK; 1298 } 1299 1300 1301 void 1302 BlockWriter::_BlockDone(cached_block* block, 1303 cache_transaction* transaction) 1304 { 1305 if (block == NULL) { 1306 // An error occured when trying to write this block 1307 return; 1308 } 1309 1310 if (fCache->num_dirty_blocks > 0) 1311 fCache->num_dirty_blocks--; 1312 1313 if (_Data(block) == block->current_data) 1314 block->is_dirty = false; 1315 1316 _UnmarkWriting(block); 1317 1318 cache_transaction* previous = block->previous_transaction; 1319 if (previous != NULL) { 1320 previous->blocks.Remove(block); 1321 block->previous_transaction = NULL; 1322 1323 if (block->original_data != NULL && block->transaction == NULL) { 1324 // This block is not part of a transaction, so it does not need 1325 // its original pointer anymore. 1326 fCache->Free(block->original_data); 1327 block->original_data = NULL; 1328 } 1329 1330 // Has the previous transaction been finished with that write? 1331 if (--previous->num_blocks == 0) { 1332 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id)); 1333 T(Action("written", fCache, previous)); 1334 1335 notify_transaction_listeners(fCache, previous, 1336 TRANSACTION_WRITTEN); 1337 1338 if (transaction != NULL) { 1339 // This function is called while iterating transaction_hash. We 1340 // use RemoveUnchecked so the iterator is still valid. A regular 1341 // Remove can trigger a resize of the hash table which would 1342 // result in the linked items in the table changing order. 1343 fCache->transaction_hash->RemoveUnchecked(transaction); 1344 } else 1345 fCache->transaction_hash->Remove(previous); 1346 1347 delete_transaction(fCache, previous); 1348 fDeletedTransaction = true; 1349 } 1350 } 1351 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) { 1352 // the block is no longer used 1353 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1354 block->unused = true; 1355 fCache->unused_blocks.Add(block); 1356 fCache->unused_block_count++; 1357 } 1358 1359 TB2(BlockData(fCache, block, "after write")); 1360 } 1361 1362 1363 void 1364 BlockWriter::_UnmarkWriting(cached_block* block) 1365 { 1366 block->busy_writing = false; 1367 if (block->previous_transaction != NULL) 1368 block->previous_transaction->busy_writing_count--; 1369 fCache->busy_writing_count--; 1370 1371 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0) 1372 || block->busy_writing_waiters) { 1373 fCache->busy_writing_waiters = false; 1374 block->busy_writing_waiters = false; 1375 fCache->busy_writing_condition.NotifyAll(); 1376 } 1377 } 1378 1379 1380 /*static*/ int 1381 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB) 1382 { 1383 cached_block* blockA = *(cached_block**)_blockA; 1384 cached_block* blockB = *(cached_block**)_blockB; 1385 1386 off_t diff = blockA->block_number - blockB->block_number; 1387 if (diff > 0) 1388 return 1; 1389 1390 return diff < 0 ? -1 : 0; 1391 } 1392 1393 1394 // #pragma mark - block_cache 1395 1396 1397 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize, 1398 bool readOnly) 1399 : 1400 hash(NULL), 1401 fd(_fd), 1402 max_blocks(numBlocks), 1403 block_size(blockSize), 1404 next_transaction_id(1), 1405 last_transaction(NULL), 1406 transaction_hash(NULL), 1407 buffer_cache(NULL), 1408 unused_block_count(0), 1409 busy_reading_count(0), 1410 busy_reading_waiters(false), 1411 busy_writing_count(0), 1412 busy_writing_waiters(0), 1413 last_block_write(0), 1414 last_block_write_duration(0), 1415 num_dirty_blocks(0), 1416 read_only(readOnly) 1417 { 1418 } 1419 1420 1421 /*! Should be called with the cache's lock held. */ 1422 block_cache::~block_cache() 1423 { 1424 unregister_low_resource_handler(&_LowMemoryHandler, this); 1425 1426 delete transaction_hash; 1427 delete hash; 1428 1429 delete_object_cache(buffer_cache); 1430 1431 mutex_destroy(&lock); 1432 } 1433 1434 1435 status_t 1436 block_cache::Init() 1437 { 1438 busy_reading_condition.Init(this, "cache block busy_reading"); 1439 busy_writing_condition.Init(this, "cache block busy writing"); 1440 condition_variable.Init(this, "cache transaction sync"); 1441 mutex_init(&lock, "block cache"); 1442 1443 buffer_cache = create_object_cache_etc("block cache buffers", block_size, 1444 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 1445 if (buffer_cache == NULL) 1446 return B_NO_MEMORY; 1447 1448 hash = new BlockTable(); 1449 if (hash == NULL || hash->Init(1024) != B_OK) 1450 return B_NO_MEMORY; 1451 1452 transaction_hash = new(std::nothrow) TransactionTable(); 1453 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK) 1454 return B_NO_MEMORY; 1455 1456 return register_low_resource_handler(&_LowMemoryHandler, this, 1457 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1458 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0); 1459 } 1460 1461 1462 void 1463 block_cache::Free(void* buffer) 1464 { 1465 if (buffer != NULL) 1466 object_cache_free(buffer_cache, buffer, 0); 1467 } 1468 1469 1470 void* 1471 block_cache::Allocate() 1472 { 1473 void* block = object_cache_alloc(buffer_cache, 0); 1474 if (block != NULL) 1475 return block; 1476 1477 // recycle existing before allocating a new one 1478 RemoveUnusedBlocks(100); 1479 1480 return object_cache_alloc(buffer_cache, 0); 1481 } 1482 1483 1484 void 1485 block_cache::FreeBlock(cached_block* block) 1486 { 1487 Free(block->current_data); 1488 1489 if (block->original_data != NULL || block->parent_data != NULL) { 1490 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n", 1491 block->block_number, block->original_data, block->parent_data); 1492 } 1493 1494 #if BLOCK_CACHE_DEBUG_CHANGED 1495 Free(block->compare); 1496 #endif 1497 1498 object_cache_free(sBlockCache, block, 0); 1499 } 1500 1501 1502 /*! Allocates a new block for \a blockNumber, ready for use */ 1503 cached_block* 1504 block_cache::NewBlock(off_t blockNumber) 1505 { 1506 cached_block* block = NULL; 1507 1508 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1509 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) { 1510 // recycle existing instead of allocating a new one 1511 block = _GetUnusedBlock(); 1512 } 1513 if (block == NULL) { 1514 block = (cached_block*)object_cache_alloc(sBlockCache, 0); 1515 if (block != NULL) { 1516 block->current_data = Allocate(); 1517 if (block->current_data == NULL) { 1518 object_cache_free(sBlockCache, block, 0); 1519 return NULL; 1520 } 1521 } else { 1522 TB(Error(this, blockNumber, "allocation failed")); 1523 TRACE_ALWAYS("block allocation failed, unused list is %sempty.\n", 1524 unused_blocks.IsEmpty() ? "" : "not "); 1525 1526 // allocation failed, try to reuse an unused block 1527 block = _GetUnusedBlock(); 1528 if (block == NULL) { 1529 TB(Error(this, blockNumber, "get unused failed")); 1530 FATAL(("could not allocate block!\n")); 1531 return NULL; 1532 } 1533 } 1534 } 1535 1536 block->block_number = blockNumber; 1537 block->ref_count = 0; 1538 block->last_accessed = 0; 1539 block->transaction_next = NULL; 1540 block->transaction = block->previous_transaction = NULL; 1541 block->original_data = NULL; 1542 block->parent_data = NULL; 1543 block->busy_reading = false; 1544 block->busy_writing = false; 1545 block->is_writing = false; 1546 block->is_dirty = false; 1547 block->unused = false; 1548 block->discard = false; 1549 block->busy_reading_waiters = false; 1550 block->busy_writing_waiters = false; 1551 #if BLOCK_CACHE_DEBUG_CHANGED 1552 block->compare = NULL; 1553 #endif 1554 1555 return block; 1556 } 1557 1558 1559 void 1560 block_cache::FreeBlockParentData(cached_block* block) 1561 { 1562 ASSERT(block->parent_data != NULL); 1563 if (block->parent_data != block->current_data) 1564 Free(block->parent_data); 1565 block->parent_data = NULL; 1566 } 1567 1568 1569 void 1570 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld) 1571 { 1572 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count)); 1573 1574 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1575 cached_block* block = iterator.Next();) { 1576 if (minSecondsOld >= block->LastAccess()) { 1577 // The list is sorted by last access 1578 break; 1579 } 1580 if (block->busy_reading || block->busy_writing) 1581 continue; 1582 1583 TB(Flush(this, block)); 1584 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n", 1585 block->block_number, block->last_accessed)); 1586 1587 // this can only happen if no transactions are used 1588 if (block->is_dirty && !block->discard) { 1589 if (block->busy_writing) 1590 continue; 1591 1592 BlockWriter::WriteBlock(this, block); 1593 } 1594 1595 // remove block from lists 1596 iterator.Remove(); 1597 unused_block_count--; 1598 RemoveBlock(block); 1599 1600 if (--count <= 0) 1601 break; 1602 } 1603 } 1604 1605 1606 void 1607 block_cache::RemoveBlock(cached_block* block) 1608 { 1609 hash->Remove(block); 1610 FreeBlock(block); 1611 } 1612 1613 1614 /*! Discards the block from a transaction (this method must not be called 1615 for blocks not part of a transaction). 1616 */ 1617 void 1618 block_cache::DiscardBlock(cached_block* block) 1619 { 1620 ASSERT(block->discard); 1621 ASSERT(block->previous_transaction == NULL); 1622 1623 if (block->parent_data != NULL) 1624 FreeBlockParentData(block); 1625 1626 if (block->original_data != NULL) { 1627 Free(block->original_data); 1628 block->original_data = NULL; 1629 } 1630 1631 RemoveBlock(block); 1632 } 1633 1634 1635 void 1636 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level) 1637 { 1638 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level)); 1639 1640 // free some blocks according to the low memory state 1641 // (if there is enough memory left, we don't free any) 1642 1643 block_cache* cache = (block_cache*)data; 1644 if (cache->unused_block_count <= 1) 1645 return; 1646 1647 int32 free = 0; 1648 int32 secondsOld = 0; 1649 switch (level) { 1650 case B_NO_LOW_RESOURCE: 1651 return; 1652 case B_LOW_RESOURCE_NOTE: 1653 free = cache->unused_block_count / 4; 1654 secondsOld = 120; 1655 break; 1656 case B_LOW_RESOURCE_WARNING: 1657 free = cache->unused_block_count / 2; 1658 secondsOld = 10; 1659 break; 1660 case B_LOW_RESOURCE_CRITICAL: 1661 free = cache->unused_block_count - 1; 1662 secondsOld = 0; 1663 break; 1664 } 1665 1666 MutexLocker locker(&cache->lock); 1667 1668 if (!locker.IsLocked()) { 1669 // If our block_cache were deleted, it could be that we had 1670 // been called before that deletion went through, therefore, 1671 // acquiring its lock might fail. 1672 return; 1673 } 1674 1675 #ifdef TRACE_BLOCK_CACHE 1676 uint32 oldUnused = cache->unused_block_count; 1677 #endif 1678 1679 cache->RemoveUnusedBlocks(free, secondsOld); 1680 1681 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n", 1682 cache, oldUnused, cache->unused_block_count)); 1683 } 1684 1685 1686 cached_block* 1687 block_cache::_GetUnusedBlock() 1688 { 1689 TRACE(("block_cache: get unused block\n")); 1690 1691 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1692 cached_block* block = iterator.Next();) { 1693 TB(Flush(this, block, true)); 1694 // this can only happen if no transactions are used 1695 if (block->is_dirty && !block->busy_writing && !block->discard) 1696 BlockWriter::WriteBlock(this, block); 1697 1698 // remove block from lists 1699 iterator.Remove(); 1700 unused_block_count--; 1701 hash->Remove(block); 1702 1703 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1704 block->unused = false; 1705 1706 // TODO: see if compare data is handled correctly here! 1707 #if BLOCK_CACHE_DEBUG_CHANGED 1708 if (block->compare != NULL) 1709 Free(block->compare); 1710 #endif 1711 return block; 1712 } 1713 1714 return NULL; 1715 } 1716 1717 1718 // #pragma mark - private block functions 1719 1720 1721 /*! Cache must be locked. 1722 */ 1723 static void 1724 mark_block_busy_reading(block_cache* cache, cached_block* block) 1725 { 1726 block->busy_reading = true; 1727 cache->busy_reading_count++; 1728 } 1729 1730 1731 /*! Cache must be locked. 1732 */ 1733 static void 1734 mark_block_unbusy_reading(block_cache* cache, cached_block* block) 1735 { 1736 block->busy_reading = false; 1737 cache->busy_reading_count--; 1738 1739 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0) 1740 || block->busy_reading_waiters) { 1741 cache->busy_reading_waiters = false; 1742 block->busy_reading_waiters = false; 1743 cache->busy_reading_condition.NotifyAll(); 1744 } 1745 } 1746 1747 1748 /*! Cache must be locked. 1749 */ 1750 static void 1751 wait_for_busy_reading_block(block_cache* cache, cached_block* block) 1752 { 1753 while (block->busy_reading) { 1754 // wait for at least the specified block to be read in 1755 ConditionVariableEntry entry; 1756 cache->busy_reading_condition.Add(&entry); 1757 block->busy_reading_waiters = true; 1758 1759 mutex_unlock(&cache->lock); 1760 1761 entry.Wait(); 1762 1763 mutex_lock(&cache->lock); 1764 } 1765 } 1766 1767 1768 /*! Cache must be locked. 1769 */ 1770 static void 1771 wait_for_busy_reading_blocks(block_cache* cache) 1772 { 1773 while (cache->busy_reading_count != 0) { 1774 // wait for all blocks to be read in 1775 ConditionVariableEntry entry; 1776 cache->busy_reading_condition.Add(&entry); 1777 cache->busy_reading_waiters = true; 1778 1779 mutex_unlock(&cache->lock); 1780 1781 entry.Wait(); 1782 1783 mutex_lock(&cache->lock); 1784 } 1785 } 1786 1787 1788 /*! Cache must be locked. 1789 */ 1790 static void 1791 wait_for_busy_writing_block(block_cache* cache, cached_block* block) 1792 { 1793 while (block->busy_writing) { 1794 // wait for all blocks to be written back 1795 ConditionVariableEntry entry; 1796 cache->busy_writing_condition.Add(&entry); 1797 block->busy_writing_waiters = true; 1798 1799 mutex_unlock(&cache->lock); 1800 1801 entry.Wait(); 1802 1803 mutex_lock(&cache->lock); 1804 } 1805 } 1806 1807 1808 /*! Cache must be locked. 1809 */ 1810 static void 1811 wait_for_busy_writing_blocks(block_cache* cache) 1812 { 1813 while (cache->busy_writing_count != 0) { 1814 // wait for all blocks to be written back 1815 ConditionVariableEntry entry; 1816 cache->busy_writing_condition.Add(&entry); 1817 cache->busy_writing_waiters = true; 1818 1819 mutex_unlock(&cache->lock); 1820 1821 entry.Wait(); 1822 1823 mutex_lock(&cache->lock); 1824 } 1825 } 1826 1827 1828 /*! Removes a reference from the specified \a block. If this was the last 1829 reference, the block is moved into the unused list. 1830 In low memory situations, it will also free some blocks from that list, 1831 but not necessarily the \a block it just released. 1832 */ 1833 static void 1834 put_cached_block(block_cache* cache, cached_block* block) 1835 { 1836 #if BLOCK_CACHE_DEBUG_CHANGED 1837 if (!block->is_dirty && block->compare != NULL 1838 && memcmp(block->current_data, block->compare, cache->block_size)) { 1839 TRACE_ALWAYS("new block:\n"); 1840 dump_block((const char*)block->current_data, 256, " "); 1841 TRACE_ALWAYS("unchanged block:\n"); 1842 dump_block((const char*)block->compare, 256, " "); 1843 BlockWriter::WriteBlock(cache, block); 1844 panic("block_cache: supposed to be clean block was changed!\n"); 1845 1846 cache->Free(block->compare); 1847 block->compare = NULL; 1848 } 1849 #endif 1850 TB(Put(cache, block)); 1851 1852 if (block->ref_count < 1) { 1853 panic("Invalid ref_count for block %p, cache %p\n", block, cache); 1854 return; 1855 } 1856 1857 if (--block->ref_count == 0 1858 && block->transaction == NULL && block->previous_transaction == NULL) { 1859 // This block is not used anymore, and not part of any transaction 1860 block->is_writing = false; 1861 1862 if (block->discard) { 1863 cache->RemoveBlock(block); 1864 } else { 1865 // put this block in the list of unused blocks 1866 ASSERT(!block->unused); 1867 block->unused = true; 1868 1869 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1870 cache->unused_blocks.Add(block); 1871 cache->unused_block_count++; 1872 } 1873 } 1874 } 1875 1876 1877 static void 1878 put_cached_block(block_cache* cache, off_t blockNumber) 1879 { 1880 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1881 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1882 blockNumber, cache->max_blocks - 1); 1883 } 1884 1885 cached_block* block = cache->hash->Lookup(blockNumber); 1886 if (block != NULL) 1887 put_cached_block(cache, block); 1888 else { 1889 TB(Error(cache, blockNumber, "put unknown")); 1890 } 1891 } 1892 1893 1894 /*! Retrieves the block \a blockNumber from the hash table, if it's already 1895 there, or reads it from the disk. 1896 You need to have the cache locked when calling this function. 1897 1898 \param _allocated tells you whether or not a new block has been allocated 1899 to satisfy your request. 1900 \param readBlock if \c false, the block will not be read in case it was 1901 not already in the cache. The block you retrieve may contain random 1902 data. If \c true, the cache will be temporarily unlocked while the 1903 block is read in. 1904 */ 1905 static status_t 1906 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated, 1907 bool readBlock, cached_block** _block) 1908 { 1909 ASSERT_LOCKED_MUTEX(&cache->lock); 1910 1911 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1912 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1913 blockNumber, cache->max_blocks - 1); 1914 return B_BAD_VALUE; 1915 } 1916 1917 retry: 1918 cached_block* block = cache->hash->Lookup(blockNumber); 1919 *_allocated = false; 1920 1921 if (block == NULL) { 1922 // put block into cache 1923 block = cache->NewBlock(blockNumber); 1924 if (block == NULL) 1925 return B_NO_MEMORY; 1926 1927 cache->hash->Insert(block); 1928 *_allocated = true; 1929 } else if (block->busy_reading) { 1930 // The block is currently busy_reading - wait and try again later 1931 wait_for_busy_reading_block(cache, block); 1932 goto retry; 1933 } 1934 1935 if (block->unused) { 1936 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber)); 1937 block->unused = false; 1938 cache->unused_blocks.Remove(block); 1939 cache->unused_block_count--; 1940 } 1941 1942 if (*_allocated && readBlock) { 1943 // read block into cache 1944 int32 blockSize = cache->block_size; 1945 1946 mark_block_busy_reading(cache, block); 1947 mutex_unlock(&cache->lock); 1948 1949 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize, 1950 block->current_data, blockSize); 1951 1952 mutex_lock(&cache->lock); 1953 if (bytesRead < blockSize) { 1954 cache->RemoveBlock(block); 1955 TB(Error(cache, blockNumber, "read failed", bytesRead)); 1956 1957 TRACE_ALWAYS("could not read block %" B_PRIdOFF ": bytesRead: %zd," 1958 " error: %s\n", blockNumber, bytesRead, strerror(errno)); 1959 return errno; 1960 } 1961 TB(Read(cache, block)); 1962 1963 mark_block_unbusy_reading(cache, block); 1964 } 1965 1966 block->ref_count++; 1967 block->last_accessed = system_time() / 1000000L; 1968 1969 *_block = block; 1970 return B_OK; 1971 } 1972 1973 1974 /*! Returns the writable block data for the requested blockNumber. 1975 If \a cleared is true, the block is not read from disk; an empty block 1976 is returned. 1977 1978 This is the only method to insert a block into a transaction. It makes 1979 sure that the previous block contents are preserved in that case. 1980 */ 1981 static status_t 1982 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base, 1983 off_t length, int32 transactionID, bool cleared, void** _block) 1984 { 1985 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 1986 blockNumber, transactionID)); 1987 1988 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1989 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1990 blockNumber, cache->max_blocks - 1); 1991 return B_BAD_VALUE; 1992 } 1993 1994 bool allocated; 1995 cached_block* block; 1996 status_t status = get_cached_block(cache, blockNumber, &allocated, 1997 !cleared, &block); 1998 if (status != B_OK) 1999 return status; 2000 2001 if (block->busy_writing) 2002 wait_for_busy_writing_block(cache, block); 2003 2004 block->discard = false; 2005 2006 // if there is no transaction support, we just return the current block 2007 if (transactionID == -1) { 2008 if (cleared) { 2009 mark_block_busy_reading(cache, block); 2010 mutex_unlock(&cache->lock); 2011 2012 memset(block->current_data, 0, cache->block_size); 2013 2014 mutex_lock(&cache->lock); 2015 mark_block_unbusy_reading(cache, block); 2016 } 2017 2018 block->is_writing = true; 2019 2020 if (!block->is_dirty) { 2021 cache->num_dirty_blocks++; 2022 block->is_dirty = true; 2023 // mark the block as dirty 2024 } 2025 2026 TB(Get(cache, block)); 2027 *_block = block->current_data; 2028 return B_OK; 2029 } 2030 2031 cache_transaction* transaction = block->transaction; 2032 2033 if (transaction != NULL && transaction->id != transactionID) { 2034 // TODO: we have to wait here until the other transaction is done. 2035 // Maybe we should even panic, since we can't prevent any deadlocks. 2036 panic("get_writable_cached_block(): asked to get busy writable block " 2037 "(transaction %" B_PRId32 ")\n", block->transaction->id); 2038 put_cached_block(cache, block); 2039 return B_BAD_VALUE; 2040 } 2041 if (transaction == NULL && transactionID != -1) { 2042 // get new transaction 2043 transaction = lookup_transaction(cache, transactionID); 2044 if (transaction == NULL) { 2045 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n", 2046 transactionID); 2047 put_cached_block(cache, block); 2048 return B_BAD_VALUE; 2049 } 2050 if (!transaction->open) { 2051 panic("get_writable_cached_block(): transaction already done!\n"); 2052 put_cached_block(cache, block); 2053 return B_BAD_VALUE; 2054 } 2055 2056 block->transaction = transaction; 2057 2058 // attach the block to the transaction block list 2059 block->transaction_next = transaction->first_block; 2060 transaction->first_block = block; 2061 transaction->num_blocks++; 2062 } 2063 if (transaction != NULL) 2064 transaction->last_used = system_time(); 2065 2066 bool wasUnchanged = block->original_data == NULL 2067 || block->previous_transaction != NULL; 2068 2069 if (!(allocated && cleared) && block->original_data == NULL) { 2070 // we already have data, so we need to preserve it 2071 block->original_data = cache->Allocate(); 2072 if (block->original_data == NULL) { 2073 TB(Error(cache, blockNumber, "allocate original failed")); 2074 FATAL(("could not allocate original_data\n")); 2075 put_cached_block(cache, block); 2076 return B_NO_MEMORY; 2077 } 2078 2079 mark_block_busy_reading(cache, block); 2080 mutex_unlock(&cache->lock); 2081 2082 memcpy(block->original_data, block->current_data, cache->block_size); 2083 2084 mutex_lock(&cache->lock); 2085 mark_block_unbusy_reading(cache, block); 2086 } 2087 if (block->parent_data == block->current_data) { 2088 // remember any previous contents for the parent transaction 2089 block->parent_data = cache->Allocate(); 2090 if (block->parent_data == NULL) { 2091 // TODO: maybe we should just continue the current transaction in 2092 // this case... 2093 TB(Error(cache, blockNumber, "allocate parent failed")); 2094 FATAL(("could not allocate parent\n")); 2095 put_cached_block(cache, block); 2096 return B_NO_MEMORY; 2097 } 2098 2099 mark_block_busy_reading(cache, block); 2100 mutex_unlock(&cache->lock); 2101 2102 memcpy(block->parent_data, block->current_data, cache->block_size); 2103 2104 mutex_lock(&cache->lock); 2105 mark_block_unbusy_reading(cache, block); 2106 2107 transaction->sub_num_blocks++; 2108 } else if (transaction != NULL && transaction->has_sub_transaction 2109 && block->parent_data == NULL && wasUnchanged) 2110 transaction->sub_num_blocks++; 2111 2112 if (cleared) { 2113 mark_block_busy_reading(cache, block); 2114 mutex_unlock(&cache->lock); 2115 2116 memset(block->current_data, 0, cache->block_size); 2117 2118 mutex_lock(&cache->lock); 2119 mark_block_unbusy_reading(cache, block); 2120 } 2121 2122 block->is_dirty = true; 2123 TB(Get(cache, block)); 2124 TB2(BlockData(cache, block, "get writable")); 2125 2126 *_block = block->current_data; 2127 return B_OK; 2128 } 2129 2130 2131 #if DEBUG_BLOCK_CACHE 2132 2133 2134 static void 2135 dump_block(cached_block* block) 2136 { 2137 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32 2138 " %c%c%c%c%c%c %08lx %08lx\n", 2139 (addr_t)block, block->block_number, 2140 (addr_t)block->current_data, (addr_t)block->original_data, 2141 (addr_t)block->parent_data, block->ref_count, block->LastAccess(), 2142 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-', 2143 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-', 2144 block->unused ? 'U' : '-', block->discard ? 'D' : '-', 2145 (addr_t)block->transaction, 2146 (addr_t)block->previous_transaction); 2147 } 2148 2149 2150 static void 2151 dump_block_long(cached_block* block) 2152 { 2153 kprintf("BLOCK %p\n", block); 2154 kprintf(" current data: %p\n", block->current_data); 2155 kprintf(" original data: %p\n", block->original_data); 2156 kprintf(" parent data: %p\n", block->parent_data); 2157 #if BLOCK_CACHE_DEBUG_CHANGED 2158 kprintf(" compare data: %p\n", block->compare); 2159 #endif 2160 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count); 2161 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess()); 2162 kprintf(" flags: "); 2163 if (block->busy_reading) 2164 kprintf(" busy_reading"); 2165 if (block->busy_writing) 2166 kprintf(" busy_writing"); 2167 if (block->is_writing) 2168 kprintf(" is-writing"); 2169 if (block->is_dirty) 2170 kprintf(" is-dirty"); 2171 if (block->unused) 2172 kprintf(" unused"); 2173 if (block->discard) 2174 kprintf(" discard"); 2175 kprintf("\n"); 2176 if (block->transaction != NULL) { 2177 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction, 2178 block->transaction->id); 2179 if (block->transaction_next != NULL) { 2180 kprintf(" next in transaction: %" B_PRIdOFF "\n", 2181 block->transaction_next->block_number); 2182 } 2183 } 2184 if (block->previous_transaction != NULL) { 2185 kprintf(" previous transaction: %p (%" B_PRId32 ")\n", 2186 block->previous_transaction, 2187 block->previous_transaction->id); 2188 } 2189 2190 set_debug_variable("_current", (addr_t)block->current_data); 2191 set_debug_variable("_original", (addr_t)block->original_data); 2192 set_debug_variable("_parent", (addr_t)block->parent_data); 2193 } 2194 2195 2196 static int 2197 dump_cached_block(int argc, char** argv) 2198 { 2199 if (argc != 2) { 2200 kprintf("usage: %s <block-address>\n", argv[0]); 2201 return 0; 2202 } 2203 2204 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1])); 2205 return 0; 2206 } 2207 2208 2209 static int 2210 dump_cache(int argc, char** argv) 2211 { 2212 bool showTransactions = false; 2213 bool showBlocks = false; 2214 int32 i = 1; 2215 while (argv[i] != NULL && argv[i][0] == '-') { 2216 for (char* arg = &argv[i][1]; arg[0]; arg++) { 2217 switch (arg[0]) { 2218 case 'b': 2219 showBlocks = true; 2220 break; 2221 case 't': 2222 showTransactions = true; 2223 break; 2224 default: 2225 print_debugger_command_usage(argv[0]); 2226 return 0; 2227 } 2228 } 2229 i++; 2230 } 2231 2232 if (i >= argc) { 2233 print_debugger_command_usage(argv[0]); 2234 return 0; 2235 } 2236 2237 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]); 2238 if (cache == NULL) { 2239 kprintf("invalid cache address\n"); 2240 return 0; 2241 } 2242 2243 off_t blockNumber = -1; 2244 if (i + 1 < argc) { 2245 blockNumber = parse_expression(argv[i + 1]); 2246 cached_block* block = cache->hash->Lookup(blockNumber); 2247 if (block != NULL) 2248 dump_block_long(block); 2249 else 2250 kprintf("block %" B_PRIdOFF " not found\n", blockNumber); 2251 return 0; 2252 } 2253 2254 kprintf("BLOCK CACHE: %p\n", cache); 2255 2256 kprintf(" fd: %d\n", cache->fd); 2257 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks); 2258 kprintf(" block_size: %zu\n", cache->block_size); 2259 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id); 2260 kprintf(" buffer_cache: %p\n", cache->buffer_cache); 2261 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count, 2262 cache->busy_reading_waiters ? "has" : "no"); 2263 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count, 2264 cache->busy_writing_waiters ? "has" : "no"); 2265 2266 if (!cache->pending_notifications.IsEmpty()) { 2267 kprintf(" pending notifications:\n"); 2268 2269 NotificationList::Iterator iterator 2270 = cache->pending_notifications.GetIterator(); 2271 while (iterator.HasNext()) { 2272 cache_notification* notification = iterator.Next(); 2273 2274 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification, 2275 notification->events_pending, notification->hook, 2276 notification->data); 2277 } 2278 } 2279 2280 if (showTransactions) { 2281 kprintf(" transactions:\n"); 2282 kprintf("address id state blocks main sub\n"); 2283 2284 TransactionTable::Iterator iterator(cache->transaction_hash); 2285 2286 while (iterator.HasNext()) { 2287 cache_transaction* transaction = iterator.Next(); 2288 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" 2289 B_PRId32 "\n", transaction, transaction->id, 2290 transaction->open ? "open" : "closed", 2291 transaction->num_blocks, transaction->main_num_blocks, 2292 transaction->sub_num_blocks); 2293 } 2294 } 2295 2296 if (showBlocks) { 2297 kprintf(" blocks:\n"); 2298 kprintf("address block no. current original parent refs access " 2299 "flags transact prev. trans\n"); 2300 } 2301 2302 uint32 referenced = 0; 2303 uint32 count = 0; 2304 uint32 dirty = 0; 2305 uint32 discarded = 0; 2306 BlockTable::Iterator iterator(cache->hash); 2307 while (iterator.HasNext()) { 2308 cached_block* block = iterator.Next(); 2309 if (showBlocks) 2310 dump_block(block); 2311 2312 if (block->is_dirty) 2313 dirty++; 2314 if (block->discard) 2315 discarded++; 2316 if (block->ref_count) 2317 referenced++; 2318 count++; 2319 } 2320 2321 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 2322 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 2323 " in unused.\n", 2324 count, dirty, discarded, referenced, cache->busy_reading_count, 2325 cache->unused_block_count); 2326 return 0; 2327 } 2328 2329 2330 static int 2331 dump_transaction(int argc, char** argv) 2332 { 2333 bool showBlocks = false; 2334 int i = 1; 2335 if (argc > 1 && !strcmp(argv[1], "-b")) { 2336 showBlocks = true; 2337 i++; 2338 } 2339 2340 if (argc - i < 1 || argc - i > 2) { 2341 print_debugger_command_usage(argv[0]); 2342 return 0; 2343 } 2344 2345 cache_transaction* transaction = NULL; 2346 2347 if (argc - i == 1) { 2348 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]); 2349 } else { 2350 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]); 2351 int32 id = parse_expression(argv[i + 1]); 2352 transaction = lookup_transaction(cache, id); 2353 if (transaction == NULL) { 2354 kprintf("No transaction with ID %" B_PRId32 " found.\n", id); 2355 return 0; 2356 } 2357 } 2358 2359 kprintf("TRANSACTION %p\n", transaction); 2360 2361 kprintf(" id: %" B_PRId32 "\n", transaction->id); 2362 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks); 2363 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks); 2364 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks); 2365 kprintf(" has sub: %d\n", transaction->has_sub_transaction); 2366 kprintf(" state: %s\n", transaction->open ? "open" : "closed"); 2367 kprintf(" idle: %" B_PRId64 " secs\n", 2368 (system_time() - transaction->last_used) / 1000000); 2369 2370 kprintf(" listeners:\n"); 2371 2372 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 2373 while (iterator.HasNext()) { 2374 cache_listener* listener = iterator.Next(); 2375 2376 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending, 2377 listener->hook, listener->data); 2378 } 2379 2380 if (!showBlocks) 2381 return 0; 2382 2383 kprintf(" blocks:\n"); 2384 kprintf("address block no. current original parent refs access " 2385 "flags transact prev. trans\n"); 2386 2387 cached_block* block = transaction->first_block; 2388 while (block != NULL) { 2389 dump_block(block); 2390 block = block->transaction_next; 2391 } 2392 2393 kprintf("--\n"); 2394 2395 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 2396 while (blockIterator.HasNext()) { 2397 block = blockIterator.Next(); 2398 dump_block(block); 2399 } 2400 2401 return 0; 2402 } 2403 2404 2405 static int 2406 dump_caches(int argc, char** argv) 2407 { 2408 kprintf("Block caches:\n"); 2409 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator(); 2410 while (i.HasNext()) { 2411 block_cache* cache = i.Next(); 2412 if (cache == (block_cache*)&sMarkCache) 2413 continue; 2414 2415 kprintf(" %p\n", cache); 2416 } 2417 2418 return 0; 2419 } 2420 2421 2422 #if BLOCK_CACHE_BLOCK_TRACING >= 2 2423 static int 2424 dump_block_data(int argc, char** argv) 2425 { 2426 using namespace BlockTracing; 2427 2428 // Determine which blocks to show 2429 2430 bool printStackTrace = true; 2431 uint32 which = 0; 2432 int32 i = 1; 2433 while (i < argc && argv[i][0] == '-') { 2434 char* arg = &argv[i][1]; 2435 while (arg[0]) { 2436 switch (arg[0]) { 2437 case 'c': 2438 which |= BlockData::kCurrent; 2439 break; 2440 case 'p': 2441 which |= BlockData::kParent; 2442 break; 2443 case 'o': 2444 which |= BlockData::kOriginal; 2445 break; 2446 2447 default: 2448 kprintf("invalid block specifier (only o/c/p are " 2449 "allowed).\n"); 2450 return 0; 2451 } 2452 arg++; 2453 } 2454 2455 i++; 2456 } 2457 if (which == 0) 2458 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal; 2459 2460 if (i == argc) { 2461 print_debugger_command_usage(argv[0]); 2462 return 0; 2463 } 2464 2465 // Get the range of blocks to print 2466 2467 int64 from = parse_expression(argv[i]); 2468 int64 to = from; 2469 if (argc > i + 1) 2470 to = parse_expression(argv[i + 1]); 2471 if (to < from) 2472 to = from; 2473 2474 uint32 offset = 0; 2475 uint32 size = LONG_MAX; 2476 if (argc > i + 2) 2477 offset = parse_expression(argv[i + 2]); 2478 if (argc > i + 3) 2479 size = parse_expression(argv[i + 3]); 2480 2481 TraceEntryIterator iterator; 2482 iterator.MoveTo(from - 1); 2483 2484 static char sBuffer[1024]; 2485 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID); 2486 2487 while (TraceEntry* entry = iterator.Next()) { 2488 int32 index = iterator.Index(); 2489 if (index > to) 2490 break; 2491 2492 Action* action = dynamic_cast<Action*>(entry); 2493 if (action != NULL) { 2494 out.Clear(); 2495 out.DumpEntry(action); 2496 continue; 2497 } 2498 2499 BlockData* blockData = dynamic_cast<BlockData*>(entry); 2500 if (blockData == NULL) 2501 continue; 2502 2503 out.Clear(); 2504 2505 const char* dump = out.DumpEntry(entry); 2506 int length = strlen(dump); 2507 if (length > 0 && dump[length - 1] == '\n') 2508 length--; 2509 2510 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump); 2511 2512 if (printStackTrace) { 2513 out.Clear(); 2514 entry->DumpStackTrace(out); 2515 if (out.Size() > 0) 2516 kputs(out.Buffer()); 2517 } 2518 2519 blockData->DumpBlocks(which, offset, size); 2520 } 2521 2522 return 0; 2523 } 2524 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 2525 2526 2527 #endif // DEBUG_BLOCK_CACHE 2528 2529 2530 /*! Traverses through the block_cache list, and returns one cache after the 2531 other. The cache returned is automatically locked when you get it, and 2532 unlocked with the next call to this function. Ignores caches that are in 2533 deletion state. 2534 Returns \c NULL when the end of the list is reached. 2535 */ 2536 static block_cache* 2537 get_next_locked_block_cache(block_cache* last) 2538 { 2539 MutexLocker _(sCachesLock); 2540 2541 block_cache* cache; 2542 if (last != NULL) { 2543 mutex_unlock(&last->lock); 2544 2545 cache = sCaches.GetNext((block_cache*)&sMarkCache); 2546 sCaches.Remove((block_cache*)&sMarkCache); 2547 } else 2548 cache = sCaches.Head(); 2549 2550 if (cache != NULL) { 2551 mutex_lock(&cache->lock); 2552 sCaches.InsertBefore(sCaches.GetNext(cache), (block_cache*)&sMarkCache); 2553 } 2554 2555 return cache; 2556 } 2557 2558 2559 /*! Background thread that continuously checks for pending notifications of 2560 all caches. 2561 Every two seconds, it will also write back up to 64 blocks per cache. 2562 */ 2563 static status_t 2564 block_notifier_and_writer(void* /*data*/) 2565 { 2566 const bigtime_t kDefaultTimeout = 2000000LL; 2567 bigtime_t timeout = kDefaultTimeout; 2568 2569 while (true) { 2570 bigtime_t start = system_time(); 2571 2572 status_t status = acquire_sem_etc(sEventSemaphore, 1, 2573 B_RELATIVE_TIMEOUT, timeout); 2574 if (status == B_OK) { 2575 flush_pending_notifications(); 2576 timeout -= system_time() - start; 2577 continue; 2578 } 2579 2580 // Write 64 blocks of each block_cache roughly every 2 seconds, 2581 // potentially more or less depending on congestion and drive speeds 2582 // (usually much less.) We do not want to queue everything at once 2583 // because a future transaction might then get held up waiting for 2584 // a specific block to be written. 2585 timeout = kDefaultTimeout; 2586 size_t usedMemory; 2587 object_cache_get_usage(sBlockCache, &usedMemory); 2588 2589 block_cache* cache = NULL; 2590 while ((cache = get_next_locked_block_cache(cache)) != NULL) { 2591 // Give some breathing room: wait 2x the length of the potential 2592 // maximum block count-sized write between writes, and also skip 2593 // if there are more than 16 blocks currently being written. 2594 const bigtime_t next = cache->last_block_write 2595 + cache->last_block_write_duration * 2 * 64; 2596 if (cache->busy_writing_count > 16 || system_time() < next) { 2597 if (cache->last_block_write_duration > 0) { 2598 timeout = min_c(timeout, 2599 cache->last_block_write_duration * 2 * 64); 2600 } 2601 continue; 2602 } 2603 2604 BlockWriter writer(cache, 64); 2605 bool hasMoreBlocks = false; 2606 2607 size_t cacheUsedMemory; 2608 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory); 2609 usedMemory += cacheUsedMemory; 2610 2611 if (cache->num_dirty_blocks) { 2612 // This cache is not using transactions, we'll scan the blocks 2613 // directly 2614 BlockTable::Iterator iterator(cache->hash); 2615 2616 while (iterator.HasNext()) { 2617 cached_block* block = iterator.Next(); 2618 if (block->CanBeWritten() && !writer.Add(block)) { 2619 hasMoreBlocks = true; 2620 break; 2621 } 2622 } 2623 } else { 2624 TransactionTable::Iterator iterator(cache->transaction_hash); 2625 2626 while (iterator.HasNext()) { 2627 cache_transaction* transaction = iterator.Next(); 2628 if (transaction->open) { 2629 if (system_time() > transaction->last_used 2630 + kTransactionIdleTime) { 2631 // Transaction is open but idle 2632 notify_transaction_listeners(cache, transaction, 2633 TRANSACTION_IDLE); 2634 } 2635 continue; 2636 } 2637 2638 bool hasLeftOvers; 2639 // we ignore this one 2640 if (!writer.Add(transaction, hasLeftOvers)) { 2641 hasMoreBlocks = true; 2642 break; 2643 } 2644 } 2645 } 2646 2647 writer.Write(); 2648 2649 if (hasMoreBlocks && cache->last_block_write_duration > 0) { 2650 // There are probably still more blocks that we could write, so 2651 // see if we can decrease the timeout. 2652 timeout = min_c(timeout, 2653 cache->last_block_write_duration * 2 * 64); 2654 } 2655 2656 if ((block_cache_used_memory() / B_PAGE_SIZE) 2657 > vm_page_num_pages() / 2) { 2658 // Try to reduce memory usage to half of the available 2659 // RAM at maximum 2660 cache->RemoveUnusedBlocks(1000, 10); 2661 } 2662 } 2663 2664 MutexLocker _(sCachesMemoryUseLock); 2665 sUsedMemory = usedMemory; 2666 } 2667 2668 // never can get here 2669 return B_OK; 2670 } 2671 2672 2673 /*! Notify function for wait_for_notifications(). */ 2674 static void 2675 notify_sync(int32 transactionID, int32 event, void* _cache) 2676 { 2677 block_cache* cache = (block_cache*)_cache; 2678 2679 cache->condition_variable.NotifyOne(); 2680 } 2681 2682 2683 /*! Must be called with the sCachesLock held. */ 2684 static bool 2685 is_valid_cache(block_cache* cache) 2686 { 2687 ASSERT_LOCKED_MUTEX(&sCachesLock); 2688 2689 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 2690 while (iterator.HasNext()) { 2691 if (cache == iterator.Next()) 2692 return true; 2693 } 2694 2695 return false; 2696 } 2697 2698 2699 /*! Waits until all pending notifications are carried out. 2700 Safe to be called from the block writer/notifier thread. 2701 You must not hold the \a cache lock when calling this function. 2702 */ 2703 static void 2704 wait_for_notifications(block_cache* cache) 2705 { 2706 MutexLocker locker(sCachesLock); 2707 2708 if (find_thread(NULL) == sNotifierWriterThread) { 2709 // We're the notifier thread, don't wait, but flush all pending 2710 // notifications directly. 2711 if (is_valid_cache(cache)) 2712 flush_pending_notifications(cache); 2713 return; 2714 } 2715 2716 // add sync notification 2717 cache_notification notification; 2718 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync, 2719 cache); 2720 2721 ConditionVariableEntry entry; 2722 cache->condition_variable.Add(&entry); 2723 2724 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false); 2725 locker.Unlock(); 2726 2727 // wait for notification hook to be called 2728 entry.Wait(); 2729 } 2730 2731 2732 status_t 2733 block_cache_init(void) 2734 { 2735 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block), 2736 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 2737 if (sBlockCache == NULL) 2738 return B_NO_MEMORY; 2739 2740 sCacheNotificationCache = create_object_cache("cache notifications", 2741 sizeof(cache_listener), 8, NULL, NULL, NULL); 2742 if (sCacheNotificationCache == NULL) 2743 return B_NO_MEMORY; 2744 2745 new (&sCaches) DoublyLinkedList<block_cache>; 2746 // manually call constructor 2747 2748 sEventSemaphore = create_sem(0, "block cache event"); 2749 if (sEventSemaphore < B_OK) 2750 return sEventSemaphore; 2751 2752 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer, 2753 "block notifier/writer", B_LOW_PRIORITY, NULL); 2754 if (sNotifierWriterThread >= B_OK) 2755 resume_thread(sNotifierWriterThread); 2756 2757 #if DEBUG_BLOCK_CACHE 2758 add_debugger_command_etc("block_caches", &dump_caches, 2759 "dumps all block caches", "\n", 0); 2760 add_debugger_command_etc("block_cache", &dump_cache, 2761 "dumps a specific block cache", 2762 "[-bt] <cache-address> [block-number]\n" 2763 " -t lists the transactions\n" 2764 " -b lists all blocks\n", 0); 2765 add_debugger_command("cached_block", &dump_cached_block, 2766 "dumps the specified cached block"); 2767 add_debugger_command_etc("transaction", &dump_transaction, 2768 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n" 2769 "Either use a block cache pointer and an ID or a pointer to the transaction.\n" 2770 " -b lists all blocks that are part of this transaction\n", 0); 2771 # if BLOCK_CACHE_BLOCK_TRACING >= 2 2772 add_debugger_command_etc("block_cache_data", &dump_block_data, 2773 "dumps the data blocks logged for the actions", 2774 "[-cpo] <from> [<to> [<offset> [<size>]]]\n" 2775 "If no data specifier is used, all blocks are shown by default.\n" 2776 " -c the current data is shown, if available.\n" 2777 " -p the parent data is shown, if available.\n" 2778 " -o the original data is shown, if available.\n" 2779 " <from> first index of tracing entries to show.\n" 2780 " <to> if given, the last entry. If not, only <from> is shown.\n" 2781 " <offset> the offset of the block data.\n" 2782 " <from> the size of the block data that is dumped\n", 0); 2783 # endif 2784 #endif // DEBUG_BLOCK_CACHE 2785 2786 return B_OK; 2787 } 2788 2789 2790 size_t 2791 block_cache_used_memory(void) 2792 { 2793 MutexLocker _(sCachesMemoryUseLock); 2794 return sUsedMemory; 2795 } 2796 2797 2798 // #pragma mark - public transaction API 2799 2800 2801 int32 2802 cache_start_transaction(void* _cache) 2803 { 2804 block_cache* cache = (block_cache*)_cache; 2805 TransactionLocker locker(cache); 2806 2807 if (cache->last_transaction && cache->last_transaction->open) { 2808 panic("last transaction (%" B_PRId32 ") still open!\n", 2809 cache->last_transaction->id); 2810 } 2811 2812 cache_transaction* transaction = new(std::nothrow) cache_transaction; 2813 if (transaction == NULL) 2814 return B_NO_MEMORY; 2815 2816 transaction->id = atomic_add(&cache->next_transaction_id, 1); 2817 cache->last_transaction = transaction; 2818 2819 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id)); 2820 T(Action("start", cache, transaction)); 2821 2822 cache->transaction_hash->Insert(transaction); 2823 2824 return transaction->id; 2825 } 2826 2827 2828 status_t 2829 cache_sync_transaction(void* _cache, int32 id) 2830 { 2831 block_cache* cache = (block_cache*)_cache; 2832 bool hadBusy; 2833 2834 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id)); 2835 2836 do { 2837 TransactionLocker locker(cache); 2838 hadBusy = false; 2839 2840 BlockWriter writer(cache); 2841 TransactionTable::Iterator iterator(cache->transaction_hash); 2842 2843 while (iterator.HasNext()) { 2844 // close all earlier transactions which haven't been closed yet 2845 cache_transaction* transaction = iterator.Next(); 2846 2847 if (transaction->busy_writing_count != 0) { 2848 hadBusy = true; 2849 continue; 2850 } 2851 if (transaction->id <= id && !transaction->open) { 2852 // write back all of their remaining dirty blocks 2853 T(Action("sync", cache, transaction)); 2854 2855 bool hasLeftOvers; 2856 writer.Add(transaction, hasLeftOvers); 2857 2858 if (hasLeftOvers) { 2859 // This transaction contains blocks that a previous 2860 // transaction is trying to write back in this write run 2861 hadBusy = true; 2862 } 2863 } 2864 } 2865 2866 status_t status = writer.Write(); 2867 if (status != B_OK) 2868 return status; 2869 } while (hadBusy); 2870 2871 wait_for_notifications(cache); 2872 // make sure that all pending TRANSACTION_WRITTEN notifications 2873 // are handled after we return 2874 return B_OK; 2875 } 2876 2877 2878 status_t 2879 cache_end_transaction(void* _cache, int32 id, 2880 transaction_notification_hook hook, void* data) 2881 { 2882 block_cache* cache = (block_cache*)_cache; 2883 TransactionLocker locker(cache); 2884 2885 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id)); 2886 2887 cache_transaction* transaction = lookup_transaction(cache, id); 2888 if (transaction == NULL) { 2889 panic("cache_end_transaction(): invalid transaction ID\n"); 2890 return B_BAD_VALUE; 2891 } 2892 2893 // Write back all pending transaction blocks 2894 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2895 if (status != B_OK) 2896 return status; 2897 2898 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2899 2900 if (hook != NULL 2901 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, 2902 hook, data) != B_OK) { 2903 return B_NO_MEMORY; 2904 } 2905 2906 T(Action("end", cache, transaction)); 2907 2908 // iterate through all blocks and free the unchanged original contents 2909 2910 cached_block* next; 2911 for (cached_block* block = transaction->first_block; block != NULL; 2912 block = next) { 2913 next = block->transaction_next; 2914 ASSERT(block->previous_transaction == NULL); 2915 2916 if (block->discard) { 2917 // This block has been discarded in the transaction 2918 cache->DiscardBlock(block); 2919 transaction->num_blocks--; 2920 continue; 2921 } 2922 2923 if (block->original_data != NULL) { 2924 cache->Free(block->original_data); 2925 block->original_data = NULL; 2926 } 2927 if (block->parent_data != NULL) { 2928 ASSERT(transaction->has_sub_transaction); 2929 cache->FreeBlockParentData(block); 2930 } 2931 2932 // move the block to the previous transaction list 2933 transaction->blocks.Add(block); 2934 2935 block->previous_transaction = transaction; 2936 block->transaction_next = NULL; 2937 block->transaction = NULL; 2938 } 2939 2940 transaction->open = false; 2941 return B_OK; 2942 } 2943 2944 2945 status_t 2946 cache_abort_transaction(void* _cache, int32 id) 2947 { 2948 block_cache* cache = (block_cache*)_cache; 2949 TransactionLocker locker(cache); 2950 2951 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id)); 2952 2953 cache_transaction* transaction = lookup_transaction(cache, id); 2954 if (transaction == NULL) { 2955 panic("cache_abort_transaction(): invalid transaction ID\n"); 2956 return B_BAD_VALUE; 2957 } 2958 2959 T(Abort(cache, transaction)); 2960 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 2961 2962 // iterate through all blocks and restore their original contents 2963 2964 cached_block* block = transaction->first_block; 2965 cached_block* next; 2966 for (; block != NULL; block = next) { 2967 next = block->transaction_next; 2968 2969 if (block->original_data != NULL) { 2970 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of " 2971 "block %" B_PRIdOFF "\n", transaction->id, block->block_number)); 2972 memcpy(block->current_data, block->original_data, 2973 cache->block_size); 2974 cache->Free(block->original_data); 2975 block->original_data = NULL; 2976 } 2977 if (transaction->has_sub_transaction && block->parent_data != NULL) 2978 cache->FreeBlockParentData(block); 2979 2980 block->transaction_next = NULL; 2981 block->transaction = NULL; 2982 block->discard = false; 2983 if (block->previous_transaction == NULL) 2984 block->is_dirty = false; 2985 } 2986 2987 cache->transaction_hash->Remove(transaction); 2988 delete_transaction(cache, transaction); 2989 return B_OK; 2990 } 2991 2992 2993 /*! Acknowledges the current parent transaction, and starts a new transaction 2994 from its sub transaction. 2995 The new transaction also gets a new transaction ID. 2996 */ 2997 int32 2998 cache_detach_sub_transaction(void* _cache, int32 id, 2999 transaction_notification_hook hook, void* data) 3000 { 3001 block_cache* cache = (block_cache*)_cache; 3002 TransactionLocker locker(cache); 3003 3004 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id)); 3005 3006 cache_transaction* transaction = lookup_transaction(cache, id); 3007 if (transaction == NULL) { 3008 panic("cache_detach_sub_transaction(): invalid transaction ID\n"); 3009 return B_BAD_VALUE; 3010 } 3011 if (!transaction->has_sub_transaction) 3012 return B_BAD_VALUE; 3013 3014 // iterate through all blocks and free the unchanged original contents 3015 3016 status_t status = write_blocks_in_previous_transaction(cache, transaction); 3017 if (status != B_OK) 3018 return status; 3019 3020 // create a new transaction for the sub transaction 3021 cache_transaction* newTransaction = new(std::nothrow) cache_transaction; 3022 if (newTransaction == NULL) 3023 return B_NO_MEMORY; 3024 3025 newTransaction->id = atomic_add(&cache->next_transaction_id, 1); 3026 T(Detach(cache, transaction, newTransaction)); 3027 3028 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3029 3030 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook, 3031 data) != B_OK) { 3032 delete newTransaction; 3033 return B_NO_MEMORY; 3034 } 3035 3036 cached_block* last = NULL; 3037 cached_block* next; 3038 for (cached_block* block = transaction->first_block; block != NULL; 3039 block = next) { 3040 next = block->transaction_next; 3041 ASSERT(block->previous_transaction == NULL); 3042 3043 if (block->discard) { 3044 cache->DiscardBlock(block); 3045 transaction->main_num_blocks--; 3046 continue; 3047 } 3048 3049 if (block->parent_data != NULL) { 3050 // The block changed in the parent - free the original data, since 3051 // they will be replaced by what is in current. 3052 ASSERT(block->original_data != NULL); 3053 cache->Free(block->original_data); 3054 3055 if (block->parent_data != block->current_data) { 3056 // The block had been changed in both transactions 3057 block->original_data = block->parent_data; 3058 } else { 3059 // The block has only been changed in the parent 3060 block->original_data = NULL; 3061 } 3062 block->parent_data = NULL; 3063 3064 // move the block to the previous transaction list 3065 transaction->blocks.Add(block); 3066 block->previous_transaction = transaction; 3067 } 3068 3069 if (block->original_data != NULL) { 3070 // This block had been changed in the current sub transaction, 3071 // we need to move this block over to the new transaction. 3072 ASSERT(block->parent_data == NULL); 3073 3074 if (last == NULL) 3075 newTransaction->first_block = block; 3076 else 3077 last->transaction_next = block; 3078 3079 block->transaction = newTransaction; 3080 last = block; 3081 } else 3082 block->transaction = NULL; 3083 3084 block->transaction_next = NULL; 3085 } 3086 3087 newTransaction->num_blocks = transaction->sub_num_blocks; 3088 3089 transaction->open = false; 3090 transaction->has_sub_transaction = false; 3091 transaction->num_blocks = transaction->main_num_blocks; 3092 transaction->sub_num_blocks = 0; 3093 3094 cache->transaction_hash->Insert(newTransaction); 3095 cache->last_transaction = newTransaction; 3096 3097 return newTransaction->id; 3098 } 3099 3100 3101 status_t 3102 cache_abort_sub_transaction(void* _cache, int32 id) 3103 { 3104 block_cache* cache = (block_cache*)_cache; 3105 TransactionLocker locker(cache); 3106 3107 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id)); 3108 3109 cache_transaction* transaction = lookup_transaction(cache, id); 3110 if (transaction == NULL) { 3111 panic("cache_abort_sub_transaction(): invalid transaction ID\n"); 3112 return B_BAD_VALUE; 3113 } 3114 if (!transaction->has_sub_transaction) 3115 return B_BAD_VALUE; 3116 3117 T(Abort(cache, transaction)); 3118 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3119 3120 // revert all changes back to the version of the parent 3121 3122 cached_block* block = transaction->first_block; 3123 cached_block* last = NULL; 3124 cached_block* next; 3125 for (; block != NULL; block = next) { 3126 next = block->transaction_next; 3127 3128 if (block->parent_data == NULL) { 3129 // The parent transaction didn't change the block, but the sub 3130 // transaction did - we need to revert to the original data. 3131 // The block is no longer part of the transaction 3132 if (block->original_data != NULL) { 3133 // The block might not have original data if was empty 3134 memcpy(block->current_data, block->original_data, 3135 cache->block_size); 3136 } 3137 3138 if (last != NULL) 3139 last->transaction_next = next; 3140 else 3141 transaction->first_block = next; 3142 3143 block->transaction_next = NULL; 3144 block->transaction = NULL; 3145 transaction->num_blocks--; 3146 3147 if (block->previous_transaction == NULL) { 3148 cache->Free(block->original_data); 3149 block->original_data = NULL; 3150 block->is_dirty = false; 3151 3152 if (block->ref_count == 0) { 3153 // Move the block into the unused list if possible 3154 block->unused = true; 3155 cache->unused_blocks.Add(block); 3156 cache->unused_block_count++; 3157 } 3158 } 3159 } else { 3160 if (block->parent_data != block->current_data) { 3161 // The block has been changed and must be restored - the block 3162 // is still dirty and part of the transaction 3163 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): " 3164 "restored contents of block %" B_PRIdOFF "\n", 3165 transaction->id, block->block_number)); 3166 memcpy(block->current_data, block->parent_data, 3167 cache->block_size); 3168 cache->Free(block->parent_data); 3169 // The block stays dirty 3170 } 3171 block->parent_data = NULL; 3172 last = block; 3173 } 3174 3175 block->discard = false; 3176 } 3177 3178 // all subsequent changes will go into the main transaction 3179 transaction->has_sub_transaction = false; 3180 transaction->sub_num_blocks = 0; 3181 3182 return B_OK; 3183 } 3184 3185 3186 status_t 3187 cache_start_sub_transaction(void* _cache, int32 id) 3188 { 3189 block_cache* cache = (block_cache*)_cache; 3190 TransactionLocker locker(cache); 3191 3192 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id)); 3193 3194 cache_transaction* transaction = lookup_transaction(cache, id); 3195 if (transaction == NULL) { 3196 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n", 3197 id); 3198 return B_BAD_VALUE; 3199 } 3200 3201 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3202 3203 // move all changed blocks up to the parent 3204 3205 cached_block* block = transaction->first_block; 3206 cached_block* next; 3207 for (; block != NULL; block = next) { 3208 next = block->transaction_next; 3209 3210 if (block->parent_data != NULL) { 3211 // There already is an older sub transaction - we acknowledge 3212 // its changes and move its blocks up to the parent 3213 ASSERT(transaction->has_sub_transaction); 3214 cache->FreeBlockParentData(block); 3215 } 3216 if (block->discard) { 3217 // This block has been discarded in the parent transaction. 3218 // Just throw away any changes made in this transaction, so that 3219 // it can still be reverted to its original contents if needed 3220 ASSERT(block->previous_transaction == NULL); 3221 if (block->original_data != NULL) { 3222 memcpy(block->current_data, block->original_data, 3223 cache->block_size); 3224 3225 cache->Free(block->original_data); 3226 block->original_data = NULL; 3227 } 3228 continue; 3229 } 3230 3231 // we "allocate" the parent data lazily, that means, we don't copy 3232 // the data (and allocate memory for it) until we need to 3233 block->parent_data = block->current_data; 3234 } 3235 3236 // all subsequent changes will go into the sub transaction 3237 transaction->has_sub_transaction = true; 3238 transaction->main_num_blocks = transaction->num_blocks; 3239 transaction->sub_num_blocks = 0; 3240 T(Action("start-sub", cache, transaction)); 3241 3242 return B_OK; 3243 } 3244 3245 3246 /*! Adds a transaction listener that gets notified when the transaction 3247 is ended, aborted, written, or idle as specified by \a events. 3248 The listener gets automatically removed when the transaction ends. 3249 */ 3250 status_t 3251 cache_add_transaction_listener(void* _cache, int32 id, int32 events, 3252 transaction_notification_hook hook, void* data) 3253 { 3254 block_cache* cache = (block_cache*)_cache; 3255 TransactionLocker locker(cache); 3256 3257 cache_transaction* transaction = lookup_transaction(cache, id); 3258 if (transaction == NULL) 3259 return B_BAD_VALUE; 3260 3261 return add_transaction_listener(cache, transaction, events, hook, data); 3262 } 3263 3264 3265 status_t 3266 cache_remove_transaction_listener(void* _cache, int32 id, 3267 transaction_notification_hook hookFunction, void* data) 3268 { 3269 block_cache* cache = (block_cache*)_cache; 3270 TransactionLocker locker(cache); 3271 3272 cache_transaction* transaction = lookup_transaction(cache, id); 3273 if (transaction == NULL) 3274 return B_BAD_VALUE; 3275 3276 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 3277 while (iterator.HasNext()) { 3278 cache_listener* listener = iterator.Next(); 3279 if (listener->data == data && listener->hook == hookFunction) { 3280 iterator.Remove(); 3281 3282 if (listener->events_pending != 0) { 3283 MutexLocker _(sNotificationsLock); 3284 if (listener->events_pending != 0) 3285 cache->pending_notifications.Remove(listener); 3286 } 3287 delete listener; 3288 return B_OK; 3289 } 3290 } 3291 3292 return B_ENTRY_NOT_FOUND; 3293 } 3294 3295 3296 status_t 3297 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly, 3298 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData) 3299 { 3300 cached_block* block = (cached_block*)*_cookie; 3301 block_cache* cache = (block_cache*)_cache; 3302 TransactionLocker locker(cache); 3303 3304 cache_transaction* transaction = lookup_transaction(cache, id); 3305 if (transaction == NULL || !transaction->open) 3306 return B_BAD_VALUE; 3307 3308 if (block == NULL) 3309 block = transaction->first_block; 3310 else 3311 block = block->transaction_next; 3312 3313 if (transaction->has_sub_transaction) { 3314 if (mainOnly) { 3315 // find next block that the parent changed 3316 while (block != NULL && block->parent_data == NULL) 3317 block = block->transaction_next; 3318 } else { 3319 // find next non-discarded block 3320 while (block != NULL && block->discard) 3321 block = block->transaction_next; 3322 } 3323 } 3324 3325 if (block == NULL) 3326 return B_ENTRY_NOT_FOUND; 3327 3328 if (_blockNumber) 3329 *_blockNumber = block->block_number; 3330 if (_data) 3331 *_data = mainOnly ? block->parent_data : block->current_data; 3332 if (_unchangedData) 3333 *_unchangedData = block->original_data; 3334 3335 *_cookie = (addr_t)block; 3336 return B_OK; 3337 } 3338 3339 3340 int32 3341 cache_blocks_in_transaction(void* _cache, int32 id) 3342 { 3343 block_cache* cache = (block_cache*)_cache; 3344 TransactionLocker locker(cache); 3345 3346 cache_transaction* transaction = lookup_transaction(cache, id); 3347 if (transaction == NULL) 3348 return B_BAD_VALUE; 3349 3350 return transaction->num_blocks; 3351 } 3352 3353 3354 /*! Returns the number of blocks that are part of the main transaction. If this 3355 transaction does not have a sub transaction yet, this is the same value as 3356 cache_blocks_in_transaction() would return. 3357 */ 3358 int32 3359 cache_blocks_in_main_transaction(void* _cache, int32 id) 3360 { 3361 block_cache* cache = (block_cache*)_cache; 3362 TransactionLocker locker(cache); 3363 3364 cache_transaction* transaction = lookup_transaction(cache, id); 3365 if (transaction == NULL) 3366 return B_BAD_VALUE; 3367 3368 if (transaction->has_sub_transaction) 3369 return transaction->main_num_blocks; 3370 3371 return transaction->num_blocks; 3372 } 3373 3374 3375 int32 3376 cache_blocks_in_sub_transaction(void* _cache, int32 id) 3377 { 3378 block_cache* cache = (block_cache*)_cache; 3379 TransactionLocker locker(cache); 3380 3381 cache_transaction* transaction = lookup_transaction(cache, id); 3382 if (transaction == NULL) 3383 return B_BAD_VALUE; 3384 3385 return transaction->sub_num_blocks; 3386 } 3387 3388 3389 /*! Check if block is in transaction 3390 */ 3391 bool 3392 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber) 3393 { 3394 block_cache* cache = (block_cache*)_cache; 3395 TransactionLocker locker(cache); 3396 3397 cached_block* block = cache->hash->Lookup(blockNumber); 3398 3399 return (block != NULL && block->transaction != NULL 3400 && block->transaction->id == id); 3401 } 3402 3403 3404 // #pragma mark - public block cache API 3405 3406 3407 void 3408 block_cache_delete(void* _cache, bool allowWrites) 3409 { 3410 block_cache* cache = (block_cache*)_cache; 3411 3412 if (allowWrites) 3413 block_cache_sync(cache); 3414 3415 mutex_lock(&sCachesLock); 3416 sCaches.Remove(cache); 3417 mutex_unlock(&sCachesLock); 3418 3419 mutex_lock(&cache->lock); 3420 3421 // wait for all blocks to become unbusy 3422 wait_for_busy_reading_blocks(cache); 3423 wait_for_busy_writing_blocks(cache); 3424 3425 // free all blocks 3426 3427 cached_block* block = cache->hash->Clear(true); 3428 while (block != NULL) { 3429 cached_block* next = block->next; 3430 cache->FreeBlock(block); 3431 block = next; 3432 } 3433 3434 // free all transactions (they will all be aborted) 3435 3436 cache_transaction* transaction = cache->transaction_hash->Clear(true); 3437 while (transaction != NULL) { 3438 cache_transaction* next = transaction->next; 3439 delete transaction; 3440 transaction = next; 3441 } 3442 3443 delete cache; 3444 } 3445 3446 3447 void* 3448 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly) 3449 { 3450 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize, 3451 readOnly); 3452 if (cache == NULL) 3453 return NULL; 3454 3455 if (cache->Init() != B_OK) { 3456 delete cache; 3457 return NULL; 3458 } 3459 3460 MutexLocker _(sCachesLock); 3461 sCaches.Add(cache); 3462 3463 return cache; 3464 } 3465 3466 3467 status_t 3468 block_cache_sync(void* _cache) 3469 { 3470 block_cache* cache = (block_cache*)_cache; 3471 3472 // We will sync all dirty blocks to disk that have a completed 3473 // transaction or no transaction only 3474 3475 MutexLocker locker(&cache->lock); 3476 3477 BlockWriter writer(cache); 3478 BlockTable::Iterator iterator(cache->hash); 3479 3480 while (iterator.HasNext()) { 3481 cached_block* block = iterator.Next(); 3482 if (block->CanBeWritten()) 3483 writer.Add(block); 3484 } 3485 3486 status_t status = writer.Write(); 3487 3488 locker.Unlock(); 3489 3490 wait_for_notifications(cache); 3491 // make sure that all pending TRANSACTION_WRITTEN notifications 3492 // are handled after we return 3493 return status; 3494 } 3495 3496 3497 status_t 3498 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks) 3499 { 3500 block_cache* cache = (block_cache*)_cache; 3501 3502 // We will sync all dirty blocks to disk that have a completed 3503 // transaction or no transaction only 3504 3505 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 3506 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF 3507 " (max %" B_PRIdOFF ")", 3508 blockNumber, cache->max_blocks - 1); 3509 return B_BAD_VALUE; 3510 } 3511 3512 MutexLocker locker(&cache->lock); 3513 BlockWriter writer(cache); 3514 3515 for (; numBlocks > 0; numBlocks--, blockNumber++) { 3516 cached_block* block = cache->hash->Lookup(blockNumber); 3517 if (block == NULL) 3518 continue; 3519 3520 if (block->CanBeWritten()) 3521 writer.Add(block); 3522 } 3523 3524 status_t status = writer.Write(); 3525 3526 locker.Unlock(); 3527 3528 wait_for_notifications(cache); 3529 // make sure that all pending TRANSACTION_WRITTEN notifications 3530 // are handled after we return 3531 return status; 3532 } 3533 3534 3535 /*! Discards a block from the current transaction or from the cache. 3536 You have to call this function when you no longer use a block, ie. when it 3537 might be reclaimed by the file cache in order to make sure they won't 3538 interfere. 3539 */ 3540 void 3541 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks) 3542 { 3543 // TODO: this could be a nice place to issue the ATA trim command 3544 block_cache* cache = (block_cache*)_cache; 3545 TransactionLocker locker(cache); 3546 3547 BlockWriter writer(cache); 3548 3549 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3550 cached_block* block = cache->hash->Lookup(blockNumber); 3551 if (block != NULL && block->previous_transaction != NULL) 3552 writer.Add(block); 3553 } 3554 3555 writer.Write(); 3556 // TODO: this can fail, too! 3557 3558 blockNumber -= numBlocks; 3559 // reset blockNumber to its original value 3560 3561 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3562 cached_block* block = cache->hash->Lookup(blockNumber); 3563 if (block == NULL) 3564 continue; 3565 3566 ASSERT(block->previous_transaction == NULL); 3567 3568 if (block->unused) { 3569 cache->unused_blocks.Remove(block); 3570 cache->unused_block_count--; 3571 cache->RemoveBlock(block); 3572 } else { 3573 if (block->transaction != NULL && block->parent_data != NULL 3574 && block->parent_data != block->current_data) { 3575 panic("Discarded block %" B_PRIdOFF " has already been changed in this " 3576 "transaction!", blockNumber); 3577 } 3578 3579 // mark it as discarded (in the current transaction only, if any) 3580 block->discard = true; 3581 } 3582 } 3583 } 3584 3585 3586 status_t 3587 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction) 3588 { 3589 block_cache* cache = (block_cache*)_cache; 3590 MutexLocker locker(&cache->lock); 3591 3592 if (cache->read_only) { 3593 panic("tried to make block writable on a read-only cache!"); 3594 return B_ERROR; 3595 } 3596 3597 // TODO: this can be done better! 3598 void* block; 3599 status_t status = get_writable_cached_block(cache, blockNumber, 3600 blockNumber, 1, transaction, false, &block); 3601 if (status == B_OK) { 3602 put_cached_block((block_cache*)_cache, blockNumber); 3603 return B_OK; 3604 } 3605 3606 return status; 3607 } 3608 3609 3610 status_t 3611 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base, 3612 off_t length, int32 transaction, void** _block) 3613 { 3614 block_cache* cache = (block_cache*)_cache; 3615 MutexLocker locker(&cache->lock); 3616 3617 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3618 blockNumber, transaction)); 3619 if (cache->read_only) 3620 panic("tried to get writable block on a read-only cache!"); 3621 3622 return get_writable_cached_block(cache, blockNumber, base, length, 3623 transaction, false, _block); 3624 } 3625 3626 3627 void* 3628 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction) 3629 { 3630 void* block; 3631 if (block_cache_get_writable_etc(_cache, blockNumber, 3632 blockNumber, 1, transaction, &block) == B_OK) 3633 return block; 3634 3635 return NULL; 3636 } 3637 3638 3639 void* 3640 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction) 3641 { 3642 block_cache* cache = (block_cache*)_cache; 3643 MutexLocker locker(&cache->lock); 3644 3645 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3646 blockNumber, transaction)); 3647 if (cache->read_only) 3648 panic("tried to get empty writable block on a read-only cache!"); 3649 3650 void* block; 3651 if (get_writable_cached_block((block_cache*)_cache, blockNumber, 3652 blockNumber, 1, transaction, true, &block) == B_OK) 3653 return block; 3654 3655 return NULL; 3656 } 3657 3658 3659 status_t 3660 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length, 3661 const void** _block) 3662 { 3663 block_cache* cache = (block_cache*)_cache; 3664 MutexLocker locker(&cache->lock); 3665 bool allocated; 3666 3667 cached_block* block; 3668 status_t status = get_cached_block(cache, blockNumber, &allocated, true, 3669 &block); 3670 if (status != B_OK) 3671 return status; 3672 3673 #if BLOCK_CACHE_DEBUG_CHANGED 3674 if (block->compare == NULL) 3675 block->compare = cache->Allocate(); 3676 if (block->compare != NULL) 3677 memcpy(block->compare, block->current_data, cache->block_size); 3678 #endif 3679 TB(Get(cache, block)); 3680 3681 *_block = block->current_data; 3682 return B_OK; 3683 } 3684 3685 3686 const void* 3687 block_cache_get(void* _cache, off_t blockNumber) 3688 { 3689 const void* block; 3690 if (block_cache_get_etc(_cache, blockNumber, blockNumber, 1, &block) 3691 == B_OK) 3692 return block; 3693 3694 return NULL; 3695 } 3696 3697 3698 /*! Changes the internal status of a writable block to \a dirty. This can be 3699 helpful in case you realize you don't need to change that block anymore 3700 for whatever reason. 3701 3702 Note, you must only use this function on blocks that were acquired 3703 writable! 3704 */ 3705 status_t 3706 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty, 3707 int32 transaction) 3708 { 3709 block_cache* cache = (block_cache*)_cache; 3710 MutexLocker locker(&cache->lock); 3711 3712 cached_block* block = cache->hash->Lookup(blockNumber); 3713 if (block == NULL) 3714 return B_BAD_VALUE; 3715 if (block->is_dirty == dirty) { 3716 // there is nothing to do for us 3717 return B_OK; 3718 } 3719 3720 // TODO: not yet implemented 3721 if (dirty) 3722 panic("block_cache_set_dirty(): not yet implemented that way!\n"); 3723 3724 return B_OK; 3725 } 3726 3727 3728 void 3729 block_cache_put(void* _cache, off_t blockNumber) 3730 { 3731 block_cache* cache = (block_cache*)_cache; 3732 MutexLocker locker(&cache->lock); 3733 3734 put_cached_block(cache, blockNumber); 3735 } 3736 3737