1 /* 2 * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <block_cache.h> 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <errno.h> 13 14 #include <KernelExport.h> 15 #include <fs_cache.h> 16 17 #include <condition_variable.h> 18 #include <lock.h> 19 #include <low_resource_manager.h> 20 #include <slab/Slab.h> 21 #include <tracing.h> 22 #include <util/kernel_cpp.h> 23 #include <util/DoublyLinkedList.h> 24 #include <util/AutoLock.h> 25 #include <vm/vm_page.h> 26 27 #include "kernel_debug_config.h" 28 29 30 // TODO: this is a naive but growing implementation to test the API: 31 // block reading/writing is not at all optimized for speed, it will 32 // just read and write single blocks. 33 // TODO: the retrieval/copy of the original data could be delayed until the 34 // new data must be written, ie. in low memory situations. 35 36 //#define TRACE_BLOCK_CACHE 37 #ifdef TRACE_BLOCK_CACHE 38 # define TRACE(x) dprintf x 39 #else 40 # define TRACE(x) ; 41 #endif 42 43 #define TRACE_ALWAYS(x) dprintf x 44 45 // This macro is used for fatal situations that are acceptable in a running 46 // system, like out of memory situations - should only panic for debugging. 47 #define FATAL(x) panic x 48 49 static const bigtime_t kTransactionIdleTime = 2000000LL; 50 // a transaction is considered idle after 2 seconds of inactivity 51 52 53 namespace { 54 55 struct cache_transaction; 56 struct cached_block; 57 struct block_cache; 58 typedef DoublyLinkedListLink<cached_block> block_link; 59 60 struct cached_block { 61 cached_block* next; // next in hash 62 cached_block* transaction_next; 63 block_link link; 64 off_t block_number; 65 void* current_data; 66 // The data that is seen by everyone using the API; this one is always 67 // present. 68 void* original_data; 69 // When in a transaction, this contains the original data from before 70 // the transaction. 71 void* parent_data; 72 // This is a lazily alloced buffer that represents the contents of the 73 // block in the parent transaction. It may point to current_data if the 74 // contents have been changed only in the parent transaction, or, if the 75 // block has been changed in the current sub transaction already, to a 76 // new block containing the contents changed in the parent transaction. 77 // If this is NULL, the block has not been changed in the parent 78 // transaction at all. 79 #if BLOCK_CACHE_DEBUG_CHANGED 80 void* compare; 81 #endif 82 int32 ref_count; 83 int32 last_accessed; 84 bool busy_reading : 1; 85 bool busy_writing : 1; 86 bool is_writing : 1; 87 // Block has been checked out for writing without transactions, and 88 // cannot be written back if set 89 bool is_dirty : 1; 90 bool unused : 1; 91 bool discard : 1; 92 bool busy_reading_waiters : 1; 93 bool busy_writing_waiters : 1; 94 cache_transaction* transaction; 95 // This is the current active transaction, if any, the block is 96 // currently in (meaning was changed as a part of it). 97 cache_transaction* previous_transaction; 98 // This is set to the last transaction that was ended containing this 99 // block. In this case, the block has not yet written back yet, and 100 // the changed data is either in current_data, or original_data -- the 101 // latter if the block is already being part of another transaction. 102 // There can only be one previous transaction, so when the active 103 // transaction ends, the changes of the previous transaction have to 104 // be written back before that transaction becomes the next previous 105 // transaction. 106 107 bool CanBeWritten() const; 108 int32 LastAccess() const 109 { return system_time() / 1000000L - last_accessed; } 110 }; 111 112 typedef DoublyLinkedList<cached_block, 113 DoublyLinkedListMemberGetLink<cached_block, 114 &cached_block::link> > block_list; 115 116 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> { 117 int32 transaction_id; 118 int32 events_pending; 119 int32 events; 120 transaction_notification_hook hook; 121 void* data; 122 bool delete_after_event; 123 }; 124 125 typedef DoublyLinkedList<cache_notification> NotificationList; 126 127 struct BlockHash { 128 typedef off_t KeyType; 129 typedef cached_block ValueType; 130 131 size_t HashKey(KeyType key) const 132 { 133 return key; 134 } 135 136 size_t Hash(ValueType* block) const 137 { 138 return block->block_number; 139 } 140 141 bool Compare(KeyType key, ValueType* block) const 142 { 143 return block->block_number == key; 144 } 145 146 ValueType*& GetLink(ValueType* value) const 147 { 148 return value->next; 149 } 150 }; 151 152 typedef BOpenHashTable<BlockHash> BlockTable; 153 154 155 struct TransactionHash { 156 typedef int32 KeyType; 157 typedef cache_transaction ValueType; 158 159 size_t HashKey(KeyType key) const 160 { 161 return key; 162 } 163 164 size_t Hash(ValueType* transaction) const; 165 bool Compare(KeyType key, ValueType* transaction) const; 166 ValueType*& GetLink(ValueType* value) const; 167 }; 168 169 typedef BOpenHashTable<TransactionHash> TransactionTable; 170 171 172 struct block_cache : DoublyLinkedListLinkImpl<block_cache> { 173 BlockTable* hash; 174 mutex lock; 175 int fd; 176 off_t max_blocks; 177 size_t block_size; 178 int32 next_transaction_id; 179 cache_transaction* last_transaction; 180 TransactionTable* transaction_hash; 181 182 object_cache* buffer_cache; 183 block_list unused_blocks; 184 uint32 unused_block_count; 185 186 ConditionVariable busy_reading_condition; 187 uint32 busy_reading_count; 188 bool busy_reading_waiters; 189 190 ConditionVariable busy_writing_condition; 191 uint32 busy_writing_count; 192 bool busy_writing_waiters; 193 194 uint32 num_dirty_blocks; 195 bool read_only; 196 197 NotificationList pending_notifications; 198 ConditionVariable condition_variable; 199 200 block_cache(int fd, off_t numBlocks, size_t blockSize, 201 bool readOnly); 202 ~block_cache(); 203 204 status_t Init(); 205 206 void Free(void* buffer); 207 void* Allocate(); 208 void FreeBlock(cached_block* block); 209 cached_block* NewBlock(off_t blockNumber); 210 void FreeBlockParentData(cached_block* block); 211 212 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0); 213 void RemoveBlock(cached_block* block); 214 void DiscardBlock(cached_block* block); 215 216 private: 217 static void _LowMemoryHandler(void* data, uint32 resources, 218 int32 level); 219 cached_block* _GetUnusedBlock(); 220 }; 221 222 struct cache_listener; 223 typedef DoublyLinkedListLink<cache_listener> listener_link; 224 225 struct cache_listener : cache_notification { 226 listener_link link; 227 }; 228 229 typedef DoublyLinkedList<cache_listener, 230 DoublyLinkedListMemberGetLink<cache_listener, 231 &cache_listener::link> > ListenerList; 232 233 234 struct cache_transaction { 235 cache_transaction(); 236 237 cache_transaction* next; 238 int32 id; 239 int32 num_blocks; 240 int32 main_num_blocks; 241 int32 sub_num_blocks; 242 cached_block* first_block; 243 block_list blocks; 244 ListenerList listeners; 245 bool open; 246 bool has_sub_transaction; 247 bigtime_t last_used; 248 int32 busy_writing_count; 249 }; 250 251 252 class BlockWriter { 253 public: 254 BlockWriter(block_cache* cache, 255 size_t max = SIZE_MAX); 256 ~BlockWriter(); 257 258 bool Add(cached_block* block, 259 cache_transaction* transaction = NULL); 260 bool Add(cache_transaction* transaction, 261 bool& hasLeftOvers); 262 263 status_t Write(cache_transaction* transaction = NULL, 264 bool canUnlock = true); 265 266 bool DeletedTransaction() const 267 { return fDeletedTransaction; } 268 269 static status_t WriteBlock(block_cache* cache, 270 cached_block* block); 271 272 private: 273 void* _Data(cached_block* block) const; 274 status_t _WriteBlock(cached_block* block); 275 void _BlockDone(cached_block* block, 276 cache_transaction* transaction); 277 void _UnmarkWriting(cached_block* block); 278 279 static int _CompareBlocks(const void* _blockA, 280 const void* _blockB); 281 282 private: 283 static const size_t kBufferSize = 64; 284 285 block_cache* fCache; 286 cached_block* fBuffer[kBufferSize]; 287 cached_block** fBlocks; 288 size_t fCount; 289 size_t fTotal; 290 size_t fCapacity; 291 size_t fMax; 292 status_t fStatus; 293 bool fDeletedTransaction; 294 }; 295 296 297 class TransactionLocking { 298 public: 299 inline bool Lock(block_cache* cache) 300 { 301 mutex_lock(&cache->lock); 302 303 while (cache->busy_writing_count != 0) { 304 // wait for all blocks to be written 305 ConditionVariableEntry entry; 306 cache->busy_writing_condition.Add(&entry); 307 cache->busy_writing_waiters = true; 308 309 mutex_unlock(&cache->lock); 310 311 entry.Wait(); 312 313 mutex_lock(&cache->lock); 314 } 315 316 return true; 317 } 318 319 inline void Unlock(block_cache* cache) 320 { 321 mutex_unlock(&cache->lock); 322 } 323 }; 324 325 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker; 326 327 } // namespace 328 329 330 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 331 namespace BlockTracing { 332 333 class Action : public AbstractTraceEntry { 334 public: 335 Action(block_cache* cache, cached_block* block) 336 : 337 fCache(cache), 338 fBlockNumber(block->block_number), 339 fIsDirty(block->is_dirty), 340 fHasOriginal(block->original_data != NULL), 341 fHasParent(block->parent_data != NULL), 342 fTransactionID(-1), 343 fPreviousID(-1) 344 { 345 if (block->transaction != NULL) 346 fTransactionID = block->transaction->id; 347 if (block->previous_transaction != NULL) 348 fPreviousID = block->previous_transaction->id; 349 } 350 351 virtual void AddDump(TraceOutput& out) 352 { 353 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32 354 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber, 355 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-', 356 fHasParent ? 'p' : '-', fTransactionID, fPreviousID); 357 } 358 359 virtual const char* _Action() const = 0; 360 361 private: 362 block_cache* fCache; 363 uint64 fBlockNumber; 364 bool fIsDirty; 365 bool fHasOriginal; 366 bool fHasParent; 367 int32 fTransactionID; 368 int32 fPreviousID; 369 }; 370 371 class Get : public Action { 372 public: 373 Get(block_cache* cache, cached_block* block) 374 : 375 Action(cache, block) 376 { 377 Initialized(); 378 } 379 380 virtual const char* _Action() const { return "get"; } 381 }; 382 383 class Put : public Action { 384 public: 385 Put(block_cache* cache, cached_block* block) 386 : 387 Action(cache, block) 388 { 389 Initialized(); 390 } 391 392 virtual const char* _Action() const { return "put"; } 393 }; 394 395 class Read : public Action { 396 public: 397 Read(block_cache* cache, cached_block* block) 398 : 399 Action(cache, block) 400 { 401 Initialized(); 402 } 403 404 virtual const char* _Action() const { return "read"; } 405 }; 406 407 class Write : public Action { 408 public: 409 Write(block_cache* cache, cached_block* block) 410 : 411 Action(cache, block) 412 { 413 Initialized(); 414 } 415 416 virtual const char* _Action() const { return "write"; } 417 }; 418 419 class Flush : public Action { 420 public: 421 Flush(block_cache* cache, cached_block* block, bool getUnused = false) 422 : 423 Action(cache, block), 424 fGetUnused(getUnused) 425 { 426 Initialized(); 427 } 428 429 virtual const char* _Action() const 430 { return fGetUnused ? "get-unused" : "flush"; } 431 432 private: 433 bool fGetUnused; 434 }; 435 436 class Error : public AbstractTraceEntry { 437 public: 438 Error(block_cache* cache, uint64 blockNumber, const char* message, 439 status_t status = B_OK) 440 : 441 fCache(cache), 442 fBlockNumber(blockNumber), 443 fMessage(message), 444 fStatus(status) 445 { 446 Initialized(); 447 } 448 449 virtual void AddDump(TraceOutput& out) 450 { 451 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s", 452 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "", 453 fStatus != B_OK ? strerror(fStatus) : ""); 454 } 455 456 private: 457 block_cache* fCache; 458 uint64 fBlockNumber; 459 const char* fMessage; 460 status_t fStatus; 461 }; 462 463 #if BLOCK_CACHE_BLOCK_TRACING >= 2 464 class BlockData : public AbstractTraceEntry { 465 public: 466 enum { 467 kCurrent = 0x01, 468 kParent = 0x02, 469 kOriginal = 0x04 470 }; 471 472 BlockData(block_cache* cache, cached_block* block, const char* message) 473 : 474 fCache(cache), 475 fSize(cache->block_size), 476 fBlockNumber(block->block_number), 477 fMessage(message) 478 { 479 _Allocate(fCurrent, block->current_data); 480 _Allocate(fParent, block->parent_data); 481 _Allocate(fOriginal, block->original_data); 482 483 #if KTRACE_PRINTF_STACK_TRACE 484 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 485 false); 486 #endif 487 488 Initialized(); 489 } 490 491 virtual void AddDump(TraceOutput& out) 492 { 493 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s", 494 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-', 495 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-', 496 fMessage); 497 } 498 499 #if KTRACE_PRINTF_STACK_TRACE 500 virtual void DumpStackTrace(TraceOutput& out) 501 { 502 out.PrintStackTrace(fStackTrace); 503 } 504 #endif 505 506 void DumpBlocks(uint32 which, uint32 offset, uint32 size) 507 { 508 if ((which & kCurrent) != 0) 509 DumpBlock(kCurrent, offset, size); 510 if ((which & kParent) != 0) 511 DumpBlock(kParent, offset, size); 512 if ((which & kOriginal) != 0) 513 DumpBlock(kOriginal, offset, size); 514 } 515 516 void DumpBlock(uint32 which, uint32 offset, uint32 size) 517 { 518 if (offset > fSize) { 519 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize); 520 return; 521 } 522 if (offset + size > fSize) 523 size = fSize - offset; 524 525 const char* label; 526 uint8* data; 527 528 if ((which & kCurrent) != 0) { 529 label = "current"; 530 data = fCurrent; 531 } else if ((which & kParent) != 0) { 532 label = "parent"; 533 data = fParent; 534 } else if ((which & kOriginal) != 0) { 535 label = "original"; 536 data = fOriginal; 537 } else 538 return; 539 540 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size); 541 542 static const uint32 kBlockSize = 16; 543 data += offset; 544 545 for (uint32 i = 0; i < size;) { 546 int start = i; 547 548 kprintf(" %04" B_PRIx32 " ", i); 549 for (; i < start + kBlockSize; i++) { 550 if (!(i % 4)) 551 kprintf(" "); 552 553 if (i >= size) 554 kprintf(" "); 555 else 556 kprintf("%02x", data[i]); 557 } 558 559 kprintf("\n"); 560 } 561 } 562 563 private: 564 void _Allocate(uint8*& target, void* source) 565 { 566 if (source == NULL) { 567 target = NULL; 568 return; 569 } 570 571 target = alloc_tracing_buffer_memcpy(source, fSize, false); 572 } 573 574 block_cache* fCache; 575 uint32 fSize; 576 uint64 fBlockNumber; 577 const char* fMessage; 578 uint8* fCurrent; 579 uint8* fParent; 580 uint8* fOriginal; 581 #if KTRACE_PRINTF_STACK_TRACE 582 tracing_stack_trace* fStackTrace; 583 #endif 584 }; 585 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 586 587 } // namespace BlockTracing 588 589 # define TB(x) new(std::nothrow) BlockTracing::x; 590 #else 591 # define TB(x) ; 592 #endif 593 594 #if BLOCK_CACHE_BLOCK_TRACING >= 2 595 # define TB2(x) new(std::nothrow) BlockTracing::x; 596 #else 597 # define TB2(x) ; 598 #endif 599 600 601 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 602 namespace TransactionTracing { 603 604 class Action : public AbstractTraceEntry { 605 public: 606 Action(const char* label, block_cache* cache, 607 cache_transaction* transaction) 608 : 609 fCache(cache), 610 fTransaction(transaction), 611 fID(transaction->id), 612 fSub(transaction->has_sub_transaction), 613 fNumBlocks(transaction->num_blocks), 614 fSubNumBlocks(transaction->sub_num_blocks) 615 { 616 strlcpy(fLabel, label, sizeof(fLabel)); 617 Initialized(); 618 } 619 620 virtual void AddDump(TraceOutput& out) 621 { 622 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s" 623 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction, 624 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks); 625 } 626 627 private: 628 char fLabel[12]; 629 block_cache* fCache; 630 cache_transaction* fTransaction; 631 int32 fID; 632 bool fSub; 633 int32 fNumBlocks; 634 int32 fSubNumBlocks; 635 }; 636 637 class Detach : public AbstractTraceEntry { 638 public: 639 Detach(block_cache* cache, cache_transaction* transaction, 640 cache_transaction* newTransaction) 641 : 642 fCache(cache), 643 fTransaction(transaction), 644 fID(transaction->id), 645 fSub(transaction->has_sub_transaction), 646 fNewTransaction(newTransaction), 647 fNewID(newTransaction->id) 648 { 649 Initialized(); 650 } 651 652 virtual void AddDump(TraceOutput& out) 653 { 654 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")" 655 "from transaction %p (id %" B_PRId32 ")%s", 656 fCache, fNewTransaction, fNewID, fTransaction, fID, 657 fSub ? " sub" : ""); 658 } 659 660 private: 661 block_cache* fCache; 662 cache_transaction* fTransaction; 663 int32 fID; 664 bool fSub; 665 cache_transaction* fNewTransaction; 666 int32 fNewID; 667 }; 668 669 class Abort : public AbstractTraceEntry { 670 public: 671 Abort(block_cache* cache, cache_transaction* transaction) 672 : 673 fCache(cache), 674 fTransaction(transaction), 675 fID(transaction->id), 676 fNumBlocks(0) 677 { 678 bool isSub = transaction->has_sub_transaction; 679 fNumBlocks = isSub ? transaction->sub_num_blocks 680 : transaction->num_blocks; 681 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t)); 682 if (fBlocks != NULL) { 683 cached_block* block = transaction->first_block; 684 for (int32 i = 0; block != NULL && i < fNumBlocks; 685 block = block->transaction_next) { 686 fBlocks[i++] = block->block_number; 687 } 688 } else 689 fNumBlocks = 0; 690 691 #if KTRACE_PRINTF_STACK_TRACE 692 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 693 false); 694 #endif 695 696 Initialized(); 697 } 698 699 virtual void AddDump(TraceOutput& out) 700 { 701 out.Print("block cache %p, abort transaction " 702 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID); 703 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++) 704 out.Print(" %" B_PRIdOFF, fBlocks[i]); 705 } 706 707 #if KTRACE_PRINTF_STACK_TRACE 708 virtual void DumpStackTrace(TraceOutput& out) 709 { 710 out.PrintStackTrace(fStackTrace); 711 } 712 #endif 713 714 private: 715 block_cache* fCache; 716 cache_transaction* fTransaction; 717 int32 fID; 718 off_t* fBlocks; 719 int32 fNumBlocks; 720 #if KTRACE_PRINTF_STACK_TRACE 721 tracing_stack_trace* fStackTrace; 722 #endif 723 }; 724 725 } // namespace TransactionTracing 726 727 # define T(x) new(std::nothrow) TransactionTracing::x; 728 #else 729 # define T(x) ; 730 #endif 731 732 733 static DoublyLinkedList<block_cache> sCaches; 734 static mutex sCachesLock = MUTEX_INITIALIZER("block caches"); 735 static mutex sCachesMemoryUseLock 736 = MUTEX_INITIALIZER("block caches memory use"); 737 static size_t sUsedMemory; 738 static sem_id sEventSemaphore; 739 static mutex sNotificationsLock 740 = MUTEX_INITIALIZER("block cache notifications"); 741 static thread_id sNotifierWriterThread; 742 static DoublyLinkedListLink<block_cache> sMarkCache; 743 // TODO: this only works if the link is the first entry of block_cache 744 static object_cache* sBlockCache; 745 746 747 // #pragma mark - notifications/listener 748 749 750 /*! Checks whether or not this is an event that closes a transaction. */ 751 static inline bool 752 is_closing_event(int32 event) 753 { 754 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0; 755 } 756 757 758 static inline bool 759 is_written_event(int32 event) 760 { 761 return (event & TRANSACTION_WRITTEN) != 0; 762 } 763 764 765 /*! From the specified \a notification, it will remove the lowest pending 766 event, and return that one in \a _event. 767 If there is no pending event anymore, it will return \c false. 768 */ 769 static bool 770 get_next_pending_event(cache_notification* notification, int32* _event) 771 { 772 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) { 773 int32 pending = atomic_and(¬ification->events_pending, 774 ~eventMask); 775 776 bool more = (pending & ~eventMask) != 0; 777 778 if ((pending & eventMask) != 0) { 779 *_event = eventMask; 780 return more; 781 } 782 } 783 784 return false; 785 } 786 787 788 static void 789 flush_pending_notifications(block_cache* cache) 790 { 791 ASSERT_LOCKED_MUTEX(&sCachesLock); 792 793 while (true) { 794 MutexLocker locker(sNotificationsLock); 795 796 cache_notification* notification = cache->pending_notifications.Head(); 797 if (notification == NULL) 798 return; 799 800 bool deleteAfterEvent = false; 801 int32 event = -1; 802 if (!get_next_pending_event(notification, &event)) { 803 // remove the notification if this was the last pending event 804 cache->pending_notifications.Remove(notification); 805 deleteAfterEvent = notification->delete_after_event; 806 } 807 808 if (event >= 0) { 809 // Notify listener, we need to copy the notification, as it might 810 // be removed when we unlock the list. 811 cache_notification copy = *notification; 812 locker.Unlock(); 813 814 copy.hook(copy.transaction_id, event, copy.data); 815 816 locker.Lock(); 817 } 818 819 if (deleteAfterEvent) 820 delete notification; 821 } 822 } 823 824 825 /*! Flushes all pending notifications by calling the appropriate hook 826 functions. 827 Must not be called with a cache lock held. 828 */ 829 static void 830 flush_pending_notifications() 831 { 832 MutexLocker _(sCachesLock); 833 834 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 835 while (iterator.HasNext()) { 836 block_cache* cache = iterator.Next(); 837 838 flush_pending_notifications(cache); 839 } 840 } 841 842 843 /*! Initializes the \a notification as specified. */ 844 static void 845 set_notification(cache_transaction* transaction, 846 cache_notification ¬ification, int32 events, 847 transaction_notification_hook hook, void* data) 848 { 849 notification.transaction_id = transaction != NULL ? transaction->id : -1; 850 notification.events_pending = 0; 851 notification.events = events; 852 notification.hook = hook; 853 notification.data = data; 854 notification.delete_after_event = false; 855 } 856 857 858 /*! Makes sure the notification is deleted. It either deletes it directly, 859 when possible, or marks it for deletion if the notification is pending. 860 */ 861 static void 862 delete_notification(cache_notification* notification) 863 { 864 MutexLocker locker(sNotificationsLock); 865 866 if (notification->events_pending != 0) 867 notification->delete_after_event = true; 868 else 869 delete notification; 870 } 871 872 873 /*! Adds the notification to the pending notifications list, or, if it's 874 already part of it, updates its events_pending field. 875 Also marks the notification to be deleted if \a deleteNotification 876 is \c true. 877 Triggers the notifier thread to run. 878 */ 879 static void 880 add_notification(block_cache* cache, cache_notification* notification, 881 int32 event, bool deleteNotification) 882 { 883 if (notification->hook == NULL) 884 return; 885 886 int32 pending = atomic_or(¬ification->events_pending, event); 887 if (pending == 0) { 888 // not yet part of the notification list 889 MutexLocker locker(sNotificationsLock); 890 if (deleteNotification) 891 notification->delete_after_event = true; 892 cache->pending_notifications.Add(notification); 893 } else if (deleteNotification) { 894 // we might need to delete it ourselves if we're late 895 delete_notification(notification); 896 } 897 898 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE); 899 // We're probably still holding some locks that makes rescheduling 900 // not a good idea at this point. 901 } 902 903 904 /*! Notifies all interested listeners of this transaction about the \a event. 905 If \a event is a closing event (ie. TRANSACTION_ENDED, and 906 TRANSACTION_ABORTED), all listeners except those listening to 907 TRANSACTION_WRITTEN will be removed. 908 */ 909 static void 910 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction, 911 int32 event) 912 { 913 T(Action("notify", cache, transaction)); 914 915 bool isClosing = is_closing_event(event); 916 bool isWritten = is_written_event(event); 917 918 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 919 while (iterator.HasNext()) { 920 cache_listener* listener = iterator.Next(); 921 922 bool remove = (isClosing && !is_written_event(listener->events)) 923 || (isWritten && is_written_event(listener->events)); 924 if (remove) 925 iterator.Remove(); 926 927 if ((listener->events & event) != 0) 928 add_notification(cache, listener, event, remove); 929 else if (remove) 930 delete_notification(listener); 931 } 932 } 933 934 935 /*! Removes and deletes all listeners that are still monitoring this 936 transaction. 937 */ 938 static void 939 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction) 940 { 941 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 942 while (iterator.HasNext()) { 943 cache_listener* listener = iterator.Next(); 944 iterator.Remove(); 945 946 delete_notification(listener); 947 } 948 } 949 950 951 static status_t 952 add_transaction_listener(block_cache* cache, cache_transaction* transaction, 953 int32 events, transaction_notification_hook hookFunction, void* data) 954 { 955 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 956 while (iterator.HasNext()) { 957 cache_listener* listener = iterator.Next(); 958 959 if (listener->data == data && listener->hook == hookFunction) { 960 // this listener already exists, just update it 961 listener->events |= events; 962 return B_OK; 963 } 964 } 965 966 cache_listener* listener = new(std::nothrow) cache_listener; 967 if (listener == NULL) 968 return B_NO_MEMORY; 969 970 set_notification(transaction, *listener, events, hookFunction, data); 971 transaction->listeners.Add(listener); 972 return B_OK; 973 } 974 975 976 // #pragma mark - private transaction 977 978 979 cache_transaction::cache_transaction() 980 { 981 num_blocks = 0; 982 main_num_blocks = 0; 983 sub_num_blocks = 0; 984 first_block = NULL; 985 open = true; 986 has_sub_transaction = false; 987 last_used = system_time(); 988 busy_writing_count = 0; 989 } 990 991 992 static void 993 delete_transaction(block_cache* cache, cache_transaction* transaction) 994 { 995 if (cache->last_transaction == transaction) 996 cache->last_transaction = NULL; 997 998 remove_transaction_listeners(cache, transaction); 999 delete transaction; 1000 } 1001 1002 1003 static cache_transaction* 1004 lookup_transaction(block_cache* cache, int32 id) 1005 { 1006 return cache->transaction_hash->Lookup(id); 1007 } 1008 1009 1010 size_t TransactionHash::Hash(cache_transaction* transaction) const 1011 { 1012 return transaction->id; 1013 } 1014 1015 1016 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const 1017 { 1018 return transaction->id == key; 1019 } 1020 1021 1022 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const 1023 { 1024 return value->next; 1025 } 1026 1027 1028 /*! Writes back any changes made to blocks in \a transaction that are still 1029 part of a previous transacton. 1030 */ 1031 static status_t 1032 write_blocks_in_previous_transaction(block_cache* cache, 1033 cache_transaction* transaction) 1034 { 1035 BlockWriter writer(cache); 1036 1037 cached_block* block = transaction->first_block; 1038 for (; block != NULL; block = block->transaction_next) { 1039 if (block->previous_transaction != NULL) { 1040 // need to write back pending changes 1041 writer.Add(block); 1042 } 1043 } 1044 1045 return writer.Write(); 1046 } 1047 1048 1049 // #pragma mark - cached_block 1050 1051 1052 bool 1053 cached_block::CanBeWritten() const 1054 { 1055 return !busy_writing && !busy_reading 1056 && (previous_transaction != NULL 1057 || (transaction == NULL && is_dirty && !is_writing)); 1058 } 1059 1060 1061 // #pragma mark - BlockWriter 1062 1063 1064 BlockWriter::BlockWriter(block_cache* cache, size_t max) 1065 : 1066 fCache(cache), 1067 fBlocks(fBuffer), 1068 fCount(0), 1069 fTotal(0), 1070 fCapacity(kBufferSize), 1071 fMax(max), 1072 fStatus(B_OK), 1073 fDeletedTransaction(false) 1074 { 1075 } 1076 1077 1078 BlockWriter::~BlockWriter() 1079 { 1080 if (fBlocks != fBuffer) 1081 free(fBlocks); 1082 } 1083 1084 1085 /*! Adds the specified block to the to be written array. If no more blocks can 1086 be added, false is returned, otherwise true. 1087 */ 1088 bool 1089 BlockWriter::Add(cached_block* block, cache_transaction* transaction) 1090 { 1091 ASSERT(block->CanBeWritten()); 1092 1093 if (fTotal == fMax) 1094 return false; 1095 1096 if (fCount >= fCapacity) { 1097 // Enlarge array if necessary 1098 cached_block** newBlocks; 1099 size_t newCapacity = max_c(256, fCapacity * 2); 1100 if (fBlocks == fBuffer) 1101 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*)); 1102 else { 1103 newBlocks = (cached_block**)realloc(fBlocks, 1104 newCapacity * sizeof(void*)); 1105 } 1106 1107 if (newBlocks == NULL) { 1108 // Allocating a larger array failed - we need to write back what 1109 // we have synchronously now (this will also clear the array) 1110 Write(transaction, false); 1111 } else { 1112 if (fBlocks == fBuffer) 1113 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*)); 1114 1115 fBlocks = newBlocks; 1116 fCapacity = newCapacity; 1117 } 1118 } 1119 1120 fBlocks[fCount++] = block; 1121 fTotal++; 1122 block->busy_writing = true; 1123 fCache->busy_writing_count++; 1124 if (block->previous_transaction != NULL) 1125 block->previous_transaction->busy_writing_count++; 1126 1127 return true; 1128 } 1129 1130 1131 /*! Adds all blocks of the specified transaction to the to be written array. 1132 If no more blocks can be added, false is returned, otherwise true. 1133 */ 1134 bool 1135 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers) 1136 { 1137 ASSERT(!transaction->open); 1138 1139 if (transaction->busy_writing_count != 0) { 1140 hasLeftOvers = true; 1141 return true; 1142 } 1143 1144 hasLeftOvers = false; 1145 1146 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 1147 while (cached_block* block = blockIterator.Next()) { 1148 if (!block->CanBeWritten()) { 1149 // This block was already part of a previous transaction within this 1150 // writer 1151 hasLeftOvers = true; 1152 continue; 1153 } 1154 if (!Add(block, transaction)) 1155 return false; 1156 1157 if (DeletedTransaction()) 1158 break; 1159 } 1160 1161 return true; 1162 } 1163 1164 1165 /*! Cache must be locked when calling this method, but it will be unlocked 1166 while the blocks are written back. 1167 */ 1168 status_t 1169 BlockWriter::Write(cache_transaction* transaction, bool canUnlock) 1170 { 1171 if (fCount == 0) 1172 return B_OK; 1173 1174 if (canUnlock) 1175 mutex_unlock(&fCache->lock); 1176 1177 // Sort blocks in their on-disk order 1178 // TODO: ideally, this should be handled by the I/O scheduler 1179 1180 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks); 1181 fDeletedTransaction = false; 1182 1183 for (uint32 i = 0; i < fCount; i++) { 1184 status_t status = _WriteBlock(fBlocks[i]); 1185 if (status != B_OK) { 1186 // propagate to global error handling 1187 if (fStatus == B_OK) 1188 fStatus = status; 1189 1190 _UnmarkWriting(fBlocks[i]); 1191 fBlocks[i] = NULL; 1192 // This block will not be marked clean 1193 } 1194 } 1195 1196 if (canUnlock) 1197 mutex_lock(&fCache->lock); 1198 1199 for (uint32 i = 0; i < fCount; i++) 1200 _BlockDone(fBlocks[i], transaction); 1201 1202 fCount = 0; 1203 return fStatus; 1204 } 1205 1206 1207 /*! Writes the specified \a block back to disk. It will always only write back 1208 the oldest change of the block if it is part of more than one transaction. 1209 It will automatically send out TRANSACTION_WRITTEN notices, as well as 1210 delete transactions when they are no longer used, and \a deleteTransaction 1211 is \c true. 1212 */ 1213 /*static*/ status_t 1214 BlockWriter::WriteBlock(block_cache* cache, cached_block* block) 1215 { 1216 BlockWriter writer(cache); 1217 1218 writer.Add(block); 1219 return writer.Write(); 1220 } 1221 1222 1223 void* 1224 BlockWriter::_Data(cached_block* block) const 1225 { 1226 return block->previous_transaction != NULL && block->original_data != NULL 1227 ? block->original_data : block->current_data; 1228 // We first need to write back changes from previous transactions 1229 } 1230 1231 1232 status_t 1233 BlockWriter::_WriteBlock(cached_block* block) 1234 { 1235 ASSERT(block->busy_writing); 1236 1237 TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number)); 1238 TB(Write(fCache, block)); 1239 TB2(BlockData(fCache, block, "before write")); 1240 1241 size_t blockSize = fCache->block_size; 1242 1243 ssize_t written = write_pos(fCache->fd, 1244 block->block_number * blockSize, _Data(block), blockSize); 1245 1246 if (written != (ssize_t)blockSize) { 1247 TB(Error(fCache, block->block_number, "write failed", written)); 1248 TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number, 1249 strerror(errno))); 1250 if (written < 0) 1251 return errno; 1252 1253 return B_IO_ERROR; 1254 } 1255 1256 return B_OK; 1257 } 1258 1259 1260 void 1261 BlockWriter::_BlockDone(cached_block* block, 1262 cache_transaction* transaction) 1263 { 1264 if (block == NULL) { 1265 // An error occured when trying to write this block 1266 return; 1267 } 1268 1269 if (fCache->num_dirty_blocks > 0) 1270 fCache->num_dirty_blocks--; 1271 1272 if (_Data(block) == block->current_data) 1273 block->is_dirty = false; 1274 1275 _UnmarkWriting(block); 1276 1277 cache_transaction* previous = block->previous_transaction; 1278 if (previous != NULL) { 1279 previous->blocks.Remove(block); 1280 block->previous_transaction = NULL; 1281 1282 if (block->original_data != NULL && block->transaction == NULL) { 1283 // This block is not part of a transaction, so it does not need 1284 // its original pointer anymore. 1285 fCache->Free(block->original_data); 1286 block->original_data = NULL; 1287 } 1288 1289 // Has the previous transaction been finished with that write? 1290 if (--previous->num_blocks == 0) { 1291 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id)); 1292 T(Action("written", fCache, previous)); 1293 1294 notify_transaction_listeners(fCache, previous, 1295 TRANSACTION_WRITTEN); 1296 1297 if (transaction != NULL) { 1298 // This function is called while iterating transaction_hash. We 1299 // use RemoveUnchecked so the iterator is still valid. A regular 1300 // Remove can trigger a resize of the hash table which would 1301 // result in the linked items in the table changing order. 1302 fCache->transaction_hash->RemoveUnchecked(transaction); 1303 } else 1304 fCache->transaction_hash->Remove(previous); 1305 1306 delete_transaction(fCache, previous); 1307 fDeletedTransaction = true; 1308 } 1309 } 1310 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) { 1311 // the block is no longer used 1312 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1313 block->unused = true; 1314 fCache->unused_blocks.Add(block); 1315 fCache->unused_block_count++; 1316 } 1317 1318 TB2(BlockData(fCache, block, "after write")); 1319 } 1320 1321 1322 void 1323 BlockWriter::_UnmarkWriting(cached_block* block) 1324 { 1325 block->busy_writing = false; 1326 if (block->previous_transaction != NULL) 1327 block->previous_transaction->busy_writing_count--; 1328 fCache->busy_writing_count--; 1329 1330 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0) 1331 || block->busy_writing_waiters) { 1332 fCache->busy_writing_waiters = false; 1333 block->busy_writing_waiters = false; 1334 fCache->busy_writing_condition.NotifyAll(); 1335 } 1336 } 1337 1338 1339 /*static*/ int 1340 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB) 1341 { 1342 cached_block* blockA = *(cached_block**)_blockA; 1343 cached_block* blockB = *(cached_block**)_blockB; 1344 1345 off_t diff = blockA->block_number - blockB->block_number; 1346 if (diff > 0) 1347 return 1; 1348 1349 return diff < 0 ? -1 : 0; 1350 } 1351 1352 1353 // #pragma mark - block_cache 1354 1355 1356 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize, 1357 bool readOnly) 1358 : 1359 hash(NULL), 1360 fd(_fd), 1361 max_blocks(numBlocks), 1362 block_size(blockSize), 1363 next_transaction_id(1), 1364 last_transaction(NULL), 1365 transaction_hash(NULL), 1366 buffer_cache(NULL), 1367 unused_block_count(0), 1368 busy_reading_count(0), 1369 busy_reading_waiters(false), 1370 busy_writing_count(0), 1371 busy_writing_waiters(0), 1372 num_dirty_blocks(0), 1373 read_only(readOnly) 1374 { 1375 } 1376 1377 1378 /*! Should be called with the cache's lock held. */ 1379 block_cache::~block_cache() 1380 { 1381 unregister_low_resource_handler(&_LowMemoryHandler, this); 1382 1383 delete transaction_hash; 1384 delete hash; 1385 1386 delete_object_cache(buffer_cache); 1387 1388 mutex_destroy(&lock); 1389 } 1390 1391 1392 status_t 1393 block_cache::Init() 1394 { 1395 busy_reading_condition.Init(this, "cache block busy_reading"); 1396 busy_writing_condition.Init(this, "cache block busy writing"); 1397 condition_variable.Init(this, "cache transaction sync"); 1398 mutex_init(&lock, "block cache"); 1399 1400 buffer_cache = create_object_cache_etc("block cache buffers", block_size, 1401 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 1402 if (buffer_cache == NULL) 1403 return B_NO_MEMORY; 1404 1405 hash = new BlockTable(); 1406 if (hash == NULL || hash->Init(1024) != B_OK) 1407 return B_NO_MEMORY; 1408 1409 transaction_hash = new(std::nothrow) TransactionTable(); 1410 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK) 1411 return B_NO_MEMORY; 1412 1413 return register_low_resource_handler(&_LowMemoryHandler, this, 1414 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1415 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0); 1416 } 1417 1418 1419 void 1420 block_cache::Free(void* buffer) 1421 { 1422 if (buffer != NULL) 1423 object_cache_free(buffer_cache, buffer, 0); 1424 } 1425 1426 1427 void* 1428 block_cache::Allocate() 1429 { 1430 void* block = object_cache_alloc(buffer_cache, 0); 1431 if (block != NULL) 1432 return block; 1433 1434 // recycle existing before allocating a new one 1435 RemoveUnusedBlocks(100); 1436 1437 return object_cache_alloc(buffer_cache, 0); 1438 } 1439 1440 1441 void 1442 block_cache::FreeBlock(cached_block* block) 1443 { 1444 Free(block->current_data); 1445 1446 if (block->original_data != NULL || block->parent_data != NULL) { 1447 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n", 1448 block->block_number, block->original_data, block->parent_data); 1449 } 1450 1451 #if BLOCK_CACHE_DEBUG_CHANGED 1452 Free(block->compare); 1453 #endif 1454 1455 object_cache_free(sBlockCache, block, 0); 1456 } 1457 1458 1459 /*! Allocates a new block for \a blockNumber, ready for use */ 1460 cached_block* 1461 block_cache::NewBlock(off_t blockNumber) 1462 { 1463 cached_block* block = NULL; 1464 1465 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1466 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) { 1467 // recycle existing instead of allocating a new one 1468 block = _GetUnusedBlock(); 1469 } 1470 if (block == NULL) { 1471 block = (cached_block*)object_cache_alloc(sBlockCache, 0); 1472 if (block != NULL) { 1473 block->current_data = Allocate(); 1474 if (block->current_data == NULL) { 1475 object_cache_free(sBlockCache, block, 0); 1476 return NULL; 1477 } 1478 } else { 1479 TB(Error(this, blockNumber, "allocation failed")); 1480 dprintf("block allocation failed, unused list is %sempty.\n", 1481 unused_blocks.IsEmpty() ? "" : "not "); 1482 1483 // allocation failed, try to reuse an unused block 1484 block = _GetUnusedBlock(); 1485 if (block == NULL) { 1486 TB(Error(this, blockNumber, "get unused failed")); 1487 FATAL(("could not allocate block!\n")); 1488 return NULL; 1489 } 1490 } 1491 } 1492 1493 block->block_number = blockNumber; 1494 block->ref_count = 0; 1495 block->last_accessed = 0; 1496 block->transaction_next = NULL; 1497 block->transaction = block->previous_transaction = NULL; 1498 block->original_data = NULL; 1499 block->parent_data = NULL; 1500 block->busy_reading = false; 1501 block->busy_writing = false; 1502 block->is_writing = false; 1503 block->is_dirty = false; 1504 block->unused = false; 1505 block->discard = false; 1506 block->busy_reading_waiters = false; 1507 block->busy_writing_waiters = false; 1508 #if BLOCK_CACHE_DEBUG_CHANGED 1509 block->compare = NULL; 1510 #endif 1511 1512 return block; 1513 } 1514 1515 1516 void 1517 block_cache::FreeBlockParentData(cached_block* block) 1518 { 1519 ASSERT(block->parent_data != NULL); 1520 if (block->parent_data != block->current_data) 1521 Free(block->parent_data); 1522 block->parent_data = NULL; 1523 } 1524 1525 1526 void 1527 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld) 1528 { 1529 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count)); 1530 1531 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1532 cached_block* block = iterator.Next();) { 1533 if (minSecondsOld >= block->LastAccess()) { 1534 // The list is sorted by last access 1535 break; 1536 } 1537 if (block->busy_reading || block->busy_writing) 1538 continue; 1539 1540 TB(Flush(this, block)); 1541 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n", 1542 block->block_number, block->last_accessed)); 1543 1544 // this can only happen if no transactions are used 1545 if (block->is_dirty && !block->discard) { 1546 if (block->busy_writing) 1547 continue; 1548 1549 BlockWriter::WriteBlock(this, block); 1550 } 1551 1552 // remove block from lists 1553 iterator.Remove(); 1554 unused_block_count--; 1555 RemoveBlock(block); 1556 1557 if (--count <= 0) 1558 break; 1559 } 1560 } 1561 1562 1563 void 1564 block_cache::RemoveBlock(cached_block* block) 1565 { 1566 hash->Remove(block); 1567 FreeBlock(block); 1568 } 1569 1570 1571 /*! Discards the block from a transaction (this method must not be called 1572 for blocks not part of a transaction). 1573 */ 1574 void 1575 block_cache::DiscardBlock(cached_block* block) 1576 { 1577 ASSERT(block->discard); 1578 ASSERT(block->previous_transaction == NULL); 1579 1580 if (block->parent_data != NULL) 1581 FreeBlockParentData(block); 1582 1583 if (block->original_data != NULL) { 1584 Free(block->original_data); 1585 block->original_data = NULL; 1586 } 1587 1588 RemoveBlock(block); 1589 } 1590 1591 1592 void 1593 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level) 1594 { 1595 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level)); 1596 1597 // free some blocks according to the low memory state 1598 // (if there is enough memory left, we don't free any) 1599 1600 block_cache* cache = (block_cache*)data; 1601 int32 free = 0; 1602 int32 secondsOld = 0; 1603 switch (level) { 1604 case B_NO_LOW_RESOURCE: 1605 return; 1606 case B_LOW_RESOURCE_NOTE: 1607 free = cache->unused_block_count / 8; 1608 secondsOld = 120; 1609 break; 1610 case B_LOW_RESOURCE_WARNING: 1611 free = cache->unused_block_count / 4; 1612 secondsOld = 10; 1613 break; 1614 case B_LOW_RESOURCE_CRITICAL: 1615 free = cache->unused_block_count / 2; 1616 secondsOld = 0; 1617 break; 1618 } 1619 1620 MutexLocker locker(&cache->lock); 1621 1622 if (!locker.IsLocked()) { 1623 // If our block_cache were deleted, it could be that we had 1624 // been called before that deletion went through, therefore, 1625 // acquiring its lock might fail. 1626 return; 1627 } 1628 1629 #ifdef TRACE_BLOCK_CACHE 1630 uint32 oldUnused = cache->unused_block_count; 1631 #endif 1632 1633 cache->RemoveUnusedBlocks(free, secondsOld); 1634 1635 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n", 1636 cache, oldUnused, cache->unused_block_count)); 1637 } 1638 1639 1640 cached_block* 1641 block_cache::_GetUnusedBlock() 1642 { 1643 TRACE(("block_cache: get unused block\n")); 1644 1645 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1646 cached_block* block = iterator.Next();) { 1647 TB(Flush(this, block, true)); 1648 // this can only happen if no transactions are used 1649 if (block->is_dirty && !block->busy_writing && !block->discard) 1650 BlockWriter::WriteBlock(this, block); 1651 1652 // remove block from lists 1653 iterator.Remove(); 1654 unused_block_count--; 1655 hash->Remove(block); 1656 1657 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1658 block->unused = false; 1659 1660 // TODO: see if compare data is handled correctly here! 1661 #if BLOCK_CACHE_DEBUG_CHANGED 1662 if (block->compare != NULL) 1663 Free(block->compare); 1664 #endif 1665 return block; 1666 } 1667 1668 return NULL; 1669 } 1670 1671 1672 // #pragma mark - private block functions 1673 1674 1675 /*! Cache must be locked. 1676 */ 1677 static void 1678 mark_block_busy_reading(block_cache* cache, cached_block* block) 1679 { 1680 block->busy_reading = true; 1681 cache->busy_reading_count++; 1682 } 1683 1684 1685 /*! Cache must be locked. 1686 */ 1687 static void 1688 mark_block_unbusy_reading(block_cache* cache, cached_block* block) 1689 { 1690 block->busy_reading = false; 1691 cache->busy_reading_count--; 1692 1693 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0) 1694 || block->busy_reading_waiters) { 1695 cache->busy_reading_waiters = false; 1696 block->busy_reading_waiters = false; 1697 cache->busy_reading_condition.NotifyAll(); 1698 } 1699 } 1700 1701 1702 /*! Cache must be locked. 1703 */ 1704 static void 1705 wait_for_busy_reading_block(block_cache* cache, cached_block* block) 1706 { 1707 while (block->busy_reading) { 1708 // wait for at least the specified block to be read in 1709 ConditionVariableEntry entry; 1710 cache->busy_reading_condition.Add(&entry); 1711 block->busy_reading_waiters = true; 1712 1713 mutex_unlock(&cache->lock); 1714 1715 entry.Wait(); 1716 1717 mutex_lock(&cache->lock); 1718 } 1719 } 1720 1721 1722 /*! Cache must be locked. 1723 */ 1724 static void 1725 wait_for_busy_reading_blocks(block_cache* cache) 1726 { 1727 while (cache->busy_reading_count != 0) { 1728 // wait for all blocks to be read in 1729 ConditionVariableEntry entry; 1730 cache->busy_reading_condition.Add(&entry); 1731 cache->busy_reading_waiters = true; 1732 1733 mutex_unlock(&cache->lock); 1734 1735 entry.Wait(); 1736 1737 mutex_lock(&cache->lock); 1738 } 1739 } 1740 1741 1742 /*! Cache must be locked. 1743 */ 1744 static void 1745 wait_for_busy_writing_block(block_cache* cache, cached_block* block) 1746 { 1747 while (block->busy_writing) { 1748 // wait for all blocks to be written back 1749 ConditionVariableEntry entry; 1750 cache->busy_writing_condition.Add(&entry); 1751 block->busy_writing_waiters = true; 1752 1753 mutex_unlock(&cache->lock); 1754 1755 entry.Wait(); 1756 1757 mutex_lock(&cache->lock); 1758 } 1759 } 1760 1761 1762 /*! Cache must be locked. 1763 */ 1764 static void 1765 wait_for_busy_writing_blocks(block_cache* cache) 1766 { 1767 while (cache->busy_writing_count != 0) { 1768 // wait for all blocks to be written back 1769 ConditionVariableEntry entry; 1770 cache->busy_writing_condition.Add(&entry); 1771 cache->busy_writing_waiters = true; 1772 1773 mutex_unlock(&cache->lock); 1774 1775 entry.Wait(); 1776 1777 mutex_lock(&cache->lock); 1778 } 1779 } 1780 1781 1782 /*! Removes a reference from the specified \a block. If this was the last 1783 reference, the block is moved into the unused list. 1784 In low memory situations, it will also free some blocks from that list, 1785 but not necessarily the \a block it just released. 1786 */ 1787 static void 1788 put_cached_block(block_cache* cache, cached_block* block) 1789 { 1790 #if BLOCK_CACHE_DEBUG_CHANGED 1791 if (!block->is_dirty && block->compare != NULL 1792 && memcmp(block->current_data, block->compare, cache->block_size)) { 1793 dprintf("new block:\n"); 1794 dump_block((const char*)block->current_data, 256, " "); 1795 dprintf("unchanged block:\n"); 1796 dump_block((const char*)block->compare, 256, " "); 1797 BlockWriter::WriteBlock(cache, block); 1798 panic("block_cache: supposed to be clean block was changed!\n"); 1799 1800 cache->Free(block->compare); 1801 block->compare = NULL; 1802 } 1803 #endif 1804 TB(Put(cache, block)); 1805 1806 if (block->ref_count < 1) { 1807 panic("Invalid ref_count for block %p, cache %p\n", block, cache); 1808 return; 1809 } 1810 1811 if (--block->ref_count == 0 1812 && block->transaction == NULL && block->previous_transaction == NULL) { 1813 // This block is not used anymore, and not part of any transaction 1814 block->is_writing = false; 1815 1816 if (block->discard) { 1817 cache->RemoveBlock(block); 1818 } else { 1819 // put this block in the list of unused blocks 1820 ASSERT(!block->unused); 1821 block->unused = true; 1822 1823 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1824 cache->unused_blocks.Add(block); 1825 cache->unused_block_count++; 1826 } 1827 } 1828 } 1829 1830 1831 static void 1832 put_cached_block(block_cache* cache, off_t blockNumber) 1833 { 1834 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1835 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1836 blockNumber, cache->max_blocks - 1); 1837 } 1838 1839 cached_block* block = cache->hash->Lookup(blockNumber); 1840 if (block != NULL) 1841 put_cached_block(cache, block); 1842 else { 1843 TB(Error(cache, blockNumber, "put unknown")); 1844 } 1845 } 1846 1847 1848 /*! Retrieves the block \a blockNumber from the hash table, if it's already 1849 there, or reads it from the disk. 1850 You need to have the cache locked when calling this function. 1851 1852 \param _allocated tells you whether or not a new block has been allocated 1853 to satisfy your request. 1854 \param readBlock if \c false, the block will not be read in case it was 1855 not already in the cache. The block you retrieve may contain random 1856 data. If \c true, the cache will be temporarily unlocked while the 1857 block is read in. 1858 */ 1859 static cached_block* 1860 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated, 1861 bool readBlock = true) 1862 { 1863 ASSERT_LOCKED_MUTEX(&cache->lock); 1864 1865 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1866 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1867 blockNumber, cache->max_blocks - 1); 1868 return NULL; 1869 } 1870 1871 retry: 1872 cached_block* block = cache->hash->Lookup(blockNumber); 1873 *_allocated = false; 1874 1875 if (block == NULL) { 1876 // put block into cache 1877 block = cache->NewBlock(blockNumber); 1878 if (block == NULL) 1879 return NULL; 1880 1881 cache->hash->Insert(block); 1882 *_allocated = true; 1883 } else if (block->busy_reading) { 1884 // The block is currently busy_reading - wait and try again later 1885 wait_for_busy_reading_block(cache, block); 1886 goto retry; 1887 } 1888 1889 if (block->unused) { 1890 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber)); 1891 block->unused = false; 1892 cache->unused_blocks.Remove(block); 1893 cache->unused_block_count--; 1894 } 1895 1896 if (*_allocated && readBlock) { 1897 // read block into cache 1898 int32 blockSize = cache->block_size; 1899 1900 mark_block_busy_reading(cache, block); 1901 mutex_unlock(&cache->lock); 1902 1903 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize, 1904 block->current_data, blockSize); 1905 1906 mutex_lock(&cache->lock); 1907 if (bytesRead < blockSize) { 1908 cache->RemoveBlock(block); 1909 TB(Error(cache, blockNumber, "read failed", bytesRead)); 1910 1911 TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n", 1912 blockNumber, bytesRead, strerror(errno))); 1913 return NULL; 1914 } 1915 TB(Read(cache, block)); 1916 1917 mark_block_unbusy_reading(cache, block); 1918 } 1919 1920 block->ref_count++; 1921 block->last_accessed = system_time() / 1000000L; 1922 1923 return block; 1924 } 1925 1926 1927 /*! Returns the writable block data for the requested blockNumber. 1928 If \a cleared is true, the block is not read from disk; an empty block 1929 is returned. 1930 1931 This is the only method to insert a block into a transaction. It makes 1932 sure that the previous block contents are preserved in that case. 1933 */ 1934 static void* 1935 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base, 1936 off_t length, int32 transactionID, bool cleared) 1937 { 1938 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 1939 blockNumber, transactionID)); 1940 1941 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1942 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1943 blockNumber, cache->max_blocks - 1); 1944 } 1945 1946 bool allocated; 1947 cached_block* block = get_cached_block(cache, blockNumber, &allocated, 1948 !cleared); 1949 if (block == NULL) 1950 return NULL; 1951 1952 if (block->busy_writing) 1953 wait_for_busy_writing_block(cache, block); 1954 1955 block->discard = false; 1956 1957 // if there is no transaction support, we just return the current block 1958 if (transactionID == -1) { 1959 if (cleared) { 1960 mark_block_busy_reading(cache, block); 1961 mutex_unlock(&cache->lock); 1962 1963 memset(block->current_data, 0, cache->block_size); 1964 1965 mutex_lock(&cache->lock); 1966 mark_block_unbusy_reading(cache, block); 1967 } 1968 1969 block->is_writing = true; 1970 1971 if (!block->is_dirty) { 1972 cache->num_dirty_blocks++; 1973 block->is_dirty = true; 1974 // mark the block as dirty 1975 } 1976 1977 TB(Get(cache, block)); 1978 return block->current_data; 1979 } 1980 1981 cache_transaction* transaction = block->transaction; 1982 1983 if (transaction != NULL && transaction->id != transactionID) { 1984 // TODO: we have to wait here until the other transaction is done. 1985 // Maybe we should even panic, since we can't prevent any deadlocks. 1986 panic("get_writable_cached_block(): asked to get busy writable block " 1987 "(transaction %" B_PRId32 ")\n", block->transaction->id); 1988 put_cached_block(cache, block); 1989 return NULL; 1990 } 1991 if (transaction == NULL && transactionID != -1) { 1992 // get new transaction 1993 transaction = lookup_transaction(cache, transactionID); 1994 if (transaction == NULL) { 1995 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n", 1996 transactionID); 1997 put_cached_block(cache, block); 1998 return NULL; 1999 } 2000 if (!transaction->open) { 2001 panic("get_writable_cached_block(): transaction already done!\n"); 2002 put_cached_block(cache, block); 2003 return NULL; 2004 } 2005 2006 block->transaction = transaction; 2007 2008 // attach the block to the transaction block list 2009 block->transaction_next = transaction->first_block; 2010 transaction->first_block = block; 2011 transaction->num_blocks++; 2012 } 2013 if (transaction != NULL) 2014 transaction->last_used = system_time(); 2015 2016 bool wasUnchanged = block->original_data == NULL 2017 || block->previous_transaction != NULL; 2018 2019 if (!(allocated && cleared) && block->original_data == NULL) { 2020 // we already have data, so we need to preserve it 2021 block->original_data = cache->Allocate(); 2022 if (block->original_data == NULL) { 2023 TB(Error(cache, blockNumber, "allocate original failed")); 2024 FATAL(("could not allocate original_data\n")); 2025 put_cached_block(cache, block); 2026 return NULL; 2027 } 2028 2029 mark_block_busy_reading(cache, block); 2030 mutex_unlock(&cache->lock); 2031 2032 memcpy(block->original_data, block->current_data, cache->block_size); 2033 2034 mutex_lock(&cache->lock); 2035 mark_block_unbusy_reading(cache, block); 2036 } 2037 if (block->parent_data == block->current_data) { 2038 // remember any previous contents for the parent transaction 2039 block->parent_data = cache->Allocate(); 2040 if (block->parent_data == NULL) { 2041 // TODO: maybe we should just continue the current transaction in 2042 // this case... 2043 TB(Error(cache, blockNumber, "allocate parent failed")); 2044 FATAL(("could not allocate parent\n")); 2045 put_cached_block(cache, block); 2046 return NULL; 2047 } 2048 2049 mark_block_busy_reading(cache, block); 2050 mutex_unlock(&cache->lock); 2051 2052 memcpy(block->parent_data, block->current_data, cache->block_size); 2053 2054 mutex_lock(&cache->lock); 2055 mark_block_unbusy_reading(cache, block); 2056 2057 transaction->sub_num_blocks++; 2058 } else if (transaction != NULL && transaction->has_sub_transaction 2059 && block->parent_data == NULL && wasUnchanged) 2060 transaction->sub_num_blocks++; 2061 2062 if (cleared) { 2063 mark_block_busy_reading(cache, block); 2064 mutex_unlock(&cache->lock); 2065 2066 memset(block->current_data, 0, cache->block_size); 2067 2068 mutex_lock(&cache->lock); 2069 mark_block_unbusy_reading(cache, block); 2070 } 2071 2072 block->is_dirty = true; 2073 TB(Get(cache, block)); 2074 TB2(BlockData(cache, block, "get writable")); 2075 2076 return block->current_data; 2077 } 2078 2079 2080 #if DEBUG_BLOCK_CACHE 2081 2082 2083 static void 2084 dump_block(cached_block* block) 2085 { 2086 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32 2087 " %c%c%c%c%c%c %08lx %08lx\n", 2088 (addr_t)block, block->block_number, 2089 (addr_t)block->current_data, (addr_t)block->original_data, 2090 (addr_t)block->parent_data, block->ref_count, block->LastAccess(), 2091 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-', 2092 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-', 2093 block->unused ? 'U' : '-', block->discard ? 'D' : '-', 2094 (addr_t)block->transaction, 2095 (addr_t)block->previous_transaction); 2096 } 2097 2098 2099 static void 2100 dump_block_long(cached_block* block) 2101 { 2102 kprintf("BLOCK %p\n", block); 2103 kprintf(" current data: %p\n", block->current_data); 2104 kprintf(" original data: %p\n", block->original_data); 2105 kprintf(" parent data: %p\n", block->parent_data); 2106 #if BLOCK_CACHE_DEBUG_CHANGED 2107 kprintf(" compare data: %p\n", block->compare); 2108 #endif 2109 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count); 2110 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess()); 2111 kprintf(" flags: "); 2112 if (block->busy_reading) 2113 kprintf(" busy_reading"); 2114 if (block->busy_writing) 2115 kprintf(" busy_writing"); 2116 if (block->is_writing) 2117 kprintf(" is-writing"); 2118 if (block->is_dirty) 2119 kprintf(" is-dirty"); 2120 if (block->unused) 2121 kprintf(" unused"); 2122 if (block->discard) 2123 kprintf(" discard"); 2124 kprintf("\n"); 2125 if (block->transaction != NULL) { 2126 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction, 2127 block->transaction->id); 2128 if (block->transaction_next != NULL) { 2129 kprintf(" next in transaction: %" B_PRIdOFF "\n", 2130 block->transaction_next->block_number); 2131 } 2132 } 2133 if (block->previous_transaction != NULL) { 2134 kprintf(" previous transaction: %p (%" B_PRId32 ")\n", 2135 block->previous_transaction, 2136 block->previous_transaction->id); 2137 } 2138 2139 set_debug_variable("_current", (addr_t)block->current_data); 2140 set_debug_variable("_original", (addr_t)block->original_data); 2141 set_debug_variable("_parent", (addr_t)block->parent_data); 2142 } 2143 2144 2145 static int 2146 dump_cached_block(int argc, char** argv) 2147 { 2148 if (argc != 2) { 2149 kprintf("usage: %s <block-address>\n", argv[0]); 2150 return 0; 2151 } 2152 2153 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1])); 2154 return 0; 2155 } 2156 2157 2158 static int 2159 dump_cache(int argc, char** argv) 2160 { 2161 bool showTransactions = false; 2162 bool showBlocks = false; 2163 int32 i = 1; 2164 while (argv[i] != NULL && argv[i][0] == '-') { 2165 for (char* arg = &argv[i][1]; arg[0]; arg++) { 2166 switch (arg[0]) { 2167 case 'b': 2168 showBlocks = true; 2169 break; 2170 case 't': 2171 showTransactions = true; 2172 break; 2173 default: 2174 print_debugger_command_usage(argv[0]); 2175 return 0; 2176 } 2177 } 2178 i++; 2179 } 2180 2181 if (i >= argc) { 2182 print_debugger_command_usage(argv[0]); 2183 return 0; 2184 } 2185 2186 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]); 2187 if (cache == NULL) { 2188 kprintf("invalid cache address\n"); 2189 return 0; 2190 } 2191 2192 off_t blockNumber = -1; 2193 if (i + 1 < argc) { 2194 blockNumber = parse_expression(argv[i + 1]); 2195 cached_block* block = cache->hash->Lookup(blockNumber); 2196 if (block != NULL) 2197 dump_block_long(block); 2198 else 2199 kprintf("block %" B_PRIdOFF " not found\n", blockNumber); 2200 return 0; 2201 } 2202 2203 kprintf("BLOCK CACHE: %p\n", cache); 2204 2205 kprintf(" fd: %d\n", cache->fd); 2206 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks); 2207 kprintf(" block_size: %zu\n", cache->block_size); 2208 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id); 2209 kprintf(" buffer_cache: %p\n", cache->buffer_cache); 2210 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count, 2211 cache->busy_reading_waiters ? "has" : "no"); 2212 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count, 2213 cache->busy_writing_waiters ? "has" : "no"); 2214 2215 if (!cache->pending_notifications.IsEmpty()) { 2216 kprintf(" pending notifications:\n"); 2217 2218 NotificationList::Iterator iterator 2219 = cache->pending_notifications.GetIterator(); 2220 while (iterator.HasNext()) { 2221 cache_notification* notification = iterator.Next(); 2222 2223 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification, 2224 notification->events_pending, notification->hook, 2225 notification->data); 2226 } 2227 } 2228 2229 if (showTransactions) { 2230 kprintf(" transactions:\n"); 2231 kprintf("address id state blocks main sub\n"); 2232 2233 TransactionTable::Iterator iterator(cache->transaction_hash); 2234 2235 while (iterator.HasNext()) { 2236 cache_transaction* transaction = iterator.Next(); 2237 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" 2238 B_PRId32 "\n", transaction, transaction->id, 2239 transaction->open ? "open" : "closed", 2240 transaction->num_blocks, transaction->main_num_blocks, 2241 transaction->sub_num_blocks); 2242 } 2243 } 2244 2245 if (showBlocks) { 2246 kprintf(" blocks:\n"); 2247 kprintf("address block no. current original parent refs access " 2248 "flags transact prev. trans\n"); 2249 } 2250 2251 uint32 referenced = 0; 2252 uint32 count = 0; 2253 uint32 dirty = 0; 2254 uint32 discarded = 0; 2255 BlockTable::Iterator iterator(cache->hash); 2256 while (iterator.HasNext()) { 2257 cached_block* block = iterator.Next(); 2258 if (showBlocks) 2259 dump_block(block); 2260 2261 if (block->is_dirty) 2262 dirty++; 2263 if (block->discard) 2264 discarded++; 2265 if (block->ref_count) 2266 referenced++; 2267 count++; 2268 } 2269 2270 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 2271 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 2272 " in unused.\n", 2273 count, dirty, discarded, referenced, cache->busy_reading_count, 2274 cache->unused_block_count); 2275 return 0; 2276 } 2277 2278 2279 static int 2280 dump_transaction(int argc, char** argv) 2281 { 2282 bool showBlocks = false; 2283 int i = 1; 2284 if (argc > 1 && !strcmp(argv[1], "-b")) { 2285 showBlocks = true; 2286 i++; 2287 } 2288 2289 if (argc - i < 1 || argc - i > 2) { 2290 print_debugger_command_usage(argv[0]); 2291 return 0; 2292 } 2293 2294 cache_transaction* transaction = NULL; 2295 2296 if (argc - i == 1) { 2297 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]); 2298 } else { 2299 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]); 2300 int32 id = parse_expression(argv[i + 1]); 2301 transaction = lookup_transaction(cache, id); 2302 if (transaction == NULL) { 2303 kprintf("No transaction with ID %" B_PRId32 " found.\n", id); 2304 return 0; 2305 } 2306 } 2307 2308 kprintf("TRANSACTION %p\n", transaction); 2309 2310 kprintf(" id: %" B_PRId32 "\n", transaction->id); 2311 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks); 2312 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks); 2313 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks); 2314 kprintf(" has sub: %d\n", transaction->has_sub_transaction); 2315 kprintf(" state: %s\n", transaction->open ? "open" : "closed"); 2316 kprintf(" idle: %" B_PRId64 " secs\n", 2317 (system_time() - transaction->last_used) / 1000000); 2318 2319 kprintf(" listeners:\n"); 2320 2321 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 2322 while (iterator.HasNext()) { 2323 cache_listener* listener = iterator.Next(); 2324 2325 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending, 2326 listener->hook, listener->data); 2327 } 2328 2329 if (!showBlocks) 2330 return 0; 2331 2332 kprintf(" blocks:\n"); 2333 kprintf("address block no. current original parent refs access " 2334 "flags transact prev. trans\n"); 2335 2336 cached_block* block = transaction->first_block; 2337 while (block != NULL) { 2338 dump_block(block); 2339 block = block->transaction_next; 2340 } 2341 2342 kprintf("--\n"); 2343 2344 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 2345 while (blockIterator.HasNext()) { 2346 block = blockIterator.Next(); 2347 dump_block(block); 2348 } 2349 2350 return 0; 2351 } 2352 2353 2354 static int 2355 dump_caches(int argc, char** argv) 2356 { 2357 kprintf("Block caches:\n"); 2358 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator(); 2359 while (i.HasNext()) { 2360 block_cache* cache = i.Next(); 2361 if (cache == (block_cache*)&sMarkCache) 2362 continue; 2363 2364 kprintf(" %p\n", cache); 2365 } 2366 2367 return 0; 2368 } 2369 2370 2371 #if BLOCK_CACHE_BLOCK_TRACING >= 2 2372 static int 2373 dump_block_data(int argc, char** argv) 2374 { 2375 using namespace BlockTracing; 2376 2377 // Determine which blocks to show 2378 2379 bool printStackTrace = true; 2380 uint32 which = 0; 2381 int32 i = 1; 2382 while (i < argc && argv[i][0] == '-') { 2383 char* arg = &argv[i][1]; 2384 while (arg[0]) { 2385 switch (arg[0]) { 2386 case 'c': 2387 which |= BlockData::kCurrent; 2388 break; 2389 case 'p': 2390 which |= BlockData::kParent; 2391 break; 2392 case 'o': 2393 which |= BlockData::kOriginal; 2394 break; 2395 2396 default: 2397 kprintf("invalid block specifier (only o/c/p are " 2398 "allowed).\n"); 2399 return 0; 2400 } 2401 arg++; 2402 } 2403 2404 i++; 2405 } 2406 if (which == 0) 2407 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal; 2408 2409 if (i == argc) { 2410 print_debugger_command_usage(argv[0]); 2411 return 0; 2412 } 2413 2414 // Get the range of blocks to print 2415 2416 int64 from = parse_expression(argv[i]); 2417 int64 to = from; 2418 if (argc > i + 1) 2419 to = parse_expression(argv[i + 1]); 2420 if (to < from) 2421 to = from; 2422 2423 uint32 offset = 0; 2424 uint32 size = LONG_MAX; 2425 if (argc > i + 2) 2426 offset = parse_expression(argv[i + 2]); 2427 if (argc > i + 3) 2428 size = parse_expression(argv[i + 3]); 2429 2430 TraceEntryIterator iterator; 2431 iterator.MoveTo(from - 1); 2432 2433 static char sBuffer[1024]; 2434 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID); 2435 2436 while (TraceEntry* entry = iterator.Next()) { 2437 int32 index = iterator.Index(); 2438 if (index > to) 2439 break; 2440 2441 Action* action = dynamic_cast<Action*>(entry); 2442 if (action != NULL) { 2443 out.Clear(); 2444 out.DumpEntry(action); 2445 continue; 2446 } 2447 2448 BlockData* blockData = dynamic_cast<BlockData*>(entry); 2449 if (blockData == NULL) 2450 continue; 2451 2452 out.Clear(); 2453 2454 const char* dump = out.DumpEntry(entry); 2455 int length = strlen(dump); 2456 if (length > 0 && dump[length - 1] == '\n') 2457 length--; 2458 2459 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump); 2460 2461 if (printStackTrace) { 2462 out.Clear(); 2463 entry->DumpStackTrace(out); 2464 if (out.Size() > 0) 2465 kputs(out.Buffer()); 2466 } 2467 2468 blockData->DumpBlocks(which, offset, size); 2469 } 2470 2471 return 0; 2472 } 2473 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 2474 2475 2476 #endif // DEBUG_BLOCK_CACHE 2477 2478 2479 /*! Traverses through the block_cache list, and returns one cache after the 2480 other. The cache returned is automatically locked when you get it, and 2481 unlocked with the next call to this function. Ignores caches that are in 2482 deletion state. 2483 Returns \c NULL when the end of the list is reached. 2484 */ 2485 static block_cache* 2486 get_next_locked_block_cache(block_cache* last) 2487 { 2488 MutexLocker _(sCachesLock); 2489 2490 block_cache* cache; 2491 if (last != NULL) { 2492 mutex_unlock(&last->lock); 2493 2494 cache = sCaches.GetNext((block_cache*)&sMarkCache); 2495 sCaches.Remove((block_cache*)&sMarkCache); 2496 } else 2497 cache = sCaches.Head(); 2498 2499 if (cache != NULL) { 2500 mutex_lock(&cache->lock); 2501 sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache); 2502 } 2503 2504 return cache; 2505 } 2506 2507 2508 /*! Background thread that continuously checks for pending notifications of 2509 all caches. 2510 Every two seconds, it will also write back up to 64 blocks per cache. 2511 */ 2512 static status_t 2513 block_notifier_and_writer(void* /*data*/) 2514 { 2515 const bigtime_t kTimeout = 2000000LL; 2516 bigtime_t timeout = kTimeout; 2517 2518 while (true) { 2519 bigtime_t start = system_time(); 2520 2521 status_t status = acquire_sem_etc(sEventSemaphore, 1, 2522 B_RELATIVE_TIMEOUT, timeout); 2523 if (status == B_OK) { 2524 flush_pending_notifications(); 2525 timeout -= system_time() - start; 2526 continue; 2527 } 2528 2529 // write 64 blocks of each block_cache every two seconds 2530 // TODO: change this once we have an I/O scheduler 2531 timeout = kTimeout; 2532 size_t usedMemory; 2533 object_cache_get_usage(sBlockCache, &usedMemory); 2534 2535 block_cache* cache = NULL; 2536 while ((cache = get_next_locked_block_cache(cache)) != NULL) { 2537 BlockWriter writer(cache, 64); 2538 2539 size_t cacheUsedMemory; 2540 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory); 2541 usedMemory += cacheUsedMemory; 2542 2543 if (cache->num_dirty_blocks) { 2544 // This cache is not using transactions, we'll scan the blocks 2545 // directly 2546 BlockTable::Iterator iterator(cache->hash); 2547 2548 while (iterator.HasNext()) { 2549 cached_block* block = iterator.Next(); 2550 if (block->CanBeWritten() && !writer.Add(block)) 2551 break; 2552 } 2553 2554 } else { 2555 TransactionTable::Iterator iterator(cache->transaction_hash); 2556 2557 while (iterator.HasNext()) { 2558 cache_transaction* transaction = iterator.Next(); 2559 if (transaction->open) { 2560 if (system_time() > transaction->last_used 2561 + kTransactionIdleTime) { 2562 // Transaction is open but idle 2563 notify_transaction_listeners(cache, transaction, 2564 TRANSACTION_IDLE); 2565 } 2566 continue; 2567 } 2568 2569 bool hasLeftOvers; 2570 // we ignore this one 2571 if (!writer.Add(transaction, hasLeftOvers)) 2572 break; 2573 } 2574 } 2575 2576 writer.Write(); 2577 2578 if ((block_cache_used_memory() / B_PAGE_SIZE) 2579 > vm_page_num_pages() / 2) { 2580 // Try to reduce memory usage to half of the available 2581 // RAM at maximum 2582 cache->RemoveUnusedBlocks(1000, 10); 2583 } 2584 } 2585 2586 MutexLocker _(sCachesMemoryUseLock); 2587 sUsedMemory = usedMemory; 2588 } 2589 2590 // never can get here 2591 return B_OK; 2592 } 2593 2594 2595 /*! Notify function for wait_for_notifications(). */ 2596 static void 2597 notify_sync(int32 transactionID, int32 event, void* _cache) 2598 { 2599 block_cache* cache = (block_cache*)_cache; 2600 2601 cache->condition_variable.NotifyOne(); 2602 } 2603 2604 2605 /*! Must be called with the sCachesLock held. */ 2606 static bool 2607 is_valid_cache(block_cache* cache) 2608 { 2609 ASSERT_LOCKED_MUTEX(&sCachesLock); 2610 2611 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 2612 while (iterator.HasNext()) { 2613 if (cache == iterator.Next()) 2614 return true; 2615 } 2616 2617 return false; 2618 } 2619 2620 2621 /*! Waits until all pending notifications are carried out. 2622 Safe to be called from the block writer/notifier thread. 2623 You must not hold the \a cache lock when calling this function. 2624 */ 2625 static void 2626 wait_for_notifications(block_cache* cache) 2627 { 2628 MutexLocker locker(sCachesLock); 2629 2630 if (find_thread(NULL) == sNotifierWriterThread) { 2631 // We're the notifier thread, don't wait, but flush all pending 2632 // notifications directly. 2633 if (is_valid_cache(cache)) 2634 flush_pending_notifications(cache); 2635 return; 2636 } 2637 2638 // add sync notification 2639 cache_notification notification; 2640 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync, 2641 cache); 2642 2643 ConditionVariableEntry entry; 2644 cache->condition_variable.Add(&entry); 2645 2646 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false); 2647 locker.Unlock(); 2648 2649 // wait for notification hook to be called 2650 entry.Wait(); 2651 } 2652 2653 2654 status_t 2655 block_cache_init(void) 2656 { 2657 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block), 2658 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 2659 if (sBlockCache == NULL) 2660 return B_NO_MEMORY; 2661 2662 new (&sCaches) DoublyLinkedList<block_cache>; 2663 // manually call constructor 2664 2665 sEventSemaphore = create_sem(0, "block cache event"); 2666 if (sEventSemaphore < B_OK) 2667 return sEventSemaphore; 2668 2669 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer, 2670 "block notifier/writer", B_LOW_PRIORITY, NULL); 2671 if (sNotifierWriterThread >= B_OK) 2672 resume_thread(sNotifierWriterThread); 2673 2674 #if DEBUG_BLOCK_CACHE 2675 add_debugger_command_etc("block_caches", &dump_caches, 2676 "dumps all block caches", "\n", 0); 2677 add_debugger_command_etc("block_cache", &dump_cache, 2678 "dumps a specific block cache", 2679 "[-bt] <cache-address> [block-number]\n" 2680 " -t lists the transactions\n" 2681 " -b lists all blocks\n", 0); 2682 add_debugger_command("cached_block", &dump_cached_block, 2683 "dumps the specified cached block"); 2684 add_debugger_command_etc("transaction", &dump_transaction, 2685 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n" 2686 "Either use a block cache pointer and an ID or a pointer to the transaction.\n" 2687 " -b lists all blocks that are part of this transaction\n", 0); 2688 # if BLOCK_CACHE_BLOCK_TRACING >= 2 2689 add_debugger_command_etc("block_cache_data", &dump_block_data, 2690 "dumps the data blocks logged for the actions", 2691 "[-cpo] <from> [<to> [<offset> [<size>]]]\n" 2692 "If no data specifier is used, all blocks are shown by default.\n" 2693 " -c the current data is shown, if available.\n" 2694 " -p the parent data is shown, if available.\n" 2695 " -o the original data is shown, if available.\n" 2696 " <from> first index of tracing entries to show.\n" 2697 " <to> if given, the last entry. If not, only <from> is shown.\n" 2698 " <offset> the offset of the block data.\n" 2699 " <from> the size of the block data that is dumped\n", 0); 2700 # endif 2701 #endif // DEBUG_BLOCK_CACHE 2702 2703 return B_OK; 2704 } 2705 2706 2707 size_t 2708 block_cache_used_memory(void) 2709 { 2710 MutexLocker _(sCachesMemoryUseLock); 2711 return sUsedMemory; 2712 } 2713 2714 2715 // #pragma mark - public transaction API 2716 2717 2718 int32 2719 cache_start_transaction(void* _cache) 2720 { 2721 block_cache* cache = (block_cache*)_cache; 2722 TransactionLocker locker(cache); 2723 2724 if (cache->last_transaction && cache->last_transaction->open) { 2725 panic("last transaction (%" B_PRId32 ") still open!\n", 2726 cache->last_transaction->id); 2727 } 2728 2729 cache_transaction* transaction = new(std::nothrow) cache_transaction; 2730 if (transaction == NULL) 2731 return B_NO_MEMORY; 2732 2733 transaction->id = atomic_add(&cache->next_transaction_id, 1); 2734 cache->last_transaction = transaction; 2735 2736 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id)); 2737 T(Action("start", cache, transaction)); 2738 2739 cache->transaction_hash->Insert(transaction); 2740 2741 return transaction->id; 2742 } 2743 2744 2745 status_t 2746 cache_sync_transaction(void* _cache, int32 id) 2747 { 2748 block_cache* cache = (block_cache*)_cache; 2749 bool hadBusy; 2750 2751 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id)); 2752 2753 do { 2754 TransactionLocker locker(cache); 2755 hadBusy = false; 2756 2757 BlockWriter writer(cache); 2758 TransactionTable::Iterator iterator(cache->transaction_hash); 2759 2760 while (iterator.HasNext()) { 2761 // close all earlier transactions which haven't been closed yet 2762 cache_transaction* transaction = iterator.Next(); 2763 2764 if (transaction->busy_writing_count != 0) { 2765 hadBusy = true; 2766 continue; 2767 } 2768 if (transaction->id <= id && !transaction->open) { 2769 // write back all of their remaining dirty blocks 2770 T(Action("sync", cache, transaction)); 2771 2772 bool hasLeftOvers; 2773 writer.Add(transaction, hasLeftOvers); 2774 2775 if (hasLeftOvers) { 2776 // This transaction contains blocks that a previous 2777 // transaction is trying to write back in this write run 2778 hadBusy = true; 2779 } 2780 } 2781 } 2782 2783 status_t status = writer.Write(); 2784 if (status != B_OK) 2785 return status; 2786 } while (hadBusy); 2787 2788 wait_for_notifications(cache); 2789 // make sure that all pending TRANSACTION_WRITTEN notifications 2790 // are handled after we return 2791 return B_OK; 2792 } 2793 2794 2795 status_t 2796 cache_end_transaction(void* _cache, int32 id, 2797 transaction_notification_hook hook, void* data) 2798 { 2799 block_cache* cache = (block_cache*)_cache; 2800 TransactionLocker locker(cache); 2801 2802 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id)); 2803 2804 cache_transaction* transaction = lookup_transaction(cache, id); 2805 if (transaction == NULL) { 2806 panic("cache_end_transaction(): invalid transaction ID\n"); 2807 return B_BAD_VALUE; 2808 } 2809 2810 // Write back all pending transaction blocks 2811 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2812 if (status != B_OK) 2813 return status; 2814 2815 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2816 2817 if (hook != NULL 2818 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, 2819 hook, data) != B_OK) { 2820 return B_NO_MEMORY; 2821 } 2822 2823 T(Action("end", cache, transaction)); 2824 2825 // iterate through all blocks and free the unchanged original contents 2826 2827 cached_block* next; 2828 for (cached_block* block = transaction->first_block; block != NULL; 2829 block = next) { 2830 next = block->transaction_next; 2831 ASSERT(block->previous_transaction == NULL); 2832 2833 if (block->discard) { 2834 // This block has been discarded in the transaction 2835 cache->DiscardBlock(block); 2836 transaction->num_blocks--; 2837 continue; 2838 } 2839 2840 if (block->original_data != NULL) { 2841 cache->Free(block->original_data); 2842 block->original_data = NULL; 2843 } 2844 if (block->parent_data != NULL) { 2845 ASSERT(transaction->has_sub_transaction); 2846 cache->FreeBlockParentData(block); 2847 } 2848 2849 // move the block to the previous transaction list 2850 transaction->blocks.Add(block); 2851 2852 block->previous_transaction = transaction; 2853 block->transaction_next = NULL; 2854 block->transaction = NULL; 2855 } 2856 2857 transaction->open = false; 2858 return B_OK; 2859 } 2860 2861 2862 status_t 2863 cache_abort_transaction(void* _cache, int32 id) 2864 { 2865 block_cache* cache = (block_cache*)_cache; 2866 TransactionLocker locker(cache); 2867 2868 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id)); 2869 2870 cache_transaction* transaction = lookup_transaction(cache, id); 2871 if (transaction == NULL) { 2872 panic("cache_abort_transaction(): invalid transaction ID\n"); 2873 return B_BAD_VALUE; 2874 } 2875 2876 T(Abort(cache, transaction)); 2877 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 2878 2879 // iterate through all blocks and restore their original contents 2880 2881 cached_block* block = transaction->first_block; 2882 cached_block* next; 2883 for (; block != NULL; block = next) { 2884 next = block->transaction_next; 2885 2886 if (block->original_data != NULL) { 2887 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of " 2888 "block %" B_PRIdOFF "\n", transaction->id, block->block_number)); 2889 memcpy(block->current_data, block->original_data, 2890 cache->block_size); 2891 cache->Free(block->original_data); 2892 block->original_data = NULL; 2893 } 2894 if (transaction->has_sub_transaction && block->parent_data != NULL) 2895 cache->FreeBlockParentData(block); 2896 2897 block->transaction_next = NULL; 2898 block->transaction = NULL; 2899 block->discard = false; 2900 if (block->previous_transaction == NULL) 2901 block->is_dirty = false; 2902 } 2903 2904 cache->transaction_hash->Remove(transaction); 2905 delete_transaction(cache, transaction); 2906 return B_OK; 2907 } 2908 2909 2910 /*! Acknowledges the current parent transaction, and starts a new transaction 2911 from its sub transaction. 2912 The new transaction also gets a new transaction ID. 2913 */ 2914 int32 2915 cache_detach_sub_transaction(void* _cache, int32 id, 2916 transaction_notification_hook hook, void* data) 2917 { 2918 block_cache* cache = (block_cache*)_cache; 2919 TransactionLocker locker(cache); 2920 2921 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id)); 2922 2923 cache_transaction* transaction = lookup_transaction(cache, id); 2924 if (transaction == NULL) { 2925 panic("cache_detach_sub_transaction(): invalid transaction ID\n"); 2926 return B_BAD_VALUE; 2927 } 2928 if (!transaction->has_sub_transaction) 2929 return B_BAD_VALUE; 2930 2931 // iterate through all blocks and free the unchanged original contents 2932 2933 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2934 if (status != B_OK) 2935 return status; 2936 2937 // create a new transaction for the sub transaction 2938 cache_transaction* newTransaction = new(std::nothrow) cache_transaction; 2939 if (newTransaction == NULL) 2940 return B_NO_MEMORY; 2941 2942 newTransaction->id = atomic_add(&cache->next_transaction_id, 1); 2943 T(Detach(cache, transaction, newTransaction)); 2944 2945 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2946 2947 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook, 2948 data) != B_OK) { 2949 delete newTransaction; 2950 return B_NO_MEMORY; 2951 } 2952 2953 cached_block* last = NULL; 2954 cached_block* next; 2955 for (cached_block* block = transaction->first_block; block != NULL; 2956 block = next) { 2957 next = block->transaction_next; 2958 ASSERT(block->previous_transaction == NULL); 2959 2960 if (block->discard) { 2961 cache->DiscardBlock(block); 2962 transaction->main_num_blocks--; 2963 continue; 2964 } 2965 2966 if (block->parent_data != NULL) { 2967 // The block changed in the parent - free the original data, since 2968 // they will be replaced by what is in current. 2969 ASSERT(block->original_data != NULL); 2970 cache->Free(block->original_data); 2971 2972 if (block->parent_data != block->current_data) { 2973 // The block had been changed in both transactions 2974 block->original_data = block->parent_data; 2975 } else { 2976 // The block has only been changed in the parent 2977 block->original_data = NULL; 2978 } 2979 2980 // move the block to the previous transaction list 2981 transaction->blocks.Add(block); 2982 block->previous_transaction = transaction; 2983 block->parent_data = NULL; 2984 } 2985 2986 if (block->original_data != NULL) { 2987 // This block had been changed in the current sub transaction, 2988 // we need to move this block over to the new transaction. 2989 ASSERT(block->parent_data == NULL); 2990 2991 if (last == NULL) 2992 newTransaction->first_block = block; 2993 else 2994 last->transaction_next = block; 2995 2996 block->transaction = newTransaction; 2997 last = block; 2998 } else 2999 block->transaction = NULL; 3000 3001 block->transaction_next = NULL; 3002 } 3003 3004 newTransaction->num_blocks = transaction->sub_num_blocks; 3005 3006 transaction->open = false; 3007 transaction->has_sub_transaction = false; 3008 transaction->num_blocks = transaction->main_num_blocks; 3009 transaction->sub_num_blocks = 0; 3010 3011 cache->transaction_hash->Insert(newTransaction); 3012 cache->last_transaction = newTransaction; 3013 3014 return newTransaction->id; 3015 } 3016 3017 3018 status_t 3019 cache_abort_sub_transaction(void* _cache, int32 id) 3020 { 3021 block_cache* cache = (block_cache*)_cache; 3022 TransactionLocker locker(cache); 3023 3024 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id)); 3025 3026 cache_transaction* transaction = lookup_transaction(cache, id); 3027 if (transaction == NULL) { 3028 panic("cache_abort_sub_transaction(): invalid transaction ID\n"); 3029 return B_BAD_VALUE; 3030 } 3031 if (!transaction->has_sub_transaction) 3032 return B_BAD_VALUE; 3033 3034 T(Abort(cache, transaction)); 3035 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3036 3037 // revert all changes back to the version of the parent 3038 3039 cached_block* block = transaction->first_block; 3040 cached_block* last = NULL; 3041 cached_block* next; 3042 for (; block != NULL; block = next) { 3043 next = block->transaction_next; 3044 3045 if (block->parent_data == NULL) { 3046 // The parent transaction didn't change the block, but the sub 3047 // transaction did - we need to revert to the original data. 3048 // The block is no longer part of the transaction 3049 if (block->original_data != NULL) { 3050 // The block might not have original data if was empty 3051 memcpy(block->current_data, block->original_data, 3052 cache->block_size); 3053 } 3054 3055 if (last != NULL) 3056 last->transaction_next = next; 3057 else 3058 transaction->first_block = next; 3059 3060 block->transaction_next = NULL; 3061 block->transaction = NULL; 3062 transaction->num_blocks--; 3063 3064 if (block->previous_transaction == NULL) { 3065 cache->Free(block->original_data); 3066 block->original_data = NULL; 3067 block->is_dirty = false; 3068 3069 if (block->ref_count == 0) { 3070 // Move the block into the unused list if possible 3071 block->unused = true; 3072 cache->unused_blocks.Add(block); 3073 cache->unused_block_count++; 3074 } 3075 } 3076 } else { 3077 if (block->parent_data != block->current_data) { 3078 // The block has been changed and must be restored - the block 3079 // is still dirty and part of the transaction 3080 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): " 3081 "restored contents of block %" B_PRIdOFF "\n", 3082 transaction->id, block->block_number)); 3083 memcpy(block->current_data, block->parent_data, 3084 cache->block_size); 3085 cache->Free(block->parent_data); 3086 // The block stays dirty 3087 } 3088 block->parent_data = NULL; 3089 last = block; 3090 } 3091 3092 block->discard = false; 3093 } 3094 3095 // all subsequent changes will go into the main transaction 3096 transaction->has_sub_transaction = false; 3097 transaction->sub_num_blocks = 0; 3098 3099 return B_OK; 3100 } 3101 3102 3103 status_t 3104 cache_start_sub_transaction(void* _cache, int32 id) 3105 { 3106 block_cache* cache = (block_cache*)_cache; 3107 TransactionLocker locker(cache); 3108 3109 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id)); 3110 3111 cache_transaction* transaction = lookup_transaction(cache, id); 3112 if (transaction == NULL) { 3113 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n", 3114 id); 3115 return B_BAD_VALUE; 3116 } 3117 3118 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3119 3120 // move all changed blocks up to the parent 3121 3122 cached_block* block = transaction->first_block; 3123 cached_block* next; 3124 for (; block != NULL; block = next) { 3125 next = block->transaction_next; 3126 3127 if (block->parent_data != NULL) { 3128 // There already is an older sub transaction - we acknowledge 3129 // its changes and move its blocks up to the parent 3130 ASSERT(transaction->has_sub_transaction); 3131 cache->FreeBlockParentData(block); 3132 } 3133 if (block->discard) { 3134 // This block has been discarded in the parent transaction. 3135 // Just throw away any changes made in this transaction, so that 3136 // it can still be reverted to its original contents if needed 3137 ASSERT(block->previous_transaction == NULL); 3138 if (block->original_data != NULL) { 3139 memcpy(block->current_data, block->original_data, 3140 cache->block_size); 3141 block->original_data = NULL; 3142 } 3143 continue; 3144 } 3145 3146 // we "allocate" the parent data lazily, that means, we don't copy 3147 // the data (and allocate memory for it) until we need to 3148 block->parent_data = block->current_data; 3149 } 3150 3151 // all subsequent changes will go into the sub transaction 3152 transaction->has_sub_transaction = true; 3153 transaction->main_num_blocks = transaction->num_blocks; 3154 transaction->sub_num_blocks = 0; 3155 T(Action("start-sub", cache, transaction)); 3156 3157 return B_OK; 3158 } 3159 3160 3161 /*! Adds a transaction listener that gets notified when the transaction 3162 is ended, aborted, written, or idle as specified by \a events. 3163 The listener gets automatically removed when the transaction ends. 3164 */ 3165 status_t 3166 cache_add_transaction_listener(void* _cache, int32 id, int32 events, 3167 transaction_notification_hook hook, void* data) 3168 { 3169 block_cache* cache = (block_cache*)_cache; 3170 TransactionLocker locker(cache); 3171 3172 cache_transaction* transaction = lookup_transaction(cache, id); 3173 if (transaction == NULL) 3174 return B_BAD_VALUE; 3175 3176 return add_transaction_listener(cache, transaction, events, hook, data); 3177 } 3178 3179 3180 status_t 3181 cache_remove_transaction_listener(void* _cache, int32 id, 3182 transaction_notification_hook hookFunction, void* data) 3183 { 3184 block_cache* cache = (block_cache*)_cache; 3185 TransactionLocker locker(cache); 3186 3187 cache_transaction* transaction = lookup_transaction(cache, id); 3188 if (transaction == NULL) 3189 return B_BAD_VALUE; 3190 3191 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 3192 while (iterator.HasNext()) { 3193 cache_listener* listener = iterator.Next(); 3194 if (listener->data == data && listener->hook == hookFunction) { 3195 iterator.Remove(); 3196 3197 if (listener->events_pending != 0) { 3198 MutexLocker _(sNotificationsLock); 3199 if (listener->events_pending != 0) 3200 cache->pending_notifications.Remove(listener); 3201 } 3202 delete listener; 3203 return B_OK; 3204 } 3205 } 3206 3207 return B_ENTRY_NOT_FOUND; 3208 } 3209 3210 3211 status_t 3212 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly, 3213 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData) 3214 { 3215 cached_block* block = (cached_block*)*_cookie; 3216 block_cache* cache = (block_cache*)_cache; 3217 TransactionLocker locker(cache); 3218 3219 cache_transaction* transaction = lookup_transaction(cache, id); 3220 if (transaction == NULL || !transaction->open) 3221 return B_BAD_VALUE; 3222 3223 if (block == NULL) 3224 block = transaction->first_block; 3225 else 3226 block = block->transaction_next; 3227 3228 if (transaction->has_sub_transaction) { 3229 if (mainOnly) { 3230 // find next block that the parent changed 3231 while (block != NULL && block->parent_data == NULL) 3232 block = block->transaction_next; 3233 } else { 3234 // find next non-discarded block 3235 while (block != NULL && block->discard) 3236 block = block->transaction_next; 3237 } 3238 } 3239 3240 if (block == NULL) 3241 return B_ENTRY_NOT_FOUND; 3242 3243 if (_blockNumber) 3244 *_blockNumber = block->block_number; 3245 if (_data) 3246 *_data = mainOnly ? block->parent_data : block->current_data; 3247 if (_unchangedData) 3248 *_unchangedData = block->original_data; 3249 3250 *_cookie = (addr_t)block; 3251 return B_OK; 3252 } 3253 3254 3255 int32 3256 cache_blocks_in_transaction(void* _cache, int32 id) 3257 { 3258 block_cache* cache = (block_cache*)_cache; 3259 TransactionLocker locker(cache); 3260 3261 cache_transaction* transaction = lookup_transaction(cache, id); 3262 if (transaction == NULL) 3263 return B_BAD_VALUE; 3264 3265 return transaction->num_blocks; 3266 } 3267 3268 3269 /*! Returns the number of blocks that are part of the main transaction. If this 3270 transaction does not have a sub transaction yet, this is the same value as 3271 cache_blocks_in_transaction() would return. 3272 */ 3273 int32 3274 cache_blocks_in_main_transaction(void* _cache, int32 id) 3275 { 3276 block_cache* cache = (block_cache*)_cache; 3277 TransactionLocker locker(cache); 3278 3279 cache_transaction* transaction = lookup_transaction(cache, id); 3280 if (transaction == NULL) 3281 return B_BAD_VALUE; 3282 3283 if (transaction->has_sub_transaction) 3284 return transaction->main_num_blocks; 3285 3286 return transaction->num_blocks; 3287 } 3288 3289 3290 int32 3291 cache_blocks_in_sub_transaction(void* _cache, int32 id) 3292 { 3293 block_cache* cache = (block_cache*)_cache; 3294 TransactionLocker locker(cache); 3295 3296 cache_transaction* transaction = lookup_transaction(cache, id); 3297 if (transaction == NULL) 3298 return B_BAD_VALUE; 3299 3300 return transaction->sub_num_blocks; 3301 } 3302 3303 3304 // #pragma mark - public block cache API 3305 3306 3307 void 3308 block_cache_delete(void* _cache, bool allowWrites) 3309 { 3310 block_cache* cache = (block_cache*)_cache; 3311 3312 if (allowWrites) 3313 block_cache_sync(cache); 3314 3315 mutex_lock(&sCachesLock); 3316 sCaches.Remove(cache); 3317 mutex_unlock(&sCachesLock); 3318 3319 mutex_lock(&cache->lock); 3320 3321 // wait for all blocks to become unbusy 3322 wait_for_busy_reading_blocks(cache); 3323 wait_for_busy_writing_blocks(cache); 3324 3325 // free all blocks 3326 3327 cached_block* block = cache->hash->Clear(true); 3328 while (block != NULL) { 3329 cached_block* next = block->next; 3330 cache->FreeBlock(block); 3331 block = next; 3332 } 3333 3334 // free all transactions (they will all be aborted) 3335 3336 cache_transaction* transaction = cache->transaction_hash->Clear(true); 3337 while (transaction != NULL) { 3338 cache_transaction* next = transaction->next; 3339 delete transaction; 3340 transaction = next; 3341 } 3342 3343 delete cache; 3344 } 3345 3346 3347 void* 3348 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly) 3349 { 3350 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize, 3351 readOnly); 3352 if (cache == NULL) 3353 return NULL; 3354 3355 if (cache->Init() != B_OK) { 3356 delete cache; 3357 return NULL; 3358 } 3359 3360 MutexLocker _(sCachesLock); 3361 sCaches.Add(cache); 3362 3363 return cache; 3364 } 3365 3366 3367 status_t 3368 block_cache_sync(void* _cache) 3369 { 3370 block_cache* cache = (block_cache*)_cache; 3371 3372 // We will sync all dirty blocks to disk that have a completed 3373 // transaction or no transaction only 3374 3375 MutexLocker locker(&cache->lock); 3376 3377 BlockWriter writer(cache); 3378 BlockTable::Iterator iterator(cache->hash); 3379 3380 while (iterator.HasNext()) { 3381 cached_block* block = iterator.Next(); 3382 if (block->CanBeWritten()) 3383 writer.Add(block); 3384 } 3385 3386 status_t status = writer.Write(); 3387 3388 locker.Unlock(); 3389 3390 wait_for_notifications(cache); 3391 // make sure that all pending TRANSACTION_WRITTEN notifications 3392 // are handled after we return 3393 return status; 3394 } 3395 3396 3397 status_t 3398 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks) 3399 { 3400 block_cache* cache = (block_cache*)_cache; 3401 3402 // We will sync all dirty blocks to disk that have a completed 3403 // transaction or no transaction only 3404 3405 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 3406 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF 3407 " (max %" B_PRIdOFF ")", 3408 blockNumber, cache->max_blocks - 1); 3409 return B_BAD_VALUE; 3410 } 3411 3412 MutexLocker locker(&cache->lock); 3413 BlockWriter writer(cache); 3414 3415 for (; numBlocks > 0; numBlocks--, blockNumber++) { 3416 cached_block* block = cache->hash->Lookup(blockNumber); 3417 if (block == NULL) 3418 continue; 3419 3420 if (block->CanBeWritten()) 3421 writer.Add(block); 3422 } 3423 3424 status_t status = writer.Write(); 3425 3426 locker.Unlock(); 3427 3428 wait_for_notifications(cache); 3429 // make sure that all pending TRANSACTION_WRITTEN notifications 3430 // are handled after we return 3431 return status; 3432 } 3433 3434 3435 /*! Discards a block from the current transaction or from the cache. 3436 You have to call this function when you no longer use a block, ie. when it 3437 might be reclaimed by the file cache in order to make sure they won't 3438 interfere. 3439 */ 3440 void 3441 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks) 3442 { 3443 // TODO: this could be a nice place to issue the ATA trim command 3444 block_cache* cache = (block_cache*)_cache; 3445 TransactionLocker locker(cache); 3446 3447 BlockWriter writer(cache); 3448 3449 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3450 cached_block* block = cache->hash->Lookup(blockNumber); 3451 if (block != NULL && block->previous_transaction != NULL) 3452 writer.Add(block); 3453 } 3454 3455 writer.Write(); 3456 // TODO: this can fail, too! 3457 3458 blockNumber -= numBlocks; 3459 // reset blockNumber to its original value 3460 3461 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3462 cached_block* block = cache->hash->Lookup(blockNumber); 3463 if (block == NULL) 3464 continue; 3465 3466 ASSERT(block->previous_transaction == NULL); 3467 3468 if (block->unused) { 3469 cache->unused_blocks.Remove(block); 3470 cache->unused_block_count--; 3471 cache->RemoveBlock(block); 3472 } else { 3473 if (block->transaction != NULL && block->parent_data != NULL 3474 && block->parent_data != block->current_data) { 3475 panic("Discarded block %" B_PRIdOFF " has already been changed in this " 3476 "transaction!", blockNumber); 3477 } 3478 3479 // mark it as discarded (in the current transaction only, if any) 3480 block->discard = true; 3481 } 3482 } 3483 } 3484 3485 3486 status_t 3487 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction) 3488 { 3489 block_cache* cache = (block_cache*)_cache; 3490 MutexLocker locker(&cache->lock); 3491 3492 if (cache->read_only) { 3493 panic("tried to make block writable on a read-only cache!"); 3494 return B_ERROR; 3495 } 3496 3497 // TODO: this can be done better! 3498 void* block = get_writable_cached_block(cache, blockNumber, 3499 blockNumber, 1, transaction, false); 3500 if (block != NULL) { 3501 put_cached_block((block_cache*)_cache, blockNumber); 3502 return B_OK; 3503 } 3504 3505 return B_ERROR; 3506 } 3507 3508 3509 void* 3510 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base, 3511 off_t length, int32 transaction) 3512 { 3513 block_cache* cache = (block_cache*)_cache; 3514 MutexLocker locker(&cache->lock); 3515 3516 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3517 blockNumber, transaction)); 3518 if (cache->read_only) 3519 panic("tried to get writable block on a read-only cache!"); 3520 3521 return get_writable_cached_block(cache, blockNumber, base, length, 3522 transaction, false); 3523 } 3524 3525 3526 void* 3527 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction) 3528 { 3529 return block_cache_get_writable_etc(_cache, blockNumber, 3530 blockNumber, 1, transaction); 3531 } 3532 3533 3534 void* 3535 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction) 3536 { 3537 block_cache* cache = (block_cache*)_cache; 3538 MutexLocker locker(&cache->lock); 3539 3540 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3541 blockNumber, transaction)); 3542 if (cache->read_only) 3543 panic("tried to get empty writable block on a read-only cache!"); 3544 3545 return get_writable_cached_block((block_cache*)_cache, blockNumber, 3546 blockNumber, 1, transaction, true); 3547 } 3548 3549 3550 const void* 3551 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length) 3552 { 3553 block_cache* cache = (block_cache*)_cache; 3554 MutexLocker locker(&cache->lock); 3555 bool allocated; 3556 3557 cached_block* block = get_cached_block(cache, blockNumber, &allocated); 3558 if (block == NULL) 3559 return NULL; 3560 3561 #if BLOCK_CACHE_DEBUG_CHANGED 3562 if (block->compare == NULL) 3563 block->compare = cache->Allocate(); 3564 if (block->compare != NULL) 3565 memcpy(block->compare, block->current_data, cache->block_size); 3566 #endif 3567 TB(Get(cache, block)); 3568 3569 return block->current_data; 3570 } 3571 3572 3573 const void* 3574 block_cache_get(void* _cache, off_t blockNumber) 3575 { 3576 return block_cache_get_etc(_cache, blockNumber, blockNumber, 1); 3577 } 3578 3579 3580 /*! Changes the internal status of a writable block to \a dirty. This can be 3581 helpful in case you realize you don't need to change that block anymore 3582 for whatever reason. 3583 3584 Note, you must only use this function on blocks that were acquired 3585 writable! 3586 */ 3587 status_t 3588 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty, 3589 int32 transaction) 3590 { 3591 block_cache* cache = (block_cache*)_cache; 3592 MutexLocker locker(&cache->lock); 3593 3594 cached_block* block = cache->hash->Lookup(blockNumber); 3595 if (block == NULL) 3596 return B_BAD_VALUE; 3597 if (block->is_dirty == dirty) { 3598 // there is nothing to do for us 3599 return B_OK; 3600 } 3601 3602 // TODO: not yet implemented 3603 if (dirty) 3604 panic("block_cache_set_dirty(): not yet implemented that way!\n"); 3605 3606 return B_OK; 3607 } 3608 3609 3610 void 3611 block_cache_put(void* _cache, off_t blockNumber) 3612 { 3613 block_cache* cache = (block_cache*)_cache; 3614 MutexLocker locker(&cache->lock); 3615 3616 put_cached_block(cache, blockNumber); 3617 } 3618 3619