1 /* 2 * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <block_cache.h> 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <errno.h> 13 14 #include <KernelExport.h> 15 #include <fs_cache.h> 16 17 #include <condition_variable.h> 18 #include <lock.h> 19 #include <low_resource_manager.h> 20 #include <slab/Slab.h> 21 #include <tracing.h> 22 #include <util/kernel_cpp.h> 23 #include <util/DoublyLinkedList.h> 24 #include <util/AutoLock.h> 25 #include <vm/vm_page.h> 26 27 #include "kernel_debug_config.h" 28 29 30 // TODO: this is a naive but growing implementation to test the API: 31 // block reading/writing is not at all optimized for speed, it will 32 // just read and write single blocks. 33 // TODO: the retrieval/copy of the original data could be delayed until the 34 // new data must be written, ie. in low memory situations. 35 36 //#define TRACE_BLOCK_CACHE 37 #ifdef TRACE_BLOCK_CACHE 38 # define TRACE(x) dprintf x 39 #else 40 # define TRACE(x) ; 41 #endif 42 43 #define TRACE_ALWAYS(x) dprintf x 44 45 // This macro is used for fatal situations that are acceptable in a running 46 // system, like out of memory situations - should only panic for debugging. 47 #define FATAL(x) panic x 48 49 static const bigtime_t kTransactionIdleTime = 2000000LL; 50 // a transaction is considered idle after 2 seconds of inactivity 51 52 53 struct cache_transaction; 54 struct cached_block; 55 struct block_cache; 56 typedef DoublyLinkedListLink<cached_block> block_link; 57 58 struct cached_block { 59 cached_block* next; // next in hash 60 cached_block* transaction_next; 61 block_link link; 62 off_t block_number; 63 void* current_data; 64 // The data that is seen by everyone using the API; this one is always 65 // present. 66 void* original_data; 67 // When in a transaction, this contains the original data from before 68 // the transaction. 69 void* parent_data; 70 // This is a lazily alloced buffer that represents the contents of the 71 // block in the parent transaction. It may point to current_data if the 72 // contents have been changed only in the parent transaction, or, if the 73 // block has been changed in the current sub transaction already, to a 74 // new block containing the contents changed in the parent transaction. 75 // If this is NULL, the block has not been changed in the parent 76 // transaction at all. 77 #if BLOCK_CACHE_DEBUG_CHANGED 78 void* compare; 79 #endif 80 int32 ref_count; 81 int32 last_accessed; 82 bool busy_reading : 1; 83 bool busy_writing : 1; 84 bool is_writing : 1; 85 // Block has been checked out for writing without transactions, and 86 // cannot be written back if set 87 bool is_dirty : 1; 88 bool unused : 1; 89 bool discard : 1; 90 bool busy_reading_waiters : 1; 91 bool busy_writing_waiters : 1; 92 cache_transaction* transaction; 93 // This is the current active transaction, if any, the block is 94 // currently in (meaning was changed as a part of it). 95 cache_transaction* previous_transaction; 96 // This is set to the last transaction that was ended containing this 97 // block. In this case, the block has not yet written back yet, and 98 // the changed data is either in current_data, or original_data -- the 99 // latter if the block is already being part of another transaction. 100 // There can only be one previous transaction, so when the active 101 // transaction ends, the changes of the previous transaction have to 102 // be written back before that transaction becomes the next previous 103 // transaction. 104 105 bool CanBeWritten() const; 106 int32 LastAccess() const 107 { return system_time() / 1000000L - last_accessed; } 108 }; 109 110 typedef DoublyLinkedList<cached_block, 111 DoublyLinkedListMemberGetLink<cached_block, 112 &cached_block::link> > block_list; 113 114 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> { 115 int32 transaction_id; 116 int32 events_pending; 117 int32 events; 118 transaction_notification_hook hook; 119 void* data; 120 bool delete_after_event; 121 }; 122 123 typedef DoublyLinkedList<cache_notification> NotificationList; 124 125 struct BlockHash { 126 typedef off_t KeyType; 127 typedef cached_block ValueType; 128 129 size_t HashKey(KeyType key) const 130 { 131 return key; 132 } 133 134 size_t Hash(ValueType* block) const 135 { 136 return block->block_number; 137 } 138 139 bool Compare(KeyType key, ValueType* block) const 140 { 141 return block->block_number == key; 142 } 143 144 ValueType*& GetLink(ValueType* value) const 145 { 146 return value->next; 147 } 148 }; 149 150 typedef BOpenHashTable<BlockHash> BlockTable; 151 152 153 struct TransactionHash { 154 typedef int32 KeyType; 155 typedef cache_transaction ValueType; 156 157 size_t HashKey(KeyType key) const 158 { 159 return key; 160 } 161 162 size_t Hash(ValueType* transaction) const; 163 bool Compare(KeyType key, ValueType* transaction) const; 164 ValueType*& GetLink(ValueType* value) const; 165 }; 166 167 typedef BOpenHashTable<TransactionHash> TransactionTable; 168 169 170 struct block_cache : DoublyLinkedListLinkImpl<block_cache> { 171 BlockTable* hash; 172 mutex lock; 173 int fd; 174 off_t max_blocks; 175 size_t block_size; 176 int32 next_transaction_id; 177 cache_transaction* last_transaction; 178 TransactionTable* transaction_hash; 179 180 object_cache* buffer_cache; 181 block_list unused_blocks; 182 uint32 unused_block_count; 183 184 ConditionVariable busy_reading_condition; 185 uint32 busy_reading_count; 186 bool busy_reading_waiters; 187 188 ConditionVariable busy_writing_condition; 189 uint32 busy_writing_count; 190 bool busy_writing_waiters; 191 192 uint32 num_dirty_blocks; 193 bool read_only; 194 195 NotificationList pending_notifications; 196 ConditionVariable condition_variable; 197 198 block_cache(int fd, off_t numBlocks, size_t blockSize, 199 bool readOnly); 200 ~block_cache(); 201 202 status_t Init(); 203 204 void Free(void* buffer); 205 void* Allocate(); 206 void FreeBlock(cached_block* block); 207 cached_block* NewBlock(off_t blockNumber); 208 void FreeBlockParentData(cached_block* block); 209 210 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0); 211 void RemoveBlock(cached_block* block); 212 void DiscardBlock(cached_block* block); 213 214 private: 215 static void _LowMemoryHandler(void* data, uint32 resources, 216 int32 level); 217 cached_block* _GetUnusedBlock(); 218 }; 219 220 struct cache_listener; 221 typedef DoublyLinkedListLink<cache_listener> listener_link; 222 223 struct cache_listener : cache_notification { 224 listener_link link; 225 }; 226 227 typedef DoublyLinkedList<cache_listener, 228 DoublyLinkedListMemberGetLink<cache_listener, 229 &cache_listener::link> > ListenerList; 230 231 232 struct cache_transaction { 233 cache_transaction(); 234 235 cache_transaction* next; 236 int32 id; 237 int32 num_blocks; 238 int32 main_num_blocks; 239 int32 sub_num_blocks; 240 cached_block* first_block; 241 block_list blocks; 242 ListenerList listeners; 243 bool open; 244 bool has_sub_transaction; 245 bigtime_t last_used; 246 int32 busy_writing_count; 247 }; 248 249 250 class BlockWriter { 251 public: 252 BlockWriter(block_cache* cache, 253 size_t max = SIZE_MAX); 254 ~BlockWriter(); 255 256 bool Add(cached_block* block, 257 cache_transaction* transaction = NULL); 258 bool Add(cache_transaction* transaction, 259 bool& hasLeftOvers); 260 261 status_t Write(cache_transaction* transaction = NULL, 262 bool canUnlock = true); 263 264 bool DeletedTransaction() const 265 { return fDeletedTransaction; } 266 267 static status_t WriteBlock(block_cache* cache, 268 cached_block* block); 269 270 private: 271 void* _Data(cached_block* block) const; 272 status_t _WriteBlock(cached_block* block); 273 void _BlockDone(cached_block* block, 274 cache_transaction* transaction); 275 void _UnmarkWriting(cached_block* block); 276 277 static int _CompareBlocks(const void* _blockA, 278 const void* _blockB); 279 280 private: 281 static const size_t kBufferSize = 64; 282 283 block_cache* fCache; 284 cached_block* fBuffer[kBufferSize]; 285 cached_block** fBlocks; 286 size_t fCount; 287 size_t fTotal; 288 size_t fCapacity; 289 size_t fMax; 290 status_t fStatus; 291 bool fDeletedTransaction; 292 }; 293 294 295 class TransactionLocking { 296 public: 297 inline bool Lock(block_cache* cache) 298 { 299 mutex_lock(&cache->lock); 300 301 while (cache->busy_writing_count != 0) { 302 // wait for all blocks to be written 303 ConditionVariableEntry entry; 304 cache->busy_writing_condition.Add(&entry); 305 cache->busy_writing_waiters = true; 306 307 mutex_unlock(&cache->lock); 308 309 entry.Wait(); 310 311 mutex_lock(&cache->lock); 312 } 313 314 return true; 315 } 316 317 inline void Unlock(block_cache* cache) 318 { 319 mutex_unlock(&cache->lock); 320 } 321 }; 322 323 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker; 324 325 326 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 327 namespace BlockTracing { 328 329 class Action : public AbstractTraceEntry { 330 public: 331 Action(block_cache* cache, cached_block* block) 332 : 333 fCache(cache), 334 fBlockNumber(block->block_number), 335 fIsDirty(block->is_dirty), 336 fHasOriginal(block->original_data != NULL), 337 fHasParent(block->parent_data != NULL), 338 fTransactionID(-1), 339 fPreviousID(-1) 340 { 341 if (block->transaction != NULL) 342 fTransactionID = block->transaction->id; 343 if (block->previous_transaction != NULL) 344 fPreviousID = block->previous_transaction->id; 345 } 346 347 virtual void AddDump(TraceOutput& out) 348 { 349 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32 350 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber, 351 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-', 352 fHasParent ? 'p' : '-', fTransactionID, fPreviousID); 353 } 354 355 virtual const char* _Action() const = 0; 356 357 private: 358 block_cache* fCache; 359 uint64 fBlockNumber; 360 bool fIsDirty; 361 bool fHasOriginal; 362 bool fHasParent; 363 int32 fTransactionID; 364 int32 fPreviousID; 365 }; 366 367 class Get : public Action { 368 public: 369 Get(block_cache* cache, cached_block* block) 370 : 371 Action(cache, block) 372 { 373 Initialized(); 374 } 375 376 virtual const char* _Action() const { return "get"; } 377 }; 378 379 class Put : public Action { 380 public: 381 Put(block_cache* cache, cached_block* block) 382 : 383 Action(cache, block) 384 { 385 Initialized(); 386 } 387 388 virtual const char* _Action() const { return "put"; } 389 }; 390 391 class Read : public Action { 392 public: 393 Read(block_cache* cache, cached_block* block) 394 : 395 Action(cache, block) 396 { 397 Initialized(); 398 } 399 400 virtual const char* _Action() const { return "read"; } 401 }; 402 403 class Write : public Action { 404 public: 405 Write(block_cache* cache, cached_block* block) 406 : 407 Action(cache, block) 408 { 409 Initialized(); 410 } 411 412 virtual const char* _Action() const { return "write"; } 413 }; 414 415 class Flush : public Action { 416 public: 417 Flush(block_cache* cache, cached_block* block, bool getUnused = false) 418 : 419 Action(cache, block), 420 fGetUnused(getUnused) 421 { 422 Initialized(); 423 } 424 425 virtual const char* _Action() const 426 { return fGetUnused ? "get-unused" : "flush"; } 427 428 private: 429 bool fGetUnused; 430 }; 431 432 class Error : public AbstractTraceEntry { 433 public: 434 Error(block_cache* cache, uint64 blockNumber, const char* message, 435 status_t status = B_OK) 436 : 437 fCache(cache), 438 fBlockNumber(blockNumber), 439 fMessage(message), 440 fStatus(status) 441 { 442 Initialized(); 443 } 444 445 virtual void AddDump(TraceOutput& out) 446 { 447 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s", 448 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "", 449 fStatus != B_OK ? strerror(fStatus) : ""); 450 } 451 452 private: 453 block_cache* fCache; 454 uint64 fBlockNumber; 455 const char* fMessage; 456 status_t fStatus; 457 }; 458 459 #if BLOCK_CACHE_BLOCK_TRACING >= 2 460 class BlockData : public AbstractTraceEntry { 461 public: 462 enum { 463 kCurrent = 0x01, 464 kParent = 0x02, 465 kOriginal = 0x04 466 }; 467 468 BlockData(block_cache* cache, cached_block* block, const char* message) 469 : 470 fCache(cache), 471 fSize(cache->block_size), 472 fBlockNumber(block->block_number), 473 fMessage(message) 474 { 475 _Allocate(fCurrent, block->current_data); 476 _Allocate(fParent, block->parent_data); 477 _Allocate(fOriginal, block->original_data); 478 479 #if KTRACE_PRINTF_STACK_TRACE 480 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 481 false); 482 #endif 483 484 Initialized(); 485 } 486 487 virtual void AddDump(TraceOutput& out) 488 { 489 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s", 490 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-', 491 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-', 492 fMessage); 493 } 494 495 #if KTRACE_PRINTF_STACK_TRACE 496 virtual void DumpStackTrace(TraceOutput& out) 497 { 498 out.PrintStackTrace(fStackTrace); 499 } 500 #endif 501 502 void DumpBlocks(uint32 which, uint32 offset, uint32 size) 503 { 504 if ((which & kCurrent) != 0) 505 DumpBlock(kCurrent, offset, size); 506 if ((which & kParent) != 0) 507 DumpBlock(kParent, offset, size); 508 if ((which & kOriginal) != 0) 509 DumpBlock(kOriginal, offset, size); 510 } 511 512 void DumpBlock(uint32 which, uint32 offset, uint32 size) 513 { 514 if (offset > fSize) { 515 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize); 516 return; 517 } 518 if (offset + size > fSize) 519 size = fSize - offset; 520 521 const char* label; 522 uint8* data; 523 524 if ((which & kCurrent) != 0) { 525 label = "current"; 526 data = fCurrent; 527 } else if ((which & kParent) != 0) { 528 label = "parent"; 529 data = fParent; 530 } else if ((which & kOriginal) != 0) { 531 label = "original"; 532 data = fOriginal; 533 } else 534 return; 535 536 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size); 537 538 static const uint32 kBlockSize = 16; 539 data += offset; 540 541 for (uint32 i = 0; i < size;) { 542 int start = i; 543 544 kprintf(" %04" B_PRIx32 " ", i); 545 for (; i < start + kBlockSize; i++) { 546 if (!(i % 4)) 547 kprintf(" "); 548 549 if (i >= size) 550 kprintf(" "); 551 else 552 kprintf("%02x", data[i]); 553 } 554 555 kprintf("\n"); 556 } 557 } 558 559 private: 560 void _Allocate(uint8*& target, void* source) 561 { 562 if (source == NULL) { 563 target = NULL; 564 return; 565 } 566 567 target = alloc_tracing_buffer_memcpy(source, fSize, false); 568 } 569 570 block_cache* fCache; 571 uint32 fSize; 572 uint64 fBlockNumber; 573 const char* fMessage; 574 uint8* fCurrent; 575 uint8* fParent; 576 uint8* fOriginal; 577 #if KTRACE_PRINTF_STACK_TRACE 578 tracing_stack_trace* fStackTrace; 579 #endif 580 }; 581 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 582 583 } // namespace BlockTracing 584 585 # define TB(x) new(std::nothrow) BlockTracing::x; 586 #else 587 # define TB(x) ; 588 #endif 589 590 #if BLOCK_CACHE_BLOCK_TRACING >= 2 591 # define TB2(x) new(std::nothrow) BlockTracing::x; 592 #else 593 # define TB2(x) ; 594 #endif 595 596 597 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 598 namespace TransactionTracing { 599 600 class Action : public AbstractTraceEntry { 601 public: 602 Action(const char* label, block_cache* cache, 603 cache_transaction* transaction) 604 : 605 fCache(cache), 606 fTransaction(transaction), 607 fID(transaction->id), 608 fSub(transaction->has_sub_transaction), 609 fNumBlocks(transaction->num_blocks), 610 fSubNumBlocks(transaction->sub_num_blocks) 611 { 612 strlcpy(fLabel, label, sizeof(fLabel)); 613 Initialized(); 614 } 615 616 virtual void AddDump(TraceOutput& out) 617 { 618 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s" 619 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction, 620 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks); 621 } 622 623 private: 624 char fLabel[12]; 625 block_cache* fCache; 626 cache_transaction* fTransaction; 627 int32 fID; 628 bool fSub; 629 int32 fNumBlocks; 630 int32 fSubNumBlocks; 631 }; 632 633 class Detach : public AbstractTraceEntry { 634 public: 635 Detach(block_cache* cache, cache_transaction* transaction, 636 cache_transaction* newTransaction) 637 : 638 fCache(cache), 639 fTransaction(transaction), 640 fID(transaction->id), 641 fSub(transaction->has_sub_transaction), 642 fNewTransaction(newTransaction), 643 fNewID(newTransaction->id) 644 { 645 Initialized(); 646 } 647 648 virtual void AddDump(TraceOutput& out) 649 { 650 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")" 651 "from transaction %p (id %" B_PRId32 ")%s", 652 fCache, fNewTransaction, fNewID, fTransaction, fID, 653 fSub ? " sub" : ""); 654 } 655 656 private: 657 block_cache* fCache; 658 cache_transaction* fTransaction; 659 int32 fID; 660 bool fSub; 661 cache_transaction* fNewTransaction; 662 int32 fNewID; 663 }; 664 665 class Abort : public AbstractTraceEntry { 666 public: 667 Abort(block_cache* cache, cache_transaction* transaction) 668 : 669 fCache(cache), 670 fTransaction(transaction), 671 fID(transaction->id), 672 fNumBlocks(0) 673 { 674 bool isSub = transaction->has_sub_transaction; 675 fNumBlocks = isSub ? transaction->sub_num_blocks 676 : transaction->num_blocks; 677 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t)); 678 if (fBlocks != NULL) { 679 cached_block* block = transaction->first_block; 680 for (int32 i = 0; block != NULL && i < fNumBlocks; 681 block = block->transaction_next) { 682 fBlocks[i++] = block->block_number; 683 } 684 } else 685 fNumBlocks = 0; 686 687 #if KTRACE_PRINTF_STACK_TRACE 688 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 689 false); 690 #endif 691 692 Initialized(); 693 } 694 695 virtual void AddDump(TraceOutput& out) 696 { 697 out.Print("block cache %p, abort transaction " 698 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID); 699 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++) 700 out.Print(" %" B_PRIdOFF, fBlocks[i]); 701 } 702 703 #if KTRACE_PRINTF_STACK_TRACE 704 virtual void DumpStackTrace(TraceOutput& out) 705 { 706 out.PrintStackTrace(fStackTrace); 707 } 708 #endif 709 710 private: 711 block_cache* fCache; 712 cache_transaction* fTransaction; 713 int32 fID; 714 off_t* fBlocks; 715 int32 fNumBlocks; 716 #if KTRACE_PRINTF_STACK_TRACE 717 tracing_stack_trace* fStackTrace; 718 #endif 719 }; 720 721 } // namespace TransactionTracing 722 723 # define T(x) new(std::nothrow) TransactionTracing::x; 724 #else 725 # define T(x) ; 726 #endif 727 728 729 static DoublyLinkedList<block_cache> sCaches; 730 static mutex sCachesLock = MUTEX_INITIALIZER("block caches"); 731 static mutex sCachesMemoryUseLock 732 = MUTEX_INITIALIZER("block caches memory use"); 733 static size_t sUsedMemory; 734 static sem_id sEventSemaphore; 735 static mutex sNotificationsLock 736 = MUTEX_INITIALIZER("block cache notifications"); 737 static thread_id sNotifierWriterThread; 738 static DoublyLinkedListLink<block_cache> sMarkCache; 739 // TODO: this only works if the link is the first entry of block_cache 740 static object_cache* sBlockCache; 741 742 743 // #pragma mark - notifications/listener 744 745 746 /*! Checks whether or not this is an event that closes a transaction. */ 747 static inline bool 748 is_closing_event(int32 event) 749 { 750 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0; 751 } 752 753 754 static inline bool 755 is_written_event(int32 event) 756 { 757 return (event & TRANSACTION_WRITTEN) != 0; 758 } 759 760 761 /*! From the specified \a notification, it will remove the lowest pending 762 event, and return that one in \a _event. 763 If there is no pending event anymore, it will return \c false. 764 */ 765 static bool 766 get_next_pending_event(cache_notification* notification, int32* _event) 767 { 768 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) { 769 int32 pending = atomic_and(¬ification->events_pending, 770 ~eventMask); 771 772 bool more = (pending & ~eventMask) != 0; 773 774 if ((pending & eventMask) != 0) { 775 *_event = eventMask; 776 return more; 777 } 778 } 779 780 return false; 781 } 782 783 784 static void 785 flush_pending_notifications(block_cache* cache) 786 { 787 ASSERT_LOCKED_MUTEX(&sCachesLock); 788 789 while (true) { 790 MutexLocker locker(sNotificationsLock); 791 792 cache_notification* notification = cache->pending_notifications.Head(); 793 if (notification == NULL) 794 return; 795 796 bool deleteAfterEvent = false; 797 int32 event = -1; 798 if (!get_next_pending_event(notification, &event)) { 799 // remove the notification if this was the last pending event 800 cache->pending_notifications.Remove(notification); 801 deleteAfterEvent = notification->delete_after_event; 802 } 803 804 if (event >= 0) { 805 // Notify listener, we need to copy the notification, as it might 806 // be removed when we unlock the list. 807 cache_notification copy = *notification; 808 locker.Unlock(); 809 810 copy.hook(copy.transaction_id, event, copy.data); 811 812 locker.Lock(); 813 } 814 815 if (deleteAfterEvent) 816 delete notification; 817 } 818 } 819 820 821 /*! Flushes all pending notifications by calling the appropriate hook 822 functions. 823 Must not be called with a cache lock held. 824 */ 825 static void 826 flush_pending_notifications() 827 { 828 MutexLocker _(sCachesLock); 829 830 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 831 while (iterator.HasNext()) { 832 block_cache* cache = iterator.Next(); 833 834 flush_pending_notifications(cache); 835 } 836 } 837 838 839 /*! Initializes the \a notification as specified. */ 840 static void 841 set_notification(cache_transaction* transaction, 842 cache_notification ¬ification, int32 events, 843 transaction_notification_hook hook, void* data) 844 { 845 notification.transaction_id = transaction != NULL ? transaction->id : -1; 846 notification.events_pending = 0; 847 notification.events = events; 848 notification.hook = hook; 849 notification.data = data; 850 notification.delete_after_event = false; 851 } 852 853 854 /*! Makes sure the notification is deleted. It either deletes it directly, 855 when possible, or marks it for deletion if the notification is pending. 856 */ 857 static void 858 delete_notification(cache_notification* notification) 859 { 860 MutexLocker locker(sNotificationsLock); 861 862 if (notification->events_pending != 0) 863 notification->delete_after_event = true; 864 else 865 delete notification; 866 } 867 868 869 /*! Adds the notification to the pending notifications list, or, if it's 870 already part of it, updates its events_pending field. 871 Also marks the notification to be deleted if \a deleteNotification 872 is \c true. 873 Triggers the notifier thread to run. 874 */ 875 static void 876 add_notification(block_cache* cache, cache_notification* notification, 877 int32 event, bool deleteNotification) 878 { 879 if (notification->hook == NULL) 880 return; 881 882 int32 pending = atomic_or(¬ification->events_pending, event); 883 if (pending == 0) { 884 // not yet part of the notification list 885 MutexLocker locker(sNotificationsLock); 886 if (deleteNotification) 887 notification->delete_after_event = true; 888 cache->pending_notifications.Add(notification); 889 } else if (deleteNotification) { 890 // we might need to delete it ourselves if we're late 891 delete_notification(notification); 892 } 893 894 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE); 895 // We're probably still holding some locks that makes rescheduling 896 // not a good idea at this point. 897 } 898 899 900 /*! Notifies all interested listeners of this transaction about the \a event. 901 If \a event is a closing event (ie. TRANSACTION_ENDED, and 902 TRANSACTION_ABORTED), all listeners except those listening to 903 TRANSACTION_WRITTEN will be removed. 904 */ 905 static void 906 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction, 907 int32 event) 908 { 909 T(Action("notify", cache, transaction)); 910 911 bool isClosing = is_closing_event(event); 912 bool isWritten = is_written_event(event); 913 914 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 915 while (iterator.HasNext()) { 916 cache_listener* listener = iterator.Next(); 917 918 bool remove = (isClosing && !is_written_event(listener->events)) 919 || (isWritten && is_written_event(listener->events)); 920 if (remove) 921 iterator.Remove(); 922 923 if ((listener->events & event) != 0) 924 add_notification(cache, listener, event, remove); 925 else if (remove) 926 delete_notification(listener); 927 } 928 } 929 930 931 /*! Removes and deletes all listeners that are still monitoring this 932 transaction. 933 */ 934 static void 935 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction) 936 { 937 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 938 while (iterator.HasNext()) { 939 cache_listener* listener = iterator.Next(); 940 iterator.Remove(); 941 942 delete_notification(listener); 943 } 944 } 945 946 947 static status_t 948 add_transaction_listener(block_cache* cache, cache_transaction* transaction, 949 int32 events, transaction_notification_hook hookFunction, void* data) 950 { 951 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 952 while (iterator.HasNext()) { 953 cache_listener* listener = iterator.Next(); 954 955 if (listener->data == data && listener->hook == hookFunction) { 956 // this listener already exists, just update it 957 listener->events |= events; 958 return B_OK; 959 } 960 } 961 962 cache_listener* listener = new(std::nothrow) cache_listener; 963 if (listener == NULL) 964 return B_NO_MEMORY; 965 966 set_notification(transaction, *listener, events, hookFunction, data); 967 transaction->listeners.Add(listener); 968 return B_OK; 969 } 970 971 972 // #pragma mark - private transaction 973 974 975 cache_transaction::cache_transaction() 976 { 977 num_blocks = 0; 978 main_num_blocks = 0; 979 sub_num_blocks = 0; 980 first_block = NULL; 981 open = true; 982 has_sub_transaction = false; 983 last_used = system_time(); 984 busy_writing_count = 0; 985 } 986 987 988 static void 989 delete_transaction(block_cache* cache, cache_transaction* transaction) 990 { 991 if (cache->last_transaction == transaction) 992 cache->last_transaction = NULL; 993 994 remove_transaction_listeners(cache, transaction); 995 delete transaction; 996 } 997 998 999 static cache_transaction* 1000 lookup_transaction(block_cache* cache, int32 id) 1001 { 1002 return cache->transaction_hash->Lookup(id); 1003 } 1004 1005 1006 size_t TransactionHash::Hash(cache_transaction* transaction) const 1007 { 1008 return transaction->id; 1009 } 1010 1011 1012 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const 1013 { 1014 return transaction->id == key; 1015 } 1016 1017 1018 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const 1019 { 1020 return value->next; 1021 } 1022 1023 1024 /*! Writes back any changes made to blocks in \a transaction that are still 1025 part of a previous transacton. 1026 */ 1027 static status_t 1028 write_blocks_in_previous_transaction(block_cache* cache, 1029 cache_transaction* transaction) 1030 { 1031 BlockWriter writer(cache); 1032 1033 cached_block* block = transaction->first_block; 1034 for (; block != NULL; block = block->transaction_next) { 1035 if (block->previous_transaction != NULL) { 1036 // need to write back pending changes 1037 writer.Add(block); 1038 } 1039 } 1040 1041 return writer.Write(); 1042 } 1043 1044 1045 // #pragma mark - cached_block 1046 1047 1048 bool 1049 cached_block::CanBeWritten() const 1050 { 1051 return !busy_writing && !busy_reading 1052 && (previous_transaction != NULL 1053 || (transaction == NULL && is_dirty && !is_writing)); 1054 } 1055 1056 1057 // #pragma mark - BlockWriter 1058 1059 1060 BlockWriter::BlockWriter(block_cache* cache, size_t max) 1061 : 1062 fCache(cache), 1063 fBlocks(fBuffer), 1064 fCount(0), 1065 fTotal(0), 1066 fCapacity(kBufferSize), 1067 fMax(max), 1068 fStatus(B_OK), 1069 fDeletedTransaction(false) 1070 { 1071 } 1072 1073 1074 BlockWriter::~BlockWriter() 1075 { 1076 if (fBlocks != fBuffer) 1077 free(fBlocks); 1078 } 1079 1080 1081 /*! Adds the specified block to the to be written array. If no more blocks can 1082 be added, false is returned, otherwise true. 1083 */ 1084 bool 1085 BlockWriter::Add(cached_block* block, cache_transaction* transaction) 1086 { 1087 ASSERT(block->CanBeWritten()); 1088 1089 if (fTotal == fMax) 1090 return false; 1091 1092 if (fCount >= fCapacity) { 1093 // Enlarge array if necessary 1094 cached_block** newBlocks; 1095 size_t newCapacity = max_c(256, fCapacity * 2); 1096 if (fBlocks == fBuffer) 1097 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*)); 1098 else { 1099 newBlocks = (cached_block**)realloc(fBlocks, 1100 newCapacity * sizeof(void*)); 1101 } 1102 1103 if (newBlocks == NULL) { 1104 // Allocating a larger array failed - we need to write back what 1105 // we have synchronously now (this will also clear the array) 1106 Write(transaction, false); 1107 } else { 1108 if (fBlocks == fBuffer) 1109 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*)); 1110 1111 fBlocks = newBlocks; 1112 fCapacity = newCapacity; 1113 } 1114 } 1115 1116 fBlocks[fCount++] = block; 1117 fTotal++; 1118 block->busy_writing = true; 1119 fCache->busy_writing_count++; 1120 if (block->previous_transaction != NULL) 1121 block->previous_transaction->busy_writing_count++; 1122 1123 return true; 1124 } 1125 1126 1127 /*! Adds all blocks of the specified transaction to the to be written array. 1128 If no more blocks can be added, false is returned, otherwise true. 1129 */ 1130 bool 1131 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers) 1132 { 1133 ASSERT(!transaction->open); 1134 1135 if (transaction->busy_writing_count != 0) { 1136 hasLeftOvers = true; 1137 return true; 1138 } 1139 1140 hasLeftOvers = false; 1141 1142 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 1143 while (cached_block* block = blockIterator.Next()) { 1144 if (!block->CanBeWritten()) { 1145 // This block was already part of a previous transaction within this 1146 // writer 1147 hasLeftOvers = true; 1148 continue; 1149 } 1150 if (!Add(block, transaction)) 1151 return false; 1152 1153 if (DeletedTransaction()) 1154 break; 1155 } 1156 1157 return true; 1158 } 1159 1160 1161 /*! Cache must be locked when calling this method, but it will be unlocked 1162 while the blocks are written back. 1163 */ 1164 status_t 1165 BlockWriter::Write(cache_transaction* transaction, bool canUnlock) 1166 { 1167 if (fCount == 0) 1168 return B_OK; 1169 1170 if (canUnlock) 1171 mutex_unlock(&fCache->lock); 1172 1173 // Sort blocks in their on-disk order 1174 // TODO: ideally, this should be handled by the I/O scheduler 1175 1176 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks); 1177 fDeletedTransaction = false; 1178 1179 for (uint32 i = 0; i < fCount; i++) { 1180 status_t status = _WriteBlock(fBlocks[i]); 1181 if (status != B_OK) { 1182 // propagate to global error handling 1183 if (fStatus == B_OK) 1184 fStatus = status; 1185 1186 _UnmarkWriting(fBlocks[i]); 1187 fBlocks[i] = NULL; 1188 // This block will not be marked clean 1189 } 1190 } 1191 1192 if (canUnlock) 1193 mutex_lock(&fCache->lock); 1194 1195 for (uint32 i = 0; i < fCount; i++) 1196 _BlockDone(fBlocks[i], transaction); 1197 1198 fCount = 0; 1199 return fStatus; 1200 } 1201 1202 1203 /*! Writes the specified \a block back to disk. It will always only write back 1204 the oldest change of the block if it is part of more than one transaction. 1205 It will automatically send out TRANSACTION_WRITTEN notices, as well as 1206 delete transactions when they are no longer used, and \a deleteTransaction 1207 is \c true. 1208 */ 1209 /*static*/ status_t 1210 BlockWriter::WriteBlock(block_cache* cache, cached_block* block) 1211 { 1212 BlockWriter writer(cache); 1213 1214 writer.Add(block); 1215 return writer.Write(); 1216 } 1217 1218 1219 void* 1220 BlockWriter::_Data(cached_block* block) const 1221 { 1222 return block->previous_transaction != NULL && block->original_data != NULL 1223 ? block->original_data : block->current_data; 1224 // We first need to write back changes from previous transactions 1225 } 1226 1227 1228 status_t 1229 BlockWriter::_WriteBlock(cached_block* block) 1230 { 1231 ASSERT(block->busy_writing); 1232 1233 TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number)); 1234 TB(Write(fCache, block)); 1235 TB2(BlockData(fCache, block, "before write")); 1236 1237 size_t blockSize = fCache->block_size; 1238 1239 ssize_t written = write_pos(fCache->fd, 1240 block->block_number * blockSize, _Data(block), blockSize); 1241 1242 if (written != (ssize_t)blockSize) { 1243 TB(Error(fCache, block->block_number, "write failed", written)); 1244 TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number, 1245 strerror(errno))); 1246 if (written < 0) 1247 return errno; 1248 1249 return B_IO_ERROR; 1250 } 1251 1252 return B_OK; 1253 } 1254 1255 1256 void 1257 BlockWriter::_BlockDone(cached_block* block, 1258 cache_transaction* transaction) 1259 { 1260 if (block == NULL) { 1261 // An error occured when trying to write this block 1262 return; 1263 } 1264 1265 if (fCache->num_dirty_blocks > 0) 1266 fCache->num_dirty_blocks--; 1267 1268 if (_Data(block) == block->current_data) 1269 block->is_dirty = false; 1270 1271 _UnmarkWriting(block); 1272 1273 cache_transaction* previous = block->previous_transaction; 1274 if (previous != NULL) { 1275 previous->blocks.Remove(block); 1276 block->previous_transaction = NULL; 1277 1278 if (block->original_data != NULL && block->transaction == NULL) { 1279 // This block is not part of a transaction, so it does not need 1280 // its original pointer anymore. 1281 fCache->Free(block->original_data); 1282 block->original_data = NULL; 1283 } 1284 1285 // Has the previous transaction been finished with that write? 1286 if (--previous->num_blocks == 0) { 1287 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id)); 1288 T(Action("written", fCache, previous)); 1289 1290 notify_transaction_listeners(fCache, previous, 1291 TRANSACTION_WRITTEN); 1292 1293 if (transaction != NULL) { 1294 // This function is called while iterating transaction_hash. We 1295 // use RemoveUnchecked so the iterator is still valid. A regular 1296 // Remove can trigger a resize of the hash table which would 1297 // result in the linked items in the table changing order. 1298 fCache->transaction_hash->RemoveUnchecked(transaction); 1299 } else 1300 fCache->transaction_hash->Remove(previous); 1301 1302 delete_transaction(fCache, previous); 1303 fDeletedTransaction = true; 1304 } 1305 } 1306 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) { 1307 // the block is no longer used 1308 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1309 block->unused = true; 1310 fCache->unused_blocks.Add(block); 1311 fCache->unused_block_count++; 1312 } 1313 1314 TB2(BlockData(fCache, block, "after write")); 1315 } 1316 1317 1318 void 1319 BlockWriter::_UnmarkWriting(cached_block* block) 1320 { 1321 block->busy_writing = false; 1322 if (block->previous_transaction != NULL) 1323 block->previous_transaction->busy_writing_count--; 1324 fCache->busy_writing_count--; 1325 1326 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0) 1327 || block->busy_writing_waiters) { 1328 fCache->busy_writing_waiters = false; 1329 block->busy_writing_waiters = false; 1330 fCache->busy_writing_condition.NotifyAll(); 1331 } 1332 } 1333 1334 1335 /*static*/ int 1336 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB) 1337 { 1338 cached_block* blockA = *(cached_block**)_blockA; 1339 cached_block* blockB = *(cached_block**)_blockB; 1340 1341 off_t diff = blockA->block_number - blockB->block_number; 1342 if (diff > 0) 1343 return 1; 1344 1345 return diff < 0 ? -1 : 0; 1346 } 1347 1348 1349 // #pragma mark - block_cache 1350 1351 1352 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize, 1353 bool readOnly) 1354 : 1355 hash(NULL), 1356 fd(_fd), 1357 max_blocks(numBlocks), 1358 block_size(blockSize), 1359 next_transaction_id(1), 1360 last_transaction(NULL), 1361 transaction_hash(NULL), 1362 buffer_cache(NULL), 1363 unused_block_count(0), 1364 busy_reading_count(0), 1365 busy_reading_waiters(false), 1366 busy_writing_count(0), 1367 busy_writing_waiters(0), 1368 num_dirty_blocks(0), 1369 read_only(readOnly) 1370 { 1371 } 1372 1373 1374 /*! Should be called with the cache's lock held. */ 1375 block_cache::~block_cache() 1376 { 1377 unregister_low_resource_handler(&_LowMemoryHandler, this); 1378 1379 delete transaction_hash; 1380 delete hash; 1381 1382 delete_object_cache(buffer_cache); 1383 1384 mutex_destroy(&lock); 1385 } 1386 1387 1388 status_t 1389 block_cache::Init() 1390 { 1391 busy_reading_condition.Init(this, "cache block busy_reading"); 1392 busy_writing_condition.Init(this, "cache block busy writing"); 1393 condition_variable.Init(this, "cache transaction sync"); 1394 mutex_init(&lock, "block cache"); 1395 1396 buffer_cache = create_object_cache_etc("block cache buffers", block_size, 1397 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 1398 if (buffer_cache == NULL) 1399 return B_NO_MEMORY; 1400 1401 hash = new BlockTable(); 1402 if (hash == NULL || hash->Init(1024) != B_OK) 1403 return B_NO_MEMORY; 1404 1405 transaction_hash = new(std::nothrow) TransactionTable(); 1406 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK) 1407 return B_NO_MEMORY; 1408 1409 return register_low_resource_handler(&_LowMemoryHandler, this, 1410 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1411 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0); 1412 } 1413 1414 1415 void 1416 block_cache::Free(void* buffer) 1417 { 1418 if (buffer != NULL) 1419 object_cache_free(buffer_cache, buffer, 0); 1420 } 1421 1422 1423 void* 1424 block_cache::Allocate() 1425 { 1426 void* block = object_cache_alloc(buffer_cache, 0); 1427 if (block != NULL) 1428 return block; 1429 1430 // recycle existing before allocating a new one 1431 RemoveUnusedBlocks(100); 1432 1433 return object_cache_alloc(buffer_cache, 0); 1434 } 1435 1436 1437 void 1438 block_cache::FreeBlock(cached_block* block) 1439 { 1440 Free(block->current_data); 1441 1442 if (block->original_data != NULL || block->parent_data != NULL) { 1443 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n", 1444 block->block_number, block->original_data, block->parent_data); 1445 } 1446 1447 #if BLOCK_CACHE_DEBUG_CHANGED 1448 Free(block->compare); 1449 #endif 1450 1451 object_cache_free(sBlockCache, block, 0); 1452 } 1453 1454 1455 /*! Allocates a new block for \a blockNumber, ready for use */ 1456 cached_block* 1457 block_cache::NewBlock(off_t blockNumber) 1458 { 1459 cached_block* block = NULL; 1460 1461 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1462 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) { 1463 // recycle existing instead of allocating a new one 1464 block = _GetUnusedBlock(); 1465 } 1466 if (block == NULL) { 1467 block = (cached_block*)object_cache_alloc(sBlockCache, 0); 1468 if (block != NULL) { 1469 block->current_data = Allocate(); 1470 if (block->current_data == NULL) { 1471 object_cache_free(sBlockCache, block, 0); 1472 return NULL; 1473 } 1474 } else { 1475 TB(Error(this, blockNumber, "allocation failed")); 1476 dprintf("block allocation failed, unused list is %sempty.\n", 1477 unused_blocks.IsEmpty() ? "" : "not "); 1478 1479 // allocation failed, try to reuse an unused block 1480 block = _GetUnusedBlock(); 1481 if (block == NULL) { 1482 TB(Error(this, blockNumber, "get unused failed")); 1483 FATAL(("could not allocate block!\n")); 1484 return NULL; 1485 } 1486 } 1487 } 1488 1489 block->block_number = blockNumber; 1490 block->ref_count = 0; 1491 block->last_accessed = 0; 1492 block->transaction_next = NULL; 1493 block->transaction = block->previous_transaction = NULL; 1494 block->original_data = NULL; 1495 block->parent_data = NULL; 1496 block->busy_reading = false; 1497 block->busy_writing = false; 1498 block->is_writing = false; 1499 block->is_dirty = false; 1500 block->unused = false; 1501 block->discard = false; 1502 block->busy_reading_waiters = false; 1503 block->busy_writing_waiters = false; 1504 #if BLOCK_CACHE_DEBUG_CHANGED 1505 block->compare = NULL; 1506 #endif 1507 1508 return block; 1509 } 1510 1511 1512 void 1513 block_cache::FreeBlockParentData(cached_block* block) 1514 { 1515 ASSERT(block->parent_data != NULL); 1516 if (block->parent_data != block->current_data) 1517 Free(block->parent_data); 1518 block->parent_data = NULL; 1519 } 1520 1521 1522 void 1523 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld) 1524 { 1525 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count)); 1526 1527 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1528 cached_block* block = iterator.Next();) { 1529 if (minSecondsOld >= block->LastAccess()) { 1530 // The list is sorted by last access 1531 break; 1532 } 1533 if (block->busy_reading || block->busy_writing) 1534 continue; 1535 1536 TB(Flush(this, block)); 1537 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n", 1538 block->block_number, block->last_accessed)); 1539 1540 // this can only happen if no transactions are used 1541 if (block->is_dirty && !block->discard) { 1542 if (block->busy_writing) 1543 continue; 1544 1545 BlockWriter::WriteBlock(this, block); 1546 } 1547 1548 // remove block from lists 1549 iterator.Remove(); 1550 unused_block_count--; 1551 RemoveBlock(block); 1552 1553 if (--count <= 0) 1554 break; 1555 } 1556 } 1557 1558 1559 void 1560 block_cache::RemoveBlock(cached_block* block) 1561 { 1562 hash->Remove(block); 1563 FreeBlock(block); 1564 } 1565 1566 1567 /*! Discards the block from a transaction (this method must not be called 1568 for blocks not part of a transaction). 1569 */ 1570 void 1571 block_cache::DiscardBlock(cached_block* block) 1572 { 1573 ASSERT(block->discard); 1574 ASSERT(block->previous_transaction == NULL); 1575 1576 if (block->parent_data != NULL) 1577 FreeBlockParentData(block); 1578 1579 if (block->original_data != NULL) { 1580 Free(block->original_data); 1581 block->original_data = NULL; 1582 } 1583 1584 RemoveBlock(block); 1585 } 1586 1587 1588 void 1589 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level) 1590 { 1591 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level)); 1592 1593 // free some blocks according to the low memory state 1594 // (if there is enough memory left, we don't free any) 1595 1596 block_cache* cache = (block_cache*)data; 1597 int32 free = 0; 1598 int32 secondsOld = 0; 1599 switch (level) { 1600 case B_NO_LOW_RESOURCE: 1601 return; 1602 case B_LOW_RESOURCE_NOTE: 1603 free = cache->unused_block_count / 8; 1604 secondsOld = 120; 1605 break; 1606 case B_LOW_RESOURCE_WARNING: 1607 free = cache->unused_block_count / 4; 1608 secondsOld = 10; 1609 break; 1610 case B_LOW_RESOURCE_CRITICAL: 1611 free = cache->unused_block_count / 2; 1612 secondsOld = 0; 1613 break; 1614 } 1615 1616 MutexLocker locker(&cache->lock); 1617 1618 if (!locker.IsLocked()) { 1619 // If our block_cache were deleted, it could be that we had 1620 // been called before that deletion went through, therefore, 1621 // acquiring its lock might fail. 1622 return; 1623 } 1624 1625 #ifdef TRACE_BLOCK_CACHE 1626 uint32 oldUnused = cache->unused_block_count; 1627 #endif 1628 1629 cache->RemoveUnusedBlocks(free, secondsOld); 1630 1631 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n", 1632 cache, oldUnused, cache->unused_block_count)); 1633 } 1634 1635 1636 cached_block* 1637 block_cache::_GetUnusedBlock() 1638 { 1639 TRACE(("block_cache: get unused block\n")); 1640 1641 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1642 cached_block* block = iterator.Next();) { 1643 TB(Flush(this, block, true)); 1644 // this can only happen if no transactions are used 1645 if (block->is_dirty && !block->busy_writing && !block->discard) 1646 BlockWriter::WriteBlock(this, block); 1647 1648 // remove block from lists 1649 iterator.Remove(); 1650 unused_block_count--; 1651 hash->Remove(block); 1652 1653 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1654 block->unused = false; 1655 1656 // TODO: see if compare data is handled correctly here! 1657 #if BLOCK_CACHE_DEBUG_CHANGED 1658 if (block->compare != NULL) 1659 Free(block->compare); 1660 #endif 1661 return block; 1662 } 1663 1664 return NULL; 1665 } 1666 1667 1668 // #pragma mark - private block functions 1669 1670 1671 /*! Cache must be locked. 1672 */ 1673 static void 1674 mark_block_busy_reading(block_cache* cache, cached_block* block) 1675 { 1676 block->busy_reading = true; 1677 cache->busy_reading_count++; 1678 } 1679 1680 1681 /*! Cache must be locked. 1682 */ 1683 static void 1684 mark_block_unbusy_reading(block_cache* cache, cached_block* block) 1685 { 1686 block->busy_reading = false; 1687 cache->busy_reading_count--; 1688 1689 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0) 1690 || block->busy_reading_waiters) { 1691 cache->busy_reading_waiters = false; 1692 block->busy_reading_waiters = false; 1693 cache->busy_reading_condition.NotifyAll(); 1694 } 1695 } 1696 1697 1698 /*! Cache must be locked. 1699 */ 1700 static void 1701 wait_for_busy_reading_block(block_cache* cache, cached_block* block) 1702 { 1703 while (block->busy_reading) { 1704 // wait for at least the specified block to be read in 1705 ConditionVariableEntry entry; 1706 cache->busy_reading_condition.Add(&entry); 1707 block->busy_reading_waiters = true; 1708 1709 mutex_unlock(&cache->lock); 1710 1711 entry.Wait(); 1712 1713 mutex_lock(&cache->lock); 1714 } 1715 } 1716 1717 1718 /*! Cache must be locked. 1719 */ 1720 static void 1721 wait_for_busy_reading_blocks(block_cache* cache) 1722 { 1723 while (cache->busy_reading_count != 0) { 1724 // wait for all blocks to be read in 1725 ConditionVariableEntry entry; 1726 cache->busy_reading_condition.Add(&entry); 1727 cache->busy_reading_waiters = true; 1728 1729 mutex_unlock(&cache->lock); 1730 1731 entry.Wait(); 1732 1733 mutex_lock(&cache->lock); 1734 } 1735 } 1736 1737 1738 /*! Cache must be locked. 1739 */ 1740 static void 1741 wait_for_busy_writing_block(block_cache* cache, cached_block* block) 1742 { 1743 while (block->busy_writing) { 1744 // wait for all blocks to be written back 1745 ConditionVariableEntry entry; 1746 cache->busy_writing_condition.Add(&entry); 1747 block->busy_writing_waiters = true; 1748 1749 mutex_unlock(&cache->lock); 1750 1751 entry.Wait(); 1752 1753 mutex_lock(&cache->lock); 1754 } 1755 } 1756 1757 1758 /*! Cache must be locked. 1759 */ 1760 static void 1761 wait_for_busy_writing_blocks(block_cache* cache) 1762 { 1763 while (cache->busy_writing_count != 0) { 1764 // wait for all blocks to be written back 1765 ConditionVariableEntry entry; 1766 cache->busy_writing_condition.Add(&entry); 1767 cache->busy_writing_waiters = true; 1768 1769 mutex_unlock(&cache->lock); 1770 1771 entry.Wait(); 1772 1773 mutex_lock(&cache->lock); 1774 } 1775 } 1776 1777 1778 /*! Removes a reference from the specified \a block. If this was the last 1779 reference, the block is moved into the unused list. 1780 In low memory situations, it will also free some blocks from that list, 1781 but not necessarily the \a block it just released. 1782 */ 1783 static void 1784 put_cached_block(block_cache* cache, cached_block* block) 1785 { 1786 #if BLOCK_CACHE_DEBUG_CHANGED 1787 if (!block->is_dirty && block->compare != NULL 1788 && memcmp(block->current_data, block->compare, cache->block_size)) { 1789 dprintf("new block:\n"); 1790 dump_block((const char*)block->current_data, 256, " "); 1791 dprintf("unchanged block:\n"); 1792 dump_block((const char*)block->compare, 256, " "); 1793 BlockWriter::WriteBlock(cache, block); 1794 panic("block_cache: supposed to be clean block was changed!\n"); 1795 1796 cache->Free(block->compare); 1797 block->compare = NULL; 1798 } 1799 #endif 1800 TB(Put(cache, block)); 1801 1802 if (block->ref_count < 1) { 1803 panic("Invalid ref_count for block %p, cache %p\n", block, cache); 1804 return; 1805 } 1806 1807 if (--block->ref_count == 0 1808 && block->transaction == NULL && block->previous_transaction == NULL) { 1809 // This block is not used anymore, and not part of any transaction 1810 block->is_writing = false; 1811 1812 if (block->discard) { 1813 cache->RemoveBlock(block); 1814 } else { 1815 // put this block in the list of unused blocks 1816 ASSERT(!block->unused); 1817 block->unused = true; 1818 1819 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1820 cache->unused_blocks.Add(block); 1821 cache->unused_block_count++; 1822 } 1823 } 1824 } 1825 1826 1827 static void 1828 put_cached_block(block_cache* cache, off_t blockNumber) 1829 { 1830 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1831 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1832 blockNumber, cache->max_blocks - 1); 1833 } 1834 1835 cached_block* block = cache->hash->Lookup(blockNumber); 1836 if (block != NULL) 1837 put_cached_block(cache, block); 1838 else { 1839 TB(Error(cache, blockNumber, "put unknown")); 1840 } 1841 } 1842 1843 1844 /*! Retrieves the block \a blockNumber from the hash table, if it's already 1845 there, or reads it from the disk. 1846 You need to have the cache locked when calling this function. 1847 1848 \param _allocated tells you whether or not a new block has been allocated 1849 to satisfy your request. 1850 \param readBlock if \c false, the block will not be read in case it was 1851 not already in the cache. The block you retrieve may contain random 1852 data. If \c true, the cache will be temporarily unlocked while the 1853 block is read in. 1854 */ 1855 static cached_block* 1856 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated, 1857 bool readBlock = true) 1858 { 1859 ASSERT_LOCKED_MUTEX(&cache->lock); 1860 1861 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1862 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1863 blockNumber, cache->max_blocks - 1); 1864 return NULL; 1865 } 1866 1867 retry: 1868 cached_block* block = cache->hash->Lookup(blockNumber); 1869 *_allocated = false; 1870 1871 if (block == NULL) { 1872 // put block into cache 1873 block = cache->NewBlock(blockNumber); 1874 if (block == NULL) 1875 return NULL; 1876 1877 cache->hash->Insert(block); 1878 *_allocated = true; 1879 } else if (block->busy_reading) { 1880 // The block is currently busy_reading - wait and try again later 1881 wait_for_busy_reading_block(cache, block); 1882 goto retry; 1883 } 1884 1885 if (block->unused) { 1886 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber)); 1887 block->unused = false; 1888 cache->unused_blocks.Remove(block); 1889 cache->unused_block_count--; 1890 } 1891 1892 if (*_allocated && readBlock) { 1893 // read block into cache 1894 int32 blockSize = cache->block_size; 1895 1896 mark_block_busy_reading(cache, block); 1897 mutex_unlock(&cache->lock); 1898 1899 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize, 1900 block->current_data, blockSize); 1901 1902 mutex_lock(&cache->lock); 1903 if (bytesRead < blockSize) { 1904 cache->RemoveBlock(block); 1905 TB(Error(cache, blockNumber, "read failed", bytesRead)); 1906 1907 TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n", 1908 blockNumber, bytesRead, strerror(errno))); 1909 return NULL; 1910 } 1911 TB(Read(cache, block)); 1912 1913 mark_block_unbusy_reading(cache, block); 1914 } 1915 1916 block->ref_count++; 1917 block->last_accessed = system_time() / 1000000L; 1918 1919 return block; 1920 } 1921 1922 1923 /*! Returns the writable block data for the requested blockNumber. 1924 If \a cleared is true, the block is not read from disk; an empty block 1925 is returned. 1926 1927 This is the only method to insert a block into a transaction. It makes 1928 sure that the previous block contents are preserved in that case. 1929 */ 1930 static void* 1931 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base, 1932 off_t length, int32 transactionID, bool cleared) 1933 { 1934 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 1935 blockNumber, transactionID)); 1936 1937 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1938 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1939 blockNumber, cache->max_blocks - 1); 1940 } 1941 1942 bool allocated; 1943 cached_block* block = get_cached_block(cache, blockNumber, &allocated, 1944 !cleared); 1945 if (block == NULL) 1946 return NULL; 1947 1948 if (block->busy_writing) 1949 wait_for_busy_writing_block(cache, block); 1950 1951 block->discard = false; 1952 1953 // if there is no transaction support, we just return the current block 1954 if (transactionID == -1) { 1955 if (cleared) { 1956 mark_block_busy_reading(cache, block); 1957 mutex_unlock(&cache->lock); 1958 1959 memset(block->current_data, 0, cache->block_size); 1960 1961 mutex_lock(&cache->lock); 1962 mark_block_unbusy_reading(cache, block); 1963 } 1964 1965 block->is_writing = true; 1966 1967 if (!block->is_dirty) { 1968 cache->num_dirty_blocks++; 1969 block->is_dirty = true; 1970 // mark the block as dirty 1971 } 1972 1973 TB(Get(cache, block)); 1974 return block->current_data; 1975 } 1976 1977 cache_transaction* transaction = block->transaction; 1978 1979 if (transaction != NULL && transaction->id != transactionID) { 1980 // TODO: we have to wait here until the other transaction is done. 1981 // Maybe we should even panic, since we can't prevent any deadlocks. 1982 panic("get_writable_cached_block(): asked to get busy writable block " 1983 "(transaction %" B_PRId32 ")\n", block->transaction->id); 1984 put_cached_block(cache, block); 1985 return NULL; 1986 } 1987 if (transaction == NULL && transactionID != -1) { 1988 // get new transaction 1989 transaction = lookup_transaction(cache, transactionID); 1990 if (transaction == NULL) { 1991 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n", 1992 transactionID); 1993 put_cached_block(cache, block); 1994 return NULL; 1995 } 1996 if (!transaction->open) { 1997 panic("get_writable_cached_block(): transaction already done!\n"); 1998 put_cached_block(cache, block); 1999 return NULL; 2000 } 2001 2002 block->transaction = transaction; 2003 2004 // attach the block to the transaction block list 2005 block->transaction_next = transaction->first_block; 2006 transaction->first_block = block; 2007 transaction->num_blocks++; 2008 } 2009 if (transaction != NULL) 2010 transaction->last_used = system_time(); 2011 2012 bool wasUnchanged = block->original_data == NULL 2013 || block->previous_transaction != NULL; 2014 2015 if (!(allocated && cleared) && block->original_data == NULL) { 2016 // we already have data, so we need to preserve it 2017 block->original_data = cache->Allocate(); 2018 if (block->original_data == NULL) { 2019 TB(Error(cache, blockNumber, "allocate original failed")); 2020 FATAL(("could not allocate original_data\n")); 2021 put_cached_block(cache, block); 2022 return NULL; 2023 } 2024 2025 mark_block_busy_reading(cache, block); 2026 mutex_unlock(&cache->lock); 2027 2028 memcpy(block->original_data, block->current_data, cache->block_size); 2029 2030 mutex_lock(&cache->lock); 2031 mark_block_unbusy_reading(cache, block); 2032 } 2033 if (block->parent_data == block->current_data) { 2034 // remember any previous contents for the parent transaction 2035 block->parent_data = cache->Allocate(); 2036 if (block->parent_data == NULL) { 2037 // TODO: maybe we should just continue the current transaction in 2038 // this case... 2039 TB(Error(cache, blockNumber, "allocate parent failed")); 2040 FATAL(("could not allocate parent\n")); 2041 put_cached_block(cache, block); 2042 return NULL; 2043 } 2044 2045 mark_block_busy_reading(cache, block); 2046 mutex_unlock(&cache->lock); 2047 2048 memcpy(block->parent_data, block->current_data, cache->block_size); 2049 2050 mutex_lock(&cache->lock); 2051 mark_block_unbusy_reading(cache, block); 2052 2053 transaction->sub_num_blocks++; 2054 } else if (transaction != NULL && transaction->has_sub_transaction 2055 && block->parent_data == NULL && wasUnchanged) 2056 transaction->sub_num_blocks++; 2057 2058 if (cleared) { 2059 mark_block_busy_reading(cache, block); 2060 mutex_unlock(&cache->lock); 2061 2062 memset(block->current_data, 0, cache->block_size); 2063 2064 mutex_lock(&cache->lock); 2065 mark_block_unbusy_reading(cache, block); 2066 } 2067 2068 block->is_dirty = true; 2069 TB(Get(cache, block)); 2070 TB2(BlockData(cache, block, "get writable")); 2071 2072 return block->current_data; 2073 } 2074 2075 2076 #if DEBUG_BLOCK_CACHE 2077 2078 2079 static void 2080 dump_block(cached_block* block) 2081 { 2082 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32 2083 " %c%c%c%c%c%c %08lx %08lx\n", 2084 (addr_t)block, block->block_number, 2085 (addr_t)block->current_data, (addr_t)block->original_data, 2086 (addr_t)block->parent_data, block->ref_count, block->LastAccess(), 2087 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-', 2088 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-', 2089 block->unused ? 'U' : '-', block->discard ? 'D' : '-', 2090 (addr_t)block->transaction, 2091 (addr_t)block->previous_transaction); 2092 } 2093 2094 2095 static void 2096 dump_block_long(cached_block* block) 2097 { 2098 kprintf("BLOCK %p\n", block); 2099 kprintf(" current data: %p\n", block->current_data); 2100 kprintf(" original data: %p\n", block->original_data); 2101 kprintf(" parent data: %p\n", block->parent_data); 2102 #if BLOCK_CACHE_DEBUG_CHANGED 2103 kprintf(" compare data: %p\n", block->compare); 2104 #endif 2105 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count); 2106 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess()); 2107 kprintf(" flags: "); 2108 if (block->busy_reading) 2109 kprintf(" busy_reading"); 2110 if (block->busy_writing) 2111 kprintf(" busy_writing"); 2112 if (block->is_writing) 2113 kprintf(" is-writing"); 2114 if (block->is_dirty) 2115 kprintf(" is-dirty"); 2116 if (block->unused) 2117 kprintf(" unused"); 2118 if (block->discard) 2119 kprintf(" discard"); 2120 kprintf("\n"); 2121 if (block->transaction != NULL) { 2122 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction, 2123 block->transaction->id); 2124 if (block->transaction_next != NULL) { 2125 kprintf(" next in transaction: %" B_PRIdOFF "\n", 2126 block->transaction_next->block_number); 2127 } 2128 } 2129 if (block->previous_transaction != NULL) { 2130 kprintf(" previous transaction: %p (%" B_PRId32 ")\n", 2131 block->previous_transaction, 2132 block->previous_transaction->id); 2133 } 2134 2135 set_debug_variable("_current", (addr_t)block->current_data); 2136 set_debug_variable("_original", (addr_t)block->original_data); 2137 set_debug_variable("_parent", (addr_t)block->parent_data); 2138 } 2139 2140 2141 static int 2142 dump_cached_block(int argc, char** argv) 2143 { 2144 if (argc != 2) { 2145 kprintf("usage: %s <block-address>\n", argv[0]); 2146 return 0; 2147 } 2148 2149 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1])); 2150 return 0; 2151 } 2152 2153 2154 static int 2155 dump_cache(int argc, char** argv) 2156 { 2157 bool showTransactions = false; 2158 bool showBlocks = false; 2159 int32 i = 1; 2160 while (argv[i] != NULL && argv[i][0] == '-') { 2161 for (char* arg = &argv[i][1]; arg[0]; arg++) { 2162 switch (arg[0]) { 2163 case 'b': 2164 showBlocks = true; 2165 break; 2166 case 't': 2167 showTransactions = true; 2168 break; 2169 default: 2170 print_debugger_command_usage(argv[0]); 2171 return 0; 2172 } 2173 } 2174 i++; 2175 } 2176 2177 if (i >= argc) { 2178 print_debugger_command_usage(argv[0]); 2179 return 0; 2180 } 2181 2182 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]); 2183 if (cache == NULL) { 2184 kprintf("invalid cache address\n"); 2185 return 0; 2186 } 2187 2188 off_t blockNumber = -1; 2189 if (i + 1 < argc) { 2190 blockNumber = parse_expression(argv[i + 1]); 2191 cached_block* block = cache->hash->Lookup(blockNumber); 2192 if (block != NULL) 2193 dump_block_long(block); 2194 else 2195 kprintf("block %" B_PRIdOFF " not found\n", blockNumber); 2196 return 0; 2197 } 2198 2199 kprintf("BLOCK CACHE: %p\n", cache); 2200 2201 kprintf(" fd: %d\n", cache->fd); 2202 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks); 2203 kprintf(" block_size: %zu\n", cache->block_size); 2204 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id); 2205 kprintf(" buffer_cache: %p\n", cache->buffer_cache); 2206 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count, 2207 cache->busy_reading_waiters ? "has" : "no"); 2208 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count, 2209 cache->busy_writing_waiters ? "has" : "no"); 2210 2211 if (!cache->pending_notifications.IsEmpty()) { 2212 kprintf(" pending notifications:\n"); 2213 2214 NotificationList::Iterator iterator 2215 = cache->pending_notifications.GetIterator(); 2216 while (iterator.HasNext()) { 2217 cache_notification* notification = iterator.Next(); 2218 2219 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification, 2220 notification->events_pending, notification->hook, 2221 notification->data); 2222 } 2223 } 2224 2225 if (showTransactions) { 2226 kprintf(" transactions:\n"); 2227 kprintf("address id state blocks main sub\n"); 2228 2229 TransactionTable::Iterator iterator(cache->transaction_hash); 2230 2231 while (iterator.HasNext()) { 2232 cache_transaction* transaction = iterator.Next(); 2233 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" 2234 B_PRId32 "\n", transaction, transaction->id, 2235 transaction->open ? "open" : "closed", 2236 transaction->num_blocks, transaction->main_num_blocks, 2237 transaction->sub_num_blocks); 2238 } 2239 } 2240 2241 if (showBlocks) { 2242 kprintf(" blocks:\n"); 2243 kprintf("address block no. current original parent refs access " 2244 "flags transact prev. trans\n"); 2245 } 2246 2247 uint32 referenced = 0; 2248 uint32 count = 0; 2249 uint32 dirty = 0; 2250 uint32 discarded = 0; 2251 BlockTable::Iterator iterator(cache->hash); 2252 while (iterator.HasNext()) { 2253 cached_block* block = iterator.Next(); 2254 if (showBlocks) 2255 dump_block(block); 2256 2257 if (block->is_dirty) 2258 dirty++; 2259 if (block->discard) 2260 discarded++; 2261 if (block->ref_count) 2262 referenced++; 2263 count++; 2264 } 2265 2266 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 2267 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 2268 " in unused.\n", 2269 count, dirty, discarded, referenced, cache->busy_reading_count, 2270 cache->unused_block_count); 2271 return 0; 2272 } 2273 2274 2275 static int 2276 dump_transaction(int argc, char** argv) 2277 { 2278 bool showBlocks = false; 2279 int i = 1; 2280 if (argc > 1 && !strcmp(argv[1], "-b")) { 2281 showBlocks = true; 2282 i++; 2283 } 2284 2285 if (argc - i < 1 || argc - i > 2) { 2286 print_debugger_command_usage(argv[0]); 2287 return 0; 2288 } 2289 2290 cache_transaction* transaction = NULL; 2291 2292 if (argc - i == 1) { 2293 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]); 2294 } else { 2295 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]); 2296 int32 id = parse_expression(argv[i + 1]); 2297 transaction = lookup_transaction(cache, id); 2298 if (transaction == NULL) { 2299 kprintf("No transaction with ID %" B_PRId32 " found.\n", id); 2300 return 0; 2301 } 2302 } 2303 2304 kprintf("TRANSACTION %p\n", transaction); 2305 2306 kprintf(" id: %" B_PRId32 "\n", transaction->id); 2307 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks); 2308 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks); 2309 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks); 2310 kprintf(" has sub: %d\n", transaction->has_sub_transaction); 2311 kprintf(" state: %s\n", transaction->open ? "open" : "closed"); 2312 kprintf(" idle: %" B_PRId64 " secs\n", 2313 (system_time() - transaction->last_used) / 1000000); 2314 2315 kprintf(" listeners:\n"); 2316 2317 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 2318 while (iterator.HasNext()) { 2319 cache_listener* listener = iterator.Next(); 2320 2321 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending, 2322 listener->hook, listener->data); 2323 } 2324 2325 if (!showBlocks) 2326 return 0; 2327 2328 kprintf(" blocks:\n"); 2329 kprintf("address block no. current original parent refs access " 2330 "flags transact prev. trans\n"); 2331 2332 cached_block* block = transaction->first_block; 2333 while (block != NULL) { 2334 dump_block(block); 2335 block = block->transaction_next; 2336 } 2337 2338 kprintf("--\n"); 2339 2340 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 2341 while (blockIterator.HasNext()) { 2342 block = blockIterator.Next(); 2343 dump_block(block); 2344 } 2345 2346 return 0; 2347 } 2348 2349 2350 static int 2351 dump_caches(int argc, char** argv) 2352 { 2353 kprintf("Block caches:\n"); 2354 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator(); 2355 while (i.HasNext()) { 2356 block_cache* cache = i.Next(); 2357 if (cache == (block_cache*)&sMarkCache) 2358 continue; 2359 2360 kprintf(" %p\n", cache); 2361 } 2362 2363 return 0; 2364 } 2365 2366 2367 #if BLOCK_CACHE_BLOCK_TRACING >= 2 2368 static int 2369 dump_block_data(int argc, char** argv) 2370 { 2371 using namespace BlockTracing; 2372 2373 // Determine which blocks to show 2374 2375 bool printStackTrace = true; 2376 uint32 which = 0; 2377 int32 i = 1; 2378 while (i < argc && argv[i][0] == '-') { 2379 char* arg = &argv[i][1]; 2380 while (arg[0]) { 2381 switch (arg[0]) { 2382 case 'c': 2383 which |= BlockData::kCurrent; 2384 break; 2385 case 'p': 2386 which |= BlockData::kParent; 2387 break; 2388 case 'o': 2389 which |= BlockData::kOriginal; 2390 break; 2391 2392 default: 2393 kprintf("invalid block specifier (only o/c/p are " 2394 "allowed).\n"); 2395 return 0; 2396 } 2397 arg++; 2398 } 2399 2400 i++; 2401 } 2402 if (which == 0) 2403 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal; 2404 2405 if (i == argc) { 2406 print_debugger_command_usage(argv[0]); 2407 return 0; 2408 } 2409 2410 // Get the range of blocks to print 2411 2412 int64 from = parse_expression(argv[i]); 2413 int64 to = from; 2414 if (argc > i + 1) 2415 to = parse_expression(argv[i + 1]); 2416 if (to < from) 2417 to = from; 2418 2419 uint32 offset = 0; 2420 uint32 size = LONG_MAX; 2421 if (argc > i + 2) 2422 offset = parse_expression(argv[i + 2]); 2423 if (argc > i + 3) 2424 size = parse_expression(argv[i + 3]); 2425 2426 TraceEntryIterator iterator; 2427 iterator.MoveTo(from - 1); 2428 2429 static char sBuffer[1024]; 2430 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID); 2431 2432 while (TraceEntry* entry = iterator.Next()) { 2433 int32 index = iterator.Index(); 2434 if (index > to) 2435 break; 2436 2437 Action* action = dynamic_cast<Action*>(entry); 2438 if (action != NULL) { 2439 out.Clear(); 2440 out.DumpEntry(action); 2441 continue; 2442 } 2443 2444 BlockData* blockData = dynamic_cast<BlockData*>(entry); 2445 if (blockData == NULL) 2446 continue; 2447 2448 out.Clear(); 2449 2450 const char* dump = out.DumpEntry(entry); 2451 int length = strlen(dump); 2452 if (length > 0 && dump[length - 1] == '\n') 2453 length--; 2454 2455 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump); 2456 2457 if (printStackTrace) { 2458 out.Clear(); 2459 entry->DumpStackTrace(out); 2460 if (out.Size() > 0) 2461 kputs(out.Buffer()); 2462 } 2463 2464 blockData->DumpBlocks(which, offset, size); 2465 } 2466 2467 return 0; 2468 } 2469 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 2470 2471 2472 #endif // DEBUG_BLOCK_CACHE 2473 2474 2475 /*! Traverses through the block_cache list, and returns one cache after the 2476 other. The cache returned is automatically locked when you get it, and 2477 unlocked with the next call to this function. Ignores caches that are in 2478 deletion state. 2479 Returns \c NULL when the end of the list is reached. 2480 */ 2481 static block_cache* 2482 get_next_locked_block_cache(block_cache* last) 2483 { 2484 MutexLocker _(sCachesLock); 2485 2486 block_cache* cache; 2487 if (last != NULL) { 2488 mutex_unlock(&last->lock); 2489 2490 cache = sCaches.GetNext((block_cache*)&sMarkCache); 2491 sCaches.Remove((block_cache*)&sMarkCache); 2492 } else 2493 cache = sCaches.Head(); 2494 2495 if (cache != NULL) { 2496 mutex_lock(&cache->lock); 2497 sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache); 2498 } 2499 2500 return cache; 2501 } 2502 2503 2504 /*! Background thread that continuously checks for pending notifications of 2505 all caches. 2506 Every two seconds, it will also write back up to 64 blocks per cache. 2507 */ 2508 static status_t 2509 block_notifier_and_writer(void* /*data*/) 2510 { 2511 const bigtime_t kTimeout = 2000000LL; 2512 bigtime_t timeout = kTimeout; 2513 2514 while (true) { 2515 bigtime_t start = system_time(); 2516 2517 status_t status = acquire_sem_etc(sEventSemaphore, 1, 2518 B_RELATIVE_TIMEOUT, timeout); 2519 if (status == B_OK) { 2520 flush_pending_notifications(); 2521 timeout -= system_time() - start; 2522 continue; 2523 } 2524 2525 // write 64 blocks of each block_cache every two seconds 2526 // TODO: change this once we have an I/O scheduler 2527 timeout = kTimeout; 2528 size_t usedMemory; 2529 object_cache_get_usage(sBlockCache, &usedMemory); 2530 2531 block_cache* cache = NULL; 2532 while ((cache = get_next_locked_block_cache(cache)) != NULL) { 2533 BlockWriter writer(cache, 64); 2534 2535 size_t cacheUsedMemory; 2536 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory); 2537 usedMemory += cacheUsedMemory; 2538 2539 if (cache->num_dirty_blocks) { 2540 // This cache is not using transactions, we'll scan the blocks 2541 // directly 2542 BlockTable::Iterator iterator(cache->hash); 2543 2544 while (iterator.HasNext()) { 2545 cached_block* block = iterator.Next(); 2546 if (block->CanBeWritten() && !writer.Add(block)) 2547 break; 2548 } 2549 2550 } else { 2551 TransactionTable::Iterator iterator(cache->transaction_hash); 2552 2553 while (iterator.HasNext()) { 2554 cache_transaction* transaction = iterator.Next(); 2555 if (transaction->open) { 2556 if (system_time() > transaction->last_used 2557 + kTransactionIdleTime) { 2558 // Transaction is open but idle 2559 notify_transaction_listeners(cache, transaction, 2560 TRANSACTION_IDLE); 2561 } 2562 continue; 2563 } 2564 2565 bool hasLeftOvers; 2566 // we ignore this one 2567 if (!writer.Add(transaction, hasLeftOvers)) 2568 break; 2569 } 2570 } 2571 2572 writer.Write(); 2573 2574 if ((block_cache_used_memory() / B_PAGE_SIZE) 2575 > vm_page_num_pages() / 2) { 2576 // Try to reduce memory usage to half of the available 2577 // RAM at maximum 2578 cache->RemoveUnusedBlocks(1000, 10); 2579 } 2580 } 2581 2582 MutexLocker _(sCachesMemoryUseLock); 2583 sUsedMemory = usedMemory; 2584 } 2585 2586 // never can get here 2587 return B_OK; 2588 } 2589 2590 2591 /*! Notify function for wait_for_notifications(). */ 2592 static void 2593 notify_sync(int32 transactionID, int32 event, void* _cache) 2594 { 2595 block_cache* cache = (block_cache*)_cache; 2596 2597 cache->condition_variable.NotifyOne(); 2598 } 2599 2600 2601 /*! Must be called with the sCachesLock held. */ 2602 static bool 2603 is_valid_cache(block_cache* cache) 2604 { 2605 ASSERT_LOCKED_MUTEX(&sCachesLock); 2606 2607 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 2608 while (iterator.HasNext()) { 2609 if (cache == iterator.Next()) 2610 return true; 2611 } 2612 2613 return false; 2614 } 2615 2616 2617 /*! Waits until all pending notifications are carried out. 2618 Safe to be called from the block writer/notifier thread. 2619 You must not hold the \a cache lock when calling this function. 2620 */ 2621 static void 2622 wait_for_notifications(block_cache* cache) 2623 { 2624 MutexLocker locker(sCachesLock); 2625 2626 if (find_thread(NULL) == sNotifierWriterThread) { 2627 // We're the notifier thread, don't wait, but flush all pending 2628 // notifications directly. 2629 if (is_valid_cache(cache)) 2630 flush_pending_notifications(cache); 2631 return; 2632 } 2633 2634 // add sync notification 2635 cache_notification notification; 2636 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync, 2637 cache); 2638 2639 ConditionVariableEntry entry; 2640 cache->condition_variable.Add(&entry); 2641 2642 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false); 2643 locker.Unlock(); 2644 2645 // wait for notification hook to be called 2646 entry.Wait(); 2647 } 2648 2649 2650 status_t 2651 block_cache_init(void) 2652 { 2653 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block), 2654 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 2655 if (sBlockCache == NULL) 2656 return B_NO_MEMORY; 2657 2658 new (&sCaches) DoublyLinkedList<block_cache>; 2659 // manually call constructor 2660 2661 sEventSemaphore = create_sem(0, "block cache event"); 2662 if (sEventSemaphore < B_OK) 2663 return sEventSemaphore; 2664 2665 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer, 2666 "block notifier/writer", B_LOW_PRIORITY, NULL); 2667 if (sNotifierWriterThread >= B_OK) 2668 resume_thread(sNotifierWriterThread); 2669 2670 #if DEBUG_BLOCK_CACHE 2671 add_debugger_command_etc("block_caches", &dump_caches, 2672 "dumps all block caches", "\n", 0); 2673 add_debugger_command_etc("block_cache", &dump_cache, 2674 "dumps a specific block cache", 2675 "[-bt] <cache-address> [block-number]\n" 2676 " -t lists the transactions\n" 2677 " -b lists all blocks\n", 0); 2678 add_debugger_command("cached_block", &dump_cached_block, 2679 "dumps the specified cached block"); 2680 add_debugger_command_etc("transaction", &dump_transaction, 2681 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n" 2682 "Either use a block cache pointer and an ID or a pointer to the transaction.\n" 2683 " -b lists all blocks that are part of this transaction\n", 0); 2684 # if BLOCK_CACHE_BLOCK_TRACING >= 2 2685 add_debugger_command_etc("block_cache_data", &dump_block_data, 2686 "dumps the data blocks logged for the actions", 2687 "[-cpo] <from> [<to> [<offset> [<size>]]]\n" 2688 "If no data specifier is used, all blocks are shown by default.\n" 2689 " -c the current data is shown, if available.\n" 2690 " -p the parent data is shown, if available.\n" 2691 " -o the original data is shown, if available.\n" 2692 " <from> first index of tracing entries to show.\n" 2693 " <to> if given, the last entry. If not, only <from> is shown.\n" 2694 " <offset> the offset of the block data.\n" 2695 " <from> the size of the block data that is dumped\n", 0); 2696 # endif 2697 #endif // DEBUG_BLOCK_CACHE 2698 2699 return B_OK; 2700 } 2701 2702 2703 size_t 2704 block_cache_used_memory(void) 2705 { 2706 MutexLocker _(sCachesMemoryUseLock); 2707 return sUsedMemory; 2708 } 2709 2710 2711 // #pragma mark - public transaction API 2712 2713 2714 int32 2715 cache_start_transaction(void* _cache) 2716 { 2717 block_cache* cache = (block_cache*)_cache; 2718 TransactionLocker locker(cache); 2719 2720 if (cache->last_transaction && cache->last_transaction->open) { 2721 panic("last transaction (%" B_PRId32 ") still open!\n", 2722 cache->last_transaction->id); 2723 } 2724 2725 cache_transaction* transaction = new(std::nothrow) cache_transaction; 2726 if (transaction == NULL) 2727 return B_NO_MEMORY; 2728 2729 transaction->id = atomic_add(&cache->next_transaction_id, 1); 2730 cache->last_transaction = transaction; 2731 2732 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id)); 2733 T(Action("start", cache, transaction)); 2734 2735 cache->transaction_hash->Insert(transaction); 2736 2737 return transaction->id; 2738 } 2739 2740 2741 status_t 2742 cache_sync_transaction(void* _cache, int32 id) 2743 { 2744 block_cache* cache = (block_cache*)_cache; 2745 bool hadBusy; 2746 2747 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id)); 2748 2749 do { 2750 TransactionLocker locker(cache); 2751 hadBusy = false; 2752 2753 BlockWriter writer(cache); 2754 TransactionTable::Iterator iterator(cache->transaction_hash); 2755 2756 while (iterator.HasNext()) { 2757 // close all earlier transactions which haven't been closed yet 2758 cache_transaction* transaction = iterator.Next(); 2759 2760 if (transaction->busy_writing_count != 0) { 2761 hadBusy = true; 2762 continue; 2763 } 2764 if (transaction->id <= id && !transaction->open) { 2765 // write back all of their remaining dirty blocks 2766 T(Action("sync", cache, transaction)); 2767 2768 bool hasLeftOvers; 2769 writer.Add(transaction, hasLeftOvers); 2770 2771 if (hasLeftOvers) { 2772 // This transaction contains blocks that a previous 2773 // transaction is trying to write back in this write run 2774 hadBusy = true; 2775 } 2776 } 2777 } 2778 2779 status_t status = writer.Write(); 2780 if (status != B_OK) 2781 return status; 2782 } while (hadBusy); 2783 2784 wait_for_notifications(cache); 2785 // make sure that all pending TRANSACTION_WRITTEN notifications 2786 // are handled after we return 2787 return B_OK; 2788 } 2789 2790 2791 status_t 2792 cache_end_transaction(void* _cache, int32 id, 2793 transaction_notification_hook hook, void* data) 2794 { 2795 block_cache* cache = (block_cache*)_cache; 2796 TransactionLocker locker(cache); 2797 2798 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id)); 2799 2800 cache_transaction* transaction = lookup_transaction(cache, id); 2801 if (transaction == NULL) { 2802 panic("cache_end_transaction(): invalid transaction ID\n"); 2803 return B_BAD_VALUE; 2804 } 2805 2806 // Write back all pending transaction blocks 2807 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2808 if (status != B_OK) 2809 return status; 2810 2811 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2812 2813 if (hook != NULL 2814 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, 2815 hook, data) != B_OK) { 2816 return B_NO_MEMORY; 2817 } 2818 2819 T(Action("end", cache, transaction)); 2820 2821 // iterate through all blocks and free the unchanged original contents 2822 2823 cached_block* next; 2824 for (cached_block* block = transaction->first_block; block != NULL; 2825 block = next) { 2826 next = block->transaction_next; 2827 ASSERT(block->previous_transaction == NULL); 2828 2829 if (block->discard) { 2830 // This block has been discarded in the transaction 2831 cache->DiscardBlock(block); 2832 transaction->num_blocks--; 2833 continue; 2834 } 2835 2836 if (block->original_data != NULL) { 2837 cache->Free(block->original_data); 2838 block->original_data = NULL; 2839 } 2840 if (block->parent_data != NULL) { 2841 ASSERT(transaction->has_sub_transaction); 2842 cache->FreeBlockParentData(block); 2843 } 2844 2845 // move the block to the previous transaction list 2846 transaction->blocks.Add(block); 2847 2848 block->previous_transaction = transaction; 2849 block->transaction_next = NULL; 2850 block->transaction = NULL; 2851 } 2852 2853 transaction->open = false; 2854 return B_OK; 2855 } 2856 2857 2858 status_t 2859 cache_abort_transaction(void* _cache, int32 id) 2860 { 2861 block_cache* cache = (block_cache*)_cache; 2862 TransactionLocker locker(cache); 2863 2864 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id)); 2865 2866 cache_transaction* transaction = lookup_transaction(cache, id); 2867 if (transaction == NULL) { 2868 panic("cache_abort_transaction(): invalid transaction ID\n"); 2869 return B_BAD_VALUE; 2870 } 2871 2872 T(Abort(cache, transaction)); 2873 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 2874 2875 // iterate through all blocks and restore their original contents 2876 2877 cached_block* block = transaction->first_block; 2878 cached_block* next; 2879 for (; block != NULL; block = next) { 2880 next = block->transaction_next; 2881 2882 if (block->original_data != NULL) { 2883 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of " 2884 "block %" B_PRIdOFF "\n", transaction->id, block->block_number)); 2885 memcpy(block->current_data, block->original_data, 2886 cache->block_size); 2887 cache->Free(block->original_data); 2888 block->original_data = NULL; 2889 } 2890 if (transaction->has_sub_transaction && block->parent_data != NULL) 2891 cache->FreeBlockParentData(block); 2892 2893 block->transaction_next = NULL; 2894 block->transaction = NULL; 2895 block->discard = false; 2896 if (block->previous_transaction == NULL) 2897 block->is_dirty = false; 2898 } 2899 2900 cache->transaction_hash->Remove(transaction); 2901 delete_transaction(cache, transaction); 2902 return B_OK; 2903 } 2904 2905 2906 /*! Acknowledges the current parent transaction, and starts a new transaction 2907 from its sub transaction. 2908 The new transaction also gets a new transaction ID. 2909 */ 2910 int32 2911 cache_detach_sub_transaction(void* _cache, int32 id, 2912 transaction_notification_hook hook, void* data) 2913 { 2914 block_cache* cache = (block_cache*)_cache; 2915 TransactionLocker locker(cache); 2916 2917 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id)); 2918 2919 cache_transaction* transaction = lookup_transaction(cache, id); 2920 if (transaction == NULL) { 2921 panic("cache_detach_sub_transaction(): invalid transaction ID\n"); 2922 return B_BAD_VALUE; 2923 } 2924 if (!transaction->has_sub_transaction) 2925 return B_BAD_VALUE; 2926 2927 // iterate through all blocks and free the unchanged original contents 2928 2929 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2930 if (status != B_OK) 2931 return status; 2932 2933 // create a new transaction for the sub transaction 2934 cache_transaction* newTransaction = new(std::nothrow) cache_transaction; 2935 if (newTransaction == NULL) 2936 return B_NO_MEMORY; 2937 2938 newTransaction->id = atomic_add(&cache->next_transaction_id, 1); 2939 T(Detach(cache, transaction, newTransaction)); 2940 2941 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2942 2943 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook, 2944 data) != B_OK) { 2945 delete newTransaction; 2946 return B_NO_MEMORY; 2947 } 2948 2949 cached_block* last = NULL; 2950 cached_block* next; 2951 for (cached_block* block = transaction->first_block; block != NULL; 2952 block = next) { 2953 next = block->transaction_next; 2954 ASSERT(block->previous_transaction == NULL); 2955 2956 if (block->discard) { 2957 cache->DiscardBlock(block); 2958 transaction->main_num_blocks--; 2959 continue; 2960 } 2961 2962 if (block->parent_data != NULL) { 2963 // The block changed in the parent - free the original data, since 2964 // they will be replaced by what is in current. 2965 ASSERT(block->original_data != NULL); 2966 cache->Free(block->original_data); 2967 2968 if (block->parent_data != block->current_data) { 2969 // The block had been changed in both transactions 2970 block->original_data = block->parent_data; 2971 } else { 2972 // The block has only been changed in the parent 2973 block->original_data = NULL; 2974 } 2975 2976 // move the block to the previous transaction list 2977 transaction->blocks.Add(block); 2978 block->previous_transaction = transaction; 2979 block->parent_data = NULL; 2980 } 2981 2982 if (block->original_data != NULL) { 2983 // This block had been changed in the current sub transaction, 2984 // we need to move this block over to the new transaction. 2985 ASSERT(block->parent_data == NULL); 2986 2987 if (last == NULL) 2988 newTransaction->first_block = block; 2989 else 2990 last->transaction_next = block; 2991 2992 block->transaction = newTransaction; 2993 last = block; 2994 } else 2995 block->transaction = NULL; 2996 2997 block->transaction_next = NULL; 2998 } 2999 3000 newTransaction->num_blocks = transaction->sub_num_blocks; 3001 3002 transaction->open = false; 3003 transaction->has_sub_transaction = false; 3004 transaction->num_blocks = transaction->main_num_blocks; 3005 transaction->sub_num_blocks = 0; 3006 3007 cache->transaction_hash->Insert(newTransaction); 3008 cache->last_transaction = newTransaction; 3009 3010 return newTransaction->id; 3011 } 3012 3013 3014 status_t 3015 cache_abort_sub_transaction(void* _cache, int32 id) 3016 { 3017 block_cache* cache = (block_cache*)_cache; 3018 TransactionLocker locker(cache); 3019 3020 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id)); 3021 3022 cache_transaction* transaction = lookup_transaction(cache, id); 3023 if (transaction == NULL) { 3024 panic("cache_abort_sub_transaction(): invalid transaction ID\n"); 3025 return B_BAD_VALUE; 3026 } 3027 if (!transaction->has_sub_transaction) 3028 return B_BAD_VALUE; 3029 3030 T(Abort(cache, transaction)); 3031 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3032 3033 // revert all changes back to the version of the parent 3034 3035 cached_block* block = transaction->first_block; 3036 cached_block* last = NULL; 3037 cached_block* next; 3038 for (; block != NULL; block = next) { 3039 next = block->transaction_next; 3040 3041 if (block->parent_data == NULL) { 3042 // The parent transaction didn't change the block, but the sub 3043 // transaction did - we need to revert to the original data. 3044 // The block is no longer part of the transaction 3045 if (block->original_data != NULL) { 3046 // The block might not have original data if was empty 3047 memcpy(block->current_data, block->original_data, 3048 cache->block_size); 3049 } 3050 3051 if (last != NULL) 3052 last->transaction_next = next; 3053 else 3054 transaction->first_block = next; 3055 3056 block->transaction_next = NULL; 3057 block->transaction = NULL; 3058 transaction->num_blocks--; 3059 3060 if (block->previous_transaction == NULL) { 3061 cache->Free(block->original_data); 3062 block->original_data = NULL; 3063 block->is_dirty = false; 3064 3065 if (block->ref_count == 0) { 3066 // Move the block into the unused list if possible 3067 block->unused = true; 3068 cache->unused_blocks.Add(block); 3069 cache->unused_block_count++; 3070 } 3071 } 3072 } else { 3073 if (block->parent_data != block->current_data) { 3074 // The block has been changed and must be restored - the block 3075 // is still dirty and part of the transaction 3076 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): " 3077 "restored contents of block %" B_PRIdOFF "\n", 3078 transaction->id, block->block_number)); 3079 memcpy(block->current_data, block->parent_data, 3080 cache->block_size); 3081 cache->Free(block->parent_data); 3082 // The block stays dirty 3083 } 3084 block->parent_data = NULL; 3085 last = block; 3086 } 3087 3088 block->discard = false; 3089 } 3090 3091 // all subsequent changes will go into the main transaction 3092 transaction->has_sub_transaction = false; 3093 transaction->sub_num_blocks = 0; 3094 3095 return B_OK; 3096 } 3097 3098 3099 status_t 3100 cache_start_sub_transaction(void* _cache, int32 id) 3101 { 3102 block_cache* cache = (block_cache*)_cache; 3103 TransactionLocker locker(cache); 3104 3105 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id)); 3106 3107 cache_transaction* transaction = lookup_transaction(cache, id); 3108 if (transaction == NULL) { 3109 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n", 3110 id); 3111 return B_BAD_VALUE; 3112 } 3113 3114 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3115 3116 // move all changed blocks up to the parent 3117 3118 cached_block* block = transaction->first_block; 3119 cached_block* next; 3120 for (; block != NULL; block = next) { 3121 next = block->transaction_next; 3122 3123 if (block->parent_data != NULL) { 3124 // There already is an older sub transaction - we acknowledge 3125 // its changes and move its blocks up to the parent 3126 ASSERT(transaction->has_sub_transaction); 3127 cache->FreeBlockParentData(block); 3128 } 3129 if (block->discard) { 3130 // This block has been discarded in the parent transaction. 3131 // Just throw away any changes made in this transaction, so that 3132 // it can still be reverted to its original contents if needed 3133 ASSERT(block->previous_transaction == NULL); 3134 if (block->original_data != NULL) { 3135 memcpy(block->current_data, block->original_data, 3136 cache->block_size); 3137 block->original_data = NULL; 3138 } 3139 continue; 3140 } 3141 3142 // we "allocate" the parent data lazily, that means, we don't copy 3143 // the data (and allocate memory for it) until we need to 3144 block->parent_data = block->current_data; 3145 } 3146 3147 // all subsequent changes will go into the sub transaction 3148 transaction->has_sub_transaction = true; 3149 transaction->main_num_blocks = transaction->num_blocks; 3150 transaction->sub_num_blocks = 0; 3151 T(Action("start-sub", cache, transaction)); 3152 3153 return B_OK; 3154 } 3155 3156 3157 /*! Adds a transaction listener that gets notified when the transaction 3158 is ended, aborted, written, or idle as specified by \a events. 3159 The listener gets automatically removed when the transaction ends. 3160 */ 3161 status_t 3162 cache_add_transaction_listener(void* _cache, int32 id, int32 events, 3163 transaction_notification_hook hook, void* data) 3164 { 3165 block_cache* cache = (block_cache*)_cache; 3166 TransactionLocker locker(cache); 3167 3168 cache_transaction* transaction = lookup_transaction(cache, id); 3169 if (transaction == NULL) 3170 return B_BAD_VALUE; 3171 3172 return add_transaction_listener(cache, transaction, events, hook, data); 3173 } 3174 3175 3176 status_t 3177 cache_remove_transaction_listener(void* _cache, int32 id, 3178 transaction_notification_hook hookFunction, void* data) 3179 { 3180 block_cache* cache = (block_cache*)_cache; 3181 TransactionLocker locker(cache); 3182 3183 cache_transaction* transaction = lookup_transaction(cache, id); 3184 if (transaction == NULL) 3185 return B_BAD_VALUE; 3186 3187 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 3188 while (iterator.HasNext()) { 3189 cache_listener* listener = iterator.Next(); 3190 if (listener->data == data && listener->hook == hookFunction) { 3191 iterator.Remove(); 3192 3193 if (listener->events_pending != 0) { 3194 MutexLocker _(sNotificationsLock); 3195 if (listener->events_pending != 0) 3196 cache->pending_notifications.Remove(listener); 3197 } 3198 delete listener; 3199 return B_OK; 3200 } 3201 } 3202 3203 return B_ENTRY_NOT_FOUND; 3204 } 3205 3206 3207 status_t 3208 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly, 3209 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData) 3210 { 3211 cached_block* block = (cached_block*)*_cookie; 3212 block_cache* cache = (block_cache*)_cache; 3213 TransactionLocker locker(cache); 3214 3215 cache_transaction* transaction = lookup_transaction(cache, id); 3216 if (transaction == NULL || !transaction->open) 3217 return B_BAD_VALUE; 3218 3219 if (block == NULL) 3220 block = transaction->first_block; 3221 else 3222 block = block->transaction_next; 3223 3224 if (transaction->has_sub_transaction) { 3225 if (mainOnly) { 3226 // find next block that the parent changed 3227 while (block != NULL && block->parent_data == NULL) 3228 block = block->transaction_next; 3229 } else { 3230 // find next non-discarded block 3231 while (block != NULL && block->discard) 3232 block = block->transaction_next; 3233 } 3234 } 3235 3236 if (block == NULL) 3237 return B_ENTRY_NOT_FOUND; 3238 3239 if (_blockNumber) 3240 *_blockNumber = block->block_number; 3241 if (_data) 3242 *_data = mainOnly ? block->parent_data : block->current_data; 3243 if (_unchangedData) 3244 *_unchangedData = block->original_data; 3245 3246 *_cookie = (addr_t)block; 3247 return B_OK; 3248 } 3249 3250 3251 int32 3252 cache_blocks_in_transaction(void* _cache, int32 id) 3253 { 3254 block_cache* cache = (block_cache*)_cache; 3255 TransactionLocker locker(cache); 3256 3257 cache_transaction* transaction = lookup_transaction(cache, id); 3258 if (transaction == NULL) 3259 return B_BAD_VALUE; 3260 3261 return transaction->num_blocks; 3262 } 3263 3264 3265 /*! Returns the number of blocks that are part of the main transaction. If this 3266 transaction does not have a sub transaction yet, this is the same value as 3267 cache_blocks_in_transaction() would return. 3268 */ 3269 int32 3270 cache_blocks_in_main_transaction(void* _cache, int32 id) 3271 { 3272 block_cache* cache = (block_cache*)_cache; 3273 TransactionLocker locker(cache); 3274 3275 cache_transaction* transaction = lookup_transaction(cache, id); 3276 if (transaction == NULL) 3277 return B_BAD_VALUE; 3278 3279 if (transaction->has_sub_transaction) 3280 return transaction->main_num_blocks; 3281 3282 return transaction->num_blocks; 3283 } 3284 3285 3286 int32 3287 cache_blocks_in_sub_transaction(void* _cache, int32 id) 3288 { 3289 block_cache* cache = (block_cache*)_cache; 3290 TransactionLocker locker(cache); 3291 3292 cache_transaction* transaction = lookup_transaction(cache, id); 3293 if (transaction == NULL) 3294 return B_BAD_VALUE; 3295 3296 return transaction->sub_num_blocks; 3297 } 3298 3299 3300 // #pragma mark - public block cache API 3301 3302 3303 void 3304 block_cache_delete(void* _cache, bool allowWrites) 3305 { 3306 block_cache* cache = (block_cache*)_cache; 3307 3308 if (allowWrites) 3309 block_cache_sync(cache); 3310 3311 mutex_lock(&sCachesLock); 3312 sCaches.Remove(cache); 3313 mutex_unlock(&sCachesLock); 3314 3315 mutex_lock(&cache->lock); 3316 3317 // wait for all blocks to become unbusy 3318 wait_for_busy_reading_blocks(cache); 3319 wait_for_busy_writing_blocks(cache); 3320 3321 // free all blocks 3322 3323 cached_block* block = cache->hash->Clear(true); 3324 while (block != NULL) { 3325 cached_block* next = block->next; 3326 cache->FreeBlock(block); 3327 block = next; 3328 } 3329 3330 // free all transactions (they will all be aborted) 3331 3332 cache_transaction* transaction = cache->transaction_hash->Clear(true); 3333 while (transaction != NULL) { 3334 cache_transaction* next = transaction->next; 3335 delete transaction; 3336 transaction = next; 3337 } 3338 3339 delete cache; 3340 } 3341 3342 3343 void* 3344 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly) 3345 { 3346 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize, 3347 readOnly); 3348 if (cache == NULL) 3349 return NULL; 3350 3351 if (cache->Init() != B_OK) { 3352 delete cache; 3353 return NULL; 3354 } 3355 3356 MutexLocker _(sCachesLock); 3357 sCaches.Add(cache); 3358 3359 return cache; 3360 } 3361 3362 3363 status_t 3364 block_cache_sync(void* _cache) 3365 { 3366 block_cache* cache = (block_cache*)_cache; 3367 3368 // We will sync all dirty blocks to disk that have a completed 3369 // transaction or no transaction only 3370 3371 MutexLocker locker(&cache->lock); 3372 3373 BlockWriter writer(cache); 3374 BlockTable::Iterator iterator(cache->hash); 3375 3376 while (iterator.HasNext()) { 3377 cached_block* block = iterator.Next(); 3378 if (block->CanBeWritten()) 3379 writer.Add(block); 3380 } 3381 3382 status_t status = writer.Write(); 3383 3384 locker.Unlock(); 3385 3386 wait_for_notifications(cache); 3387 // make sure that all pending TRANSACTION_WRITTEN notifications 3388 // are handled after we return 3389 return status; 3390 } 3391 3392 3393 status_t 3394 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks) 3395 { 3396 block_cache* cache = (block_cache*)_cache; 3397 3398 // We will sync all dirty blocks to disk that have a completed 3399 // transaction or no transaction only 3400 3401 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 3402 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF 3403 " (max %" B_PRIdOFF ")", 3404 blockNumber, cache->max_blocks - 1); 3405 return B_BAD_VALUE; 3406 } 3407 3408 MutexLocker locker(&cache->lock); 3409 BlockWriter writer(cache); 3410 3411 for (; numBlocks > 0; numBlocks--, blockNumber++) { 3412 cached_block* block = cache->hash->Lookup(blockNumber); 3413 if (block == NULL) 3414 continue; 3415 3416 if (block->CanBeWritten()) 3417 writer.Add(block); 3418 } 3419 3420 status_t status = writer.Write(); 3421 3422 locker.Unlock(); 3423 3424 wait_for_notifications(cache); 3425 // make sure that all pending TRANSACTION_WRITTEN notifications 3426 // are handled after we return 3427 return status; 3428 } 3429 3430 3431 /*! Discards a block from the current transaction or from the cache. 3432 You have to call this function when you no longer use a block, ie. when it 3433 might be reclaimed by the file cache in order to make sure they won't 3434 interfere. 3435 */ 3436 void 3437 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks) 3438 { 3439 // TODO: this could be a nice place to issue the ATA trim command 3440 block_cache* cache = (block_cache*)_cache; 3441 TransactionLocker locker(cache); 3442 3443 BlockWriter writer(cache); 3444 3445 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3446 cached_block* block = cache->hash->Lookup(blockNumber); 3447 if (block != NULL && block->previous_transaction != NULL) 3448 writer.Add(block); 3449 } 3450 3451 writer.Write(); 3452 // TODO: this can fail, too! 3453 3454 blockNumber -= numBlocks; 3455 // reset blockNumber to its original value 3456 3457 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3458 cached_block* block = cache->hash->Lookup(blockNumber); 3459 if (block == NULL) 3460 continue; 3461 3462 ASSERT(block->previous_transaction == NULL); 3463 3464 if (block->unused) { 3465 cache->unused_blocks.Remove(block); 3466 cache->unused_block_count--; 3467 cache->RemoveBlock(block); 3468 } else { 3469 if (block->transaction != NULL && block->parent_data != NULL 3470 && block->parent_data != block->current_data) { 3471 panic("Discarded block %" B_PRIdOFF " has already been changed in this " 3472 "transaction!", blockNumber); 3473 } 3474 3475 // mark it as discarded (in the current transaction only, if any) 3476 block->discard = true; 3477 } 3478 } 3479 } 3480 3481 3482 status_t 3483 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction) 3484 { 3485 block_cache* cache = (block_cache*)_cache; 3486 MutexLocker locker(&cache->lock); 3487 3488 if (cache->read_only) { 3489 panic("tried to make block writable on a read-only cache!"); 3490 return B_ERROR; 3491 } 3492 3493 // TODO: this can be done better! 3494 void* block = get_writable_cached_block(cache, blockNumber, 3495 blockNumber, 1, transaction, false); 3496 if (block != NULL) { 3497 put_cached_block((block_cache*)_cache, blockNumber); 3498 return B_OK; 3499 } 3500 3501 return B_ERROR; 3502 } 3503 3504 3505 void* 3506 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base, 3507 off_t length, int32 transaction) 3508 { 3509 block_cache* cache = (block_cache*)_cache; 3510 MutexLocker locker(&cache->lock); 3511 3512 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3513 blockNumber, transaction)); 3514 if (cache->read_only) 3515 panic("tried to get writable block on a read-only cache!"); 3516 3517 return get_writable_cached_block(cache, blockNumber, base, length, 3518 transaction, false); 3519 } 3520 3521 3522 void* 3523 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction) 3524 { 3525 return block_cache_get_writable_etc(_cache, blockNumber, 3526 blockNumber, 1, transaction); 3527 } 3528 3529 3530 void* 3531 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction) 3532 { 3533 block_cache* cache = (block_cache*)_cache; 3534 MutexLocker locker(&cache->lock); 3535 3536 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3537 blockNumber, transaction)); 3538 if (cache->read_only) 3539 panic("tried to get empty writable block on a read-only cache!"); 3540 3541 return get_writable_cached_block((block_cache*)_cache, blockNumber, 3542 blockNumber, 1, transaction, true); 3543 } 3544 3545 3546 const void* 3547 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length) 3548 { 3549 block_cache* cache = (block_cache*)_cache; 3550 MutexLocker locker(&cache->lock); 3551 bool allocated; 3552 3553 cached_block* block = get_cached_block(cache, blockNumber, &allocated); 3554 if (block == NULL) 3555 return NULL; 3556 3557 #if BLOCK_CACHE_DEBUG_CHANGED 3558 if (block->compare == NULL) 3559 block->compare = cache->Allocate(); 3560 if (block->compare != NULL) 3561 memcpy(block->compare, block->current_data, cache->block_size); 3562 #endif 3563 TB(Get(cache, block)); 3564 3565 return block->current_data; 3566 } 3567 3568 3569 const void* 3570 block_cache_get(void* _cache, off_t blockNumber) 3571 { 3572 return block_cache_get_etc(_cache, blockNumber, blockNumber, 1); 3573 } 3574 3575 3576 /*! Changes the internal status of a writable block to \a dirty. This can be 3577 helpful in case you realize you don't need to change that block anymore 3578 for whatever reason. 3579 3580 Note, you must only use this function on blocks that were acquired 3581 writable! 3582 */ 3583 status_t 3584 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty, 3585 int32 transaction) 3586 { 3587 block_cache* cache = (block_cache*)_cache; 3588 MutexLocker locker(&cache->lock); 3589 3590 cached_block* block = cache->hash->Lookup(blockNumber); 3591 if (block == NULL) 3592 return B_BAD_VALUE; 3593 if (block->is_dirty == dirty) { 3594 // there is nothing to do for us 3595 return B_OK; 3596 } 3597 3598 // TODO: not yet implemented 3599 if (dirty) 3600 panic("block_cache_set_dirty(): not yet implemented that way!\n"); 3601 3602 return B_OK; 3603 } 3604 3605 3606 void 3607 block_cache_put(void* _cache, off_t blockNumber) 3608 { 3609 block_cache* cache = (block_cache*)_cache; 3610 MutexLocker locker(&cache->lock); 3611 3612 put_cached_block(cache, blockNumber); 3613 } 3614 3615