1 /* 2 * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include <block_cache.h> 8 9 #include <unistd.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <errno.h> 13 14 #include <KernelExport.h> 15 #include <fs_cache.h> 16 17 #include <condition_variable.h> 18 #include <lock.h> 19 #include <low_resource_manager.h> 20 #include <slab/Slab.h> 21 #include <tracing.h> 22 #include <util/kernel_cpp.h> 23 #include <util/DoublyLinkedList.h> 24 #include <util/AutoLock.h> 25 #include <vm/vm_page.h> 26 27 #include "kernel_debug_config.h" 28 29 30 // TODO: this is a naive but growing implementation to test the API: 31 // block reading/writing is not at all optimized for speed, it will 32 // just read and write single blocks. 33 // TODO: the retrieval/copy of the original data could be delayed until the 34 // new data must be written, ie. in low memory situations. 35 36 //#define TRACE_BLOCK_CACHE 37 #ifdef TRACE_BLOCK_CACHE 38 # define TRACE(x) dprintf x 39 #else 40 # define TRACE(x) ; 41 #endif 42 43 #define TRACE_ALWAYS(x) dprintf x 44 45 // This macro is used for fatal situations that are acceptable in a running 46 // system, like out of memory situations - should only panic for debugging. 47 #define FATAL(x) panic x 48 49 static const bigtime_t kTransactionIdleTime = 2000000LL; 50 // a transaction is considered idle after 2 seconds of inactivity 51 52 53 namespace { 54 55 struct cache_transaction; 56 struct cached_block; 57 struct block_cache; 58 typedef DoublyLinkedListLink<cached_block> block_link; 59 60 struct cached_block { 61 cached_block* next; // next in hash 62 cached_block* transaction_next; 63 block_link link; 64 off_t block_number; 65 void* current_data; 66 // The data that is seen by everyone using the API; this one is always 67 // present. 68 void* original_data; 69 // When in a transaction, this contains the original data from before 70 // the transaction. 71 void* parent_data; 72 // This is a lazily alloced buffer that represents the contents of the 73 // block in the parent transaction. It may point to current_data if the 74 // contents have been changed only in the parent transaction, or, if the 75 // block has been changed in the current sub transaction already, to a 76 // new block containing the contents changed in the parent transaction. 77 // If this is NULL, the block has not been changed in the parent 78 // transaction at all. 79 #if BLOCK_CACHE_DEBUG_CHANGED 80 void* compare; 81 #endif 82 int32 ref_count; 83 int32 last_accessed; 84 bool busy_reading : 1; 85 bool busy_writing : 1; 86 bool is_writing : 1; 87 // Block has been checked out for writing without transactions, and 88 // cannot be written back if set 89 bool is_dirty : 1; 90 bool unused : 1; 91 bool discard : 1; 92 bool busy_reading_waiters : 1; 93 bool busy_writing_waiters : 1; 94 cache_transaction* transaction; 95 // This is the current active transaction, if any, the block is 96 // currently in (meaning was changed as a part of it). 97 cache_transaction* previous_transaction; 98 // This is set to the last transaction that was ended containing this 99 // block. In this case, the block has not yet written back yet, and 100 // the changed data is either in current_data, or original_data -- the 101 // latter if the block is already being part of another transaction. 102 // There can only be one previous transaction, so when the active 103 // transaction ends, the changes of the previous transaction have to 104 // be written back before that transaction becomes the next previous 105 // transaction. 106 107 bool CanBeWritten() const; 108 int32 LastAccess() const 109 { return system_time() / 1000000L - last_accessed; } 110 }; 111 112 typedef DoublyLinkedList<cached_block, 113 DoublyLinkedListMemberGetLink<cached_block, 114 &cached_block::link> > block_list; 115 116 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> { 117 static inline void* operator new(size_t size); 118 static inline void operator delete(void* block); 119 120 int32 transaction_id; 121 int32 events_pending; 122 int32 events; 123 transaction_notification_hook hook; 124 void* data; 125 bool delete_after_event; 126 }; 127 128 typedef DoublyLinkedList<cache_notification> NotificationList; 129 130 static object_cache* sCacheNotificationCache; 131 132 struct cache_listener; 133 typedef DoublyLinkedListLink<cache_listener> listener_link; 134 135 struct cache_listener : cache_notification { 136 listener_link link; 137 }; 138 139 typedef DoublyLinkedList<cache_listener, 140 DoublyLinkedListMemberGetLink<cache_listener, 141 &cache_listener::link> > ListenerList; 142 143 void* 144 cache_notification::operator new(size_t size) 145 { 146 // We can't really know whether something is a cache_notification or a 147 // cache_listener at runtime, so we just use one object_cache for both 148 // with the size set to that of the (slightly larger) cache_listener. 149 // In practice, the vast majority of cache_notifications are really 150 // cache_listeners, so this is a more than acceptable trade-off. 151 ASSERT(size <= sizeof(cache_listener)); 152 return object_cache_alloc(sCacheNotificationCache, 0); 153 } 154 155 void 156 cache_notification::operator delete(void* block) 157 { 158 object_cache_free(sCacheNotificationCache, block, 0); 159 } 160 161 162 struct BlockHash { 163 typedef off_t KeyType; 164 typedef cached_block ValueType; 165 166 size_t HashKey(KeyType key) const 167 { 168 return key; 169 } 170 171 size_t Hash(ValueType* block) const 172 { 173 return block->block_number; 174 } 175 176 bool Compare(KeyType key, ValueType* block) const 177 { 178 return block->block_number == key; 179 } 180 181 ValueType*& GetLink(ValueType* value) const 182 { 183 return value->next; 184 } 185 }; 186 187 typedef BOpenHashTable<BlockHash> BlockTable; 188 189 190 struct TransactionHash { 191 typedef int32 KeyType; 192 typedef cache_transaction ValueType; 193 194 size_t HashKey(KeyType key) const 195 { 196 return key; 197 } 198 199 size_t Hash(ValueType* transaction) const; 200 bool Compare(KeyType key, ValueType* transaction) const; 201 ValueType*& GetLink(ValueType* value) const; 202 }; 203 204 typedef BOpenHashTable<TransactionHash> TransactionTable; 205 206 207 struct block_cache : DoublyLinkedListLinkImpl<block_cache> { 208 BlockTable* hash; 209 mutex lock; 210 int fd; 211 off_t max_blocks; 212 size_t block_size; 213 int32 next_transaction_id; 214 cache_transaction* last_transaction; 215 TransactionTable* transaction_hash; 216 217 object_cache* buffer_cache; 218 block_list unused_blocks; 219 uint32 unused_block_count; 220 221 ConditionVariable busy_reading_condition; 222 uint32 busy_reading_count; 223 bool busy_reading_waiters; 224 225 ConditionVariable busy_writing_condition; 226 uint32 busy_writing_count; 227 bool busy_writing_waiters; 228 229 bigtime_t last_block_write; 230 bigtime_t last_block_write_duration; 231 232 uint32 num_dirty_blocks; 233 bool read_only; 234 235 NotificationList pending_notifications; 236 ConditionVariable condition_variable; 237 238 block_cache(int fd, off_t numBlocks, size_t blockSize, 239 bool readOnly); 240 ~block_cache(); 241 242 status_t Init(); 243 244 void Free(void* buffer); 245 void* Allocate(); 246 void FreeBlock(cached_block* block); 247 cached_block* NewBlock(off_t blockNumber); 248 void FreeBlockParentData(cached_block* block); 249 250 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0); 251 void RemoveBlock(cached_block* block); 252 void DiscardBlock(cached_block* block); 253 254 private: 255 static void _LowMemoryHandler(void* data, uint32 resources, 256 int32 level); 257 cached_block* _GetUnusedBlock(); 258 }; 259 260 struct cache_transaction { 261 cache_transaction(); 262 263 cache_transaction* next; 264 int32 id; 265 int32 num_blocks; 266 int32 main_num_blocks; 267 int32 sub_num_blocks; 268 cached_block* first_block; 269 block_list blocks; 270 ListenerList listeners; 271 bool open; 272 bool has_sub_transaction; 273 bigtime_t last_used; 274 int32 busy_writing_count; 275 }; 276 277 278 class BlockWriter { 279 public: 280 BlockWriter(block_cache* cache, 281 size_t max = SIZE_MAX); 282 ~BlockWriter(); 283 284 bool Add(cached_block* block, 285 cache_transaction* transaction = NULL); 286 bool Add(cache_transaction* transaction, 287 bool& hasLeftOvers); 288 289 status_t Write(cache_transaction* transaction = NULL, 290 bool canUnlock = true); 291 292 bool DeletedTransaction() const 293 { return fDeletedTransaction; } 294 295 static status_t WriteBlock(block_cache* cache, 296 cached_block* block); 297 298 private: 299 void* _Data(cached_block* block) const; 300 status_t _WriteBlock(cached_block* block); 301 void _BlockDone(cached_block* block, 302 cache_transaction* transaction); 303 void _UnmarkWriting(cached_block* block); 304 305 static int _CompareBlocks(const void* _blockA, 306 const void* _blockB); 307 308 private: 309 static const size_t kBufferSize = 64; 310 311 block_cache* fCache; 312 cached_block* fBuffer[kBufferSize]; 313 cached_block** fBlocks; 314 size_t fCount; 315 size_t fTotal; 316 size_t fCapacity; 317 size_t fMax; 318 status_t fStatus; 319 bool fDeletedTransaction; 320 }; 321 322 323 class TransactionLocking { 324 public: 325 inline bool Lock(block_cache* cache) 326 { 327 mutex_lock(&cache->lock); 328 329 while (cache->busy_writing_count != 0) { 330 // wait for all blocks to be written 331 ConditionVariableEntry entry; 332 cache->busy_writing_condition.Add(&entry); 333 cache->busy_writing_waiters = true; 334 335 mutex_unlock(&cache->lock); 336 337 entry.Wait(); 338 339 mutex_lock(&cache->lock); 340 } 341 342 return true; 343 } 344 345 inline void Unlock(block_cache* cache) 346 { 347 mutex_unlock(&cache->lock); 348 } 349 }; 350 351 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker; 352 353 } // namespace 354 355 356 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 357 namespace BlockTracing { 358 359 class Action : public AbstractTraceEntry { 360 public: 361 Action(block_cache* cache, cached_block* block) 362 : 363 fCache(cache), 364 fBlockNumber(block->block_number), 365 fIsDirty(block->is_dirty), 366 fHasOriginal(block->original_data != NULL), 367 fHasParent(block->parent_data != NULL), 368 fTransactionID(-1), 369 fPreviousID(-1) 370 { 371 if (block->transaction != NULL) 372 fTransactionID = block->transaction->id; 373 if (block->previous_transaction != NULL) 374 fPreviousID = block->previous_transaction->id; 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32 380 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber, 381 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-', 382 fHasParent ? 'p' : '-', fTransactionID, fPreviousID); 383 } 384 385 virtual const char* _Action() const = 0; 386 387 private: 388 block_cache* fCache; 389 uint64 fBlockNumber; 390 bool fIsDirty; 391 bool fHasOriginal; 392 bool fHasParent; 393 int32 fTransactionID; 394 int32 fPreviousID; 395 }; 396 397 class Get : public Action { 398 public: 399 Get(block_cache* cache, cached_block* block) 400 : 401 Action(cache, block) 402 { 403 Initialized(); 404 } 405 406 virtual const char* _Action() const { return "get"; } 407 }; 408 409 class Put : public Action { 410 public: 411 Put(block_cache* cache, cached_block* block) 412 : 413 Action(cache, block) 414 { 415 Initialized(); 416 } 417 418 virtual const char* _Action() const { return "put"; } 419 }; 420 421 class Read : public Action { 422 public: 423 Read(block_cache* cache, cached_block* block) 424 : 425 Action(cache, block) 426 { 427 Initialized(); 428 } 429 430 virtual const char* _Action() const { return "read"; } 431 }; 432 433 class Write : public Action { 434 public: 435 Write(block_cache* cache, cached_block* block) 436 : 437 Action(cache, block) 438 { 439 Initialized(); 440 } 441 442 virtual const char* _Action() const { return "write"; } 443 }; 444 445 class Flush : public Action { 446 public: 447 Flush(block_cache* cache, cached_block* block, bool getUnused = false) 448 : 449 Action(cache, block), 450 fGetUnused(getUnused) 451 { 452 Initialized(); 453 } 454 455 virtual const char* _Action() const 456 { return fGetUnused ? "get-unused" : "flush"; } 457 458 private: 459 bool fGetUnused; 460 }; 461 462 class Error : public AbstractTraceEntry { 463 public: 464 Error(block_cache* cache, uint64 blockNumber, const char* message, 465 status_t status = B_OK) 466 : 467 fCache(cache), 468 fBlockNumber(blockNumber), 469 fMessage(message), 470 fStatus(status) 471 { 472 Initialized(); 473 } 474 475 virtual void AddDump(TraceOutput& out) 476 { 477 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s", 478 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "", 479 fStatus != B_OK ? strerror(fStatus) : ""); 480 } 481 482 private: 483 block_cache* fCache; 484 uint64 fBlockNumber; 485 const char* fMessage; 486 status_t fStatus; 487 }; 488 489 #if BLOCK_CACHE_BLOCK_TRACING >= 2 490 class BlockData : public AbstractTraceEntry { 491 public: 492 enum { 493 kCurrent = 0x01, 494 kParent = 0x02, 495 kOriginal = 0x04 496 }; 497 498 BlockData(block_cache* cache, cached_block* block, const char* message) 499 : 500 fCache(cache), 501 fSize(cache->block_size), 502 fBlockNumber(block->block_number), 503 fMessage(message) 504 { 505 _Allocate(fCurrent, block->current_data); 506 _Allocate(fParent, block->parent_data); 507 _Allocate(fOriginal, block->original_data); 508 509 #if KTRACE_PRINTF_STACK_TRACE 510 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 511 false); 512 #endif 513 514 Initialized(); 515 } 516 517 virtual void AddDump(TraceOutput& out) 518 { 519 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s", 520 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-', 521 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-', 522 fMessage); 523 } 524 525 #if KTRACE_PRINTF_STACK_TRACE 526 virtual void DumpStackTrace(TraceOutput& out) 527 { 528 out.PrintStackTrace(fStackTrace); 529 } 530 #endif 531 532 void DumpBlocks(uint32 which, uint32 offset, uint32 size) 533 { 534 if ((which & kCurrent) != 0) 535 DumpBlock(kCurrent, offset, size); 536 if ((which & kParent) != 0) 537 DumpBlock(kParent, offset, size); 538 if ((which & kOriginal) != 0) 539 DumpBlock(kOriginal, offset, size); 540 } 541 542 void DumpBlock(uint32 which, uint32 offset, uint32 size) 543 { 544 if (offset > fSize) { 545 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize); 546 return; 547 } 548 if (offset + size > fSize) 549 size = fSize - offset; 550 551 const char* label; 552 uint8* data; 553 554 if ((which & kCurrent) != 0) { 555 label = "current"; 556 data = fCurrent; 557 } else if ((which & kParent) != 0) { 558 label = "parent"; 559 data = fParent; 560 } else if ((which & kOriginal) != 0) { 561 label = "original"; 562 data = fOriginal; 563 } else 564 return; 565 566 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size); 567 568 static const uint32 kBlockSize = 16; 569 data += offset; 570 571 for (uint32 i = 0; i < size;) { 572 int start = i; 573 574 kprintf(" %04" B_PRIx32 " ", i); 575 for (; i < start + kBlockSize; i++) { 576 if (!(i % 4)) 577 kprintf(" "); 578 579 if (i >= size) 580 kprintf(" "); 581 else 582 kprintf("%02x", data[i]); 583 } 584 585 kprintf("\n"); 586 } 587 } 588 589 private: 590 void _Allocate(uint8*& target, void* source) 591 { 592 if (source == NULL) { 593 target = NULL; 594 return; 595 } 596 597 target = alloc_tracing_buffer_memcpy(source, fSize, false); 598 } 599 600 block_cache* fCache; 601 uint32 fSize; 602 uint64 fBlockNumber; 603 const char* fMessage; 604 uint8* fCurrent; 605 uint8* fParent; 606 uint8* fOriginal; 607 #if KTRACE_PRINTF_STACK_TRACE 608 tracing_stack_trace* fStackTrace; 609 #endif 610 }; 611 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 612 613 } // namespace BlockTracing 614 615 # define TB(x) new(std::nothrow) BlockTracing::x; 616 #else 617 # define TB(x) ; 618 #endif 619 620 #if BLOCK_CACHE_BLOCK_TRACING >= 2 621 # define TB2(x) new(std::nothrow) BlockTracing::x; 622 #else 623 # define TB2(x) ; 624 #endif 625 626 627 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER) 628 namespace TransactionTracing { 629 630 class Action : public AbstractTraceEntry { 631 public: 632 Action(const char* label, block_cache* cache, 633 cache_transaction* transaction) 634 : 635 fCache(cache), 636 fTransaction(transaction), 637 fID(transaction->id), 638 fSub(transaction->has_sub_transaction), 639 fNumBlocks(transaction->num_blocks), 640 fSubNumBlocks(transaction->sub_num_blocks) 641 { 642 strlcpy(fLabel, label, sizeof(fLabel)); 643 Initialized(); 644 } 645 646 virtual void AddDump(TraceOutput& out) 647 { 648 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s" 649 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction, 650 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks); 651 } 652 653 private: 654 char fLabel[12]; 655 block_cache* fCache; 656 cache_transaction* fTransaction; 657 int32 fID; 658 bool fSub; 659 int32 fNumBlocks; 660 int32 fSubNumBlocks; 661 }; 662 663 class Detach : public AbstractTraceEntry { 664 public: 665 Detach(block_cache* cache, cache_transaction* transaction, 666 cache_transaction* newTransaction) 667 : 668 fCache(cache), 669 fTransaction(transaction), 670 fID(transaction->id), 671 fSub(transaction->has_sub_transaction), 672 fNewTransaction(newTransaction), 673 fNewID(newTransaction->id) 674 { 675 Initialized(); 676 } 677 678 virtual void AddDump(TraceOutput& out) 679 { 680 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")" 681 "from transaction %p (id %" B_PRId32 ")%s", 682 fCache, fNewTransaction, fNewID, fTransaction, fID, 683 fSub ? " sub" : ""); 684 } 685 686 private: 687 block_cache* fCache; 688 cache_transaction* fTransaction; 689 int32 fID; 690 bool fSub; 691 cache_transaction* fNewTransaction; 692 int32 fNewID; 693 }; 694 695 class Abort : public AbstractTraceEntry { 696 public: 697 Abort(block_cache* cache, cache_transaction* transaction) 698 : 699 fCache(cache), 700 fTransaction(transaction), 701 fID(transaction->id), 702 fNumBlocks(0) 703 { 704 bool isSub = transaction->has_sub_transaction; 705 fNumBlocks = isSub ? transaction->sub_num_blocks 706 : transaction->num_blocks; 707 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t)); 708 if (fBlocks != NULL) { 709 cached_block* block = transaction->first_block; 710 for (int32 i = 0; block != NULL && i < fNumBlocks; 711 block = block->transaction_next) { 712 fBlocks[i++] = block->block_number; 713 } 714 } else 715 fNumBlocks = 0; 716 717 #if KTRACE_PRINTF_STACK_TRACE 718 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1, 719 false); 720 #endif 721 722 Initialized(); 723 } 724 725 virtual void AddDump(TraceOutput& out) 726 { 727 out.Print("block cache %p, abort transaction " 728 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID); 729 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++) 730 out.Print(" %" B_PRIdOFF, fBlocks[i]); 731 } 732 733 #if KTRACE_PRINTF_STACK_TRACE 734 virtual void DumpStackTrace(TraceOutput& out) 735 { 736 out.PrintStackTrace(fStackTrace); 737 } 738 #endif 739 740 private: 741 block_cache* fCache; 742 cache_transaction* fTransaction; 743 int32 fID; 744 off_t* fBlocks; 745 int32 fNumBlocks; 746 #if KTRACE_PRINTF_STACK_TRACE 747 tracing_stack_trace* fStackTrace; 748 #endif 749 }; 750 751 } // namespace TransactionTracing 752 753 # define T(x) new(std::nothrow) TransactionTracing::x; 754 #else 755 # define T(x) ; 756 #endif 757 758 759 static DoublyLinkedList<block_cache> sCaches; 760 static mutex sCachesLock = MUTEX_INITIALIZER("block caches"); 761 static mutex sCachesMemoryUseLock 762 = MUTEX_INITIALIZER("block caches memory use"); 763 static size_t sUsedMemory; 764 static sem_id sEventSemaphore; 765 static mutex sNotificationsLock 766 = MUTEX_INITIALIZER("block cache notifications"); 767 static thread_id sNotifierWriterThread; 768 static DoublyLinkedListLink<block_cache> sMarkCache; 769 // TODO: this only works if the link is the first entry of block_cache 770 static object_cache* sBlockCache; 771 772 773 // #pragma mark - notifications/listener 774 775 776 /*! Checks whether or not this is an event that closes a transaction. */ 777 static inline bool 778 is_closing_event(int32 event) 779 { 780 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0; 781 } 782 783 784 static inline bool 785 is_written_event(int32 event) 786 { 787 return (event & TRANSACTION_WRITTEN) != 0; 788 } 789 790 791 /*! From the specified \a notification, it will remove the lowest pending 792 event, and return that one in \a _event. 793 If there is no pending event anymore, it will return \c false. 794 */ 795 static bool 796 get_next_pending_event(cache_notification* notification, int32* _event) 797 { 798 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) { 799 int32 pending = atomic_and(¬ification->events_pending, 800 ~eventMask); 801 802 bool more = (pending & ~eventMask) != 0; 803 804 if ((pending & eventMask) != 0) { 805 *_event = eventMask; 806 return more; 807 } 808 } 809 810 return false; 811 } 812 813 814 static void 815 flush_pending_notifications(block_cache* cache) 816 { 817 ASSERT_LOCKED_MUTEX(&sCachesLock); 818 819 while (true) { 820 MutexLocker locker(sNotificationsLock); 821 822 cache_notification* notification = cache->pending_notifications.Head(); 823 if (notification == NULL) 824 return; 825 826 bool deleteAfterEvent = false; 827 int32 event = -1; 828 if (!get_next_pending_event(notification, &event)) { 829 // remove the notification if this was the last pending event 830 cache->pending_notifications.Remove(notification); 831 deleteAfterEvent = notification->delete_after_event; 832 } 833 834 if (event >= 0) { 835 // Notify listener, we need to copy the notification, as it might 836 // be removed when we unlock the list. 837 cache_notification copy = *notification; 838 locker.Unlock(); 839 840 copy.hook(copy.transaction_id, event, copy.data); 841 842 locker.Lock(); 843 } 844 845 if (deleteAfterEvent) 846 delete notification; 847 } 848 } 849 850 851 /*! Flushes all pending notifications by calling the appropriate hook 852 functions. 853 Must not be called with a cache lock held. 854 */ 855 static void 856 flush_pending_notifications() 857 { 858 MutexLocker _(sCachesLock); 859 860 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 861 while (iterator.HasNext()) { 862 block_cache* cache = iterator.Next(); 863 864 flush_pending_notifications(cache); 865 } 866 } 867 868 869 /*! Initializes the \a notification as specified. */ 870 static void 871 set_notification(cache_transaction* transaction, 872 cache_notification ¬ification, int32 events, 873 transaction_notification_hook hook, void* data) 874 { 875 notification.transaction_id = transaction != NULL ? transaction->id : -1; 876 notification.events_pending = 0; 877 notification.events = events; 878 notification.hook = hook; 879 notification.data = data; 880 notification.delete_after_event = false; 881 } 882 883 884 /*! Makes sure the notification is deleted. It either deletes it directly, 885 when possible, or marks it for deletion if the notification is pending. 886 */ 887 static void 888 delete_notification(cache_notification* notification) 889 { 890 MutexLocker locker(sNotificationsLock); 891 892 if (notification->events_pending != 0) 893 notification->delete_after_event = true; 894 else 895 delete notification; 896 } 897 898 899 /*! Adds the notification to the pending notifications list, or, if it's 900 already part of it, updates its events_pending field. 901 Also marks the notification to be deleted if \a deleteNotification 902 is \c true. 903 Triggers the notifier thread to run. 904 */ 905 static void 906 add_notification(block_cache* cache, cache_notification* notification, 907 int32 event, bool deleteNotification) 908 { 909 if (notification->hook == NULL) 910 return; 911 912 int32 pending = atomic_or(¬ification->events_pending, event); 913 if (pending == 0) { 914 // not yet part of the notification list 915 MutexLocker locker(sNotificationsLock); 916 if (deleteNotification) 917 notification->delete_after_event = true; 918 cache->pending_notifications.Add(notification); 919 } else if (deleteNotification) { 920 // we might need to delete it ourselves if we're late 921 delete_notification(notification); 922 } 923 924 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE); 925 // We're probably still holding some locks that makes rescheduling 926 // not a good idea at this point. 927 } 928 929 930 /*! Notifies all interested listeners of this transaction about the \a event. 931 If \a event is a closing event (ie. TRANSACTION_ENDED, and 932 TRANSACTION_ABORTED), all listeners except those listening to 933 TRANSACTION_WRITTEN will be removed. 934 */ 935 static void 936 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction, 937 int32 event) 938 { 939 T(Action("notify", cache, transaction)); 940 941 bool isClosing = is_closing_event(event); 942 bool isWritten = is_written_event(event); 943 944 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 945 while (iterator.HasNext()) { 946 cache_listener* listener = iterator.Next(); 947 948 bool remove = (isClosing && !is_written_event(listener->events)) 949 || (isWritten && is_written_event(listener->events)); 950 if (remove) 951 iterator.Remove(); 952 953 if ((listener->events & event) != 0) 954 add_notification(cache, listener, event, remove); 955 else if (remove) 956 delete_notification(listener); 957 } 958 } 959 960 961 /*! Removes and deletes all listeners that are still monitoring this 962 transaction. 963 */ 964 static void 965 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction) 966 { 967 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 968 while (iterator.HasNext()) { 969 cache_listener* listener = iterator.Next(); 970 iterator.Remove(); 971 972 delete_notification(listener); 973 } 974 } 975 976 977 static status_t 978 add_transaction_listener(block_cache* cache, cache_transaction* transaction, 979 int32 events, transaction_notification_hook hookFunction, void* data) 980 { 981 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 982 while (iterator.HasNext()) { 983 cache_listener* listener = iterator.Next(); 984 985 if (listener->data == data && listener->hook == hookFunction) { 986 // this listener already exists, just update it 987 listener->events |= events; 988 return B_OK; 989 } 990 } 991 992 cache_listener* listener = new cache_listener; 993 if (listener == NULL) 994 return B_NO_MEMORY; 995 996 set_notification(transaction, *listener, events, hookFunction, data); 997 transaction->listeners.Add(listener); 998 return B_OK; 999 } 1000 1001 1002 // #pragma mark - private transaction 1003 1004 1005 cache_transaction::cache_transaction() 1006 { 1007 num_blocks = 0; 1008 main_num_blocks = 0; 1009 sub_num_blocks = 0; 1010 first_block = NULL; 1011 open = true; 1012 has_sub_transaction = false; 1013 last_used = system_time(); 1014 busy_writing_count = 0; 1015 } 1016 1017 1018 static void 1019 delete_transaction(block_cache* cache, cache_transaction* transaction) 1020 { 1021 if (cache->last_transaction == transaction) 1022 cache->last_transaction = NULL; 1023 1024 remove_transaction_listeners(cache, transaction); 1025 delete transaction; 1026 } 1027 1028 1029 static cache_transaction* 1030 lookup_transaction(block_cache* cache, int32 id) 1031 { 1032 return cache->transaction_hash->Lookup(id); 1033 } 1034 1035 1036 size_t TransactionHash::Hash(cache_transaction* transaction) const 1037 { 1038 return transaction->id; 1039 } 1040 1041 1042 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const 1043 { 1044 return transaction->id == key; 1045 } 1046 1047 1048 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const 1049 { 1050 return value->next; 1051 } 1052 1053 1054 /*! Writes back any changes made to blocks in \a transaction that are still 1055 part of a previous transacton. 1056 */ 1057 static status_t 1058 write_blocks_in_previous_transaction(block_cache* cache, 1059 cache_transaction* transaction) 1060 { 1061 BlockWriter writer(cache); 1062 1063 cached_block* block = transaction->first_block; 1064 for (; block != NULL; block = block->transaction_next) { 1065 if (block->previous_transaction != NULL) { 1066 // need to write back pending changes 1067 writer.Add(block); 1068 } 1069 } 1070 1071 return writer.Write(); 1072 } 1073 1074 1075 // #pragma mark - cached_block 1076 1077 1078 bool 1079 cached_block::CanBeWritten() const 1080 { 1081 return !busy_writing && !busy_reading 1082 && (previous_transaction != NULL 1083 || (transaction == NULL && is_dirty && !is_writing)); 1084 } 1085 1086 1087 // #pragma mark - BlockWriter 1088 1089 1090 BlockWriter::BlockWriter(block_cache* cache, size_t max) 1091 : 1092 fCache(cache), 1093 fBlocks(fBuffer), 1094 fCount(0), 1095 fTotal(0), 1096 fCapacity(kBufferSize), 1097 fMax(max), 1098 fStatus(B_OK), 1099 fDeletedTransaction(false) 1100 { 1101 } 1102 1103 1104 BlockWriter::~BlockWriter() 1105 { 1106 if (fBlocks != fBuffer) 1107 free(fBlocks); 1108 } 1109 1110 1111 /*! Adds the specified block to the to be written array. If no more blocks can 1112 be added, false is returned, otherwise true. 1113 */ 1114 bool 1115 BlockWriter::Add(cached_block* block, cache_transaction* transaction) 1116 { 1117 ASSERT(block->CanBeWritten()); 1118 1119 if (fTotal == fMax) 1120 return false; 1121 1122 if (fCount >= fCapacity) { 1123 // Enlarge array if necessary 1124 cached_block** newBlocks; 1125 size_t newCapacity = max_c(256, fCapacity * 2); 1126 if (fBlocks == fBuffer) 1127 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*)); 1128 else { 1129 newBlocks = (cached_block**)realloc(fBlocks, 1130 newCapacity * sizeof(void*)); 1131 } 1132 1133 if (newBlocks == NULL) { 1134 // Allocating a larger array failed - we need to write back what 1135 // we have synchronously now (this will also clear the array) 1136 Write(transaction, false); 1137 } else { 1138 if (fBlocks == fBuffer) 1139 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*)); 1140 1141 fBlocks = newBlocks; 1142 fCapacity = newCapacity; 1143 } 1144 } 1145 1146 fBlocks[fCount++] = block; 1147 fTotal++; 1148 block->busy_writing = true; 1149 fCache->busy_writing_count++; 1150 if (block->previous_transaction != NULL) 1151 block->previous_transaction->busy_writing_count++; 1152 1153 return true; 1154 } 1155 1156 1157 /*! Adds all blocks of the specified transaction to the to be written array. 1158 If no more blocks can be added, false is returned, otherwise true. 1159 */ 1160 bool 1161 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers) 1162 { 1163 ASSERT(!transaction->open); 1164 1165 if (transaction->busy_writing_count != 0) { 1166 hasLeftOvers = true; 1167 return true; 1168 } 1169 1170 hasLeftOvers = false; 1171 1172 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 1173 while (cached_block* block = blockIterator.Next()) { 1174 if (!block->CanBeWritten()) { 1175 // This block was already part of a previous transaction within this 1176 // writer 1177 hasLeftOvers = true; 1178 continue; 1179 } 1180 if (!Add(block, transaction)) 1181 return false; 1182 1183 if (DeletedTransaction()) 1184 break; 1185 } 1186 1187 return true; 1188 } 1189 1190 1191 /*! Cache must be locked when calling this method, but it will be unlocked 1192 while the blocks are written back. 1193 */ 1194 status_t 1195 BlockWriter::Write(cache_transaction* transaction, bool canUnlock) 1196 { 1197 if (fCount == 0) 1198 return B_OK; 1199 1200 if (canUnlock) 1201 mutex_unlock(&fCache->lock); 1202 1203 // Sort blocks in their on-disk order 1204 // TODO: ideally, this should be handled by the I/O scheduler 1205 1206 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks); 1207 fDeletedTransaction = false; 1208 1209 bigtime_t start = system_time(); 1210 1211 for (uint32 i = 0; i < fCount; i++) { 1212 status_t status = _WriteBlock(fBlocks[i]); 1213 if (status != B_OK) { 1214 // propagate to global error handling 1215 if (fStatus == B_OK) 1216 fStatus = status; 1217 1218 _UnmarkWriting(fBlocks[i]); 1219 fBlocks[i] = NULL; 1220 // This block will not be marked clean 1221 } 1222 } 1223 1224 bigtime_t finish = system_time(); 1225 1226 if (canUnlock) 1227 mutex_lock(&fCache->lock); 1228 1229 if (fStatus == B_OK && fCount >= 8) { 1230 fCache->last_block_write = finish; 1231 fCache->last_block_write_duration = (fCache->last_block_write - start) 1232 / fCount; 1233 } 1234 1235 for (uint32 i = 0; i < fCount; i++) 1236 _BlockDone(fBlocks[i], transaction); 1237 1238 fCount = 0; 1239 return fStatus; 1240 } 1241 1242 1243 /*! Writes the specified \a block back to disk. It will always only write back 1244 the oldest change of the block if it is part of more than one transaction. 1245 It will automatically send out TRANSACTION_WRITTEN notices, as well as 1246 delete transactions when they are no longer used, and \a deleteTransaction 1247 is \c true. 1248 */ 1249 /*static*/ status_t 1250 BlockWriter::WriteBlock(block_cache* cache, cached_block* block) 1251 { 1252 BlockWriter writer(cache); 1253 1254 writer.Add(block); 1255 return writer.Write(); 1256 } 1257 1258 1259 void* 1260 BlockWriter::_Data(cached_block* block) const 1261 { 1262 return block->previous_transaction != NULL && block->original_data != NULL 1263 ? block->original_data : block->current_data; 1264 // We first need to write back changes from previous transactions 1265 } 1266 1267 1268 status_t 1269 BlockWriter::_WriteBlock(cached_block* block) 1270 { 1271 ASSERT(block->busy_writing); 1272 1273 TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number)); 1274 TB(Write(fCache, block)); 1275 TB2(BlockData(fCache, block, "before write")); 1276 1277 size_t blockSize = fCache->block_size; 1278 1279 ssize_t written = write_pos(fCache->fd, 1280 block->block_number * blockSize, _Data(block), blockSize); 1281 1282 if (written != (ssize_t)blockSize) { 1283 TB(Error(fCache, block->block_number, "write failed", written)); 1284 TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number, 1285 strerror(errno))); 1286 if (written < 0) 1287 return errno; 1288 1289 return B_IO_ERROR; 1290 } 1291 1292 return B_OK; 1293 } 1294 1295 1296 void 1297 BlockWriter::_BlockDone(cached_block* block, 1298 cache_transaction* transaction) 1299 { 1300 if (block == NULL) { 1301 // An error occured when trying to write this block 1302 return; 1303 } 1304 1305 if (fCache->num_dirty_blocks > 0) 1306 fCache->num_dirty_blocks--; 1307 1308 if (_Data(block) == block->current_data) 1309 block->is_dirty = false; 1310 1311 _UnmarkWriting(block); 1312 1313 cache_transaction* previous = block->previous_transaction; 1314 if (previous != NULL) { 1315 previous->blocks.Remove(block); 1316 block->previous_transaction = NULL; 1317 1318 if (block->original_data != NULL && block->transaction == NULL) { 1319 // This block is not part of a transaction, so it does not need 1320 // its original pointer anymore. 1321 fCache->Free(block->original_data); 1322 block->original_data = NULL; 1323 } 1324 1325 // Has the previous transaction been finished with that write? 1326 if (--previous->num_blocks == 0) { 1327 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id)); 1328 T(Action("written", fCache, previous)); 1329 1330 notify_transaction_listeners(fCache, previous, 1331 TRANSACTION_WRITTEN); 1332 1333 if (transaction != NULL) { 1334 // This function is called while iterating transaction_hash. We 1335 // use RemoveUnchecked so the iterator is still valid. A regular 1336 // Remove can trigger a resize of the hash table which would 1337 // result in the linked items in the table changing order. 1338 fCache->transaction_hash->RemoveUnchecked(transaction); 1339 } else 1340 fCache->transaction_hash->Remove(previous); 1341 1342 delete_transaction(fCache, previous); 1343 fDeletedTransaction = true; 1344 } 1345 } 1346 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) { 1347 // the block is no longer used 1348 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1349 block->unused = true; 1350 fCache->unused_blocks.Add(block); 1351 fCache->unused_block_count++; 1352 } 1353 1354 TB2(BlockData(fCache, block, "after write")); 1355 } 1356 1357 1358 void 1359 BlockWriter::_UnmarkWriting(cached_block* block) 1360 { 1361 block->busy_writing = false; 1362 if (block->previous_transaction != NULL) 1363 block->previous_transaction->busy_writing_count--; 1364 fCache->busy_writing_count--; 1365 1366 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0) 1367 || block->busy_writing_waiters) { 1368 fCache->busy_writing_waiters = false; 1369 block->busy_writing_waiters = false; 1370 fCache->busy_writing_condition.NotifyAll(); 1371 } 1372 } 1373 1374 1375 /*static*/ int 1376 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB) 1377 { 1378 cached_block* blockA = *(cached_block**)_blockA; 1379 cached_block* blockB = *(cached_block**)_blockB; 1380 1381 off_t diff = blockA->block_number - blockB->block_number; 1382 if (diff > 0) 1383 return 1; 1384 1385 return diff < 0 ? -1 : 0; 1386 } 1387 1388 1389 // #pragma mark - block_cache 1390 1391 1392 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize, 1393 bool readOnly) 1394 : 1395 hash(NULL), 1396 fd(_fd), 1397 max_blocks(numBlocks), 1398 block_size(blockSize), 1399 next_transaction_id(1), 1400 last_transaction(NULL), 1401 transaction_hash(NULL), 1402 buffer_cache(NULL), 1403 unused_block_count(0), 1404 busy_reading_count(0), 1405 busy_reading_waiters(false), 1406 busy_writing_count(0), 1407 busy_writing_waiters(0), 1408 last_block_write(0), 1409 last_block_write_duration(0), 1410 num_dirty_blocks(0), 1411 read_only(readOnly) 1412 { 1413 } 1414 1415 1416 /*! Should be called with the cache's lock held. */ 1417 block_cache::~block_cache() 1418 { 1419 unregister_low_resource_handler(&_LowMemoryHandler, this); 1420 1421 delete transaction_hash; 1422 delete hash; 1423 1424 delete_object_cache(buffer_cache); 1425 1426 mutex_destroy(&lock); 1427 } 1428 1429 1430 status_t 1431 block_cache::Init() 1432 { 1433 busy_reading_condition.Init(this, "cache block busy_reading"); 1434 busy_writing_condition.Init(this, "cache block busy writing"); 1435 condition_variable.Init(this, "cache transaction sync"); 1436 mutex_init(&lock, "block cache"); 1437 1438 buffer_cache = create_object_cache_etc("block cache buffers", block_size, 1439 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 1440 if (buffer_cache == NULL) 1441 return B_NO_MEMORY; 1442 1443 hash = new BlockTable(); 1444 if (hash == NULL || hash->Init(1024) != B_OK) 1445 return B_NO_MEMORY; 1446 1447 transaction_hash = new(std::nothrow) TransactionTable(); 1448 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK) 1449 return B_NO_MEMORY; 1450 1451 return register_low_resource_handler(&_LowMemoryHandler, this, 1452 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1453 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0); 1454 } 1455 1456 1457 void 1458 block_cache::Free(void* buffer) 1459 { 1460 if (buffer != NULL) 1461 object_cache_free(buffer_cache, buffer, 0); 1462 } 1463 1464 1465 void* 1466 block_cache::Allocate() 1467 { 1468 void* block = object_cache_alloc(buffer_cache, 0); 1469 if (block != NULL) 1470 return block; 1471 1472 // recycle existing before allocating a new one 1473 RemoveUnusedBlocks(100); 1474 1475 return object_cache_alloc(buffer_cache, 0); 1476 } 1477 1478 1479 void 1480 block_cache::FreeBlock(cached_block* block) 1481 { 1482 Free(block->current_data); 1483 1484 if (block->original_data != NULL || block->parent_data != NULL) { 1485 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n", 1486 block->block_number, block->original_data, block->parent_data); 1487 } 1488 1489 #if BLOCK_CACHE_DEBUG_CHANGED 1490 Free(block->compare); 1491 #endif 1492 1493 object_cache_free(sBlockCache, block, 0); 1494 } 1495 1496 1497 /*! Allocates a new block for \a blockNumber, ready for use */ 1498 cached_block* 1499 block_cache::NewBlock(off_t blockNumber) 1500 { 1501 cached_block* block = NULL; 1502 1503 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY 1504 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) { 1505 // recycle existing instead of allocating a new one 1506 block = _GetUnusedBlock(); 1507 } 1508 if (block == NULL) { 1509 block = (cached_block*)object_cache_alloc(sBlockCache, 0); 1510 if (block != NULL) { 1511 block->current_data = Allocate(); 1512 if (block->current_data == NULL) { 1513 object_cache_free(sBlockCache, block, 0); 1514 return NULL; 1515 } 1516 } else { 1517 TB(Error(this, blockNumber, "allocation failed")); 1518 dprintf("block allocation failed, unused list is %sempty.\n", 1519 unused_blocks.IsEmpty() ? "" : "not "); 1520 1521 // allocation failed, try to reuse an unused block 1522 block = _GetUnusedBlock(); 1523 if (block == NULL) { 1524 TB(Error(this, blockNumber, "get unused failed")); 1525 FATAL(("could not allocate block!\n")); 1526 return NULL; 1527 } 1528 } 1529 } 1530 1531 block->block_number = blockNumber; 1532 block->ref_count = 0; 1533 block->last_accessed = 0; 1534 block->transaction_next = NULL; 1535 block->transaction = block->previous_transaction = NULL; 1536 block->original_data = NULL; 1537 block->parent_data = NULL; 1538 block->busy_reading = false; 1539 block->busy_writing = false; 1540 block->is_writing = false; 1541 block->is_dirty = false; 1542 block->unused = false; 1543 block->discard = false; 1544 block->busy_reading_waiters = false; 1545 block->busy_writing_waiters = false; 1546 #if BLOCK_CACHE_DEBUG_CHANGED 1547 block->compare = NULL; 1548 #endif 1549 1550 return block; 1551 } 1552 1553 1554 void 1555 block_cache::FreeBlockParentData(cached_block* block) 1556 { 1557 ASSERT(block->parent_data != NULL); 1558 if (block->parent_data != block->current_data) 1559 Free(block->parent_data); 1560 block->parent_data = NULL; 1561 } 1562 1563 1564 void 1565 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld) 1566 { 1567 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count)); 1568 1569 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1570 cached_block* block = iterator.Next();) { 1571 if (minSecondsOld >= block->LastAccess()) { 1572 // The list is sorted by last access 1573 break; 1574 } 1575 if (block->busy_reading || block->busy_writing) 1576 continue; 1577 1578 TB(Flush(this, block)); 1579 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n", 1580 block->block_number, block->last_accessed)); 1581 1582 // this can only happen if no transactions are used 1583 if (block->is_dirty && !block->discard) { 1584 if (block->busy_writing) 1585 continue; 1586 1587 BlockWriter::WriteBlock(this, block); 1588 } 1589 1590 // remove block from lists 1591 iterator.Remove(); 1592 unused_block_count--; 1593 RemoveBlock(block); 1594 1595 if (--count <= 0) 1596 break; 1597 } 1598 } 1599 1600 1601 void 1602 block_cache::RemoveBlock(cached_block* block) 1603 { 1604 hash->Remove(block); 1605 FreeBlock(block); 1606 } 1607 1608 1609 /*! Discards the block from a transaction (this method must not be called 1610 for blocks not part of a transaction). 1611 */ 1612 void 1613 block_cache::DiscardBlock(cached_block* block) 1614 { 1615 ASSERT(block->discard); 1616 ASSERT(block->previous_transaction == NULL); 1617 1618 if (block->parent_data != NULL) 1619 FreeBlockParentData(block); 1620 1621 if (block->original_data != NULL) { 1622 Free(block->original_data); 1623 block->original_data = NULL; 1624 } 1625 1626 RemoveBlock(block); 1627 } 1628 1629 1630 void 1631 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level) 1632 { 1633 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level)); 1634 1635 // free some blocks according to the low memory state 1636 // (if there is enough memory left, we don't free any) 1637 1638 block_cache* cache = (block_cache*)data; 1639 if (cache->unused_block_count <= 1) 1640 return; 1641 1642 int32 free = 0; 1643 int32 secondsOld = 0; 1644 switch (level) { 1645 case B_NO_LOW_RESOURCE: 1646 return; 1647 case B_LOW_RESOURCE_NOTE: 1648 free = cache->unused_block_count / 4; 1649 secondsOld = 120; 1650 break; 1651 case B_LOW_RESOURCE_WARNING: 1652 free = cache->unused_block_count / 2; 1653 secondsOld = 10; 1654 break; 1655 case B_LOW_RESOURCE_CRITICAL: 1656 free = cache->unused_block_count - 1; 1657 secondsOld = 0; 1658 break; 1659 } 1660 1661 MutexLocker locker(&cache->lock); 1662 1663 if (!locker.IsLocked()) { 1664 // If our block_cache were deleted, it could be that we had 1665 // been called before that deletion went through, therefore, 1666 // acquiring its lock might fail. 1667 return; 1668 } 1669 1670 #ifdef TRACE_BLOCK_CACHE 1671 uint32 oldUnused = cache->unused_block_count; 1672 #endif 1673 1674 cache->RemoveUnusedBlocks(free, secondsOld); 1675 1676 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n", 1677 cache, oldUnused, cache->unused_block_count)); 1678 } 1679 1680 1681 cached_block* 1682 block_cache::_GetUnusedBlock() 1683 { 1684 TRACE(("block_cache: get unused block\n")); 1685 1686 for (block_list::Iterator iterator = unused_blocks.GetIterator(); 1687 cached_block* block = iterator.Next();) { 1688 TB(Flush(this, block, true)); 1689 // this can only happen if no transactions are used 1690 if (block->is_dirty && !block->busy_writing && !block->discard) 1691 BlockWriter::WriteBlock(this, block); 1692 1693 // remove block from lists 1694 iterator.Remove(); 1695 unused_block_count--; 1696 hash->Remove(block); 1697 1698 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1699 block->unused = false; 1700 1701 // TODO: see if compare data is handled correctly here! 1702 #if BLOCK_CACHE_DEBUG_CHANGED 1703 if (block->compare != NULL) 1704 Free(block->compare); 1705 #endif 1706 return block; 1707 } 1708 1709 return NULL; 1710 } 1711 1712 1713 // #pragma mark - private block functions 1714 1715 1716 /*! Cache must be locked. 1717 */ 1718 static void 1719 mark_block_busy_reading(block_cache* cache, cached_block* block) 1720 { 1721 block->busy_reading = true; 1722 cache->busy_reading_count++; 1723 } 1724 1725 1726 /*! Cache must be locked. 1727 */ 1728 static void 1729 mark_block_unbusy_reading(block_cache* cache, cached_block* block) 1730 { 1731 block->busy_reading = false; 1732 cache->busy_reading_count--; 1733 1734 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0) 1735 || block->busy_reading_waiters) { 1736 cache->busy_reading_waiters = false; 1737 block->busy_reading_waiters = false; 1738 cache->busy_reading_condition.NotifyAll(); 1739 } 1740 } 1741 1742 1743 /*! Cache must be locked. 1744 */ 1745 static void 1746 wait_for_busy_reading_block(block_cache* cache, cached_block* block) 1747 { 1748 while (block->busy_reading) { 1749 // wait for at least the specified block to be read in 1750 ConditionVariableEntry entry; 1751 cache->busy_reading_condition.Add(&entry); 1752 block->busy_reading_waiters = true; 1753 1754 mutex_unlock(&cache->lock); 1755 1756 entry.Wait(); 1757 1758 mutex_lock(&cache->lock); 1759 } 1760 } 1761 1762 1763 /*! Cache must be locked. 1764 */ 1765 static void 1766 wait_for_busy_reading_blocks(block_cache* cache) 1767 { 1768 while (cache->busy_reading_count != 0) { 1769 // wait for all blocks to be read in 1770 ConditionVariableEntry entry; 1771 cache->busy_reading_condition.Add(&entry); 1772 cache->busy_reading_waiters = true; 1773 1774 mutex_unlock(&cache->lock); 1775 1776 entry.Wait(); 1777 1778 mutex_lock(&cache->lock); 1779 } 1780 } 1781 1782 1783 /*! Cache must be locked. 1784 */ 1785 static void 1786 wait_for_busy_writing_block(block_cache* cache, cached_block* block) 1787 { 1788 while (block->busy_writing) { 1789 // wait for all blocks to be written back 1790 ConditionVariableEntry entry; 1791 cache->busy_writing_condition.Add(&entry); 1792 block->busy_writing_waiters = true; 1793 1794 mutex_unlock(&cache->lock); 1795 1796 entry.Wait(); 1797 1798 mutex_lock(&cache->lock); 1799 } 1800 } 1801 1802 1803 /*! Cache must be locked. 1804 */ 1805 static void 1806 wait_for_busy_writing_blocks(block_cache* cache) 1807 { 1808 while (cache->busy_writing_count != 0) { 1809 // wait for all blocks to be written back 1810 ConditionVariableEntry entry; 1811 cache->busy_writing_condition.Add(&entry); 1812 cache->busy_writing_waiters = true; 1813 1814 mutex_unlock(&cache->lock); 1815 1816 entry.Wait(); 1817 1818 mutex_lock(&cache->lock); 1819 } 1820 } 1821 1822 1823 /*! Removes a reference from the specified \a block. If this was the last 1824 reference, the block is moved into the unused list. 1825 In low memory situations, it will also free some blocks from that list, 1826 but not necessarily the \a block it just released. 1827 */ 1828 static void 1829 put_cached_block(block_cache* cache, cached_block* block) 1830 { 1831 #if BLOCK_CACHE_DEBUG_CHANGED 1832 if (!block->is_dirty && block->compare != NULL 1833 && memcmp(block->current_data, block->compare, cache->block_size)) { 1834 dprintf("new block:\n"); 1835 dump_block((const char*)block->current_data, 256, " "); 1836 dprintf("unchanged block:\n"); 1837 dump_block((const char*)block->compare, 256, " "); 1838 BlockWriter::WriteBlock(cache, block); 1839 panic("block_cache: supposed to be clean block was changed!\n"); 1840 1841 cache->Free(block->compare); 1842 block->compare = NULL; 1843 } 1844 #endif 1845 TB(Put(cache, block)); 1846 1847 if (block->ref_count < 1) { 1848 panic("Invalid ref_count for block %p, cache %p\n", block, cache); 1849 return; 1850 } 1851 1852 if (--block->ref_count == 0 1853 && block->transaction == NULL && block->previous_transaction == NULL) { 1854 // This block is not used anymore, and not part of any transaction 1855 block->is_writing = false; 1856 1857 if (block->discard) { 1858 cache->RemoveBlock(block); 1859 } else { 1860 // put this block in the list of unused blocks 1861 ASSERT(!block->unused); 1862 block->unused = true; 1863 1864 ASSERT(block->original_data == NULL && block->parent_data == NULL); 1865 cache->unused_blocks.Add(block); 1866 cache->unused_block_count++; 1867 } 1868 } 1869 } 1870 1871 1872 static void 1873 put_cached_block(block_cache* cache, off_t blockNumber) 1874 { 1875 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1876 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1877 blockNumber, cache->max_blocks - 1); 1878 } 1879 1880 cached_block* block = cache->hash->Lookup(blockNumber); 1881 if (block != NULL) 1882 put_cached_block(cache, block); 1883 else { 1884 TB(Error(cache, blockNumber, "put unknown")); 1885 } 1886 } 1887 1888 1889 /*! Retrieves the block \a blockNumber from the hash table, if it's already 1890 there, or reads it from the disk. 1891 You need to have the cache locked when calling this function. 1892 1893 \param _allocated tells you whether or not a new block has been allocated 1894 to satisfy your request. 1895 \param readBlock if \c false, the block will not be read in case it was 1896 not already in the cache. The block you retrieve may contain random 1897 data. If \c true, the cache will be temporarily unlocked while the 1898 block is read in. 1899 */ 1900 static cached_block* 1901 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated, 1902 bool readBlock = true) 1903 { 1904 ASSERT_LOCKED_MUTEX(&cache->lock); 1905 1906 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1907 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1908 blockNumber, cache->max_blocks - 1); 1909 return NULL; 1910 } 1911 1912 retry: 1913 cached_block* block = cache->hash->Lookup(blockNumber); 1914 *_allocated = false; 1915 1916 if (block == NULL) { 1917 // put block into cache 1918 block = cache->NewBlock(blockNumber); 1919 if (block == NULL) 1920 return NULL; 1921 1922 cache->hash->Insert(block); 1923 *_allocated = true; 1924 } else if (block->busy_reading) { 1925 // The block is currently busy_reading - wait and try again later 1926 wait_for_busy_reading_block(cache, block); 1927 goto retry; 1928 } 1929 1930 if (block->unused) { 1931 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber)); 1932 block->unused = false; 1933 cache->unused_blocks.Remove(block); 1934 cache->unused_block_count--; 1935 } 1936 1937 if (*_allocated && readBlock) { 1938 // read block into cache 1939 int32 blockSize = cache->block_size; 1940 1941 mark_block_busy_reading(cache, block); 1942 mutex_unlock(&cache->lock); 1943 1944 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize, 1945 block->current_data, blockSize); 1946 1947 mutex_lock(&cache->lock); 1948 if (bytesRead < blockSize) { 1949 cache->RemoveBlock(block); 1950 TB(Error(cache, blockNumber, "read failed", bytesRead)); 1951 1952 TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n", 1953 blockNumber, bytesRead, strerror(errno))); 1954 return NULL; 1955 } 1956 TB(Read(cache, block)); 1957 1958 mark_block_unbusy_reading(cache, block); 1959 } 1960 1961 block->ref_count++; 1962 block->last_accessed = system_time() / 1000000L; 1963 1964 return block; 1965 } 1966 1967 1968 /*! Returns the writable block data for the requested blockNumber. 1969 If \a cleared is true, the block is not read from disk; an empty block 1970 is returned. 1971 1972 This is the only method to insert a block into a transaction. It makes 1973 sure that the previous block contents are preserved in that case. 1974 */ 1975 static void* 1976 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base, 1977 off_t length, int32 transactionID, bool cleared) 1978 { 1979 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 1980 blockNumber, transactionID)); 1981 1982 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 1983 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")", 1984 blockNumber, cache->max_blocks - 1); 1985 } 1986 1987 bool allocated; 1988 cached_block* block = get_cached_block(cache, blockNumber, &allocated, 1989 !cleared); 1990 if (block == NULL) 1991 return NULL; 1992 1993 if (block->busy_writing) 1994 wait_for_busy_writing_block(cache, block); 1995 1996 block->discard = false; 1997 1998 // if there is no transaction support, we just return the current block 1999 if (transactionID == -1) { 2000 if (cleared) { 2001 mark_block_busy_reading(cache, block); 2002 mutex_unlock(&cache->lock); 2003 2004 memset(block->current_data, 0, cache->block_size); 2005 2006 mutex_lock(&cache->lock); 2007 mark_block_unbusy_reading(cache, block); 2008 } 2009 2010 block->is_writing = true; 2011 2012 if (!block->is_dirty) { 2013 cache->num_dirty_blocks++; 2014 block->is_dirty = true; 2015 // mark the block as dirty 2016 } 2017 2018 TB(Get(cache, block)); 2019 return block->current_data; 2020 } 2021 2022 cache_transaction* transaction = block->transaction; 2023 2024 if (transaction != NULL && transaction->id != transactionID) { 2025 // TODO: we have to wait here until the other transaction is done. 2026 // Maybe we should even panic, since we can't prevent any deadlocks. 2027 panic("get_writable_cached_block(): asked to get busy writable block " 2028 "(transaction %" B_PRId32 ")\n", block->transaction->id); 2029 put_cached_block(cache, block); 2030 return NULL; 2031 } 2032 if (transaction == NULL && transactionID != -1) { 2033 // get new transaction 2034 transaction = lookup_transaction(cache, transactionID); 2035 if (transaction == NULL) { 2036 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n", 2037 transactionID); 2038 put_cached_block(cache, block); 2039 return NULL; 2040 } 2041 if (!transaction->open) { 2042 panic("get_writable_cached_block(): transaction already done!\n"); 2043 put_cached_block(cache, block); 2044 return NULL; 2045 } 2046 2047 block->transaction = transaction; 2048 2049 // attach the block to the transaction block list 2050 block->transaction_next = transaction->first_block; 2051 transaction->first_block = block; 2052 transaction->num_blocks++; 2053 } 2054 if (transaction != NULL) 2055 transaction->last_used = system_time(); 2056 2057 bool wasUnchanged = block->original_data == NULL 2058 || block->previous_transaction != NULL; 2059 2060 if (!(allocated && cleared) && block->original_data == NULL) { 2061 // we already have data, so we need to preserve it 2062 block->original_data = cache->Allocate(); 2063 if (block->original_data == NULL) { 2064 TB(Error(cache, blockNumber, "allocate original failed")); 2065 FATAL(("could not allocate original_data\n")); 2066 put_cached_block(cache, block); 2067 return NULL; 2068 } 2069 2070 mark_block_busy_reading(cache, block); 2071 mutex_unlock(&cache->lock); 2072 2073 memcpy(block->original_data, block->current_data, cache->block_size); 2074 2075 mutex_lock(&cache->lock); 2076 mark_block_unbusy_reading(cache, block); 2077 } 2078 if (block->parent_data == block->current_data) { 2079 // remember any previous contents for the parent transaction 2080 block->parent_data = cache->Allocate(); 2081 if (block->parent_data == NULL) { 2082 // TODO: maybe we should just continue the current transaction in 2083 // this case... 2084 TB(Error(cache, blockNumber, "allocate parent failed")); 2085 FATAL(("could not allocate parent\n")); 2086 put_cached_block(cache, block); 2087 return NULL; 2088 } 2089 2090 mark_block_busy_reading(cache, block); 2091 mutex_unlock(&cache->lock); 2092 2093 memcpy(block->parent_data, block->current_data, cache->block_size); 2094 2095 mutex_lock(&cache->lock); 2096 mark_block_unbusy_reading(cache, block); 2097 2098 transaction->sub_num_blocks++; 2099 } else if (transaction != NULL && transaction->has_sub_transaction 2100 && block->parent_data == NULL && wasUnchanged) 2101 transaction->sub_num_blocks++; 2102 2103 if (cleared) { 2104 mark_block_busy_reading(cache, block); 2105 mutex_unlock(&cache->lock); 2106 2107 memset(block->current_data, 0, cache->block_size); 2108 2109 mutex_lock(&cache->lock); 2110 mark_block_unbusy_reading(cache, block); 2111 } 2112 2113 block->is_dirty = true; 2114 TB(Get(cache, block)); 2115 TB2(BlockData(cache, block, "get writable")); 2116 2117 return block->current_data; 2118 } 2119 2120 2121 #if DEBUG_BLOCK_CACHE 2122 2123 2124 static void 2125 dump_block(cached_block* block) 2126 { 2127 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32 2128 " %c%c%c%c%c%c %08lx %08lx\n", 2129 (addr_t)block, block->block_number, 2130 (addr_t)block->current_data, (addr_t)block->original_data, 2131 (addr_t)block->parent_data, block->ref_count, block->LastAccess(), 2132 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-', 2133 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-', 2134 block->unused ? 'U' : '-', block->discard ? 'D' : '-', 2135 (addr_t)block->transaction, 2136 (addr_t)block->previous_transaction); 2137 } 2138 2139 2140 static void 2141 dump_block_long(cached_block* block) 2142 { 2143 kprintf("BLOCK %p\n", block); 2144 kprintf(" current data: %p\n", block->current_data); 2145 kprintf(" original data: %p\n", block->original_data); 2146 kprintf(" parent data: %p\n", block->parent_data); 2147 #if BLOCK_CACHE_DEBUG_CHANGED 2148 kprintf(" compare data: %p\n", block->compare); 2149 #endif 2150 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count); 2151 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess()); 2152 kprintf(" flags: "); 2153 if (block->busy_reading) 2154 kprintf(" busy_reading"); 2155 if (block->busy_writing) 2156 kprintf(" busy_writing"); 2157 if (block->is_writing) 2158 kprintf(" is-writing"); 2159 if (block->is_dirty) 2160 kprintf(" is-dirty"); 2161 if (block->unused) 2162 kprintf(" unused"); 2163 if (block->discard) 2164 kprintf(" discard"); 2165 kprintf("\n"); 2166 if (block->transaction != NULL) { 2167 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction, 2168 block->transaction->id); 2169 if (block->transaction_next != NULL) { 2170 kprintf(" next in transaction: %" B_PRIdOFF "\n", 2171 block->transaction_next->block_number); 2172 } 2173 } 2174 if (block->previous_transaction != NULL) { 2175 kprintf(" previous transaction: %p (%" B_PRId32 ")\n", 2176 block->previous_transaction, 2177 block->previous_transaction->id); 2178 } 2179 2180 set_debug_variable("_current", (addr_t)block->current_data); 2181 set_debug_variable("_original", (addr_t)block->original_data); 2182 set_debug_variable("_parent", (addr_t)block->parent_data); 2183 } 2184 2185 2186 static int 2187 dump_cached_block(int argc, char** argv) 2188 { 2189 if (argc != 2) { 2190 kprintf("usage: %s <block-address>\n", argv[0]); 2191 return 0; 2192 } 2193 2194 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1])); 2195 return 0; 2196 } 2197 2198 2199 static int 2200 dump_cache(int argc, char** argv) 2201 { 2202 bool showTransactions = false; 2203 bool showBlocks = false; 2204 int32 i = 1; 2205 while (argv[i] != NULL && argv[i][0] == '-') { 2206 for (char* arg = &argv[i][1]; arg[0]; arg++) { 2207 switch (arg[0]) { 2208 case 'b': 2209 showBlocks = true; 2210 break; 2211 case 't': 2212 showTransactions = true; 2213 break; 2214 default: 2215 print_debugger_command_usage(argv[0]); 2216 return 0; 2217 } 2218 } 2219 i++; 2220 } 2221 2222 if (i >= argc) { 2223 print_debugger_command_usage(argv[0]); 2224 return 0; 2225 } 2226 2227 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]); 2228 if (cache == NULL) { 2229 kprintf("invalid cache address\n"); 2230 return 0; 2231 } 2232 2233 off_t blockNumber = -1; 2234 if (i + 1 < argc) { 2235 blockNumber = parse_expression(argv[i + 1]); 2236 cached_block* block = cache->hash->Lookup(blockNumber); 2237 if (block != NULL) 2238 dump_block_long(block); 2239 else 2240 kprintf("block %" B_PRIdOFF " not found\n", blockNumber); 2241 return 0; 2242 } 2243 2244 kprintf("BLOCK CACHE: %p\n", cache); 2245 2246 kprintf(" fd: %d\n", cache->fd); 2247 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks); 2248 kprintf(" block_size: %zu\n", cache->block_size); 2249 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id); 2250 kprintf(" buffer_cache: %p\n", cache->buffer_cache); 2251 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count, 2252 cache->busy_reading_waiters ? "has" : "no"); 2253 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count, 2254 cache->busy_writing_waiters ? "has" : "no"); 2255 2256 if (!cache->pending_notifications.IsEmpty()) { 2257 kprintf(" pending notifications:\n"); 2258 2259 NotificationList::Iterator iterator 2260 = cache->pending_notifications.GetIterator(); 2261 while (iterator.HasNext()) { 2262 cache_notification* notification = iterator.Next(); 2263 2264 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification, 2265 notification->events_pending, notification->hook, 2266 notification->data); 2267 } 2268 } 2269 2270 if (showTransactions) { 2271 kprintf(" transactions:\n"); 2272 kprintf("address id state blocks main sub\n"); 2273 2274 TransactionTable::Iterator iterator(cache->transaction_hash); 2275 2276 while (iterator.HasNext()) { 2277 cache_transaction* transaction = iterator.Next(); 2278 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5" 2279 B_PRId32 "\n", transaction, transaction->id, 2280 transaction->open ? "open" : "closed", 2281 transaction->num_blocks, transaction->main_num_blocks, 2282 transaction->sub_num_blocks); 2283 } 2284 } 2285 2286 if (showBlocks) { 2287 kprintf(" blocks:\n"); 2288 kprintf("address block no. current original parent refs access " 2289 "flags transact prev. trans\n"); 2290 } 2291 2292 uint32 referenced = 0; 2293 uint32 count = 0; 2294 uint32 dirty = 0; 2295 uint32 discarded = 0; 2296 BlockTable::Iterator iterator(cache->hash); 2297 while (iterator.HasNext()) { 2298 cached_block* block = iterator.Next(); 2299 if (showBlocks) 2300 dump_block(block); 2301 2302 if (block->is_dirty) 2303 dirty++; 2304 if (block->discard) 2305 discarded++; 2306 if (block->ref_count) 2307 referenced++; 2308 count++; 2309 } 2310 2311 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32 2312 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32 2313 " in unused.\n", 2314 count, dirty, discarded, referenced, cache->busy_reading_count, 2315 cache->unused_block_count); 2316 return 0; 2317 } 2318 2319 2320 static int 2321 dump_transaction(int argc, char** argv) 2322 { 2323 bool showBlocks = false; 2324 int i = 1; 2325 if (argc > 1 && !strcmp(argv[1], "-b")) { 2326 showBlocks = true; 2327 i++; 2328 } 2329 2330 if (argc - i < 1 || argc - i > 2) { 2331 print_debugger_command_usage(argv[0]); 2332 return 0; 2333 } 2334 2335 cache_transaction* transaction = NULL; 2336 2337 if (argc - i == 1) { 2338 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]); 2339 } else { 2340 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]); 2341 int32 id = parse_expression(argv[i + 1]); 2342 transaction = lookup_transaction(cache, id); 2343 if (transaction == NULL) { 2344 kprintf("No transaction with ID %" B_PRId32 " found.\n", id); 2345 return 0; 2346 } 2347 } 2348 2349 kprintf("TRANSACTION %p\n", transaction); 2350 2351 kprintf(" id: %" B_PRId32 "\n", transaction->id); 2352 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks); 2353 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks); 2354 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks); 2355 kprintf(" has sub: %d\n", transaction->has_sub_transaction); 2356 kprintf(" state: %s\n", transaction->open ? "open" : "closed"); 2357 kprintf(" idle: %" B_PRId64 " secs\n", 2358 (system_time() - transaction->last_used) / 1000000); 2359 2360 kprintf(" listeners:\n"); 2361 2362 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 2363 while (iterator.HasNext()) { 2364 cache_listener* listener = iterator.Next(); 2365 2366 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending, 2367 listener->hook, listener->data); 2368 } 2369 2370 if (!showBlocks) 2371 return 0; 2372 2373 kprintf(" blocks:\n"); 2374 kprintf("address block no. current original parent refs access " 2375 "flags transact prev. trans\n"); 2376 2377 cached_block* block = transaction->first_block; 2378 while (block != NULL) { 2379 dump_block(block); 2380 block = block->transaction_next; 2381 } 2382 2383 kprintf("--\n"); 2384 2385 block_list::Iterator blockIterator = transaction->blocks.GetIterator(); 2386 while (blockIterator.HasNext()) { 2387 block = blockIterator.Next(); 2388 dump_block(block); 2389 } 2390 2391 return 0; 2392 } 2393 2394 2395 static int 2396 dump_caches(int argc, char** argv) 2397 { 2398 kprintf("Block caches:\n"); 2399 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator(); 2400 while (i.HasNext()) { 2401 block_cache* cache = i.Next(); 2402 if (cache == (block_cache*)&sMarkCache) 2403 continue; 2404 2405 kprintf(" %p\n", cache); 2406 } 2407 2408 return 0; 2409 } 2410 2411 2412 #if BLOCK_CACHE_BLOCK_TRACING >= 2 2413 static int 2414 dump_block_data(int argc, char** argv) 2415 { 2416 using namespace BlockTracing; 2417 2418 // Determine which blocks to show 2419 2420 bool printStackTrace = true; 2421 uint32 which = 0; 2422 int32 i = 1; 2423 while (i < argc && argv[i][0] == '-') { 2424 char* arg = &argv[i][1]; 2425 while (arg[0]) { 2426 switch (arg[0]) { 2427 case 'c': 2428 which |= BlockData::kCurrent; 2429 break; 2430 case 'p': 2431 which |= BlockData::kParent; 2432 break; 2433 case 'o': 2434 which |= BlockData::kOriginal; 2435 break; 2436 2437 default: 2438 kprintf("invalid block specifier (only o/c/p are " 2439 "allowed).\n"); 2440 return 0; 2441 } 2442 arg++; 2443 } 2444 2445 i++; 2446 } 2447 if (which == 0) 2448 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal; 2449 2450 if (i == argc) { 2451 print_debugger_command_usage(argv[0]); 2452 return 0; 2453 } 2454 2455 // Get the range of blocks to print 2456 2457 int64 from = parse_expression(argv[i]); 2458 int64 to = from; 2459 if (argc > i + 1) 2460 to = parse_expression(argv[i + 1]); 2461 if (to < from) 2462 to = from; 2463 2464 uint32 offset = 0; 2465 uint32 size = LONG_MAX; 2466 if (argc > i + 2) 2467 offset = parse_expression(argv[i + 2]); 2468 if (argc > i + 3) 2469 size = parse_expression(argv[i + 3]); 2470 2471 TraceEntryIterator iterator; 2472 iterator.MoveTo(from - 1); 2473 2474 static char sBuffer[1024]; 2475 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID); 2476 2477 while (TraceEntry* entry = iterator.Next()) { 2478 int32 index = iterator.Index(); 2479 if (index > to) 2480 break; 2481 2482 Action* action = dynamic_cast<Action*>(entry); 2483 if (action != NULL) { 2484 out.Clear(); 2485 out.DumpEntry(action); 2486 continue; 2487 } 2488 2489 BlockData* blockData = dynamic_cast<BlockData*>(entry); 2490 if (blockData == NULL) 2491 continue; 2492 2493 out.Clear(); 2494 2495 const char* dump = out.DumpEntry(entry); 2496 int length = strlen(dump); 2497 if (length > 0 && dump[length - 1] == '\n') 2498 length--; 2499 2500 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump); 2501 2502 if (printStackTrace) { 2503 out.Clear(); 2504 entry->DumpStackTrace(out); 2505 if (out.Size() > 0) 2506 kputs(out.Buffer()); 2507 } 2508 2509 blockData->DumpBlocks(which, offset, size); 2510 } 2511 2512 return 0; 2513 } 2514 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2 2515 2516 2517 #endif // DEBUG_BLOCK_CACHE 2518 2519 2520 /*! Traverses through the block_cache list, and returns one cache after the 2521 other. The cache returned is automatically locked when you get it, and 2522 unlocked with the next call to this function. Ignores caches that are in 2523 deletion state. 2524 Returns \c NULL when the end of the list is reached. 2525 */ 2526 static block_cache* 2527 get_next_locked_block_cache(block_cache* last) 2528 { 2529 MutexLocker _(sCachesLock); 2530 2531 block_cache* cache; 2532 if (last != NULL) { 2533 mutex_unlock(&last->lock); 2534 2535 cache = sCaches.GetNext((block_cache*)&sMarkCache); 2536 sCaches.Remove((block_cache*)&sMarkCache); 2537 } else 2538 cache = sCaches.Head(); 2539 2540 if (cache != NULL) { 2541 mutex_lock(&cache->lock); 2542 sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache); 2543 } 2544 2545 return cache; 2546 } 2547 2548 2549 /*! Background thread that continuously checks for pending notifications of 2550 all caches. 2551 Every two seconds, it will also write back up to 64 blocks per cache. 2552 */ 2553 static status_t 2554 block_notifier_and_writer(void* /*data*/) 2555 { 2556 const bigtime_t kDefaultTimeout = 2000000LL; 2557 bigtime_t timeout = kDefaultTimeout; 2558 2559 while (true) { 2560 bigtime_t start = system_time(); 2561 2562 status_t status = acquire_sem_etc(sEventSemaphore, 1, 2563 B_RELATIVE_TIMEOUT, timeout); 2564 if (status == B_OK) { 2565 flush_pending_notifications(); 2566 timeout -= system_time() - start; 2567 continue; 2568 } 2569 2570 // Write 64 blocks of each block_cache roughly every 2 seconds, 2571 // potentially more or less depending on congestion and drive speeds 2572 // (usually much less.) We do not want to queue everything at once 2573 // because a future transaction might then get held up waiting for 2574 // a specific block to be written. 2575 timeout = kDefaultTimeout; 2576 size_t usedMemory; 2577 object_cache_get_usage(sBlockCache, &usedMemory); 2578 2579 block_cache* cache = NULL; 2580 while ((cache = get_next_locked_block_cache(cache)) != NULL) { 2581 // Give some breathing room: wait 2x the length of the potential 2582 // maximum block count-sized write between writes, and also skip 2583 // if there are more than 16 blocks currently being written. 2584 const bigtime_t next = cache->last_block_write 2585 + cache->last_block_write_duration * 2 * 64; 2586 if (cache->busy_writing_count > 16 || system_time() < next) { 2587 if (cache->last_block_write_duration > 0) { 2588 timeout = min_c(timeout, 2589 cache->last_block_write_duration * 2 * 64); 2590 } 2591 continue; 2592 } 2593 2594 BlockWriter writer(cache, 64); 2595 bool hasMoreBlocks = false; 2596 2597 size_t cacheUsedMemory; 2598 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory); 2599 usedMemory += cacheUsedMemory; 2600 2601 if (cache->num_dirty_blocks) { 2602 // This cache is not using transactions, we'll scan the blocks 2603 // directly 2604 BlockTable::Iterator iterator(cache->hash); 2605 2606 while (iterator.HasNext()) { 2607 cached_block* block = iterator.Next(); 2608 if (block->CanBeWritten() && !writer.Add(block)) { 2609 hasMoreBlocks = true; 2610 break; 2611 } 2612 } 2613 } else { 2614 TransactionTable::Iterator iterator(cache->transaction_hash); 2615 2616 while (iterator.HasNext()) { 2617 cache_transaction* transaction = iterator.Next(); 2618 if (transaction->open) { 2619 if (system_time() > transaction->last_used 2620 + kTransactionIdleTime) { 2621 // Transaction is open but idle 2622 notify_transaction_listeners(cache, transaction, 2623 TRANSACTION_IDLE); 2624 } 2625 continue; 2626 } 2627 2628 bool hasLeftOvers; 2629 // we ignore this one 2630 if (!writer.Add(transaction, hasLeftOvers)) { 2631 hasMoreBlocks = true; 2632 break; 2633 } 2634 } 2635 } 2636 2637 writer.Write(); 2638 2639 if (hasMoreBlocks && cache->last_block_write_duration > 0) { 2640 // There are probably still more blocks that we could write, so 2641 // see if we can decrease the timeout. 2642 timeout = min_c(timeout, 2643 cache->last_block_write_duration * 2 * 64); 2644 } 2645 2646 if ((block_cache_used_memory() / B_PAGE_SIZE) 2647 > vm_page_num_pages() / 2) { 2648 // Try to reduce memory usage to half of the available 2649 // RAM at maximum 2650 cache->RemoveUnusedBlocks(1000, 10); 2651 } 2652 } 2653 2654 MutexLocker _(sCachesMemoryUseLock); 2655 sUsedMemory = usedMemory; 2656 } 2657 2658 // never can get here 2659 return B_OK; 2660 } 2661 2662 2663 /*! Notify function for wait_for_notifications(). */ 2664 static void 2665 notify_sync(int32 transactionID, int32 event, void* _cache) 2666 { 2667 block_cache* cache = (block_cache*)_cache; 2668 2669 cache->condition_variable.NotifyOne(); 2670 } 2671 2672 2673 /*! Must be called with the sCachesLock held. */ 2674 static bool 2675 is_valid_cache(block_cache* cache) 2676 { 2677 ASSERT_LOCKED_MUTEX(&sCachesLock); 2678 2679 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator(); 2680 while (iterator.HasNext()) { 2681 if (cache == iterator.Next()) 2682 return true; 2683 } 2684 2685 return false; 2686 } 2687 2688 2689 /*! Waits until all pending notifications are carried out. 2690 Safe to be called from the block writer/notifier thread. 2691 You must not hold the \a cache lock when calling this function. 2692 */ 2693 static void 2694 wait_for_notifications(block_cache* cache) 2695 { 2696 MutexLocker locker(sCachesLock); 2697 2698 if (find_thread(NULL) == sNotifierWriterThread) { 2699 // We're the notifier thread, don't wait, but flush all pending 2700 // notifications directly. 2701 if (is_valid_cache(cache)) 2702 flush_pending_notifications(cache); 2703 return; 2704 } 2705 2706 // add sync notification 2707 cache_notification notification; 2708 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync, 2709 cache); 2710 2711 ConditionVariableEntry entry; 2712 cache->condition_variable.Add(&entry); 2713 2714 add_notification(cache, ¬ification, TRANSACTION_WRITTEN, false); 2715 locker.Unlock(); 2716 2717 // wait for notification hook to be called 2718 entry.Wait(); 2719 } 2720 2721 2722 status_t 2723 block_cache_init(void) 2724 { 2725 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block), 2726 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL); 2727 if (sBlockCache == NULL) 2728 return B_NO_MEMORY; 2729 2730 sCacheNotificationCache = create_object_cache("cache notifications", 2731 sizeof(cache_listener), 8, NULL, NULL, NULL); 2732 if (sCacheNotificationCache == NULL) 2733 return B_NO_MEMORY; 2734 2735 new (&sCaches) DoublyLinkedList<block_cache>; 2736 // manually call constructor 2737 2738 sEventSemaphore = create_sem(0, "block cache event"); 2739 if (sEventSemaphore < B_OK) 2740 return sEventSemaphore; 2741 2742 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer, 2743 "block notifier/writer", B_LOW_PRIORITY, NULL); 2744 if (sNotifierWriterThread >= B_OK) 2745 resume_thread(sNotifierWriterThread); 2746 2747 #if DEBUG_BLOCK_CACHE 2748 add_debugger_command_etc("block_caches", &dump_caches, 2749 "dumps all block caches", "\n", 0); 2750 add_debugger_command_etc("block_cache", &dump_cache, 2751 "dumps a specific block cache", 2752 "[-bt] <cache-address> [block-number]\n" 2753 " -t lists the transactions\n" 2754 " -b lists all blocks\n", 0); 2755 add_debugger_command("cached_block", &dump_cached_block, 2756 "dumps the specified cached block"); 2757 add_debugger_command_etc("transaction", &dump_transaction, 2758 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n" 2759 "Either use a block cache pointer and an ID or a pointer to the transaction.\n" 2760 " -b lists all blocks that are part of this transaction\n", 0); 2761 # if BLOCK_CACHE_BLOCK_TRACING >= 2 2762 add_debugger_command_etc("block_cache_data", &dump_block_data, 2763 "dumps the data blocks logged for the actions", 2764 "[-cpo] <from> [<to> [<offset> [<size>]]]\n" 2765 "If no data specifier is used, all blocks are shown by default.\n" 2766 " -c the current data is shown, if available.\n" 2767 " -p the parent data is shown, if available.\n" 2768 " -o the original data is shown, if available.\n" 2769 " <from> first index of tracing entries to show.\n" 2770 " <to> if given, the last entry. If not, only <from> is shown.\n" 2771 " <offset> the offset of the block data.\n" 2772 " <from> the size of the block data that is dumped\n", 0); 2773 # endif 2774 #endif // DEBUG_BLOCK_CACHE 2775 2776 return B_OK; 2777 } 2778 2779 2780 size_t 2781 block_cache_used_memory(void) 2782 { 2783 MutexLocker _(sCachesMemoryUseLock); 2784 return sUsedMemory; 2785 } 2786 2787 2788 // #pragma mark - public transaction API 2789 2790 2791 int32 2792 cache_start_transaction(void* _cache) 2793 { 2794 block_cache* cache = (block_cache*)_cache; 2795 TransactionLocker locker(cache); 2796 2797 if (cache->last_transaction && cache->last_transaction->open) { 2798 panic("last transaction (%" B_PRId32 ") still open!\n", 2799 cache->last_transaction->id); 2800 } 2801 2802 cache_transaction* transaction = new(std::nothrow) cache_transaction; 2803 if (transaction == NULL) 2804 return B_NO_MEMORY; 2805 2806 transaction->id = atomic_add(&cache->next_transaction_id, 1); 2807 cache->last_transaction = transaction; 2808 2809 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id)); 2810 T(Action("start", cache, transaction)); 2811 2812 cache->transaction_hash->Insert(transaction); 2813 2814 return transaction->id; 2815 } 2816 2817 2818 status_t 2819 cache_sync_transaction(void* _cache, int32 id) 2820 { 2821 block_cache* cache = (block_cache*)_cache; 2822 bool hadBusy; 2823 2824 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id)); 2825 2826 do { 2827 TransactionLocker locker(cache); 2828 hadBusy = false; 2829 2830 BlockWriter writer(cache); 2831 TransactionTable::Iterator iterator(cache->transaction_hash); 2832 2833 while (iterator.HasNext()) { 2834 // close all earlier transactions which haven't been closed yet 2835 cache_transaction* transaction = iterator.Next(); 2836 2837 if (transaction->busy_writing_count != 0) { 2838 hadBusy = true; 2839 continue; 2840 } 2841 if (transaction->id <= id && !transaction->open) { 2842 // write back all of their remaining dirty blocks 2843 T(Action("sync", cache, transaction)); 2844 2845 bool hasLeftOvers; 2846 writer.Add(transaction, hasLeftOvers); 2847 2848 if (hasLeftOvers) { 2849 // This transaction contains blocks that a previous 2850 // transaction is trying to write back in this write run 2851 hadBusy = true; 2852 } 2853 } 2854 } 2855 2856 status_t status = writer.Write(); 2857 if (status != B_OK) 2858 return status; 2859 } while (hadBusy); 2860 2861 wait_for_notifications(cache); 2862 // make sure that all pending TRANSACTION_WRITTEN notifications 2863 // are handled after we return 2864 return B_OK; 2865 } 2866 2867 2868 status_t 2869 cache_end_transaction(void* _cache, int32 id, 2870 transaction_notification_hook hook, void* data) 2871 { 2872 block_cache* cache = (block_cache*)_cache; 2873 TransactionLocker locker(cache); 2874 2875 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id)); 2876 2877 cache_transaction* transaction = lookup_transaction(cache, id); 2878 if (transaction == NULL) { 2879 panic("cache_end_transaction(): invalid transaction ID\n"); 2880 return B_BAD_VALUE; 2881 } 2882 2883 // Write back all pending transaction blocks 2884 status_t status = write_blocks_in_previous_transaction(cache, transaction); 2885 if (status != B_OK) 2886 return status; 2887 2888 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 2889 2890 if (hook != NULL 2891 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, 2892 hook, data) != B_OK) { 2893 return B_NO_MEMORY; 2894 } 2895 2896 T(Action("end", cache, transaction)); 2897 2898 // iterate through all blocks and free the unchanged original contents 2899 2900 cached_block* next; 2901 for (cached_block* block = transaction->first_block; block != NULL; 2902 block = next) { 2903 next = block->transaction_next; 2904 ASSERT(block->previous_transaction == NULL); 2905 2906 if (block->discard) { 2907 // This block has been discarded in the transaction 2908 cache->DiscardBlock(block); 2909 transaction->num_blocks--; 2910 continue; 2911 } 2912 2913 if (block->original_data != NULL) { 2914 cache->Free(block->original_data); 2915 block->original_data = NULL; 2916 } 2917 if (block->parent_data != NULL) { 2918 ASSERT(transaction->has_sub_transaction); 2919 cache->FreeBlockParentData(block); 2920 } 2921 2922 // move the block to the previous transaction list 2923 transaction->blocks.Add(block); 2924 2925 block->previous_transaction = transaction; 2926 block->transaction_next = NULL; 2927 block->transaction = NULL; 2928 } 2929 2930 transaction->open = false; 2931 return B_OK; 2932 } 2933 2934 2935 status_t 2936 cache_abort_transaction(void* _cache, int32 id) 2937 { 2938 block_cache* cache = (block_cache*)_cache; 2939 TransactionLocker locker(cache); 2940 2941 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id)); 2942 2943 cache_transaction* transaction = lookup_transaction(cache, id); 2944 if (transaction == NULL) { 2945 panic("cache_abort_transaction(): invalid transaction ID\n"); 2946 return B_BAD_VALUE; 2947 } 2948 2949 T(Abort(cache, transaction)); 2950 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 2951 2952 // iterate through all blocks and restore their original contents 2953 2954 cached_block* block = transaction->first_block; 2955 cached_block* next; 2956 for (; block != NULL; block = next) { 2957 next = block->transaction_next; 2958 2959 if (block->original_data != NULL) { 2960 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of " 2961 "block %" B_PRIdOFF "\n", transaction->id, block->block_number)); 2962 memcpy(block->current_data, block->original_data, 2963 cache->block_size); 2964 cache->Free(block->original_data); 2965 block->original_data = NULL; 2966 } 2967 if (transaction->has_sub_transaction && block->parent_data != NULL) 2968 cache->FreeBlockParentData(block); 2969 2970 block->transaction_next = NULL; 2971 block->transaction = NULL; 2972 block->discard = false; 2973 if (block->previous_transaction == NULL) 2974 block->is_dirty = false; 2975 } 2976 2977 cache->transaction_hash->Remove(transaction); 2978 delete_transaction(cache, transaction); 2979 return B_OK; 2980 } 2981 2982 2983 /*! Acknowledges the current parent transaction, and starts a new transaction 2984 from its sub transaction. 2985 The new transaction also gets a new transaction ID. 2986 */ 2987 int32 2988 cache_detach_sub_transaction(void* _cache, int32 id, 2989 transaction_notification_hook hook, void* data) 2990 { 2991 block_cache* cache = (block_cache*)_cache; 2992 TransactionLocker locker(cache); 2993 2994 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id)); 2995 2996 cache_transaction* transaction = lookup_transaction(cache, id); 2997 if (transaction == NULL) { 2998 panic("cache_detach_sub_transaction(): invalid transaction ID\n"); 2999 return B_BAD_VALUE; 3000 } 3001 if (!transaction->has_sub_transaction) 3002 return B_BAD_VALUE; 3003 3004 // iterate through all blocks and free the unchanged original contents 3005 3006 status_t status = write_blocks_in_previous_transaction(cache, transaction); 3007 if (status != B_OK) 3008 return status; 3009 3010 // create a new transaction for the sub transaction 3011 cache_transaction* newTransaction = new(std::nothrow) cache_transaction; 3012 if (newTransaction == NULL) 3013 return B_NO_MEMORY; 3014 3015 newTransaction->id = atomic_add(&cache->next_transaction_id, 1); 3016 T(Detach(cache, transaction, newTransaction)); 3017 3018 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3019 3020 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook, 3021 data) != B_OK) { 3022 delete newTransaction; 3023 return B_NO_MEMORY; 3024 } 3025 3026 cached_block* last = NULL; 3027 cached_block* next; 3028 for (cached_block* block = transaction->first_block; block != NULL; 3029 block = next) { 3030 next = block->transaction_next; 3031 ASSERT(block->previous_transaction == NULL); 3032 3033 if (block->discard) { 3034 cache->DiscardBlock(block); 3035 transaction->main_num_blocks--; 3036 continue; 3037 } 3038 3039 if (block->parent_data != NULL) { 3040 // The block changed in the parent - free the original data, since 3041 // they will be replaced by what is in current. 3042 ASSERT(block->original_data != NULL); 3043 cache->Free(block->original_data); 3044 3045 if (block->parent_data != block->current_data) { 3046 // The block had been changed in both transactions 3047 block->original_data = block->parent_data; 3048 } else { 3049 // The block has only been changed in the parent 3050 block->original_data = NULL; 3051 } 3052 3053 // move the block to the previous transaction list 3054 transaction->blocks.Add(block); 3055 block->previous_transaction = transaction; 3056 block->parent_data = NULL; 3057 } 3058 3059 if (block->original_data != NULL) { 3060 // This block had been changed in the current sub transaction, 3061 // we need to move this block over to the new transaction. 3062 ASSERT(block->parent_data == NULL); 3063 3064 if (last == NULL) 3065 newTransaction->first_block = block; 3066 else 3067 last->transaction_next = block; 3068 3069 block->transaction = newTransaction; 3070 last = block; 3071 } else 3072 block->transaction = NULL; 3073 3074 block->transaction_next = NULL; 3075 } 3076 3077 newTransaction->num_blocks = transaction->sub_num_blocks; 3078 3079 transaction->open = false; 3080 transaction->has_sub_transaction = false; 3081 transaction->num_blocks = transaction->main_num_blocks; 3082 transaction->sub_num_blocks = 0; 3083 3084 cache->transaction_hash->Insert(newTransaction); 3085 cache->last_transaction = newTransaction; 3086 3087 return newTransaction->id; 3088 } 3089 3090 3091 status_t 3092 cache_abort_sub_transaction(void* _cache, int32 id) 3093 { 3094 block_cache* cache = (block_cache*)_cache; 3095 TransactionLocker locker(cache); 3096 3097 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id)); 3098 3099 cache_transaction* transaction = lookup_transaction(cache, id); 3100 if (transaction == NULL) { 3101 panic("cache_abort_sub_transaction(): invalid transaction ID\n"); 3102 return B_BAD_VALUE; 3103 } 3104 if (!transaction->has_sub_transaction) 3105 return B_BAD_VALUE; 3106 3107 T(Abort(cache, transaction)); 3108 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED); 3109 3110 // revert all changes back to the version of the parent 3111 3112 cached_block* block = transaction->first_block; 3113 cached_block* last = NULL; 3114 cached_block* next; 3115 for (; block != NULL; block = next) { 3116 next = block->transaction_next; 3117 3118 if (block->parent_data == NULL) { 3119 // The parent transaction didn't change the block, but the sub 3120 // transaction did - we need to revert to the original data. 3121 // The block is no longer part of the transaction 3122 if (block->original_data != NULL) { 3123 // The block might not have original data if was empty 3124 memcpy(block->current_data, block->original_data, 3125 cache->block_size); 3126 } 3127 3128 if (last != NULL) 3129 last->transaction_next = next; 3130 else 3131 transaction->first_block = next; 3132 3133 block->transaction_next = NULL; 3134 block->transaction = NULL; 3135 transaction->num_blocks--; 3136 3137 if (block->previous_transaction == NULL) { 3138 cache->Free(block->original_data); 3139 block->original_data = NULL; 3140 block->is_dirty = false; 3141 3142 if (block->ref_count == 0) { 3143 // Move the block into the unused list if possible 3144 block->unused = true; 3145 cache->unused_blocks.Add(block); 3146 cache->unused_block_count++; 3147 } 3148 } 3149 } else { 3150 if (block->parent_data != block->current_data) { 3151 // The block has been changed and must be restored - the block 3152 // is still dirty and part of the transaction 3153 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): " 3154 "restored contents of block %" B_PRIdOFF "\n", 3155 transaction->id, block->block_number)); 3156 memcpy(block->current_data, block->parent_data, 3157 cache->block_size); 3158 cache->Free(block->parent_data); 3159 // The block stays dirty 3160 } 3161 block->parent_data = NULL; 3162 last = block; 3163 } 3164 3165 block->discard = false; 3166 } 3167 3168 // all subsequent changes will go into the main transaction 3169 transaction->has_sub_transaction = false; 3170 transaction->sub_num_blocks = 0; 3171 3172 return B_OK; 3173 } 3174 3175 3176 status_t 3177 cache_start_sub_transaction(void* _cache, int32 id) 3178 { 3179 block_cache* cache = (block_cache*)_cache; 3180 TransactionLocker locker(cache); 3181 3182 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id)); 3183 3184 cache_transaction* transaction = lookup_transaction(cache, id); 3185 if (transaction == NULL) { 3186 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n", 3187 id); 3188 return B_BAD_VALUE; 3189 } 3190 3191 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED); 3192 3193 // move all changed blocks up to the parent 3194 3195 cached_block* block = transaction->first_block; 3196 cached_block* next; 3197 for (; block != NULL; block = next) { 3198 next = block->transaction_next; 3199 3200 if (block->parent_data != NULL) { 3201 // There already is an older sub transaction - we acknowledge 3202 // its changes and move its blocks up to the parent 3203 ASSERT(transaction->has_sub_transaction); 3204 cache->FreeBlockParentData(block); 3205 } 3206 if (block->discard) { 3207 // This block has been discarded in the parent transaction. 3208 // Just throw away any changes made in this transaction, so that 3209 // it can still be reverted to its original contents if needed 3210 ASSERT(block->previous_transaction == NULL); 3211 if (block->original_data != NULL) { 3212 memcpy(block->current_data, block->original_data, 3213 cache->block_size); 3214 block->original_data = NULL; 3215 } 3216 continue; 3217 } 3218 3219 // we "allocate" the parent data lazily, that means, we don't copy 3220 // the data (and allocate memory for it) until we need to 3221 block->parent_data = block->current_data; 3222 } 3223 3224 // all subsequent changes will go into the sub transaction 3225 transaction->has_sub_transaction = true; 3226 transaction->main_num_blocks = transaction->num_blocks; 3227 transaction->sub_num_blocks = 0; 3228 T(Action("start-sub", cache, transaction)); 3229 3230 return B_OK; 3231 } 3232 3233 3234 /*! Adds a transaction listener that gets notified when the transaction 3235 is ended, aborted, written, or idle as specified by \a events. 3236 The listener gets automatically removed when the transaction ends. 3237 */ 3238 status_t 3239 cache_add_transaction_listener(void* _cache, int32 id, int32 events, 3240 transaction_notification_hook hook, void* data) 3241 { 3242 block_cache* cache = (block_cache*)_cache; 3243 TransactionLocker locker(cache); 3244 3245 cache_transaction* transaction = lookup_transaction(cache, id); 3246 if (transaction == NULL) 3247 return B_BAD_VALUE; 3248 3249 return add_transaction_listener(cache, transaction, events, hook, data); 3250 } 3251 3252 3253 status_t 3254 cache_remove_transaction_listener(void* _cache, int32 id, 3255 transaction_notification_hook hookFunction, void* data) 3256 { 3257 block_cache* cache = (block_cache*)_cache; 3258 TransactionLocker locker(cache); 3259 3260 cache_transaction* transaction = lookup_transaction(cache, id); 3261 if (transaction == NULL) 3262 return B_BAD_VALUE; 3263 3264 ListenerList::Iterator iterator = transaction->listeners.GetIterator(); 3265 while (iterator.HasNext()) { 3266 cache_listener* listener = iterator.Next(); 3267 if (listener->data == data && listener->hook == hookFunction) { 3268 iterator.Remove(); 3269 3270 if (listener->events_pending != 0) { 3271 MutexLocker _(sNotificationsLock); 3272 if (listener->events_pending != 0) 3273 cache->pending_notifications.Remove(listener); 3274 } 3275 delete listener; 3276 return B_OK; 3277 } 3278 } 3279 3280 return B_ENTRY_NOT_FOUND; 3281 } 3282 3283 3284 status_t 3285 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly, 3286 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData) 3287 { 3288 cached_block* block = (cached_block*)*_cookie; 3289 block_cache* cache = (block_cache*)_cache; 3290 TransactionLocker locker(cache); 3291 3292 cache_transaction* transaction = lookup_transaction(cache, id); 3293 if (transaction == NULL || !transaction->open) 3294 return B_BAD_VALUE; 3295 3296 if (block == NULL) 3297 block = transaction->first_block; 3298 else 3299 block = block->transaction_next; 3300 3301 if (transaction->has_sub_transaction) { 3302 if (mainOnly) { 3303 // find next block that the parent changed 3304 while (block != NULL && block->parent_data == NULL) 3305 block = block->transaction_next; 3306 } else { 3307 // find next non-discarded block 3308 while (block != NULL && block->discard) 3309 block = block->transaction_next; 3310 } 3311 } 3312 3313 if (block == NULL) 3314 return B_ENTRY_NOT_FOUND; 3315 3316 if (_blockNumber) 3317 *_blockNumber = block->block_number; 3318 if (_data) 3319 *_data = mainOnly ? block->parent_data : block->current_data; 3320 if (_unchangedData) 3321 *_unchangedData = block->original_data; 3322 3323 *_cookie = (addr_t)block; 3324 return B_OK; 3325 } 3326 3327 3328 int32 3329 cache_blocks_in_transaction(void* _cache, int32 id) 3330 { 3331 block_cache* cache = (block_cache*)_cache; 3332 TransactionLocker locker(cache); 3333 3334 cache_transaction* transaction = lookup_transaction(cache, id); 3335 if (transaction == NULL) 3336 return B_BAD_VALUE; 3337 3338 return transaction->num_blocks; 3339 } 3340 3341 3342 /*! Returns the number of blocks that are part of the main transaction. If this 3343 transaction does not have a sub transaction yet, this is the same value as 3344 cache_blocks_in_transaction() would return. 3345 */ 3346 int32 3347 cache_blocks_in_main_transaction(void* _cache, int32 id) 3348 { 3349 block_cache* cache = (block_cache*)_cache; 3350 TransactionLocker locker(cache); 3351 3352 cache_transaction* transaction = lookup_transaction(cache, id); 3353 if (transaction == NULL) 3354 return B_BAD_VALUE; 3355 3356 if (transaction->has_sub_transaction) 3357 return transaction->main_num_blocks; 3358 3359 return transaction->num_blocks; 3360 } 3361 3362 3363 int32 3364 cache_blocks_in_sub_transaction(void* _cache, int32 id) 3365 { 3366 block_cache* cache = (block_cache*)_cache; 3367 TransactionLocker locker(cache); 3368 3369 cache_transaction* transaction = lookup_transaction(cache, id); 3370 if (transaction == NULL) 3371 return B_BAD_VALUE; 3372 3373 return transaction->sub_num_blocks; 3374 } 3375 3376 3377 /*! Check if block is in transaction 3378 */ 3379 bool 3380 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber) 3381 { 3382 block_cache* cache = (block_cache*)_cache; 3383 TransactionLocker locker(cache); 3384 3385 cached_block* block = cache->hash->Lookup(blockNumber); 3386 3387 return (block != NULL && block->transaction != NULL 3388 && block->transaction->id == id); 3389 } 3390 3391 3392 // #pragma mark - public block cache API 3393 3394 3395 void 3396 block_cache_delete(void* _cache, bool allowWrites) 3397 { 3398 block_cache* cache = (block_cache*)_cache; 3399 3400 if (allowWrites) 3401 block_cache_sync(cache); 3402 3403 mutex_lock(&sCachesLock); 3404 sCaches.Remove(cache); 3405 mutex_unlock(&sCachesLock); 3406 3407 mutex_lock(&cache->lock); 3408 3409 // wait for all blocks to become unbusy 3410 wait_for_busy_reading_blocks(cache); 3411 wait_for_busy_writing_blocks(cache); 3412 3413 // free all blocks 3414 3415 cached_block* block = cache->hash->Clear(true); 3416 while (block != NULL) { 3417 cached_block* next = block->next; 3418 cache->FreeBlock(block); 3419 block = next; 3420 } 3421 3422 // free all transactions (they will all be aborted) 3423 3424 cache_transaction* transaction = cache->transaction_hash->Clear(true); 3425 while (transaction != NULL) { 3426 cache_transaction* next = transaction->next; 3427 delete transaction; 3428 transaction = next; 3429 } 3430 3431 delete cache; 3432 } 3433 3434 3435 void* 3436 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly) 3437 { 3438 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize, 3439 readOnly); 3440 if (cache == NULL) 3441 return NULL; 3442 3443 if (cache->Init() != B_OK) { 3444 delete cache; 3445 return NULL; 3446 } 3447 3448 MutexLocker _(sCachesLock); 3449 sCaches.Add(cache); 3450 3451 return cache; 3452 } 3453 3454 3455 status_t 3456 block_cache_sync(void* _cache) 3457 { 3458 block_cache* cache = (block_cache*)_cache; 3459 3460 // We will sync all dirty blocks to disk that have a completed 3461 // transaction or no transaction only 3462 3463 MutexLocker locker(&cache->lock); 3464 3465 BlockWriter writer(cache); 3466 BlockTable::Iterator iterator(cache->hash); 3467 3468 while (iterator.HasNext()) { 3469 cached_block* block = iterator.Next(); 3470 if (block->CanBeWritten()) 3471 writer.Add(block); 3472 } 3473 3474 status_t status = writer.Write(); 3475 3476 locker.Unlock(); 3477 3478 wait_for_notifications(cache); 3479 // make sure that all pending TRANSACTION_WRITTEN notifications 3480 // are handled after we return 3481 return status; 3482 } 3483 3484 3485 status_t 3486 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks) 3487 { 3488 block_cache* cache = (block_cache*)_cache; 3489 3490 // We will sync all dirty blocks to disk that have a completed 3491 // transaction or no transaction only 3492 3493 if (blockNumber < 0 || blockNumber >= cache->max_blocks) { 3494 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF 3495 " (max %" B_PRIdOFF ")", 3496 blockNumber, cache->max_blocks - 1); 3497 return B_BAD_VALUE; 3498 } 3499 3500 MutexLocker locker(&cache->lock); 3501 BlockWriter writer(cache); 3502 3503 for (; numBlocks > 0; numBlocks--, blockNumber++) { 3504 cached_block* block = cache->hash->Lookup(blockNumber); 3505 if (block == NULL) 3506 continue; 3507 3508 if (block->CanBeWritten()) 3509 writer.Add(block); 3510 } 3511 3512 status_t status = writer.Write(); 3513 3514 locker.Unlock(); 3515 3516 wait_for_notifications(cache); 3517 // make sure that all pending TRANSACTION_WRITTEN notifications 3518 // are handled after we return 3519 return status; 3520 } 3521 3522 3523 /*! Discards a block from the current transaction or from the cache. 3524 You have to call this function when you no longer use a block, ie. when it 3525 might be reclaimed by the file cache in order to make sure they won't 3526 interfere. 3527 */ 3528 void 3529 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks) 3530 { 3531 // TODO: this could be a nice place to issue the ATA trim command 3532 block_cache* cache = (block_cache*)_cache; 3533 TransactionLocker locker(cache); 3534 3535 BlockWriter writer(cache); 3536 3537 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3538 cached_block* block = cache->hash->Lookup(blockNumber); 3539 if (block != NULL && block->previous_transaction != NULL) 3540 writer.Add(block); 3541 } 3542 3543 writer.Write(); 3544 // TODO: this can fail, too! 3545 3546 blockNumber -= numBlocks; 3547 // reset blockNumber to its original value 3548 3549 for (size_t i = 0; i < numBlocks; i++, blockNumber++) { 3550 cached_block* block = cache->hash->Lookup(blockNumber); 3551 if (block == NULL) 3552 continue; 3553 3554 ASSERT(block->previous_transaction == NULL); 3555 3556 if (block->unused) { 3557 cache->unused_blocks.Remove(block); 3558 cache->unused_block_count--; 3559 cache->RemoveBlock(block); 3560 } else { 3561 if (block->transaction != NULL && block->parent_data != NULL 3562 && block->parent_data != block->current_data) { 3563 panic("Discarded block %" B_PRIdOFF " has already been changed in this " 3564 "transaction!", blockNumber); 3565 } 3566 3567 // mark it as discarded (in the current transaction only, if any) 3568 block->discard = true; 3569 } 3570 } 3571 } 3572 3573 3574 status_t 3575 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction) 3576 { 3577 block_cache* cache = (block_cache*)_cache; 3578 MutexLocker locker(&cache->lock); 3579 3580 if (cache->read_only) { 3581 panic("tried to make block writable on a read-only cache!"); 3582 return B_ERROR; 3583 } 3584 3585 // TODO: this can be done better! 3586 void* block = get_writable_cached_block(cache, blockNumber, 3587 blockNumber, 1, transaction, false); 3588 if (block != NULL) { 3589 put_cached_block((block_cache*)_cache, blockNumber); 3590 return B_OK; 3591 } 3592 3593 return B_ERROR; 3594 } 3595 3596 3597 void* 3598 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base, 3599 off_t length, int32 transaction) 3600 { 3601 block_cache* cache = (block_cache*)_cache; 3602 MutexLocker locker(&cache->lock); 3603 3604 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3605 blockNumber, transaction)); 3606 if (cache->read_only) 3607 panic("tried to get writable block on a read-only cache!"); 3608 3609 return get_writable_cached_block(cache, blockNumber, base, length, 3610 transaction, false); 3611 } 3612 3613 3614 void* 3615 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction) 3616 { 3617 return block_cache_get_writable_etc(_cache, blockNumber, 3618 blockNumber, 1, transaction); 3619 } 3620 3621 3622 void* 3623 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction) 3624 { 3625 block_cache* cache = (block_cache*)_cache; 3626 MutexLocker locker(&cache->lock); 3627 3628 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n", 3629 blockNumber, transaction)); 3630 if (cache->read_only) 3631 panic("tried to get empty writable block on a read-only cache!"); 3632 3633 return get_writable_cached_block((block_cache*)_cache, blockNumber, 3634 blockNumber, 1, transaction, true); 3635 } 3636 3637 3638 const void* 3639 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length) 3640 { 3641 block_cache* cache = (block_cache*)_cache; 3642 MutexLocker locker(&cache->lock); 3643 bool allocated; 3644 3645 cached_block* block = get_cached_block(cache, blockNumber, &allocated); 3646 if (block == NULL) 3647 return NULL; 3648 3649 #if BLOCK_CACHE_DEBUG_CHANGED 3650 if (block->compare == NULL) 3651 block->compare = cache->Allocate(); 3652 if (block->compare != NULL) 3653 memcpy(block->compare, block->current_data, cache->block_size); 3654 #endif 3655 TB(Get(cache, block)); 3656 3657 return block->current_data; 3658 } 3659 3660 3661 const void* 3662 block_cache_get(void* _cache, off_t blockNumber) 3663 { 3664 return block_cache_get_etc(_cache, blockNumber, blockNumber, 1); 3665 } 3666 3667 3668 /*! Changes the internal status of a writable block to \a dirty. This can be 3669 helpful in case you realize you don't need to change that block anymore 3670 for whatever reason. 3671 3672 Note, you must only use this function on blocks that were acquired 3673 writable! 3674 */ 3675 status_t 3676 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty, 3677 int32 transaction) 3678 { 3679 block_cache* cache = (block_cache*)_cache; 3680 MutexLocker locker(&cache->lock); 3681 3682 cached_block* block = cache->hash->Lookup(blockNumber); 3683 if (block == NULL) 3684 return B_BAD_VALUE; 3685 if (block->is_dirty == dirty) { 3686 // there is nothing to do for us 3687 return B_OK; 3688 } 3689 3690 // TODO: not yet implemented 3691 if (dirty) 3692 panic("block_cache_set_dirty(): not yet implemented that way!\n"); 3693 3694 return B_OK; 3695 } 3696 3697 3698 void 3699 block_cache_put(void* _cache, off_t blockNumber) 3700 { 3701 block_cache* cache = (block_cache*)_cache; 3702 MutexLocker locker(&cache->lock); 3703 3704 put_cached_block(cache, blockNumber); 3705 } 3706 3707