/*
 * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


#include "IOScheduler.h"

#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include <KernelExport.h>

#include <khash.h>
#include <lock.h>
#include <thread_types.h>
#include <thread.h>
#include <util/AutoLock.h>


//#define TRACE_IO_SCHEDULER
#ifdef TRACE_IO_SCHEDULER
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


IOCallback::~IOCallback()
{
}


status_t
IOCallback::DoIO(IOOperation* operation)
{
	return B_ERROR;
}


// #pragma mark -


void
IORequestOwner::Dump() const
{
	kprintf("IORequestOwner at %p\n", this);
	kprintf("  team:     %ld\n", team);
	kprintf("  thread:   %ld\n", thread);
	kprintf("  priority: %ld\n", priority);

	kprintf("  requests:");
	for (IORequestList::ConstIterator it = requests.GetIterator();
			IORequest* request = it.Next();) {
		kprintf(" %p", request);
	}
	kprintf("\n");

	kprintf("  completed requests:");
	for (IORequestList::ConstIterator it = completed_requests.GetIterator();
			IORequest* request = it.Next();) {
		kprintf(" %p", request);
	}
	kprintf("\n");

	kprintf("  operations:");
	for (IOOperationList::ConstIterator it = operations.GetIterator();
			IOOperation* operation = it.Next();) {
		kprintf(" %p", operation);
	}
	kprintf("\n");
}


// #pragma mark -


struct IOScheduler::RequestOwnerHashDefinition {
	typedef thread_id KeyType;
	typedef IORequestOwner ValueType;

	size_t HashKey(thread_id key) const { return key; }
	size_t Hash(const IORequestOwner* value) const { return value->thread; }
	bool Compare(thread_id key, const IORequestOwner* value) const
		{ return value->thread == key; }
	IORequestOwner*& GetLink(IORequestOwner* value) const
		{ return value->hash_link; }
};

struct IOScheduler::RequestOwnerHashTable
	: BOpenHashTable<RequestOwnerHashDefinition, false> {
};


IOScheduler::IOScheduler(DMAResource* resource)
	:
	fDMAResource(resource),
	fOperationArray(NULL),
	fAllocatedRequestOwners(NULL),
	fRequestOwners(NULL),
	fBlockSize(0),
	fPendingOperations(0)
{
	mutex_init(&fLock, "I/O scheduler");
	B_INITIALIZE_SPINLOCK(&fFinisherLock);
}


IOScheduler::~IOScheduler()
{
	// TODO: Shutdown threads.

	mutex_lock(&fLock);
	mutex_destroy(&fLock);

	while (IOOperation* operation = fUnusedOperations.RemoveHead())
		delete operation;

	delete[] fOperationArray;

	delete fRequestOwners;
	delete[] fAllocatedRequestOwners;
}


status_t
IOScheduler::Init(const char* name)
{
	fNewRequestCondition.Init(this, "I/O new request");
	fFinishedOperationCondition.Init(this, "I/O finished operation");
	fFinishedRequestCondition.Init(this, "I/O finished request");

	size_t count = fDMAResource != NULL ?
		fDMAResource->BufferCount() : 16;
	for (size_t i = 0; i < count; i++) {
		IOOperation* operation = new(std::nothrow) IOOperation;
		if (operation == NULL)
			return B_NO_MEMORY;

		fUnusedOperations.Add(operation);
	}

	fOperationArray = new(std::nothrow) IOOperation*[count];
	if (fOperationArray == NULL)
		return B_NO_MEMORY;

	if (fDMAResource != NULL)
		fBlockSize = fDMAResource->BlockSize();
	if (fBlockSize == 0)
		fBlockSize = 512;

	fAllocatedRequestOwnerCount = thread_max_threads();
	fAllocatedRequestOwners
		= new(std::nothrow) IORequestOwner[fAllocatedRequestOwnerCount];
	if (fAllocatedRequestOwners == NULL)
		return B_NO_MEMORY;

	for (int32 i = 0; i < fAllocatedRequestOwnerCount; i++) {
		IORequestOwner& owner = fAllocatedRequestOwners[i];
		owner.team = -1;
		owner.thread = -1;
		owner.priority = B_IDLE_PRIORITY;
		fUnusedRequestOwners.Add(&owner);
	}

	fRequestOwners = new(std::nothrow) RequestOwnerHashTable;
	if (fRequestOwners == NULL)
		return B_NO_MEMORY;

	status_t error = fRequestOwners->Init(fAllocatedRequestOwnerCount);
	if (error != B_OK)
		return error;

	// TODO: Use device speed dependent bandwidths!
	fIterationBandwidth = fBlockSize * 8192;
	fMinOwnerBandwidth = fBlockSize * 1024;
	fMaxOwnerBandwidth = fBlockSize * 4096;

	// start threads
	char buffer[B_OS_NAME_LENGTH];
	strlcpy(buffer, name, sizeof(buffer));
	strlcat(buffer, " scheduler", sizeof(buffer));
	fSchedulerThread = spawn_kernel_thread(&_SchedulerThread, buffer,
		B_NORMAL_PRIORITY, (void *)this);
	if (fSchedulerThread < B_OK)
		return fSchedulerThread;

	strlcpy(buffer, name, sizeof(buffer));
	strlcat(buffer, " notifier", sizeof(buffer));
	fRequestNotifierThread = spawn_kernel_thread(&_RequestNotifierThread,
		buffer, B_NORMAL_PRIORITY, (void *)this);
	if (fRequestNotifierThread < B_OK)
		return fRequestNotifierThread;

	resume_thread(fSchedulerThread);
	resume_thread(fRequestNotifierThread);
	return B_OK;
}


void
IOScheduler::SetCallback(IOCallback& callback)
{
	SetCallback(&_IOCallbackWrapper, &callback);
}


void
IOScheduler::SetCallback(io_callback callback, void* data)
{
	fIOCallback = callback;
	fIOCallbackData = data;
}


status_t
IOScheduler::ScheduleRequest(IORequest* request)
{
	TRACE("%p->IOScheduler::ScheduleRequest(%p)\n", this, request);

	IOBuffer* buffer = request->Buffer();

	// TODO: it would be nice to be able to lock the memory later, but we
	// can't easily do it in the I/O scheduler without being able to
	// asynchronously lock memory (via another thread or a dedicated call).
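	// For now the memory of virtual buffers is locked right here, before the
	// request is queued, so that the pages stay resident while the scheduler
	// and the driver callback work on them.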

	if (buffer->IsVirtual()) {
		status_t status = buffer->LockMemory(request->Team(),
			request->IsWrite());
		if (status != B_OK) {
			request->SetStatusAndNotify(status);
			return status;
		}
	}

	MutexLocker locker(fLock);

	IORequestOwner* owner = _GetRequestOwner(request->Team(), request->Thread(),
		true);
	if (owner == NULL) {
		panic("IOScheduler: Out of request owners!\n");
		locker.Unlock();
		if (buffer->IsVirtual())
			buffer->UnlockMemory(request->Team(), request->IsWrite());
		request->SetStatusAndNotify(B_NO_MEMORY);
		return B_NO_MEMORY;
	}

	bool wasActive = owner->IsActive();
	request->SetOwner(owner);
	owner->requests.Add(request);

	int32 priority = thread_get_io_priority(request->Thread());
	if (priority >= 0)
		owner->priority = priority;
	//dprintf("  request %p -> owner %p (thread %ld, active %d)\n", request, owner, owner->thread, wasActive);

	if (!wasActive)
		fActiveRequestOwners.Add(owner);

	fNewRequestCondition.NotifyAll();

	return B_OK;
}


void
IOScheduler::AbortRequest(IORequest* request, status_t status)
{
	// TODO:...
	//B_CANCELED
}


void
IOScheduler::OperationCompleted(IOOperation* operation, status_t status,
	size_t transferredBytes)
{
	InterruptsSpinLocker _(fFinisherLock);

	// finish operation only once
	if (operation->Status() <= 0)
		return;

	operation->SetStatus(status);

	// set the bytes transferred (of the net data)
	size_t partialBegin = operation->OriginalOffset() - operation->Offset();
	operation->SetTransferredBytes(
		transferredBytes > partialBegin ? transferredBytes - partialBegin : 0);

	fCompletedOperations.Add(operation);
	fFinishedOperationCondition.NotifyAll();
}


void
IOScheduler::Dump() const
{
	kprintf("IOScheduler at %p\n", this);
	kprintf("  DMA resource: %p\n", fDMAResource);

	kprintf("  active request owners:");
	for (RequestOwnerList::ConstIterator it
				= fActiveRequestOwners.GetIterator();
			IORequestOwner* owner = it.Next();) {
		kprintf(" %p", owner);
	}
	kprintf("\n");
}


/*! Must not be called with the fLock held. */
void
IOScheduler::_Finisher()
{
	while (true) {
		InterruptsSpinLocker locker(fFinisherLock);
		IOOperation* operation = fCompletedOperations.RemoveHead();
		if (operation == NULL)
			return;

		locker.Unlock();

		TRACE("IOScheduler::_Finisher(): operation: %p\n", operation);

		if (!operation->Finish()) {
			TRACE("  operation: %p not finished yet\n", operation);
			MutexLocker _(fLock);
			operation->SetTransferredBytes(0);
			operation->Parent()->Owner()->operations.Add(operation);
			fPendingOperations--;
			continue;
		}

		// notify request and remove operation
		IORequest* request = operation->Parent();
		if (request != NULL) {
			size_t operationOffset = operation->OriginalOffset()
				- request->Offset();
			request->OperationFinished(operation, operation->Status(),
				operation->TransferredBytes() < operation->OriginalLength(),
				operation->Status() == B_OK
					? operationOffset + operation->OriginalLength()
					: operationOffset);
		}

		// recycle the operation
		MutexLocker _(fLock);
		if (fDMAResource != NULL)
			fDMAResource->RecycleBuffer(operation->Buffer());

		fPendingOperations--;
		fUnusedOperations.Add(operation);

		// If the request is done, we need to perform its notifications.
		if (request->IsFinished()) {
			if (request->Status() == B_OK && request->RemainingBytes() > 0) {
				// The request has been processed OK so far, but it isn't really
				// finished yet.
				request->SetUnfinished();
			} else {
				// Remove the request from the request owner.
				IORequestOwner* owner = request->Owner();
				owner->requests.MoveFrom(&owner->completed_requests);
				owner->requests.Remove(request);
				request->SetOwner(NULL);

				if (!owner->IsActive()) {
					fActiveRequestOwners.Remove(owner);
					fUnusedRequestOwners.Add(owner);
				}

				if (request->HasCallbacks()) {
					// The request has callbacks that may take some time to
					// perform, so we hand it over to the request notifier.
					fFinishedRequests.Add(request);
					fFinishedRequestCondition.NotifyAll();
				} else {
					// No callbacks -- finish the request right now.
					request->NotifyFinished();
				}
			}
		}
	}
}


/*! Called with \c fFinisherLock held.
*/
bool
IOScheduler::_FinisherWorkPending()
{
	return !fCompletedOperations.IsEmpty();
}


bool
IOScheduler::_PrepareRequestOperations(IORequest* request,
	IOOperationList& operations, int32& operationsPrepared, off_t quantum,
	off_t& usedBandwidth)
{
	//dprintf("IOScheduler::_PrepareRequestOperations(%p)\n", request);
	usedBandwidth = 0;

	if (fDMAResource != NULL) {
		while (quantum >= fBlockSize && request->RemainingBytes() > 0) {
			IOOperation* operation = fUnusedOperations.RemoveHead();
			if (operation == NULL)
				return false;

			status_t status = fDMAResource->TranslateNext(request, operation,
				quantum);
			if (status != B_OK) {
				operation->SetParent(NULL);
				fUnusedOperations.Add(operation);

				// B_BUSY means some resource (DMABuffers or
				// DMABounceBuffers) was temporarily unavailable. That's OK,
				// we'll retry later.
				if (status == B_BUSY)
					return false;

				AbortRequest(request, status);
				return true;
			}
			//dprintf("  prepared operation %p\n", operation);

			off_t bandwidth = operation->Length();
			quantum -= bandwidth;
			usedBandwidth += bandwidth;

			operations.Add(operation);
			operationsPrepared++;
		}
	} else {
		// TODO: If the device has block size restrictions, we might need to use
		// a bounce buffer.
		IOOperation* operation = fUnusedOperations.RemoveHead();
		if (operation == NULL)
			return false;

		status_t status = operation->Prepare(request);
		if (status != B_OK) {
			operation->SetParent(NULL);
			fUnusedOperations.Add(operation);
			AbortRequest(request, status);
			return true;
		}

		operation->SetOriginalRange(request->Offset(), request->Length());
		request->Advance(request->Length());

		off_t bandwidth = operation->Length();
		quantum -= bandwidth;
		usedBandwidth += bandwidth;

		operations.Add(operation);
		operationsPrepared++;
	}

	return true;
}


off_t
IOScheduler::_ComputeRequestOwnerBandwidth(int32 priority) const
{
	// TODO: Use a priority dependent quantum!
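	// For the time being every owner gets the minimum per-owner bandwidth,
	// regardless of its I/O priority.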
	return fMinOwnerBandwidth;
}


void
IOScheduler::_NextActiveRequestOwner(IORequestOwner*& owner, off_t& quantum)
{
	while (true) {
		if (owner != NULL)
			owner = fActiveRequestOwners.GetNext(owner);
		if (owner == NULL)
			owner = fActiveRequestOwners.Head();

		if (owner != NULL) {
			quantum = _ComputeRequestOwnerBandwidth(owner->priority);
			return;
		}

		// Wait for new request owners. First check whether any finisher work
		// has to be done.
		InterruptsSpinLocker finisherLocker(fFinisherLock);
		if (_FinisherWorkPending()) {
			finisherLocker.Unlock();
			mutex_unlock(&fLock);
			_Finisher();
			mutex_lock(&fLock);
			continue;
		}

		// Wait for new requests.
		ConditionVariableEntry entry;
		fNewRequestCondition.Add(&entry);

		finisherLocker.Unlock();
		mutex_unlock(&fLock);

		entry.Wait(B_CAN_INTERRUPT);
		_Finisher();
		mutex_lock(&fLock);
	}
}


struct OperationComparator {
	inline bool operator()(const IOOperation* a, const IOOperation* b)
	{
		off_t offsetA = a->Offset();
		off_t offsetB = b->Offset();
		return offsetA < offsetB
			|| (offsetA == offsetB && a->Length() > b->Length());
	}
};


void
IOScheduler::_SortOperations(IOOperationList& operations, off_t& lastOffset)
{
	// TODO: _Scheduler() could directly add the operations to the array.
	// move operations to an array and sort it
	int32 count = 0;
	while (IOOperation* operation = operations.RemoveHead())
		fOperationArray[count++] = operation;

	std::sort(fOperationArray, fOperationArray + count, OperationComparator());

	// move the sorted operations to a temporary list we can work with
	//dprintf("operations after sorting:\n");
	IOOperationList sortedOperations;
	for (int32 i = 0; i < count; i++) {
		//dprintf("  %3ld: %p: offset: %lld, length: %lu\n", i,
		//	fOperationArray[i], fOperationArray[i]->Offset(),
		//	fOperationArray[i]->Length());
		sortedOperations.Add(fOperationArray[i]);
	}

	// Sort the operations so that no two adjacent operations overlap. This
	// might result in several elevator runs.
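	// Each pass takes, in ascending offset order, the operations that start
	// at or beyond lastOffset; whatever a pass skips is picked up by the
	// next pass, which restarts at offset 0.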
	while (!sortedOperations.IsEmpty()) {
		IOOperation* operation = sortedOperations.Head();
		while (operation != NULL) {
			IOOperation* nextOperation = sortedOperations.GetNext(operation);
			if (operation->Offset() >= lastOffset) {
				sortedOperations.Remove(operation);
				//dprintf("  adding operation %p\n", operation);
				operations.Add(operation);
				lastOffset = operation->Offset() + operation->Length();
			}

			operation = nextOperation;
		}

		if (!sortedOperations.IsEmpty())
			lastOffset = 0;
	}
}


status_t
IOScheduler::_Scheduler()
{
	IORequestOwner marker;
	marker.thread = -1;
	{
		MutexLocker locker(fLock);
		fActiveRequestOwners.Add(&marker, false);
	}

	off_t lastOffset = 0;

	IORequestOwner* owner = NULL;
	off_t quantum = 0;

	while (true) {
		//dprintf("IOScheduler::_Scheduler(): next iteration: request owner: %p, quantum: %lld\n", owner, quantum);
		MutexLocker locker(fLock);

		IOOperationList operations;
		int32 operationCount = 0;
		bool resourcesAvailable = true;
		off_t iterationBandwidth = fIterationBandwidth;

		if (owner == NULL) {
			owner = fActiveRequestOwners.GetPrevious(&marker);
			quantum = 0;
			fActiveRequestOwners.Remove(&marker);
		}

		if (owner == NULL || quantum < fBlockSize)
			_NextActiveRequestOwner(owner, quantum);

		while (resourcesAvailable && iterationBandwidth >= fBlockSize) {
			//dprintf("IOScheduler::_Scheduler(): request owner: %p (thread %ld)\n",
			//owner, owner->thread);
			// Prepare operations for the owner.

			// There might still be unfinished ones.
			while (IOOperation* operation = owner->operations.RemoveHead()) {
				// TODO: We might actually grant the owner more bandwidth than
				// it deserves.
				// TODO: We should make sure that after the first read operation
				// of a partial write, no other write operation to the same
				// location is scheduled!
				operations.Add(operation);
				operationCount++;
				off_t bandwidth = operation->Length();
				quantum -= bandwidth;
				iterationBandwidth -= bandwidth;

				if (quantum < fBlockSize || iterationBandwidth < fBlockSize)
					break;
			}

			while (resourcesAvailable && quantum >= fBlockSize
					&& iterationBandwidth >= fBlockSize) {
				IORequest* request = owner->requests.Head();
				if (request == NULL) {
					resourcesAvailable = false;
					if (operationCount == 0)
						panic("no more requests");
					break;
				}

				off_t bandwidth = 0;
				resourcesAvailable = _PrepareRequestOperations(request,
					operations, operationCount, quantum, bandwidth);
				quantum -= bandwidth;
				iterationBandwidth -= bandwidth;
				if (request->RemainingBytes() == 0 || request->Status() <= 0) {
					// If the request has been completed, move it to the
					// completed list, so we don't pick it up again.
					owner->requests.Remove(request);
					owner->completed_requests.Add(request);
				}
			}

			// Get the next owner.
			if (resourcesAvailable)
				_NextActiveRequestOwner(owner, quantum);
		}

		// If the current owner doesn't have any more requests, we have to
		// insert our marker, since the owner will be gone in the next
		// iteration.
		if (owner->requests.IsEmpty()) {
			fActiveRequestOwners.Insert(owner, &marker);
			owner = NULL;
		}

		if (operations.IsEmpty())
			continue;

		fPendingOperations = operationCount;

		locker.Unlock();

		// sort the operations
		_SortOperations(operations, lastOffset);

		// execute the operations
#ifdef TRACE_IO_SCHEDULER
		int32 i = 0;
#endif
		while (IOOperation* operation = operations.RemoveHead()) {
			TRACE("IOScheduler::_Scheduler(): calling callback for "
				"operation %ld: %p\n", i++, operation);

			fIOCallback(fIOCallbackData, operation);

			_Finisher();
		}

		// wait for all operations to finish
		while (true) {
			locker.Lock();

			if (fPendingOperations == 0)
				break;

			// Before waiting first check whether any finisher work has to be
			// done.
			InterruptsSpinLocker finisherLocker(fFinisherLock);
			if (_FinisherWorkPending()) {
				finisherLocker.Unlock();
				locker.Unlock();
				_Finisher();
				continue;
			}

			// wait for finished operations
			ConditionVariableEntry entry;
			fFinishedOperationCondition.Add(&entry);

			finisherLocker.Unlock();
			locker.Unlock();

			entry.Wait(B_CAN_INTERRUPT);
			_Finisher();
		}
	}

	return B_OK;
}


/*static*/ status_t
IOScheduler::_SchedulerThread(void *_self)
{
	IOScheduler *self = (IOScheduler *)_self;
	return self->_Scheduler();
}


status_t
IOScheduler::_RequestNotifier()
{
	while (true) {
		MutexLocker locker(fLock);

		// get a request
		IORequest* request = fFinishedRequests.RemoveHead();

		if (request == NULL) {
			ConditionVariableEntry entry;
			fFinishedRequestCondition.Add(&entry);

			locker.Unlock();

			entry.Wait();
			continue;
		}

		locker.Unlock();

		// notify the request
		request->NotifyFinished();
	}

	// never can get here
	return B_OK;
}


/*static*/ status_t
IOScheduler::_RequestNotifierThread(void *_self)
{
	IOScheduler *self = (IOScheduler*)_self;
	return self->_RequestNotifier();
}


IORequestOwner*
IOScheduler::_GetRequestOwner(team_id team, thread_id thread, bool allocate)
{
	// lookup in table
	IORequestOwner* owner = fRequestOwners->Lookup(thread);
	if (owner != NULL && !owner->IsActive())
		fUnusedRequestOwners.Remove(owner);
	if (owner != NULL || !allocate)
		return owner;

	// not in table -- allocate an unused one
	RequestOwnerList existingOwners;

	while ((owner = fUnusedRequestOwners.RemoveHead()) != NULL) {
		if (owner->thread < 0
				|| thread_get_thread_struct(owner->thread) == NULL) {
			if (owner->thread >= 0)
				fRequestOwners->RemoveUnchecked(owner);
			owner->team = team;
			owner->thread = thread;
			owner->priority = B_IDLE_PRIORITY;
			fRequestOwners->InsertUnchecked(owner);
			break;
		}

		existingOwners.Add(owner);
	}

	fUnusedRequestOwners.MoveFrom(&existingOwners);
	return owner;
}


/*static*/ status_t
IOScheduler::_IOCallbackWrapper(void* data, io_operation* operation)
{
	return ((IOCallback*)data)->DoIO(operation);
}