1 /* 2 * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 */ 6 7 8 #include "IOScheduler.h" 9 10 #include <unistd.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include <algorithm> 15 16 #include <KernelExport.h> 17 18 #include <khash.h> 19 #include <lock.h> 20 #include <thread_types.h> 21 #include <thread.h> 22 #include <util/AutoLock.h> 23 24 25 //#define TRACE_IO_SCHEDULER 26 #ifdef TRACE_IO_SCHEDULER 27 # define TRACE(x...) dprintf(x) 28 #else 29 # define TRACE(x...) ; 30 #endif 31 32 33 IOCallback::~IOCallback() 34 { 35 } 36 37 38 status_t 39 IOCallback::DoIO(IOOperation* operation) 40 { 41 return B_ERROR; 42 } 43 44 45 // #pragma mark - 46 47 48 void 49 IORequestOwner::Dump() const 50 { 51 kprintf("IORequestOwner at %p\n", this); 52 kprintf(" team: %ld\n", team); 53 kprintf(" thread: %ld\n", thread); 54 kprintf(" priority: %ld\n", priority); 55 56 kprintf(" requests:"); 57 for (IORequestList::ConstIterator it = requests.GetIterator(); 58 IORequest* request = it.Next();) { 59 kprintf(" %p", request); 60 } 61 kprintf("\n"); 62 63 kprintf(" completed requests:"); 64 for (IORequestList::ConstIterator it = completed_requests.GetIterator(); 65 IORequest* request = it.Next();) { 66 kprintf(" %p", request); 67 } 68 kprintf("\n"); 69 70 kprintf(" operations:"); 71 for (IOOperationList::ConstIterator it = operations.GetIterator(); 72 IOOperation* operation = it.Next();) { 73 kprintf(" %p", operation); 74 } 75 kprintf("\n"); 76 } 77 78 79 // #pragma mark - 80 81 82 struct IOScheduler::RequestOwnerHashDefinition { 83 typedef thread_id KeyType; 84 typedef IORequestOwner ValueType; 85 86 size_t HashKey(thread_id key) const { return key; } 87 size_t Hash(const IORequestOwner* value) const { return value->thread; } 88 bool Compare(thread_id key, const IORequestOwner* value) const 89 { return value->thread == key; } 90 HashTableLink<IORequestOwner>* GetLink(IORequestOwner* value) const 91 { return value; } 92 }; 93 94 struct IOScheduler::RequestOwnerHashTable 95 : OpenHashTable<RequestOwnerHashDefinition, false> { 96 }; 97 98 99 IOScheduler::IOScheduler(DMAResource* resource) 100 : 101 fDMAResource(resource), 102 fOperationArray(NULL), 103 fAllocatedRequestOwners(NULL), 104 fRequestOwners(NULL), 105 fPendingOperations(0) 106 { 107 mutex_init(&fLock, "I/O scheduler"); 108 B_INITIALIZE_SPINLOCK(&fFinisherLock); 109 } 110 111 112 IOScheduler::~IOScheduler() 113 { 114 // TODO: Shutdown threads. 115 116 mutex_lock(&fLock); 117 mutex_destroy(&fLock); 118 119 while (IOOperation* operation = fUnusedOperations.RemoveHead()) 120 delete operation; 121 122 delete[] fOperationArray; 123 124 delete fRequestOwners; 125 delete[] fAllocatedRequestOwners; 126 } 127 128 129 status_t 130 IOScheduler::Init(const char* name) 131 { 132 fNewRequestCondition.Init(this, "I/O new request"); 133 fFinishedOperationCondition.Init(this, "I/O finished operation"); 134 fFinishedRequestCondition.Init(this, "I/O finished request"); 135 136 size_t count = fDMAResource != NULL ? fDMAResource->BufferCount() : 16; 137 for (size_t i = 0; i < count; i++) { 138 IOOperation* operation = new(std::nothrow) IOOperation; 139 if (operation == NULL) 140 return B_NO_MEMORY; 141 142 fUnusedOperations.Add(operation); 143 } 144 145 fOperationArray = new(std::nothrow) IOOperation*[count]; 146 147 if (fDMAResource != NULL) 148 fBlockSize = fDMAResource->BlockSize(); 149 if (fBlockSize == 0) 150 fBlockSize = 512; 151 152 fAllocatedRequestOwnerCount = thread_max_threads(); 153 fAllocatedRequestOwners 154 = new(std::nothrow) IORequestOwner[fAllocatedRequestOwnerCount]; 155 if (fAllocatedRequestOwners == NULL) 156 return B_NO_MEMORY; 157 158 for (int32 i = 0; i < fAllocatedRequestOwnerCount; i++) { 159 IORequestOwner& owner = fAllocatedRequestOwners[i]; 160 owner.team = -1; 161 owner.thread = -1; 162 owner.priority = B_IDLE_PRIORITY; 163 fUnusedRequestOwners.Add(&owner); 164 } 165 166 fRequestOwners = new(std::nothrow) RequestOwnerHashTable; 167 if (fRequestOwners == NULL) 168 return B_NO_MEMORY; 169 170 status_t error = fRequestOwners->Init(fAllocatedRequestOwnerCount); 171 if (error != B_OK) 172 return error; 173 174 // TODO: Use a device speed dependent bandwidths! 175 fIterationBandwidth = fBlockSize * 8192; 176 fMinOwnerBandwidth = fBlockSize * 1024; 177 fMaxOwnerBandwidth = fBlockSize * 4096; 178 179 // start threads 180 char buffer[B_OS_NAME_LENGTH]; 181 strlcpy(buffer, name, sizeof(buffer)); 182 strlcat(buffer, " scheduler", sizeof(buffer)); 183 fSchedulerThread = spawn_kernel_thread(&_SchedulerThread, buffer, 184 B_NORMAL_PRIORITY, (void *)this); 185 if (fSchedulerThread < B_OK) 186 return fSchedulerThread; 187 188 strlcpy(buffer, name, sizeof(buffer)); 189 strlcat(buffer, " notifier", sizeof(buffer)); 190 fRequestNotifierThread = spawn_kernel_thread(&_RequestNotifierThread, 191 buffer, B_NORMAL_PRIORITY, (void *)this); 192 if (fRequestNotifierThread < B_OK) 193 return fRequestNotifierThread; 194 195 resume_thread(fSchedulerThread); 196 resume_thread(fRequestNotifierThread); 197 return B_OK; 198 } 199 200 201 void 202 IOScheduler::SetCallback(IOCallback& callback) 203 { 204 SetCallback(&_IOCallbackWrapper, &callback); 205 } 206 207 208 void 209 IOScheduler::SetCallback(io_callback callback, void* data) 210 { 211 fIOCallback = callback; 212 fIOCallbackData = data; 213 } 214 215 216 status_t 217 IOScheduler::ScheduleRequest(IORequest* request) 218 { 219 TRACE("%p->IOScheduler::ScheduleRequest(%p)\n", this, request); 220 221 IOBuffer* buffer = request->Buffer(); 222 223 // TODO: it would be nice to be able to lock the memory later, but we can't 224 // easily do it in the I/O scheduler without being able to asynchronously 225 // lock memory (via another thread or a dedicated call). 226 227 if (buffer->IsVirtual()) { 228 status_t status = buffer->LockMemory(request->Team(), 229 request->IsWrite()); 230 if (status != B_OK) 231 return status; 232 } 233 234 MutexLocker locker(fLock); 235 236 IORequestOwner* owner = _GetRequestOwner(request->Team(), request->Thread(), 237 true); 238 if (owner == NULL) { 239 panic("IOScheduler: Out of request owners!\n"); 240 locker.Unlock(); 241 if (buffer->IsVirtual()) 242 buffer->UnlockMemory(request->Team(), request->IsWrite()); 243 return B_NO_MEMORY; 244 } 245 246 bool wasActive = owner->IsActive(); 247 request->SetOwner(owner); 248 owner->requests.Add(request); 249 250 int32 priority = thread_get_io_priority(request->Thread()); 251 if (priority >= 0) 252 owner->priority = priority; 253 //dprintf(" request %p -> owner %p (thread %ld, active %d)\n", request, owner, owner->thread, wasActive); 254 255 if (!wasActive) 256 fActiveRequestOwners.Add(owner); 257 258 fNewRequestCondition.NotifyAll(); 259 260 return B_OK; 261 } 262 263 264 void 265 IOScheduler::AbortRequest(IORequest* request, status_t status) 266 { 267 // TODO:... 268 //B_CANCELED 269 } 270 271 272 void 273 IOScheduler::OperationCompleted(IOOperation* operation, status_t status, 274 size_t transferredBytes) 275 { 276 InterruptsSpinLocker _(fFinisherLock); 277 278 // finish operation only once 279 if (operation->Status() <= 0) 280 return; 281 282 operation->SetStatus(status); 283 284 // set the bytes transferred (of the net data) 285 size_t partialBegin = operation->OriginalOffset() - operation->Offset(); 286 operation->SetTransferredBytes( 287 transferredBytes > partialBegin ? transferredBytes - partialBegin : 0); 288 289 fCompletedOperations.Add(operation); 290 fFinishedOperationCondition.NotifyAll(); 291 } 292 293 294 void 295 IOScheduler::Dump() const 296 { 297 kprintf("IOScheduler at %p\n", this); 298 kprintf(" DMA resource: %p\n", fDMAResource); 299 300 kprintf(" active request owners:"); 301 for (RequestOwnerList::ConstIterator it 302 = fActiveRequestOwners.GetIterator(); 303 IORequestOwner* owner = it.Next();) { 304 kprintf(" %p", owner); 305 } 306 kprintf("\n"); 307 } 308 309 310 /*! Must not be called with the fLock held. */ 311 void 312 IOScheduler::_Finisher() 313 { 314 while (true) { 315 InterruptsSpinLocker locker(fFinisherLock); 316 IOOperation* operation = fCompletedOperations.RemoveHead(); 317 if (operation == NULL) 318 return; 319 320 locker.Unlock(); 321 322 TRACE("IOScheduler::_Finisher(): operation: %p\n", operation); 323 324 if (!operation->Finish()) { 325 TRACE(" operation: %p not finished yet\n", operation); 326 MutexLocker _(fLock); 327 operation->SetTransferredBytes(0); 328 operation->Parent()->Owner()->operations.Add(operation); 329 fPendingOperations--; 330 continue; 331 } 332 333 // notify request and remove operation 334 IORequest* request = operation->Parent(); 335 if (request != NULL) { 336 size_t operationOffset = operation->OriginalOffset() 337 - request->Offset(); 338 request->OperationFinished(operation, operation->Status(), 339 operation->TransferredBytes() < operation->OriginalLength(), 340 operation->Status() == B_OK 341 ? operationOffset + operation->OriginalLength() 342 : operationOffset); 343 } 344 345 // recycle the operation 346 MutexLocker _(fLock); 347 if (fDMAResource != NULL) 348 fDMAResource->RecycleBuffer(operation->Buffer()); 349 350 fPendingOperations--; 351 fUnusedOperations.Add(operation); 352 353 // If the request is done, we need to perform its notifications. 354 if (request->IsFinished()) { 355 if (request->Status() == B_OK && request->RemainingBytes() > 0) { 356 // The request has been processed OK so far, but it isn't really 357 // finished yet. 358 request->SetUnfinished(); 359 } else { 360 // Remove the request from the request owner. 361 IORequestOwner* owner = request->Owner(); 362 owner->requests.MoveFrom(&owner->completed_requests); 363 owner->requests.Remove(request); 364 request->SetOwner(NULL); 365 366 if (!owner->IsActive()) { 367 fActiveRequestOwners.Remove(owner); 368 fUnusedRequestOwners.Add(owner); 369 } 370 371 if (request->HasCallbacks()) { 372 // The request has callbacks that may take some time to 373 // perform, so we hand it over to the request notifier. 374 fFinishedRequests.Add(request); 375 fFinishedRequestCondition.NotifyAll(); 376 } else { 377 // No callbacks -- finish the request right now. 378 request->NotifyFinished(); 379 } 380 } 381 } 382 } 383 } 384 385 386 /*! Called with \c fFinisherLock held. 387 */ 388 bool 389 IOScheduler::_FinisherWorkPending() 390 { 391 return !fCompletedOperations.IsEmpty(); 392 } 393 394 395 bool 396 IOScheduler::_PrepareRequestOperations(IORequest* request, 397 IOOperationList& operations, int32& operationsPrepared, off_t quantum, 398 off_t& usedBandwidth) 399 { 400 //dprintf("IOScheduler::_PrepareRequestOperations(%p)\n", request); 401 usedBandwidth = 0; 402 403 if (fDMAResource != NULL) { 404 while (quantum >= fBlockSize && request->RemainingBytes() > 0) { 405 IOOperation* operation = fUnusedOperations.RemoveHead(); 406 if (operation == NULL) 407 return false; 408 409 status_t status = fDMAResource->TranslateNext(request, operation, 410 quantum); 411 if (status != B_OK) { 412 operation->SetParent(NULL); 413 fUnusedOperations.Add(operation); 414 415 // B_BUSY means some resource (DMABuffers or 416 // DMABounceBuffers) was temporarily unavailable. That's OK, 417 // we'll retry later. 418 if (status == B_BUSY) 419 return false; 420 421 AbortRequest(request, status); 422 return true; 423 } 424 //dprintf(" prepared operation %p\n", operation); 425 426 off_t bandwidth = operation->Length(); 427 quantum -= bandwidth; 428 usedBandwidth += bandwidth; 429 430 operations.Add(operation); 431 operationsPrepared++; 432 } 433 } else { 434 // TODO: If the device has block size restrictions, we might need to use 435 // a bounce buffer. 436 IOOperation* operation = fUnusedOperations.RemoveHead(); 437 if (operation == NULL) 438 return false; 439 440 status_t status = operation->Prepare(request); 441 if (status != B_OK) { 442 operation->SetParent(NULL); 443 fUnusedOperations.Add(operation); 444 AbortRequest(request, status); 445 return true; 446 } 447 448 operation->SetOriginalRange(request->Offset(), request->Length()); 449 request->Advance(request->Length()); 450 451 off_t bandwidth = operation->Length(); 452 quantum -= bandwidth; 453 usedBandwidth += bandwidth; 454 455 operations.Add(operation); 456 operationsPrepared++; 457 } 458 459 return true; 460 } 461 462 463 off_t 464 IOScheduler::_ComputeRequestOwnerBandwidth(int32 priority) const 465 { 466 // TODO: Use a priority dependent quantum! 467 return fMinOwnerBandwidth; 468 } 469 470 471 void 472 IOScheduler::_NextActiveRequestOwner(IORequestOwner*& owner, off_t& quantum) 473 { 474 while (true) { 475 if (owner != NULL) 476 owner = fActiveRequestOwners.GetNext(owner); 477 if (owner == NULL) 478 owner = fActiveRequestOwners.Head(); 479 480 if (owner != NULL) { 481 quantum = _ComputeRequestOwnerBandwidth(owner->priority); 482 return; 483 } 484 485 // Wait for new requests owners. First check whether any finisher work 486 // has to be done. 487 InterruptsSpinLocker finisherLocker(fFinisherLock); 488 if (_FinisherWorkPending()) { 489 finisherLocker.Unlock(); 490 mutex_unlock(&fLock); 491 _Finisher(); 492 mutex_lock(&fLock); 493 continue; 494 } 495 496 // Wait for new requests. 497 ConditionVariableEntry entry; 498 fNewRequestCondition.Add(&entry); 499 500 finisherLocker.Unlock(); 501 mutex_unlock(&fLock); 502 503 entry.Wait(B_CAN_INTERRUPT); 504 _Finisher(); 505 mutex_lock(&fLock); 506 } 507 } 508 509 510 struct OperationComparator { 511 inline bool operator()(const IOOperation* a, const IOOperation* b) 512 { 513 off_t offsetA = a->Offset(); 514 off_t offsetB = b->Offset(); 515 return offsetA < offsetB 516 || (offsetA == offsetB && a->Length() > b->Length()); 517 } 518 }; 519 520 521 void 522 IOScheduler::_SortOperations(IOOperationList& operations, off_t& lastOffset) 523 { 524 // TODO: _Scheduler() could directly add the operations to the array. 525 // move operations to an array and sort it 526 int32 count = 0; 527 while (IOOperation* operation = operations.RemoveHead()) 528 fOperationArray[count++] = operation; 529 530 std::sort(fOperationArray, fOperationArray + count, OperationComparator()); 531 532 // move the sorted operations to a temporary list we can work with 533 //dprintf("operations after sorting:\n"); 534 IOOperationList sortedOperations; 535 for (int32 i = 0; i < count; i++) 536 //{ 537 //dprintf(" %3ld: %p: offset: %lld, length: %lu\n", i, fOperationArray[i], fOperationArray[i]->Offset(), fOperationArray[i]->Length()); 538 sortedOperations.Add(fOperationArray[i]); 539 //} 540 541 // Sort the operations so that no two adjacent operations overlap. This 542 // might result in several elevator runs. 543 while (!sortedOperations.IsEmpty()) { 544 IOOperation* operation = sortedOperations.Head(); 545 while (operation != NULL) { 546 IOOperation* nextOperation = sortedOperations.GetNext(operation); 547 if (operation->Offset() >= lastOffset) { 548 sortedOperations.Remove(operation); 549 //dprintf(" adding operation %p\n", operation); 550 operations.Add(operation); 551 lastOffset = operation->Offset() + operation->Length(); 552 } 553 554 operation = nextOperation; 555 } 556 557 if (!sortedOperations.IsEmpty()) 558 lastOffset = 0; 559 } 560 } 561 562 563 status_t 564 IOScheduler::_Scheduler() 565 { 566 IORequestOwner marker; 567 marker.thread = -1; 568 { 569 MutexLocker locker(fLock); 570 fActiveRequestOwners.Add(&marker, false); 571 } 572 573 off_t lastOffset = 0; 574 575 IORequestOwner* owner = NULL; 576 off_t quantum = 0; 577 578 while (true) { 579 //dprintf("IOScheduler::_Scheduler(): next iteration: request owner: %p, quantum: %lld\n", owner, quantum); 580 MutexLocker locker(fLock); 581 582 IOOperationList operations; 583 int32 operationCount = 0; 584 bool resourcesAvailable = true; 585 off_t iterationBandwidth = fIterationBandwidth; 586 587 if (owner == NULL) { 588 owner = fActiveRequestOwners.GetPrevious(&marker); 589 quantum = 0; 590 fActiveRequestOwners.Remove(&marker); 591 } 592 593 if (owner == NULL || quantum < fBlockSize) 594 _NextActiveRequestOwner(owner, quantum); 595 596 while (resourcesAvailable && iterationBandwidth >= fBlockSize) { 597 //dprintf("IOScheduler::_Scheduler(): request owner: %p (thread %ld)\n", 598 //owner, owner->thread); 599 // Prepare operations for the owner. 600 601 // There might still be unfinished ones. 602 while (IOOperation* operation = owner->operations.RemoveHead()) { 603 // TODO: We might actually grant the owner more bandwidth than 604 // it deserves. 605 // TODO: We should make sure that after the first read operation 606 // of a partial write, no other write operation to the same 607 // location is scheduled! 608 operations.Add(operation); 609 operationCount++; 610 off_t bandwidth = operation->Length(); 611 quantum -= bandwidth; 612 iterationBandwidth -= bandwidth; 613 614 if (quantum < fBlockSize || iterationBandwidth < fBlockSize) 615 break; 616 } 617 618 while (resourcesAvailable && quantum >= fBlockSize 619 && iterationBandwidth >= fBlockSize) { 620 IORequest* request = owner->requests.Head(); 621 if (request == NULL) { 622 resourcesAvailable = false; 623 if (operationCount == 0) 624 panic("no more requests"); 625 break; 626 } 627 628 off_t bandwidth = 0; 629 resourcesAvailable = _PrepareRequestOperations(request, 630 operations, operationCount, quantum, bandwidth); 631 quantum -= bandwidth; 632 iterationBandwidth -= bandwidth; 633 if (request->RemainingBytes() == 0 || request->Status() <= 0) { 634 // If the request has been completed, move it to the 635 // completed list, so we don't pick it up again. 636 owner->requests.Remove(request); 637 owner->completed_requests.Add(request); 638 } 639 } 640 641 // Get the next owner. 642 if (resourcesAvailable) 643 _NextActiveRequestOwner(owner, quantum); 644 } 645 646 // If the current owner doesn't have anymore requests, we have to 647 // insert our marker, since the owner will be gone in the next 648 // iteration. 649 if (owner->requests.IsEmpty()) { 650 fActiveRequestOwners.Insert(owner, &marker); 651 owner = NULL; 652 } 653 654 if (operations.IsEmpty()) 655 continue; 656 657 fPendingOperations = operationCount; 658 659 locker.Unlock(); 660 661 // sort the operations 662 _SortOperations(operations, lastOffset); 663 664 // execute the operations 665 #ifdef TRACE_IO_SCHEDULER 666 int32 i = 0; 667 #endif 668 while (IOOperation* operation = operations.RemoveHead()) { 669 TRACE("IOScheduler::_Scheduler(): calling callback for " 670 "operation %ld: %p\n", i++, operation); 671 672 fIOCallback(fIOCallbackData, operation); 673 674 _Finisher(); 675 } 676 677 // wait for all operations to finish 678 while (true) { 679 locker.Lock(); 680 681 if (fPendingOperations == 0) 682 break; 683 684 // Before waiting first check whether any finisher work has to be 685 // done. 686 InterruptsSpinLocker finisherLocker(fFinisherLock); 687 if (_FinisherWorkPending()) { 688 finisherLocker.Unlock(); 689 locker.Unlock(); 690 _Finisher(); 691 continue; 692 } 693 694 // wait for finished operations 695 ConditionVariableEntry entry; 696 fFinishedOperationCondition.Add(&entry); 697 698 finisherLocker.Unlock(); 699 locker.Unlock(); 700 701 entry.Wait(B_CAN_INTERRUPT); 702 _Finisher(); 703 } 704 } 705 706 return B_OK; 707 } 708 709 710 /*static*/ status_t 711 IOScheduler::_SchedulerThread(void *_self) 712 { 713 IOScheduler *self = (IOScheduler *)_self; 714 return self->_Scheduler(); 715 } 716 717 718 status_t 719 IOScheduler::_RequestNotifier() 720 { 721 while (true) { 722 MutexLocker locker(fLock); 723 724 // get a request 725 IORequest* request = fFinishedRequests.RemoveHead(); 726 727 if (request == NULL) { 728 ConditionVariableEntry entry; 729 fFinishedRequestCondition.Add(&entry); 730 731 locker.Unlock(); 732 733 entry.Wait(); 734 continue; 735 } 736 737 locker.Unlock(); 738 739 // notify the request 740 request->NotifyFinished(); 741 } 742 } 743 744 745 /*static*/ status_t 746 IOScheduler::_RequestNotifierThread(void *_self) 747 { 748 IOScheduler *self = (IOScheduler*)_self; 749 return self->_RequestNotifier(); 750 } 751 752 753 IORequestOwner* 754 IOScheduler::_GetRequestOwner(team_id team, thread_id thread, bool allocate) 755 { 756 // lookup in table 757 IORequestOwner* owner = fRequestOwners->Lookup(thread); 758 if (owner != NULL && !owner->IsActive()) 759 fUnusedRequestOwners.Remove(owner); 760 if (owner != NULL || !allocate) 761 return owner; 762 763 // not in table -- allocate an unused one 764 RequestOwnerList existingOwners; 765 766 while ((owner = fUnusedRequestOwners.RemoveHead()) != NULL) { 767 if (owner->thread < 0 768 || thread_get_thread_struct(owner->thread) == NULL) { 769 if (owner->thread >= 0) 770 fRequestOwners->RemoveUnchecked(owner); 771 owner->team = team; 772 owner->thread = thread; 773 owner->priority = B_IDLE_PRIORITY; 774 fRequestOwners->InsertUnchecked(owner); 775 break; 776 } 777 778 existingOwners.Add(owner); 779 } 780 781 fUnusedRequestOwners.MoveFrom(&existingOwners); 782 return owner; 783 } 784 785 786 /*static*/ status_t 787 IOScheduler::_IOCallbackWrapper(void* data, io_operation* operation) 788 { 789 return ((IOCallback*)data)->DoIO(operation); 790 } 791 792