xref: /haiku/src/system/kernel/device_manager/IOScheduler.cpp (revision 37c7d5d83a2372a6971e383411d5bacbeef0ebdc)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 #include "IOScheduler.h"
9 
10 #include <unistd.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include <algorithm>
16 
17 #include <KernelExport.h>
18 
19 #include <khash.h>
20 #include <lock.h>
21 #include <thread_types.h>
22 #include <thread.h>
23 #include <util/AutoLock.h>
24 
25 
26 //#define TRACE_IO_SCHEDULER
27 #ifdef TRACE_IO_SCHEDULER
28 #	define TRACE(x...) dprintf(x)
29 #else
30 #	define TRACE(x...) ;
31 #endif
32 
33 
34 // #pragma mark -
35 
36 
37 void
38 IORequestOwner::Dump() const
39 {
40 	kprintf("IORequestOwner at %p\n", this);
41 	kprintf("  team:     %ld\n", team);
42 	kprintf("  thread:   %ld\n", thread);
43 	kprintf("  priority: %ld\n", priority);
44 
45 	kprintf("  requests:");
46 	for (IORequestList::ConstIterator it = requests.GetIterator();
47 			IORequest* request = it.Next();) {
48 		kprintf(" %p", request);
49 	}
50 	kprintf("\n");
51 
52 	kprintf("  completed requests:");
53 	for (IORequestList::ConstIterator it = completed_requests.GetIterator();
54 			IORequest* request = it.Next();) {
55 		kprintf(" %p", request);
56 	}
57 	kprintf("\n");
58 
59 	kprintf("  operations:");
60 	for (IOOperationList::ConstIterator it = operations.GetIterator();
61 			IOOperation* operation = it.Next();) {
62 		kprintf(" %p", operation);
63 	}
64 	kprintf("\n");
65 }
66 
67 
68 // #pragma mark -
69 
70 
// Hash table definition for fRequestOwners: maps a thread_id to the
// IORequestOwner currently assigned to that thread. The thread ID itself
// is used as the hash value; chaining goes through IORequestOwner::hash_link.
struct IOScheduler::RequestOwnerHashDefinition {
	typedef thread_id		KeyType;
	typedef IORequestOwner	ValueType;

	size_t HashKey(thread_id key) const				{ return key; }
	size_t Hash(const IORequestOwner* value) const	{ return value->thread; }
	bool Compare(thread_id key, const IORequestOwner* value) const
		{ return value->thread == key; }
	IORequestOwner*& GetLink(IORequestOwner* value) const
		{ return value->hash_link; }
};
82 
// Concrete hash table type for the request owners. The owners themselves
// are owned by fAllocatedRequestOwners, not by this table. The second
// template argument ("false") presumably disables automatic resizing --
// the table is sized once in Init(); verify against util/OpenHashTable.h.
struct IOScheduler::RequestOwnerHashTable
		: BOpenHashTable<RequestOwnerHashDefinition, false> {
};
86 
87 
88 IOScheduler::IOScheduler(DMAResource* resource)
89 	:
90 	fDMAResource(resource),
91 	fName(NULL),
92 	fID(IOSchedulerRoster::Default()->NextID()),
93 	fSchedulerThread(-1),
94 	fRequestNotifierThread(-1),
95 	fOperationArray(NULL),
96 	fAllocatedRequestOwners(NULL),
97 	fRequestOwners(NULL),
98 	fBlockSize(0),
99 	fPendingOperations(0),
100 	fTerminating(false)
101 {
102 	mutex_init(&fLock, "I/O scheduler");
103 	B_INITIALIZE_SPINLOCK(&fFinisherLock);
104 
105 	fNewRequestCondition.Init(this, "I/O new request");
106 	fFinishedOperationCondition.Init(this, "I/O finished operation");
107 	fFinishedRequestCondition.Init(this, "I/O finished request");
108 
109 }
110 
111 
/*!	Shuts down both worker threads, then frees all pools. Safe to call on
	a partially initialized object (Init() failed): all pointers are
	NULL-initialized by the constructor, and the thread waits are guarded.
*/
IOScheduler::~IOScheduler()
{
	// Only deregister from the roster if Init() got far enough to have
	// added us (AddScheduler() happens after the notifier thread spawn
	// that InitCheck() tests for).
	if (InitCheck() == B_OK)
		IOSchedulerRoster::Default()->RemoveScheduler(this);

	// shutdown threads
	MutexLocker locker(fLock);
	InterruptsSpinLocker finisherLocker(fFinisherLock);
	fTerminating = true;

	// Wake all waiters so both threads observe fTerminating and exit.
	fNewRequestCondition.NotifyAll();
	fFinishedOperationCondition.NotifyAll();
	fFinishedRequestCondition.NotifyAll();

	finisherLocker.Unlock();
	locker.Unlock();

	if (fSchedulerThread >= 0)
		wait_for_thread(fSchedulerThread, NULL);

	if (fRequestNotifierThread >= 0)
		wait_for_thread(fRequestNotifierThread, NULL);

	// destroy our belongings
	// Re-acquire the mutex before destroying it, making sure nobody
	// still holds it.
	mutex_lock(&fLock);
	mutex_destroy(&fLock);

	while (IOOperation* operation = fUnusedOperations.RemoveHead())
		delete operation;

	delete[] fOperationArray;

	delete fRequestOwners;
	delete[] fAllocatedRequestOwners;

	free(fName);
}
149 
150 
151 status_t
152 IOScheduler::Init(const char* name)
153 {
154 	fName = strdup(name);
155 	if (fName == NULL)
156 		return B_NO_MEMORY;
157 
158 	size_t count = fDMAResource != NULL ? fDMAResource->BufferCount() : 16;
159 	for (size_t i = 0; i < count; i++) {
160 		IOOperation* operation = new(std::nothrow) IOOperation;
161 		if (operation == NULL)
162 			return B_NO_MEMORY;
163 
164 		fUnusedOperations.Add(operation);
165 	}
166 
167 	fOperationArray = new(std::nothrow) IOOperation*[count];
168 
169 	if (fDMAResource != NULL)
170 		fBlockSize = fDMAResource->BlockSize();
171 	if (fBlockSize == 0)
172 		fBlockSize = 512;
173 
174 	fAllocatedRequestOwnerCount = thread_max_threads();
175 	fAllocatedRequestOwners
176 		= new(std::nothrow) IORequestOwner[fAllocatedRequestOwnerCount];
177 	if (fAllocatedRequestOwners == NULL)
178 		return B_NO_MEMORY;
179 
180 	for (int32 i = 0; i < fAllocatedRequestOwnerCount; i++) {
181 		IORequestOwner& owner = fAllocatedRequestOwners[i];
182 		owner.team = -1;
183 		owner.thread = -1;
184 		owner.priority = B_IDLE_PRIORITY;
185 		fUnusedRequestOwners.Add(&owner);
186 	}
187 
188 	fRequestOwners = new(std::nothrow) RequestOwnerHashTable;
189 	if (fRequestOwners == NULL)
190 		return B_NO_MEMORY;
191 
192 	status_t error = fRequestOwners->Init(fAllocatedRequestOwnerCount);
193 	if (error != B_OK)
194 		return error;
195 
196 	// TODO: Use a device speed dependent bandwidths!
197 	fIterationBandwidth = fBlockSize * 8192;
198 	fMinOwnerBandwidth = fBlockSize * 1024;
199 	fMaxOwnerBandwidth = fBlockSize * 4096;
200 
201 	// start threads
202 	char buffer[B_OS_NAME_LENGTH];
203 	strlcpy(buffer, name, sizeof(buffer));
204 	strlcat(buffer, " scheduler ", sizeof(buffer));
205 	size_t nameLength = strlen(buffer);
206 	snprintf(buffer + nameLength, sizeof(buffer) - nameLength, "%" B_PRId32,
207 		fID);
208 	fSchedulerThread = spawn_kernel_thread(&_SchedulerThread, buffer,
209 		B_NORMAL_PRIORITY + 2, (void *)this);
210 	if (fSchedulerThread < B_OK)
211 		return fSchedulerThread;
212 
213 	strlcpy(buffer, name, sizeof(buffer));
214 	strlcat(buffer, " notifier ", sizeof(buffer));
215 	nameLength = strlen(buffer);
216 	snprintf(buffer + nameLength, sizeof(buffer) - nameLength, "%" B_PRId32,
217 		fID);
218 	fRequestNotifierThread = spawn_kernel_thread(&_RequestNotifierThread,
219 		buffer, B_NORMAL_PRIORITY + 2, (void *)this);
220 	if (fRequestNotifierThread < B_OK)
221 		return fRequestNotifierThread;
222 
223 	resume_thread(fSchedulerThread);
224 	resume_thread(fRequestNotifierThread);
225 
226 	IOSchedulerRoster::Default()->AddScheduler(this);
227 
228 	return B_OK;
229 }
230 
231 
232 status_t
233 IOScheduler::InitCheck() const
234 {
235 	return fRequestNotifierThread >= 0 ? B_OK : B_NO_INIT;
236 }
237 
238 
/*!	Convenience overload: installs \a callback (an IOCallback object) as
	the I/O hook, routed through IOCallback::WrapperFunction. The caller
	must keep \a callback alive while the scheduler may use it -- only a
	pointer to it is stored.
*/
void
IOScheduler::SetCallback(IOCallback& callback)
{
	SetCallback(&IOCallback::WrapperFunction, &callback);
}
244 
245 
/*!	Sets the I/O hook that the scheduler thread invokes to execute each
	prepared operation, together with an opaque \a data pointer passed
	back on every call. Expected to be set before the first request is
	scheduled -- _Scheduler() calls fIOCallback unchecked.
*/
void
IOScheduler::SetCallback(io_callback callback, void* data)
{
	fIOCallback = callback;
	fIOCallbackData = data;
}
252 
253 
/*!	Enqueues \a request for asynchronous processing by the scheduler
	thread. Virtual buffers are locked into memory up front (cf. TODO
	below). The request is attached to the IORequestOwner for its thread,
	the owner is activated if it wasn't already, and the scheduler thread
	is woken. On failure the request is notified with the error before
	returning it.
*/
status_t
IOScheduler::ScheduleRequest(IORequest* request)
{
	TRACE("%p->IOScheduler::ScheduleRequest(%p)\n", this, request);

	IOBuffer* buffer = request->Buffer();

	// TODO: it would be nice to be able to lock the memory later, but we can't
	// easily do it in the I/O scheduler without being able to asynchronously
	// lock memory (via another thread or a dedicated call).

	if (buffer->IsVirtual()) {
		status_t status = buffer->LockMemory(request->Team(),
			request->IsWrite());
		if (status != B_OK) {
			request->SetStatusAndNotify(status);
			return status;
		}
	}

	MutexLocker locker(fLock);

	IORequestOwner* owner = _GetRequestOwner(request->Team(), request->Thread(),
		true);
	if (owner == NULL) {
		// Init() sizes the owner pool for the maximum thread count, so
		// running out indicates a kernel bug.
		panic("IOScheduler: Out of request owners!\n");
		locker.Unlock();
		if (buffer->IsVirtual())
			buffer->UnlockMemory(request->Team(), request->IsWrite());
		request->SetStatusAndNotify(B_NO_MEMORY);
		return B_NO_MEMORY;
	}

	bool wasActive = owner->IsActive();
	request->SetOwner(owner);
	owner->requests.Add(request);

	// Adopt the thread's I/O priority, if one is set.
	int32 priority = thread_get_io_priority(request->Thread());
	if (priority >= 0)
		owner->priority = priority;
//dprintf("  request %p -> owner %p (thread %ld, active %d)\n", request, owner, owner->thread, wasActive);

	if (!wasActive)
		fActiveRequestOwners.Add(owner);

	IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_REQUEST_SCHEDULED, this,
		request);

	// wake up the scheduler thread
	fNewRequestCondition.NotifyAll();

	return B_OK;
}
306 
307 
/*!	Not implemented yet: would cancel a scheduled \a request with the
	given \a status (presumably B_CANCELED by default, per the note
	below). Currently a no-op; _PrepareRequestOperations() already calls
	it on translation errors, which are therefore silently dropped.
*/
void
IOScheduler::AbortRequest(IORequest* request, status_t status)
{
	// TODO:...
//B_CANCELED
}
314 
315 
/*!	Reports that the I/O hook has executed \a operation with the given
	\a status, having transferred \a transferredBytes (measured from the
	operation's physical start). Only takes the fFinisherLock spinlock, so
	it is usable from contexts that cannot acquire mutexes. The operation
	is queued for _Finisher(), which does the actual completion work on
	the scheduler thread.
*/
void
IOScheduler::OperationCompleted(IOOperation* operation, status_t status,
	size_t transferredBytes)
{
	InterruptsSpinLocker _(fFinisherLock);

	// finish operation only once
	if (operation->Status() <= 0)
		return;

	operation->SetStatus(status);

	// set the bytes transferred (of the net data)
	// OriginalOffset() >= Offset(): the leading padding of an expanded
	// (block-aligned) operation does not count as payload.
	size_t partialBegin = operation->OriginalOffset() - operation->Offset();
	operation->SetTransferredBytes(
		transferredBytes > partialBegin ? transferredBytes - partialBegin : 0);

	fCompletedOperations.Add(operation);
	fFinishedOperationCondition.NotifyAll();
}
336 
337 
338 void
339 IOScheduler::Dump() const
340 {
341 	kprintf("IOScheduler at %p\n", this);
342 	kprintf("  DMA resource:   %p\n", fDMAResource);
343 
344 	kprintf("  active request owners:");
345 	for (RequestOwnerList::ConstIterator it
346 				= fActiveRequestOwners.GetIterator();
347 			IORequestOwner* owner = it.Next();) {
348 		kprintf(" %p", owner);
349 	}
350 	kprintf("\n");
351 }
352 
353 
/*!	Drains fCompletedOperations, i.e. all operations the I/O hook has
	reported done via OperationCompleted(). Operations whose Finish()
	returns \c false need another pass through the I/O hook and are handed
	back to their owner; fully finished ones are recycled, their parent
	request is updated and, once the request is done, its notifications
	are triggered (directly, or via the notifier thread if it has
	callbacks). Must not be called with the fLock held.
*/
void
IOScheduler::_Finisher()
{
	while (true) {
		// Pop the next completed operation; the list is guarded by the
		// finisher spinlock.
		InterruptsSpinLocker locker(fFinisherLock);
		IOOperation* operation = fCompletedOperations.RemoveHead();
		if (operation == NULL)
			return;

		locker.Unlock();

		TRACE("IOScheduler::_Finisher(): operation: %p\n", operation);

		bool operationFinished = operation->Finish();

		IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_OPERATION_FINISHED,
			this, operation->Parent(), operation);
			// Notify for every time the operation is passed to the I/O hook,
			// not only when it is fully finished.

		if (!operationFinished) {
			TRACE("  operation: %p not finished yet\n", operation);
			// Re-queue with the owner; _Scheduler() will feed it to the
			// I/O hook again in a later iteration.
			MutexLocker _(fLock);
			operation->SetTransferredBytes(0);
			operation->Parent()->Owner()->operations.Add(operation);
			fPendingOperations--;
			continue;
		}

		// notify request and remove operation
		IORequest* request = operation->Parent();
		if (request != NULL) {
			size_t operationOffset = operation->OriginalOffset()
				- request->Offset();
			request->OperationFinished(operation, operation->Status(),
				operation->TransferredBytes() < operation->OriginalLength(),
				operation->Status() == B_OK
					? operationOffset + operation->OriginalLength()
					: operationOffset);
		}

		// recycle the operation
		MutexLocker _(fLock);
		if (fDMAResource != NULL)
			fDMAResource->RecycleBuffer(operation->Buffer());

		fPendingOperations--;
		fUnusedOperations.Add(operation);

		// If the request is done, we need to perform its notifications.
		if (request->IsFinished()) {
			if (request->Status() == B_OK && request->RemainingBytes() > 0) {
				// The request has been processed OK so far, but it isn't really
				// finished yet.
				request->SetUnfinished();
			} else {
				// Remove the request from the request owner.
				IORequestOwner* owner = request->Owner();
				owner->requests.MoveFrom(&owner->completed_requests);
				owner->requests.Remove(request);
				request->SetOwner(NULL);

				// An owner without requests or operations goes back to the
				// unused pool.
				if (!owner->IsActive()) {
					fActiveRequestOwners.Remove(owner);
					fUnusedRequestOwners.Add(owner);
				}

				if (request->HasCallbacks()) {
					// The request has callbacks that may take some time to
					// perform, so we hand it over to the request notifier.
					fFinishedRequests.Add(request);
					fFinishedRequestCondition.NotifyAll();
				} else {
					// No callbacks -- finish the request right now.
					IOSchedulerRoster::Default()->Notify(
						IO_SCHEDULER_REQUEST_FINISHED, this, request);
					request->NotifyFinished();
				}
			}
		}
	}
}
437 
438 
439 /*!	Called with \c fFinisherLock held.
440 */
441 bool
442 IOScheduler::_FinisherWorkPending()
443 {
444 	return !fCompletedOperations.IsEmpty();
445 }
446 
447 
/*!	Translates (part of) \a request into IOOperations, appending them to
	\a operations and counting them in \a operationsPrepared. \a quantum
	caps the amount of data prepared in this call; the bandwidth actually
	consumed is returned in \a usedBandwidth.
	Returns \c false when resources ran out (no unused operations, or the
	DMA resource is temporarily exhausted) -- the caller should stop and
	retry later. Returns \c true otherwise, including the case where the
	request had to be aborted due to a translation error.
	Called with fLock held.
*/
bool
IOScheduler::_PrepareRequestOperations(IORequest* request,
	IOOperationList& operations, int32& operationsPrepared, off_t quantum,
	off_t& usedBandwidth)
{
//dprintf("IOScheduler::_PrepareRequestOperations(%p)\n", request);
	usedBandwidth = 0;

	if (fDMAResource != NULL) {
		// With DMA restrictions the request may split into several
		// operations; produce them until the quantum or the request is
		// exhausted.
		while (quantum >= fBlockSize && request->RemainingBytes() > 0) {
			IOOperation* operation = fUnusedOperations.RemoveHead();
			if (operation == NULL)
				return false;

			status_t status = fDMAResource->TranslateNext(request, operation,
				quantum);
			if (status != B_OK) {
				operation->SetParent(NULL);
				fUnusedOperations.Add(operation);

				// B_BUSY means some resource (DMABuffers or
				// DMABounceBuffers) was temporarily unavailable. That's OK,
				// we'll retry later.
				if (status == B_BUSY)
					return false;

				AbortRequest(request, status);
				return true;
			}
//dprintf("  prepared operation %p\n", operation);

			off_t bandwidth = operation->Length();
			quantum -= bandwidth;
			usedBandwidth += bandwidth;

			operations.Add(operation);
			operationsPrepared++;
		}
	} else {
		// No DMA resource: the whole remaining request becomes a single
		// operation.
		// TODO: If the device has block size restrictions, we might need to use
		// a bounce buffer.
		IOOperation* operation = fUnusedOperations.RemoveHead();
		if (operation == NULL)
			return false;

		status_t status = operation->Prepare(request);
		if (status != B_OK) {
			operation->SetParent(NULL);
			fUnusedOperations.Add(operation);
			AbortRequest(request, status);
			return true;
		}

		operation->SetOriginalRange(request->Offset(), request->Length());
		request->Advance(request->Length());

		off_t bandwidth = operation->Length();
		quantum -= bandwidth;
		usedBandwidth += bandwidth;

		operations.Add(operation);
		operationsPrepared++;
	}

	return true;
}
514 
515 
/*!	Returns the bandwidth quantum (in bytes) a request owner is granted
	per scheduling round. Currently a constant -- \a priority is ignored
	(cf. TODO).
*/
off_t
IOScheduler::_ComputeRequestOwnerBandwidth(int32 priority) const
{
// TODO: Use a priority dependent quantum!
	return fMinOwnerBandwidth;
}
522 
523 
/*!	Round-robin: advances \a owner to the next active request owner,
	wrapping around to the list head, and computes that owner's bandwidth
	\a quantum. When no owner is active, blocks until new requests
	arrive, running _Finisher() in the meantime. Returns \c false only
	when the scheduler is terminating.
	Called with fLock held; the lock is temporarily released while
	waiting or finishing.
*/
bool
IOScheduler::_NextActiveRequestOwner(IORequestOwner*& owner, off_t& quantum)
{
	while (true) {
		if (fTerminating)
			return false;

		if (owner != NULL)
			owner = fActiveRequestOwners.GetNext(owner);
		if (owner == NULL)
			owner = fActiveRequestOwners.Head();

		if (owner != NULL) {
			quantum = _ComputeRequestOwnerBandwidth(owner->priority);
			return true;
		}

		// Wait for new requests owners. First check whether any finisher work
		// has to be done.
		InterruptsSpinLocker finisherLocker(fFinisherLock);
		if (_FinisherWorkPending()) {
			finisherLocker.Unlock();
			mutex_unlock(&fLock);
			_Finisher();
			mutex_lock(&fLock);
			continue;
		}

		// Wait for new requests.
		// The condition entry is added while still holding both locks, so
		// a notification between unlock and Wait() is not lost.
		ConditionVariableEntry entry;
		fNewRequestCondition.Add(&entry);

		finisherLocker.Unlock();
		mutex_unlock(&fLock);

		entry.Wait(B_CAN_INTERRUPT);
		_Finisher();
		mutex_lock(&fLock);
	}
}
564 
565 
566 struct OperationComparator {
567 	inline bool operator()(const IOOperation* a, const IOOperation* b)
568 	{
569 		off_t offsetA = a->Offset();
570 		off_t offsetB = b->Offset();
571 		return offsetA < offsetB
572 			|| (offsetA == offsetB && a->Length() > b->Length());
573 	}
574 };
575 
576 
/*!	Reorders \a operations elevator-style: sorted by ascending offset,
	continuing from \a lastOffset, with overlapping operations deferred
	to a subsequent run (which restarts at offset 0). On return
	\a lastOffset is the end offset of the last operation in the list.
	Uses fOperationArray as scratch space (sized in Init() for the
	maximum operation count).
*/
void
IOScheduler::_SortOperations(IOOperationList& operations, off_t& lastOffset)
{
// TODO: _Scheduler() could directly add the operations to the array.
	// move operations to an array and sort it
	int32 count = 0;
	while (IOOperation* operation = operations.RemoveHead())
		fOperationArray[count++] = operation;

	std::sort(fOperationArray, fOperationArray + count, OperationComparator());

	// move the sorted operations to a temporary list we can work with
//dprintf("operations after sorting:\n");
	IOOperationList sortedOperations;
	for (int32 i = 0; i < count; i++)
//{
//dprintf("  %3ld: %p: offset: %lld, length: %lu\n", i, fOperationArray[i], fOperationArray[i]->Offset(), fOperationArray[i]->Length());
		sortedOperations.Add(fOperationArray[i]);
//}

	// Sort the operations so that no two adjacent operations overlap. This
	// might result in several elevator runs.
	while (!sortedOperations.IsEmpty()) {
		IOOperation* operation = sortedOperations.Head();
		while (operation != NULL) {
			IOOperation* nextOperation = sortedOperations.GetNext(operation);
			// Take every operation at or beyond the current head position;
			// anything before it waits for the next run.
			if (operation->Offset() >= lastOffset) {
				sortedOperations.Remove(operation);
//dprintf("  adding operation %p\n", operation);
				operations.Add(operation);
				lastOffset = operation->Offset() + operation->Length();
			}

			operation = nextOperation;
		}

		if (!sortedOperations.IsEmpty())
			lastOffset = 0;
	}
}
617 
618 
/*!	Main loop of the scheduler thread (cf. _SchedulerThread()): picks
	active request owners round-robin, prepares up to fIterationBandwidth
	worth of operations per iteration, sorts them elevator-style, feeds
	them to the I/O hook, and waits for them to finish. Returns B_OK when
	fTerminating has been set by the destructor.
*/
status_t
IOScheduler::_Scheduler()
{
	// The marker is a dummy owner (thread -1) used purely as a position
	// bookmark in fActiveRequestOwners between iterations.
	IORequestOwner marker;
	marker.thread = -1;
	{
		MutexLocker locker(fLock);
		fActiveRequestOwners.Add(&marker, false);
	}

	off_t lastOffset = 0;

	IORequestOwner* owner = NULL;
	off_t quantum = 0;

	while (!fTerminating) {
//dprintf("IOScheduler::_Scheduler(): next iteration: request owner: %p, quantum: %lld\n", owner, quantum);
		MutexLocker locker(fLock);

		IOOperationList operations;
		int32 operationCount = 0;
		bool resourcesAvailable = true;
		off_t iterationBandwidth = fIterationBandwidth;

		// Resume at the owner we stopped at last time -- the one right
		// before the marker -- and take the marker out for this round.
		if (owner == NULL) {
			owner = fActiveRequestOwners.GetPrevious(&marker);
			quantum = 0;
			fActiveRequestOwners.Remove(&marker);
		}

		if (owner == NULL || quantum < fBlockSize) {
			if (!_NextActiveRequestOwner(owner, quantum)) {
				// we've been asked to terminate
				return B_OK;
			}
		}

		// Collect operations until the iteration bandwidth is used up or
		// resources (operations/DMA buffers) run out.
		while (resourcesAvailable && iterationBandwidth >= fBlockSize) {
//dprintf("IOScheduler::_Scheduler(): request owner: %p (thread %ld)\n",
//owner, owner->thread);
			// Prepare operations for the owner.

			// There might still be unfinished ones.
			while (IOOperation* operation = owner->operations.RemoveHead()) {
				// TODO: We might actually grant the owner more bandwidth than
				// it deserves.
				// TODO: We should make sure that after the first read operation
				// of a partial write, no other write operation to the same
				// location is scheduled!
				operations.Add(operation);
				operationCount++;
				off_t bandwidth = operation->Length();
				quantum -= bandwidth;
				iterationBandwidth -= bandwidth;

				if (quantum < fBlockSize || iterationBandwidth < fBlockSize)
					break;
			}

			// Translate fresh requests into operations while the owner's
			// quantum lasts.
			while (resourcesAvailable && quantum >= fBlockSize
					&& iterationBandwidth >= fBlockSize) {
				IORequest* request = owner->requests.Head();
				if (request == NULL) {
					resourcesAvailable = false;
if (operationCount == 0)
panic("no more requests for owner %p (thread %ld)", owner, owner->thread);
					break;
				}

				off_t bandwidth = 0;
				resourcesAvailable = _PrepareRequestOperations(request,
					operations, operationCount, quantum, bandwidth);
				quantum -= bandwidth;
				iterationBandwidth -= bandwidth;
				if (request->RemainingBytes() == 0 || request->Status() <= 0) {
					// If the request has been completed, move it to the
					// completed list, so we don't pick it up again.
					owner->requests.Remove(request);
					owner->completed_requests.Add(request);
				}
			}

			// Get the next owner.
			if (resourcesAvailable)
				_NextActiveRequestOwner(owner, quantum);
		}

		// If the current owner doesn't have anymore requests, we have to
		// insert our marker, since the owner will be gone in the next
		// iteration.
		if (owner->requests.IsEmpty()) {
			fActiveRequestOwners.Insert(owner, &marker);
			owner = NULL;
		}

		if (operations.IsEmpty())
			continue;

		fPendingOperations = operationCount;

		locker.Unlock();

		// sort the operations
		_SortOperations(operations, lastOffset);

		// execute the operations
#ifdef TRACE_IO_SCHEDULER
		int32 i = 0;
#endif
		while (IOOperation* operation = operations.RemoveHead()) {
			TRACE("IOScheduler::_Scheduler(): calling callback for "
				"operation %ld: %p\n", i++, operation);

			IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_OPERATION_STARTED,
				this, operation->Parent(), operation);

			fIOCallback(fIOCallbackData, operation);

			_Finisher();
		}

		// wait for all operations to finish
		while (!fTerminating) {
			locker.Lock();

			if (fPendingOperations == 0)
				break;

			// Before waiting first check whether any finisher work has to be
			// done.
			InterruptsSpinLocker finisherLocker(fFinisherLock);
			if (_FinisherWorkPending()) {
				finisherLocker.Unlock();
				locker.Unlock();
				_Finisher();
				continue;
			}

			// wait for finished operations
			ConditionVariableEntry entry;
			fFinishedOperationCondition.Add(&entry);

			finisherLocker.Unlock();
			locker.Unlock();

			entry.Wait(B_CAN_INTERRUPT);
			_Finisher();
		}
	}

	return B_OK;
}
771 
772 
773 /*static*/ status_t
774 IOScheduler::_SchedulerThread(void *_self)
775 {
776 	IOScheduler *self = (IOScheduler *)_self;
777 	return self->_Scheduler();
778 }
779 
780 
/*!	Main loop of the notifier thread (cf. _RequestNotifierThread()):
	delivers the notifications for finished requests that have callbacks,
	which may take long, so the scheduler thread itself stays responsive.
	Exits once terminating and the finished-request queue is drained.
*/
status_t
IOScheduler::_RequestNotifier()
{
	while (true) {
		MutexLocker locker(fLock);

		// get a request
		IORequest* request = fFinishedRequests.RemoveHead();

		if (request == NULL) {
			// Check fTerminating only when the queue is empty, so pending
			// notifications are still delivered on shutdown.
			if (fTerminating)
				return B_OK;

			ConditionVariableEntry entry;
			fFinishedRequestCondition.Add(&entry);

			locker.Unlock();

			entry.Wait();
			continue;
		}

		locker.Unlock();

		// Notify without holding fLock -- callbacks may take a while.
		IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_REQUEST_FINISHED,
			this, request);

		// notify the request
		request->NotifyFinished();
	}

	// never can get here
	return B_OK;
}
815 
816 
817 /*static*/ status_t
818 IOScheduler::_RequestNotifierThread(void *_self)
819 {
820 	IOScheduler *self = (IOScheduler*)_self;
821 	return self->_RequestNotifier();
822 }
823 
824 
/*!	Returns the IORequestOwner assigned to \a thread, via hash lookup.
	If there is none and \a allocate is \c true, an unused owner is
	(re)assigned to \a team/\a thread. Unused owners whose former thread
	is still alive keep their slot (they are set aside and re-appended),
	so their hash entry survives while the thread exists. May return NULL
	when \a allocate is \c false, or if every unused owner still belongs
	to a live thread. Called with fLock held.
*/
IORequestOwner*
IOScheduler::_GetRequestOwner(team_id team, thread_id thread, bool allocate)
{
	// lookup in table
	IORequestOwner* owner = fRequestOwners->Lookup(thread);
	// An inactive owner found by lookup is about to get new requests --
	// take it off the unused list.
	if (owner != NULL && !owner->IsActive())
		fUnusedRequestOwners.Remove(owner);
	if (owner != NULL || !allocate)
		return owner;

	// not in table -- allocate an unused one
	RequestOwnerList existingOwners;

	while ((owner = fUnusedRequestOwners.RemoveHead()) != NULL) {
		// Only recycle slots that were never assigned or whose thread has
		// died.
		if (owner->thread < 0
			|| thread_get_thread_struct(owner->thread) == NULL) {
			if (owner->thread >= 0)
				fRequestOwners->RemoveUnchecked(owner);
			owner->team = team;
			owner->thread = thread;
			owner->priority = B_IDLE_PRIORITY;
			fRequestOwners->InsertUnchecked(owner);
			break;
		}

		existingOwners.Add(owner);
	}

	// Put the skipped (still-live) owners back on the unused list.
	fUnusedRequestOwners.MoveFrom(&existingOwners);
	return owner;
}
856 
857 
858 // #pragma mark - IOSchedulerNotificationService
859 
860 
861 /*static*/ IOSchedulerRoster IOSchedulerRoster::sDefaultInstance;
862 
863 
/*!	Constructs the statically allocated default instance via placement
	new, so initialization happens at a well-defined point during kernel
	startup instead of relying on static initializer order.
*/
/*static*/ void
IOSchedulerRoster::Init()
{
	new(&sDefaultInstance) IOSchedulerRoster;
}
869 
870 
871 void
872 IOSchedulerRoster::AddScheduler(IOScheduler* scheduler)
873 {
874 	AutoLocker<IOSchedulerRoster> locker(this);
875 	fSchedulers.Add(scheduler);
876 	locker.Unlock();
877 
878 	Notify(IO_SCHEDULER_ADDED, scheduler);
879 }
880 
881 
882 void
883 IOSchedulerRoster::RemoveScheduler(IOScheduler* scheduler)
884 {
885 	AutoLocker<IOSchedulerRoster> locker(this);
886 	fSchedulers.Remove(scheduler);
887 	locker.Unlock();
888 
889 	Notify(IO_SCHEDULER_REMOVED, scheduler);
890 }
891 
892 
/*!	Broadcasts an IO_SCHEDULER_MONITOR notification for \a eventCode,
	attaching pointers to \a scheduler, and -- when given -- \a request
	and \a operation. Cheap when nobody is listening. fEventBuffer is
	shared between calls and protected by the notification service lock.
*/
void
IOSchedulerRoster::Notify(uint32 eventCode, const IOScheduler* scheduler,
	IORequest* request, IOOperation* operation)
{
	AutoLocker<DefaultNotificationService> locker(fNotificationService);

	if (!fNotificationService.HasListeners())
		return;

	KMessage event;
	event.SetTo(fEventBuffer, sizeof(fEventBuffer), IO_SCHEDULER_MONITOR);
	event.AddInt32("event", eventCode);
	event.AddPointer("scheduler", scheduler);
	if (request != NULL) {
		event.AddPointer("request", request);
		// an operation is only meaningful in the context of its request
		if (operation != NULL)
			event.AddPointer("operation", operation);
	}

	fNotificationService.NotifyLocked(event, eventCode);
}
914 
915 
916 int32
917 IOSchedulerRoster::NextID()
918 {
919 	AutoLocker<IOSchedulerRoster> locker(this);
920 	return fNextID++;
921 }
922 
923 
/*!	Sets up the roster: IDs start at 1, the notification service is named
	"I/O", and the lock guarding the scheduler list and ID counter is
	created.
*/
IOSchedulerRoster::IOSchedulerRoster()
	:
	fNextID(1),
	fNotificationService("I/O")
{
	mutex_init(&fLock, "IOSchedulerRoster");
}
931 
932 
/*!	Destroys the roster lock. (The default instance lives in static
	storage, so this is not expected to run during normal operation.)
*/
IOSchedulerRoster::~IOSchedulerRoster()
{
	mutex_destroy(&fLock);
}
937