/*
 * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */

#include "IORequest.h"

#include <string.h>

#include <arch/debug.h>
#include <debug.h>
#include <heap.h>
#include <kernel.h>
#include <thread.h>
#include <util/AutoLock.h>
#include <vm/vm.h>
#include <vm/VMAddressSpace.h>

#include "dma_resources.h"


//#define TRACE_IO_REQUEST
#ifdef TRACE_IO_REQUEST
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


// partial I/O operation phases
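// A write that covers the first or the last block of its range only
// partially must read those blocks into the bounce buffer first
// (PHASE_READ_BEGIN/PHASE_READ_END), before the actual transfer
// (PHASE_DO_ALL) can be executed. Reads and block-aligned writes use
// PHASE_DO_ALL only.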
enum {
	PHASE_READ_BEGIN	= 0,
	PHASE_READ_END		= 1,
	PHASE_DO_ALL		= 2
};


// #pragma mark -

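// A status of 1 means "still in progress"; B_OK and the (negative) error
// codes denote a finished chunk.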
IORequestChunk::IORequestChunk()
	:
	fParent(NULL),
	fStatus(1)
{
}


IORequestChunk::~IORequestChunk()
{
}


//	#pragma mark -

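// Iteration state for IOBuffer::GetNextVirtualVec(): the current vec and the
// offset within it, plus a possibly mapped area or physical page that
// FreeVirtualVecCookie() has to undo.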
struct virtual_vec_cookie {
	uint32	vec_index;
	size_t	vec_offset;
	area_id	mapped_area;
	void*	physical_page_handle;
	addr_t	virtual_address;
};

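/*!	Creates an IOBuffer with a variable-sized trailing vec array (one element
	of the array is already part of the object, hence the "count - 1" below).
	Since the object is allocated via malloc_etc(), it must be disposed of
	with Delete(), not delete.
*/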
IOBuffer*
IOBuffer::Create(uint32 count, bool vip)
{
	size_t size = sizeof(IOBuffer) + sizeof(iovec) * (count - 1);
	IOBuffer* buffer
		= (IOBuffer*)(malloc_etc(size, vip ? HEAP_PRIORITY_VIP : 0));
	if (buffer == NULL)
		return NULL;

	buffer->fCapacity = count;
	buffer->fVecCount = 0;
	buffer->fUser = false;
	buffer->fPhysical = false;
	buffer->fVIP = vip;
	buffer->fMemoryLocked = false;

	return buffer;
}


void
IOBuffer::Delete()
{
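	// Note: Deliberately tolerates being invoked on a NULL object (e.g. when
	// IORequest::Init() failed and left fBuffer NULL); checking "this" for
	// NULL is technically undefined behavior, though.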
	if (this == NULL)
		return;

	free_etc(this, fVIP ? HEAP_PRIORITY_VIP : 0);
}


void
IOBuffer::SetVecs(size_t firstVecOffset, const iovec* vecs, uint32 count,
	size_t length, uint32 flags)
{
	memcpy(fVecs, vecs, sizeof(iovec) * count);
	if (count > 0 && firstVecOffset > 0) {
		fVecs[0].iov_base = (uint8*)fVecs[0].iov_base + firstVecOffset;
		fVecs[0].iov_len -= firstVecOffset;
	}

	fVecCount = count;
	fLength = length;
	fPhysical = (flags & B_PHYSICAL_IO_REQUEST) != 0;
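	// Whether this is a user buffer is derived from the first vec only; all
	// vecs are expected to belong to the same address space.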
	fUser = !fPhysical && IS_USER_ADDRESS(vecs[0].iov_base);
}


status_t
IOBuffer::GetNextVirtualVec(void*& _cookie, iovec& vector)
{
	virtual_vec_cookie* cookie = (virtual_vec_cookie*)_cookie;
	if (cookie == NULL) {
		cookie = new(malloc_flags(fVIP ? HEAP_PRIORITY_VIP : 0))
			virtual_vec_cookie;
		if (cookie == NULL)
			return B_NO_MEMORY;

		cookie->vec_index = 0;
		cookie->vec_offset = 0;
		cookie->mapped_area = -1;
		cookie->physical_page_handle = NULL;
		cookie->virtual_address = 0;
		_cookie = cookie;
	}

	// put back a page that was mapped in a previous call, if any
	if (cookie->physical_page_handle != NULL) {
// TODO: This check is invalid! The physical page mapper is not required to
// return a non-NULL handle (the generic implementation does not)!
		vm_put_physical_page(cookie->virtual_address,
			cookie->physical_page_handle);
	}

	if (cookie->vec_index >= fVecCount)
		return B_BAD_INDEX;

	if (!fPhysical) {
		vector = fVecs[cookie->vec_index++];
		return B_OK;
	}

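	// On the first call for a physical buffer we try to map all vecs into a
	// single contiguous virtual area; if that fails, we fall back to mapping
	// one page at a time below.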
	if (cookie->vec_index == 0
		&& (fVecCount > 1 || fVecs[0].iov_len > B_PAGE_SIZE)) {
		void* mappedAddress;
		addr_t mappedSize;

// TODO: This is a potential violation of the VIP requirement, since
// vm_map_physical_memory_vecs() allocates memory without special flags!
		cookie->mapped_area = vm_map_physical_memory_vecs(
			VMAddressSpace::KernelID(), "io buffer mapped physical vecs",
			&mappedAddress, B_ANY_KERNEL_ADDRESS, &mappedSize,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, fVecs, fVecCount);

		if (cookie->mapped_area >= 0) {
			vector.iov_base = (void*)mappedAddress;
			vector.iov_len = mappedSize;
			return B_OK;
		} else {
			ktrace_printf("failed to map area: %s\n",
				strerror(cookie->mapped_area));
		}
	}

	// fall back to page-wise mapping
	iovec& currentVec = fVecs[cookie->vec_index];
	addr_t address = (addr_t)currentVec.iov_base + cookie->vec_offset;
	addr_t pageOffset = address % B_PAGE_SIZE;

// TODO: This is a potential violation of the VIP requirement, since
// vm_get_physical_page() may allocate memory without special flags!
	status_t result = vm_get_physical_page(address - pageOffset,
		&cookie->virtual_address, &cookie->physical_page_handle);
	if (result != B_OK)
		return result;

	size_t length = min_c(currentVec.iov_len - cookie->vec_offset,
		B_PAGE_SIZE - pageOffset);

	vector.iov_base = (void*)(cookie->virtual_address + pageOffset);
	vector.iov_len = length;

	cookie->vec_offset += length;
	if (cookie->vec_offset >= currentVec.iov_len) {
		cookie->vec_index++;
		cookie->vec_offset = 0;
	}

	return B_OK;
}

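// A minimal usage sketch for the iteration API above (hypothetical caller,
// not part of this file): fetch virtually mapped vecs until B_BAD_INDEX is
// returned, then release the cookie.
//
//	void* cookie = NULL;
//	iovec vec;
//	while (buffer->GetNextVirtualVec(cookie, vec) == B_OK)
//		/* access vec.iov_base / vec.iov_len */;
//	if (cookie != NULL)
//		buffer->FreeVirtualVecCookie(cookie);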

void
IOBuffer::FreeVirtualVecCookie(void* _cookie)
{
	virtual_vec_cookie* cookie = (virtual_vec_cookie*)_cookie;
	if (cookie->mapped_area >= 0)
		delete_area(cookie->mapped_area);
// TODO: A vm_get_physical_page() may still be unmatched!

	free_etc(cookie, fVIP ? HEAP_PRIORITY_VIP : 0);
}


status_t
IOBuffer::LockMemory(team_id team, bool isWrite)
{
	if (fMemoryLocked) {
		panic("memory already locked!");
		return B_BAD_VALUE;
	}

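	// For reads we pass B_READ_DEVICE, i.e. the locked memory is going to be
	// overwritten with data read from the device.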
	for (uint32 i = 0; i < fVecCount; i++) {
		status_t status = lock_memory_etc(team, fVecs[i].iov_base,
			fVecs[i].iov_len, isWrite ? 0 : B_READ_DEVICE);
		if (status != B_OK) {
			_UnlockMemory(team, i, isWrite);
			return status;
		}
	}

	fMemoryLocked = true;
	return B_OK;
}


void
IOBuffer::_UnlockMemory(team_id team, size_t count, bool isWrite)
{
	for (uint32 i = 0; i < count; i++) {
		unlock_memory_etc(team, fVecs[i].iov_base, fVecs[i].iov_len,
			isWrite ? 0 : B_READ_DEVICE);
	}
}


void
IOBuffer::UnlockMemory(team_id team, bool isWrite)
{
	if (!fMemoryLocked) {
		panic("memory not locked");
		return;
	}

	_UnlockMemory(team, fVecCount, isWrite);
	fMemoryLocked = false;
}


void
IOBuffer::Dump() const
{
	kprintf("IOBuffer at %p\n", this);

	kprintf("  origin:     %s\n", fUser ? "user" : "kernel");
	kprintf("  kind:       %s\n", fPhysical ? "physical" : "virtual");
	kprintf("  length:     %lu\n", fLength);
	kprintf("  capacity:   %lu\n", fCapacity);
	kprintf("  vecs:       %lu\n", fVecCount);

	for (uint32 i = 0; i < fVecCount; i++) {
		kprintf("    [%lu] %p, %lu\n", i, fVecs[i].iov_base, fVecs[i].iov_len);
	}
}


// #pragma mark -

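/*!	Updates the operation after (a phase of) it has been executed. Returns
	true if the operation is complete, or false if another phase of a
	partial-block write still has to be executed (in which case the status is
	reset as well).
*/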
bool
IOOperation::Finish()
{
	TRACE("IOOperation::Finish()\n");
	if (fStatus == B_OK) {
		if (fParent->IsWrite()) {
			TRACE("  is write\n");
			if (fPhase == PHASE_READ_BEGIN) {
				TRACE("  phase read begin\n");
				// repair phase adjusted vec
				fDMABuffer->VecAt(fSavedVecIndex).iov_len = fSavedVecLength;

				// partial write: copy partial begin to bounce buffer
				bool skipReadEndPhase;
				status_t error = _CopyPartialBegin(true, skipReadEndPhase);
				if (error == B_OK) {
					// We're done with the first phase only (read in begin).
					// Get ready for next phase...
					fPhase = HasPartialEnd() && !skipReadEndPhase
						? PHASE_READ_END : PHASE_DO_ALL;
					_PrepareVecs();
					ResetStatus();
						// TODO: Is there a race condition, if the request is
						// aborted at the same time?
					return false;
				}

				SetStatus(error);
			} else if (fPhase == PHASE_READ_END) {
				TRACE("  phase read end\n");
				// repair phase adjusted vec
				iovec& vec = fDMABuffer->VecAt(fSavedVecIndex);
				vec.iov_base = (uint8*)vec.iov_base
					+ vec.iov_len - fSavedVecLength;
				vec.iov_len = fSavedVecLength;

				// partial write: copy partial end to bounce buffer
				status_t error = _CopyPartialEnd(true);
				if (error == B_OK) {
					// We're done with the second phase only (read in end).
					// Get ready for next phase...
					fPhase = PHASE_DO_ALL;
					ResetStatus();
						// TODO: Is there a race condition, if the request is
						// aborted at the same time?
					return false;
				}

				SetStatus(error);
			}
		}
	}

	if (fParent->IsRead() && UsesBounceBuffer()) {
		TRACE("  read with bounce buffer\n");
		// copy the bounce buffer segments to the final location
		uint8* bounceBuffer = (uint8*)fDMABuffer->BounceBufferAddress();
		addr_t bounceBufferStart = fDMABuffer->PhysicalBounceBufferAddress();
		addr_t bounceBufferEnd = bounceBufferStart
			+ fDMABuffer->BounceBufferSize();

		const iovec* vecs = fDMABuffer->Vecs();
		uint32 vecCount = fDMABuffer->VecCount();

		status_t error = B_OK;

		// We iterate through the vecs we have read, moving offset (the device
		// offset) as we go. If [offset, offset + vec.iov_len) intersects with
		// [startOffset, endOffset) we copy to the final location.
		off_t offset = fOffset;
		const off_t startOffset = fOriginalOffset;
		const off_t endOffset = fOriginalOffset + fOriginalLength;

		for (uint32 i = 0; error == B_OK && i < vecCount; i++) {
			const iovec& vec = vecs[i];
			addr_t base = (addr_t)vec.iov_base;
			size_t length = vec.iov_len;

			if (offset < startOffset) {
				// If the complete vector is before the start offset, skip it.
				if (offset + length <= startOffset) {
					offset += length;
					continue;
				}

				// The vector starts before the start offset, but intersects
				// with it. Skip the part we aren't interested in.
				size_t diff = startOffset - offset;
				offset += diff;
				base += diff;
				length -= diff;
			}

			if (offset + length > endOffset) {
				// If we're already beyond the end offset, we're done.
				if (offset >= endOffset)
					break;

				// The vector extends beyond the end offset -- cut it.
				length = endOffset - offset;
			}

			if (base >= bounceBufferStart && base < bounceBufferEnd) {
				error = fParent->CopyData(
					bounceBuffer + (base - bounceBufferStart), offset, length);
			}

			offset += length;
		}

		if (error != B_OK)
			SetStatus(error);
	}

	return true;
}

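// Prepare() copies the data to be written into the bounce buffer segments up
// front; the partially covered begin/end blocks are skipped here and merged
// in Finish() only after their respective read phase has completed.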
/*!	Note: SetPartial() must be called first!
*/
status_t
IOOperation::Prepare(IORequest* request)
{
	if (fParent != NULL)
		fParent->RemoveOperation(this);

	fParent = request;

	fTransferredBytes = 0;

	// set initial phase
	fPhase = PHASE_DO_ALL;
	if (fParent->IsWrite()) {
		// Copy the data to be written into the bounce buffer segments now,
		// except for the partial begin/end blocks, which are copied only
		// after their respective read phase.
		if (UsesBounceBuffer()) {
			TRACE("  write with bounce buffer\n");
			uint8* bounceBuffer = (uint8*)fDMABuffer->BounceBufferAddress();
			addr_t bounceBufferStart
				= fDMABuffer->PhysicalBounceBufferAddress();
			addr_t bounceBufferEnd = bounceBufferStart
				+ fDMABuffer->BounceBufferSize();

			const iovec* vecs = fDMABuffer->Vecs();
			uint32 vecCount = fDMABuffer->VecCount();
			size_t vecOffset = 0;
			uint32 i = 0;

			off_t offset = fOffset;
			off_t endOffset = fOffset + fLength;

			if (HasPartialBegin()) {
				// skip first block
				size_t toSkip = fBlockSize;
				while (toSkip > 0) {
					if (vecs[i].iov_len <= toSkip) {
						toSkip -= vecs[i].iov_len;
						i++;
					} else {
						vecOffset = toSkip;
						break;
					}
				}

				offset += fBlockSize;
			}

			if (HasPartialEnd()) {
				// skip last block
				size_t toSkip = fBlockSize;
				while (toSkip > 0) {
					if (vecs[vecCount - 1].iov_len <= toSkip) {
						toSkip -= vecs[vecCount - 1].iov_len;
						vecCount--;
					} else
						break;
				}

				endOffset -= fBlockSize;
			}

			for (; i < vecCount; i++) {
				const iovec& vec = vecs[i];
				addr_t base = (addr_t)vec.iov_base + vecOffset;
				size_t length = vec.iov_len - vecOffset;
				vecOffset = 0;

				if (base >= bounceBufferStart && base < bounceBufferEnd) {
					if (offset + length > endOffset)
						length = endOffset - offset;
					status_t error = fParent->CopyData(offset,
						bounceBuffer + (base - bounceBufferStart), length);
					if (error != B_OK)
						return error;
				}

				offset += length;
			}
		}

		if (HasPartialBegin())
			fPhase = PHASE_READ_BEGIN;
		else if (HasPartialEnd())
			fPhase = PHASE_READ_END;

		_PrepareVecs();
	}

	ResetStatus();

	if (fParent != NULL)
		fParent->AddOperation(this);

	return B_OK;
}


void
IOOperation::SetOriginalRange(off_t offset, size_t length)
{
	fOriginalOffset = fOffset = offset;
	fOriginalLength = fLength = length;
}


void
IOOperation::SetRange(off_t offset, size_t length)
{
	fOffset = offset;
	fLength = length;
}

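// While a partial-block read phase is in progress, Offset(), Length(),
// Vecs(), and VecCount() describe only the single block affected by that
// phase; in PHASE_DO_ALL they describe the complete transfer.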
off_t
IOOperation::Offset() const
{
	return fPhase == PHASE_READ_END ? fOffset + fLength - fBlockSize : fOffset;
}


size_t
IOOperation::Length() const
{
	return fPhase == PHASE_DO_ALL ? fLength : fBlockSize;
}


iovec*
IOOperation::Vecs() const
{
	switch (fPhase) {
		case PHASE_READ_END:
			return fDMABuffer->Vecs() + fSavedVecIndex;
		case PHASE_READ_BEGIN:
		case PHASE_DO_ALL:
		default:
			return fDMABuffer->Vecs();
	}
}


uint32
IOOperation::VecCount() const
{
	switch (fPhase) {
		case PHASE_READ_BEGIN:
			return fSavedVecIndex + 1;
		case PHASE_READ_END:
			return fDMABuffer->VecCount() - fSavedVecIndex;
		case PHASE_DO_ALL:
		default:
			return fDMABuffer->VecCount();
	}
}


void
IOOperation::SetPartial(bool partialBegin, bool partialEnd)
{
	TRACE("partial begin %d, end %d\n", partialBegin, partialEnd);
	fPartialBegin = partialBegin;
	fPartialEnd = partialEnd;
}


bool
IOOperation::IsWrite() const
{
	return fParent->IsWrite() && fPhase == PHASE_DO_ALL;
}


bool
IOOperation::IsRead() const
{
	return fParent->IsRead();
}


void
IOOperation::_PrepareVecs()
{
	// we need to prepare the vecs for consumption by the drivers
	if (fPhase == PHASE_READ_BEGIN) {
		iovec* vecs = fDMABuffer->Vecs();
		uint32 vecCount = fDMABuffer->VecCount();
		size_t vecLength = fBlockSize;
		for (uint32 i = 0; i < vecCount; i++) {
			iovec& vec = vecs[i];
			if (vec.iov_len >= vecLength) {
				fSavedVecIndex = i;
				fSavedVecLength = vec.iov_len;
				vec.iov_len = vecLength;
				break;
			}
			vecLength -= vec.iov_len;
		}
	} else if (fPhase == PHASE_READ_END) {
		iovec* vecs = fDMABuffer->Vecs();
		uint32 vecCount = fDMABuffer->VecCount();
		size_t vecLength = fBlockSize;
		for (int32 i = vecCount - 1; i >= 0; i--) {
			iovec& vec = vecs[i];
			if (vec.iov_len >= vecLength) {
				fSavedVecIndex = i;
				fSavedVecLength = vec.iov_len;
				vec.iov_base = (uint8*)vec.iov_base
					+ vec.iov_len - vecLength;
				vec.iov_len = vecLength;
				break;
			}
			vecLength -= vec.iov_len;
		}
	}
}


status_t
IOOperation::_CopyPartialBegin(bool isWrite, bool& singleBlockOnly)
{
	size_t relativeOffset = OriginalOffset() - fOffset;
	size_t length = fBlockSize - relativeOffset;

	singleBlockOnly = length >= OriginalLength();
	if (singleBlockOnly)
		length = OriginalLength();

	TRACE("_CopyPartialBegin(%s, single only %d)\n",
		isWrite ? "write" : "read", singleBlockOnly);

	if (isWrite) {
		return fParent->CopyData(OriginalOffset(),
			(uint8*)fDMABuffer->BounceBufferAddress() + relativeOffset, length);
	} else {
		return fParent->CopyData(
			(uint8*)fDMABuffer->BounceBufferAddress() + relativeOffset,
			OriginalOffset(), length);
	}
}


status_t
IOOperation::_CopyPartialEnd(bool isWrite)
{
	TRACE("_CopyPartialEnd(%s)\n", isWrite ? "write" : "read");

	const iovec& lastVec = fDMABuffer->VecAt(fDMABuffer->VecCount() - 1);
	off_t lastVecPos = fOffset + fLength - fBlockSize;
	uint8* base = (uint8*)fDMABuffer->BounceBufferAddress()
		+ ((addr_t)lastVec.iov_base + lastVec.iov_len - fBlockSize
		- fDMABuffer->PhysicalBounceBufferAddress());
		// NOTE: this won't work if we don't use the bounce buffer contiguously
		// (because of boundary alignments).
	size_t length = OriginalOffset() + OriginalLength() - lastVecPos;

	if (isWrite)
		return fParent->CopyData(lastVecPos, base, length);

	return fParent->CopyData(base, lastVecPos, length);
}


void
IOOperation::Dump() const
{
	kprintf("io_operation at %p\n", this);

	kprintf("  parent:           %p\n", fParent);
	kprintf("  status:           %s\n", strerror(fStatus));
	kprintf("  dma buffer:       %p\n", fDMABuffer);
	kprintf("  offset:           %-8Ld (original: %Ld)\n", fOffset,
		fOriginalOffset);
	kprintf("  length:           %-8lu (original: %lu)\n", fLength,
		fOriginalLength);
	kprintf("  transferred:      %lu\n", fTransferredBytes);
	kprintf("  block size:       %lu\n", fBlockSize);
	kprintf("  saved vec index:  %u\n", fSavedVecIndex);
	kprintf("  saved vec length: %u\n", fSavedVecLength);
	kprintf("  r/w:              %s\n", IsWrite() ? "write" : "read");
	kprintf("  phase:            %s\n", fPhase == PHASE_READ_BEGIN
		? "read begin" : fPhase == PHASE_READ_END ? "read end"
		: fPhase == PHASE_DO_ALL ? "do all" : "unknown");
	kprintf("  partial begin:    %s\n", fPartialBegin ? "yes" : "no");
	kprintf("  partial end:      %s\n", fPartialEnd ? "yes" : "no");
	kprintf("  bounce buffer:    %s\n", fUsesBounceBuffer ? "yes" : "no");

	set_debug_variable("_parent", (addr_t)fParent);
	set_debug_variable("_buffer", (addr_t)fDMABuffer);
}


// #pragma mark -


IORequest::IORequest()
	:
	fIsNotified(false),
	fFinishedCallback(NULL),
	fFinishedCookie(NULL),
	fIterationCallback(NULL),
	fIterationCookie(NULL)
{
	mutex_init(&fLock, "I/O request lock");
	fFinishedCondition.Init(this, "I/O request finished");
}


IORequest::~IORequest()
{
	mutex_lock(&fLock);
	DeleteSubRequests();
	fBuffer->Delete();
	mutex_destroy(&fLock);
}


/* static */ IORequest*
IORequest::Create(bool vip)
{
	return vip
		? new(malloc_flags(HEAP_PRIORITY_VIP)) IORequest
		: new(std::nothrow) IORequest;
}


status_t
IORequest::Init(off_t offset, void* buffer, size_t length, bool write,
	uint32 flags)
{
	iovec vec;
	vec.iov_base = buffer;
	vec.iov_len = length;
	return Init(offset, &vec, 1, length, write, flags);
}


status_t
IORequest::Init(off_t offset, size_t firstVecOffset, const iovec* vecs,
	size_t count, size_t length, bool write, uint32 flags)
{
	fBuffer = IOBuffer::Create(count, (flags & B_VIP_IO_REQUEST) != 0);
	if (fBuffer == NULL)
		return B_NO_MEMORY;

	fBuffer->SetVecs(firstVecOffset, vecs, count, length, flags);

	fOwner = NULL;
	fOffset = offset;
	fLength = length;
	fRelativeParentOffset = 0;
	fTransferSize = 0;
	fFlags = flags;
	struct thread* thread = thread_get_current_thread();
	fTeam = thread->team->id;
	fThread = thread->id;
	fIsWrite = write;
	fPartialTransfer = false;
	fSuppressChildNotifications = false;

	// these are for iteration
	fVecIndex = 0;
	fVecOffset = 0;
	fRemainingBytes = length;

	fPendingChildren = 0;

	fStatus = 1;

	return B_OK;
}


status_t
IORequest::CreateSubRequest(off_t parentOffset, off_t offset, size_t length,
	IORequest*& _subRequest)
{
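	// parentOffset addresses the range of this request, while offset is the
	// (possibly translated) offset the subrequest itself is going to use.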
	ASSERT(parentOffset >= fOffset && length <= fLength
		&& parentOffset - fOffset <= fLength - length);

	// find start vec
	size_t vecOffset = parentOffset - fOffset;
	iovec* vecs = fBuffer->Vecs();
	int32 vecCount = fBuffer->VecCount();
	int32 startVec = 0;
	for (; startVec < vecCount; startVec++) {
		const iovec& vec = vecs[startVec];
		if (vecOffset < vec.iov_len)
			break;

		vecOffset -= vec.iov_len;
	}

	// count vecs
	size_t currentVecOffset = vecOffset;
	int32 endVec = startVec;
	size_t remainingLength = length;
	for (; endVec < vecCount; endVec++) {
		const iovec& vec = vecs[endVec];
		if (vec.iov_len - currentVecOffset >= remainingLength)
			break;

		remainingLength -= vec.iov_len - currentVecOffset;
		currentVecOffset = 0;
	}

	// create subrequest
	IORequest* subRequest = Create((fFlags & B_VIP_IO_REQUEST) != 0);
	if (subRequest == NULL)
		return B_NO_MEMORY;

	status_t error = subRequest->Init(offset, vecOffset, vecs + startVec,
		endVec - startVec + 1, length, fIsWrite, fFlags & ~B_DELETE_IO_REQUEST);
	if (error != B_OK) {
		delete subRequest;
		return error;
	}

	subRequest->fRelativeParentOffset = parentOffset - fOffset;
	subRequest->fTeam = fTeam;
	subRequest->fThread = fThread;

	_subRequest = subRequest;
	subRequest->SetParent(this);

	MutexLocker _(fLock);

	fChildren.Add(subRequest);
	fPendingChildren++;
	TRACE("IORequest::CreateSubRequest(): request: %p, subrequest: %p\n", this,
		subRequest);

	return B_OK;
}


void
IORequest::DeleteSubRequests()
{
	while (IORequestChunk* chunk = fChildren.RemoveHead())
		delete chunk;
	fPendingChildren = 0;
}


void
IORequest::SetFinishedCallback(io_request_finished_callback callback,
	void* cookie)
{
	fFinishedCallback = callback;
	fFinishedCookie = cookie;
}


void
IORequest::SetIterationCallback(io_request_iterate_callback callback,
	void* cookie)
{
	fIterationCallback = callback;
	fIterationCookie = cookie;
}


io_request_finished_callback
IORequest::FinishedCallback(void** _cookie) const
{
	if (_cookie != NULL)
		*_cookie = fFinishedCookie;
	return fFinishedCallback;
}


status_t
IORequest::Wait(uint32 flags, bigtime_t timeout)
{
	MutexLocker locker(fLock);

	if (IsFinished() && fIsNotified)
		return Status();

	ConditionVariableEntry entry;
	fFinishedCondition.Add(&entry);

	locker.Unlock();

	status_t error = entry.Wait(flags, timeout);
	if (error != B_OK)
		return error;

	return Status();
}


void
IORequest::NotifyFinished()
{
	TRACE("IORequest::NotifyFinished(): request: %p\n", this);

	MutexLocker locker(fLock);

	if (fStatus == B_OK && !fPartialTransfer && RemainingBytes() > 0) {
		// The request is not really done yet. If it has an iteration callback,
		// call it.
		if (fIterationCallback != NULL) {
			ResetStatus();
			locker.Unlock();
			bool partialTransfer = false;
			status_t error = fIterationCallback(fIterationCookie, this,
				&partialTransfer);
			if (error == B_OK && !partialTransfer)
				return;

			// The iteration failed, which means we're responsible for
			// notifying that the request is finished.
			locker.Lock();
			fStatus = error;
			fPartialTransfer = true;
		}
	}

	ASSERT(fPendingChildren == 0);
	ASSERT(fChildren.IsEmpty()
		|| dynamic_cast<IOOperation*>(fChildren.Head()) == NULL);

	// unlock the memory
	if (fBuffer->IsMemoryLocked())
		fBuffer->UnlockMemory(fTeam, fIsWrite);

	// Cache the callbacks before we unblock waiters and unlock. Any of the
	// following could delete this request, so we don't want to touch it
	// once we have started telling others that it is done.
	IORequest* parent = fParent;
	io_request_finished_callback finishedCallback = fFinishedCallback;
	void* finishedCookie = fFinishedCookie;
	status_t status = fStatus;
	size_t lastTransferredOffset = fRelativeParentOffset + fTransferSize;
	bool partialTransfer = status != B_OK || fPartialTransfer;
	bool deleteRequest = (fFlags & B_DELETE_IO_REQUEST) != 0;

	// unblock waiters
	fIsNotified = true;
	fFinishedCondition.NotifyAll();

	locker.Unlock();

	// notify callback
	if (finishedCallback != NULL) {
		finishedCallback(finishedCookie, this, status, partialTransfer,
			lastTransferredOffset);
	}

	// notify parent
	if (parent != NULL) {
		parent->SubRequestFinished(this, status, partialTransfer,
			lastTransferredOffset);
	}

	if (deleteRequest)
		delete this;
}


/*!	Returns whether this request or any of its ancestors has a finished or
	notification callback. Used to decide whether NotifyFinished() can be called
	synchronously.
*/
bool
IORequest::HasCallbacks() const
{
	if (fFinishedCallback != NULL || fIterationCallback != NULL)
		return true;

	return fParent != NULL && fParent->HasCallbacks();
}


void
IORequest::SetStatusAndNotify(status_t status)
{
	MutexLocker locker(fLock);

	if (fStatus != 1)
		return;

	fStatus = status;

	locker.Unlock();

	NotifyFinished();
}


void
IORequest::OperationFinished(IOOperation* operation, status_t status,
	bool partialTransfer, size_t transferEndOffset)
{
	TRACE("IORequest::OperationFinished(%p, %#lx): request: %p\n", operation,
		status, this);

	MutexLocker locker(fLock);

	fChildren.Remove(operation);
	operation->SetParent(NULL);

	if (status != B_OK || partialTransfer) {
		if (fTransferSize > transferEndOffset)
			fTransferSize = transferEndOffset;
		fPartialTransfer = true;
	}

	if (status != B_OK && fStatus == 1)
		fStatus = status;

	if (--fPendingChildren > 0)
		return;

	// last child finished

	// set status, if not done yet
	if (fStatus == 1)
		fStatus = B_OK;
}


void
IORequest::SubRequestFinished(IORequest* request, status_t status,
	bool partialTransfer, size_t transferEndOffset)
{
	TRACE("IORequest::SubRequestFinished(%p, %#lx, %d, %lu): request: %p\n",
		request, status, partialTransfer, transferEndOffset, this);

	MutexLocker locker(fLock);

	if (status != B_OK || partialTransfer) {
		if (fTransferSize > transferEndOffset)
			fTransferSize = transferEndOffset;
		fPartialTransfer = true;
	}

	if (status != B_OK && fStatus == 1)
		fStatus = status;

	if (--fPendingChildren > 0 || fSuppressChildNotifications)
		return;

	// last child finished

	// set status, if not done yet
	if (fStatus == 1)
		fStatus = B_OK;

	locker.Unlock();

	NotifyFinished();
}


void
IORequest::SetUnfinished()
{
	MutexLocker _(fLock);
	ResetStatus();
}


void
IORequest::SetTransferredBytes(bool partialTransfer, size_t transferredBytes)
{
	TRACE("%p->IORequest::SetTransferredBytes(%d, %lu)\n", this,
		partialTransfer, transferredBytes);

	MutexLocker _(fLock);

	fPartialTransfer = partialTransfer;
	fTransferSize = transferredBytes;
}


void
IORequest::SetSuppressChildNotifications(bool suppress)
{
	fSuppressChildNotifications = suppress;
}


void
IORequest::Advance(size_t bySize)
{
	TRACE("IORequest::Advance(%lu): remaining: %lu -> %lu\n", bySize,
		fRemainingBytes, fRemainingBytes - bySize);
	fRemainingBytes -= bySize;
	fTransferSize += bySize;

	iovec* vecs = fBuffer->Vecs();
	uint32 vecCount = fBuffer->VecCount();
	while (fVecIndex < vecCount
			&& vecs[fVecIndex].iov_len - fVecOffset <= bySize) {
		bySize -= vecs[fVecIndex].iov_len - fVecOffset;
		fVecOffset = 0;
		fVecIndex++;
	}

	fVecOffset += bySize;
}


IORequest*
IORequest::FirstSubRequest()
{
	return dynamic_cast<IORequest*>(fChildren.Head());
}


IORequest*
IORequest::NextSubRequest(IORequest* previous)
{
	if (previous == NULL)
		return NULL;
	return dynamic_cast<IORequest*>(fChildren.GetNext(previous));
}


void
IORequest::AddOperation(IOOperation* operation)
{
	MutexLocker locker(fLock);
	TRACE("IORequest::AddOperation(%p): request: %p\n", operation, this);
	fChildren.Add(operation);
	fPendingChildren++;
}


void
IORequest::RemoveOperation(IOOperation* operation)
{
	MutexLocker locker(fLock);
	fChildren.Remove(operation);
	operation->SetParent(NULL);
}


status_t
IORequest::CopyData(off_t offset, void* buffer, size_t size)
{
	return _CopyData(buffer, offset, size, true);
}


status_t
IORequest::CopyData(const void* buffer, off_t offset, size_t size)
{
	return _CopyData((void*)buffer, offset, size, false);
}


status_t
IORequest::_CopyData(void* _buffer, off_t offset, size_t size, bool copyIn)
{
	if (size == 0)
		return B_OK;

	uint8* buffer = (uint8*)_buffer;

	if (offset < fOffset || offset + size > fOffset + fLength) {
		panic("IORequest::_CopyData(): invalid range: (%lld, %lu)", offset,
			size);
		return B_BAD_VALUE;
	}

	// If we can, we directly copy from/to the virtual buffer. The memory is
	// locked in this case.
	status_t (*copyFunction)(void*, void*, size_t, team_id, bool);
	if (fBuffer->IsPhysical()) {
		copyFunction = &IORequest::_CopyPhysical;
	} else {
		copyFunction = fBuffer->IsUser()
			? &IORequest::_CopyUser : &IORequest::_CopySimple;
	}

	// skip bytes if requested
	iovec* vecs = fBuffer->Vecs();
	size_t skipBytes = offset - fOffset;
	size_t vecOffset = 0;
	while (skipBytes > 0) {
		if (vecs[0].iov_len > skipBytes) {
			vecOffset = skipBytes;
			break;
		}

		skipBytes -= vecs[0].iov_len;
		vecs++;
	}

	// copy iovec-wise
	while (size > 0) {
		size_t toCopy = min_c(size, vecs[0].iov_len - vecOffset);
		status_t error = copyFunction(buffer,
			(uint8*)vecs[0].iov_base + vecOffset, toCopy, fTeam, copyIn);
		if (error != B_OK)
			return error;

		buffer += toCopy;
		size -= toCopy;
		vecs++;
		vecOffset = 0;
	}

	return B_OK;
}


/* static */ status_t
IORequest::_CopySimple(void* bounceBuffer, void* external, size_t size,
	team_id team, bool copyIn)
{
	TRACE("  IORequest::_CopySimple(%p, %p, %lu, %d)\n", bounceBuffer, external,
		size, copyIn);
	if (copyIn)
		memcpy(bounceBuffer, external, size);
	else
		memcpy(external, bounceBuffer, size);
	return B_OK;
}


/* static */ status_t
IORequest::_CopyPhysical(void* bounceBuffer, void* external, size_t size,
	team_id team, bool copyIn)
{
	if (copyIn) {
		return vm_memcpy_from_physical(bounceBuffer, (addr_t)external, size,
			false);
	}

	return vm_memcpy_to_physical((addr_t)external, bounceBuffer, size, false);
}


/* static */ status_t
IORequest::_CopyUser(void* _bounceBuffer, void* _external, size_t size,
	team_id team, bool copyIn)
{
	uint8* bounceBuffer = (uint8*)_bounceBuffer;
	uint8* external = (uint8*)_external;

	while (size > 0) {
		static const int32 kEntryCount = 8;
		physical_entry entries[kEntryCount];

		uint32 count = kEntryCount;
		status_t error = get_memory_map_etc(team, external, size, entries,
			&count);
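		// B_BUFFER_OVERFLOW only means that there are more physical ranges
		// than fit into our entries array; we copy what we got and fetch the
		// remainder in the next iteration of the outer loop.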
		if (error != B_OK && error != B_BUFFER_OVERFLOW) {
			panic("IORequest::_CopyUser(): Failed to get physical memory for "
				"user memory %p\n", external);
			return B_BAD_ADDRESS;
		}

		for (uint32 i = 0; i < count; i++) {
			const physical_entry& entry = entries[i];
			error = _CopyPhysical(bounceBuffer, entry.address,
				entry.size, team, copyIn);
			if (error != B_OK)
				return error;

			size -= entry.size;
			bounceBuffer += entry.size;
			external += entry.size;
		}
	}

	return B_OK;
}


void
IORequest::Dump() const
{
	kprintf("io_request at %p\n", this);

	kprintf("  owner:             %p\n", fOwner);
	kprintf("  parent:            %p\n", fParent);
	kprintf("  status:            %s\n", strerror(fStatus));
	kprintf("  mutex:             %p\n", &fLock);
	kprintf("  IOBuffer:          %p\n", fBuffer);
	kprintf("  offset:            %Ld\n", fOffset);
	kprintf("  length:            %lu\n", fLength);
	kprintf("  transfer size:     %lu\n", fTransferSize);
	kprintf("  relative offset:   %lu\n", fRelativeParentOffset);
	kprintf("  pending children:  %ld\n", fPendingChildren);
	kprintf("  flags:             %#lx\n", fFlags);
	kprintf("  team:              %ld\n", fTeam);
	kprintf("  thread:            %ld\n", fThread);
	kprintf("  r/w:               %s\n", fIsWrite ? "write" : "read");
	kprintf("  partial transfer:  %s\n", fPartialTransfer ? "yes" : "no");
	kprintf("  finished cvar:     %p\n", &fFinishedCondition);
	kprintf("  iteration:\n");
	kprintf("    vec index:       %lu\n", fVecIndex);
	kprintf("    vec offset:      %lu\n", fVecOffset);
	kprintf("    remaining bytes: %lu\n", fRemainingBytes);
	kprintf("  callbacks:\n");
	kprintf("    finished %p, cookie %p\n", fFinishedCallback, fFinishedCookie);
	kprintf("    iteration %p, cookie %p\n", fIterationCallback,
		fIterationCookie);
	kprintf("  children:\n");

	IORequestChunkList::ConstIterator iterator = fChildren.GetIterator();
	while (iterator.HasNext()) {
		kprintf("    %p\n", iterator.Next());
	}

	set_debug_variable("_parent", (addr_t)fParent);
	set_debug_variable("_mutex", (addr_t)&fLock);
	set_debug_variable("_buffer", (addr_t)fBuffer);
	set_debug_variable("_cvar", (addr_t)&fFinishedCondition);
}