xref: /haiku/src/add-ons/kernel/drivers/disk/virtual/ram_disk/ram_disk.cpp (revision a5a3b2d9a3d95cbae71eaf371708c73a1780ac0d)
1 /*
2  * Copyright 2010-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <file_systems/ram_disk/ram_disk.h>
8 
9 #include <ctype.h>
10 #include <errno.h>
11 #include <fcntl.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <unistd.h>
15 
16 #include <algorithm>
17 
18 #include <device_manager.h>
19 #include <Drivers.h>
20 
21 #include <AutoDeleter.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 
25 #include <fs/KPath.h>
26 #include <lock.h>
27 #include <vm/vm.h>
28 #include <vm/VMCache.h>
29 #include <vm/vm_page.h>
30 
31 #include "dma_resources.h"
32 #include "io_requests.h"
33 #include "IOSchedulerSimple.h"
34 
35 
36 //#define TRACE_CHECK_SUM_DEVICE
37 #ifdef TRACE_CHECK_SUM_DEVICE
38 #	define TRACE(x...)	dprintf(x)
39 #else
40 #	define TRACE(x...) do {} while (false)
41 #endif
42 
43 
44 // parameters for the DMA resource
45 static const uint32 kDMAResourceBufferCount			= 16;
46 static const uint32 kDMAResourceBounceBufferCount	= 16;
47 
48 static const char* const kDriverModuleName
49 	= "drivers/disk/virtual/ram_disk/driver_v1";
50 static const char* const kControlDeviceModuleName
51 	= "drivers/disk/virtual/ram_disk/control/device_v1";
52 static const char* const kRawDeviceModuleName
53 	= "drivers/disk/virtual/ram_disk/raw/device_v1";
54 
55 static const char* const kControlDeviceName = RAM_DISK_CONTROL_DEVICE_NAME;
56 static const char* const kRawDeviceBaseName = RAM_DISK_RAW_DEVICE_BASE_NAME;
57 
58 static const char* const kFilePathItem = "ram_disk/file_path";
59 static const char* const kDeviceSizeItem = "ram_disk/device_size";
60 static const char* const kDeviceIDItem = "ram_disk/id";
61 
62 
63 struct RawDevice;
64 typedef DoublyLinkedList<RawDevice> RawDeviceList;
65 
66 struct device_manager_info* sDeviceManager;
67 
68 static RawDeviceList sDeviceList;
69 static mutex sDeviceListLock = MUTEX_INITIALIZER("ram disk device list");
70 static uint64 sUsedRawDeviceIDs = 0;
71 
72 
73 static int32	allocate_raw_device_id();
74 static void		free_raw_device_id(int32 id);
75 
76 
77 struct Device {
78 	Device(device_node* node)
79 		:
80 		fNode(node)
81 	{
82 		mutex_init(&fLock, "ram disk device");
83 	}
84 
85 	virtual ~Device()
86 	{
87 		mutex_destroy(&fLock);
88 	}
89 
90 	bool Lock()		{ mutex_lock(&fLock); return true; }
91 	void Unlock()	{ mutex_unlock(&fLock); }
92 
93 	device_node* Node() const	{ return fNode; }
94 
95 	virtual status_t PublishDevice() = 0;
96 
97 protected:
98 	mutex			fLock;
99 	device_node*	fNode;
100 };
101 
102 
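// The control device through which userland registers and unregisters RAM
// disks (RAM_DISK_IOCTL_REGISTER/RAM_DISK_IOCTL_UNREGISTER).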
103 struct ControlDevice : Device {
104 	ControlDevice(device_node* node)
105 		:
106 		Device(node)
107 	{
108 	}
109 
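	// Allocate a device ID and register a new raw device node (as a sibling
	// of the control device's node) with the given size and optional backing
	// file path. On success the allocated ID is returned in _id.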
110 	status_t Register(const char* filePath, uint64 deviceSize, int32& _id)
111 	{
112 		int32 id = allocate_raw_device_id();
113 		if (id < 0)
114 			return B_BUSY;
115 
116 		device_attr attrs[] = {
117 			{B_DEVICE_PRETTY_NAME, B_STRING_TYPE,
118 				{string: "RAM Disk Raw Device"}},
119 			{kDeviceSizeItem, B_UINT64_TYPE, {ui64: deviceSize}},
120 			{kDeviceIDItem, B_UINT32_TYPE, {ui32: (uint32)id}},
121 			{kFilePathItem, B_STRING_TYPE, {string: filePath}},
122 			{NULL}
123 		};
124 
125 		// If filePath is NULL, remove the attribute.
126 		if (filePath == NULL) {
127 			size_t count = sizeof(attrs) / sizeof(attrs[0]);
128 			memset(attrs + count - 2, 0, sizeof(attrs[0]));
129 		}
130 
131 		status_t error = sDeviceManager->register_node(
132 			sDeviceManager->get_parent_node(Node()), kDriverModuleName, attrs,
133 			NULL, NULL);
134 		if (error != B_OK) {
135 			free_raw_device_id(id);
136 			return error;
137 		}
138 
139 		_id = id;
140 		return B_OK;
141 	}
142 
143 	virtual status_t PublishDevice()
144 	{
145 		return sDeviceManager->publish_device(Node(), kControlDeviceName,
146 			kControlDeviceModuleName);
147 	}
148 };
149 
150 
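// A single published RAM disk. Its content lives in an anonymous VMCache;
// an optional backing file is loaded in Prepare() and can be written back
// via Flush().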
151 struct RawDevice : Device, DoublyLinkedListLinkImpl<RawDevice> {
152 	RawDevice(device_node* node)
153 		:
154 		Device(node),
155 		fID(-1),
156 		fUnregistered(false),
157 		fDeviceSize(0),
158 		fDeviceName(NULL),
159 		fFilePath(NULL),
160 		fCache(NULL),
161 		fDMAResource(NULL),
162 		fIOScheduler(NULL)
163 	{
164 	}
165 
166 	virtual ~RawDevice()
167 	{
168 		if (fID >= 0) {
169 			MutexLocker locker(sDeviceListLock);
170 			sDeviceList.Remove(this);
171 		}
172 
173 		free(fDeviceName);
174 		free(fFilePath);
175 	}
176 
177 	int32 ID() const				{ return fID; }
178 	off_t DeviceSize() const		{ return fDeviceSize; }
179 	const char* DeviceName() const	{ return fDeviceName; }
180 
181 	bool IsUnregistered() const		{ return fUnregistered; }
182 
183 	void SetUnregistered(bool unregistered)
184 	{
185 		fUnregistered = unregistered;
186 	}
187 
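	// Called from the driver's init_driver hook: stores the ID and backing
	// file path, rounds the size up to a whole number of pages (rejecting
	// sizes below one page or above two thirds of physical memory), builds
	// the device path and inserts the device into the global list, sorted
	// by ID.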
188 	status_t Init(int32 id, const char* filePath, uint64 deviceSize)
189 	{
190 		fID = id;
191 		fFilePath = filePath != NULL ? strdup(filePath) : NULL;
192 		if (filePath != NULL && fFilePath == NULL)
193 			return B_NO_MEMORY;
194 
195 		fDeviceSize = (deviceSize + B_PAGE_SIZE - 1) / B_PAGE_SIZE
196 			* B_PAGE_SIZE;
197 
198 		if (fDeviceSize < B_PAGE_SIZE
199 			|| (uint64)fDeviceSize / B_PAGE_SIZE
200 				> vm_page_num_pages() * 2 / 3) {
201 			return B_BAD_VALUE;
202 		}
203 
204 		// construct our device path
205 		KPath path(kRawDeviceBaseName);
206 		char buffer[32];
207 		snprintf(buffer, sizeof(buffer), "%" B_PRId32 "/raw", fID);
208 
209 		status_t error = path.Append(buffer);
210 		if (error != B_OK)
211 			return error;
212 
213 		fDeviceName = path.DetachBuffer();
214 
215 		// insert into device list
216 		RawDevice* nextDevice = NULL;
217 		MutexLocker locker(sDeviceListLock);
218 		for (RawDeviceList::Iterator it = sDeviceList.GetIterator();
219 				(nextDevice = it.Next()) != NULL;) {
220 			if (nextDevice->ID() > fID)
221 				break;
222 		}
223 
224 		sDeviceList.InsertBefore(nextDevice, this);
225 
226 		return B_OK;
227 	}
228 
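	// Called from the raw device's init_device hook: creates and commits the
	// anonymous VMCache backing the disk, loads the backing file (if any)
	// into it and sets up the DMA resource and I/O scheduler.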
229 	status_t Prepare()
230 	{
231 		status_t error = VMCacheFactory::CreateAnonymousCache(fCache, false, 0,
232 			0, false, VM_PRIORITY_SYSTEM);
233 		if (error != B_OK) {
234 			Unprepare();
235 			return error;
236 		}
237 
238 		fCache->temporary = 1;
239 		fCache->virtual_end = fDeviceSize;
240 
241 		error = fCache->Commit(fDeviceSize, VM_PRIORITY_SYSTEM);
242 		if (error != B_OK) {
243 			Unprepare();
244 			return error;
245 		}
246 
247 		if (fFilePath != NULL) {
248 			error = _LoadFile();
249 			if (error != B_OK) {
250 				Unprepare();
251 				return error;
252 			}
253 		}
254 
255 		// no DMA restrictions
256 		const dma_restrictions restrictions = {};
257 
258 		fDMAResource = new(std::nothrow) DMAResource;
259 		if (fDMAResource == NULL) {
260 			Unprepare();
261 			return B_NO_MEMORY;
262 		}
263 
264 		error = fDMAResource->Init(restrictions, B_PAGE_SIZE,
265 			kDMAResourceBufferCount, kDMAResourceBounceBufferCount);
266 		if (error != B_OK) {
267 			Unprepare();
268 			return error;
269 		}
270 
271 		fIOScheduler = new(std::nothrow) IOSchedulerSimple(fDMAResource);
272 		if (fIOScheduler == NULL) {
273 			Unprepare();
274 			return B_NO_MEMORY;
275 		}
276 
277 		error = fIOScheduler->Init("ram disk device scheduler");
278 		if (error != B_OK) {
279 			Unprepare();
280 			return error;
281 		}
282 
283 		fIOScheduler->SetCallback(&_DoIOEntry, this);
284 
285 		return B_OK;
286 	}
287 
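	// Counterpart to Prepare(): deletes the I/O scheduler and DMA resource
	// and releases the cache reference, freeing the disk's pages.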
288 	void Unprepare()
289 	{
290 		delete fIOScheduler;
291 		fIOScheduler = NULL;
292 
293 		delete fDMAResource;
294 		fDMAResource = NULL;
295 
296 		if (fCache != NULL) {
297 			fCache->Lock();
298 			fCache->ReleaseRefAndUnlock();
299 			fCache = NULL;
300 		}
301 	}
302 
303 	void GetInfo(ram_disk_ioctl_info& _info) const
304 	{
305 		_info.id = fID;
306 		_info.size = fDeviceSize;
307 		memset(&_info.path, 0, sizeof(_info.path));
308 		if (fFilePath != NULL)
309 			strlcpy(_info.path, fFilePath, sizeof(_info.path));
310 	}
311 
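	// Write modified pages back to the backing file. Pages are processed in
	// batches of up to kPageCountPerIteration, bridging runs of at most
	// kMaxGapSize unmodified pages; offsets without a cache page are written
	// as zeros.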
312 	status_t Flush()
313 	{
314 		static const size_t kPageCountPerIteration = 1024;
315 		static const size_t kMaxGapSize = 15;
316 
		// Without a backing file there is nothing to flush to.
		if (fFilePath == NULL)
			return B_BAD_VALUE;

317 		int fd = open(fFilePath, O_WRONLY);
318 		if (fd < 0)
319 			return errno;
320 		FileDescriptorCloser fdCloser(fd);
321 
322 		vm_page** pages = new(std::nothrow) vm_page*[kPageCountPerIteration];
323 		ArrayDeleter<vm_page*> pagesDeleter(pages);
324 
325 		uint8* buffer = (uint8*)malloc(kPageCountPerIteration * B_PAGE_SIZE);
326 		MemoryDeleter bufferDeleter(buffer);
327 
328 		if (pages == NULL || buffer == NULL)
329 			return B_NO_MEMORY;
330 
331 		// Iterate through all pages of the cache and write those back that have
332 		// been modified.
333 		AutoLocker<VMCache> locker(fCache);
334 
335 		status_t error = B_OK;
336 
337 		for (off_t offset = 0; offset < fDeviceSize;) {
338 			// find the first modified page at or after the current offset
339 			VMCachePagesTree::Iterator it
340 				= fCache->pages.GetIterator(offset / B_PAGE_SIZE, true, true);
341 			vm_page* firstModified;
342 			while ((firstModified = it.Next()) != NULL
343 				&& !firstModified->modified) {
344 			}
345 
346 			if (firstModified == NULL)
347 				break;
348 
349 			if (firstModified->busy) {
350 				fCache->WaitForPageEvents(firstModified, PAGE_EVENT_NOT_BUSY,
351 					true);
352 				continue;
353 			}
354 
355 			pages[0] = firstModified;
356 			page_num_t firstPageIndex = firstModified->cache_offset;
357 			offset = firstPageIndex * B_PAGE_SIZE;
358 
359 			// Collect more pages until the gap between two modified pages gets
360 			// too large or we hit the end of our array.
361 			size_t previousModifiedIndex = 0;
362 			size_t previousIndex = 0;
363 			while (vm_page* page = it.Next()) {
364 				page_num_t index = page->cache_offset - firstPageIndex;
365 				if (page->busy
366 					|| index >= kPageCountPerIteration
367 					|| index - previousModifiedIndex > kMaxGapSize) {
368 					break;
369 				}
370 
371 				pages[index] = page;
372 
373 				// clear the gap in the page array since the previous page
374 				if (previousIndex + 1 < index) {
375 					memset(pages + previousIndex + 1, 0,
376 						(index - previousIndex - 1) * sizeof(vm_page*));
377 				}
378 
379 				previousIndex = index;
380 				if (page->modified)
381 					previousModifiedIndex = index;
382 			}
383 
384 			// mark all pages we want to write busy
385 			size_t pagesToWrite = previousModifiedIndex + 1;
386 			for (size_t i = 0; i < pagesToWrite; i++) {
387 				if (vm_page* page = pages[i]) {
388 					DEBUG_PAGE_ACCESS_START(page);
389 					page->busy = true;
390 				}
391 			}
392 
393 			locker.Unlock();
394 
395 			// copy the pages to our buffer
396 			for (size_t i = 0; i < pagesToWrite; i++) {
397 				if (vm_page* page = pages[i]) {
398 					error = vm_memcpy_from_physical(buffer + i * B_PAGE_SIZE,
399 						page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE,
400 						false);
401 					if (error != B_OK) {
402 						dprintf("ramdisk: error copying page %" B_PRIu64
403 							" data: %s\n", (uint64)page->physical_page_number,
404 							strerror(error));
405 						break;
406 					}
407 				} else
408 					memset(buffer + i * B_PAGE_SIZE, 0, B_PAGE_SIZE);
409 			}
410 
411 			// write the buffer
412 			if (error == B_OK) {
413 				ssize_t bytesWritten = pwrite(fd, buffer,
414 					pagesToWrite * B_PAGE_SIZE, offset);
415 				if (bytesWritten < 0) {
416 					dprintf("ramdisk: error writing pages to file: %s\n",
417 						strerror(errno));
418 					error = errno;
419 				}
420 				else if ((size_t)bytesWritten != pagesToWrite * B_PAGE_SIZE) {
421 					dprintf("ramdisk: error writing pages to file: short "
422 						"write (%zd/%zu)\n", bytesWritten,
423 						pagesToWrite * B_PAGE_SIZE);
424 					error = B_ERROR;
425 				}
426 			}
427 
428 			// mark the pages unbusy, on success also unmodified
429 			locker.Lock();
430 
431 			for (size_t i = 0; i < pagesToWrite; i++) {
432 				if (vm_page* page = pages[i]) {
433 					if (error == B_OK)
434 						page->modified = false;
435 					fCache->MarkPageUnbusy(page);
436 					DEBUG_PAGE_ACCESS_END(page);
437 				}
438 			}
439 
440 			if (error != B_OK)
441 				break;
442 
443 			offset += pagesToWrite * B_PAGE_SIZE;
444 		}
445 
446 		return error;
447 	}
448 
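	// Hand an I/O request to the I/O scheduler; the actual work is done
	// asynchronously in _DoIO(), invoked via the _DoIOEntry() callback.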
449 	status_t DoIO(IORequest* request)
450 	{
451 		return fIOScheduler->ScheduleRequest(request);
452 	}
453 
454 	virtual status_t PublishDevice()
455 	{
456 		return sDeviceManager->publish_device(Node(), fDeviceName,
457 			kRawDeviceModuleName);
458 	}
459 
460 private:
461 	static status_t _DoIOEntry(void* data, IOOperation* operation)
462 	{
463 		return ((RawDevice*)data)->_DoIO(operation);
464 	}
465 
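	// I/O scheduler callback for a single page-aligned operation: fetches
	// (and, for writes, allocates) the affected cache pages and copies the
	// data between them and the operation's I/O vectors.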
466 	status_t _DoIO(IOOperation* operation)
467 	{
468 		off_t offset = operation->Offset();
469 		generic_size_t length = operation->Length();
470 
471 		ASSERT(offset % B_PAGE_SIZE == 0);
472 		ASSERT(length % B_PAGE_SIZE == 0);
473 
474 		const generic_io_vec* vecs = operation->Vecs();
475 		generic_size_t vecOffset = 0;
476 		bool isWrite = operation->IsWrite();
477 
478 		vm_page** pages = new(std::nothrow) vm_page*[length / B_PAGE_SIZE];
479 		if (pages == NULL)
480 			return B_NO_MEMORY;
481 		ArrayDeleter<vm_page*> pagesDeleter(pages);
482 
483 		_GetPages(offset, length, isWrite, pages);
484 
485 		status_t error = B_OK;
486 		size_t index = 0;
487 
488 		while (length > 0) {
489 			vm_page* page = pages[index];
490 
491 			if (isWrite)
492 				page->modified = true;
493 
494 			error = _CopyData(page, vecs, vecOffset, isWrite);
495 			if (error != B_OK)
496 				break;
497 
498 			offset += B_PAGE_SIZE;
499 			length -= B_PAGE_SIZE;
500 			index++;
501 		}
502 
503 		_PutPages(operation->Offset(), operation->Length(), pages,
504 			error == B_OK);
505 
506 		if (error != B_OK) {
507 			fIOScheduler->OperationCompleted(operation, error, 0);
508 			return error;
509 		}
510 
511 		fIOScheduler->OperationCompleted(operation, B_OK, operation->Length());
512 		return B_OK;
513 	}
514 
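	// Look up the cache pages covering [offset, offset + length) and mark
	// them busy. Missing pages are left NULL for reads; for writes they are
	// allocated wired and busy, but not yet inserted into the cache.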
515 	void _GetPages(off_t offset, off_t length, bool isWrite, vm_page** pages)
516 	{
517 		// get the pages we already have
518 		AutoLocker<VMCache> locker(fCache);
519 
520 		size_t pageCount = length / B_PAGE_SIZE;
521 		size_t index = 0;
522 		size_t missingPages = 0;
523 
524 		while (length > 0) {
525 			vm_page* page = fCache->LookupPage(offset);
526 			if (page != NULL) {
527 				if (page->busy) {
528 					fCache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
529 					continue;
530 				}
531 
532 				DEBUG_PAGE_ACCESS_START(page);
533 				page->busy = true;
534 			} else
535 				missingPages++;
536 
537 			pages[index++] = page;
538 			offset += B_PAGE_SIZE;
539 			length -= B_PAGE_SIZE;
540 		}
541 
542 		locker.Unlock();
543 
544 		// For a write we need to reserve the missing pages.
545 		if (isWrite && missingPages > 0) {
546 			vm_page_reservation reservation;
547 			vm_page_reserve_pages(&reservation, missingPages,
548 				VM_PRIORITY_SYSTEM);
549 
550 			for (size_t i = 0; i < pageCount; i++) {
551 				if (pages[i] != NULL)
552 					continue;
553 
554 				pages[i] = vm_page_allocate_page(&reservation,
555 					PAGE_STATE_WIRED | VM_PAGE_ALLOC_BUSY);
556 
557 				if (--missingPages == 0)
558 					break;
559 			}
560 
561 			vm_page_unreserve_pages(&reservation);
562 		}
563 	}
564 
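	// Counterpart to _GetPages(): marks the pages unbusy again. Newly
	// allocated pages (no cache ref yet) are inserted into the cache on
	// success, or freed on failure.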
565 	void _PutPages(off_t offset, off_t length, vm_page** pages, bool success)
566 	{
567 		AutoLocker<VMCache> locker(fCache);
568 
569 		// Mark all pages unbusy. On error free the newly allocated pages.
570 		size_t index = 0;
571 
572 		while (length > 0) {
573 			vm_page* page = pages[index++];
574 			if (page != NULL) {
575 				if (page->CacheRef() == NULL) {
576 					if (success) {
577 						fCache->InsertPage(page, offset);
578 						fCache->MarkPageUnbusy(page);
579 						DEBUG_PAGE_ACCESS_END(page);
580 					} else
581 						vm_page_free(NULL, page);
582 				} else {
583 					fCache->MarkPageUnbusy(page);
584 					DEBUG_PAGE_ACCESS_END(page);
585 				}
586 			}
587 
588 			offset += B_PAGE_SIZE;
589 			length -= B_PAGE_SIZE;
590 		}
591 	}
592 
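	// Copy one page worth of data between the given page and the I/O
	// vectors, advancing vecs/vecOffset. The page is mapped through the
	// current CPU's physical page mapper; a NULL page on a read means the
	// vector range is simply zero-filled.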
593 	status_t _CopyData(vm_page* page, const generic_io_vec*& vecs,
594 		generic_size_t& vecOffset, bool toPage)
595 	{
596 		// map page to virtual memory
597 		Thread* thread = thread_get_current_thread();
598 		uint8* pageData = NULL;
599 		void* handle;
600 		if (page != NULL) {
601 			thread_pin_to_current_cpu(thread);
602 			addr_t virtualAddress;
603 			status_t error = vm_get_physical_page_current_cpu(
604 				page->physical_page_number * B_PAGE_SIZE, &virtualAddress,
605 				&handle);
606 			if (error != B_OK) {
607 				thread_unpin_from_current_cpu(thread);
608 				return error;
609 			}
610 
611 			pageData = (uint8*)virtualAddress;
612 		}
613 
614 		status_t error = B_OK;
615 		size_t length = B_PAGE_SIZE;
616 		while (length > 0) {
617 			size_t toCopy = std::min((generic_size_t)length,
618 				vecs->length - vecOffset);
619 
620 			if (toCopy == 0) {
621 				vecs++;
622 				vecOffset = 0;
623 				continue;
624 			}
625 
626 			phys_addr_t vecAddress = vecs->base + vecOffset;
627 
628 			error = toPage
629 				? vm_memcpy_from_physical(pageData, vecAddress, toCopy, false)
630 				: (page != NULL
631 					? vm_memcpy_to_physical(vecAddress, pageData, toCopy, false)
632 					: vm_memset_physical(vecAddress, 0, toCopy));
633 			if (error != B_OK)
634 				break;
635 
636 			pageData += toCopy;
637 			length -= toCopy;
638 			vecOffset += toCopy;
639 		}
640 
641 		if (page != NULL) {
642 			vm_put_physical_page_current_cpu((addr_t)pageData, handle);
643 			thread_unpin_from_current_cpu(thread);
644 		}
645 
646 		return error;
647 	}
648 
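	// Read the backing file into freshly allocated wired pages and insert
	// them into the cache. Pages that turn out to be entirely zero are not
	// inserted but recycled for the next iteration.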
649 	status_t _LoadFile()
650 	{
651 		static const size_t kPageCountPerIteration = 1024;
652 
653 		int fd = open(fFilePath, O_RDONLY);
654 		if (fd < 0)
655 			return errno;
656 		FileDescriptorCloser fdCloser(fd);
657 
658 		vm_page** pages = new(std::nothrow) vm_page*[kPageCountPerIteration];
659 		ArrayDeleter<vm_page*> pagesDeleter(pages);
660 
661 		uint8* buffer = (uint8*)malloc(kPageCountPerIteration * B_PAGE_SIZE);
662 		MemoryDeleter bufferDeleter(buffer);
663 			// TODO: Ideally we wouldn't use a buffer to read the file content,
664 			// but read into the pages we allocated directly. Unfortunately
665 			// there's no API to do that yet.
666 
667 		if (pages == NULL || buffer == NULL)
668 			return B_NO_MEMORY;
669 
670 		status_t error = B_OK;
671 
672 		page_num_t allocatedPages = 0;
673 		off_t offset = 0;
674 		off_t sizeRemaining = fDeviceSize;
675 		while (sizeRemaining > 0) {
676 			// Note: fDeviceSize is B_PAGE_SIZE aligned.
677 			size_t pagesToRead = std::min(kPageCountPerIteration,
678 				size_t(sizeRemaining / B_PAGE_SIZE));
679 
680 			// allocate the missing pages
681 			if (allocatedPages < pagesToRead) {
682 				vm_page_reservation reservation;
683 				vm_page_reserve_pages(&reservation,
684 					pagesToRead - allocatedPages, VM_PRIORITY_SYSTEM);
685 
686 				while (allocatedPages < pagesToRead) {
687 					pages[allocatedPages++]
688 						= vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
689 				}
690 
691 				vm_page_unreserve_pages(&reservation);
692 			}
693 
694 			// read from the file
695 			size_t bytesToRead = pagesToRead * B_PAGE_SIZE;
696 			ssize_t bytesRead = pread(fd, buffer, bytesToRead, offset);
697 			if (bytesRead < 0) {
698 				error = errno;
699 				break;
700 			}
701 			size_t pagesRead = (bytesRead + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
702 			if (pagesRead < pagesToRead) {
703 				error = B_ERROR;
704 				break;
705 			}
706 
707 			// clear the last read page, if partial
708 			if ((size_t)bytesRead < pagesRead * B_PAGE_SIZE) {
709 				memset(buffer + bytesRead, 0,
710 					pagesRead * B_PAGE_SIZE - bytesRead);
711 			}
712 
713 			// copy data to allocated pages
714 			for (size_t i = 0; i < pagesRead; i++) {
715 				vm_page* page = pages[i];
716 				error = vm_memcpy_to_physical(
717 					page->physical_page_number * B_PAGE_SIZE,
718 					buffer + i * B_PAGE_SIZE, B_PAGE_SIZE, false);
719 				if (error != B_OK)
720 					break;
721 			}
722 
723 			if (error != B_OK)
724 				break;
725 
726 			// Add pages to cache. Ignore clear pages, though. Move those to the
727 			// beginning of the array, so we can reuse them in the next
728 			// iteration.
729 			AutoLocker<VMCache> locker(fCache);
730 
731 			size_t clearPages = 0;
732 			for (size_t i = 0; i < pagesRead; i++) {
733 				uint64* pageData = (uint64*)(buffer + i * B_PAGE_SIZE);
734 				bool isClear = true;
735 				for (size_t k = 0; isClear && k < B_PAGE_SIZE / 8; k++)
736 					isClear = pageData[k] == 0;
737 
738 				if (isClear) {
739 					pages[clearPages++] = pages[i];
740 				} else {
741 					fCache->InsertPage(pages[i], offset + i * B_PAGE_SIZE);
742 					DEBUG_PAGE_ACCESS_END(pages[i]);
743 				}
744 			}
745 
746 			locker.Unlock();
747 
748 			// Move any left-over allocated pages to the end of the clear pages
749 			// and update the allocated page count.
750 			if (pagesRead < allocatedPages) {
751 				size_t count = allocatedPages - pagesRead;
752 				memcpy(pages + clearPages, pages + pagesRead,
753 					count * sizeof(vm_page*));
754 				clearPages += count;
755 			}
756 			allocatedPages = clearPages;
757 
758 			offset += pagesRead * B_PAGE_SIZE;
759 			sizeRemaining -= pagesRead * B_PAGE_SIZE;
760 		}
761 
762 		// free left-over allocated pages
763 		for (size_t i = 0; i < allocatedPages; i++)
764 			vm_page_free(NULL, pages[i]);
765 
766 		return error;
767 	}
768 
769 private:
770 	int32			fID;
771 	bool			fUnregistered;
772 	off_t			fDeviceSize;
773 	char*			fDeviceName;
774 	char*			fFilePath;
775 	VMCache*		fCache;
776 	DMAResource*	fDMAResource;
777 	IOScheduler*	fIOScheduler;
778 };
779 
780 
781 struct RawDeviceCookie {
782 	RawDeviceCookie(RawDevice* device, int openMode)
783 		:
784 		fDevice(device),
785 		fOpenMode(openMode)
786 	{
787 	}
788 
789 	RawDevice* Device() const	{ return fDevice; }
790 	int OpenMode() const		{ return fOpenMode; }
791 
792 private:
793 	RawDevice*	fDevice;
794 	int			fOpenMode;
795 };
796 
797 
798 // #pragma mark -
799 
800 
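// Allocate the lowest free device ID from the sUsedRawDeviceIDs bitmap.
// Since the bitmap is a uint64, at most 64 RAM disks can exist at a time;
// returns -1 when all IDs are in use.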
801 static int32
802 allocate_raw_device_id()
803 {
804 	MutexLocker deviceListLocker(sDeviceListLock);
805 	for (size_t i = 0; i < sizeof(sUsedRawDeviceIDs) * 8; i++) {
806 		if ((sUsedRawDeviceIDs & ((uint64)1 << i)) == 0) {
807 			sUsedRawDeviceIDs |= (uint64)1 << i;
808 			return (int32)i;
809 		}
810 	}
811 
812 	return -1;
813 }
814 
815 
816 static void
817 free_raw_device_id(int32 id)
818 {
819 	MutexLocker deviceListLocker(sDeviceListLock);
820 	sUsedRawDeviceIDs &= ~((uint64)1 << id);
821 }
822 
823 
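// Return the raw device with the given ID, or NULL if there is none. The
// caller must hold sDeviceListLock.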
824 static RawDevice*
825 find_raw_device(int32 id)
826 {
827 	for (RawDeviceList::Iterator it = sDeviceList.GetIterator();
828 			RawDevice* device = it.Next();) {
829 		if (device->ID() == id)
830 			return device;
831 	}
832 
833 	return NULL;
834 }
835 
836 
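// RAM_DISK_IOCTL_REGISTER: create a new RAM disk, either backed by a
// regular file (whose size determines the device size) or purely
// memory-backed with the requested size.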
837 static status_t
838 ioctl_register(ControlDevice* controlDevice, ram_disk_ioctl_register* request)
839 {
840 	KPath path;
841 	uint64 deviceSize = 0;
842 
843 	if (request->path[0] != '\0') {
844 		// check if the path is null-terminated
845 		if (strnlen(request->path, sizeof(request->path))
846 				== sizeof(request->path)) {
847 			return B_BAD_VALUE;
848 		}
849 
850 		// get a normalized file path
851 		status_t error = path.SetTo(request->path, true);
852 		if (error != B_OK) {
853 			dprintf("ramdisk: register: Invalid path \"%s\": %s\n",
854 				request->path, strerror(error));
855 			return B_BAD_VALUE;
856 		}
857 
858 		struct stat st;
859 		if (lstat(path.Path(), &st) != 0) {
860 			dprintf("ramdisk: register: Failed to stat \"%s\": %s\n",
861 				path.Path(), strerror(errno));
862 			return errno;
863 		}
864 
865 		if (!S_ISREG(st.st_mode)) {
866 			dprintf("ramdisk: register: \"%s\" is not a file!\n", path.Path());
867 			return B_BAD_VALUE;
868 		}
869 
870 		deviceSize = st.st_size;
871 	} else {
872 		deviceSize = request->size;
873 	}
874 
875 	return controlDevice->Register(path.Length() > 0 ? path.Path() : NULL,
876 		deviceSize, request->id);
877 }
878 
879 
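// RAM_DISK_IOCTL_UNREGISTER: unpublish and unregister the raw device with
// the given ID.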
880 static status_t
881 ioctl_unregister(ControlDevice* controlDevice,
882 	ram_disk_ioctl_unregister* request)
883 {
884 	// find the device in the list and unregister it
885 	MutexLocker locker(sDeviceListLock);
886 	RawDevice* device = find_raw_device(request->id);
887 	if (device == NULL)
888 		return B_ENTRY_NOT_FOUND;
889 
890 	// mark unregistered before we unlock
891 	if (device->IsUnregistered())
892 		return B_BUSY;
893 	device->SetUnregistered(true);
894 	locker.Unlock();
895 
896 	device_node* node = device->Node();
897 	status_t error = sDeviceManager->unpublish_device(node,
898 		device->DeviceName());
899 	if (error != B_OK) {
900 		dprintf("ramdisk: unregister: Failed to unpublish device \"%s\": %s\n",
901 			device->DeviceName(), strerror(error));
902 		return error;
903 	}
904 
905 	error = sDeviceManager->unregister_node(node);
906 	// Note: B_BUSY is OK. The node will be removed as soon as possible.
907 	if (error != B_OK && error != B_BUSY) {
908 		dprintf("ramdisk: unregister: Failed to unregister node for device %"
909 			B_PRId32 ": %s\n", request->id, strerror(error));
910 		return error;
911 	}
912 
913 	return B_OK;
914 }
915 
916 
917 static status_t
918 ioctl_info(RawDevice* device, ram_disk_ioctl_info* request)
919 {
920 	device->GetInfo(*request);
921 	return B_OK;
922 }
923 
924 
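// Common ioctl glue: copy the userland request structure into a kernel-side
// copy, run the handler and copy the (possibly updated) structure back.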
925 template<typename DeviceType, typename Request>
926 static status_t
927 handle_ioctl(DeviceType* device,
928 	status_t (*handler)(DeviceType*, Request*), void* buffer)
929 {
930 	// copy request to the kernel heap
931 	if (buffer == NULL || !IS_USER_ADDRESS(buffer))
932 		return B_BAD_ADDRESS;
933 
934 	Request* request = new(std::nothrow) Request;
935 	if (request == NULL)
936 		return B_NO_MEMORY;
937 	ObjectDeleter<Request> requestDeleter(request);
938 
939 	if (user_memcpy(request, buffer, sizeof(Request)) != B_OK)
940 		return B_BAD_ADDRESS;
941 
942 	// handle the ioctl
943 	status_t error = handler(device, request);
944 	if (error != B_OK)
945 		return error;
946 
947 	// copy the request back to userland
948 	if (user_memcpy(buffer, request, sizeof(Request)) != B_OK)
949 		return B_BAD_ADDRESS;
950 
951 	return B_OK;
952 }
953 
954 
955 //	#pragma mark - driver
956 
957 
958 static float
959 ram_disk_driver_supports_device(device_node* parent)
960 {
961 	const char* bus = NULL;
962 	if (sDeviceManager->get_attr_string(parent, B_DEVICE_BUS, &bus, false)
963 			== B_OK
964 		&& strcmp(bus, "generic") == 0) {
965 		return 0.8;
966 	}
967 
968 	return -1;
969 }
970 
971 
972 static status_t
973 ram_disk_driver_register_device(device_node* parent)
974 {
975 	device_attr attrs[] = {
976 		{B_DEVICE_PRETTY_NAME, B_STRING_TYPE,
977 			{string: "RAM Disk Control Device"}},
978 		{NULL}
979 	};
980 
981 	return sDeviceManager->register_node(parent, kDriverModuleName, attrs, NULL,
982 		NULL);
983 }
984 
985 
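// init_driver hook: nodes with a kDeviceSizeItem attribute are raw devices
// (created via ioctl_register()); nodes without it represent the control
// device registered in ram_disk_driver_register_device().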
986 static status_t
987 ram_disk_driver_init_driver(device_node* node, void** _driverCookie)
988 {
989 	uint64 deviceSize;
990 	if (sDeviceManager->get_attr_uint64(node, kDeviceSizeItem, &deviceSize,
991 			false) == B_OK) {
992 		int32 id = -1;
993 		sDeviceManager->get_attr_uint32(node, kDeviceIDItem, (uint32*)&id,
994 			false);
995 		if (id < 0)
996 			return B_ERROR;
997 
998 		const char* filePath = NULL;
999 		sDeviceManager->get_attr_string(node, kFilePathItem, &filePath, false);
1000 
1001 		RawDevice* device = new(std::nothrow) RawDevice(node);
1002 		if (device == NULL)
1003 			return B_NO_MEMORY;
1004 
1005 		status_t error = device->Init(id, filePath, deviceSize);
1006 		if (error != B_OK) {
1007 			delete device;
1008 			return error;
1009 		}
1010 
1011 		*_driverCookie = (Device*)device;
1012 	} else {
1013 		ControlDevice* device = new(std::nothrow) ControlDevice(node);
1014 		if (device == NULL)
1015 			return B_NO_MEMORY;
1016 
1017 		*_driverCookie = (Device*)device;
1018 	}
1019 
1020 	return B_OK;
1021 }
1022 
1023 
1024 static void
1025 ram_disk_driver_uninit_driver(void* driverCookie)
1026 {
1027 	Device* device = (Device*)driverCookie;
1028 	if (RawDevice* rawDevice = dynamic_cast<RawDevice*>(device))
1029 		free_raw_device_id(rawDevice->ID());
1030 	delete device;
1031 }
1032 
1033 
1034 static status_t
1035 ram_disk_driver_register_child_devices(void* driverCookie)
1036 {
1037 	Device* device = (Device*)driverCookie;
1038 	return device->PublishDevice();
1039 }
1040 
1041 
1042 //	#pragma mark - control device
1043 
1044 
1045 static status_t
1046 ram_disk_control_device_init_device(void* driverCookie, void** _deviceCookie)
1047 {
1048 	*_deviceCookie = driverCookie;
1049 	return B_OK;
1050 }
1051 
1052 
1053 static void
1054 ram_disk_control_device_uninit_device(void* deviceCookie)
1055 {
1056 }
1057 
1058 
1059 static status_t
1060 ram_disk_control_device_open(void* deviceCookie, const char* path, int openMode,
1061 	void** _cookie)
1062 {
1063 	*_cookie = deviceCookie;
1064 	return B_OK;
1065 }
1066 
1067 
1068 static status_t
1069 ram_disk_control_device_close(void* cookie)
1070 {
1071 	return B_OK;
1072 }
1073 
1074 
1075 static status_t
1076 ram_disk_control_device_free(void* cookie)
1077 {
1078 	return B_OK;
1079 }
1080 
1081 
1082 static status_t
1083 ram_disk_control_device_read(void* cookie, off_t position, void* buffer,
1084 	size_t* _length)
1085 {
1086 	return B_BAD_VALUE;
1087 }
1088 
1089 
1090 static status_t
1091 ram_disk_control_device_write(void* cookie, off_t position, const void* data,
1092 	size_t* _length)
1093 {
1094 	return B_BAD_VALUE;
1095 }
1096 
1097 
1098 static status_t
1099 ram_disk_control_device_control(void* cookie, uint32 op, void* buffer,
1100 	size_t length)
1101 {
1102 	ControlDevice* device = (ControlDevice*)cookie;
1103 
1104 	switch (op) {
1105 		case RAM_DISK_IOCTL_REGISTER:
1106 			return handle_ioctl(device, &ioctl_register, buffer);
1107 
1108 		case RAM_DISK_IOCTL_UNREGISTER:
1109 			return handle_ioctl(device, &ioctl_unregister, buffer);
1110 	}
1111 
1112 	return B_BAD_VALUE;
1113 }
1114 
1115 
1116 //	#pragma mark - raw device
1117 
1118 
1119 static status_t
1120 ram_disk_raw_device_init_device(void* driverCookie, void** _deviceCookie)
1121 {
1122 	RawDevice* device = static_cast<RawDevice*>((Device*)driverCookie);
1123 
1124 	status_t error = device->Prepare();
1125 	if (error != B_OK)
1126 		return error;
1127 
1128 	*_deviceCookie = device;
1129 	return B_OK;
1130 }
1131 
1132 
1133 static void
1134 ram_disk_raw_device_uninit_device(void* deviceCookie)
1135 {
1136 	RawDevice* device = (RawDevice*)deviceCookie;
1137 	device->Unprepare();
1138 }
1139 
1140 
1141 static status_t
1142 ram_disk_raw_device_open(void* deviceCookie, const char* path, int openMode,
1143 	void** _cookie)
1144 {
1145 	RawDevice* device = (RawDevice*)deviceCookie;
1146 
1147 	RawDeviceCookie* cookie = new(std::nothrow) RawDeviceCookie(device,
1148 		openMode);
1149 	if (cookie == NULL)
1150 		return B_NO_MEMORY;
1151 
1152 	*_cookie = cookie;
1153 	return B_OK;
1154 }
1155 
1156 
1157 static status_t
1158 ram_disk_raw_device_close(void* cookie)
1159 {
1160 	return B_OK;
1161 }
1162 
1163 
1164 static status_t
1165 ram_disk_raw_device_free(void* _cookie)
1166 {
1167 	RawDeviceCookie* cookie = (RawDeviceCookie*)_cookie;
1168 	delete cookie;
1169 	return B_OK;
1170 }
1171 
1172 
1173 static status_t
1174 ram_disk_raw_device_read(void* _cookie, off_t pos, void* buffer,
1175 	size_t* _length)
1176 {
1177 	RawDeviceCookie* cookie = (RawDeviceCookie*)_cookie;
1178 	RawDevice* device = cookie->Device();
1179 
1180 	size_t length = *_length;
1181 
1182 	if (pos >= device->DeviceSize())
1183 		return B_BAD_VALUE;
1184 	if (pos + (off_t)length > device->DeviceSize())
1185 		length = device->DeviceSize() - pos;
1186 
1187 	IORequest request;
1188 	status_t status = request.Init(pos, (addr_t)buffer, length, false, 0);
1189 	if (status != B_OK)
1190 		return status;
1191 
1192 	status = device->DoIO(&request);
1193 	if (status != B_OK)
1194 		return status;
1195 
1196 	status = request.Wait(0, 0);
1197 	if (status == B_OK)
1198 		*_length = length;
1199 	return status;
1200 }
1201 
1202 
1203 static status_t
1204 ram_disk_raw_device_write(void* _cookie, off_t pos, const void* buffer,
1205 	size_t* _length)
1206 {
1207 	RawDeviceCookie* cookie = (RawDeviceCookie*)_cookie;
1208 	RawDevice* device = cookie->Device();
1209 
1210 	size_t length = *_length;
1211 
1212 	if (pos >= device->DeviceSize())
1213 		return B_BAD_VALUE;
1214 	if (pos + (off_t)length > device->DeviceSize())
1215 		length = device->DeviceSize() - pos;
1216 
1217 	IORequest request;
1218 	status_t status = request.Init(pos, (addr_t)buffer, length, true, 0);
1219 	if (status != B_OK)
1220 		return status;
1221 
1222 	status = device->DoIO(&request);
1223 	if (status != B_OK)
1224 		return status;
1225 
1226 	status = request.Wait(0, 0);
1227 	if (status == B_OK)
1228 		*_length = length;
1229 
1230 	return status;
1231 }
1232 
1233 
1234 static status_t
1235 ram_disk_raw_device_io(void* _cookie, io_request* request)
1236 {
1237 	RawDeviceCookie* cookie = (RawDeviceCookie*)_cookie;
1238 	RawDevice* device = cookie->Device();
1239 
1240 	return device->DoIO(request);
1241 }
1242 
1243 
1244 static status_t
1245 ram_disk_raw_device_control(void* _cookie, uint32 op, void* buffer,
1246 	size_t length)
1247 {
1248 	RawDeviceCookie* cookie = (RawDeviceCookie*)_cookie;
1249 	RawDevice* device = cookie->Device();
1250 
1251 	switch (op) {
1252 		case B_GET_DEVICE_SIZE:
1253 		{
1254 			size_t size = device->DeviceSize();
1255 			return user_memcpy(buffer, &size, sizeof(size_t));
1256 		}
1257 
1258 		case B_SET_NONBLOCKING_IO:
1259 		case B_SET_BLOCKING_IO:
1260 			return B_OK;
1261 
1262 		case B_GET_READ_STATUS:
1263 		case B_GET_WRITE_STATUS:
1264 		{
1265 			bool value = true;
1266 			return user_memcpy(buffer, &value, sizeof(bool));
1267 		}
1268 
1269 		case B_GET_GEOMETRY:
1270 		case B_GET_BIOS_GEOMETRY:
1271 		{
1272 			device_geometry geometry;
1273 			geometry.bytes_per_sector = B_PAGE_SIZE;
1274 			geometry.sectors_per_track = 1;
1275 			geometry.cylinder_count = device->DeviceSize() / B_PAGE_SIZE;
1276 				// TODO: Unless we also use sectors_per_track and head_count,
1277 				// we're limited to 2^32 * B_PAGE_SIZE.
1278 			geometry.head_count = 1;
1279 			geometry.device_type = B_DISK;
1280 			geometry.removable = true;
1281 			geometry.read_only = false;
1282 			geometry.write_once = false;
1283 
1284 			return user_memcpy(buffer, &geometry, sizeof(device_geometry));
1285 		}
1286 
1287 		case B_GET_MEDIA_STATUS:
1288 		{
1289 			status_t status = B_OK;
1290 			return user_memcpy(buffer, &status, sizeof(status_t));
1291 		}
1292 
1293 		case B_SET_UNINTERRUPTABLE_IO:
1294 		case B_SET_INTERRUPTABLE_IO:
1295 		case B_FLUSH_DRIVE_CACHE:
1296 			return B_OK;
1297 
1298 		case RAM_DISK_IOCTL_FLUSH:
1299 		{
1300 			status_t error = device->Flush();
1301 			if (error != B_OK) {
1302 				dprintf("ramdisk: flush: Failed to flush device: %s\n",
1303 					strerror(error));
1304 				return error;
1305 			}
1306 
1307 			return B_OK;
1308 		}
1309 
1310 		case RAM_DISK_IOCTL_INFO:
1311 			return handle_ioctl(device, &ioctl_info, buffer);
1312 	}
1313 
1314 	return B_BAD_VALUE;
1315 }
1316 
1317 
1318 // #pragma mark -
1319 
1320 
1321 module_dependency module_dependencies[] = {
1322 	{B_DEVICE_MANAGER_MODULE_NAME, (module_info**)&sDeviceManager},
1323 	{}
1324 };
1325 
1326 
1327 static const struct driver_module_info sChecksumDeviceDriverModule = {
1328 	{
1329 		kDriverModuleName,
1330 		0,
1331 		NULL
1332 	},
1333 
1334 	ram_disk_driver_supports_device,
1335 	ram_disk_driver_register_device,
1336 	ram_disk_driver_init_driver,
1337 	ram_disk_driver_uninit_driver,
1338 	ram_disk_driver_register_child_devices
1339 };
1340 
1341 static const struct device_module_info sChecksumControlDeviceModule = {
1342 	{
1343 		kControlDeviceModuleName,
1344 		0,
1345 		NULL
1346 	},
1347 
1348 	ram_disk_control_device_init_device,
1349 	ram_disk_control_device_uninit_device,
1350 	NULL,
1351 
1352 	ram_disk_control_device_open,
1353 	ram_disk_control_device_close,
1354 	ram_disk_control_device_free,
1355 
1356 	ram_disk_control_device_read,
1357 	ram_disk_control_device_write,
1358 	NULL,	// io
1359 
1360 	ram_disk_control_device_control,
1361 
1362 	NULL,	// select
1363 	NULL	// deselect
1364 };
1365 
1366 static const struct device_module_info sChecksumRawDeviceModule = {
1367 	{
1368 		kRawDeviceModuleName,
1369 		0,
1370 		NULL
1371 	},
1372 
1373 	ram_disk_raw_device_init_device,
1374 	ram_disk_raw_device_uninit_device,
1375 	NULL,
1376 
1377 	ram_disk_raw_device_open,
1378 	ram_disk_raw_device_close,
1379 	ram_disk_raw_device_free,
1380 
1381 	ram_disk_raw_device_read,
1382 	ram_disk_raw_device_write,
1383 	ram_disk_raw_device_io,
1384 
1385 	ram_disk_raw_device_control,
1386 
1387 	NULL,	// select
1388 	NULL	// deselect
1389 };
1390 
1391 const module_info* modules[] = {
1392 	(module_info*)&sChecksumDeviceDriverModule,
1393 	(module_info*)&sChecksumControlDeviceModule,
1394 	(module_info*)&sChecksumRawDeviceModule,
1395 	NULL
1396 };
1397