xref: /haiku/src/system/kernel/vm/vm.cpp (revision b9a5b9a6ee494261f2882bfc0ee9fde92282bef6)
1 /*
2  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include "vm_store_anonymous_noswap.h"
11 #include "vm_store_device.h"
12 #include "vm_store_null.h"
13 
14 #include <OS.h>
15 #include <KernelExport.h>
16 
17 #include <vm.h>
18 #include <vm_address_space.h>
19 #include <vm_priv.h>
20 #include <vm_page.h>
21 #include <vm_cache.h>
22 #include <vm_low_memory.h>
23 #include <file_cache.h>
24 #include <memheap.h>
25 #include <debug.h>
26 #include <console.h>
27 #include <int.h>
28 #include <smp.h>
29 #include <lock.h>
30 #include <thread.h>
31 #include <team.h>
32 
33 #include <boot/stage2.h>
34 #include <boot/elf.h>
35 
36 #include <arch/cpu.h>
37 #include <arch/vm.h>
38 
39 #include <string.h>
40 #include <ctype.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 
44 //#define TRACE_VM
45 //#define TRACE_FAULTS
46 #ifdef TRACE_VM
47 #	define TRACE(x) dprintf x
48 #else
49 #	define TRACE(x) ;
50 #endif
51 #ifdef TRACE_FAULTS
52 #	define FTRACE(x) dprintf x
53 #else
54 #	define FTRACE(x) ;
55 #endif
56 
57 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
58 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
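// Illustrative note (not in the original source): with B_PAGE_SIZE == 4096,
// ROUNDUP(5000, B_PAGE_SIZE) evaluates to 8192 and ROUNDOWN(5000, B_PAGE_SIZE)
// to 4096. ROUNDUP relies on "b" being a power of two; ROUNDOWN works for any
// non-zero "b".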
59 
60 
61 #define REGION_HASH_TABLE_SIZE 1024
62 static area_id sNextAreaID;
63 static hash_table *sAreaHash;
64 static sem_id sAreaHashLock;
65 static spinlock sMappingLock;
66 
67 static off_t sAvailableMemory;
68 static benaphore sAvailableMemoryLock;
69 
70 // function declarations
71 static status_t vm_soft_fault(addr_t address, bool is_write, bool is_user);
72 static bool vm_put_area(vm_area *area);
73 
74 
75 static int
76 area_compare(void *_area, const void *key)
77 {
78 	vm_area *area = (vm_area *)_area;
79 	const area_id *id = (const area_id *)key;
80 
81 	if (area->id == *id)
82 		return 0;
83 
84 	return -1;
85 }
86 
87 
88 static uint32
89 area_hash(void *_area, const void *key, uint32 range)
90 {
91 	vm_area *area = (vm_area *)_area;
92 	const area_id *id = (const area_id *)key;
93 
94 	if (area != NULL)
95 		return area->id % range;
96 
97 	return (uint32)*id % range;
98 }
99 
100 
101 static vm_area *
102 vm_get_area(area_id id)
103 {
104 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
105 
106 	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
107 	if (area != NULL)
108 		atomic_add(&area->ref_count, 1);
109 
110 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
111 
112 	return area;
113 }
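
/*	Illustrative usage sketch (not part of the original file): a reference
 *	obtained through vm_get_area() must be balanced by vm_put_area(), as the
 *	callers further below do:
 *
 *		vm_area *area = vm_get_area(id);
 *		if (area == NULL)
 *			return B_BAD_VALUE;
 *		// ... use the area ...
 *		vm_put_area(area);
 */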
114 
115 
116 static vm_area *
117 create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags)
118 {
119 	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
120 	if (reserved == NULL)
121 		return NULL;
122 
123 	memset(reserved, 0, sizeof(vm_area));
124 	reserved->id = RESERVED_AREA_ID;
125 		// this marks it as reserved space
126 	reserved->protection = flags;
127 	reserved->address_space = addressSpace;
128 
129 	return reserved;
130 }
131 
132 
133 static vm_area *
134 create_area_struct(vm_address_space *addressSpace, const char *name,
135 	uint32 wiring, uint32 protection)
136 {
137 	// restrict the area name to B_OS_NAME_LENGTH
138 	size_t length = strlen(name) + 1;
139 	if (length > B_OS_NAME_LENGTH)
140 		length = B_OS_NAME_LENGTH;
141 
142 	vm_area *area = (vm_area *)malloc(sizeof(vm_area));
143 	if (area == NULL)
144 		return NULL;
145 
146 	area->name = (char *)malloc(length);
147 	if (area->name == NULL) {
148 		free(area);
149 		return NULL;
150 	}
151 	strlcpy(area->name, name, length);
152 
153 	area->id = atomic_add(&sNextAreaID, 1);
154 	area->base = 0;
155 	area->size = 0;
156 	area->protection = protection;
157 	area->wiring = wiring;
158 	area->memory_type = 0;
159 	area->ref_count = 1;
160 
161 	area->cache_ref = NULL;
162 	area->cache_offset = 0;
163 
164 	area->address_space = addressSpace;
165 	area->address_space_next = NULL;
166 	area->cache_next = area->cache_prev = NULL;
167 	area->hash_next = NULL;
168 	new (&area->mappings) vm_area_mappings;
169 
170 	return area;
171 }
172 
173 
174 /**	Finds a reserved area that covers the region spanned by \a start and
175  *	\a size, inserts the \a area into that region and makes sure that
176  *	there are reserved regions for the remaining parts.
177  */
178 
179 static status_t
180 find_reserved_area(vm_address_space *addressSpace, addr_t start,
181 	addr_t size, vm_area *area)
182 {
183 	vm_area *next, *last = NULL;
184 
185 	next = addressSpace->areas;
186 	while (next) {
187 		if (next->base <= start && next->base + next->size >= start + size) {
188 			// this area covers the requested range
189 			if (next->id != RESERVED_AREA_ID) {
190 				// but it's not reserved space, it's a real area
191 				return B_BAD_VALUE;
192 			}
193 
194 			break;
195 		}
196 		last = next;
197 		next = next->address_space_next;
198 	}
199 	if (next == NULL)
200 		return B_ENTRY_NOT_FOUND;
201 
202 	// now we have to transfer the requested part of the reserved
203 	// range to the new area - and remove, resize or split the old
204 	// reserved area.
205 
206 	if (start == next->base) {
207 		// the area starts at the beginning of the reserved range
208 		if (last)
209 			last->address_space_next = area;
210 		else
211 			addressSpace->areas = area;
212 
213 		if (size == next->size) {
214 			// the new area fully covers the reserved range
215 			area->address_space_next = next->address_space_next;
216 			free(next);
217 		} else {
218 			// resize the reserved range behind the area
219 			area->address_space_next = next;
220 			next->base += size;
221 			next->size -= size;
222 		}
223 	} else if (start + size == next->base + next->size) {
224 		// the area is at the end of the reserved range
225 		area->address_space_next = next->address_space_next;
226 		next->address_space_next = area;
227 
228 		// resize the reserved range before the area
229 		next->size = start - next->base;
230 	} else {
231 		// the area splits the reserved range into two separate ones
232 		// we need a new reserved area to cover this space
233 		vm_area *reserved = create_reserved_area_struct(addressSpace,
234 			next->protection);
235 		if (reserved == NULL)
236 			return B_NO_MEMORY;
237 
238 		reserved->address_space_next = next->address_space_next;
239 		area->address_space_next = reserved;
240 		next->address_space_next = area;
241 
242 		// resize regions
243 		reserved->size = next->base + next->size - start - size;
244 		next->size = start - next->base;
245 		reserved->base = start + size;
246 		reserved->cache_offset = next->cache_offset;
247 	}
248 
249 	area->base = start;
250 	area->size = size;
251 	addressSpace->change_count++;
252 
253 	return B_OK;
254 }
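
/*	Overview of the three cases handled above (annotation, not in the original
 *	source), for a reserved range [next->base, next->base + next->size):
 *	- the new area starts at next->base: the reserved range shrinks from the
 *	  front, or is removed entirely if the sizes match;
 *	- the new area ends exactly at the end of the range: the reserved range
 *	  shrinks from the back;
 *	- the new area lies in the middle: the range is split, and a freshly
 *	  allocated reserved area covers the part behind the new area.
 */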
255 
256 
257 /*!	Must be called with this address space's sem held */
258 static status_t
259 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start,
260 	addr_t size, addr_t end, uint32 addressSpec, vm_area *area)
261 {
262 	vm_area *last = NULL;
263 	vm_area *next;
264 	bool foundSpot = false;
265 
266 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
267 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
268 		size, end, addressSpec, area));
269 
270 	// do some sanity checking
271 	if (start < addressSpace->base || size == 0
272 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
273 		|| start + size > end)
274 		return B_BAD_ADDRESS;
275 
276 	if (addressSpec == B_EXACT_ADDRESS) {
277 		// search for a reserved area
278 		status_t status = find_reserved_area(addressSpace, start, size, area);
279 		if (status == B_OK || status == B_BAD_VALUE)
280 			return status;
281 
282 		// there was no reserved area, and the slot doesn't seem to be used already
283 		// ToDo: this could be further optimized.
284 	}
285 
286 	// walk up to the spot where we should start searching
287 second_chance:
288 	next = addressSpace->areas;
289 	while (next) {
290 		if (next->base >= start + size) {
291 			// we have a winner
292 			break;
293 		}
294 		last = next;
295 		next = next->address_space_next;
296 	}
297 
298 	// find the right spot depending on the address specification - the area
299 	// will be inserted directly after "last" ("next" is not referenced anymore)
300 
301 	switch (addressSpec) {
302 		case B_ANY_ADDRESS:
303 		case B_ANY_KERNEL_ADDRESS:
304 		case B_ANY_KERNEL_BLOCK_ADDRESS:
305 			// find a hole big enough for a new area
306 			if (!last) {
307 				// see if we can build it at the beginning of the virtual map
308 				if (!next || (next->base >= addressSpace->base + size)) {
309 					foundSpot = true;
310 					area->base = addressSpace->base;
311 					break;
312 				}
313 				last = next;
314 				next = next->address_space_next;
315 			}
316 			// keep walking
317 			while (next) {
318 				if (next->base >= last->base + last->size + size) {
319 					// we found a spot (it'll be filled up below)
320 					break;
321 				}
322 				last = next;
323 				next = next->address_space_next;
324 			}
325 
326 			if ((addressSpace->base + (addressSpace->size - 1))
327 					>= (last->base + last->size + (size - 1))) {
328 				// got a spot
329 				foundSpot = true;
330 				area->base = last->base + last->size;
331 				break;
332 			} else {
333 				// we didn't find a free spot - if there were any reserved areas with
334 				// the RESERVED_AVOID_BASE flag set, we can now test those for free
335 				// space
336 				// ToDo: it would make sense to start with the biggest of them
337 				next = addressSpace->areas;
338 				last = NULL;
339 				for (last = NULL; next != NULL; last = next, next = next->address_space_next) {
340 					// ToDo: take free space after the reserved area into account!
341 					if (next->size == size) {
342 						// the reserved area is entirely covered, and thus, removed
343 						if (last)
344 							last->address_space_next = next->address_space_next;
345 						else
346 							addressSpace->areas = next->address_space_next;
347 
348 						foundSpot = true;
349 						area->base = next->base;
350 						free(next);
351 						break;
352 					}
353 					if (next->size >= size) {
354 						// the new area will be placed at the end of the reserved
355 						// area, and the reserved area will be resized to make space
356 						foundSpot = true;
357 						next->size -= size;
358 						last = next;
359 						area->base = next->base + next->size;
360 						break;
361 					}
362 				}
363 			}
364 			break;
365 
366 		case B_BASE_ADDRESS:
367 			// find a hole big enough for a new area beginning with "start"
368 			if (!last) {
369 				// see if we can build it at the beginning of the specified start
370 				if (!next || (next->base >= start + size)) {
371 					foundSpot = true;
372 					area->base = start;
373 					break;
374 				}
375 				last = next;
376 				next = next->address_space_next;
377 			}
378 			// keep walking
379 			while (next) {
380 				if (next->base >= last->base + last->size + size) {
381 					// we found a spot (it'll be filled up below)
382 					break;
383 				}
384 				last = next;
385 				next = next->address_space_next;
386 			}
387 
388 			if ((addressSpace->base + (addressSpace->size - 1))
389 					>= (last->base + last->size + (size - 1))) {
390 				// got a spot
391 				foundSpot = true;
392 				if (last->base + last->size <= start)
393 					area->base = start;
394 				else
395 					area->base = last->base + last->size;
396 				break;
397 			}
398 			// we didn't find a free spot in the requested range, so we'll
399 			// try again without any restrictions
400 			start = addressSpace->base;
401 			addressSpec = B_ANY_ADDRESS;
402 			last = NULL;
403 			goto second_chance;
404 
405 		case B_EXACT_ADDRESS:
406 			// see if we can create it exactly here
407 			if (!last) {
408 				if (!next || (next->base >= start + size)) {
409 					foundSpot = true;
410 					area->base = start;
411 					break;
412 				}
413 			} else {
414 				if (next) {
415 					if (last->base + last->size <= start && next->base >= start + size) {
416 						foundSpot = true;
417 						area->base = start;
418 						break;
419 					}
420 				} else {
421 					if ((last->base + (last->size - 1)) <= start - 1) {
422 						foundSpot = true;
423 						area->base = start;
424 					}
425 				}
426 			}
427 			break;
428 		default:
429 			return B_BAD_VALUE;
430 	}
431 
432 	if (!foundSpot)
433 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
434 
435 	area->size = size;
436 	if (last) {
437 		area->address_space_next = last->address_space_next;
438 		last->address_space_next = area;
439 	} else {
440 		area->address_space_next = addressSpace->areas;
441 		addressSpace->areas = area;
442 	}
443 	addressSpace->change_count++;
444 	return B_OK;
445 }
446 
447 
448 /**	This inserts the area you pass into the specified address space.
449  *	It will also set the "_address" argument to its base address when
450  *	the call succeeds.
451  *	You need to hold the vm_address_space semaphore.
452  */
453 
454 static status_t
455 insert_area(vm_address_space *addressSpace, void **_address,
456 	uint32 addressSpec, addr_t size, vm_area *area)
457 {
458 	addr_t searchBase, searchEnd;
459 	status_t status;
460 
461 	switch (addressSpec) {
462 		case B_EXACT_ADDRESS:
463 			searchBase = (addr_t)*_address;
464 			searchEnd = (addr_t)*_address + size;
465 			break;
466 
467 		case B_BASE_ADDRESS:
468 			searchBase = (addr_t)*_address;
469 			searchEnd = addressSpace->base + (addressSpace->size - 1);
470 			break;
471 
472 		case B_ANY_ADDRESS:
473 		case B_ANY_KERNEL_ADDRESS:
474 		case B_ANY_KERNEL_BLOCK_ADDRESS:
475 			searchBase = addressSpace->base;
476 			searchEnd = addressSpace->base + (addressSpace->size - 1);
477 			break;
478 
479 		default:
480 			return B_BAD_VALUE;
481 	}
482 
483 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
484 				searchEnd, addressSpec, area);
485 	if (status == B_OK) {
486 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
487 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
488 		*_address = (void *)area->base;
489 	}
490 
491 	return status;
492 }
493 
494 
495 static status_t
496 map_backing_store(vm_address_space *addressSpace, vm_cache_ref *cacheRef,
497 	void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
498 	int wiring, int protection, int mapping, vm_area **_area, const char *areaName)
499 {
500 	TRACE(("map_backing_store: aspace %p, cacheref %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
501 		addressSpace, cacheRef, *_virtualAddress, offset, size, addressSpec,
502 		wiring, protection, _area, areaName));
503 
504 	vm_area *area = create_area_struct(addressSpace, areaName, wiring, protection);
505 	if (area == NULL)
506 		return B_NO_MEMORY;
507 
508 	mutex_lock(&cacheRef->lock);
509 
510 	vm_cache *cache = cacheRef->cache;
511 	vm_store *store = cache->store;
512 	bool unlock = true;
513 	status_t status;
514 
515 	// if this is a private map, we need to create a new cache & store object
516 	// pair to handle the private copies of pages as they are written to
517 	if (mapping == REGION_PRIVATE_MAP) {
518 		vm_cache_ref *newCacheRef;
519 		vm_cache *newCache;
520 		vm_store *newStore;
521 
522 		// create an anonymous store object
523 		newStore = vm_store_create_anonymous_noswap((protection & B_STACK_AREA) != 0,
524 			0, USER_STACK_GUARD_PAGES);
525 		if (newStore == NULL) {
526 			status = B_NO_MEMORY;
527 			goto err1;
528 		}
529 		newCache = vm_cache_create(newStore);
530 		if (newCache == NULL) {
531 			status = B_NO_MEMORY;
532 			newStore->ops->destroy(newStore);
533 			goto err1;
534 		}
535 		status = vm_cache_ref_create(newCache);
536 		if (status < B_OK) {
537 			newStore->ops->destroy(newStore);
538 			free(newCache);
539 			goto err1;
540 		}
541 
542 		newCacheRef = newCache->ref;
543 		newCache->type = CACHE_TYPE_RAM;
544 		newCache->temporary = 1;
545 		newCache->scan_skip = cache->scan_skip;
546 
547 		vm_cache_add_consumer_locked(cacheRef, newCache);
548 
549 		mutex_unlock(&cacheRef->lock);
550 		mutex_lock(&newCacheRef->lock);
551 
552 		cache = newCache;
553 		cacheRef = newCache->ref;
554 		store = newStore;
555 		cache->virtual_base = offset;
556 		cache->virtual_size = offset + size;
557 	}
558 
559 	status = vm_cache_set_minimal_commitment_locked(cacheRef, offset + size);
560 	if (status != B_OK)
561 		goto err2;
562 
563 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
564 
565 	// check to see if this address space has entered DELETE state
566 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
567 		// okay, someone is trying to delete this address space now, so we can't
568 		// insert the area; back out
569 		status = B_BAD_TEAM_ID;
570 		goto err3;
571 	}
572 
573 	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
574 	if (status < B_OK)
575 		goto err3;
576 
577 	// attach the cache to the area
578 	area->cache_ref = cacheRef;
579 	area->cache_offset = offset;
580 
581 	// point the cache back to the area
582 	vm_cache_insert_area_locked(cacheRef, area);
583 	mutex_unlock(&cacheRef->lock);
584 
585 	// insert the area in the global area hash table
586 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0 ,0);
587 	hash_insert(sAreaHash, area);
588 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
589 
590 	// grab a ref to the address space (the area holds this)
591 	atomic_add(&addressSpace->ref_count, 1);
592 
593 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
594 
595 	*_area = area;
596 	return B_OK;
597 
598 err3:
599 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
600 err2:
601 	if (mapping == REGION_PRIVATE_MAP) {
602 		// we created this cache, so we must delete it again
603 		mutex_unlock(&cacheRef->lock);
604 		vm_cache_release_ref(cacheRef);
605 		unlock = false;
606 	}
607 err1:
608 	if (unlock)
609 		mutex_unlock(&cacheRef->lock);
610 	free(area->name);
611 	free(area);
612 	return status;
613 }
614 
615 
616 status_t
617 vm_unreserve_address_range(team_id team, void *address, addr_t size)
618 {
619 	vm_address_space *addressSpace;
620 	vm_area *area, *last = NULL;
621 	status_t status = B_OK;
622 
623 	addressSpace = vm_get_address_space_by_id(team);
624 	if (addressSpace == NULL)
625 		return B_BAD_TEAM_ID;
626 
627 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
628 
629 	// check to see if this address space has entered DELETE state
630 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
631 		// okay, someone is trying to delete this address space now, so we can't
632 		// someone is trying to delete this address space now, so we can't
633 		// touch its reserved ranges; back out
634 		goto out;
635 	}
636 
637 	// search area list and remove any matching reserved ranges
638 
639 	area = addressSpace->areas;
640 	while (area) {
641 		// the area must be completely part of the reserved range
642 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
643 			&& area->base + area->size <= (addr_t)address + size) {
644 			// remove reserved range
645 			vm_area *reserved = area;
646 			if (last)
647 				last->address_space_next = reserved->address_space_next;
648 			else
649 				addressSpace->areas = reserved->address_space_next;
650 
651 			area = reserved->address_space_next;
652 			free(reserved);
653 			continue;
654 		}
655 
656 		last = area;
657 		area = area->address_space_next;
658 	}
659 
660 out:
661 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
662 	vm_put_address_space(addressSpace);
663 	return status;
664 }
665 
666 
667 status_t
668 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec,
669 	addr_t size, uint32 flags)
670 {
671 	vm_address_space *addressSpace;
672 	vm_area *area;
673 	status_t status = B_OK;
674 
675 	if (size == 0)
676 		return B_BAD_VALUE;
677 
678 	addressSpace = vm_get_address_space_by_id(team);
679 	if (addressSpace == NULL)
680 		return B_BAD_TEAM_ID;
681 
682 	area = create_reserved_area_struct(addressSpace, flags);
683 	if (area == NULL) {
684 		status = B_NO_MEMORY;
685 		goto err1;
686 	}
687 
688 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
689 
690 	// check to see if this address space has entered DELETE state
691 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
692 		// okay, someone is trying to delete this address space now, so we can't
693 		// insert the area, let's back out
694 		status = B_BAD_TEAM_ID;
695 		goto err2;
696 	}
697 
698 	status = insert_area(addressSpace, _address, addressSpec, size, area);
699 	if (status < B_OK)
700 		goto err2;
701 
702 	// the area is now reserved!
703 
704 	area->cache_offset = area->base;
705 		// we cache the original base address here
706 
707 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
708 	return B_OK;
709 
710 err2:
711 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
712 	free(area);
713 err1:
714 	vm_put_address_space(addressSpace);
715 	return status;
716 }
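
/*	Illustrative sketch (not part of the original file): reserving a range up
 *	front and releasing it later, assuming "team" is a valid team ID:
 *
 *		void *base = (void *)0x20000000;
 *		status_t status = vm_reserve_address_range(team, &base,
 *			B_BASE_ADDRESS, 64 * B_PAGE_SIZE, RESERVED_AVOID_BASE);
 *		if (status == B_OK) {
 *			// ... create areas with B_EXACT_ADDRESS inside the range ...
 *			vm_unreserve_address_range(team, base, 64 * B_PAGE_SIZE);
 *		}
 */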
717 
718 
719 area_id
720 vm_create_anonymous_area(team_id aid, const char *name, void **address,
721 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
722 {
723 	vm_cache_ref *cacheRef;
724 	vm_area *area;
725 	vm_cache *cache;
726 	vm_store *store;
727 	vm_page *page = NULL;
728 	bool isStack = (protection & B_STACK_AREA) != 0;
729 	bool canOvercommit = false;
730 	status_t status;
731 
732 	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));
733 
734 	if (size == 0)
735 		return B_BAD_VALUE;
736 	if (!arch_vm_supports_protection(protection))
737 		return B_NOT_SUPPORTED;
738 
739 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
740 		canOvercommit = true;
741 
742 #ifdef DEBUG_KERNEL_STACKS
743 	if ((protection & B_KERNEL_STACK_AREA) != 0)
744 		isStack = true;
745 #endif
746 
747 	/* check parameters */
748 	switch (addressSpec) {
749 		case B_ANY_ADDRESS:
750 		case B_EXACT_ADDRESS:
751 		case B_BASE_ADDRESS:
752 		case B_ANY_KERNEL_ADDRESS:
753 			break;
754 
755 		default:
756 			return B_BAD_VALUE;
757 	}
758 
759 	switch (wiring) {
760 		case B_NO_LOCK:
761 		case B_FULL_LOCK:
762 		case B_LAZY_LOCK:
763 		case B_CONTIGUOUS:
764 		case B_ALREADY_WIRED:
765 			break;
766 		case B_LOMEM:
767 		//case B_SLOWMEM:
768 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
769 			wiring = B_FULL_LOCK;
770 			break;
771 		default:
772 			return B_BAD_VALUE;
773 	}
774 
775 	vm_address_space *addressSpace = vm_get_address_space_by_id(aid);
776 	if (addressSpace == NULL)
777 		return B_BAD_TEAM_ID;
778 
779 	size = PAGE_ALIGN(size);
780 
781 	if (wiring == B_CONTIGUOUS) {
782 		// we try to allocate the page run here upfront as this may easily
783 		// fail for obvious reasons
784 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
785 		if (page == NULL) {
786 			vm_put_address_space(addressSpace);
787 			return B_NO_MEMORY;
788 		}
789 	}
790 
791 	// create an anonymous store object
792 	// if it's a stack, make sure that at least two pages are available
793 	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ? 2 : 0,
794 		isStack ? ((protection & B_USER_PROTECTION) != 0 ?
795 			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
796 	if (store == NULL) {
797 		status = B_NO_MEMORY;
798 		goto err1;
799 	}
800 	cache = vm_cache_create(store);
801 	if (cache == NULL) {
802 		status = B_NO_MEMORY;
803 		goto err2;
804 	}
805 	status = vm_cache_ref_create(cache);
806 	if (status < B_OK)
807 		goto err3;
808 
809 	cache->temporary = 1;
810 	cache->type = CACHE_TYPE_RAM;
811 	cache->virtual_size = size;
812 
813 	switch (wiring) {
814 		case B_LAZY_LOCK:
815 		case B_FULL_LOCK:
816 		case B_CONTIGUOUS:
817 		case B_ALREADY_WIRED:
818 			cache->scan_skip = 1;
819 			break;
820 		case B_NO_LOCK:
821 			cache->scan_skip = 0;
822 			break;
823 	}
824 
825 	cacheRef = cache->ref;
826 
827 	status = map_backing_store(addressSpace, cacheRef, address, 0, size,
828 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name);
829 	if (status < B_OK) {
830 		vm_cache_release_ref(cacheRef);
831 		goto err1;
832 	}
833 
834 	switch (wiring) {
835 		case B_NO_LOCK:
836 		case B_LAZY_LOCK:
837 			// do nothing - the pages are mapped in as needed
838 			break;
839 
840 		case B_FULL_LOCK:
841 		{
842 			// Allocate and map all pages for this area
843 			mutex_lock(&cacheRef->lock);
844 
845 			off_t offset = 0;
846 			for (addr_t address = area->base; address < area->base + (area->size - 1);
847 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
848 #ifdef DEBUG_KERNEL_STACKS
849 #	ifdef STACK_GROWS_DOWNWARDS
850 				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
851 						* B_PAGE_SIZE)
852 #	else
853 				if (isStack && address >= area->base + area->size
854 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
855 #	endif
856 					continue;
857 #endif
858 				vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR);
859 				if (page == NULL) {
860 					// this shouldn't really happen, as we reserve the memory upfront
861 					panic("couldn't fulfill B_FULL_LOCK!");
862 				}
863 
864 				vm_cache_insert_page(cacheRef, page, offset);
865 				vm_map_page(area, page, address, protection);
866 			}
867 
868 			mutex_unlock(&cacheRef->lock);
869 			break;
870 		}
871 
872 		case B_ALREADY_WIRED:
873 		{
874 			// the pages should already be mapped. This is only really useful during
875 			// boot time. Find the appropriate vm_page objects and stick them in
876 			// the cache object.
877 			vm_translation_map *map = &addressSpace->translation_map;
878 			off_t offset = 0;
879 
880 			if (!kernel_startup)
881 				panic("ALREADY_WIRED flag used outside kernel startup\n");
882 
883 			mutex_lock(&cacheRef->lock);
884 			map->ops->lock(map);
885 
886 			for (addr_t virtualAddress = area->base; virtualAddress < area->base
887 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
888 					offset += B_PAGE_SIZE) {
889 				addr_t physicalAddress;
890 				uint32 flags;
891 				status = map->ops->query(map, virtualAddress,
892 					&physicalAddress, &flags);
893 				if (status < B_OK) {
894 					panic("looking up mapping failed for va 0x%lx\n",
895 						virtualAddress);
896 				}
897 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
898 				if (page == NULL) {
899 					panic("looking up page failed for pa 0x%lx\n",
900 						physicalAddress);
901 				}
902 
903 				page->wired_count++;
904 					// TODO: needs to be atomic on all platforms!
905 				vm_page_set_state(page, PAGE_STATE_WIRED);
906 				vm_cache_insert_page(cacheRef, page, offset);
907 			}
908 
909 			map->ops->unlock(map);
910 			mutex_unlock(&cacheRef->lock);
911 			break;
912 		}
913 
914 		case B_CONTIGUOUS:
915 		{
916 			// We have already allocated our contiguous page run, so we can now
917 			// just map it into the address space
918 			vm_translation_map *map = &addressSpace->translation_map;
919 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
920 			addr_t virtualAddress;
921 			off_t offset = 0;
922 
923 			mutex_lock(&cacheRef->lock);
924 			map->ops->lock(map);
925 
926 			for (virtualAddress = area->base; virtualAddress < area->base
927 					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
928 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
929 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
930 				if (page == NULL)
931 					panic("couldn't lookup physical page just allocated\n");
932 
933 				status = map->ops->map(map, virtualAddress, physicalAddress,
934 					protection);
935 				if (status < B_OK)
936 					panic("couldn't map physical page in page run\n");
937 
938 				page->wired_count++;
939 					// TODO: needs to be atomic on all platforms!
940 				vm_page_set_state(page, PAGE_STATE_WIRED);
941 				vm_cache_insert_page(cacheRef, page, offset);
942 			}
943 
944 			map->ops->unlock(map);
945 			mutex_unlock(&cacheRef->lock);
946 			break;
947 		}
948 
949 		default:
950 			break;
951 	}
952 	vm_put_address_space(addressSpace);
953 
954 	TRACE(("vm_create_anonymous_area: done\n"));
955 
956 	area->cache_type = CACHE_TYPE_RAM;
957 	return area->id;
958 
959 err3:
960 	free(cache);
961 err2:
962 	store->ops->destroy(store);
963 err1:
964 	if (wiring == B_CONTIGUOUS) {
965 		// we had reserved the area space upfront...
966 		addr_t pageNumber = page->physical_page_number;
967 		int32 i;
968 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
969 			page = vm_lookup_page(pageNumber);
970 			if (page == NULL)
971 				panic("couldn't lookup physical page just allocated\n");
972 
973 			vm_page_set_state(page, PAGE_STATE_FREE);
974 		}
975 	}
976 
977 	vm_put_address_space(addressSpace);
978 	return status;
979 }
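
/*	Illustrative sketch (not part of the original file): a kernel caller
 *	allocating a fully locked 16 page buffer might look like this:
 *
 *		void *address;
 *		area_id id = vm_create_anonymous_area(vm_kernel_address_space_id(),
 *			"scratch buffer", &address, B_ANY_KERNEL_ADDRESS,
 *			16 * B_PAGE_SIZE, B_FULL_LOCK,
 *			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
 *		if (id < B_OK)
 *			return id;
 */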
980 
981 
982 area_id
983 vm_map_physical_memory(team_id aspaceID, const char *name, void **_address,
984 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
985 {
986 	vm_cache_ref *cacheRef;
987 	vm_area *area;
988 	vm_cache *cache;
989 	vm_store *store;
990 	addr_t mapOffset;
991 	status_t status;
992 
993 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, spec = %ld,"
994 		" size = %lu, protection = %ld, phys = %p)\n",
995 		aspaceID, name, _address, addressSpec, size, protection,
996 		(void *)physicalAddress));
997 
998 	if (!arch_vm_supports_protection(protection))
999 		return B_NOT_SUPPORTED;
1000 
1001 	vm_address_space *addressSpace = vm_get_address_space_by_id(aspaceID);
1002 	if (addressSpace == NULL)
1003 		return B_BAD_TEAM_ID;
1004 
1005 	// if the physical address is not page aligned,
1006 	// move the actual area down to align on a page boundary
1007 	mapOffset = physicalAddress % B_PAGE_SIZE;
1008 	size += mapOffset;
1009 	physicalAddress -= mapOffset;
1010 
1011 	size = PAGE_ALIGN(size);
1012 
1013 	// create a device store object
1014 
1015 	store = vm_store_create_device(physicalAddress);
1016 	if (store == NULL) {
1017 		status = B_NO_MEMORY;
1018 		goto err1;
1019 	}
1020 	cache = vm_cache_create(store);
1021 	if (cache == NULL) {
1022 		status = B_NO_MEMORY;
1023 		goto err2;
1024 	}
1025 	status = vm_cache_ref_create(cache);
1026 	if (status < B_OK)
1027 		goto err3;
1028 
1029 	// tell the page scanner to skip over this area, its pages are special
1030 	cache->scan_skip = 1;
1031 	cache->type = CACHE_TYPE_DEVICE;
1032 	cache->virtual_size = size;
1033 
1034 	cacheRef = cache->ref;
1035 
1036 	status = map_backing_store(addressSpace, cacheRef, _address, 0, size,
1037 		addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1038 		REGION_NO_PRIVATE_MAP, &area, name);
1039 	if (status < B_OK)
1040 		vm_cache_release_ref(cacheRef);
1041 
1042 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
1043 		// set requested memory type
1044 		status = arch_vm_set_memory_type(area, physicalAddress,
1045 			addressSpec & B_MTR_MASK);
1046 		if (status < B_OK)
1047 			vm_put_area(area);
1048 	}
1049 
1050 	if (status >= B_OK) {
1051 		// make sure our area is mapped in completely
1052 
1053 		vm_translation_map *map = &addressSpace->translation_map;
1054 		map->ops->lock(map);
1055 
1056 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1057 			map->ops->map(map, area->base + offset, physicalAddress + offset,
1058 				protection);
1059 		}
1060 
1061 		map->ops->unlock(map);
1062 	}
1063 
1064 	vm_put_address_space(addressSpace);
1065 	if (status < B_OK)
1066 		return status;
1067 
1068 	// modify the returned pointer to be offset back into the new area
1069 	// by the same amount the passed-in physical address was offset
1070 	*_address = (void *)((addr_t)*_address + mapOffset);
1071 
1072 	area->cache_type = CACHE_TYPE_DEVICE;
1073 	return area->id;
1074 
1075 err3:
1076 	free(cache);
1077 err2:
1078 	store->ops->destroy(store);
1079 err1:
1080 	vm_put_address_space(addressSpace);
1081 	return status;
1082 }
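
/*	Illustrative sketch (not part of the original file): a driver mapping a
 *	device register window; the physical base address is hypothetical:
 *
 *		void *registers;
 *		area_id id = vm_map_physical_memory(vm_kernel_address_space_id(),
 *			"device registers", &registers, B_ANY_KERNEL_ADDRESS,
 *			B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
 *			0xfe000000);
 */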
1083 
1084 
1085 area_id
1086 vm_create_null_area(team_id team, const char *name, void **address,
1087 	uint32 addressSpec, addr_t size)
1088 {
1089 	vm_area *area;
1090 	vm_cache *cache;
1091 	vm_cache_ref *cacheRef;
1092 	vm_store *store;
1093 	status_t status;
1094 
1095 	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
1096 	if (addressSpace == NULL)
1097 		return B_BAD_TEAM_ID;
1098 
1099 	size = PAGE_ALIGN(size);
1100 
1101 	// create a null store object
1102 
1103 	store = vm_store_create_null();
1104 	if (store == NULL) {
1105 		status = B_NO_MEMORY;
1106 		goto err1;
1107 	}
1108 	cache = vm_cache_create(store);
1109 	if (cache == NULL) {
1110 		status = B_NO_MEMORY;
1111 		goto err2;
1112 	}
1113 	status = vm_cache_ref_create(cache);
1114 	if (status < B_OK)
1115 		goto err3;
1116 
1117 	// tell the page scanner to skip over this area, no pages will be mapped here
1118 	cache->scan_skip = 1;
1119 	cache->type = CACHE_TYPE_NULL;
1120 	cache->virtual_size = size;
1121 
1122 	cacheRef = cache->ref;
1123 
1124 	status = map_backing_store(addressSpace, cacheRef, address, 0, size, addressSpec, 0,
1125 		B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);
1126 
1127 	vm_put_address_space(addressSpace);
1128 
1129 	if (status < B_OK) {
1130 		vm_cache_release_ref(cacheRef);
1131 		return status;
1132 	}
1133 
1134 	area->cache_type = CACHE_TYPE_NULL;
1135 	return area->id;
1136 
1137 err3:
1138 	free(cache);
1139 err2:
1140 	store->ops->destroy(store);
1141 err1:
1142 	vm_put_address_space(addressSpace);
1143 	return status;
1144 }
1145 
1146 
1147 /**	Creates the vnode cache for the specified \a vnode.
1148  *	The vnode has to be marked busy when calling this function.
1149  *	If successful, it will also acquire an extra reference to
1150  *	the vnode (as the vnode store itself can't do this
1151  *	automatically).
1152  */
1153 
1154 status_t
1155 vm_create_vnode_cache(void *vnode, struct vm_cache_ref **_cacheRef)
1156 {
1157 	status_t status;
1158 
1159 	// create a vnode store object
1160 	vm_store *store = vm_create_vnode_store(vnode);
1161 	if (store == NULL)
1162 		return B_NO_MEMORY;
1163 
1164 	vm_cache *cache = vm_cache_create(store);
1165 	if (cache == NULL) {
1166 		status = B_NO_MEMORY;
1167 		goto err1;
1168 	}
1169 	status = vm_cache_ref_create(cache);
1170 	if (status < B_OK)
1171 		goto err2;
1172 
1173 	cache->type = CACHE_TYPE_VNODE;
1174 
1175 	*_cacheRef = cache->ref;
1176 	vfs_acquire_vnode(vnode);
1177 	return B_OK;
1178 
1179 err2:
1180 	free(cache);
1181 err1:
1182 	store->ops->destroy(store);
1183 	return status;
1184 }
1185 
1186 
1187 /** Will map the file at the specified \a path to an area in memory.
1188  *	The file will be mirrored beginning at the specified \a offset. The \a offset
1189  *	and \a size arguments have to be page aligned.
1190  */
1191 
1192 static area_id
1193 _vm_map_file(team_id team, const char *name, void **_address, uint32 addressSpec,
1194 	size_t size, uint32 protection, uint32 mapping, const char *path,
1195 	off_t offset, bool kernel)
1196 {
1197 	vm_cache_ref *cacheRef;
1198 	vm_area *area;
1199 	void *vnode;
1200 	status_t status;
1201 
1202 	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
1203 	// ToDo: check file access permissions (would be already done if the above were true)
1204 	// ToDo: for binary files, we want to make sure that they get the
1205 	//	copy of a file at a given time, i.e. later changes should not
1206 	//	make it into the mapped copy -- this will need quite some changes
1207 	//	to be done in a nice way
1208 
1209 	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
1210 	if (addressSpace == NULL)
1211 		return B_BAD_TEAM_ID;
1212 
1213 	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n",
1214 		path, offset, size, mapping));
1215 
1216 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
1217 	size = PAGE_ALIGN(size);
1218 
1219 	// get the vnode for the object, this also grabs a ref to it
1220 	status = vfs_get_vnode_from_path(path, kernel, &vnode);
1221 	if (status < B_OK)
1222 		goto err1;
1223 
1224 	// ToDo: this only works for file systems that use the file cache
1225 	status = vfs_get_vnode_cache(vnode, &cacheRef, false);
1226 
1227 	vfs_put_vnode(vnode);
1228 		// we don't need this vnode anymore - if the above call was
1229 		// successful, the store already has a ref to it
1230 
1231 	if (status < B_OK)
1232 		goto err1;
1233 
1234 	status = map_backing_store(addressSpace, cacheRef, _address,
1235 		offset, size, addressSpec, 0, protection, mapping, &area, name);
1236 	if (status < B_OK || mapping == REGION_PRIVATE_MAP) {
1237 		// map_backing_store() cannot know we no longer need the ref
1238 		vm_cache_release_ref(cacheRef);
1239 	}
1240 	if (status < B_OK)
1241 		goto err1;
1242 
1243 	vm_put_address_space(addressSpace);
1244 	area->cache_type = CACHE_TYPE_VNODE;
1245 	return area->id;
1246 
1247 err1:
1248 	vm_put_address_space(addressSpace);
1249 	return status;
1250 }
1251 
1252 
1253 area_id
1254 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec,
1255 	addr_t size, uint32 protection, uint32 mapping, const char *path, off_t offset)
1256 {
1257 	if (!arch_vm_supports_protection(protection))
1258 		return B_NOT_SUPPORTED;
1259 
1260 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1261 		mapping, path, offset, true);
1262 }
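
/*	Illustrative sketch (not part of the original file): mapping the first
 *	128 KB of a file read-only into the kernel team; the path is hypothetical:
 *
 *		void *address;
 *		area_id id = vm_map_file(vm_kernel_address_space_id(), "mapped file",
 *			&address, B_ANY_KERNEL_ADDRESS, 128 * 1024, B_KERNEL_READ_AREA,
 *			REGION_NO_PRIVATE_MAP, "/some/file", 0);
 */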
1263 
1264 
1265 // ToDo: create a BeOS style call for this!
1266 
1267 area_id
1268 _user_vm_map_file(const char *userName, void **userAddress, int addressSpec,
1269 	addr_t size, int protection, int mapping, const char *userPath, off_t offset)
1270 {
1271 	char name[B_OS_NAME_LENGTH];
1272 	char path[B_PATH_NAME_LENGTH];
1273 	void *address;
1274 	area_id area;
1275 
1276 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
1277 		|| !IS_USER_ADDRESS(userPath)
1278 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
1279 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
1280 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
1281 		return B_BAD_ADDRESS;
1282 
1283 	// userland-created areas can always be accessed by the kernel
1284 	protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1285 
1286 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
1287 		addressSpec, size, protection, mapping, path, offset, false);
1288 	if (area < B_OK)
1289 		return area;
1290 
1291 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
1292 		return B_BAD_ADDRESS;
1293 
1294 	return area;
1295 }
1296 
1297 
1298 area_id
1299 vm_clone_area(team_id team, const char *name, void **address, uint32 addressSpec,
1300 	uint32 protection, uint32 mapping, area_id sourceID)
1301 {
1302 	vm_area *newArea = NULL;
1303 	vm_area *sourceArea;
1304 	status_t status;
1305 
1306 	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
1307 	if (addressSpace == NULL)
1308 		return B_BAD_TEAM_ID;
1309 
1310 	sourceArea = vm_get_area(sourceID);
1311 	if (sourceArea == NULL) {
1312 		vm_put_address_space(addressSpace);
1313 		return B_BAD_VALUE;
1314 	}
1315 
1316 	vm_cache_acquire_ref(sourceArea->cache_ref);
1317 
1318 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
1319 	//	have been adapted. Maybe it should be part of the kernel settings,
1320 	//	anyway (so that old drivers can always work).
1321 #if 0
1322 	if (sourceArea->aspace == vm_kernel_address_space() && addressSpace != vm_kernel_address_space()
1323 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1324 		// kernel areas must not be cloned in userland, unless explicitly
1325 		// declared user-cloneable upon construction
1326 		status = B_NOT_ALLOWED;
1327 	} else
1328 #endif
1329 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1330 		status = B_NOT_ALLOWED;
1331 	else {
1332 		status = map_backing_store(addressSpace, sourceArea->cache_ref,
1333 			address, sourceArea->cache_offset, sourceArea->size, addressSpec,
1334 			sourceArea->wiring, protection, mapping, &newArea, name);
1335 	}
1336 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1337 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1338 		// to create a new ref, and has therefore already acquired a reference
1339 		// to the source cache - but otherwise it has no idea that we need
1340 		// one.
1341 		vm_cache_acquire_ref(sourceArea->cache_ref);
1342 	}
1343 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
1344 		// we need to map in everything at this point
1345 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
1346 			// we don't have actual pages to map but a physical area
1347 			vm_translation_map *map = &sourceArea->address_space->translation_map;
1348 			map->ops->lock(map);
1349 
1350 			addr_t physicalAddress;
1351 			uint32 oldProtection;
1352 			map->ops->query(map, sourceArea->base, &physicalAddress,
1353 				&oldProtection);
1354 
1355 			map->ops->unlock(map);
1356 
1357 			map = &addressSpace->translation_map;
1358 			map->ops->lock(map);
1359 
1360 			for (addr_t offset = 0; offset < newArea->size;
1361 					offset += B_PAGE_SIZE) {
1362 				map->ops->map(map, newArea->base + offset,
1363 					physicalAddress + offset, protection);
1364 			}
1365 
1366 			map->ops->unlock(map);
1367 		} else {
1368 			// map in all pages from source
1369 			mutex_lock(&sourceArea->cache_ref->lock);
1370 
1371 			for (vm_page *page = sourceArea->cache_ref->cache->page_list;
1372 					page != NULL; page = page->cache_next) {
1373 				vm_map_page(newArea, page, newArea->base
1374 					+ ((page->cache_offset << PAGE_SHIFT) - newArea->cache_offset),
1375 					protection);
1376 			}
1377 
1378 			mutex_unlock(&sourceArea->cache_ref->lock);
1379 		}
1380 	}
1381 	if (status == B_OK)
1382 		newArea->cache_type = sourceArea->cache_type;
1383 
1384 	vm_cache_release_ref(sourceArea->cache_ref);
1385 
1386 	vm_put_area(sourceArea);
1387 	vm_put_address_space(addressSpace);
1388 
1389 	if (status < B_OK)
1390 		return status;
1391 
1392 	return newArea->id;
1393 }
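
/*	Illustrative sketch (not part of the original file): cloning an existing
 *	area into the kernel address space with shared backing store:
 *
 *		void *address;
 *		area_id clone = vm_clone_area(vm_kernel_address_space_id(),
 *			"cloned area", &address, B_ANY_KERNEL_ADDRESS,
 *			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
 *			REGION_NO_PRIVATE_MAP, sourceID);
 */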
1394 
1395 
1396 static status_t
1397 _vm_delete_area(vm_address_space *addressSpace, area_id id)
1398 {
1399 	status_t status = B_OK;
1400 	vm_area *area;
1401 
1402 	TRACE(("vm_delete_area: aspace id 0x%lx, area id 0x%lx\n", addressSpace->id, id));
1403 
1404 	area = vm_get_area(id);
1405 	if (area == NULL)
1406 		return B_BAD_VALUE;
1407 
1408 	if (area->address_space == addressSpace) {
1409 		vm_put_area(area);
1410 			// next put below will actually delete it
1411 	} else
1412 		status = B_NOT_ALLOWED;
1413 
1414 	vm_put_area(area);
1415 	return status;
1416 }
1417 
1418 
1419 status_t
1420 vm_delete_area(team_id team, area_id id)
1421 {
1422 	vm_address_space *addressSpace;
1423 	status_t err;
1424 
1425 	addressSpace = vm_get_address_space_by_id(team);
1426 	if (addressSpace == NULL)
1427 		return B_BAD_TEAM_ID;
1428 
1429 	err = _vm_delete_area(addressSpace, id);
1430 	vm_put_address_space(addressSpace);
1431 	return err;
1432 }
1433 
1434 
1435 static void
1436 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area, bool locked)
1437 {
1438 	vm_area *temp, *last = NULL;
1439 
1440 	if (!locked)
1441 		acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
1442 
1443 	temp = addressSpace->areas;
1444 	while (temp != NULL) {
1445 		if (area == temp) {
1446 			if (last != NULL) {
1447 				last->address_space_next = temp->address_space_next;
1448 			} else {
1449 				addressSpace->areas = temp->address_space_next;
1450 			}
1451 			addressSpace->change_count++;
1452 			break;
1453 		}
1454 		last = temp;
1455 		temp = temp->address_space_next;
1456 	}
1457 	if (area == addressSpace->area_hint)
1458 		addressSpace->area_hint = NULL;
1459 
1460 	if (!locked)
1461 		release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
1462 
1463 	if (temp == NULL)
1464 		panic("remove_area_from_address_space: area not found in aspace's area list\n");
1465 }
1466 
1467 
1468 static bool
1469 _vm_put_area(vm_area *area, bool aspaceLocked)
1470 {
1471 	vm_address_space *addressSpace;
1472 	bool removeit = false;
1473 
1474 	TRACE(("_vm_put_area(area = %p, aspaceLocked = %s)\n",
1475 		area, aspaceLocked ? "yes" : "no"));
1476 
1477 	// we should never get here, but if we do, we can handle it
1478 	if (area->id == RESERVED_AREA_ID)
1479 		return false;
1480 
1481 	addressSpace = area->address_space;
1482 
1483 	// grab a write lock on the address space around the removal of the area
1484 	// from the global hash table to avoid a race with vm_soft_fault()
1485 	if (!aspaceLocked)
1486 		acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
1487 
1488 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
1489 	if (atomic_add(&area->ref_count, -1) == 1) {
1490 		hash_remove(sAreaHash, area);
1491 		removeit = true;
1492 	}
1493 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
1494 
1495 	if (!aspaceLocked)
1496 		release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
1497 
1498 	if (!removeit)
1499 		return false;
1500 
1501 	// at this point the area is removed from the global hash table, but still
1502 	// exists in the area list. Its ref_count is zero, and it is guaranteed not to
1503 	// be incremented anymore (by a direct hash lookup, or vm_area_lookup()).
1504 
1505 	// unmap the virtual address space the area occupied. Any page faults at this
1506 	// point should fail in vm_area_lookup().
1507 	vm_unmap_pages(area, area->base, area->size);
1508 
1509 	// ToDo: do that only for vnode stores
1510 	vm_cache_write_modified(area->cache_ref, false);
1511 
1512 	arch_vm_unset_memory_type(area);
1513 	remove_area_from_address_space(addressSpace, area, aspaceLocked);
1514 
1515 	vm_cache_remove_area(area->cache_ref, area);
1516 	vm_cache_release_ref(area->cache_ref);
1517 
1518 	// now we can give up the area's reference to the address space
1519 	vm_put_address_space(addressSpace);
1520 
1521 	free(area->name);
1522 	free(area);
1523 	return true;
1524 }
1525 
1526 
1527 static bool
1528 vm_put_area(vm_area *area)
1529 {
1530 	return _vm_put_area(area, false);
1531 }
1532 
1533 
1534 static status_t
1535 vm_copy_on_write_area(vm_area *area)
1536 {
1537 	vm_store *store;
1538 	vm_cache *upperCache, *lowerCache;
1539 	vm_cache_ref *upperCacheRef, *lowerCacheRef;
1540 	vm_translation_map *map;
1541 	vm_page *page;
1542 	uint32 protection;
1543 	status_t status;
1544 
1545 	TRACE(("vm_copy_on_write_area(area = %p)\n", area));
1546 
1547 	// We need to separate the vm_cache from its vm_cache_ref: the area
1548 	// and its cache_ref goes into a new layer on top of the old one.
1549 	// and its cache_ref go into a new layer on top of the old one.
1550 
1551 	upperCacheRef = area->cache_ref;
1552 
1553 	// we will exchange the cache_ref's cache, so we better hold its lock
1554 	mutex_lock(&upperCacheRef->lock);
1555 
1556 	lowerCache = upperCacheRef->cache;
1557 
1558 	// create an anonymous store object
1559 	store = vm_store_create_anonymous_noswap(false, 0, 0);
1560 	if (store == NULL) {
1561 		status = B_NO_MEMORY;
1562 		goto err1;
1563 	}
1564 
1565 	upperCache = vm_cache_create(store);
1566 	if (upperCache == NULL) {
1567 		status = B_NO_MEMORY;
1568 		goto err2;
1569 	}
1570 
1571 	status = vm_cache_ref_create(lowerCache);
1572 	if (status < B_OK)
1573 		goto err3;
1574 
1575 	lowerCacheRef = lowerCache->ref;
1576 
1577 	// The area must be readable in the same way it was previously writable
1578 	protection = B_KERNEL_READ_AREA;
1579 	if (area->protection & B_READ_AREA)
1580 		protection |= B_READ_AREA;
1581 
1582 	// we need to hold the cache_ref lock when we want to switch its cache
1583 	mutex_lock(&lowerCacheRef->lock);
1584 
1585 	upperCache->type = CACHE_TYPE_RAM;
1586 	upperCache->temporary = 1;
1587 	upperCache->scan_skip = lowerCache->scan_skip;
1588 	upperCache->virtual_base = lowerCache->virtual_base;
1589 	upperCache->virtual_size = lowerCache->virtual_size;
1590 
1591 	upperCache->ref = upperCacheRef;
1592 	upperCacheRef->cache = upperCache;
1593 
1594 	// we need to manually alter the ref_count (divide it between the two)
1595 	// the lower cache_ref has only known refs, so compute them
1596 	{
1597 		int32 count = 0;
1598 		vm_cache *consumer = NULL;
1599 		while ((consumer = (vm_cache *)list_get_next_item(
1600 				&lowerCache->consumers, consumer)) != NULL) {
1601 			count++;
1602 		}
1603 		lowerCacheRef->ref_count = count;
1604 		atomic_add(&upperCacheRef->ref_count, -count);
1605 	}
1606 
1607 	vm_cache_add_consumer_locked(lowerCacheRef, upperCache);
1608 
1609 	// We now need to remap all pages from the area read-only, so that
1610 	// a copy will be created on next write access
1611 
1612 	map = &area->address_space->translation_map;
1613 	map->ops->lock(map);
1614 	map->ops->unmap(map, area->base, area->base - 1 + area->size);
1615 	map->ops->flush(map);
1616 
1617 	// TODO: does anything guarantee that we remap the same pages here?
1618 	//	Shouldn't we better introduce a "change mapping"?
1619 
1620 	for (page = lowerCache->page_list; page; page = page->cache_next) {
1621 		map->ops->map(map, area->base + (page->cache_offset << PAGE_SHIFT)
1622 			- area->cache_offset, page->physical_page_number << PAGE_SHIFT,
1623 			protection);
1624 	}
1625 
1626 	map->ops->unlock(map);
1627 
1628 	mutex_unlock(&lowerCacheRef->lock);
1629 	mutex_unlock(&upperCacheRef->lock);
1630 
1631 	return B_OK;
1632 
1633 err3:
1634 	free(upperCache);
1635 err2:
1636 	store->ops->destroy(store);
1637 err1:
1638 	mutex_unlock(&upperCacheRef->lock);
1639 	return status;
1640 }
1641 
1642 
1643 area_id
1644 vm_copy_area(team_id addressSpaceID, const char *name, void **_address, uint32 addressSpec,
1645 	uint32 protection, area_id sourceID)
1646 {
1647 	vm_address_space *addressSpace;
1648 	vm_cache_ref *cacheRef;
1649 	vm_area *target, *source;
1650 	status_t status;
1651 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
1652 
1653 	if ((protection & B_KERNEL_PROTECTION) == 0) {
1654 		// set the same protection for the kernel as for userland
1655 		protection |= B_KERNEL_READ_AREA;
1656 		if (writableCopy)
1657 			protection |= B_KERNEL_WRITE_AREA;
1658 	}
1659 
1660 	if ((source = vm_get_area(sourceID)) == NULL)
1661 		return B_BAD_VALUE;
1662 
1663 	addressSpace = vm_get_address_space_by_id(addressSpaceID);
1664 	cacheRef = source->cache_ref;
1665 
1666 	if (addressSpec == B_CLONE_ADDRESS) {
1667 		addressSpec = B_EXACT_ADDRESS;
1668 		*_address = (void *)source->base;
1669 	}
1670 
1671 	// First, create a cache on top of the source area
1672 
1673 	if (!writableCopy) {
1674 		// map_backing_store() cannot know it has to acquire a ref to
1675 		// the store for REGION_NO_PRIVATE_MAP
1676 		vm_cache_acquire_ref(cacheRef);
1677 	}
1678 
1679 	status = map_backing_store(addressSpace, cacheRef, _address,
1680 		source->cache_offset, source->size, addressSpec, source->wiring, protection,
1681 		writableCopy ? REGION_PRIVATE_MAP : REGION_NO_PRIVATE_MAP,
1682 		&target, name);
1683 	if (status < B_OK) {
1684 		if (!writableCopy)
1685 			vm_cache_release_ref(cacheRef);
1686 		goto err;
1687 	}
1688 
1689 	// If the source area is writable, we need to move it one layer up as well
1690 
1691 	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
1692 		// ToDo: do something more useful if this fails!
1693 		if (vm_copy_on_write_area(source) < B_OK)
1694 			panic("vm_copy_on_write_area() failed!\n");
1695 	}
1696 
1697 	// we want to return the ID of the newly created area
1698 	status = target->id;
1699 
1700 err:
1701 	vm_put_address_space(addressSpace);
1702 	vm_put_area(source);
1703 
1704 	return status;
1705 }
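
/*	Illustrative sketch (not part of the original file): making a writable
 *	copy of an area, which pushes the source cache one layer down via
 *	vm_copy_on_write_area():
 *
 *		void *address;
 *		area_id copy = vm_copy_area(vm_kernel_address_space_id(), "copy",
 *			&address, B_ANY_KERNEL_ADDRESS,
 *			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, sourceID);
 */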
1706 
1707 
1708 static int32
1709 count_writable_areas(vm_cache_ref *ref, vm_area *ignoreArea)
1710 {
1711 	struct vm_area *area = ref->areas;
1712 	uint32 count = 0;
1713 
1714 	for (; area != NULL; area = area->cache_next) {
1715 		if (area != ignoreArea
1716 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
1717 			count++;
1718 	}
1719 
1720 	return count;
1721 }
1722 
1723 
1724 static status_t
1725 vm_set_area_protection(team_id aspaceID, area_id areaID, uint32 newProtection)
1726 {
1727 	vm_cache_ref *cacheRef;
1728 	vm_cache *cache;
1729 	vm_area *area;
1730 	status_t status = B_OK;
1731 
1732 	TRACE(("vm_set_area_protection(aspace = %#lx, area = %#lx, protection = %#lx)\n",
1733 		aspaceID, areaID, newProtection));
1734 
1735 	if (!arch_vm_supports_protection(newProtection))
1736 		return B_NOT_SUPPORTED;
1737 
1738 	area = vm_get_area(areaID);
1739 	if (area == NULL)
1740 		return B_BAD_VALUE;
1741 
1742 	if (aspaceID != vm_kernel_address_space_id() && area->address_space->id != aspaceID) {
1743 		// unless you're the kernel, you are only allowed to set
1744 		// the protection of your own areas
1745 		vm_put_area(area);
1746 		return B_NOT_ALLOWED;
1747 	}
1748 
1749 	cacheRef = area->cache_ref;
1750 	mutex_lock(&cacheRef->lock);
1751 
1752 	cache = cacheRef->cache;
1753 
1754 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1755 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
1756 		// change from read/write to read-only
1757 
1758 		if (cache->source != NULL && cache->temporary) {
1759 			if (count_writable_areas(cacheRef, area) == 0) {
1760 				// Since this cache is now backed by the pages in its source cache,
1761 				// we can change the cache's commitment to take only those pages
1762 				// into account that really are in this cache.
1763 
1764 				// count existing pages in this cache
1765 				struct vm_page *page = cache->page_list;
1766 				uint32 count = 0;
1767 
1768 				for (; page != NULL; page = page->cache_next) {
1769 					count++;
1770 				}
1771 
1772 				status = cache->store->ops->commit(cache->store,
1773 					cache->virtual_base + count * B_PAGE_SIZE);
1774 
1775 				// ToDo: we may be able to join with our source cache, if count == 0
1776 			}
1777 		}
1778 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
1779 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
1780 		// change from read-only to read/write
1781 
1782 		// ToDo: if this is a shared cache, insert new cache (we only know about other
1783 		//	areas in this cache yet, though, not about child areas)
1784 		//	-> use this call with care, it might currently have unwanted consequences
1785 		//	   because of this. It should always be safe though, if there are no other
1786 		//	   (child) areas referencing this area's cache (you just might not know).
1787 		if (count_writable_areas(cacheRef, area) == 0
1788 			&& (cacheRef->areas != area || area->cache_next)) {
1789 			// ToDo: child areas are not tested for yet
1790 			dprintf("set_area_protection(): warning, would need to insert a new cache_ref (not yet implemented)!\n");
1791 			status = B_NOT_ALLOWED;
1792 		} else
1793 			dprintf("set_area_protection() may not work correctly yet in this direction!\n");
1794 
1795 		if (status == B_OK && cache->source != NULL && cache->temporary) {
1796 			// the cache's commitment must contain all possible pages
1797 			status = cache->store->ops->commit(cache->store, cache->virtual_size);
1798 		}
1799 	} else {
1800 		// we don't have anything special to do in all other cases
1801 	}
1802 
1803 	if (status == B_OK && area->protection != newProtection) {
1804 		// remap existing pages in this cache
1805 		struct vm_translation_map *map = &area->address_space->translation_map;
1806 
1807 		map->ops->lock(map);
1808 		map->ops->protect(map, area->base, area->base + area->size, newProtection);
1809 		map->ops->unlock(map);
1810 
1811 		area->protection = newProtection;
1812 	}
1813 
1814 	mutex_unlock(&cacheRef->lock);
1815 	vm_put_area(area);
1816 
1817 	return status;
1818 }
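
/*	Illustrative sketch (not part of the original file): revoking write access
 *	to an area once it has been filled:
 *
 *		status_t status = vm_set_area_protection(vm_kernel_address_space_id(),
 *			areaID, B_KERNEL_READ_AREA);
 */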
1819 
1820 
1821 status_t
1822 vm_get_page_mapping(team_id aid, addr_t vaddr, addr_t *paddr)
1823 {
1824 	vm_address_space *addressSpace;
1825 	uint32 null_flags;
1826 	status_t err;
1827 
1828 	addressSpace = vm_get_address_space_by_id(aid);
1829 	if (addressSpace == NULL)
1830 		return B_BAD_TEAM_ID;
1831 
1832 	err = addressSpace->translation_map.ops->query(&addressSpace->translation_map,
1833 		vaddr, paddr, &null_flags);
1834 
1835 	vm_put_address_space(addressSpace);
1836 	return err;
1837 }
1838 
1839 
1840 int32
1841 vm_test_map_activation(vm_page *page)
1842 {
1843 	int32 activation = 0;
1844 
1845 	// TODO: this can't work... (we need to lock the map, so this has to be a mutex)
1846 	cpu_status state = disable_interrupts();
1847 	acquire_spinlock(&sMappingLock);
1848 
1849 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
1850 	vm_page_mapping *mapping;
1851 	while ((mapping = iterator.Next()) != NULL) {
1852 		vm_area *area = mapping->area;
1853 		vm_translation_map *map = &area->address_space->translation_map;
1854 
1855 		addr_t physicalAddress;
1856 		uint32 flags;
1857 //		map->ops->lock(map);
1858 		addr_t address = area->base + (page->cache_offset << PAGE_SHIFT);
1859 		map->ops->query_interrupt(map, address, &physicalAddress, &flags);
1860 //		map->ops->unlock(map);
1861 
1862 		if (flags & PAGE_ACCESSED)
1863 			activation++;
1864 	}
1865 
1866 	release_spinlock(&sMappingLock);
1867 	restore_interrupts(state);
1868 
1869 	return activation;
1870 }
1871 
1872 
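/*!
	Clears the PAGE_ACCESSED flag in every translation map entry that maps
	\a page.
*/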
1873 void
1874 vm_clear_map_activation(vm_page *page)
1875 {
1876 	// TODO: this can't work... (we need to lock the map, so this has to be a mutex)
1877 	cpu_status state = disable_interrupts();
1878 	acquire_spinlock(&sMappingLock);
1879 
1880 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
1881 	vm_page_mapping *mapping;
1882 	while ((mapping = iterator.Next()) != NULL) {
1883 		vm_area *area = mapping->area;
1884 		vm_translation_map *map = &area->address_space->translation_map;
1885 
1886 //		map->ops->lock(map);
1887 		addr_t address = area->base + (page->cache_offset << PAGE_SHIFT);
1888 		map->ops->clear_flags(map, address, PAGE_ACCESSED);
1889 //		map->ops->unlock(map);
1890 	}
1891 
1892 	release_spinlock(&sMappingLock);
1893 	restore_interrupts(state);
1894 }
1895 
1896 
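/*!
	Unmaps \a page from every area it is currently mapped in, and frees the
	associated vm_page_mapping structures.
*/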
1897 void
1898 vm_remove_all_page_mappings(vm_page *page)
1899 {
1900 	// TODO: this can't work... (we need to lock the map, so this has to be a mutex)
1901 	cpu_status state = disable_interrupts();
1902 	acquire_spinlock(&sMappingLock);
1903 
1904 	vm_page_mappings queue;
1905 	queue.MoveFrom(&page->mappings);
1906 
1907 	vm_page_mappings::Iterator iterator = queue.GetIterator();
1908 	vm_page_mapping *mapping;
1909 	while ((mapping = iterator.Next()) != NULL) {
1910 		vm_area *area = mapping->area;
1911 		vm_translation_map *map = &area->address_space->translation_map;
1912 
1913 //		map->ops->lock(map);
1914 		addr_t base = area->base + (page->cache_offset << PAGE_SHIFT);
1915 		map->ops->unmap(map, base, base + (B_PAGE_SIZE - 1));
1916 //		map->ops->unlock(map);
1917 
1918 		area->mappings.Remove(mapping);
1919 	}
1920 
1921 	release_spinlock(&sMappingLock);
1922 	restore_interrupts(state);
1923 
1924 	// free now unused mappings
1925 
1926 	while ((mapping = queue.RemoveHead()) != NULL) {
1927 		free(mapping);
1928 	}
1929 }
1930 
1931 
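/*!
	Unmaps the given address range from the area's translation map. For wired
	areas (other than device caches) the wired count of every mapped page is
	decreased; for B_NO_LOCK areas the corresponding vm_page_mapping
	structures are removed and freed.
*/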
1932 status_t
1933 vm_unmap_pages(vm_area *area, addr_t base, size_t size)
1934 {
1935 	vm_translation_map *map = &area->address_space->translation_map;
1936 	addr_t end = base + (size - 1);
1937 
1938 	map->ops->lock(map);
1939 
1940 	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
1941 		// iterate through all pages and decrease their wired count
1942 		for (addr_t virtualAddress = base; virtualAddress < end;
1943 				virtualAddress += B_PAGE_SIZE) {
1944 			addr_t physicalAddress;
1945 			uint32 flags;
1946 			status_t status = map->ops->query(map, virtualAddress,
1947 				&physicalAddress, &flags);
1948 			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
1949 				continue;
1950 
1951 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1952 			if (page == NULL) {
1953 				panic("area %p looking up page failed for pa 0x%lx\n", area,
1954 					physicalAddress);
1955 			}
1956 
1957 			page->wired_count--;
1958 				// TODO: needs to be atomic on all platforms!
1959 		}
1960 	}
1961 
1962 	map->ops->unmap(map, base, end);
1963 
1964 	if (area->wiring == B_NO_LOCK) {
1965 		vm_area_mappings queue;
1966 		uint32 count = 0;
1967 
1968 		cpu_status state = disable_interrupts();
1969 		acquire_spinlock(&sMappingLock);
1970 
1971 		vm_page_mapping *mapping;
1972 		while ((mapping = area->mappings.RemoveHead()) != NULL) {
1973 			mapping->page->mappings.Remove(mapping);
1974 			queue.Add(mapping);
1975 
1976 			// temporarily unlock to handle interrupts and let others play as well
1977 			if ((++count % 256) == 0) {
1978 				release_spinlock(&sMappingLock);
1979 				restore_interrupts(state);
1980 
1981 				state = disable_interrupts();
1982 				acquire_spinlock(&sMappingLock);
1983 			}
1984 		}
1985 
1986 		release_spinlock(&sMappingLock);
1987 		restore_interrupts(state);
1988 
1989 		while ((mapping = queue.RemoveHead()) != NULL) {
1990 			free(mapping);
1991 		}
1992 	}
1993 
1994 	map->ops->unlock(map);
1995 	return B_OK;
1996 }
1997 
1998 
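/*!
	Maps \a page at \a address into the area's address space using the given
	protection. For B_NO_LOCK areas a vm_page_mapping is allocated and linked
	into both the page's and the area's mapping lists; for wired areas only
	the page's wired count is increased.
*/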
1999 status_t
2000 vm_map_page(vm_area *area, vm_page *page, addr_t address, uint32 protection)
2001 {
2002 	vm_translation_map *map = &area->address_space->translation_map;
2003 	vm_page_mapping *mapping = NULL;
2004 
2005 	if (area->wiring == B_NO_LOCK) {
2006 		mapping = (vm_page_mapping *)malloc(sizeof(vm_page_mapping));
2007 		if (mapping == NULL)
2008 			return B_NO_MEMORY;
2009 
2010 		mapping->page = page;
2011 		mapping->area = area;
2012 	}
2013 
2014 	map->ops->lock(map);
2015 	map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE,
2016 		protection);
2017 
2018 	if (area->wiring != B_NO_LOCK) {
2019 		page->wired_count++;
2020 			// TODO: needs to be atomic on all platforms!
2021 	} else {
2022 		// insert mapping into lists
2023 		cpu_status state = disable_interrupts();
2024 		acquire_spinlock(&sMappingLock);
2025 
2026 		page->mappings.Add(mapping);
2027 		area->mappings.Add(mapping);
2028 
2029 		release_spinlock(&sMappingLock);
2030 		restore_interrupts(state);
2031 	}
2032 
2033 	map->ops->unlock(map);
2034 
2035 	vm_page_set_state(page, PAGE_STATE_ACTIVE);
2036 	return B_OK;
2037 }
2038 
2039 
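/*!
	Debugger command backing "dl", "dw", "ds", and "db": dumps memory as 64,
	32, 16, or 8 bit items, respectively. With -p/--physical the given address
	is treated as a physical address (restricted to a single page).
	For example, "dw <address> 8" prints eight 32-bit words starting at
	<address>.
*/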
2040 static int
2041 display_mem(int argc, char **argv)
2042 {
2043 	bool physical = false;
2044 	addr_t copyAddress;
2045 	int32 displayWidth;
2046 	int32 itemSize;
2047 	int32 num = -1;
2048 	addr_t address;
2049 	int i = 1, j;
2050 
2051 	if (argc > 1 && argv[1][0] == '-') {
2052 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2053 			physical = true;
2054 			i++;
2055 		} else
2056 			i = 99;
2057 	}
2058 
2059 	if (argc < i + 1 || argc > i + 2) {
2060 		kprintf("usage: dl/dw/ds/db [-p|--physical] <address> [num]\n"
2061 			"\tdl - 8 bytes\n"
2062 			"\tdw - 4 bytes\n"
2063 			"\tds - 2 bytes\n"
2064 			"\tdb - 1 byte\n"
2065 			"  -p or --physical only allows memory from a single page to be displayed.\n");
2066 		return 0;
2067 	}
2068 
2069 	address = strtoul(argv[i], NULL, 0);
2070 
2071 	if (argc > i + 1)
2072 		num = atoi(argv[i + 1]);
2073 
2074 	// build the format string
2075 	if (strcmp(argv[0], "db") == 0) {
2076 		itemSize = 1;
2077 		displayWidth = 16;
2078 	} else if (strcmp(argv[0], "ds") == 0) {
2079 		itemSize = 2;
2080 		displayWidth = 8;
2081 	} else if (strcmp(argv[0], "dw") == 0) {
2082 		itemSize = 4;
2083 		displayWidth = 4;
2084 	} else if (strcmp(argv[0], "dl") == 0) {
2085 		itemSize = 8;
2086 		displayWidth = 2;
2087 	} else {
2088 		kprintf("display_mem called in an invalid way!\n");
2089 		return 0;
2090 	}
2091 
2092 	if (num <= 0)
2093 		num = displayWidth;
2094 
2095 	if (physical) {
2096 		int32 offset = address & (B_PAGE_SIZE - 1);
2097 		if (num * itemSize + offset > B_PAGE_SIZE) {
2098 			num = (B_PAGE_SIZE - offset) / itemSize;
2099 			kprintf("NOTE: number of bytes has been cut to page size\n");
2100 		}
2101 
2102 		address = ROUNDOWN(address, B_PAGE_SIZE);
2103 
2104 		kernel_startup = true;
2105 			// vm_get_physical_page() needs to lock...
2106 
2107 		if (vm_get_physical_page(address, &copyAddress, PHYSICAL_PAGE_NO_WAIT) != B_OK) {
2108 			kprintf("getting the hardware page failed.\n");
2109 			kernel_startup = false;
2110 			return 0;
2111 		}
2112 
2113 		kernel_startup = false;
2114 		address += offset;
2115 		copyAddress += offset;
2116 	} else
2117 		copyAddress = address;
2118 
2119 	for (i = 0; i < num; i++) {
2120 		uint32 value;
2121 
2122 		if ((i % displayWidth) == 0) {
2123 			int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2124 			if (i != 0)
2125 				kprintf("\n");
2126 
2127 			kprintf("[0x%lx]  ", address + i * itemSize);
2128 
2129 			for (j = 0; j < displayed; j++) {
2130 				char c;
2131 				if (user_memcpy(&c, (char *)copyAddress + i * itemSize + j, 1) != B_OK) {
2132 					displayed = j;
2133 					break;
2134 				}
2135 				if (!isprint(c))
2136 					c = '.';
2137 
2138 				kprintf("%c", c);
2139 			}
2140 			if (num > displayWidth) {
2141 				// make sure the spacing in the last line is correct
2142 				for (j = displayed; j < displayWidth * itemSize; j++)
2143 					kprintf(" ");
2144 			}
2145 			kprintf("  ");
2146 		}
2147 
2148 		if (user_memcpy(&value, (uint8 *)copyAddress + i * itemSize, itemSize) != B_OK) {
2149 			kprintf("read fault");
2150 			break;
2151 		}
2152 
2153 		switch (itemSize) {
2154 			case 1:
2155 				kprintf(" %02x", *(uint8 *)&value);
2156 				break;
2157 			case 2:
2158 				kprintf(" %04x", *(uint16 *)&value);
2159 				break;
2160 			case 4:
2161 				kprintf(" %08lx", *(uint32 *)&value);
2162 				break;
2163 			case 8:
2164 				kprintf(" %016Lx", *(uint64 *)&value);
2165 				break;
2166 		}
2167 	}
2168 
2169 	kprintf("\n");
2170 
2171 	if (physical) {
2172 		copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE);
2173 		kernel_startup = true;
2174 		vm_put_physical_page(copyAddress);
2175 		kernel_startup = false;
2176 	}
2177 	return 0;
2178 }
2179 
2180 
2181 static const char *
2182 page_state_to_string(int state)
2183 {
2184 	switch(state) {
2185 		case PAGE_STATE_ACTIVE:
2186 			return "active";
2187 		case PAGE_STATE_INACTIVE:
2188 			return "inactive";
2189 		case PAGE_STATE_BUSY:
2190 			return "busy";
2191 		case PAGE_STATE_MODIFIED:
2192 			return "modified";
2193 		case PAGE_STATE_FREE:
2194 			return "free";
2195 		case PAGE_STATE_CLEAR:
2196 			return "clear";
2197 		case PAGE_STATE_WIRED:
2198 			return "wired";
2199 		case PAGE_STATE_UNUSED:
2200 			return "unused";
2201 		default:
2202 			return "unknown";
2203 	}
2204 }
2205 
2206 
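/*!
	Debugger command "cache_chain": walks the source chain of the given
	vm_cache and prints each cache along with its cache_ref.
*/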
2207 static int
2208 dump_cache_chain(int argc, char **argv)
2209 {
2210 	if (argc < 2 || strlen(argv[1]) < 2
2211 		|| argv[1][0] != '0'
2212 		|| argv[1][1] != 'x') {
2213 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2214 		return 0;
2215 	}
2216 
2217 	addr_t address = strtoul(argv[1], NULL, 0);
2218 	if (address == 0)
2219 		return 0;
2220 
2221 	vm_cache *cache = (vm_cache *)address;
2222 	while (cache != NULL) {
2223 		dprintf("%p  (ref %p)\n", cache, cache->ref);
2224 		cache = cache->source;
2225 	}
2226 
2227 	return 0;
2228 }
2229 
2230 
2231 static const char *
2232 cache_type_to_string(int32 type)
2233 {
2234 	switch (type) {
2235 		case CACHE_TYPE_RAM:
2236 			return "RAM";
2237 		case CACHE_TYPE_DEVICE:
2238 			return "device";
2239 		case CACHE_TYPE_VNODE:
2240 			return "vnode";
2241 		case CACHE_TYPE_NULL:
2242 			return "null";
2243 
2244 		default:
2245 			return "unknown";
2246 	}
2247 }
2248 
2249 
2250 static int
2251 dump_cache(int argc, char **argv)
2252 {
2253 	vm_cache *cache;
2254 	vm_cache_ref *cacheRef;
2255 	bool showPages = false;
2256 	bool showCache = true;
2257 	bool showCacheRef = true;
2258 	int i = 1;
2259 
2260 	if (argc < 2) {
2261 		kprintf("usage: %s [-ps] <address>\n"
2262 			"  if -p is specified, all pages are shown; if -s is used,\n"
2263 			"  only the cache or the cache_ref info is shown, respectively.\n", argv[0]);
2264 		return 0;
2265 	}
2266 	while (argv[i][0] == '-') {
2267 		char *arg = argv[i] + 1;
2268 		while (arg[0]) {
2269 			if (arg[0] == 'p')
2270 				showPages = true;
2271 			else if (arg[0] == 's') {
2272 				if (!strcmp(argv[0], "cache"))
2273 					showCacheRef = false;
2274 				else
2275 					showCache = false;
2276 			}
2277 			arg++;
2278 		}
2279 		i++;
2280 	}
2281 	if (argv[i] == NULL || strlen(argv[i]) < 2
2282 		|| argv[i][0] != '0'
2283 		|| argv[i][1] != 'x') {
2284 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2285 		return 0;
2286 	}
2287 
2288 	addr_t address = strtoul(argv[i], NULL, 0);
2289 	if (address == 0)
2290 		return 0;
2291 
2292 	if (!strcmp(argv[0], "cache")) {
2293 		cache = (vm_cache *)address;
2294 		cacheRef = cache->ref;
2295 	} else {
2296 		cacheRef = (vm_cache_ref *)address;
2297 		cache = cacheRef->cache;
2298 	}
2299 
2300 	if (showCacheRef) {
2301 		kprintf("CACHE_REF %p:\n", cacheRef);
2302 		if (!showCache)
2303 			kprintf("  cache:        %p\n", cacheRef->cache);
2304 		kprintf("  ref_count:    %ld\n", cacheRef->ref_count);
2305 		kprintf("  lock.holder:  %ld\n", cacheRef->lock.holder);
2306 		kprintf("  lock.sem:     0x%lx\n", cacheRef->lock.sem);
2307 		kprintf("  areas:\n");
2308 
2309 		for (vm_area *area = cacheRef->areas; area != NULL; area = area->cache_next) {
2310 			kprintf("    area 0x%lx, %s\n", area->id, area->name);
2311 			kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->base, area->size);
2312 			kprintf("\tprotection: 0x%lx\n", area->protection);
2313 			kprintf("\towner:      0x%lx\n", area->address_space->id);
2314 		}
2315 	}
2316 
2317 	if (showCache) {
2318 		kprintf("CACHE %p:\n", cache);
2319 		if (!showCacheRef)
2320 			kprintf("  cache_ref:    %p\n", cache->ref);
2321 		kprintf("  source:       %p\n", cache->source);
2322 		kprintf("  store:        %p\n", cache->store);
2323 		kprintf("  type:         %s\n", cache_type_to_string(cache->type));
2324 		kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
2325 		kprintf("  virtual_size: 0x%Lx\n", cache->virtual_size);
2326 		kprintf("  temporary:    %ld\n", cache->temporary);
2327 		kprintf("  scan_skip:    %ld\n", cache->scan_skip);
2328 
2329 		kprintf("  consumers:\n");
2330 		vm_cache *consumer = NULL;
2331 		while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) {
2332 			kprintf("\t%p\n", consumer);
2333 		}
2334 
2335 		kprintf("  pages:\n");
2336 		int32 count = 0;
2337 		for (vm_page *page = cache->page_list; page != NULL; page = page->cache_next) {
2338 			count++;
2339 			if (!showPages)
2340 				continue;
2341 
2342 			if (page->type == PAGE_TYPE_PHYSICAL) {
2343 				kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) wired_count %u\n",
2344 					page, page->physical_page_number, page->cache_offset, page->type, page->state,
2345 					page_state_to_string(page->state), page->wired_count);
2346 			} else if(page->type == PAGE_TYPE_DUMMY) {
2347 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
2348 					page, page->state, page_state_to_string(page->state));
2349 			} else
2350 				kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type);
2351 		}
2352 
2353 		if (!showPages)
2354 			kprintf("\t%ld in cache\n", count);
2355 	}
2356 
2357 	return 0;
2358 }
2359 
2360 
2361 static void
2362 dump_area_struct(vm_area *area, bool mappings)
2363 {
2364 	kprintf("AREA: %p\n", area);
2365 	kprintf("name:\t\t'%s'\n", area->name);
2366 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
2367 	kprintf("id:\t\t0x%lx\n", area->id);
2368 	kprintf("base:\t\t0x%lx\n", area->base);
2369 	kprintf("size:\t\t0x%lx\n", area->size);
2370 	kprintf("protection:\t0x%lx\n", area->protection);
2371 	kprintf("wiring:\t\t0x%x\n", area->wiring);
2372 	kprintf("memory_type:\t0x%x\n", area->memory_type);
2373 	kprintf("ref_count:\t%ld\n", area->ref_count);
2374 	kprintf("cache_ref:\t%p\n", area->cache_ref);
2375 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
2376 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
2377 	kprintf("cache_next:\t%p\n", area->cache_next);
2378 	kprintf("cache_prev:\t%p\n", area->cache_prev);
2379 
2380 	vm_area_mappings::Iterator iterator = area->mappings.GetIterator();
2381 	if (mappings) {
2382 		kprintf("page mappings:\n");
2383 		while (iterator.HasNext()) {
2384 			vm_page_mapping *mapping = iterator.Next();
2385 			kprintf("  %p", mapping->page);
2386 		}
2387 		kprintf("\n");
2388 	} else {
2389 		uint32 count = 0;
2390 		while (iterator.Next() != NULL) {
2391 			count++;
2392 		}
2393 		kprintf("page mappings:\t%lu\n", count);
2394 	}
2395 }
2396 
2397 
2398 static int
2399 dump_area(int argc, char **argv)
2400 {
2401 	bool mappings = false;
2402 	bool found = false;
2403 	int32 index = 1;
2404 	vm_area *area;
2405 	addr_t num;
2406 
2407 	if (argc < 2) {
2408 		kprintf("usage: area [-m] <id|address|name>\n");
2409 		return 0;
2410 	}
2411 
2412 	if (!strcmp(argv[1], "-m")) {
2413 		mappings = true;
2414 		index++;
2415 	}
2416 
2417 	num = strtoul(argv[index], NULL, 0);
2418 
2419 	// walk through the area list, looking for the arguments as a name
2420 	struct hash_iterator iter;
2421 
2422 	hash_open(sAreaHash, &iter);
2423 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
2424 		if ((area->name != NULL && !strcmp(argv[index], area->name))
2425 			|| (num != 0
2426 				&& ((addr_t)area->id == num
2427 					|| (area->base <= num && area->base + area->size > num)))) {
2428 			dump_area_struct(area, mappings);
2429 			found = true;
2430 		}
2431 	}
2432 
2433 	if (!found)
2434 		kprintf("could not find area %s (%ld)\n", argv[index], num);
2435 	return 0;
2436 }
2437 
2438 
2439 static int
2440 dump_area_list(int argc, char **argv)
2441 {
2442 	vm_area *area;
2443 	struct hash_iterator iter;
2444 	const char *name = NULL;
2445 	int32 id = 0;
2446 
2447 	if (argc > 1) {
2448 		id = strtoul(argv[1], NULL, 0);
2449 		if (id == 0)
2450 			name = argv[1];
2451 	}
2452 
2453 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
2454 
2455 	hash_open(sAreaHash, &iter);
2456 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
2457 		if ((id != 0 && area->address_space->id != id)
2458 			|| (name != NULL && strstr(area->name, name) == NULL))
2459 			continue;
2460 
2461 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id, (void *)area->base,
2462 			(void *)area->size, area->protection, area->wiring, area->name);
2463 	}
2464 	hash_close(sAreaHash, &iter, false);
2465 	return 0;
2466 }
2467 
2468 
2469 static int
2470 dump_available_memory(int argc, char **argv)
2471 {
2472 	kprintf("Available memory: %Ld/%lu bytes\n",
2473 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
2474 	return 0;
2475 }
2476 
2477 
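/*!
	Deletes the complete contents of the given address space: all reserved
	ranges are removed, and every remaining area is deleted.
*/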
2478 status_t
2479 vm_delete_areas(struct vm_address_space *addressSpace)
2480 {
2481 	vm_area *area;
2482 	vm_area *next, *last = NULL;
2483 
2484 	TRACE(("vm_delete_areas: called on address space 0x%lx\n", addressSpace->id));
2485 
2486 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
2487 
2488 	// remove all reserved areas in this address space
2489 
2490 	for (area = addressSpace->areas; area; area = next) {
2491 		next = area->address_space_next;
2492 
2493 		if (area->id == RESERVED_AREA_ID) {
2494 			// just remove it
2495 			if (last)
2496 				last->address_space_next = area->address_space_next;
2497 			else
2498 				addressSpace->areas = area->address_space_next;
2499 
2500 			vm_put_address_space(addressSpace);
2501 			free(area);
2502 			continue;
2503 		}
2504 
2505 		last = area;
2506 	}
2507 
2508 	// delete all the areas in this address space
2509 
2510 	for (area = addressSpace->areas; area; area = next) {
2511 		next = area->address_space_next;
2512 
2513 		// decrement the ref on this area; this may actually push the ref count
2514 		// below zero if there is a concurrent delete_area() on it, but that's OK here
2515 		if (!_vm_put_area(area, true))
2516 			dprintf("vm_delete_areas() did not delete area %p\n", area);
2517 	}
2518 
2519 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
2520 
2521 	return B_OK;
2522 }
2523 
2524 
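/*!
	Returns the ID of the area in \a team's address space that contains
	\a address, or B_ERROR if no area covers that address.
*/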
2525 static area_id
2526 vm_area_for(team_id team, addr_t address)
2527 {
2528 	vm_address_space *addressSpace;
2529 	area_id id = B_ERROR;
2530 
2531 	addressSpace = vm_get_address_space_by_id(team);
2532 	if (addressSpace == NULL)
2533 		return B_BAD_TEAM_ID;
2534 
2535 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
2536 
2537 	vm_area *area = vm_area_lookup(addressSpace, address);
2538 	if (area != NULL)
2539 		id = area->id;
2540 
2541 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2542 	vm_put_address_space(addressSpace);
2543 
2544 	return id;
2545 }
2546 
2547 
2548 /*!
2549 	Frees physical pages that were used during the boot process.
2550 */
2551 static void
2552 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
2553 {
2554 	// free all physical pages in the specified range
2555 
2556 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
2557 		addr_t physicalAddress;
2558 		uint32 flags;
2559 
2560 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
2561 			vm_page *page = vm_lookup_page(current / B_PAGE_SIZE);
2562 			if (page != NULL)
2563 				vm_page_set_state(page, PAGE_STATE_FREE);
2564 		}
2565 	}
2566 
2567 	// unmap the memory
2568 	map->ops->unmap(map, start, end - 1);
2569 }
2570 
2571 
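/*!
	Frees and unmaps those parts of the given kernel virtual range that are
	not covered by any area - this reclaims memory that was only used by the
	boot loader.
*/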
2572 void
2573 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
2574 {
2575 	vm_translation_map *map = &vm_kernel_address_space()->translation_map;
2576 	addr_t end = start + size;
2577 	addr_t lastEnd = start;
2578 	vm_area *area;
2579 
2580 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
2581 
2582 	// The areas are sorted in virtual address space order, so
2583 	// we just have to find the holes between them that fall
2584 	// into the range we should dispose of
2585 
2586 	map->ops->lock(map);
2587 
2588 	for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) {
2589 		addr_t areaStart = area->base;
2590 		addr_t areaEnd = areaStart + area->size;
2591 
2592 		if (area->id == RESERVED_AREA_ID)
2593 			continue;
2594 
2595 		if (areaEnd >= end) {
2596 			// we are done, the areas are already beyond what we have to free
2597 			lastEnd = end;
2598 			break;
2599 		}
2600 
2601 		if (areaStart > lastEnd) {
2602 			// this is something we can free
2603 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
2604 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
2605 		}
2606 
2607 		lastEnd = areaEnd;
2608 	}
2609 
2610 	if (lastEnd < end) {
2611 		// we can also get rid of some space at the end of the area
2612 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
2613 		unmap_and_free_physical_pages(map, lastEnd, end);
2614 	}
2615 
2616 	map->ops->unlock(map);
2617 }
2618 
2619 
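/*!
	Creates "<name>_text" and "<name>_data" areas for a preloaded image,
	wrapping the already wired memory the boot loader has set up for it.
*/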
2620 static void
2621 create_preloaded_image_areas(struct preloaded_image *image)
2622 {
2623 	char name[B_OS_NAME_LENGTH];
2624 	void *address;
2625 	int32 length;
2626 
2627 	// use file name to create a good area name
2628 	char *fileName = strrchr(image->name, '/');
2629 	if (fileName == NULL)
2630 		fileName = image->name;
2631 	else
2632 		fileName++;
2633 
2634 	length = strlen(fileName);
2635 	// make sure there is enough space for the suffix
2636 	if (length > 25)
2637 		length = 25;
2638 
2639 	memcpy(name, fileName, length);
2640 	strcpy(name + length, "_text");
2641 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
2642 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2643 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
2644 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2645 		// this will later be remapped read-only/executable by the
2646 		// ELF initialization code
2647 
2648 	strcpy(name + length, "_data");
2649 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
2650 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2651 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
2652 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2653 }
2654 
2655 
2656 /**	Frees all kernel arguments areas that were previously created from the
2657  *	kernel_args structure. Any boot loader resources contained in those
2658  *	arguments must not be accessed anymore past this point.
2659  */
2660 
2661 void
2662 vm_free_kernel_args(kernel_args *args)
2663 {
2664 	uint32 i;
2665 
2666 	TRACE(("vm_free_kernel_args()\n"));
2667 
2668 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2669 		area_id area = area_for((void *)args->kernel_args_range[i].start);
2670 		if (area >= B_OK)
2671 			delete_area(area);
2672 	}
2673 }
2674 
2675 
2676 static void
2677 allocate_kernel_args(kernel_args *args)
2678 {
2679 	uint32 i;
2680 
2681 	TRACE(("allocate_kernel_args()\n"));
2682 
2683 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2684 		void *address = (void *)args->kernel_args_range[i].start;
2685 
2686 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
2687 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2688 	}
2689 }
2690 
2691 
2692 static void
2693 unreserve_boot_loader_ranges(kernel_args *args)
2694 {
2695 	uint32 i;
2696 
2697 	TRACE(("unreserve_boot_loader_ranges()\n"));
2698 
2699 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2700 		vm_unreserve_address_range(vm_kernel_address_space_id(),
2701 			(void *)args->virtual_allocated_range[i].start,
2702 			args->virtual_allocated_range[i].size);
2703 	}
2704 }
2705 
2706 
2707 static void
2708 reserve_boot_loader_ranges(kernel_args *args)
2709 {
2710 	uint32 i;
2711 
2712 	TRACE(("reserve_boot_loader_ranges()\n"));
2713 
2714 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2715 		void *address = (void *)args->virtual_allocated_range[i].start;
2716 
2717 		// If the address is not a kernel address, we just skip it. The
2718 		// architecture specific code has to deal with it.
2719 		if (!IS_KERNEL_ADDRESS(address)) {
2720 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
2721 				address, args->virtual_allocated_range[i].size);
2722 			continue;
2723 		}
2724 
2725 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address,
2726 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
2727 		if (status < B_OK)
2728 			panic("could not reserve boot loader ranges\n");
2729 	}
2730 }
2731 
2732 
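/*!
	Finds a gap of at least \a size bytes between (or next to) the boot
	loader's virtual_allocated_range entries within the kernel address space,
	extends the adjacent entry to cover it, and returns its start address,
	or 0 if no suitable gap was found.
*/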
2733 static addr_t
2734 allocate_early_virtual(kernel_args *args, size_t size)
2735 {
2736 	addr_t spot = 0;
2737 	uint32 i;
2738 	int last_valloc_entry = 0;
2739 
2740 	size = PAGE_ALIGN(size);
2741 	// find a slot in the virtual allocation addr range
2742 	for (i = 1; i < args->num_virtual_allocated_ranges; i++) {
2743 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
2744 			+ args->virtual_allocated_range[i - 1].size;
2745 		last_valloc_entry = i;
2746 		// check to see if the space between this one and the last is big enough
2747 		if (previousRangeEnd >= KERNEL_BASE
2748 			&& args->virtual_allocated_range[i].start
2749 				- previousRangeEnd >= size) {
2750 			spot = previousRangeEnd;
2751 			args->virtual_allocated_range[i - 1].size += size;
2752 			goto out;
2753 		}
2754 	}
2755 	if (spot == 0) {
2756 		// we didn't find one between the allocation ranges - this is OK;
2757 		// see if there's a gap after the last one
2758 		addr_t lastRangeEnd
2759 			= args->virtual_allocated_range[last_valloc_entry].start
2760 				+ args->virtual_allocated_range[last_valloc_entry].size;
2761 		if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) {
2762 			spot = lastRangeEnd;
2763 			args->virtual_allocated_range[last_valloc_entry].size += size;
2764 			goto out;
2765 		}
2766 		// see if there's a gap before the first one
2767 		if (args->virtual_allocated_range[0].start > KERNEL_BASE) {
2768 			if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) {
2769 				args->virtual_allocated_range[0].start -= size;
2770 				spot = args->virtual_allocated_range[0].start;
2771 				goto out;
2772 			}
2773 		}
2774 	}
2775 
2776 out:
2777 	return spot;
2778 }
2779 
2780 
2781 static bool
2782 is_page_in_physical_memory_range(kernel_args *args, addr_t address)
2783 {
2784 	// TODO: horrible brute-force method of determining if the page can be allocated
2785 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
2786 		if (address >= args->physical_memory_range[i].start
2787 			&& address < args->physical_memory_range[i].start
2788 				+ args->physical_memory_range[i].size)
2789 			return true;
2790 	}
2791 	return false;
2792 }
2793 
2794 
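/*!
	Allocates one physical page by appending it to one of the boot loader's
	physical_allocated_range entries. Returns the page number, or 0 if no
	page could be allocated.
*/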
2795 static addr_t
2796 allocate_early_physical_page(kernel_args *args)
2797 {
2798 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
2799 		addr_t nextPage;
2800 
2801 		nextPage = args->physical_allocated_range[i].start
2802 			+ args->physical_allocated_range[i].size;
2803 		// see if the page after this allocated paddr run can be allocated
2804 		if (i + 1 < args->num_physical_allocated_ranges
2805 			&& args->physical_allocated_range[i + 1].size != 0) {
2806 			// see if the next page will collide with the next allocated range
2807 			if (nextPage >= args->physical_allocated_range[i+1].start)
2808 				continue;
2809 		}
2810 		// see if the next physical page fits in the memory block
2811 		if (is_page_in_physical_memory_range(args, nextPage)) {
2812 			// we got one!
2813 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
2814 			return nextPage / B_PAGE_SIZE;
2815 		}
2816 	}
2817 
2818 	return 0;
2819 		// could not allocate a block
2820 }
2821 
2822 
2823 /*!
2824 	This one uses the kernel_args' physical and virtual memory ranges to
2825 	allocate some pages before the VM is completely up.
2826 */
2827 addr_t
2828 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize,
2829 	uint32 attributes)
2830 {
2831 	if (physicalSize > virtualSize)
2832 		physicalSize = virtualSize;
2833 
2834 	// find the vaddr to allocate at
2835 	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
2836 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
2837 
2838 	// map the pages
2839 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
2840 		addr_t physicalAddress = allocate_early_physical_page(args);
2841 		if (physicalAddress == 0)
2842 			panic("error allocating early page!\n");
2843 
2844 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
2845 
2846 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
2847 			physicalAddress * B_PAGE_SIZE, attributes,
2848 			&allocate_early_physical_page);
2849 	}
2850 
2851 	return virtualBase;
2852 }
2853 
2854 
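/*!
	Main initialization of the VM: sets up the architecture specific
	translation map, the kernel heap and the initial slab space, the page and
	cache subsystems, and the area hash table, and then creates areas for
	everything that has already been mapped by the boot loader (heap, kernel
	image, preloaded images, idle thread kernel stacks, ...).
*/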
2855 status_t
2856 vm_init(kernel_args *args)
2857 {
2858 	struct preloaded_image *image;
2859 	void *address;
2860 	status_t err = 0;
2861 	uint32 i;
2862 
2863 	TRACE(("vm_init: entry\n"));
2864 	err = arch_vm_translation_map_init(args);
2865 	err = arch_vm_init(args);
2866 
2867 	// initialize some globals
2868 	sNextAreaID = 1;
2869 	sAreaHashLock = -1;
2870 	sAvailableMemoryLock.sem = -1;
2871 
2872 	vm_page_init_num_pages(args);
2873 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
2874 
2875 	// reduce the heap size if we don't have that much RAM
2876 	size_t heapSize = HEAP_SIZE;
2877 	if (sAvailableMemory < 100 * 1024 * 1024)
2878 		heapSize /= 4;
2879 	else if (sAvailableMemory < 200 * 1024 * 1024)
2880 		heapSize /= 2;
2881 
2882 	// map in the new heap and initialize it
2883 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
2884 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2885 	TRACE(("heap at 0x%lx\n", heapBase));
2886 	heap_init(heapBase, heapSize);
2887 
2888 	size_t slabInitialSize = 2 * B_PAGE_SIZE;
2889 	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
2890 		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2891 	slab_init(args, slabInitialBase, slabInitialSize);
2892 
2893 	// initialize the free page list and physical page mapper
2894 	vm_page_init(args);
2895 
2896 	// initialize the hash table that stores the pages mapped to caches
2897 	vm_cache_init(args);
2898 
2899 	{
2900 		vm_area *area;
2901 		sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area,
2902 			&area_compare, &area_hash);
2903 		if (sAreaHash == NULL)
2904 			panic("vm_init: error creating area hash table\n");
2905 	}
2906 
2907 	vm_address_space_init();
2908 	reserve_boot_loader_ranges(args);
2909 
2910 	// do any further initialization that the architecture dependent layers may need now
2911 	arch_vm_translation_map_init_post_area(args);
2912 	arch_vm_init_post_area(args);
2913 	vm_page_init_post_area(args);
2914 
2915 	// allocate areas to represent stuff that already exists
2916 
2917 	address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE);
2918 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
2919 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2920 
2921 	address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
2922 	create_area("initial slab space", &address, B_EXACT_ADDRESS,
2923 		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
2924 		| B_KERNEL_WRITE_AREA);
2925 
2926 	allocate_kernel_args(args);
2927 
2928 	args->kernel_image.name = "kernel";
2929 		// the lazy boot loader currently doesn't set the kernel's name...
2930 	create_preloaded_image_areas(&args->kernel_image);
2931 
2932 	// allocate areas for preloaded images
2933 	for (image = args->preloaded_images; image != NULL; image = image->next) {
2934 		create_preloaded_image_areas(image);
2935 	}
2936 
2937 	// allocate kernel stacks
2938 	for (i = 0; i < args->num_cpus; i++) {
2939 		char name[64];
2940 
2941 		sprintf(name, "idle thread %lu kstack", i + 1);
2942 		address = (void *)args->cpu_kstack[i].start;
2943 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
2944 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2945 	}
2946 
2947 	// add some debugger commands
2948 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
2949 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
2950 	add_debugger_command("cache_ref", &dump_cache, "Dump vm_cache");
2951 	add_debugger_command("cache", &dump_cache, "Dump vm_cache");
2952 	add_debugger_command("cache_chain", &dump_cache_chain, "Dump vm_cache chain");
2953 	add_debugger_command("avail", &dump_available_memory, "Dump available memory");
2954 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
2955 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
2956 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
2957 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
2958 
2959 	TRACE(("vm_init: exit\n"));
2960 
2961 	return err;
2962 }
2963 
2964 
2965 status_t
2966 vm_init_post_sem(kernel_args *args)
2967 {
2968 	vm_area *area;
2969 
2970 	// This frees all unused boot loader resources and makes their space available again
2971 	arch_vm_init_end(args);
2972 	unreserve_boot_loader_ranges(args);
2973 
2974 	// fill in all of the semaphores that were not allocated before
2975 	// since we're still single threaded and only the kernel address space exists,
2976 	// it isn't that hard to find all of the ones we need to create
2977 
2978 	benaphore_init(&sAvailableMemoryLock, "available memory lock");
2979 	arch_vm_translation_map_init_post_sem(args);
2980 	vm_address_space_init_post_sem();
2981 
2982 	for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) {
2983 		if (area->id == RESERVED_AREA_ID)
2984 			continue;
2985 
2986 		if (area->cache_ref->lock.sem < 0)
2987 			mutex_init(&area->cache_ref->lock, "cache_ref_mutex");
2988 	}
2989 
2990 	sAreaHashLock = create_sem(WRITE_COUNT, "area hash");
2991 
2992 	slab_init_post_sem();
2993 
2994 	return heap_init_post_sem(args);
2995 }
2996 
2997 
2998 status_t
2999 vm_init_post_thread(kernel_args *args)
3000 {
3001 	vm_page_init_post_thread(args);
3002 	vm_daemon_init();
3003 	vm_low_memory_init();
3004 
3005 	return heap_init_post_thread(args);
3006 }
3007 
3008 
3009 status_t
3010 vm_init_post_modules(kernel_args *args)
3011 {
3012 	return arch_vm_init_post_modules(args);
3013 }
3014 
3015 
3016 void
3017 permit_page_faults(void)
3018 {
3019 	struct thread *thread = thread_get_current_thread();
3020 	if (thread != NULL)
3021 		atomic_add(&thread->page_faults_allowed, 1);
3022 }
3023 
3024 
3025 void
3026 forbid_page_faults(void)
3027 {
3028 	struct thread *thread = thread_get_current_thread();
3029 	if (thread != NULL)
3030 		atomic_add(&thread->page_faults_allowed, -1);
3031 }
3032 
3033 
3034 status_t
3035 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3036 	addr_t *newIP)
3037 {
3038 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, faultAddress));
3039 
3040 	*newIP = 0;
3041 
3042 	status_t status = vm_soft_fault(address, isWrite, isUser);
3043 	if (status < B_OK) {
3044 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3045 			strerror(status), address, faultAddress, isWrite, isUser,
3046 			thread_get_current_thread_id());
3047 		if (!isUser) {
3048 			struct thread *thread = thread_get_current_thread();
3049 			if (thread != NULL && thread->fault_handler != 0) {
3050 				// this will cause the arch dependent page fault handler to
3051 				// modify the IP on the interrupt frame or whatever to return
3052 				// to this address
3053 				*newIP = thread->fault_handler;
3054 			} else {
3055 				// unhandled page fault in the kernel
3056 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
3057 					address, faultAddress);
3058 			}
3059 		} else {
3060 #if 1
3061 			// ToDo: remove me once we have proper userland debugging support (and tools)
3062 			vm_address_space *addressSpace = vm_get_current_user_address_space();
3063 			vm_area *area;
3064 
3065 			acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3066 			area = vm_area_lookup(addressSpace, faultAddress);
3067 
3068 			dprintf("vm_page_fault: sending team \"%s\" 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n",
3069 				thread_get_current_thread()->team->name,
3070 				thread_get_current_thread()->team->id, faultAddress,
3071 				area ? area->name : "???", faultAddress - (area ? area->base : 0x0));
3072 
3073 			// We can print a stack trace of the userland thread here.
3074 #if 1
3075 			if (area) {
3076 				struct stack_frame {
3077 					#if defined(__INTEL__) || defined(__POWERPC__)
3078 						struct stack_frame*	previous;
3079 						void*				return_address;
3080 					#else
3081 						// ...
3082 					#endif
3083 				} frame;
3084 #ifdef __INTEL__
3085 				struct iframe *iframe = i386_get_user_iframe();
3086 				if (iframe == NULL)
3087 					panic("iframe is NULL!");
3088 
3089 				status_t status = user_memcpy(&frame, (void *)iframe->ebp,
3090 					sizeof(struct stack_frame));
3091 #elif defined(__POWERPC__)
3092 				struct iframe *iframe = ppc_get_user_iframe();
3093 				if (iframe == NULL)
3094 					panic("iframe is NULL!");
3095 
3096 				status_t status = user_memcpy(&frame, (void *)iframe->r1,
3097 					sizeof(struct stack_frame));
3098 #else
3099 #	warning "vm_page_fault() stack trace won't work"
3100 				status_t status = B_ERROR;
3101 #endif
3102 
3103 				dprintf("stack trace:\n");
3104 				while (status == B_OK) {
3105 					dprintf("  %p", frame.return_address);
3106 					area = vm_area_lookup(addressSpace,
3107 						(addr_t)frame.return_address);
3108 					if (area) {
3109 						dprintf(" (%s + %#lx)", area->name,
3110 							(addr_t)frame.return_address - area->base);
3111 					}
3112 					dprintf("\n");
3113 
3114 					status = user_memcpy(&frame, frame.previous,
3115 						sizeof(struct stack_frame));
3116 				}
3117 			}
3118 #endif	// 1 (stack trace)
3119 
3120 			release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3121 			vm_put_address_space(addressSpace);
3122 #endif
3123 			if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV))
3124 				send_signal(team_get_current_team_id(), SIGSEGV);
3125 		}
3126 	}
3127 
3128 	return B_HANDLED_INTERRUPT;
3129 }
3130 
3131 
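/*!
	Acquires a reference to and locks the source cache of \a cache. Returns
	B_ERROR if the cache has no source, B_BUSY if the source is currently
	busy (i.e. being merged into its consumer), and retries if the source
	changed while we were locking it.
*/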
3132 static inline status_t
3133 fault_acquire_locked_source(vm_cache *cache, vm_cache_ref **_sourceRef)
3134 {
3135 retry:
3136 	vm_cache *source = cache->source;
3137 	if (source == NULL)
3138 		return B_ERROR;
3139 	if (source->busy)
3140 		return B_BUSY;
3141 
3142 	vm_cache_ref *sourceRef = source->ref;
3143 	vm_cache_acquire_ref(sourceRef);
3144 
3145 	mutex_lock(&sourceRef->lock);
3146 
3147 	if (sourceRef->cache != cache->source || sourceRef->cache->busy) {
3148 		mutex_unlock(&sourceRef->lock);
3149 		vm_cache_release_ref(sourceRef);
3150 		goto retry;
3151 	}
3152 
3153 	*_sourceRef = sourceRef;
3154 	return B_OK;
3155 }
3156 
3157 
3158 /*!
3159 	Inserts a busy dummy page into a cache, and makes sure the cache won't go
3160 	away by grabbing a reference to it.
3161 */
3162 static inline void
3163 fault_insert_dummy_page(vm_cache_ref *cacheRef, vm_page &dummyPage, off_t cacheOffset)
3164 {
3165 	dummyPage.state = PAGE_STATE_BUSY;
3166 	vm_cache_acquire_ref(cacheRef);
3167 	vm_cache_insert_page(cacheRef, &dummyPage, cacheOffset);
3168 }
3169 
3170 
3171 /*!
3172 	Removes the busy dummy page from a cache, and releases its reference to
3173 	the cache.
3174 */
3175 static inline void
3176 fault_remove_dummy_page(vm_page &dummyPage, bool isLocked)
3177 {
3178 	vm_cache_ref *cacheRef = dummyPage.cache->ref;
3179 	if (!isLocked)
3180 		mutex_lock(&cacheRef->lock);
3181 
3182 	vm_cache_remove_page(cacheRef, &dummyPage);
3183 
3184 	if (!isLocked)
3185 		mutex_unlock(&cacheRef->lock);
3186 
3187 	vm_cache_release_ref(cacheRef);
3188 
3189 	dummyPage.state = PAGE_STATE_INACTIVE;
3190 }
3191 
3192 
3193 /*!
3194 	Finds a page at the specified \a cacheOffset in either the \a topCacheRef
3195 	or in its source chain. Will also page in a missing page in case there is
3196 	a cache that has the page.
3197 	If it couldn't find a page, \a _pageRef is set to the cache that should
3198 	get the page; otherwise, it is set to the cache that contains the page.
3199 	It always grabs a reference to the returned cache ref, and also locks it.
3200 */
3201 static inline vm_page *
3202 fault_find_page(vm_translation_map *map, vm_cache_ref *topCacheRef,
3203 	off_t cacheOffset, bool isWrite, vm_page &dummyPage, vm_cache_ref **_pageRef)
3204 {
3205 	vm_cache_ref *cacheRef = topCacheRef;
3206 	vm_cache_ref *lastCacheRef = NULL;
3207 	vm_page *page = NULL;
3208 
3209 	vm_cache_acquire_ref(cacheRef);
3210 	mutex_lock(&cacheRef->lock);
3211 		// we release this later in the loop
3212 
3213 	while (cacheRef != NULL) {
3214 		if (lastCacheRef != NULL)
3215 			vm_cache_release_ref(lastCacheRef);
3216 
3217 		// we hold the lock of the cacheRef at this point
3218 
3219 		lastCacheRef = cacheRef;
3220 
3221 		for (;;) {
3222 			page = vm_cache_lookup_page(cacheRef, cacheOffset);
3223 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
3224 				vm_page_set_state(page, PAGE_STATE_BUSY);
3225 				break;
3226 			}
3227 			if (page == NULL || page == &dummyPage)
3228 				break;
3229 
3230 			// page must be busy
3231 			// ToDo: don't wait forever!
3232 			mutex_unlock(&cacheRef->lock);
3233 			snooze(20000);
3234 			mutex_lock(&cacheRef->lock);
3235 		}
3236 
3237 		if (page != NULL && page != &dummyPage)
3238 			break;
3239 
3240 		// The current cache does not contain the page we're looking for
3241 
3242 		// If we're at the top most cache, insert the dummy page here to keep other threads
3243 		// from faulting on the same address and chasing us up the cache chain
3244 		if (cacheRef == topCacheRef && dummyPage.state != PAGE_STATE_BUSY)
3245 			fault_insert_dummy_page(cacheRef, dummyPage, cacheOffset);
3246 
3247 		// see if the vm_store has it
3248 		vm_store *store = cacheRef->cache->store;
3249 		if (store->ops->has_page != NULL && store->ops->has_page(store, cacheOffset)) {
3250 			size_t bytesRead;
3251 			iovec vec;
3252 
3253 			vec.iov_len = bytesRead = B_PAGE_SIZE;
3254 
3255 			mutex_unlock(&cacheRef->lock);
3256 
3257 			page = vm_page_allocate_page(PAGE_STATE_FREE);
3258 
3259 			dummyPage.queue_next = page;
3260 			dummyPage.busy_reading = true;
3261 				// we mark that page busy reading, so that the file cache can ignore
3262 				// us in case it works on the very same page
3263 
3264 			map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE, (addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT);
3265 			status_t status = store->ops->read(store, cacheOffset, &vec, 1, &bytesRead, false);
3266 			if (status < B_OK) {
3267 				// TODO: real error handling!
3268 				panic("reading from store %p (cacheRef %p) returned: %s!\n", store, cacheRef, strerror(status));
3269 			}
3270 			map->ops->put_physical_page((addr_t)vec.iov_base);
3271 
3272 			mutex_lock(&cacheRef->lock);
3273 
3274 			if (cacheRef == topCacheRef)
3275 				fault_remove_dummy_page(dummyPage, true);
3276 
3277 			// We insert the queue_next here, because someone else could have
3278 			// replaced our page
3279 			vm_cache_insert_page(cacheRef, dummyPage.queue_next, cacheOffset);
3280 
3281 			if (dummyPage.queue_next != page) {
3282 				// Indeed, the page got replaced by someone else - we can safely
3283 				// throw our page away now
3284 				vm_page_set_state(page, PAGE_STATE_FREE);
3285 				page = dummyPage.queue_next;
3286 			}
3287 			break;
3288 		}
3289 
3290 		vm_cache_ref *nextCacheRef;
3291 		status_t status = fault_acquire_locked_source(cacheRef->cache, &nextCacheRef);
3292 		if (status == B_BUSY) {
3293 			// the source cache is currently in the process of being merged
3294 			// with its only consumer (cacheRef); since its pages are moved
3295 			// upwards, too, we try this cache again
3296 			mutex_unlock(&cacheRef->lock);
3297 			mutex_lock(&cacheRef->lock);
3298 			lastCacheRef = NULL;
3299 			continue;
3300 		} else if (status < B_OK)
3301 			nextCacheRef = NULL;
3302 
3303 		mutex_unlock(&cacheRef->lock);
3304 			// at this point, we still hold a ref to this cache (through lastCacheRef)
3305 
3306 		cacheRef = nextCacheRef;
3307 	}
3308 
3309 	if (page == NULL) {
3310 		// there was no adequate page, determine the cache for a clean one
3311 		if (cacheRef == NULL) {
3312 			// We rolled off the end of the cache chain, so we need to decide which
3313 			// cache will get the new page we're about to create.
3314 			cacheRef = isWrite ? topCacheRef : lastCacheRef;
3315 				// Read-only pages go into the deepest cache - only the
3316 				// top most cache may have direct write access.
3317 			vm_cache_acquire_ref(cacheRef);
3318 			mutex_lock(&cacheRef->lock);
3319 		}
3320 
3321 		// release the reference of the last vm_cache_ref we still have from the loop above
3322 		if (lastCacheRef != NULL)
3323 			vm_cache_release_ref(lastCacheRef);
3324 	} else {
3325 		// we still own a reference to the cacheRef
3326 	}
3327 
3328 	*_pageRef = cacheRef;
3329 	return page;
3330 }
3331 
3332 
3333 /*!
3334 	Returns the page that should be mapped into the area that got the fault.
3335 	It returns the owner of the page in \a sourceRef - it keeps a reference
3336 	to it, and has also locked it on exit.
3337 */
3338 static inline vm_page *
3339 fault_get_page(vm_translation_map *map, vm_cache_ref *topCacheRef,
3340 	off_t cacheOffset, bool isWrite, vm_page &dummyPage, vm_cache_ref **_sourceRef)
3341 {
3342 	vm_cache_ref *cacheRef;
3343 	vm_page *page = fault_find_page(map, topCacheRef, cacheOffset, isWrite,
3344 		dummyPage, &cacheRef);
3345 	if (page == NULL) {
3346 		// we still haven't found a page, so we allocate a clean one
3347 
3348 		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
3349 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number));
3350 
3351 		// Insert the new page into our cache, and replace it with the dummy page if necessary
3352 
3353 		// if we inserted a dummy page into this cache, we have to remove it now
3354 		if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cacheRef->cache)
3355 			fault_remove_dummy_page(dummyPage, true);
3356 
3357 		vm_cache_insert_page(cacheRef, page, cacheOffset);
3358 
3359 		if (dummyPage.state == PAGE_STATE_BUSY) {
3360 			// we had inserted the dummy page in another cache, so let's remove it from there
3361 			fault_remove_dummy_page(dummyPage, false);
3362 		}
3363 	}
3364 
3365 	// We now have the page and a cache it belongs to - we now need to make
3366 	// sure that the area's cache can access it, too, and sees the correct data
3367 
3368 	if (page->cache != topCacheRef->cache && isWrite) {
3369 		// now we have a page that has the data we want, but in the wrong cache object
3370 		// so we need to copy it and stick it into the top cache
3371 		vm_page *sourcePage = page;
3372 		void *source, *dest;
3373 
3374 		// ToDo: if memory is low, it might be a good idea to steal the page
3375 		//	from our source cache - if possible, that is
3376 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
3377 		page = vm_page_allocate_page(PAGE_STATE_FREE);
3378 
3379 		// try to get a mapping for the src and dest page so we can copy it
3380 		for (;;) {
3381 			map->ops->get_physical_page(sourcePage->physical_page_number * B_PAGE_SIZE,
3382 				(addr_t *)&source, PHYSICAL_PAGE_CAN_WAIT);
3383 
3384 			if (map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE,
3385 					(addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT) == B_OK)
3386 				break;
3387 
3388 			// it couldn't map the second one, so sleep and retry
3389 			// keeps an extremely rare deadlock from occurring
3390 			map->ops->put_physical_page((addr_t)source);
3391 			snooze(5000);
3392 		}
3393 
3394 		memcpy(dest, source, B_PAGE_SIZE);
3395 		map->ops->put_physical_page((addr_t)source);
3396 		map->ops->put_physical_page((addr_t)dest);
3397 
3398 		vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE);
3399 
3400 		mutex_unlock(&cacheRef->lock);
3401 		mutex_lock(&topCacheRef->lock);
3402 
3403 		// Insert the new page into our cache, and replace it with the dummy page if necessary
3404 
3405 		// if we inserted a dummy page into this cache, we have to remove it now
3406 		if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == topCacheRef->cache)
3407 			fault_remove_dummy_page(dummyPage, true);
3408 
3409 		vm_cache_insert_page(topCacheRef, page, cacheOffset);
3410 
3411 		if (dummyPage.state == PAGE_STATE_BUSY) {
3412 			// we had inserted the dummy page in another cache, so let's remove it from there
3413 			fault_remove_dummy_page(dummyPage, false);
3414 		}
3415 
3416 		vm_cache_release_ref(cacheRef);
3417 
3418 		cacheRef = topCacheRef;
3419 		vm_cache_acquire_ref(cacheRef);
3420 	}
3421 
3422 	*_sourceRef = cacheRef;
3423 	return page;
3424 }
3425 
3426 
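/*!
	Resolves a page fault at \a originalAddress: looks up the faulting area,
	checks the access permissions, locates or creates the page in the area's
	cache chain (copying it into the topmost cache for a write fault on a
	copy-on-write page), and finally maps the page into the address space.
*/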
3427 static status_t
3428 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser)
3429 {
3430 	vm_address_space *addressSpace;
3431 
3432 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
3433 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
3434 
3435 	addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
3436 
3437 	if (IS_KERNEL_ADDRESS(address)) {
3438 		addressSpace = vm_get_kernel_address_space();
3439 	} else if (IS_USER_ADDRESS(address)) {
3440 		addressSpace = vm_get_current_user_address_space();
3441 		if (addressSpace == NULL) {
3442 			if (!isUser) {
3443 				dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n");
3444 				return B_BAD_ADDRESS;
3445 			} else {
3446 				// XXX weird state.
3447 				panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n");
3448 			}
3449 		}
3450 	} else {
3451 		// the hit was probably in the 64k DMZ between kernel and user space
3452 		// this keeps a user space thread from passing a buffer that crosses
3453 		// into kernel space
3454 		return B_BAD_ADDRESS;
3455 	}
3456 
3457 	atomic_add(&addressSpace->fault_count, 1);
3458 
3459 	// Get the area the fault was in
3460 
3461 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3462 
3463 	vm_area *area = vm_area_lookup(addressSpace, address);
3464 	if (area == NULL) {
3465 		release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3466 		vm_put_address_space(addressSpace);
3467 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n",
3468 			originalAddress);
3469 		return B_BAD_ADDRESS;
3470 	}
3471 
3472 	// check permissions
3473 	if (isUser && (area->protection & B_USER_PROTECTION) == 0) {
3474 		release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3475 		vm_put_address_space(addressSpace);
3476 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
3477 		return B_PERMISSION_DENIED;
3478 	}
3479 	if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
3480 		release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3481 		vm_put_address_space(addressSpace);
3482 		dprintf("write access attempted on read-only area 0x%lx at %p\n",
3483 			area->id, (void *)originalAddress);
3484 		return B_PERMISSION_DENIED;
3485 	}
3486 
3487 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
3488 	// At first, the top most cache from the area is investigated
3489 
3490 	vm_cache_ref *topCacheRef = area->cache_ref;
3491 	off_t cacheOffset = address - area->base + area->cache_offset;
3492 	int32 changeCount = addressSpace->change_count;
3493 
3494 	vm_cache_acquire_ref(topCacheRef);
3495 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3496 
3497 	mutex_lock(&topCacheRef->lock);
3498 
3499 	// See if this cache has a fault handler - this will do all the work for us
3500 	{
3501 		vm_store *store = topCacheRef->cache->store;
3502 		if (store->ops->fault != NULL) {
3503 			// Note, since the page fault is resolved with interrupts enabled, the
3504 			// fault handler could be called more than once for the same reason -
3505 			// the store must take this into account
3506 			status_t status = store->ops->fault(store, addressSpace, cacheOffset);
3507 			if (status != B_BAD_HANDLER) {
3508 				mutex_unlock(&topCacheRef->lock);
3509 				vm_cache_release_ref(topCacheRef);
3510 				vm_put_address_space(addressSpace);
3511 				return status;
3512 			}
3513 		}
3514 	}
3515 
3516 	mutex_unlock(&topCacheRef->lock);
3517 
3518 	// The top most cache has no fault handler, so let's see if the cache or its sources
3519 	// already have the page we're searching for (we're going from top to bottom)
3520 
3521 	vm_translation_map *map = &addressSpace->translation_map;
3522 	vm_page dummyPage;
3523 	dummyPage.cache = NULL;
3524 	dummyPage.state = PAGE_STATE_INACTIVE;
3525 	dummyPage.type = PAGE_TYPE_DUMMY;
3526 	dummyPage.wired_count = 0;
3527 
3528 	vm_cache_ref *pageSourceRef;
3529 	vm_page *page = fault_get_page(map, topCacheRef, cacheOffset, isWrite,
3530 		dummyPage, &pageSourceRef);
3531 
3532 	status_t status = B_OK;
3533 
3534 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3535 	if (changeCount != addressSpace->change_count) {
3536 		// something may have changed, see if the address is still valid
3537 		area = vm_area_lookup(addressSpace, address);
3538 		if (area == NULL
3539 			|| area->cache_ref != topCacheRef
3540 			|| (address - area->base + area->cache_offset) != cacheOffset) {
3541 			dprintf("vm_soft_fault: address space layout changed, affecting the ongoing soft fault\n");
3542 			status = B_BAD_ADDRESS;
3543 		}
3544 	}
3545 
3546 	if (status == B_OK) {
3547 		// All went fine, all there is left to do is to map the page into the address space
3548 
3549 		// In case this is a copy-on-write page, we need to unmap it from the area now
3550 		if (isWrite && page->cache == topCacheRef->cache)
3551 			vm_unmap_pages(area, address, B_PAGE_SIZE);
3552 
3553 		// TODO: there is currently no mechanism to prevent a page being mapped
3554 		//	more than once in case of a second page fault!
3555 
3556 		// If the page doesn't reside in the area's cache, we need to make sure it's
3557 		// mapped read-only, so that we cannot overwrite someone else's data (copy-on-write)
3558 		uint32 newProtection = area->protection;
3559 		if (page->cache != topCacheRef->cache && !isWrite)
3560 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
3561 
3562 		vm_map_page(area, page, address, newProtection);
3563 	}
3564 
3565 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3566 
3567 	mutex_unlock(&pageSourceRef->lock);
3568 	vm_cache_release_ref(pageSourceRef);
3569 
3570 	if (dummyPage.state == PAGE_STATE_BUSY) {
3571 		// We still have the dummy page in the cache - that happens if we didn't need
3572 		// to allocate a new page before, but could use one in another cache
3573 		fault_remove_dummy_page(dummyPage, false);
3574 	}
3575 
3576 	vm_cache_release_ref(topCacheRef);
3577 	vm_put_address_space(addressSpace);
3578 
3579 	return status;
3580 }
3581 
3582 
3583 /*! You must have the address space's sem held */
3584 vm_area *
3585 vm_area_lookup(vm_address_space *addressSpace, addr_t address)
3586 {
3587 	vm_area *area;
3588 
3589 	// check the areas list first
3590 	area = addressSpace->area_hint;
3591 	if (area && area->base <= address && area->base + (area->size - 1) >= address)
3592 		goto found;
3593 
3594 	for (area = addressSpace->areas; area != NULL; area = area->address_space_next) {
3595 		if (area->id == RESERVED_AREA_ID)
3596 			continue;
3597 
3598 		if (area->base <= address && area->base + (area->size - 1) >= address)
3599 			break;
3600 	}
3601 
3602 found:
3603 	// If the ref count is zero, the area is in the middle of being
3604 	// destroyed in _vm_put_area(); pretend it doesn't exist.
3605 	if (area && area->ref_count == 0)
3606 		return NULL;
3607 
3608 	if (area)
3609 		addressSpace->area_hint = area;
3610 
3611 	return area;
3612 }
3613 
3614 
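/*!	Maps the physical page at \a paddr into the kernel address space by
	calling the kernel translation map's get_physical_page() hook, and
	returns the resulting virtual address in \a _vaddr.
	The mapping must be released again with vm_put_physical_page().
*/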
3615 status_t
3616 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, uint32 flags)
3617 {
3618 	return (*vm_kernel_address_space()->translation_map.ops->get_physical_page)(paddr, _vaddr, flags);
3619 }
3620 
3621 
3622 status_t
3623 vm_put_physical_page(addr_t vaddr)
3624 {
3625 	return (*vm_kernel_address_space()->translation_map.ops->put_physical_page)(vaddr);
3626 }
3627 
3628 
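/*!	Returns \a amount bytes of memory that were previously reserved via
	vm_try_reserve_memory() back to the pool of available memory.
*/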
3629 void
3630 vm_unreserve_memory(size_t amount)
3631 {
3632 	benaphore_lock(&sAvailableMemoryLock);
3633 
3634 	sAvailableMemory += amount;
3635 
3636 	benaphore_unlock(&sAvailableMemoryLock);
3637 }
3638 
3639 
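/*!	Tries to reserve \a amount bytes of the globally available memory.
	Returns \c B_OK on success, or \c B_NO_MEMORY if the request cannot
	be satisfied.
*/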
3640 status_t
3641 vm_try_reserve_memory(size_t amount)
3642 {
3643 	status_t status;
3644 	benaphore_lock(&sAvailableMemoryLock);
3645 
3646 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
3647 
3648 	if (sAvailableMemory > amount) {
3649 		sAvailableMemory -= amount;
3650 		status = B_OK;
3651 	} else
3652 		status = B_NO_MEMORY;
3653 
3654 	benaphore_unlock(&sAvailableMemoryLock);
3655 	return status;
3656 }
3657 
3658 
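/*!	Sets the architecture specific memory type of the given area, whose
	physical backing starts at \a physicalBase; the actual work is done
	by arch_vm_set_memory_type().
*/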
3659 status_t
3660 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
3661 {
3662 	vm_area *area = vm_get_area(id);
3663 	if (area == NULL)
3664 		return B_BAD_VALUE;
3665 
3666 	status_t status = arch_vm_set_memory_type(area, physicalBase, type);
3667 
3668 	vm_put_area(area);
3669 	return status;
3670 }
3671 
3672 
3673 /**	This function enforces some protection properties:
3674  *	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
3675  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
3676  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
3677  *	   and B_KERNEL_WRITE_AREA.
3678  */
3679 
3680 static void
3681 fix_protection(uint32 *protection)
3682 {
3683 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
3684 		if ((*protection & B_USER_PROTECTION) == 0
3685 			|| (*protection & B_WRITE_AREA) != 0)
3686 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
3687 		else
3688 			*protection |= B_KERNEL_READ_AREA;
3689 	}
3690 }
3691 
3692 
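/*!	Fills the passed \a info structure with the publicly visible attributes
	of the given \a area. Some fields (lock, copy/in/out counts) are not yet
	retrieved properly, and the RAM size is taken from the area's cache, so
	it is only an approximation.
*/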
3693 static void
3694 fill_area_info(struct vm_area *area, area_info *info, size_t size)
3695 {
3696 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
3697 	info->area = area->id;
3698 	info->address = (void *)area->base;
3699 	info->size = area->size;
3700 	info->protection = area->protection;
3701 	info->lock = B_FULL_LOCK;
3702 	info->team = area->address_space->id;
3703 	info->copy_count = 0;
3704 	info->in_count = 0;
3705 	info->out_count = 0;
3706 		// ToDo: retrieve real values here!
3707 
3708 	mutex_lock(&area->cache_ref->lock);
3709 
3710 	// Note, this is a simplification; the cache could be larger than this area
3711 	info->ram_size = area->cache_ref->cache->page_count * B_PAGE_SIZE;
3712 
3713 	mutex_unlock(&area->cache_ref->lock);
3714 }
3715 
3716 
3717 /*!
3718 	Tests whether or not the area that contains the specified address
3719 	actually exists, and whether it needs any kind of locking.
3720 	Used by both lock_memory() and unlock_memory().
3721 */
3722 status_t
3723 test_lock_memory(vm_address_space *addressSpace, addr_t address,
3724 	bool &needsLocking)
3725 {
3726 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3727 
3728 	vm_area *area = vm_area_lookup(addressSpace, address);
3729 	if (area != NULL) {
3730 		// This determines if we need to lock the memory at all
3731 		needsLocking = area->cache_type != CACHE_TYPE_NULL
3732 			&& area->cache_type != CACHE_TYPE_DEVICE
3733 			&& area->wiring != B_FULL_LOCK
3734 			&& area->wiring != B_CONTIGUOUS;
3735 	}
3736 
3737 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3738 
3739 	if (area == NULL)
3740 		return B_BAD_ADDRESS;
3741 
3742 	return B_OK;
3743 }
3744 
3745 
3746 //	#pragma mark -
3747 
3748 
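/*!	Copies \a size bytes between kernel and userland buffers. Page faults
	that cannot be resolved are caught by the current thread's fault
	handler, in which case \c B_BAD_ADDRESS is returned.
*/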
3749 status_t
3750 user_memcpy(void *to, const void *from, size_t size)
3751 {
3752 	if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK)
3753 		return B_BAD_ADDRESS;
3754 	return B_OK;
3755 }
3756 
3757 
3758 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
3759  *	the string in \a to, NULL-terminating the result.
3760  *
3761  *	\param to Pointer to the destination C-string.
3762  *	\param from Pointer to the source C-string.
3763  *	\param size Size in bytes of the string buffer pointed to by \a to.
3764  *
3765  *	\return strlen(\a from), or a negative error code if one of the buffers could not be accessed.
3766  */
3767 
3768 ssize_t
3769 user_strlcpy(char *to, const char *from, size_t size)
3770 {
3771 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
3772 }
3773 
3774 
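/*!	Fills \a count bytes of the (userland) buffer \a s with the value \a c.
	Returns \c B_BAD_ADDRESS if a page fault could not be resolved.
*/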
3775 status_t
3776 user_memset(void *s, char c, size_t count)
3777 {
3778 	if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK)
3779 		return B_BAD_ADDRESS;
3780 	return B_OK;
3781 }
3782 
3783 //	#pragma mark - kernel public API
3784 
3785 
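/**	Wires the pages backing the given memory range into RAM, so that they
 *	cannot be paged out until unlock_memory() is called with the same
 *	arguments. If \a flags contains \c B_READ_DEVICE, the caller intends to
 *	write to the locked memory, so copy-on-write pages are resolved before
 *	they are wired.
 *	Illustrative usage sketch ("buffer" and "length" are hypothetical
 *	placeholders, error handling omitted):
 *		if (lock_memory(buffer, length, B_READ_DEVICE) == B_OK) {
 *			// ... let the device DMA its data into the buffer ...
 *			unlock_memory(buffer, length, B_READ_DEVICE);
 *		}
 */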
3786 long
3787 lock_memory(void *address, ulong numBytes, ulong flags)
3788 {
3789 	vm_address_space *addressSpace = NULL;
3790 	struct vm_translation_map *map;
3791 	addr_t unalignedBase = (addr_t)address;
3792 	addr_t end = unalignedBase + numBytes;
3793 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
3794 	bool isUser = IS_USER_ADDRESS(address);
3795 	bool needsLocking = true;
3796 
3797 	if (isUser)
3798 		addressSpace = vm_get_current_user_address_space();
3799 	else
3800 		addressSpace = vm_get_kernel_address_space();
3801 	if (addressSpace == NULL)
3802 		return B_ERROR;
3803 
3804 	// test if we're on an area that allows faults at all
3805 
3806 	map = &addressSpace->translation_map;
3807 
3808 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
3809 	if (status < B_OK)
3810 		goto out;
3811 	if (!needsLocking)
3812 		goto out;
3813 
3814 	for (; base < end; base += B_PAGE_SIZE) {
3815 		addr_t physicalAddress;
3816 		uint32 protection;
3818 
3819 		map->ops->lock(map);
3820 		status = map->ops->query(map, base, &physicalAddress, &protection);
3821 		map->ops->unlock(map);
3822 
3823 		if (status < B_OK)
3824 			goto out;
3825 
3826 		if ((protection & PAGE_PRESENT) != 0) {
3827 			// if B_READ_DEVICE is set, the caller intends to write to the locked
3828 			// memory, so if it hasn't been mapped writable, we'll try the soft
3829 			// fault anyway
3830 			if ((flags & B_READ_DEVICE) == 0
3831 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
3832 				// update wiring
3833 				vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3834 				if (page == NULL)
3835 					panic("couldn't lookup physical page just allocated\n");
3836 
3837 				page->wired_count++;
3838 					// TODO: needs to be atomic on all platforms!
3839 				continue;
3840 			}
3841 		}
3842 
3843 		status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser);
3844 		if (status != B_OK)	{
3845 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
3846 				(void *)unalignedBase, numBytes, flags, strerror(status));
3847 			goto out;
3848 		}
3849 
3850 		map->ops->lock(map);
3851 		status = map->ops->query(map, base, &physicalAddress, &protection);
3852 		map->ops->unlock(map);
3853 
3854 		if (status < B_OK)
3855 			goto out;
3856 
3857 		// update wiring
3858 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3859 		if (page == NULL)
3860 			panic("couldn't lookup physical page");
3861 
3862 		page->wired_count++;
3863 			// TODO: needs to be atomic on all platforms!
3864 	}
3865 
3866 out:
3867 	vm_put_address_space(addressSpace);
3868 	return status;
3869 }
3870 
3871 
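/**	Unwires the pages of the given memory range again. Every successful
 *	lock_memory() call must be balanced by a call to this function with
 *	the same arguments.
 */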
3872 long
3873 unlock_memory(void *address, ulong numBytes, ulong flags)
3874 {
3875 	vm_address_space *addressSpace = NULL;
3876 	struct vm_translation_map *map;
3877 	addr_t unalignedBase = (addr_t)address;
3878 	addr_t end = unalignedBase + numBytes;
3879 	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
3880 	bool needsLocking = true;
3881 
3882 	if (IS_USER_ADDRESS(address))
3883 		addressSpace = vm_get_current_user_address_space();
3884 	else
3885 		addressSpace = vm_get_kernel_address_space();
3886 	if (addressSpace == NULL)
3887 		return B_ERROR;
3888 
3889 	map = &addressSpace->translation_map;
3890 
3891 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
3892 	if (status < B_OK)
3893 		goto out;
3894 	if (!needsLocking)
3895 		goto out;
3896 
3897 	for (; base < end; base += B_PAGE_SIZE) {
3898 		map->ops->lock(map);
3899 
3900 		addr_t physicalAddress;
3901 		uint32 protection;
3902 		status = map->ops->query(map, base, &physicalAddress,
3903 			&protection);
3904 
3905 		map->ops->unlock(map);
3906 
3907 		if (status < B_OK)
3908 			goto out;
3909 		if ((protection & PAGE_PRESENT) == 0)
3910 			panic("calling unlock_memory() on unmapped memory!");
3911 
3912 		// update wiring
3913 		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3914 		if (page == NULL)
3915 			panic("couldn't lookup physical page");
3916 
3917 		page->wired_count--;
3918 			// TODO: needs to be atomic on all platforms!
3919 	}
3920 
3921 out:
3922 	vm_put_address_space(addressSpace);
3923 	return status;
3924 }
3925 
3926 
3927 /** According to the BeBook, this function should always succeed.
3928  *	This is no longer the case.
3929  */
3930 
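/**	Illustrative usage sketch ("buffer" and "bufferSize" are hypothetical
 *	placeholders). If more than one entry is requested and the table is
 *	large enough, the returned list is terminated by an entry of size 0:
 *		physical_entry table[8];
 *		if (get_memory_map(buffer, bufferSize, table, 8) == B_OK) {
 *			// table[] now describes the physical runs backing the buffer
 *		}
 */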
3931 long
3932 get_memory_map(const void *address, ulong numBytes, physical_entry *table, long numEntries)
3933 {
3934 	vm_address_space *addressSpace;
3935 	addr_t virtualAddress = (addr_t)address;
3936 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
3937 	addr_t physicalAddress;
3938 	status_t status = B_OK;
3939 	int32 index = -1;
3940 	addr_t offset = 0;
3941 	bool interrupts = are_interrupts_enabled();
3942 
3943 	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes, numEntries));
3944 
3945 	if (numEntries == 0 || numBytes == 0)
3946 		return B_BAD_VALUE;
3947 
3948 	// in which address space is the address to be found?
3949 	if (IS_USER_ADDRESS(virtualAddress))
3950 		addressSpace = vm_get_current_user_address_space();
3951 	else
3952 		addressSpace = vm_get_kernel_address_space();
3953 
3954 	if (addressSpace == NULL)
3955 		return B_ERROR;
3956 
3957 	vm_translation_map *map = &addressSpace->translation_map;
3958 
3959 	if (interrupts)
3960 		map->ops->lock(map);
3961 
3962 	while (offset < numBytes) {
3963 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
3964 		uint32 flags;
3965 
3966 		if (interrupts) {
3967 			status = map->ops->query(map, (addr_t)address + offset,
3968 				&physicalAddress, &flags);
3969 		} else {
3970 			status = map->ops->query_interrupt(map, (addr_t)address + offset,
3971 				&physicalAddress, &flags);
3972 		}
3973 		if (status < B_OK)
3974 			break;
3975 		if ((flags & PAGE_PRESENT) == 0) {
3976 			panic("get_memory_map() called on unmapped memory!");
3977 			return B_BAD_ADDRESS;
3978 		}
3979 
3980 		if (index < 0 && pageOffset > 0) {
3981 			physicalAddress += pageOffset;
3982 			if (bytes > B_PAGE_SIZE - pageOffset)
3983 				bytes = B_PAGE_SIZE - pageOffset;
3984 		}
3985 
3986 		// need to switch to the next physical_entry?
3987 		if (index < 0 || (addr_t)table[index].address
3988 				!= physicalAddress - table[index].size) {
3989 			if (++index + 1 > numEntries) {
3990 				// table too small
3991 				status = B_BUFFER_OVERFLOW;
3992 				break;
3993 			}
3994 			table[index].address = (void *)physicalAddress;
3995 			table[index].size = bytes;
3996 		} else {
3997 			// the page fits into the current entry, so just extend it
3998 			table[index].size += bytes;
3999 		}
4000 
4001 		offset += bytes;
4002 	}
4003 
4004 	if (interrupts)
4005 		map->ops->unlock(map);
4006 
4007 	// close the entry list
4008 
4009 	if (status == B_OK) {
4010 		// if it's only one entry, we will silently accept the missing ending
4011 		if (numEntries == 1)
4012 			return B_OK;
4013 
4014 		if (++index + 1 > numEntries)
4015 			return B_BUFFER_OVERFLOW;
4016 
4017 		table[index].address = NULL;
4018 		table[index].size = 0;
4019 	}
4020 
4021 	return status;
4022 }
4023 
4024 
4025 area_id
4026 area_for(void *address)
4027 {
4028 	return vm_area_for(vm_kernel_address_space_id(), (addr_t)address);
4029 }
4030 
4031 
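/**	Finds the first area whose name matches \a name by iterating over the
 *	global area hash table. Returns its ID, or \c B_NAME_NOT_FOUND if no
 *	such area exists.
 */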
4032 area_id
4033 find_area(const char *name)
4034 {
4035 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
4036 	struct hash_iterator iterator;
4037 	hash_open(sAreaHash, &iterator);
4038 
4039 	vm_area *area;
4040 	area_id id = B_NAME_NOT_FOUND;
4041 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
4042 		if (area->id == RESERVED_AREA_ID)
4043 			continue;
4044 
4045 		if (!strcmp(area->name, name)) {
4046 			id = area->id;
4047 			break;
4048 		}
4049 	}
4050 
4051 	hash_close(sAreaHash, &iterator, false);
4052 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
4053 
4054 	return id;
4055 }
4056 
4057 
4058 status_t
4059 _get_area_info(area_id id, area_info *info, size_t size)
4060 {
4061 	if (size != sizeof(area_info) || info == NULL)
4062 		return B_BAD_VALUE;
4063 
4064 	vm_area *area = vm_get_area(id);
4065 	if (area == NULL)
4066 		return B_BAD_VALUE;
4067 
4068 	fill_area_info(area, info, size);
4069 	vm_put_area(area);
4070 
4071 	return B_OK;
4072 }
4073 
4074 
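/**	Iterates over the areas of the given team. The \a cookie stores the
 *	base address of the area reported last, so that the iteration can be
 *	resumed with the following area.
 */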
4075 status_t
4076 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
4077 {
4078 	addr_t nextBase = *(addr_t *)cookie;
4079 
4080 	// we're already through the list
4081 	if (nextBase == (addr_t)-1)
4082 		return B_ENTRY_NOT_FOUND;
4083 
4084 	if (team == B_CURRENT_TEAM)
4085 		team = team_get_current_team_id();
4086 
4087 	vm_address_space *addressSpace;
4088 	if (!team_is_valid(team)
4089 		|| team_get_address_space(team, &addressSpace) != B_OK)
4090 		return B_BAD_VALUE;
4091 
4092 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
4093 
4094 	vm_area *area;
4095 	for (area = addressSpace->areas; area; area = area->address_space_next) {
4096 		if (area->id == RESERVED_AREA_ID)
4097 			continue;
4098 
4099 		if (area->base > nextBase)
4100 			break;
4101 	}
4102 
4103 	// make sure this area won't go away
4104 	if (area != NULL)
4105 		area = vm_get_area(area->id);
4106 
4107 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
4108 	vm_put_address_space(addressSpace);
4109 
4110 	if (area == NULL) {
4111 		*cookie = (int32)-1;
4112 		return B_ENTRY_NOT_FOUND;
4113 	}
4114 
4115 	fill_area_info(area, info, size);
4116 	*cookie = (int32)(area->base);
4117 
4118 	vm_put_area(area);
4119 
4120 	return B_OK;
4121 }
4122 
4123 
4124 status_t
4125 set_area_protection(area_id area, uint32 newProtection)
4126 {
4127 	fix_protection(&newProtection);
4128 
4129 	return vm_set_area_protection(vm_kernel_address_space_id(), area, newProtection);
4130 }
4131 
4132 
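/**	Resizes the given area as well as all other areas attached to its cache.
 *	Currently only areas backed by anonymous (temporary) caches can be
 *	resized; growing fails if a neighboring area is in the way, unless that
 *	neighbor is the reserved range the area was created in.
 */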
4133 status_t
4134 resize_area(area_id areaID, size_t newSize)
4135 {
4136 	vm_area *current;
4137 
4138 	// is newSize a multiple of B_PAGE_SIZE?
4139 	if (newSize & (B_PAGE_SIZE - 1))
4140 		return B_BAD_VALUE;
4141 
4142 	vm_area *area = vm_get_area(areaID);
4143 	if (area == NULL)
4144 		return B_BAD_VALUE;
4145 
4146 	vm_cache_ref *cacheRef = area->cache_ref;
4147 	mutex_lock(&cacheRef->lock);
4148 
4149 	// Resize all areas of this area's cache
4150 
4151 	size_t oldSize = area->size;
4152 	status_t status = B_OK;
4153 
4154 	// ToDo: we should only allow resizing of anonymous memory areas!
4155 	if (!cacheRef->cache->temporary) {
4156 		status = B_NOT_ALLOWED;
4157 		goto out;
4158 	}
4159 
4160 	// ToDo: we must lock all address spaces here!
4161 	if (oldSize < newSize) {
4162 		// We need to check if all areas of this cache can be resized
4163 
4164 		for (current = cacheRef->areas; current; current = current->cache_next) {
4165 			if (current->address_space_next
4166 				&& current->address_space_next->base <= (current->base
4167 					+ newSize)) {
4168 				// if the area was created inside a reserved area, it can also be
4169 				// resized in that area
4170 				// ToDo: if there is free space after the reserved area, it could be used as well...
4171 				vm_area *next = current->address_space_next;
4172 				if (next->id == RESERVED_AREA_ID
4173 					&& next->cache_offset <= current->base
4174 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
4175 					continue;
4176 
4177 				status = B_ERROR;
4178 				goto out;
4179 			}
4180 		}
4181 	}
4182 
4183 	// Okay, looks good so far, so let's do it
4184 
4185 	for (current = cacheRef->areas; current; current = current->cache_next) {
4186 		if (current->address_space_next
4187 			&& current->address_space_next->base <= (current->base + newSize)) {
4188 			vm_area *next = current->address_space_next;
4189 			if (next->id == RESERVED_AREA_ID
4190 				&& next->cache_offset <= current->base
4191 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
4192 				// resize reserved area
4193 				addr_t offset = current->base + newSize - next->base;
4194 				if (next->size <= offset) {
4195 					current->address_space_next = next->address_space_next;
4196 					free(next);
4197 				} else {
4198 					next->size -= offset;
4199 					next->base += offset;
4200 				}
4201 			} else {
4202 				status = B_ERROR;
4203 				break;
4204 			}
4205 		}
4206 
4207 		current->size = newSize;
4208 
4209 		// we also need to unmap all pages beyond the new size, if the area has shrunk
4210 		if (newSize < oldSize)
4211 			vm_unmap_pages(current, current->base + newSize, oldSize - newSize);
4212 	}
4213 
4214 	if (status == B_OK)
4215 		status = vm_cache_resize(cacheRef, newSize);
4216 
4217 	if (status < B_OK) {
4218 		// This shouldn't really be possible, but hey, who knows
4219 		for (current = cacheRef->areas; current; current = current->cache_next)
4220 			current->size = oldSize;
4221 	}
4222 
4223 out:
4224 	mutex_unlock(&cacheRef->lock);
4225 	vm_put_area(area);
4226 
4227 	// ToDo: we must honour the lock restrictions of this area
4228 	return status;
4229 }
4230 
4231 
4232 /**	Transfers the specified area to a new team. The caller must be the owner
4233  *	of the area (not yet enforced but probably should be).
4234  *	This function is currently not exported to the kernel namespace, but is
4235  *	only accessible using the _kern_transfer_area() syscall.
4236  */
4237 
4238 static status_t
4239 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
4240 {
4241 	vm_address_space *sourceAddressSpace;
4242 	vm_address_space *targetAddressSpace;
4243 	void *reservedAddress = NULL;
4244 	vm_area *reserved;
4245 	vm_area *area = vm_get_area(id);
4246 	if (area == NULL)
4247 		return B_BAD_VALUE;
4248 
4249 	// ToDo: check if the current team owns the area
4250 	status_t status = team_get_address_space(target, &targetAddressSpace);
4251 	if (status != B_OK)
4252 		goto err1;
4253 
4254 	// We will first remove the area, and then reserve its former
4255 	// address range so that we can later reclaim it if the
4256 	// transfer fails.
4257 
4258 	sourceAddressSpace = area->address_space;
4259 	reserved = create_reserved_area_struct(sourceAddressSpace, 0);
4260 	if (reserved == NULL) {
4261 		status = B_NO_MEMORY;
4262 		goto err2;
4263 	}
4264 
4265 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
4266 
4267 	// unmap the area in the source address space
4268 	vm_unmap_pages(area, area->base, area->size);
4269 
4270 	// TODO: there might be additional page faults at this point!
4271 
4272 	reservedAddress = (void *)area->base;
4273 	remove_area_from_address_space(sourceAddressSpace, area, true);
4274 	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
4275 		area->size, reserved);
4276 		// famous last words: this cannot fail :)
4277 
4278 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
4279 
4280 	if (status != B_OK)
4281 		goto err3;
4282 
4283 	// insert the area into the target address space
4284 
4285 	acquire_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0, 0);
4286 	// check to see if this address space has entered DELETE state
4287 	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
4288 		// okay, someone is trying to delete this address space now, so we can't
4289 		// insert the area and have to back out
4290 		status = B_BAD_TEAM_ID;
4291 		goto err4;
4292 	}
4293 
4294 	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
4295 	if (status < B_OK)
4296 		goto err4;
4297 
4298 	// The area was successfully transferred to the new team when we got here
4299 	area->address_space = targetAddressSpace;
4300 
4301 	// TODO: take area lock/wiring into account!
4302 
4303 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
4304 
4305 	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress, area->size);
4306 	vm_put_address_space(sourceAddressSpace);
4307 		// we keep the reference of the target address space for the
4308 		// area, so we only have to put the one from the source
4309 	vm_put_area(area);
4310 
4311 	return B_OK;
4312 
4313 err4:
4314 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
4315 err3:
4316 	// insert the area again into the source address space
4317 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
4318 	// check to see if this address space has entered DELETE state
4319 	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
4320 		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS, area->size, area) != B_OK) {
4321 		// We can't insert the area anymore - we have to delete it manually
4322 		vm_cache_remove_area(area->cache_ref, area);
4323 		vm_cache_release_ref(area->cache_ref);
4324 		free(area->name);
4325 		free(area);
4326 		area = NULL;
4327 	}
4328 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
4329 err2:
4330 	vm_put_address_space(targetAddressSpace);
4331 err1:
4332 	if (area != NULL)
4333 		vm_put_area(area);
4334 	return status;
4335 }
4336 
4337 
4338 area_id
4339 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
4340 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
4341 {
4342 	if (!arch_vm_supports_protection(protection))
4343 		return B_NOT_SUPPORTED;
4344 
4345 	fix_protection(&protection);
4346 
4347 	return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress,
4348 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
4349 }
4350 
4351 
4352 area_id
4353 clone_area(const char *name, void **_address, uint32 addressSpec, uint32 protection,
4354 	area_id source)
4355 {
4356 	if ((protection & B_KERNEL_PROTECTION) == 0)
4357 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4358 
4359 	return vm_clone_area(vm_kernel_address_space_id(), name, _address, addressSpec,
4360 				protection, REGION_NO_PRIVATE_MAP, source);
4361 }
4362 
4363 
4364 area_id
4365 create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
4366 	uint32 size, uint32 lock, uint32 protection)
4367 {
4368 	fix_protection(&protection);
4369 
4370 	return vm_create_anonymous_area(team->id, (char *)name, address,
4371 		addressSpec, size, lock, protection);
4372 }
4373 
4374 
4375 area_id
4376 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
4377 	uint32 protection)
4378 {
4379 	fix_protection(&protection);
4380 
4381 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address,
4382 		addressSpec, size, lock, protection);
4383 }
4384 
4385 
4386 status_t
4387 delete_area_etc(struct team *team, area_id area)
4388 {
4389 	return vm_delete_area(team->id, area);
4390 }
4391 
4392 
4393 status_t
4394 delete_area(area_id area)
4395 {
4396 	return vm_delete_area(vm_kernel_address_space_id(), area);
4397 }
4398 
4399 
4400 //	#pragma mark - Userland syscalls
4401 
4402 
4403 status_t
4404 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size)
4405 {
4406 	// filter out some unavailable values (for userland)
4407 	switch (addressSpec) {
4408 		case B_ANY_KERNEL_ADDRESS:
4409 		case B_ANY_KERNEL_BLOCK_ADDRESS:
4410 			return B_BAD_VALUE;
4411 	}
4412 
4413 	addr_t address;
4414 
4415 	if (!IS_USER_ADDRESS(userAddress)
4416 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
4417 		return B_BAD_ADDRESS;
4418 
4419 	status_t status = vm_reserve_address_range(vm_current_user_address_space_id(),
4420 		(void **)&address, addressSpec, size, RESERVED_AVOID_BASE);
4421 	if (status < B_OK)
4422 		return status;
4423 
4424 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
4425 		vm_unreserve_address_range(vm_current_user_address_space_id(),
4426 			(void *)address, size);
4427 		return B_BAD_ADDRESS;
4428 	}
4429 
4430 	return B_OK;
4431 }
4432 
4433 
4434 area_id
4435 _user_area_for(void *address)
4436 {
4437 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
4438 }
4439 
4440 
4441 area_id
4442 _user_find_area(const char *userName)
4443 {
4444 	char name[B_OS_NAME_LENGTH];
4445 
4446 	if (!IS_USER_ADDRESS(userName)
4447 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
4448 		return B_BAD_ADDRESS;
4449 
4450 	return find_area(name);
4451 }
4452 
4453 
4454 status_t
4455 _user_get_area_info(area_id area, area_info *userInfo)
4456 {
4457 	if (!IS_USER_ADDRESS(userInfo))
4458 		return B_BAD_ADDRESS;
4459 
4460 	area_info info;
4461 	status_t status = get_area_info(area, &info);
4462 	if (status < B_OK)
4463 		return status;
4464 
4465 	// TODO: do we want to prevent userland from seeing kernel protections?
4466 	//info.protection &= B_USER_PROTECTION;
4467 
4468 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
4469 		return B_BAD_ADDRESS;
4470 
4471 	return status;
4472 }
4473 
4474 
4475 status_t
4476 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
4477 {
4478 	int32 cookie;
4479 
4480 	if (!IS_USER_ADDRESS(userCookie)
4481 		|| !IS_USER_ADDRESS(userInfo)
4482 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
4483 		return B_BAD_ADDRESS;
4484 
4485 	area_info info;
4486 	status_t status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
4487 	if (status != B_OK)
4488 		return status;
4489 
4490 	//info.protection &= B_USER_PROTECTION;
4491 
4492 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
4493 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
4494 		return B_BAD_ADDRESS;
4495 
4496 	return status;
4497 }
4498 
4499 
4500 status_t
4501 _user_set_area_protection(area_id area, uint32 newProtection)
4502 {
4503 	if ((newProtection & ~B_USER_PROTECTION) != 0)
4504 		return B_BAD_VALUE;
4505 
4506 	fix_protection(&newProtection);
4507 
4508 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
4509 		newProtection);
4510 }
4511 
4512 
4513 status_t
4514 _user_resize_area(area_id area, size_t newSize)
4515 {
4516 	// ToDo: Since we restrict deleting of areas to those owned by the team,
4517 	// we should also do that for resizing (check other functions, too).
4518 	return resize_area(area, newSize);
4519 }
4520 
4521 
4522 status_t
4523 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
4524 {
4525 	// filter out some unavailable values (for userland)
4526 	switch (addressSpec) {
4527 		case B_ANY_KERNEL_ADDRESS:
4528 		case B_ANY_KERNEL_BLOCK_ADDRESS:
4529 			return B_BAD_VALUE;
4530 	}
4531 
4532 	void *address;
4533 	if (!IS_USER_ADDRESS(userAddress)
4534 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
4535 		return B_BAD_ADDRESS;
4536 
4537 	status_t status = transfer_area(area, &address, addressSpec, target);
4538 	if (status < B_OK)
4539 		return status;
4540 
4541 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
4542 		return B_BAD_ADDRESS;
4543 
4544 	return status;
4545 }
4546 
4547 
4548 area_id
4549 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
4550 	uint32 protection, area_id sourceArea)
4551 {
4552 	char name[B_OS_NAME_LENGTH];
4553 	void *address;
4554 
4555 	// filter out some unavailable values (for userland)
4556 	switch (addressSpec) {
4557 		case B_ANY_KERNEL_ADDRESS:
4558 		case B_ANY_KERNEL_BLOCK_ADDRESS:
4559 			return B_BAD_VALUE;
4560 	}
4561 	if ((protection & ~B_USER_PROTECTION) != 0)
4562 		return B_BAD_VALUE;
4563 
4564 	if (!IS_USER_ADDRESS(userName)
4565 		|| !IS_USER_ADDRESS(userAddress)
4566 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
4567 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
4568 		return B_BAD_ADDRESS;
4569 
4570 	fix_protection(&protection);
4571 
4572 	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address,
4573 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
4574 	if (clonedArea < B_OK)
4575 		return clonedArea;
4576 
4577 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
4578 		delete_area(clonedArea);
4579 		return B_BAD_ADDRESS;
4580 	}
4581 
4582 	return clonedArea;
4583 }
4584 
4585 
4586 area_id
4587 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
4588 	size_t size, uint32 lock, uint32 protection)
4589 {
4590 	char name[B_OS_NAME_LENGTH];
4591 	void *address;
4592 
4593 	// filter out some unavailable values (for userland)
4594 	switch (addressSpec) {
4595 		case B_ANY_KERNEL_ADDRESS:
4596 		case B_ANY_KERNEL_BLOCK_ADDRESS:
4597 			return B_BAD_VALUE;
4598 	}
4599 	if ((protection & ~B_USER_PROTECTION) != 0)
4600 		return B_BAD_VALUE;
4601 
4602 	if (!IS_USER_ADDRESS(userName)
4603 		|| !IS_USER_ADDRESS(userAddress)
4604 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
4605 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
4606 		return B_BAD_ADDRESS;
4607 
4608 	if (addressSpec == B_EXACT_ADDRESS
4609 		&& IS_KERNEL_ADDRESS(address))
4610 		return B_BAD_VALUE;
4611 
4612 	fix_protection(&protection);
4613 
4614 	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
4615 		(char *)name, &address, addressSpec, size, lock, protection);
4616 
4617 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
4618 		delete_area(area);
4619 		return B_BAD_ADDRESS;
4620 	}
4621 
4622 	return area;
4623 }
4624 
4625 
4626 status_t
4627 _user_delete_area(area_id area)
4628 {
4629 	// Unlike the BeOS implementation, you can now only delete areas
4630 	// that you have created yourself from userland.
4631 	// The documentation for delete_area() explicitly states that this
4632 	// will be restricted in the future, and so it will.
4633 	return vm_delete_area(vm_current_user_address_space_id(), area);
4634 }
4635 
4636