xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4f00613311d0bd6b70fa82ce19931c41f071ea4e)
1 /*
2  * Copyright 2002-2005, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <OS.h>
11 #include <KernelExport.h>
12 
13 #include <vm.h>
14 #include <vm_priv.h>
15 #include <vm_page.h>
16 #include <vm_cache.h>
17 #include <vm_store_anonymous_noswap.h>
18 #include <vm_store_device.h>
19 #include <vm_store_null.h>
20 #include <vm_low_memory.h>
21 #include <file_cache.h>
22 #include <memheap.h>
23 #include <debug.h>
24 #include <console.h>
25 #include <int.h>
26 #include <smp.h>
27 #include <lock.h>
28 #include <thread.h>
29 #include <team.h>
30 
31 #include <boot/stage2.h>
32 #include <boot/elf.h>
33 
34 #include <arch/cpu.h>
35 #include <arch/vm.h>
36 
37 #include <string.h>
38 #include <ctype.h>
39 #include <stdlib.h>
40 #include <stdio.h>
41 
42 //#define TRACE_VM
43 //#define TRACE_FAULTS
44 #ifdef TRACE_VM
45 #	define TRACE(x) dprintf x
46 #else
47 #	define TRACE(x) ;
48 #endif
49 #ifdef TRACE_FAULTS
50 #	define FTRACE(x) dprintf x
51 #else
52 #	define FTRACE(x) ;
53 #endif
54 
55 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
56 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
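// note: ROUNDUP relies on "b" being a power of two (it rounds via a bit mask),
// while ROUNDOWN works for any non-zero "b"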
57 
58 
59 extern vm_address_space *kernel_aspace;
60 
61 #define REGION_HASH_TABLE_SIZE 1024
62 static area_id sNextAreaID;
63 static hash_table *sAreaHash;
64 static sem_id sAreaHashLock;
65 
66 static off_t sAvailableMemory;
67 static benaphore sAvailableMemoryLock;
68 
69 // function declarations
70 static vm_area *_vm_create_region_struct(vm_address_space *aspace, const char *name, int wiring, int lock);
71 static status_t map_backing_store(vm_address_space *aspace, vm_store *store, void **vaddr,
72 	off_t offset, addr_t size, uint32 addressSpec, int wiring, int protection, int mapping, vm_area **_area, const char *area_name);
73 static status_t vm_soft_fault(addr_t address, bool is_write, bool is_user);
74 static vm_area *vm_virtual_map_lookup(vm_virtual_map *map, addr_t address);
75 static bool vm_put_area(vm_area *area);
76 
77 
78 static int
79 area_compare(void *_area, const void *key)
80 {
81 	vm_area *area = (vm_area *)_area;
82 	const area_id *id = (const area_id *)key;
83 
84 	if (area->id == *id)
85 		return 0;
86 
87 	return -1;
88 }
89 
90 
91 static uint32
92 area_hash(void *_area, const void *key, uint32 range)
93 {
94 	vm_area *area = (vm_area *)_area;
95 	const area_id *id = (const area_id *)key;
96 
97 	if (area != NULL)
98 		return area->id % range;
99 
100 	return (uint32)*id % range;
101 }
102 
103 
104 static vm_area *
105 vm_get_area(area_id id)
106 {
107 	vm_area *area;
108 
109 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
110 
111 	area = (vm_area *)hash_lookup(sAreaHash, &id);
112 	if (area != NULL)
113 		atomic_add(&area->ref_count, 1);
114 
115 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
116 
117 	return area;
118 }
119 
120 
121 static vm_area *
122 _vm_create_reserved_region_struct(vm_virtual_map *map, uint32 flags)
123 {
124 	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
125 	if (reserved == NULL)
126 		return NULL;
127 
128 	memset(reserved, 0, sizeof(vm_area));
129 	reserved->id = RESERVED_AREA_ID;
130 		// this marks it as reserved space
131 	reserved->protection = flags;
132 	reserved->map = map;
133 
134 	return reserved;
135 }
136 
137 
138 static vm_area *
139 _vm_create_area_struct(vm_address_space *aspace, const char *name, uint32 wiring, uint32 protection)
140 {
141 	vm_area *area = NULL;
142 
143 	// restrict the area name to B_OS_NAME_LENGTH
144 	size_t length = strlen(name) + 1;
145 	if (length > B_OS_NAME_LENGTH)
146 		length = B_OS_NAME_LENGTH;
147 
148 	area = (vm_area *)malloc(sizeof(vm_area));
149 	if (area == NULL)
150 		return NULL;
151 
152 	area->name = (char *)malloc(length);
153 	if (area->name == NULL) {
154 		free(area);
155 		return NULL;
156 	}
157 	strlcpy(area->name, name, length);
158 
159 	area->id = atomic_add(&sNextAreaID, 1);
160 	area->base = 0;
161 	area->size = 0;
162 	area->protection = protection;
163 	area->wiring = wiring;
164 	area->ref_count = 1;
165 
166 	area->cache_ref = NULL;
167 	area->cache_offset = 0;
168 
169 	area->aspace = aspace;
170 	area->aspace_next = NULL;
171 	area->map = &aspace->virtual_map;
172 	area->cache_next = area->cache_prev = NULL;
173 	area->hash_next = NULL;
174 
175 	arch_vm_init_area(area);
176 	return area;
177 }
178 
179 
180 /**	Finds a reserved area that covers the region spanned by \a start and
181  *	\a size, inserts the \a area into that region and makes sure that
182  *	there are reserved regions for the remaining parts.
183  */
184 
185 static status_t
186 find_reserved_area(vm_virtual_map *map, addr_t start, addr_t size, vm_area *area)
187 {
188 	vm_area *next, *last = NULL;
189 
190 	next = map->areas;
191 	while (next) {
192 		if (next->base <= start && next->base + next->size >= start + size) {
193 			// this area covers the requested range
194 			if (next->id != RESERVED_AREA_ID) {
195 				// but it's not reserved space, it's a real area
196 				return B_BAD_VALUE;
197 			}
198 
199 			break;
200 		}
201 		last = next;
202 		next = next->aspace_next;
203 	}
204 	if (next == NULL)
205 		return B_ENTRY_NOT_FOUND;
206 
207 	// now we have to transfer the requested part of the reserved
208 	// range to the new area - and remove, resize or split the old
209 	// reserved area.
210 
211 	if (start == next->base) {
212 		// the area starts at the beginning of the reserved range
213 		if (last)
214 			last->aspace_next = area;
215 		else
216 			map->areas = area;
217 
218 		if (size == next->size) {
219 			// the new area fully covers the reserved range
220 			area->aspace_next = next->aspace_next;
221 			free(next);
222 		} else {
223 			// resize the reserved range behind the area
224 			area->aspace_next = next;
225 			next->base += size;
226 			next->size -= size;
227 		}
228 	} else if (start + size == next->base + next->size) {
229 		// the area is at the end of the reserved range
230 		area->aspace_next = next->aspace_next;
231 		next->aspace_next = area;
232 
233 		// resize the reserved range before the area
234 		next->size = start - next->base;
235 	} else {
236 		// the area splits the reserved range into two separate ones
237 		// we need a new reserved area to cover this space
238 		vm_area *reserved = _vm_create_reserved_region_struct(map, next->protection);
239 		if (reserved == NULL)
240 			return B_NO_MEMORY;
241 
242 		reserved->aspace_next = next->aspace_next;
243 		area->aspace_next = reserved;
244 		next->aspace_next = area;
245 
246 		// resize regions
247 		reserved->size = next->base + next->size - start - size;
248 		next->size = start - next->base;
249 		reserved->base = start + size;
250 		reserved->cache_offset = next->cache_offset;
251 	}
252 
253 	area->base = start;
254 	area->size = size;
255 	map->change_count++;
256 
257 	return B_OK;
258 }
259 
260 
261 // must be called with this address space's virtual_map.sem held
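// Finds a free slot of "size" bytes between "start" and "end" according to the
// given address specification and links "area" into the map's address-sorted
// area list.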
262 
263 static status_t
264 find_and_insert_area_slot(vm_virtual_map *map, addr_t start, addr_t size, addr_t end,
265 	uint32 addressSpec, vm_area *area)
266 {
267 	vm_area *last = NULL;
268 	vm_area *next;
269 	bool foundSpot = false;
270 
271 	TRACE(("find_and_insert_region_slot: map %p, start 0x%lx, size %ld, end 0x%lx, addressSpec %ld, area %p\n",
272 		map, start, size, end, addressSpec, area));
273 
274 	// do some sanity checking
275 	if (start < map->base || size == 0
276 		|| (end - 1) > (map->base + (map->size - 1))
277 		|| start + size > end)
278 		return B_BAD_ADDRESS;
279 
280 	if (addressSpec == B_EXACT_ADDRESS) {
281 		// search for a reserved area
282 		status_t status = find_reserved_area(map, start, size, area);
283 		if (status == B_OK || status == B_BAD_VALUE)
284 			return status;
285 
286 		// there was no reserved area, and the slot doesn't seem to be used already
287 		// ToDo: this could be further optimized.
288 	}
289 
290 	// walk up to the spot where we should start searching
291 second_chance:
292 	next = map->areas;
293 	while (next) {
294 		if (next->base >= start + size) {
295 			// we have a winner
296 			break;
297 		}
298 		last = next;
299 		next = next->aspace_next;
300 	}
301 
302 	// find the right spot depending on the address specification - the area
303 	// will be inserted directly after "last" ("next" is not referenced anymore)
304 
305 	switch (addressSpec) {
306 		case B_ANY_ADDRESS:
307 		case B_ANY_KERNEL_ADDRESS:
308 		case B_ANY_KERNEL_BLOCK_ADDRESS:
309 			// find a hole big enough for a new area
310 			if (!last) {
311 				// see if we can build it at the beginning of the virtual map
312 				if (!next || (next->base >= map->base + size)) {
313 					foundSpot = true;
314 					area->base = map->base;
315 					break;
316 				}
317 				last = next;
318 				next = next->aspace_next;
319 			}
320 			// keep walking
321 			while (next) {
322 				if (next->base >= last->base + last->size + size) {
323 					// we found a spot (it'll be filled up below)
324 					break;
325 				}
326 				last = next;
327 				next = next->aspace_next;
328 			}
329 
330 			if ((map->base + (map->size - 1)) >= (last->base + last->size + (size - 1))) {
331 				// got a spot
332 				foundSpot = true;
333 				area->base = last->base + last->size;
334 				break;
335 			} else {
336 				// we didn't find a free spot - if there were any reserved areas with
337 				// the RESERVED_AVOID_BASE flag set, we can now test those for free
338 				// space
339 				// ToDo: it would make sense to start with the biggest of them
340 				next = map->areas;
341 				last = NULL;
342 				for (; next; last = next, next = next->aspace_next) {
343 					// ToDo: take free space after the reserved area into account!
344 					if (next->size == size) {
345 						// the reserved area is entirely covered, and thus, removed
346 						if (last)
347 							last->aspace_next = next->aspace_next;
348 						else
349 							map->areas = next->aspace_next;
350 
351 						foundSpot = true;
352 						area->base = next->base;
353 						free(next);
354 						break;
355 					}
356 					if (next->size >= size) {
357 						// the new area will be placed at the end of the reserved
358 						// area, and the reserved area will be resized to make space
359 						foundSpot = true;
360 						next->size -= size;
361 						last = next;
362 						area->base = next->base + next->size;
363 						break;
364 					}
365 				}
366 			}
367 			break;
368 
369 		case B_BASE_ADDRESS:
370 			// find a hole big enough for a new area beginning with "start"
371 			if (!last) {
372 				// see if we can build it at the specified start address
373 				if (!next || (next->base >= start + size)) {
374 					foundSpot = true;
375 					area->base = start;
376 					break;
377 				}
378 				last = next;
379 				next = next->aspace_next;
380 			}
381 			// keep walking
382 			while (next) {
383 				if (next->base >= last->base + last->size + size) {
384 					// we found a spot (it'll be filled up below)
385 					break;
386 				}
387 				last = next;
388 				next = next->aspace_next;
389 			}
390 
391 			if ((map->base + (map->size - 1)) >= (last->base + last->size + (size - 1))) {
392 				// got a spot
393 				foundSpot = true;
394 				if (last->base + last->size <= start)
395 					area->base = start;
396 				else
397 					area->base = last->base + last->size;
398 				break;
399 			}
400 			// we didn't find a free spot in the requested range, so we'll
401 			// try again without any restrictions
402 			start = map->base;
403 			addressSpec = B_ANY_ADDRESS;
404 			last = NULL;
405 			goto second_chance;
406 
407 		case B_EXACT_ADDRESS:
408 			// see if we can create it exactly here
409 			if (!last) {
410 				if (!next || (next->base >= start + size)) {
411 					foundSpot = true;
412 					area->base = start;
413 					break;
414 				}
415 			} else {
416 				if (next) {
417 					if (last->base + last->size <= start && next->base >= start + size) {
418 						foundSpot = true;
419 						area->base = start;
420 						break;
421 					}
422 				} else {
423 					if ((last->base + (last->size - 1)) <= start - 1) {
424 						foundSpot = true;
425 						area->base = start;
426 					}
427 				}
428 			}
429 			break;
430 		default:
431 			return B_BAD_VALUE;
432 	}
433 
434 	if (!foundSpot)
435 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
436 
437 	area->size = size;
438 	if (last) {
439 		area->aspace_next = last->aspace_next;
440 		last->aspace_next = area;
441 	} else {
442 		area->aspace_next = map->areas;
443 		map->areas = area;
444 	}
445 	map->change_count++;
446 	return B_OK;
447 }
448 
449 
450 /**	This inserts the area you pass into the virtual_map of the
451  *	specified address space.
452  *	It will also set the "_address" argument to its base address when
453  *	the call succeeds.
454  *	You need to hold the virtual_map semaphore.
455  */
456 
457 static status_t
458 insert_area(vm_address_space *addressSpace, void **_address,
459 	uint32 addressSpec, addr_t size, vm_area *area)
460 {
461 	addr_t searchBase, searchEnd;
462 	status_t status;
463 
464 	switch (addressSpec) {
465 		case B_EXACT_ADDRESS:
466 			searchBase = (addr_t)*_address;
467 			searchEnd = (addr_t)*_address + size;
468 			break;
469 
470 		case B_BASE_ADDRESS:
471 			searchBase = (addr_t)*_address;
472 			searchEnd = addressSpace->virtual_map.base + (addressSpace->virtual_map.size - 1);
473 			break;
474 
475 		case B_ANY_ADDRESS:
476 		case B_ANY_KERNEL_ADDRESS:
477 		case B_ANY_KERNEL_BLOCK_ADDRESS:
478 			searchBase = addressSpace->virtual_map.base;
479 			searchEnd = addressSpace->virtual_map.base + (addressSpace->virtual_map.size - 1);
480 			break;
481 
482 		default:
483 			return B_BAD_VALUE;
484 	}
485 
486 	status = find_and_insert_area_slot(&addressSpace->virtual_map, searchBase, size,
487 				searchEnd, addressSpec, area);
488 	if (status == B_OK) {
489 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
490 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
491 		*_address = (void *)area->base;
492 	}
493 
494 	return status;
495 }
496 
497 
498 // a ref to the cache holding this store must be held before entering here
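// This creates the vm_area, optionally layers a private (copy-on-write) cache
// on top of the given store, inserts the area into the address space and the
// global area hash, and returns it in "_area".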
499 static status_t
500 map_backing_store(vm_address_space *aspace, vm_store *store, void **_virtualAddress,
501 	off_t offset, addr_t size, uint32 addressSpec, int wiring, int protection,
502 	int mapping, vm_area **_area, const char *areaName)
503 {
504 	vm_cache *cache;
505 	vm_cache_ref *cache_ref;
506 	vm_area *area;
507 	vm_cache *nu_cache;
508 	vm_cache_ref *nu_cache_ref = NULL;
509 	vm_store *nu_store;
510 
511 	int err;
512 
513 	TRACE(("map_backing_store: aspace %p, store %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
514 		aspace, store, *_virtualAddress, offset, size, addressSpec, wiring, protection, _area, areaName));
515 
516 	area = _vm_create_area_struct(aspace, areaName, wiring, protection);
517 	if (area == NULL)
518 		return B_NO_MEMORY;
519 
520 	cache = store->cache;
521 	cache_ref = cache->ref;
522 
523 	// if this is a private map, we need to create a new cache & store object
524 	// pair to handle the private copies of pages as they are written to
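	// (the original cache becomes the "source" of the new one, so unmodified
	// pages are still looked up there; only pages that are written to end up
	// in the new cache)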
525 	if (mapping == REGION_PRIVATE_MAP) {
526 		// create an anonymous store object
527 		nu_store = vm_store_create_anonymous_noswap((protection & B_STACK_AREA) != 0, USER_STACK_GUARD_PAGES);
528 		if (nu_store == NULL)
529 			panic("map_backing_store: vm_create_store_anonymous_noswap returned NULL");
530 		nu_cache = vm_cache_create(nu_store);
531 		if (nu_cache == NULL)
532 			panic("map_backing_store: vm_cache_create returned NULL");
533 		nu_cache_ref = vm_cache_ref_create(nu_cache);
534 		if (nu_cache_ref == NULL)
535 			panic("map_backing_store: vm_cache_ref_create returned NULL");
536 		nu_cache->temporary = 1;
537 		nu_cache->scan_skip = cache->scan_skip;
538 
539 		nu_cache->source = cache;
540 
541 		// grab a ref to the cache object we're now linked to as a source
542 		vm_cache_acquire_ref(cache_ref, true);
543 
544 		cache = nu_cache;
545 		cache_ref = cache->ref;
546 		store = nu_store;
547 		cache->virtual_size = offset + size;
548 	}
549 
550 	err = vm_cache_set_minimal_commitment(cache_ref, offset + size);
551 	if (err != B_OK)
552 		goto err1a;
553 
554 	vm_cache_acquire_ref(cache_ref, true);
555 
556 	acquire_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0, 0);
557 
558 	// check to see if this aspace has entered DELETE state
559 	if (aspace->state == VM_ASPACE_STATE_DELETION) {
560 		// okay, someone is trying to delete this aspace now, so we can't
561 		// insert the area, so back out
562 		err = B_BAD_TEAM_ID;
563 		goto err1b;
564 	}
565 
566 	err = insert_area(aspace, _virtualAddress, addressSpec, size, area);
567 	if (err < B_OK)
568 		goto err1b;
569 
570 	// attach the cache to the area
571 	area->cache_ref = cache_ref;
572 	area->cache_offset = offset;
573 	// point the cache back to the area
574 	vm_cache_insert_area(cache_ref, area);
575 
576 	// insert the area in the global area hash table
577 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
578 	hash_insert(sAreaHash, area);
579 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
580 
581 	// grab a ref to the aspace (the area holds this)
582 	atomic_add(&aspace->ref_count, 1);
583 
584 	release_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0);
585 
586 	*_area = area;
587 	return B_OK;
588 
589 err1b:
590 	release_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0);
591 	vm_cache_release_ref(cache_ref);
592 	goto err;
593 err1a:
594 	if (nu_cache_ref) {
595 		// it had never acquired its initial ref, so acquire and then release it
596 		// this should clean up all the objects it references
597 		vm_cache_acquire_ref(cache_ref, true);
598 		vm_cache_release_ref(cache_ref);
599 	}
600 err:
601 	free(area->name);
602 	free(area);
603 	return err;
604 }
605 
606 
607 status_t
608 vm_unreserve_address_range(aspace_id aid, void *address, addr_t size)
609 {
610 	vm_address_space *addressSpace;
611 	vm_area *area, *last = NULL;
612 	status_t status = B_OK;
613 
614 	addressSpace = vm_get_aspace_by_id(aid);
615 	if (addressSpace == NULL)
616 		return B_BAD_TEAM_ID;
617 
618 	acquire_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
619 
620 	// check to see if this aspace has entered DELETE state
621 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
622 		// okay, someone is trying to delete this aspace now, so we can't
623 		// remove the reserved ranges anymore; back out
624 		status = B_BAD_TEAM_ID;
625 		goto out;
626 	}
627 
628 	// search area list and remove any matching reserved ranges
629 
630 	area = addressSpace->virtual_map.areas;
631 	while (area) {
632 		// the area must be completely part of the reserved range
633 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
634 			&& area->base + area->size <= (addr_t)address + size) {
635 			// remove reserved range
636 			vm_area *reserved = area;
637 			if (last)
638 				last->aspace_next = reserved->aspace_next;
639 			else
640 				addressSpace->virtual_map.areas = reserved->aspace_next;
641 
642 			area = reserved->aspace_next;
643 			free(reserved);
644 			continue;
645 		}
646 
647 		last = area;
648 		area = area->aspace_next;
649 	}
650 
651 out:
652 	release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
653 	vm_put_aspace(addressSpace);
654 	return status;
655 }
656 
657 
658 status_t
659 vm_reserve_address_range(aspace_id aid, void **_address, uint32 addressSpec,
660 	addr_t size, uint32 flags)
661 {
662 	vm_address_space *addressSpace;
663 	vm_area *area;
664 	status_t status = B_OK;
665 
666 	if (size == 0)
667 		return B_BAD_VALUE;
668 
669 	addressSpace = vm_get_aspace_by_id(aid);
670 	if (addressSpace == NULL)
671 		return B_BAD_TEAM_ID;
672 
673 	area = _vm_create_reserved_region_struct(&addressSpace->virtual_map, flags);
674 	if (area == NULL) {
675 		status = B_NO_MEMORY;
676 		goto err1;
677 	}
678 
679 	acquire_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
680 
681 	// check to see if this aspace has entered DELETE state
682 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
683 		// okay, someone is trying to delete this aspace now, so we can't
684 		// insert the area, let's back out
685 		status = B_BAD_TEAM_ID;
686 		goto err2;
687 	}
688 
689 	status = insert_area(addressSpace, _address, addressSpec, size, area);
690 	if (status < B_OK)
691 		goto err2;
692 
693 	// the area is now reserved!
694 
695 	area->cache_offset = area->base;
696 		// we cache the original base address here
697 
698 	release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
699 	return B_OK;
700 
701 err2:
702 	release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
703 	free(area);
704 err1:
705 	vm_put_aspace(addressSpace);
706 	return status;
707 }
708 
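/**	Creates an area backed by anonymous (non-swappable) memory in the address
 *	space \a aid. A typical kernel-side call could look like this (the name,
 *	size, and protection used below are purely illustrative):
 *		void *address;
 *		area_id id = vm_create_anonymous_area(vm_get_kernel_aspace_id(),
 *			"some buffer", &address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
 *			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
 */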
709 
710 area_id
711 vm_create_anonymous_area(aspace_id aid, const char *name, void **address,
712 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
713 {
714 	vm_area *area;
715 	vm_cache *cache;
716 	vm_store *store;
717 	vm_address_space *aspace;
718 	vm_cache_ref *cache_ref;
719 	vm_page *page = NULL;
720 	bool isStack = (protection & B_STACK_AREA) != 0;
721 	bool canOvercommit = false;
722 	status_t err;
723 
724 	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));
725 
726 	if (!arch_vm_supports_protection(protection))
727 		return B_NOT_SUPPORTED;
728 
729 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
730 		canOvercommit = true;
731 
732 #ifdef DEBUG_KERNEL_STACKS
733 	if ((protection & B_KERNEL_STACK_AREA) != 0)
734 		isStack = true;
735 #endif
736 
737 	/* check parameters */
738 	switch (addressSpec) {
739 		case B_ANY_ADDRESS:
740 		case B_EXACT_ADDRESS:
741 		case B_BASE_ADDRESS:
742 		case B_ANY_KERNEL_ADDRESS:
743 			break;
744 
745 		default:
746 			return B_BAD_VALUE;
747 	}
748 
749 	switch (wiring) {
750 		case B_NO_LOCK:
751 		case B_FULL_LOCK:
752 		case B_LAZY_LOCK:
753 		case B_CONTIGUOUS:
754 		case B_ALREADY_WIRED:
755 			break;
756 		case B_LOMEM:
757 		//case B_SLOWMEM:
758 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
759 			wiring = B_FULL_LOCK;
760 			break;
761 		default:
762 			return B_BAD_VALUE;
763 	}
764 
765 	aspace = vm_get_aspace_by_id(aid);
766 	if (aspace == NULL)
767 		return B_BAD_TEAM_ID;
768 
769 	size = PAGE_ALIGN(size);
770 
771 	if (wiring == B_CONTIGUOUS) {
772 		// we try to allocate the page run here upfront as this may easily
773 		// fail for obvious reasons
774 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
775 		if (page == NULL) {
776 			vm_put_aspace(aspace);
777 			return B_NO_MEMORY;
778 		}
779 	}
780 
781 	// create an anonymous store object
782 	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ?
783 		((protection & B_USER_PROTECTION) != 0 ?
784 			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
785 	if (store == NULL)
786 		panic("vm_create_anonymous_area: vm_create_store_anonymous_noswap returned NULL");
787 	cache = vm_cache_create(store);
788 	if (cache == NULL)
789 		panic("vm_create_anonymous_area: vm_cache_create returned NULL");
790 	cache_ref = vm_cache_ref_create(cache);
791 	if (cache_ref == NULL)
792 		panic("vm_create_anonymous_area: vm_cache_ref_create returned NULL");
793 	cache->temporary = 1;
794 
795 	switch (wiring) {
796 		case B_LAZY_LOCK:	// for now
797 		case B_FULL_LOCK:
798 		case B_CONTIGUOUS:
799 		case B_ALREADY_WIRED:
800 			cache->scan_skip = 1;
801 			break;
802 		case B_NO_LOCK:
803 		//case B_LAZY_LOCK:
804 			cache->scan_skip = 0;
805 			break;
806 	}
807 
808 	vm_cache_acquire_ref(cache_ref, true);
809 	err = map_backing_store(aspace, store, address, 0, size, addressSpec, wiring,
810 		protection, REGION_NO_PRIVATE_MAP, &area, name);
811 	vm_cache_release_ref(cache_ref);
812 	if (err < 0) {
813 		vm_put_aspace(aspace);
814 
815 		if (wiring == B_CONTIGUOUS) {
816 			// we had allocated the contiguous page run upfront, free those pages again
817 			addr_t pageNumber = page->ppn;
818 			int32 i;
819 			for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
820 				page = vm_lookup_page(pageNumber);
821 				if (page == NULL)
822 					panic("couldn't lookup physical page just allocated\n");
823 
824 				vm_page_set_state(page, PAGE_STATE_FREE);
825 			}
826 		}
827 		return err;
828 	}
829 
830 	cache_ref = store->cache->ref;
831 	switch (wiring) {
832 		case B_NO_LOCK:
833 		case B_LAZY_LOCK:
834 			break; // do nothing
835 
836 		case B_FULL_LOCK:
837 		{
838 			// Pages aren't mapped at this point, but we just simulate a fault on
839 			// every page, which should allocate them
840 			// ToDo: at this point, it would probably be cheaper to allocate
841 			// and map the pages directly
842 			addr_t va;
843 			for (va = area->base; va < area->base + area->size; va += B_PAGE_SIZE) {
844 #ifdef DEBUG_KERNEL_STACKS
845 #	ifdef STACK_GROWS_DOWNWARDS
846 				if (isStack && va < area->base + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
847 #	else
848 				if (isStack && va >= area->base + area->size - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
849 #	endif
850 					continue;
851 #endif
852 				vm_soft_fault(va, false, false);
853 			}
854 			break;
855 		}
856 
857 		case B_ALREADY_WIRED:
858 		{
859 			// the pages should already be mapped. This is only really useful during
860 			// boot time. Find the appropriate vm_page objects and stick them in
861 			// the cache object.
862 			addr_t va;
863 			addr_t pa;
864 			uint32 flags;
865 			int err;
866 			off_t offset = 0;
867 
868 			if (!kernel_startup)
869 				panic("ALREADY_WIRED flag used outside kernel startup\n");
870 
871 			mutex_lock(&cache_ref->lock);
872 			(*aspace->translation_map.ops->lock)(&aspace->translation_map);
873 			for (va = area->base; va < area->base + area->size; va += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
874 				err = (*aspace->translation_map.ops->query)(&aspace->translation_map, va, &pa, &flags);
875 				if (err < 0) {
876 //					dprintf("vm_create_anonymous_area: error looking up mapping for va 0x%x\n", va);
877 					continue;
878 				}
879 				page = vm_lookup_page(pa / B_PAGE_SIZE);
880 				if (page == NULL) {
881 //					dprintf("vm_create_anonymous_area: error looking up vm_page structure for pa 0x%x\n", pa);
882 					continue;
883 				}
884 				atomic_add(&page->ref_count, 1);
885 				vm_page_set_state(page, PAGE_STATE_WIRED);
886 				vm_cache_insert_page(cache_ref, page, offset);
887 			}
888 			(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
889 			mutex_unlock(&cache_ref->lock);
890 			break;
891 		}
892 
893 		case B_CONTIGUOUS:
894 		{
895 			addr_t physicalAddress = page->ppn * B_PAGE_SIZE;
896 			addr_t virtualAddress;
897 			off_t offset = 0;
898 
899 			mutex_lock(&cache_ref->lock);
900 			(*aspace->translation_map.ops->lock)(&aspace->translation_map);
901 
902 			for (virtualAddress = area->base; virtualAddress < area->base + area->size;
903 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
904 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
905 				if (page == NULL)
906 					panic("couldn't lookup physical page just allocated\n");
907 
908 				atomic_add(&page->ref_count, 1);
909 				err = (*aspace->translation_map.ops->map)(&aspace->translation_map,
910 							virtualAddress, physicalAddress, protection);
911 				if (err < 0)
912 					panic("couldn't map physical page in page run\n");
913 
914 				vm_page_set_state(page, PAGE_STATE_WIRED);
915 				vm_cache_insert_page(cache_ref, page, offset);
916 			}
917 
918 			(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
919 			mutex_unlock(&cache_ref->lock);
920 			break;
921 		}
922 
923 		default:
924 			break;
925 	}
926 	vm_put_aspace(aspace);
927 
928 	TRACE(("vm_create_anonymous_area: done\n"));
929 
930 	if (area == NULL)
931 		return B_NO_MEMORY;
932 
933 	return area->id;
934 }
935 
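/**	Maps the physical address range starting at \a phys_addr into the address
 *	space \a aid. Neither \a phys_addr nor \a size need to be page aligned;
 *	the returned *\a _address is offset into the area by the same amount that
 *	\a phys_addr lies within its page. The values in this example are purely
 *	illustrative:
 *		void *regs;
 *		area_id id = vm_map_physical_memory(vm_get_kernel_aspace_id(),
 *			"some device regs", &regs, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
 *			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xd0000000);
 */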
936 
937 area_id
938 vm_map_physical_memory(aspace_id aid, const char *name, void **_address,
939 	uint32 addressSpec, addr_t size, uint32 protection, addr_t phys_addr)
940 {
941 	vm_area *area;
942 	vm_cache *cache;
943 	vm_cache_ref *cache_ref;
944 	vm_store *store;
945 	addr_t map_offset;
946 	status_t status;
947 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
948 
949 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, spec = %ld,"
950 		" size = %lu, protection = %ld, phys = %p)\n",
951 		aid, name, _address, addressSpec, size, protection, (void *)phys_addr));
952 
953 	if (!arch_vm_supports_protection(protection))
954 		return B_NOT_SUPPORTED;
955 
956 	if (aspace == NULL)
957 		return B_BAD_TEAM_ID;
958 
959 	// if the physical address is not aligned to a page boundary, move the
960 	// mapping down to the page boundary and enlarge the size accordingly
961 	map_offset = phys_addr % B_PAGE_SIZE;
962 	size += map_offset;
963 	phys_addr -= map_offset;
964 
965 	size = PAGE_ALIGN(size);
966 
967 	// create a device store object
968 	store = vm_store_create_device(phys_addr);
969 	if (store == NULL)
970 		panic("vm_map_physical_memory: vm_store_create_device returned NULL");
971 	cache = vm_cache_create(store);
972 	if (cache == NULL)
973 		panic("vm_map_physical_memory: vm_cache_create returned NULL");
974 	cache_ref = vm_cache_ref_create(cache);
975 	if (cache_ref == NULL)
976 		panic("vm_map_physical_memory: vm_cache_ref_create returned NULL");
977 	// tell the page scanner to skip over this area, its pages are special
978 	cache->scan_skip = 1;
979 
980 	vm_cache_acquire_ref(cache_ref, true);
981 	status = map_backing_store(aspace, store, _address, 0, size,
982 		addressSpec & ~B_MTR_MASK, 0, protection, REGION_NO_PRIVATE_MAP, &area, name);
983 	vm_cache_release_ref(cache_ref);
984 
985 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
986 		// set requested memory type
987 		status = arch_vm_set_memory_type(area, addressSpec & B_MTR_MASK);
988 		if (status < B_OK)
989 			vm_put_area(area);
990 	}
991 
992 	if (status >= B_OK) {
993 		// make sure our area is mapped in completely
994 		// (even if that makes the fault routine pretty much useless)
995 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
996 			store->ops->fault(store, aspace, offset);
997 		}
998 	}
999 
1000 	vm_put_aspace(aspace);
1001 	if (status < B_OK)
1002 		return status;
1003 
1004 	// modify the pointer returned to be offset back into the new area
1005 	// the same way the physical address in was offset
1006 	*_address = (void *)((addr_t)*_address + map_offset);
1007 
1008 	return area->id;
1009 }
1010 
1011 
1012 area_id
1013 vm_create_null_area(aspace_id aid, const char *name, void **address, uint32 addressSpec, addr_t size)
1014 {
1015 	vm_area *area;
1016 	vm_cache *cache;
1017 	vm_cache_ref *cache_ref;
1018 	vm_store *store;
1019 //	addr_t map_offset;
1020 	int err;
1021 
1022 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
1023 	if (aspace == NULL)
1024 		return B_BAD_TEAM_ID;
1025 
1026 	size = PAGE_ALIGN(size);
1027 
1028 	// create a null store object
1029 	store = vm_store_create_null();
1030 	if (store == NULL)
1031 		panic("vm_create_null_area: vm_store_create_null returned NULL");
1032 	cache = vm_cache_create(store);
1033 	if (cache == NULL)
1034 		panic("vm_create_null_area: vm_cache_create returned NULL");
1035 	cache_ref = vm_cache_ref_create(cache);
1036 	if (cache_ref == NULL)
1037 		panic("vm_create_null_area: vm_cache_ref_create returned NULL");
1038 	// tell the page scanner to skip over this area, no pages will be mapped here
1039 	cache->scan_skip = 1;
1040 
1041 	vm_cache_acquire_ref(cache_ref, true);
1042 	err = map_backing_store(aspace, store, address, 0, size, addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);
1043 	vm_cache_release_ref(cache_ref);
1044 	vm_put_aspace(aspace);
1045 	if (err < 0)
1046 		return err;
1047 
1048 	return area->id;
1049 }
1050 
1051 
1052 status_t
1053 vm_create_vnode_cache(void *vnode, struct vm_cache_ref **_cacheRef)
1054 {
1055 	vm_cache_ref *cacheRef;
1056 	vm_cache *cache;
1057 	vm_store *store;
1058 
1059 	// create a vnode store object
1060 	store = vm_create_vnode_store(vnode);
1061 	if (store == NULL) {
1062 		dprintf("vm_create_vnode_cache: couldn't create vnode store\n");
1063 		return B_NO_MEMORY;
1064 	}
1065 
1066 	cache = vm_cache_create(store);
1067 	if (cache == NULL) {
1068 		dprintf("vm_create_vnode_cache: vm_cache_create returned NULL\n");
1069 		return B_NO_MEMORY;
1070 	}
1071 
1072 	cacheRef = vm_cache_ref_create(cache);
1073 	if (cacheRef == NULL) {
1074 		dprintf("vm_create_vnode_cache: vm_cache_ref_create returned NULL\n");
1075 		return B_NO_MEMORY;
1076 	}
1077 
1078 	// acquire the cache ref once to represent the ref that the vnode will have
1079 	// this is one of the only places where we don't want the ref to ripple down to the store
1080 	vm_cache_acquire_ref(cacheRef, false);
1081 
1082 	*_cacheRef = cacheRef;
1083 	return B_OK;
1084 }
1085 
1086 
1087 /** Will map the file at the path specified by \a path to an area in memory.
1088  *	The file will be mirrored beginning at the specified \a offset. The \a offset
1089  *	and \a size arguments are rounded to page boundaries (\a offset down, \a size up).
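 *	\a mapping selects between a shared mapping of the file's cache
 *	(REGION_NO_PRIVATE_MAP) and a private copy-on-write mapping
 *	(REGION_PRIVATE_MAP).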
1090  */
1091 
1092 static area_id
1093 _vm_map_file(aspace_id aid, const char *name, void **_address, uint32 addressSpec,
1094 	size_t size, uint32 protection, uint32 mapping, const char *path, off_t offset, bool kernel)
1095 {
1096 	vm_cache_ref *cacheRef;
1097 	vm_area *area;
1098 	void *vnode;
1099 	status_t status;
1100 
1101 	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
1102 	// ToDo: check file access permissions (would be already done if the above were true)
1103 	// ToDo: for binary files, we want to make sure that they get the
1104 	//	copy of a file at a given time, ie. later changes should not
1105 	//	make it into the mapped copy -- this will need quite some changes
1106 	//	to be done in a nice way
1107 
1108 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
1109 	if (aspace == NULL)
1110 		return B_BAD_TEAM_ID;
1111 
1112 	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n", path, offset, size, mapping));
1113 
1114 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
1115 	size = PAGE_ALIGN(size);
1116 
1117 	// get the vnode for the object, this also grabs a ref to it
1118 	status = vfs_get_vnode_from_path(path, kernel, &vnode);
1119 	if (status < B_OK)
1120 		goto err1;
1121 
1122 	status = vfs_get_vnode_cache(vnode, &cacheRef, false);
1123 	if (status < B_OK)
1124 		goto err2;
1125 
1126 	// acquire a ref to the cache before we do work on it. Don't ripple the ref acquisition to the vnode
1127 	// below because we'll have to release it later anyway, since we grabbed a ref to the vnode at
1128 	// vfs_get_vnode_from_path(). This puts the ref counts in sync.
1129 	vm_cache_acquire_ref(cacheRef, false);
1130 	status = map_backing_store(aspace, cacheRef->cache->store, _address, offset, size,
1131 					addressSpec, 0, protection, mapping, &area, name);
1132 	vm_cache_release_ref(cacheRef);
1133 	vm_put_aspace(aspace);
1134 
1135 	if (status < B_OK)
1136 		return status;
1137 
1138 	return area->id;
1139 
1140 err2:
1141 	vfs_put_vnode(vnode);
1142 err1:
1143 	vm_put_aspace(aspace);
1144 	return status;
1145 }
1146 
1147 
1148 area_id
1149 vm_map_file(aspace_id aid, const char *name, void **address, uint32 addressSpec,
1150 	addr_t size, uint32 protection, uint32 mapping, const char *path, off_t offset)
1151 {
1152 	if (!arch_vm_supports_protection(protection))
1153 		return B_NOT_SUPPORTED;
1154 
1155 	return _vm_map_file(aid, name, address, addressSpec, size, protection, mapping, path, offset, true);
1156 }
1157 
1158 
1159 // ToDo: create a BeOS style call for this!
1160 
1161 area_id
1162 _user_vm_map_file(const char *uname, void **uaddress, int addressSpec,
1163 	addr_t size, int protection, int mapping, const char *upath, off_t offset)
1164 {
1165 	char name[B_OS_NAME_LENGTH];
1166 	char path[B_PATH_NAME_LENGTH];
1167 	void *address;
1168 	int rc;
1169 
1170 	if (!IS_USER_ADDRESS(uname) || !IS_USER_ADDRESS(uaddress) || !IS_USER_ADDRESS(upath)
1171 		|| user_strlcpy(name, uname, B_OS_NAME_LENGTH) < B_OK
1172 		|| user_strlcpy(path, upath, B_PATH_NAME_LENGTH) < B_OK
1173 		|| user_memcpy(&address, uaddress, sizeof(address)) < B_OK)
1174 		return B_BAD_ADDRESS;
1175 
1176 	// userland created areas can always be accessed by the kernel
1177 	protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1178 
1179 	rc = _vm_map_file(vm_get_current_user_aspace_id(), name, &address, addressSpec, size,
1180 			protection, mapping, path, offset, false);
1181 	if (rc < 0)
1182 		return rc;
1183 
1184 	if (user_memcpy(uaddress, &address, sizeof(address)) < B_OK)
1185 		return B_BAD_ADDRESS;
1186 
1187 	return rc;
1188 }
1189 
1190 
1191 area_id
1192 vm_clone_area(aspace_id aid, const char *name, void **address, uint32 addressSpec,
1193 	uint32 protection, uint32 mapping, area_id sourceID)
1194 {
1195 	vm_area *newArea = NULL;
1196 	vm_area *sourceArea;
1197 	status_t status;
1198 
1199 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
1200 	if (aspace == NULL)
1201 		return B_BAD_TEAM_ID;
1202 
1203 	sourceArea = vm_get_area(sourceID);
1204 	if (sourceArea == NULL) {
1205 		vm_put_aspace(aspace);
1206 		return B_BAD_VALUE;
1207 	}
1208 
1209 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
1210 	//	have been adapted. Maybe it should be part of the kernel settings,
1211 	//	anyway (so that old drivers can always work).
1212 #if 0
1213 	if (sourceArea->aspace == kernel_aspace && aspace != kernel_aspace
1214 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1215 		// kernel areas must not be cloned in userland, unless explicitly
1216 		// declared user-cloneable upon construction
1217 		status = B_NOT_ALLOWED;
1218 	} else
1219 #endif
1220 	{
1221 		vm_cache_acquire_ref(sourceArea->cache_ref, true);
1222 		status = map_backing_store(aspace, sourceArea->cache_ref->cache->store, address,
1223 					sourceArea->cache_offset, sourceArea->size, addressSpec, sourceArea->wiring,
1224 					protection, mapping, &newArea, name);
1225 		vm_cache_release_ref(sourceArea->cache_ref);
1226 	}
1227 
1228 	vm_put_area(sourceArea);
1229 	vm_put_aspace(aspace);
1230 
1231 	if (status < B_OK)
1232 		return status;
1233 
1234 	return newArea->id;
1235 }
1236 
1237 
1238 static status_t
1239 _vm_delete_area(vm_address_space *aspace, area_id id)
1240 {
1241 	status_t status = B_OK;
1242 	vm_area *area;
1243 
1244 	TRACE(("vm_delete_area: aspace id 0x%lx, area id 0x%lx\n", aspace->id, id));
1245 
1246 	area = vm_get_area(id);
1247 	if (area == NULL)
1248 		return B_BAD_VALUE;
1249 
1250 	if (area->aspace == aspace) {
1251 		vm_put_area(area);
1252 			// next put below will actually delete it
1253 	} else
1254 		status = B_NOT_ALLOWED;
1255 
1256 	vm_put_area(area);
1257 	return status;
1258 }
1259 
1260 
1261 status_t
1262 vm_delete_area(aspace_id aid, area_id rid)
1263 {
1264 	vm_address_space *aspace;
1265 	status_t err;
1266 
1267 	aspace = vm_get_aspace_by_id(aid);
1268 	if (aspace == NULL)
1269 		return B_BAD_TEAM_ID;
1270 
1271 	err = _vm_delete_area(aspace, rid);
1272 	vm_put_aspace(aspace);
1273 	return err;
1274 }
1275 
1276 
1277 static void
1278 remove_area_from_virtual_map(vm_address_space *addressSpace, vm_area *area, bool locked)
1279 {
1280 	vm_area *temp, *last = NULL;
1281 
1282 	if (!locked)
1283 		acquire_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
1284 
1285 	temp = addressSpace->virtual_map.areas;
1286 	while (temp != NULL) {
1287 		if (area == temp) {
1288 			if (last != NULL) {
1289 				last->aspace_next = temp->aspace_next;
1290 			} else {
1291 				addressSpace->virtual_map.areas = temp->aspace_next;
1292 			}
1293 			addressSpace->virtual_map.change_count++;
1294 			break;
1295 		}
1296 		last = temp;
1297 		temp = temp->aspace_next;
1298 	}
1299 	if (area == addressSpace->virtual_map.area_hint)
1300 		addressSpace->virtual_map.area_hint = NULL;
1301 
1302 	if (!locked)
1303 		release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
1304 
1305 	if (temp == NULL)
1306 		panic("vm_area_release_ref: area not found in aspace's area list\n");
1307 }
1308 
1309 
1310 static bool
1311 _vm_put_area(vm_area *area, bool aspaceLocked)
1312 {
1313 	vm_address_space *aspace;
1314 	bool removeit = false;
1315 
1316 	//TRACE(("_vm_put_area(area = %p, aspaceLocked = %s)\n",
1317 	//	area, aspaceLocked ? "yes" : "no"));
1318 
1319 	// we should never get here, but if we do, we can handle it
1320 	if (area->id == RESERVED_AREA_ID)
1321 		return false;
1322 
1323 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
1324 	if (atomic_add(&area->ref_count, -1) == 1) {
1325 		hash_remove(sAreaHash, area);
1326 		removeit = true;
1327 	}
1328 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
1329 
1330 	if (!removeit)
1331 		return false;
1332 
1333 	aspace = area->aspace;
1334 
1335 	arch_vm_unset_memory_type(area);
1336 	remove_area_from_virtual_map(aspace, area, aspaceLocked);
1337 
1338 	vm_cache_remove_area(area->cache_ref, area);
1339 	vm_cache_release_ref(area->cache_ref);
1340 
1341 	(*aspace->translation_map.ops->lock)(&aspace->translation_map);
1342 	(*aspace->translation_map.ops->unmap)(&aspace->translation_map, area->base,
1343 		area->base + (area->size - 1));
1344 	(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
1345 
1346 	// now we can give up the area's reference to the address space
1347 	vm_put_aspace(aspace);
1348 
1349 	free(area->name);
1350 	free(area);
1351 	return true;
1352 }
1353 
1354 
1355 static bool
1356 vm_put_area(vm_area *area)
1357 {
1358 	return _vm_put_area(area, false);
1359 }
1360 
1361 
1362 static status_t
1363 vm_copy_on_write_area(vm_area *area)
1364 {
1365 	vm_store *store;
1366 	vm_cache *upperCache, *lowerCache;
1367 	vm_cache_ref *upperCacheRef, *lowerCacheRef;
1368 	vm_translation_map *map;
1369 	vm_page *page;
1370 	uint32 protection;
1371 	status_t status;
1372 
1373 	TRACE(("vm_copy_on_write_area(area = %p)\n", area));
1374 
1375 	// We need to separate the vm_cache from its vm_cache_ref: the area
1376 	// and its cache_ref go into a new layer on top of the old one.
1377 	// So the old cache gets a new cache_ref and the area a new cache.
1378 
1379 	upperCacheRef = area->cache_ref;
1380 	lowerCache = upperCacheRef->cache;
1381 
1382 	// create an anonymous store object
1383 	store = vm_store_create_anonymous_noswap(false, 0);
1384 	if (store == NULL)
1385 		return B_NO_MEMORY;
1386 
1387 	upperCache = vm_cache_create(store);
1388 	if (upperCache == NULL) {
1389 		status = B_NO_MEMORY;
1390 		goto err1;
1391 	}
1392 
1393 	lowerCacheRef = vm_cache_ref_create(lowerCache);
1394 	if (lowerCacheRef == NULL) {
1395 		status = B_NO_MEMORY;
1396 		goto err2;
1397 	}
1398 
1399 	// The area must be readable in the same way it was previously writable
1400 	protection = B_KERNEL_READ_AREA;
1401 	if (area->protection & B_READ_AREA)
1402 		protection |= B_READ_AREA;
1403 
1404 	// we need to hold the cache_ref lock when we want to switch its cache
1405 	mutex_lock(&upperCacheRef->lock);
1406 	mutex_lock(&lowerCacheRef->lock);
1407 
1408 	// ToDo: add a child counter to vm_cache - so that we can collapse a
1409 	//		cache layer when possible (ie. "the other" area was deleted)
1410 	upperCache->temporary = 1;
1411 	upperCache->scan_skip = lowerCache->scan_skip;
1412 	upperCache->source = lowerCache;
1413 	upperCache->ref = upperCacheRef;
1414 	upperCacheRef->cache = upperCache;
1415 
1416 	// we need to manually alter the ref_count
1417 	// ToDo: investigate a bit deeper if this is really correct
1418 	// (doesn't look like it, but it works)
1419 	lowerCacheRef->ref_count = upperCacheRef->ref_count;
1420 	upperCacheRef->ref_count = 1;
1421 
1422 	// grab a ref to the cache object we're now linked to as a source
1423 	vm_cache_acquire_ref(lowerCacheRef, true);
1424 
1425 	// We now need to remap all pages from the area read-only, so that
1426 	// a copy will be created on next write access
1427 
1428 	map = &area->aspace->translation_map;
1429 	map->ops->lock(map);
1430 	map->ops->unmap(map, area->base, area->base - 1 + area->size);
1431 
1432 	for (page = lowerCache->page_list; page; page = page->cache_next) {
1433 		map->ops->map(map, area->base + (page->offset - area->cache_offset),
1434 			page->ppn * B_PAGE_SIZE, protection);
1435 	}
1436 
1437 	map->ops->unlock(map);
1438 
1439 	mutex_unlock(&lowerCacheRef->lock);
1440 	mutex_unlock(&upperCacheRef->lock);
1441 
1442 	return B_OK;
1443 
1444 err2:
1445 	free(upperCache);
1446 err1:
1447 	store->ops->destroy(store);
1448 	return status;
1449 }
1450 
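/**	Creates a copy of the area \a sourceID in the address space \a addressSpaceID.
 *	A writable copy is mapped as a private (copy-on-write) layer on top of the
 *	source cache; if the source area itself is writable, it is moved onto a
 *	copy-on-write layer as well, so that later writes on either side stay
 *	private to it.
 */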
1451 
1452 area_id
1453 vm_copy_area(aspace_id addressSpaceID, const char *name, void **_address, uint32 addressSpec,
1454 	uint32 protection, area_id sourceID)
1455 {
1456 	vm_address_space *addressSpace;
1457 	vm_cache_ref *cacheRef;
1458 	vm_area *target, *source;
1459 	status_t status;
1460 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
1461 
1462 	if ((protection & B_KERNEL_PROTECTION) == 0) {
1463 		// set the same protection for the kernel as for userland
1464 		protection |= B_KERNEL_READ_AREA;
1465 		if (writableCopy)
1466 			protection |= B_KERNEL_WRITE_AREA;
1467 	}
1468 
1469 	if ((source = vm_get_area(sourceID)) == NULL)
1470 		return B_BAD_VALUE;
1471 
1472 	addressSpace = vm_get_aspace_by_id(addressSpaceID);
1473 	cacheRef = source->cache_ref;
1474 
1475 	if (addressSpec == B_CLONE_ADDRESS) {
1476 		addressSpec = B_EXACT_ADDRESS;
1477 		*_address = (void *)source->base;
1478 	}
1479 
1480 	// First, create a cache on top of the source area
1481 
1482 	status = map_backing_store(addressSpace, cacheRef->cache->store, _address,
1483 		source->cache_offset, source->size, addressSpec, source->wiring, protection,
1484 		writableCopy ? REGION_PRIVATE_MAP : REGION_NO_PRIVATE_MAP,
1485 		&target, name);
1486 
1487 	if (status < B_OK)
1488 		goto err;
1489 
1490 	// If the source area is writable, we need to move it one layer up as well
1491 
1492 	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
1493 		// ToDo: do something more useful if this fails!
1494 		if (vm_copy_on_write_area(source) < B_OK)
1495 			panic("vm_copy_on_write_area() failed!\n");
1496 	}
1497 
1498 	// we want to return the ID of the newly created area
1499 	status = target->id;
1500 
1501 err:
1502 	vm_put_aspace(addressSpace);
1503 	vm_put_area(source);
1504 
1505 	return status;
1506 }
1507 
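// Returns how many areas reference the given cache with write access, not
// counting "ignoreArea".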
1508 
1509 static int32
1510 count_writable_areas(vm_cache_ref *ref, vm_area *ignoreArea)
1511 {
1512 	struct vm_area *area = ref->areas;
1513 	uint32 count = 0;
1514 
1515 	for (; area != NULL; area = area->cache_next) {
1516 		if (area != ignoreArea
1517 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
1518 			count++;
1519 	}
1520 
1521 	return count;
1522 }
1523 
1524 
1525 static status_t
1526 vm_set_area_protection(aspace_id aspaceID, area_id areaID, uint32 newProtection)
1527 {
1528 	vm_cache_ref *cacheRef;
1529 	vm_cache *cache;
1530 	vm_area *area;
1531 	status_t status = B_OK;
1532 
1533 	TRACE(("vm_set_area_protection(aspace = %#lx, area = %#lx, protection = %#lx)\n",
1534 		aspaceID, areaID, newProtection));
1535 
1536 	if (!arch_vm_supports_protection(newProtection))
1537 		return B_NOT_SUPPORTED;
1538 
1539 	area = vm_get_area(areaID);
1540 	if (area == NULL)
1541 		return B_BAD_VALUE;
1542 
1543 	if (aspaceID != vm_get_kernel_aspace_id() && area->aspace->id != aspaceID) {
1544 		// unless you're the kernel, you are only allowed to set
1545 		// the protection of your own areas
1546 		vm_put_area(area);
1547 		return B_NOT_ALLOWED;
1548 	}
1549 
1550 	cacheRef = area->cache_ref;
1551 	cache = cacheRef->cache;
1552 
1553 	mutex_lock(&cacheRef->lock);
1554 
1555 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1556 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
1557 		// change from read/write to read-only
1558 
1559 		if (cache->source != NULL && cache->temporary) {
1560 			if (count_writable_areas(cacheRef, area) == 0) {
1561 				// Since this cache now lives from the pages in its source cache,
1562 				// we can change the cache's commitment to take only those pages
1563 				// into account that really are in this cache.
1564 
1565 				// count existing pages in this cache
1566 				struct vm_page *page = cache->page_list;
1567 				uint32 count = 0;
1568 
1569 				for (; page != NULL; page = page->cache_next) {
1570 					count++;
1571 				}
1572 
1573 				status = cache->store->ops->commit(cache->store, count * B_PAGE_SIZE);
1574 
1575 				// ToDo: we may be able to join with our source cache, if count == 0
1576 			}
1577 		}
1578 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
1579 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
1580 		// change from read-only to read/write
1581 
1582 		// ToDo: if this is a shared cache, insert new cache (we only know about other
1583 		//	areas in this cache yet, though, not about child areas)
1584 		//	-> use this call with care, it might currently have unwanted consequences
1585 		//	   because of this. It should always be safe though, if there are no other
1586 		//	   (child) areas referencing this area's cache (you just might not know).
1587 		if (count_writable_areas(cacheRef, area) == 0
1588 			&& (cacheRef->areas != area || area->cache_next)) {
1589 			// ToDo: child areas are not tested for yet
1590 			dprintf("set_area_protection(): warning, would need to insert a new cache_ref (not yet implemented)!\n");
1591 			status = B_NOT_ALLOWED;
1592 		} else
1593 			dprintf("set_area_protection() may not work correctly yet in this direction!\n");
1594 
1595 		if (status == B_OK && cache->source != NULL && cache->temporary) {
1596 			// the cache's commitment must contain all possible pages
1597 			status = cache->store->ops->commit(cache->store, cache->virtual_size);
1598 		}
1599 	} else {
1600 		// we don't have anything special to do in all other cases
1601 	}
1602 
1603 	if (status == B_OK && area->protection != newProtection) {
1604 		// remap existing pages in this cache
1605 		struct vm_translation_map *map = &area->aspace->translation_map;
1606 
1607 		map->ops->lock(map);
1608 		map->ops->protect(map, area->base, area->base + area->size, newProtection);
1609 		map->ops->unlock(map);
1610 
1611 		area->protection = newProtection;
1612 	}
1613 
1614 	mutex_unlock(&cacheRef->lock);
1615 	vm_put_area(area);
1616 
1617 	return status;
1618 }
1619 
1620 
1621 status_t
1622 vm_get_page_mapping(aspace_id aid, addr_t vaddr, addr_t *paddr)
1623 {
1624 	vm_address_space *aspace;
1625 	uint32 null_flags;
1626 	status_t err;
1627 
1628 	aspace = vm_get_aspace_by_id(aid);
1629 	if (aspace == NULL)
1630 		return B_BAD_TEAM_ID;
1631 
1632 	err = aspace->translation_map.ops->query(&aspace->translation_map,
1633 		vaddr, paddr, &null_flags);
1634 
1635 	vm_put_aspace(aspace);
1636 	return err;
1637 }
1638 
1639 
1640 static int
1641 display_mem(int argc, char **argv)
1642 {
1643 	int32 displayWidth;
1644 	int32 itemSize;
1645 	int32 num = 1;
1646 	addr_t address;
1647 	int i, j;
1648 
1649 	if (argc < 2) {
1650 		kprintf("usage: dw/ds/db <address> [num]\n"
1651 			"\tdw - 4 bytes\n"
1652 			"\tds - 2 bytes\n"
1653 			"\tdb - 1 byte\n");
1654 		return 0;
1655 	}
1656 
1657 	address = strtoul(argv[1], NULL, 0);
1658 
1659 	if (argc >= 3) {
1660 		num = -1;
1661 		num = atoi(argv[2]);
1662 	}
1663 
1664 	// determine the item size and the number of items shown per line
1665 	if (strcmp(argv[0], "db") == 0) {
1666 		itemSize = 1;
1667 		displayWidth = 16;
1668 	} else if (strcmp(argv[0], "ds") == 0) {
1669 		itemSize = 2;
1670 		displayWidth = 8;
1671 	} else if (strcmp(argv[0], "dw") == 0) {
1672 		itemSize = 4;
1673 		displayWidth = 4;
1674 	} else {
1675 		kprintf("display_mem called in an invalid way!\n");
1676 		return 0;
1677 	}
1678 
1679 	for (i = 0; i < num; i++) {
1680 		uint32 value;
1681 
1682 		if ((i % displayWidth) == 0) {
1683 			int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
1684 			if (i != 0)
1685 				kprintf("\n");
1686 
1687 			kprintf("[0x%lx]  ", address + i * itemSize);
1688 
1689 			for (j = 0; j < displayed; j++) {
1690 				char c;
1691 				if (user_memcpy(&c, (char *)address + i * itemSize + j, 1) != B_OK) {
1692 					displayed = j;
1693 					break;
1694 				}
1695 				if (!isalnum(c))
1696 					c = '.';
1697 
1698 				kprintf("%c", c);
1699 			}
1700 			if (num > displayWidth) {
1701 				// make sure the spacing in the last line is correct
1702 				for (j = displayed; j < displayWidth * itemSize; j++)
1703 					kprintf(" ");
1704 			}
1705 			kprintf("  ");
1706 		}
1707 
1708 		if (user_memcpy(&value, (uint8 *)address + i * itemSize, itemSize) != B_OK) {
1709 			kprintf("read fault");
1710 			break;
1711 		}
1712 
1713 		switch (itemSize) {
1714 			case 1:
1715 				kprintf(" 0x%02x", *(uint8 *)&value);
1716 				break;
1717 			case 2:
1718 				kprintf(" 0x%04x", *(uint16 *)&value);
1719 				break;
1720 			case 4:
1721 				kprintf(" 0x%08lx", *(uint32 *)&value);
1722 				break;
1723 		}
1724 	}
1725 
1726 	kprintf("\n");
1727 	return 0;
1728 }
1729 
1730 
1731 static int
1732 dump_cache_ref(int argc, char **argv)
1733 {
1734 	addr_t address;
1735 	vm_area *area;
1736 	vm_cache_ref *cache_ref;
1737 
1738 	if (argc < 2) {
1739 		kprintf("cache_ref: not enough arguments\n");
1740 		return 0;
1741 	}
1742 	if (strlen(argv[1]) < 2 || argv[1][0] != '0' || argv[1][1] != 'x') {
1743 		kprintf("cache_ref: invalid argument, pass address\n");
1744 		return 0;
1745 	}
1746 
1747 	address = atoul(argv[1]);
1748 	cache_ref = (vm_cache_ref *)address;
1749 
1750 	kprintf("cache_ref at %p:\n", cache_ref);
1751 	kprintf("cache: %p\n", cache_ref->cache);
1752 	kprintf("lock.holder: %ld\n", cache_ref->lock.holder);
1753 	kprintf("lock.sem: 0x%lx\n", cache_ref->lock.sem);
1754 	kprintf("areas:\n");
1755 	for (area = cache_ref->areas; area != NULL; area = area->cache_next) {
1756 		kprintf(" area 0x%lx: ", area->id);
1757 		kprintf("base_addr = 0x%lx ", area->base);
1758 		kprintf("size = 0x%lx ", area->size);
1759 		kprintf("name = '%s' ", area->name);
1760 		kprintf("protection = 0x%lx\n", area->protection);
1761 	}
1762 	kprintf("ref_count: %ld\n", cache_ref->ref_count);
1763 	return 0;
1764 }
1765 
1766 
1767 static const char *
1768 page_state_to_text(int state)
1769 {
1770 	switch(state) {
1771 		case PAGE_STATE_ACTIVE:
1772 			return "active";
1773 		case PAGE_STATE_INACTIVE:
1774 			return "inactive";
1775 		case PAGE_STATE_BUSY:
1776 			return "busy";
1777 		case PAGE_STATE_MODIFIED:
1778 			return "modified";
1779 		case PAGE_STATE_FREE:
1780 			return "free";
1781 		case PAGE_STATE_CLEAR:
1782 			return "clear";
1783 		case PAGE_STATE_WIRED:
1784 			return "wired";
1785 		case PAGE_STATE_UNUSED:
1786 			return "unused";
1787 		default:
1788 			return "unknown";
1789 	}
1790 }
1791 
1792 
1793 static int
1794 dump_cache(int argc, char **argv)
1795 {
1796 	addr_t address;
1797 	vm_cache *cache;
1798 	vm_page *page;
1799 
1800 	if (argc < 2) {
1801 		kprintf("cache: not enough arguments\n");
1802 		return 0;
1803 	}
1804 	if (strlen(argv[1]) < 2 || argv[1][0] != '0' || argv[1][1] != 'x') {
1805 		kprintf("cache: invalid argument, pass address\n");
1806 		return 0;
1807 	}
1808 
1809 	address = atoul(argv[1]);
1810 	cache = (vm_cache *)address;
1811 
1812 	kprintf("cache at %p:\n", cache);
1813 	kprintf("cache_ref: %p\n", cache->ref);
1814 	kprintf("source: %p\n", cache->source);
1815 	kprintf("store: %p\n", cache->store);
1816 	kprintf("virtual_size: 0x%Lx\n", cache->virtual_size);
1817 	kprintf("temporary: %ld\n", cache->temporary);
1818 	kprintf("scan_skip: %ld\n", cache->scan_skip);
1819 	kprintf("page_list:\n");
1820 	for (page = cache->page_list; page != NULL; page = page->cache_next) {
1821 		if (page->type == PAGE_TYPE_PHYSICAL) {
1822 			kprintf(" %p ppn 0x%lx offset 0x%Lx type %ld state %ld (%s) ref_count %ld\n",
1823 				page, page->ppn, page->offset, page->type, page->state,
1824 				page_state_to_text(page->state), page->ref_count);
1825 		} else if(page->type == PAGE_TYPE_DUMMY) {
1826 			kprintf(" %p DUMMY PAGE state %ld (%s)\n",
1827 				page, page->state, page_state_to_text(page->state));
1828 		} else
1829 			kprintf(" %p UNKNOWN PAGE type %ld\n", page, page->type);
1830 	}
1831 	return 0;
1832 }
1833 
1834 
1835 static void
1836 _dump_area(vm_area *area)
1837 {
1838 	kprintf("dump of area at %p:\n", area);
1839 	kprintf("name: '%s'\n", area->name);
1840 	kprintf("id: 0x%lx\n", area->id);
1841 	kprintf("base: 0x%lx\n", area->base);
1842 	kprintf("size: 0x%lx\n", area->size);
1843 	kprintf("protection: 0x%lx\n", area->protection);
1844 	kprintf("wiring: 0x%lx\n", area->wiring);
1845 	kprintf("ref_count: %ld\n", area->ref_count);
1846 	kprintf("cache_ref: %p\n", area->cache_ref);
1847 	kprintf("cache_offset: 0x%Lx\n", area->cache_offset);
1848 	kprintf("cache_next: %p\n", area->cache_next);
1849 	kprintf("cache_prev: %p\n", area->cache_prev);
1850 }
1851 
1852 
1853 static int
1854 dump_area(int argc, char **argv)
1855 {
1856 	bool found = false;
1857 	vm_area *area;
1858 	addr_t num;
1859 
1860 	if (argc < 2) {
1861 		kprintf("usage: area <id|address|name>\n");
1862 		return 0;
1863 	}
1864 
1865 	num = strtoul(argv[1], NULL, 0);
1866 
1867 	// walk through the area list, looking for the arguments as a name
1868 	struct hash_iterator iter;
1869 
1870 	hash_open(sAreaHash, &iter);
1871 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
1872 		if ((area->name != NULL && !strcmp(argv[1], area->name))
1873 			|| (num != 0
1874 				&& ((addr_t)area->id == num
1875 					|| (area->base <= num && area->base + area->size > num)))) {
1876 			_dump_area(area);
1877 			found = true;
1878 		}
1879 	}
1880 	hash_close(sAreaHash, &iter, false);

1881 	if (!found)
1882 		kprintf("could not find area %s (%ld)\n", argv[1], num);
1883 	return 0;
1884 }
1885 
1886 
1887 static int
1888 dump_area_list(int argc, char **argv)
1889 {
1890 	vm_area *area;
1891 	struct hash_iterator iter;
1892 
1893 	kprintf("addr\t      id  base\t\tsize\t\tprotect\tlock\tname\n");
1894 
1895 	hash_open(sAreaHash, &iter);
1896 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
1897 		kprintf("%p %5lx  %p\t%p\t%ld\t%ld\t%s\n", area, area->id, (void *)area->base,
1898 			(void *)area->size, area->protection, area->wiring, area->name);
1899 	}
1900 	hash_close(sAreaHash, &iter, false);
1901 	return 0;
1902 }
1903 
1904 
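/*	Deletes all areas of the given address space in two passes: reserved
 *	entries (RESERVED_AREA_ID) are simply unlinked and freed, real areas are
 *	released via _vm_put_area() while the virtual map is write-locked.
 *	Presumably only called while the address space itself is being torn down.
 */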
1905 status_t
1906 vm_delete_areas(struct vm_address_space *aspace)
1907 {
1908 	vm_area *area;
1909 	vm_area *next, *last = NULL;
1910 
1911 	TRACE(("vm_delete_areas: called on aspace 0x%lx\n", aspace->id));
1912 
1913 	acquire_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0, 0);
1914 
1915 	// remove all reserved areas in this address space
1916 
1917 	for (area = aspace->virtual_map.areas; area; area = next) {
1918 		next = area->aspace_next;
1919 
1920 		if (area->id == RESERVED_AREA_ID) {
1921 			// just remove it
1922 			if (last)
1923 				last->aspace_next = area->aspace_next;
1924 			else
1925 				aspace->virtual_map.areas = area->aspace_next;
1926 
1927 			free(area);
1928 			continue;
1929 		}
1930 
1931 		last = area;
1932 	}
1933 
1934 	// delete all the areas in this aspace
1935 
1936 	for (area = aspace->virtual_map.areas; area; area = next) {
1937 		next = area->aspace_next;
1938 
1939 		// decrement the ref on this area; this may push the ref count below 0
1940 		// if there is a concurrent delete_area() on that area, but that's ok here
1941 		if (!_vm_put_area(area, true))
1942 			dprintf("vm_delete_areas() did not delete area %p\n", area);
1943 	}
1944 
1945 	release_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0);
1946 
1947 	return B_OK;
1948 }
1949 
1950 
1951 static area_id
1952 vm_area_for(aspace_id aid, addr_t address)
1953 {
1954 	vm_address_space *addressSpace;
1955 	area_id id = B_ERROR;
1956 	vm_area *area;
1957 
1958 	addressSpace = vm_get_aspace_by_id(aid);
1959 	if (addressSpace == NULL)
1960 		return B_BAD_TEAM_ID;
1961 
1962 	acquire_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0, 0);
1963 
1964 	area = addressSpace->virtual_map.areas;
1965 	for (; area != NULL; area = area->aspace_next) {
1966 		// ignore reserved space regions
1967 		if (area->id == RESERVED_AREA_ID)
1968 			continue;
1969 
1970 		if (address >= area->base && address < area->base + area->size) {
1971 			id = area->id;
1972 			break;
1973 		}
1974 	}
1975 
1976 	release_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0);
1977 	vm_put_aspace(addressSpace);
1978 
1979 	return id;
1980 }
1981 
1982 
1983 static void
1984 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
1985 {
1986 	// free all physical pages in the specified range
1987 
1988 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
1989 		addr_t physicalAddress;
1990 		uint32 flags;
1991 
1992 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
1993 			vm_page *page = vm_lookup_page(current / B_PAGE_SIZE);
1994 			if (page != NULL)
1995 				vm_page_set_state(page, PAGE_STATE_FREE);
1996 		}
1997 	}
1998 
1999 	// unmap the memory
2000 	map->ops->unmap(map, start, end - 1);
2001 }
2002 
2003 
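/*	Walks the kernel area list (which is sorted by base address) and hands
 *	every hole inside [start, start + size) that is not covered by an area
 *	back to the page allocator via unmap_and_free_physical_pages(). Used to
 *	reclaim boot loader mappings the kernel no longer needs.
 */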
2004 void
2005 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
2006 {
2007 	vm_translation_map *map = &kernel_aspace->translation_map;
2008 	addr_t end = start + size;
2009 	addr_t lastEnd = start;
2010 	vm_area *area;
2011 
2012 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
2013 
2014 	// The areas are sorted in virtual address space order, so
2015 	// we just have to find the holes between them that fall
2016 	// into the area we should dispose
2017 
2018 	map->ops->lock(map);
2019 
2020 	for (area = kernel_aspace->virtual_map.areas; area; area = area->aspace_next) {
2021 		addr_t areaStart = area->base;
2022 		addr_t areaEnd = areaStart + area->size;
2023 
2024 		if (area->id == RESERVED_AREA_ID)
2025 			continue;
2026 
2027 		if (areaEnd >= end) {
2028 			// we are done, the areas are already beyond what we have to free
2029 			lastEnd = end;
2030 			break;
2031 		}
2032 
2033 		if (areaStart > lastEnd) {
2034 			// this is something we can free
2035 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
2036 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
2037 		}
2038 
2039 		lastEnd = areaEnd;
2040 	}
2041 
2042 	if (lastEnd < end) {
2043 		// we can also get rid of some space at the end of the area
2044 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
2045 		unmap_and_free_physical_pages(map, lastEnd, end);
2046 	}
2047 
2048 	map->ops->unlock(map);
2049 }
2050 
2051 
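/*	Creates "<basename>_text" and "<basename>_data" areas for a preloaded
 *	image. The file name is truncated to 25 characters, presumably so that
 *	the name, the 5 character suffix, and the terminating NUL still fit into
 *	the B_OS_NAME_LENGTH (32) byte buffer.
 */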
2052 static void
2053 create_preloaded_image_areas(struct preloaded_image *image)
2054 {
2055 	char name[B_OS_NAME_LENGTH];
2056 	void *address;
2057 	int32 length;
2058 
2059 	// use file name to create a good area name
2060 	char *fileName = strrchr(image->name, '/');
2061 	if (fileName == NULL)
2062 		fileName = image->name;
2063 	else
2064 		fileName++;
2065 
2066 	length = strlen(fileName);
2067 	// make sure there is enough space for the suffix
2068 	if (length > 25)
2069 		length = 25;
2070 
2071 	memcpy(name, fileName, length);
2072 	strcpy(name + length, "_text");
2073 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
2074 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2075 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
2076 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2077 
2078 	strcpy(name + length, "_data");
2079 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
2080 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2081 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
2082 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2083 }
2084 
2085 
2086 /**	Frees all previously allocated kernel arguments areas from the kernel_args
2087  *	structure. Any boot loader resources contained in those arguments must not
2088  *	be accessed anymore past this point.
2089  */
2090 
2091 void
2092 vm_free_kernel_args(kernel_args *args)
2093 {
2094 	uint32 i;
2095 
2096 	TRACE(("vm_free_kernel_args()\n"));
2097 
2098 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2099 		area_id area = area_for((void *)args->kernel_args_range[i].start);
2100 		if (area >= B_OK)
2101 			delete_area(area);
2102 	}
2103 }
2104 
2105 
2106 static void
2107 allocate_kernel_args(kernel_args *args)
2108 {
2109 	uint32 i;
2110 
2111 	TRACE(("allocate_kernel_args()\n"));
2112 
2113 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2114 		void *address = (void *)args->kernel_args_range[i].start;
2115 
2116 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
2117 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2118 	}
2119 }
2120 
2121 
2122 static void
2123 unreserve_boot_loader_ranges(kernel_args *args)
2124 {
2125 	uint32 i;
2126 
2127 	TRACE(("unreserve_boot_loader_ranges()\n"));
2128 
2129 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2130 		vm_unreserve_address_range(vm_get_kernel_aspace_id(),
2131 			(void *)args->virtual_allocated_range[i].start,
2132 			args->virtual_allocated_range[i].size);
2133 	}
2134 }
2135 
2136 
2137 static void
2138 reserve_boot_loader_ranges(kernel_args *args)
2139 {
2140 	uint32 i;
2141 
2142 	TRACE(("reserve_boot_loader_ranges()\n"));
2143 
2144 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2145 		void *address = (void *)args->virtual_allocated_range[i].start;
2146 		status_t status = vm_reserve_address_range(vm_get_kernel_aspace_id(), &address,
2147 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
2148 		if (status < B_OK)
2149 			panic("could not reserve boot loader ranges\n");
2150 	}
2151 }
2152 
2153 
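/*	First stage of VM initialization, still single threaded and without
 *	semaphores: sets up the translation map and arch layer, the kernel heap,
 *	the page and cache subsystems, the area hash, and the kernel address
 *	space, then creates areas for everything that already exists (heap,
 *	kernel args, preloaded images, idle thread stacks) and registers the
 *	KDL commands defined above.
 */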
2154 status_t
2155 vm_init(kernel_args *args)
2156 {
2157 	struct preloaded_image *image;
2158 	addr_t heap_base;
2159 	void *address;
2160 	status_t err = 0;
2161 	uint32 i;
2162 
2163 	TRACE(("vm_init: entry\n"));
2164 	err = arch_vm_translation_map_init(args);
2165 	err = arch_vm_init(args);
2166 
2167 	// initialize some globals
2168 	sNextAreaID = 1;
2169 	sAreaHashLock = -1;
2170 
2171 	// map in the new heap and initialize it
2172 	heap_base = vm_alloc_from_kernel_args(args, HEAP_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2173 	TRACE(("heap at 0x%lx\n", heap_base));
2174 	heap_init(heap_base);
2175 
2176 	// initialize the free page list and physical page mapper
2177 	vm_page_init(args);
2178 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
2179 
2180 	// initialize the hash table that stores the pages mapped to caches
2181 	vm_cache_init(args);
2182 
2183 	{
2184 		vm_area *area;
2185 		sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area,
2186 			&area_compare, &area_hash);
2187 		if (sAreaHash == NULL)
2188 			panic("vm_init: error creating area hash table\n");
2189 	}
2190 
2191 	vm_aspace_init();
2192 	reserve_boot_loader_ranges(args);
2193 
2194 	// do any further initialization that the architecture dependant layers may need now
2195 	arch_vm_translation_map_init_post_area(args);
2196 	arch_vm_init_post_area(args);
2197 	vm_page_init_post_area(args);
2198 
2199 	// allocate areas to represent stuff that already exists
2200 
2201 	address = (void *)ROUNDOWN(heap_base, B_PAGE_SIZE);
2202 	create_area("kernel heap", &address, B_EXACT_ADDRESS, HEAP_SIZE,
2203 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2204 
2205 	allocate_kernel_args(args);
2206 
2207 	args->kernel_image.name = "kernel";
2208 		// the lazy boot loader currently doesn't set the kernel's name...
2209 	create_preloaded_image_areas(&args->kernel_image);
2210 
2211 	// allocate areas for preloaded images
2212 	for (image = args->preloaded_images; image != NULL; image = image->next) {
2213 		create_preloaded_image_areas(image);
2214 	}
2215 
2216 	// allocate kernel stacks
2217 	for (i = 0; i < args->num_cpus; i++) {
2218 		char name[64];
2219 
2220 		sprintf(name, "idle thread %lu kstack", i + 1);
2221 		address = (void *)args->cpu_kstack[i].start;
2222 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
2223 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2224 	}
2225 	{
2226 		void *null;
2227 		vm_map_physical_memory(vm_get_kernel_aspace_id(), "bootdir", &null, B_ANY_KERNEL_ADDRESS,
2228 			args->bootdir_addr.size, B_KERNEL_READ_AREA, args->bootdir_addr.start);
2229 	}
2230 
2231 	// add some debugger commands
2232 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
2233 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
2234 	add_debugger_command("cache_ref", &dump_cache_ref, "Dump cache_ref data structure");
2235 	add_debugger_command("cache", &dump_cache, "Dump cache data structure");
2236 //	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
2237 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
2238 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
2239 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
2240 
2241 	TRACE(("vm_init: exit\n"));
2242 
2243 	return err;
2244 }
2245 
2246 
2247 status_t
2248 vm_init_post_sem(kernel_args *args)
2249 {
2250 	vm_area *area;
2251 
2252 	// This frees all unused boot loader resources and makes their space available again
2253 	arch_vm_init_end(args);
2254 	unreserve_boot_loader_ranges(args);
2255 
2256 	// fill in all of the semaphores that were not allocated before
2257 	// since we're still single threaded and only the kernel address space exists,
2258 	// it isn't that hard to find all of the ones we need to create
2259 
2260 	benaphore_init(&sAvailableMemoryLock, "available memory lock");
2261 	arch_vm_translation_map_init_post_sem(args);
2262 	vm_aspace_init_post_sem();
2263 
2264 	for (area = kernel_aspace->virtual_map.areas; area; area = area->aspace_next) {
2265 		if (area->id == RESERVED_AREA_ID)
2266 			continue;
2267 
2268 		if (area->cache_ref->lock.sem < 0)
2269 			mutex_init(&area->cache_ref->lock, "cache_ref_mutex");
2270 	}
2271 
2272 	sAreaHashLock = create_sem(WRITE_COUNT, "area hash");
2273 
2274 	return heap_init_post_sem(args);
2275 }
2276 
2277 
2278 status_t
2279 vm_init_post_thread(kernel_args *args)
2280 {
2281 	vm_page_init_post_thread(args);
2282 	vm_daemon_init();
2283 	vm_low_memory_init();
2284 
2285 	return heap_init_post_thread(args);
2286 }
2287 
2288 
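/*	permit_page_faults()/forbid_page_faults() adjust a per-thread nesting
 *	counter (page_faults_allowed); the fault path is expected to consult it
 *	before trying to resolve a fault. A sketch of the intended pairing,
 *	assuming the usual nesting discipline:
 *
 *		forbid_page_faults();
 *		// ... code that must not take (and wait on) a page fault ...
 *		permit_page_faults();
 */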
2289 void
2290 permit_page_faults(void)
2291 {
2292 	struct thread *thread = thread_get_current_thread();
2293 	if (thread != NULL)
2294 		atomic_add(&thread->page_faults_allowed, 1);
2295 }
2296 
2297 
2298 void
2299 forbid_page_faults(void)
2300 {
2301 	struct thread *thread = thread_get_current_thread();
2302 	if (thread != NULL)
2303 		atomic_add(&thread->page_faults_allowed, -1);
2304 }
2305 
2306 
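/*	Entry point used by the architecture specific fault handler. Tries to
 *	resolve the fault via vm_soft_fault(); on failure it either hands control
 *	to the faulting thread's registered fault_handler (kernel faults, by
 *	returning its address in *newip), or delivers SIGSEGV to the current team
 *	(user faults). *newip is set to 0 when execution should simply continue.
 */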
2307 status_t
2308 vm_page_fault(addr_t address, addr_t fault_address, bool is_write, bool is_user, addr_t *newip)
2309 {
2310 	int err;
2311 
2312 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, fault_address));
2313 
2314 	*newip = 0;
2315 
2316 	err = vm_soft_fault(address, is_write, is_user);
2317 	if (err < 0) {
2318 		dprintf("vm_page_fault: vm_soft_fault returned error %d on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
2319 			err, address, fault_address, is_write, is_user, thread_get_current_thread_id());
2320 		if (!is_user) {
2321 			struct thread *t = thread_get_current_thread();
2322 			if (t && t->fault_handler != 0) {
2323 				// this will cause the arch dependant page fault handler to
2324 				// modify the IP on the interrupt frame or whatever to return
2325 				// to this address
2326 				*newip = t->fault_handler;
2327 			} else {
2328 				// unhandled page fault in the kernel
2329 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
2330 					address, fault_address);
2331 			}
2332 		} else {
2333 #if 1
2334 			// ToDo: remove me once we have proper userland debugging support (and tools)
2335 			vm_address_space *aspace = vm_get_current_user_aspace();
2336 			vm_virtual_map *map = &aspace->virtual_map;
2337 			vm_area *area;
2338 
2339 			acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
2340 			area = vm_virtual_map_lookup(map, fault_address);
2341 
2342 			dprintf("vm_page_fault: sending team 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n",
2343 				thread_get_current_thread()->team->id, fault_address,
2344 				area ? area->name : "???", fault_address - (area ? area->base : 0x0));
2345 
2346 // We can print a stack trace of the userland thread here. Since we're accessing
2347 // user memory freely and unchecked, this is not enabled by default.
2348 #if 0
2349 			if (area) {
2350 				struct stack_frame {
2351 					#ifdef __INTEL__
2352 						struct stack_frame*	previous;
2353 						void*				return_address;
2354 					#else
2355 						// ...
2356 					#endif
2357 				};
2358 				struct iframe *iframe = i386_get_user_iframe();
2359 				struct stack_frame *frame = (struct stack_frame *)iframe->ebp;
2360 
2361 				dprintf("stack trace:\n");
2362 				for (; frame; frame = frame->previous) {
2363 					dprintf("  0x%p", frame->return_address);
2364 					area = vm_virtual_map_lookup(map,
2365 						(addr_t)frame->return_address);
2366 					if (area) {
2367 						dprintf(" (%s + %#lx)", area->name,
2368 							(addr_t)frame->return_address - area->base);
2369 					}
2370 					dprintf("\n");
2371 				}
2372 			}
2373 #endif	// 0 (stack trace)
2374 
2375 			release_sem_etc(map->sem, READ_COUNT, 0);
2376 			vm_put_aspace(aspace);
2377 #endif
2378 			if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV))
2379 				send_signal(team_get_current_team_id(), SIGSEGV);
2380 		}
2381 	}
2382 
2383 	return B_HANDLED_INTERRUPT;
2384 }
2385 
2386 
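/*	The actual page fault resolution. A rough outline of the algorithm as
 *	implemented below:
 *	 1. find the area covering the faulting address and check permissions,
 *	 2. let the store's fault() hook handle it, if the top cache has one,
 *	 3. otherwise walk the cache chain from top to bottom looking for the
 *	    page, inserting a busy dummy page into the top cache so that other
 *	    threads faulting on the same address wait instead of racing us,
 *	 4. read the page from the store, or allocate a clear one if nobody has it,
 *	 5. on a write fault to a page found in a lower cache, copy the page into
 *	    the top cache (copy-on-write),
 *	 6. finally map the page, read-only if it still lives in a lower cache.
 */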
2387 static status_t
2388 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser)
2389 {
2390 	vm_address_space *aspace;
2391 	vm_virtual_map *map;
2392 	vm_area *area;
2393 	vm_cache_ref *cache_ref;
2394 	vm_cache_ref *last_cache_ref;
2395 	vm_cache_ref *top_cache_ref;
2396 	off_t cache_offset;
2397 	vm_page dummy_page;
2398 	vm_page *page = NULL;
2399 	addr_t address;
2400 	int change_count;
2401 	int err;
2402 
2403 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
2404 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
2405 
2406 	address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
2407 
2408 	if (IS_KERNEL_ADDRESS(address)) {
2409 		aspace = vm_get_kernel_aspace();
2410 	} else if (IS_USER_ADDRESS(address)) {
2411 		aspace = vm_get_current_user_aspace();
2412 		if (aspace == NULL) {
2413 			if (isUser == false) {
2414 				dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n");
2415 				return B_BAD_ADDRESS;
2416 			} else {
2417 				// XXX weird state.
2418 				panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n");
2419 			}
2420 		}
2421 	} else {
2422 		// the hit was probably in the 64k DMZ between kernel and user space
2423 		// this keeps a user space thread from passing a buffer that crosses into kernel space
2424 		return B_BAD_ADDRESS;
2425 	}
2426 	map = &aspace->virtual_map;
2427 	atomic_add(&aspace->fault_count, 1);
2428 
2429 	// Get the area the fault was in
2430 
2431 	acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
2432 	area = vm_virtual_map_lookup(map, address);
2433 	if (area == NULL) {
2434 		release_sem_etc(map->sem, READ_COUNT, 0);
2435 		vm_put_aspace(aspace);
2436 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n", originalAddress);
2437 		return B_BAD_ADDRESS;
2438 	}
2439 
2440 	// check permissions
2441 	if (isUser && (area->protection & B_USER_PROTECTION) == 0) {
2442 		release_sem_etc(map->sem, READ_COUNT, 0);
2443 		vm_put_aspace(aspace);
2444 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
2445 		return B_PERMISSION_DENIED;
2446 	}
2447 	if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
2448 		release_sem_etc(map->sem, READ_COUNT, 0);
2449 		vm_put_aspace(aspace);
2450 		dprintf("write access attempted on read-only area 0x%lx at %p\n", area->id, (void *)originalAddress);
2451 		return B_PERMISSION_DENIED;
2452 	}
2453 
2454 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
2455 	// At first, the top most cache from the area is investigated
2456 
2457 	top_cache_ref = area->cache_ref;
2458 	cache_offset = address - area->base + area->cache_offset;
2459 	vm_cache_acquire_ref(top_cache_ref, true);
2460 	change_count = map->change_count;
2461 	release_sem_etc(map->sem, READ_COUNT, 0);
2462 
2463 	// See if this cache has a fault handler - this will do all the work for us
2464 	if (top_cache_ref->cache->store->ops->fault != NULL) {
2465 		// Note, since the page fault is resolved with interrupts enabled, the
2466 		// fault handler could be called more than once for the same reason -
2467 		// the store must take this into account
2468 		status_t status = (*top_cache_ref->cache->store->ops->fault)(top_cache_ref->cache->store, aspace, cache_offset);
2469 		if (status != B_BAD_HANDLER) {
2470 			vm_cache_release_ref(top_cache_ref);
2471 			vm_put_aspace(aspace);
2472 			return status;
2473 		}
2474 	}
2475 
2476 	// The top most cache has no fault handler, so let's see if the cache or its sources
2477 	// already have the page we're searching for (we're going from top to bottom)
2478 
2479 	dummy_page.state = PAGE_STATE_INACTIVE;
2480 	dummy_page.type = PAGE_TYPE_DUMMY;
2481 
2482 	last_cache_ref = top_cache_ref;
2483 	for (cache_ref = top_cache_ref; cache_ref; cache_ref = (cache_ref->cache->source) ? cache_ref->cache->source->ref : NULL) {
2484 		mutex_lock(&cache_ref->lock);
2485 
2486 		for (;;) {
2487 			page = vm_cache_lookup_page(cache_ref, cache_offset);
2488 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
2489 				vm_page_set_state(page, PAGE_STATE_BUSY);
2490 				mutex_unlock(&cache_ref->lock);
2491 				break;
2492 			}
2493 
2494 			if (page == NULL)
2495 				break;
2496 
2497 			// page must be busy
2498 			// ToDo: don't wait forever!
2499 			mutex_unlock(&cache_ref->lock);
2500 			snooze(20000);
2501 			mutex_lock(&cache_ref->lock);
2502 		}
2503 
2504 		if (page != NULL)
2505 			break;
2506 
2507 		// The current cache does not contain the page we're looking for
2508 
2509 		// If we're at the top most cache, insert the dummy page here to keep other threads
2510 		// from faulting on the same address and chasing us up the cache chain
2511 		if (cache_ref == top_cache_ref) {
2512 			dummy_page.state = PAGE_STATE_BUSY;
2513 			vm_cache_insert_page(cache_ref, &dummy_page, cache_offset);
2514 		}
2515 
2516 		// see if the vm_store has it
2517 		if (cache_ref->cache->store->ops->has_page != NULL
2518 			&& cache_ref->cache->store->ops->has_page(cache_ref->cache->store, cache_offset)) {
2519 			size_t bytesRead;
2520 			iovec vec;
2521 
2522 			vec.iov_len = bytesRead = B_PAGE_SIZE;
2523 
2524 			mutex_unlock(&cache_ref->lock);
2525 
2526 			page = vm_page_allocate_page(PAGE_STATE_FREE);
2527 			aspace->translation_map.ops->get_physical_page(page->ppn * B_PAGE_SIZE, (addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT);
2528 			// ToDo: handle errors here
2529 			err = cache_ref->cache->store->ops->read(cache_ref->cache->store, cache_offset, &vec, 1, &bytesRead);
2530 			aspace->translation_map.ops->put_physical_page((addr_t)vec.iov_base);
2531 
2532 			mutex_lock(&cache_ref->lock);
2533 
2534 			if (cache_ref == top_cache_ref) {
2535 				vm_cache_remove_page(cache_ref, &dummy_page);
2536 				dummy_page.state = PAGE_STATE_INACTIVE;
2537 			}
2538 			vm_cache_insert_page(cache_ref, page, cache_offset);
2539 			mutex_unlock(&cache_ref->lock);
2540 			break;
2541 		}
2542 		mutex_unlock(&cache_ref->lock);
2543 		last_cache_ref = cache_ref;
2544 	}
2545 
2546 	if (!cache_ref) {
2547 		// We rolled off the end of the cache chain, so we need to decide which
2548 		// cache will get the new page we're about to create.
2549 
2550 		cache_ref = isWrite ? top_cache_ref : last_cache_ref;
2551 			// Read-only pages go into the deepest cache - only the
2552 			// top most cache may have direct write access.
2553 	}
2554 
2555 	if (page == NULL) {
2556 		// we still haven't found a page, so we allocate a clean one
2557 		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
2558 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->ppn));
2559 
2560 		// Insert the new page into our cache, replacing the dummy page if necessary
2561 
2562 		mutex_lock(&cache_ref->lock);
2563 
2564 		// if we inserted a dummy page into this cache, we have to remove it now
2565 		if (dummy_page.state == PAGE_STATE_BUSY && dummy_page.cache == cache_ref->cache) {
2566 			vm_cache_remove_page(cache_ref, &dummy_page);
2567 			dummy_page.state = PAGE_STATE_INACTIVE;
2568 		}
2569 
2570 		vm_cache_insert_page(cache_ref, page, cache_offset);
2571 		mutex_unlock(&cache_ref->lock);
2572 
2573 		if (dummy_page.state == PAGE_STATE_BUSY) {
2574 			// we had inserted the dummy page into another cache, so let's remove it from there
2575 			vm_cache_ref *temp_cache = dummy_page.cache->ref;
2576 			mutex_lock(&temp_cache->lock);
2577 			vm_cache_remove_page(temp_cache, &dummy_page);
2578 			mutex_unlock(&temp_cache->lock);
2579 			dummy_page.state = PAGE_STATE_INACTIVE;
2580 		}
2581 	}
2582 
2583 	// We now have the page and a cache it belongs to - we now need to make
2584 	// sure that the area's cache can access it, too, and sees the correct data
2585 
2586 	if (page->cache != top_cache_ref->cache && isWrite) {
2587 		// now we have a page that has the data we want, but in the wrong cache object
2588 		// so we need to copy it and stick it into the top cache
2589 		vm_page *src_page = page;
2590 		void *src, *dest;
2591 
2592 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
2593 		page = vm_page_allocate_page(PAGE_STATE_FREE);
2594 
2595 		// try to get a mapping for the src and dest page so we can copy it
2596 		for (;;) {
2597 			(*aspace->translation_map.ops->get_physical_page)(src_page->ppn * B_PAGE_SIZE, (addr_t *)&src, PHYSICAL_PAGE_CAN_WAIT);
2598 			err = (*aspace->translation_map.ops->get_physical_page)(page->ppn * B_PAGE_SIZE, (addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT);
2599 			if (err == B_NO_ERROR)
2600 				break;
2601 
2602 			// it couldn't map the second one, so sleep and retry
2603 			// keeps an extremely rare deadlock from occurring
2604 			(*aspace->translation_map.ops->put_physical_page)((addr_t)src);
2605 			snooze(5000);
2606 		}
2607 
2608 		memcpy(dest, src, B_PAGE_SIZE);
2609 		(*aspace->translation_map.ops->put_physical_page)((addr_t)src);
2610 		(*aspace->translation_map.ops->put_physical_page)((addr_t)dest);
2611 
2612 		vm_page_set_state(src_page, PAGE_STATE_ACTIVE);
2613 
2614 		mutex_lock(&top_cache_ref->lock);
2615 
2616 		// Insert the new page into our cache, replacing the dummy page if necessary
2617 
2618 		// if we inserted a dummy page into this cache, we have to remove it now
2619 		if (dummy_page.state == PAGE_STATE_BUSY && dummy_page.cache == top_cache_ref->cache) {
2620 			vm_cache_remove_page(top_cache_ref, &dummy_page);
2621 			dummy_page.state = PAGE_STATE_INACTIVE;
2622 		}
2623 
2624 		vm_cache_insert_page(top_cache_ref, page, cache_offset);
2625 		mutex_unlock(&top_cache_ref->lock);
2626 
2627 		if (dummy_page.state == PAGE_STATE_BUSY) {
2628 			// we had inserted the dummy page into another cache, so let's remove it from there
2629 			vm_cache_ref *temp_cache = dummy_page.cache->ref;
2630 			mutex_lock(&temp_cache->lock);
2631 			vm_cache_remove_page(temp_cache, &dummy_page);
2632 			mutex_unlock(&temp_cache->lock);
2633 			dummy_page.state = PAGE_STATE_INACTIVE;
2634 		}
2635 	}
2636 
2637 	err = B_OK;
2638 	acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
2639 	if (change_count != map->change_count) {
2640 		// something may have changed, see if the address is still valid
2641 		area = vm_virtual_map_lookup(map, address);
2642 		if (area == NULL
2643 			|| area->cache_ref != top_cache_ref
2644 			|| (address - area->base + area->cache_offset) != cache_offset) {
2645 			dprintf("vm_soft_fault: address space layout changed affecting ongoing soft fault\n");
2646 			err = B_BAD_ADDRESS;
2647 		}
2648 	}
2649 
2650 	if (err == B_OK) {
2651 		// All went fine, all there is left to do is to map the page into the address space
2652 
2653 		// If the page doesn't reside in the area's cache, we need to make sure it's
2654 		// mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write)
2655 		uint32 newProtection = area->protection;
2656 		if (page->cache != top_cache_ref->cache && !isWrite)
2657 			newProtection &= ~(isUser ? B_WRITE_AREA : B_KERNEL_WRITE_AREA);
2658 
2659 		atomic_add(&page->ref_count, 1);
2660 		(*aspace->translation_map.ops->lock)(&aspace->translation_map);
2661 		(*aspace->translation_map.ops->map)(&aspace->translation_map, address,
2662 			page->ppn * B_PAGE_SIZE, newProtection);
2663 		(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
2664 	}
2665 
2666 	release_sem_etc(map->sem, READ_COUNT, 0);
2667 
2668 	if (dummy_page.state == PAGE_STATE_BUSY) {
2669 		// We still have the dummy page in the cache - that happens if we didn't need
2670 		// to allocate a new page before, but could use one in another cache
2671 		vm_cache_ref *temp_cache = dummy_page.cache->ref;
2672 		mutex_lock(&temp_cache->lock);
2673 		vm_cache_remove_page(temp_cache, &dummy_page);
2674 		mutex_unlock(&temp_cache->lock);
2675 		dummy_page.state = PAGE_STATE_INACTIVE;
2676 	}
2677 
2678 	vm_page_set_state(page, PAGE_STATE_ACTIVE);
2679 
2680 	vm_cache_release_ref(top_cache_ref);
2681 	vm_put_aspace(aspace);
2682 
2683 	return err;
2684 }
2685 
2686 
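/*	Looks up the area containing the given address, using and updating the
 *	map's area_hint. The caller must hold the map's semaphore (a read lock is
 *	sufficient); a minimal sketch of the expected calling pattern, matching
 *	its use in vm_soft_fault():
 *
 *		acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
 *		vm_area *area = vm_virtual_map_lookup(map, address);
 *		// ... use area ...
 *		release_sem_etc(map->sem, READ_COUNT, 0);
 */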
2687 static vm_area *
2688 vm_virtual_map_lookup(vm_virtual_map *map, addr_t address)
2689 {
2690 	vm_area *area;
2691 
2692 	// check the areas list first
2693 	area = map->area_hint;
2694 	if (area && area->base <= address && (area->base + area->size) > address)
2695 		return area;
2696 
2697 	for (area = map->areas; area != NULL; area = area->aspace_next) {
2698 		if (area->id == RESERVED_AREA_ID)
2699 			continue;
2700 
2701 		if (area->base <= address && (area->base + area->size) > address)
2702 			break;
2703 	}
2704 
2705 	if (area)
2706 		map->area_hint = area;
2707 	return area;
2708 }
2709 
2710 
2711 status_t
2712 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, int flags)
2713 {
2714 	return (*kernel_aspace->translation_map.ops->get_physical_page)(paddr, _vaddr, flags);
2715 }
2716 
2717 
2718 status_t
2719 vm_put_physical_page(addr_t vaddr)
2720 {
2721 	return (*kernel_aspace->translation_map.ops->put_physical_page)(vaddr);
2722 }
2723 
2724 
2725 void
2726 vm_unreserve_memory(size_t amount)
2727 {
2728 	benaphore_lock(&sAvailableMemoryLock);
2729 
2730 	sAvailableMemory += amount;
2731 
2732 	benaphore_unlock(&sAvailableMemoryLock);
2733 }
2734 
2735 
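/*	vm_try_reserve_memory()/vm_unreserve_memory() only do bookkeeping against
 *	sAvailableMemory; no pages are allocated here. A sketch of the expected
 *	pairing (hypothetical caller):
 *
 *		if (vm_try_reserve_memory(size) != B_OK)
 *			return B_NO_MEMORY;
 *		// ... create the mapping; on failure, or when it is deleted again:
 *		vm_unreserve_memory(size);
 */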
2736 status_t
2737 vm_try_reserve_memory(size_t amount)
2738 {
2739 	status_t status;
2740 	benaphore_lock(&sAvailableMemoryLock);
2741 
2742 	if (sAvailableMemory > amount) {
2743 		sAvailableMemory -= amount;
2744 		status = B_OK;
2745 	} else
2746 		status = B_NO_MEMORY;
2747 
2748 	benaphore_unlock(&sAvailableMemoryLock);
2749 	return status;
2750 }
2751 
2752 
2753 /**	This function enforces some protection properties:
2754  *	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
2755  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
2756  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
2757  *	   and B_KERNEL_WRITE_AREA.
2758  */
2759 
2760 static void
2761 fix_protection(uint32 *protection)
2762 {
2763 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
2764 		if ((*protection & B_USER_PROTECTION) == 0
2765 			|| (*protection & B_WRITE_AREA) != 0)
2766 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
2767 		else
2768 			*protection |= B_KERNEL_READ_AREA;
2769 	}
2770 }
2771 
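/*	For illustration, the resulting bits for a few inputs (derived from the
 *	rules above):
 *
 *		B_READ_AREA                -> B_READ_AREA | B_KERNEL_READ_AREA
 *		B_READ_AREA | B_WRITE_AREA -> both user bits | B_KERNEL_READ_AREA
 *		                              | B_KERNEL_WRITE_AREA
 *		0                          -> B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
 */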
2772 
2773 //	#pragma mark -
2774 
2775 
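/*	The user_*() routines below wrap their arch_cpu_*() counterparts and pass
 *	in the current thread's fault_handler, so that a fault on an invalid user
 *	address makes the copy return an error instead of panicking the kernel
 *	(see the fault_handler handling in vm_page_fault() above).
 */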
2776 status_t
2777 user_memcpy(void *to, const void *from, size_t size)
2778 {
2779 	return arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler);
2780 }
2781 
2782 
2783 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
2784  *	the string in \a to, NULL-terminating the result.
2785  *
2786  *	\param to Pointer to the destination C-string.
2787  *	\param from Pointer to the source C-string.
2788  *	\param size Size in bytes of the string buffer pointed to by \a to.
2789  *
2790  *	\return strlen(\a from).
2791  */
2792 
2793 ssize_t
2794 user_strlcpy(char *to, const char *from, size_t size)
2795 {
2796 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
2797 }
2798 
2799 
2800 status_t
2801 user_memset(void *s, char c, size_t count)
2802 {
2803 	return arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler);
2804 }
2805 
2806 
2807 //	#pragma mark -
2808 
2809 
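/*	As the ToDo below explains, lock_memory() currently only makes sure the
 *	range is paged in and, if B_READ_DEVICE is set, mapped writable; nothing
 *	is actually pinned yet. A sketch of the usual driver-side pairing
 *	(hypothetical buffer and length):
 *
 *		if (lock_memory(buffer, length, B_DMA_IO | B_READ_DEVICE) == B_OK) {
 *			// ... set up DMA to/from buffer ...
 *			unlock_memory(buffer, length, B_DMA_IO | B_READ_DEVICE);
 *		}
 */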
2810 long
2811 lock_memory(void *address, ulong numBytes, ulong flags)
2812 {
2813 	vm_address_space *aspace = NULL;
2814 	struct vm_translation_map *map;
2815 	addr_t base = (addr_t)address;
2816 	addr_t end = base + numBytes;
2817 	bool isUser = IS_USER_ADDRESS(address);
2818 
2819 	// ToDo: Our VM currently doesn't support locking, this function
2820 	//	will now at least make sure that the memory is paged in, but
2821 	//	that's about it.
2822 	//	Nevertheless, it must be implemented as soon as we're able to
2823 	//	swap pages out of memory.
2824 
2825 	// ToDo: this is a hack, too; the iospace area is a null region and
2826 	//	officially cannot be written to or read; ie. vm_soft_fault() will
2827 	//	fail there. Furthermore, this is x86 specific as well.
2828 	#define IOSPACE_SIZE (256 * 1024 * 1024)
2829 	if (base >= KERNEL_BASE + IOSPACE_SIZE && base + numBytes < KERNEL_BASE + 2 * IOSPACE_SIZE)
2830 		return B_OK;
2831 
2832 	if (isUser)
2833 		aspace = vm_get_current_user_aspace();
2834 	else
2835 		aspace = vm_get_kernel_aspace();
2836 	if (aspace == NULL)
2837 		return B_ERROR;
2838 
2839 	map = &aspace->translation_map;
2840 
2841 	for (; base < end; base += B_PAGE_SIZE) {
2842 		addr_t physicalAddress;
2843 		uint32 protection;
2844 		status_t status;
2845 
2846 		map->ops->lock(map);
2847 		map->ops->query(map, base, &physicalAddress, &protection);
2848 		map->ops->unlock(map);
2849 
2850 		if ((protection & PAGE_PRESENT) != 0) {
2851 			// if B_READ_DEVICE is set, the caller intends to write to the locked
2852 			// memory, so if it hasn't been mapped writable, we'll try the soft
2853 			// fault anyway
2854 			if ((flags & B_READ_DEVICE) == 0
2855 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2856 				continue;
2857 		}
2858 
2859 		status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser);
2860 		if (status != B_OK)	{
2861 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
2862 				address, numBytes, flags, strerror(status));
2863 			vm_put_aspace(aspace);
2864 			return status;
2865 		}
2866 	}
2867 
2868 	vm_put_aspace(aspace);
2869 	return B_OK;
2870 }
2871 
2872 
2873 long
2874 unlock_memory(void *buffer, ulong numBytes, ulong flags)
2875 {
2876 	return B_OK;
2877 }
2878 
2879 
2880 /** According to the BeBook, this function should always succeed.
2881  *	This is no longer the case.
2882  */
2883 
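/*	A minimal usage sketch (hypothetical buffer and length): the table is
 *	filled with the physical runs backing the virtual range and, unless only
 *	a single entry was requested, terminated by a NULL entry.
 *
 *		physical_entry table[8];
 *		if (get_memory_map(buffer, length, table, 8) == B_OK) {
 *			// table[i].address / table[i].size describe contiguous runs
 *		}
 */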
2884 long
2885 get_memory_map(const void *address, ulong numBytes, physical_entry *table, long numEntries)
2886 {
2887 	vm_address_space *addressSpace;
2888 	addr_t virtualAddress = (addr_t)address;
2889 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
2890 	addr_t physicalAddress;
2891 	status_t status = B_OK;
2892 	int32 index = -1;
2893 	addr_t offset = 0;
2894 	uint32 flags;
2895 
2896 	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes, numEntries));
2897 
2898 	if (numEntries == 0 || numBytes == 0)
2899 		return B_BAD_VALUE;
2900 
2901 	// in which address space is the address to be found?
2902 	if (IS_USER_ADDRESS(virtualAddress))
2903 		addressSpace = vm_get_current_user_aspace();
2904 	else
2905 		addressSpace = vm_get_kernel_aspace();
2906 
2907 	if (addressSpace == NULL)
2908 		return B_ERROR;
2909 
2910 	(*addressSpace->translation_map.ops->lock)(&addressSpace->translation_map);
2911 
2912 	while (offset < numBytes) {
2913 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
2914 
2915 		status = (*addressSpace->translation_map.ops->query)(&addressSpace->translation_map,
2916 					(addr_t)address + offset, &physicalAddress, &flags);
2917 		if (status < 0)
2918 			break;
2919 
2920 		if (index < 0 && pageOffset > 0) {
2921 			physicalAddress += pageOffset;
2922 			if (bytes > B_PAGE_SIZE - pageOffset)
2923 				bytes = B_PAGE_SIZE - pageOffset;
2924 		}
2925 
2926 		// need to switch to the next physical_entry?
2927 		if (index < 0 || (addr_t)table[index].address != physicalAddress - table[index].size) {
2928 			if (++index + 1 > numEntries) {
2929 				// table too small
2930 				status = B_BUFFER_OVERFLOW;
2931 				break;
2932 			}
2933 			table[index].address = (void *)physicalAddress;
2934 			table[index].size = bytes;
2935 		} else {
2936 			// page does fit in current entry
2937 			table[index].size += bytes;
2938 		}
2939 
2940 		offset += bytes;
2941 	}
2942 	(*addressSpace->translation_map.ops->unlock)(&addressSpace->translation_map);
2943 
2944 	// close the entry list
2945 
2946 	if (status == B_OK) {
2947 		// if it's only one entry, we will silently accept the missing ending
2948 		if (numEntries == 1)
2949 			return B_OK;
2950 
2951 		if (++index + 1 > numEntries)
2952 			return B_BUFFER_OVERFLOW;
2953 
2954 		table[index].address = NULL;
2955 		table[index].size = 0;
2956 	}
2957 
2958 	return status;
2959 }
2960 
2961 
2962 area_id
2963 area_for(void *address)
2964 {
2965 	return vm_area_for(vm_get_kernel_aspace_id(), (addr_t)address);
2966 }
2967 
2968 
2969 area_id
2970 find_area(const char *name)
2971 {
2972 	struct hash_iterator iterator;
2973 	vm_area *area;
2974 	area_id id = B_NAME_NOT_FOUND;
2975 
2976 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
2977 	hash_open(sAreaHash, &iterator);
2978 
2979 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
2980 		if (area->id == RESERVED_AREA_ID)
2981 			continue;
2982 
2983 		if (!strcmp(area->name, name)) {
2984 			id = area->id;
2985 			break;
2986 		}
2987 	}
2988 
2989 	hash_close(sAreaHash, &iterator, false);
2990 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
2991 
2992 	return id;
2993 }
2994 
2995 
2996 static void
2997 fill_area_info(struct vm_area *area, area_info *info, size_t size)
2998 {
2999 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
3000 	info->area = area->id;
3001 	info->address = (void *)area->base;
3002 	info->size = area->size;
3003 	info->protection = area->protection & B_USER_PROTECTION;
3004 	info->lock = B_FULL_LOCK;
3005 	info->team = area->aspace->id;
3006 	info->ram_size = area->size;
3007 	info->copy_count = 0;
3008 	info->in_count = 0;
3009 	info->out_count = 0;
3010 		// ToDo: retrieve real values here!
3011 }
3012 
3013 
3014 status_t
3015 _get_area_info(area_id id, area_info *info, size_t size)
3016 {
3017 	vm_area *area;
3018 
3019 	if (size != sizeof(area_info) || info == NULL)
3020 		return B_BAD_VALUE;
3021 
3022 	area = vm_get_area(id);
3023 	if (area == NULL)
3024 		return B_BAD_VALUE;
3025 
3026 	fill_area_info(area, info, size);
3027 	vm_put_area(area);
3028 
3029 	return B_OK;
3030 }
3031 
3032 
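/*	Iterates over a team's areas in base address order. The cookie stores the
 *	base address of the last area returned (or -1 once the end has been
 *	reached); callers start with a cookie of 0. A sketch, following the
 *	public get_next_area_info() convention:
 *
 *		int32 cookie = 0;
 *		area_info info;
 *		while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK)
 *			; // ... inspect info ...
 */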
3033 status_t
3034 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
3035 {
3036 	addr_t nextBase = *(addr_t *)cookie;
3037 	vm_address_space *addressSpace;
3038 	vm_area *area;
3039 
3040 	// we're already through the list
3041 	if (nextBase == (addr_t)-1)
3042 		return B_ENTRY_NOT_FOUND;
3043 
3044 	if (team == B_CURRENT_TEAM)
3045 		team = team_get_current_team_id();
3046 
3047 	if (!team_is_valid(team)
3048 		|| team_get_address_space(team, &addressSpace) != B_OK)
3049 		return B_BAD_VALUE;
3050 
3051 	acquire_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0, 0);
3052 
3053 	for (area = addressSpace->virtual_map.areas; area; area = area->aspace_next) {
3054 		if (area->id == RESERVED_AREA_ID)
3055 			continue;
3056 
3057 		if (area->base > nextBase)
3058 			break;
3059 	}
3060 
3061 	// make sure this area won't go away
3062 	if (area != NULL)
3063 		area = vm_get_area(area->id);
3064 
3065 	release_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0);
3066 	vm_put_aspace(addressSpace);
3067 
3068 	if (area == NULL) {
3069 		*cookie = (int32)-1;	// mark the iteration as finished for subsequent calls
3070 		return B_ENTRY_NOT_FOUND;
3071 	}
3072 
3073 	fill_area_info(area, info, size);
3074 	*cookie = (int32)(area->base);
3075 
3076 	vm_put_area(area);
3077 
3078 	return B_OK;
3079 }
3080 
3081 
3082 status_t
3083 set_area_protection(area_id area, uint32 newProtection)
3084 {
3085 	fix_protection(&newProtection);
3086 
3087 	return vm_set_area_protection(vm_get_kernel_aspace_id(), area, newProtection);
3088 }
3089 
3090 
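/*	Resizes the given area and all other areas attached to the same cache.
 *	Only anonymous ("temporary") caches may be resized; growing an area is
 *	only possible if the space after it is free or covered by a reserved
 *	range, which is shrunk or consumed accordingly. Shrinking also unmaps
 *	the pages beyond the new size.
 */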
3091 status_t
3092 resize_area(area_id areaID, size_t newSize)
3093 {
3094 	vm_cache_ref *cache;
3095 	vm_area *area, *current;
3096 	status_t status = B_OK;
3097 	size_t oldSize;
3098 
3099 	// is newSize a multiple of B_PAGE_SIZE?
3100 	if (newSize & (B_PAGE_SIZE - 1))
3101 		return B_BAD_VALUE;
3102 
3103 	area = vm_get_area(areaID);
3104 	if (area == NULL)
3105 		return B_BAD_VALUE;
3106 
3107 	// Resize all areas of this area's cache
3108 
3109 	cache = area->cache_ref;
3110 	oldSize = area->size;
3111 
3112 	// ToDo: we should only allow resizing anonymous memory areas!
3113 	if (!cache->cache->temporary) {
3114 		status = B_NOT_ALLOWED;
3115 		goto err1;
3116 	}
3117 
3118 	// ToDo: we must lock all address spaces here!
3119 
3120 	mutex_lock(&cache->lock);
3121 
3122 	if (oldSize < newSize) {
3123 		// We need to check if all areas of this cache can be resized
3124 
3125 		for (current = cache->areas; current; current = current->cache_next) {
3126 			if (current->aspace_next && current->aspace_next->base <= (current->base + newSize)) {
3127 				// if the area was created inside a reserved area, it can also be
3128 				// resized in that area
3129 				// ToDo: if there is free space after the reserved area, it could be used as well...
3130 				vm_area *next = current->aspace_next;
3131 				if (next->id == RESERVED_AREA_ID && next->cache_offset <= current->base
3132 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
3133 					continue;
3134 
3135 				status = B_ERROR;
3136 				goto err2;
3137 			}
3138 		}
3139 	}
3140 
3141 	// Okay, looks good so far, so let's do it
3142 
3143 	for (current = cache->areas; current; current = current->cache_next) {
3144 		if (current->aspace_next && current->aspace_next->base <= (current->base + newSize)) {
3145 			vm_area *next = current->aspace_next;
3146 			if (next->id == RESERVED_AREA_ID && next->cache_offset <= current->base
3147 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
3148 				// resize reserved area
3149 				addr_t offset = current->base + newSize - next->base;
3150 				if (next->size <= offset) {
3151 					current->aspace_next = next->aspace_next;
3152 					free(next);
3153 				} else {
3154 					next->size -= offset;
3155 					next->base += offset;
3156 				}
3157 			} else {
3158 				status = B_ERROR;
3159 				break;
3160 			}
3161 		}
3162 
3163 		current->size = newSize;
3164 
3165 		// we also need to unmap all pages beyond the new size, if the area has shrunk
3166 		if (newSize < oldSize) {
3167 			vm_translation_map *map = &current->aspace->translation_map;
3168 
3169 			map->ops->lock(map);
3170 			map->ops->unmap(map, current->base + newSize, current->base + oldSize - 1);
3171 			map->ops->unlock(map);
3172 		}
3173 	}
3174 
3175 	if (status == B_OK)
3176 		status = vm_cache_resize(cache, newSize);
3177 
3178 	if (status < B_OK) {
3179 		// This shouldn't really be possible, but hey, who knows
3180 		for (current = cache->areas; current; current = current->cache_next)
3181 			current->size = oldSize;
3182 	}
3183 
3184 err2:
3185 	mutex_unlock(&cache->lock);
3186 err1:
3187 	vm_put_area(area);
3188 
3189 	// ToDo: we must honour the lock restrictions of this area
3190 	return status;
3191 }
3192 
3193 
3194 /**	Transfers the specified area to a new team. The caller must be the owner
3195  *	of the area (not yet enforced but probably should be).
3196  *	This function is currently not exported to the kernel namespace, but is
3197  *	only accessible using the _kern_transfer_area() syscall.
3198  */
3199 
3200 static status_t
3201 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
3202 {
3203 	vm_address_space *sourceAddressSpace, *targetAddressSpace;
3204 	vm_translation_map *map;
3205 	vm_area *area, *reserved;
3206 	void *reservedAddress;
3207 	status_t status;
3208 
3209 	area = vm_get_area(id);
3210 	if (area == NULL)
3211 		return B_BAD_VALUE;
3212 
3213 	// ToDo: check if the current team owns the area
3214 
3215 	status = team_get_address_space(target, &targetAddressSpace);
3216 	if (status != B_OK)
3217 		goto err1;
3218 
3219 	// We will first remove the area, and then reserve its former
3220 	// address range so that we can later reclaim it if the
3221 	// transfer failed.
3222 
3223 	sourceAddressSpace = area->aspace;
3224 
3225 	reserved = _vm_create_reserved_region_struct(&sourceAddressSpace->virtual_map, 0);
3226 	if (reserved == NULL) {
3227 		status = B_NO_MEMORY;
3228 		goto err2;
3229 	}
3230 
3231 	acquire_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
3232 
3233 	reservedAddress = (void *)area->base;
3234 	remove_area_from_virtual_map(sourceAddressSpace, area, true);
3235 	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
3236 		area->size, reserved);
3237 		// famous last words: this cannot fail :)
3238 
3239 	release_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3240 
3241 	if (status != B_OK)
3242 		goto err3;
3243 
3244 	// unmap the area in the source address space
3245 	map = &sourceAddressSpace->translation_map;
3246 	map->ops->lock(map);
3247 	map->ops->unmap(map, area->base, area->base + (area->size - 1));
3248 	map->ops->unlock(map);
3249 
3250 	// insert the area into the target address space
3251 
3252 	acquire_sem_etc(targetAddressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
3253 	// check to see if this aspace has entered DELETE state
3254 	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
3255 		// okay, someone is trying to delete this aspace now, so we can't
3256 		// insert the area, so back out
3257 		status = B_BAD_TEAM_ID;
3258 		goto err4;
3259 	}
3260 
3261 	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
3262 	if (status < B_OK)
3263 		goto err4;
3264 
3265 	// The area was successfully transferred to the new team when we got here
3266 	area->aspace = targetAddressSpace;
3267 
3268 	release_sem_etc(targetAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3269 
3270 	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress, area->size);
3271 	vm_put_aspace(sourceAddressSpace);
3272 		// we keep the reference of the target address space for the
3273 		// area, so we only have to put the one from the source
3274 	vm_put_area(area);
3275 
3276 	return B_OK;
3277 
3278 err4:
3279 	release_sem_etc(targetAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3280 err3:
3281 	// insert the area again into the source address space
3282 	acquire_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
3283 	// check to see if this aspace has entered DELETE state
3284 	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
3285 		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS, area->size, area) != B_OK) {
3286 		// We can't insert the area anymore - we have to delete it manually
3287 		vm_cache_remove_area(area->cache_ref, area);
3288 		vm_cache_release_ref(area->cache_ref);
3289 		free(area->name);
3290 		free(area);
3291 		area = NULL;
3292 	}
3293 	release_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3294 err2:
3295 	vm_put_aspace(targetAddressSpace);
3296 err1:
3297 	if (area != NULL)
3298 		vm_put_area(area);
3299 	return status;
3300 }
3301 
3302 
3303 area_id
3304 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
3305 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
3306 {
3307 	if (!arch_vm_supports_protection(protection))
3308 		return B_NOT_SUPPORTED;
3309 
3310 	fix_protection(&protection);
3311 
3312 	return vm_map_physical_memory(vm_get_kernel_aspace_id(), name, _virtualAddress,
3313 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
3314 }
3315 
3316 
3317 area_id
3318 clone_area(const char *name, void **_address, uint32 addressSpec, uint32 protection,
3319 	area_id source)
3320 {
3321 	if ((protection & B_KERNEL_PROTECTION) == 0)
3322 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
3323 
3324 	return vm_clone_area(vm_get_kernel_aspace_id(), name, _address, addressSpec,
3325 				protection, REGION_NO_PRIVATE_MAP, source);
3326 }
3327 
3328 
3329 area_id
3330 create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
3331 	uint32 size, uint32 lock, uint32 protection)
3332 {
3333 	fix_protection(&protection);
3334 
3335 	return vm_create_anonymous_area(team->aspace->id, (char *)name, address,
3336 				addressSpec, size, lock, protection);
3337 }
3338 
3339 
3340 area_id
3341 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
3342 	uint32 protection)
3343 {
3344 	fix_protection(&protection);
3345 
3346 	return vm_create_anonymous_area(vm_get_kernel_aspace_id(), (char *)name, _address,
3347 				addressSpec, size, lock, protection);
3348 }
3349 
3350 
3351 status_t
3352 delete_area_etc(struct team *team, area_id area)
3353 {
3354 	return vm_delete_area(team->aspace->id, area);
3355 }
3356 
3357 
3358 status_t
3359 delete_area(area_id area)
3360 {
3361 	return vm_delete_area(vm_get_kernel_aspace_id(), area);
3362 }
3363 
3364 
3365 //	#pragma mark -
3366 
3367 
3368 status_t
3369 _user_init_heap_address_range(addr_t base, addr_t size)
3370 {
3371 	return vm_reserve_address_range(vm_get_current_user_aspace_id(), (void **)&base,
3372 		B_EXACT_ADDRESS, size, RESERVED_AVOID_BASE);
3373 }
3374 
3375 
3376 area_id
3377 _user_area_for(void *address)
3378 {
3379 	return vm_area_for(vm_get_current_user_aspace_id(), (addr_t)address);
3380 }
3381 
3382 
3383 area_id
3384 _user_find_area(const char *userName)
3385 {
3386 	char name[B_OS_NAME_LENGTH];
3387 
3388 	if (!IS_USER_ADDRESS(userName)
3389 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
3390 		return B_BAD_ADDRESS;
3391 
3392 	return find_area(name);
3393 }
3394 
3395 
3396 status_t
3397 _user_get_area_info(area_id area, area_info *userInfo)
3398 {
3399 	area_info info;
3400 	status_t status;
3401 
3402 	if (!IS_USER_ADDRESS(userInfo))
3403 		return B_BAD_ADDRESS;
3404 
3405 	status = get_area_info(area, &info);
3406 	if (status < B_OK)
3407 		return status;
3408 
3409 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
3410 		return B_BAD_ADDRESS;
3411 
3412 	return status;
3413 }
3414 
3415 
3416 status_t
3417 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
3418 {
3419 	status_t status;
3420 	area_info info;
3421 	int32 cookie;
3422 
3423 	if (!IS_USER_ADDRESS(userCookie)
3424 		|| !IS_USER_ADDRESS(userInfo)
3425 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
3426 		return B_BAD_ADDRESS;
3427 
3428 	status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
3429 	if (status != B_OK)
3430 		return status;
3431 
3432 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
3433 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
3434 		return B_BAD_ADDRESS;
3435 
3436 	return status;
3437 }
3438 
3439 
3440 status_t
3441 _user_set_area_protection(area_id area, uint32 newProtection)
3442 {
3443 	if ((newProtection & ~B_USER_PROTECTION) != 0)
3444 		return B_BAD_VALUE;
3445 
3446 	fix_protection(&newProtection);
3447 
3448 	return vm_set_area_protection(vm_get_current_user_aspace_id(), area,
3449 		newProtection);
3450 }
3451 
3452 
3453 status_t
3454 _user_resize_area(area_id area, size_t newSize)
3455 {
3456 	// ToDo: Since we restrict deleting of areas to those owned by the team,
3457 	// we should also do that for resizing (check other functions, too).
3458 	return resize_area(area, newSize);
3459 }
3460 
3461 
3462 status_t
3463 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
3464 {
3465 	status_t status;
3466 	void *address;
3467 
3468 	// filter out some unavailable values (for userland)
3469 	switch (addressSpec) {
3470 		case B_ANY_KERNEL_ADDRESS:
3471 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3472 			return B_BAD_VALUE;
3473 	}
3474 
3475 	if (!IS_USER_ADDRESS(userAddress)
3476 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3477 		return B_BAD_ADDRESS;
3478 
3479 	status = transfer_area(area, &address, addressSpec, target);
3480 	if (status < B_OK)
3481 		return status;
3482 
3483 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
3484 		return B_BAD_ADDRESS;
3485 
3486 	return status;
3487 }
3488 
3489 
3490 area_id
3491 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
3492 	uint32 protection, area_id sourceArea)
3493 {
3494 	char name[B_OS_NAME_LENGTH];
3495 	void *address;
3496 	area_id clonedArea;
3497 
3498 	// filter out some unavailable values (for userland)
3499 	switch (addressSpec) {
3500 		case B_ANY_KERNEL_ADDRESS:
3501 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3502 			return B_BAD_VALUE;
3503 	}
3504 	if ((protection & ~B_USER_PROTECTION) != 0)
3505 		return B_BAD_VALUE;
3506 
3507 	if (!IS_USER_ADDRESS(userName)
3508 		|| !IS_USER_ADDRESS(userAddress)
3509 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
3510 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3511 		return B_BAD_ADDRESS;
3512 
3513 	fix_protection(&protection);
3514 
3515 	clonedArea = vm_clone_area(vm_get_current_user_aspace_id(), name, &address,
3516 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
3517 	if (clonedArea < B_OK)
3518 		return clonedArea;
3519 
3520 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
3521 		delete_area(clonedArea);
3522 		return B_BAD_ADDRESS;
3523 	}
3524 
3525 	return clonedArea;
3526 }
3527 
3528 
3529 area_id
3530 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
3531 	size_t size, uint32 lock, uint32 protection)
3532 {
3533 	char name[B_OS_NAME_LENGTH];
3534 	area_id area;
3535 	void *address;
3536 
3537 	// filter out some unavailable values (for userland)
3538 	switch (addressSpec) {
3539 		case B_ANY_KERNEL_ADDRESS:
3540 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3541 			return B_BAD_VALUE;
3542 	}
3543 	if ((protection & ~B_USER_PROTECTION) != 0)
3544 		return B_BAD_VALUE;
3545 
3546 	if (!IS_USER_ADDRESS(userName)
3547 		|| !IS_USER_ADDRESS(userAddress)
3548 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
3549 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3550 		return B_BAD_ADDRESS;
3551 
3552 	if (addressSpec == B_EXACT_ADDRESS
3553 		&& IS_KERNEL_ADDRESS(address))
3554 		return B_BAD_VALUE;
3555 
3556 	fix_protection(&protection);
3557 
3558 	area = vm_create_anonymous_area(vm_get_current_user_aspace_id(), (char *)name, &address,
3559 				addressSpec, size, lock, protection);
3560 
3561 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
3562 		delete_area(area);
3563 		return B_BAD_ADDRESS;
3564 	}
3565 
3566 	return area;
3567 }
3568 
3569 
3570 status_t
3571 _user_delete_area(area_id area)
3572 {
3573 	// Unlike the BeOS implementation, you can now only delete areas
3574 	// that you have created yourself from userland.
3575 	// The documentation of delete_area() explicitly states that this
3576 	// will be restricted in the future, and so it will.
3577 	return vm_delete_area(vm_get_current_user_aspace_id(), area);
3578 }
3579 
3580