xref: /haiku/src/system/kernel/vm/vm.cpp (revision 2ae568931fcac7deb9f1e6ff4e47213fbfe4029b)
1 /*
2  * Copyright 2002-2005, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <OS.h>
11 #include <KernelExport.h>
12 
13 #include <vm.h>
14 #include <vm_priv.h>
15 #include <vm_page.h>
16 #include <vm_cache.h>
17 #include <vm_store_anonymous_noswap.h>
18 #include <vm_store_device.h>
19 #include <vm_store_null.h>
20 #include <vm_low_memory.h>
21 #include <file_cache.h>
22 #include <memheap.h>
23 #include <debug.h>
24 #include <console.h>
25 #include <int.h>
26 #include <smp.h>
27 #include <lock.h>
28 #include <thread.h>
29 #include <team.h>
30 
31 #include <boot/stage2.h>
32 #include <boot/elf.h>
33 
34 #include <arch/cpu.h>
35 #include <arch/vm.h>
36 
37 #include <string.h>
38 #include <ctype.h>
39 #include <stdlib.h>
40 #include <stdio.h>
41 
42 //#define TRACE_VM
43 //#define TRACE_FAULTS
44 #ifdef TRACE_VM
45 #	define TRACE(x) dprintf x
46 #else
47 #	define TRACE(x) ;
48 #endif
49 #ifdef TRACE_FAULTS
50 #	define FTRACE(x) dprintf x
51 #else
52 #	define FTRACE(x) ;
53 #endif
54 
55 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
56 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
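// Note that ROUNDUP() only works when "b" is a power of two (it uses a bit
// mask), while ROUNDOWN() works for any non-zero "b". For example,
// ROUNDUP(5, 4) == 8 and ROUNDOWN(5, 4) == 4.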
57 
58 
59 extern vm_address_space *kernel_aspace;
60 
61 #define REGION_HASH_TABLE_SIZE 1024
62 static area_id sNextAreaID;
63 static hash_table *sAreaHash;
64 static sem_id sAreaHashLock;
65 
66 static off_t sAvailableMemory;
67 static benaphore sAvailableMemoryLock;
68 
69 // function declarations
70 static vm_area *_vm_create_area_struct(vm_address_space *aspace, const char *name, uint32 wiring, uint32 protection);
71 static status_t map_backing_store(vm_address_space *aspace, vm_store *store, void **vaddr,
72 	off_t offset, addr_t size, uint32 addressSpec, int wiring, int protection, int mapping, vm_area **_area, const char *areaName);
73 static status_t vm_soft_fault(addr_t address, bool is_write, bool is_user);
74 static vm_area *vm_virtual_map_lookup(vm_virtual_map *map, addr_t address);
75 static bool vm_put_area(vm_area *area);
76 
77 
78 static int
79 area_compare(void *_area, const void *key)
80 {
81 	vm_area *area = (vm_area *)_area;
82 	const area_id *id = (const area_id *)key;
83 
84 	if (area->id == *id)
85 		return 0;
86 
87 	return -1;
88 }
89 
90 
91 static uint32
92 area_hash(void *_area, const void *key, uint32 range)
93 {
94 	vm_area *area = (vm_area *)_area;
95 	const area_id *id = (const area_id *)key;
96 
97 	if (area != NULL)
98 		return area->id % range;
99 
100 	return (uint32)*id % range;
101 }
102 
103 
104 static vm_area *
105 vm_get_area(area_id id)
106 {
107 	vm_area *area;
108 
109 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
110 
111 	area = (vm_area *)hash_lookup(sAreaHash, &id);
112 	if (area != NULL)
113 		atomic_add(&area->ref_count, 1);
114 
115 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
116 
117 	return area;
118 }
119 
120 
121 static vm_area *
122 _vm_create_reserved_region_struct(vm_virtual_map *map, uint32 flags)
123 {
124 	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
125 	if (reserved == NULL)
126 		return NULL;
127 
128 	memset(reserved, 0, sizeof(vm_area));
129 	reserved->id = RESERVED_AREA_ID;
130 		// this marks it as reserved space
131 	reserved->protection = flags;
132 	reserved->map = map;
133 
134 	return reserved;
135 }
136 
137 
138 static vm_area *
139 _vm_create_area_struct(vm_address_space *aspace, const char *name, uint32 wiring, uint32 protection)
140 {
141 	vm_area *area = NULL;
142 
143 	// restrict the area name to B_OS_NAME_LENGTH
144 	size_t length = strlen(name) + 1;
145 	if (length > B_OS_NAME_LENGTH)
146 		length = B_OS_NAME_LENGTH;
147 
148 	area = (vm_area *)malloc(sizeof(vm_area));
149 	if (area == NULL)
150 		return NULL;
151 
152 	area->name = (char *)malloc(length);
153 	if (area->name == NULL) {
154 		free(area);
155 		return NULL;
156 	}
157 	strlcpy(area->name, name, length);
158 
159 	area->id = atomic_add(&sNextAreaID, 1);
160 	area->base = 0;
161 	area->size = 0;
162 	area->protection = protection;
163 	area->wiring = wiring;
164 	area->ref_count = 1;
165 
166 	area->cache_ref = NULL;
167 	area->cache_offset = 0;
168 
169 	area->aspace = aspace;
170 	area->aspace_next = NULL;
171 	area->map = &aspace->virtual_map;
172 	area->cache_next = area->cache_prev = NULL;
173 	area->hash_next = NULL;
174 
175 	return area;
176 }
177 
178 
179 /**	Finds a reserved area that covers the region spanned by \a start and
180  *	\a size, inserts the \a area into that region and makes sure that
181  *	there are reserved regions for the remaining parts.
182  */
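//	Depending on where the new area lies within the reserved range, the
//	reserved area is removed entirely (exact fit), resized (new area at its
//	start or end), or split into two reserved areas (new area in the middle).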
183 
184 static status_t
185 find_reserved_area(vm_virtual_map *map, addr_t start, addr_t size, vm_area *area)
186 {
187 	vm_area *next, *last = NULL;
188 
189 	next = map->areas;
190 	while (next) {
191 		if (next->base <= start && next->base + next->size >= start + size) {
192 			// this area covers the requested range
193 			if (next->id != RESERVED_AREA_ID) {
194 				// but it's not reserved space, it's a real area
195 				return B_BAD_VALUE;
196 			}
197 
198 			break;
199 		}
200 		last = next;
201 		next = next->aspace_next;
202 	}
203 	if (next == NULL)
204 		return B_ENTRY_NOT_FOUND;
205 
206 	// now we have to transfer the requested part of the reserved
207 	// range to the new area - and remove, resize or split the old
208 	// reserved area.
209 
210 	if (start == next->base) {
211 		// the area starts at the beginning of the reserved range
212 		if (last)
213 			last->aspace_next = area;
214 		else
215 			map->areas = area;
216 
217 		if (size == next->size) {
218 			// the new area fully covers the reserved range
219 			area->aspace_next = next->aspace_next;
220 			free(next);
221 		} else {
222 			// resize the reserved range behind the area
223 			area->aspace_next = next;
224 			next->base += size;
225 			next->size -= size;
226 		}
227 	} else if (start + size == next->base + next->size) {
228 		// the area is at the end of the reserved range
229 		area->aspace_next = next->aspace_next;
230 		next->aspace_next = area;
231 
232 		// resize the reserved range before the area
233 		next->size = start - next->base;
234 	} else {
235 		// the area splits the reserved range into two separate ones
236 		// we need a new reserved area to cover this space
237 		vm_area *reserved = _vm_create_reserved_region_struct(map, next->protection);
238 		if (reserved == NULL)
239 			return B_NO_MEMORY;
240 
241 		reserved->aspace_next = next->aspace_next;
242 		area->aspace_next = reserved;
243 		next->aspace_next = area;
244 
245 		// resize regions
246 		reserved->size = next->base + next->size - start - size;
247 		next->size = start - next->base;
248 		reserved->base = start + size;
249 		reserved->cache_offset = next->cache_offset;
250 	}
251 
252 	area->base = start;
253 	area->size = size;
254 	map->change_count++;
255 
256 	return B_OK;
257 }
258 
259 
260 // must be called with this address space's virtual_map.sem held
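// For B_EXACT_ADDRESS requests the slot is first carved out of a matching
// reserved range (see find_reserved_area()); for all other address
// specifications the sorted area list is walked and the area is inserted
// into the first hole that is large enough.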
261 
262 static status_t
263 find_and_insert_area_slot(vm_virtual_map *map, addr_t start, addr_t size, addr_t end,
264 	uint32 addressSpec, vm_area *area)
265 {
266 	vm_area *last = NULL;
267 	vm_area *next;
268 	bool foundSpot = false;
269 
270 	TRACE(("find_and_insert_area_slot: map %p, start 0x%lx, size %ld, end 0x%lx, addressSpec %ld, area %p\n",
271 		map, start, size, end, addressSpec, area));
272 
273 	// do some sanity checking
274 	if (start < map->base || size == 0
275 		|| (end - 1) > (map->base + (map->size - 1))
276 		|| start + size > end)
277 		return B_BAD_ADDRESS;
278 
279 	if (addressSpec == B_EXACT_ADDRESS) {
280 		// search for a reserved area
281 		status_t status = find_reserved_area(map, start, size, area);
282 		if (status == B_OK || status == B_BAD_VALUE)
283 			return status;
284 
285 		// there was no reserved area, and the slot doesn't seem to be used already
286 		// ToDo: this could be further optimized.
287 	}
288 
289 	// walk up to the spot where we should start searching
290 second_chance:
291 	next = map->areas;
292 	while (next) {
293 		if (next->base >= start + size) {
294 			// we have a winner
295 			break;
296 		}
297 		last = next;
298 		next = next->aspace_next;
299 	}
300 
301 	// find the right spot depending on the address specification - the area
302 	// will be inserted directly after "last" ("next" is not referenced anymore)
303 
304 	switch (addressSpec) {
305 		case B_ANY_ADDRESS:
306 		case B_ANY_KERNEL_ADDRESS:
307 		case B_ANY_KERNEL_BLOCK_ADDRESS:
308 			// find a hole big enough for a new area
309 			if (!last) {
310 				// see if we can build it at the beginning of the virtual map
311 				if (!next || (next->base >= map->base + size)) {
312 					foundSpot = true;
313 					area->base = map->base;
314 					break;
315 				}
316 				last = next;
317 				next = next->aspace_next;
318 			}
319 			// keep walking
320 			while (next) {
321 				if (next->base >= last->base + last->size + size) {
322 					// we found a spot (it'll be filled up below)
323 					break;
324 				}
325 				last = next;
326 				next = next->aspace_next;
327 			}
328 
329 			if ((map->base + (map->size - 1)) >= (last->base + last->size + (size - 1))) {
330 				// got a spot
331 				foundSpot = true;
332 				area->base = last->base + last->size;
333 				break;
334 			} else {
335 				// we didn't find a free spot - if there were any reserved areas with
336 				// the RESERVED_AVOID_BASE flag set, we can now test those for free
337 				// space
338 				// ToDo: it would make sense to start with the biggest of them
339 				next = map->areas;
340 				last = NULL;
341 				for (; next != NULL; last = next, next = next->aspace_next) {
342 					// ToDo: take free space after the reserved area into account!
343 					if (next->size == size) {
344 						// the reserved area is entirely covered, and thus, removed
345 						if (last)
346 							last->aspace_next = next->aspace_next;
347 						else
348 							map->areas = next->aspace_next;
349 
350 						foundSpot = true;
351 						area->base = next->base;
352 						free(next);
353 						break;
354 					}
355 					if (next->size >= size) {
356 						// the new area will be placed at the end of the reserved
357 						// area, and the reserved area will be resized to make space
358 						foundSpot = true;
359 						next->size -= size;
360 						last = next;
361 						area->base = next->base + next->size;
362 						break;
363 					}
364 				}
365 			}
366 			break;
367 
368 		case B_BASE_ADDRESS:
369 			// find a hole big enough for a new area beginning with "start"
370 			if (!last) {
371 				// see if we can build it right at the specified start address
372 				if (!next || (next->base >= start + size)) {
373 					foundSpot = true;
374 					area->base = start;
375 					break;
376 				}
377 				last = next;
378 				next = next->aspace_next;
379 			}
380 			// keep walking
381 			while (next) {
382 				if (next->base >= last->base + last->size + size) {
383 					// we found a spot (it'll be filled up below)
384 					break;
385 				}
386 				last = next;
387 				next = next->aspace_next;
388 			}
389 
390 			if ((map->base + (map->size - 1)) >= (last->base + last->size + (size - 1))) {
391 				// got a spot
392 				foundSpot = true;
393 				if (last->base + last->size <= start)
394 					area->base = start;
395 				else
396 					area->base = last->base + last->size;
397 				break;
398 			}
399 			// we didn't find a free spot in the requested range, so we'll
400 			// try again without any restrictions
401 			start = map->base;
402 			addressSpec = B_ANY_ADDRESS;
403 			last = NULL;
404 			goto second_chance;
405 
406 		case B_EXACT_ADDRESS:
407 			// see if we can create it exactly here
408 			if (!last) {
409 				if (!next || (next->base >= start + size)) {
410 					foundSpot = true;
411 					area->base = start;
412 					break;
413 				}
414 			} else {
415 				if (next) {
416 					if (last->base + last->size <= start && next->base >= start + size) {
417 						foundSpot = true;
418 						area->base = start;
419 						break;
420 					}
421 				} else {
422 					if ((last->base + (last->size - 1)) <= start - 1) {
423 						foundSpot = true;
424 						area->base = start;
425 					}
426 				}
427 			}
428 			break;
429 		default:
430 			return B_BAD_VALUE;
431 	}
432 
433 	if (!foundSpot)
434 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
435 
436 	area->size = size;
437 	if (last) {
438 		area->aspace_next = last->aspace_next;
439 		last->aspace_next = area;
440 	} else {
441 		area->aspace_next = map->areas;
442 		map->areas = area;
443 	}
444 	map->change_count++;
445 	return B_OK;
446 }
447 
448 
449 /**	This inserts the area you pass into the virtual_map of the
450  *	specified address space.
451  *	On success, it also sets the "_address" argument to the base
452  *	address of the inserted area.
453  *	The caller must hold the virtual_map semaphore for writing.
454  */
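// The search range depends on the address specification:
//   B_EXACT_ADDRESS           - exactly at *_address
//   B_BASE_ADDRESS            - from *_address to the end of the address space
//   B_ANY_ADDRESS and friends - anywhere in the address space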
455 
456 static status_t
457 insert_area(vm_address_space *addressSpace, void **_address,
458 	uint32 addressSpec, addr_t size, vm_area *area)
459 {
460 	addr_t searchBase, searchEnd;
461 	status_t status;
462 
463 	switch (addressSpec) {
464 		case B_EXACT_ADDRESS:
465 			searchBase = (addr_t)*_address;
466 			searchEnd = (addr_t)*_address + size;
467 			break;
468 
469 		case B_BASE_ADDRESS:
470 			searchBase = (addr_t)*_address;
471 			searchEnd = addressSpace->virtual_map.base + (addressSpace->virtual_map.size - 1);
472 			break;
473 
474 		case B_ANY_ADDRESS:
475 		case B_ANY_KERNEL_ADDRESS:
476 		case B_ANY_KERNEL_BLOCK_ADDRESS:
477 			searchBase = addressSpace->virtual_map.base;
478 			searchEnd = addressSpace->virtual_map.base + (addressSpace->virtual_map.size - 1);
479 			break;
480 
481 		default:
482 			return B_BAD_VALUE;
483 	}
484 
485 	status = find_and_insert_area_slot(&addressSpace->virtual_map, searchBase, size,
486 				searchEnd, addressSpec, area);
487 	if (status == B_OK)
488 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
489 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
490 		*_address = (void *)area->base;
491 
492 	return status;
493 }
494 
495 
496 // a ref to the cache holding this store must be held before entering here
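// Creates a vm_area on top of the given store/cache pair, optionally inserting
// a private anonymous cache layer for copy-on-write semantics (REGION_PRIVATE_MAP),
// finds a fitting slot in the address space, and publishes the area in the
// global area hash table.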
497 static status_t
498 map_backing_store(vm_address_space *aspace, vm_store *store, void **_virtualAddress,
499 	off_t offset, addr_t size, uint32 addressSpec, int wiring, int protection,
500 	int mapping, vm_area **_area, const char *areaName)
501 {
502 	vm_cache *cache;
503 	vm_cache_ref *cache_ref;
504 	vm_area *area;
505 	vm_cache *nu_cache;
506 	vm_cache_ref *nu_cache_ref = NULL;
507 	vm_store *nu_store;
508 
509 	int err;
510 
511 	TRACE(("map_backing_store: aspace %p, store %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
512 		aspace, store, *_virtualAddress, offset, size, addressSpec, wiring, protection, _area, areaName));
513 
514 	area = _vm_create_area_struct(aspace, areaName, wiring, protection);
515 	if (area == NULL)
516 		return B_NO_MEMORY;
517 
518 	cache = store->cache;
519 	cache_ref = cache->ref;
520 
521 	// if this is a private map, we need to create a new cache & store object
522 	// pair to handle the private copies of pages as they are written to
523 	if (mapping == REGION_PRIVATE_MAP) {
524 		// create an anonymous store object
525 		nu_store = vm_store_create_anonymous_noswap((protection & B_STACK_AREA) != 0, USER_STACK_GUARD_PAGES);
526 		if (nu_store == NULL)
527 			panic("map_backing_store: vm_create_store_anonymous_noswap returned NULL");
528 		nu_cache = vm_cache_create(nu_store);
529 		if (nu_cache == NULL)
530 			panic("map_backing_store: vm_cache_create returned NULL");
531 		nu_cache_ref = vm_cache_ref_create(nu_cache);
532 		if (nu_cache_ref == NULL)
533 			panic("map_backing_store: vm_cache_ref_create returned NULL");
534 		nu_cache->temporary = 1;
535 		nu_cache->scan_skip = cache->scan_skip;
536 
537 		nu_cache->source = cache;
538 
539 		// grab a ref to the cache object we're now linked to as a source
540 		vm_cache_acquire_ref(cache_ref, true);
541 
542 		cache = nu_cache;
543 		cache_ref = cache->ref;
544 		store = nu_store;
545 		cache->virtual_size = offset + size;
546 	}
547 
548 	err = vm_cache_set_minimal_commitment(cache_ref, offset + size);
549 	if (err != B_OK)
550 		goto err1a;
551 
552 	vm_cache_acquire_ref(cache_ref, true);
553 
554 	acquire_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0, 0);
555 
556 	// check to see if this aspace has entered DELETE state
557 	if (aspace->state == VM_ASPACE_STATE_DELETION) {
558 		// okay, someone is trying to delete this aspace now, so we can't
559 		// insert the area and have to back out
560 		err = B_BAD_TEAM_ID;
561 		goto err1b;
562 	}
563 
564 	err = insert_area(aspace, _virtualAddress, addressSpec, size, area);
565 	if (err < B_OK)
566 		goto err1b;
567 
568 	// attach the cache to the area
569 	area->cache_ref = cache_ref;
570 	area->cache_offset = offset;
571 	// point the cache back to the area
572 	vm_cache_insert_area(cache_ref, area);
573 
574 	// insert the area in the global area hash table
575 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
576 	hash_insert(sAreaHash, area);
577 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
578 
579 	// grab a ref to the aspace (the area holds this)
580 	atomic_add(&aspace->ref_count, 1);
581 
582 	release_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0);
583 
584 	*_area = area;
585 	return B_OK;
586 
587 err1b:
588 	release_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0);
589 	vm_cache_release_ref(cache_ref);
590 	goto err;
591 err1a:
592 	if (nu_cache_ref) {
593 		// had never acquired its initial ref, so acquire and then release it
594 		// this should clean up all the objects it references
595 		vm_cache_acquire_ref(cache_ref, true);
596 		vm_cache_release_ref(cache_ref);
597 	}
598 err:
599 	free(area->name);
600 	free(area);
601 	return err;
602 }
603 
604 
605 status_t
606 vm_unreserve_address_range(aspace_id aid, void *address, addr_t size)
607 {
608 	vm_address_space *addressSpace;
609 	vm_area *area, *last = NULL;
610 	status_t status = B_OK;
611 
612 	addressSpace = vm_get_aspace_by_id(aid);
613 	if (addressSpace == NULL)
614 		return B_BAD_TEAM_ID;
615 
616 	acquire_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
617 
618 	// check to see if this aspace has entered DELETE state
619 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
620 		// okay, someone is trying to delete this aspace now, so we don't
621 		// touch its area list anymore and back out
622 		status = B_BAD_TEAM_ID;
623 		goto out;
624 	}
625 
626 	// search area list and remove any matching reserved ranges
627 
628 	area = addressSpace->virtual_map.areas;
629 	while (area) {
630 		// the reserved area must lie completely within the given range
631 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
632 			&& area->base + area->size <= (addr_t)address + size) {
633 			// remove reserved range
634 			vm_area *reserved = area;
635 			if (last)
636 				last->aspace_next = reserved->aspace_next;
637 			else
638 				addressSpace->virtual_map.areas = reserved->aspace_next;
639 
640 			area = reserved->aspace_next;
641 			free(reserved);
642 			continue;
643 		}
644 
645 		last = area;
646 		area = area->aspace_next;
647 	}
648 
649 out:
650 	release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
651 	vm_put_aspace(addressSpace);
652 	return status;
653 }
654 
655 
656 status_t
657 vm_reserve_address_range(aspace_id aid, void **_address, uint32 addressSpec,
658 	addr_t size, uint32 flags)
659 {
660 	vm_address_space *addressSpace;
661 	vm_area *area;
662 	status_t status = B_OK;
663 
664 	if (size == 0)
665 		return B_BAD_VALUE;
666 
667 	addressSpace = vm_get_aspace_by_id(aid);
668 	if (addressSpace == NULL)
669 		return B_BAD_TEAM_ID;
670 
671 	area = _vm_create_reserved_region_struct(&addressSpace->virtual_map, flags);
672 	if (area == NULL) {
673 		status = B_NO_MEMORY;
674 		goto err1;
675 	}
676 
677 	acquire_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
678 
679 	// check to see if this aspace has entered DELETE state
680 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
681 		// okay, someone is trying to delete this aspace now, so we can't
682 		// insert the area and have to back out
683 		status = B_BAD_TEAM_ID;
684 		goto err2;
685 	}
686 
687 	status = insert_area(addressSpace, _address, addressSpec, size, area);
688 	if (status < B_OK)
689 		goto err2;
690 
691 	// the area is now reserved!
692 
693 	area->cache_offset = area->base;
694 		// we cache the original base address here
695 
696 	release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
697 	return B_OK;
698 
699 err2:
700 	release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
701 	free(area);
702 err1:
703 	vm_put_aspace(addressSpace);
704 	return status;
705 }
706 
707 
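/**	Creates an area backed by anonymous (swap-less) memory. Depending on
 *	\a wiring, the backing pages are either wired up front (B_FULL_LOCK,
 *	B_CONTIGUOUS, B_ALREADY_WIRED) or faulted in on demand (B_NO_LOCK,
 *	B_LAZY_LOCK).
 */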
708 area_id
709 vm_create_anonymous_area(aspace_id aid, const char *name, void **address,
710 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
711 {
712 	vm_area *area;
713 	vm_cache *cache;
714 	vm_store *store;
715 	vm_address_space *aspace;
716 	vm_cache_ref *cache_ref;
717 	vm_page *page = NULL;
718 	bool isStack = (protection & B_STACK_AREA) != 0;
719 	bool canOvercommit = false;
720 	status_t err;
721 
722 	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));
723 
724 	if (!arch_vm_supports_protection(protection))
725 		return B_NOT_SUPPORTED;
726 
727 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
728 		canOvercommit = true;
729 
730 #ifdef DEBUG_KERNEL_STACKS
731 	if ((protection & B_KERNEL_STACK_AREA) != 0)
732 		isStack = true;
733 #endif
734 
735 	/* check parameters */
736 	switch (addressSpec) {
737 		case B_ANY_ADDRESS:
738 		case B_EXACT_ADDRESS:
739 		case B_BASE_ADDRESS:
740 		case B_ANY_KERNEL_ADDRESS:
741 			break;
742 
743 		default:
744 			return B_BAD_VALUE;
745 	}
746 
747 	switch (wiring) {
748 		case B_NO_LOCK:
749 		case B_FULL_LOCK:
750 		case B_LAZY_LOCK:
751 		case B_CONTIGUOUS:
752 		case B_ALREADY_WIRED:
753 			break;
754 		case B_LOMEM:
755 		//case B_SLOWMEM:
756 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
757 			wiring = B_FULL_LOCK;
758 			break;
759 		default:
760 			return B_BAD_VALUE;
761 	}
762 
763 	aspace = vm_get_aspace_by_id(aid);
764 	if (aspace == NULL)
765 		return B_BAD_TEAM_ID;
766 
767 	size = PAGE_ALIGN(size);
768 
769 	if (wiring == B_CONTIGUOUS) {
770 		// we try to allocate the page run here upfront, as finding a large
771 		// enough run of contiguous free pages may easily fail
772 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
773 		if (page == NULL) {
774 			vm_put_aspace(aspace);
775 			return B_NO_MEMORY;
776 		}
777 	}
778 
779 	// create an anonymous store object
780 	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ?
781 		((protection & B_USER_PROTECTION) != 0 ?
782 			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
783 	if (store == NULL)
784 		panic("vm_create_anonymous_area: vm_create_store_anonymous_noswap returned NULL");
785 	cache = vm_cache_create(store);
786 	if (cache == NULL)
787 		panic("vm_create_anonymous_area: vm_cache_create returned NULL");
788 	cache_ref = vm_cache_ref_create(cache);
789 	if (cache_ref == NULL)
790 		panic("vm_create_anonymous_area: vm_cache_ref_create returned NULL");
791 	cache->temporary = 1;
792 
793 	switch (wiring) {
794 		case B_LAZY_LOCK:	// for now
795 		case B_FULL_LOCK:
796 		case B_CONTIGUOUS:
797 		case B_ALREADY_WIRED:
798 			cache->scan_skip = 1;
799 			break;
800 		case B_NO_LOCK:
801 		//case B_LAZY_LOCK:
802 			cache->scan_skip = 0;
803 			break;
804 	}
805 
806 	vm_cache_acquire_ref(cache_ref, true);
807 	err = map_backing_store(aspace, store, address, 0, size, addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name);
808 	vm_cache_release_ref(cache_ref);
809 	if (err < 0) {
810 		vm_put_aspace(aspace);
811 
812 		if (wiring == B_CONTIGUOUS) {
813 			// we had allocated the contiguous page run upfront, so free its pages again
814 			addr_t pageNumber = page->ppn;
815 			int32 i;
816 			for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
817 				page = vm_lookup_page(pageNumber);
818 				if (page == NULL)
819 					panic("couldn't lookup physical page just allocated\n");
820 
821 				vm_page_set_state(page, PAGE_STATE_FREE);
822 			}
823 		}
824 		return err;
825 	}
826 
827 	cache_ref = store->cache->ref;
828 	switch (wiring) {
829 		case B_NO_LOCK:
830 		case B_LAZY_LOCK:
831 			break; // do nothing
832 
833 		case B_FULL_LOCK:
834 		{
835 			// Pages aren't mapped at this point, but we just simulate a fault on
836 			// every page, which should allocate them
837 			// ToDo: at this point, it would probably be cheaper to allocate
838 			// and map the pages directly
839 			addr_t va;
840 			for (va = area->base; va < area->base + area->size; va += B_PAGE_SIZE) {
841 #ifdef DEBUG_KERNEL_STACKS
842 #	ifdef STACK_GROWS_DOWNWARDS
843 				if (isStack && va < area->base + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
844 #	else
845 				if (isStack && va >= area->base + area->size - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
846 #	endif
847 					continue;
848 #endif
849 				vm_soft_fault(va, false, false);
850 			}
851 			break;
852 		}
853 
854 		case B_ALREADY_WIRED:
855 		{
856 			// the pages should already be mapped. This is only really useful during
857 			// boot time. Find the appropriate vm_page objects and stick them in
858 			// the cache object.
859 			addr_t va;
860 			addr_t pa;
861 			uint32 flags;
862 			int err;
863 			off_t offset = 0;
864 
865 			if (!kernel_startup)
866 				panic("ALREADY_WIRED flag used outside kernel startup\n");
867 
868 			mutex_lock(&cache_ref->lock);
869 			(*aspace->translation_map.ops->lock)(&aspace->translation_map);
870 			for (va = area->base; va < area->base + area->size; va += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
871 				err = (*aspace->translation_map.ops->query)(&aspace->translation_map, va, &pa, &flags);
872 				if (err < 0) {
873 //					dprintf("vm_create_anonymous_area: error looking up mapping for va 0x%x\n", va);
874 					continue;
875 				}
876 				page = vm_lookup_page(pa / B_PAGE_SIZE);
877 				if (page == NULL) {
878 //					dprintf("vm_create_anonymous_area: error looking up vm_page structure for pa 0x%x\n", pa);
879 					continue;
880 				}
881 				atomic_add(&page->ref_count, 1);
882 				vm_page_set_state(page, PAGE_STATE_WIRED);
883 				vm_cache_insert_page(cache_ref, page, offset);
884 			}
885 			(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
886 			mutex_unlock(&cache_ref->lock);
887 			break;
888 		}
889 
890 		case B_CONTIGUOUS:
891 		{
892 			addr_t physicalAddress = page->ppn * B_PAGE_SIZE;
893 			addr_t virtualAddress;
894 			off_t offset = 0;
895 
896 			mutex_lock(&cache_ref->lock);
897 			(*aspace->translation_map.ops->lock)(&aspace->translation_map);
898 
899 			for (virtualAddress = area->base; virtualAddress < area->base + area->size;
900 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
901 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
902 				if (page == NULL)
903 					panic("couldn't lookup physical page just allocated\n");
904 
905 				atomic_add(&page->ref_count, 1);
906 				err = (*aspace->translation_map.ops->map)(&aspace->translation_map,
907 							virtualAddress, physicalAddress, protection);
908 				if (err < 0)
909 					panic("couldn't map physical page in page run\n");
910 
911 				vm_page_set_state(page, PAGE_STATE_WIRED);
912 				vm_cache_insert_page(cache_ref, page, offset);
913 			}
914 
915 			(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
916 			mutex_unlock(&cache_ref->lock);
917 			break;
918 		}
919 
920 		default:
921 			break;
922 	}
923 	vm_put_aspace(aspace);
924 
925 	TRACE(("vm_create_anonymous_area: done\n"));
926 
927 	if (area == NULL)
928 		return B_NO_MEMORY;
929 
930 	return area->id;
931 }
932 
933 
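/**	Maps the given physical memory range into the specified address space.
 *	\a phys_addr does not have to be page aligned; the returned \a _address
 *	is adjusted by the same offset into the first page.
 */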
934 area_id
935 vm_map_physical_memory(aspace_id aid, const char *name, void **_address,
936 	uint32 addressSpec, addr_t size, uint32 protection, addr_t phys_addr)
937 {
938 	vm_area *area;
939 	vm_cache *cache;
940 	vm_cache_ref *cache_ref;
941 	vm_store *store;
942 	addr_t map_offset;
943 	status_t status;
944 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
945 
946 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, spec = %ld,"
947 		" size = %lu, protection = %ld, phys = %p)\n",
948 		aid, name, _address, addressSpec, size, protection, (void *)phys_addr));
949 
950 	if (!arch_vm_supports_protection(protection))
951 		return B_NOT_SUPPORTED;
952 
953 	if (aspace == NULL)
954 		return B_BAD_TEAM_ID;
955 
956 	// if the physical address is not page aligned, move the actual area
957 	// down to start on a page boundary (and enlarge it accordingly)
958 	map_offset = phys_addr % B_PAGE_SIZE;
959 	size += map_offset;
960 	phys_addr -= map_offset;
961 
962 	size = PAGE_ALIGN(size);
963 
964 	// create a device store object
965 	store = vm_store_create_device(phys_addr);
966 	if (store == NULL)
967 		panic("vm_map_physical_memory: vm_store_create_device returned NULL");
968 	cache = vm_cache_create(store);
969 	if (cache == NULL)
970 		panic("vm_map_physical_memory: vm_cache_create returned NULL");
971 	cache_ref = vm_cache_ref_create(cache);
972 	if (cache_ref == NULL)
973 		panic("vm_map_physical_memory: vm_cache_ref_create returned NULL");
974 	// tell the page scanner to skip over this area, its pages are special
975 	cache->scan_skip = 1;
976 
977 	vm_cache_acquire_ref(cache_ref, true);
978 	status = map_backing_store(aspace, store, _address, 0, size, addressSpec, 0, protection, REGION_NO_PRIVATE_MAP, &area, name);
979 	vm_cache_release_ref(cache_ref);
980 
981 	if (status >= B_OK) {
982 		// make sure our area is mapped in completely
983 		// (even if that makes the fault routine pretty much useless)
984 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
985 			store->ops->fault(store, aspace, offset);
986 		}
987 	}
988 
989 	vm_put_aspace(aspace);
990 	if (status < B_OK)
991 		return status;
992 
993 	// modify the returned pointer to be offset into the new area by the
994 	// same amount the physical address passed in was offset
995 	*_address = (void *)((addr_t)*_address + map_offset);
996 
997 	return area->id;
998 }
999 
1000 
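/**	Creates an area that just reserves a range of virtual addresses; it is
 *	backed by the null store, so no pages will ever be mapped into it.
 */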
1001 area_id
1002 vm_create_null_area(aspace_id aid, const char *name, void **address, uint32 addressSpec, addr_t size)
1003 {
1004 	vm_area *area;
1005 	vm_cache *cache;
1006 	vm_cache_ref *cache_ref;
1007 	vm_store *store;
1008 //	addr_t map_offset;
1009 	int err;
1010 
1011 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
1012 	if (aspace == NULL)
1013 		return B_BAD_TEAM_ID;
1014 
1015 	size = PAGE_ALIGN(size);
1016 
1017 	// create a null store object
1018 	store = vm_store_create_null();
1019 	if (store == NULL)
1020 		panic("vm_create_null_area: vm_store_create_null returned NULL");
1021 	cache = vm_cache_create(store);
1022 	if (cache == NULL)
1023 		panic("vm_create_null_area: vm_cache_create returned NULL");
1024 	cache_ref = vm_cache_ref_create(cache);
1025 	if (cache_ref == NULL)
1026 		panic("vm_create_null_area: vm_cache_ref_create returned NULL");
1027 	// tell the page scanner to skip over this area, no pages will be mapped here
1028 	cache->scan_skip = 1;
1029 
1030 	vm_cache_acquire_ref(cache_ref, true);
1031 	err = map_backing_store(aspace, store, address, 0, size, addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);
1032 	vm_cache_release_ref(cache_ref);
1033 	vm_put_aspace(aspace);
1034 	if (err < 0)
1035 		return err;
1036 
1037 	return area->id;
1038 }
1039 
1040 
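/**	Creates the store/cache/cache_ref triple for the given \a vnode and
 *	acquires one reference on it that the vnode itself is expected to hold.
 */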
1041 status_t
1042 vm_create_vnode_cache(void *vnode, struct vm_cache_ref **_cacheRef)
1043 {
1044 	vm_cache_ref *cacheRef;
1045 	vm_cache *cache;
1046 	vm_store *store;
1047 
1048 	// create a vnode store object
1049 	store = vm_create_vnode_store(vnode);
1050 	if (store == NULL) {
1051 		dprintf("vm_create_vnode_cache: couldn't create vnode store\n");
1052 		return B_NO_MEMORY;
1053 	}
1054 
1055 	cache = vm_cache_create(store);
1056 	if (cache == NULL) {
1057 		dprintf("vm_create_vnode_cache: vm_cache_create returned NULL\n");
1058 		return B_NO_MEMORY;
1059 	}
1060 
1061 	cacheRef = vm_cache_ref_create(cache);
1062 	if (cacheRef == NULL) {
1063 		dprintf("vm_create_vnode_cache: vm_cache_ref_create returned NULL\n");
1064 		return B_NO_MEMORY;
1065 	}
1066 
1067 	// acquire the cache ref once to represent the ref that the vnode will have
1068 	// this is one of the only places where we don't want the ref to ripple down to the store
1069 	vm_cache_acquire_ref(cacheRef, false);
1070 
1071 	*_cacheRef = cacheRef;
1072 	return B_OK;
1073 }
1074 
1075 
1076 /** Will map the file at the specified \a path to an area in memory.
1077  *	The file will be mirrored beginning at the specified \a offset. The \a offset
1078  *	and \a size arguments have to be page aligned.
1079  */
1080 
1081 static area_id
1082 _vm_map_file(aspace_id aid, const char *name, void **_address, uint32 addressSpec,
1083 	size_t size, uint32 protection, uint32 mapping, const char *path, off_t offset, bool kernel)
1084 {
1085 	vm_cache_ref *cacheRef;
1086 	vm_area *area;
1087 	void *vnode;
1088 	status_t status;
1089 
1090 	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
1091 	// ToDo: check file access permissions (would be already done if the above were true)
1092 	// ToDo: for binary files, we want to make sure that they get the
1093 	//	copy of a file at a given time, ie. later changes should not
1094 	//	make it into the mapped copy -- this will need quite some changes
1095 	//	to be done in a nice way
1096 
1097 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
1098 	if (aspace == NULL)
1099 		return B_BAD_TEAM_ID;
1100 
1101 	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n", path, offset, size, mapping));
1102 
1103 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
1104 	size = PAGE_ALIGN(size);
1105 
1106 	// get the vnode for the object, this also grabs a ref to it
1107 	status = vfs_get_vnode_from_path(path, kernel, &vnode);
1108 	if (status < B_OK)
1109 		goto err1;
1110 
1111 	status = vfs_get_vnode_cache(vnode, &cacheRef, false);
1112 	if (status < B_OK)
1113 		goto err2;
1114 
1115 	// acquire a ref to the cache before we do work on it. Don't ripple the ref acquisition to the vnode
1116 	// below because we'll have to release it later anyway, since we grabbed a ref to the vnode at
1117 	// vfs_get_vnode_from_path(). This puts the ref counts in sync.
1118 	vm_cache_acquire_ref(cacheRef, false);
1119 	status = map_backing_store(aspace, cacheRef->cache->store, _address, offset, size,
1120 					addressSpec, 0, protection, mapping, &area, name);
1121 	vm_cache_release_ref(cacheRef);
1122 	vm_put_aspace(aspace);
1123 
1124 	if (status < B_OK)
1125 		return status;
1126 
1127 	return area->id;
1128 
1129 err2:
1130 	vfs_put_vnode(vnode);
1131 err1:
1132 	vm_put_aspace(aspace);
1133 	return status;
1134 }
1135 
1136 
1137 area_id
1138 vm_map_file(aspace_id aid, const char *name, void **address, uint32 addressSpec,
1139 	addr_t size, uint32 protection, uint32 mapping, const char *path, off_t offset)
1140 {
1141 	if (!arch_vm_supports_protection(protection))
1142 		return B_NOT_SUPPORTED;
1143 
1144 	return _vm_map_file(aid, name, address, addressSpec, size, protection, mapping, path, offset, true);
1145 }
1146 
1147 
1148 // ToDo: create a BeOS style call for this!
1149 
1150 area_id
1151 _user_vm_map_file(const char *uname, void **uaddress, int addressSpec,
1152 	addr_t size, int protection, int mapping, const char *upath, off_t offset)
1153 {
1154 	char name[B_OS_NAME_LENGTH];
1155 	char path[B_PATH_NAME_LENGTH];
1156 	void *address;
1157 	int rc;
1158 
1159 	if (!IS_USER_ADDRESS(uname) || !IS_USER_ADDRESS(uaddress) || !IS_USER_ADDRESS(upath)
1160 		|| user_strlcpy(name, uname, B_OS_NAME_LENGTH) < B_OK
1161 		|| user_strlcpy(path, upath, B_PATH_NAME_LENGTH) < B_OK
1162 		|| user_memcpy(&address, uaddress, sizeof(address)) < B_OK)
1163 		return B_BAD_ADDRESS;
1164 
1165 	// userland created areas can always be accessed by the kernel
1166 	protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1167 
1168 	rc = _vm_map_file(vm_get_current_user_aspace_id(), name, &address, addressSpec, size,
1169 			protection, mapping, path, offset, false);
1170 	if (rc < 0)
1171 		return rc;
1172 
1173 	if (user_memcpy(uaddress, &address, sizeof(address)) < B_OK)
1174 		return B_BAD_ADDRESS;
1175 
1176 	return rc;
1177 }
1178 
1179 
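/**	Creates a new area in the target address space that maps the same backing
 *	store as the source area - either shared (REGION_NO_PRIVATE_MAP) or as a
 *	private copy-on-write mapping (REGION_PRIVATE_MAP).
 */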
1180 area_id
1181 vm_clone_area(aspace_id aid, const char *name, void **address, uint32 addressSpec,
1182 	uint32 protection, uint32 mapping, area_id sourceID)
1183 {
1184 	vm_area *newArea = NULL;
1185 	vm_area *sourceArea;
1186 	status_t status;
1187 
1188 	vm_address_space *aspace = vm_get_aspace_by_id(aid);
1189 	if (aspace == NULL)
1190 		return B_BAD_TEAM_ID;
1191 
1192 	sourceArea = vm_get_area(sourceID);
1193 	if (sourceArea == NULL) {
1194 		vm_put_aspace(aspace);
1195 		return B_BAD_VALUE;
1196 	}
1197 
1198 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
1199 	//	have been adapted. Maybe it should be part of the kernel settings,
1200 	//	anyway (so that old drivers can always work).
1201 #if 0
1202 	if (sourceArea->aspace == kernel_aspace && aspace != kernel_aspace
1203 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1204 		// kernel areas must not be cloned in userland, unless explicitly
1205 		// declared user-cloneable upon construction
1206 		status = B_NOT_ALLOWED;
1207 	} else
1208 #endif
1209 	{
1210 		vm_cache_acquire_ref(sourceArea->cache_ref, true);
1211 		status = map_backing_store(aspace, sourceArea->cache_ref->cache->store, address,
1212 					sourceArea->cache_offset, sourceArea->size, addressSpec, sourceArea->wiring,
1213 					protection, mapping, &newArea, name);
1214 		vm_cache_release_ref(sourceArea->cache_ref);
1215 	}
1216 
1217 	vm_put_area(sourceArea);
1218 	vm_put_aspace(aspace);
1219 
1220 	if (status < B_OK)
1221 		return status;
1222 
1223 	return newArea->id;
1224 }
1225 
1226 
1227 static status_t
1228 _vm_delete_area(vm_address_space *aspace, area_id id)
1229 {
1230 	status_t status = B_OK;
1231 	vm_area *area;
1232 
1233 	TRACE(("vm_delete_area: aspace id 0x%lx, area id 0x%lx\n", aspace->id, id));
1234 
1235 	area = vm_get_area(id);
1236 	if (area == NULL)
1237 		return B_BAD_VALUE;
1238 
1239 	if (area->aspace == aspace) {
1240 		vm_put_area(area);
1241 			// next put below will actually delete it
1242 	} else
1243 		status = B_NOT_ALLOWED;
1244 
1245 	vm_put_area(area);
1246 	return status;
1247 }
1248 
1249 
1250 status_t
1251 vm_delete_area(aspace_id aid, area_id rid)
1252 {
1253 	vm_address_space *aspace;
1254 	status_t err;
1255 
1256 	aspace = vm_get_aspace_by_id(aid);
1257 	if (aspace == NULL)
1258 		return B_BAD_TEAM_ID;
1259 
1260 	err = _vm_delete_area(aspace, rid);
1261 	vm_put_aspace(aspace);
1262 	return err;
1263 }
1264 
1265 
1266 static void
1267 remove_area_from_virtual_map(vm_address_space *addressSpace, vm_area *area, bool locked)
1268 {
1269 	vm_area *temp, *last = NULL;
1270 
1271 	if (!locked)
1272 		acquire_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
1273 
1274 	temp = addressSpace->virtual_map.areas;
1275 	while (temp != NULL) {
1276 		if (area == temp) {
1277 			if (last != NULL) {
1278 				last->aspace_next = temp->aspace_next;
1279 			} else {
1280 				addressSpace->virtual_map.areas = temp->aspace_next;
1281 			}
1282 			addressSpace->virtual_map.change_count++;
1283 			break;
1284 		}
1285 		last = temp;
1286 		temp = temp->aspace_next;
1287 	}
1288 	if (area == addressSpace->virtual_map.area_hint)
1289 		addressSpace->virtual_map.area_hint = NULL;
1290 
1291 	if (!locked)
1292 		release_sem_etc(addressSpace->virtual_map.sem, WRITE_COUNT, 0);
1293 
1294 	if (temp == NULL)
1295 		panic("remove_area_from_virtual_map: area not found in aspace's area list\n");
1296 }
1297 
1298 
1299 static bool
1300 _vm_put_area(vm_area *area, bool aspaceLocked)
1301 {
1302 	vm_address_space *aspace;
1303 	bool removeit = false;
1304 
1305 	//TRACE(("_vm_put_area(area = %p, aspaceLocked = %s)\n",
1306 	//	area, aspaceLocked ? "yes" : "no"));
1307 
1308 	// we should never get here, but if we do, we can handle it
1309 	if (area->id == RESERVED_AREA_ID)
1310 		return false;
1311 
1312 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
1313 	if (atomic_add(&area->ref_count, -1) == 1) {
1314 		hash_remove(sAreaHash, area);
1315 		removeit = true;
1316 	}
1317 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
1318 
1319 	if (!removeit)
1320 		return false;
1321 
1322 	aspace = area->aspace;
1323 
1324 	remove_area_from_virtual_map(aspace, area, aspaceLocked);
1325 
1326 	vm_cache_remove_area(area->cache_ref, area);
1327 	vm_cache_release_ref(area->cache_ref);
1328 
1329 	(*aspace->translation_map.ops->lock)(&aspace->translation_map);
1330 	(*aspace->translation_map.ops->unmap)(&aspace->translation_map, area->base,
1331 		area->base + (area->size - 1));
1332 	(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
1333 
1334 	// now we can give up the area's reference to the address space
1335 	vm_put_aspace(aspace);
1336 
1337 	free(area->name);
1338 	free(area);
1339 	return true;
1340 }
1341 
1342 
1343 static bool
1344 vm_put_area(vm_area *area)
1345 {
1346 	return _vm_put_area(area, false);
1347 }
1348 
1349 
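/**	Turns the given area into a copy-on-write area: a new anonymous cache is
 *	put on top of the area's current cache, and all existing pages are
 *	remapped read-only so that the next write access causes a private copy
 *	to be made.
 */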
1350 static status_t
1351 vm_copy_on_write_area(vm_area *area)
1352 {
1353 	vm_store *store;
1354 	vm_cache *upperCache, *lowerCache;
1355 	vm_cache_ref *upperCacheRef, *lowerCacheRef;
1356 	vm_translation_map *map;
1357 	vm_page *page;
1358 	uint32 protection;
1359 	status_t status;
1360 
1361 	TRACE(("vm_copy_on_write_area(area = %p)\n", area));
1362 
1363 	// We need to separate the vm_cache from its vm_cache_ref: the area
1364 	// and its cache_ref go into a new layer on top of the old one.
1365 	// So the old cache gets a new cache_ref and the area a new cache.
1366 
1367 	upperCacheRef = area->cache_ref;
1368 	lowerCache = upperCacheRef->cache;
1369 
1370 	// create an anonymous store object
1371 	store = vm_store_create_anonymous_noswap(false, 0);
1372 	if (store == NULL)
1373 		return B_NO_MEMORY;
1374 
1375 	upperCache = vm_cache_create(store);
1376 	if (upperCache == NULL) {
1377 		status = B_NO_MEMORY;
1378 		goto err1;
1379 	}
1380 
1381 	lowerCacheRef = vm_cache_ref_create(lowerCache);
1382 	if (lowerCacheRef == NULL) {
1383 		status = B_NO_MEMORY;
1384 		goto err2;
1385 	}
1386 
1387 	// The pages must remain readable to the same extent (kernel/userland) as before, but no longer writable
1388 	protection = B_KERNEL_READ_AREA;
1389 	if (area->protection & B_READ_AREA)
1390 		protection |= B_READ_AREA;
1391 
1392 	// we need to hold the cache_ref lock when we want to switch its cache
1393 	mutex_lock(&upperCacheRef->lock);
1394 	mutex_lock(&lowerCacheRef->lock);
1395 
1396 	// ToDo: add a child counter to vm_cache - so that we can collapse a
1397 	//		cache layer when possible (ie. "the other" area was deleted)
1398 	upperCache->temporary = 1;
1399 	upperCache->scan_skip = lowerCache->scan_skip;
1400 	upperCache->source = lowerCache;
1401 	upperCache->ref = upperCacheRef;
1402 	upperCacheRef->cache = upperCache;
1403 
1404 	// we need to manually alter the ref_count
1405 	// ToDo: investigate a bit deeper if this is really correct
1406 	// (doesn't look like it, but it works)
1407 	lowerCacheRef->ref_count = upperCacheRef->ref_count;
1408 	upperCacheRef->ref_count = 1;
1409 
1410 	// grab a ref to the cache object we're now linked to as a source
1411 	vm_cache_acquire_ref(lowerCacheRef, true);
1412 
1413 	// We now need to remap all pages from the area read-only, so that
1414 	// a copy will be created on next write access
1415 
1416 	map = &area->aspace->translation_map;
1417 	map->ops->lock(map);
1418 	map->ops->unmap(map, area->base, area->base - 1 + area->size);
1419 
1420 	for (page = lowerCache->page_list; page; page = page->cache_next) {
1421 		map->ops->map(map, area->base + (page->offset - area->cache_offset),
1422 			page->ppn * B_PAGE_SIZE, protection);
1423 	}
1424 
1425 	map->ops->unlock(map);
1426 
1427 	mutex_unlock(&lowerCacheRef->lock);
1428 	mutex_unlock(&upperCacheRef->lock);
1429 
1430 	return B_OK;
1431 
1432 err2:
1433 	free(upperCache);
1434 err1:
1435 	store->ops->destroy(store);
1436 	return status;
1437 }
1438 
1439 
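/**	Copies the given source area into the target address space. If the
 *	requested protection allows writing, the copy becomes a private
 *	copy-on-write mapping and the (writable) source area is moved one
 *	cache layer up as well.
 */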
1440 area_id
1441 vm_copy_area(aspace_id addressSpaceID, const char *name, void **_address, uint32 addressSpec,
1442 	uint32 protection, area_id sourceID)
1443 {
1444 	vm_address_space *addressSpace;
1445 	vm_cache_ref *cacheRef;
1446 	vm_area *target, *source;
1447 	status_t status;
1448 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
1449 
1450 	if ((protection & B_KERNEL_PROTECTION) == 0) {
1451 		// set the same protection for the kernel as for userland
1452 		protection |= B_KERNEL_READ_AREA;
1453 		if (writableCopy)
1454 			protection |= B_KERNEL_WRITE_AREA;
1455 	}
1456 
1457 	if ((source = vm_get_area(sourceID)) == NULL)
1458 		return B_BAD_VALUE;
1459 
1460 	addressSpace = vm_get_aspace_by_id(addressSpaceID);
1461 	cacheRef = source->cache_ref;
1462 
1463 	if (addressSpec == B_CLONE_ADDRESS) {
1464 		addressSpec = B_EXACT_ADDRESS;
1465 		*_address = (void *)source->base;
1466 	}
1467 
1468 	// First, create a cache on top of the source area
1469 
1470 	status = map_backing_store(addressSpace, cacheRef->cache->store, _address,
1471 		source->cache_offset, source->size, addressSpec, source->wiring, protection,
1472 		writableCopy ? REGION_PRIVATE_MAP : REGION_NO_PRIVATE_MAP,
1473 		&target, name);
1474 
1475 	if (status < B_OK)
1476 		goto err;
1477 
1478 	// If the source area is writable, we need to move it one layer up as well
1479 
1480 	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
1481 		// ToDo: do something more useful if this fails!
1482 		if (vm_copy_on_write_area(source) < B_OK)
1483 			panic("vm_copy_on_write_area() failed!\n");
1484 	}
1485 
1486 	// we want to return the ID of the newly created area
1487 	status = target->id;
1488 
1489 err:
1490 	vm_put_aspace(addressSpace);
1491 	vm_put_area(source);
1492 
1493 	return status;
1494 }
1495 
1496 
1497 static int32
1498 count_writable_areas(vm_cache_ref *ref, vm_area *ignoreArea)
1499 {
1500 	struct vm_area *area = ref->areas;
1501 	uint32 count = 0;
1502 
1503 	for (; area != NULL; area = area->cache_next) {
1504 		if (area != ignoreArea
1505 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
1506 			count++;
1507 	}
1508 
1509 	return count;
1510 }
1511 
1512 
1513 static status_t
1514 vm_set_area_protection(aspace_id aspaceID, area_id areaID, uint32 newProtection)
1515 {
1516 	vm_cache_ref *cacheRef;
1517 	vm_cache *cache;
1518 	vm_area *area;
1519 	status_t status = B_OK;
1520 
1521 	TRACE(("vm_set_area_protection(aspace = %#lx, area = %#lx, protection = %#lx)\n",
1522 		aspaceID, areaID, newProtection));
1523 
1524 	if (!arch_vm_supports_protection(newProtection))
1525 		return B_NOT_SUPPORTED;
1526 
1527 	area = vm_get_area(areaID);
1528 	if (area == NULL)
1529 		return B_BAD_VALUE;
1530 
1531 	if (aspaceID != vm_get_kernel_aspace_id() && area->aspace->id != aspaceID) {
1532 		// unless you're the kernel, you are only allowed to set
1533 		// the protection of your own areas
1534 		vm_put_area(area);
1535 		return B_NOT_ALLOWED;
1536 	}
1537 
1538 	cacheRef = area->cache_ref;
1539 	cache = cacheRef->cache;
1540 
1541 	mutex_lock(&cacheRef->lock);
1542 
1543 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1544 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
1545 		// change from read/write to read-only
1546 
1547 		if (cache->source != NULL && cache->temporary) {
1548 			if (count_writable_areas(cacheRef, area) == 0) {
1549 				// Since this cache now lives from the pages in its source cache,
1550 				// we can change the cache's commitment to take only those pages
1551 				// into account that really are in this cache.
1552 
1553 				// count existing pages in this cache
1554 				struct vm_page *page = cache->page_list;
1555 				uint32 count = 0;
1556 
1557 				for (; page != NULL; page = page->cache_next) {
1558 					count++;
1559 				}
1560 
1561 				status = cache->store->ops->commit(cache->store, count * B_PAGE_SIZE);
1562 
1563 				// ToDo: we may be able to join with our source cache, if count == 0
1564 			}
1565 		}
1566 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
1567 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
1568 		// change from read-only to read/write
1569 
1570 		// ToDo: if this is a shared cache, insert new cache (we only know about other
1571 		//	areas in this cache yet, though, not about child areas)
1572 		//	-> use this call with care, it might currently have unwanted consequences
1573 		//	   because of this. It should always be safe though, if there are no other
1574 		//	   (child) areas referencing this area's cache (you just might not know).
1575 		if (count_writable_areas(cacheRef, area) == 0
1576 			&& (cacheRef->areas != area || area->cache_next)) {
1577 			// ToDo: child areas are not tested for yet
1578 			dprintf("set_area_protection(): warning, would need to insert a new cache_ref (not yet implemented)!\n");
1579 			status = B_NOT_ALLOWED;
1580 		} else
1581 			dprintf("set_area_protection() may not work correctly yet in this direction!\n");
1582 
1583 		if (status == B_OK && cache->source != NULL && cache->temporary) {
1584 			// the cache's commitment must contain all possible pages
1585 			status = cache->store->ops->commit(cache->store, cache->virtual_size);
1586 		}
1587 	} else {
1588 		// we don't have anything special to do in all other cases
1589 	}
1590 
1591 	if (status == B_OK && area->protection != newProtection) {
1592 		// remap existing pages in this cache
1593 		struct vm_translation_map *map = &area->aspace->translation_map;
1594 
1595 		map->ops->lock(map);
1596 		map->ops->protect(map, area->base, area->base + area->size, newProtection);
1597 		map->ops->unlock(map);
1598 
1599 		area->protection = newProtection;
1600 	}
1601 
1602 	mutex_unlock(&cacheRef->lock);
1603 	vm_put_area(area);
1604 
1605 	return status;
1606 }
1607 
1608 
1609 status_t
1610 vm_get_page_mapping(aspace_id aid, addr_t vaddr, addr_t *paddr)
1611 {
1612 	vm_address_space *aspace;
1613 	uint32 null_flags;
1614 	status_t err;
1615 
1616 	aspace = vm_get_aspace_by_id(aid);
1617 	if (aspace == NULL)
1618 		return B_BAD_TEAM_ID;
1619 
1620 	err = aspace->translation_map.ops->query(&aspace->translation_map,
1621 		vaddr, paddr, &null_flags);
1622 
1623 	vm_put_aspace(aspace);
1624 	return err;
1625 }
1626 
1627 
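// Kernel debugger command that dumps memory in units of 1 ("db"), 2 ("ds"),
// or 4 ("dw") bytes, e.g. "dw <address> 16".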
1628 static int
1629 display_mem(int argc, char **argv)
1630 {
1631 	int32 displayWidth;
1632 	int32 itemSize;
1633 	int32 num = 1;
1634 	addr_t address;
1635 	int i, j;
1636 
1637 	if (argc < 2) {
1638 		kprintf("usage: dw/ds/db <address> [num]\n"
1639 			"\tdw - 4 bytes\n"
1640 			"\tds - 2 bytes\n"
1641 			"\tdb - 1 byte\n");
1642 		return 0;
1643 	}
1644 
1645 	address = strtoul(argv[1], NULL, 0);
1646 
1647 	if (argc >= 3)
1648 		num = atoi(argv[2]);
1651 
1652 	// determine the item size and how many items fit on one line
1653 	if (strcmp(argv[0], "db") == 0) {
1654 		itemSize = 1;
1655 		displayWidth = 16;
1656 	} else if (strcmp(argv[0], "ds") == 0) {
1657 		itemSize = 2;
1658 		displayWidth = 8;
1659 	} else if (strcmp(argv[0], "dw") == 0) {
1660 		itemSize = 4;
1661 		displayWidth = 4;
1662 	} else {
1663 		kprintf("display_mem called in an invalid way!\n");
1664 		return 0;
1665 	}
1666 
1667 	for (i = 0; i < num; i++) {
1668 		uint32 value;
1669 
1670 		if ((i % displayWidth) == 0) {
1671 			int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
1672 			if (i != 0)
1673 				kprintf("\n");
1674 
1675 			kprintf("[0x%lx]  ", address + i * itemSize);
1676 
1677 			for (j = 0; j < displayed; j++) {
1678 				char c;
1679 				if (user_memcpy(&c, (char *)address + i * itemSize + j, 1) != B_OK) {
1680 					displayed = j;
1681 					break;
1682 				}
1683 				if (!isalnum(c))
1684 					c = '.';
1685 
1686 				kprintf("%c", c);
1687 			}
1688 			if (num > displayWidth) {
1689 				// make sure the spacing in the last line is correct
1690 				for (j = displayed; j < displayWidth * itemSize; j++)
1691 					kprintf(" ");
1692 			}
1693 			kprintf("  ");
1694 		}
1695 
1696 		if (user_memcpy(&value, (uint8 *)address + i * itemSize, itemSize) != B_OK) {
1697 			kprintf("read fault");
1698 			break;
1699 		}
1700 
1701 		switch (itemSize) {
1702 			case 1:
1703 				kprintf(" 0x%02x", *(uint8 *)&value);
1704 				break;
1705 			case 2:
1706 				kprintf(" 0x%04x", *(uint16 *)&value);
1707 				break;
1708 			case 4:
1709 				kprintf(" 0x%08lx", *(uint32 *)&value);
1710 				break;
1711 		}
1712 	}
1713 
1714 	kprintf("\n");
1715 	return 0;
1716 }
1717 
1718 
1719 static int
1720 dump_cache_ref(int argc, char **argv)
1721 {
1722 	addr_t address;
1723 	vm_area *area;
1724 	vm_cache_ref *cache_ref;
1725 
1726 	if (argc < 2) {
1727 		kprintf("cache_ref: not enough arguments\n");
1728 		return 0;
1729 	}
1730 	if (strlen(argv[1]) < 2 || argv[1][0] != '0' || argv[1][1] != 'x') {
1731 		kprintf("cache_ref: invalid argument, pass address\n");
1732 		return 0;
1733 	}
1734 
1735 	address = atoul(argv[1]);
1736 	cache_ref = (vm_cache_ref *)address;
1737 
1738 	kprintf("cache_ref at %p:\n", cache_ref);
1739 	kprintf("cache: %p\n", cache_ref->cache);
1740 	kprintf("lock.holder: %ld\n", cache_ref->lock.holder);
1741 	kprintf("lock.sem: 0x%lx\n", cache_ref->lock.sem);
1742 	kprintf("areas:\n");
1743 	for (area = cache_ref->areas; area != NULL; area = area->cache_next) {
1744 		kprintf(" area 0x%lx: ", area->id);
1745 		kprintf("base_addr = 0x%lx ", area->base);
1746 		kprintf("size = 0x%lx ", area->size);
1747 		kprintf("name = '%s' ", area->name);
1748 		kprintf("protection = 0x%lx\n", area->protection);
1749 	}
1750 	kprintf("ref_count: %ld\n", cache_ref->ref_count);
1751 	return 0;
1752 }
1753 
1754 
1755 static const char *
1756 page_state_to_text(int state)
1757 {
1758 	switch(state) {
1759 		case PAGE_STATE_ACTIVE:
1760 			return "active";
1761 		case PAGE_STATE_INACTIVE:
1762 			return "inactive";
1763 		case PAGE_STATE_BUSY:
1764 			return "busy";
1765 		case PAGE_STATE_MODIFIED:
1766 			return "modified";
1767 		case PAGE_STATE_FREE:
1768 			return "free";
1769 		case PAGE_STATE_CLEAR:
1770 			return "clear";
1771 		case PAGE_STATE_WIRED:
1772 			return "wired";
1773 		case PAGE_STATE_UNUSED:
1774 			return "unused";
1775 		default:
1776 			return "unknown";
1777 	}
1778 }
1779 
1780 
1781 static int
1782 dump_cache(int argc, char **argv)
1783 {
1784 	addr_t address;
1785 	vm_cache *cache;
1786 	vm_page *page;
1787 
1788 	if (argc < 2) {
1789 		kprintf("cache: not enough arguments\n");
1790 		return 0;
1791 	}
1792 	if (strlen(argv[1]) < 2 || argv[1][0] != '0' || argv[1][1] != 'x') {
1793 		kprintf("cache: invalid argument, pass address\n");
1794 		return 0;
1795 	}
1796 
1797 	address = atoul(argv[1]);
1798 	cache = (vm_cache *)address;
1799 
1800 	kprintf("cache at %p:\n", cache);
1801 	kprintf("cache_ref: %p\n", cache->ref);
1802 	kprintf("source: %p\n", cache->source);
1803 	kprintf("store: %p\n", cache->store);
1804 	kprintf("virtual_size: 0x%Lx\n", cache->virtual_size);
1805 	kprintf("temporary: %ld\n", cache->temporary);
1806 	kprintf("scan_skip: %ld\n", cache->scan_skip);
1807 	kprintf("page_list:\n");
1808 	for (page = cache->page_list; page != NULL; page = page->cache_next) {
1809 		if (page->type == PAGE_TYPE_PHYSICAL) {
1810 			kprintf(" %p ppn 0x%lx offset 0x%Lx type %ld state %ld (%s) ref_count %ld\n",
1811 				page, page->ppn, page->offset, page->type, page->state,
1812 				page_state_to_text(page->state), page->ref_count);
1813 		} else if(page->type == PAGE_TYPE_DUMMY) {
1814 			kprintf(" %p DUMMY PAGE state %ld (%s)\n",
1815 				page, page->state, page_state_to_text(page->state));
1816 		} else
1817 			kprintf(" %p UNKNOWN PAGE type %ld\n", page, page->type);
1818 	}
1819 	return 0;
1820 }
1821 
1822 
1823 static void
1824 _dump_area(vm_area *area)
1825 {
1826 	kprintf("dump of area at %p:\n", area);
1827 	kprintf("name: '%s'\n", area->name);
1828 	kprintf("id: 0x%lx\n", area->id);
1829 	kprintf("base: 0x%lx\n", area->base);
1830 	kprintf("size: 0x%lx\n", area->size);
1831 	kprintf("protection: 0x%lx\n", area->protection);
1832 	kprintf("wiring: 0x%lx\n", area->wiring);
1833 	kprintf("ref_count: %ld\n", area->ref_count);
1834 	kprintf("cache_ref: %p\n", area->cache_ref);
1835 	kprintf("cache_offset: 0x%Lx\n", area->cache_offset);
1836 	kprintf("cache_next: %p\n", area->cache_next);
1837 	kprintf("cache_prev: %p\n", area->cache_prev);
1838 }
1839 
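/**	Debugger command ("area"): looks up areas by ID, by an address they
 *	contain, or by name, and dumps every match found in the area hash table.
 */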
1840 
1841 static int
1842 dump_area(int argc, char **argv)
1843 {
1844 	bool found = false;
1845 	vm_area *area;
1846 	addr_t num;
1847 
1848 	if (argc < 2) {
1849 		kprintf("usage: area <id|address|name>\n");
1850 		return 0;
1851 	}
1852 
1853 	num = strtoul(argv[1], NULL, 0);
1854 
1855 	// walk through the area list, looking for the arguments as a name
1856 	struct hash_iterator iter;
1857 
1858 	hash_open(sAreaHash, &iter);
1859 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
1860 		if ((area->name != NULL && !strcmp(argv[1], area->name))
1861 			|| (num != 0
1862 				&& ((addr_t)area->id == num
1863 					|| (area->base <= num && area->base + area->size > num)))) {
1864 			_dump_area(area);
1865 			found = true;
1866 		}
1867 	}
1868 
1869 	if (!found)
1870 		kprintf("could not find area %s (%lu)\n", argv[1], num);
1871 	return 0;
1872 }
1873 
1874 
1875 static int
1876 dump_area_list(int argc, char **argv)
1877 {
1878 	vm_area *area;
1879 	struct hash_iterator iter;
1880 
1881 	kprintf("addr\t      id  base\t\tsize\t\tprotect\tlock\tname\n");
1882 
1883 	hash_open(sAreaHash, &iter);
1884 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
1885 		kprintf("%p %5lx  %p\t%p\t%ld\t%ld\t%s\n", area, area->id, (void *)area->base,
1886 			(void *)area->size, area->protection, area->wiring, area->name);
1887 	}
1888 	hash_close(sAreaHash, &iter, false);
1889 	return 0;
1890 }
1891 
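/**	Deletes all areas in the given address space: reserved ranges are simply
 *	freed, all other areas are released via their reference count.
 */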
1892 
1893 status_t
1894 vm_delete_areas(struct vm_address_space *aspace)
1895 {
1896 	vm_area *area;
1897 	vm_area *next, *last = NULL;
1898 
1899 	TRACE(("vm_delete_areas: called on aspace 0x%lx\n", aspace->id));
1900 
1901 	acquire_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0, 0);
1902 
1903 	// remove all reserved areas in this address space
1904 
1905 	for (area = aspace->virtual_map.areas; area; area = next) {
1906 		next = area->aspace_next;
1907 
1908 		if (area->id == RESERVED_AREA_ID) {
1909 			// just remove it
1910 			if (last)
1911 				last->aspace_next = area->aspace_next;
1912 			else
1913 				aspace->virtual_map.areas = area->aspace_next;
1914 
1915 			free(area);
1916 			continue;
1917 		}
1918 
1919 		last = area;
1920 	}
1921 
1922 	// delete all the areas in this aspace
1923 
1924 	for (area = aspace->virtual_map.areas; area; area = next) {
1925 		next = area->aspace_next;
1926 
1927 		// decrement the ref on this area, may actually push the ref < 0, if there
1928 		// is a concurrent delete_area() on that specific area, but that's ok here
1929 		if (!_vm_put_area(area, true))
1930 			dprintf("vm_delete_areas() did not delete area %p\n", area);
1931 	}
1932 
1933 	release_sem_etc(aspace->virtual_map.sem, WRITE_COUNT, 0);
1934 
1935 	return B_OK;
1936 }
1937 
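/**	Returns the ID of the area within the given address space that contains
 *	\a address, or B_ERROR if there is none (reserved ranges are ignored).
 */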
1938 
1939 static area_id
1940 vm_area_for(aspace_id aid, addr_t address)
1941 {
1942 	vm_address_space *addressSpace;
1943 	area_id id = B_ERROR;
1944 	vm_area *area;
1945 
1946 	addressSpace = vm_get_aspace_by_id(aid);
1947 	if (addressSpace == NULL)
1948 		return B_BAD_TEAM_ID;
1949 
1950 	acquire_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0, 0);
1951 
1952 	area = addressSpace->virtual_map.areas;
1953 	for (; area != NULL; area = area->aspace_next) {
1954 		// ignore reserved space regions
1955 		if (area->id == RESERVED_AREA_ID)
1956 			continue;
1957 
1958 		if (address >= area->base && address < area->base + area->size) {
1959 			id = area->id;
1960 			break;
1961 		}
1962 	}
1963 
1964 	release_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0);
1965 	vm_put_aspace(addressSpace);
1966 
1967 	return id;
1968 }
1969 
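/**	Frees the physical pages that back the given virtual range in \a map and
 *	then removes the mappings themselves.
 */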
1970 
1971 static void
1972 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
1973 {
1974 	// free all physical pages in the specified range
1975 
1976 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
1977 		addr_t physicalAddress;
1978 		uint32 flags;
1979 
1980 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
1981 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1982 			if (page != NULL)
1983 				vm_page_set_state(page, PAGE_STATE_FREE);
1984 		}
1985 	}
1986 
1987 	// unmap the memory
1988 	map->ops->unmap(map, start, end - 1);
1989 }
1990 
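/**	Frees and unmaps all memory in the given range that is not covered by a
 *	kernel area. The kernel areas are expected to be sorted by base address.
 */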
1991 
1992 void
1993 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
1994 {
1995 	vm_translation_map *map = &kernel_aspace->translation_map;
1996 	addr_t end = start + size;
1997 	addr_t lastEnd = start;
1998 	vm_area *area;
1999 
2000 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
2001 
2002 	// The areas are sorted in virtual address space order, so
2003 	// we just have to find the holes between them that fall
2004 	// into the area we should dispose
2005 
2006 	map->ops->lock(map);
2007 
2008 	for (area = kernel_aspace->virtual_map.areas; area; area = area->aspace_next) {
2009 		addr_t areaStart = area->base;
2010 		addr_t areaEnd = areaStart + area->size;
2011 
2012 		if (area->id == RESERVED_AREA_ID)
2013 			continue;
2014 
2015 		if (areaEnd >= end) {
2016 			// we are done, the remaining areas already lie beyond the range we have to free
2017 			lastEnd = end;
2018 			break;
2019 		}
2020 
2021 		if (areaStart > lastEnd) {
2022 			// this is something we can free
2023 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
2024 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
2025 		}
2026 
2027 		lastEnd = areaEnd;
2028 	}
2029 
2030 	if (lastEnd < end) {
2031 		// we can also get rid of some space at the end of the area
2032 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
2033 		unmap_and_free_physical_pages(map, lastEnd, end);
2034 	}
2035 
2036 	map->ops->unlock(map);
2037 }
2038 
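/**	Creates "_text" and "_data" areas (named after the image file) for the
 *	already mapped and wired regions of a preloaded image.
 */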
2039 
2040 static void
2041 create_preloaded_image_areas(struct preloaded_image *image)
2042 {
2043 	char name[B_OS_NAME_LENGTH];
2044 	void *address;
2045 	int32 length;
2046 
2047 	// use file name to create a good area name
2048 	char *fileName = strrchr(image->name, '/');
2049 	if (fileName == NULL)
2050 		fileName = image->name;
2051 	else
2052 		fileName++;
2053 
2054 	length = strlen(fileName);
2055 	// make sure there is enough space for the suffix
2056 	if (length > 25)
2057 		length = 25;
2058 
2059 	memcpy(name, fileName, length);
2060 	strcpy(name + length, "_text");
2061 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
2062 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2063 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
2064 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2065 
2066 	strcpy(name + length, "_data");
2067 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
2068 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2069 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
2070 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2071 }
2072 
2073 
2074 /**	Frees all areas that were previously created for the kernel arguments ranges.
2075  *	Any boot loader resources contained in those arguments must not be accessed
2076  *	anymore past this point.
2077  */
2078 
2079 void
2080 vm_free_kernel_args(kernel_args *args)
2081 {
2082 	uint32 i;
2083 
2084 	TRACE(("vm_free_kernel_args()\n"));
2085 
2086 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2087 		area_id area = area_for((void *)args->kernel_args_range[i].start);
2088 		if (area >= B_OK)
2089 			delete_area(area);
2090 	}
2091 }
2092 
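/**	Creates wired areas covering the ranges occupied by the kernel arguments,
 *	so that this memory is not reused; vm_free_kernel_args() deletes them again.
 */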
2093 
2094 static void
2095 allocate_kernel_args(kernel_args *args)
2096 {
2097 	uint32 i;
2098 
2099 	TRACE(("allocate_kernel_args()\n"));
2100 
2101 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2102 		void *address = (void *)args->kernel_args_range[i].start;
2103 
2104 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
2105 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2106 	}
2107 }
2108 
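/**	Releases the virtual address range reservations that
 *	reserve_boot_loader_ranges() has made.
 */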
2109 
2110 static void
2111 unreserve_boot_loader_ranges(kernel_args *args)
2112 {
2113 	uint32 i;
2114 
2115 	TRACE(("unreserve_boot_loader_ranges()\n"));
2116 
2117 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2118 		vm_unreserve_address_range(vm_get_kernel_aspace_id(),
2119 			(void *)args->virtual_allocated_range[i].start,
2120 			args->virtual_allocated_range[i].size);
2121 	}
2122 }
2123 
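/**	Reserves the virtual address ranges that the boot loader has already
 *	allocated, so that they cannot be reused before the kernel has taken over
 *	(see unreserve_boot_loader_ranges()).
 */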
2124 
2125 static void
2126 reserve_boot_loader_ranges(kernel_args *args)
2127 {
2128 	uint32 i;
2129 
2130 	TRACE(("reserve_boot_loader_ranges()\n"));
2131 
2132 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2133 		void *address = (void *)args->virtual_allocated_range[i].start;
2134 		status_t status = vm_reserve_address_range(vm_get_kernel_aspace_id(), &address,
2135 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
2136 		if (status < B_OK)
2137 			panic("could not reserve boot loader ranges\n");
2138 	}
2139 }
2140 
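/**	Main initialization of the VM: sets up the kernel heap, the page, cache,
 *	and address space subsystems, the global area hash table, and creates
 *	areas for everything that already exists (heap, kernel args, preloaded
 *	images, CPU kernel stacks). Also registers the VM debugger commands.
 */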
2141 
2142 status_t
2143 vm_init(kernel_args *args)
2144 {
2145 	struct preloaded_image *image;
2146 	addr_t heap_base;
2147 	void *address;
2148 	status_t err = 0;
2149 	uint32 i;
2150 
2151 	TRACE(("vm_init: entry\n"));
2152 	err = arch_vm_translation_map_init(args);
2153 	err = arch_vm_init(args);
2154 
2155 	// initialize some globals
2156 	sNextAreaID = 1;
2157 	sAreaHashLock = -1;
2158 
2159 	// map in the new heap and initialize it
2160 	heap_base = vm_alloc_from_kernel_args(args, HEAP_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2161 	TRACE(("heap at 0x%lx\n", heap_base));
2162 	heap_init(heap_base);
2163 
2164 	// initialize the free page list and physical page mapper
2165 	vm_page_init(args);
2166 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
2167 
2168 	// initialize the hash table that stores the pages mapped to caches
2169 	vm_cache_init(args);
2170 
2171 	{
2172 		vm_area *area;
2173 		sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area,
2174 			&area_compare, &area_hash);
2175 		if (sAreaHash == NULL)
2176 			panic("vm_init: error creating area hash table\n");
2177 	}
2178 
2179 	vm_aspace_init();
2180 	reserve_boot_loader_ranges(args);
2181 
2182 	// do any further initialization that the architecture dependant layers may need now
2183 	arch_vm_translation_map_init_post_area(args);
2184 	arch_vm_init_post_area(args);
2185 	vm_page_init_post_area(args);
2186 
2187 	// allocate areas to represent stuff that already exists
2188 
2189 	address = (void *)ROUNDOWN(heap_base, B_PAGE_SIZE);
2190 	create_area("kernel heap", &address, B_EXACT_ADDRESS, HEAP_SIZE,
2191 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2192 
2193 	allocate_kernel_args(args);
2194 
2195 	args->kernel_image.name = "kernel";
2196 		// the lazy boot loader currently doesn't set the kernel's name...
2197 	create_preloaded_image_areas(&args->kernel_image);
2198 
2199 	// allocate areas for preloaded images
2200 	for (image = args->preloaded_images; image != NULL; image = image->next) {
2201 		create_preloaded_image_areas(image);
2202 	}
2203 
2204 	// allocate kernel stacks
2205 	for (i = 0; i < args->num_cpus; i++) {
2206 		char name[64];
2207 
2208 		sprintf(name, "idle thread %lu kstack", i + 1);
2209 		address = (void *)args->cpu_kstack[i].start;
2210 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
2211 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2212 	}
2213 	{
2214 		void *null;
2215 		vm_map_physical_memory(vm_get_kernel_aspace_id(), "bootdir", &null, B_ANY_KERNEL_ADDRESS,
2216 			args->bootdir_addr.size, B_KERNEL_READ_AREA, args->bootdir_addr.start);
2217 	}
2218 
2219 	// add some debugger commands
2220 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
2221 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
2222 	add_debugger_command("cache_ref", &dump_cache_ref, "Dump cache_ref data structure");
2223 	add_debugger_command("cache", &dump_cache, "Dump cache data structure");
2224 //	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
2225 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
2226 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
2227 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
2228 
2229 	TRACE(("vm_init: exit\n"));
2230 
2231 	return err;
2232 }
2233 
2234 
2235 status_t
2236 vm_init_post_sem(kernel_args *args)
2237 {
2238 	vm_area *area;
2239 
2240 	// This frees all unused boot loader resources and makes their space available again
2241 	arch_vm_init_end(args);
2242 	unreserve_boot_loader_ranges(args);
2243 
2244 	// fill in all of the semaphores that were not allocated before
2245 	// since we're still single threaded and only the kernel address space exists,
2246 	// it isn't that hard to find all of the ones we need to create
2247 
2248 	benaphore_init(&sAvailableMemoryLock, "available memory lock");
2249 	arch_vm_translation_map_init_post_sem(args);
2250 	vm_aspace_init_post_sem();
2251 
2252 	for (area = kernel_aspace->virtual_map.areas; area; area = area->aspace_next) {
2253 		if (area->id == RESERVED_AREA_ID)
2254 			continue;
2255 
2256 		if (area->cache_ref->lock.sem < 0)
2257 			mutex_init(&area->cache_ref->lock, "cache_ref_mutex");
2258 	}
2259 
2260 	sAreaHashLock = create_sem(WRITE_COUNT, "area hash");
2261 
2262 	return heap_init_post_sem(args);
2263 }
2264 
2265 
2266 status_t
2267 vm_init_post_thread(kernel_args *args)
2268 {
2269 	vm_page_init_post_thread(args);
2270 	vm_daemon_init();
2271 	vm_low_memory_init();
2272 
2273 	return heap_init_post_thread(args);
2274 }
2275 
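/**	permit_page_faults() and forbid_page_faults() increment respectively
 *	decrement the current thread's page_faults_allowed counter.
 */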
2276 
2277 void
2278 permit_page_faults(void)
2279 {
2280 	struct thread *thread = thread_get_current_thread();
2281 	if (thread != NULL)
2282 		atomic_add(&thread->page_faults_allowed, 1);
2283 }
2284 
2285 
2286 void
2287 forbid_page_faults(void)
2288 {
2289 	struct thread *thread = thread_get_current_thread();
2290 	if (thread != NULL)
2291 		atomic_add(&thread->page_faults_allowed, -1);
2292 }
2293 
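/**	Tries to resolve a page fault via vm_soft_fault(). If that fails, kernel
 *	faults are redirected to the thread's fault handler (or cause a panic),
 *	while userland faults normally result in a SIGSEGV being sent to the
 *	faulting team.
 */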
2294 
2295 status_t
2296 vm_page_fault(addr_t address, addr_t fault_address, bool is_write, bool is_user, addr_t *newip)
2297 {
2298 	int err;
2299 
2300 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, fault_address));
2301 
2302 	*newip = 0;
2303 
2304 	err = vm_soft_fault(address, is_write, is_user);
2305 	if (err < 0) {
2306 		dprintf("vm_page_fault: vm_soft_fault returned error %d on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
2307 			err, address, fault_address, is_write, is_user, thread_get_current_thread_id());
2308 		if (!is_user) {
2309 			struct thread *t = thread_get_current_thread();
2310 			if (t && t->fault_handler != 0) {
2311 				// this will cause the arch dependant page fault handler to
2312 				// modify the IP on the interrupt frame or whatever to return
2313 				// to this address
2314 				*newip = t->fault_handler;
2315 			} else {
2316 				// unhandled page fault in the kernel
2317 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
2318 					address, fault_address);
2319 			}
2320 		} else {
2321 #if 1
2322 			// ToDo: remove me once we have proper userland debugging support (and tools)
2323 			vm_address_space *aspace = vm_get_current_user_aspace();
2324 			vm_virtual_map *map = &aspace->virtual_map;
2325 			vm_area *area;
2326 
2327 			acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
2328 			area = vm_virtual_map_lookup(map, fault_address);
2329 
2330 			dprintf("vm_page_fault: sending team 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n",
2331 				thread_get_current_thread()->team->id, fault_address,
2332 				area ? area->name : "???", fault_address - (area ? area->base : 0x0));
2333 
2334 // We can print a stack trace of the userland thread here. Since we're accessing
2335 // user memory freely and unchecked, this is not enabled by default.
2336 #if 0
2337 			if (area) {
2338 				struct stack_frame {
2339 					#ifdef __INTEL__
2340 						struct stack_frame*	previous;
2341 						void*				return_address;
2342 					#else
2343 						// ...
2344 					#endif
2345 				};
2346 				struct iframe *iframe = i386_get_user_iframe();
2347 				struct stack_frame *frame = (struct stack_frame *)iframe->ebp;
2348 
2349 				dprintf("stack trace:\n");
2350 				for (; frame; frame = frame->previous) {
2351 					dprintf("  %p", frame->return_address);
2352 					area = vm_virtual_map_lookup(map,
2353 						(addr_t)frame->return_address);
2354 					if (area) {
2355 						dprintf(" (%s + %#lx)", area->name,
2356 							(addr_t)frame->return_address - area->base);
2357 					}
2358 					dprintf("\n");
2359 				}
2360 			}
2361 #endif	// 0 (stack trace)
2362 
2363 			release_sem_etc(map->sem, READ_COUNT, 0);
2364 			vm_put_aspace(aspace);
2365 #endif
2366 			if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV))
2367 				send_signal(team_get_current_team_id(), SIGSEGV);
2368 		}
2369 	}
2370 
2371 	return B_HANDLED_INTERRUPT;
2372 }
2373 
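/**	Resolves a page fault: looks up the area that was hit, checks its
 *	protection, walks the area's cache chain from top to bottom to find or
 *	read in the page, copies it into the top cache if a write requires it
 *	(copy-on-write), and finally maps the page with the proper protection.
 */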
2374 
2375 static status_t
2376 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser)
2377 {
2378 	vm_address_space *aspace;
2379 	vm_virtual_map *map;
2380 	vm_area *area;
2381 	vm_cache_ref *cache_ref;
2382 	vm_cache_ref *last_cache_ref;
2383 	vm_cache_ref *top_cache_ref;
2384 	off_t cache_offset;
2385 	vm_page dummy_page;
2386 	vm_page *page = NULL;
2387 	addr_t address;
2388 	int change_count;
2389 	int err;
2390 
2391 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
2392 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
2393 
2394 	address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
2395 
2396 	if (IS_KERNEL_ADDRESS(address)) {
2397 		aspace = vm_get_kernel_aspace();
2398 	} else if (IS_USER_ADDRESS(address)) {
2399 		aspace = vm_get_current_user_aspace();
2400 		if (aspace == NULL) {
2401 			if (isUser == false) {
2402 				dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n");
2403 				return B_BAD_ADDRESS;
2404 			} else {
2405 				// XXX weird state.
2406 				panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n");
2407 			}
2408 		}
2409 	} else {
2410 		// the hit was probably in the 64k DMZ between kernel and user space
2411 		// this keeps a user space thread from passing a buffer that crosses into kernel space
2412 		return B_BAD_ADDRESS;
2413 	}
2414 	map = &aspace->virtual_map;
2415 	atomic_add(&aspace->fault_count, 1);
2416 
2417 	// Get the area the fault was in
2418 
2419 	acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
2420 	area = vm_virtual_map_lookup(map, address);
2421 	if (area == NULL) {
2422 		release_sem_etc(map->sem, READ_COUNT, 0);
2423 		vm_put_aspace(aspace);
2424 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n", originalAddress);
2425 		return B_BAD_ADDRESS;
2426 	}
2427 
2428 	// check permissions
2429 	if (isUser && (area->protection & B_USER_PROTECTION) == 0) {
2430 		release_sem_etc(map->sem, READ_COUNT, 0);
2431 		vm_put_aspace(aspace);
2432 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
2433 		return B_PERMISSION_DENIED;
2434 	}
2435 	if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
2436 		release_sem_etc(map->sem, READ_COUNT, 0);
2437 		vm_put_aspace(aspace);
2438 		dprintf("write access attempted on read-only area 0x%lx at %p\n", area->id, (void *)originalAddress);
2439 		return B_PERMISSION_DENIED;
2440 	}
2441 
2442 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
2443 	// At first, the top most cache from the area is investigated
2444 
2445 	top_cache_ref = area->cache_ref;
2446 	cache_offset = address - area->base + area->cache_offset;
2447 	vm_cache_acquire_ref(top_cache_ref, true);
2448 	change_count = map->change_count;
2449 	release_sem_etc(map->sem, READ_COUNT, 0);
2450 
2451 	// See if this cache has a fault handler - this will do all the work for us
2452 	if (top_cache_ref->cache->store->ops->fault != NULL) {
2453 		// Note, since the page fault is resolved with interrupts enabled, the
2454 		// fault handler could be called more than once for the same reason -
2455 		// the store must take this into account
2456 		status_t status = (*top_cache_ref->cache->store->ops->fault)(top_cache_ref->cache->store, aspace, cache_offset);
2457 		if (status != B_BAD_HANDLER) {
2458 			vm_cache_release_ref(top_cache_ref);
2459 			vm_put_aspace(aspace);
2460 			return status;
2461 		}
2462 	}
2463 
2464 	// The top most cache has no fault handler, so let's see if the cache or its sources
2465 	// already have the page we're searching for (we're going from top to bottom)
2466 
2467 	dummy_page.state = PAGE_STATE_INACTIVE;
2468 	dummy_page.type = PAGE_TYPE_DUMMY;
2469 
2470 	last_cache_ref = top_cache_ref;
2471 	for (cache_ref = top_cache_ref; cache_ref; cache_ref = (cache_ref->cache->source) ? cache_ref->cache->source->ref : NULL) {
2472 		mutex_lock(&cache_ref->lock);
2473 
2474 		for (;;) {
2475 			page = vm_cache_lookup_page(cache_ref, cache_offset);
2476 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
2477 				vm_page_set_state(page, PAGE_STATE_BUSY);
2478 				mutex_unlock(&cache_ref->lock);
2479 				break;
2480 			}
2481 
2482 			if (page == NULL)
2483 				break;
2484 
2485 			// page must be busy
2486 			// ToDo: don't wait forever!
2487 			mutex_unlock(&cache_ref->lock);
2488 			snooze(20000);
2489 			mutex_lock(&cache_ref->lock);
2490 		}
2491 
2492 		if (page != NULL)
2493 			break;
2494 
2495 		// The current cache does not contain the page we're looking for
2496 
2497 		// If we're at the top most cache, insert the dummy page here to keep other threads
2498 		// from faulting on the same address and chasing us up the cache chain
2499 		if (cache_ref == top_cache_ref) {
2500 			dummy_page.state = PAGE_STATE_BUSY;
2501 			vm_cache_insert_page(cache_ref, &dummy_page, cache_offset);
2502 		}
2503 
2504 		// see if the vm_store has it
2505 		if (cache_ref->cache->store->ops->has_page != NULL
2506 			&& cache_ref->cache->store->ops->has_page(cache_ref->cache->store, cache_offset)) {
2507 			size_t bytesRead;
2508 			iovec vec;
2509 
2510 			vec.iov_len = bytesRead = B_PAGE_SIZE;
2511 
2512 			mutex_unlock(&cache_ref->lock);
2513 
2514 			page = vm_page_allocate_page(PAGE_STATE_FREE);
2515 			aspace->translation_map.ops->get_physical_page(page->ppn * B_PAGE_SIZE, (addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT);
2516 			// ToDo: handle errors here
2517 			err = cache_ref->cache->store->ops->read(cache_ref->cache->store, cache_offset, &vec, 1, &bytesRead);
2518 			aspace->translation_map.ops->put_physical_page((addr_t)vec.iov_base);
2519 
2520 			mutex_lock(&cache_ref->lock);
2521 
2522 			if (cache_ref == top_cache_ref) {
2523 				vm_cache_remove_page(cache_ref, &dummy_page);
2524 				dummy_page.state = PAGE_STATE_INACTIVE;
2525 			}
2526 			vm_cache_insert_page(cache_ref, page, cache_offset);
2527 			mutex_unlock(&cache_ref->lock);
2528 			break;
2529 		}
2530 		mutex_unlock(&cache_ref->lock);
2531 		last_cache_ref = cache_ref;
2532 	}
2533 
2534 	if (!cache_ref) {
2535 		// We rolled off the end of the cache chain, so we need to decide which
2536 		// cache will get the new page we're about to create.
2537 
2538 		cache_ref = isWrite ? top_cache_ref : last_cache_ref;
2539 			// Read-only pages end up in the deepest cache - only the
2540 			// top most cache may have direct write access.
2541 	}
2542 
2543 	if (page == NULL) {
2544 		// we still haven't found a page, so we allocate a clean one
2545 		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
2546 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->ppn));
2547 
2548 		// Insert the new page into our cache, and replace it with the dummy page if necessary
2549 
2550 		mutex_lock(&cache_ref->lock);
2551 
2552 		// if we inserted a dummy page into this cache, we have to remove it now
2553 		if (dummy_page.state == PAGE_STATE_BUSY && dummy_page.cache == cache_ref->cache) {
2554 			vm_cache_remove_page(cache_ref, &dummy_page);
2555 			dummy_page.state = PAGE_STATE_INACTIVE;
2556 		}
2557 
2558 		vm_cache_insert_page(cache_ref, page, cache_offset);
2559 		mutex_unlock(&cache_ref->lock);
2560 
2561 		if (dummy_page.state == PAGE_STATE_BUSY) {
2562 			// we had inserted the dummy page into another cache, so let's remove it from there
2563 			vm_cache_ref *temp_cache = dummy_page.cache->ref;
2564 			mutex_lock(&temp_cache->lock);
2565 			vm_cache_remove_page(temp_cache, &dummy_page);
2566 			mutex_unlock(&temp_cache->lock);
2567 			dummy_page.state = PAGE_STATE_INACTIVE;
2568 		}
2569 	}
2570 
2571 	// We now have the page and a cache it belongs to - we now need to make
2572 	// sure that the area's cache can access it, too, and sees the correct data
2573 
2574 	if (page->cache != top_cache_ref->cache && isWrite) {
2575 		// now we have a page that has the data we want, but in the wrong cache object
2576 		// so we need to copy it and stick it into the top cache
2577 		vm_page *src_page = page;
2578 		void *src, *dest;
2579 
2580 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
2581 		page = vm_page_allocate_page(PAGE_STATE_FREE);
2582 
2583 		// try to get a mapping for the src and dest page so we can copy it
2584 		for (;;) {
2585 			(*aspace->translation_map.ops->get_physical_page)(src_page->ppn * B_PAGE_SIZE, (addr_t *)&src, PHYSICAL_PAGE_CAN_WAIT);
2586 			err = (*aspace->translation_map.ops->get_physical_page)(page->ppn * B_PAGE_SIZE, (addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT);
2587 			if (err == B_NO_ERROR)
2588 				break;
2589 
2590 			// it couldn't map the second one, so sleep and retry
2591 			// keeps an extremely rare deadlock from occurring
2592 			(*aspace->translation_map.ops->put_physical_page)((addr_t)src);
2593 			snooze(5000);
2594 		}
2595 
2596 		memcpy(dest, src, B_PAGE_SIZE);
2597 		(*aspace->translation_map.ops->put_physical_page)((addr_t)src);
2598 		(*aspace->translation_map.ops->put_physical_page)((addr_t)dest);
2599 
2600 		vm_page_set_state(src_page, PAGE_STATE_ACTIVE);
2601 
2602 		mutex_lock(&top_cache_ref->lock);
2603 
2604 		// Insert the new page into our cache, and replace it with the dummy page if necessary
2605 
2606 		// if we inserted a dummy page into this cache, we have to remove it now
2607 		if (dummy_page.state == PAGE_STATE_BUSY && dummy_page.cache == top_cache_ref->cache) {
2608 			vm_cache_remove_page(top_cache_ref, &dummy_page);
2609 			dummy_page.state = PAGE_STATE_INACTIVE;
2610 		}
2611 
2612 		vm_cache_insert_page(top_cache_ref, page, cache_offset);
2613 		mutex_unlock(&top_cache_ref->lock);
2614 
2615 		if (dummy_page.state == PAGE_STATE_BUSY) {
2616 			// we had inserted the dummy page into another cache, so let's remove it from there
2617 			vm_cache_ref *temp_cache = dummy_page.cache->ref;
2618 			mutex_lock(&temp_cache->lock);
2619 			vm_cache_remove_page(temp_cache, &dummy_page);
2620 			mutex_unlock(&temp_cache->lock);
2621 			dummy_page.state = PAGE_STATE_INACTIVE;
2622 		}
2623 	}
2624 
2625 	err = B_OK;
2626 	acquire_sem_etc(map->sem, READ_COUNT, 0, 0);
2627 	if (change_count != map->change_count) {
2628 		// something may have changed, see if the address is still valid
2629 		area = vm_virtual_map_lookup(map, address);
2630 		if (area == NULL
2631 			|| area->cache_ref != top_cache_ref
2632 			|| (address - area->base + area->cache_offset) != cache_offset) {
2633 			dprintf("vm_soft_fault: address space layout changed affecting ongoing soft fault\n");
2634 			err = B_BAD_ADDRESS;
2635 		}
2636 	}
2637 
2638 	if (err == B_OK) {
2639 		// All went fine, all there is left to do is to map the page into the address space
2640 
2641 		// If the page doesn't reside in the area's cache, we need to make sure it's
2642 		// mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write)
2643 		uint32 newProtection = area->protection;
2644 		if (page->cache != top_cache_ref->cache && !isWrite)
2645 			newProtection &= ~(isUser ? B_WRITE_AREA : B_KERNEL_WRITE_AREA);
2646 
2647 		atomic_add(&page->ref_count, 1);
2648 		(*aspace->translation_map.ops->lock)(&aspace->translation_map);
2649 		(*aspace->translation_map.ops->map)(&aspace->translation_map, address,
2650 			page->ppn * B_PAGE_SIZE, newProtection);
2651 		(*aspace->translation_map.ops->unlock)(&aspace->translation_map);
2652 	}
2653 
2654 	release_sem_etc(map->sem, READ_COUNT, 0);
2655 
2656 	if (dummy_page.state == PAGE_STATE_BUSY) {
2657 		// We still have the dummy page in the cache - that happens if we didn't need
2658 		// to allocate a new page before, but could use one in another cache
2659 		vm_cache_ref *temp_cache = dummy_page.cache->ref;
2660 		mutex_lock(&temp_cache->lock);
2661 		vm_cache_remove_page(temp_cache, &dummy_page);
2662 		mutex_unlock(&temp_cache->lock);
2663 		dummy_page.state = PAGE_STATE_INACTIVE;
2664 	}
2665 
2666 	vm_page_set_state(page, PAGE_STATE_ACTIVE);
2667 
2668 	vm_cache_release_ref(top_cache_ref);
2669 	vm_put_aspace(aspace);
2670 
2671 	return err;
2672 }
2673 
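/**	Returns the area in \a map that contains \a address (reserved ranges are
 *	ignored), consulting and updating the map's area hint. The caller must
 *	hold the map's semaphore.
 */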
2674 
2675 static vm_area *
2676 vm_virtual_map_lookup(vm_virtual_map *map, addr_t address)
2677 {
2678 	vm_area *area;
2679 
2680 	// check the areas list first
2681 	area = map->area_hint;
2682 	if (area && area->base <= address && (area->base + area->size) > address)
2683 		return area;
2684 
2685 	for (area = map->areas; area != NULL; area = area->aspace_next) {
2686 		if (area->id == RESERVED_AREA_ID)
2687 			continue;
2688 
2689 		if (area->base <= address && (area->base + area->size) > address)
2690 			break;
2691 	}
2692 
2693 	if (area)
2694 		map->area_hint = area;
2695 	return area;
2696 }
2697 
2698 
2699 status_t
2700 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, int flags)
2701 {
2702 	return (*kernel_aspace->translation_map.ops->get_physical_page)(paddr, _vaddr, flags);
2703 }
2704 
2705 
2706 status_t
2707 vm_put_physical_page(addr_t vaddr)
2708 {
2709 	return (*kernel_aspace->translation_map.ops->put_physical_page)(vaddr);
2710 }
2711 
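/**	vm_try_reserve_memory() and vm_unreserve_memory() account committed
 *	memory against the total amount of memory found at boot; a reservation
 *	fails with B_NO_MEMORY if not enough is left.
 */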
2712 
2713 void
2714 vm_unreserve_memory(size_t amount)
2715 {
2716 	benaphore_lock(&sAvailableMemoryLock);
2717 
2718 	sAvailableMemory += amount;
2719 
2720 	benaphore_unlock(&sAvailableMemoryLock);
2721 }
2722 
2723 
2724 status_t
2725 vm_try_reserve_memory(size_t amount)
2726 {
2727 	status_t status;
2728 	benaphore_lock(&sAvailableMemoryLock);
2729 
2730 	if (sAvailableMemory > amount) {
2731 		sAvailableMemory -= amount;
2732 		status = B_OK;
2733 	} else
2734 		status = B_NO_MEMORY;
2735 
2736 	benaphore_unlock(&sAvailableMemoryLock);
2737 	return status;
2738 }
2739 
2740 
2741 /**	This function enforces some protection properties:
2742  *	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
2743  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
2744  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
2745  *	   and B_KERNEL_WRITE_AREA.
2746  */
2747 
2748 static void
2749 fix_protection(uint32 *protection)
2750 {
2751 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
2752 		if ((*protection & B_USER_PROTECTION) == 0
2753 			|| (*protection & B_WRITE_AREA) != 0)
2754 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
2755 		else
2756 			*protection |= B_KERNEL_READ_AREA;
2757 	}
2758 }
2759 
2760 
2761 //	#pragma mark -
2762 
2763 
2764 status_t
2765 user_memcpy(void *to, const void *from, size_t size)
2766 {
2767 	return arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler);
2768 }
2769 
2770 
2771 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
2772  *	the string in \a to, NULL-terminating the result.
2773  *
2774  *	\param to Pointer to the destination C-string.
2775  *	\param from Pointer to the source C-string.
2776  *	\param size Size in bytes of the string buffer pointed to by \a to.
2777  *
2778  *	\return strlen(\a from).
2779  */
2780 
2781 ssize_t
2782 user_strlcpy(char *to, const char *from, size_t size)
2783 {
2784 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
2785 }
2786 
2787 
2788 status_t
2789 user_memset(void *s, char c, size_t count)
2790 {
2791 	return arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler);
2792 }
2793 
2794 
2795 //	#pragma mark -
2796 
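/**	Currently only makes sure that all pages in the given range are present
 *	(faulting them in if needed); real locking/wiring is not yet implemented.
 */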
2797 
2798 long
2799 lock_memory(void *address, ulong numBytes, ulong flags)
2800 {
2801 	vm_address_space *aspace = NULL;
2802 	struct vm_translation_map *map;
2803 	addr_t base = (addr_t)address;
2804 	addr_t end = base + numBytes;
2805 	bool isUser = IS_USER_ADDRESS(address);
2806 
2807 	// ToDo: Our VM currently doesn't support locking, this function
2808 	//	will now at least make sure that the memory is paged in, but
2809 	//	that's about it.
2810 	//	Nevertheless, it must be implemented as soon as we're able to
2811 	//	swap pages out of memory.
2812 
2813 	// ToDo: this is a hack, too; the iospace area is a null region and
2814 	//	officially cannot be written to or read; ie. vm_soft_fault() will
2815 	//	fail there. Furthermore, this is x86 specific as well.
2816 	#define IOSPACE_SIZE (256 * 1024 * 1024)
2817 	if (base >= KERNEL_BASE + IOSPACE_SIZE && base + numBytes < KERNEL_BASE + 2 * IOSPACE_SIZE)
2818 		return B_OK;
2819 
2820 	if (isUser)
2821 		aspace = vm_get_current_user_aspace();
2822 	else
2823 		aspace = vm_get_kernel_aspace();
2824 	if (aspace == NULL)
2825 		return B_ERROR;
2826 
2827 	map = &aspace->translation_map;
2828 
2829 	for (; base < end; base += B_PAGE_SIZE) {
2830 		addr_t physicalAddress;
2831 		uint32 protection;
2832 		status_t status;
2833 
2834 		map->ops->lock(map);
2835 		map->ops->query(map, base, &physicalAddress, &protection);
2836 		map->ops->unlock(map);
2837 
2838 		if ((protection & PAGE_PRESENT) != 0) {
2839 			// if B_READ_DEVICE is set, the caller intends to write to the locked
2840 			// memory, so if it hasn't been mapped writable, we'll try the soft
2841 			// fault anyway
2842 			if ((flags & B_READ_DEVICE) == 0
2843 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2844 				continue;
2845 		}
2846 
2847 		status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser);
2848 		if (status != B_OK)	{
2849 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
2850 				address, numBytes, flags, strerror(status));
2851 			vm_put_aspace(aspace);
2852 			return status;
2853 		}
2854 	}
2855 
2856 	vm_put_aspace(aspace);
2857 	return B_OK;
2858 }
2859 
2860 
2861 long
2862 unlock_memory(void *buffer, ulong numBytes, ulong flags)
2863 {
2864 	return B_OK;
2865 }
2866 
2867 
2868 /** According to the BeBook, this function should always succeed.
2869  *	This is no longer the case.
2870  */
2871 
2872 long
2873 get_memory_map(const void *address, ulong numBytes, physical_entry *table, long numEntries)
2874 {
2875 	vm_address_space *addressSpace;
2876 	addr_t virtualAddress = (addr_t)address;
2877 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
2878 	addr_t physicalAddress;
2879 	status_t status = B_OK;
2880 	int32 index = -1;
2881 	addr_t offset = 0;
2882 	uint32 flags;
2883 
2884 	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes, numEntries));
2885 
2886 	if (numEntries == 0 || numBytes == 0)
2887 		return B_BAD_VALUE;
2888 
2889 	// in which address space is the address to be found?
2890 	if (IS_USER_ADDRESS(virtualAddress))
2891 		addressSpace = vm_get_current_user_aspace();
2892 	else
2893 		addressSpace = vm_get_kernel_aspace();
2894 
2895 	if (addressSpace == NULL)
2896 		return B_ERROR;
2897 
2898 	(*addressSpace->translation_map.ops->lock)(&addressSpace->translation_map);
2899 
2900 	while (offset < numBytes) {
2901 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
2902 
2903 		status = (*addressSpace->translation_map.ops->query)(&addressSpace->translation_map,
2904 					(addr_t)address + offset, &physicalAddress, &flags);
2905 		if (status < 0)
2906 			break;
2907 
2908 		if (index < 0 && pageOffset > 0) {
2909 			physicalAddress += pageOffset;
2910 			if (bytes > B_PAGE_SIZE - pageOffset)
2911 				bytes = B_PAGE_SIZE - pageOffset;
2912 		}
2913 
2914 		// need to switch to the next physical_entry?
2915 		if (index < 0 || (addr_t)table[index].address != physicalAddress - table[index].size) {
2916 			if (++index + 1 > numEntries) {
2917 				// table too small
2918 				status = B_BUFFER_OVERFLOW;
2919 				break;
2920 			}
2921 			table[index].address = (void *)physicalAddress;
2922 			table[index].size = bytes;
2923 		} else {
2924 			// the page is physically contiguous, it still fits into the current entry
2925 			table[index].size += bytes;
2926 		}
2927 
2928 		offset += bytes;
2929 	}
2930 	(*addressSpace->translation_map.ops->unlock)(&addressSpace->translation_map);
2931 
2932 	// close the entry list
2933 
2934 	if (status == B_OK) {
2935 		// if it's only one entry, we will silently accept the missing ending
2936 		if (numEntries == 1)
2937 			return B_OK;
2938 
2939 		if (++index + 1 > numEntries)
2940 			return B_BUFFER_OVERFLOW;
2941 
2942 		table[index].address = NULL;
2943 		table[index].size = 0;
2944 	}
2945 
2946 	return status;
2947 }
2948 
2949 
2950 area_id
2951 area_for(void *address)
2952 {
2953 	return vm_area_for(vm_get_kernel_aspace_id(), (addr_t)address);
2954 }
2955 
2956 
2957 area_id
2958 find_area(const char *name)
2959 {
2960 	struct hash_iterator iterator;
2961 	vm_area *area;
2962 	area_id id = B_NAME_NOT_FOUND;
2963 
2964 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
2965 	hash_open(sAreaHash, &iterator);
2966 
2967 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
2968 		if (area->id == RESERVED_AREA_ID)
2969 			continue;
2970 
2971 		if (!strcmp(area->name, name)) {
2972 			id = area->id;
2973 			break;
2974 		}
2975 	}
2976 
2977 	hash_close(sAreaHash, &iterator, false);
2978 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
2979 
2980 	return id;
2981 }
2982 
2983 
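/**	Fills in the passed area_info structure from the given area; the copy and
 *	I/O counters are not yet tracked and are reported as zero.
 */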
2984 static void
2985 fill_area_info(struct vm_area *area, area_info *info, size_t size)
2986 {
2987 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
2988 	info->area = area->id;
2989 	info->address = (void *)area->base;
2990 	info->size = area->size;
2991 	info->protection = area->protection & B_USER_PROTECTION;
2992 	info->lock = B_FULL_LOCK;
2993 	info->team = area->aspace->id;
2994 	info->ram_size = area->size;
2995 	info->copy_count = 0;
2996 	info->in_count = 0;
2997 	info->out_count = 0;
2998 		// ToDo: retrieve real values here!
2999 }
3000 
3001 
3002 status_t
3003 _get_area_info(area_id id, area_info *info, size_t size)
3004 {
3005 	vm_area *area;
3006 
3007 	if (size != sizeof(area_info) || info == NULL)
3008 		return B_BAD_VALUE;
3009 
3010 	area = vm_get_area(id);
3011 	if (area == NULL)
3012 		return B_BAD_VALUE;
3013 
3014 	fill_area_info(area, info, size);
3015 	vm_put_area(area);
3016 
3017 	return B_OK;
3018 }
3019 
3020 
3021 status_t
3022 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
3023 {
3024 	addr_t nextBase = *(addr_t *)cookie;
3025 	vm_address_space *addressSpace;
3026 	vm_area *area;
3027 
3028 	// we're already through the list
3029 	if (nextBase == (addr_t)-1)
3030 		return B_ENTRY_NOT_FOUND;
3031 
3032 	if (team == B_CURRENT_TEAM)
3033 		team = team_get_current_team_id();
3034 
3035 	if (!team_is_valid(team)
3036 		|| team_get_address_space(team, &addressSpace) != B_OK)
3037 		return B_BAD_VALUE;
3038 
3039 	acquire_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0, 0);
3040 
3041 	for (area = addressSpace->virtual_map.areas; area; area = area->aspace_next) {
3042 		if (area->id == RESERVED_AREA_ID)
3043 			continue;
3044 
3045 		if (area->base > nextBase)
3046 			break;
3047 	}
3048 
3049 	// make sure this area won't go away
3050 	if (area != NULL)
3051 		area = vm_get_area(area->id);
3052 
3053 	release_sem_etc(addressSpace->virtual_map.sem, READ_COUNT, 0);
3054 	vm_put_aspace(addressSpace);
3055 
3056 	if (area == NULL) {
3057 		*cookie = (int32)-1;
3058 		return B_ENTRY_NOT_FOUND;
3059 	}
3060 
3061 	fill_area_info(area, info, size);
3062 	*cookie = (int32)(area->base);
3063 
3064 	vm_put_area(area);
3065 
3066 	return B_OK;
3067 }
3068 
3069 
3070 status_t
3071 set_area_protection(area_id area, uint32 newProtection)
3072 {
3073 	fix_protection(&newProtection);
3074 
3075 	return vm_set_area_protection(vm_get_kernel_aspace_id(), area, newProtection);
3076 }
3077 
3078 
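/**	Resizes the given area together with all other areas that share its cache.
 *	Only areas backed by anonymous (temporary) caches may currently be
 *	resized, and growing fails if a neighboring area is in the way.
 */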
3079 status_t
3080 resize_area(area_id areaID, size_t newSize)
3081 {
3082 	vm_cache_ref *cache;
3083 	vm_area *area, *current;
3084 	status_t status = B_OK;
3085 	size_t oldSize;
3086 
3087 	// is newSize a multiple of B_PAGE_SIZE?
3088 	if (newSize & (B_PAGE_SIZE - 1))
3089 		return B_BAD_VALUE;
3090 
3091 	area = vm_get_area(areaID);
3092 	if (area == NULL)
3093 		return B_BAD_VALUE;
3094 
3095 	// Resize all areas of this area's cache
3096 
3097 	cache = area->cache_ref;
3098 	oldSize = area->size;
3099 
3100 	// ToDo: we should only allow resizing of anonymous memory areas!
3101 	if (!cache->cache->temporary) {
3102 		status = B_NOT_ALLOWED;
3103 		goto err1;
3104 	}
3105 
3106 	// ToDo: we must lock all address spaces here!
3107 
3108 	mutex_lock(&cache->lock);
3109 
3110 	if (oldSize < newSize) {
3111 		// We need to check if all areas of this cache can be resized
3112 
3113 		for (current = cache->areas; current; current = current->cache_next) {
3114 			if (current->aspace_next && current->aspace_next->base <= (current->base + newSize)) {
3115 				// if the area was created inside a reserved area, it can also be
3116 				// resized in that area
3117 				// ToDo: if there is free space after the reserved area, it could be used as well...
3118 				vm_area *next = current->aspace_next;
3119 				if (next->id == RESERVED_AREA_ID && next->cache_offset <= current->base
3120 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
3121 					continue;
3122 
3123 				status = B_ERROR;
3124 				goto err2;
3125 			}
3126 		}
3127 	}
3128 
3129 	// Okay, looks good so far, so let's do it
3130 
3131 	for (current = cache->areas; current; current = current->cache_next) {
3132 		if (current->aspace_next && current->aspace_next->base <= (current->base + newSize)) {
3133 			vm_area *next = current->aspace_next;
3134 			if (next->id == RESERVED_AREA_ID && next->cache_offset <= current->base
3135 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
3136 				// resize reserved area
3137 				addr_t offset = current->base + newSize - next->base;
3138 				if (next->size <= offset) {
3139 					current->aspace_next = next->aspace_next;
3140 					free(next);
3141 				} else {
3142 					next->size -= offset;
3143 					next->base += offset;
3144 				}
3145 			} else {
3146 				status = B_ERROR;
3147 				break;
3148 			}
3149 		}
3150 
3151 		current->size = newSize;
3152 
3153 		// we also need to unmap all pages beyond the new size, if the area has shrunk
3154 		if (newSize < oldSize) {
3155 			vm_translation_map *map = &current->aspace->translation_map;
3156 
3157 			map->ops->lock(map);
3158 			map->ops->unmap(map, current->base + newSize, current->base + oldSize - 1);
3159 			map->ops->unlock(map);
3160 		}
3161 	}
3162 
3163 	if (status == B_OK)
3164 		status = vm_cache_resize(cache, newSize);
3165 
3166 	if (status < B_OK) {
3167 		// This shouldn't really be possible, but hey, who knows
3168 		for (current = cache->areas; current; current = current->cache_next)
3169 			current->size = oldSize;
3170 	}
3171 
3172 err2:
3173 	mutex_unlock(&cache->lock);
3174 err1:
3175 	vm_put_area(area);
3176 
3177 	// ToDo: we must honour the lock restrictions of this area
3178 	return status;
3179 }
3180 
3181 
3182 /**	Transfers the specified area to a new team. The caller must be the owner
3183  *	of the area (not yet enforced but probably should be).
3184  *	This function is currently not exported to the kernel namespace, but is
3185  *	only accessible using the _kern_transfer_area() syscall.
3186  */
3187 
3188 static status_t
3189 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
3190 {
3191 	vm_address_space *sourceAddressSpace, *targetAddressSpace;
3192 	vm_translation_map *map;
3193 	vm_area *area, *reserved;
3194 	void *reservedAddress;
3195 	status_t status;
3196 
3197 	area = vm_get_area(id);
3198 	if (area == NULL)
3199 		return B_BAD_VALUE;
3200 
3201 	// ToDo: check if the current team owns the area
3202 
3203 	status = team_get_address_space(target, &targetAddressSpace);
3204 	if (status != B_OK)
3205 		goto err1;
3206 
3207 	// We will first remove the area, and then reserve its former
3208 	// address range so that we can later reclaim it if the
3209 	// transfer failed.
3210 
3211 	sourceAddressSpace = area->aspace;
3212 
3213 	reserved = _vm_create_reserved_region_struct(&sourceAddressSpace->virtual_map, 0);
3214 	if (reserved == NULL) {
3215 		status = B_NO_MEMORY;
3216 		goto err2;
3217 	}
3218 
3219 	acquire_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
3220 
3221 	reservedAddress = (void *)area->base;
3222 	remove_area_from_virtual_map(sourceAddressSpace, area, true);
3223 	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
3224 		area->size, reserved);
3225 		// famous last words: this cannot fail :)
3226 
3227 	release_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3228 
3229 	if (status != B_OK)
3230 		goto err3;
3231 
3232 	// unmap the area in the source address space
3233 	map = &sourceAddressSpace->translation_map;
3234 	map->ops->lock(map);
3235 	map->ops->unmap(map, area->base, area->base + (area->size - 1));
3236 	map->ops->unlock(map);
3237 
3238 	// insert the area into the target address space
3239 
3240 	acquire_sem_etc(targetAddressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
3241 	// check to see if this aspace has entered DELETE state
3242 	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
3243 		// okay, someone is trying to delete this aspace now, so we can't
3244 		// insert the area, so back out
3245 		status = B_BAD_TEAM_ID;
3246 		goto err4;
3247 	}
3248 
3249 	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
3250 	if (status < B_OK)
3251 		goto err4;
3252 
3253 	// The area was successfully transferred to the new team when we got here
3254 	area->aspace = targetAddressSpace;
3255 
3256 	release_sem_etc(targetAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3257 
3258 	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress, area->size);
3259 	vm_put_aspace(sourceAddressSpace);
3260 		// we keep the reference of the target address space for the
3261 		// area, so we only have to put the one from the source
3262 	vm_put_area(area);
3263 
3264 	return B_OK;
3265 
3266 err4:
3267 	release_sem_etc(targetAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3268 err3:
3269 	// insert the area again into the source address space
3270 	acquire_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0, 0);
3271 	// check to see if this aspace has entered DELETE state
3272 	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
3273 		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS, area->size, area) != B_OK) {
3274 		// We can't insert the area anymore - we have to delete it manually
3275 		vm_cache_remove_area(area->cache_ref, area);
3276 		vm_cache_release_ref(area->cache_ref);
3277 		free(area->name);
3278 		free(area);
3279 		area = NULL;
3280 	}
3281 	release_sem_etc(sourceAddressSpace->virtual_map.sem, WRITE_COUNT, 0);
3282 err2:
3283 	vm_put_aspace(targetAddressSpace);
3284 err1:
3285 	if (area != NULL)
3286 		vm_put_area(area);
3287 	return status;
3288 }
3289 
3290 
3291 area_id
3292 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
3293 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
3294 {
3295 	if (!arch_vm_supports_protection(protection))
3296 		return B_NOT_SUPPORTED;
3297 
3298 	fix_protection(&protection);
3299 
3300 	return vm_map_physical_memory(vm_get_kernel_aspace_id(), name, _virtualAddress,
3301 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
3302 }
3303 
3304 
3305 area_id
3306 clone_area(const char *name, void **_address, uint32 addressSpec, uint32 protection,
3307 	area_id source)
3308 {
3309 	if ((protection & B_KERNEL_PROTECTION) == 0)
3310 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
3311 
3312 	return vm_clone_area(vm_get_kernel_aspace_id(), name, _address, addressSpec,
3313 				protection, REGION_NO_PRIVATE_MAP, source);
3314 }
3315 
3316 
3317 area_id
3318 create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
3319 	uint32 size, uint32 lock, uint32 protection)
3320 {
3321 	fix_protection(&protection);
3322 
3323 	return vm_create_anonymous_area(team->aspace->id, (char *)name, address,
3324 				addressSpec, size, lock, protection);
3325 }
3326 
3327 
3328 area_id
3329 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
3330 	uint32 protection)
3331 {
3332 	fix_protection(&protection);
3333 
3334 	return vm_create_anonymous_area(vm_get_kernel_aspace_id(), (char *)name, _address,
3335 				addressSpec, size, lock, protection);
3336 }
3337 
3338 
3339 status_t
3340 delete_area_etc(struct team *team, area_id area)
3341 {
3342 	return vm_delete_area(team->aspace->id, area);
3343 }
3344 
3345 
3346 status_t
3347 delete_area(area_id area)
3348 {
3349 	return vm_delete_area(vm_get_kernel_aspace_id(), area);
3350 }
3351 
3352 
3353 //	#pragma mark -
3354 
3355 
3356 status_t
3357 _user_init_heap_address_range(addr_t base, addr_t size)
3358 {
3359 	return vm_reserve_address_range(vm_get_current_user_aspace_id(), (void **)&base,
3360 		B_EXACT_ADDRESS, size, RESERVED_AVOID_BASE);
3361 }
3362 
3363 
3364 area_id
3365 _user_area_for(void *address)
3366 {
3367 	return vm_area_for(vm_get_current_user_aspace_id(), (addr_t)address);
3368 }
3369 
3370 
3371 area_id
3372 _user_find_area(const char *userName)
3373 {
3374 	char name[B_OS_NAME_LENGTH];
3375 
3376 	if (!IS_USER_ADDRESS(userName)
3377 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
3378 		return B_BAD_ADDRESS;
3379 
3380 	return find_area(name);
3381 }
3382 
3383 
3384 status_t
3385 _user_get_area_info(area_id area, area_info *userInfo)
3386 {
3387 	area_info info;
3388 	status_t status;
3389 
3390 	if (!IS_USER_ADDRESS(userInfo))
3391 		return B_BAD_ADDRESS;
3392 
3393 	status = get_area_info(area, &info);
3394 	if (status < B_OK)
3395 		return status;
3396 
3397 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
3398 		return B_BAD_ADDRESS;
3399 
3400 	return status;
3401 }
3402 
3403 
3404 status_t
3405 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
3406 {
3407 	status_t status;
3408 	area_info info;
3409 	int32 cookie;
3410 
3411 	if (!IS_USER_ADDRESS(userCookie)
3412 		|| !IS_USER_ADDRESS(userInfo)
3413 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
3414 		return B_BAD_ADDRESS;
3415 
3416 	status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
3417 	if (status != B_OK)
3418 		return status;
3419 
3420 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
3421 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
3422 		return B_BAD_ADDRESS;
3423 
3424 	return status;
3425 }
3426 
3427 
3428 status_t
3429 _user_set_area_protection(area_id area, uint32 newProtection)
3430 {
3431 	if ((newProtection & ~B_USER_PROTECTION) != 0)
3432 		return B_BAD_VALUE;
3433 
3434 	fix_protection(&newProtection);
3435 
3436 	return vm_set_area_protection(vm_get_current_user_aspace_id(), area,
3437 		newProtection);
3438 }
3439 
3440 
3441 status_t
3442 _user_resize_area(area_id area, size_t newSize)
3443 {
3444 	// ToDo: Since we restrict deleting of areas to those owned by the team,
3445 	// we should also do that for resizing (check other functions, too).
3446 	return resize_area(area, newSize);
3447 }
3448 
3449 
3450 status_t
3451 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
3452 {
3453 	status_t status;
3454 	void *address;
3455 
3456 	// filter out some unavailable values (for userland)
3457 	switch (addressSpec) {
3458 		case B_ANY_KERNEL_ADDRESS:
3459 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3460 			return B_BAD_VALUE;
3461 	}
3462 
3463 	if (!IS_USER_ADDRESS(userAddress)
3464 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3465 		return B_BAD_ADDRESS;
3466 
3467 	status = transfer_area(area, &address, addressSpec, target);
3468 	if (status < B_OK)
3469 		return status;
3470 
3471 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
3472 		return B_BAD_ADDRESS;
3473 
3474 	return status;
3475 }
3476 
3477 
3478 area_id
3479 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
3480 	uint32 protection, area_id sourceArea)
3481 {
3482 	char name[B_OS_NAME_LENGTH];
3483 	void *address;
3484 	area_id clonedArea;
3485 
3486 	// filter out some unavailable values (for userland)
3487 	switch (addressSpec) {
3488 		case B_ANY_KERNEL_ADDRESS:
3489 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3490 			return B_BAD_VALUE;
3491 	}
3492 	if ((protection & ~B_USER_PROTECTION) != 0)
3493 		return B_BAD_VALUE;
3494 
3495 	if (!IS_USER_ADDRESS(userName)
3496 		|| !IS_USER_ADDRESS(userAddress)
3497 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
3498 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3499 		return B_BAD_ADDRESS;
3500 
3501 	fix_protection(&protection);
3502 
3503 	clonedArea = vm_clone_area(vm_get_current_user_aspace_id(), name, &address,
3504 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
3505 	if (clonedArea < B_OK)
3506 		return clonedArea;
3507 
3508 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
3509 		delete_area(clonedArea);
3510 		return B_BAD_ADDRESS;
3511 	}
3512 
3513 	return clonedArea;
3514 }
3515 
3516 
3517 area_id
3518 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
3519 	size_t size, uint32 lock, uint32 protection)
3520 {
3521 	char name[B_OS_NAME_LENGTH];
3522 	area_id area;
3523 	void *address;
3524 
3525 	// filter out some unavailable values (for userland)
3526 	switch (addressSpec) {
3527 		case B_ANY_KERNEL_ADDRESS:
3528 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3529 			return B_BAD_VALUE;
3530 	}
3531 	if ((protection & ~B_USER_PROTECTION) != 0)
3532 		return B_BAD_VALUE;
3533 
3534 	if (!IS_USER_ADDRESS(userName)
3535 		|| !IS_USER_ADDRESS(userAddress)
3536 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
3537 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3538 		return B_BAD_ADDRESS;
3539 
3540 	if (addressSpec == B_EXACT_ADDRESS
3541 		&& IS_KERNEL_ADDRESS(address))
3542 		return B_BAD_VALUE;
3543 
3544 	fix_protection(&protection);
3545 
3546 	area = vm_create_anonymous_area(vm_get_current_user_aspace_id(), (char *)name, &address,
3547 				addressSpec, size, lock, protection);
3548 
3549 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
3550 		delete_area(area);
3551 		return B_BAD_ADDRESS;
3552 	}
3553 
3554 	return area;
3555 }
3556 
3557 
3558 status_t
3559 _user_delete_area(area_id area)
3560 {
3561 	// Unlike the BeOS implementation, you can now only delete areas
3562 	// that you have created yourself from userland.
3563 	// The documentation for delete_area() explicitly states that this
3564 	// will be restricted in the future, and so it will.
3565 	return vm_delete_area(vm_get_current_user_aspace_id(), area);
3566 }
3567 
3568