xref: /haiku/src/system/kernel/vm/vm.cpp (revision a4f6a81235ca2522c01f532de13cad9b729d4029)
1 /*
2  * Copyright 2002-2005, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include "vm_store_anonymous_noswap.h"
11 #include "vm_store_device.h"
12 #include "vm_store_null.h"
13 
14 #include <OS.h>
15 #include <KernelExport.h>
16 
17 #include <vm.h>
18 #include <vm_address_space.h>
19 #include <vm_priv.h>
20 #include <vm_page.h>
21 #include <vm_cache.h>
22 #include <vm_low_memory.h>
23 #include <file_cache.h>
24 #include <memheap.h>
25 #include <debug.h>
26 #include <console.h>
27 #include <int.h>
28 #include <smp.h>
29 #include <lock.h>
30 #include <thread.h>
31 #include <team.h>
32 
33 #include <boot/stage2.h>
34 #include <boot/elf.h>
35 
36 #include <arch/cpu.h>
37 #include <arch/vm.h>
38 
39 #include <string.h>
40 #include <ctype.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 
44 //#define TRACE_VM
45 //#define TRACE_FAULTS
46 #ifdef TRACE_VM
47 #	define TRACE(x) dprintf x
48 #else
49 #	define TRACE(x) ;
50 #endif
51 #ifdef TRACE_FAULTS
52 #	define FTRACE(x) dprintf x
53 #else
54 #	define FTRACE(x) ;
55 #endif
56 
57 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
58 #define ROUNDOWN(a, b) (((a) / (b)) * (b))
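
// Example: ROUNDUP(0x1234, 0x1000) yields 0x2000, ROUNDOWN(0x1234, 0x1000)
// yields 0x1000. Note that ROUNDUP only works when "b" is a power of two,
// while ROUNDOWN works for any non-zero "b".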
59 
60 
61 extern vm_address_space *kernel_aspace;
62 
63 #define REGION_HASH_TABLE_SIZE 1024
64 static area_id sNextAreaID;
65 static hash_table *sAreaHash;
66 static sem_id sAreaHashLock;
67 
68 static off_t sAvailableMemory;
69 static benaphore sAvailableMemoryLock;
70 
71 // function declarations
72 static vm_area *_vm_create_area_struct(vm_address_space *addressSpace, const char *name, uint32 wiring, uint32 protection);
73 static status_t map_backing_store(vm_address_space *addressSpace, vm_store *store, void **vaddr,
74 	off_t offset, addr_t size, uint32 addressSpec, int wiring, int protection, int mapping, vm_area **_area, const char *area_name);
75 static status_t vm_soft_fault(addr_t address, bool is_write, bool is_user);
76 static vm_area *vm_area_lookup(vm_address_space *addressSpace, addr_t address);
77 static bool vm_put_area(vm_area *area);
78 
79 
80 static int
81 area_compare(void *_area, const void *key)
82 {
83 	vm_area *area = (vm_area *)_area;
84 	const area_id *id = (const area_id *)key;
85 
86 	if (area->id == *id)
87 		return 0;
88 
89 	return -1;
90 }
91 
92 
93 static uint32
94 area_hash(void *_area, const void *key, uint32 range)
95 {
96 	vm_area *area = (vm_area *)_area;
97 	const area_id *id = (const area_id *)key;
98 
99 	if (area != NULL)
100 		return area->id % range;
101 
102 	return (uint32)*id % range;
103 }
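
// Sketch of how the two callbacks above are wired up (the hash_init()
// signature - table size, offset of the link member, compare and hash
// function - is assumed from the kernel's hash table API; the real
// initialization is done during VM startup):
//
//	sAreaHash = hash_init(REGION_HASH_TABLE_SIZE,
//		offsetof(vm_area, hash_next), &area_compare, &area_hash);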
104 
105 
106 static vm_area *
107 vm_get_area(area_id id)
108 {
109 	vm_area *area;
110 
111 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
112 
113 	area = (vm_area *)hash_lookup(sAreaHash, &id);
114 	if (area != NULL)
115 		atomic_add(&area->ref_count, 1);
116 
117 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
118 
119 	return area;
120 }
121 
122 
123 static vm_area *
124 _vm_create_reserved_region_struct(vm_address_space *addressSpace, uint32 flags)
125 {
126 	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
127 	if (reserved == NULL)
128 		return NULL;
129 
130 	memset(reserved, 0, sizeof(vm_area));
131 	reserved->id = RESERVED_AREA_ID;
132 		// this marks it as reserved space
133 	reserved->protection = flags;
134 	reserved->address_space = addressSpace;
135 
136 	return reserved;
137 }
138 
139 
140 static vm_area *
141 _vm_create_area_struct(vm_address_space *addressSpace, const char *name,
142 	uint32 wiring, uint32 protection)
143 {
144 	vm_area *area = NULL;
145 
146 	// restrict the area name to B_OS_NAME_LENGTH
147 	size_t length = strlen(name) + 1;
148 	if (length > B_OS_NAME_LENGTH)
149 		length = B_OS_NAME_LENGTH;
150 
151 	area = (vm_area *)malloc(sizeof(vm_area));
152 	if (area == NULL)
153 		return NULL;
154 
155 	area->name = (char *)malloc(length);
156 	if (area->name == NULL) {
157 		free(area);
158 		return NULL;
159 	}
160 	strlcpy(area->name, name, length);
161 
162 	area->id = atomic_add(&sNextAreaID, 1);
163 	area->base = 0;
164 	area->size = 0;
165 	area->protection = protection;
166 	area->wiring = wiring;
167 	area->memory_type = 0;
168 	area->ref_count = 1;
169 
170 	area->cache_ref = NULL;
171 	area->cache_offset = 0;
172 
173 	area->address_space = addressSpace;
174 	area->address_space_next = NULL;
175 	area->cache_next = area->cache_prev = NULL;
176 	area->hash_next = NULL;
177 
178 	return area;
179 }
180 
181 
182 /**	Finds a reserved area that covers the region spanned by \a start and
183  *	\a size, inserts the \a area into that region and makes sure that
184  *	there are reserved regions for the remaining parts.
185  */
186 
187 static status_t
188 find_reserved_area(vm_address_space *addressSpace, addr_t start,
189 	addr_t size, vm_area *area)
190 {
191 	vm_area *next, *last = NULL;
192 
193 	next = addressSpace->areas;
194 	while (next) {
195 		if (next->base <= start && next->base + next->size >= start + size) {
196 			// this area covers the requested range
197 			if (next->id != RESERVED_AREA_ID) {
198 				// but it's not reserved space, it's a real area
199 				return B_BAD_VALUE;
200 			}
201 
202 			break;
203 		}
204 		last = next;
205 		next = next->address_space_next;
206 	}
207 	if (next == NULL)
208 		return B_ENTRY_NOT_FOUND;
209 
210 	// now we have to transfer the requested part of the reserved
211 	// range to the new area - and remove, resize or split the old
212 	// reserved area.
213 
214 	if (start == next->base) {
215 		// the area starts at the beginning of the reserved range
216 		if (last)
217 			last->address_space_next = area;
218 		else
219 			addressSpace->areas = area;
220 
221 		if (size == next->size) {
222 			// the new area fully covers the reserved range
223 			area->address_space_next = next->address_space_next;
224 			free(next);
225 		} else {
226 			// resize the reserved range behind the area
227 			area->address_space_next = next;
228 			next->base += size;
229 			next->size -= size;
230 		}
231 	} else if (start + size == next->base + next->size) {
232 		// the area is at the end of the reserved range
233 		area->address_space_next = next->address_space_next;
234 		next->address_space_next = area;
235 
236 		// resize the reserved range before the area
237 		next->size = start - next->base;
238 	} else {
239 		// the area splits the reserved range into two separate ones
240 		// we need a new reserved area to cover this space
241 		vm_area *reserved = _vm_create_reserved_region_struct(addressSpace,
242 			next->protection);
243 		if (reserved == NULL)
244 			return B_NO_MEMORY;
245 
246 		reserved->address_space_next = next->address_space_next;
247 		area->address_space_next = reserved;
248 		next->address_space_next = area;
249 
250 		// resize regions
251 		reserved->size = next->base + next->size - start - size;
252 		next->size = start - next->base;
253 		reserved->base = start + size;
254 		reserved->cache_offset = next->cache_offset;
255 	}
256 
257 	area->base = start;
258 	area->size = size;
259 	addressSpace->change_count++;
260 
261 	return B_OK;
262 }
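
// Example of the split case above: with a reserved range covering
// [0x1000, 0x9000) and an area inserted at start 0x3000 with size 0x2000,
// the reserved range is shrunk to [0x1000, 0x3000), the area takes
// [0x3000, 0x5000), and a new reserved range is created for
// [0x5000, 0x9000) that inherits the original range's flags.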
263 
264 
265 /**	must be called with this address space's sem held */
266 
267 static status_t
268 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start,
269 	addr_t size, addr_t end, uint32 addressSpec, vm_area *area)
270 {
271 	vm_area *last = NULL;
272 	vm_area *next;
273 	bool foundSpot = false;
274 
275 	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
276 		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
277 		size, end, addressSpec, area));
278 
279 	// do some sanity checking
280 	if (start < addressSpace->base || size == 0
281 		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
282 		|| start + size > end)
283 		return B_BAD_ADDRESS;
284 
285 	if (addressSpec == B_EXACT_ADDRESS) {
286 		// search for a reserved area
287 		status_t status = find_reserved_area(addressSpace, start, size, area);
288 		if (status == B_OK || status == B_BAD_VALUE)
289 			return status;
290 
291 		// there was no reserved area, and the slot doesn't seem to be used already
292 		// ToDo: this could be further optimized.
293 	}
294 
295 	// walk up to the spot where we should start searching
296 second_chance:
297 	next = addressSpace->areas;
298 	while (next) {
299 		if (next->base >= start + size) {
300 			// we have a winner
301 			break;
302 		}
303 		last = next;
304 		next = next->address_space_next;
305 	}
306 
307 	// find the right spot depending on the address specification - the area
308 	// will be inserted directly after "last" ("next" is not referenced anymore)
309 
310 	switch (addressSpec) {
311 		case B_ANY_ADDRESS:
312 		case B_ANY_KERNEL_ADDRESS:
313 		case B_ANY_KERNEL_BLOCK_ADDRESS:
314 			// find a hole big enough for a new area
315 			if (!last) {
316 				// see if we can build it at the beginning of the virtual map
317 				if (!next || (next->base >= addressSpace->base + size)) {
318 					foundSpot = true;
319 					area->base = addressSpace->base;
320 					break;
321 				}
322 				last = next;
323 				next = next->address_space_next;
324 			}
325 			// keep walking
326 			while (next) {
327 				if (next->base >= last->base + last->size + size) {
328 					// we found a spot (it'll be filled up below)
329 					break;
330 				}
331 				last = next;
332 				next = next->address_space_next;
333 			}
334 
335 			if ((addressSpace->base + (addressSpace->size - 1))
336 					>= (last->base + last->size + (size - 1))) {
337 				// got a spot
338 				foundSpot = true;
339 				area->base = last->base + last->size;
340 				break;
341 			} else {
342 				// we didn't find a free spot - if there were any reserved areas with
343 				// the RESERVED_AVOID_BASE flag set, we can now test those for free
344 				// space
345 				// ToDo: it would make sense to start with the biggest of them
346 				next = addressSpace->areas;
347 				last = NULL;
348 				for (; next != NULL; last = next, next = next->address_space_next) {
349 					// ToDo: take free space after the reserved area into account!
					if (next->id != RESERVED_AREA_ID
						|| (next->protection & RESERVED_AVOID_BASE) == 0)
						continue;
350 					if (next->size == size) {
351 						// the reserved area is entirely covered, and thus, removed
352 						if (last)
353 							last->address_space_next = next->address_space_next;
354 						else
355 							addressSpace->areas = next->address_space_next;
356 
357 						foundSpot = true;
358 						area->base = next->base;
359 						free(next);
360 						break;
361 					}
362 					if (next->size >= size) {
363 						// the new area will be placed at the end of the reserved
364 						// area, and the reserved area will be resized to make space
365 						foundSpot = true;
366 						next->size -= size;
367 						last = next;
368 						area->base = next->base + next->size;
369 						break;
370 					}
371 				}
372 			}
373 			break;
374 
375 		case B_BASE_ADDRESS:
376 			// find a hole big enough for a new area beginning with "start"
377 			if (!last) {
378 				// see if we can build it at the beginning of the specified start
379 				if (!next || (next->base >= start + size)) {
380 					foundSpot = true;
381 					area->base = start;
382 					break;
383 				}
384 				last = next;
385 				next = next->address_space_next;
386 			}
387 			// keep walking
388 			while (next) {
389 				if (next->base >= last->base + last->size + size) {
390 					// we found a spot (it'll be filled up below)
391 					break;
392 				}
393 				last = next;
394 				next = next->address_space_next;
395 			}
396 
397 			if ((addressSpace->base + (addressSpace->size - 1))
398 					>= (last->base + last->size + (size - 1))) {
399 				// got a spot
400 				foundSpot = true;
401 				if (last->base + last->size <= start)
402 					area->base = start;
403 				else
404 					area->base = last->base + last->size;
405 				break;
406 			}
407 			// we didn't find a free spot in the requested range, so we'll
408 			// try again without any restrictions
409 			start = addressSpace->base;
410 			addressSpec = B_ANY_ADDRESS;
411 			last = NULL;
412 			goto second_chance;
413 
414 		case B_EXACT_ADDRESS:
415 			// see if we can create it exactly here
416 			if (!last) {
417 				if (!next || (next->base >= start + size)) {
418 					foundSpot = true;
419 					area->base = start;
420 					break;
421 				}
422 			} else {
423 				if (next) {
424 					if (last->base + last->size <= start && next->base >= start + size) {
425 						foundSpot = true;
426 						area->base = start;
427 						break;
428 					}
429 				} else {
430 					if ((last->base + (last->size - 1)) <= start - 1) {
431 						foundSpot = true;
432 						area->base = start;
433 					}
434 				}
435 			}
436 			break;
437 		default:
438 			return B_BAD_VALUE;
439 	}
440 
441 	if (!foundSpot)
442 		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;
443 
444 	area->size = size;
445 	if (last) {
446 		area->address_space_next = last->address_space_next;
447 		last->address_space_next = area;
448 	} else {
449 		area->address_space_next = addressSpace->areas;
450 		addressSpace->areas = area;
451 	}
452 	addressSpace->change_count++;
453 	return B_OK;
454 }
455 
456 
457 /**	This inserts the area you pass into the specified address space.
458  *	It will also set the "_address" argument to its base address when
459  *	the call succeeds.
460  *	You need to hold the vm_address_space semaphore.
461  */
462 
463 static status_t
464 insert_area(vm_address_space *addressSpace, void **_address,
465 	uint32 addressSpec, addr_t size, vm_area *area)
466 {
467 	addr_t searchBase, searchEnd;
468 	status_t status;
469 
470 	switch (addressSpec) {
471 		case B_EXACT_ADDRESS:
472 			searchBase = (addr_t)*_address;
473 			searchEnd = (addr_t)*_address + size;
474 			break;
475 
476 		case B_BASE_ADDRESS:
477 			searchBase = (addr_t)*_address;
478 			searchEnd = addressSpace->base + (addressSpace->size - 1);
479 			break;
480 
481 		case B_ANY_ADDRESS:
482 		case B_ANY_KERNEL_ADDRESS:
483 		case B_ANY_KERNEL_BLOCK_ADDRESS:
484 			searchBase = addressSpace->base;
485 			searchEnd = addressSpace->base + (addressSpace->size - 1);
486 			break;
487 
488 		default:
489 			return B_BAD_VALUE;
490 	}
491 
492 	status = find_and_insert_area_slot(addressSpace, searchBase, size,
493 				searchEnd, addressSpec, area);
494 	if (status == B_OK) {
495 		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
496 		//		vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
497 		*_address = (void *)area->base;
498 	}
499 
500 	return status;
501 }
502 
503 
504 static status_t
505 map_backing_store(vm_address_space *addressSpace, vm_store *store, void **_virtualAddress,
506 	off_t offset, addr_t size, uint32 addressSpec, int wiring, int protection,
507 	int mapping, vm_area **_area, const char *areaName)
508 {
509 	vm_cache *cache;
510 	vm_cache_ref *cache_ref;
511 	vm_area *area;
512 
513 	status_t err;
514 
515 	TRACE(("map_backing_store: aspace %p, store %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n",
516 		addressSpace, store, *_virtualAddress, offset, size, addressSpec,
517 		wiring, protection, _area, areaName));
518 
519 	area = _vm_create_area_struct(addressSpace, areaName, wiring, protection);
520 	if (area == NULL)
521 		return B_NO_MEMORY;
522 
523 	cache = store->cache;
524 	cache_ref = cache->ref;
525 
526 	// if this is a private map, we need to create a new cache & store object
527 	// pair to handle the private copies of pages as they are written to
528 	if (mapping == REGION_PRIVATE_MAP) {
529 		vm_cache *nu_cache;
530 		vm_cache_ref *nu_cache_ref = NULL;
531 		vm_store *nu_store;
532 
533 		// ToDo: panic???
534 		// create an anonymous store object
535 		nu_store = vm_store_create_anonymous_noswap((protection & B_STACK_AREA) != 0, USER_STACK_GUARD_PAGES);
536 		if (nu_store == NULL)
537 			panic("map_backing_store: vm_create_store_anonymous_noswap returned NULL");
538 		nu_cache = vm_cache_create(nu_store);
539 		if (nu_cache == NULL)
540 			panic("map_backing_store: vm_cache_create returned NULL");
541 		nu_cache_ref = vm_cache_ref_create(nu_cache);
542 		if (nu_cache_ref == NULL)
543 			panic("map_backing_store: vm_cache_ref_create returned NULL");
544 		nu_cache->temporary = 1;
545 		nu_cache->scan_skip = cache->scan_skip;
546 
547 		nu_cache->source = cache;
548 
549 		cache = nu_cache;
550 		cache_ref = cache->ref;
551 		store = nu_store;
552 		cache->virtual_size = offset + size;
553 	}
554 
555 	err = vm_cache_set_minimal_commitment(cache_ref, offset + size);
556 	if (err != B_OK)
557 		goto err1;
558 
559 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
560 
561 	// check to see if this address space has entered DELETE state
562 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
563 		// someone is trying to delete this address space now, so we can't
564 		// insert the area; back out
565 		err = B_BAD_TEAM_ID;
566 		goto err2;
567 	}
568 
569 	err = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
570 	if (err < B_OK)
571 		goto err2;
572 
573 	// attach the cache to the area
574 	area->cache_ref = cache_ref;
575 	area->cache_offset = offset;
576 	// point the cache back to the area
577 	vm_cache_insert_area(cache_ref, area);
578 
579 	// insert the area in the global area hash table
580 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
581 	hash_insert(sAreaHash, area);
582 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
583 
584 	// grab a ref to the address space (the area holds this)
585 	atomic_add(&addressSpace->ref_count, 1);
586 
587 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
588 
589 	*_area = area;
590 	return B_OK;
591 
592 err2:
593 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
594 err1:
595 	if (mapping == REGION_PRIVATE_MAP) {
596 		// we created this cache, so we must delete it again
597 		vm_cache_release_ref(cache_ref);
598 	}
599 
600 	free(area->name);
601 	free(area);
602 	return err;
603 }
604 
605 
606 status_t
607 vm_unreserve_address_range(team_id team, void *address, addr_t size)
608 {
609 	vm_address_space *addressSpace;
610 	vm_area *area, *last = NULL;
611 	status_t status = B_OK;
612 
613 	addressSpace = vm_get_address_space_by_id(team);
614 	if (addressSpace == NULL)
615 		return B_BAD_TEAM_ID;
616 
617 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
618 
619 	// check to see if this address space has entered DELETE state
620 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
621 		// someone is trying to delete this address space now, so we can't
622 		// touch its area list; back out
623 		status = B_BAD_TEAM_ID;
624 		goto out;
625 	}
626 
627 	// search area list and remove any matching reserved ranges
628 
629 	area = addressSpace->areas;
630 	while (area) {
631 		// the area must be completely part of the reserved range
632 		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
633 			&& area->base + area->size <= (addr_t)address + size) {
634 			// remove reserved range
635 			vm_area *reserved = area;
636 			if (last)
637 				last->address_space_next = reserved->address_space_next;
638 			else
639 				addressSpace->areas = reserved->address_space_next;
640 
641 			area = reserved->address_space_next;
642 			free(reserved);
643 			continue;
644 		}
645 
646 		last = area;
647 		area = area->address_space_next;
648 	}
649 
650 out:
651 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
652 	vm_put_address_space(addressSpace);
653 	return status;
654 }
655 
656 
657 status_t
658 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec,
659 	addr_t size, uint32 flags)
660 {
661 	vm_address_space *addressSpace;
662 	vm_area *area;
663 	status_t status = B_OK;
664 
665 	if (size == 0)
666 		return B_BAD_VALUE;
667 
668 	addressSpace = vm_get_address_space_by_id(team);
669 	if (addressSpace == NULL)
670 		return B_BAD_TEAM_ID;
671 
672 	area = _vm_create_reserved_region_struct(addressSpace, flags);
673 	if (area == NULL) {
674 		status = B_NO_MEMORY;
675 		goto err1;
676 	}
677 
678 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
679 
680 	// check to see if this address space has entered DELETE state
681 	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
682 		// okay, someone is trying to delete this address space now, so we can't
683 		// insert the area, let's back out
684 		status = B_BAD_TEAM_ID;
685 		goto err2;
686 	}
687 
688 	status = insert_area(addressSpace, _address, addressSpec, size, area);
689 	if (status < B_OK)
690 		goto err2;
691 
692 	// the area is now reserved!
693 
694 	area->cache_offset = area->base;
695 		// we cache the original base address here
696 
697 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
698 	return B_OK;
699 
700 err2:
701 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
702 	free(area);
703 err1:
704 	vm_put_address_space(addressSpace);
705 	return status;
706 }
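
// Usage sketch (sizes and flags are only an example): reserve a range of
// kernel address space and release it again later; the base address chosen
// for B_ANY_KERNEL_ADDRESS is returned through the pointer argument.
//
//	void *reservedBase;
//	status_t status = vm_reserve_address_range(vm_kernel_address_space_id(),
//		&reservedBase, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, 0);
//	if (status == B_OK) {
//		...
//		vm_unreserve_address_range(vm_kernel_address_space_id(),
//			reservedBase, 16 * B_PAGE_SIZE);
//	}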
707 
708 
709 area_id
710 vm_create_anonymous_area(team_id aid, const char *name, void **address,
711 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
712 {
713 	vm_area *area;
714 	vm_cache *cache;
715 	vm_store *store;
716 	vm_address_space *addressSpace;
717 	vm_cache_ref *cache_ref;
718 	vm_page *page = NULL;
719 	bool isStack = (protection & B_STACK_AREA) != 0;
720 	bool canOvercommit = false;
721 	status_t status;
722 
723 	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));
724 
725 	if (!arch_vm_supports_protection(protection))
726 		return B_NOT_SUPPORTED;
727 
728 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
729 		canOvercommit = true;
730 
731 #ifdef DEBUG_KERNEL_STACKS
732 	if ((protection & B_KERNEL_STACK_AREA) != 0)
733 		isStack = true;
734 #endif
735 
736 	/* check parameters */
737 	switch (addressSpec) {
738 		case B_ANY_ADDRESS:
739 		case B_EXACT_ADDRESS:
740 		case B_BASE_ADDRESS:
741 		case B_ANY_KERNEL_ADDRESS:
742 			break;
743 
744 		default:
745 			return B_BAD_VALUE;
746 	}
747 
748 	switch (wiring) {
749 		case B_NO_LOCK:
750 		case B_FULL_LOCK:
751 		case B_LAZY_LOCK:
752 		case B_CONTIGUOUS:
753 		case B_ALREADY_WIRED:
754 			break;
755 		case B_LOMEM:
756 		//case B_SLOWMEM:
757 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
758 			wiring = B_FULL_LOCK;
759 			break;
760 		default:
761 			return B_BAD_VALUE;
762 	}
763 
764 	addressSpace = vm_get_address_space_by_id(aid);
765 	if (addressSpace == NULL)
766 		return B_BAD_TEAM_ID;
767 
768 	size = PAGE_ALIGN(size);
769 
770 	if (wiring == B_CONTIGUOUS) {
771 		// we try to allocate the contiguous page run here upfront, as this
772 		// may easily fail if physical memory is fragmented
773 		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
774 		if (page == NULL) {
775 			vm_put_address_space(addressSpace);
776 			return B_NO_MEMORY;
777 		}
778 	}
779 
780 	// ToDo: panic???
781 	// create an anonymous store object
782 	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ?
783 		((protection & B_USER_PROTECTION) != 0 ?
784 			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
785 	if (store == NULL)
786 		panic("vm_create_anonymous_area: vm_create_store_anonymous_noswap returned NULL");
787 	cache = vm_cache_create(store);
788 	if (cache == NULL)
789 		panic("vm_create_anonymous_area: vm_cache_create returned NULL");
790 	cache_ref = vm_cache_ref_create(cache);
791 	if (cache_ref == NULL)
792 		panic("vm_create_anonymous_area: vm_cache_ref_create returned NULL");
793 	cache->temporary = 1;
794 
795 	switch (wiring) {
796 		case B_LAZY_LOCK:	// for now
797 		case B_FULL_LOCK:
798 		case B_CONTIGUOUS:
799 		case B_ALREADY_WIRED:
800 			cache->scan_skip = 1;
801 			break;
802 		case B_NO_LOCK:
803 		//case B_LAZY_LOCK:
804 			cache->scan_skip = 0;
805 			break;
806 	}
807 
808 	status = map_backing_store(addressSpace, store, address, 0, size, addressSpec, wiring,
809 		protection, REGION_NO_PRIVATE_MAP, &area, name);
810 	if (status < B_OK) {
811 		vm_cache_release_ref(cache_ref);
812 		vm_put_address_space(addressSpace);
813 
814 		if (wiring == B_CONTIGUOUS) {
815 			// free the contiguous page run we had allocated upfront
816 			addr_t pageNumber = page->physical_page_number;
817 			int32 i;
818 			for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
819 				page = vm_lookup_page(pageNumber);
820 				if (page == NULL)
821 					panic("couldn't lookup physical page just allocated\n");
822 
823 				vm_page_set_state(page, PAGE_STATE_FREE);
824 			}
825 		}
826 		return status;
827 	}
828 
829 	cache_ref = store->cache->ref;
830 	switch (wiring) {
831 		case B_NO_LOCK:
832 		case B_LAZY_LOCK:
833 			// do nothing - the pages are mapped in as needed
834 			break;
835 
836 		case B_FULL_LOCK:
837 		{
838 			// Pages aren't mapped at this point, but we just simulate a fault on
839 			// every page, which should allocate them
840 			// ToDo: at this point, it would probably be cheaper to allocate
841 			// and map the pages directly
842 			addr_t va;
843 			for (va = area->base; va < area->base + area->size; va += B_PAGE_SIZE) {
844 #ifdef DEBUG_KERNEL_STACKS
845 #	ifdef STACK_GROWS_DOWNWARDS
846 				if (isStack && va < area->base + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
847 #	else
848 				if (isStack && va >= area->base + area->size - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
849 #	endif
850 					continue;
851 #endif
852 				vm_soft_fault(va, false, false);
853 			}
854 			break;
855 		}
856 
857 		case B_ALREADY_WIRED:
858 		{
859 			// the pages should already be mapped. This is only really useful during
860 			// boot time. Find the appropriate vm_page objects and stick them in
861 			// the cache object.
862 			vm_translation_map *map = &addressSpace->translation_map;
863 			addr_t va;
864 			addr_t pa;
865 			uint32 flags;
866 			int err;
867 			off_t offset = 0;
868 
869 			if (!kernel_startup)
870 				panic("ALREADY_WIRED flag used outside kernel startup\n");
871 
872 			mutex_lock(&cache_ref->lock);
873 			(*map->ops->lock)(map);
874 			for (va = area->base; va < area->base + area->size; va += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
875 				err = (*map->ops->query)(map, va, &pa, &flags);
876 				if (err < 0) {
877 //					dprintf("vm_create_anonymous_area: error looking up mapping for va 0x%x\n", va);
878 					continue;
879 				}
880 				page = vm_lookup_page(pa / B_PAGE_SIZE);
881 				if (page == NULL) {
882 //					dprintf("vm_create_anonymous_area: error looking up vm_page structure for pa 0x%x\n", pa);
883 					continue;
884 				}
885 				atomic_add(&page->ref_count, 1);
886 				vm_page_set_state(page, PAGE_STATE_WIRED);
887 				vm_cache_insert_page(cache_ref, page, offset);
888 			}
889 			(*map->ops->unlock)(map);
890 			mutex_unlock(&cache_ref->lock);
891 			break;
892 		}
893 
894 		case B_CONTIGUOUS:
895 		{
896 			// We have already allocated our contiguous page run, so we can now just
897 			// map them in the address space
898 			vm_translation_map *map = &addressSpace->translation_map;
899 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
900 			addr_t virtualAddress;
901 			off_t offset = 0;
902 
903 			mutex_lock(&cache_ref->lock);
904 			(*map->ops->lock)(map);
905 
906 			for (virtualAddress = area->base; virtualAddress < area->base + area->size;
907 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE,
908 					physicalAddress += B_PAGE_SIZE) {
909 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
910 				if (page == NULL)
911 					panic("couldn't lookup physical page just allocated\n");
912 
913 				atomic_add(&page->ref_count, 1);
914 				status = (*map->ops->map)(map, virtualAddress, physicalAddress, protection);
915 				if (status < 0)
916 					panic("couldn't map physical page in page run\n");
917 
918 				vm_page_set_state(page, PAGE_STATE_WIRED);
919 				vm_cache_insert_page(cache_ref, page, offset);
920 			}
921 
922 			(*map->ops->unlock)(map);
923 			mutex_unlock(&cache_ref->lock);
924 			break;
925 		}
926 
927 		default:
928 			break;
929 	}
930 	vm_put_address_space(addressSpace);
931 
932 	TRACE(("vm_create_anonymous_area: done\n"));
933 
934 	return area->id;
935 }
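
// Usage sketch (name and size are only an example): create a fully locked,
// kernel-only area backed by anonymous memory; on success the virtual base
// address of the new area is returned through "address".
//
//	void *address;
//	area_id id = vm_create_anonymous_area(vm_kernel_address_space_id(),
//		"example buffer", &address, B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE,
//		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	if (id < B_OK)
//		return id;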
936 
937 
938 area_id
939 vm_map_physical_memory(team_id areaID, const char *name, void **_address,
940 	uint32 addressSpec, addr_t size, uint32 protection,
941 	addr_t physicalAddress)
942 {
943 	vm_area *area;
944 	vm_cache *cache;
945 	vm_cache_ref *cacheRef;
946 	vm_store *store;
947 	addr_t mapOffset;
948 	status_t status;
949 	vm_address_space *addressSpace = vm_get_address_space_by_id(areaID);
950 
951 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, spec = %ld,"
952 		" size = %lu, protection = %ld, phys = %p)\n",
953 		areaID, name, _address, addressSpec, size, protection,
954 		(void *)physicalAddress));
955 
956 	if (!arch_vm_supports_protection(protection))
957 		return B_NOT_SUPPORTED;
958 
959 	if (addressSpace == NULL)
960 		return B_BAD_TEAM_ID;
961 
962 	// if the physical address is not page aligned, move it down to the
963 	// previous page boundary and enlarge the area accordingly
964 	mapOffset = physicalAddress % B_PAGE_SIZE;
965 	size += mapOffset;
966 	physicalAddress -= mapOffset;
967 
968 	size = PAGE_ALIGN(size);
969 
970 	// create a device store object
971 	// TODO: panic???
972 	store = vm_store_create_device(physicalAddress);
973 	if (store == NULL)
974 		panic("vm_map_physical_memory: vm_store_create_device returned NULL");
975 	cache = vm_cache_create(store);
976 	if (cache == NULL)
977 		panic("vm_map_physical_memory: vm_cache_create returned NULL");
978 	cacheRef = vm_cache_ref_create(cache);
979 	if (cacheRef == NULL)
980 		panic("vm_map_physical_memory: vm_cache_ref_create returned NULL");
981 
982 	// tell the page scanner to skip over this area, its pages are special
983 	cache->scan_skip = 1;
984 
985 	status = map_backing_store(addressSpace, store, _address, 0, size,
986 		addressSpec & ~B_MTR_MASK, 0, protection, REGION_NO_PRIVATE_MAP, &area, name);
987 	if (status < B_OK)
988 		vm_cache_release_ref(cacheRef);
989 
990 	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
991 		// set requested memory type
992 		status = arch_vm_set_memory_type(area, physicalAddress,
993 			addressSpec & B_MTR_MASK);
994 		if (status < B_OK)
995 			vm_put_area(area);
996 	}
997 
998 	if (status >= B_OK) {
999 		// make sure our area is mapped in completely
1000 		// (even if that makes the fault routine pretty much useless)
1001 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1002 			store->ops->fault(store, addressSpace, offset);
1003 		}
1004 	}
1005 
1006 	vm_put_address_space(addressSpace);
1007 	if (status < B_OK)
1008 		return status;
1009 
1010 	// modify the pointer returned to be offset back into the new area
1011 	// the same way the physical address in was offset
1012 	*_address = (void *)((addr_t)*_address + mapOffset);
1013 
1014 	return area->id;
1015 }
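
// Usage sketch (the physical address and size are only an example): map a
// memory mapped I/O range into the kernel address space; the returned
// pointer already includes the sub-page offset handled above.
//
//	void *registers;
//	area_id id = vm_map_physical_memory(vm_kernel_address_space_id(),
//		"example mmio", &registers, B_ANY_KERNEL_ADDRESS, 0x1000,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfe000000);
//	if (id < B_OK)
//		return id;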
1016 
1017 
1018 area_id
1019 vm_create_null_area(team_id aid, const char *name, void **address, uint32 addressSpec, addr_t size)
1020 {
1021 	vm_area *area;
1022 	vm_cache *cache;
1023 	vm_cache_ref *cache_ref;
1024 	vm_store *store;
1025 //	addr_t map_offset;
1026 	int err;
1027 
1028 	vm_address_space *addressSpace = vm_get_address_space_by_id(aid);
1029 	if (addressSpace == NULL)
1030 		return B_BAD_TEAM_ID;
1031 
1032 	size = PAGE_ALIGN(size);
1033 
1034 	// create a null store object
1035 	// TODO: panic???
1036 	store = vm_store_create_null();
1037 	if (store == NULL)
1038 		panic("vm_create_null_area: vm_store_create_null returned NULL");
1039 	cache = vm_cache_create(store);
1040 	if (cache == NULL)
1041 		panic("vm_create_null_area: vm_cache_create returned NULL");
1042 	cache_ref = vm_cache_ref_create(cache);
1043 	if (cache_ref == NULL)
1044 		panic("vm_create_null_area: vm_cache_ref_create returned NULL");
1045 	// tell the page scanner to skip over this area, no pages will be mapped here
1046 	cache->scan_skip = 1;
1047 
1048 	err = map_backing_store(addressSpace, store, address, 0, size, addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);
1049 	vm_put_address_space(addressSpace);
1050 
1051 	if (err < B_OK) {
1052 		vm_cache_release_ref(cache_ref);
1053 		return err;
1054 	}
1055 
1056 	return area->id;
1057 }
1058 
1059 
1060 status_t
1061 vm_create_vnode_cache(void *vnode, struct vm_cache_ref **_cacheRef)
1062 {
1063 	vm_cache_ref *cacheRef;
1064 	vm_cache *cache;
1065 	vm_store *store;
1066 
1067 	// create a vnode store object
1068 	store = vm_create_vnode_store(vnode);
1069 	if (store == NULL) {
1070 		dprintf("vm_create_vnode_cache: couldn't create vnode store\n");
1071 		return B_NO_MEMORY;
1072 	}
1073 
1074 	cache = vm_cache_create(store);
1075 	if (cache == NULL) {
1076 		dprintf("vm_create_vnode_cache: vm_cache_create returned NULL\n");
1077 		return B_NO_MEMORY;
1078 	}
1079 
1080 	cacheRef = vm_cache_ref_create(cache);
1081 	if (cacheRef == NULL) {
1082 		dprintf("vm_create_vnode_cache: vm_cache_ref_create returned NULL\n");
1083 		return B_NO_MEMORY;
1084 	}
1085 
1086 	*_cacheRef = cacheRef;
1087 	return B_OK;
1088 }
1089 
1090 
1091 /** Maps the file at the path specified by \a path into an area in memory.
1092  *	The file will be mirrored beginning at the specified \a offset. The \a offset
1093  *	and \a size arguments have to be page aligned.
1094  */
1095 
1096 static area_id
1097 _vm_map_file(team_id aid, const char *name, void **_address, uint32 addressSpec,
1098 	size_t size, uint32 protection, uint32 mapping, const char *path, off_t offset, bool kernel)
1099 {
1100 	vm_cache_ref *cacheRef;
1101 	vm_area *area;
1102 	void *vnode;
1103 	status_t status;
1104 
1105 	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
1106 	// ToDo: check file access permissions (would be already done if the above were true)
1107 	// ToDo: for binary files, we want to make sure that they get the
1108 	//	copy of a file at a given time, ie. later changes should not
1109 	//	make it into the mapped copy -- this will need quite some changes
1110 	//	to be done in a nice way
1111 
1112 	vm_address_space *addressSpace = vm_get_address_space_by_id(aid);
1113 	if (addressSpace == NULL)
1114 		return B_BAD_TEAM_ID;
1115 
1116 	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n",
1117 		path, offset, size, mapping));
1118 
1119 	offset = ROUNDOWN(offset, B_PAGE_SIZE);
1120 	size = PAGE_ALIGN(size);
1121 
1122 	// get the vnode for the object, this also grabs a ref to it
1123 	status = vfs_get_vnode_from_path(path, kernel, &vnode);
1124 	if (status < B_OK)
1125 		goto err1;
1126 
1127 	// ToDo: this only works for file systems that use the file cache
1128 	status = vfs_get_vnode_cache(vnode, &cacheRef, false);
1129 
1130 	vfs_put_vnode(vnode);
1131 		// we don't need this vnode anymore - if the above call was
1132 		// successful, the store already has a ref to it
1133 
1134 	if (status < B_OK)
1135 		goto err1;
1136 
1137 	status = map_backing_store(addressSpace, cacheRef->cache->store, _address,
1138 		offset, size, addressSpec, 0, protection, mapping, &area, name);
1139 	if (status < B_OK)
1140 		goto err2;
1141 
1142 	vm_put_address_space(addressSpace);
1143 	return area->id;
1144 
1145 err2:
1146 	vm_cache_release_ref(cacheRef);
1147 err1:
1148 	vm_put_address_space(addressSpace);
1149 	return status;
1150 }
1151 
1152 
1153 area_id
1154 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec,
1155 	addr_t size, uint32 protection, uint32 mapping, const char *path, off_t offset)
1156 {
1157 	if (!arch_vm_supports_protection(protection))
1158 		return B_NOT_SUPPORTED;
1159 
1160 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1161 		mapping, path, offset, true);
1162 }
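
// Usage sketch (the path is only an example): map a file read-only into the
// kernel address space; as documented above, offset and size have to be
// page aligned.
//
//	void *baseAddress;
//	area_id id = vm_map_file(vm_kernel_address_space_id(), "mapped file",
//		&baseAddress, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE, B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, "/some/file", 0);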
1163 
1164 
1165 // ToDo: create a BeOS style call for this!
1166 
1167 area_id
1168 _user_vm_map_file(const char *userName, void **userAddress, int addressSpec,
1169 	addr_t size, int protection, int mapping, const char *userPath, off_t offset)
1170 {
1171 	char name[B_OS_NAME_LENGTH];
1172 	char path[B_PATH_NAME_LENGTH];
1173 	void *address;
1174 	area_id area;
1175 
1176 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
1177 		|| !IS_USER_ADDRESS(userPath)
1178 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
1179 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
1180 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
1181 		return B_BAD_ADDRESS;
1182 
1183 	// userland created areas can always be accessed by the kernel
1184 	protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
1185 
1186 	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
1187 		addressSpec, size, protection, mapping, path, offset, false);
1188 	if (area < B_OK)
1189 		return area;
1190 
1191 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
1192 		return B_BAD_ADDRESS;
1193 
1194 	return area;
1195 }
1196 
1197 
1198 area_id
1199 vm_clone_area(team_id team, const char *name, void **address, uint32 addressSpec,
1200 	uint32 protection, uint32 mapping, area_id sourceID)
1201 {
1202 	vm_area *newArea = NULL;
1203 	vm_area *sourceArea;
1204 	status_t status;
1205 
1206 	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
1207 	if (addressSpace == NULL)
1208 		return B_BAD_TEAM_ID;
1209 
1210 	sourceArea = vm_get_area(sourceID);
1211 	if (sourceArea == NULL) {
1212 		vm_put_address_space(addressSpace);
1213 		return B_BAD_VALUE;
1214 	}
1215 
1216 	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
1217 	//	have been adapted. Maybe it should be part of the kernel settings,
1218 	//	anyway (so that old drivers can always work).
1219 #if 0
1220 	if (sourceArea->address_space == kernel_aspace && addressSpace != kernel_aspace
1221 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1222 		// kernel areas must not be cloned in userland, unless explicitly
1223 		// declared user-cloneable upon construction
1224 		status = B_NOT_ALLOWED;
1225 	} else
1226 #endif
1227 	{
1228 		status = map_backing_store(addressSpace, sourceArea->cache_ref->cache->store,
1229 			address, sourceArea->cache_offset, sourceArea->size, addressSpec,
1230 			sourceArea->wiring, protection, mapping, &newArea, name);
1231 	}
1232 	if (status == B_OK)
1233 		vm_cache_acquire_ref(sourceArea->cache_ref);
1234 
1235 	vm_put_area(sourceArea);
1236 	vm_put_address_space(addressSpace);
1237 
1238 	if (status < B_OK)
1239 		return status;
1240 
1241 	return newArea->id;
1242 }
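
// Usage sketch ("sourceID" stands for any existing area): clone an area
// into the kernel address space, sharing the underlying cache with the
// source area.
//
//	void *clonedBase;
//	area_id clone = vm_clone_area(vm_kernel_address_space_id(),
//		"example clone", &clonedBase, B_ANY_KERNEL_ADDRESS,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceID);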
1243 
1244 
1245 static status_t
1246 _vm_delete_area(vm_address_space *addressSpace, area_id id)
1247 {
1248 	status_t status = B_OK;
1249 	vm_area *area;
1250 
1251 	TRACE(("vm_delete_area: aspace id 0x%lx, area id 0x%lx\n", addressSpace->id, id));
1252 
1253 	area = vm_get_area(id);
1254 	if (area == NULL)
1255 		return B_BAD_VALUE;
1256 
1257 	if (area->address_space == addressSpace) {
1258 		vm_put_area(area);
1259 			// next put below will actually delete it
1260 	} else
1261 		status = B_NOT_ALLOWED;
1262 
1263 	vm_put_area(area);
1264 	return status;
1265 }
1266 
1267 
1268 status_t
1269 vm_delete_area(team_id team, area_id id)
1270 {
1271 	vm_address_space *addressSpace;
1272 	status_t err;
1273 
1274 	addressSpace = vm_get_address_space_by_id(team);
1275 	if (addressSpace == NULL)
1276 		return B_BAD_TEAM_ID;
1277 
1278 	err = _vm_delete_area(addressSpace, id);
1279 	vm_put_address_space(addressSpace);
1280 	return err;
1281 }
1282 
1283 
1284 static void
1285 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area, bool locked)
1286 {
1287 	vm_area *temp, *last = NULL;
1288 
1289 	if (!locked)
1290 		acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
1291 
1292 	temp = addressSpace->areas;
1293 	while (temp != NULL) {
1294 		if (area == temp) {
1295 			if (last != NULL) {
1296 				last->address_space_next = temp->address_space_next;
1297 			} else {
1298 				addressSpace->areas = temp->address_space_next;
1299 			}
1300 			addressSpace->change_count++;
1301 			break;
1302 		}
1303 		last = temp;
1304 		temp = temp->address_space_next;
1305 	}
1306 	if (area == addressSpace->area_hint)
1307 		addressSpace->area_hint = NULL;
1308 
1309 	if (!locked)
1310 		release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
1311 
1312 	if (temp == NULL)
1313 		panic("remove_area_from_address_space: area not found in the address space's area list\n");
1314 }
1315 
1316 
1317 static bool
1318 _vm_put_area(vm_area *area, bool aspaceLocked)
1319 {
1320 	vm_address_space *addressSpace;
1321 	bool removeit = false;
1322 
1323 	//TRACE(("_vm_put_area(area = %p, aspaceLocked = %s)\n",
1324 	//	area, aspaceLocked ? "yes" : "no"));
1325 
1326 	// we should never get here, but if we do, we can handle it
1327 	if (area->id == RESERVED_AREA_ID)
1328 		return false;
1329 
1330 	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
1331 	if (atomic_add(&area->ref_count, -1) == 1) {
1332 		hash_remove(sAreaHash, area);
1333 		removeit = true;
1334 	}
1335 	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);
1336 
1337 	if (!removeit)
1338 		return false;
1339 
1340 	addressSpace = area->address_space;
1341 
1342 	// ToDo: do that only for vnode stores
1343 	vm_cache_write_modified(area->cache_ref);
1344 
1345 	arch_vm_unset_memory_type(area);
1346 	remove_area_from_address_space(addressSpace, area, aspaceLocked);
1347 
1348 	vm_cache_remove_area(area->cache_ref, area);
1349 	vm_cache_release_ref(area->cache_ref);
1350 
1351 	vm_translation_map *map = &addressSpace->translation_map;
1352 	(*map->ops->lock)(map);
1353 	(*map->ops->unmap)(map, area->base, area->base + (area->size - 1));
1354 	(*map->ops->unlock)(map);
1355 
1356 	// now we can give up the area's reference to the address space
1357 	vm_put_address_space(addressSpace);
1358 
1359 	free(area->name);
1360 	free(area);
1361 	return true;
1362 }
1363 
1364 
1365 static bool
1366 vm_put_area(vm_area *area)
1367 {
1368 	return _vm_put_area(area, false);
1369 }
1370 
1371 
1372 static status_t
1373 vm_copy_on_write_area(vm_area *area)
1374 {
1375 	vm_store *store;
1376 	vm_cache *upperCache, *lowerCache;
1377 	vm_cache_ref *upperCacheRef, *lowerCacheRef;
1378 	vm_translation_map *map;
1379 	vm_page *page;
1380 	uint32 protection;
1381 	status_t status;
1382 
1383 	TRACE(("vm_copy_on_write_area(area = %p)\n", area));
1384 
1385 	// We need to separate the vm_cache from its vm_cache_ref: the area
1386 	// and its cache_ref go into a new layer on top of the old one.
1387 	// So the old cache gets a new cache_ref and the area a new cache.
1388 
1389 	upperCacheRef = area->cache_ref;
1390 	lowerCache = upperCacheRef->cache;
1391 
1392 	// create an anonymous store object
1393 	store = vm_store_create_anonymous_noswap(false, 0);
1394 	if (store == NULL)
1395 		return B_NO_MEMORY;
1396 
1397 	upperCache = vm_cache_create(store);
1398 	if (upperCache == NULL) {
1399 		status = B_NO_MEMORY;
1400 		goto err1;
1401 	}
1402 
1403 	lowerCacheRef = vm_cache_ref_create(lowerCache);
1404 	if (lowerCacheRef == NULL) {
1405 		status = B_NO_MEMORY;
1406 		goto err2;
1407 	}
1408 
1409 	// The area must remain readable to the same extent (kernel/user) it was accessible before
1410 	protection = B_KERNEL_READ_AREA;
1411 	if (area->protection & B_READ_AREA)
1412 		protection |= B_READ_AREA;
1413 
1414 	// we need to hold the cache_ref lock when we want to switch its cache
1415 	mutex_lock(&upperCacheRef->lock);
1416 	mutex_lock(&lowerCacheRef->lock);
1417 
1418 	// ToDo: add a child counter to vm_cache - so that we can collapse a
1419 	//		cache layer when possible (ie. "the other" area was deleted)
1420 	upperCache->temporary = 1;
1421 	upperCache->scan_skip = lowerCache->scan_skip;
1422 	upperCache->source = lowerCache;
1423 	upperCache->ref = upperCacheRef;
1424 	upperCacheRef->cache = upperCache;
1425 
1426 	// we need to manually alter the ref_count
1427 	// ToDo: investigate a bit deeper if this is really correct
1428 	// (doesn't look like it, but it works)
1429 	lowerCacheRef->ref_count = upperCacheRef->ref_count;
1430 	upperCacheRef->ref_count = 1;
1431 
1432 	// grab a ref to the cache object we're now linked to as a source
1433 	vm_cache_acquire_ref(lowerCacheRef);
1434 
1435 	// We now need to remap all pages from the area read-only, so that
1436 	// a copy will be created on next write access
1437 
1438 	map = &area->address_space->translation_map;
1439 	map->ops->lock(map);
1440 	map->ops->unmap(map, area->base, area->base - 1 + area->size);
1441 
1442 	for (page = lowerCache->page_list; page; page = page->cache_next) {
1443 		map->ops->map(map, area->base + (page->cache_offset << PAGE_SHIFT)
1444 			- area->cache_offset, page->physical_page_number << PAGE_SHIFT,
1445 			protection);
1446 	}
1447 
1448 	map->ops->unlock(map);
1449 
1450 	mutex_unlock(&lowerCacheRef->lock);
1451 	mutex_unlock(&upperCacheRef->lock);
1452 
1453 	return B_OK;
1454 
1455 err2:
1456 	free(upperCache);
1457 err1:
1458 	store->ops->destroy(store);
1459 	return status;
1460 }
1461 
1462 
1463 area_id
1464 vm_copy_area(team_id addressSpaceID, const char *name, void **_address, uint32 addressSpec,
1465 	uint32 protection, area_id sourceID)
1466 {
1467 	vm_address_space *addressSpace;
1468 	vm_cache_ref *cacheRef;
1469 	vm_area *target, *source;
1470 	status_t status;
1471 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
1472 
1473 	if ((protection & B_KERNEL_PROTECTION) == 0) {
1474 		// set the same protection for the kernel as for userland
1475 		protection |= B_KERNEL_READ_AREA;
1476 		if (writableCopy)
1477 			protection |= B_KERNEL_WRITE_AREA;
1478 	}
1479 
1480 	if ((source = vm_get_area(sourceID)) == NULL)
1481 		return B_BAD_VALUE;
1482 
1483 	addressSpace = vm_get_address_space_by_id(addressSpaceID);
1484 	cacheRef = source->cache_ref;
1485 
1486 	if (addressSpec == B_CLONE_ADDRESS) {
1487 		addressSpec = B_EXACT_ADDRESS;
1488 		*_address = (void *)source->base;
1489 	}
1490 
1491 	// First, create a cache on top of the source area
1492 
1493 	status = map_backing_store(addressSpace, cacheRef->cache->store, _address,
1494 		source->cache_offset, source->size, addressSpec, source->wiring, protection,
1495 		writableCopy ? REGION_PRIVATE_MAP : REGION_NO_PRIVATE_MAP,
1496 		&target, name);
1497 
1498 	if (status < B_OK)
1499 		goto err;
1500 
1501 	vm_cache_acquire_ref(cacheRef);
1502 
1503 	// If the source area is writable, we need to move it one layer up as well
1504 
1505 	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
1506 		// ToDo: do something more useful if this fails!
1507 		if (vm_copy_on_write_area(source) < B_OK)
1508 			panic("vm_copy_on_write_area() failed!\n");
1509 	}
1510 
1511 	// we want to return the ID of the newly created area
1512 	status = target->id;
1513 
1514 err:
1515 	vm_put_address_space(addressSpace);
1516 	vm_put_area(source);
1517 
1518 	return status;
1519 }
1520 
1521 
1522 static int32
1523 count_writable_areas(vm_cache_ref *ref, vm_area *ignoreArea)
1524 {
1525 	struct vm_area *area = ref->areas;
1526 	uint32 count = 0;
1527 
1528 	for (; area != NULL; area = area->cache_next) {
1529 		if (area != ignoreArea
1530 			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
1531 			count++;
1532 	}
1533 
1534 	return count;
1535 }
1536 
1537 
1538 static status_t
1539 vm_set_area_protection(team_id aspaceID, area_id areaID, uint32 newProtection)
1540 {
1541 	vm_cache_ref *cacheRef;
1542 	vm_cache *cache;
1543 	vm_area *area;
1544 	status_t status = B_OK;
1545 
1546 	TRACE(("vm_set_area_protection(aspace = %#lx, area = %#lx, protection = %#lx)\n",
1547 		aspaceID, areaID, newProtection));
1548 
1549 	if (!arch_vm_supports_protection(newProtection))
1550 		return B_NOT_SUPPORTED;
1551 
1552 	area = vm_get_area(areaID);
1553 	if (area == NULL)
1554 		return B_BAD_VALUE;
1555 
1556 	if (aspaceID != vm_kernel_address_space_id() && area->address_space->id != aspaceID) {
1557 		// unless you're the kernel, you are only allowed to set
1558 		// the protection of your own areas
1559 		vm_put_area(area);
1560 		return B_NOT_ALLOWED;
1561 	}
1562 
1563 	cacheRef = area->cache_ref;
1564 	cache = cacheRef->cache;
1565 
1566 	mutex_lock(&cacheRef->lock);
1567 
1568 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1569 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
1570 		// change from read/write to read-only
1571 
1572 		if (cache->source != NULL && cache->temporary) {
1573 			if (count_writable_areas(cacheRef, area) == 0) {
1574 				// Since this cache now lives from the pages in its source cache,
1575 				// we can change the cache's commitment to take only those pages
1576 				// into account that really are in this cache.
1577 
1578 				// count existing pages in this cache
1579 				struct vm_page *page = cache->page_list;
1580 				uint32 count = 0;
1581 
1582 				for (; page != NULL; page = page->cache_next) {
1583 					count++;
1584 				}
1585 
1586 				status = cache->store->ops->commit(cache->store, count * B_PAGE_SIZE);
1587 
1588 				// ToDo: we may be able to join with our source cache, if count == 0
1589 			}
1590 		}
1591 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
1592 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
1593 		// change from read-only to read/write
1594 
1595 		// ToDo: if this is a shared cache, insert new cache (we only know about other
1596 		//	areas in this cache yet, though, not about child areas)
1597 		//	-> use this call with care, it might currently have unwanted consequences
1598 		//	   because of this. It should always be safe though, if there are no other
1599 		//	   (child) areas referencing this area's cache (you just might not know).
1600 		if (count_writable_areas(cacheRef, area) == 0
1601 			&& (cacheRef->areas != area || area->cache_next)) {
1602 			// ToDo: child areas are not tested for yet
1603 			dprintf("set_area_protection(): warning, would need to insert a new cache_ref (not yet implemented)!\n");
1604 			status = B_NOT_ALLOWED;
1605 		} else
1606 			dprintf("set_area_protection() may not work correctly yet in this direction!\n");
1607 
1608 		if (status == B_OK && cache->source != NULL && cache->temporary) {
1609 			// the cache's commitment must contain all possible pages
1610 			status = cache->store->ops->commit(cache->store, cache->virtual_size);
1611 		}
1612 	} else {
1613 		// we don't have anything special to do in all other cases
1614 	}
1615 
1616 	if (status == B_OK && area->protection != newProtection) {
1617 		// remap existing pages in this cache
1618 		struct vm_translation_map *map = &area->address_space->translation_map;
1619 
1620 		map->ops->lock(map);
1621 		map->ops->protect(map, area->base, area->base + area->size, newProtection);
1622 		map->ops->unlock(map);
1623 
1624 		area->protection = newProtection;
1625 	}
1626 
1627 	mutex_unlock(&cacheRef->lock);
1628 	vm_put_area(area);
1629 
1630 	return status;
1631 }
1632 
1633 
1634 status_t
1635 vm_get_page_mapping(team_id aid, addr_t vaddr, addr_t *paddr)
1636 {
1637 	vm_address_space *addressSpace;
1638 	uint32 null_flags;
1639 	status_t err;
1640 
1641 	addressSpace = vm_get_address_space_by_id(aid);
1642 	if (addressSpace == NULL)
1643 		return B_BAD_TEAM_ID;
1644 
1645 	err = addressSpace->translation_map.ops->query(&addressSpace->translation_map,
1646 		vaddr, paddr, &null_flags);
1647 
1648 	vm_put_address_space(addressSpace);
1649 	return err;
1650 }
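
// Usage sketch: translate a mapped kernel virtual address into its physical
// counterpart ("someMappedPointer" is only a placeholder).
//
//	addr_t physicalAddress;
//	status_t status = vm_get_page_mapping(vm_kernel_address_space_id(),
//		(addr_t)someMappedPointer, &physicalAddress);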
1651 
1652 
1653 static int
1654 display_mem(int argc, char **argv)
1655 {
1656 	int32 displayWidth;
1657 	int32 itemSize;
1658 	int32 num = 1;
1659 	addr_t address;
1660 	int i, j;
1661 
1662 	if (argc < 2) {
1663 		kprintf("usage: dw/ds/db <address> [num]\n"
1664 			"\tdw - 4 bytes\n"
1665 			"\tds - 2 bytes\n"
1666 			"\tdb - 1 byte\n");
1667 		return 0;
1668 	}
1669 
1670 	address = strtoul(argv[1], NULL, 0);
1671 
1672 	if (argc >= 3) {
1674 		num = atoi(argv[2]);
1675 	}
1676 
1677 	// build the format string
1678 	if (strcmp(argv[0], "db") == 0) {
1679 		itemSize = 1;
1680 		displayWidth = 16;
1681 	} else if (strcmp(argv[0], "ds") == 0) {
1682 		itemSize = 2;
1683 		displayWidth = 8;
1684 	} else if (strcmp(argv[0], "dw") == 0) {
1685 		itemSize = 4;
1686 		displayWidth = 4;
1687 	} else {
1688 		kprintf("display_mem called in an invalid way!\n");
1689 		return 0;
1690 	}
1691 
1692 	for (i = 0; i < num; i++) {
1693 		uint32 value;
1694 
1695 		if ((i % displayWidth) == 0) {
1696 			int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
1697 			if (i != 0)
1698 				kprintf("\n");
1699 
1700 			kprintf("[0x%lx]  ", address + i * itemSize);
1701 
1702 			for (j = 0; j < displayed; j++) {
1703 				char c;
1704 				if (user_memcpy(&c, (char *)address + i * itemSize + j, 1) != B_OK) {
1705 					displayed = j;
1706 					break;
1707 				}
1708 				if (!isalnum(c))
1709 					c = '.';
1710 
1711 				kprintf("%c", c);
1712 			}
1713 			if (num > displayWidth) {
1714 				// make sure the spacing in the last line is correct
1715 				for (j = displayed; j < displayWidth * itemSize; j++)
1716 					kprintf(" ");
1717 			}
1718 			kprintf("  ");
1719 		}
1720 
1721 		if (user_memcpy(&value, (uint8 *)address + i * itemSize, itemSize) != B_OK) {
1722 			kprintf("read fault");
1723 			break;
1724 		}
1725 
1726 		switch (itemSize) {
1727 			case 1:
1728 				kprintf(" 0x%02x", *(uint8 *)&value);
1729 				break;
1730 			case 2:
1731 				kprintf(" 0x%04x", *(uint16 *)&value);
1732 				break;
1733 			case 4:
1734 				kprintf(" 0x%08lx", *(uint32 *)&value);
1735 				break;
1736 		}
1737 	}
1738 
1739 	kprintf("\n");
1740 	return 0;
1741 }
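
// Example kernel debugger usage of the command above (the address is only
// an example): "dw 0x80001000 8" dumps eight 32-bit words starting at
// 0x80001000, while "db 0x80001000 16" dumps 16 bytes from the same address.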
1742 
1743 
1744 static int
1745 dump_cache_ref(int argc, char **argv)
1746 {
1747 	addr_t address;
1748 	vm_area *area;
1749 	vm_cache_ref *cache_ref;
1750 
1751 	if (argc < 2) {
1752 		kprintf("cache_ref: not enough arguments\n");
1753 		return 0;
1754 	}
1755 	if (strlen(argv[1]) < 2 || argv[1][0] != '0' || argv[1][1] != 'x') {
1756 		kprintf("cache_ref: invalid argument, pass address\n");
1757 		return 0;
1758 	}
1759 
1760 	address = atoul(argv[1]);
1761 	cache_ref = (vm_cache_ref *)address;
1762 
1763 	kprintf("cache_ref at %p:\n", cache_ref);
1764 	kprintf("cache: %p\n", cache_ref->cache);
1765 	kprintf("lock.holder: %ld\n", cache_ref->lock.holder);
1766 	kprintf("lock.sem: 0x%lx\n", cache_ref->lock.sem);
1767 	kprintf("areas:\n");
1768 	for (area = cache_ref->areas; area != NULL; area = area->cache_next) {
1769 		kprintf(" area 0x%lx: ", area->id);
1770 		kprintf("base_addr = 0x%lx ", area->base);
1771 		kprintf("size = 0x%lx ", area->size);
1772 		kprintf("name = '%s' ", area->name);
1773 		kprintf("protection = 0x%lx\n", area->protection);
1774 	}
1775 	kprintf("ref_count: %ld\n", cache_ref->ref_count);
1776 	return 0;
1777 }
1778 
1779 
1780 static const char *
1781 page_state_to_text(int state)
1782 {
1783 	switch(state) {
1784 		case PAGE_STATE_ACTIVE:
1785 			return "active";
1786 		case PAGE_STATE_INACTIVE:
1787 			return "inactive";
1788 		case PAGE_STATE_BUSY:
1789 			return "busy";
1790 		case PAGE_STATE_MODIFIED:
1791 			return "modified";
1792 		case PAGE_STATE_FREE:
1793 			return "free";
1794 		case PAGE_STATE_CLEAR:
1795 			return "clear";
1796 		case PAGE_STATE_WIRED:
1797 			return "wired";
1798 		case PAGE_STATE_UNUSED:
1799 			return "unused";
1800 		default:
1801 			return "unknown";
1802 	}
1803 }
1804 
1805 
1806 static int
1807 dump_cache(int argc, char **argv)
1808 {
1809 	addr_t address;
1810 	vm_cache *cache;
1811 	vm_page *page;
1812 
1813 	if (argc < 2) {
1814 		kprintf("cache: not enough arguments\n");
1815 		return 0;
1816 	}
1817 	if (strlen(argv[1]) < 2 || argv[1][0] != '0' || argv[1][1] != 'x') {
1818 		kprintf("cache: invalid argument, pass address\n");
1819 		return 0;
1820 	}
1821 
1822 	address = atoul(argv[1]);
1823 	cache = (vm_cache *)address;
1824 
1825 	kprintf("cache at %p:\n", cache);
1826 	kprintf("cache_ref: %p\n", cache->ref);
1827 	kprintf("source: %p\n", cache->source);
1828 	kprintf("store: %p\n", cache->store);
1829 	kprintf("virtual_size: 0x%Lx\n", cache->virtual_size);
1830 	kprintf("temporary: %ld\n", cache->temporary);
1831 	kprintf("scan_skip: %ld\n", cache->scan_skip);
1832 	kprintf("page_list:\n");
1833 	for (page = cache->page_list; page != NULL; page = page->cache_next) {
1834 		if (page->type == PAGE_TYPE_PHYSICAL) {
1835 			kprintf(" %p ppn 0x%lx offset 0x%lx type %ld state %ld (%s) ref_count %ld\n",
1836 				page, page->physical_page_number, page->cache_offset, page->type, page->state,
1837 				page_state_to_text(page->state), page->ref_count);
1838 		} else if(page->type == PAGE_TYPE_DUMMY) {
1839 			kprintf(" %p DUMMY PAGE state %ld (%s)\n",
1840 				page, page->state, page_state_to_text(page->state));
1841 		} else
1842 			kprintf(" %p UNKNOWN PAGE type %ld\n", page, page->type);
1843 	}
1844 	return 0;
1845 }
1846 
1847 
1848 static void
1849 _dump_area(vm_area *area)
1850 {
1851 	kprintf("AREA: %p\n", area);
1852 	kprintf("name:\t\t'%s'\n", area->name);
1853 	kprintf("owner:\t\t0x%lx\n", area->address_space->id);
1854 	kprintf("id:\t\t0x%lx\n", area->id);
1855 	kprintf("base:\t\t0x%lx\n", area->base);
1856 	kprintf("size:\t\t0x%lx\n", area->size);
1857 	kprintf("protection:\t0x%lx\n", area->protection);
1858 	kprintf("wiring:\t\t0x%x\n", area->wiring);
1859 	kprintf("memory_type:\t0x%x\n", area->memory_type);
1860 	kprintf("ref_count:\t%ld\n", area->ref_count);
1861 	kprintf("cache_ref:\t%p\n", area->cache_ref);
1862 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
1863 	kprintf("cache_next:\t%p\n", area->cache_next);
1864 	kprintf("cache_prev:\t%p\n", area->cache_prev);
1865 }
1866 
1867 
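/**	KDL command: dumps every area that matches the argument, which may be an
 *	area name, an area id, or any address that falls within an area.
 */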
1868 static int
1869 dump_area(int argc, char **argv)
1870 {
1871 	bool found = false;
1872 	vm_area *area;
1873 	addr_t num;
1874 
1875 	if (argc < 2) {
1876 		kprintf("usage: area <id|address|name>\n");
1877 		return 0;
1878 	}
1879 
1880 	num = strtoul(argv[1], NULL, 0);
1881 
1882 	// walk through the area list, matching the argument against area name, id, or a contained address
1883 	struct hash_iterator iter;
1884 
1885 	hash_open(sAreaHash, &iter);
1886 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
1887 		if ((area->name != NULL && !strcmp(argv[1], area->name))
1888 			|| (num != 0
1889 				&& ((addr_t)area->id == num
1890 					|| (area->base <= num && area->base + area->size > num)))) {
1891 			_dump_area(area);
1892 			found = true;
1893 		}
1894 	}
	hash_close(sAreaHash, &iter, false);
1895 
1896 	if (!found)
1897 		kprintf("could not find area %s (%ld)\n", argv[1], num);
1898 	return 0;
1899 }
1900 
1901 
1902 static int
1903 dump_area_list(int argc, char **argv)
1904 {
1905 	vm_area *area;
1906 	struct hash_iterator iter;
1907 	int32 id = -1;
1908 
1909 	if (argc > 1)
1910 		id = strtoul(argv[1], NULL, 0);
1911 
1912 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
1913 
1914 	hash_open(sAreaHash, &iter);
1915 	while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) {
1916 		if (id != -1 && area->address_space->id != id)
1917 			continue;
1918 
1919 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id, (void *)area->base,
1920 			(void *)area->size, area->protection, area->wiring, area->name);
1921 	}
1922 	hash_close(sAreaHash, &iter, false);
1923 	return 0;
1924 }
1925 
1926 
1927 status_t
1928 vm_delete_areas(struct vm_address_space *addressSpace)
1929 {
1930 	vm_area *area;
1931 	vm_area *next, *last = NULL;
1932 
1933 	TRACE(("vm_delete_areas: called on address space 0x%lx\n", addressSpace->id));
1934 
1935 	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);
1936 
1937 	// remove all reserved areas in this address space
1938 
1939 	for (area = addressSpace->areas; area; area = next) {
1940 		next = area->address_space_next;
1941 
1942 		if (area->id == RESERVED_AREA_ID) {
1943 			// just remove it
1944 			if (last)
1945 				last->address_space_next = area->address_space_next;
1946 			else
1947 				addressSpace->areas = area->address_space_next;
1948 
1949 			free(area);
1950 			continue;
1951 		}
1952 
1953 		last = area;
1954 	}
1955 
1956 	// delete all the areas in this address space
1957 
1958 	for (area = addressSpace->areas; area; area = next) {
1959 		next = area->address_space_next;
1960 
1961 		// decrement the ref on this area, may actually push the ref < 0, if there
1962 		// is a concurrent delete_area() on that specific area, but that's ok here
1963 		if (!_vm_put_area(area, true))
1964 			dprintf("vm_delete_areas() did not delete area %p\n", area);
1965 	}
1966 
1967 	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
1968 
1969 	return B_OK;
1970 }
1971 
1972 
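/**	Returns the id of the area in the given team's address space that contains
 *	the given address, or an error code if there is no such area.
 */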
1973 static area_id
1974 vm_area_for(team_id team, addr_t address)
1975 {
1976 	vm_address_space *addressSpace;
1977 	area_id id = B_ERROR;
1978 	vm_area *area;
1979 
1980 	addressSpace = vm_get_address_space_by_id(team);
1981 	if (addressSpace == NULL)
1982 		return B_BAD_TEAM_ID;
1983 
1984 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
1985 
1986 	area = addressSpace->areas;
1987 	for (; area != NULL; area = area->address_space_next) {
1988 		// ignore reserved space regions
1989 		if (area->id == RESERVED_AREA_ID)
1990 			continue;
1991 
1992 		if (address >= area->base && address < area->base + area->size) {
1993 			id = area->id;
1994 			break;
1995 		}
1996 	}
1997 
1998 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
1999 	vm_put_address_space(addressSpace);
2000 
2001 	return id;
2002 }
2003 
2004 
2005 static void
2006 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end)
2007 {
2008 	// free all physical pages in the specified range
2009 
2010 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
2011 		addr_t physicalAddress;
2012 		uint32 flags;
2013 
2014 		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
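			// vm_lookup_page() expects a physical page number, so use the
			// physical address the translation map just returned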
2015 			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
2016 			if (page != NULL)
2017 				vm_page_set_state(page, PAGE_STATE_FREE);
2018 		}
2019 	}
2020 
2021 	// unmap the memory
2022 	map->ops->unmap(map, start, end - 1);
2023 }
2024 
2025 
2026 void
2027 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
2028 {
2029 	vm_translation_map *map = &kernel_aspace->translation_map;
2030 	addr_t end = start + size;
2031 	addr_t lastEnd = start;
2032 	vm_area *area;
2033 
2034 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end));
2035 
2036 	// The areas are sorted in virtual address space order, so
2037 	// we just have to find the holes between them that fall
2038 	// into the area we should dispose
2039 
2040 	map->ops->lock(map);
2041 
2042 	for (area = kernel_aspace->areas; area; area = area->address_space_next) {
2043 		addr_t areaStart = area->base;
2044 		addr_t areaEnd = areaStart + area->size;
2045 
2046 		if (area->id == RESERVED_AREA_ID)
2047 			continue;
2048 
2049 		if (areaEnd >= end) {
2050 			// we are done, the areas are already beyond what we have to free
2051 			lastEnd = end;
2052 			break;
2053 		}
2054 
2055 		if (areaStart > lastEnd) {
2056 			// this is something we can free
2057 			TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart));
2058 			unmap_and_free_physical_pages(map, lastEnd, areaStart);
2059 		}
2060 
2061 		lastEnd = areaEnd;
2062 	}
2063 
2064 	if (lastEnd < end) {
2065 		// we can also get rid of some space at the end of the area
2066 		TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end));
2067 		unmap_and_free_physical_pages(map, lastEnd, end);
2068 	}
2069 
2070 	map->ops->unlock(map);
2071 }
2072 
2073 
2074 static void
2075 create_preloaded_image_areas(struct preloaded_image *image)
2076 {
2077 	char name[B_OS_NAME_LENGTH];
2078 	void *address;
2079 	int32 length;
2080 
2081 	// use file name to create a good area name
2082 	char *fileName = strrchr(image->name, '/');
2083 	if (fileName == NULL)
2084 		fileName = image->name;
2085 	else
2086 		fileName++;
2087 
2088 	length = strlen(fileName);
2089 	// make sure there is enough space for the suffix
2090 	if (length > 25)
2091 		length = 25;
2092 
2093 	memcpy(name, fileName, length);
2094 	strcpy(name + length, "_text");
2095 	address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE);
2096 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2097 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
2098 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2099 
2100 	strcpy(name + length, "_data");
2101 	address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE);
2102 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
2103 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
2104 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2105 }
2106 
2107 
2108 /**	Frees all previously allocated kernel arguments areas from the kernel_args structure.
2109  *	Any boot loader resources contained in those arguments must not be accessed
2110  *	anymore past this point.
2111  */
2112 
2113 void
2114 vm_free_kernel_args(kernel_args *args)
2115 {
2116 	uint32 i;
2117 
2118 	TRACE(("vm_free_kernel_args()\n"));
2119 
2120 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2121 		area_id area = area_for((void *)args->kernel_args_range[i].start);
2122 		if (area >= B_OK)
2123 			delete_area(area);
2124 	}
2125 }
2126 
2127 
2128 static void
2129 allocate_kernel_args(kernel_args *args)
2130 {
2131 	uint32 i;
2132 
2133 	TRACE(("allocate_kernel_args()\n"));
2134 
2135 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
2136 		void *address = (void *)args->kernel_args_range[i].start;
2137 
2138 		create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size,
2139 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2140 	}
2141 }
2142 
2143 
2144 static void
2145 unreserve_boot_loader_ranges(kernel_args *args)
2146 {
2147 	uint32 i;
2148 
2149 	TRACE(("unreserve_boot_loader_ranges()\n"));
2150 
2151 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2152 		vm_unreserve_address_range(vm_kernel_address_space_id(),
2153 			(void *)args->virtual_allocated_range[i].start,
2154 			args->virtual_allocated_range[i].size);
2155 	}
2156 }
2157 
2158 
2159 static void
2160 reserve_boot_loader_ranges(kernel_args *args)
2161 {
2162 	uint32 i;
2163 
2164 	TRACE(("reserve_boot_loader_ranges()\n"));
2165 
2166 	for (i = 0; i < args->num_virtual_allocated_ranges; i++) {
2167 		void *address = (void *)args->virtual_allocated_range[i].start;
2168 
2169 		// If the address is not a kernel address, we just skip it. The
2170 		// architecture-specific code has to deal with it.
2171 		if (!IS_KERNEL_ADDRESS(address)) {
2172 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
2173 				address, args->virtual_allocated_range[i].size);
2174 			continue;
2175 		}
2176 
2177 		status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address,
2178 			B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
2179 		if (status < B_OK)
2180 			panic("could not reserve boot loader ranges\n");
2181 	}
2182 }
2183 
2184 
2185 status_t
2186 vm_init(kernel_args *args)
2187 {
2188 	struct preloaded_image *image;
2189 	addr_t heap_base;
2190 	void *address;
2191 	status_t err = 0;
2192 	uint32 i;
2193 
2194 	TRACE(("vm_init: entry\n"));
2195 	err = arch_vm_translation_map_init(args);
2196 	err = arch_vm_init(args);
2197 
2198 	// initialize some globals
2199 	sNextAreaID = 1;
2200 	sAreaHashLock = -1;
2201 	sAvailableMemoryLock.sem = -1;
2202 
2203 	// map in the new heap and initialize it
2204 	heap_base = vm_alloc_from_kernel_args(args, HEAP_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2205 	TRACE(("heap at 0x%lx\n", heap_base));
2206 	heap_init(heap_base);
2207 
2208 	// initialize the free page list and physical page mapper
2209 	vm_page_init(args);
2210 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
2211 
2212 	// initialize the hash table that stores the pages mapped to caches
2213 	vm_cache_init(args);
2214 
2215 	{
2216 		vm_area *area;
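		// "area" is never dereferenced here; the expression below merely
		// computes the byte offset of the hash_next field within vm_area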
2217 		sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area,
2218 			&area_compare, &area_hash);
2219 		if (sAreaHash == NULL)
2220 			panic("vm_init: error creating aspace hash table\n");
2221 	}
2222 
2223 	vm_address_space_init();
2224 	reserve_boot_loader_ranges(args);
2225 
2226 	// do any further initialization that the architecture-dependent layers may need now
2227 	arch_vm_translation_map_init_post_area(args);
2228 	arch_vm_init_post_area(args);
2229 	vm_page_init_post_area(args);
2230 
2231 	// allocate areas to represent stuff that already exists
2232 
2233 	address = (void *)ROUNDOWN(heap_base, B_PAGE_SIZE);
2234 	create_area("kernel heap", &address, B_EXACT_ADDRESS, HEAP_SIZE,
2235 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2236 
2237 	allocate_kernel_args(args);
2238 
2239 	args->kernel_image.name = "kernel";
2240 		// the lazy boot loader currently doesn't set the kernel's name...
2241 	create_preloaded_image_areas(&args->kernel_image);
2242 
2243 	// allocate areas for preloaded images
2244 	for (image = args->preloaded_images; image != NULL; image = image->next) {
2245 		create_preloaded_image_areas(image);
2246 	}
2247 
2248 	// allocate kernel stacks
2249 	for (i = 0; i < args->num_cpus; i++) {
2250 		char name[64];
2251 
2252 		sprintf(name, "idle thread %lu kstack", i + 1);
2253 		address = (void *)args->cpu_kstack[i].start;
2254 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
2255 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2256 	}
2257 
2258 	// add some debugger commands
2259 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
2260 	add_debugger_command("area", &dump_area, "Dump info about a particular area");
2261 	add_debugger_command("cache_ref", &dump_cache_ref, "Dump cache_ref data structure");
2262 	add_debugger_command("cache", &dump_cache, "Dump cache data structure");
2263 //	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
2264 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
2265 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
2266 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
2267 
2268 	TRACE(("vm_init: exit\n"));
2269 
2270 	return err;
2271 }
2272 
2273 
2274 status_t
2275 vm_init_post_sem(kernel_args *args)
2276 {
2277 	vm_area *area;
2278 
2279 	// This frees all unused boot loader resources and makes their space available again
2280 	arch_vm_init_end(args);
2281 	unreserve_boot_loader_ranges(args);
2282 
2283 	// fill in all of the semaphores that were not allocated before
2284 	// since we're still single threaded and only the kernel address space exists,
2285 	// it isn't that hard to find all of the ones we need to create
2286 
2287 	benaphore_init(&sAvailableMemoryLock, "available memory lock");
2288 	arch_vm_translation_map_init_post_sem(args);
2289 	vm_address_space_init_post_sem();
2290 
2291 	for (area = kernel_aspace->areas; area; area = area->address_space_next) {
2292 		if (area->id == RESERVED_AREA_ID)
2293 			continue;
2294 
2295 		if (area->cache_ref->lock.sem < 0)
2296 			mutex_init(&area->cache_ref->lock, "cache_ref_mutex");
2297 	}
2298 
2299 	sAreaHashLock = create_sem(WRITE_COUNT, "area hash");
2300 
2301 	return heap_init_post_sem(args);
2302 }
2303 
2304 
2305 status_t
2306 vm_init_post_thread(kernel_args *args)
2307 {
2308 	vm_page_init_post_thread(args);
2309 	vm_daemon_init();
2310 	vm_low_memory_init();
2311 
2312 	return heap_init_post_thread(args);
2313 }
2314 
2315 
2316 status_t
2317 vm_init_post_modules(kernel_args *args)
2318 {
2319 	return arch_vm_init_post_modules(args);
2320 }
2321 
2322 
2323 void
2324 permit_page_faults(void)
2325 {
2326 	struct thread *thread = thread_get_current_thread();
2327 	if (thread != NULL)
2328 		atomic_add(&thread->page_faults_allowed, 1);
2329 }
2330 
2331 
2332 void
2333 forbid_page_faults(void)
2334 {
2335 	struct thread *thread = thread_get_current_thread();
2336 	if (thread != NULL)
2337 		atomic_add(&thread->page_faults_allowed, -1);
2338 }
2339 
2340 
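/**	Called by the architecture specific page fault handler. Tries to resolve
 *	the fault via vm_soft_fault(); if that fails, kernel faults are redirected
 *	to the thread's fault handler (or cause a panic), while userland faults
 *	result in a SIGSEGV being sent to the team.
 */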
2341 status_t
2342 vm_page_fault(addr_t address, addr_t fault_address, bool is_write, bool is_user, addr_t *newip)
2343 {
2344 	int err;
2345 
2346 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, fault_address));
2347 
2348 	*newip = 0;
2349 
2350 	err = vm_soft_fault(address, is_write, is_user);
2351 	if (err < 0) {
2352 		dprintf("vm_page_fault: vm_soft_fault returned error %d on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
2353 			err, address, fault_address, is_write, is_user, thread_get_current_thread_id());
2354 		if (!is_user) {
2355 			struct thread *t = thread_get_current_thread();
2356 			if (t && t->fault_handler != 0) {
2357 				// this will cause the arch-dependent page fault handler to
2358 				// modify the IP on the interrupt frame or whatever to return
2359 				// to this address
2360 				*newip = t->fault_handler;
2361 			} else {
2362 				// unhandled page fault in the kernel
2363 				panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n",
2364 					address, fault_address);
2365 			}
2366 		} else {
2367 #if 1
2368 			// ToDo: remove me once we have proper userland debugging support (and tools)
2369 			vm_address_space *addressSpace = vm_get_current_user_address_space();
2370 			vm_area *area;
2371 
2372 			acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
2373 			area = vm_area_lookup(addressSpace, fault_address);
2374 
2375 			dprintf("vm_page_fault: sending team \"%s\" 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n",
2376 				thread_get_current_thread()->team->name,
2377 				thread_get_current_thread()->team->id, fault_address,
2378 				area ? area->name : "???", fault_address - (area ? area->base : 0x0));
2379 
2380 // We can print a stack trace of the userland thread here.
2381 #if 0
2382 			if (area) {
2383 				struct stack_frame {
2384 					#ifdef __INTEL__
2385 						struct stack_frame*	previous;
2386 						void*				return_address;
2387 					#else
2388 						// ...
2389 					#endif
2390 				};
2391 				struct iframe *iframe = i386_get_user_iframe();
2392 				if (iframe == NULL)
2393 					panic("iframe is NULL!");
2394 
2395 				struct stack_frame frame;
2396 				status_t status = user_memcpy(&frame, (void *)iframe->ebp,
2397 					sizeof(struct stack_frame));
2398 
2399 				dprintf("stack trace:\n");
2400 				while (status == B_OK) {
2401 					dprintf("  %p", frame.return_address);
2402 					area = vm_area_lookup(addressSpace,
2403 						(addr_t)frame.return_address);
2404 					if (area) {
2405 						dprintf(" (%s + %#lx)", area->name,
2406 							(addr_t)frame.return_address - area->base);
2407 					}
2408 					dprintf("\n");
2409 
2410 					status = user_memcpy(&frame, frame.previous,
2411 						sizeof(struct stack_frame));
2412 				}
2413 			}
2414 #endif	// 0 (stack trace)
2415 
2416 			release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2417 			vm_put_address_space(addressSpace);
2418 #endif
2419 			if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV))
2420 				send_signal(team_get_current_team_id(), SIGSEGV);
2421 		}
2422 	}
2423 
2424 	return B_HANDLED_INTERRUPT;
2425 }
2426 
2427 
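/**	Does the real work of resolving a page fault: looks up the faulted area,
 *	checks the access permissions, and walks the cache chain from the area's
 *	topmost cache downwards to find or read in the page. If no cache contains
 *	it, a clean page is allocated. A write fault on a page that belongs to a
 *	lower cache is resolved by copying the page into the topmost cache
 *	(copy-on-write). Finally, the page is mapped into the address space.
 */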
2428 static status_t
2429 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser)
2430 {
2431 	vm_address_space *addressSpace;
2432 	vm_area *area;
2433 	vm_cache_ref *cache_ref;
2434 	vm_cache_ref *last_cache_ref;
2435 	vm_cache_ref *top_cache_ref;
2436 	off_t cacheOffset;
2437 	vm_page dummy_page;
2438 	vm_page *page = NULL;
2439 	addr_t address;
2440 	int change_count;
2441 	int err;
2442 
2443 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
2444 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
2445 
2446 	address = ROUNDOWN(originalAddress, B_PAGE_SIZE);
2447 
2448 	if (IS_KERNEL_ADDRESS(address)) {
2449 		addressSpace = vm_get_kernel_address_space();
2450 	} else if (IS_USER_ADDRESS(address)) {
2451 		addressSpace = vm_get_current_user_address_space();
2452 		if (addressSpace == NULL) {
2453 			if (isUser == false) {
2454 				dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n");
2455 				return B_BAD_ADDRESS;
2456 			} else {
2457 				// XXX weird state.
2458 				panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n");
2459 			}
2460 		}
2461 	} else {
2462 		// the hit was probably in the 64k DMZ between kernel and user space
2463 		// this keeps a user space thread from passing a buffer that crosses into kernel space
2464 		return B_BAD_ADDRESS;
2465 	}
2466 
2467 	atomic_add(&addressSpace->fault_count, 1);
2468 
2469 	// Get the area the fault was in
2470 
2471 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
2472 	area = vm_area_lookup(addressSpace, address);
2473 	if (area == NULL) {
2474 		release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2475 		vm_put_address_space(addressSpace);
2476 		dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n",
2477 			originalAddress);
2478 		return B_BAD_ADDRESS;
2479 	}
2480 
2481 	// check permissions
2482 	if (isUser && (area->protection & B_USER_PROTECTION) == 0) {
2483 		release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2484 		vm_put_address_space(addressSpace);
2485 		dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress);
2486 		return B_PERMISSION_DENIED;
2487 	}
2488 	if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
2489 		release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2490 		vm_put_address_space(addressSpace);
2491 		dprintf("write access attempted on read-only area 0x%lx at %p\n",
2492 			area->id, (void *)originalAddress);
2493 		return B_PERMISSION_DENIED;
2494 	}
2495 
2496 	// We have the area, it was a valid access, so let's try to resolve the page fault now.
2497 	// At first, the top most cache from the area is investigated
2498 
2499 	top_cache_ref = area->cache_ref;
2500 	cacheOffset = address - area->base + area->cache_offset;
2501 	vm_cache_acquire_ref(top_cache_ref);
2502 	change_count = addressSpace->change_count;
2503 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2504 
2505 	// See if this cache has a fault handler - this will do all the work for us
2506 	if (top_cache_ref->cache->store->ops->fault != NULL) {
2507 		// Note, since the page fault is resolved with interrupts enabled, the
2508 		// fault handler could be called more than once for the same reason -
2509 		// the store must take this into account
2510 		status_t status = (*top_cache_ref->cache->store->ops->fault)(top_cache_ref->cache->store, addressSpace, cacheOffset);
2511 		if (status != B_BAD_HANDLER) {
2512 			vm_cache_release_ref(top_cache_ref);
2513 			vm_put_address_space(addressSpace);
2514 			return status;
2515 		}
2516 	}
2517 
2518 	// The top most cache has no fault handler, so let's see if the cache or its sources
2519 	// already have the page we're searching for (we're going from top to bottom)
2520 
2521 	dummy_page.state = PAGE_STATE_INACTIVE;
2522 	dummy_page.type = PAGE_TYPE_DUMMY;
2523 
2524 	last_cache_ref = top_cache_ref;
2525 	for (cache_ref = top_cache_ref; cache_ref; cache_ref = (cache_ref->cache->source) ? cache_ref->cache->source->ref : NULL) {
2526 		mutex_lock(&cache_ref->lock);
2527 
2528 		for (;;) {
2529 			page = vm_cache_lookup_page(cache_ref, cacheOffset);
2530 			if (page != NULL && page->state != PAGE_STATE_BUSY) {
2531 				vm_page_set_state(page, PAGE_STATE_BUSY);
2532 				mutex_unlock(&cache_ref->lock);
2533 				break;
2534 			}
2535 
2536 			if (page == NULL)
2537 				break;
2538 
2539 			// page must be busy
2540 			// ToDo: don't wait forever!
2541 			mutex_unlock(&cache_ref->lock);
2542 			snooze(20000);
2543 			mutex_lock(&cache_ref->lock);
2544 		}
2545 
2546 		if (page != NULL)
2547 			break;
2548 
2549 		// The current cache does not contain the page we're looking for
2550 
2551 		// If we're at the top most cache, insert the dummy page here to keep other threads
2552 		// from faulting on the same address and chasing us up the cache chain
2553 		if (cache_ref == top_cache_ref) {
2554 			dummy_page.state = PAGE_STATE_BUSY;
2555 			vm_cache_insert_page(cache_ref, &dummy_page, cacheOffset);
2556 		}
2557 
2558 		// see if the vm_store has it
2559 		if (cache_ref->cache->store->ops->has_page != NULL
2560 			&& cache_ref->cache->store->ops->has_page(cache_ref->cache->store, cacheOffset)) {
2561 			size_t bytesRead;
2562 			iovec vec;
2563 
2564 			vec.iov_len = bytesRead = B_PAGE_SIZE;
2565 
2566 			mutex_unlock(&cache_ref->lock);
2567 
2568 			page = vm_page_allocate_page(PAGE_STATE_FREE);
2569 			addressSpace->translation_map.ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE, (addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT);
2570 			// ToDo: handle errors here
2571 			err = cache_ref->cache->store->ops->read(cache_ref->cache->store, cacheOffset, &vec, 1, &bytesRead);
2572 			addressSpace->translation_map.ops->put_physical_page((addr_t)vec.iov_base);
2573 
2574 			mutex_lock(&cache_ref->lock);
2575 
2576 			if (cache_ref == top_cache_ref) {
2577 				vm_cache_remove_page(cache_ref, &dummy_page);
2578 				dummy_page.state = PAGE_STATE_INACTIVE;
2579 			}
2580 			vm_cache_insert_page(cache_ref, page, cacheOffset);
2581 			mutex_unlock(&cache_ref->lock);
2582 			break;
2583 		}
2584 		mutex_unlock(&cache_ref->lock);
2585 		last_cache_ref = cache_ref;
2586 	}
2587 
2588 	if (!cache_ref) {
2589 		// We rolled off the end of the cache chain, so we need to decide which
2590 		// cache will get the new page we're about to create.
2591 
2592 		cache_ref = isWrite ? top_cache_ref : last_cache_ref;
2593 			// Read-only pages go into the deepest cache - only the
2594 			// top most cache may have direct write access.
2595 	}
2596 
2597 	if (page == NULL) {
2598 		// we still haven't found a page, so we allocate a clean one
2599 		page = vm_page_allocate_page(PAGE_STATE_CLEAR);
2600 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number));
2601 
2602 		// Insert the new page into our cache, and replace it with the dummy page if necessary
2603 
2604 		mutex_lock(&cache_ref->lock);
2605 
2606 		// if we inserted a dummy page into this cache, we have to remove it now
2607 		if (dummy_page.state == PAGE_STATE_BUSY && dummy_page.cache == cache_ref->cache) {
2608 			vm_cache_remove_page(cache_ref, &dummy_page);
2609 			dummy_page.state = PAGE_STATE_INACTIVE;
2610 		}
2611 
2612 		vm_cache_insert_page(cache_ref, page, cacheOffset);
2613 		mutex_unlock(&cache_ref->lock);
2614 
2615 		if (dummy_page.state == PAGE_STATE_BUSY) {
2616 			// we had inserted the dummy page into another cache, so let's remove it from there
2617 			vm_cache_ref *temp_cache = dummy_page.cache->ref;
2618 			mutex_lock(&temp_cache->lock);
2619 			vm_cache_remove_page(temp_cache, &dummy_page);
2620 			mutex_unlock(&temp_cache->lock);
2621 			dummy_page.state = PAGE_STATE_INACTIVE;
2622 		}
2623 	}
2624 
2625 	// We now have the page and a cache it belongs to - we now need to make
2626 	// sure that the area's cache can access it, too, and sees the correct data
2627 
2628 	if (page->cache != top_cache_ref->cache && isWrite) {
2629 		// now we have a page that has the data we want, but in the wrong cache object
2630 		// so we need to copy it and stick it into the top cache
2631 		vm_page *src_page = page;
2632 		void *src, *dest;
2633 
2634 		// ToDo: if memory is low, it might be a good idea to steal the page
2635 		//	from our source cache - if possible, that is
2636 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
2637 		page = vm_page_allocate_page(PAGE_STATE_FREE);
2638 
2639 		// try to get a mapping for the src and dest page so we can copy it
2640 		for (;;) {
2641 			(*addressSpace->translation_map.ops->get_physical_page)(src_page->physical_page_number * B_PAGE_SIZE, (addr_t *)&src, PHYSICAL_PAGE_CAN_WAIT);
2642 			err = (*addressSpace->translation_map.ops->get_physical_page)(page->physical_page_number * B_PAGE_SIZE, (addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT);
2643 			if (err == B_NO_ERROR)
2644 				break;
2645 
2646 			// it couldn't map the second one, so sleep and retry
2647 			// keeps an extremely rare deadlock from occurring
2648 			(*addressSpace->translation_map.ops->put_physical_page)((addr_t)src);
2649 			snooze(5000);
2650 		}
2651 
2652 		memcpy(dest, src, B_PAGE_SIZE);
2653 		(*addressSpace->translation_map.ops->put_physical_page)((addr_t)src);
2654 		(*addressSpace->translation_map.ops->put_physical_page)((addr_t)dest);
2655 
2656 		vm_page_set_state(src_page, PAGE_STATE_ACTIVE);
2657 
2658 		mutex_lock(&top_cache_ref->lock);
2659 
2660 		// Insert the new page into our cache, and replace it with the dummy page if necessary
2661 
2662 		// if we inserted a dummy page into this cache, we have to remove it now
2663 		if (dummy_page.state == PAGE_STATE_BUSY && dummy_page.cache == top_cache_ref->cache) {
2664 			vm_cache_remove_page(top_cache_ref, &dummy_page);
2665 			dummy_page.state = PAGE_STATE_INACTIVE;
2666 		}
2667 
2668 		vm_cache_insert_page(top_cache_ref, page, cacheOffset);
2669 		mutex_unlock(&top_cache_ref->lock);
2670 
2671 		if (dummy_page.state == PAGE_STATE_BUSY) {
2672 			// we had inserted the dummy page into another cache, so let's remove it from there
2673 			vm_cache_ref *temp_cache = dummy_page.cache->ref;
2674 			mutex_lock(&temp_cache->lock);
2675 			vm_cache_remove_page(temp_cache, &dummy_page);
2676 			mutex_unlock(&temp_cache->lock);
2677 			dummy_page.state = PAGE_STATE_INACTIVE;
2678 		}
2679 	}
2680 
2681 	err = B_OK;
2682 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
2683 	if (change_count != addressSpace->change_count) {
2684 		// something may have changed, see if the address is still valid
2685 		area = vm_area_lookup(addressSpace, address);
2686 		if (area == NULL
2687 			|| area->cache_ref != top_cache_ref
2688 			|| (address - area->base + area->cache_offset) != cacheOffset) {
2689 			dprintf("vm_soft_fault: address space layout changed affecting the ongoing soft fault\n");
2690 			err = B_BAD_ADDRESS;
2691 		}
2692 	}
2693 
2694 	if (err == B_OK) {
2695 		// All went fine, all there is left to do is to map the page into the address space
2696 
2697 		// If the page doesn't reside in the area's cache, we need to make sure it's
2698 		// mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write)
2699 		uint32 newProtection = area->protection;
2700 		if (page->cache != top_cache_ref->cache && !isWrite)
2701 			newProtection &= ~(isUser ? B_WRITE_AREA : B_KERNEL_WRITE_AREA);
2702 
2703 		atomic_add(&page->ref_count, 1);
2704 		(*addressSpace->translation_map.ops->lock)(&addressSpace->translation_map);
2705 		(*addressSpace->translation_map.ops->map)(&addressSpace->translation_map, address,
2706 			page->physical_page_number * B_PAGE_SIZE, newProtection);
2707 		(*addressSpace->translation_map.ops->unlock)(&addressSpace->translation_map);
2708 	}
2709 
2710 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
2711 
2712 	if (dummy_page.state == PAGE_STATE_BUSY) {
2713 		// We still have the dummy page in the cache - that happens if we didn't need
2714 		// to allocate a new page before, but could use one in another cache
2715 		vm_cache_ref *temp_cache = dummy_page.cache->ref;
2716 		mutex_lock(&temp_cache->lock);
2717 		vm_cache_remove_page(temp_cache, &dummy_page);
2718 		mutex_unlock(&temp_cache->lock);
2719 		dummy_page.state = PAGE_STATE_INACTIVE;
2720 	}
2721 
2722 	vm_page_set_state(page, PAGE_STATE_ACTIVE);
2723 
2724 	vm_cache_release_ref(top_cache_ref);
2725 	vm_put_address_space(addressSpace);
2726 
2727 	return err;
2728 }
2729 
2730 
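/**	Returns the area that covers the given address, or NULL if there is none.
 *	The caller is expected to hold a read lock on the address space; on a hit
 *	the address space's area_hint is updated.
 */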
2731 static vm_area *
2732 vm_area_lookup(vm_address_space *addressSpace, addr_t address)
2733 {
2734 	vm_area *area;
2735 
2736 	// check the areas list first
2737 	area = addressSpace->area_hint;
2738 	if (area && area->base <= address && (area->base + area->size) > address)
2739 		return area;
2740 
2741 	for (area = addressSpace->areas; area != NULL; area = area->address_space_next) {
2742 		if (area->id == RESERVED_AREA_ID)
2743 			continue;
2744 
2745 		if (area->base <= address && (area->base + area->size) > address)
2746 			break;
2747 	}
2748 
2749 	if (area)
2750 		addressSpace->area_hint = area;
2751 	return area;
2752 }
2753 
2754 
2755 status_t
2756 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, uint32 flags)
2757 {
2758 	return (*kernel_aspace->translation_map.ops->get_physical_page)(paddr, _vaddr, flags);
2759 }
2760 
2761 
2762 status_t
2763 vm_put_physical_page(addr_t vaddr)
2764 {
2765 	return (*kernel_aspace->translation_map.ops->put_physical_page)(vaddr);
2766 }
2767 
2768 
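/**	Gives memory that was previously set aside with vm_try_reserve_memory()
 *	back to the global accounting of available memory.
 */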
2769 void
2770 vm_unreserve_memory(size_t amount)
2771 {
2772 	benaphore_lock(&sAvailableMemoryLock);
2773 
2774 	sAvailableMemory += amount;
2775 
2776 	benaphore_unlock(&sAvailableMemoryLock);
2777 }
2778 
2779 
2780 status_t
2781 vm_try_reserve_memory(size_t amount)
2782 {
2783 	status_t status;
2784 	benaphore_lock(&sAvailableMemoryLock);
2785 
2786 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
2787 
2788 	if (sAvailableMemory >= amount) {
2789 		sAvailableMemory -= amount;
2790 		status = B_OK;
2791 	} else
2792 		status = B_NO_MEMORY;
2793 
2794 	benaphore_unlock(&sAvailableMemoryLock);
2795 	return status;
2796 }
2797 
2798 
2799 status_t
2800 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
2801 {
2802 	vm_area *area = vm_get_area(id);
2803 	if (area == NULL)
2804 		return B_BAD_VALUE;
2805 
2806 	status_t status = arch_vm_set_memory_type(area, physicalBase, type);
2807 
2808 	vm_put_area(area);
2809 	return status;
2810 }
2811 
2812 
2813 /**	This function enforces some protection properties:
2814  *	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
2815  *	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
2816  *	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
2817  *	   and B_KERNEL_WRITE_AREA.
2818  */
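/*	For example, a plain B_READ_AREA request comes out of fix_protection()
 *	as B_READ_AREA | B_KERNEL_READ_AREA, while a request that includes
 *	B_WRITE_AREA additionally gets B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.
 */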
2819 
2820 static void
2821 fix_protection(uint32 *protection)
2822 {
2823 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
2824 		if ((*protection & B_USER_PROTECTION) == 0
2825 			|| (*protection & B_WRITE_AREA) != 0)
2826 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
2827 		else
2828 			*protection |= B_KERNEL_READ_AREA;
2829 	}
2830 }
2831 
2832 
2833 //	#pragma mark -
2834 
2835 
2836 status_t
2837 user_memcpy(void *to, const void *from, size_t size)
2838 {
2839 	return arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler);
2840 }
2841 
2842 
2843 /**	\brief Copies at most (\a size - 1) characters from the string in \a from to
2844  *	the string in \a to, NULL-terminating the result.
2845  *
2846  *	\param to Pointer to the destination C-string.
2847  *	\param from Pointer to the source C-string.
2848  *	\param size Size in bytes of the string buffer pointed to by \a to.
2849  *
2850  *	\return strlen(\a from).
2851  */
2852 
2853 ssize_t
2854 user_strlcpy(char *to, const char *from, size_t size)
2855 {
2856 	return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler);
2857 }
2858 
2859 
2860 status_t
2861 user_memset(void *s, char c, size_t count)
2862 {
2863 	return arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler);
2864 }
2865 
2866 
2867 //	#pragma mark -
2868 
2869 
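/**	Makes sure that all pages within the given range are present and, if
 *	B_READ_DEVICE is set in \a flags, writable. Real locking is not supported
 *	by the VM yet - see the ToDo comments below.
 */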
2870 long
2871 lock_memory(void *address, ulong numBytes, ulong flags)
2872 {
2873 	vm_address_space *addressSpace = NULL;
2874 	struct vm_translation_map *map;
2875 	addr_t base = (addr_t)address;
2876 	addr_t end = base + numBytes;
2877 	bool isUser = IS_USER_ADDRESS(address);
2878 
2879 	// ToDo: Our VM currently doesn't support locking, this function
2880 	//	will now at least make sure that the memory is paged in, but
2881 	//	that's about it.
2882 	//	Nevertheless, it must be implemented as soon as we're able to
2883 	//	swap pages out of memory.
2884 
2885 	// ToDo: this is a hack, too; the iospace area is a null region and
2886 	//	officially cannot be written to or read; i.e. vm_soft_fault() will
2887 	//	fail there. Furthermore, this is x86-specific as well.
2888 	#define IOSPACE_SIZE (256 * 1024 * 1024)
2889 	if (base >= KERNEL_BASE + IOSPACE_SIZE && base + numBytes < KERNEL_BASE + 2 * IOSPACE_SIZE)
2890 		return B_OK;
2891 
2892 	if (isUser)
2893 		addressSpace = vm_get_current_user_address_space();
2894 	else
2895 		addressSpace = vm_get_kernel_address_space();
2896 	if (addressSpace == NULL)
2897 		return B_ERROR;
2898 
2899 	map = &addressSpace->translation_map;
2900 
2901 	for (; base < end; base += B_PAGE_SIZE) {
2902 		addr_t physicalAddress;
2903 		uint32 protection;
2904 		status_t status;
2905 
2906 		map->ops->lock(map);
2907 		map->ops->query(map, base, &physicalAddress, &protection);
2908 		map->ops->unlock(map);
2909 
2910 		if ((protection & PAGE_PRESENT) != 0) {
2911 			// if B_READ_DEVICE is set, the caller intends to write to the locked
2912 			// memory, so if it hasn't been mapped writable, we'll try the soft
2913 			// fault anyway
2914 			if ((flags & B_READ_DEVICE) == 0
2915 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
2916 				continue;
2917 		}
2918 
2919 		status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser);
2920 		if (status != B_OK)	{
2921 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n",
2922 				address, numBytes, flags, strerror(status));
2923 			vm_put_address_space(addressSpace);
2924 			return status;
2925 		}
2926 	}
2927 
2928 	vm_put_address_space(addressSpace);
2929 	return B_OK;
2930 }
2931 
2932 
2933 long
2934 unlock_memory(void *buffer, ulong numBytes, ulong flags)
2935 {
2936 	return B_OK;
2937 }
2938 
2939 
2940 /** According to the BeBook, this function should always succeed.
2941  *	This is no longer the case.
2942  */
2943 
2944 long
2945 get_memory_map(const void *address, ulong numBytes, physical_entry *table, long numEntries)
2946 {
2947 	vm_address_space *addressSpace;
2948 	addr_t virtualAddress = (addr_t)address;
2949 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
2950 	addr_t physicalAddress;
2951 	status_t status = B_OK;
2952 	int32 index = -1;
2953 	addr_t offset = 0;
2954 	uint32 flags;
2955 
2956 	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes, numEntries));
2957 
2958 	if (numEntries == 0 || numBytes == 0)
2959 		return B_BAD_VALUE;
2960 
2961 	// in which address space is the address to be found?
2962 	if (IS_USER_ADDRESS(virtualAddress))
2963 		addressSpace = vm_get_current_user_address_space();
2964 	else
2965 		addressSpace = vm_get_kernel_address_space();
2966 
2967 	if (addressSpace == NULL)
2968 		return B_ERROR;
2969 
2970 	(*addressSpace->translation_map.ops->lock)(&addressSpace->translation_map);
2971 
2972 	while (offset < numBytes) {
2973 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
2974 
2975 		status = (*addressSpace->translation_map.ops->query)(&addressSpace->translation_map,
2976 					(addr_t)address + offset, &physicalAddress, &flags);
2977 		if (status < 0)
2978 			break;
2979 
2980 		if (index < 0 && pageOffset > 0) {
2981 			physicalAddress += pageOffset;
2982 			if (bytes > B_PAGE_SIZE - pageOffset)
2983 				bytes = B_PAGE_SIZE - pageOffset;
2984 		}
2985 
2986 		// need to switch to the next physical_entry?
2987 		if (index < 0 || (addr_t)table[index].address != physicalAddress - table[index].size) {
2988 			if (++index + 1 > numEntries) {
2989 				// table too small
2990 				status = B_BUFFER_OVERFLOW;
2991 				break;
2992 			}
2993 			table[index].address = (void *)physicalAddress;
2994 			table[index].size = bytes;
2995 		} else {
2996 			// the page is contiguous with the current entry, so just extend it
2997 			table[index].size += bytes;
2998 		}
2999 
3000 		offset += bytes;
3001 	}
3002 	(*addressSpace->translation_map.ops->unlock)(&addressSpace->translation_map);
3003 
3004 	// close the entry list
3005 
3006 	if (status == B_OK) {
3007 		// if it's only one entry, we will silently accept the missing ending
3008 		if (numEntries == 1)
3009 			return B_OK;
3010 
3011 		if (++index + 1 > numEntries)
3012 			return B_BUFFER_OVERFLOW;
3013 
3014 		table[index].address = NULL;
3015 		table[index].size = 0;
3016 	}
3017 
3018 	return status;
3019 }
3020 
3021 
3022 area_id
3023 area_for(void *address)
3024 {
3025 	return vm_area_for(vm_kernel_address_space_id(), (addr_t)address);
3026 }
3027 
3028 
3029 area_id
3030 find_area(const char *name)
3031 {
3032 	struct hash_iterator iterator;
3033 	vm_area *area;
3034 	area_id id = B_NAME_NOT_FOUND;
3035 
3036 	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
3037 	hash_open(sAreaHash, &iterator);
3038 
3039 	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
3040 		if (area->id == RESERVED_AREA_ID)
3041 			continue;
3042 
3043 		if (!strcmp(area->name, name)) {
3044 			id = area->id;
3045 			break;
3046 		}
3047 	}
3048 
3049 	hash_close(sAreaHash, &iterator, false);
3050 	release_sem_etc(sAreaHashLock, READ_COUNT, 0);
3051 
3052 	return id;
3053 }
3054 
3055 
3056 static void
3057 fill_area_info(struct vm_area *area, area_info *info, size_t size)
3058 {
3059 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
3060 	info->area = area->id;
3061 	info->address = (void *)area->base;
3062 	info->size = area->size;
3063 	info->protection = area->protection & B_USER_PROTECTION;
3064 	info->lock = B_FULL_LOCK;
3065 	info->team = area->address_space->id;
3066 	info->ram_size = area->size;
3067 	info->copy_count = 0;
3068 	info->in_count = 0;
3069 	info->out_count = 0;
3070 		// ToDo: retrieve real values here!
3071 }
3072 
3073 
3074 status_t
3075 _get_area_info(area_id id, area_info *info, size_t size)
3076 {
3077 	vm_area *area;
3078 
3079 	if (size != sizeof(area_info) || info == NULL)
3080 		return B_BAD_VALUE;
3081 
3082 	area = vm_get_area(id);
3083 	if (area == NULL)
3084 		return B_BAD_VALUE;
3085 
3086 	fill_area_info(area, info, size);
3087 	vm_put_area(area);
3088 
3089 	return B_OK;
3090 }
3091 
3092 
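/**	Iterates over the areas of the given team. The cookie stores the base
 *	address of the area that was reported last; pass 0 to start the iteration.
 */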
3093 status_t
3094 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
3095 {
3096 	addr_t nextBase = *(addr_t *)cookie;
3097 	vm_address_space *addressSpace;
3098 	vm_area *area;
3099 
3100 	// we're already through the list
3101 	if (nextBase == (addr_t)-1)
3102 		return B_ENTRY_NOT_FOUND;
3103 
3104 	if (team == B_CURRENT_TEAM)
3105 		team = team_get_current_team_id();
3106 
3107 	if (!team_is_valid(team)
3108 		|| team_get_address_space(team, &addressSpace) != B_OK)
3109 		return B_BAD_VALUE;
3110 
3111 	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);
3112 
3113 	for (area = addressSpace->areas; area; area = area->address_space_next) {
3114 		if (area->id == RESERVED_AREA_ID)
3115 			continue;
3116 
3117 		if (area->base > nextBase)
3118 			break;
3119 	}
3120 
3121 	// make sure this area won't go away
3122 	if (area != NULL)
3123 		area = vm_get_area(area->id);
3124 
3125 	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
3126 	vm_put_address_space(addressSpace);
3127 
3128 	if (area == NULL) {
3129 		nextBase = (addr_t)-1;
		*cookie = (int32)nextBase;
3130 		return B_ENTRY_NOT_FOUND;
3131 	}
3132 
3133 	fill_area_info(area, info, size);
3134 	*cookie = (int32)(area->base);
3135 
3136 	vm_put_area(area);
3137 
3138 	return B_OK;
3139 }
3140 
3141 
3142 status_t
3143 set_area_protection(area_id area, uint32 newProtection)
3144 {
3145 	fix_protection(&newProtection);
3146 
3147 	return vm_set_area_protection(vm_kernel_address_space_id(), area, newProtection);
3148 }
3149 
3150 
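/**	Resizes the given area together with all other areas attached to its
 *	cache. Only anonymous (temporary) caches may currently be resized; growing
 *	requires that each affected area has enough free or reserved address space
 *	behind it.
 */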
3151 status_t
3152 resize_area(area_id areaID, size_t newSize)
3153 {
3154 	vm_cache_ref *cache;
3155 	vm_area *area, *current;
3156 	status_t status = B_OK;
3157 	size_t oldSize;
3158 
3159 	// is newSize a multiple of B_PAGE_SIZE?
3160 	if (newSize & (B_PAGE_SIZE - 1))
3161 		return B_BAD_VALUE;
3162 
3163 	area = vm_get_area(areaID);
3164 	if (area == NULL)
3165 		return B_BAD_VALUE;
3166 
3167 	// Resize all areas of this area's cache
3168 
3169 	cache = area->cache_ref;
3170 	oldSize = area->size;
3171 
3172 	// ToDo: we should only allow to resize anonymous memory areas!
3173 	if (!cache->cache->temporary) {
3174 		status = B_NOT_ALLOWED;
3175 		goto err1;
3176 	}
3177 
3178 	// ToDo: we must lock all address spaces here!
3179 
3180 	mutex_lock(&cache->lock);
3181 
3182 	if (oldSize < newSize) {
3183 		// We need to check if all areas of this cache can be resized
3184 
3185 		for (current = cache->areas; current; current = current->cache_next) {
3186 			if (current->address_space_next && current->address_space_next->base <= (current->base + newSize)) {
3187 				// if the area was created inside a reserved area, it can also be
3188 				// resized in that area
3189 				// ToDo: if there is free space after the reserved area, it could be used as well...
3190 				vm_area *next = current->address_space_next;
3191 				if (next->id == RESERVED_AREA_ID && next->cache_offset <= current->base
3192 					&& next->base - 1 + next->size >= current->base - 1 + newSize)
3193 					continue;
3194 
3195 				status = B_ERROR;
3196 				goto err2;
3197 			}
3198 		}
3199 	}
3200 
3201 	// Okay, looks good so far, so let's do it
3202 
3203 	for (current = cache->areas; current; current = current->cache_next) {
3204 		if (current->address_space_next && current->address_space_next->base <= (current->base + newSize)) {
3205 			vm_area *next = current->address_space_next;
3206 			if (next->id == RESERVED_AREA_ID && next->cache_offset <= current->base
3207 				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
3208 				// resize reserved area
3209 				addr_t offset = current->base + newSize - next->base;
3210 				if (next->size <= offset) {
3211 					current->address_space_next = next->address_space_next;
3212 					free(next);
3213 				} else {
3214 					next->size -= offset;
3215 					next->base += offset;
3216 				}
3217 			} else {
3218 				status = B_ERROR;
3219 				break;
3220 			}
3221 		}
3222 
3223 		current->size = newSize;
3224 
3225 		// we also need to unmap all pages beyond the new size, if the area has shrunk
3226 		if (newSize < oldSize) {
3227 			vm_translation_map *map = &current->address_space->translation_map;
3228 
3229 			map->ops->lock(map);
3230 			map->ops->unmap(map, current->base + newSize, current->base + oldSize - 1);
3231 			map->ops->unlock(map);
3232 		}
3233 	}
3234 
3235 	if (status == B_OK)
3236 		status = vm_cache_resize(cache, newSize);
3237 
3238 	if (status < B_OK) {
3239 		// This shouldn't really be possible, but hey, who knows
3240 		for (current = cache->areas; current; current = current->cache_next)
3241 			current->size = oldSize;
3242 	}
3243 
3244 err2:
3245 	mutex_unlock(&cache->lock);
3246 err1:
3247 	vm_put_area(area);
3248 
3249 	// ToDo: we must honour the lock restrictions of this area
3250 	return status;
3251 }
3252 
3253 
3254 /**	Transfers the specified area to a new team. The caller must be the owner
3255  *	of the area (not yet enforced but probably should be).
3256  *	This function is currently not exported to the kernel namespace, but is
3257  *	only accessible using the _kern_transfer_area() syscall.
3258  */
3259 
3260 static status_t
3261 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
3262 {
3263 	vm_address_space *sourceAddressSpace, *targetAddressSpace;
3264 	vm_translation_map *map;
3265 	vm_area *area, *reserved;
3266 	void *reservedAddress;
3267 	status_t status;
3268 
3269 	area = vm_get_area(id);
3270 	if (area == NULL)
3271 		return B_BAD_VALUE;
3272 
3273 	// ToDo: check if the current team owns the area
3274 
3275 	status = team_get_address_space(target, &targetAddressSpace);
3276 	if (status != B_OK)
3277 		goto err1;
3278 
3279 	// We will first remove the area, and then reserve its former
3280 	// address range so that we can later reclaim it if the
3281 	// transfer failed.
3282 
3283 	sourceAddressSpace = area->address_space;
3284 
3285 	reserved = _vm_create_reserved_region_struct(sourceAddressSpace, 0);
3286 	if (reserved == NULL) {
3287 		status = B_NO_MEMORY;
3288 		goto err2;
3289 	}
3290 
3291 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
3292 
3293 	reservedAddress = (void *)area->base;
3294 	remove_area_from_address_space(sourceAddressSpace, area, true);
3295 	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
3296 		area->size, reserved);
3297 		// famous last words: this cannot fail :)
3298 
3299 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
3300 
3301 	if (status != B_OK)
3302 		goto err3;
3303 
3304 	// unmap the area in the source address space
3305 	map = &sourceAddressSpace->translation_map;
3306 	map->ops->lock(map);
3307 	map->ops->unmap(map, area->base, area->base + (area->size - 1));
3308 	map->ops->unlock(map);
3309 
3310 	// insert the area into the target address space
3311 
3312 	acquire_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0, 0);
3313 	// check to see if this address space has entered DELETE state
3314 	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
3315 		// okay, someone is trying to delete this address space now, so we can't
3316 		// insert the area, so back out
3317 		status = B_BAD_TEAM_ID;
3318 		goto err4;
3319 	}
3320 
3321 	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
3322 	if (status < B_OK)
3323 		goto err4;
3324 
3325 	// The area was successfully transferred to the new team when we got here
3326 	area->address_space = targetAddressSpace;
3327 
3328 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
3329 
3330 	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress, area->size);
3331 	vm_put_address_space(sourceAddressSpace);
3332 		// we keep the reference of the target address space for the
3333 		// area, so we only have to put the one from the source
3334 	vm_put_area(area);
3335 
3336 	return B_OK;
3337 
3338 err4:
3339 	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
3340 err3:
3341 	// insert the area again into the source address space
3342 	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
3343 	// check to see if this address space has entered DELETE state
3344 	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
3345 		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS, area->size, area) != B_OK) {
3346 		// We can't insert the area anymore - we have to delete it manually
3347 		vm_cache_remove_area(area->cache_ref, area);
3348 		vm_cache_release_ref(area->cache_ref);
3349 		free(area->name);
3350 		free(area);
3351 		area = NULL;
3352 	}
3353 	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
3354 err2:
3355 	vm_put_address_space(targetAddressSpace);
3356 err1:
3357 	if (area != NULL)
3358 		vm_put_area(area);
3359 	return status;
3360 }
3361 
3362 
3363 area_id
3364 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
3365 	uint32 addressSpec, uint32 protection, void **_virtualAddress)
3366 {
3367 	if (!arch_vm_supports_protection(protection))
3368 		return B_NOT_SUPPORTED;
3369 
3370 	fix_protection(&protection);
3371 
3372 	return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress,
3373 		addressSpec, numBytes, protection, (addr_t)physicalAddress);
3374 }
3375 
3376 
3377 area_id
3378 clone_area(const char *name, void **_address, uint32 addressSpec, uint32 protection,
3379 	area_id source)
3380 {
3381 	if ((protection & B_KERNEL_PROTECTION) == 0)
3382 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
3383 
3384 	return vm_clone_area(vm_kernel_address_space_id(), name, _address, addressSpec,
3385 				protection, REGION_NO_PRIVATE_MAP, source);
3386 }
3387 
3388 
3389 area_id
3390 create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
3391 	uint32 size, uint32 lock, uint32 protection)
3392 {
3393 	fix_protection(&protection);
3394 
3395 	return vm_create_anonymous_area(team->id, (char *)name, address,
3396 		addressSpec, size, lock, protection);
3397 }
3398 
3399 
3400 area_id
3401 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
3402 	uint32 protection)
3403 {
3404 	fix_protection(&protection);
3405 
3406 	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address,
3407 		addressSpec, size, lock, protection);
3408 }
3409 
3410 
3411 status_t
3412 delete_area_etc(struct team *team, area_id area)
3413 {
3414 	return vm_delete_area(team->id, area);
3415 }
3416 
3417 
3418 status_t
3419 delete_area(area_id area)
3420 {
3421 	return vm_delete_area(vm_kernel_address_space_id(), area);
3422 }
3423 
3424 
3425 //	#pragma mark -
3426 
3427 
3428 status_t
3429 _user_init_heap_address_range(addr_t base, addr_t size)
3430 {
3431 	return vm_reserve_address_range(vm_current_user_address_space_id(), (void **)&base,
3432 		B_EXACT_ADDRESS, size, RESERVED_AVOID_BASE);
3433 }
3434 
3435 
3436 area_id
3437 _user_area_for(void *address)
3438 {
3439 	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
3440 }
3441 
3442 
3443 area_id
3444 _user_find_area(const char *userName)
3445 {
3446 	char name[B_OS_NAME_LENGTH];
3447 
3448 	if (!IS_USER_ADDRESS(userName)
3449 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
3450 		return B_BAD_ADDRESS;
3451 
3452 	return find_area(name);
3453 }
3454 
3455 
3456 status_t
3457 _user_get_area_info(area_id area, area_info *userInfo)
3458 {
3459 	area_info info;
3460 	status_t status;
3461 
3462 	if (!IS_USER_ADDRESS(userInfo))
3463 		return B_BAD_ADDRESS;
3464 
3465 	status = get_area_info(area, &info);
3466 	if (status < B_OK)
3467 		return status;
3468 
3469 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
3470 		return B_BAD_ADDRESS;
3471 
3472 	return status;
3473 }
3474 
3475 
3476 status_t
3477 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
3478 {
3479 	status_t status;
3480 	area_info info;
3481 	int32 cookie;
3482 
3483 	if (!IS_USER_ADDRESS(userCookie)
3484 		|| !IS_USER_ADDRESS(userInfo)
3485 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
3486 		return B_BAD_ADDRESS;
3487 
3488 	status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
3489 	if (status != B_OK)
3490 		return status;
3491 
3492 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
3493 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
3494 		return B_BAD_ADDRESS;
3495 
3496 	return status;
3497 }
3498 
3499 
3500 status_t
3501 _user_set_area_protection(area_id area, uint32 newProtection)
3502 {
3503 	if ((newProtection & ~B_USER_PROTECTION) != 0)
3504 		return B_BAD_VALUE;
3505 
3506 	fix_protection(&newProtection);
3507 
3508 	return vm_set_area_protection(vm_current_user_address_space_id(), area,
3509 		newProtection);
3510 }
3511 
3512 
3513 status_t
3514 _user_resize_area(area_id area, size_t newSize)
3515 {
3516 	// ToDo: Since we restrict deleting of areas to those owned by the team,
3517 	// we should also do that for resizing (check other functions, too).
3518 	return resize_area(area, newSize);
3519 }
3520 
3521 
3522 status_t
3523 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
3524 {
3525 	status_t status;
3526 	void *address;
3527 
3528 	// filter out some unavailable values (for userland)
3529 	switch (addressSpec) {
3530 		case B_ANY_KERNEL_ADDRESS:
3531 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3532 			return B_BAD_VALUE;
3533 	}
3534 
3535 	if (!IS_USER_ADDRESS(userAddress)
3536 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3537 		return B_BAD_ADDRESS;
3538 
3539 	status = transfer_area(area, &address, addressSpec, target);
3540 	if (status < B_OK)
3541 		return status;
3542 
3543 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
3544 		return B_BAD_ADDRESS;
3545 
3546 	return status;
3547 }
3548 
3549 
3550 area_id
3551 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
3552 	uint32 protection, area_id sourceArea)
3553 {
3554 	char name[B_OS_NAME_LENGTH];
3555 	void *address;
3556 	area_id clonedArea;
3557 
3558 	// filter out some unavailable values (for userland)
3559 	switch (addressSpec) {
3560 		case B_ANY_KERNEL_ADDRESS:
3561 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3562 			return B_BAD_VALUE;
3563 	}
3564 	if ((protection & ~B_USER_PROTECTION) != 0)
3565 		return B_BAD_VALUE;
3566 
3567 	if (!IS_USER_ADDRESS(userName)
3568 		|| !IS_USER_ADDRESS(userAddress)
3569 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
3570 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3571 		return B_BAD_ADDRESS;
3572 
3573 	fix_protection(&protection);
3574 
3575 	clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address,
3576 		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
3577 	if (clonedArea < B_OK)
3578 		return clonedArea;
3579 
3580 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
3581 		delete_area(clonedArea);
3582 		return B_BAD_ADDRESS;
3583 	}
3584 
3585 	return clonedArea;
3586 }
3587 
3588 
3589 area_id
3590 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
3591 	size_t size, uint32 lock, uint32 protection)
3592 {
3593 	char name[B_OS_NAME_LENGTH];
3594 	area_id area;
3595 	void *address;
3596 
3597 	// filter out some unavailable values (for userland)
3598 	switch (addressSpec) {
3599 		case B_ANY_KERNEL_ADDRESS:
3600 		case B_ANY_KERNEL_BLOCK_ADDRESS:
3601 			return B_BAD_VALUE;
3602 	}
3603 	if ((protection & ~B_USER_PROTECTION) != 0)
3604 		return B_BAD_VALUE;
3605 
3606 	if (!IS_USER_ADDRESS(userName)
3607 		|| !IS_USER_ADDRESS(userAddress)
3608 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
3609 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
3610 		return B_BAD_ADDRESS;
3611 
3612 	if (addressSpec == B_EXACT_ADDRESS
3613 		&& IS_KERNEL_ADDRESS(address))
3614 		return B_BAD_VALUE;
3615 
3616 	fix_protection(&protection);
3617 
3618 	area = vm_create_anonymous_area(vm_current_user_address_space_id(),
3619 		(char *)name, &address, addressSpec, size, lock, protection);
3620 
3621 	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
3622 		delete_area(area);
3623 		return B_BAD_ADDRESS;
3624 	}
3625 
3626 	return area;
3627 }
3628 
3629 
3630 status_t
3631 _user_delete_area(area_id area)
3632 {
3633 	// Unlike the BeOS implementation, you can now only delete areas
3634 	// that you have created yourself from userland.
3635 	// The documentation to delete_area() explicitly states that this
3636 	// will be restricted in the future, and so it will.
3637 	return vm_delete_area(vm_current_user_address_space_id(), area);
3638 }
3639 
3640