xref: /haiku/src/system/kernel/arch/x86/arch_vm_translation_map.cpp (revision 03187b607b2b5eec7ee059f1ead09bdba14991fb)
1 /*
2  * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 #include <arch/vm_translation_map.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include <AutoDeleter.h>
16 
17 #include <arch_system_info.h>
18 #include <heap.h>
19 #include <int.h>
20 #include <thread.h>
21 #include <smp.h>
22 #include <util/AutoLock.h>
23 #include <util/queue.h>
24 #include <vm_address_space.h>
25 #include <vm_page.h>
26 #include <vm_priv.h>
27 
28 #include "x86_paging.h"
29 #include "x86_physical_page_mapper.h"
30 
31 
32 //#define TRACE_VM_TMAP
33 #ifdef TRACE_VM_TMAP
34 #	define TRACE(x) dprintf x
35 #else
36 #	define TRACE(x) ;
37 #endif
38 
39 static page_table_entry *sPageHole = NULL;
40 static page_directory_entry *sPageHolePageDir = NULL;
41 static page_directory_entry *sKernelPhysicalPageDirectory = NULL;
42 static page_directory_entry *sKernelVirtualPageDirectory = NULL;
43 
44 
45 // Accessor class to reuse the SinglyLinkedListLink of DeferredDeletable for
46 // vm_translation_map_arch_info.
47 struct ArchTMapGetLink {
48 private:
49 	typedef SinglyLinkedListLink<vm_translation_map_arch_info> Link;
50 
51 public:
52 	inline Link* operator()(vm_translation_map_arch_info* element) const
53 	{
54 		return (Link*)element->GetSinglyLinkedListLink();
55 	}
56 
57 	inline const Link* operator()(
58 		const vm_translation_map_arch_info* element) const
59 	{
60 		return (const Link*)element->GetSinglyLinkedListLink();
61 	}
62 
63 };
64 
65 
66 typedef SinglyLinkedList<vm_translation_map_arch_info, ArchTMapGetLink>
67 	ArchTMapList;
68 
69 
70 static ArchTMapList sTMapList;
71 static spinlock sTMapListLock;
72 
73 #define CHATTY_TMAP 0
74 
75 #define FIRST_USER_PGDIR_ENT    (VADDR_TO_PDENT(USER_BASE))
76 #define NUM_USER_PGDIR_ENTS     (VADDR_TO_PDENT(ROUNDUP(USER_SIZE, \
77 									B_PAGE_SIZE * 1024)))
78 #define FIRST_KERNEL_PGDIR_ENT  (VADDR_TO_PDENT(KERNEL_BASE))
79 #define NUM_KERNEL_PGDIR_ENTS   (VADDR_TO_PDENT(KERNEL_SIZE))
80 #define IS_KERNEL_MAP(map)		(map->arch_data->pgdir_phys \
81 									== sKernelPhysicalPageDirectory)
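
// On 32-bit x86 without PAE, a virtual address splits into a 10-bit page
// directory index, a 10-bit page table index, and a 12-bit page offset, so
// each page directory entry (i.e. each page table) covers
// B_PAGE_SIZE * 1024 = 4 MB. With the usual 2 GB / 2 GB user/kernel split the
// user mappings occupy the lower half of the page directory and the kernel
// mappings the upper half, which is what the macros above compute.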
82 
83 static status_t early_query(addr_t va, addr_t *out_physical);
84 
85 static void flush_tmap(vm_translation_map *map);
86 
87 
88 void *
89 i386_translation_map_get_pgdir(vm_translation_map *map)
90 {
91 	return map->arch_data->pgdir_phys;
92 }
93 
94 
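/*!	Writes page directory entry \a e into slot \a index of every page
	directory registered in sTMapList. Each team's page directory carries a
	copy of the kernel portion of sKernelVirtualPageDirectory (see
	arch_vm_translation_map_init_map()), so a page table added for a kernel
	address has to show up in all of them.
*/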
95 void
96 x86_update_all_pgdirs(int index, page_directory_entry e)
97 {
98 	cpu_status state = disable_interrupts();
99 
100 	acquire_spinlock(&sTMapListLock);
101 
102 	ArchTMapList::Iterator it = sTMapList.GetIterator();
103 	while (vm_translation_map_arch_info* info = it.Next())
104 		info->pgdir_virt[index] = e;
105 
106 	release_spinlock(&sTMapListLock);
107 	restore_interrupts(state);
108 }
109 
110 
111 // XXX currently assumes this translation map is active
112 
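/*!	Looks up the physical address backing \a va using the boot loader's page
	hole. The hole maps all 1024 page tables as one contiguous array of page
	table entries, so the entry for \a va is simply
	sPageHole[va / B_PAGE_SIZE]; sPageHolePageDir is the page directory as
	seen through that same hole.
*/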
113 static status_t
114 early_query(addr_t va, addr_t *_physicalAddress)
115 {
116 	page_table_entry *pentry;
117 
118 	if (sPageHolePageDir[VADDR_TO_PDENT(va)].present == 0) {
119 		// no pagetable here
120 		return B_ERROR;
121 	}
122 
123 	pentry = sPageHole + va / B_PAGE_SIZE;
124 	if (pentry->present == 0) {
125 		// page mapping not valid
126 		return B_ERROR;
127 	}
128 
129 	*_physicalAddress = ADDR_REVERSE_SHIFT(pentry->addr);
130 	return B_OK;
131 }
132 
133 
134 /*!	Acquires the map's recursive lock, and resets the invalidate pages
135 	counter if this is the first locking recursion.
136 */
137 static status_t
138 lock_tmap(vm_translation_map *map)
139 {
140 	TRACE(("lock_tmap: map %p\n", map));
141 
142 	recursive_lock_lock(&map->lock);
143 	if (recursive_lock_get_recursion(&map->lock) == 1) {
144 		// we were the first one to grab the lock
145 		TRACE(("clearing invalidated page count\n"));
146 		map->arch_data->num_invalidate_pages = 0;
147 	}
148 
149 	return B_OK;
150 }
151 
152 
153 /*!	Unlocks the map and, if we're actually losing the recursive lock
154 	(i.e. this is the last recursion level), flushes all pending changes
155 	of this map (i.e. flushes the TLB caches as needed).
156 */
157 static status_t
158 unlock_tmap(vm_translation_map *map)
159 {
160 	TRACE(("unlock_tmap: map %p\n", map));
161 
162 	if (recursive_lock_get_recursion(&map->lock) == 1) {
163 		// we're about to release it for the last time
164 		flush_tmap(map);
165 	}
166 
167 	recursive_lock_unlock(&map->lock);
168 	return B_OK;
169 }
170 
171 
172 vm_translation_map_arch_info::vm_translation_map_arch_info()
173 	:
174 	pgdir_virt(NULL),
175 	ref_count(1)
176 {
177 }
178 
179 
180 vm_translation_map_arch_info::~vm_translation_map_arch_info()
181 {
182 	// free the page dir
183 	free(pgdir_virt);
184 }
185 
186 
187 void
188 vm_translation_map_arch_info::Delete()
189 {
190 	// remove from global list
191 	InterruptsSpinLocker locker(sTMapListLock);
192 	sTMapList.Remove(this);
193 	locker.Unlock();
194 
195 #if 0
196 	// this sanity check can be enabled when corruption due to
197 	// overwriting an active page directory is suspected
198 	addr_t activePageDirectory;
199 	read_cr3(activePageDirectory);
200 	if (activePageDirectory == (addr_t)pgdir_phys)
201 		panic("deleting a still active page directory\n");
202 #endif
203 
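	// With interrupts disabled we must not free memory here (the allocator
	// may need to acquire locks), so defer the deletion in that case.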
204 	if (are_interrupts_enabled())
205 		delete this;
206 	else
207 		deferred_delete(this);
208 }
209 
210 
211 static void
212 destroy_tmap(vm_translation_map *map)
213 {
214 	if (map == NULL)
215 		return;
216 
217 	if (map->arch_data->page_mapper != NULL)
218 		map->arch_data->page_mapper->Delete();
219 
220 	if (map->arch_data->pgdir_virt != NULL) {
221 		// cycle through and free all of the user space pgtables
222 		for (uint32 i = VADDR_TO_PDENT(USER_BASE);
223 				i <= VADDR_TO_PDENT(USER_BASE + (USER_SIZE - 1)); i++) {
224 			addr_t pgtable_addr;
225 			vm_page *page;
226 
227 			if (map->arch_data->pgdir_virt[i].present == 1) {
228 				pgtable_addr = map->arch_data->pgdir_virt[i].addr;
229 				page = vm_lookup_page(pgtable_addr);
230 				if (!page)
231 					panic("destroy_tmap: didn't find pgtable page\n");
232 				vm_page_set_state(page, PAGE_STATE_FREE);
233 			}
234 		}
235 	}
236 
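	// This drops our reference; once the last one is gone, Delete()
	// unregisters the arch info from sTMapList and the destructor frees the
	// page directory itself.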
237 	map->arch_data->RemoveReference();
238 
239 	recursive_lock_destroy(&map->lock);
240 }
241 
242 
243 void
244 x86_put_pgtable_in_pgdir(page_directory_entry *entry,
245 	addr_t pgtable_phys, uint32 attributes)
246 {
247 	page_directory_entry table;
248 	// put it in the pgdir
249 	init_page_directory_entry(&table);
250 	table.addr = ADDR_SHIFT(pgtable_phys);
251 
252 	// ToDo: we ignore the attributes of the page table - for compatibility
253 	//	with BeOS we allow having user-accessible areas in the kernel address
254 	//	space. This is currently being used by some drivers, mainly for the
255 	//	frame buffer. Our current real time data implementation makes use of
256 	//	this fact, too.
257 	//	We might want to get rid of this possibility one day, especially if
258 	//	we intend to port Haiku to a platform that does not support this.
259 	table.user = 1;
260 	table.rw = 1;
261 	table.present = 1;
262 	update_page_directory_entry(entry, &table);
263 }
264 
265 
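/*!	Fills in a page table entry for \a physicalAddress with \a attributes and
	publishes it via update_page_table_entry(). For the common protection
	values this results in (these legacy 32-bit page table entries have no
	no-execute bit):
	  B_READ_AREA          -> user = 1, rw = 0
	  B_WRITE_AREA         -> user = 1, rw = 1
	  B_KERNEL_READ_AREA   -> user = 0, rw = 0
	  B_KERNEL_WRITE_AREA  -> user = 0, rw = 1
	\a globalPage marks kernel mappings so they survive a CR3 reload when the
	global pages CPU feature is enabled (see arch_vm_translation_map_init()).
*/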
266 static void
267 put_page_table_entry_in_pgtable(page_table_entry *entry,
268 	addr_t physicalAddress, uint32 attributes, bool globalPage)
269 {
270 	page_table_entry page;
271 	init_page_table_entry(&page);
272 
273 	page.addr = ADDR_SHIFT(physicalAddress);
274 
275 	// if the page is user accessible, it's automatically
276 	// accessible in kernel space, too (but with the same
277 	// protection)
278 	page.user = (attributes & B_USER_PROTECTION) != 0;
279 	if (page.user)
280 		page.rw = (attributes & B_WRITE_AREA) != 0;
281 	else
282 		page.rw = (attributes & B_KERNEL_WRITE_AREA) != 0;
283 	page.present = 1;
284 
285 	if (globalPage)
286 		page.global = 1;
287 
288 	// put it in the page table
289 	update_page_table_entry(entry, &page);
290 }
291 
292 
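/*!	Returns the maximum number of page tables that mapping the range
	\a start .. \a end may require to be allocated. One page table covers
	4 MB of address space, so this is simply the number of page directory
	slots the range touches; an 8 KB range straddling a 4 MB boundary, for
	instance, touches two. If \a start is 0 the actual base address isn't
	known yet, so the range is shifted to the worst possible alignment.
*/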
293 static size_t
294 map_max_pages_need(vm_translation_map */*map*/, addr_t start, addr_t end)
295 {
296 	// If start == 0, the actual base address is not yet known to the caller and
297 	// we shall assume the worst case.
298 	if (start == 0) {
299 		// offset the range so it has the worst possible alignment
300 		start = 1023 * B_PAGE_SIZE;
301 		end += 1023 * B_PAGE_SIZE;
302 	}
303 
304 	return VADDR_TO_PDENT(end) + 1 - VADDR_TO_PDENT(start);
305 }
306 
307 
308 static status_t
309 map_tmap(vm_translation_map *map, addr_t va, addr_t pa, uint32 attributes)
310 {
311 	page_directory_entry *pd;
312 	page_table_entry *pt;
313 	unsigned int index;
314 
315 	TRACE(("map_tmap: entry pa 0x%lx va 0x%lx\n", pa, va));
316 
317 /*
318 	dprintf("pgdir at 0x%x\n", pgdir);
319 	dprintf("index is %d\n", va / B_PAGE_SIZE / 1024);
320 	dprintf("final at 0x%x\n", &pgdir[va / B_PAGE_SIZE / 1024]);
321 	dprintf("value is 0x%x\n", *(int *)&pgdir[va / B_PAGE_SIZE / 1024]);
322 	dprintf("present bit is %d\n", pgdir[va / B_PAGE_SIZE / 1024].present);
323 	dprintf("addr is %d\n", pgdir[va / B_PAGE_SIZE / 1024].addr);
324 */
325 	pd = map->arch_data->pgdir_virt;
326 
327 	// check to see if a page table exists for this range
328 	index = VADDR_TO_PDENT(va);
329 	if (pd[index].present == 0) {
330 		addr_t pgtable;
331 		vm_page *page;
332 
333 		// we need to allocate a pgtable
334 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
335 
336 		// mark the page WIRED
337 		vm_page_set_state(page, PAGE_STATE_WIRED);
338 
339 		pgtable = page->physical_page_number * B_PAGE_SIZE;
340 
341 		TRACE(("map_tmap: asked for free page for pgtable. 0x%lx\n", pgtable));
342 
343 		// put it in the pgdir
344 		x86_put_pgtable_in_pgdir(&pd[index], pgtable, attributes
345 			| ((attributes & B_USER_PROTECTION) != 0
346 					? B_WRITE_AREA : B_KERNEL_WRITE_AREA));
347 
348 		// update any other page directories, if it maps kernel space
349 		if (index >= FIRST_KERNEL_PGDIR_ENT
350 			&& index < (FIRST_KERNEL_PGDIR_ENT + NUM_KERNEL_PGDIR_ENTS))
351 			x86_update_all_pgdirs(index, pd[index]);
352 
353 		map->map_count++;
354 	}
355 
356 	// now, fill in the pentry
357 	struct thread* thread = thread_get_current_thread();
358 	ThreadCPUPinner pinner(thread);
359 
360 	pt = map->arch_data->page_mapper->GetPageTableAt(
361 		ADDR_REVERSE_SHIFT(pd[index].addr));
362 	index = VADDR_TO_PTENT(va);
363 
364 	put_page_table_entry_in_pgtable(&pt[index], pa, attributes,
365 		IS_KERNEL_MAP(map));
366 
367 	pinner.Unlock();
368 
369 	if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE) {
370 		map->arch_data->pages_to_invalidate[
371 			map->arch_data->num_invalidate_pages] = va;
372 	}
373 
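	// The counter is incremented even when the list is already full:
	// flush_tmap() treats a count above PAGE_INVALIDATE_CACHE_SIZE as a
	// request to invalidate everything.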
374 	map->arch_data->num_invalidate_pages++;
375 
376 	map->map_count++;
377 
378 	return B_OK;
379 }
380 
381 
382 static status_t
383 unmap_tmap(vm_translation_map *map, addr_t start, addr_t end)
384 {
385 	page_table_entry *pt;
386 	page_directory_entry *pd = map->arch_data->pgdir_virt;
387 	int index;
388 
389 	start = ROUNDDOWN(start, B_PAGE_SIZE);
390 	end = ROUNDUP(end, B_PAGE_SIZE);
391 
392 	TRACE(("unmap_tmap: asked to free pages 0x%lx to 0x%lx\n", start, end));
393 
394 restart:
395 	if (start >= end)
396 		return B_OK;
397 
398 	index = VADDR_TO_PDENT(start);
399 	if (pd[index].present == 0) {
400 		// no pagetable here, move the start up to access the next page table
401 		start = ROUNDUP(start + 1, B_PAGE_SIZE);
402 		goto restart;
403 	}
404 
405 	struct thread* thread = thread_get_current_thread();
406 	ThreadCPUPinner pinner(thread);
407 
408 	pt = map->arch_data->page_mapper->GetPageTableAt(
409 		ADDR_REVERSE_SHIFT(pd[index].addr));
410 
411 	for (index = VADDR_TO_PTENT(start); (index < 1024) && (start < end);
412 			index++, start += B_PAGE_SIZE) {
413 		if (pt[index].present == 0) {
414 			// page mapping not valid
415 			continue;
416 		}
417 
418 		TRACE(("unmap_tmap: removing page 0x%lx\n", start));
419 
420 		pt[index].present = 0;
421 		map->map_count--;
422 
423 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE) {
424 			map->arch_data->pages_to_invalidate[
425 				map->arch_data->num_invalidate_pages] = start;
426 		}
427 
428 		map->arch_data->num_invalidate_pages++;
429 	}
430 
431 	pinner.Unlock();
432 
433 	goto restart;
434 }
435 
436 
437 static status_t
438 query_tmap_interrupt(vm_translation_map *map, addr_t va, addr_t *_physical,
439 	uint32 *_flags)
440 {
441 	page_directory_entry *pd = map->arch_data->pgdir_virt;
442 	page_table_entry *pt;
443 	addr_t physicalPageTable;
444 	int32 index;
445 
446 	*_physical = 0;
447 
448 	index = VADDR_TO_PDENT(va);
449 	if (pd[index].present == 0) {
450 		// no pagetable here
451 		return B_ERROR;
452 	}
453 
454 	// map page table entry
455 	physicalPageTable = ADDR_REVERSE_SHIFT(pd[index].addr);
456 	pt = gPhysicalPageMapper->InterruptGetPageTableAt(physicalPageTable);
457 
458 	index = VADDR_TO_PTENT(va);
459 	*_physical = ADDR_REVERSE_SHIFT(pt[index].addr);
460 
461 	*_flags |= ((pt[index].rw ? B_KERNEL_WRITE_AREA : 0) | B_KERNEL_READ_AREA)
462 		| (pt[index].dirty ? PAGE_MODIFIED : 0)
463 		| (pt[index].accessed ? PAGE_ACCESSED : 0)
464 		| (pt[index].present ? PAGE_PRESENT : 0);
465 
466 	return B_OK;
467 }
468 
469 
470 static status_t
471 query_tmap(vm_translation_map *map, addr_t va, addr_t *_physical,
472 	uint32 *_flags)
473 {
474 	page_table_entry *pt;
475 	page_directory_entry *pd = map->arch_data->pgdir_virt;
476 	int32 index;
477 
478 	// default the flags to not present
479 	*_flags = 0;
480 	*_physical = 0;
481 
482 	index = VADDR_TO_PDENT(va);
483 	if (pd[index].present == 0) {
484 		// no pagetable here
485 		return B_OK;
486 	}
487 
488 	struct thread* thread = thread_get_current_thread();
489 	ThreadCPUPinner pinner(thread);
490 
491 	pt = map->arch_data->page_mapper->GetPageTableAt(
492 		ADDR_REVERSE_SHIFT(pd[index].addr));
493 	index = VADDR_TO_PTENT(va);
494 
495 	*_physical = ADDR_REVERSE_SHIFT(pt[index].addr);
496 
497 	// read in the page state flags
498 	if (pt[index].user)
499 		*_flags |= (pt[index].rw ? B_WRITE_AREA : 0) | B_READ_AREA;
500 
501 	*_flags |= ((pt[index].rw ? B_KERNEL_WRITE_AREA : 0) | B_KERNEL_READ_AREA)
502 		| (pt[index].dirty ? PAGE_MODIFIED : 0)
503 		| (pt[index].accessed ? PAGE_ACCESSED : 0)
504 		| (pt[index].present ? PAGE_PRESENT : 0);
505 
506 	pinner.Unlock();
507 
508 	TRACE(("query_tmap: returning pa 0x%lx for va 0x%lx\n", *_physical, va));
509 
510 	return B_OK;
511 }
512 
513 
514 static addr_t
515 get_mapped_size_tmap(vm_translation_map *map)
516 {
517 	return map->map_count;
518 }
519 
520 
521 static status_t
522 protect_tmap(vm_translation_map *map, addr_t start, addr_t end,
523 	uint32 attributes)
524 {
525 	page_table_entry *pt;
526 	page_directory_entry *pd = map->arch_data->pgdir_virt;
527 	int index;
528 
529 	start = ROUNDDOWN(start, B_PAGE_SIZE);
530 	end = ROUNDUP(end, B_PAGE_SIZE);
531 
532 	TRACE(("protect_tmap: pages 0x%lx to 0x%lx, attributes %lx\n", start, end,
533 		attributes));
534 
535 restart:
536 	if (start >= end)
537 		return B_OK;
538 
539 	index = VADDR_TO_PDENT(start);
540 	if (pd[index].present == 0) {
541 		// no pagetable here, move the start up to access the next page table
542 		start = ROUNDUP(start + 1, B_PAGE_SIZE);
543 		goto restart;
544 	}
545 
546 	struct thread* thread = thread_get_current_thread();
547 	ThreadCPUPinner pinner(thread);
548 
549 	pt = map->arch_data->page_mapper->GetPageTableAt(
550 		ADDR_REVERSE_SHIFT(pd[index].addr));
551 
552 	for (index = VADDR_TO_PTENT(start); index < 1024 && start < end;
553 			index++, start += B_PAGE_SIZE) {
554 		if (pt[index].present == 0) {
555 			// page mapping not valid
556 			continue;
557 		}
558 
559 		TRACE(("protect_tmap: protect page 0x%lx\n", start));
560 
561 		pt[index].user = (attributes & B_USER_PROTECTION) != 0;
562 		if ((attributes & B_USER_PROTECTION) != 0)
563 			pt[index].rw = (attributes & B_WRITE_AREA) != 0;
564 		else
565 			pt[index].rw = (attributes & B_KERNEL_WRITE_AREA) != 0;
566 
567 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE) {
568 			map->arch_data->pages_to_invalidate[
569 				map->arch_data->num_invalidate_pages] = start;
570 		}
571 
572 		map->arch_data->num_invalidate_pages++;
573 	}
574 
575 	pinner.Unlock();
576 
577 	goto restart;
578 }
579 
580 
581 static status_t
582 clear_flags_tmap(vm_translation_map *map, addr_t va, uint32 flags)
583 {
584 	page_table_entry *pt;
585 	page_directory_entry *pd = map->arch_data->pgdir_virt;
586 	int index;
587 	int tlb_flush = false;
588 
589 	index = VADDR_TO_PDENT(va);
590 	if (pd[index].present == 0) {
591 		// no pagetable here
592 		return B_OK;
593 	}
594 
595 	struct thread* thread = thread_get_current_thread();
596 	ThreadCPUPinner pinner(thread);
597 
598 	pt = map->arch_data->page_mapper->GetPageTableAt(
599 		ADDR_REVERSE_SHIFT(pd[index].addr));
600 	index = VADDR_TO_PTENT(va);
601 
602 	// clear out the flags we've been requested to clear
603 	if (flags & PAGE_MODIFIED) {
604 		pt[index].dirty = 0;
605 		tlb_flush = true;
606 	}
607 	if (flags & PAGE_ACCESSED) {
608 		pt[index].accessed = 0;
609 		tlb_flush = true;
610 	}
611 
612 	pinner.Unlock();
613 
614 	if (tlb_flush) {
615 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE) {
616 			map->arch_data->pages_to_invalidate[
617 				map->arch_data->num_invalidate_pages] = va;
618 		}
619 
620 		map->arch_data->num_invalidate_pages++;
621 	}
622 
623 	return B_OK;
624 }
625 
626 
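/*!	Performs the TLB invalidations that accumulated while \a map was locked.
	If more than PAGE_INVALIDATE_CACHE_SIZE pages were touched, the list has
	overflowed and the whole TLB is flushed instead of individual entries.
	Kernel maps are flushed on all CPUs; user maps only on the CPUs they are
	currently active on (active_on_cpus).
*/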
627 static void
628 flush_tmap(vm_translation_map *map)
629 {
630 	if (map->arch_data->num_invalidate_pages <= 0)
631 		return;
632 
633 	struct thread* thread = thread_get_current_thread();
634 	thread_pin_to_current_cpu(thread);
635 
636 	if (map->arch_data->num_invalidate_pages > PAGE_INVALIDATE_CACHE_SIZE) {
637 		// invalidate all pages
638 		TRACE(("flush_tmap: %d pages to invalidate, invalidate all\n",
639 			map->arch_data->num_invalidate_pages));
640 
641 		if (IS_KERNEL_MAP(map)) {
642 			arch_cpu_global_TLB_invalidate();
643 			smp_send_broadcast_ici(SMP_MSG_GLOBAL_INVALIDATE_PAGES, 0, 0, 0,
644 				NULL, SMP_MSG_FLAG_SYNC);
645 		} else {
646 			cpu_status state = disable_interrupts();
647 			arch_cpu_user_TLB_invalidate();
648 			restore_interrupts(state);
649 
650 			int cpu = smp_get_current_cpu();
651 			uint32 cpuMask = map->arch_data->active_on_cpus
652 				& ~((uint32)1 << cpu);
653 			if (cpuMask != 0) {
654 				smp_send_multicast_ici(cpuMask, SMP_MSG_USER_INVALIDATE_PAGES,
655 					0, 0, 0, NULL, SMP_MSG_FLAG_SYNC);
656 			}
657 		}
658 	} else {
659 		TRACE(("flush_tmap: %d pages to invalidate, invalidate list\n",
660 			map->arch_data->num_invalidate_pages));
661 
662 		arch_cpu_invalidate_TLB_list(map->arch_data->pages_to_invalidate,
663 			map->arch_data->num_invalidate_pages);
664 
665 		if (IS_KERNEL_MAP(map)) {
666 			smp_send_broadcast_ici(SMP_MSG_INVALIDATE_PAGE_LIST,
667 				(uint32)map->arch_data->pages_to_invalidate,
668 				map->arch_data->num_invalidate_pages, 0, NULL,
669 				SMP_MSG_FLAG_SYNC);
670 		} else {
671 			int cpu = smp_get_current_cpu();
672 			uint32 cpuMask = map->arch_data->active_on_cpus
673 				& ~((uint32)1 << cpu);
674 			if (cpuMask != 0) {
675 				smp_send_multicast_ici(cpuMask, SMP_MSG_INVALIDATE_PAGE_LIST,
676 					(uint32)map->arch_data->pages_to_invalidate,
677 					map->arch_data->num_invalidate_pages, 0, NULL,
678 					SMP_MSG_FLAG_SYNC);
679 			}
680 		}
681 	}
682 	map->arch_data->num_invalidate_pages = 0;
683 
684 	thread_unpin_from_current_cpu(thread);
685 }
686 
687 
688 static vm_translation_map_ops tmap_ops = {
689 	destroy_tmap,
690 	lock_tmap,
691 	unlock_tmap,
692 	map_max_pages_need,
693 	map_tmap,
694 	unmap_tmap,
695 	query_tmap,
696 	query_tmap_interrupt,
697 	get_mapped_size_tmap,
698 	protect_tmap,
699 	clear_flags_tmap,
700 	flush_tmap
701 
702 	// The physical page ops are initialized by the respective physical page
703 	// mapper.
704 };
705 
706 
707 //	#pragma mark -
708 
709 
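/*!	Clears the page tables at \a pageTables and hooks them into the boot page
	directory (through the page hole) so that they cover \a size bytes of
	virtual address space starting at \a address. One table is consumed per
	4 MB; preparing 8 MB starting at 0x80000000, for example, fills page
	directory slots 512 and 513.
*/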
710 void
711 x86_early_prepare_page_tables(page_table_entry* pageTables, addr_t address,
712 	size_t size)
713 {
714 	memset(pageTables, 0, B_PAGE_SIZE * (size / (B_PAGE_SIZE * 1024)));
715 
716 	// put the array of page tables directly into the kernel page dir;
717 	// they will be wired and kept mapped into virtual space so that they
718 	// are easy to get to
719 	{
720 		addr_t virtualTable = (addr_t)pageTables;
721 
722 		for (size_t i = 0; i < (size / (B_PAGE_SIZE * 1024));
723 				i++, virtualTable += B_PAGE_SIZE) {
724 			addr_t physicalTable = 0;
725 			early_query(virtualTable, &physicalTable);
726 			page_directory_entry* entry = &sPageHolePageDir[
727 				(address / (B_PAGE_SIZE * 1024)) + i];
728 			x86_put_pgtable_in_pgdir(entry, physicalTable,
729 				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
730 		}
731 	}
732 }
733 
734 
735 //	#pragma mark -
736 //	VM API
737 
738 
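/*!	Initializes \a map. A user map gets a freshly allocated page directory:
	its kernel half is copied from the kernel page directory (so all kernel
	mappings are shared between teams) and its user half is zeroed. The
	kernel map reuses the page directory set up by the boot loader. In both
	cases the arch_data is registered in sTMapList so that
	x86_update_all_pgdirs() can keep the kernel halves in sync.
*/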
739 status_t
740 arch_vm_translation_map_init_map(vm_translation_map *map, bool kernel)
741 {
742 	if (map == NULL)
743 		return B_BAD_VALUE;
744 
745 	TRACE(("vm_translation_map_create\n"));
746 
747 	// initialize the new object
748 	map->ops = &tmap_ops;
749 	map->map_count = 0;
750 
751 	recursive_lock_init(&map->lock, "translation map");
752 	CObjectDeleter<recursive_lock> lockDeleter(&map->lock,
753 		&recursive_lock_destroy);
754 
755 	map->arch_data = new(std::nothrow) vm_translation_map_arch_info;
756 	if (map->arch_data == NULL)
757 		return B_NO_MEMORY;
758 	ObjectDeleter<vm_translation_map_arch_info> archInfoDeleter(map->arch_data);
759 
760 	map->arch_data->active_on_cpus = 0;
761 	map->arch_data->num_invalidate_pages = 0;
762 	map->arch_data->page_mapper = NULL;
763 
764 	if (!kernel) {
765 		// user
766 		// allocate a physical page mapper
767 		status_t error = gPhysicalPageMapper
768 			->CreateTranslationMapPhysicalPageMapper(
769 				&map->arch_data->page_mapper);
770 		if (error != B_OK)
771 			return error;
772 
773 		// allocate a pgdir
774 		map->arch_data->pgdir_virt = (page_directory_entry *)memalign(
775 			B_PAGE_SIZE, B_PAGE_SIZE);
776 		if (map->arch_data->pgdir_virt == NULL) {
777 			map->arch_data->page_mapper->Delete();
778 			return B_NO_MEMORY;
779 		}
780 		vm_get_page_mapping(vm_kernel_address_space_id(),
781 			(addr_t)map->arch_data->pgdir_virt,
782 			(addr_t*)&map->arch_data->pgdir_phys);
783 	} else {
784 		// kernel
785 		// get the physical page mapper
786 		map->arch_data->page_mapper = gKernelPhysicalPageMapper;
787 
788 		// we already know the kernel pgdir mapping
789 		map->arch_data->pgdir_virt = sKernelVirtualPageDirectory;
790 		map->arch_data->pgdir_phys = sKernelPhysicalPageDirectory;
791 	}
792 
793 	// zero out the bottom portion of the new pgdir
794 	memset(map->arch_data->pgdir_virt + FIRST_USER_PGDIR_ENT, 0,
795 		NUM_USER_PGDIR_ENTS * sizeof(page_directory_entry));
796 
797 	// insert this new map into the map list
798 	{
799 		cpu_status state = disable_interrupts();
800 		acquire_spinlock(&sTMapListLock);
801 
802 		// copy the top portion of the pgdir from the current one
803 		memcpy(map->arch_data->pgdir_virt + FIRST_KERNEL_PGDIR_ENT,
804 			sKernelVirtualPageDirectory + FIRST_KERNEL_PGDIR_ENT,
805 			NUM_KERNEL_PGDIR_ENTS * sizeof(page_directory_entry));
806 
807 		sTMapList.Add(map->arch_data);
808 
809 		release_spinlock(&sTMapListLock);
810 		restore_interrupts(state);
811 	}
812 
813 	archInfoDeleter.Detach();
814 	lockDeleter.Detach();
815 
816 	return B_OK;
817 }
818 
819 
820 status_t
821 arch_vm_translation_map_init_kernel_map_post_sem(vm_translation_map *map)
822 {
823 	return B_OK;
824 }
825 
826 
827 status_t
828 arch_vm_translation_map_init(kernel_args *args)
829 {
830 	TRACE(("vm_translation_map_init: entry\n"));
831 
832 	// page hole set up in stage2
833 	sPageHole = (page_table_entry *)args->arch_args.page_hole;
834 	// calculate where the pgdir would be
835 	sPageHolePageDir = (page_directory_entry*)
836 		(((addr_t)args->arch_args.page_hole)
837 			+ (B_PAGE_SIZE * 1024 - B_PAGE_SIZE));
838 	// clear out the bottom 2 GB, unmap everything
839 	memset(sPageHolePageDir + FIRST_USER_PGDIR_ENT, 0,
840 		sizeof(page_directory_entry) * NUM_USER_PGDIR_ENTS);
841 
842 	sKernelPhysicalPageDirectory = (page_directory_entry*)
843 		args->arch_args.phys_pgdir;
844 	sKernelVirtualPageDirectory = (page_directory_entry*)
845 		args->arch_args.vir_pgdir;
846 
847 	B_INITIALIZE_SPINLOCK(&sTMapListLock);
848 	new (&sTMapList) ArchTMapList;
849 
850 // TODO: Select the best page mapper!
851 	large_memory_physical_page_ops_init(args, &tmap_ops);
852 
853 	// enable global page feature if available
854 	if (x86_check_feature(IA32_FEATURE_PGE, FEATURE_COMMON)) {
855 		// this prevents kernel pages from being flushed from TLB on
856 		// context-switch
857 		x86_write_cr4(x86_read_cr4() | IA32_CR4_GLOBAL_PAGES);
858 	}
859 
860 	TRACE(("vm_translation_map_init: done\n"));
861 
862 	return B_OK;
863 }
864 
865 
866 status_t
867 arch_vm_translation_map_init_post_sem(kernel_args *args)
868 {
869 	return B_OK;
870 }
871 
872 
873 status_t
874 arch_vm_translation_map_init_post_area(kernel_args *args)
875 {
876 	// now that the vm is initialized, create a region that represents
877 	// the page hole
878 	void *temp;
879 	status_t error;
880 	area_id area;
881 
882 	TRACE(("vm_translation_map_init_post_area: entry\n"));
883 
884 	// unmap the page hole hack we were using before
885 	sKernelVirtualPageDirectory[1023].present = 0;
886 	sPageHolePageDir = NULL;
887 	sPageHole = NULL;
888 
889 	temp = (void *)sKernelVirtualPageDirectory;
890 	area = create_area("kernel_pgdir", &temp, B_EXACT_ADDRESS, B_PAGE_SIZE,
891 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
892 	if (area < B_OK)
893 		return area;
894 
895 	error = gPhysicalPageMapper->InitPostArea(args);
896 	if (error != B_OK)
897 		return error;
898 
899 	TRACE(("vm_translation_map_init_post_area: done\n"));
900 	return B_OK;
901 }
902 
903 
904 // XXX Horrible back door to map a page quickly, regardless of translation map
905 // object, etc. Used only during VM setup.
906 // It uses a 'page hole' set up in the stage 2 bootloader: the page hole is
907 // created by pointing one of the pgdir entries back at itself, effectively
908 // mapping the contents of all 4 MB of page tables into a 4 MB virtual region.
909 // It's only used here and is unmapped again later, in
910 // arch_vm_translation_map_init_post_area() (see the sketch below).
911 
912 status_t
913 arch_vm_translation_map_early_map(kernel_args *args, addr_t va, addr_t pa,
914 	uint8 attributes, addr_t (*get_free_page)(kernel_args *))
915 {
916 	int index;
917 
918 	TRACE(("early_tmap: entry pa 0x%lx va 0x%lx\n", pa, va));
919 
920 	// check to see if a page table exists for this range
921 	index = VADDR_TO_PDENT(va);
922 	if (sPageHolePageDir[index].present == 0) {
923 		addr_t pgtable;
924 		page_directory_entry *e;
925 		// we need to allocate a pgtable
926 		pgtable = get_free_page(args);
927 		// pgtable is in pages, convert to physical address
928 		pgtable *= B_PAGE_SIZE;
929 
930 		TRACE(("early_map: asked for free page for pgtable. 0x%lx\n", pgtable));
931 
932 		// put it in the pgdir
933 		e = &sPageHolePageDir[index];
934 		x86_put_pgtable_in_pgdir(e, pgtable, attributes);
935 
936 		// zero it out in its new mapping
937 		memset((unsigned int*)((addr_t)sPageHole
938 			+ (va / B_PAGE_SIZE / 1024) * B_PAGE_SIZE), 0, B_PAGE_SIZE);
939 	}
940 
941 	// now, fill in the pentry
942 	put_page_table_entry_in_pgtable(sPageHole + va / B_PAGE_SIZE, pa,
943 		attributes, IS_KERNEL_ADDRESS(va));
944 
945 	arch_cpu_invalidate_TLB_range(va, va);
946 
947 	return B_OK;
948 }
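

// Purely illustrative sketch (not compiled, hypothetical helpers that are not
// part of the kernel API): with the self-referencing entry sitting in the
// last page directory slot, as the boot loader sets it up for this kernel,
// the page hole exposes every page table entry in one flat array and the page
// directory itself in the hole's last page. This is the arithmetic that
// early_query() and arch_vm_translation_map_early_map() rely on.
#if 0
static inline page_table_entry*
page_hole_entry_for(addr_t va)
{
	// 1024 * 1024 contiguous PTEs, one per 4 KB page of the address space
	return sPageHole + va / B_PAGE_SIZE;
}


static inline page_directory_entry*
page_hole_page_dir()
{
	// the recursive slot makes the page directory appear in the hole's last
	// 4 KB (the same computation as in arch_vm_translation_map_init())
	return (page_directory_entry*)
		((addr_t)sPageHole + B_PAGE_SIZE * 1024 - B_PAGE_SIZE);
}
#endif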
949 
950 
951 /*!	Verifies that the page at the given virtual address can be accessed in the
952 	current context.
953 
954 	This function is invoked in the kernel debugger. Paranoid checking is in
955 	order.
956 
957 	\param virtualAddress The virtual address to be checked.
958 	\param protection The area protection for which to check. Valid values are
959 		bitwise combinations of \c B_KERNEL_READ_AREA and \c B_KERNEL_WRITE_AREA.
960 	\return \c true, if the address can be accessed in all ways specified by
961 		\a protection, \c false otherwise.
962 */
963 bool
964 arch_vm_translation_map_is_kernel_page_accessible(addr_t virtualAddress,
965 	uint32 protection)
966 {
967 	// We only trust the kernel team's page directory. So switch to it first.
968 	// Always set it to make sure the TLBs don't contain obsolete data.
969 	addr_t physicalPageDirectory;
970 	read_cr3(physicalPageDirectory);
971 	write_cr3(sKernelPhysicalPageDirectory);
972 
973 	// get the page directory entry for the address
974 	page_directory_entry pageDirectoryEntry;
975 	uint32 index = VADDR_TO_PDENT(virtualAddress);
976 
977 	if (physicalPageDirectory == (addr_t)sKernelPhysicalPageDirectory) {
978 		pageDirectoryEntry = sKernelVirtualPageDirectory[index];
979 	} else {
980 		// map the original page directory and get the entry
981 		void* handle;
982 		addr_t virtualPageDirectory;
983 		status_t error = gPhysicalPageMapper->GetPageDebug(
984 			physicalPageDirectory, &virtualPageDirectory, &handle);
985 		if (error == B_OK) {
986 			pageDirectoryEntry
987 				= ((page_directory_entry*)virtualPageDirectory)[index];
988 			gPhysicalPageMapper->PutPageDebug(virtualPageDirectory,
989 				handle);
990 		} else
991 			pageDirectoryEntry.present = 0;
992 	}
993 
994 	// map the page table and get the entry
995 	page_table_entry pageTableEntry;
996 	index = VADDR_TO_PTENT(virtualAddress);
997 
998 	if (pageDirectoryEntry.present != 0) {
999 		void* handle;
1000 		addr_t virtualPageTable;
1001 		status_t error = gPhysicalPageMapper->GetPageDebug(
1002 			ADDR_REVERSE_SHIFT(pageDirectoryEntry.addr), &virtualPageTable,
1003 			&handle);
1004 		if (error == B_OK) {
1005 			pageTableEntry = ((page_table_entry*)virtualPageTable)[index];
1006 			gPhysicalPageMapper->PutPageDebug(virtualPageTable, handle);
1007 		} else
1008 			pageTableEntry.present = 0;
1009 	} else
1010 		pageTableEntry.present = 0;
1011 
1012 	// switch back to the original page directory
1013 	if (physicalPageDirectory != (addr_t)sKernelPhysicalPageDirectory)
1014 		write_cr3(physicalPageDirectory);
1015 
1016 	if (pageTableEntry.present == 0)
1017 		return false;
1018 
1019 	// present means kernel-readable, so check for writable
1020 	return (protection & B_KERNEL_WRITE_AREA) == 0 || pageTableEntry.rw != 0;
1021 }
1022