1 /*
2  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <vm_address_space.h>
11 #include <vm_page.h>
12 #include <vm_priv.h>
13 #include <smp.h>
14 #include <util/queue.h>
15 #include <heap.h>
16 #include <arch_system_info.h>
17 #include <arch/vm_translation_map.h>
18 
19 #include <string.h>
20 #include <stdlib.h>
21 
22 #include "generic_vm_physical_page_mapper.h"
23 
24 //#define TRACE_VM_TMAP
25 #ifdef TRACE_VM_TMAP
26 #	define TRACE(x) dprintf x
27 #else
28 #	define TRACE(x) ;
29 #endif
30 
31 // 256 MB of iospace
32 #define IOSPACE_SIZE (256*1024*1024)
33 // 4 MB chunks, to optimize for 4 MB pages
34 #define IOSPACE_CHUNK_SIZE (4*1024*1024)
35 
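// These structures mirror the standard IA-32 page table and page directory entry
// layout as C bitfields. The 20-bit "addr" field holds a page frame number, i.e.
// the physical address shifted right by 12 bits (see ADDR_SHIFT below).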
36 typedef struct page_table_entry {
37 	uint32	present:1;
38 	uint32	rw:1;
39 	uint32	user:1;
40 	uint32	write_through:1;
41 	uint32	cache_disabled:1;
42 	uint32	accessed:1;
43 	uint32	dirty:1;
44 	uint32	reserved:1;
45 	uint32	global:1;
46 	uint32	avail:3;
47 	uint32	addr:20;
48 } page_table_entry;
49 
50 typedef struct page_directory_entry {
51 	uint32	present:1;
52 	uint32	rw:1;
53 	uint32	user:1;
54 	uint32	write_through:1;
55 	uint32	cache_disabled:1;
56 	uint32	accessed:1;
57 	uint32	reserved:1;
58 	uint32	page_size:1;
59 	uint32	global:1;
60 	uint32	avail:3;
61 	uint32	addr:20;
62 } page_directory_entry;
63 
64 static page_table_entry *iospace_pgtables = NULL;
65 
66 #define PAGE_INVALIDATE_CACHE_SIZE 64
67 
68 // vm_translation object stuff
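// Virtual addresses whose TLB entries have become stale are queued in
// pages_to_invalidate and flushed by flush_tmap() when the map lock is released.
// If more than PAGE_INVALIDATE_CACHE_SIZE addresses accumulate, the whole TLB is
// invalidated instead of the individual entries.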
69 typedef struct vm_translation_map_arch_info {
70 	page_directory_entry *pgdir_virt;
71 	page_directory_entry *pgdir_phys;
72 	int num_invalidate_pages;
73 	addr_t pages_to_invalidate[PAGE_INVALIDATE_CACHE_SIZE];
74 } vm_translation_map_arch_info;
75 
76 
77 static page_table_entry *page_hole = NULL;
78 static page_directory_entry *page_hole_pgdir = NULL;
79 static page_directory_entry *sKernelPhysicalPageDirectory = NULL;
80 static page_directory_entry *sKernelVirtualPageDirectory = NULL;
81 static addr_t sQueryPages;
82 static page_table_entry *sQueryPageTable;
83 
84 static vm_translation_map *tmap_list;
85 static spinlock tmap_list_lock;
86 
87 static addr_t sIOSpaceBase;
88 
89 #define CHATTY_TMAP 0
90 
91 #define ADDR_SHIFT(x) ((x)>>12)
92 #define ADDR_REVERSE_SHIFT(x) ((x)<<12)
93 
94 #define VADDR_TO_PDENT(va) (((va) / B_PAGE_SIZE) / 1024)
95 #define VADDR_TO_PTENT(va) (((va) / B_PAGE_SIZE) % 1024)
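// Example: for va = 0x80123456, VADDR_TO_PDENT(va) is 0x200 (page directory slot)
// and VADDR_TO_PTENT(va) is 0x123 (page table slot); the low 12 bits (0x456) are
// the offset within the page.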
96 
97 #define FIRST_USER_PGDIR_ENT    (VADDR_TO_PDENT(USER_BASE))
98 #define NUM_USER_PGDIR_ENTS     (VADDR_TO_PDENT(ROUNDUP(USER_SIZE, B_PAGE_SIZE * 1024)))
99 #define FIRST_KERNEL_PGDIR_ENT  (VADDR_TO_PDENT(KERNEL_BASE))
100 #define NUM_KERNEL_PGDIR_ENTS   (VADDR_TO_PDENT(KERNEL_SIZE))
101 #define IS_KERNEL_MAP(map)		(map->arch_data->pgdir_phys == sKernelPhysicalPageDirectory)
102 
103 static status_t early_query(addr_t va, addr_t *out_physical);
104 static status_t get_physical_page_tmap(addr_t pa, addr_t *va, uint32 flags);
105 static status_t put_physical_page_tmap(addr_t va);
106 
107 static void flush_tmap(vm_translation_map *map);
108 
109 
110 void *
111 i386_translation_map_get_pgdir(vm_translation_map *map)
112 {
113 	return map->arch_data->pgdir_phys;
114 }
115 
116 
117 static inline void
118 init_page_directory_entry(page_directory_entry *entry)
119 {
120 	*(uint32 *)entry = 0;
121 }
122 
123 
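// update_page_directory_entry() and update_page_table_entry() below replace the
// whole 32-bit entry with a single store, so the hardware page table walker never
// sees a partially written entry.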
124 static inline void
125 update_page_directory_entry(page_directory_entry *entry, page_directory_entry *with)
126 {
127 	// update page directory entry atomically
128 	*(uint32 *)entry = *(uint32 *)with;
129 }
130 
131 
132 static inline void
133 init_page_table_entry(page_table_entry *entry)
134 {
135 	*(uint32 *)entry = 0;
136 }
137 
138 
139 static inline void
140 update_page_table_entry(page_table_entry *entry, page_table_entry *with)
141 {
142 	// update page table entry atomically
143 	*(uint32 *)entry = *(uint32 *)with;
144 }
145 
146 
147 static void
148 _update_all_pgdirs(int index, page_directory_entry e)
149 {
150 	vm_translation_map *entry;
151 	unsigned int state = disable_interrupts();
152 
153 	acquire_spinlock(&tmap_list_lock);
154 
155 	for(entry = tmap_list; entry != NULL; entry = entry->next)
156 		entry->arch_data->pgdir_virt[index] = e;
157 
158 	release_spinlock(&tmap_list_lock);
159 	restore_interrupts(state);
160 }
161 
162 
163 // XXX currently assumes this translation map is active
164 
165 static status_t
166 early_query(addr_t va, addr_t *_physicalAddress)
167 {
168 	page_table_entry *pentry;
169 
170 	if (page_hole_pgdir[VADDR_TO_PDENT(va)].present == 0) {
171 		// no pagetable here
172 		return B_ERROR;
173 	}
174 
175 	pentry = page_hole + va / B_PAGE_SIZE;
176 	if (pentry->present == 0) {
177 		// page mapping not valid
178 		return B_ERROR;
179 	}
180 
181 	*_physicalAddress = pentry->addr << 12;
182 	return B_OK;
183 }
184 
185 
186 /*!	Acquires the map's recursive lock, and resets the invalidate pages counter
187 	in case it's the first locking recursion.
188 */
189 static status_t
190 lock_tmap(vm_translation_map *map)
191 {
192 	TRACE(("lock_tmap: map %p\n", map));
193 
194 	recursive_lock_lock(&map->lock);
195 	if (recursive_lock_get_recursion(&map->lock) == 1) {
196 		// we were the first one to grab the lock
197 		TRACE(("clearing invalidated page count\n"));
198 		map->arch_data->num_invalidate_pages = 0;
199 	}
200 
201 	return B_OK;
202 }
203 
204 
205 /*!	Unlocks the map and, if we're actually releasing the recursive lock for
206 	the last time, flushes all pending changes of this map (i.e. flushes TLB
207 	caches as needed).
208 */
209 static status_t
210 unlock_tmap(vm_translation_map *map)
211 {
212 	TRACE(("unlock_tmap: map %p\n", map));
213 
214 	if (recursive_lock_get_recursion(&map->lock) == 1) {
215 		// we're about to release it for the last time
216 		flush_tmap(map);
217 	}
218 
219 	recursive_lock_unlock(&map->lock);
220 	return B_OK;
221 }
222 
223 
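/*!	Removes the map from the global translation map list, frees all user space
	page tables that are still present in its page directory, and finally
	releases the page directory itself, the arch_data structure, and the map's
	recursive lock.
*/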
224 static void
225 destroy_tmap(vm_translation_map *map)
226 {
227 	int state;
228 	vm_translation_map *entry;
229 	vm_translation_map *last = NULL;
230 	unsigned int i;
231 
232 	if (map == NULL)
233 		return;
234 
235 	// remove it from the tmap list
236 	state = disable_interrupts();
237 	acquire_spinlock(&tmap_list_lock);
238 
239 	entry = tmap_list;
240 	while (entry != NULL) {
241 		if (entry == map) {
242 			if (last != NULL)
243 				last->next = entry->next;
244 			else
245 				tmap_list = entry->next;
246 
247 			break;
248 		}
249 		last = entry;
250 		entry = entry->next;
251 	}
252 
253 	release_spinlock(&tmap_list_lock);
254 	restore_interrupts(state);
255 
256 	if (map->arch_data->pgdir_virt != NULL) {
257 		// cycle through and free all of the user space pgtables
258 		for (i = VADDR_TO_PDENT(USER_BASE); i <= VADDR_TO_PDENT(USER_BASE + (USER_SIZE - 1)); i++) {
259 			addr_t pgtable_addr;
260 			vm_page *page;
261 
262 			if (map->arch_data->pgdir_virt[i].present == 1) {
263 				pgtable_addr = map->arch_data->pgdir_virt[i].addr;
264 				page = vm_lookup_page(pgtable_addr);
265 				if (!page)
266 					panic("destroy_tmap: didn't find pgtable page\n");
267 				vm_page_set_state(page, PAGE_STATE_FREE);
268 			}
269 		}
270 		free(map->arch_data->pgdir_virt);
271 	}
272 
273 	free(map->arch_data);
274 	recursive_lock_destroy(&map->lock);
275 }
276 
277 
278 static void
279 put_pgtable_in_pgdir(page_directory_entry *entry,
280 	addr_t pgtable_phys, uint32 attributes)
281 {
282 	page_directory_entry table;
283 	// put it in the pgdir
284 	init_page_directory_entry(&table);
285 	table.addr = ADDR_SHIFT(pgtable_phys);
286 
287 	// ToDo: we ignore the attributes of the page table - for compatibility
288 	//	with BeOS we allow having user accessible areas in the kernel address
289 	//	space. This is currently being used by some drivers, mainly for the
290 	//	frame buffer. Our current real time data implementation makes use of
291 	//	this fact, too.
292 	//	We might want to get rid of this possibility one day, especially if
293 	//	we intend to port it to a platform that does not support this.
294 	table.user = 1;
295 	table.rw = 1;
296 	table.present = 1;
297 	update_page_directory_entry(entry, &table);
298 }
299 
300 
301 static void
302 put_page_table_entry_in_pgtable(page_table_entry *entry,
303 	addr_t physicalAddress, uint32 attributes, bool globalPage)
304 {
305 	page_table_entry page;
306 	init_page_table_entry(&page);
307 
308 	page.addr = ADDR_SHIFT(physicalAddress);
309 
310 	// if the page is user accessible, it's automatically
311 	// accessible in kernel space, too (but with the same
312 	// protection)
313 	page.user = (attributes & B_USER_PROTECTION) != 0;
314 	if (page.user)
315 		page.rw = (attributes & B_WRITE_AREA) != 0;
316 	else
317 		page.rw = (attributes & B_KERNEL_WRITE_AREA) != 0;
318 	page.present = 1;
319 
320 	if (globalPage)
321 		page.global = 1;
322 
323 	// put it in the page table
324 	update_page_table_entry(entry, &page);
325 }
326 
327 
328 static size_t
329 map_max_pages_need(vm_translation_map */*map*/, addr_t start, addr_t end)
330 {
331 	return VADDR_TO_PDENT(end) + 1 - VADDR_TO_PDENT(start);
332 }
333 
334 
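/*!	Maps the page at physical address \a pa to the virtual address \a va.
	If no page table exists for \a va yet, one is allocated and wired on the
	fly; for kernel addresses the new page directory entry is also mirrored
	into all other page directories. The virtual address is queued for a
	deferred TLB invalidation (see flush_tmap()).
*/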
335 static status_t
336 map_tmap(vm_translation_map *map, addr_t va, addr_t pa, uint32 attributes)
337 {
338 	page_directory_entry *pd;
339 	page_table_entry *pt;
340 	unsigned int index;
341 	int err;
342 
343 	TRACE(("map_tmap: entry pa 0x%lx va 0x%lx\n", pa, va));
344 
345 /*
346 	dprintf("pgdir at 0x%x\n", pgdir);
347 	dprintf("index is %d\n", va / B_PAGE_SIZE / 1024);
348 	dprintf("final at 0x%x\n", &pgdir[va / B_PAGE_SIZE / 1024]);
349 	dprintf("value is 0x%x\n", *(int *)&pgdir[va / B_PAGE_SIZE / 1024]);
350 	dprintf("present bit is %d\n", pgdir[va / B_PAGE_SIZE / 1024].present);
351 	dprintf("addr is %d\n", pgdir[va / B_PAGE_SIZE / 1024].addr);
352 */
353 	pd = map->arch_data->pgdir_virt;
354 
355 	// check to see if a page table exists for this range
356 	index = VADDR_TO_PDENT(va);
357 	if (pd[index].present == 0) {
358 		addr_t pgtable;
359 		vm_page *page;
360 
361 		// we need to allocate a pgtable
362 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
363 
364 		// mark the page WIRED
365 		vm_page_set_state(page, PAGE_STATE_WIRED);
366 
367 		pgtable = page->physical_page_number * B_PAGE_SIZE;
368 
369 		TRACE(("map_tmap: asked for free page for pgtable. 0x%lx\n", pgtable));
370 
371 		// put it in the pgdir
372 		put_pgtable_in_pgdir(&pd[index], pgtable, attributes
373 			| (attributes & B_USER_PROTECTION ? B_WRITE_AREA : B_KERNEL_WRITE_AREA));
374 
375 		// update any other page directories, if it maps kernel space
376 		if (index >= FIRST_KERNEL_PGDIR_ENT
377 			&& index < (FIRST_KERNEL_PGDIR_ENT + NUM_KERNEL_PGDIR_ENTS))
378 			_update_all_pgdirs(index, pd[index]);
379 
380 		map->map_count++;
381 	}
382 
383 	// now, fill in the pentry
384 	do {
385 		err = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
386 				(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
387 	} while (err < 0);
388 	index = VADDR_TO_PTENT(va);
389 
390 	put_page_table_entry_in_pgtable(&pt[index], pa, attributes,
391 		IS_KERNEL_MAP(map));
392 
393 	put_physical_page_tmap((addr_t)pt);
394 
395 	if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
396 		map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = va;
397 
398 	map->arch_data->num_invalidate_pages++;
399 
400 	map->map_count++;
401 
402 	return 0;
403 }
404 
405 
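/*!	Clears the present bit of all mappings in the range \a start to \a end and
	queues the affected pages for deferred TLB invalidation. Ranges without a
	page table are skipped; the page tables themselves are never freed here.
*/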
406 static status_t
407 unmap_tmap(vm_translation_map *map, addr_t start, addr_t end)
408 {
409 	page_table_entry *pt;
410 	page_directory_entry *pd = map->arch_data->pgdir_virt;
411 	status_t status;
412 	int index;
413 
414 	start = ROUNDOWN(start, B_PAGE_SIZE);
415 	end = ROUNDUP(end, B_PAGE_SIZE);
416 
417 	TRACE(("unmap_tmap: asked to free pages 0x%lx to 0x%lx\n", start, end));
418 
419 restart:
420 	if (start >= end)
421 		return B_OK;
422 
423 	index = VADDR_TO_PDENT(start);
424 	if (pd[index].present == 0) {
425 		// no pagetable here, move the start up to access the next page table
426 		start = ROUNDUP(start + 1, B_PAGE_SIZE * 1024);
427 		goto restart;
428 	}
429 
430 	do {
431 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
432 			(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
433 	} while (status < B_OK);
434 
435 	for (index = VADDR_TO_PTENT(start); (index < 1024) && (start < end);
436 			index++, start += B_PAGE_SIZE) {
437 		if (pt[index].present == 0) {
438 			// page mapping not valid
439 			continue;
440 		}
441 
442 		TRACE(("unmap_tmap: removing page 0x%lx\n", start));
443 
444 		pt[index].present = 0;
445 		map->map_count--;
446 
447 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
448 			map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = start;
449 
450 		map->arch_data->num_invalidate_pages++;
451 	}
452 
453 	put_physical_page_tmap((addr_t)pt);
454 
455 	goto restart;
456 }
457 
458 
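/*!	Interrupt-safe variant of query_tmap(): instead of going through the
	generic physical page mapper, it temporarily maps the needed page table
	through this CPU's dedicated query page (see sQueryPages).
*/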
459 static status_t
460 query_tmap_interrupt(vm_translation_map *map, addr_t va, addr_t *_physical,
461 	uint32 *_flags)
462 {
463 	page_directory_entry *pd = map->arch_data->pgdir_virt;
464 	page_table_entry *pt;
465 	addr_t physicalPageTable;
466 	int32 cpu = smp_get_current_cpu();
467 	int32 index;
468 
469 	*_physical = 0;
470 
471 	index = VADDR_TO_PDENT(va);
472 	if (pd[index].present == 0) {
473 		// no pagetable here
474 		return B_ERROR;
475 	}
476 
477 	// map page table entry using our per CPU mapping page
478 
479 	physicalPageTable = ADDR_REVERSE_SHIFT(pd[index].addr);
480 	pt = (page_table_entry *)(sQueryPages + cpu * B_PAGE_SIZE);
481 	index = VADDR_TO_PDENT((addr_t)pt);
482 	if (pd[index].present == 0) {
483 		// no page table here
484 		return B_ERROR;
485 	}
486 
487 	index = VADDR_TO_PTENT((addr_t)pt);
488 	put_page_table_entry_in_pgtable(&sQueryPageTable[index], physicalPageTable,
489 		B_KERNEL_READ_AREA, false);
490 	invalidate_TLB(pt);
491 
492 	index = VADDR_TO_PTENT(va);
493 	*_physical = ADDR_REVERSE_SHIFT(pt[index].addr);
494 
495 	*_flags |= ((pt[index].rw ? B_KERNEL_WRITE_AREA : 0) | B_KERNEL_READ_AREA)
496 		| (pt[index].dirty ? PAGE_MODIFIED : 0)
497 		| (pt[index].accessed ? PAGE_ACCESSED : 0)
498 		| (pt[index].present ? PAGE_PRESENT : 0);
499 
500 	return B_OK;
501 }
502 
503 
504 static status_t
505 query_tmap(vm_translation_map *map, addr_t va, addr_t *_physical, uint32 *_flags)
506 {
507 	page_table_entry *pt;
508 	page_directory_entry *pd = map->arch_data->pgdir_virt;
509 	status_t status;
510 	int32 index;
511 
512 	// default the flags to not present
513 	*_flags = 0;
514 	*_physical = 0;
515 
516 	index = VADDR_TO_PDENT(va);
517 	if (pd[index].present == 0) {
518 		// no pagetable here
519 		return B_NO_ERROR;
520 	}
521 
522 	do {
523 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
524 			(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
525 	} while (status < B_OK);
526 	index = VADDR_TO_PTENT(va);
527 
528 	*_physical = ADDR_REVERSE_SHIFT(pt[index].addr);
529 
530 	// read in the page state flags
531 	if (pt[index].user)
532 		*_flags |= (pt[index].rw ? B_WRITE_AREA : 0) | B_READ_AREA;
533 
534 	*_flags |= ((pt[index].rw ? B_KERNEL_WRITE_AREA : 0) | B_KERNEL_READ_AREA)
535 		| (pt[index].dirty ? PAGE_MODIFIED : 0)
536 		| (pt[index].accessed ? PAGE_ACCESSED : 0)
537 		| (pt[index].present ? PAGE_PRESENT : 0);
538 
539 	put_physical_page_tmap((addr_t)pt);
540 
541 	TRACE(("query_tmap: returning pa 0x%lx for va 0x%lx\n", *_physical, va));
542 
543 	return B_OK;
544 }
545 
546 
547 static addr_t
548 get_mapped_size_tmap(vm_translation_map *map)
549 {
550 	return map->map_count;
551 }
552 
553 
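/*!	Re-applies the user/rw protection bits of all existing mappings in the
	range \a start to \a end according to \a attributes and queues the touched
	pages for deferred TLB invalidation.
*/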
554 static status_t
555 protect_tmap(vm_translation_map *map, addr_t start, addr_t end, uint32 attributes)
556 {
557 	page_table_entry *pt;
558 	page_directory_entry *pd = map->arch_data->pgdir_virt;
559 	status_t status;
560 	int index;
561 
562 	start = ROUNDOWN(start, B_PAGE_SIZE);
563 	end = ROUNDUP(end, B_PAGE_SIZE);
564 
565 	TRACE(("protect_tmap: pages 0x%lx to 0x%lx, attributes %lx\n", start, end, attributes));
566 
567 restart:
568 	if (start >= end)
569 		return B_OK;
570 
571 	index = VADDR_TO_PDENT(start);
572 	if (pd[index].present == 0) {
573 		// no pagetable here, move the start up to access the next page table
574 		start = ROUNDUP(start + 1, B_PAGE_SIZE * 1024);
575 		goto restart;
576 	}
577 
578 	do {
579 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
580 				(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
581 	} while (status < B_OK);
582 
583 	for (index = VADDR_TO_PTENT(start); index < 1024 && start < end; index++, start += B_PAGE_SIZE) {
584 		if (pt[index].present == 0) {
585 			// page mapping not valid
586 			continue;
587 		}
588 
589 		TRACE(("protect_tmap: protect page 0x%lx\n", start));
590 
591 		pt[index].user = (attributes & B_USER_PROTECTION) != 0;
592 		if ((attributes & B_USER_PROTECTION) != 0)
593 			pt[index].rw = (attributes & B_WRITE_AREA) != 0;
594 		else
595 			pt[index].rw = (attributes & B_KERNEL_WRITE_AREA) != 0;
596 
597 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
598 			map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = start;
599 
600 		map->arch_data->num_invalidate_pages++;
601 	}
602 
603 	put_physical_page_tmap((addr_t)pt);
604 
605 	goto restart;
606 }
607 
608 
609 static status_t
610 clear_flags_tmap(vm_translation_map *map, addr_t va, uint32 flags)
611 {
612 	page_table_entry *pt;
613 	page_directory_entry *pd = map->arch_data->pgdir_virt;
614 	status_t status;
615 	int index;
616 	int tlb_flush = false;
617 
618 	index = VADDR_TO_PDENT(va);
619 	if (pd[index].present == 0) {
620 		// no pagetable here
621 		return B_OK;
622 	}
623 
624 	do {
625 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
626 			(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
627 	} while (status < B_OK);
628 	index = VADDR_TO_PTENT(va);
629 
630 	// clear out the flags we've been requested to clear
631 	if (flags & PAGE_MODIFIED) {
632 		pt[index].dirty = 0;
633 		tlb_flush = true;
634 	}
635 	if (flags & PAGE_ACCESSED) {
636 		pt[index].accessed = 0;
637 		tlb_flush = true;
638 	}
639 
640 	put_physical_page_tmap((addr_t)pt);
641 
642 	if (tlb_flush) {
643 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
644 			map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = va;
645 
646 		map->arch_data->num_invalidate_pages++;
647 	}
648 
649 	return B_OK;
650 }
651 
652 
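/*!	Performs the TLB invalidations that were deferred while the map was locked.
	If more than PAGE_INVALIDATE_CACHE_SIZE pages have accumulated, the complete
	user or kernel TLB is invalidated instead of the individual entries; in both
	cases the other CPUs are notified via ICIs.
*/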
653 static void
654 flush_tmap(vm_translation_map *map)
655 {
656 	cpu_status state;
657 
658 	if (map->arch_data->num_invalidate_pages <= 0)
659 		return;
660 
661 	state = disable_interrupts();
662 
663 	if (map->arch_data->num_invalidate_pages > PAGE_INVALIDATE_CACHE_SIZE) {
664 		// invalidate all pages
665 		TRACE(("flush_tmap: %d pages to invalidate, invalidate all\n",
666 			map->arch_data->num_invalidate_pages));
667 
668 		if (IS_KERNEL_MAP(map)) {
669 			arch_cpu_global_TLB_invalidate();
670 			smp_send_broadcast_ici(SMP_MSG_GLOBAL_INVALIDATE_PAGES, 0, 0, 0, NULL,
671 				SMP_MSG_FLAG_SYNC);
672 		} else {
673 			arch_cpu_user_TLB_invalidate();
674 			smp_send_broadcast_ici(SMP_MSG_USER_INVALIDATE_PAGES, 0, 0, 0, NULL,
675 				SMP_MSG_FLAG_SYNC);
676 		}
677 	} else {
678 		TRACE(("flush_tmap: %d pages to invalidate, invalidate list\n",
679 			map->arch_data->num_invalidate_pages));
680 
681 		arch_cpu_invalidate_TLB_list(map->arch_data->pages_to_invalidate,
682 			map->arch_data->num_invalidate_pages);
683 		smp_send_broadcast_ici(SMP_MSG_INVALIDATE_PAGE_LIST,
684 			(uint32)map->arch_data->pages_to_invalidate,
685 			map->arch_data->num_invalidate_pages, 0, NULL,
686 			SMP_MSG_FLAG_SYNC);
687 	}
688 	map->arch_data->num_invalidate_pages = 0;
689 
690 	restore_interrupts(state);
691 }
692 
693 
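/*!	Maps one 4 MB chunk of the IO space at \a va to the physical range starting
	at \a pa by rewriting the corresponding entries in iospace_pgtables, and then
	invalidates the chunk's address range on all CPUs. Used as the chunk mapping
	callback of the generic physical page mapper.
*/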
694 static status_t
695 map_iospace_chunk(addr_t va, addr_t pa)
696 {
697 	int i;
698 	page_table_entry *pt;
699 	addr_t ppn;
700 	int state;
701 
702 	pa &= ~(B_PAGE_SIZE - 1); // make sure it's page aligned
703 	va &= ~(B_PAGE_SIZE - 1); // make sure it's page aligned
704 	if (va < sIOSpaceBase || va >= (sIOSpaceBase + IOSPACE_SIZE))
705 		panic("map_iospace_chunk: passed invalid va 0x%lx\n", va);
706 
707 	ppn = ADDR_SHIFT(pa);
708 	pt = &iospace_pgtables[(va - sIOSpaceBase) / B_PAGE_SIZE];
709 	for (i = 0; i < 1024; i++) {
710 		init_page_table_entry(&pt[i]);
711 		pt[i].addr = ppn + i;
712 		pt[i].user = 0;
713 		pt[i].rw = 1;
714 		pt[i].present = 1;
715 		pt[i].global = 1;
716 	}
717 
718 	state = disable_interrupts();
719 	arch_cpu_invalidate_TLB_range(va, va + (IOSPACE_CHUNK_SIZE - B_PAGE_SIZE));
720 	smp_send_broadcast_ici(SMP_MSG_INVALIDATE_PAGE_RANGE,
721 		va, va + (IOSPACE_CHUNK_SIZE - B_PAGE_SIZE), 0,
722 		NULL, SMP_MSG_FLAG_SYNC);
723 	restore_interrupts(state);
724 
725 	return B_OK;
726 }
727 
728 
729 static status_t
730 get_physical_page_tmap(addr_t pa, addr_t *va, uint32 flags)
731 {
732 	return generic_get_physical_page(pa, va, flags);
733 }
734 
735 
736 static status_t
737 put_physical_page_tmap(addr_t va)
738 {
739 	return generic_put_physical_page(va);
740 }
741 
742 
743 static vm_translation_map_ops tmap_ops = {
744 	destroy_tmap,
745 	lock_tmap,
746 	unlock_tmap,
747 	map_max_pages_need,
748 	map_tmap,
749 	unmap_tmap,
750 	query_tmap,
751 	query_tmap_interrupt,
752 	get_mapped_size_tmap,
753 	protect_tmap,
754 	clear_flags_tmap,
755 	flush_tmap,
756 	get_physical_page_tmap,
757 	put_physical_page_tmap
758 };
759 
760 
761 //	#pragma mark -
762 //	VM API
763 
764 
765 status_t
766 arch_vm_translation_map_init_map(vm_translation_map *map, bool kernel)
767 {
768 	if (map == NULL)
769 		return B_BAD_VALUE;
770 
771 	TRACE(("vm_translation_map_create\n"));
772 
773 	// initialize the new object
774 	map->ops = &tmap_ops;
775 	map->map_count = 0;
776 
777 	if (!kernel) {
778 		// During the boot process, there are no semaphores available at this
779 		// point, so we only try to create the translation map lock if we're
780 		// initializing a user translation map.
781 		// vm_translation_map_init_kernel_map_post_sem() is used to complete
782 		// the kernel translation map.
783 		if (recursive_lock_init(&map->lock, "translation map") < B_OK)
784 			return map->lock.sem;
785 	}
786 
787 	map->arch_data = (vm_translation_map_arch_info *)malloc(sizeof(vm_translation_map_arch_info));
788 	if (map->arch_data == NULL) {
789 		recursive_lock_destroy(&map->lock);
790 		return B_NO_MEMORY;
791 	}
792 
793 	map->arch_data->num_invalidate_pages = 0;
794 
795 	if (!kernel) {
796 		// user
797 		// allocate a pgdir
798 		map->arch_data->pgdir_virt = (page_directory_entry *)memalign(
799 			B_PAGE_SIZE, B_PAGE_SIZE);
800 		if (map->arch_data->pgdir_virt == NULL) {
801 			free(map->arch_data);
802 			recursive_lock_destroy(&map->lock);
803 			return B_NO_MEMORY;
804 		}
805 		vm_get_page_mapping(vm_kernel_address_space_id(),
806 			(addr_t)map->arch_data->pgdir_virt, (addr_t *)&map->arch_data->pgdir_phys);
807 	} else {
808 		// kernel
809 		// we already know the kernel pgdir mapping
810 		map->arch_data->pgdir_virt = sKernelVirtualPageDirectory;
811 		map->arch_data->pgdir_phys = sKernelPhysicalPageDirectory;
812 	}
813 
814 	// zero out the bottom portion of the new pgdir
815 	memset(map->arch_data->pgdir_virt + FIRST_USER_PGDIR_ENT, 0,
816 		NUM_USER_PGDIR_ENTS * sizeof(page_directory_entry));
817 
818 	// insert this new map into the map list
819 	{
820 		int state = disable_interrupts();
821 		acquire_spinlock(&tmap_list_lock);
822 
823 		// copy the top portion of the pgdir from the current one
824 		memcpy(map->arch_data->pgdir_virt + FIRST_KERNEL_PGDIR_ENT,
825 			sKernelVirtualPageDirectory + FIRST_KERNEL_PGDIR_ENT,
826 			NUM_KERNEL_PGDIR_ENTS * sizeof(page_directory_entry));
827 
828 		map->next = tmap_list;
829 		tmap_list = map;
830 
831 		release_spinlock(&tmap_list_lock);
832 		restore_interrupts(state);
833 	}
834 
835 	return B_OK;
836 }
837 
838 
839 status_t
840 arch_vm_translation_map_init_kernel_map_post_sem(vm_translation_map *map)
841 {
842 	if (recursive_lock_init(&map->lock, "translation map") < B_OK)
843 		return map->lock.sem;
844 
845 	return B_OK;
846 }
847 
848 
849 status_t
850 arch_vm_translation_map_init(kernel_args *args)
851 {
852 	status_t error;
853 
854 	TRACE(("vm_translation_map_init: entry\n"));
855 
856 	// page hole set up in stage2
857 	page_hole = (page_table_entry *)args->arch_args.page_hole;
858 	// calculate where the pgdir would be
859 	page_hole_pgdir = (page_directory_entry *)(((unsigned int)args->arch_args.page_hole) + (B_PAGE_SIZE * 1024 - B_PAGE_SIZE));
860 	// clear out the bottom 2 GB, unmap everything
861 	memset(page_hole_pgdir + FIRST_USER_PGDIR_ENT, 0, sizeof(page_directory_entry) * NUM_USER_PGDIR_ENTS);
862 
863 	sKernelPhysicalPageDirectory = (page_directory_entry *)args->arch_args.phys_pgdir;
864 	sKernelVirtualPageDirectory = (page_directory_entry *)args->arch_args.vir_pgdir;
865 
866 	tmap_list_lock = 0;
867 	tmap_list = NULL;
868 
869 	// allocate some space to hold physical page mapping info
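	// IOSPACE_SIZE / (B_PAGE_SIZE * 1024) page tables are needed to cover the
	// IO space (256 MB / 4 MB = 64), so this allocates 64 * B_PAGE_SIZE = 256 KB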
870 	iospace_pgtables = (page_table_entry *)vm_allocate_early(args,
871 		B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)), ~0L,
872 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
873 
874 	TRACE(("iospace_pgtables %p\n", iospace_pgtables));
875 
876 	// init physical page mapper
877 	error = generic_vm_physical_page_mapper_init(args, map_iospace_chunk,
878 		&sIOSpaceBase, IOSPACE_SIZE, IOSPACE_CHUNK_SIZE);
879 	if (error != B_OK)
880 		return error;
881 
882 	// initialize our data structures
883 	memset(iospace_pgtables, 0, B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)));
884 
885 	TRACE(("mapping iospace_pgtables\n"));
886 
887 	// put the array of pgtables directly into the kernel pagedir
888 	// these will be wired and kept mapped into virtual space to be easy to get to
889 	{
890 		addr_t phys_pgtable;
891 		addr_t virt_pgtable;
892 		page_directory_entry *e;
893 		int i;
894 
895 		virt_pgtable = (addr_t)iospace_pgtables;
896 		for (i = 0; i < (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)); i++, virt_pgtable += B_PAGE_SIZE) {
897 			early_query(virt_pgtable, &phys_pgtable);
898 			e = &page_hole_pgdir[(sIOSpaceBase / (B_PAGE_SIZE * 1024)) + i];
899 			put_pgtable_in_pgdir(e, phys_pgtable, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
900 		}
901 	}
902 
903 	// enable global page feature if available
904 	if (x86_check_feature(IA32_FEATURE_PGE, FEATURE_COMMON)) {
905 		// this prevents kernel pages from being flushed from TLB on context-switch
906 		x86_write_cr4(x86_read_cr4() | IA32_CR4_GLOBAL_PAGES);
907 	}
908 
909 	TRACE(("vm_translation_map_init: done\n"));
910 
911 	return B_OK;
912 }
913 
914 
915 status_t
916 arch_vm_translation_map_init_post_sem(kernel_args *args)
917 {
918 	return generic_vm_physical_page_mapper_init_post_sem(args);
919 }
920 
921 
922 status_t
923 arch_vm_translation_map_init_post_area(kernel_args *args)
924 {
925 	// now that the vm is initialized, create a region that represents
926 	// the page hole
927 	void *temp;
928 	status_t error;
929 	area_id area;
930 
931 	TRACE(("vm_translation_map_init_post_area: entry\n"));
932 
933 	// unmap the page hole hack we were using before
934 	sKernelVirtualPageDirectory[1023].present = 0;
935 	page_hole_pgdir = NULL;
936 	page_hole = NULL;
937 
938 	temp = (void *)sKernelVirtualPageDirectory;
939 	area = create_area("kernel_pgdir", &temp, B_EXACT_ADDRESS, B_PAGE_SIZE,
940 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
941 	if (area < B_OK)
942 		return area;
943 
944 	temp = (void *)iospace_pgtables;
945 	area = create_area("iospace_pgtables", &temp, B_EXACT_ADDRESS,
946 		B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)),
947 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
948 	if (area < B_OK)
949 		return area;
950 
951 	error = generic_vm_physical_page_mapper_init_post_area(args);
952 	if (error != B_OK)
953 		return error;
954 
955 	// this area is used for query_tmap_interrupt()
956 	// TODO: Note, this only works as long as all pages belong to the same
957 	//	page table, which is not yet enforced (or even tested)!
958 
959 	area = vm_create_null_area(vm_kernel_address_space_id(),
960 		"interrupt query pages", (void **)&sQueryPages, B_ANY_ADDRESS,
961 		B_PAGE_SIZE * (smp_get_num_cpus() + 1));
962 	if (area < B_OK)
963 		return area;
964 
965 	// map the last page of the query pages to the page table entry they're in
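	// After this, sQueryPageTable aliases the kernel page table that covers the
	// query pages themselves, so query_tmap_interrupt() can install a temporary
	// mapping simply by writing a page table entry into it.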
966 
967 	{
968 		page_table_entry *pageTableEntry;
969 		addr_t physicalPageTable;
970 		int32 index;
971 
972 		sQueryPageTable = (page_table_entry *)(sQueryPages + smp_get_num_cpus() * B_PAGE_SIZE);
973 
974 		index = VADDR_TO_PDENT((addr_t)sQueryPageTable);
975 		physicalPageTable = ADDR_REVERSE_SHIFT(sKernelVirtualPageDirectory[index].addr);
976 
977 		get_physical_page_tmap(physicalPageTable,
978 			(addr_t *)&pageTableEntry, PHYSICAL_PAGE_NO_WAIT);
979 
980 		index = VADDR_TO_PTENT((addr_t)sQueryPageTable);
981 		put_page_table_entry_in_pgtable(&pageTableEntry[index], physicalPageTable,
982 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
983 
984 		put_physical_page_tmap((addr_t)pageTableEntry);
985 		//invalidate_TLB(sQueryPageTable);
986 	}
987 
988 	TRACE(("vm_translation_map_init_post_area: done\n"));
989 	return B_OK;
990 }
991 
992 
993 // XXX horrible back door to map a page quickly regardless of translation map object, etc.
994 // used only during VM setup.
995 // uses a 'page hole' set up in the stage 2 bootloader. The page hole is created by pointing one of
996 // the pgdir entries back at the page directory itself, which makes the contents of all of the page
997 // tables appear as a single 4 MB region. It's only used here, and is unmapped again later.
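// With the hole installed in page directory slot 1023 (it is cleared again in
// arch_vm_translation_map_init_post_area()), the page table entry for a virtual
// address va is reachable at page_hole + va / B_PAGE_SIZE, and the page directory
// itself appears as the last page of the hole (see page_hole_pgdir in
// arch_vm_translation_map_init()).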
998 
999 status_t
1000 arch_vm_translation_map_early_map(kernel_args *args, addr_t va, addr_t pa,
1001 	uint8 attributes, addr_t (*get_free_page)(kernel_args *))
1002 {
1003 	int index;
1004 
1005 	TRACE(("early_tmap: entry pa 0x%lx va 0x%lx\n", pa, va));
1006 
1007 	// check to see if a page table exists for this range
1008 	index = VADDR_TO_PDENT(va);
1009 	if (page_hole_pgdir[index].present == 0) {
1010 		addr_t pgtable;
1011 		page_directory_entry *e;
1012 		// we need to allocate a pgtable
1013 		pgtable = get_free_page(args);
1014 		// pgtable is in pages, convert to physical address
1015 		pgtable *= B_PAGE_SIZE;
1016 
1017 		TRACE(("early_map: asked for free page for pgtable. 0x%lx\n", pgtable));
1018 
1019 		// put it in the pgdir
1020 		e = &page_hole_pgdir[index];
1021 		put_pgtable_in_pgdir(e, pgtable, attributes);
1022 
1023 		// zero it out in its new mapping
1024 		memset((unsigned int *)((unsigned int)page_hole + (va / B_PAGE_SIZE / 1024) * B_PAGE_SIZE), 0, B_PAGE_SIZE);
1025 	}
1026 
1027 	// now, fill in the pentry
1028 	put_page_table_entry_in_pgtable(page_hole + va / B_PAGE_SIZE, pa, attributes,
1029 		IS_KERNEL_ADDRESS(va));
1030 
1031 	arch_cpu_invalidate_TLB_range(va, va);
1032 
1033 	return B_OK;
1034 }
1035 
1036