xref: /haiku/src/system/kernel/arch/x86/arch_vm_translation_map.cpp (revision 746cac055adc6ac3308c7bc2d29040fb95689cc9)
1 /*
2  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <vm_address_space.h>
11 #include <vm_page.h>
12 #include <vm_priv.h>
13 #include <smp.h>
14 #include <util/queue.h>
15 #include <heap.h>
16 #include <arch_system_info.h>
17 #include <arch/vm_translation_map.h>
18 
19 #include <string.h>
20 #include <stdlib.h>
21 
22 #include "generic_vm_physical_page_mapper.h"
23 
24 //#define TRACE_VM_TMAP
25 #ifdef TRACE_VM_TMAP
26 #	define TRACE(x) dprintf x
27 #else
28 #	define TRACE(x) ;
29 #endif
30 
31 // 256 MB of iospace
32 #define IOSPACE_SIZE (256*1024*1024)
33 // 4 MB chunks, to optimize for 4 MB pages
34 #define IOSPACE_CHUNK_SIZE (4*1024*1024)
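// With these sizes the I/O space spans IOSPACE_SIZE / IOSPACE_CHUNK_SIZE = 64
// chunks, one page table (1024 entries) per chunk. That matches the
// B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)) bytes reserved for
// iospace_pgtables in arch_vm_translation_map_init() below.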
35 
36 typedef struct page_table_entry {
37 	uint32	present:1;
38 	uint32	rw:1;
39 	uint32	user:1;
40 	uint32	write_through:1;
41 	uint32	cache_disabled:1;
42 	uint32	accessed:1;
43 	uint32	dirty:1;
44 	uint32	reserved:1;
45 	uint32	global:1;
46 	uint32	avail:3;
47 	uint32	addr:20;
48 } page_table_entry;
49 
50 typedef struct page_directory_entry {
51 	uint32	present:1;
52 	uint32	rw:1;
53 	uint32	user:1;
54 	uint32	write_through:1;
55 	uint32	cache_disabled:1;
56 	uint32	accessed:1;
57 	uint32	reserved:1;
58 	uint32	page_size:1;
59 	uint32	global:1;
60 	uint32	avail:3;
61 	uint32	addr:20;
62 } page_directory_entry;
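// Both structs mirror the IA-32 4 KB page table/page directory entry layout
// bit for bit and are exactly 32 bits wide. The 20 bit 'addr' field holds the
// physical frame number, i.e. the physical address shifted right by 12
// (see ADDR_SHIFT()/ADDR_REVERSE_SHIFT() below).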
63 
64 static page_table_entry *iospace_pgtables = NULL;
65 
66 #define PAGE_INVALIDATE_CACHE_SIZE 64
67 
68 // vm_translation_map object stuff

69 typedef struct vm_translation_map_arch_info {
70 	page_directory_entry *pgdir_virt;
71 	page_directory_entry *pgdir_phys;
72 	int num_invalidate_pages;
73 	addr_t pages_to_invalidate[PAGE_INVALIDATE_CACHE_SIZE];
74 } vm_translation_map_arch_info;
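// The pages_to_invalidate array caches up to PAGE_INVALIDATE_CACHE_SIZE page
// addresses whose TLB entries still need invalidating. num_invalidate_pages
// may grow beyond that limit; in that case flush_tmap() ignores the list and
// invalidates the whole (user or global) TLB instead.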
75 
76 
77 static page_table_entry *page_hole = NULL;
78 static page_directory_entry *page_hole_pgdir = NULL;
79 static page_directory_entry *sKernelPhysicalPageDirectory = NULL;
80 static page_directory_entry *sKernelVirtualPageDirectory = NULL;
81 static addr_t sQueryPages;
82 static page_table_entry *sQueryPageTable;
83 
84 static vm_translation_map *tmap_list;
85 static spinlock tmap_list_lock;
86 
87 static addr_t sIOSpaceBase;
88 
89 #define CHATTY_TMAP 0
90 
91 #define ADDR_SHIFT(x) ((x)>>12)
92 #define ADDR_REVERSE_SHIFT(x) ((x)<<12)
93 
94 #define VADDR_TO_PDENT(va) (((va) / B_PAGE_SIZE) / 1024)
95 #define VADDR_TO_PTENT(va) (((va) / B_PAGE_SIZE) % 1024)
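// ADDR_SHIFT() turns a physical address into the frame number stored in the
// 'addr' fields above, ADDR_REVERSE_SHIFT() goes the other way. A 32 bit
// virtual address splits 10/10/12: bits 31-22 select one of 1024 page
// directory entries, bits 21-12 one of 1024 page table entries, and bits 11-0
// are the offset within the 4 KB page. For example, for va 0x8004a123
// VADDR_TO_PDENT() yields 0x200 and VADDR_TO_PTENT() yields 0x4a.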
96 
97 #define FIRST_USER_PGDIR_ENT    (VADDR_TO_PDENT(USER_BASE))
98 #define NUM_USER_PGDIR_ENTS     (VADDR_TO_PDENT(ROUNDUP(USER_SIZE, B_PAGE_SIZE * 1024)))
99 #define FIRST_KERNEL_PGDIR_ENT  (VADDR_TO_PDENT(KERNEL_BASE))
100 #define NUM_KERNEL_PGDIR_ENTS   (VADDR_TO_PDENT(KERNEL_SIZE))
101 #define IS_KERNEL_MAP(map)		(map->arch_data->pgdir_phys == sKernelPhysicalPageDirectory)
102 
103 static status_t early_query(addr_t va, addr_t *out_physical);
104 static status_t get_physical_page_tmap(addr_t pa, addr_t *va, uint32 flags);
105 static status_t put_physical_page_tmap(addr_t va);
106 
107 static void flush_tmap(vm_translation_map *map);
108 
109 
110 void *
111 i386_translation_map_get_pgdir(vm_translation_map *map)
112 {
113 	return map->arch_data->pgdir_phys;
114 }
115 
116 
117 static inline void
118 init_page_directory_entry(page_directory_entry *entry)
119 {
120 	*(uint32 *)entry = 0;
121 }
122 
123 
124 static inline void
125 update_page_directory_entry(page_directory_entry *entry, page_directory_entry *with)
126 {
127 	// update page directory entry atomically
128 	*(uint32 *)entry = *(uint32 *)with;
129 }
130 
131 
132 static inline void
133 init_page_table_entry(page_table_entry *entry)
134 {
135 	*(uint32 *)entry = 0;
136 }
137 
138 
139 static inline void
140 update_page_table_entry(page_table_entry *entry, page_table_entry *with)
141 {
142 	// update page table entry atomically
143 	*(uint32 *)entry = *(uint32 *)with;
144 }
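// Both update functions rely on the fact that an aligned 32 bit store is
// atomic on IA-32, so another CPU never observes a half-written directory or
// table entry.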
145 
146 
147 static void
148 _update_all_pgdirs(int index, page_directory_entry e)
149 {
150 	vm_translation_map *entry;
151 	unsigned int state = disable_interrupts();
152 
153 	acquire_spinlock(&tmap_list_lock);
154 
155 	for(entry = tmap_list; entry != NULL; entry = entry->next)
156 		entry->arch_data->pgdir_virt[index] = e;
157 
158 	release_spinlock(&tmap_list_lock);
159 	restore_interrupts(state);
160 }
161 
162 
163 // XXX currently assumes this translation map is active
164 
165 static status_t
166 early_query(addr_t va, addr_t *_physicalAddress)
167 {
168 	page_table_entry *pentry;
169 
170 	if (page_hole_pgdir[VADDR_TO_PDENT(va)].present == 0) {
171 		// no pagetable here
172 		return B_ERROR;
173 	}
174 
175 	pentry = page_hole + va / B_PAGE_SIZE;
176 	if (pentry->present == 0) {
177 		// page mapping not valid
178 		return B_ERROR;
179 	}
180 
181 	*_physicalAddress = pentry->addr << 12;
182 	return B_OK;
183 }
184 
185 
186 /*!	Acquires the map's recursive lock, and resets the invalidate pages counter
187 	if this is the first locking recursion.
188 */
189 static status_t
190 lock_tmap(vm_translation_map *map)
191 {
192 	TRACE(("lock_tmap: map %p\n", map));
193 
194 	recursive_lock_lock(&map->lock);
195 	if (recursive_lock_get_recursion(&map->lock) == 1) {
196 		// we were the first one to grab the lock
197 		TRACE(("clearing invalidated page count\n"));
198 		map->arch_data->num_invalidate_pages = 0;
199 	}
200 
201 	return B_OK;
202 }
203 
204 
205 /*!	Unlocks the map and, if we're actually losing the recursive lock,
206 	flushes all pending changes of this map (i.e. flushes the TLB caches
207 	as needed).
208 */
209 static status_t
210 unlock_tmap(vm_translation_map *map)
211 {
212 	TRACE(("unlock_tmap: map %p\n", map));
213 
214 	if (recursive_lock_get_recursion(&map->lock) == 1) {
215 		// we're about to release it for the last time
216 		flush_tmap(map);
217 	}
218 
219 	recursive_lock_unlock(&map->lock);
220 	return B_OK;
221 }
222 
223 
224 static void
225 destroy_tmap(vm_translation_map *map)
226 {
227 	int state;
228 	vm_translation_map *entry;
229 	vm_translation_map *last = NULL;
230 	unsigned int i;
231 
232 	if (map == NULL)
233 		return;
234 
235 	// remove it from the tmap list
236 	state = disable_interrupts();
237 	acquire_spinlock(&tmap_list_lock);
238 
239 	entry = tmap_list;
240 	while (entry != NULL) {
241 		if (entry == map) {
242 			if (last != NULL)
243 				last->next = entry->next;
244 			else
245 				tmap_list = entry->next;
246 
247 			break;
248 		}
249 		last = entry;
250 		entry = entry->next;
251 	}
252 
253 	release_spinlock(&tmap_list_lock);
254 	restore_interrupts(state);
255 
256 	if (map->arch_data->pgdir_virt != NULL) {
257 		// cycle through and free all of the user space pgtables
258 		for (i = VADDR_TO_PDENT(USER_BASE); i <= VADDR_TO_PDENT(USER_BASE + (USER_SIZE - 1)); i++) {
259 			addr_t pgtable_addr;
260 			vm_page *page;
261 
262 			if (map->arch_data->pgdir_virt[i].present == 1) {
263 				pgtable_addr = map->arch_data->pgdir_virt[i].addr;
264 				page = vm_lookup_page(pgtable_addr);
265 				if (!page)
266 					panic("destroy_tmap: didn't find pgtable page\n");
267 				vm_page_set_state(page, PAGE_STATE_FREE);
268 			}
269 		}
270 		free(map->arch_data->pgdir_virt);
271 	}
272 
273 	free(map->arch_data);
274 	recursive_lock_destroy(&map->lock);
275 }
276 
277 
278 static void
279 put_pgtable_in_pgdir(page_directory_entry *entry,
280 	addr_t pgtable_phys, uint32 attributes)
281 {
282 	page_directory_entry table;
283 	// put it in the pgdir
284 	init_page_directory_entry(&table);
285 	table.addr = ADDR_SHIFT(pgtable_phys);
286 
287 	// ToDo: we ignore the attributes of the page table - for compatibility
288 	//	with BeOS we allow having user accessible areas in the kernel address
289 	//	space. This is currently being used by some drivers, mainly for the
290 	//	frame buffer. Our current real time data implementation makes use of
291 	//	this fact, too.
292 	//	We might want to get rid of this possibility one day, especially if
293 	//	we intend to port it to a platform that does not support this.
294 	table.user = 1;
295 	table.rw = 1;
296 	table.present = 1;
297 	update_page_directory_entry(entry, &table);
298 }
299 
300 
301 static void
302 put_page_table_entry_in_pgtable(page_table_entry *entry,
303 	addr_t physicalAddress, uint32 attributes, bool globalPage)
304 {
305 	page_table_entry page;
306 	init_page_table_entry(&page);
307 
308 	page.addr = ADDR_SHIFT(physicalAddress);
309 
310 	// if the page is user accessible, it's automatically
311 	// accessible in kernel space, too (but with the same
312 	// protection)
313 	page.user = (attributes & B_USER_PROTECTION) != 0;
314 	if (page.user)
315 		page.rw = (attributes & B_WRITE_AREA) != 0;
316 	else
317 		page.rw = (attributes & B_KERNEL_WRITE_AREA) != 0;
318 	page.present = 1;
319 
320 	if (globalPage)
321 		page.global = 1;
322 
323 	// put it in the page table
324 	update_page_table_entry(entry, &page);
325 }
326 
327 
328 static size_t
329 map_max_pages_need(vm_translation_map */*map*/, addr_t start, addr_t end)
330 {
331 	// If start == 0, the actual base address is not yet known to the caller and
332 	// we shall assume the worst case.
333 	if (start == 0) {
334 		start = 1023 * B_PAGE_SIZE;
335 		end += 1023 * B_PAGE_SIZE;
336 	}
337 	return VADDR_TO_PDENT(end) + 1 - VADDR_TO_PDENT(start);
338 }
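// Worst case illustration: an area whose base is still unknown might end up
// starting on the last page below a 4 MB boundary, so even a small range can
// straddle two page directory entries. With the adjusted start = 0x3ff000 and
// end = 0x401000 the formula above yields 1 + 1 - 0 = 2 page tables.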
339 
340 
341 static status_t
342 map_tmap(vm_translation_map *map, addr_t va, addr_t pa, uint32 attributes)
343 {
344 	page_directory_entry *pd;
345 	page_table_entry *pt;
346 	unsigned int index;
347 	int err;
348 
349 	TRACE(("map_tmap: entry pa 0x%lx va 0x%lx\n", pa, va));
350 
351 /*
352 	dprintf("pgdir at 0x%x\n", pgdir);
353 	dprintf("index is %d\n", va / B_PAGE_SIZE / 1024);
354 	dprintf("final at 0x%x\n", &pgdir[va / B_PAGE_SIZE / 1024]);
355 	dprintf("value is 0x%x\n", *(int *)&pgdir[va / B_PAGE_SIZE / 1024]);
356 	dprintf("present bit is %d\n", pgdir[va / B_PAGE_SIZE / 1024].present);
357 	dprintf("addr is %d\n", pgdir[va / B_PAGE_SIZE / 1024].addr);
358 */
359 	pd = map->arch_data->pgdir_virt;
360 
361 	// check to see if a page table exists for this range
362 	index = VADDR_TO_PDENT(va);
363 	if (pd[index].present == 0) {
364 		addr_t pgtable;
365 		vm_page *page;
366 
367 		// we need to allocate a pgtable
368 		page = vm_page_allocate_page(PAGE_STATE_CLEAR, true);
369 
370 		// mark the page WIRED
371 		vm_page_set_state(page, PAGE_STATE_WIRED);
372 
373 		pgtable = page->physical_page_number * B_PAGE_SIZE;
374 
375 		TRACE(("map_tmap: asked for free page for pgtable. 0x%lx\n", pgtable));
376 
377 		// put it in the pgdir
378 		put_pgtable_in_pgdir(&pd[index], pgtable, attributes
379 			| (attributes & B_USER_PROTECTION ? B_WRITE_AREA : B_KERNEL_WRITE_AREA));
380 
381 		// update any other page directories, if it maps kernel space
382 		if (index >= FIRST_KERNEL_PGDIR_ENT
383 			&& index < (FIRST_KERNEL_PGDIR_ENT + NUM_KERNEL_PGDIR_ENTS))
384 			_update_all_pgdirs(index, pd[index]);
385 
386 		map->map_count++;
387 	}
388 
389 	// now, fill in the pentry
390 	do {
391 		err = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
392 				(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
393 	} while (err < 0);
394 	index = VADDR_TO_PTENT(va);
395 
396 	put_page_table_entry_in_pgtable(&pt[index], pa, attributes,
397 		IS_KERNEL_MAP(map));
398 
399 	put_physical_page_tmap((addr_t)pt);
400 
401 	if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
402 		map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = va;
403 
404 	map->arch_data->num_invalidate_pages++;
405 
406 	map->map_count++;
407 
408 	return B_OK;
409 }
410 
411 
412 static status_t
413 unmap_tmap(vm_translation_map *map, addr_t start, addr_t end)
414 {
415 	page_table_entry *pt;
416 	page_directory_entry *pd = map->arch_data->pgdir_virt;
417 	status_t status;
418 	int index;
419 
420 	start = ROUNDOWN(start, B_PAGE_SIZE);
421 	end = ROUNDUP(end, B_PAGE_SIZE);
422 
423 	TRACE(("unmap_tmap: asked to free pages 0x%lx to 0x%lx\n", start, end));
424 
425 restart:
426 	if (start >= end)
427 		return B_OK;
428 
429 	index = VADDR_TO_PDENT(start);
430 	if (pd[index].present == 0) {
431 		// no pagetable here, move the start up to access the next page table
432 		start = ROUNDUP(start + 1, B_PAGE_SIZE);
433 		goto restart;
434 	}
435 
436 	do {
437 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
438 			(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
439 	} while (status < B_OK);
440 
441 	for (index = VADDR_TO_PTENT(start); (index < 1024) && (start < end);
442 			index++, start += B_PAGE_SIZE) {
443 		if (pt[index].present == 0) {
444 			// page mapping not valid
445 			continue;
446 		}
447 
448 		TRACE(("unmap_tmap: removing page 0x%lx\n", start));
449 
450 		pt[index].present = 0;
451 		map->map_count--;
452 
453 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
454 			map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = start;
455 
456 		map->arch_data->num_invalidate_pages++;
457 	}
458 
459 	put_physical_page_tmap((addr_t)pt);
460 
461 	goto restart;
462 }
463 
464 
465 static status_t
466 query_tmap_interrupt(vm_translation_map *map, addr_t va, addr_t *_physical,
467 	uint32 *_flags)
468 {
469 	page_directory_entry *pd = map->arch_data->pgdir_virt;
470 	page_table_entry *pt;
471 	addr_t physicalPageTable;
472 	int32 cpu = smp_get_current_cpu();
473 	int32 index;
474 
475 	*_physical = 0;
476 
477 	index = VADDR_TO_PDENT(va);
478 	if (pd[index].present == 0) {
479 		// no pagetable here
480 		return B_ERROR;
481 	}
482 
483 	// map page table entry using our per CPU mapping page
484 
485 	physicalPageTable = ADDR_REVERSE_SHIFT(pd[index].addr);
486 	pt = (page_table_entry *)(sQueryPages + cpu * B_PAGE_SIZE);
487 	index = VADDR_TO_PDENT((addr_t)pt);
488 	if (pd[index].present == 0) {
489 		// no page table here
490 		return B_ERROR;
491 	}
492 
493 	index = VADDR_TO_PTENT((addr_t)pt);
494 	put_page_table_entry_in_pgtable(&sQueryPageTable[index], physicalPageTable,
495 		B_KERNEL_READ_AREA, false);
496 	invalidate_TLB(pt);
497 
498 	index = VADDR_TO_PTENT(va);
499 	*_physical = ADDR_REVERSE_SHIFT(pt[index].addr);
500 
501 	*_flags |= ((pt[index].rw ? B_KERNEL_WRITE_AREA : 0) | B_KERNEL_READ_AREA)
502 		| (pt[index].dirty ? PAGE_MODIFIED : 0)
503 		| (pt[index].accessed ? PAGE_ACCESSED : 0)
504 		| (pt[index].present ? PAGE_PRESENT : 0);
505 
506 	return B_OK;
507 }
508 
509 
510 static status_t
511 query_tmap(vm_translation_map *map, addr_t va, addr_t *_physical, uint32 *_flags)
512 {
513 	page_table_entry *pt;
514 	page_directory_entry *pd = map->arch_data->pgdir_virt;
515 	status_t status;
516 	int32 index;
517 
518 	// default the flags to not present
519 	*_flags = 0;
520 	*_physical = 0;
521 
522 	index = VADDR_TO_PDENT(va);
523 	if (pd[index].present == 0) {
524 		// no pagetable here
525 		return B_NO_ERROR;
526 	}
527 
528 	do {
529 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
530 			(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
531 	} while (status < B_OK);
532 	index = VADDR_TO_PTENT(va);
533 
534 	*_physical = ADDR_REVERSE_SHIFT(pt[index].addr);
535 
536 	// read in the page state flags
537 	if (pt[index].user)
538 		*_flags |= (pt[index].rw ? B_WRITE_AREA : 0) | B_READ_AREA;
539 
540 	*_flags |= ((pt[index].rw ? B_KERNEL_WRITE_AREA : 0) | B_KERNEL_READ_AREA)
541 		| (pt[index].dirty ? PAGE_MODIFIED : 0)
542 		| (pt[index].accessed ? PAGE_ACCESSED : 0)
543 		| (pt[index].present ? PAGE_PRESENT : 0);
544 
545 	put_physical_page_tmap((addr_t)pt);
546 
547 	TRACE(("query_tmap: returning pa 0x%lx for va 0x%lx\n", *_physical, va));
548 
549 	return B_OK;
550 }
551 
552 
553 static addr_t
554 get_mapped_size_tmap(vm_translation_map *map)
555 {
556 	return map->map_count;
557 }
558 
559 
560 static status_t
561 protect_tmap(vm_translation_map *map, addr_t start, addr_t end, uint32 attributes)
562 {
563 	page_table_entry *pt;
564 	page_directory_entry *pd = map->arch_data->pgdir_virt;
565 	status_t status;
566 	int index;
567 
568 	start = ROUNDOWN(start, B_PAGE_SIZE);
569 	end = ROUNDUP(end, B_PAGE_SIZE);
570 
571 	TRACE(("protect_tmap: pages 0x%lx to 0x%lx, attributes %lx\n", start, end, attributes));
572 
573 restart:
574 	if (start >= end)
575 		return B_OK;
576 
577 	index = VADDR_TO_PDENT(start);
578 	if (pd[index].present == 0) {
579 		// no pagetable here, move the start up to access the next page table
580 		start = ROUNDUP(start + 1, B_PAGE_SIZE);
581 		goto restart;
582 	}
583 
584 	do {
585 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
586 				(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
587 	} while (status < B_OK);
588 
589 	for (index = VADDR_TO_PTENT(start); index < 1024 && start < end; index++, start += B_PAGE_SIZE) {
590 		if (pt[index].present == 0) {
591 			// page mapping not valid
592 			continue;
593 		}
594 
595 		TRACE(("protect_tmap: protect page 0x%lx\n", start));
596 
597 		pt[index].user = (attributes & B_USER_PROTECTION) != 0;
598 		if ((attributes & B_USER_PROTECTION) != 0)
599 			pt[index].rw = (attributes & B_WRITE_AREA) != 0;
600 		else
601 			pt[index].rw = (attributes & B_KERNEL_WRITE_AREA) != 0;
602 
603 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
604 			map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = start;
605 
606 		map->arch_data->num_invalidate_pages++;
607 	}
608 
609 	put_physical_page_tmap((addr_t)pt);
610 
611 	goto restart;
612 }
613 
614 
615 static status_t
616 clear_flags_tmap(vm_translation_map *map, addr_t va, uint32 flags)
617 {
618 	page_table_entry *pt;
619 	page_directory_entry *pd = map->arch_data->pgdir_virt;
620 	status_t status;
621 	int index;
622 	bool tlb_flush = false;
623 
624 	index = VADDR_TO_PDENT(va);
625 	if (pd[index].present == 0) {
626 		// no pagetable here
627 		return B_OK;
628 	}
629 
630 	do {
631 		status = get_physical_page_tmap(ADDR_REVERSE_SHIFT(pd[index].addr),
632 			(addr_t *)&pt, PHYSICAL_PAGE_NO_WAIT);
633 	} while (status < B_OK);
634 	index = VADDR_TO_PTENT(va);
635 
636 	// clear out the flags we've been requested to clear
637 	if (flags & PAGE_MODIFIED) {
638 		pt[index].dirty = 0;
639 		tlb_flush = true;
640 	}
641 	if (flags & PAGE_ACCESSED) {
642 		pt[index].accessed = 0;
643 		tlb_flush = true;
644 	}
645 
646 	put_physical_page_tmap((addr_t)pt);
647 
648 	if (tlb_flush) {
649 		if (map->arch_data->num_invalidate_pages < PAGE_INVALIDATE_CACHE_SIZE)
650 			map->arch_data->pages_to_invalidate[map->arch_data->num_invalidate_pages] = va;
651 
652 		map->arch_data->num_invalidate_pages++;
653 	}
654 
655 	return B_OK;
656 }
657 
658 
659 static void
660 flush_tmap(vm_translation_map *map)
661 {
662 	cpu_status state;
663 
664 	if (map->arch_data->num_invalidate_pages <= 0)
665 		return;
666 
667 	state = disable_interrupts();
668 
669 	if (map->arch_data->num_invalidate_pages > PAGE_INVALIDATE_CACHE_SIZE) {
670 		// invalidate all pages
671 		TRACE(("flush_tmap: %d pages to invalidate, invalidate all\n",
672 			map->arch_data->num_invalidate_pages));
673 
674 		if (IS_KERNEL_MAP(map)) {
675 			arch_cpu_global_TLB_invalidate();
676 			smp_send_broadcast_ici(SMP_MSG_GLOBAL_INVALIDATE_PAGES, 0, 0, 0, NULL,
677 				SMP_MSG_FLAG_SYNC);
678 		} else {
679 			arch_cpu_user_TLB_invalidate();
680 			smp_send_broadcast_ici(SMP_MSG_USER_INVALIDATE_PAGES, 0, 0, 0, NULL,
681 				SMP_MSG_FLAG_SYNC);
682 		}
683 	} else {
684 		TRACE(("flush_tmap: %d pages to invalidate, invalidate list\n",
685 			map->arch_data->num_invalidate_pages));
686 
687 		arch_cpu_invalidate_TLB_list(map->arch_data->pages_to_invalidate,
688 			map->arch_data->num_invalidate_pages);
689 		smp_send_broadcast_ici(SMP_MSG_INVALIDATE_PAGE_LIST,
690 			(uint32)map->arch_data->pages_to_invalidate,
691 			map->arch_data->num_invalidate_pages, 0, NULL,
692 			SMP_MSG_FLAG_SYNC);
693 	}
694 	map->arch_data->num_invalidate_pages = 0;
695 
696 	restore_interrupts(state);
697 }
698 
699 
700 static status_t
701 map_iospace_chunk(addr_t va, addr_t pa)
702 {
703 	int i;
704 	page_table_entry *pt;
705 	addr_t ppn;
706 	int state;
707 
708 	pa &= ~(B_PAGE_SIZE - 1); // make sure it's page aligned
709 	va &= ~(B_PAGE_SIZE - 1); // make sure it's page aligned
710 	if (va < sIOSpaceBase || va >= (sIOSpaceBase + IOSPACE_SIZE))
711 		panic("map_iospace_chunk: passed invalid va 0x%lx\n", va);
712 
713 	ppn = ADDR_SHIFT(pa);
714 	pt = &iospace_pgtables[(va - sIOSpaceBase) / B_PAGE_SIZE];
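	// Fill all 1024 entries of this chunk's page table: entry i maps the
	// physical frame ppn + i, so the whole 4 MB chunk covers 1024 consecutive
	// physical pages starting at pa. The mappings are kernel-only, writable
	// and global, since the I/O space lives in the kernel address space.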
715 	for (i = 0; i < 1024; i++) {
716 		init_page_table_entry(&pt[i]);
717 		pt[i].addr = ppn + i;
718 		pt[i].user = 0;
719 		pt[i].rw = 1;
720 		pt[i].present = 1;
721 		pt[i].global = 1;
722 	}
723 
724 	state = disable_interrupts();
725 	arch_cpu_invalidate_TLB_range(va, va + (IOSPACE_CHUNK_SIZE - B_PAGE_SIZE));
726 	smp_send_broadcast_ici(SMP_MSG_INVALIDATE_PAGE_RANGE,
727 		va, va + (IOSPACE_CHUNK_SIZE - B_PAGE_SIZE), 0,
728 		NULL, SMP_MSG_FLAG_SYNC);
729 	restore_interrupts(state);
730 
731 	return B_OK;
732 }
733 
734 
735 static status_t
736 get_physical_page_tmap(addr_t pa, addr_t *va, uint32 flags)
737 {
738 	return generic_get_physical_page(pa, va, flags);
739 }
740 
741 
742 static status_t
743 put_physical_page_tmap(addr_t va)
744 {
745 	return generic_put_physical_page(va);
746 }
747 
748 
749 static vm_translation_map_ops tmap_ops = {
750 	destroy_tmap,
751 	lock_tmap,
752 	unlock_tmap,
753 	map_max_pages_need,
754 	map_tmap,
755 	unmap_tmap,
756 	query_tmap,
757 	query_tmap_interrupt,
758 	get_mapped_size_tmap,
759 	protect_tmap,
760 	clear_flags_tmap,
761 	flush_tmap,
762 	get_physical_page_tmap,
763 	put_physical_page_tmap
764 };
765 
766 
767 //	#pragma mark -
768 //	VM API
769 
770 
771 status_t
772 arch_vm_translation_map_init_map(vm_translation_map *map, bool kernel)
773 {
774 	if (map == NULL)
775 		return B_BAD_VALUE;
776 
777 	TRACE(("vm_translation_map_create\n"));
778 
779 	// initialize the new object
780 	map->ops = &tmap_ops;
781 	map->map_count = 0;
782 
783 	recursive_lock_init(&map->lock, "translation map");
784 
785 	map->arch_data = (vm_translation_map_arch_info *)malloc(sizeof(vm_translation_map_arch_info));
786 	if (map->arch_data == NULL) {
787 		recursive_lock_destroy(&map->lock);
788 		return B_NO_MEMORY;
789 	}
790 
791 	map->arch_data->num_invalidate_pages = 0;
792 
793 	if (!kernel) {
794 		// user
795 		// allocate a pgdir
796 		map->arch_data->pgdir_virt = (page_directory_entry *)memalign(
797 			B_PAGE_SIZE, B_PAGE_SIZE);
798 		if (map->arch_data->pgdir_virt == NULL) {
799 			free(map->arch_data);
800 			recursive_lock_destroy(&map->lock);
801 			return B_NO_MEMORY;
802 		}
803 		vm_get_page_mapping(vm_kernel_address_space_id(),
804 			(addr_t)map->arch_data->pgdir_virt, (addr_t *)&map->arch_data->pgdir_phys);
805 	} else {
806 		// kernel
807 		// we already know the kernel pgdir mapping
808 		map->arch_data->pgdir_virt = sKernelVirtualPageDirectory;
809 		map->arch_data->pgdir_phys = sKernelPhysicalPageDirectory;
810 	}
811 
812 	// zero out the bottom portion of the new pgdir
813 	memset(map->arch_data->pgdir_virt + FIRST_USER_PGDIR_ENT, 0,
814 		NUM_USER_PGDIR_ENTS * sizeof(page_directory_entry));
815 
816 	// insert this new map into the map list
817 	{
818 		int state = disable_interrupts();
819 		acquire_spinlock(&tmap_list_lock);
820 
821 		// copy the top (kernel) portion of the pgdir from the kernel page directory
822 		memcpy(map->arch_data->pgdir_virt + FIRST_KERNEL_PGDIR_ENT,
823 			sKernelVirtualPageDirectory + FIRST_KERNEL_PGDIR_ENT,
824 			NUM_KERNEL_PGDIR_ENTS * sizeof(page_directory_entry));
825 
826 		map->next = tmap_list;
827 		tmap_list = map;
828 
829 		release_spinlock(&tmap_list_lock);
830 		restore_interrupts(state);
831 	}
832 
833 	return B_OK;
834 }
835 
836 
837 status_t
838 arch_vm_translation_map_init_kernel_map_post_sem(vm_translation_map *map)
839 {
840 	return B_OK;
841 }
842 
843 
844 status_t
845 arch_vm_translation_map_init(kernel_args *args)
846 {
847 	status_t error;
848 
849 	TRACE(("vm_translation_map_init: entry\n"));
850 
851 	// page hole set up in stage2
852 	page_hole = (page_table_entry *)args->arch_args.page_hole;
853 	// calculate where the pgdir would be
854 	page_hole_pgdir = (page_directory_entry *)(((unsigned int)args->arch_args.page_hole) + (B_PAGE_SIZE * 1024 - B_PAGE_SIZE));
855 	// clear out the bottom 2 GB, unmap everything
856 	memset(page_hole_pgdir + FIRST_USER_PGDIR_ENT, 0, sizeof(page_directory_entry) * NUM_USER_PGDIR_ENTS);
857 
858 	sKernelPhysicalPageDirectory = (page_directory_entry *)args->arch_args.phys_pgdir;
859 	sKernelVirtualPageDirectory = (page_directory_entry *)args->arch_args.vir_pgdir;
860 
861 	B_INITIALIZE_SPINLOCK(&tmap_list_lock);
862 	tmap_list = NULL;
863 
864 	// allocate some space to hold physical page mapping info
865 	iospace_pgtables = (page_table_entry *)vm_allocate_early(args,
866 		B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)), ~0L,
867 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
868 
869 	TRACE(("iospace_pgtables %p\n", iospace_pgtables));
870 
871 	// init physical page mapper
872 	error = generic_vm_physical_page_mapper_init(args, map_iospace_chunk,
873 		&sIOSpaceBase, IOSPACE_SIZE, IOSPACE_CHUNK_SIZE);
874 	if (error != B_OK)
875 		return error;
876 
877 	// initialize our data structures
878 	memset(iospace_pgtables, 0, B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)));
879 
880 	TRACE(("mapping iospace_pgtables\n"));
881 
882 	// put the array of pgtables directly into the kernel pagedir
883 	// these will be wired and kept mapped into virtual space to be easy to get to
884 	{
885 		addr_t phys_pgtable;
886 		addr_t virt_pgtable;
887 		page_directory_entry *e;
888 		int i;
889 
890 		virt_pgtable = (addr_t)iospace_pgtables;
891 		for (i = 0; i < (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)); i++, virt_pgtable += B_PAGE_SIZE) {
892 			early_query(virt_pgtable, &phys_pgtable);
893 			e = &page_hole_pgdir[(sIOSpaceBase / (B_PAGE_SIZE * 1024)) + i];
894 			put_pgtable_in_pgdir(e, phys_pgtable, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
895 		}
896 	}
897 
898 	// enable global page feature if available
899 	if (x86_check_feature(IA32_FEATURE_PGE, FEATURE_COMMON)) {
900 		// this prevents kernel pages from being flushed from the TLB on context switches
901 		x86_write_cr4(x86_read_cr4() | IA32_CR4_GLOBAL_PAGES);
902 	}
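	// Global TLB entries survive the CR3 reload done on address space
	// switches. Kernel mappings are created with the global bit set
	// (map_tmap() passes IS_KERNEL_MAP() as the globalPage flag, and
	// map_iospace_chunk() sets the bit directly), so once PGE is enabled
	// they stay cached across context switches.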
903 
904 	TRACE(("vm_translation_map_init: done\n"));
905 
906 	return B_OK;
907 }
908 
909 
910 status_t
911 arch_vm_translation_map_init_post_sem(kernel_args *args)
912 {
913 	return generic_vm_physical_page_mapper_init_post_sem(args);
914 }
915 
916 
917 status_t
918 arch_vm_translation_map_init_post_area(kernel_args *args)
919 {
920 	// now that the vm is initialized, wrap the early allocations in areas
921 	// and get rid of the page hole
922 	void *temp;
923 	status_t error;
924 	area_id area;
925 
926 	TRACE(("vm_translation_map_init_post_area: entry\n"));
927 
928 	// unmap the page hole hack we were using before
929 	sKernelVirtualPageDirectory[1023].present = 0;
930 	page_hole_pgdir = NULL;
931 	page_hole = NULL;
932 
933 	temp = (void *)sKernelVirtualPageDirectory;
934 	area = create_area("kernel_pgdir", &temp, B_EXACT_ADDRESS, B_PAGE_SIZE,
935 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
936 	if (area < B_OK)
937 		return area;
938 
939 	temp = (void *)iospace_pgtables;
940 	area = create_area("iospace_pgtables", &temp, B_EXACT_ADDRESS,
941 		B_PAGE_SIZE * (IOSPACE_SIZE / (B_PAGE_SIZE * 1024)),
942 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
943 	if (area < B_OK)
944 		return area;
945 
946 	error = generic_vm_physical_page_mapper_init_post_area(args);
947 	if (error != B_OK)
948 		return error;
949 
950 	// this area is used for query_tmap_interrupt()
951 	// TODO: Note, this only works as long as all pages belong to the same
952 	//	page table, which is not yet enforced (or even tested)!
953 
954 	area = vm_create_null_area(vm_kernel_address_space_id(),
955 		"interrupt query pages", (void **)&sQueryPages, B_ANY_ADDRESS,
956 		B_PAGE_SIZE * (smp_get_num_cpus() + 1));
957 	if (area < B_OK)
958 		return area;
959 
960 	// map the last page of the query pages to the page table entry they're in
961 
962 	{
963 		page_table_entry *pageTableEntry;
964 		addr_t physicalPageTable;
965 		int32 index;
966 
967 		sQueryPageTable = (page_table_entry *)(sQueryPages + smp_get_num_cpus() * B_PAGE_SIZE);
968 
969 		index = VADDR_TO_PDENT((addr_t)sQueryPageTable);
970 		physicalPageTable = ADDR_REVERSE_SHIFT(sKernelVirtualPageDirectory[index].addr);
971 
972 		get_physical_page_tmap(physicalPageTable,
973 			(addr_t *)&pageTableEntry, PHYSICAL_PAGE_NO_WAIT);
974 
975 		index = VADDR_TO_PTENT((addr_t)sQueryPageTable);
976 		put_page_table_entry_in_pgtable(&pageTableEntry[index], physicalPageTable,
977 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
978 
979 		put_physical_page_tmap((addr_t)pageTableEntry);
980 		//invalidate_TLB(sQueryPageTable);
981 	}
982 
983 	TRACE(("vm_translation_map_init_post_area: done\n"));
984 	return B_OK;
985 }
986 
987 
988 // XXX horrible back door to map a page quickly regardless of translation map object, etc.
989 // used only during VM setup.
990 // uses a 'page hole' set up in the stage 2 bootloader. The page hole is created by pointing one of
991 // the pgdir entries back at itself, effectively mapping the contents of all of the page tables
992 // into a single 4 MB region. It's only used here, and is later unmapped.
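// With the hole in place, the page tables of the active address space appear
// as one linear array of page_table_entry at 'page_hole', so the entry for a
// virtual address is simply page_hole[va / B_PAGE_SIZE], and the page
// directory itself shows up as the last 4 KB page of that 4 MB window (hence
// page_hole_pgdir = page_hole + 4 MB - 4 KB in arch_vm_translation_map_init()).
// Presumably the hole sits in the top pgdir slot, given that it is later torn
// down by clearing entry 1023.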
993 
994 status_t
995 arch_vm_translation_map_early_map(kernel_args *args, addr_t va, addr_t pa,
996 	uint8 attributes, addr_t (*get_free_page)(kernel_args *))
997 {
998 	int index;
999 
1000 	TRACE(("early_tmap: entry pa 0x%lx va 0x%lx\n", pa, va));
1001 
1002 	// check to see if a page table exists for this range
1003 	index = VADDR_TO_PDENT(va);
1004 	if (page_hole_pgdir[index].present == 0) {
1005 		addr_t pgtable;
1006 		page_directory_entry *e;
1007 		// we need to allocate a pgtable
1008 		pgtable = get_free_page(args);
1009 		// pgtable is in pages, convert to physical address
1010 		pgtable *= B_PAGE_SIZE;
1011 
1012 		TRACE(("early_map: asked for free page for pgtable. 0x%lx\n", pgtable));
1013 
1014 		// put it in the pgdir
1015 		e = &page_hole_pgdir[index];
1016 		put_pgtable_in_pgdir(e, pgtable, attributes);
1017 
1018 		// zero it out in its new mapping
1019 		memset((unsigned int *)((unsigned int)page_hole + (va / B_PAGE_SIZE / 1024) * B_PAGE_SIZE), 0, B_PAGE_SIZE);
1020 	}
1021 
1022 	// now, fill in the pentry
1023 	put_page_table_entry_in_pgtable(page_hole + va / B_PAGE_SIZE, pa, attributes,
1024 		IS_KERNEL_ADDRESS(va));
1025 
1026 	arch_cpu_invalidate_TLB_range(va, va);
1027 
1028 	return B_OK;
1029 }
1030 
1031