xref: /haiku/src/system/kernel/arch/arm64/VMSAv8TranslationMap.cpp (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
1 /*
2  * Copyright 2022 Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 #include "VMSAv8TranslationMap.h"
6 
7 #include <algorithm>
8 #include <slab/Slab.h>
9 #include <util/AutoLock.h>
10 #include <util/ThreadAutoLock.h>
11 #include <vm/VMAddressSpace.h>
12 #include <vm/VMCache.h>
13 #include <vm/vm_page.h>
14 #include <vm/vm_priv.h>
15 
16 
17 //#define DO_TRACE
18 #ifdef DO_TRACE
19 #	define TRACE(x...) dprintf(x)
20 #else
21 #	define TRACE(x...) ;
22 #endif
23 
24 
25 uint32_t VMSAv8TranslationMap::fHwFeature;
26 uint64_t VMSAv8TranslationMap::fMair;
27 
28 // ASID Management
29 static constexpr size_t kAsidBits = 8;
30 static constexpr size_t kNumAsids = (1 << kAsidBits);
31 static spinlock sAsidLock = B_SPINLOCK_INITIALIZER;
32 // A bitmap to track which ASIDs are in use.
33 static uint64 sAsidBitMap[kNumAsids / 64] = {};
34 // A mapping from ASID to translation map.
35 static VMSAv8TranslationMap* sAsidMapping[kNumAsids] = {};
36 
37 
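// Clears the given ASID's bit in sAsidBitMap. The caller must hold sAsidLock.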
38 static void
39 free_asid(size_t asid)
40 {
41 	for (size_t i = 0; i < B_COUNT_OF(sAsidBitMap); ++i) {
42 		if (asid < 64) {
43 			sAsidBitMap[i] &= ~(uint64_t{1} << asid);
44 			return;
45 		}
46 		asid -= 64;
47 	}
48 
49 	panic("Could not free ASID!");
50 }
51 
52 
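// Invalidates all TLB entries tagged with `asid` on every CPU in the Inner
// Shareable domain; TLBI ASIDE1IS expects the ASID in bits 63:48 of its operand.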
53 static void
54 flush_tlb_whole_asid(uint64_t asid)
55 {
56 	asm("dsb ishst");
57 	asm("tlbi aside1is, %0" ::"r"(asid << 48));
58 	asm("dsb ish");
59 	asm("isb");
60 }
61 
62 
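// Returns the first free ASID and marks it as used in sAsidBitMap, or
// kNumAsids if every ASID is currently taken. The caller must hold sAsidLock.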
63 static size_t
64 alloc_first_free_asid(void)
65 {
66 	int asid = 0;
67 	for (size_t i = 0; i < B_COUNT_OF(sAsidBitMap); ++i) {
68 		int avail = __builtin_ffsll(~sAsidBitMap[i]);
69 		if (avail != 0) {
70 			sAsidBitMap[i] |= (uint64_t{1} << (avail-1));
71 			asid += (avail - 1);
72 			return asid;
73 		}
74 		asid += 64;
75 	}
76 
77 	return kNumAsids;
78 }
79 
80 
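// Helpers implementing software dirty tracking: a PTE counts as dirty when
// kAttrSWDIRTY is set or when it is writable (kAttrAPReadOnly clear).
// set_pte_dirty() makes a writable-on-demand page (kAttrSWDBM set) actually
// writable; otherwise it records the dirty state in kAttrSWDIRTY.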
81 static bool
82 is_pte_dirty(uint64_t pte)
83 {
84 	if ((pte & kAttrSWDIRTY) != 0)
85 		return true;
86 
87 	return (pte & kAttrAPReadOnly) == 0;
88 }
89 
90 
91 static uint64_t
92 set_pte_dirty(uint64_t pte)
93 {
94 	if ((pte & kAttrSWDBM) != 0)
95 		return pte & ~kAttrAPReadOnly;
96 
97 	return pte | kAttrSWDIRTY;
98 }
99 
100 
101 static uint64_t
102 set_pte_clean(uint64_t pte)
103 {
104 	pte &= ~kAttrSWDIRTY;
105 	return pte | kAttrAPReadOnly;
106 }
107 
108 
109 static bool
110 is_pte_accessed(uint64_t pte)
111 {
112 	return (pte & kPteValidMask) != 0 && (pte & kAttrAF) != 0;
113 }
114 
115 
116 VMSAv8TranslationMap::VMSAv8TranslationMap(
117 	bool kernel, phys_addr_t pageTable, int pageBits, int vaBits, int minBlockLevel)
118 	:
119 	fIsKernel(kernel),
120 	fPageTable(pageTable),
121 	fPageBits(pageBits),
122 	fVaBits(vaBits),
123 	fMinBlockLevel(minBlockLevel),
124 	fASID(kernel ? 0 : -1),
125 	fRefcount(0)
126 {
127 	TRACE("+VMSAv8TranslationMap(%p, %d, 0x%" B_PRIxADDR ", %d, %d, %d)\n", this,
128 		kernel, pageTable, pageBits, vaBits, minBlockLevel);
129 
130 	fInitialLevel = CalcStartLevel(fVaBits, fPageBits);
131 }
132 
133 
134 VMSAv8TranslationMap::~VMSAv8TranslationMap()
135 {
136 	TRACE("-VMSAv8TranslationMap(%p)\n", this);
137 	TRACE("  fIsKernel: %d, fPageTable: 0x%" B_PRIxADDR ", fASID: %d, fRefcount: %d\n",
138 		fIsKernel, fPageTable, fASID, fRefcount);
139 
140 	ASSERT(!fIsKernel);
141 	ASSERT(fRefcount == 0);
142 
143 	ThreadCPUPinner pinner(thread_get_current_thread());
144 	InterruptsSpinLocker locker(sAsidLock);
145 
146 	FreeTable(fPageTable, 0, fInitialLevel);
147 
148 	if (fASID != -1) {
149 		sAsidMapping[fASID] = NULL;
150 		free_asid(fASID);
151 	}
152 }
153 
154 
155 // Switch the given user map into TTBR0.
156 // Passing the kernel map here installs an empty page table instead.
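// The ASID is written into TTBR0_EL1[63:48] together with the table address,
// so switching to a map that already owns an ASID needs no TLB maintenance;
// a full per-ASID flush only happens when a fresh or recycled ASID is assigned.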
157 void
158 VMSAv8TranslationMap::SwitchUserMap(VMSAv8TranslationMap *from, VMSAv8TranslationMap *to)
159 {
160 	InterruptsSpinLocker locker(sAsidLock);
161 
162 	if (!from->fIsKernel) {
163 		from->fRefcount--;
164 	}
165 
166 	if (!to->fIsKernel) {
167 		to->fRefcount++;
168 	} else {
169 		arch_vm_install_empty_table_ttbr0();
170 		return;
171 	}
172 
173 	ASSERT(to->fPageTable != 0);
174 	uint64_t ttbr = to->fPageTable | ((fHwFeature & HW_COMMON_NOT_PRIVATE) != 0 ? 1 : 0);
175 
176 	if (to->fASID != -1) {
177 		WRITE_SPECIALREG(TTBR0_EL1, ((uint64_t)to->fASID << 48) | ttbr);
178 		asm("isb");
179 		return;
180 	}
181 
182 	size_t allocatedAsid = alloc_first_free_asid();
183 	if (allocatedAsid != kNumAsids) {
184 		to->fASID = allocatedAsid;
185 		sAsidMapping[allocatedAsid] = to;
186 
187 		WRITE_SPECIALREG(TTBR0_EL1, (allocatedAsid << 48) | ttbr);
188 		flush_tlb_whole_asid(allocatedAsid);
189 		return;
190 	}
191 
192 	for (size_t i = 0; i < kNumAsids; ++i) {
193 		if (sAsidMapping[i]->fRefcount == 0) {
194 			sAsidMapping[i]->fASID = -1;
195 			to->fASID = i;
196 			sAsidMapping[i] = to;
197 
198 			WRITE_SPECIALREG(TTBR0_EL1, (i << 48) | ttbr);
199 			flush_tlb_whole_asid(i);
200 			return;
201 		}
202 	}
203 
204 	panic("cannot assign ASID");
205 }
206 
207 
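// Computes the translation table level the walk starts at. For example, with
// 48-bit virtual addresses and 4 KiB pages (vaBits = 48, pageBits = 12), each
// table level resolves 9 bits, so the remaining 48 - 12 = 36 bits need four
// levels and the walk starts at level 0.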
208 int
209 VMSAv8TranslationMap::CalcStartLevel(int vaBits, int pageBits)
210 {
211 	int level = 4;
212 
213 	int bitsLeft = vaBits - pageBits;
214 	while (bitsLeft > 0) {
215 		int tableBits = pageBits - 3;
216 		bitsLeft -= tableBits;
217 		level--;
218 	}
219 
220 	ASSERT(level >= 0);
221 
222 	return level;
223 }
224 
225 
226 bool
227 VMSAv8TranslationMap::Lock()
228 {
229 	TRACE("VMSAv8TranslationMap::Lock()\n");
230 	recursive_lock_lock(&fLock);
231 	return true;
232 }
233 
234 
235 void
236 VMSAv8TranslationMap::Unlock()
237 {
238 	TRACE("VMSAv8TranslationMap::Unlock()\n");
239 	recursive_lock_unlock(&fLock);
240 }
241 
242 
243 addr_t
244 VMSAv8TranslationMap::MappedSize() const
245 {
246 	panic("VMSAv8TranslationMap::MappedSize not implemented");
247 	return 0;
248 }
249 
250 
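// Returns a worst-case upper bound on the number of table pages that mapping
// the range [start, end] may require: one page for every table-sized region
// the range touches at each level used by this map. A start of 0 means the
// placement is not known yet, so the range is shifted to a worst-case offset.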
251 size_t
252 VMSAv8TranslationMap::MaxPagesNeededToMap(addr_t start, addr_t end) const
253 {
254 	constexpr uint64_t level3Range = B_PAGE_SIZE * 512;
255 	constexpr uint64_t level2Range = level3Range * 512;
256 	constexpr uint64_t level1Range = level2Range * 512;
257 	constexpr uint64_t level0Range = level1Range * 512;
258 
259 	if (start == 0) {
260 		start = level3Range - B_PAGE_SIZE;
261 		end += start;
262 	}
263 
264 	size_t requiredPages[] = {
265 		end / level0Range + 1 - start / level0Range,
266 		end / level1Range + 1 - start / level1Range,
267 		end / level2Range + 1 - start / level2Range,
268 		end / level3Range + 1 - start / level3Range
269 	};
270 
271 	size_t ret = 0;
272 	for (int i = fInitialLevel; i < 4; ++i) {
273 		ret += requiredPages[i];
274 	}
275 
276 	return ret;
277 }
278 
279 
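// Returns a virtual pointer to the table at physical address `pa`, using the
// kernel's linear physical-memory mapping at KERNEL_PMAP_BASE.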
280 uint64_t*
281 VMSAv8TranslationMap::TableFromPa(phys_addr_t pa)
282 {
283 	return reinterpret_cast<uint64_t*>(KERNEL_PMAP_BASE + pa);
284 }
285 
286 
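// Recursively frees the table page at `ptPa` and every lower-level table it
// references, invalidating TLB entries for any still-valid leaf mappings
// along the way. Only used while tearing the address space down.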
287 void
288 VMSAv8TranslationMap::FreeTable(phys_addr_t ptPa, uint64_t va, int level)
289 {
290 	ASSERT(level < 4);
291 
292 	int tableBits = fPageBits - 3;
293 	uint64_t tableSize = 1UL << tableBits;
294 	uint64_t vaMask = (1UL << fVaBits) - 1;
295 
296 	int shift = tableBits * (3 - level) + fPageBits;
297 	uint64_t entrySize = 1UL << shift;
298 
299 	uint64_t nextVa = va;
300 	uint64_t* pt = TableFromPa(ptPa);
301 	for (uint64_t i = 0; i < tableSize; i++) {
302 		uint64_t oldPte = (uint64_t) atomic_get_and_set64((int64*) &pt[i], 0);
303 
304 		if (level < 3 && (oldPte & kPteTypeMask) == kPteTypeL012Table) {
305 			FreeTable(oldPte & kPteAddrMask, nextVa, level + 1);
306 		} else if ((oldPte & kPteTypeMask) != 0) {
307 			uint64_t fullVa = (fIsKernel ? ~vaMask : 0) | nextVa;
308 
309 			// Use this rather than FlushVAIfAccessed so that we don't have to
310 			// acquire sAsidLock for every entry.
311 			flush_va_if_accessed(oldPte, fullVa, fASID);
312 		}
313 
314 		nextVa += entrySize;
315 	}
316 
317 	vm_page* page = vm_lookup_page(ptPa >> fPageBits);
318 	DEBUG_PAGE_ACCESS_START(page);
319 	vm_page_set_state(page, PAGE_STATE_FREE);
320 }
321 
322 
323 // Make a new page sub-table.
324 // The parent table is `ptPa`, and the new sub-table's PTE will be at `index`
325 // in it.
326 // Returns the physical address of the new table, or the address of the existing
327 // one if the PTE is already filled.
328 phys_addr_t
329 VMSAv8TranslationMap::GetOrMakeTable(phys_addr_t ptPa, int level, int index,
330 	vm_page_reservation* reservation)
331 {
332 	ASSERT(level < 3);
333 
334 	uint64_t* ptePtr = TableFromPa(ptPa) + index;
335 	uint64_t oldPte = atomic_get64((int64*) ptePtr);
336 
337 	int type = oldPte & kPteTypeMask;
338 	ASSERT(type != kPteTypeL12Block);
339 
340 	if (type == kPteTypeL012Table) {
341 		// This is already a table entry, so just return it
342 		return oldPte & kPteAddrMask;
343 	} else if (reservation != nullptr) {
344 		// Create new table there
345 		vm_page* page = vm_page_allocate_page(reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
346 		phys_addr_t newTablePa = page->physical_page_number << fPageBits;
347 		DEBUG_PAGE_ACCESS_END(page);
348 
349 		// We only create mappings at the final level so we don't need to handle
350 		// splitting block mappings
351 		ASSERT(type != kPteTypeL12Block);
352 
353 		// Ensure that writes to page being attached have completed
354 		asm("dsb ishst");
355 
356 		uint64_t oldPteRefetch = (uint64_t)atomic_test_and_set64((int64*) ptePtr,
357 			newTablePa | kPteTypeL012Table, oldPte);
358 		if (oldPteRefetch != oldPte) {
359 			// If the old PTE has mutated, it must be because another thread has allocated the
360 			// sub-table at the same time as us. If that has happened, deallocate the page we
361 		// set up and use the one they installed instead.
362 			ASSERT((oldPteRefetch & kPteTypeMask) == kPteTypeL012Table);
363 			DEBUG_PAGE_ACCESS_START(page);
364 			vm_page_set_state(page, PAGE_STATE_FREE);
365 			return oldPteRefetch & kPteAddrMask;
366 		}
367 
368 		return newTablePa;
369 	}
370 
371 	// There's no existing table and we have no reservation
372 	return 0;
373 }
374 
375 
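// Invalidates the TLB entry for `va` if the PTE was marked accessed, and
// returns whether a flush was performed. Global entries (kAttrNG clear) are
// flushed from all address spaces; non-global entries are flushed by ASID,
// and only when the owning map currently has one assigned.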
376 bool
377 flush_va_if_accessed(uint64_t pte, addr_t va, int asid)
378 {
379 	if (!is_pte_accessed(pte))
380 		return false;
381 
382 	if ((pte & kAttrNG) == 0) {
383 		// Flush from all address spaces
384 		asm("dsb ishst"); // Ensure PTE write completed
385 		asm("tlbi vaae1is, %0" ::"r"(((va >> 12) & kTLBIMask)));
386 	} else if (asid != -1) {
387 		asm("dsb ishst"); // Ensure PTE write completed
388 		asm("tlbi vae1is, %0" ::"r"(((va >> 12) & kTLBIMask) | (uint64_t(asid) << 48)));
389 	} else {
390 		// No ASID is assigned, so there are no TLB entries to invalidate.
391 		return false;
392 	}
393 
394 	asm("dsb ish"); // Wait for TLB flush to complete
395 	asm("isb");
396 	return true;
397 }
398 
399 bool
400 VMSAv8TranslationMap::FlushVAIfAccessed(uint64_t pte, addr_t va) {
401 	InterruptsSpinLocker locker(sAsidLock);
402 	return flush_va_if_accessed(pte, va, fASID);
403 }
404 
405 
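// First half of the ARMv8 break-before-make sequence: atomically replaces the
// old PTE with an invalid entry and flushes the TLB entry if it was accessed.
// Returns false if another thread modified the PTE concurrently, in which case
// the caller is expected to retry.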
406 bool
407 VMSAv8TranslationMap::AttemptPteBreakBeforeMake(uint64_t* ptePtr, uint64_t oldPte, addr_t va)
408 {
409 	uint64_t loadedPte = atomic_test_and_set64((int64_t*)ptePtr, 0, oldPte);
410 	if (loadedPte != oldPte)
411 		return false;
412 
413 	FlushVAIfAccessed(oldPte, va);
414 
415 	return true;
416 }
417 
418 
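// Walks the translation tables for [va, va + size) and calls `updatePte` for
// every level-3 PTE slot in the range. Intermediate tables are allocated from
// `reservation` when needed; when `reservation` is null, missing subtrees are
// simply skipped.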
419 template<typename UpdatePte>
420 void
421 VMSAv8TranslationMap::ProcessRange(phys_addr_t ptPa, int level, addr_t va, size_t size,
422 	vm_page_reservation* reservation, UpdatePte&& updatePte)
423 {
424 	ASSERT(level < 4);
425 	ASSERT(ptPa != 0);
426 
427 	uint64_t pageMask = (1UL << fPageBits) - 1;
428 	uint64_t vaMask = (1UL << fVaBits) - 1;
429 
430 	ASSERT((va & pageMask) == 0);
431 
432 	int tableBits = fPageBits - 3;
433 	uint64_t tableMask = (1UL << tableBits) - 1;
434 
435 	int shift = tableBits * (3 - level) + fPageBits;
436 	uint64_t entrySize = 1UL << shift;
437 	uint64_t entryMask = entrySize - 1;
438 
439 	uint64_t alignedDownVa = va & ~entryMask;
440 	uint64_t end = va + size - 1;
441 	if (level == 3)
442 		ASSERT(alignedDownVa == va);
443 
444 	for (uint64_t effectiveVa = alignedDownVa; effectiveVa < end; effectiveVa += entrySize) {
445 		int index = ((effectiveVa & vaMask) >> shift) & tableMask;
446 		uint64_t* ptePtr = TableFromPa(ptPa) + index;
447 
448 		if (level == 3) {
449 			updatePte(ptePtr, effectiveVa);
450 		} else {
451 			phys_addr_t subTable = GetOrMakeTable(ptPa, level, index, reservation);
452 
453 			// When reservation is null, we can't create a new subtable. This can be intentional,
454 			// for example when called from Unmap().
455 			if (subTable == 0)
456 				continue;
457 
458 			if (effectiveVa < va) {
459 				// The range begins inside the slot.
460 				if (effectiveVa + entrySize - 1 > end) {
461 					// The range ends within the slot.
462 					ProcessRange(subTable, level + 1, va, size, reservation, updatePte);
463 				} else {
464 					// The range extends past the end of the slot.
465 					ProcessRange(subTable, level + 1, va, effectiveVa + entrySize - va, reservation, updatePte);
466 				}
467 			} else {
468 				// The range beginning is aligned to the slot.
469 				if (effectiveVa + entrySize - 1 > end) {
470 					// The range ends within the slot.
471 					ProcessRange(subTable, level + 1, effectiveVa, end - effectiveVa + 1,
472 						reservation, updatePte);
473 				} else {
474 					// The range extends past the end of the slot.
475 					ProcessRange(subTable, level + 1, effectiveVa, entrySize, reservation, updatePte);
476 				}
477 			}
478 		}
479 	}
480 }
481 
482 
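// Returns the index of the MAIR_EL1 attribute entry that equals `type`;
// fMair is assumed to mirror the value programmed into MAIR_EL1 during early
// VM initialization.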
483 uint8_t
484 VMSAv8TranslationMap::MairIndex(uint8_t type)
485 {
486 	for (int i = 0; i < 8; i++)
487 		if (((fMair >> (i * 8)) & 0xff) == type)
488 			return i;
489 
490 	panic("MAIR entry not found");
491 	return 0;
492 }
493 
494 
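// Builds the PTE attribute bits (access permissions, execute-never bits,
// shareability and MAIR index) from Haiku's area protection flags and memory
// type.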
495 uint64_t
496 VMSAv8TranslationMap::GetMemoryAttr(uint32 attributes, uint32 memoryType, bool isKernel)
497 {
498 	uint64_t attr = 0;
499 
500 	if (!isKernel)
501 		attr |= kAttrNG;
502 
503 	if ((attributes & B_EXECUTE_AREA) == 0)
504 		attr |= kAttrUXN;
505 	if ((attributes & B_KERNEL_EXECUTE_AREA) == 0)
506 		attr |= kAttrPXN;
507 
508 	// SWDBM is a software-reserved bit that we use to mark that writes are
509 	// allowed and that the fault handler should clear kAttrAPReadOnly on a
510 	// write fault. In that case kAttrAPReadOnly doubles as a not-dirty bit.
511 	// Additionally, the dirty state can be stored in SWDIRTY so that it is
512 	// not lost when changing protection from RW to RO.
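	// The resulting combinations are roughly:
	//   clean, read-only:  kAttrAPReadOnly set, kAttrSWDIRTY clear
	//   clean, writable:   kAttrAPReadOnly set, kAttrSWDBM set
	//   dirty, writable:   kAttrAPReadOnly clear
	//   dirty, read-only:  kAttrAPReadOnly set, kAttrSWDIRTY set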
513 
514 	// All page permissions begin life in RO state.
515 	attr |= kAttrAPReadOnly;
516 
517 	// User-Execute implies User-Read, because it would break PAN otherwise
518 	if ((attributes & B_READ_AREA) != 0 || (attributes & B_EXECUTE_AREA) != 0)
519 		attr |= kAttrAPUserAccess; // Allow user reads
520 
521 	if ((attributes & B_WRITE_AREA) != 0 || (attributes & B_KERNEL_WRITE_AREA) != 0)
522 		attr |= kAttrSWDBM; // Mark as writeable
523 
524 	// When supported by hardware, copy our SWDBM bit into DBM so that
525 	// kAttrAPReadOnly is cleared automatically on a write attempt,
526 	// without going through the fault handler.
527 	if ((fHwFeature & HW_DIRTY) != 0 && (attr & kAttrSWDBM) != 0)
528 		attr |= kAttrDBM;
529 
530 	attr |= kAttrSHInnerShareable; // Inner Shareable
531 
532 	uint8_t type = MAIR_NORMAL_WB;
533 
534 	switch (memoryType & B_MEMORY_TYPE_MASK) {
535 		case B_UNCACHED_MEMORY:
536 			// TODO: This probably should be nGnRE for PCI
537 			type = MAIR_DEVICE_nGnRnE;
538 			break;
539 		case B_WRITE_COMBINING_MEMORY:
540 			type = MAIR_NORMAL_NC;
541 			break;
542 		case B_WRITE_THROUGH_MEMORY:
543 			type = MAIR_NORMAL_WT;
544 			break;
545 		case B_WRITE_PROTECTED_MEMORY:
546 			type = MAIR_NORMAL_WT;
547 			break;
548 		default:
549 		case B_WRITE_BACK_MEMORY:
550 			type = MAIR_NORMAL_WB;
551 			break;
552 	}
553 
554 	attr |= MairIndex(type) << 2;
555 
556 	return attr;
557 }
558 
559 
560 status_t
561 VMSAv8TranslationMap::Map(addr_t va, phys_addr_t pa, uint32 attributes, uint32 memoryType,
562 	vm_page_reservation* reservation)
563 {
564 	TRACE("VMSAv8TranslationMap::Map(0x%" B_PRIxADDR ", 0x%" B_PRIxADDR
565 		", 0x%x, 0x%x)\n", va, pa, attributes, memoryType);
566 
567 	ThreadCPUPinner pinner(thread_get_current_thread());
568 
569 	ASSERT(ValidateVa(va));
570 	uint64_t attr = GetMemoryAttr(attributes, memoryType, fIsKernel);
571 
572 	// During first mapping we need to allocate root table
573 	if (fPageTable == 0) {
574 		vm_page* page = vm_page_allocate_page(reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
575 		DEBUG_PAGE_ACCESS_END(page);
576 		fPageTable = page->physical_page_number << fPageBits;
577 	}
578 
579 	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, reservation,
580 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
581 			while (true) {
582 				phys_addr_t effectivePa = effectiveVa - va + pa;
583 				uint64_t oldPte = atomic_get64((int64*)ptePtr);
584 				uint64_t newPte = effectivePa | attr | kPteTypeL3Page;
585 
586 				if (newPte == oldPte)
587 					return;
588 
589 				if ((oldPte & kPteValidMask) != 0) {
590 					// ARM64 requires "break-before-make". We must set the PTE to an invalid
591 					// entry and flush the TLB as appropriate before we can write the new PTE.
592 					if (!AttemptPteBreakBeforeMake(ptePtr, oldPte, effectiveVa))
593 						continue;
594 				}
595 
596 				// Install the new PTE
597 				atomic_set64((int64*)ptePtr, newPte);
598 				asm("dsb ishst"); // Ensure PTE write completed
599 				asm("isb");
600 				break;
601 			}
602 		});
603 
604 	return B_OK;
605 }
606 
607 
608 status_t
609 VMSAv8TranslationMap::Unmap(addr_t start, addr_t end)
610 {
611 	TRACE("VMSAv8TranslationMap::Unmap(0x%" B_PRIxADDR ", 0x%" B_PRIxADDR
612 		")\n", start, end);
613 	ThreadCPUPinner pinner(thread_get_current_thread());
614 
615 	size_t size = end - start + 1;
616 	ASSERT(ValidateVa(start));
617 
618 	if (fPageTable == 0)
619 		return B_OK;
620 
621 	ProcessRange(fPageTable, fInitialLevel, start, size, nullptr,
622 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
623 			ASSERT(effectiveVa <= end);
624 			uint64_t oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
625 			FlushVAIfAccessed(oldPte, effectiveVa);
626 		});
627 
628 	return B_OK;
629 }
630 
631 
632 status_t
633 VMSAv8TranslationMap::UnmapPage(VMArea* area, addr_t address, bool updatePageQueue)
634 {
635 	TRACE("VMSAv8TranslationMap::UnmapPage(0x%" B_PRIxADDR "(%s), 0x%"
636 		B_PRIxADDR ", %d)\n", (addr_t)area, area->name, address,
637 		updatePageQueue);
638 
639 	ASSERT(ValidateVa(address));
640 	ThreadCPUPinner pinner(thread_get_current_thread());
641 	RecursiveLocker locker(fLock);
642 
643 	uint64_t oldPte = 0;
644 	ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
645 		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
646 			oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
647 			FlushVAIfAccessed(oldPte, effectiveVa);
648 		});
649 
650 	if ((oldPte & kPteValidMask) == 0)
651 		return B_ENTRY_NOT_FOUND;
652 
653 	pinner.Unlock();
654 	locker.Detach();
655 	PageUnmapped(area, (oldPte & kPteAddrMask) >> fPageBits, is_pte_accessed(oldPte),
656 		is_pte_dirty(oldPte), updatePageQueue);
657 
658 	return B_OK;
659 }
660 
661 
662 void
663 VMSAv8TranslationMap::UnmapPages(VMArea* area, addr_t address, size_t size, bool updatePageQueue)
664 {
665 	TRACE("VMSAv8TranslationMap::UnmapPages(0x%" B_PRIxADDR "(%s), 0x%"
666 		B_PRIxADDR ", 0x%" B_PRIxSIZE ", %d)\n", (addr_t)area,
667 		area->name, address, size, updatePageQueue);
668 
669 	ASSERT(ValidateVa(address));
670 	VMAreaMappings queue;
671 	ThreadCPUPinner pinner(thread_get_current_thread());
672 	RecursiveLocker locker(fLock);
673 
674 	ProcessRange(fPageTable, fInitialLevel, address, size, nullptr,
675 		[=, &queue](uint64_t* ptePtr, uint64_t effectiveVa) {
676 			uint64_t oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
677 			FlushVAIfAccessed(oldPte, effectiveVa);
678 			if ((oldPte & kPteValidMask) == 0)
679 				return;
680 
681 			if (area->cache_type == CACHE_TYPE_DEVICE)
682 				return;
683 
684 			// get the page
685 			vm_page* page = vm_lookup_page((oldPte & kPteAddrMask) >> fPageBits);
686 			ASSERT(page != NULL);
687 
688 			DEBUG_PAGE_ACCESS_START(page);
689 
690 			// transfer the accessed/dirty flags to the page
691 			page->accessed = is_pte_accessed(oldPte);
692 			page->modified = is_pte_dirty(oldPte);
693 
694 			// remove the mapping object/decrement the wired_count of the
695 			// page
696 			if (area->wiring == B_NO_LOCK) {
697 				vm_page_mapping* mapping = NULL;
698 				vm_page_mappings::Iterator iterator
699 					= page->mappings.GetIterator();
700 				while ((mapping = iterator.Next()) != NULL) {
701 					if (mapping->area == area)
702 						break;
703 				}
704 
705 				ASSERT(mapping != NULL);
706 
707 				area->mappings.Remove(mapping);
708 				page->mappings.Remove(mapping);
709 				queue.Add(mapping);
710 			} else
711 				page->DecrementWiredCount();
712 
713 			if (!page->IsMapped()) {
714 				atomic_add(&gMappedPagesCount, -1);
715 
716 				if (updatePageQueue) {
717 					if (page->Cache()->temporary)
718 						vm_page_set_state(page, PAGE_STATE_INACTIVE);
719 					else if (page->modified)
720 						vm_page_set_state(page, PAGE_STATE_MODIFIED);
721 					else
722 						vm_page_set_state(page, PAGE_STATE_CACHED);
723 				}
724 			}
725 
726 			DEBUG_PAGE_ACCESS_END(page);
727 		});
728 
729 	// TODO: As in UnmapPage() we can lose page dirty flags here. ATM it's not
730 	// really critical here, as in all cases this method is used, the unmapped
731 	// area range is unmapped for good (resized/cut) and the pages will likely
732 	// be freed.
733 
734 	locker.Unlock();
735 
736 	// free removed mappings
737 	bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
738 	uint32 freeFlags = CACHE_DONT_WAIT_FOR_MEMORY
739 		| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0);
740 
741 	while (vm_page_mapping* mapping = queue.RemoveHead())
742 		vm_free_page_mapping(mapping->page->physical_page_number, mapping, freeFlags);
743 }
744 
745 
746 void
747 VMSAv8TranslationMap::UnmapArea(VMArea* area, bool deletingAddressSpace,
748 	bool ignoreTopCachePageFlags)
749 {
750 	TRACE("VMSAv8TranslationMap::UnmapArea(0x%" B_PRIxADDR "(%s), 0x%"
751 		B_PRIxADDR ", 0x%" B_PRIxSIZE ", %d, %d)\n", (addr_t)area,
752 		area->name, area->Base(), area->Size(), deletingAddressSpace,
753 		ignoreTopCachePageFlags);
754 
755 	if (area->cache_type == CACHE_TYPE_DEVICE || area->wiring != B_NO_LOCK) {
756 		UnmapPages(area, area->Base(), area->Size(), true);
757 		return;
758 	}
759 
760 	bool unmapPages = !deletingAddressSpace || !ignoreTopCachePageFlags;
761 
762 	RecursiveLocker locker(fLock);
763 	ThreadCPUPinner pinner(thread_get_current_thread());
764 
765 	VMAreaMappings mappings;
766 	mappings.MoveFrom(&area->mappings);
767 
768 	for (VMAreaMappings::Iterator it = mappings.GetIterator();
769 			vm_page_mapping* mapping = it.Next();) {
770 
771 		vm_page* page = mapping->page;
772 		page->mappings.Remove(mapping);
773 
774 		VMCache* cache = page->Cache();
775 
776 		bool pageFullyUnmapped = false;
777 		if (!page->IsMapped()) {
778 			atomic_add(&gMappedPagesCount, -1);
779 			pageFullyUnmapped = true;
780 		}
781 
782 		if (unmapPages || cache != area->cache) {
783 			addr_t address = area->Base()
784 				+ ((page->cache_offset * B_PAGE_SIZE)
785 				- area->cache_offset);
786 
787 			uint64_t oldPte = 0;
788 			ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
789 				[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
790 					oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
791 					if (!deletingAddressSpace)
792 						FlushVAIfAccessed(oldPte, effectiveVa);
793 				});
794 
795 			if ((oldPte & kPteValidMask) == 0) {
796 				panic("page %p has mapping for area %p "
797 					"(%#" B_PRIxADDR "), but has no "
798 					"page table", page, area, address);
799 				continue;
800 			}
801 
802 			// transfer the accessed/dirty flags to the page and
803 			// invalidate the mapping, if necessary
804 			if (is_pte_dirty(oldPte))
805 				page->modified = true;
806 			if (is_pte_accessed(oldPte))
807 				page->accessed = true;
808 
809 			if (pageFullyUnmapped) {
810 				DEBUG_PAGE_ACCESS_START(page);
811 
812 				if (cache->temporary) {
813 					vm_page_set_state(page,
814 						PAGE_STATE_INACTIVE);
815 				} else if (page->modified) {
816 					vm_page_set_state(page,
817 						PAGE_STATE_MODIFIED);
818 				} else {
819 					vm_page_set_state(page,
820 						PAGE_STATE_CACHED);
821 				}
822 
823 				DEBUG_PAGE_ACCESS_END(page);
824 			}
825 		}
826 	}
827 
828 	locker.Unlock();
829 
830 	bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
831 	uint32 freeFlags = CACHE_DONT_WAIT_FOR_MEMORY
832 		| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0);
833 
834 	while (vm_page_mapping* mapping = mappings.RemoveHead())
835 		vm_free_page_mapping(mapping->page->physical_page_number, mapping, freeFlags);
836 }
837 
838 
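// Checks that `va` is canonical for this map: user maps accept only addresses
// with all bits above fVaBits clear, kernel maps only addresses with them set.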
839 bool
840 VMSAv8TranslationMap::ValidateVa(addr_t va)
841 {
842 	uint64_t vaMask = (1UL << fVaBits) - 1;
843 	bool kernelAddr = (va & (1UL << 63)) != 0;
844 	if (kernelAddr != fIsKernel)
845 		return false;
846 	if ((va & ~vaMask) != (fIsKernel ? ~vaMask : 0))
847 		return false;
848 	return true;
849 }
850 
851 
852 status_t
853 VMSAv8TranslationMap::Query(addr_t va, phys_addr_t* pa, uint32* flags)
854 {
855 	*flags = 0;
856 	*pa = 0;
857 
858 	uint64_t pageMask = (1UL << fPageBits) - 1;
859 	va &= ~pageMask;
860 
861 	ThreadCPUPinner pinner(thread_get_current_thread());
862 	ASSERT(ValidateVa(va));
863 
864 	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, nullptr,
865 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
866 			uint64_t pte = atomic_get64((int64_t*)ptePtr);
867 			*pa = pte & kPteAddrMask;
868 			*flags |= PAGE_PRESENT | B_KERNEL_READ_AREA;
869 			if (is_pte_accessed(pte))
870 				*flags |= PAGE_ACCESSED;
871 			if (is_pte_dirty(pte))
872 				*flags |= PAGE_MODIFIED;
873 
874 			if ((pte & kAttrPXN) == 0)
875 				*flags |= B_KERNEL_EXECUTE_AREA;
876 
877 			if ((pte & kAttrAPUserAccess) != 0) {
878 				*flags |= B_READ_AREA;
879 				if ((pte & kAttrUXN) == 0)
880 					*flags |= B_EXECUTE_AREA;
881 			}
882 
883 			if ((pte & kAttrSWDBM) != 0) {
884 				*flags |= B_KERNEL_WRITE_AREA;
885 				if ((pte & kAttrAPUserAccess) != 0)
886 					*flags |= B_WRITE_AREA;
887 			}
888 		});
889 
890 	return B_OK;
891 }
892 
893 
894 status_t
895 VMSAv8TranslationMap::QueryInterrupt(
896 	addr_t virtualAddress, phys_addr_t* _physicalAddress, uint32* _flags)
897 {
898 	return Query(virtualAddress, _physicalAddress, _flags);
899 }
900 
901 
902 status_t
903 VMSAv8TranslationMap::Protect(addr_t start, addr_t end, uint32 attributes, uint32 memoryType)
904 {
905 	TRACE("VMSAv8TranslationMap::Protect(0x%" B_PRIxADDR ", 0x%"
906 		B_PRIxADDR ", 0x%x, 0x%x)\n", start, end, attributes, memoryType);
907 
908 	uint64_t attr = GetMemoryAttr(attributes, memoryType, fIsKernel);
909 	size_t size = end - start + 1;
910 	ASSERT(ValidateVa(start));
911 
912 	ThreadCPUPinner pinner(thread_get_current_thread());
913 
914 	ProcessRange(fPageTable, fInitialLevel, start, size, nullptr,
915 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
916 			ASSERT(effectiveVa <= end);
917 
918 			// We need to use an atomic compare-and-swap loop because we
919 			// must clear some bits while setting others.
920 			while (true) {
921 				uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
922 				uint64_t newPte = oldPte & ~kPteAttrMask;
923 				newPte |= attr;
924 
925 				// Preserve access bit.
926 				newPte |= oldPte & kAttrAF;
927 
928 				// Preserve the dirty bit.
929 				if (is_pte_dirty(oldPte))
930 					newPte = set_pte_dirty(newPte);
931 
932 				uint64_t oldMemoryType = oldPte & (kAttrShareability | kAttrMemoryAttrIdx);
933 				uint64_t newMemoryType = newPte & (kAttrShareability | kAttrMemoryAttrIdx);
934 				if (oldMemoryType != newMemoryType) {
935 					// ARM64 requires "break-before-make". We must set the PTE to an invalid
936 					// entry and flush the TLB as appropriate before we can write the new PTE.
937 					// In this case specifically, it applies any time we change cacheability or
938 					// shareability.
939 					if (!AttemptPteBreakBeforeMake(ptePtr, oldPte, effectiveVa))
940 						continue;
941 
942 					atomic_set64((int64_t*)ptePtr, newPte);
943 					asm("dsb ishst"); // Ensure PTE write completed
944 					asm("isb");
945 
946 					// No compare-exchange loop required in this case.
947 					break;
948 				} else {
949 					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
950 						FlushVAIfAccessed(oldPte, effectiveVa);
951 						break;
952 					}
953 				}
954 			}
955 		});
956 
957 	return B_OK;
958 }
959 
960 
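// Clears the accessed and/or modified state in the PTE for `va`. The modified
// state is cleared by making the entry read-only again, so a later write
// either faults or (with hardware DBM) clears kAttrAPReadOnly once more.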
961 status_t
962 VMSAv8TranslationMap::ClearFlags(addr_t va, uint32 flags)
963 {
964 	ASSERT(ValidateVa(va));
965 
966 	bool clearAF = flags & PAGE_ACCESSED;
967 	bool setRO = flags & PAGE_MODIFIED;
968 
969 	if (!clearAF && !setRO)
970 		return B_OK;
971 
972 	ThreadCPUPinner pinner(thread_get_current_thread());
973 
974 	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, nullptr,
975 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
976 			if (clearAF && setRO) {
977 				// We need to use an atomic compare-and-swap loop because we
978 				// must clear one bit while setting the other.
979 				while (true) {
980 					uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
981 					uint64_t newPte = oldPte & ~kAttrAF;
982 					newPte = set_pte_clean(newPte);
983 
984 					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
985 						FlushVAIfAccessed(oldPte, va);
986 						break;
987 					}
988 				}
989 			} else if (clearAF) {
990 				atomic_and64((int64_t*)ptePtr, ~kAttrAF);
991 			} else {
992 				while (true) {
993 					uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
994 					if (!is_pte_dirty(oldPte))
995 						return;
996 					uint64_t newPte = set_pte_clean(oldPte);
997 					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
998 						FlushVAIfAccessed(oldPte, va);
999 						break;
1000 					}
1001 				}
1002 			}
1003 		});
1004 
1005 	return B_OK;
1006 }
1007 
1008 
1009 bool
1010 VMSAv8TranslationMap::ClearAccessedAndModified(
1011 	VMArea* area, addr_t address, bool unmapIfUnaccessed, bool& _modified)
1012 {
1013 	TRACE("VMSAv8TranslationMap::ClearAccessedAndModified(0x%"
1014 		B_PRIxADDR "(%s), 0x%" B_PRIxADDR ", %d)\n", (addr_t)area,
1015 		area->name, address, unmapIfUnaccessed);
1016 	ASSERT(ValidateVa(address));
1017 
1018 	RecursiveLocker locker(fLock);
1019 	ThreadCPUPinner pinner(thread_get_current_thread());
1020 
1021 	uint64_t oldPte = 0;
1022 	ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
1023 		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
1024 			// We need to use an atomic compare-and-swap loop because the
1025 			// decision whether to unmap the page depends on the AF bit of
1026 			// the old PTE that we read here.
1027 			while (true) {
1028 				oldPte = atomic_get64((int64_t*)ptePtr);
1029 				uint64_t newPte = oldPte & ~kAttrAF;
1030 				newPte = set_pte_clean(newPte);
1031 
1032 				// If the page has been not be accessed, then unmap it.
1033 				// If the page has not been accessed, unmap it.
1034 					newPte = 0;
1035 
1036 				if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte)
1037 					break;
1038 			}
1039 			asm("dsb ishst"); // Ensure PTE write completed
1040 		});
1041 
1042 	pinner.Unlock();
1043 	_modified = is_pte_dirty(oldPte);
1044 
1045 	if (FlushVAIfAccessed(oldPte, address))
1046 		return true;
1047 
1048 	if (!unmapIfUnaccessed)
1049 		return false;
1050 
1051 	locker.Detach(); // UnaccessedPageUnmapped takes ownership
1052 	phys_addr_t oldPa = oldPte & kPteAddrMask;
1053 	UnaccessedPageUnmapped(area, oldPa >> fPageBits);
1054 	return false;
1055 }
1056 
1057 
1058 void
1059 VMSAv8TranslationMap::Flush()
1060 {
1061 	// Necessary invalidation is performed during mapping,
1062 	// no need to do anything more here.
1063 }
1064