xref: /haiku/src/system/kernel/arch/arm64/VMSAv8TranslationMap.cpp (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
1 /*
2  * Copyright 2022 Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 #include "VMSAv8TranslationMap.h"
6 
7 #include <algorithm>
8 #include <slab/Slab.h>
9 #include <util/AutoLock.h>
10 #include <util/ThreadAutoLock.h>
11 #include <vm/VMAddressSpace.h>
12 #include <vm/VMCache.h>
13 #include <vm/vm_page.h>
14 #include <vm/vm_priv.h>
15 
16 
17 //#define DO_TRACE
18 #ifdef DO_TRACE
19 #	define TRACE(x...) dprintf(x)
20 #else
21 #	define TRACE(x...) ;
22 #endif
23 
24 
25 uint32_t VMSAv8TranslationMap::fHwFeature;
26 uint64_t VMSAv8TranslationMap::fMair;
27 
28 // ASID Management
29 static constexpr size_t kAsidBits = 8;
30 static constexpr size_t kNumAsids = (1 << kAsidBits);
31 static spinlock sAsidLock = B_SPINLOCK_INITIALIZER;
32 // A bitmap to track which ASIDs are in use.
33 static uint64 sAsidBitMap[kNumAsids / 64] = {};
34 // A mapping from ASID to translation map.
35 static VMSAv8TranslationMap* sAsidMapping[kNumAsids] = {};
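// ASID 0 is reserved for the kernel map. User maps get an ASID assigned
// lazily, on their first switch into TTBR0; once all ASIDs are in use, the
// ASID of a map that is not currently active on any CPU (fRefcount == 0) is
// reassigned and the TLB entries tagged with it are flushed. All of this
// state is protected by sAsidLock.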
36 
37 
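// Release the given ASID by clearing its bit in sAsidBitMap.
// Callers must hold sAsidLock.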
38 static void
39 free_asid(size_t asid)
40 {
41 	for (size_t i = 0; i < B_COUNT_OF(sAsidBitMap); ++i) {
42 		if (asid < 64) {
43 			sAsidBitMap[i] &= ~(uint64_t{1} << asid);
44 			return;
45 		}
46 		asid -= 64;
47 	}
48 
49 	panic("Could not free ASID!");
50 }
51 
52 
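// Invalidate all TLB entries tagged with the given ASID on every CPU in the
// Inner Shareable domain, and wait for the invalidation to complete.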
53 static void
54 flush_tlb_whole_asid(uint64_t asid)
55 {
56 	asm("dsb ishst");
57 	asm("tlbi aside1is, %0" ::"r"(asid << 48));
58 	asm("dsb ish");
59 	asm("isb");
60 }
61 
62 
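// Find the first free ASID in sAsidBitMap, mark it as used and return it.
// Returns kNumAsids if no ASID is available. Callers must hold sAsidLock.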
63 static size_t
64 alloc_first_free_asid(void)
65 {
66 	int asid = 0;
67 	for (size_t i = 0; i < B_COUNT_OF(sAsidBitMap); ++i) {
68 		int avail = __builtin_ffsll(~sAsidBitMap[i]);
69 		if (avail != 0) {
70 			sAsidBitMap[i] |= (uint64_t{1} << (avail-1));
71 			asid += (avail - 1);
72 			return asid;
73 		}
74 		asid += 64;
75 	}
76 
77 	return kNumAsids;
78 }
79 
80 
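// Helpers for the software dirty-bit scheme described in GetMemoryAttr():
// a PTE counts as dirty if kAttrSWDIRTY is set or if it is currently
// writable, i.e. kAttrAPReadOnly is clear.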
81 static bool
82 is_pte_dirty(uint64_t pte)
83 {
84 	if ((pte & kAttrSWDIRTY) != 0)
85 		return true;
86 
87 	return (pte & kAttrAPReadOnly) == 0;
88 }
89 
90 
91 static uint64_t
92 set_pte_dirty(uint64_t pte)
93 {
94 	if ((pte & kAttrSWDBM) != 0)
95 		return pte & ~kAttrAPReadOnly;
96 
97 	return pte | kAttrSWDIRTY;
98 }
99 
100 
101 static uint64_t
102 set_pte_clean(uint64_t pte)
103 {
104 	pte &= ~kAttrSWDIRTY;
105 	return pte | kAttrAPReadOnly;
106 }
107 
108 
109 static bool
110 is_pte_accessed(uint64_t pte)
111 {
112 	return (pte & kPteValidMask) != 0 && (pte & kAttrAF) != 0;
113 }
114 
115 
116 VMSAv8TranslationMap::VMSAv8TranslationMap(
117 	bool kernel, phys_addr_t pageTable, int pageBits, int vaBits, int minBlockLevel)
118 	:
119 	fIsKernel(kernel),
120 	fPageTable(pageTable),
121 	fPageBits(pageBits),
122 	fVaBits(vaBits),
123 	fMinBlockLevel(minBlockLevel),
124 	fASID(kernel ? 0 : -1),
125 	fRefcount(0)
126 {
127 	TRACE("+VMSAv8TranslationMap(%p, %d, 0x%" B_PRIxADDR ", %d, %d, %d)\n", this,
128 		kernel, pageTable, pageBits, vaBits, minBlockLevel);
129 
130 	fInitialLevel = CalcStartLevel(fVaBits, fPageBits);
131 }
132 
133 
134 VMSAv8TranslationMap::~VMSAv8TranslationMap()
135 {
136 	TRACE("-VMSAv8TranslationMap(%p)\n", this);
137 	TRACE("  fIsKernel: %d, fPageTable: 0x%" B_PRIxADDR ", fASID: %d, fRefcount: %d\n",
138 		fIsKernel, fPageTable, fASID, fRefcount);
139 
140 	ASSERT(!fIsKernel);
141 	ASSERT(fRefcount == 0);
142 	{
143 		ThreadCPUPinner pinner(thread_get_current_thread());
144 		FreeTable(fPageTable, 0, fInitialLevel, [](int level, uint64_t oldPte) {});
145 	}
146 
147 	{
148 		InterruptsSpinLocker locker(sAsidLock);
149 
150 		if (fASID != -1) {
151 			sAsidMapping[fASID] = NULL;
152 			free_asid(fASID);
153 		}
154 	}
155 }
156 
157 
158 // Switch the user map into TTBR0.
159 // Passing the kernel map here installs an empty page table instead.
160 void
161 VMSAv8TranslationMap::SwitchUserMap(VMSAv8TranslationMap *from, VMSAv8TranslationMap *to)
162 {
163 	InterruptsSpinLocker locker(sAsidLock);
164 
165 	if (!from->fIsKernel) {
166 		from->fRefcount--;
167 	}
168 
169 	if (!to->fIsKernel) {
170 		to->fRefcount++;
171 	} else {
172 		arch_vm_install_empty_table_ttbr0();
173 		return;
174 	}
175 
176 	ASSERT(to->fPageTable != 0);
177 	uint64_t ttbr = to->fPageTable | ((fHwFeature & HW_COMMON_NOT_PRIVATE) != 0 ? 1 : 0);
178 
179 	if (to->fASID != -1) {
180 		WRITE_SPECIALREG(TTBR0_EL1, ((uint64_t)to->fASID << 48) | ttbr);
181 		asm("isb");
182 		return;
183 	}
184 
185 	size_t allocatedAsid = alloc_first_free_asid();
186 	if (allocatedAsid != kNumAsids) {
187 		to->fASID = allocatedAsid;
188 		sAsidMapping[allocatedAsid] = to;
189 
190 		WRITE_SPECIALREG(TTBR0_EL1, (allocatedAsid << 48) | ttbr);
191 		flush_tlb_whole_asid(allocatedAsid);
192 		return;
193 	}
194 
195 	// ASID 0 is reserved for the kernel.
196 	for (size_t i = 1; i < kNumAsids; ++i) {
197 		if (sAsidMapping[i]->fRefcount == 0) {
198 			sAsidMapping[i]->fASID = -1;
199 			to->fASID = i;
200 			sAsidMapping[i] = to;
201 
202 			WRITE_SPECIALREG(TTBR0_EL1, (i << 48) | ttbr);
203 			flush_tlb_whole_asid(i);
204 			return;
205 		}
206 	}
207 
208 	panic("cannot assign ASID");
209 }
210 
211 
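// Compute the translation table level at which the page-table walk starts.
// Each table level resolves (pageBits - 3) bits of the virtual address.
// For example, with 4 KiB pages (pageBits == 12) every level resolves 9 bits,
// so a 48-bit VA starts the walk at level 0 and a 39-bit VA at level 1.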
212 int
213 VMSAv8TranslationMap::CalcStartLevel(int vaBits, int pageBits)
214 {
215 	int level = 4;
216 
217 	int bitsLeft = vaBits - pageBits;
218 	while (bitsLeft > 0) {
219 		int tableBits = pageBits - 3;
220 		bitsLeft -= tableBits;
221 		level--;
222 	}
223 
224 	ASSERT(level >= 0);
225 
226 	return level;
227 }
228 
229 
230 bool
231 VMSAv8TranslationMap::Lock()
232 {
233 	TRACE("VMSAv8TranslationMap::Lock()\n");
234 	recursive_lock_lock(&fLock);
235 	return true;
236 }
237 
238 
239 void
240 VMSAv8TranslationMap::Unlock()
241 {
242 	TRACE("VMSAv8TranslationMap::Unlock()\n");
243 	recursive_lock_unlock(&fLock);
244 }
245 
246 
247 addr_t
248 VMSAv8TranslationMap::MappedSize() const
249 {
250 	panic("VMSAv8TranslationMap::MappedSize not implemented");
251 	return 0;
252 }
253 
254 
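// Return a conservative upper bound on the number of page-table pages needed
// to map the range [start, end]: at each level above the last one the range
// spans at most (size / entrySize) + 2 entries, and each spanned entry may
// require allocating one sub-table.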
255 size_t
256 VMSAv8TranslationMap::MaxPagesNeededToMap(addr_t start, addr_t end) const
257 {
258 	size_t result = 0;
259 	size_t size = end - start + 1;
260 
261 	for (int i = fInitialLevel; i < 3; i++) {
262 		int tableBits = fPageBits - 3;
263 		int shift = tableBits * (3 - i) + fPageBits;
264 		uint64_t entrySize = 1UL << shift;
265 
266 		result += size / entrySize + 2;
267 	}
268 
269 	return result;
270 }
271 
272 
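// Return a virtual pointer through which the page table at the given physical
// address can be accessed, using the kernel's physical memory map.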
273 uint64_t*
274 VMSAv8TranslationMap::TableFromPa(phys_addr_t pa)
275 {
276 	return reinterpret_cast<uint64_t*>(KERNEL_PMAP_BASE + pa);
277 }
278 
279 
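// Recursively clear and free the page-table page at ptPa, which maps virtual
// addresses starting at va at the given level. TLB entries of valid leaf
// entries are invalidated, and entryRemoved is invoked for each of them.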
280 template<typename EntryRemoved>
281 void
282 VMSAv8TranslationMap::FreeTable(phys_addr_t ptPa, uint64_t va, int level,
283 	EntryRemoved &&entryRemoved)
284 {
285 	ASSERT(level < 4);
286 
287 	int tableBits = fPageBits - 3;
288 	uint64_t tableSize = 1UL << tableBits;
289 	uint64_t vaMask = (1UL << fVaBits) - 1;
290 
291 	int shift = tableBits * (3 - level) + fPageBits;
292 	uint64_t entrySize = 1UL << shift;
293 
294 	uint64_t nextVa = va;
295 	uint64_t* pt = TableFromPa(ptPa);
296 	for (uint64_t i = 0; i < tableSize; i++) {
297 		uint64_t oldPte = (uint64_t) atomic_get_and_set64((int64*) &pt[i], 0);
298 
299 		if (level < 3 && (oldPte & kPteTypeMask) == kPteTypeL012Table) {
300 			FreeTable(oldPte & kPteAddrMask, nextVa, level + 1, entryRemoved);
301 		} else if ((oldPte & kPteTypeMask) != 0) {
302 			uint64_t fullVa = (fIsKernel ? ~vaMask : 0) | nextVa;
303 			asm("dsb ishst");
304 			asm("tlbi vaae1is, %0" :: "r" ((fullVa >> 12) & kTLBIMask));
305 			// Does this correctly flush block entries at level < 3? We don't use them anyway, though.
306 			// TODO: Flush only currently used ASID (using vae1is)
307 			entryRemoved(level, oldPte);
308 		}
309 
310 		nextVa += entrySize;
311 	}
312 
313 	asm("dsb ish");
314 
315 	vm_page* page = vm_lookup_page(ptPa >> fPageBits);
316 	DEBUG_PAGE_ACCESS_START(page);
317 	vm_page_set_state(page, PAGE_STATE_FREE);
318 }
319 
320 
321 // Make a new page sub-table.
322 // The parent table is `ptPa`, and the new sub-table's PTE will be at `index`
323 // in it.
324 // Returns the physical address of the new table, or the address of the existing
325 // one if the PTE is already filled.
326 phys_addr_t
327 VMSAv8TranslationMap::GetOrMakeTable(phys_addr_t ptPa, int level, int index,
328 	vm_page_reservation* reservation)
329 {
330 	ASSERT(level < 3);
331 
332 	uint64_t* ptePtr = TableFromPa(ptPa) + index;
333 	uint64_t oldPte = atomic_get64((int64*) ptePtr);
334 
335 	int type = oldPte & kPteTypeMask;
336 	ASSERT(type != kPteTypeL12Block);
337 
338 	if (type == kPteTypeL012Table) {
339 		// This is already a table entry, just return it
340 		return oldPte & kPteAddrMask;
341 	} else if (reservation != nullptr) {
342 		// Create a new table there
343 		vm_page* page = vm_page_allocate_page(reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
344 		phys_addr_t newTablePa = page->physical_page_number << fPageBits;
345 		DEBUG_PAGE_ACCESS_END(page);
346 
347 		// We only create mappings at the final level so we don't need to handle
348 		// splitting block mappings
349 		ASSERT(type != kPteTypeL12Block);
350 
351 		// Ensure that writes to the page being attached have completed
352 		asm("dsb ishst");
353 
354 		uint64_t oldPteRefetch = (uint64_t)atomic_test_and_set64((int64*) ptePtr,
355 			newTablePa | kPteTypeL012Table, oldPte);
356 		if (oldPteRefetch != oldPte) {
357 			// If the old PTE has mutated, it must be because another thread has allocated the
358 			// sub-table at the same time as us. If that has happened, deallocate the page we
359 			// set up and use the one they installed instead.
360 			ASSERT((oldPteRefetch & kPteTypeMask) == kPteTypeL012Table);
361 			DEBUG_PAGE_ACCESS_START(page);
362 			vm_page_set_state(page, PAGE_STATE_FREE);
363 			return oldPteRefetch & kPteAddrMask;
364 		}
365 
366 		return newTablePa;
367 	}
368 
369 	// There's no existing table and we have no reservation
370 	return 0;
371 }
372 
373 
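// Invalidate the TLB entry for va, but only if the old PTE was valid and had
// its accessed flag set (an entry that has never been accessed should not be
// in the TLB). Global (non-nG) mappings are flushed for all ASIDs, private
// ones only for this map's ASID. Returns true only when an ASID-tagged flush
// was issued.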
374 bool
375 VMSAv8TranslationMap::FlushVAIfAccessed(uint64_t pte, addr_t va)
376 {
377 	if (!is_pte_accessed(pte))
378 		return false;
379 
380 	InterruptsSpinLocker locker(sAsidLock);
381 	if ((pte & kAttrNG) == 0) {
382 		// Flush from all address spaces
383 		asm("dsb ishst"); // Ensure PTE write completed
384 		asm("tlbi vaae1is, %0" ::"r"(((va >> 12) & kTLBIMask)));
385 		asm("dsb ish");
386 		asm("isb");
387 	} else if (fASID != -1) {
388 		asm("dsb ishst"); // Ensure PTE write completed
389 		asm("tlbi vae1is, %0" ::"r"(((va >> 12) & kTLBIMask) | (uint64_t(fASID) << 48)));
390 		asm("dsb ish"); // Wait for TLB flush to complete
391 		asm("isb");
392 		return true;
393 	}
394 
395 	return false;
396 }
397 
398 
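// Perform the "break" step of break-before-make: atomically replace the PTE
// with an invalid entry, but only if it still contains oldPte, and flush the
// matching TLB entry. Returns false if the PTE changed concurrently, in which
// case the caller has to retry.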
399 bool
400 VMSAv8TranslationMap::AttemptPteBreakBeforeMake(uint64_t* ptePtr, uint64_t oldPte, addr_t va)
401 {
402 	uint64_t loadedPte = atomic_test_and_set64((int64_t*)ptePtr, 0, oldPte);
403 	if (loadedPte != oldPte)
404 		return false;
405 
406 	FlushVAIfAccessed(oldPte, va);
407 
408 	return true;
409 }
410 
411 
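// Walk the translation tables covering the virtual range [va, va + size),
// starting at the table ptPa of the given level, and call updatePte for every
// level-3 entry in the range. Missing intermediate tables are allocated from
// the reservation; if reservation is null, absent subtrees are skipped.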
412 template<typename UpdatePte>
413 void
414 VMSAv8TranslationMap::ProcessRange(phys_addr_t ptPa, int level, addr_t va, size_t size,
415 	vm_page_reservation* reservation, UpdatePte&& updatePte)
416 {
417 	ASSERT(level < 4);
418 	ASSERT(ptPa != 0);
419 
420 	uint64_t pageMask = (1UL << fPageBits) - 1;
421 	uint64_t vaMask = (1UL << fVaBits) - 1;
422 
423 	ASSERT((va & pageMask) == 0);
424 
425 	int tableBits = fPageBits - 3;
426 	uint64_t tableMask = (1UL << tableBits) - 1;
427 
428 	int shift = tableBits * (3 - level) + fPageBits;
429 	uint64_t entrySize = 1UL << shift;
430 	uint64_t entryMask = entrySize - 1;
431 
432 	uint64_t alignedDownVa = va & ~entryMask;
433 	uint64_t alignedUpEnd = (va + size + (entrySize - 1)) & ~entryMask;
434 	if (level == 3)
435 		ASSERT(alignedDownVa == va);
436 
437 	for (uint64_t effectiveVa = alignedDownVa; effectiveVa < alignedUpEnd;
438 		effectiveVa += entrySize) {
439 		int index = ((effectiveVa & vaMask) >> shift) & tableMask;
440 		uint64_t* ptePtr = TableFromPa(ptPa) + index;
441 
442 		if (level == 3) {
443 			updatePte(ptePtr, effectiveVa);
444 		} else {
445 			phys_addr_t subTable = GetOrMakeTable(ptPa, level, index, reservation);
446 
447 			// When reservation is null, we can't create a new subtable. This can be intentional,
448 			// for example when called from Unmap().
449 			if (subTable == 0)
450 				continue;
451 
452 			uint64_t subVa = std::max(effectiveVa, va);
453 			size_t subSize = std::min(size_t(entrySize - (subVa & entryMask)), size);
454 			ProcessRange(subTable, level + 1, subVa, subSize, reservation, updatePte);
455 
456 			size -= subSize;
457 		}
458 	}
459 }
460 
461 
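// Return the index of the MAIR_EL1 slot that contains the given memory
// attribute encoding. Panics if the type has not been programmed into MAIR.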
462 uint8_t
463 VMSAv8TranslationMap::MairIndex(uint8_t type)
464 {
465 	for (int i = 0; i < 8; i++)
466 		if (((fMair >> (i * 8)) & 0xff) == type)
467 			return i;
468 
469 	panic("MAIR entry not found");
470 	return 0;
471 }
472 
473 
474 uint64_t
475 VMSAv8TranslationMap::GetMemoryAttr(uint32 attributes, uint32 memoryType, bool isKernel)
476 {
477 	uint64_t attr = 0;
478 
479 	if (!isKernel)
480 		attr |= kAttrNG;
481 
482 	if ((attributes & B_EXECUTE_AREA) == 0)
483 		attr |= kAttrUXN;
484 	if ((attributes & B_KERNEL_EXECUTE_AREA) == 0)
485 		attr |= kAttrPXN;
486 
487 	// SWDBM is a software-reserved bit that we use to mark that writes are
488 	// allowed, so that the fault handler knows to clear kAttrAPReadOnly.
489 	// In that case kAttrAPReadOnly doubles as a not-dirty bit.
490 	// Additionally, the dirty state can be stored in SWDIRTY, in order not to
491 	// lose it when changing protection from RW to RO.
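	//
	// As an illustration, a typical user read/write mapping of write-back
	// memory ends up as nG | UXN | PXN | APReadOnly | APUserAccess | SWDBM
	// (plus DBM when HW_DIRTY is available), Inner Shareable, with the MAIR
	// index for MAIR_NORMAL_WB; the first write then clears APReadOnly either
	// in hardware via DBM or in the fault handler.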
492 
493 	// All page permissions begin life in RO state.
494 	attr |= kAttrAPReadOnly;
495 
496 	// User-Execute implies User-Read, because it would break PAN otherwise
497 	if ((attributes & B_READ_AREA) != 0 || (attributes & B_EXECUTE_AREA) != 0)
498 		attr |= kAttrAPUserAccess; // Allow user reads
499 
500 	if ((attributes & B_WRITE_AREA) != 0 || (attributes & B_KERNEL_WRITE_AREA) != 0)
501 		attr |= kAttrSWDBM; // Mark as writeable
502 
503 	// When supported by hardware, copy our SWDBM bit into DBM so that
504 	// kAttrAPReadOnly is cleared automatically on a write attempt, without
505 	// going through the fault handler.
506 	if ((fHwFeature & HW_DIRTY) != 0 && (attr & kAttrSWDBM) != 0)
507 		attr |= kAttrDBM;
508 
509 	attr |= kAttrSHInnerShareable; // Inner Shareable
510 
511 	uint8_t type = MAIR_NORMAL_WB;
512 
513 	switch (memoryType & B_MEMORY_TYPE_MASK) {
514 		case B_UNCACHED_MEMORY:
515 			// TODO: This probably should be nGnRE for PCI
516 			type = MAIR_DEVICE_nGnRnE;
517 			break;
518 		case B_WRITE_COMBINING_MEMORY:
519 			type = MAIR_NORMAL_NC;
520 			break;
521 		case B_WRITE_THROUGH_MEMORY:
522 			type = MAIR_NORMAL_WT;
523 			break;
524 		case B_WRITE_PROTECTED_MEMORY:
525 			type = MAIR_NORMAL_WT;
526 			break;
527 		default:
528 		case B_WRITE_BACK_MEMORY:
529 			type = MAIR_NORMAL_WB;
530 			break;
531 	}
532 
533 	attr |= MairIndex(type) << 2;
534 
535 	return attr;
536 }
537 
538 
539 status_t
540 VMSAv8TranslationMap::Map(addr_t va, phys_addr_t pa, uint32 attributes, uint32 memoryType,
541 	vm_page_reservation* reservation)
542 {
543 	TRACE("VMSAv8TranslationMap::Map(0x%" B_PRIxADDR ", 0x%" B_PRIxADDR
544 		", 0x%x, 0x%x)\n", va, pa, attributes, memoryType);
545 
546 	ThreadCPUPinner pinner(thread_get_current_thread());
547 
548 	ASSERT(ValidateVa(va));
549 	uint64_t attr = GetMemoryAttr(attributes, memoryType, fIsKernel);
550 
551 	// During the first mapping we need to allocate the root table
552 	if (fPageTable == 0) {
553 		vm_page* page = vm_page_allocate_page(reservation, PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
554 		DEBUG_PAGE_ACCESS_END(page);
555 		fPageTable = page->physical_page_number << fPageBits;
556 	}
557 
558 	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, reservation,
559 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
560 			while (true) {
561 				phys_addr_t effectivePa = effectiveVa - va + pa;
562 				uint64_t oldPte = atomic_get64((int64*)ptePtr);
563 				uint64_t newPte = effectivePa | attr | kPteTypeL3Page;
564 
565 				if (newPte == oldPte)
566 					return;
567 
568 				if ((oldPte & kPteValidMask) != 0) {
569 					// ARM64 requires "break-before-make". We must set the PTE to an invalid
570 					// entry and flush the TLB as appropriate before we can write the new PTE.
571 					if (!AttemptPteBreakBeforeMake(ptePtr, oldPte, effectiveVa))
572 						continue;
573 				}
574 
575 				// Install the new PTE
576 				atomic_set64((int64*)ptePtr, newPte);
577 				asm("dsb ishst"); // Ensure PTE write completed
578 				asm("isb");
579 				break;
580 			}
581 		});
582 
583 	return B_OK;
584 }
585 
586 
587 status_t
588 VMSAv8TranslationMap::Unmap(addr_t start, addr_t end)
589 {
590 	TRACE("VMSAv8TranslationMap::Unmap(0x%" B_PRIxADDR ", 0x%" B_PRIxADDR
591 		")\n", start, end);
592 	ThreadCPUPinner pinner(thread_get_current_thread());
593 
594 	size_t size = end - start + 1;
595 	ASSERT(ValidateVa(start));
596 
597 	if (fPageTable == 0)
598 		return B_OK;
599 
600 	ProcessRange(fPageTable, fInitialLevel, start, size, nullptr,
601 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
602 			ASSERT(effectiveVa <= end);
603 			uint64_t oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
604 			FlushVAIfAccessed(oldPte, effectiveVa);
605 		});
606 
607 	return B_OK;
608 }
609 
610 
611 status_t
612 VMSAv8TranslationMap::UnmapPage(VMArea* area, addr_t address, bool updatePageQueue)
613 {
614 	TRACE("VMSAv8TranslationMap::UnmapPage(0x%" B_PRIxADDR "(%s), 0x%"
615 		B_PRIxADDR ", %d)\n", (addr_t)area, area->name, address,
616 		updatePageQueue);
617 
618 	ASSERT(ValidateVa(address));
619 	ThreadCPUPinner pinner(thread_get_current_thread());
620 	RecursiveLocker locker(fLock);
621 
622 	uint64_t oldPte = 0;
623 	ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
624 		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
625 			oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
626 			FlushVAIfAccessed(oldPte, effectiveVa);
627 		});
628 
629 	if ((oldPte & kPteValidMask) == 0)
630 		return B_ENTRY_NOT_FOUND;
631 
632 	pinner.Unlock();
633 	locker.Detach();
634 	PageUnmapped(area, (oldPte & kPteAddrMask) >> fPageBits, (oldPte & kAttrAF) != 0,
635 		is_pte_dirty(oldPte), updatePageQueue);
636 
637 	return B_OK;
638 }
639 
640 
641 void
642 VMSAv8TranslationMap::UnmapPages(VMArea* area, addr_t address, size_t size, bool updatePageQueue)
643 {
644 	TRACE("VMSAv8TranslationMap::UnmapPages(0x%" B_PRIxADDR "(%s), 0x%"
645 		B_PRIxADDR ", 0x%" B_PRIxSIZE ", %d)\n", (addr_t)area,
646 		area->name, address, size, updatePageQueue);
647 
648 	ASSERT(ValidateVa(address));
649 	VMAreaMappings queue;
650 	ThreadCPUPinner pinner(thread_get_current_thread());
651 	RecursiveLocker locker(fLock);
652 
653 	ProcessRange(fPageTable, fInitialLevel, address, size, nullptr,
654 		[=, &queue](uint64_t* ptePtr, uint64_t effectiveVa) {
655 			uint64_t oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
656 			FlushVAIfAccessed(oldPte, effectiveVa);
657 			if ((oldPte & kPteValidMask) == 0)
658 				return;
659 
660 			if (area->cache_type == CACHE_TYPE_DEVICE)
661 				return;
662 
663 			// get the page
664 			vm_page* page = vm_lookup_page((oldPte & kPteAddrMask) >> fPageBits);
665 			ASSERT(page != NULL);
666 
667 			DEBUG_PAGE_ACCESS_START(page);
668 
669 			// transfer the accessed/dirty flags to the page
670 			page->accessed = (oldPte & kAttrAF) != 0;
671 			page->modified = is_pte_dirty(oldPte);
672 
673 			// remove the mapping object/decrement the wired_count of the
674 			// page
675 			if (area->wiring == B_NO_LOCK) {
676 				vm_page_mapping* mapping = NULL;
677 				vm_page_mappings::Iterator iterator
678 					= page->mappings.GetIterator();
679 				while ((mapping = iterator.Next()) != NULL) {
680 					if (mapping->area == area)
681 						break;
682 				}
683 
684 				ASSERT(mapping != NULL);
685 
686 				area->mappings.Remove(mapping);
687 				page->mappings.Remove(mapping);
688 				queue.Add(mapping);
689 			} else
690 				page->DecrementWiredCount();
691 
692 			if (!page->IsMapped()) {
693 				atomic_add(&gMappedPagesCount, -1);
694 
695 				if (updatePageQueue) {
696 					if (page->Cache()->temporary)
697 						vm_page_set_state(page, PAGE_STATE_INACTIVE);
698 					else if (page->modified)
699 						vm_page_set_state(page, PAGE_STATE_MODIFIED);
700 					else
701 						vm_page_set_state(page, PAGE_STATE_CACHED);
702 				}
703 			}
704 
705 			DEBUG_PAGE_ACCESS_END(page);
706 		});
707 
708 	// TODO: As in UnmapPage() we can lose page dirty flags here. ATM it's not
709 	// really critical here, as in all cases this method is used, the unmapped
710 	// area range is unmapped for good (resized/cut) and the pages will likely
711 	// be freed.
712 
713 	locker.Unlock();
714 
715 	// free removed mappings
716 	bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
717 	uint32 freeFlags = CACHE_DONT_WAIT_FOR_MEMORY
718 		| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0);
719 
720 	while (vm_page_mapping* mapping = queue.RemoveHead())
721 		vm_free_page_mapping(mapping->page->physical_page_number, mapping, freeFlags);
722 }
723 
724 
725 void
726 VMSAv8TranslationMap::UnmapArea(VMArea* area, bool deletingAddressSpace,
727 	bool ignoreTopCachePageFlags)
728 {
729 	TRACE("VMSAv8TranslationMap::UnmapArea(0x%" B_PRIxADDR "(%s), 0x%"
730 		B_PRIxADDR ", 0x%" B_PRIxSIZE ", %d, %d)\n", (addr_t)area,
731 		area->name, area->Base(), area->Size(), deletingAddressSpace,
732 		ignoreTopCachePageFlags);
733 
734 	if (area->cache_type == CACHE_TYPE_DEVICE || area->wiring != B_NO_LOCK) {
735 		UnmapPages(area, area->Base(), area->Size(), true);
736 		return;
737 	}
738 
739 	bool unmapPages = !deletingAddressSpace || !ignoreTopCachePageFlags;
740 
741 	RecursiveLocker locker(fLock);
742 	ThreadCPUPinner pinner(thread_get_current_thread());
743 
744 	VMAreaMappings mappings;
745 	mappings.MoveFrom(&area->mappings);
746 
747 	for (VMAreaMappings::Iterator it = mappings.GetIterator();
748 			vm_page_mapping* mapping = it.Next();) {
749 
750 		vm_page* page = mapping->page;
751 		page->mappings.Remove(mapping);
752 
753 		VMCache* cache = page->Cache();
754 
755 		bool pageFullyUnmapped = false;
756 		if (!page->IsMapped()) {
757 			atomic_add(&gMappedPagesCount, -1);
758 			pageFullyUnmapped = true;
759 		}
760 
761 		if (unmapPages || cache != area->cache) {
762 			addr_t address = area->Base()
763 				+ ((page->cache_offset * B_PAGE_SIZE)
764 				- area->cache_offset);
765 
766 			uint64_t oldPte = 0;
767 			ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
768 				[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
769 					oldPte = atomic_get_and_set64((int64_t*)ptePtr, 0);
770 					if (!deletingAddressSpace)
771 						FlushVAIfAccessed(oldPte, effectiveVa);
772 				});
773 
774 			if ((oldPte & kPteValidMask) == 0) {
775 				panic("page %p has mapping for area %p "
776 					"(%#" B_PRIxADDR "), but has no "
777 					"page table", page, area, address);
778 				continue;
779 			}
780 
781 			// transfer the accessed/dirty flags to the page and
782 			// invalidate the mapping, if necessary
783 			if (is_pte_dirty(oldPte))
784 				page->modified = true;
785 			if (oldPte & kAttrAF)
786 				page->accessed = true;
787 
788 			if (pageFullyUnmapped) {
789 				DEBUG_PAGE_ACCESS_START(page);
790 
791 				if (cache->temporary) {
792 					vm_page_set_state(page,
793 						PAGE_STATE_INACTIVE);
794 				} else if (page->modified) {
795 					vm_page_set_state(page,
796 						PAGE_STATE_MODIFIED);
797 				} else {
798 					vm_page_set_state(page,
799 						PAGE_STATE_CACHED);
800 				}
801 
802 				DEBUG_PAGE_ACCESS_END(page);
803 			}
804 		}
805 	}
806 
807 	locker.Unlock();
808 
809 	bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
810 	uint32 freeFlags = CACHE_DONT_WAIT_FOR_MEMORY
811 		| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0);
812 
813 	while (vm_page_mapping* mapping = mappings.RemoveHead())
814 		vm_free_page_mapping(mapping->page->physical_page_number, mapping, freeFlags);
815 }
816 
817 
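// Check that va is canonical for this map: the bits above fVaBits must be all
// ones for the kernel map and all zeros for a user map.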
818 bool
819 VMSAv8TranslationMap::ValidateVa(addr_t va)
820 {
821 	uint64_t vaMask = (1UL << fVaBits) - 1;
822 	bool kernelAddr = (va & (1UL << 63)) != 0;
823 	if (kernelAddr != fIsKernel)
824 		return false;
825 	if ((va & ~vaMask) != (fIsKernel ? ~vaMask : 0))
826 		return false;
827 	return true;
828 }
829 
830 
831 status_t
832 VMSAv8TranslationMap::Query(addr_t va, phys_addr_t* pa, uint32* flags)
833 {
834 	*flags = 0;
835 	*pa = 0;
836 
837 	uint64_t pageMask = (1UL << fPageBits) - 1;
838 	va &= ~pageMask;
839 
840 	ThreadCPUPinner pinner(thread_get_current_thread());
841 	ASSERT(ValidateVa(va));
842 
843 	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, nullptr,
844 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
845 			uint64_t pte = atomic_get64((int64_t*)ptePtr);
846 			*pa = pte & kPteAddrMask;
847 			*flags |= PAGE_PRESENT | B_KERNEL_READ_AREA;
848 			if ((pte & kAttrAF) != 0)
849 				*flags |= PAGE_ACCESSED;
850 			if (is_pte_dirty(pte))
851 				*flags |= PAGE_MODIFIED;
852 
853 			if ((pte & kAttrUXN) == 0)
854 				*flags |= B_EXECUTE_AREA;
855 			if ((pte & kAttrPXN) == 0)
856 				*flags |= B_KERNEL_EXECUTE_AREA;
857 
858 			if ((pte & kAttrAPUserAccess) != 0)
859 				*flags |= B_READ_AREA;
860 
861 			if ((pte & kAttrSWDBM) != 0) {
862 				*flags |= B_KERNEL_WRITE_AREA;
863 				if ((pte & kAttrAPUserAccess) != 0)
864 					*flags |= B_WRITE_AREA;
865 			}
866 		});
867 
868 	return B_OK;
869 }
870 
871 
872 status_t
873 VMSAv8TranslationMap::QueryInterrupt(
874 	addr_t virtualAddress, phys_addr_t* _physicalAddress, uint32* _flags)
875 {
876 	return Query(virtualAddress, _physicalAddress, _flags);
877 }
878 
879 
880 status_t
881 VMSAv8TranslationMap::Protect(addr_t start, addr_t end, uint32 attributes, uint32 memoryType)
882 {
883 	TRACE("VMSAv8TranslationMap::Protect(0x%" B_PRIxADDR ", 0x%"
884 		B_PRIxADDR ", 0x%x, 0x%x)\n", start, end, attributes, memoryType);
885 
886 	uint64_t attr = GetMemoryAttr(attributes, memoryType, fIsKernel);
887 	size_t size = end - start + 1;
888 	ASSERT(ValidateVa(start));
889 
890 	ThreadCPUPinner pinner(thread_get_current_thread());
891 
892 	ProcessRange(fPageTable, fInitialLevel, start, size, nullptr,
893 		[=](uint64_t* ptePtr, uint64_t effectiveVa) {
894 			ASSERT(effectiveVa <= end);
895 
896 			// We need to use an atomic compare-and-swap loop because we
897 			// need to clear some bits while setting others.
898 			while (true) {
899 				uint64_t oldPte = atomic_get64((int64_t*)ptePtr);
900 				uint64_t newPte = oldPte & ~kPteAttrMask;
901 				newPte |= attr;
902 
903 				// Preserve access bit.
904 				newPte |= oldPte & kAttrAF;
905 
906 				// Preserve the dirty bit.
907 				if (is_pte_dirty(oldPte))
908 					newPte = set_pte_dirty(newPte);
909 
910 				uint64_t oldMemoryType = oldPte & (kAttrShareability | kAttrMemoryAttrIdx);
911 				uint64_t newMemoryType = newPte & (kAttrShareability | kAttrMemoryAttrIdx);
912 				if (oldMemoryType != newMemoryType) {
913 					// ARM64 requires "break-before-make". We must set the PTE to an invalid
914 					// entry and flush the TLB as appropriate before we can write the new PTE.
915 					// In this case specifically, it applies any time we change cacheability or
916 					// shareability.
917 					if (!AttemptPteBreakBeforeMake(ptePtr, oldPte, effectiveVa))
918 						continue;
919 
920 					atomic_set64((int64_t*)ptePtr, newPte);
921 					asm("dsb ishst"); // Ensure PTE write completed
922 					asm("isb");
923 
924 					// No compare-exchange loop required in this case.
925 					break;
926 				} else {
927 					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte) {
928 						FlushVAIfAccessed(oldPte, effectiveVa);
929 						break;
930 					}
931 				}
932 			}
933 		});
934 
935 	return B_OK;
936 }
937 
938 
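// Clear the accessed and/or modified flags of the page mapped at va.
// PAGE_ACCESSED clears the AF bit; PAGE_MODIFIED marks the PTE clean again
// (read-only, SWDIRTY cleared), so the next write is tracked through DBM or
// the permission fault handler.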
939 status_t
940 VMSAv8TranslationMap::ClearFlags(addr_t va, uint32 flags)
941 {
942 	ASSERT(ValidateVa(va));
943 
944 	bool clearAF = flags & PAGE_ACCESSED;
945 	bool setRO = flags & PAGE_MODIFIED;
946 
947 	if (!clearAF && !setRO)
948 		return B_OK;
949 
950 	ThreadCPUPinner pinner(thread_get_current_thread());
951 
952 	uint64_t oldPte = 0;
953 	ProcessRange(fPageTable, fInitialLevel, va, B_PAGE_SIZE, nullptr,
954 		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
955 			if (clearAF && setRO) {
956 				// We need to use an atomic compare-and-swap loop because we
957 				// need to clear one bit while setting the other.
958 				while (true) {
959 					oldPte = atomic_get64((int64_t*)ptePtr);
960 					uint64_t newPte = oldPte & ~kAttrAF;
961 					newPte = set_pte_clean(newPte);
962 
963 					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte)
964 						break;
965 				}
966 			} else if (clearAF) {
967 				oldPte = atomic_and64((int64_t*)ptePtr, ~kAttrAF);
968 			} else {
969 				while (true) {
970 					oldPte = atomic_get64((int64_t*)ptePtr);
971 					if (!is_pte_dirty(oldPte)) {
972 						// Avoid a TLB flush
973 						oldPte = 0;
974 						return;
975 					}
976 					uint64_t newPte = set_pte_clean(oldPte);
977 					if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte)
978 						break;
979 				}
980 			}
981 		});
982 
983 	FlushVAIfAccessed(oldPte, va);
984 
985 	return B_OK;
986 }
987 
988 
989 bool
990 VMSAv8TranslationMap::ClearAccessedAndModified(
991 	VMArea* area, addr_t address, bool unmapIfUnaccessed, bool& _modified)
992 {
993 	TRACE("VMSAv8TranslationMap::ClearAccessedAndModified(0x%"
994 		B_PRIxADDR "(%s), 0x%" B_PRIxADDR ", %d)\n", (addr_t)area,
995 		area->name, address, unmapIfUnaccessed);
996 	ASSERT(ValidateVa(address));
997 
998 	RecursiveLocker locker(fLock);
999 	ThreadCPUPinner pinner(thread_get_current_thread());
1000 
1001 	uint64_t oldPte = 0;
1002 	ProcessRange(fPageTable, fInitialLevel, address, B_PAGE_SIZE, nullptr,
1003 		[=, &oldPte](uint64_t* ptePtr, uint64_t effectiveVa) {
1004 			// We need to use an atomic compare-and-swap loop because we must
1005 			// first read the old PTE and make decisions based on its AF
1006 			// bit before proceeding.
1007 			while (true) {
1008 				oldPte = atomic_get64((int64_t*)ptePtr);
1009 				uint64_t newPte = oldPte & ~kAttrAF;
1010 				newPte = set_pte_clean(newPte);
1011 
1012 				// If the page has not been accessed, then unmap it.
1013 				if (unmapIfUnaccessed && (oldPte & kAttrAF) == 0)
1014 					newPte = 0;
1015 
1016 				if ((uint64_t)atomic_test_and_set64((int64_t*)ptePtr, newPte, oldPte) == oldPte)
1017 					break;
1018 			}
1019 			asm("dsb ishst"); // Ensure PTE write completed
1020 		});
1021 
1022 	pinner.Unlock();
1023 	_modified = is_pte_dirty(oldPte);
1024 
1025 	if (FlushVAIfAccessed(oldPte, address))
1026 		return true;
1027 
1028 	if (!unmapIfUnaccessed)
1029 		return false;
1030 
1031 	locker.Detach(); // UnaccessedPageUnmapped takes ownership
1032 	phys_addr_t oldPa = oldPte & kPteAddrMask;
1033 	UnaccessedPageUnmapped(area, oldPa >> fPageBits);
1034 	return false;
1035 }
1036 
1037 
1038 void
1039 VMSAv8TranslationMap::Flush()
1040 {
1041 	// Necessary invalidation is performed during mapping,
1042 	// no need to do anything more here.
1043 }
1044