kern: audit (and fix) our hardware maintenance instructions to match official kernel

This commit is contained in:
Michael Scire 2021-10-27 12:31:53 -07:00
parent fb59d0ad43
commit e81a1ce5a8
16 changed files with 104 additions and 203 deletions

View file

@ -93,3 +93,4 @@
/* Deferred includes. */
#include <mesosphere/kern_k_auto_object_impls.hpp>
#include <mesosphere/kern_k_scheduler_impls.hpp>

View file

@ -279,7 +279,7 @@ namespace ams::kern::arch::arm64::init {
/* Invalidate the entire tlb. */
cpu::DataSynchronizationBarrierInnerShareable();
cpu::InvalidateEntireTlb();
cpu::InvalidateEntireTlbInnerShareable();
/* Copy data, if we should. */
const u64 negative_block_size_for_mask = static_cast<u64>(-static_cast<s64>(block_size));
@ -350,7 +350,6 @@ namespace ams::kern::arch::arm64::init {
/* If we don't already have an L2 table, we need to make a new one. */
if (!l1_entry->IsTable()) {
KPhysicalAddress new_table = AllocateNewPageTable(allocator);
ClearNewPageTable(new_table);
*l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever());
cpu::DataSynchronizationBarrierInnerShareable();
}
@ -361,12 +360,12 @@ namespace ams::kern::arch::arm64::init {
if (util::IsAligned(GetInteger(virt_addr), L2ContiguousBlockSize) && util::IsAligned(GetInteger(phys_addr), L2ContiguousBlockSize) && size >= L2ContiguousBlockSize) {
for (size_t i = 0; i < L2ContiguousBlockSize / L2BlockSize; i++) {
l2_entry[i] = L2PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, true);
cpu::DataSynchronizationBarrierInnerShareable();
virt_addr += L2BlockSize;
phys_addr += L2BlockSize;
size -= L2BlockSize;
}
cpu::DataSynchronizationBarrierInnerShareable();
continue;
}
@ -384,7 +383,6 @@ namespace ams::kern::arch::arm64::init {
/* If we don't already have an L3 table, we need to make a new one. */
if (!l2_entry->IsTable()) {
KPhysicalAddress new_table = AllocateNewPageTable(allocator);
ClearNewPageTable(new_table);
*l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever());
cpu::DataSynchronizationBarrierInnerShareable();
}
@ -395,12 +393,12 @@ namespace ams::kern::arch::arm64::init {
if (util::IsAligned(GetInteger(virt_addr), L3ContiguousBlockSize) && util::IsAligned(GetInteger(phys_addr), L3ContiguousBlockSize) && size >= L3ContiguousBlockSize) {
for (size_t i = 0; i < L3ContiguousBlockSize / L3BlockSize; i++) {
l3_entry[i] = L3PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, true);
cpu::DataSynchronizationBarrierInnerShareable();
virt_addr += L3BlockSize;
phys_addr += L3BlockSize;
size -= L3BlockSize;
}
cpu::DataSynchronizationBarrierInnerShareable();
continue;
}

View file

@ -60,6 +60,11 @@ namespace ams::kern::arch::arm64::cpu {
__asm__ __volatile__("isb" ::: "memory");
}
ALWAYS_INLINE void EnsureInstructionConsistencyInnerShareable() {
DataSynchronizationBarrierInnerShareable();
InstructionMemoryBarrier();
}
ALWAYS_INLINE void EnsureInstructionConsistency() {
DataSynchronizationBarrier();
InstructionMemoryBarrier();
@ -177,7 +182,6 @@ namespace ams::kern::arch::arm64::cpu {
NOINLINE void SynchronizeAllCores();
/* Cache management helpers. */
void ClearPageToZeroImpl(void *);
void StoreEntireCacheForInit();
void FlushEntireCacheForInit();
@ -190,10 +194,16 @@ namespace ams::kern::arch::arm64::cpu {
void InvalidateEntireInstructionCache();
ALWAYS_INLINE void ClearPageToZero(void *page) {
ALWAYS_INLINE void ClearPageToZero(void * const page) {
MESOSPHERE_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(page), PageSize));
MESOSPHERE_ASSERT(page != nullptr);
ClearPageToZeroImpl(page);
uintptr_t cur = reinterpret_cast<uintptr_t>(__builtin_assume_aligned(page, PageSize));
const uintptr_t last = cur + PageSize - DataCacheLineSize;
for (/* ... */; cur <= last; cur += DataCacheLineSize) {
__asm__ __volatile__("dc zva, %[cur]" :: [cur]"r"(cur) : "memory");
}
}
ALWAYS_INLINE void InvalidateTlbByAsid(u32 asid) {
@ -213,6 +223,11 @@ namespace ams::kern::arch::arm64::cpu {
EnsureInstructionConsistency();
}
ALWAYS_INLINE void InvalidateEntireTlbInnerShareable() {
__asm__ __volatile__("tlbi vmalle1is" ::: "memory");
EnsureInstructionConsistencyInnerShareable();
}
ALWAYS_INLINE void InvalidateEntireTlbDataOnly() {
__asm__ __volatile__("tlbi vmalle1is" ::: "memory");
DataSynchronizationBarrier();

View file

@ -219,27 +219,27 @@ namespace ams::kern::arch::arm64 {
Result ChangePermissions(KProcessAddress virt_addr, size_t num_pages, PageTableEntry entry_template, DisableMergeAttribute disable_merge_attr, bool refresh_mapping, PageLinkedList *page_list, bool reuse_ll);
static void PteDataSynchronizationBarrier() {
static ALWAYS_INLINE void PteDataSynchronizationBarrier() {
cpu::DataSynchronizationBarrierInnerShareable();
}
static void ClearPageTable(KVirtualAddress table) {
static ALWAYS_INLINE void ClearPageTable(KVirtualAddress table) {
cpu::ClearPageToZero(GetVoidPointer(table));
}
void OnTableUpdated() const {
ALWAYS_INLINE void OnTableUpdated() const {
cpu::InvalidateTlbByAsid(m_asid);
}
void OnKernelTableUpdated() const {
ALWAYS_INLINE void OnKernelTableUpdated() const {
cpu::InvalidateEntireTlbDataOnly();
}
void OnKernelTableSinglePageUpdated(KProcessAddress virt_addr) const {
ALWAYS_INLINE void OnKernelTableSinglePageUpdated(KProcessAddress virt_addr) const {
cpu::InvalidateTlbByVaDataOnly(virt_addr);
}
void NoteUpdated() const {
ALWAYS_INLINE void NoteUpdated() const {
cpu::DataSynchronizationBarrier();
if (this->IsKernel()) {
@ -249,7 +249,7 @@ namespace ams::kern::arch::arm64 {
}
}
void NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const {
ALWAYS_INLINE void NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const {
MESOSPHERE_ASSERT(this->IsKernel());
cpu::DataSynchronizationBarrier();

View file

@ -45,6 +45,7 @@ namespace ams::kern::arch::arm64 {
/* Select L1 cache. */
cpu::SetCsselrEl1(0);
cpu::InstructionMemoryBarrier();
/* Check that the L1 cache is not direct-mapped. */
return cpu::CacheSizeIdRegisterAccessor().GetAssociativity() != 0;

View file

@ -46,7 +46,7 @@ namespace ams::kern {
return m_slab_heap->Allocate(m_page_allocator);
}
void Free(T *t) const {
ALWAYS_INLINE void Free(T *t) const {
m_slab_heap->Free(t);
}
};

View file

@ -211,18 +211,6 @@ namespace ams::kern {
static consteval bool ValidateAssemblyOffsets();
};
consteval bool KScheduler::ValidateAssemblyOffsets() {
static_assert(AMS_OFFSETOF(KScheduler, m_state.needs_scheduling) == KSCHEDULER_NEEDS_SCHEDULING);
static_assert(AMS_OFFSETOF(KScheduler, m_state.interrupt_task_runnable) == KSCHEDULER_INTERRUPT_TASK_RUNNABLE);
static_assert(AMS_OFFSETOF(KScheduler, m_state.highest_priority_thread) == KSCHEDULER_HIGHEST_PRIORITY_THREAD);
static_assert(AMS_OFFSETOF(KScheduler, m_state.idle_thread_stack) == KSCHEDULER_IDLE_THREAD_STACK);
static_assert(AMS_OFFSETOF(KScheduler, m_state.prev_thread) == KSCHEDULER_PREVIOUS_THREAD);
static_assert(AMS_OFFSETOF(KScheduler, m_state.interrupt_task_manager) == KSCHEDULER_INTERRUPT_TASK_MANAGER);
return true;
}
static_assert(KScheduler::ValidateAssemblyOffsets());
class KScopedSchedulerLock : KScopedLock<KScheduler::LockType> {
public:
explicit ALWAYS_INLINE KScopedSchedulerLock() : KScopedLock(KScheduler::s_scheduler_lock) { /* ... */ }

View file

@ -0,0 +1,43 @@
/*
* Copyright (c) Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <mesosphere/kern_common.hpp>
#include <mesosphere/kern_k_scheduler.hpp>
#include <mesosphere/kern_select_interrupt_manager.hpp>
namespace ams::kern {
/* NOTE: This header is included after all main headers. */
consteval bool KScheduler::ValidateAssemblyOffsets() {
static_assert(AMS_OFFSETOF(KScheduler, m_state.needs_scheduling) == KSCHEDULER_NEEDS_SCHEDULING);
static_assert(AMS_OFFSETOF(KScheduler, m_state.interrupt_task_runnable) == KSCHEDULER_INTERRUPT_TASK_RUNNABLE);
static_assert(AMS_OFFSETOF(KScheduler, m_state.highest_priority_thread) == KSCHEDULER_HIGHEST_PRIORITY_THREAD);
static_assert(AMS_OFFSETOF(KScheduler, m_state.idle_thread_stack) == KSCHEDULER_IDLE_THREAD_STACK);
static_assert(AMS_OFFSETOF(KScheduler, m_state.prev_thread) == KSCHEDULER_PREVIOUS_THREAD);
static_assert(AMS_OFFSETOF(KScheduler, m_state.interrupt_task_manager) == KSCHEDULER_INTERRUPT_TASK_MANAGER);
return true;
}
static_assert(KScheduler::ValidateAssemblyOffsets());
ALWAYS_INLINE void KScheduler::RescheduleOtherCores(u64 cores_needing_scheduling) {
if (const u64 core_mask = cores_needing_scheduling & ~(1ul << m_core_id); core_mask != 0) {
cpu::DataSynchronizationBarrier();
Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_Scheduler, core_mask);
}
}
}

View file

@ -61,139 +61,3 @@ _ZN3ams4kern4arch5arm643cpu23SynchronizeAllCoresImplEPii:
5:
stlr wzr, [x0]
ret
/* ams::kern::arch::arm64::cpu::ClearPageToZero(void *) */
.section .text._ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv, "ax", %progbits
.global _ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv
.type _ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv, %function
_ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv:
/* Efficiently clear the page using dc zva. */
dc zva, x0
add x8, x0, #0x040
dc zva, x8
add x8, x0, #0x080
dc zva, x8
add x8, x0, #0x0c0
dc zva, x8
add x8, x0, #0x100
dc zva, x8
add x8, x0, #0x140
dc zva, x8
add x8, x0, #0x180
dc zva, x8
add x8, x0, #0x1c0
dc zva, x8
add x8, x0, #0x200
dc zva, x8
add x8, x0, #0x240
dc zva, x8
add x8, x0, #0x280
dc zva, x8
add x8, x0, #0x2c0
dc zva, x8
add x8, x0, #0x300
dc zva, x8
add x8, x0, #0x340
dc zva, x8
add x8, x0, #0x380
dc zva, x8
add x8, x0, #0x3c0
dc zva, x8
add x8, x0, #0x400
dc zva, x8
add x8, x0, #0x440
dc zva, x8
add x8, x0, #0x480
dc zva, x8
add x8, x0, #0x4c0
dc zva, x8
add x8, x0, #0x500
dc zva, x8
add x8, x0, #0x540
dc zva, x8
add x8, x0, #0x580
dc zva, x8
add x8, x0, #0x5c0
dc zva, x8
add x8, x0, #0x600
dc zva, x8
add x8, x0, #0x640
dc zva, x8
add x8, x0, #0x680
dc zva, x8
add x8, x0, #0x6c0
dc zva, x8
add x8, x0, #0x700
dc zva, x8
add x8, x0, #0x740
dc zva, x8
add x8, x0, #0x780
dc zva, x8
add x8, x0, #0x7c0
dc zva, x8
add x8, x0, #0x800
dc zva, x8
add x8, x0, #0x840
dc zva, x8
add x8, x0, #0x880
dc zva, x8
add x8, x0, #0x8c0
dc zva, x8
add x8, x0, #0x900
dc zva, x8
add x8, x0, #0x940
dc zva, x8
add x8, x0, #0x980
dc zva, x8
add x8, x0, #0x9c0
dc zva, x8
add x8, x0, #0xa00
dc zva, x8
add x8, x0, #0xa40
dc zva, x8
add x8, x0, #0xa80
dc zva, x8
add x8, x0, #0xac0
dc zva, x8
add x8, x0, #0xb00
dc zva, x8
add x8, x0, #0xb40
dc zva, x8
add x8, x0, #0xb80
dc zva, x8
add x8, x0, #0xbc0
dc zva, x8
add x8, x0, #0xc00
dc zva, x8
add x8, x0, #0xc40
dc zva, x8
add x8, x0, #0xc80
dc zva, x8
add x8, x0, #0xcc0
dc zva, x8
add x8, x0, #0xd00
dc zva, x8
add x8, x0, #0xd40
dc zva, x8
add x8, x0, #0xd80
dc zva, x8
add x8, x0, #0xdc0
dc zva, x8
add x8, x0, #0xe00
dc zva, x8
add x8, x0, #0xe40
dc zva, x8
add x8, x0, #0xe80
dc zva, x8
add x8, x0, #0xec0
dc zva, x8
add x8, x0, #0xf00
dc zva, x8
add x8, x0, #0xf40
dc zva, x8
add x8, x0, #0xf80
dc zva, x8
add x8, x0, #0xfc0
dc zva, x8
ret

View file

@ -225,7 +225,7 @@ namespace ams::kern::arch::arm64 {
if (AMS_UNLIKELY(GetCurrentThread().IsSingleStep())) {
GetCurrentThread().ClearSingleStep();
cpu::MonitorDebugSystemControlRegisterAccessor().SetSoftwareStep(false).Store();
cpu::EnsureInstructionConsistency();
cpu::InstructionMemoryBarrier();
}
#endif

View file

@ -169,10 +169,10 @@ namespace ams::kern::arch::arm64 {
m_manager = std::addressof(Kernel::GetSystemPageTableManager());
/* Allocate a page for ttbr. */
/* NOTE: It is a postcondition of page table manager allocation that the page is all-zero. */
const u64 asid_tag = (static_cast<u64>(m_asid) << 48ul);
const KVirtualAddress page = m_manager->Allocate();
MESOSPHERE_ASSERT(page != Null<KVirtualAddress>);
cpu::ClearPageToZero(GetVoidPointer(page));
m_ttbr = GetInteger(KPageTableBase::GetLinearMappedPhysicalAddress(page)) | asid_tag;
/* Initialize the base page table. */
@ -1058,7 +1058,7 @@ namespace ams::kern::arch::arm64 {
auto sw_reserved_bits = PageTableEntry::EncodeSoftwareReservedBits(head_entry->IsHeadMergeDisabled(), head_entry->IsHeadAndBodyMergeDisabled(), tail_entry->IsTailMergeDisabled());
/* Merge! */
PteDataSynchronizationBarrier();
/* NOTE: As of 13.1.0, Nintendo does not do: PteDataSynchronizationBarrier(); */
*l1_entry = L1PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, PageTableEntry(entry_template), sw_reserved_bits, false);
/* Note that we updated. */

View file

@ -656,9 +656,8 @@ namespace ams::kern::board::nintendo::nx {
MESOSPHERE_ASSERT(IsValidPhysicalAddress(table_phys_addr));
Kernel::GetSystemPageTableManager().Open(table_virt_addr, 1);
/* Clear the page and save it. */
/* Save the page. Note that it is a pre-condition that the page is cleared, when allocated from the system page table manager. */
/* NOTE: Nintendo does not check the result of StoreDataCache. */
cpu::ClearPageToZero(GetVoidPointer(table_virt_addr));
cpu::StoreDataCache(GetVoidPointer(table_virt_addr), PageDirectorySize);
g_reserved_table_phys_addr = table_phys_addr;

View file

@ -341,7 +341,9 @@ namespace ams::kern::board::nintendo::nx {
/* Restore pmu registers. */
cpu::SetPmUserEnrEl0(0);
cpu::PerformanceMonitorsControlRegisterAccessor().SetEventCounterReset(true).SetCycleCounterReset(true).Store();
cpu::PerformanceMonitorsControlRegisterAccessor(0).SetEventCounterReset(true).SetCycleCounterReset(true).Store();
cpu::EnsureInstructionConsistency();
cpu::SetPmOvsClrEl0(static_cast<u64>(static_cast<u32>(~u32())));
cpu::SetPmIntEnClrEl1(static_cast<u64>(static_cast<u32>(~u32())));
cpu::SetPmCntEnClrEl0(static_cast<u64>(static_cast<u32>(~u32())));

View file

@ -79,13 +79,6 @@ namespace ams::kern {
RescheduleCurrentCore();
}
void KScheduler::RescheduleOtherCores(u64 cores_needing_scheduling) {
if (const u64 core_mask = cores_needing_scheduling & ~(1ul << m_core_id); core_mask != 0) {
cpu::DataSynchronizationBarrier();
Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_Scheduler, core_mask);
}
}
u64 KScheduler::UpdateHighestPriorityThread(KThread *highest_thread) {
if (KThread *prev_highest_thread = m_state.highest_priority_thread; AMS_LIKELY(prev_highest_thread != highest_thread)) {
if (AMS_LIKELY(prev_highest_thread != nullptr)) {
@ -254,9 +247,24 @@ namespace ams::kern {
MESOSPHERE_KTRACE_THREAD_SWITCH(next_thread);
#if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP)
/* Ensure the single-step bit in mdscr reflects the correct single-step state for the new thread. */
cpu::MonitorDebugSystemControlRegisterAccessor().SetSoftwareStep(next_thread->IsSingleStep()).Store();
#endif
/* Switch the current process, if we're switching processes. */
if (KProcess *next_process = next_thread->GetOwnerProcess(); next_process != cur_process) {
KProcess::Switch(cur_process, next_process);
} else {
/* The single-step bit set up above requires an instruction synchronization barrier, to ensure */
/* the state change takes before we actually perform a return which might break-to-step. */
/* KProcess::Switch performs an isb incidentally, and so when we're changing process we */
/* can piggy-back off of that isb to avoid unnecessarily emptying the pipeline twice. */
/* However, this means that when we're switching to thread in a different process, */
/* we must ensure that we still isb. In practice, gcc will deduplicate into a single isb. */
#if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP)
cpu::InstructionMemoryBarrier();
#endif
}
/* Set the new thread. */

View file

@ -49,10 +49,9 @@ namespace ams::kern::init {
constexpr PageTableEntry KernelRwDataUncachedAttribute(PageTableEntry::Permission_KernelRW, PageTableEntry::PageAttribute_NormalMemoryNotCacheable, PageTableEntry::Shareable_InnerShareable, PageTableEntry::MappingFlag_Mapped);
void StoreDataCache(const void *addr, size_t size) {
uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), cpu::DataCacheLineSize);
uintptr_t end = reinterpret_cast<uintptr_t>(addr) + size;
for (uintptr_t cur = start; cur < end; cur += cpu::DataCacheLineSize) {
__asm__ __volatile__("dc cvac, %[cur]" :: [cur]"r"(cur) : "memory");
const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), cpu::DataCacheLineSize);
for (size_t stored = 0; stored < size; stored += cpu::DataCacheLineSize) {
__asm__ __volatile__("dc cvac, %[cur]" :: [cur]"r"(start + stored) : "memory");
}
cpu::DataSynchronizationBarrier();
}
@ -594,11 +593,13 @@ namespace ams::kern::init {
switch (num_watchpoints) {
FOR_I_IN_15_TO_1(MESOSPHERE_INITIALIZE_WATCHPOINT_CASE, 0)
case 0:
cpu::SetDbgWcr0El1(0);
cpu::SetDbgWvr0El1(0);
[[fallthrough]];
default:
break;
}
cpu::SetDbgWcr0El1(0);
cpu::SetDbgWvr0El1(0);
switch (num_breakpoints) {
FOR_I_IN_15_TO_1(MESOSPHERE_INITIALIZE_BREAKPOINT_CASE, 0)

View file

@ -227,26 +227,7 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv:
mov x0, x22
RESTORE_THREAD_CONTEXT(x0, x1, x2, 9f)
9: /* Configure single-step, if we should. */
#if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP)
/* Get a reference to the new thread's stack parameters. */
add x2, sp, #0x1000
and x2, x2, #~(0x1000-1)
/* Read the single-step flag. */
ldurb w2, [x2, #-(THREAD_STACK_PARAMETERS_SIZE - THREAD_STACK_PARAMETERS_IS_SINGLE_STEP)]
/* Update the single-step bit in mdscr_el1. */
mrs x1, mdscr_el1
bic x1, x1, #1
orr x1, x1, x2
msr mdscr_el1, x1
isb
#endif
/* We're done restoring the thread context, and can return safely. */
9: /* We're done restoring the thread context, and can return safely. */
ret
10: /* Our switch failed. */