From 6e17317d5dbd7c4ea00cb587d1afdedac3ea7e57 Mon Sep 17 00:00:00 2001 From: Michael Scire Date: Tue, 22 Mar 2022 19:59:39 -0700 Subject: [PATCH] kern: implement new thread context/fpu semantics --- .../arch/arm64/kern_assembly_macros.h | 96 +++++++ .../arch/arm64/kern_assembly_offsets.h | 184 +++++++++--- .../arch/arm64/kern_k_thread_context.hpp | 233 ++++++++++++++-- .../mesosphere/kern_k_capabilities.hpp | 73 +---- .../include/mesosphere/kern_k_process.hpp | 20 +- .../include/mesosphere/kern_k_thread.hpp | 264 ++++++++++-------- .../mesosphere/kern_select_assembly_macros.h | 26 ++ .../arch/arm64/kern_exception_handlers.cpp | 12 +- .../source/arch/arm64/kern_k_debug.cpp | 20 +- .../arch/arm64/kern_k_interrupt_manager.cpp | 36 ++- .../arch/arm64/kern_k_thread_context.cpp | 95 +++---- .../arch/arm64/svc/kern_svc_exception_asm.s | 18 +- .../arch/arm64/svc/kern_svc_handlers_asm.s | 229 +++++++++++---- .../source/kern_k_debug_base.cpp | 8 +- .../libmesosphere/source/kern_k_scheduler.cpp | 2 +- .../libmesosphere/source/kern_k_thread.cpp | 118 +++++++- .../arch/arm64/svc_thread_local_region.hpp | 4 + .../source/arch/arm64/exception_vectors.s | 4 +- .../arch/arm64/kern_exception_handlers_asm.s | 208 +++++++++----- .../source/arch/arm64/kern_k_scheduler_asm.s | 254 +++++++++-------- .../arch/arm64/kern_k_thread_context_asm.s | 55 ---- 21 files changed, 1291 insertions(+), 668 deletions(-) create mode 100644 libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_macros.h create mode 100644 libraries/libmesosphere/include/mesosphere/kern_select_assembly_macros.h diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_macros.h b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_macros.h new file mode 100644 index 000000000..97b50afb6 --- /dev/null +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_macros.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ #pragma once #include <mesosphere/kern_select_assembly_offsets.h> + +#define ENABLE_FPU(tmp) \ + mrs tmp, cpacr_el1; \ + orr tmp, tmp, #0x300000; \ + msr cpacr_el1, tmp; \ + isb; + +#define GET_THREAD_CONTEXT_AND_RESTORE_FPCR_FPSR(ctx, xtmp1, xtmp2, wtmp1, wtmp2) \ + add ctx, sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_THREAD_CONTEXT); \ + ldp wtmp1, wtmp2, [ctx, #(THREAD_CONTEXT_FPCR_FPSR)]; \ + msr fpcr, xtmp1; \ + msr fpsr, xtmp2; + +#define RESTORE_FPU64_CALLEE_SAVE_REGISTERS(ctx) \ + ldp q8, q9, [ctx, #(THREAD_CONTEXT_FPU64_Q8_Q9)]; \ + ldp q10, q11, [ctx, #(THREAD_CONTEXT_FPU64_Q10_Q11)]; \ + ldp q12, q13, [ctx, #(THREAD_CONTEXT_FPU64_Q12_Q13)]; \ + ldp q14, q15, [ctx, #(THREAD_CONTEXT_FPU64_Q14_Q15)]; + +#define RESTORE_FPU64_CALLER_SAVE_REGISTERS(tmp) \ + ldr tmp, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS)]; \ + ldp q0, q1, [tmp, #(THREAD_FPU64_CONTEXT_Q0_Q1)]; \ + ldp q2, q3, [tmp, #(THREAD_FPU64_CONTEXT_Q2_Q3)]; \ + ldp q4, q5, [tmp, #(THREAD_FPU64_CONTEXT_Q4_Q5)]; \ + ldp q6, q7, [tmp, #(THREAD_FPU64_CONTEXT_Q6_Q7)]; \ + ldp q16, q17, [tmp, #(THREAD_FPU64_CONTEXT_Q16_Q17)]; \ + ldp q18, q19, [tmp, #(THREAD_FPU64_CONTEXT_Q18_Q19)]; \ + ldp q20, q21, [tmp, #(THREAD_FPU64_CONTEXT_Q20_Q21)]; \ + ldp q22, q23, [tmp, #(THREAD_FPU64_CONTEXT_Q22_Q23)]; \ + ldp q24, q25, [tmp, #(THREAD_FPU64_CONTEXT_Q24_Q25)]; \ + ldp q26, q27, [tmp, #(THREAD_FPU64_CONTEXT_Q26_Q27)]; \ + ldp q28, q29, [tmp, #(THREAD_FPU64_CONTEXT_Q28_Q29)]; \ + ldp q30, q31, [tmp, #(THREAD_FPU64_CONTEXT_Q30_Q31)]; + +#define RESTORE_FPU64_ALL_REGISTERS(ctx, tmp) \ + RESTORE_FPU64_CALLEE_SAVE_REGISTERS(ctx) \ + RESTORE_FPU64_CALLER_SAVE_REGISTERS(tmp) + +#define RESTORE_FPU32_CALLEE_SAVE_REGISTERS(ctx) \ + ldp q4, q5, [ctx, #(THREAD_CONTEXT_FPU32_Q4_Q5)]; \ + ldp q6, q7, [ctx, #(THREAD_CONTEXT_FPU32_Q6_Q7)]; + +#define RESTORE_FPU32_CALLER_SAVE_REGISTERS(tmp) \ + ldr tmp, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS)]; \ + ldp q0, q1, [tmp, #(THREAD_FPU32_CONTEXT_Q0_Q1)]; \ + ldp q2, q3, [tmp, #(THREAD_FPU32_CONTEXT_Q2_Q3)]; \ + ldp q8, q9, [tmp, #(THREAD_FPU32_CONTEXT_Q8_Q9)]; \ + ldp q10, q11, [tmp, #(THREAD_FPU32_CONTEXT_Q10_Q11)]; \ + ldp q12, q13, [tmp, #(THREAD_FPU32_CONTEXT_Q12_Q13)]; \ + ldp q14, q15, [tmp, #(THREAD_FPU32_CONTEXT_Q14_Q15)]; + +#define RESTORE_FPU32_ALL_REGISTERS(ctx, tmp) \ + RESTORE_FPU32_CALLEE_SAVE_REGISTERS(ctx) \ + RESTORE_FPU32_CALLER_SAVE_REGISTERS(tmp) + +#define ENABLE_AND_RESTORE_FPU(ctx, xtmp1, xtmp2, wtmp1, wtmp2, label_32, label_done) \ + ENABLE_FPU(xtmp1) \ + GET_THREAD_CONTEXT_AND_RESTORE_FPCR_FPSR(ctx, xtmp1, xtmp2, wtmp1, wtmp2) \ + \ + ldrb wtmp1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)]; \ + tbz wtmp1, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_64_BIT), label_32##f; \ + \ + RESTORE_FPU64_ALL_REGISTERS(ctx, xtmp1) \ + \ + b label_done##f; \ + \ +label_32: \ + RESTORE_FPU32_ALL_REGISTERS(ctx, xtmp1) \ +label_done: + +#define ENABLE_AND_RESTORE_FPU64(ctx, xtmp1, xtmp2, wtmp1, wtmp2) \ + ENABLE_FPU(xtmp1) \ + GET_THREAD_CONTEXT_AND_RESTORE_FPCR_FPSR(ctx, xtmp1, xtmp2, wtmp1, wtmp2) \ + RESTORE_FPU64_ALL_REGISTERS(ctx, xtmp1) + +#define ENABLE_AND_RESTORE_FPU32(ctx, xtmp1, xtmp2, wtmp1, wtmp2) \ + ENABLE_FPU(xtmp1) \ + GET_THREAD_CONTEXT_AND_RESTORE_FPCR_FPSR(ctx, xtmp1, xtmp2, wtmp1, wtmp2) \ + RESTORE_FPU32_ALL_REGISTERS(ctx, xtmp1) diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_offsets.h b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_offsets.h 
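(Note on kern_assembly_macros.h above: ENABLE_AND_RESTORE_FPU takes two numeric local-label arguments (label_32, label_done) and pastes them with ##f to form forward branch references, so each expansion site must pass label numbers that are unused between the expansion and the pasted label definitions. A sketch of an expansion site, with the label numbers being whatever is locally free:

    /* sketch: conditionally restore the fpu before returning to user code */
    ENABLE_AND_RESTORE_FPU(x10, x8, x9, w8, w9, 2, 3)
    /* expands to: tbz on ..._IS_FPU_64_BIT -> "2f"; 64-bit restore; "b 3f"; "2:" 32-bit restore; "3:" */

This matches the invocation ENABLE_AND_RESTORE_FPU(x10, x8, x9, w8, w9, 2, 3) that appears in kern_svc_exception_asm.s later in this patch.)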
index 4ad886c29..d2988d923 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_offsets.h +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_assembly_offsets.h @@ -20,55 +20,157 @@ #define AMS_KERN_NUM_SUPERVISOR_CALLS 0xC0 /* ams::kern::KThread, https://github.com/Atmosphere-NX/Atmosphere/blob/master/libraries/libmesosphere/include/mesosphere/kern_k_thread.hpp */ -#define THREAD_THREAD_CONTEXT 0xD0 +#define THREAD_KERNEL_STACK_TOP 0x280 /* ams::kern::KThread::StackParameters, https://github.com/Atmosphere-NX/Atmosphere/blob/master/libraries/libmesosphere/include/mesosphere/kern_k_thread.hpp */ -#define THREAD_STACK_PARAMETERS_SIZE 0x30 -#define THREAD_STACK_PARAMETERS_SVC_PERMISSION 0x00 -#define THREAD_STACK_PARAMETERS_CONTEXT 0x18 -#define THREAD_STACK_PARAMETERS_CUR_THREAD 0x20 -#define THREAD_STACK_PARAMETERS_DISABLE_COUNT 0x28 -#define THREAD_STACK_PARAMETERS_DPC_FLAGS 0x2A -#define THREAD_STACK_PARAMETERS_CURRENT_SVC_ID 0x2B -#define THREAD_STACK_PARAMETERS_IS_CALLING_SVC 0x2C -#define THREAD_STACK_PARAMETERS_IS_IN_EXCEPTION_HANDLER 0x2D -#define THREAD_STACK_PARAMETERS_IS_PINNED 0x2E +#define THREAD_STACK_PARAMETERS_SIZE 0x130 +#define THREAD_STACK_PARAMETERS_SVC_PERMISSION 0x00 +#define THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS 0x18 +#define THREAD_STACK_PARAMETERS_CUR_THREAD 0x20 +#define THREAD_STACK_PARAMETERS_DISABLE_COUNT 0x28 +#define THREAD_STACK_PARAMETERS_DPC_FLAGS 0x2A +#define THREAD_STACK_PARAMETERS_CURRENT_SVC_ID 0x2B +#define THREAD_STACK_PARAMETERS_RESERVED_2C 0x2C +#define THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS 0x2D +#define THREAD_STACK_PARAMETERS_IS_PINNED 0x2E +#define THREAD_STACK_PARAMETERS_RESERVED_2F 0x2F +#define THREAD_STACK_PARAMETERS_THREAD_CONTEXT 0x30 + +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_CALLING_SVC (0) +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_EXCEPTION_HANDLER (1) +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED (2) +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_64_BIT (3) +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_USERMODE_EXCEPTION_HANDLER (4) +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_CACHE_MAINTENANCE_OPERATION (5) +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_TLB_MAINTENANCE_OPERATION (6) #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) -#define THREAD_STACK_PARAMETERS_IS_SINGLE_STEP 0x2F +#define THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_HARDWARE_SINGLE_STEP (7) +#endif + +#define THREAD_EXCEPTION_FLAG_IS_CALLING_SVC (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_CALLING_SVC) +#define THREAD_EXCEPTION_FLAG_IS_IN_EXCEPTION_HANDLER (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_EXCEPTION_HANDLER) +#define THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED) +#define THREAD_EXCEPTION_FLAG_IS_FPU_64_BIT (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_64_BIT) +#define THREAD_EXCEPTION_FLAG_IS_IN_USERMODE_EXCEPTION_HANDLER (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_USERMODE_EXCEPTION_HANDLER) +#define THREAD_EXCEPTION_FLAG_IS_IN_CACHE_MAINTENANCE_OPERATION (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_CACHE_MAINTENANCE_OPERATION) +#define THREAD_EXCEPTION_FLAG_IS_IN_TLB_MAINTENANCE_OPERATION (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_TLB_MAINTENANCE_OPERATION) + +#if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) +#define THREAD_EXCEPTION_FLAG_IS_HARDWARE_SINGLE_STEP (1 << THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_HARDWARE_SINGLE_STEP) #endif /* ams::kern::arch::arm64::KThreadContext, 
https://github.com/Atmosphere-NX/Atmosphere/blob/master/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_thread_context.hpp */ -#define THREAD_CONTEXT_SIZE 0x290 -#define THREAD_CONTEXT_CPU_REGISTERS 0x000 -#define THREAD_CONTEXT_X19 0x000 -#define THREAD_CONTEXT_X20 0x008 -#define THREAD_CONTEXT_X21 0x010 -#define THREAD_CONTEXT_X22 0x018 -#define THREAD_CONTEXT_X23 0x020 -#define THREAD_CONTEXT_X24 0x028 -#define THREAD_CONTEXT_X25 0x030 -#define THREAD_CONTEXT_X26 0x038 -#define THREAD_CONTEXT_X27 0x040 -#define THREAD_CONTEXT_X28 0x048 -#define THREAD_CONTEXT_X29 0x050 -#define THREAD_CONTEXT_LR 0x058 -#define THREAD_CONTEXT_SP 0x060 -#define THREAD_CONTEXT_CPACR 0x068 -#define THREAD_CONTEXT_FPCR 0x070 -#define THREAD_CONTEXT_FPSR 0x078 -#define THREAD_CONTEXT_FPU_REGISTERS 0x080 -#define THREAD_CONTEXT_LOCKED 0x280 +#define THREAD_CONTEXT_SIZE 0x100 +#define THREAD_CONTEXT_CPU_REGISTERS 0x000 +#define THREAD_CONTEXT_X19 0x000 +#define THREAD_CONTEXT_X20 0x008 +#define THREAD_CONTEXT_X21 0x010 +#define THREAD_CONTEXT_X22 0x018 +#define THREAD_CONTEXT_X23 0x020 +#define THREAD_CONTEXT_X24 0x028 +#define THREAD_CONTEXT_X25 0x030 +#define THREAD_CONTEXT_X26 0x038 +#define THREAD_CONTEXT_X27 0x040 +#define THREAD_CONTEXT_X28 0x048 +#define THREAD_CONTEXT_X29 0x050 +#define THREAD_CONTEXT_LR 0x058 +#define THREAD_CONTEXT_SP 0x060 +#define THREAD_CONTEXT_FPCR 0x068 +#define THREAD_CONTEXT_FPSR 0x06C +#define THREAD_CONTEXT_FPU_REGISTERS 0x070 +#define THREAD_CONTEXT_LOCKED 0x0F0 -#define THREAD_CONTEXT_X19_X20 THREAD_CONTEXT_X19 -#define THREAD_CONTEXT_X21_X22 THREAD_CONTEXT_X21 -#define THREAD_CONTEXT_X23_X24 THREAD_CONTEXT_X23 -#define THREAD_CONTEXT_X25_X26 THREAD_CONTEXT_X25 -#define THREAD_CONTEXT_X27_X28 THREAD_CONTEXT_X27 -#define THREAD_CONTEXT_X29_X30 THREAD_CONTEXT_X29 -#define THREAD_CONTEXT_LR_SP THREAD_CONTEXT_LR -#define THREAD_CONTEXT_SP_CPACR THREAD_CONTEXT_SP -#define THREAD_CONTEXT_FPCR_FPSR THREAD_CONTEXT_FPCR +#define THREAD_CONTEXT_X19_X20 THREAD_CONTEXT_X19 +#define THREAD_CONTEXT_X21_X22 THREAD_CONTEXT_X21 +#define THREAD_CONTEXT_X23_X24 THREAD_CONTEXT_X23 +#define THREAD_CONTEXT_X25_X26 THREAD_CONTEXT_X25 +#define THREAD_CONTEXT_X27_X28 THREAD_CONTEXT_X27 +#define THREAD_CONTEXT_X29_X30 THREAD_CONTEXT_X29 +#define THREAD_CONTEXT_LR_SP THREAD_CONTEXT_LR +#define THREAD_CONTEXT_SP_FPCR_FPSR THREAD_CONTEXT_SP + +#define THREAD_CONTEXT_FPCR_FPSR THREAD_CONTEXT_FPCR + +#define THREAD_CONTEXT_FPU64_Q8 (THREAD_CONTEXT_FPU_REGISTERS + 0x00) +#define THREAD_CONTEXT_FPU64_Q9 (THREAD_CONTEXT_FPU_REGISTERS + 0x10) +#define THREAD_CONTEXT_FPU64_Q10 (THREAD_CONTEXT_FPU_REGISTERS + 0x20) +#define THREAD_CONTEXT_FPU64_Q11 (THREAD_CONTEXT_FPU_REGISTERS + 0x30) +#define THREAD_CONTEXT_FPU64_Q12 (THREAD_CONTEXT_FPU_REGISTERS + 0x40) +#define THREAD_CONTEXT_FPU64_Q13 (THREAD_CONTEXT_FPU_REGISTERS + 0x50) +#define THREAD_CONTEXT_FPU64_Q14 (THREAD_CONTEXT_FPU_REGISTERS + 0x60) +#define THREAD_CONTEXT_FPU64_Q15 (THREAD_CONTEXT_FPU_REGISTERS + 0x70) + +#define THREAD_CONTEXT_FPU64_Q8_Q9 THREAD_CONTEXT_FPU64_Q8 +#define THREAD_CONTEXT_FPU64_Q10_Q11 THREAD_CONTEXT_FPU64_Q10 +#define THREAD_CONTEXT_FPU64_Q12_Q13 THREAD_CONTEXT_FPU64_Q12 +#define THREAD_CONTEXT_FPU64_Q14_Q15 THREAD_CONTEXT_FPU64_Q14 + +#define THREAD_CONTEXT_FPU32_Q4 (THREAD_CONTEXT_FPU_REGISTERS + 0x00) +#define THREAD_CONTEXT_FPU32_Q5 (THREAD_CONTEXT_FPU_REGISTERS + 0x10) +#define THREAD_CONTEXT_FPU32_Q6 (THREAD_CONTEXT_FPU_REGISTERS + 0x20) +#define THREAD_CONTEXT_FPU32_Q7 (THREAD_CONTEXT_FPU_REGISTERS + 0x30) + 
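(Note on the FPU offsets above and the pair aliases just below: the fpu64 and fpu32 callee-save sets occupy the same storage inside KThreadContext, beginning at THREAD_CONTEXT_FPU_REGISTERS (0x70). A 64-bit thread's callee-save registers are q8-q15; a 32-bit thread's are q4-q7 (the AArch32 callee-save d8-d15), so it uses only the first half of the region. The caller-save sets live in a separate per-thread buffer, packed in ascending register order with the callee-save gap removed. A few illustrative checks, derivable purely from the defines in this header and not part of the patch itself:

    static_assert(THREAD_CONTEXT_FPU64_Q8 == THREAD_CONTEXT_FPU_REGISTERS + 0x00);
    static_assert(THREAD_CONTEXT_FPU32_Q4 == THREAD_CONTEXT_FPU64_Q8);         /* aliased storage */
    static_assert(THREAD_FPU64_CONTEXT_Q16 == THREAD_FPU64_CONTEXT_Q7 + 0x10); /* q8-q15 gap removed */
)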
+#define THREAD_CONTEXT_FPU32_Q4_Q5 THREAD_CONTEXT_FPU32_Q4 +#define THREAD_CONTEXT_FPU32_Q6_Q7 THREAD_CONTEXT_FPU32_Q6 + +#define THREAD_FPU64_CONTEXT_Q0 0x000 +#define THREAD_FPU64_CONTEXT_Q1 0x010 +#define THREAD_FPU64_CONTEXT_Q2 0x020 +#define THREAD_FPU64_CONTEXT_Q3 0x030 +#define THREAD_FPU64_CONTEXT_Q4 0x040 +#define THREAD_FPU64_CONTEXT_Q5 0x050 +#define THREAD_FPU64_CONTEXT_Q6 0x060 +#define THREAD_FPU64_CONTEXT_Q7 0x070 +#define THREAD_FPU64_CONTEXT_Q16 0x080 +#define THREAD_FPU64_CONTEXT_Q17 0x090 +#define THREAD_FPU64_CONTEXT_Q18 0x0A0 +#define THREAD_FPU64_CONTEXT_Q19 0x0B0 +#define THREAD_FPU64_CONTEXT_Q20 0x0C0 +#define THREAD_FPU64_CONTEXT_Q21 0x0D0 +#define THREAD_FPU64_CONTEXT_Q22 0x0E0 +#define THREAD_FPU64_CONTEXT_Q23 0x0F0 +#define THREAD_FPU64_CONTEXT_Q24 0x100 +#define THREAD_FPU64_CONTEXT_Q25 0x110 +#define THREAD_FPU64_CONTEXT_Q26 0x120 +#define THREAD_FPU64_CONTEXT_Q27 0x130 +#define THREAD_FPU64_CONTEXT_Q28 0x140 +#define THREAD_FPU64_CONTEXT_Q29 0x150 +#define THREAD_FPU64_CONTEXT_Q30 0x160 +#define THREAD_FPU64_CONTEXT_Q31 0x170 + +#define THREAD_FPU64_CONTEXT_Q0_Q1 THREAD_FPU64_CONTEXT_Q0 +#define THREAD_FPU64_CONTEXT_Q2_Q3 THREAD_FPU64_CONTEXT_Q2 +#define THREAD_FPU64_CONTEXT_Q4_Q5 THREAD_FPU64_CONTEXT_Q4 +#define THREAD_FPU64_CONTEXT_Q6_Q7 THREAD_FPU64_CONTEXT_Q6 +#define THREAD_FPU64_CONTEXT_Q16_Q17 THREAD_FPU64_CONTEXT_Q16 +#define THREAD_FPU64_CONTEXT_Q18_Q19 THREAD_FPU64_CONTEXT_Q18 +#define THREAD_FPU64_CONTEXT_Q20_Q21 THREAD_FPU64_CONTEXT_Q20 +#define THREAD_FPU64_CONTEXT_Q22_Q23 THREAD_FPU64_CONTEXT_Q22 +#define THREAD_FPU64_CONTEXT_Q24_Q25 THREAD_FPU64_CONTEXT_Q24 +#define THREAD_FPU64_CONTEXT_Q26_Q27 THREAD_FPU64_CONTEXT_Q26 +#define THREAD_FPU64_CONTEXT_Q28_Q29 THREAD_FPU64_CONTEXT_Q28 +#define THREAD_FPU64_CONTEXT_Q30_Q31 THREAD_FPU64_CONTEXT_Q30 + +#define THREAD_FPU32_CONTEXT_Q0 0x000 +#define THREAD_FPU32_CONTEXT_Q1 0x010 +#define THREAD_FPU32_CONTEXT_Q2 0x020 +#define THREAD_FPU32_CONTEXT_Q3 0x030 +#define THREAD_FPU32_CONTEXT_Q8 0x040 +#define THREAD_FPU32_CONTEXT_Q9 0x050 +#define THREAD_FPU32_CONTEXT_Q10 0x060 +#define THREAD_FPU32_CONTEXT_Q11 0x070 +#define THREAD_FPU32_CONTEXT_Q12 0x080 +#define THREAD_FPU32_CONTEXT_Q13 0x090 +#define THREAD_FPU32_CONTEXT_Q14 0x0A0 +#define THREAD_FPU32_CONTEXT_Q15 0x0B0 + +#define THREAD_FPU32_CONTEXT_Q0_Q1 THREAD_FPU32_CONTEXT_Q0 +#define THREAD_FPU32_CONTEXT_Q2_Q3 THREAD_FPU32_CONTEXT_Q2 +#define THREAD_FPU32_CONTEXT_Q8_Q9 THREAD_FPU32_CONTEXT_Q8 +#define THREAD_FPU32_CONTEXT_Q10_Q11 THREAD_FPU32_CONTEXT_Q10 +#define THREAD_FPU32_CONTEXT_Q12_Q13 THREAD_FPU32_CONTEXT_Q12 +#define THREAD_FPU32_CONTEXT_Q14_Q15 THREAD_FPU32_CONTEXT_Q14 /* ams::kern::arch::arm64::KExceptionContext, https://github.com/Atmosphere-NX/Atmosphere/blob/master/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_exception_context.hpp */ #define EXCEPTION_CONTEXT_SIZE 0x120 diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_thread_context.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_thread_context.hpp index b4fb3a08c..40cc1f860 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_thread_context.hpp +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_thread_context.hpp @@ -27,10 +27,12 @@ namespace ams::kern::arch::arm64 { class KThreadContext { public: - static constexpr size_t NumCalleeSavedRegisters = (29 - 19) + 1; - static constexpr size_t NumFpuRegisters = 32; - private: - union { + static constexpr size_t NumCalleeSavedRegisters = (29 - 
19) + 1; + static constexpr size_t NumCalleeSavedFpuRegisters = 8; + static constexpr size_t NumCallerSavedFpuRegisters = 24; + static constexpr size_t NumFpuRegisters = NumCalleeSavedFpuRegisters + NumCallerSavedFpuRegisters; + public: + union CalleeSaveRegisters { u64 registers[NumCalleeSavedRegisters]; struct { u64 x19; @@ -45,23 +47,125 @@ namespace ams::kern::arch::arm64 { u64 x28; u64 x29; }; - } m_callee_saved; + }; + + union CalleeSaveFpu64Registers { + u128 v[NumCalleeSavedFpuRegisters]; + struct { + u128 q8; + u128 q9; + u128 q10; + u128 q11; + u128 q12; + u128 q13; + u128 q14; + u128 q15; + }; + }; + + union CalleeSaveFpu32Registers { + u128 v[NumCalleeSavedFpuRegisters / 2]; + struct { + u128 q4; + u128 q5; + u128 q6; + u128 q7; + }; + }; + + union CalleeSaveFpuRegisters { + CalleeSaveFpu64Registers fpu64; + CalleeSaveFpu32Registers fpu32; + }; + + union CallerSaveFpu64Registers { + u128 v[NumCallerSavedFpuRegisters]; + struct { + union { + u128 v0_7[NumCallerSavedFpuRegisters / 3]; + struct { + u128 q0; + u128 q1; + u128 q2; + u128 q3; + u128 q4; + u128 q5; + u128 q6; + u128 q7; + }; + }; + union { + u128 v16_31[2 * NumCallerSavedFpuRegisters / 3]; + struct { + u128 q16; + u128 q17; + u128 q18; + u128 q19; + u128 q20; + u128 q21; + u128 q22; + u128 q23; + u128 q24; + u128 q25; + u128 q26; + u128 q27; + u128 q28; + u128 q29; + u128 q30; + u128 q31; + }; + }; + }; + }; + + union CallerSaveFpu32Registers { + u128 v[NumCallerSavedFpuRegisters / 2]; + struct { + union { + u128 v0_3[(NumCallerSavedFpuRegisters / 3) / 2]; + struct { + u128 q0; + u128 q1; + u128 q2; + u128 q3; + }; + }; + union { + u128 v8_15[(2 * NumCallerSavedFpuRegisters / 3) / 2]; + struct { + u128 q8; + u128 q9; + u128 q10; + u128 q11; + u128 q12; + u128 q13; + u128 q14; + u128 q15; + }; + }; + }; + }; + + union CallerSaveFpuRegisters { + CallerSaveFpu64Registers fpu64; + CallerSaveFpu32Registers fpu32; + }; + private: + CalleeSaveRegisters m_callee_saved; u64 m_lr; u64 m_sp; - u64 m_cpacr; - u64 m_fpcr; - u64 m_fpsr; - alignas(0x10) u128 m_fpu_registers[NumFpuRegisters]; + u32 m_fpcr; + u32 m_fpsr; + alignas(0x10) CalleeSaveFpuRegisters m_callee_saved_fpu; bool m_locked; private: static void RestoreFpuRegisters64(const KThreadContext &); static void RestoreFpuRegisters32(const KThreadContext &); public: - constexpr explicit KThreadContext(util::ConstantInitializeTag) : m_callee_saved(), m_lr(), m_sp(), m_cpacr(), m_fpcr(), m_fpsr(), m_fpu_registers(), m_locked() { /* ... */ } + constexpr explicit KThreadContext(util::ConstantInitializeTag) : m_callee_saved(), m_lr(), m_sp(), m_fpcr(), m_fpsr(), m_callee_saved_fpu(), m_locked() { /* ... */ } explicit KThreadContext() { /* ... 
*/ } Result Initialize(KVirtualAddress u_pc, KVirtualAddress k_sp, KVirtualAddress u_sp, uintptr_t arg, bool is_user, bool is_64_bit, bool is_main); - Result Finalize(); void SetArguments(uintptr_t arg0, uintptr_t arg1); @@ -75,13 +179,58 @@ namespace ams::kern::arch::arm64 { void CloneFpuStatus(); - void SetFpuRegisters(const u128 *v, bool is_64_bit); - - const u128 *GetFpuRegisters() const { return m_fpu_registers; } + const auto &GetCalleeSaveFpuRegisters() const { return m_callee_saved_fpu; } + auto &GetCalleeSaveFpuRegisters() { return m_callee_saved_fpu; } public: static void OnThreadTerminating(const KThread *thread); public: static consteval bool ValidateOffsets(); + + template<typename CallerSave, typename CalleeSave> requires ((std::same_as<CallerSave, CallerSaveFpu64Registers> && std::same_as<CalleeSave, CalleeSaveFpu64Registers>) || (std::same_as<CallerSave, CallerSaveFpu32Registers> && std::same_as<CalleeSave, CalleeSaveFpu32Registers>)) + static void GetFpuRegisters(u128 *out, const CallerSave &caller_save, const CalleeSave &callee_save) { + /* Check that the register counts are correct. */ + constexpr size_t RegisterUnitCount = util::size(CalleeSave{}.v); + static_assert(util::size(CalleeSave{}.v) == 1 * RegisterUnitCount); + static_assert(util::size(CallerSave{}.v) == 3 * RegisterUnitCount); + + /* Copy the low caller-save registers. */ + for (size_t i = 0; i < RegisterUnitCount; ++i) { + *(out++) = caller_save.v[i]; + } + + /* Copy the callee-save registers. */ + for (size_t i = 0; i < RegisterUnitCount; ++i) { + *(out++) = callee_save.v[i]; + } + + /* Copy the remaining caller-save registers. */ + for (size_t i = 0; i < 2 * RegisterUnitCount; ++i) { + *(out++) = caller_save.v[RegisterUnitCount + i]; + } + } + + template<typename CallerSave, typename CalleeSave> requires ((std::same_as<CallerSave, CallerSaveFpu64Registers> && std::same_as<CalleeSave, CalleeSaveFpu64Registers>) || (std::same_as<CallerSave, CallerSaveFpu32Registers> && std::same_as<CalleeSave, CalleeSaveFpu32Registers>)) + static ALWAYS_INLINE void SetFpuRegisters(CallerSave &caller_save, CalleeSave &callee_save, const u128 *v) { + /* Check that the register counts are correct. */ + constexpr size_t RegisterUnitCount = util::size(CalleeSave{}.v); + static_assert(util::size(CalleeSave{}.v) == 1 * RegisterUnitCount); + static_assert(util::size(CallerSave{}.v) == 3 * RegisterUnitCount); + + /* Copy the low caller-save registers. */ + for (size_t i = 0; i < RegisterUnitCount; ++i) { + caller_save.v[i] = *(v++); + } + + /* Copy the callee-save registers. */ + for (size_t i = 0; i < RegisterUnitCount; ++i) { + callee_save.v[i] = *(v++); + } + + /* Copy the remaining caller-save registers. 
*/ + for (size_t i = 0; i < 2 * RegisterUnitCount; ++i) { + caller_save.v[RegisterUnitCount + i] = *(v++); + } + } }; consteval bool KThreadContext::ValidateOffsets() { @@ -101,17 +250,67 @@ namespace ams::kern::arch::arm64 { static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved.x29) == THREAD_CONTEXT_X29); static_assert(AMS_OFFSETOF(KThreadContext, m_lr) == THREAD_CONTEXT_LR); static_assert(AMS_OFFSETOF(KThreadContext, m_sp) == THREAD_CONTEXT_SP); - static_assert(AMS_OFFSETOF(KThreadContext, m_cpacr) == THREAD_CONTEXT_CPACR); static_assert(AMS_OFFSETOF(KThreadContext, m_fpcr) == THREAD_CONTEXT_FPCR); static_assert(AMS_OFFSETOF(KThreadContext, m_fpsr) == THREAD_CONTEXT_FPSR); - static_assert(AMS_OFFSETOF(KThreadContext, m_fpu_registers) == THREAD_CONTEXT_FPU_REGISTERS); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu) == THREAD_CONTEXT_FPU_REGISTERS); static_assert(AMS_OFFSETOF(KThreadContext, m_locked) == THREAD_CONTEXT_LOCKED); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q8 ) == THREAD_CONTEXT_FPU64_Q8 ); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q9 ) == THREAD_CONTEXT_FPU64_Q9 ); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q10) == THREAD_CONTEXT_FPU64_Q10); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q11) == THREAD_CONTEXT_FPU64_Q11); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q12) == THREAD_CONTEXT_FPU64_Q12); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q13) == THREAD_CONTEXT_FPU64_Q13); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q14) == THREAD_CONTEXT_FPU64_Q14); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu64.q15) == THREAD_CONTEXT_FPU64_Q15); + + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu32.q4 ) == THREAD_CONTEXT_FPU32_Q4 ); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu32.q5 ) == THREAD_CONTEXT_FPU32_Q5 ); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu32.q6 ) == THREAD_CONTEXT_FPU32_Q6 ); + static_assert(AMS_OFFSETOF(KThreadContext, m_callee_saved_fpu.fpu32.q7 ) == THREAD_CONTEXT_FPU32_Q7 ); + + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q0 ) == THREAD_FPU64_CONTEXT_Q0 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q1 ) == THREAD_FPU64_CONTEXT_Q1 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q2 ) == THREAD_FPU64_CONTEXT_Q2 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q3 ) == THREAD_FPU64_CONTEXT_Q3 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q4 ) == THREAD_FPU64_CONTEXT_Q4 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q5 ) == THREAD_FPU64_CONTEXT_Q5 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q6 ) == THREAD_FPU64_CONTEXT_Q6 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q7 ) == THREAD_FPU64_CONTEXT_Q7 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q16) == THREAD_FPU64_CONTEXT_Q16); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q17) == THREAD_FPU64_CONTEXT_Q17); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q18) == THREAD_FPU64_CONTEXT_Q18); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q19) == THREAD_FPU64_CONTEXT_Q19); + 
static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q20) == THREAD_FPU64_CONTEXT_Q20); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q21) == THREAD_FPU64_CONTEXT_Q21); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q22) == THREAD_FPU64_CONTEXT_Q22); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q23) == THREAD_FPU64_CONTEXT_Q23); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q24) == THREAD_FPU64_CONTEXT_Q24); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q25) == THREAD_FPU64_CONTEXT_Q25); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q26) == THREAD_FPU64_CONTEXT_Q26); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q27) == THREAD_FPU64_CONTEXT_Q27); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q28) == THREAD_FPU64_CONTEXT_Q28); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q29) == THREAD_FPU64_CONTEXT_Q29); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q30) == THREAD_FPU64_CONTEXT_Q30); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu64.q31) == THREAD_FPU64_CONTEXT_Q31); + + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q0 ) == THREAD_FPU32_CONTEXT_Q0 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q1 ) == THREAD_FPU32_CONTEXT_Q1 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q2 ) == THREAD_FPU32_CONTEXT_Q2 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q3 ) == THREAD_FPU32_CONTEXT_Q3 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q8 ) == THREAD_FPU32_CONTEXT_Q8 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q9 ) == THREAD_FPU32_CONTEXT_Q9 ); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q10) == THREAD_FPU32_CONTEXT_Q10); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q11) == THREAD_FPU32_CONTEXT_Q11); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q12) == THREAD_FPU32_CONTEXT_Q12); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q13) == THREAD_FPU32_CONTEXT_Q13); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q14) == THREAD_FPU32_CONTEXT_Q14); + static_assert(AMS_OFFSETOF(KThreadContext::CallerSaveFpuRegisters, fpu32.q15) == THREAD_FPU32_CONTEXT_Q15); + return true; } static_assert(KThreadContext::ValidateOffsets()); - void GetUserContext(ams::svc::ThreadContext *out, const KThread *thread); } \ No newline at end of file diff --git a/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp b/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp index e175f118a..dbc973de5 100644 --- a/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp +++ b/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp @@ -239,78 +239,7 @@ namespace ams::kern { constexpr u64 GetPriorityMask() const { return m_priority_mask; } constexpr s32 GetHandleTableSize() const { return m_handle_table_size; } - ALWAYS_INLINE void CopySvcPermissionsTo(KThread::StackParameters &sp) const { - /* Copy permissions. */ - sp.svc_access_flags = m_svc_access_flags; - - /* Clear specific SVCs based on our state. 
*/ - sp.svc_access_flags[svc::SvcId_ReturnFromException] = false; - sp.svc_access_flags[svc::SvcId_SynchronizePreemptionState] = false; - if (sp.is_pinned) { - sp.svc_access_flags[svc::SvcId_GetInfo] = false; - } - } - - ALWAYS_INLINE void CopyPinnedSvcPermissionsTo(KThread::StackParameters &sp) const { - /* Get whether we have access to return from exception. */ - const bool return_from_exception = sp.svc_access_flags[svc::SvcId_ReturnFromException]; - - /* Clear all permissions. */ - sp.svc_access_flags.Reset(); - - /* Set SynchronizePreemptionState if allowed. */ - if (m_svc_access_flags[svc::SvcId_SynchronizePreemptionState]) { - sp.svc_access_flags[svc::SvcId_SynchronizePreemptionState] = true; - } - - /* If we previously had ReturnFromException, potentially grant it and GetInfo. */ - if (return_from_exception) { - /* Set ReturnFromException (guaranteed allowed, if we're here). */ - sp.svc_access_flags[svc::SvcId_ReturnFromException] = true; - - /* Set GetInfo if allowed. */ - if (m_svc_access_flags[svc::SvcId_GetInfo]) { - sp.svc_access_flags[svc::SvcId_GetInfo] = true; - } - } - } - - ALWAYS_INLINE void CopyUnpinnedSvcPermissionsTo(KThread::StackParameters &sp) const { - /* Get whether we have access to return from exception. */ - const bool return_from_exception = sp.svc_access_flags[svc::SvcId_ReturnFromException]; - - /* Copy permissions. */ - sp.svc_access_flags = m_svc_access_flags; - - /* Clear specific SVCs based on our state. */ - sp.svc_access_flags[svc::SvcId_SynchronizePreemptionState] = false; - - if (!return_from_exception) { - sp.svc_access_flags[svc::SvcId_ReturnFromException] = false; - } - } - - ALWAYS_INLINE void CopyEnterExceptionSvcPermissionsTo(KThread::StackParameters &sp) const { - /* Set ReturnFromException if allowed. */ - if (m_svc_access_flags[svc::SvcId_ReturnFromException]) { - sp.svc_access_flags[svc::SvcId_ReturnFromException] = true; - } - - /* Set GetInfo if allowed. */ - if (m_svc_access_flags[svc::SvcId_GetInfo]) { - sp.svc_access_flags[svc::SvcId_GetInfo] = true; - } - } - - ALWAYS_INLINE void CopyLeaveExceptionSvcPermissionsTo(KThread::StackParameters &sp) const { - /* Clear ReturnFromException. */ - sp.svc_access_flags[svc::SvcId_ReturnFromException] = false; - - /* If pinned, clear GetInfo. 
*/ - if (sp.is_pinned) { - sp.svc_access_flags[svc::SvcId_GetInfo] = false; - } - } + constexpr const svc::SvcAccessFlagSet &GetSvcPermissions() const { return m_svc_access_flags; } constexpr bool IsPermittedSvc(svc::SvcId id) const { return (id < m_svc_access_flags.GetCount()) && m_svc_access_flags[id]; diff --git a/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp b/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp index 97429df05..8e3fb85ac 100644 --- a/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp +++ b/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp @@ -233,25 +233,7 @@ namespace ams::kern { return m_pinned_threads[core_id]; } - void CopySvcPermissionsTo(KThread::StackParameters &sp) { - m_capabilities.CopySvcPermissionsTo(sp); - } - - void CopyPinnedSvcPermissionsTo(KThread::StackParameters &sp) { - m_capabilities.CopyPinnedSvcPermissionsTo(sp); - } - - void CopyUnpinnedSvcPermissionsTo(KThread::StackParameters &sp) { - m_capabilities.CopyUnpinnedSvcPermissionsTo(sp); - } - - void CopyEnterExceptionSvcPermissionsTo(KThread::StackParameters &sp) { - m_capabilities.CopyEnterExceptionSvcPermissionsTo(sp); - } - - void CopyLeaveExceptionSvcPermissionsTo(KThread::StackParameters &sp) { - m_capabilities.CopyLeaveExceptionSvcPermissionsTo(sp); - } + const svc::SvcAccessFlagSet &GetSvcPermissions() const { return m_capabilities.GetSvcPermissions(); } constexpr KResourceLimit *GetResourceLimit() const { return m_resource_limit; } diff --git a/libraries/libmesosphere/include/mesosphere/kern_k_thread.hpp b/libraries/libmesosphere/include/mesosphere/kern_k_thread.hpp index a9574e584..a3f19b0ae 100644 --- a/libraries/libmesosphere/include/mesosphere/kern_k_thread.hpp +++ b/libraries/libmesosphere/include/mesosphere/kern_k_thread.hpp @@ -84,35 +84,56 @@ namespace ams::kern { DpcFlag_PerformDestruction = (1 << 2), }; + enum ExceptionFlag : u32 { + ExceptionFlag_IsCallingSvc = (1 << 0), + ExceptionFlag_IsInExceptionHandler = (1 << 1), + ExceptionFlag_IsFpuContextRestoreNeeded = (1 << 2), + ExceptionFlag_IsFpu64Bit = (1 << 3), + ExceptionFlag_IsInUsermodeExceptionHandler = (1 << 4), + ExceptionFlag_IsInCacheMaintenanceOperation = (1 << 5), + ExceptionFlag_IsInTlbMaintenanceOperation = (1 << 6), + #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) + ExceptionFlag_IsHardwareSingleStep = (1 << 7), + #endif + }; + struct StackParameters { - alignas(0x10) svc::SvcAccessFlagSet svc_access_flags; - KThreadContext *context; + svc::SvcAccessFlagSet svc_access_flags; + KThreadContext::CallerSaveFpuRegisters *caller_save_fpu_registers; KThread *cur_thread; s16 disable_count; util::Atomic<u8> dpc_flags; u8 current_svc_id; - bool is_calling_svc; - bool is_in_exception_handler; + u8 reserved_2c; + u8 exception_flags; bool is_pinned; - #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) - bool is_single_step; - #endif + u8 reserved_2f; + KThreadContext context; }; - static_assert(alignof(StackParameters) == 0x10); - static_assert(sizeof(StackParameters) == THREAD_STACK_PARAMETERS_SIZE); - static_assert(AMS_OFFSETOF(StackParameters, svc_access_flags) == THREAD_STACK_PARAMETERS_SVC_PERMISSION); - static_assert(AMS_OFFSETOF(StackParameters, context) == THREAD_STACK_PARAMETERS_CONTEXT); - static_assert(AMS_OFFSETOF(StackParameters, cur_thread) == THREAD_STACK_PARAMETERS_CUR_THREAD); - static_assert(AMS_OFFSETOF(StackParameters, disable_count) == THREAD_STACK_PARAMETERS_DISABLE_COUNT); - static_assert(AMS_OFFSETOF(StackParameters, dpc_flags) == 
THREAD_STACK_PARAMETERS_DPC_FLAGS); - static_assert(AMS_OFFSETOF(StackParameters, current_svc_id) == THREAD_STACK_PARAMETERS_CURRENT_SVC_ID); - static_assert(AMS_OFFSETOF(StackParameters, is_calling_svc) == THREAD_STACK_PARAMETERS_IS_CALLING_SVC); - static_assert(AMS_OFFSETOF(StackParameters, is_in_exception_handler) == THREAD_STACK_PARAMETERS_IS_IN_EXCEPTION_HANDLER); - static_assert(AMS_OFFSETOF(StackParameters, is_pinned) == THREAD_STACK_PARAMETERS_IS_PINNED); + static_assert(util::IsAligned(AMS_OFFSETOF(StackParameters, context), 0x10)); + static_assert(AMS_OFFSETOF(StackParameters, svc_access_flags) == THREAD_STACK_PARAMETERS_SVC_PERMISSION); + static_assert(AMS_OFFSETOF(StackParameters, caller_save_fpu_registers) == THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS); + static_assert(AMS_OFFSETOF(StackParameters, cur_thread) == THREAD_STACK_PARAMETERS_CUR_THREAD); + static_assert(AMS_OFFSETOF(StackParameters, disable_count) == THREAD_STACK_PARAMETERS_DISABLE_COUNT); + static_assert(AMS_OFFSETOF(StackParameters, dpc_flags) == THREAD_STACK_PARAMETERS_DPC_FLAGS); + static_assert(AMS_OFFSETOF(StackParameters, current_svc_id) == THREAD_STACK_PARAMETERS_CURRENT_SVC_ID); + static_assert(AMS_OFFSETOF(StackParameters, reserved_2c) == THREAD_STACK_PARAMETERS_RESERVED_2C); + static_assert(AMS_OFFSETOF(StackParameters, exception_flags) == THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS); + static_assert(AMS_OFFSETOF(StackParameters, is_pinned) == THREAD_STACK_PARAMETERS_IS_PINNED); + static_assert(AMS_OFFSETOF(StackParameters, reserved_2f) == THREAD_STACK_PARAMETERS_RESERVED_2F); + static_assert(AMS_OFFSETOF(StackParameters, context) == THREAD_STACK_PARAMETERS_THREAD_CONTEXT); + + static_assert(ExceptionFlag_IsCallingSvc == THREAD_EXCEPTION_FLAG_IS_CALLING_SVC); + static_assert(ExceptionFlag_IsInExceptionHandler == THREAD_EXCEPTION_FLAG_IS_IN_EXCEPTION_HANDLER); + static_assert(ExceptionFlag_IsFpuContextRestoreNeeded == THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED); + static_assert(ExceptionFlag_IsFpu64Bit == THREAD_EXCEPTION_FLAG_IS_FPU_64_BIT); + static_assert(ExceptionFlag_IsInUsermodeExceptionHandler == THREAD_EXCEPTION_FLAG_IS_IN_USERMODE_EXCEPTION_HANDLER); + static_assert(ExceptionFlag_IsInCacheMaintenanceOperation == THREAD_EXCEPTION_FLAG_IS_IN_CACHE_MAINTENANCE_OPERATION); + static_assert(ExceptionFlag_IsInTlbMaintenanceOperation == THREAD_EXCEPTION_FLAG_IS_IN_TLB_MAINTENANCE_OPERATION); #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) - static_assert(AMS_OFFSETOF(StackParameters, is_single_step) == THREAD_STACK_PARAMETERS_IS_SINGLE_STEP); + static_assert(ExceptionFlag_IsHardwareSingleStep == THREAD_EXCEPTION_FLAG_IS_HARDWARE_SINGLE_STEP); #endif struct QueueEntry { @@ -184,60 +205,60 @@ namespace ams::kern { using ConditionVariableThreadTreeTraits = util::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&KThread::m_condvar_arbiter_tree_node>; using ConditionVariableThreadTree = ConditionVariableThreadTreeTraits::TreeType; - ConditionVariableThreadTree *m_condvar_tree; - uintptr_t m_condvar_key; - alignas(16) KThreadContext m_thread_context; - u64 m_virtual_affinity_mask; - KAffinityMask m_physical_affinity_mask; - u64 m_thread_id; - util::Atomic<s64> m_cpu_time; - KProcessAddress m_address_key; - KProcess *m_parent; - void *m_kernel_stack_top; - u32 *m_light_ipc_data; - KProcessAddress m_tls_address; - void *m_tls_heap_address; - KLightLock m_activity_pause_lock; - SyncObjectBuffer m_sync_object_buffer; - s64 m_schedule_count; - s64 m_last_scheduled_tick; - QueueEntry m_per_core_priority_queue_entry[cpu::NumCores]; - KThreadQueue *m_wait_queue; - WaiterList m_waiter_list; - WaiterList m_pinned_waiter_list; - KThread *m_lock_owner; - uintptr_t m_debug_params[3]; - KAutoObject *m_closed_object; - u32 m_address_key_value; - u32 m_suspend_request_flags; - u32 m_suspend_allowed_flags; - s32 m_synced_index; - Result m_wait_result; - Result m_debug_exception_result; - s32 m_base_priority; - s32 m_base_priority_on_unpin; - s32 m_physical_ideal_core_id; - s32 m_virtual_ideal_core_id; - s32 m_num_kernel_waiters; - s32 m_current_core_id; - s32 m_core_id; - KAffinityMask m_original_physical_affinity_mask; - s32 m_original_physical_ideal_core_id; - s32 m_num_core_migration_disables; - ThreadState m_thread_state; - util::Atomic<bool> m_termination_requested; - bool m_wait_cancelled; - bool m_cancellable; - bool m_signaled; - bool m_initialized; - bool m_debug_attached; - s8 m_priority_inheritance_count; - bool m_resource_limit_release_hint; + ConditionVariableThreadTree *m_condvar_tree; + uintptr_t m_condvar_key; + alignas(16) KThreadContext::CallerSaveFpuRegisters m_caller_save_fpu_registers; + u64 m_virtual_affinity_mask; + KAffinityMask m_physical_affinity_mask; + u64 m_thread_id; + util::Atomic<s64> m_cpu_time; + KProcessAddress m_address_key; + KProcess *m_parent; + void *m_kernel_stack_top; + u32 *m_light_ipc_data; + KProcessAddress m_tls_address; + void *m_tls_heap_address; + KLightLock m_activity_pause_lock; + SyncObjectBuffer m_sync_object_buffer; + s64 m_schedule_count; + s64 m_last_scheduled_tick; + QueueEntry m_per_core_priority_queue_entry[cpu::NumCores]; + KThreadQueue *m_wait_queue; + WaiterList m_waiter_list; + WaiterList m_pinned_waiter_list; + KThread *m_lock_owner; + uintptr_t m_debug_params[3]; + KAutoObject *m_closed_object; + u32 m_address_key_value; + u32 m_suspend_request_flags; + u32 m_suspend_allowed_flags; + s32 m_synced_index; + Result m_wait_result; + Result m_debug_exception_result; + s32 m_base_priority; + s32 m_base_priority_on_unpin; + s32 m_physical_ideal_core_id; + s32 m_virtual_ideal_core_id; + s32 m_num_kernel_waiters; + s32 m_current_core_id; + s32 m_core_id; + KAffinityMask m_original_physical_affinity_mask; + s32 m_original_physical_ideal_core_id; + s32 m_num_core_migration_disables; + ThreadState m_thread_state; + util::Atomic<bool> m_termination_requested; + bool m_wait_cancelled; + bool m_cancellable; + bool m_signaled; + bool m_initialized; + bool m_debug_attached; + s8 m_priority_inheritance_count; + bool m_resource_limit_release_hint; public: constexpr explicit KThread(util::ConstantInitializeTag) : KAutoObjectWithSlabHeapAndContainer(util::ConstantInitialize), KTimerTask(util::ConstantInitialize), m_process_list_node{}, m_condvar_arbiter_tree_node{util::ConstantInitialize}, m_priority{-1}, m_condvar_tree{}, m_condvar_key{}, - m_thread_context{util::ConstantInitialize}, m_virtual_affinity_mask{}, m_physical_affinity_mask{}, m_thread_id{}, m_cpu_time{0}, m_address_key{Null<KProcessAddress>}, m_parent{}, + m_caller_save_fpu_registers{}, m_virtual_affinity_mask{}, m_physical_affinity_mask{}, m_thread_id{}, m_cpu_time{0}, m_address_key{Null<KProcessAddress>}, m_parent{}, m_kernel_stack_top{}, m_light_ipc_data{}, m_tls_address{Null<KProcessAddress>}, m_tls_heap_address{}, m_activity_pause_lock{}, m_sync_object_buffer{util::ConstantInitialize}, m_schedule_count{}, m_last_scheduled_tick{}, m_per_core_priority_queue_entry{}, m_wait_queue{}, m_waiter_list{}, m_pinned_waiter_list{}, m_lock_owner{}, m_debug_params{}, m_closed_object{}, m_address_key_value{}, m_suspend_request_flags{}, 
m_suspend_allowed_flags{}, m_synced_index{}, @@ -269,17 +290,8 @@ namespace ams::kern { static void ResumeThreadsSuspendedForInit(); private: - StackParameters &GetStackParameters() { - return *(reinterpret_cast<StackParameters *>(m_kernel_stack_top) - 1); - } - - const StackParameters &GetStackParameters() const { - return *(reinterpret_cast<const StackParameters *>(m_kernel_stack_top) - 1); - } - public: - StackParameters &GetStackParametersForExceptionSvcPermission() { - return *(reinterpret_cast<StackParameters *>(m_kernel_stack_top) - 1); - } + ALWAYS_INLINE StackParameters &GetStackParameters() { return *(reinterpret_cast< StackParameters *>(m_kernel_stack_top) - 1); } + ALWAYS_INLINE const StackParameters &GetStackParameters() const { return *(reinterpret_cast<const StackParameters *>(m_kernel_stack_top) - 1); } public: ALWAYS_INLINE s16 GetDisableDispatchCount() const { MESOSPHERE_ASSERT_THIS(); @@ -315,51 +327,61 @@ namespace ams::kern { NOINLINE void DisableCoreMigration(); NOINLINE void EnableCoreMigration(); - - ALWAYS_INLINE void SetInExceptionHandler() { + private: + ALWAYS_INLINE void SetExceptionFlag(ExceptionFlag flag) { MESOSPHERE_ASSERT_THIS(); - this->GetStackParameters().is_in_exception_handler = true; + this->GetStackParameters().exception_flags |= flag; } - ALWAYS_INLINE void ClearInExceptionHandler() { + ALWAYS_INLINE void ClearExceptionFlag(ExceptionFlag flag) { MESOSPHERE_ASSERT_THIS(); - this->GetStackParameters().is_in_exception_handler = false; + this->GetStackParameters().exception_flags &= ~flag; } - ALWAYS_INLINE bool IsInExceptionHandler() const { + ALWAYS_INLINE bool IsExceptionFlagSet(ExceptionFlag flag) const { MESOSPHERE_ASSERT_THIS(); - return this->GetStackParameters().is_in_exception_handler; + return this->GetStackParameters().exception_flags & flag; } + public: + /* ALWAYS_INLINE void SetCallingSvc() { return this->SetExceptionFlag(ExceptionFlag_IsCallingSvc); } */ + /* ALWAYS_INLINE void ClearCallingSvc() { return this->ClearExceptionFlag(ExceptionFlag_IsCallingSvc); } */ + ALWAYS_INLINE bool IsCallingSvc() const { return this->IsExceptionFlagSet(ExceptionFlag_IsCallingSvc); } - ALWAYS_INLINE bool IsCallingSvc() const { - MESOSPHERE_ASSERT_THIS(); - return this->GetStackParameters().is_calling_svc; - } + ALWAYS_INLINE void SetInExceptionHandler() { return this->SetExceptionFlag(ExceptionFlag_IsInExceptionHandler); } + ALWAYS_INLINE void ClearInExceptionHandler() { return this->ClearExceptionFlag(ExceptionFlag_IsInExceptionHandler); } + ALWAYS_INLINE bool IsInExceptionHandler() const { return this->IsExceptionFlagSet(ExceptionFlag_IsInExceptionHandler); } + + /* ALWAYS_INLINE void SetFpuContextRestoreNeeded() { return this->SetExceptionFlag(ExceptionFlag_IsFpuContextRestoreNeeded); } */ + /* ALWAYS_INLINE void ClearFpuContextRestoreNeeded() { return this->ClearExceptionFlag(ExceptionFlag_IsFpuContextRestoreNeeded); } */ + /* ALWAYS_INLINE bool IsFpuContextRestoreNeeded() const { return this->IsExceptionFlagSet(ExceptionFlag_IsFpuContextRestoreNeeded); } */ + + ALWAYS_INLINE void SetFpu64Bit() { return this->SetExceptionFlag(ExceptionFlag_IsFpu64Bit); } + /* ALWAYS_INLINE void ClearFpu64Bit() { return this->ClearExceptionFlag(ExceptionFlag_IsFpu64Bit); } */ + /* ALWAYS_INLINE bool IsFpu64Bit() const { return this->IsExceptionFlagSet(ExceptionFlag_IsFpu64Bit); } */ + + ALWAYS_INLINE void SetInUsermodeExceptionHandler() { return this->SetExceptionFlag(ExceptionFlag_IsInUsermodeExceptionHandler); } + ALWAYS_INLINE void ClearInUsermodeExceptionHandler() { return this->ClearExceptionFlag(ExceptionFlag_IsInUsermodeExceptionHandler); } + 
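(Note: the accessors above and below manipulate the single exception_flags byte at offset 0x2D of StackParameters, replacing the per-flag bools of the old layout; the THREAD_EXCEPTION_FLAG_* macros in kern_assembly_offsets.h describe the same bits, so assembly can test them directly. A minimal sketch of the asm-side pattern, following the usage in kern_svc_exception_asm.s later in this patch:

    ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)]
    tbz  w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_CALLING_SVC), 1f  /* skip if not in an SVC */

No atomic read-modify-write is used on this byte; the apparent assumption, consistent with the separate bool fields it replaces, is that it is only modified from the owning thread's own kernel context.)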
ALWAYS_INLINE bool IsInUsermodeExceptionHandler() const { return this->IsExceptionFlagSet(ExceptionFlag_IsInUsermodeExceptionHandler); } + + ALWAYS_INLINE void SetInCacheMaintenanceOperation() { return this->SetExceptionFlag(ExceptionFlag_IsInCacheMaintenanceOperation); } + ALWAYS_INLINE void ClearInCacheMaintenanceOperation() { return this->ClearExceptionFlag(ExceptionFlag_IsInCacheMaintenanceOperation); } + ALWAYS_INLINE bool IsInCacheMaintenanceOperation() const { return this->IsExceptionFlagSet(ExceptionFlag_IsInCacheMaintenanceOperation); } + + ALWAYS_INLINE void SetInTlbMaintenanceOperation() { return this->SetExceptionFlag(ExceptionFlag_IsInTlbMaintenanceOperation); } + ALWAYS_INLINE void ClearInTlbMaintenanceOperation() { return this->ClearExceptionFlag(ExceptionFlag_IsInTlbMaintenanceOperation); } + ALWAYS_INLINE bool IsInTlbMaintenanceOperation() const { return this->IsExceptionFlagSet(ExceptionFlag_IsInTlbMaintenanceOperation); } + + #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) + ALWAYS_INLINE void SetHardwareSingleStep() { return this->SetExceptionFlag(ExceptionFlag_IsHardwareSingleStep); } + ALWAYS_INLINE void ClearHardwareSingleStep() { return this->ClearExceptionFlag(ExceptionFlag_IsHardwareSingleStep); } + ALWAYS_INLINE bool IsHardwareSingleStep() const { return this->IsExceptionFlagSet(ExceptionFlag_IsHardwareSingleStep); } + #endif ALWAYS_INLINE u8 GetSvcId() const { MESOSPHERE_ASSERT_THIS(); return this->GetStackParameters().current_svc_id; } - #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) - - ALWAYS_INLINE void SetSingleStep() { - MESOSPHERE_ASSERT_THIS(); - this->GetStackParameters().is_single_step = true; - } - - ALWAYS_INLINE void ClearSingleStep() { - MESOSPHERE_ASSERT_THIS(); - this->GetStackParameters().is_single_step = false; - } - - ALWAYS_INLINE bool IsSingleStep() const { - MESOSPHERE_ASSERT_THIS(); - return this->GetStackParameters().is_single_step; - } - - #endif - ALWAYS_INLINE void RegisterDpc(DpcFlag flag) { this->GetStackParameters().dpc_flags |= flag; } @@ -376,6 +398,13 @@ namespace ams::kern { MESOSPHERE_ASSERT_THIS(); return this->GetDpc() != 0; } + + private: + void SetPinnedSvcPermissions(); + void SetUnpinnedSvcPermissions(); + + void SetUsermodeExceptionSvcPermissions(); + void ClearUsermodeExceptionSvcPermissions(); private: void UpdateState(); ALWAYS_INLINE void AddWaiterImpl(KThread *thread); @@ -391,8 +420,11 @@ namespace ams::kern { public: constexpr u64 GetThreadId() const { return m_thread_id; } - constexpr KThreadContext &GetContext() { return m_thread_context; } - constexpr const KThreadContext &GetContext() const { return m_thread_context; } + const KThreadContext &GetContext() const { return this->GetStackParameters().context; } + KThreadContext &GetContext() { return this->GetStackParameters().context; } + + const auto &GetCallerSaveFpuRegisters() const { return m_caller_save_fpu_registers; } + auto &GetCallerSaveFpuRegisters() { return m_caller_save_fpu_registers; } constexpr u64 GetVirtualAffinityMask() const { return m_virtual_affinity_mask; } constexpr const KAffinityMask &GetAffinityMask() const { return m_physical_affinity_mask; } @@ -518,6 +550,8 @@ namespace ams::kern { void SetInterruptFlag() const { static_cast<ams::svc::ThreadLocalRegion *>(m_tls_heap_address)->interrupt_flag = 1; } void ClearInterruptFlag() const { static_cast<ams::svc::ThreadLocalRegion *>(m_tls_heap_address)->interrupt_flag = 0; } + bool IsInUserCacheMaintenanceOperation() const { return static_cast<ams::svc::ThreadLocalRegion *>(m_tls_heap_address)->cache_maintenance_flag != 0; } + ALWAYS_INLINE KAutoObject *GetClosedObject() { 
return m_closed_object; } ALWAYS_INLINE void SetClosedObject(KAutoObject *object) { @@ -606,6 +640,9 @@ namespace ams::kern { } size_t GetKernelStackUsage() const; + + void OnEnterUsermodeException(); + void OnLeaveUsermodeException(); public: /* Overridden parent functions. */ ALWAYS_INLINE u64 GetIdImpl() const { return this->GetThreadId(); } @@ -628,6 +665,7 @@ namespace ams::kern { static Result GetThreadList(s32 *out_num_threads, ams::kern::svc::KUserPointer<u64 *> out_thread_ids, s32 max_out_count); using ConditionVariableThreadTreeType = ConditionVariableThreadTree; + }; static_assert(alignof(KThread) == 0x10); @@ -636,7 +674,7 @@ namespace ams::kern { static_assert(ConditionVariableThreadTreeTraits::IsValid()); /* Check that the assembly offsets are valid. */ - static_assert(AMS_OFFSETOF(KThread, m_thread_context) == THREAD_THREAD_CONTEXT); + static_assert(AMS_OFFSETOF(KThread, m_kernel_stack_top) == THREAD_KERNEL_STACK_TOP); return true; } diff --git a/libraries/libmesosphere/include/mesosphere/kern_select_assembly_macros.h b/libraries/libmesosphere/include/mesosphere/kern_select_assembly_macros.h new file mode 100644 index 000000000..519863c86 --- /dev/null +++ b/libraries/libmesosphere/include/mesosphere/kern_select_assembly_macros.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#ifdef ATMOSPHERE_ARCH_ARM64 + + #include <mesosphere/arch/arm64/kern_assembly_macros.h> + +#else + + #error "Unknown architecture for CPU" + +#endif diff --git a/libraries/libmesosphere/source/arch/arm64/kern_exception_handlers.cpp b/libraries/libmesosphere/source/arch/arm64/kern_exception_handlers.cpp index 4d7334066..52a2e5ced 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_exception_handlers.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_exception_handlers.cpp @@ -214,16 +214,16 @@ namespace ams::kern::arch::arm64 { context->psr = 0x10; } - /* Set exception SVC permissions. */ - cur_process.CopyEnterExceptionSvcPermissionsTo(GetCurrentThread().GetStackParametersForExceptionSvcPermission()); + /* Process that we're entering a usermode exception on the current thread. */ + GetCurrentThread().OnEnterUsermodeException(); return; } } /* If we should, clear the thread's state as single-step. */ #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) - if (AMS_UNLIKELY(GetCurrentThread().IsSingleStep())) { - GetCurrentThread().ClearSingleStep(); + if (AMS_UNLIKELY(GetCurrentThread().IsHardwareSingleStep())) { + GetCurrentThread().ClearHardwareSingleStep(); cpu::MonitorDebugSystemControlRegisterAccessor().SetSoftwareStep(false).Store(); cpu::InstructionMemoryBarrier(); } @@ -388,8 +388,8 @@ namespace ams::kern::arch::arm64 { /* Try to leave the user exception. */ if (cur_process.LeaveUserException()) { - /* We left user exception. Alter our SVC permissions accordingly. 
*/ - cur_process.CopyLeaveExceptionSvcPermissionsTo(cur_thread->GetStackParametersForExceptionSvcPermission()); + /* Process that we're leaving a usermode exception on the current thread. */ + GetCurrentThread().OnLeaveUsermodeException(); /* Copy the user context to the thread context. */ if (is_aarch64) { diff --git a/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp b/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp index 6f6b1334a..690e3134e 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp @@ -202,16 +202,13 @@ namespace ams::kern::arch::arm64 { /* Get the FPU registers, if required. */ if ((context_flags & ams::svc::ThreadContextFlag_Fpu) != 0) { static_assert(util::size(ams::svc::ThreadContext{}.v) == KThreadContext::NumFpuRegisters); - const u128 *f = t_ctx->GetFpuRegisters(); + const auto &caller_save = thread->GetCallerSaveFpuRegisters(); + const auto &callee_save = t_ctx->GetCalleeSaveFpuRegisters(); if (this->Is64Bit()) { - for (size_t i = 0; i < KThreadContext::NumFpuRegisters; ++i) { - out->v[i] = f[i]; - } + KThreadContext::GetFpuRegisters(out->v, caller_save.fpu64, callee_save.fpu64); } else { - for (size_t i = 0; i < KThreadContext::NumFpuRegisters / 2; ++i) { - out->v[i] = f[i]; - } + KThreadContext::GetFpuRegisters(out->v, caller_save.fpu32, callee_save.fpu32); for (size_t i = KThreadContext::NumFpuRegisters / 2; i < KThreadContext::NumFpuRegisters; ++i) { out->v[i] = 0; } @@ -240,7 +237,14 @@ namespace ams::kern::arch::arm64 { /* Set the FPU registers, if required. */ if ((context_flags & ams::svc::ThreadContextFlag_Fpu) != 0) { static_assert(util::size(ams::svc::ThreadContext{}.v) == KThreadContext::NumFpuRegisters); - t_ctx->SetFpuRegisters(ctx.v, this->Is64Bit()); + auto &caller_save = thread->GetCallerSaveFpuRegisters(); + auto &callee_save = t_ctx->GetCalleeSaveFpuRegisters(); + + if (this->Is64Bit()) { + KThreadContext::SetFpuRegisters(caller_save.fpu64, callee_save.fpu64, ctx.v); + } else { + KThreadContext::SetFpuRegisters(caller_save.fpu32, callee_save.fpu32, ctx.v); + } } R_SUCCEED(); diff --git a/libraries/libmesosphere/source/arch/arm64/kern_k_interrupt_manager.cpp b/libraries/libmesosphere/source/arch/arm64/kern_k_interrupt_manager.cpp index 95e917901..e4cf075dc 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_k_interrupt_manager.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_k_interrupt_manager.cpp @@ -178,19 +178,37 @@ namespace ams::kern::arch::arm64 { /* If we need scheduling, */ if (needs_scheduling) { - /* If the user disable count is set, we may need to pin the current thread. */ - if (user_mode && GetCurrentThread().GetUserDisableCount() != 0 && GetCurrentProcess().GetPinnedThread(GetCurrentCoreId()) == nullptr) { - KScopedSchedulerLock sl; + if (user_mode) { + /* If the interrupt occurred in the middle of a userland cache maintenance operation, ensure memory consistency before rescheduling. */ + if (GetCurrentThread().IsInUserCacheMaintenanceOperation()) { + cpu::DataSynchronizationBarrier(); + } - /* Pin the current thread. */ - GetCurrentProcess().PinCurrentThread(); + /* If the user disable count is set, we may need to pin the current thread. */ + if (GetCurrentThread().GetUserDisableCount() != 0 && GetCurrentProcess().GetPinnedThread(GetCurrentCoreId()) == nullptr) { + KScopedSchedulerLock sl; - /* Set the interrupt flag for the thread. */ - GetCurrentThread().SetInterruptFlag(); + /* Pin the current thread. 
*/ + GetCurrentProcess().PinCurrentThread(); - /* Set the interrupt flag for the thread. */ - GetCurrentThread().SetInterruptFlag(); + + /* Set the interrupt flag for the thread. */ + GetCurrentThread().SetInterruptFlag(); - /* Request interrupt scheduling. */ - Kernel::GetScheduler().RequestScheduleOnInterrupt(); + + /* Request interrupt scheduling. */ + Kernel::GetScheduler().RequestScheduleOnInterrupt(); + } else { + /* Request interrupt scheduling. */ + Kernel::GetScheduler().RequestScheduleOnInterrupt(); + } } else { + /* If the interrupt occurred in the middle of a cache maintenance operation, ensure memory consistency before rescheduling. */ + if (GetCurrentThread().IsInCacheMaintenanceOperation()) { + cpu::DataSynchronizationBarrier(); + } else if (GetCurrentThread().IsInTlbMaintenanceOperation()) { + /* Otherwise, if we're in the middle of a tlb maintenance operation, ensure inner shareable memory consistency before rescheduling. */ + cpu::DataSynchronizationBarrierInnerShareable(); + } + /* Request interrupt scheduling. */ Kernel::GetScheduler().RequestScheduleOnInterrupt(); } diff --git a/libraries/libmesosphere/source/arch/arm64/kern_k_thread_context.cpp b/libraries/libmesosphere/source/arch/arm64/kern_k_thread_context.cpp index 1fb245dd8..321f43565 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_k_thread_context.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_k_thread_context.cpp @@ -52,11 +52,11 @@ namespace ams::kern::arch::arm64 { } uintptr_t SetupStackForUserModeThreadStarter(KVirtualAddress pc, KVirtualAddress k_sp, KVirtualAddress u_sp, uintptr_t arg, const bool is_64_bit) { - /* NOTE: Stack layout on entry looks like following: */ - /* SP */ - /* | */ - /* v */ - /* | KExceptionContext (size 0x120) | KThread::StackParameters (size 0x30) | */ + /* NOTE: Stack layout on entry looks like following: */ + /* SP */ + /* | */ + /* v */ + /* | KExceptionContext (size 0x120) | KThread::StackParameters (size 0x130) | */ KExceptionContext *ctx = GetPointer<KExceptionContext>(k_sp) - 1; /* Clear context. */ @@ -92,12 +92,12 @@ namespace ams::kern::arch::arm64 { } uintptr_t SetupStackForSupervisorModeThreadStarter(KVirtualAddress pc, KVirtualAddress sp, uintptr_t arg) { - /* NOTE: Stack layout on entry looks like following: */ - /* SP */ - /* | */ - /* v */ - /* | u64 argument | u64 entrypoint | KThread::StackParameters (size 0x30) | */ - static_assert(sizeof(KThread::StackParameters) == 0x30); + /* NOTE: Stack layout on entry looks like following: */ + /* SP */ + /* | */ + /* v */ + /* | u64 argument | u64 entrypoint | KThread::StackParameters (size 0x130) | */ + static_assert(sizeof(KThread::StackParameters) == 0x130); u64 *stack = GetPointer<u64>(sp); *(--stack) = GetInteger(pc); @@ -142,9 +142,8 @@ namespace ams::kern::arch::arm64 { /* Clear FPU state. */ m_fpcr = 0; m_fpsr = 0; - m_cpacr = 0; - for (size_t i = 0; i < util::size(m_fpu_registers); i++) { - m_fpu_registers[i] = 0; + for (size_t i = 0; i < util::size(m_callee_saved_fpu.fpu64.v); ++i) { + m_callee_saved_fpu.fpu64.v[i] = 0; } /* Lock the context, if we're a main thread. */ @@ -153,37 +152,18 @@ namespace ams::kern::arch::arm64 { R_SUCCEED(); } - Result KThreadContext::Finalize() { - /* This doesn't actually do anything. */ - R_SUCCEED(); - } - void KThreadContext::SetArguments(uintptr_t arg0, uintptr_t arg1) { u64 *stack = reinterpret_cast<u64 *>(m_sp); stack[0] = arg0; stack[1] = arg1; } - void KThreadContext::FpuContextSwitchHandler(KThread *thread) { - MESOSPHERE_ASSERT(!KInterruptManager::AreInterruptsEnabled()); - MESOSPHERE_ASSERT(!IsFpuEnabled()); - - /* Enable the FPU. 
*/ - EnableFpu(); - - /* Restore the FPU registers. */ - KProcess *process = thread->GetOwnerProcess(); - MESOSPHERE_ASSERT(process != nullptr); - if (process->Is64Bit()) { - RestoreFpuRegisters64(thread->GetContext()); - } else { - RestoreFpuRegisters32(thread->GetContext()); - } - } - void KThreadContext::CloneFpuStatus() { u64 pcr, psr; cpu::InstructionMemoryBarrier(); + + KScopedInterruptDisable di; + if (IsFpuEnabled()) { __asm__ __volatile__("mrs %[pcr], fpcr" : [pcr]"=r"(pcr) :: "memory"); __asm__ __volatile__("mrs %[psr], fpsr" : [psr]"=r"(psr) :: "memory"); @@ -196,18 +176,6 @@ namespace ams::kern::arch::arm64 { this->SetFpsr(psr); } - void KThreadContext::SetFpuRegisters(const u128 *v, bool is_64_bit) { - if (is_64_bit) { - for (size_t i = 0; i < KThreadContext::NumFpuRegisters; ++i) { - m_fpu_registers[i] = v[i]; - } - } else { - for (size_t i = 0; i < KThreadContext::NumFpuRegisters / 2; ++i) { - m_fpu_registers[i] = v[i]; - } - } - } - void GetUserContext(ams::svc::ThreadContext *out, const KThread *thread) { MESOSPHERE_ASSERT(KScheduler::IsSchedulerLockedByCurrentThread()); MESOSPHERE_ASSERT(thread->IsSuspended()); @@ -244,9 +212,17 @@ namespace ams::kern::arch::arm64 { /* Copy fpu registers. */ static_assert(util::size(ams::svc::ThreadContext{}.v) == KThreadContext::NumFpuRegisters); - const u128 *f = t_ctx->GetFpuRegisters(); - for (size_t i = 0; i < KThreadContext::NumFpuRegisters; ++i) { - out->v[i] = f[i]; + static_assert(KThreadContext::NumCallerSavedFpuRegisters == KThreadContext::NumCalleeSavedFpuRegisters * 3); + static_assert(KThreadContext::NumFpuRegisters == KThreadContext::NumCallerSavedFpuRegisters + KThreadContext::NumCalleeSavedFpuRegisters); + const auto &caller_save_fpu = thread->GetCallerSaveFpuRegisters().fpu64; + const auto &callee_save_fpu = t_ctx->GetCalleeSaveFpuRegisters().fpu64; + + if (!thread->IsCallingSvc() || thread->IsInUsermodeExceptionHandler()) { + KThreadContext::GetFpuRegisters(out->v, caller_save_fpu, callee_save_fpu); + } else { + for (size_t i = 0; i < KThreadContext::NumCalleeSavedFpuRegisters; ++i) { + out->v[(KThreadContext::NumCallerSavedFpuRegisters / 3) + i] = caller_save_fpu.v[i]; + } } } else { /* Set special registers. */ @@ -271,12 +247,17 @@ namespace ams::kern::arch::arm64 { /* Copy fpu registers. 
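
The static_asserts above pin down the new register partition: 32 Q registers split into 24 caller-save (q0-q7 and q16-q31 under AAPCS64) and 8 callee-save (q8-q15), with the caller-save bank three times the callee-save bank. A compilable sketch of the merge that GetFpuRegisters performs under that assumption; `u128` is modeled as a pair of u64 and the function name marks this as a sketch, not the kernel's implementation:

```cpp
#include <array>
#include <cstddef>

struct u128 { unsigned long long lo, hi; };

constexpr std::size_t NumFpuRegisters            = 32;
constexpr std::size_t NumCalleeSavedFpuRegisters = 8;
constexpr std::size_t NumCallerSavedFpuRegisters = 24;
static_assert(NumCallerSavedFpuRegisters == NumCalleeSavedFpuRegisters * 3);
static_assert(NumFpuRegisters == NumCallerSavedFpuRegisters + NumCalleeSavedFpuRegisters);

// Interleave the two banks back into the flat svc::ThreadContext::v layout:
// q0-q7 from the caller-save buffer, q8-q15 from the callee-save bank in the
// thread context, q16-q31 from the rest of the caller-save buffer.
void GetFpuRegistersSketch(u128 (&out)[NumFpuRegisters],
                           const std::array<u128, NumCallerSavedFpuRegisters> &caller,
                           const std::array<u128, NumCalleeSavedFpuRegisters> &callee) {
    std::size_t c = 0;
    for (std::size_t i = 0; i < 8;  ++i) { out[i]      = caller[c++]; } /* q0-q7   */
    for (std::size_t i = 0; i < 8;  ++i) { out[8 + i]  = callee[i];   } /* q8-q15  */
    for (std::size_t i = 0; i < 16; ++i) { out[16 + i] = caller[c++]; } /* q16-q31 */
}
```
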
*/ static_assert(util::size(ams::svc::ThreadContext{}.v) == KThreadContext::NumFpuRegisters); - const u128 *f = t_ctx->GetFpuRegisters(); - for (size_t i = 0; i < KThreadContext::NumFpuRegisters / 2; ++i) { - out->v[i] = f[i]; - } - for (size_t i = KThreadContext::NumFpuRegisters / 2; i < KThreadContext::NumFpuRegisters; ++i) { - out->v[i] = 0; + static_assert(KThreadContext::NumCallerSavedFpuRegisters == KThreadContext::NumCalleeSavedFpuRegisters * 3); + static_assert(KThreadContext::NumFpuRegisters == KThreadContext::NumCallerSavedFpuRegisters + KThreadContext::NumCalleeSavedFpuRegisters); + const auto &caller_save_fpu = thread->GetCallerSaveFpuRegisters().fpu32; + const auto &callee_save_fpu = t_ctx->GetCalleeSaveFpuRegisters().fpu32; + + if (!thread->IsCallingSvc() || thread->IsInUsermodeExceptionHandler()) { + KThreadContext::GetFpuRegisters(out->v, caller_save_fpu, callee_save_fpu); + } else { + for (size_t i = 0; i < KThreadContext::NumCalleeSavedFpuRegisters / 2; ++i) { + out->v[((KThreadContext::NumCallerSavedFpuRegisters / 3) / 2) + i] = caller_save_fpu.v[i]; + } } } diff --git a/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_exception_asm.s b/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_exception_asm.s index 8a97715aa..da0b65fb8 100644 --- a/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_exception_asm.s +++ b/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_exception_asm.s @@ -13,7 +13,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#include +#include /* ams::kern::svc::CallReturnFromException64(Result result) */ .section .text._ZN3ams4kern3svc25CallReturnFromException64Ev, "ax", %progbits @@ -82,8 +82,20 @@ _ZN3ams4kern3svc14RestoreContextEm: b 0b 1: /* We're done with DPC, and should return from the svc. */ - /* Clear our in-SVC note. */ - strb wzr, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_CALLING_SVC)] + + /* Get our exception flags. */ + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Clear in-svc and needs-fpu-restore flags. */ + and w10, w9, #(~(THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED)) + and w10, w10, #(~(THREAD_EXCEPTION_FLAG_IS_CALLING_SVC)) + strb w10, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* If we don't need to restore the fpu, skip restoring it. */ + tbz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 3f + + /* Enable and restore the fpu. */ + ENABLE_AND_RESTORE_FPU(x10, x8, x9, w8, w9, 2, 3) /* Restore registers. */ ldp x30, x8, [sp, #(EXCEPTION_CONTEXT_X30_SP)] diff --git a/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_handlers_asm.s b/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_handlers_asm.s index b7371c89d..d795c6aae 100644 --- a/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_handlers_asm.s +++ b/libraries/libmesosphere/source/arch/arm64/svc/kern_svc_handlers_asm.s @@ -14,7 +14,7 @@ * along with this program. If not, see . */ #include -#include +#include /* ams::kern::arch::arm64::SvcHandler64() */ .section .text._ZN3ams4kern4arch5arm6412SvcHandler64Ev, "ax", %progbits @@ -81,9 +81,10 @@ _ZN3ams4kern4arch5arm6412SvcHandler64Ev: cbz x11, 3f /* Note that we're calling the SVC. 
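
The old one-byte `is_calling_svc` note becomes one bit in a shared exception-flags byte, which is why SVC entry is now a load/orr/store and the return path masks two bits off in a single store. A C++ model of those two sequences; the bit positions below are placeholders, since the real indices live in kern_assembly_offsets.h:

```cpp
#include <cstdint>

// Placeholder bit assignments for the THREAD_EXCEPTION_FLAG_* values.
enum ExceptionFlag : std::uint8_t {
    ExceptionFlag_IsCallingSvc                 = 1u << 0,
    ExceptionFlag_IsInUsermodeExceptionHandler = 1u << 1,
    ExceptionFlag_IsFpuContextRestoreNeeded    = 1u << 2,
    ExceptionFlag_IsFpu64Bit                   = 1u << 3,
};

// SVC entry: "orr w9, w9, #(THREAD_EXCEPTION_FLAG_IS_CALLING_SVC)".
inline void OnSvcEntry(std::uint8_t &flags) {
    flags |= ExceptionFlag_IsCallingSvc;
}

// SVC return: both the in-svc and needs-fpu-restore bits drop in one store.
inline void OnSvcReturn(std::uint8_t &flags) {
    flags &= static_cast<std::uint8_t>(
        ~(ExceptionFlag_IsCallingSvc | ExceptionFlag_IsFpuContextRestoreNeeded));
}
```
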
*/ - mov w10, #1 - strb w10, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_CALLING_SVC)] - strb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CURRENT_SVC_ID)] + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + orr w9, w9, #(THREAD_EXCEPTION_FLAG_IS_CALLING_SVC) + strb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + strb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CURRENT_SVC_ID)] /* If we should, trace the svc entry. */ #if defined(MESOSPHERE_BUILD_FOR_TRACING) @@ -111,7 +112,7 @@ _ZN3ams4kern4arch5arm6412SvcHandler64Ev: 2: /* We completed the SVC, and we should handle DPC. */ /* Check the dpc flags. */ ldrb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_DPC_FLAGS)] - cbz w8, 4f + cbz w8, 5f /* We have DPC to do! */ /* Save registers and call ams::kern::KDpcManager::HandleDpc(). */ @@ -150,7 +151,18 @@ _ZN3ams4kern4arch5arm6412SvcHandler64Ev: mov x0, sp bl _ZN3ams4kern4arch5arm6415HandleExceptionEPNS2_17KExceptionContextE - /* Restore registers. */ + /* If we don't need to restore the fpu, skip restoring it. */ + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + tbz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 4f + + /* Clear the needs-fpu-restore flag. */ + and w9, w9, #(~THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Enable and restore the fpu. */ + ENABLE_AND_RESTORE_FPU64(x10, x8, x9, w8, w9) + +4: /* Restore registers. */ ldp x30, x8, [sp, #(EXCEPTION_CONTEXT_X30_SP)] ldp x9, x10, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] ldr x11, [sp, #(EXCEPTION_CONTEXT_TPIDR)] @@ -184,9 +196,7 @@ _ZN3ams4kern4arch5arm6412SvcHandler64Ev: add sp, sp, #(EXCEPTION_CONTEXT_SIZE) eret -4: /* Return from SVC. */ - /* Clear our in-SVC note. */ - strb wzr, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_CALLING_SVC)] +5: /* Return from SVC. */ /* If we should, trace the svc exit. */ #if defined(MESOSPHERE_BUILD_FOR_TRACING) @@ -204,7 +214,60 @@ _ZN3ams4kern4arch5arm6412SvcHandler64Ev: add sp, sp, #0x40 #endif - /* Restore registers. */ + /* Get our exception flags. */ + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Clear in-svc and needs-fpu-restore flags. */ + and w8, w9, #(~(THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED)) + and w8, w8, #(~(THREAD_EXCEPTION_FLAG_IS_CALLING_SVC)) + strb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* If we don't need to restore the fpu, skip restoring it. */ + tbz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 7f + + /* If we need to restore the fpu, check if we need to do a full restore. */ + tbnz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_USERMODE_EXCEPTION_HANDLER), 6f + + /* Enable the fpu. */ + ENABLE_FPU(x8) + + /* Get the thread context and restore fpsr/fpcr. */ + GET_THREAD_CONTEXT_AND_RESTORE_FPCR_FPSR(x10, x8, x9, w8, w9) + + /* Restore callee-saved registers to 64-bit fpu. */ + RESTORE_FPU64_CALLEE_SAVE_REGISTERS(x10) + + /* Clear caller-saved registers to 64-bit fpu. 
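
The return-from-SVC FPU policy that the flag tests above implement can be stated compactly: nothing to do if no restore is pending; a full restore if a usermode exception handler may inspect every register; otherwise reload only the callee-save bank and zero the caller-save registers, which are dead across an SVC by the ABI. A sketch with function pointers standing in for the assembly macros; this is a model of the control flow, not the kernel's code:

```cpp
// Stand-ins for the assembly macros on this path.
struct FpuRestoreOps {
    void (*enable_fpu)();          // ENABLE_FPU
    void (*restore_callee_save)(); // q8-q15 from the thread context
    void (*restore_all)();         // ENABLE_AND_RESTORE_FPU64
    void (*zero_caller_save)();    // the movi v*.2d, #0 block below
};

inline void ReturnFromSvcFpuPath(bool restore_needed, bool in_usermode_exception,
                                 const FpuRestoreOps &ops) {
    if (!restore_needed) {
        return; // FPU state never left the registers; nothing to reload
    }
    if (in_usermode_exception) {
        ops.restore_all();          // handler may rely on every register
    } else {
        ops.enable_fpu();
        ops.restore_callee_save();  // caller-save regs are dead across an SVC,
        ops.zero_caller_save();     // so clear them instead of reloading them
    }
}
```
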
*/ + movi v0.2d, #0 + movi v1.2d, #0 + movi v2.2d, #0 + movi v3.2d, #0 + movi v4.2d, #0 + movi v5.2d, #0 + movi v6.2d, #0 + movi v7.2d, #0 + movi v16.2d, #0 + movi v17.2d, #0 + movi v18.2d, #0 + movi v19.2d, #0 + movi v20.2d, #0 + movi v21.2d, #0 + movi v22.2d, #0 + movi v23.2d, #0 + movi v24.2d, #0 + movi v25.2d, #0 + movi v26.2d, #0 + movi v27.2d, #0 + movi v28.2d, #0 + movi v29.2d, #0 + movi v30.2d, #0 + movi v31.2d, #0 + b 7f + +6: /* We need to do a full fpu restore. */ + ENABLE_AND_RESTORE_FPU64(x10, x8, x9, w8, w9) + +7: /* Restore registers. */ ldp x30, x8, [sp, #(EXCEPTION_CONTEXT_X30_SP)] ldp x9, x10, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] ldr x11, [sp, #(EXCEPTION_CONTEXT_TPIDR)] @@ -273,38 +336,40 @@ _ZN3ams4kern4arch5arm6412SvcHandler32Ev: stp x14, xzr, [sp, #(EXCEPTION_CONTEXT_X14_X15)] /* Check if the SVC index is out of range. */ - mrs x16, esr_el1 - and x16, x16, #0xFF - cmp x16, #(AMS_KERN_NUM_SUPERVISOR_CALLS) + mrs x8, esr_el1 + and x8, x8, #0xFF + cmp x8, #(AMS_KERN_NUM_SUPERVISOR_CALLS) b.ge 3f /* Check the specific SVC permission bit for allowal. */ - mov x20, sp - add x20, x20, x16, lsr#3 - ldrb w20, [x20, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_SVC_PERMISSION)] - and x17, x16, #0x7 - lsr x17, x20, x17 - tst x17, #1 + mov x9, sp + add x9, x9, x8, lsr#3 + ldrb w9, [x9, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_SVC_PERMISSION)] + and x10, x8, #0x7 + lsr x10, x9, x10 + tst x10, #1 b.eq 3f /* Check if our disable count allows us to call SVCs. */ - mrs x15, tpidrro_el0 - ldrh w15, [x15, #(THREAD_LOCAL_REGION_DISABLE_COUNT)] - cbz w15, 1f + mrs x10, tpidrro_el0 + ldrh w10, [x10, #(THREAD_LOCAL_REGION_DISABLE_COUNT)] + cbz w10, 1f /* It might not, so check the stack params to see if we must not allow the SVC. */ - ldrb w15, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_PINNED)] - cbz w15, 3f + ldrb w10, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_PINNED)] + cbz w10, 3f 1: /* We can call the SVC. */ - adr x15, _ZN3ams4kern3svc16SvcTable64From32E - ldr x19, [x15, x16, lsl#3] - cbz x19, 3f + adr x10, _ZN3ams4kern3svc16SvcTable64From32E + ldr x11, [x10, x8, lsl#3] + cbz x11, 3f + /* Note that we're calling the SVC. */ - mov w15, #1 - strb w15, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_CALLING_SVC)] - strb w16, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CURRENT_SVC_ID)] + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + orr w9, w9, #(THREAD_EXCEPTION_FLAG_IS_CALLING_SVC) + strb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + strb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CURRENT_SVC_ID)] /* If we should, trace the svc entry. */ #if defined(MESOSPHERE_BUILD_FOR_TRACING) @@ -313,26 +378,26 @@ _ZN3ams4kern4arch5arm6412SvcHandler32Ev: stp x2, x3, [sp, #(8 * 2)] stp x4, x5, [sp, #(8 * 4)] stp x6, x7, [sp, #(8 * 6)] - str x19, [sp, #(8 * 8)] + str x11, [sp, #(8 * 8)] mov x0, sp bl _ZN3ams4kern3svc13TraceSvcEntryEPKm ldp x0, x1, [sp, #(8 * 0)] ldp x2, x3, [sp, #(8 * 2)] ldp x4, x5, [sp, #(8 * 4)] ldp x6, x7, [sp, #(8 * 6)] - ldr x19, [sp, #(8 * 8)] + ldr x11, [sp, #(8 * 8)] add sp, sp, #0x50 #endif /* Invoke the SVC handler. */ msr daifclr, #2 - blr x19 + blr x11 msr daifset, #2 2: /* We completed the SVC, and we should handle DPC. */ /* Check the dpc flags. 
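
The permission check rewritten above (the `lsr #3` / `and #0x7` pair) is a plain bit-per-SVC bitmap test, one byte per eight SVC ids. In C:

```cpp
#include <cstdint>

// One bit per SVC id: byte index is id / 8, bit index is id % 8 -- exactly
// the "add x9, x9, x8, lsr#3" + "and x10, x8, #0x7" math in the handler.
inline bool IsSvcAllowed(const std::uint8_t *svc_permission_bitmap, std::uint8_t svc_id) {
    const std::uint8_t byte = svc_permission_bitmap[svc_id >> 3];
    return ((byte >> (svc_id & 0x7)) & 1) != 0;
}
```

The handler performs the same test in-place against the permission bytes stored in the thread's stack parameters, branching to the fault path when the bit is clear.
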
*/ - ldrb w16, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_DPC_FLAGS)] - cbz w16, 4f + ldrb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_DPC_FLAGS)] + cbz w8, 5f /* We have DPC to do! */ /* Save registers and call ams::kern::KDpcManager::HandleDpc(). */ @@ -368,18 +433,29 @@ _ZN3ams4kern4arch5arm6412SvcHandler32Ev: mov x0, sp bl _ZN3ams4kern4arch5arm6415HandleExceptionEPNS2_17KExceptionContextE - /* Restore registers. */ - ldp x17, x20, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] - ldr x19, [sp, #(EXCEPTION_CONTEXT_TPIDR)] + /* If we don't need to restore the fpu, skip restoring it. */ + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + tbz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 4f + + /* Clear the needs-fpu-restore flag. */ + and w9, w9, #(~THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Enable and restore the fpu. */ + ENABLE_AND_RESTORE_FPU32(x10, x8, x9, w8, w9) + +4: /* Restore registers. */ + ldp x9, x10, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] + ldr x11, [sp, #(EXCEPTION_CONTEXT_TPIDR)] #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) /* Since we're returning from an SVC, make sure SPSR.SS is cleared so that if we're single-stepping we break instantly on the instruction after the SVC. */ - bic x20, x20, #(1 << 21) + bic x10, x10, #(1 << 21) #endif - msr elr_el1, x17 - msr spsr_el1, x20 - msr tpidr_el0, x19 + msr elr_el1, x9 + msr spsr_el1, x10 + msr tpidr_el0, x11 ldp x0, x1, [sp, #(EXCEPTION_CONTEXT_X0_X1)] ldp x2, x3, [sp, #(EXCEPTION_CONTEXT_X2_X3)] ldp x4, x5, [sp, #(EXCEPTION_CONTEXT_X4_X5)] @@ -393,9 +469,7 @@ _ZN3ams4kern4arch5arm6412SvcHandler32Ev: add sp, sp, #(EXCEPTION_CONTEXT_SIZE) eret -4: /* Return from SVC. */ - /* Clear our in-SVC note. */ - strb wzr, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_IS_CALLING_SVC)] +5: /* Return from SVC. */ /* If we should, trace the svc exit. */ #if defined(MESOSPHERE_BUILD_FOR_TRACING) @@ -413,22 +487,63 @@ _ZN3ams4kern4arch5arm6412SvcHandler32Ev: add sp, sp, #0x40 #endif - /* Restore registers. */ + /* Get our exception flags. */ + ldrb w9, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Clear in-svc and needs-fpu-restore flags. */ + and w8, w9, #(~(THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED)) + and w8, w8, #(~(THREAD_EXCEPTION_FLAG_IS_CALLING_SVC)) + strb w8, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* If we don't need to restore the fpu, skip restoring it. */ + tbz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 7f + + /* If we need to restore the fpu, check if we need to do a full restore. */ + tbnz w9, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_USERMODE_EXCEPTION_HANDLER), 6f + + /* Enable the fpu. */ + ENABLE_FPU(x8) + + /* Get the thread context and restore fpsr/fpcr. */ + GET_THREAD_CONTEXT_AND_RESTORE_FPCR_FPSR(x10, x8, x9, w8, w9) + + /* Restore callee-saved registers to 32-bit fpu. */ + RESTORE_FPU32_CALLEE_SAVE_REGISTERS(x10) + + /* Clear caller-saved registers to 32-bit fpu. */ + movi v0.2d, #0 + movi v1.2d, #0 + movi v2.2d, #0 + movi v3.2d, #0 + movi v8.2d, #0 + movi v9.2d, #0 + movi v10.2d, #0 + movi v11.2d, #0 + movi v12.2d, #0 + movi v13.2d, #0 + movi v14.2d, #0 + movi v15.2d, #0 + b 7f + +6: /* We need to do a full fpu restore. */ + ENABLE_AND_RESTORE_FPU32(x10, x8, x9, w8, w9) + +7: /* Restore registers. 
*/ + ldp x9, x10, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] + ldr x11, [sp, #(EXCEPTION_CONTEXT_TPIDR)] + + #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) + /* Since we're returning from an SVC, make sure SPSR.SS is cleared so that if we're single-stepping we break instantly on the instruction after the SVC. */ + bic x10, x10, #(1 << 21) + #endif + + msr elr_el1, x9 + msr spsr_el1, x10 + msr tpidr_el0, x11 ldp x8, x9, [sp, #(EXCEPTION_CONTEXT_X8_X9)] ldp x10, x11, [sp, #(EXCEPTION_CONTEXT_X10_X11)] ldp x12, x13, [sp, #(EXCEPTION_CONTEXT_X12_X13)] ldp x14, xzr, [sp, #(EXCEPTION_CONTEXT_X14_X15)] - ldp x17, x20, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] - ldr x19, [sp, #(EXCEPTION_CONTEXT_TPIDR)] - - #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) - /* Since we're returning from an SVC, make sure SPSR.SS is cleared so that if we're single-stepping we break instantly on the instruction after the SVC. */ - bic x20, x20, #(1 << 21) - #endif - - msr elr_el1, x17 - msr spsr_el1, x20 - msr tpidr_el0, x19 /* Return. */ add sp, sp, #(EXCEPTION_CONTEXT_SIZE) diff --git a/libraries/libmesosphere/source/kern_k_debug_base.cpp b/libraries/libmesosphere/source/kern_k_debug_base.cpp index 5493e62fa..c971a94b0 100644 --- a/libraries/libmesosphere/source/kern_k_debug_base.cpp +++ b/libraries/libmesosphere/source/kern_k_debug_base.cpp @@ -447,7 +447,7 @@ namespace ams::kern { { auto end = target->GetThreadList().end(); for (auto it = target->GetThreadList().begin(); it != end; ++it) { - it->ClearSingleStep(); + it->ClearHardwareSingleStep(); } } #endif @@ -595,13 +595,13 @@ namespace ams::kern { { if ((context_flags & ams::svc::ThreadContextFlag_SetSingleStep) != 0) { /* Set single step. */ - thread->SetSingleStep(); + thread->SetHardwareSingleStep(); /* If no other thread flags are present, we're done. */ R_SUCCEED_IF((context_flags & ~ams::svc::ThreadContextFlag_SetSingleStep) == 0); } else if ((context_flags & ams::svc::ThreadContextFlag_ClearSingleStep) != 0) { /* Clear single step. */ - thread->ClearSingleStep(); + thread->ClearHardwareSingleStep(); /* If no other thread flags are present, we're done. */ R_SUCCEED_IF((context_flags & ~ams::svc::ThreadContextFlag_ClearSingleStep) == 0); @@ -1022,7 +1022,7 @@ namespace ams::kern { for (auto it = process->GetThreadList().begin(); it != end; ++it) { #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) /* Clear the thread's single-step state. */ - it->ClearSingleStep(); + it->ClearHardwareSingleStep(); #endif if (resume) { diff --git a/libraries/libmesosphere/source/kern_k_scheduler.cpp b/libraries/libmesosphere/source/kern_k_scheduler.cpp index df9ec681f..810ed1c37 100644 --- a/libraries/libmesosphere/source/kern_k_scheduler.cpp +++ b/libraries/libmesosphere/source/kern_k_scheduler.cpp @@ -255,7 +255,7 @@ namespace ams::kern { /* in EL0...which implies a return-from-exception has occurred since we set the bit. Thus, forcing */ /* an ISB is unnecessary, and we can modify the register safely and be confident it will affect the next */ /* userland instruction executed. */ - cpu::MonitorDebugSystemControlRegisterAccessor().SetSoftwareStep(next_thread->IsSingleStep()).Store(); + cpu::MonitorDebugSystemControlRegisterAccessor().SetSoftwareStep(next_thread->IsHardwareSingleStep()).Store(); #endif /* Switch the current process, if we're switching processes. 
*/ diff --git a/libraries/libmesosphere/source/kern_k_thread.cpp b/libraries/libmesosphere/source/kern_k_thread.cpp index a4f06e236..38f7f8a8b 100644 --- a/libraries/libmesosphere/source/kern_k_thread.cpp +++ b/libraries/libmesosphere/source/kern_k_thread.cpp @@ -68,6 +68,91 @@ namespace ams::kern { } + ALWAYS_INLINE void KThread::SetPinnedSvcPermissions() { + /* Get our stack parameters. */ + auto &sp = this->GetStackParameters(); + + /* Get our parent's svc permissions. */ + MESOSPHERE_ASSERT(m_parent != nullptr); + const auto &svc_permissions = m_parent->GetSvcPermissions(); + + /* Get whether we have access to return from exception. */ + const bool return_from_exception = sp.svc_access_flags[svc::SvcId_ReturnFromException]; + + /* Clear all permissions. */ + sp.svc_access_flags.Reset(); + + /* Set SynchronizePreemptionState if allowed. */ + if (svc_permissions[svc::SvcId_SynchronizePreemptionState]) { + sp.svc_access_flags[svc::SvcId_SynchronizePreemptionState] = true; + } + + /* If we previously had ReturnFromException, potentially grant it and GetInfo. */ + if (return_from_exception) { + /* Set ReturnFromException (guaranteed allowed, if we're here). */ + sp.svc_access_flags[svc::SvcId_ReturnFromException] = true; + + /* Set GetInfo if allowed. */ + if (svc_permissions[svc::SvcId_GetInfo]) { + sp.svc_access_flags[svc::SvcId_GetInfo] = true; + } + } + } + + ALWAYS_INLINE void KThread::SetUnpinnedSvcPermissions() { + /* Get our stack parameters. */ + auto &sp = this->GetStackParameters(); + + /* Get our parent's svc permissions. */ + MESOSPHERE_ASSERT(m_parent != nullptr); + const auto &svc_permissions = m_parent->GetSvcPermissions(); + + /* Get whether we have access to return from exception. */ + const bool return_from_exception = sp.svc_access_flags[svc::SvcId_ReturnFromException]; + + /* Copy permissions. */ + sp.svc_access_flags = svc_permissions; + + /* Clear specific SVCs based on our state. */ + sp.svc_access_flags[svc::SvcId_SynchronizePreemptionState] = false; + + if (!return_from_exception) { + sp.svc_access_flags[svc::SvcId_ReturnFromException] = false; + } + } + + ALWAYS_INLINE void KThread::SetUsermodeExceptionSvcPermissions() { + /* Get our stack parameters. */ + auto &sp = this->GetStackParameters(); + + /* Get our parent's svc permissions. */ + MESOSPHERE_ASSERT(m_parent != nullptr); + const auto &svc_permissions = m_parent->GetSvcPermissions(); + + /* Set ReturnFromException if allowed. */ + if (svc_permissions[svc::SvcId_ReturnFromException]) { + sp.svc_access_flags[svc::SvcId_ReturnFromException] = true; + } + + /* Set GetInfo if allowed. */ + if (svc_permissions[svc::SvcId_GetInfo]) { + sp.svc_access_flags[svc::SvcId_GetInfo] = true; + } + } + + ALWAYS_INLINE void KThread::ClearUsermodeExceptionSvcPermissions() { + /* Get our stack parameters. */ + auto &sp = this->GetStackParameters(); + + /* Clear ReturnFromException. */ + sp.svc_access_flags[svc::SvcId_ReturnFromException] = false; + + /* If pinned, clear GetInfo. */ + if (sp.is_pinned) { + sp.svc_access_flags[svc::SvcId_GetInfo] = false; + } + } + Result KThread::Initialize(KThreadFunction func, uintptr_t arg, void *kern_stack_top, KProcessAddress user_stack_top, s32 prio, s32 virt_core, KProcess *owner, ThreadType type) { /* Assert parameters are valid. */ MESOSPHERE_ASSERT_THIS(); @@ -209,18 +294,23 @@ namespace ams::kern { const bool is_64_bit = m_parent ? 
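
SetPinnedSvcPermissions above implements a small policy: while pinned, everything is revoked except SynchronizePreemptionState, and ReturnFromException/GetInfo survive only if an exception was already in flight. A condensed model using std::bitset in place of the kernel's svc_access_flags type; the table size and id values are placeholders for the real svc::SvcId_* constants:

```cpp
#include <bitset>
#include <cstddef>

constexpr std::size_t NumSvcIds = 0xC0;               // placeholder capacity
constexpr std::size_t Id_SynchronizePreemptionState = 1; // placeholder ids
constexpr std::size_t Id_ReturnFromException        = 2;
constexpr std::size_t Id_GetInfo                    = 3;

using SvcFlags = std::bitset<NumSvcIds>;

// parent  = the process's full permission set
// current = the thread's current stack-parameter flags
SvcFlags ComputePinnedPermissions(const SvcFlags &parent, const SvcFlags &current) {
    const bool had_return_from_exception = current[Id_ReturnFromException];

    SvcFlags pinned; // "Clear all permissions."
    pinned[Id_SynchronizePreemptionState] = parent[Id_SynchronizePreemptionState];

    if (had_return_from_exception) {
        pinned[Id_ReturnFromException] = true; // guaranteed allowed here
        pinned[Id_GetInfo] = parent[Id_GetInfo];
    }
    return pinned;
}
```

Unpinning is the inverse: copy the parent's full set, then mask SynchronizePreemptionState back off and drop ReturnFromException unless it was held before.
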
m_parent->Is64Bit() : IsDefault64Bit; const bool is_user = (type == ThreadType_User); const bool is_main = (type == ThreadType_Main); - m_thread_context.Initialize(reinterpret_cast(func), reinterpret_cast(this->GetStackTop()), GetInteger(user_stack_top), arg, is_user, is_64_bit, is_main); + this->GetContext().Initialize(reinterpret_cast(func), reinterpret_cast(this->GetStackTop()), GetInteger(user_stack_top), arg, is_user, is_64_bit, is_main); /* Setup the stack parameters. */ StackParameters &sp = this->GetStackParameters(); if (m_parent != nullptr) { - m_parent->CopySvcPermissionsTo(sp); + this->SetUnpinnedSvcPermissions(); + this->ClearUsermodeExceptionSvcPermissions(); } - sp.context = std::addressof(m_thread_context); - sp.cur_thread = this; - sp.disable_count = 1; + sp.caller_save_fpu_registers = std::addressof(m_caller_save_fpu_registers); + sp.cur_thread = this; + sp.disable_count = 1; this->SetInExceptionHandler(); + if (m_parent != nullptr && is_64_bit) { + this->SetFpu64Bit(); + } + /* Set thread ID. */ m_thread_id = g_thread_id++; @@ -329,9 +419,6 @@ namespace ams::kern { } } - /* Finalize the thread context. */ - m_thread_context.Finalize(); - /* Cleanup the kernel stack. */ if (m_kernel_stack_top != nullptr) { CleanupKernelStack(reinterpret_cast(m_kernel_stack_top)); @@ -411,6 +498,16 @@ namespace ams::kern { this->FinishTermination(); } + void KThread::OnEnterUsermodeException() { + this->SetUsermodeExceptionSvcPermissions(); + this->SetInUsermodeExceptionHandler(); + } + + void KThread::OnLeaveUsermodeException() { + this->ClearUsermodeExceptionSvcPermissions(); + this->ClearInUsermodeExceptionHandler(); + } + void KThread::Pin() { MESOSPHERE_ASSERT_THIS(); MESOSPHERE_ASSERT(KScheduler::IsSchedulerLockedByCurrentThread()); @@ -458,8 +555,7 @@ namespace ams::kern { } /* Update our SVC access permissions. */ - MESOSPHERE_ASSERT(m_parent != nullptr); - m_parent->CopyPinnedSvcPermissionsTo(this->GetStackParameters()); + this->SetPinnedSvcPermissions(); } void KThread::Unpin() { @@ -507,7 +603,7 @@ namespace ams::kern { /* Update our SVC access permissions. */ MESOSPHERE_ASSERT(m_parent != nullptr); - m_parent->CopyUnpinnedSvcPermissionsTo(this->GetStackParameters()); + this->SetUnpinnedSvcPermissions(); } /* Resume any threads that began waiting on us while we were pinned. */ diff --git a/libraries/libvapours/include/vapours/svc/arch/arm64/svc_thread_local_region.hpp b/libraries/libvapours/include/vapours/svc/arch/arm64/svc_thread_local_region.hpp index e7b457a7a..e32c9a572 100644 --- a/libraries/libvapours/include/vapours/svc/arch/arm64/svc_thread_local_region.hpp +++ b/libraries/libvapours/include/vapours/svc/arch/arm64/svc_thread_local_region.hpp @@ -25,9 +25,13 @@ namespace ams::svc::arch::arm64 { u32 message_buffer[MessageBufferSize / sizeof(u32)]; volatile u16 disable_count; volatile u16 interrupt_flag; + volatile u8 cache_maintenance_flag; /* TODO: How should we handle libnx vs Nintendo user thread local space? 
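
The new `cache_maintenance_flag` slots into the TLR directly after `interrupt_flag`, and the static_asserts below pin the offsets. A self-contained model that re-checks the same layout; field sizes mirror the header (0x100-byte message buffer, 0x200-byte region), with the tail kept as raw bytes rather than the header's TODO array:

```cpp
#include <cstddef>
#include <cstdint>

struct ThreadLocalRegionModel {
    std::uint32_t          message_buffer[0x100 / sizeof(std::uint32_t)];
    volatile std::uint16_t disable_count;
    volatile std::uint16_t interrupt_flag;
    volatile std::uint8_t  cache_maintenance_flag;
    std::uint8_t           reserved[0x200 - 0x105]; // user TLS area
};

static_assert(offsetof(ThreadLocalRegionModel, disable_count)          == 0x100);
static_assert(offsetof(ThreadLocalRegionModel, interrupt_flag)         == 0x102);
static_assert(offsetof(ThreadLocalRegionModel, cache_maintenance_flag) == 0x104);
static_assert(sizeof(ThreadLocalRegionModel) == 0x200);
```

Userland sets the flag around its cache maintenance loops, which is what lets the interrupt path above decide whether a data synchronization barrier is needed before rescheduling.
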
*/ uintptr_t TODO[(0x200 - 0x108) / sizeof(uintptr_t)]; }; + static_assert(__builtin_offsetof(ThreadLocalRegion, disable_count) == 0x100); + static_assert(__builtin_offsetof(ThreadLocalRegion, interrupt_flag) == 0x102); + static_assert(__builtin_offsetof(ThreadLocalRegion, cache_maintenance_flag) == 0x104); ALWAYS_INLINE ThreadLocalRegion *GetThreadLocalRegion() { ThreadLocalRegion *tlr; diff --git a/mesosphere/kernel/source/arch/arm64/exception_vectors.s b/mesosphere/kernel/source/arch/arm64/exception_vectors.s index 772c77228..88a93d51c 100644 --- a/mesosphere/kernel/source/arch/arm64/exception_vectors.s +++ b/mesosphere/kernel/source/arch/arm64/exception_vectors.s @@ -125,7 +125,7 @@ vector_entry synch_a64 vector_entry irq_a64 clrex - b _ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv + b _ZN3ams4kern4arch5arm6425EL0A64IrqExceptionHandlerEv check_vector_size irq_a64 vector_entry fiq_a64 @@ -148,7 +148,7 @@ vector_entry synch_a32 vector_entry irq_a32 clrex - b _ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv + b _ZN3ams4kern4arch5arm6425EL0A32IrqExceptionHandlerEv check_vector_size irq_a32 vector_entry fiq_a32 diff --git a/mesosphere/kernel/source/arch/arm64/kern_exception_handlers_asm.s b/mesosphere/kernel/source/arch/arm64/kern_exception_handlers_asm.s index 77aa3043f..a23cebc0e 100644 --- a/mesosphere/kernel/source/arch/arm64/kern_exception_handlers_asm.s +++ b/mesosphere/kernel/source/arch/arm64/kern_exception_handlers_asm.s @@ -13,7 +13,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#include +#include /* ams::kern::arch::arm64::EL1IrqExceptionHandler() */ .section .text._ZN3ams4kern4arch5arm6422EL1IrqExceptionHandlerEv, "ax", %progbits @@ -66,11 +66,11 @@ _ZN3ams4kern4arch5arm6422EL1IrqExceptionHandlerEv: /* Return from the exception. */ eret -/* ams::kern::arch::arm64::EL0IrqExceptionHandler() */ -.section .text._ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv, "ax", %progbits -.global _ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv -.type _ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv, %function -_ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv: +/* ams::kern::arch::arm64::EL0A64IrqExceptionHandler() */ +.section .text._ZN3ams4kern4arch5arm6425EL0A64IrqExceptionHandlerEv, "ax", %progbits +.global _ZN3ams4kern4arch5arm6425EL0A64IrqExceptionHandlerEv +.type _ZN3ams4kern4arch5arm6425EL0A64IrqExceptionHandlerEv, %function +_ZN3ams4kern4arch5arm6425EL0A64IrqExceptionHandlerEv: /* Save registers that need saving. */ sub sp, sp, #(EXCEPTION_CONTEXT_SIZE) @@ -105,7 +105,18 @@ _ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv: mov x0, #1 bl _ZN3ams4kern4arch5arm6417KInterruptManager15HandleInterruptEb - /* Restore state from the context. */ + /* If we don't need to restore the fpu, skip restoring it. */ + ldrb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + tbz w1, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 1f + + /* Clear the needs-fpu-restore flag. */ + and w1, w1, #(~THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Perform a full fpu restore. */ + ENABLE_AND_RESTORE_FPU64(x2, x0, x1, w0, w1) + +1: /* Restore state from the context. 
*/ ldp x30, x20, [sp, #(EXCEPTION_CONTEXT_X30_SP)] ldp x21, x22, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] ldr x23, [sp, #(EXCEPTION_CONTEXT_TPIDR)] @@ -141,6 +152,74 @@ _ZN3ams4kern4arch5arm6422EL0IrqExceptionHandlerEv: /* Return from the exception. */ eret +/* ams::kern::arch::arm64::EL0A32IrqExceptionHandler() */ +.section .text._ZN3ams4kern4arch5arm6425EL0A32IrqExceptionHandlerEv, "ax", %progbits +.global _ZN3ams4kern4arch5arm6425EL0A32IrqExceptionHandlerEv +.type _ZN3ams4kern4arch5arm6425EL0A32IrqExceptionHandlerEv, %function +_ZN3ams4kern4arch5arm6425EL0A32IrqExceptionHandlerEv: + /* Save registers that need saving. */ + sub sp, sp, #(EXCEPTION_CONTEXT_SIZE) + + stp x0, x1, [sp, #(EXCEPTION_CONTEXT_X0_X1)] + stp x2, x3, [sp, #(EXCEPTION_CONTEXT_X2_X3)] + stp x4, x5, [sp, #(EXCEPTION_CONTEXT_X4_X5)] + stp x6, x7, [sp, #(EXCEPTION_CONTEXT_X6_X7)] + stp x8, x9, [sp, #(EXCEPTION_CONTEXT_X8_X9)] + stp x10, x11, [sp, #(EXCEPTION_CONTEXT_X10_X11)] + stp x12, x13, [sp, #(EXCEPTION_CONTEXT_X12_X13)] + stp x14, x15, [sp, #(EXCEPTION_CONTEXT_X14_X15)] + + mrs x21, elr_el1 + mrs x22, spsr_el1 + mrs x23, tpidr_el0 + mov w22, w22 + + stp x21, x22, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] + str x23, [sp, #(EXCEPTION_CONTEXT_TPIDR)] + + /* Invoke KInterruptManager::HandleInterrupt(bool user_mode). */ + ldr x18, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CUR_THREAD)] + mov x0, #1 + bl _ZN3ams4kern4arch5arm6417KInterruptManager15HandleInterruptEb + + /* If we don't need to restore the fpu, skip restoring it. */ + ldrb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + tbz w1, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 1f + + /* Clear the needs-fpu-restore flag. */ + and w1, w1, #(~THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Perform a full fpu restore. */ + ENABLE_AND_RESTORE_FPU32(x2, x0, x1, w0, w1) + +1: /* Restore state from the context. */ + ldp x21, x22, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] + ldr x23, [sp, #(EXCEPTION_CONTEXT_TPIDR)] + + #if defined(MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP) + /* Since we're returning from an exception, set SPSR.SS so that we advance an instruction if single-stepping. */ + orr x22, x22, #(1 << 21) + #endif + + msr elr_el1, x21 + msr spsr_el1, x22 + msr tpidr_el0, x23 + + ldp x0, x1, [sp, #(EXCEPTION_CONTEXT_X0_X1)] + ldp x2, x3, [sp, #(EXCEPTION_CONTEXT_X2_X3)] + ldp x4, x5, [sp, #(EXCEPTION_CONTEXT_X4_X5)] + ldp x6, x7, [sp, #(EXCEPTION_CONTEXT_X6_X7)] + ldp x8, x9, [sp, #(EXCEPTION_CONTEXT_X8_X9)] + ldp x10, x11, [sp, #(EXCEPTION_CONTEXT_X10_X11)] + ldp x12, x13, [sp, #(EXCEPTION_CONTEXT_X12_X13)] + ldp x14, x15, [sp, #(EXCEPTION_CONTEXT_X14_X15)] + + add sp, sp, #(EXCEPTION_CONTEXT_SIZE) + + /* Return from the exception. */ + eret + /* ams::kern::arch::arm64::EL0SynchronousExceptionHandler() */ .section .text._ZN3ams4kern4arch5arm6430EL0SynchronousExceptionHandlerEv, "ax", %progbits .global _ZN3ams4kern4arch5arm6430EL0SynchronousExceptionHandlerEv @@ -155,23 +234,23 @@ _ZN3ams4kern4arch5arm6430EL0SynchronousExceptionHandlerEv: /* Is this an aarch32 SVC? */ cmp x17, #0x11 - b.eq 2f + b.eq 4f /* Is this an aarch64 SVC? */ cmp x17, #0x15 - b.eq 3f + b.eq 5f /* Is this an FPU error? */ cmp x17, #0x7 - b.eq 4f + b.eq 6f /* Is this a data abort? */ cmp x17, #0x24 - b.eq 5f + b.eq 7f /* Is this an instruction abort? 
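
The dispatch at the top of EL0SynchronousExceptionHandler keys off the exception class in ESR_EL1[31:26]; the immediates compared above are the architectural EC codes. In C form:

```cpp
#include <cstdint>

// Architectural EC values the handler compares against.
enum class ExceptionClass : std::uint32_t {
    FpSimdAccess        = 0x07, // trapped FP/SIMD -> FpuAccessExceptionHandler
    Svc32               = 0x11, // SVC from AArch32 -> SvcHandler32
    Svc64               = 0x15, // SVC from AArch64 -> SvcHandler64
    InstructionAbortEl0 = 0x20, // may be a TLB conflict
    DataAbortEl0        = 0x24, // may be a TLB conflict
};

inline ExceptionClass GetExceptionClass(std::uint64_t esr_el1) {
    return static_cast<ExceptionClass>((esr_el1 >> 26) & 0x3F);
}
```

Anything that is not one of these classes falls through to the generic HandleException path.
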
*/ cmp x17, #0x20 - b.eq 5f + b.eq 7f 1: /* The exception is not a data abort or instruction abort caused by a TLB conflict. */ /* It is also not an SVC or an FPU exception. Handle it generically! */ @@ -212,6 +291,17 @@ _ZN3ams4kern4arch5arm6430EL0SynchronousExceptionHandlerEv: mov x0, sp bl _ZN3ams4kern4arch5arm6415HandleExceptionEPNS2_17KExceptionContextE + /* If we don't need to restore the fpu, skip restoring it. */ + ldrb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + tbz w1, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 3f + + /* Clear the needs-fpu-restore flag. */ + and w1, w1, #(~THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Enable and restore the fpu. */ + ENABLE_AND_RESTORE_FPU(x2, x0, x1, w0, w1, 2, 3) + /* Restore state from the context. */ ldp x30, x20, [sp, #(EXCEPTION_CONTEXT_X30_SP)] ldp x21, x22, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] @@ -243,19 +333,19 @@ _ZN3ams4kern4arch5arm6430EL0SynchronousExceptionHandlerEv: /* Return from the exception. */ eret -2: /* SVC from aarch32. */ +4: /* SVC from aarch32. */ ldp x16, x17, [sp], 16 b _ZN3ams4kern4arch5arm6412SvcHandler32Ev -3: /* SVC from aarch64. */ +5: /* SVC from aarch64. */ ldp x16, x17, [sp], 16 b _ZN3ams4kern4arch5arm6412SvcHandler64Ev -4: /* FPU exception. */ +6: /* FPU exception. */ ldp x16, x17, [sp], 16 b _ZN3ams4kern4arch5arm6425FpuAccessExceptionHandlerEv -5: /* Check if there's a TLB conflict that caused the abort. */ +7: /* Check if there's a TLB conflict that caused the abort. */ and x17, x16, #0x3F cmp x17, #0x30 b.ne 1b @@ -265,20 +355,20 @@ _ZN3ams4kern4arch5arm6430EL0SynchronousExceptionHandlerEv: and x17, x17, #(0xFFFF << 48) /* Check if FAR is valid by examining the FnV bit. */ - tbnz x16, #10, 6f + tbnz x16, #10, 8f /* FAR is valid, so we can invalidate the address it holds. */ mrs x16, far_el1 lsr x16, x16, #12 orr x17, x16, x17 tlbi vae1, x17 - b 7f + b 9f -6: /* There's a TLB conflict and FAR isn't valid. */ +8: /* There's a TLB conflict and FAR isn't valid. */ /* Invalidate the entire TLB. */ tlbi aside1, x17 -7: /* Return from a TLB conflict. */ +9: /* Return from a TLB conflict. */ /* Ensure instruction consistency. */ dsb ish isb @@ -304,11 +394,11 @@ _ZN3ams4kern4arch5arm6430EL1SynchronousExceptionHandlerEv: /* Is this an instruction abort? */ cmp x0, #0x21 - b.eq 5f + b.eq 4f /* Is this a data abort? */ cmp x0, #0x25 - b.eq 5f + b.eq 4f 1: /* The exception is not a data abort or instruction abort caused by a TLB conflict. */ /* Load the exception stack top from otherwise "unused" virtual timer compare value. */ @@ -331,16 +421,16 @@ _ZN3ams4kern4arch5arm6430EL1SynchronousExceptionHandlerEv: mrs x0, esr_el1 lsr x1, x0, #0x1a cmp x1, #0x25 - b.ne 3f + b.ne 2f /* Data abort. Check if it was from trying to access userspace memory. */ mrs x1, elr_el1 adr x0, _ZN3ams4kern4arch5arm6432UserspaceAccessFunctionAreaBeginEv cmp x1, x0 - b.lo 3f + b.lo 2f adr x0, _ZN3ams4kern4arch5arm6430UserspaceAccessFunctionAreaEndEv cmp x1, x0 - b.hs 3f + b.hs 2f /* We aborted trying to access userspace memory. */ /* All functions that access user memory return a boolean for whether they succeeded. */ @@ -353,7 +443,7 @@ _ZN3ams4kern4arch5arm6430EL1SynchronousExceptionHandlerEv: msr elr_el1, x30 eret -3: /* The exception wasn't an triggered by copying memory from userspace. */ +2: /* The exception wasn't an triggered by copying memory from userspace. 
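
The TLB-conflict fallback above follows the architecture: a fault status code (DFSC/IFSC, the low 6 bits of ESR_EL1) of 0x30 means a TLB conflict abort, and the FnV bit (ESR bit 10) says whether FAR_EL1 holds the faulting address. Pseudocode for the EL0 variant, with the invalidation primitives as stand-in function pointers:

```cpp
#include <cstdint>

inline bool IsTlbConflictAbort(std::uint64_t esr) { return (esr & 0x3F) == 0x30; }
inline bool IsFarValid(std::uint64_t esr)         { return (esr & (1u << 10)) == 0; }

struct TlbOps {
    void (*invalidate_va)(std::uint64_t asid, std::uint64_t va_page); // tlbi vae1
    void (*invalidate_asid)(std::uint64_t asid);                      // tlbi aside1
};

inline void HandleEl0TlbConflict(std::uint64_t esr, std::uint64_t far,
                                 std::uint64_t asid, const TlbOps &ops) {
    if (IsFarValid(esr)) {
        ops.invalidate_va(asid, far >> 12); // just the faulting page
    } else {
        ops.invalidate_asid(asid);          // fall back to the whole ASID
    }
    // The real handler then issues "dsb ish; isb" before eret.
}
```

The EL1 variant is the same shape but uses `tlbi vaae1` / `tlbi vmalle1`, since no ASID qualification applies there.
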
*/ ldr x0, [sp, #8] ldr x1, [sp, #16] @@ -390,10 +480,10 @@ _ZN3ams4kern4arch5arm6430EL1SynchronousExceptionHandlerEv: mov x0, sp bl _ZN3ams4kern4arch5arm6415HandleExceptionEPNS2_17KExceptionContextE -4: /* HandleException should never return. The best we can do is infinite loop. */ - b 4b +3: /* HandleException should never return. The best we can do is infinite loop. */ + b 3b -5: /* Check if there's a TLB conflict that caused the abort. */ +4: /* Check if there's a TLB conflict that caused the abort. */ /* NOTE: There is a Nintendo bug in this code that we correct. */ /* Nintendo compares the low 6 bits of x0 without restoring the value. */ /* They intend to check the DFSC/IFSC bits of esr_el1, but because they */ @@ -408,19 +498,19 @@ _ZN3ams4kern4arch5arm6430EL1SynchronousExceptionHandlerEv: /* They do not refresh the value of x0, and again compare with */ /* the relevant bit already masked out of x0. */ mrs x0, esr_el1 - tbnz x0, #10, 6f + tbnz x0, #10, 5f /* FAR is valid, so we can invalidate the address it holds. */ mrs x0, far_el1 lsr x0, x0, #12 tlbi vaae1, x0 - b 7f + b 6f -6: /* There's a TLB conflict and FAR isn't valid. */ +5: /* There's a TLB conflict and FAR isn't valid. */ /* Invalidate the entire TLB. */ tlbi vmalle1 -7: /* Return from a TLB conflict. */ +6: /* Return from a TLB conflict. */ /* Ensure instruction consistency. */ dsb ish isb @@ -437,52 +527,17 @@ _ZN3ams4kern4arch5arm6430EL1SynchronousExceptionHandlerEv: .global _ZN3ams4kern4arch5arm6425FpuAccessExceptionHandlerEv .type _ZN3ams4kern4arch5arm6425FpuAccessExceptionHandlerEv, %function _ZN3ams4kern4arch5arm6425FpuAccessExceptionHandlerEv: - /* Save registers that need saving. */ + /* Save registers. */ sub sp, sp, #(EXCEPTION_CONTEXT_SIZE) stp x0, x1, [sp, #(EXCEPTION_CONTEXT_X0_X1)] stp x2, x3, [sp, #(EXCEPTION_CONTEXT_X2_X3)] - stp x4, x5, [sp, #(EXCEPTION_CONTEXT_X4_X5)] - stp x6, x7, [sp, #(EXCEPTION_CONTEXT_X6_X7)] - stp x8, x9, [sp, #(EXCEPTION_CONTEXT_X8_X9)] - stp x10, x11, [sp, #(EXCEPTION_CONTEXT_X10_X11)] - stp x12, x13, [sp, #(EXCEPTION_CONTEXT_X12_X13)] - stp x14, x15, [sp, #(EXCEPTION_CONTEXT_X14_X15)] - stp x16, x17, [sp, #(EXCEPTION_CONTEXT_X16_X17)] - stp x18, x19, [sp, #(EXCEPTION_CONTEXT_X18_X19)] - stp x20, x21, [sp, #(EXCEPTION_CONTEXT_X20_X21)] - mrs x19, sp_el0 - mrs x20, elr_el1 - mrs x21, spsr_el1 - mov w21, w21 - - stp x30, x19, [sp, #(EXCEPTION_CONTEXT_X30_SP)] - stp x20, x21, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] - - /* Invoke the FPU context switch handler. */ - ldr x18, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_CUR_THREAD)] - bl _ZN3ams4kern4arch5arm6423FpuContextSwitchHandlerEv - - /* Restore registers that we saved. */ - ldp x30, x19, [sp, #(EXCEPTION_CONTEXT_X30_SP)] - ldp x20, x21, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] - - msr sp_el0, x19 - msr elr_el1, x20 - msr spsr_el1, x21 + ENABLE_AND_RESTORE_FPU(x2, x0, x1, w0, w1, 1, 2) + /* Restore registers. 
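
The userspace-access fixup above relies on a convention: every routine placed between UserspaceAccessFunctionAreaBegin and UserspaceAccessFunctionAreaEnd returns a bool, so when one of them faults, the EL1 handler can force `x0 = 0` and eret to the caller's link register, turning the fault into a plain `false` return. A stand-in illustrating the contract; the kernel version is assembly and can fault mid-copy, this model cannot:

```cpp
#include <cstddef>
#include <cstring>

// Stand-in for a UserspaceAccessFunctionArea routine. In the kernel, a data
// abort inside the copy never propagates: the exception handler rewrites it
// into "return false", so callers just branch on the result.
inline bool UserspaceAccessCopySketch(void *dst, const void *src, std::size_t size) {
    std::memcpy(dst, src, size); // the real routine may fault here...
    return true;                 // ...in which case the handler forces false
}
```
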
*/ ldp x0, x1, [sp, #(EXCEPTION_CONTEXT_X0_X1)] ldp x2, x3, [sp, #(EXCEPTION_CONTEXT_X2_X3)] - ldp x4, x5, [sp, #(EXCEPTION_CONTEXT_X4_X5)] - ldp x6, x7, [sp, #(EXCEPTION_CONTEXT_X6_X7)] - ldp x8, x9, [sp, #(EXCEPTION_CONTEXT_X8_X9)] - ldp x10, x11, [sp, #(EXCEPTION_CONTEXT_X10_X11)] - ldp x12, x13, [sp, #(EXCEPTION_CONTEXT_X12_X13)] - ldp x14, x15, [sp, #(EXCEPTION_CONTEXT_X14_X15)] - ldp x16, x17, [sp, #(EXCEPTION_CONTEXT_X16_X17)] - ldp x18, x19, [sp, #(EXCEPTION_CONTEXT_X18_X19)] - ldp x20, x21, [sp, #(EXCEPTION_CONTEXT_X20_X21)] add sp, sp, #(EXCEPTION_CONTEXT_SIZE) @@ -585,6 +640,17 @@ _ZN3ams4kern4arch5arm6421EL0SystemErrorHandlerEv: mov x0, sp bl _ZN3ams4kern4arch5arm6415HandleExceptionEPNS2_17KExceptionContextE + /* If we don't need to restore the fpu, skip restoring it. */ + ldrb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + tbz w1, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_CONTEXT_RESTORE_NEEDED), 2f + + /* Clear the needs-fpu-restore flag. */ + and w1, w1, #(~THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w1, [sp, #(EXCEPTION_CONTEXT_SIZE + THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* Enable and restore the fpu. */ + ENABLE_AND_RESTORE_FPU(x2, x0, x1, w0, w1, 1, 2) + /* Restore state from the context. */ ldp x30, x20, [sp, #(EXCEPTION_CONTEXT_X30_SP)] ldp x21, x22, [sp, #(EXCEPTION_CONTEXT_PC_PSR)] diff --git a/mesosphere/kernel/source/arch/arm64/kern_k_scheduler_asm.s b/mesosphere/kernel/source/arch/arm64/kern_k_scheduler_asm.s index 3c0d2ccbb..fdc327e21 100644 --- a/mesosphere/kernel/source/arch/arm64/kern_k_scheduler_asm.s +++ b/mesosphere/kernel/source/arch/arm64/kern_k_scheduler_asm.s @@ -13,91 +13,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#include - -#define SAVE_THREAD_CONTEXT(ctx, tmp0, tmp1, done_label) \ - /* Save the callee save registers + SP and cpacr. */ \ - mov tmp0, sp; \ - mrs tmp1, cpacr_el1; \ - stp x19, x20, [ctx, #(THREAD_CONTEXT_X19_X20)]; \ - stp x21, x22, [ctx, #(THREAD_CONTEXT_X21_X22)]; \ - stp x23, x24, [ctx, #(THREAD_CONTEXT_X23_X24)]; \ - stp x25, x26, [ctx, #(THREAD_CONTEXT_X25_X26)]; \ - stp x27, x28, [ctx, #(THREAD_CONTEXT_X27_X28)]; \ - stp x29, x30, [ctx, #(THREAD_CONTEXT_X29_X30)]; \ - \ - stp tmp0, tmp1, [ctx, #(THREAD_CONTEXT_SP_CPACR)]; \ - \ - /* Check whether the FPU is enabled. */ \ - /* If it isn't, skip saving FPU state. */ \ - and tmp1, tmp1, #0x300000; \ - cbz tmp1, done_label; \ - \ - /* Save fpcr and fpsr. */ \ - mrs tmp0, fpcr; \ - mrs tmp1, fpsr; \ - stp tmp0, tmp1, [ctx, #(THREAD_CONTEXT_FPCR_FPSR)]; \ - \ - /* Save the FPU registers. 
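
As I read the patch, the slimmed-down FpuAccessExceptionHandler is the trap half of a lazy-FPU scheme: the scheduler disables the FPU on switch-out after saving state, and the first FP/SIMD instruction afterwards traps (EC 0x07), at which point the handler only needs to enable the FPU, restore, and eret to retry the instruction, so saving x0-x3 suffices. A state sketch of that lifecycle, with the register traffic left as comments:

```cpp
// CPACR_EL1.FPEN (bits 20-21, the 0x300000 mask seen in ENABLE_FPU) gates
// EL0 FPU access; this models how the two flags interact across a switch.
struct FpuLifecycleSketch {
    bool fpu_enabled         = false; // CPACR_EL1.FPEN
    bool restore_needed_flag = false; // ...IS_FPU_CONTEXT_RESTORE_NEEDED

    // Context switch-out with a live FPU: save, mark, disable.
    void OnScheduleOut() {
        if (fpu_enabled) {
            /* save fpcr/fpsr and the relevant q-register banks */
            restore_needed_flag = true;
            fpu_enabled = false; // msr cpacr_el1 with FPEN cleared
        }
    }

    // First FP instruction afterwards traps and lands in the handler.
    void OnFpuAccessTrap() {
        fpu_enabled = true; // ENABLE_FPU
        /* restore fpcr/fpsr and q-registers, then eret retries the insn */
        restore_needed_flag = false;
    }
};
```

The kernel-exit paths shown earlier consume `restore_needed_flag` directly, so the trap mostly covers first use of the FPU after thread start.
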
*/ \ - stp q0, q1, [ctx, #(16 * 0 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q2, q3, [ctx, #(16 * 2 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q4, q5, [ctx, #(16 * 4 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q6, q7, [ctx, #(16 * 6 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q8, q9, [ctx, #(16 * 8 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q10, q11, [ctx, #(16 * 10 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q12, q13, [ctx, #(16 * 12 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q14, q15, [ctx, #(16 * 14 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q16, q17, [ctx, #(16 * 16 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q18, q19, [ctx, #(16 * 18 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q20, q21, [ctx, #(16 * 20 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q22, q23, [ctx, #(16 * 22 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q24, q25, [ctx, #(16 * 24 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q26, q27, [ctx, #(16 * 26 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q28, q29, [ctx, #(16 * 28 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - stp q30, q31, [ctx, #(16 * 30 + THREAD_CONTEXT_FPU_REGISTERS)]; - -#define RESTORE_THREAD_CONTEXT(ctx, tmp0, tmp1, done_label) \ - /* Restore the callee save registers + SP and cpacr. */ \ - ldp tmp0, tmp1, [ctx, #(THREAD_CONTEXT_SP_CPACR)]; \ - mov sp, tmp0; \ - ldp x19, x20, [ctx, #(THREAD_CONTEXT_X19_X20)]; \ - ldp x21, x22, [ctx, #(THREAD_CONTEXT_X21_X22)]; \ - ldp x23, x24, [ctx, #(THREAD_CONTEXT_X23_X24)]; \ - ldp x25, x26, [ctx, #(THREAD_CONTEXT_X25_X26)]; \ - ldp x27, x28, [ctx, #(THREAD_CONTEXT_X27_X28)]; \ - ldp x29, x30, [ctx, #(THREAD_CONTEXT_X29_X30)]; \ - \ - msr cpacr_el1, tmp1; \ - isb; \ - \ - /* Check whether the FPU is enabled. */ \ - /* If it isn't, skip saving FPU state. */ \ - and tmp1, tmp1, #0x300000; \ - cbz tmp1, done_label; \ - \ - /* Save fpcr and fpsr. */ \ - ldp tmp0, tmp1, [ctx, #(THREAD_CONTEXT_FPCR_FPSR)]; \ - msr fpcr, tmp0; \ - msr fpsr, tmp1; \ - \ - /* Save the FPU registers. */ \ - ldp q0, q1, [ctx, #(16 * 0 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q2, q3, [ctx, #(16 * 2 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q4, q5, [ctx, #(16 * 4 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q6, q7, [ctx, #(16 * 6 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q8, q9, [ctx, #(16 * 8 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q10, q11, [ctx, #(16 * 10 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q12, q13, [ctx, #(16 * 12 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q14, q15, [ctx, #(16 * 14 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q16, q17, [ctx, #(16 * 16 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q18, q19, [ctx, #(16 * 18 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q20, q21, [ctx, #(16 * 20 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q22, q23, [ctx, #(16 * 22 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q24, q25, [ctx, #(16 * 24 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q26, q27, [ctx, #(16 * 26 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q28, q29, [ctx, #(16 * 28 + THREAD_CONTEXT_FPU_REGISTERS)]; \ - ldp q30, q31, [ctx, #(16 * 30 + THREAD_CONTEXT_FPU_REGISTERS)]; - +#include /* ams::kern::KScheduler::ScheduleImpl() */ .section .text._ZN3ams4kern10KScheduler12ScheduleImplEv, "ax", %progbits @@ -138,21 +54,107 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv: /* Get a reference to the current thread's stack parameters. */ add x2, sp, #0x1000 and x2, x2, #~(0x1000-1) + sub x2, x2, #(THREAD_STACK_PARAMETERS_SIZE) - /* Check if the thread has terminated. We can do this by checking the DPC flags for DpcFlag_Terminated. 
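
ScheduleImpl locates the stack parameters with three instructions of pointer math: round sp up to the next 4 KiB boundary (the top of the kernel stack page the mask implies), then back off by the parameter block size. In C, using the 0x130 size from the earlier static_assert:

```cpp
#include <cstdint>

constexpr std::uintptr_t PageSize            = 0x1000;
constexpr std::uintptr_t StackParametersSize = 0x130;

// "add x2, sp, #0x1000; and x2, x2, #~(0x1000-1); sub x2, x2, #SIZE":
// the parameter block sits at the very top of the stack page, and the
// thread context lives at a fixed offset inside it.
inline std::uintptr_t GetStackParametersSketch(std::uintptr_t sp) {
    const std::uintptr_t stack_top = (sp + PageSize) & ~(PageSize - 1);
    return stack_top - StackParametersSize;
}
```
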
*/ - ldurb w3, [x2, #-(THREAD_STACK_PARAMETERS_SIZE - THREAD_STACK_PARAMETERS_DPC_FLAGS)] - tbnz w3, #1, 3f + /* Get a reference to the current thread's context. */ + add x3, x2, #(THREAD_STACK_PARAMETERS_THREAD_CONTEXT) - /* The current thread hasn't terminated, so we want to save its context. */ - ldur x2, [x2, #-(THREAD_STACK_PARAMETERS_SIZE - THREAD_STACK_PARAMETERS_CONTEXT)] - SAVE_THREAD_CONTEXT(x2, x4, x5, 2f) + /* Save the callee-save registers + SP. */ + stp x19, x20, [x3, #(THREAD_CONTEXT_X19_X20)] + stp x21, x22, [x3, #(THREAD_CONTEXT_X21_X22)] + stp x23, x24, [x3, #(THREAD_CONTEXT_X23_X24)] + stp x25, x26, [x3, #(THREAD_CONTEXT_X25_X26)] + stp x27, x28, [x3, #(THREAD_CONTEXT_X27_X28)] + stp x29, x30, [x3, #(THREAD_CONTEXT_X29_X30)] -2: /* We're done saving this thread's context, so we need to unlock it. */ - /* We can just do an atomic write to the relevant KThreadContext member. */ - add x2, x2, #(THREAD_CONTEXT_LOCKED) - stlrb wzr, [x2] + mov x4, sp + str x4, [x3, #(THREAD_CONTEXT_SP)] -3: /* The current thread's context has been entirely taken care of. */ + /* Check if the fpu is enabled; if it is, we need to save it. */ + mrs x5, cpacr_el1 + and x4, x5, #0x300000 + cbz x4, 8f + + /* We need to save the fpu state; save fpsr/fpcr. */ + mrs x4, fpcr + mrs x6, fpsr + stp w4, w6, [x3, #(THREAD_CONTEXT_FPCR_FPSR)] + + /* Set fpu-restore-needed in our exception flags. */ + ldrb w4, [x2, #(THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + orr w4, w4, #(THREAD_EXCEPTION_FLAG_IS_FPU_CONTEXT_RESTORE_NEEDED) + strb w4, [x2, #(THREAD_STACK_PARAMETERS_EXCEPTION_FLAGS)] + + /* We need to save fpu state based on whether we're a 64-bit or 32-bit thread. */ + tbz w4, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_FPU_64_BIT), 4f + + /* We have a 64-bit fpu. */ + + /* If we're in a usermode exception, we need to save the caller-save fpu registers. */ + tbz w4, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_USERMODE_EXCEPTION_HANDLER), 2f + + /* If we're in an SVC (and not a usermode exception), we only need to save the callee-save fpu registers. */ + tbz w4, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_CALLING_SVC), 3f + +2: /* Save the 64-bit caller-save fpu registers. */ + ldr x6, [x2, #(THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS)] + stp q0, q1, [x6, #(THREAD_FPU64_CONTEXT_Q0_Q1)] + stp q2, q3, [x6, #(THREAD_FPU64_CONTEXT_Q2_Q3)] + stp q4, q5, [x6, #(THREAD_FPU64_CONTEXT_Q4_Q5)] + stp q6, q7, [x6, #(THREAD_FPU64_CONTEXT_Q6_Q7)] + stp q16, q17, [x6, #(THREAD_FPU64_CONTEXT_Q16_Q17)] + stp q18, q19, [x6, #(THREAD_FPU64_CONTEXT_Q18_Q19)] + stp q20, q21, [x6, #(THREAD_FPU64_CONTEXT_Q20_Q21)] + stp q22, q23, [x6, #(THREAD_FPU64_CONTEXT_Q22_Q23)] + stp q24, q25, [x6, #(THREAD_FPU64_CONTEXT_Q24_Q25)] + stp q26, q27, [x6, #(THREAD_FPU64_CONTEXT_Q26_Q27)] + stp q28, q29, [x6, #(THREAD_FPU64_CONTEXT_Q28_Q29)] + stp q30, q31, [x6, #(THREAD_FPU64_CONTEXT_Q30_Q31)] + +3: /* Save the 64-bit callee-save fpu registers. */ + stp q8, q9, [x3, #(THREAD_CONTEXT_FPU64_Q8_Q9)] + stp q10, q11, [x3, #(THREAD_CONTEXT_FPU64_Q10_Q11)] + stp q12, q13, [x3, #(THREAD_CONTEXT_FPU64_Q12_Q13)] + stp q14, q15, [x3, #(THREAD_CONTEXT_FPU64_Q14_Q15)] + b 7f + +4: /* We have a 32-bit fpu. */ + + /* If we're in a usermode exception, we need to save the caller-save fpu registers. */ + tbz w4, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_IN_USERMODE_EXCEPTION_HANDLER), 5f + + /* If we're in an SVC (and not a usermode exception), we only need to save the callee-save fpu registers. 
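
Per the comments in this save path, the scheduler picks one of three save strategies: nothing if the FPU was never enabled since the last restore; only the callee-save bank when the thread is sitting in an SVC outside a usermode exception handler (its caller-save registers are dead by the ABI); and a full save otherwise. A decision-table sketch that follows the comments rather than the exact branch ordering:

```cpp
enum class FpuSaveKind { None, CalleeSaveOnly, Full };

inline FpuSaveKind ChooseFpuSave(bool fpu_enabled, bool calling_svc,
                                 bool in_usermode_exception_handler) {
    if (!fpu_enabled) {
        return FpuSaveKind::None; // nothing live since the last restore
    }
    if (calling_svc && !in_usermode_exception_handler) {
        // q8-q15 for a 64-bit fpu, q4-q7 for a 32-bit fpu.
        return FpuSaveKind::CalleeSaveOnly;
    }
    return FpuSaveKind::Full; // callee-save bank + caller-save buffer
}
```

Whatever is saved, the switch-out path then clears CPACR_EL1.FPEN so the next FP use is either restored on kernel exit or trapped.
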
*/ + tbz w4, #(THREAD_EXCEPTION_FLAG_BIT_INDEX_IS_CALLING_SVC), 6f + +5: /* Save the 32-bit caller-save fpu registers. */ + ldr x6, [x2, #(THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS)] + stp q0, q1, [x6, #(THREAD_FPU32_CONTEXT_Q0_Q1)] + stp q2, q3, [x6, #(THREAD_FPU32_CONTEXT_Q2_Q3)] + stp q8, q9, [x6, #(THREAD_FPU32_CONTEXT_Q8_Q9)] + stp q10, q11, [x6, #(THREAD_FPU32_CONTEXT_Q10_Q11)] + stp q12, q13, [x6, #(THREAD_FPU32_CONTEXT_Q12_Q13)] + stp q14, q15, [x6, #(THREAD_FPU32_CONTEXT_Q14_Q15)] + +6: /* Save the 32-bit callee-save fpu registers. */ + stp q4, q5, [x3, #(THREAD_CONTEXT_FPU32_Q4_Q5)] + stp q6, q7, [x3, #(THREAD_CONTEXT_FPU32_Q6_Q7)] + +7: /* With the fpu state saved, disable the fpu. */ + and x5, x5, #(~0x300000) + msr cpacr_el1, x5 + +8: /* We're done saving this thread's context. */ + + /* Check if the thread is terminated by checking the DPC flags for DpcFlag_Terminated. */ + ldrb w4, [x2, #(THREAD_STACK_PARAMETERS_DPC_FLAGS)] + tbnz w4, #1, 9f + + /* The thread isn't terminated, so we want to unlock it. */ + /* Write atomically to the context's locked member. */ + add x3, x3, #(THREAD_CONTEXT_LOCKED) + stlrb wzr, [x3] + +9: /* The current thread's context has been entirely taken care of. */ /* Now we want to loop until we successfully switch the thread context. */ /* Start by saving all the values we care about in callee-save registers. */ mov x19, x0 /* this */ @@ -162,54 +164,54 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv: /* Set our stack to the idle thread stack. */ ldr x3, [x20, #(KSCHEDULER_IDLE_THREAD_STACK)] mov sp, x3 - b 5f + b 11f -4: /* We failed to successfully do the context switch, and need to retry. */ +10: /* We failed to successfully do the context switch, and need to retry. */ /* Clear the exclusive monitor. */ clrex /* Clear the need's scheduling bool (and dmb ish after, as it's an atomic). */ - /* TODO: Should this be a stlrb? Nintendo does not do one. */ strb wzr, [x20] dmb ish /* Refresh the highest priority thread. */ ldr x21, [x20, #(KSCHEDULER_HIGHEST_PRIORITY_THREAD)] -5: /* We're starting to try to do the context switch. */ +11: /* We're starting to try to do the context switch. */ /* Check if the highest priority thread if null. */ /* If it is, we want to branch to a special idle thread loop. */ - cbz x21, 11f + cbz x21, 16f /* Get the highest priority thread's context, and save it. */ /* ams::kern::KThread::GetContextForSchedulerLoop() */ - add x22, x21, #(THREAD_THREAD_CONTEXT) + ldr x22, [x21, #(THREAD_KERNEL_STACK_TOP)] + sub x22, x22, #(THREAD_STACK_PARAMETERS_SIZE - THREAD_STACK_PARAMETERS_THREAD_CONTEXT) /* Prepare to try to acquire the context lock. */ add x1, x22, #(THREAD_CONTEXT_LOCKED) mov w2, #1 -6: /* We want to try to lock the highest priority thread's context. */ +12: /* We want to try to lock the highest priority thread's context. */ /* Check if the lock is already held. */ ldaxrb w3, [x1] - cbnz w3, 7f + cbnz w3, 13f /* If it's not, try to take it. */ stxrb w3, w2, [x1] - cbnz w3, 6b + cbnz w3, 12b /* We hold the lock, so we can now switch the thread. */ - b 8f + b 14f -7: /* The highest priority thread's context is already locked. */ +13: /* The highest priority thread's context is already locked. */ /* Check if we need scheduling. If we don't, we can retry directly. */ ldarb w3, [x20] // ldarb w3, [x20, #(KSCHEDULER_NEEDS_SCHEDULING)] - cbz w3, 6b + cbz w3, 12b /* If we do, another core is interfering, and we must start from the top. */ - b 4b + b 10b -8: /* It's time to switch the thread. 
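
The per-context lock taken at labels 12-13 is a byte spinlock built from ldaxrb/stxrb, with stlrb for release. Expressed with C++ atomics, where compare_exchange maps onto the load-exclusive/store-exclusive pair:

```cpp
#include <atomic>

struct ThreadContextLockSketch {
    std::atomic<unsigned char> locked{0};

    // ldaxrb (acquire load-exclusive) + stxrb (store-exclusive) retry pair.
    bool TryLock() {
        unsigned char expected = 0;
        return locked.compare_exchange_strong(expected, 1,
                                              std::memory_order_acquire,
                                              std::memory_order_relaxed);
    }

    // "stlrb wzr, [x1]": plain release store of zero.
    void Unlock() {
        locked.store(0, std::memory_order_release);
    }
};
```

The asm additionally bails out of the spin to re-run scheduling when `needs_scheduling` becomes set while waiting, rather than spinning blindly.
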
*/ +14: /* It's time to switch the thread. */ /* Switch to the highest priority thread. */ mov x0, x19 mov x1, x21 @@ -219,22 +221,30 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv: /* Check if we need scheduling. If we don't, then we can't complete the switch and should retry. */ ldarb w1, [x20] // ldarb w1, [x20, #(KSCHEDULER_NEEDS_SCHEDULING)] - cbnz w1, 10f + cbnz w1, 15f /* Restore the thread context. */ mov x0, x22 - RESTORE_THREAD_CONTEXT(x0, x1, x2, 9f) + ldp x19, x20, [x0, #(THREAD_CONTEXT_X19_X20)] + ldp x21, x22, [x0, #(THREAD_CONTEXT_X21_X22)] + ldp x23, x24, [x0, #(THREAD_CONTEXT_X23_X24)] + ldp x25, x26, [x0, #(THREAD_CONTEXT_X25_X26)] + ldp x27, x28, [x0, #(THREAD_CONTEXT_X27_X28)] + ldp x29, x30, [x0, #(THREAD_CONTEXT_X29_X30)] -9: /* We're done restoring the thread context, and can return safely. */ + ldr x1, [x0, #(THREAD_CONTEXT_SP)] + mov sp, x1 + + /* Return. */ ret -10: /* Our switch failed. */ +15: /* Our switch failed. */ /* We should unlock the thread context, and then retry. */ add x1, x22, #(THREAD_CONTEXT_LOCKED) stlrb wzr, [x1] - b 4b + b 10b -11: /* The next thread is nullptr! */ +16: /* The next thread is nullptr! */ /* Switch to nullptr. This will actually switch to the idle thread. */ mov x0, x19 mov x1, #0 @@ -242,14 +252,14 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv: /* Call ams::kern::KScheduler::SwitchThread(ams::kern::KThread *) */ bl _ZN3ams4kern10KScheduler12SwitchThreadEPNS0_7KThreadE -12: /* We've switched to the idle thread, so we want to process interrupt tasks until we schedule a non-idle thread. */ +17: /* We've switched to the idle thread, so we want to process interrupt tasks until we schedule a non-idle thread. */ /* Check whether there are runnable interrupt tasks. */ ldrb w3, [x20, #(KSCHEDULER_INTERRUPT_TASK_RUNNABLE)] - cbnz w3, 13f + cbnz w3, 18f /* Check if we need scheduling. */ ldarb w3, [x20] // ldarb w3, [x20, #(KSCHEDULER_NEEDS_SCHEDULING)] - cbnz w3, 4b + cbnz w3, 10b /* Clear the previous thread. */ str xzr, [x20, #(KSCHEDULER_PREVIOUS_THREAD)] @@ -260,9 +270,9 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv: msr daifclr, #2 msr daifset, #2 - b 12b + b 17b -13: /* We have interrupt tasks to execute! */ +18: /* We have interrupt tasks to execute! */ /* Execute any pending interrupt tasks. */ ldr x0, [x20, #(KSCHEDULER_INTERRUPT_TASK_MANAGER)] bl _ZN3ams4kern21KInterruptTaskManager7DoTasksEv @@ -271,4 +281,4 @@ _ZN3ams4kern10KScheduler12ScheduleImplEv: strb wzr, [x20, #(KSCHEDULER_INTERRUPT_TASK_RUNNABLE)] /* Retry the scheduling loop. */ - b 4b + b 10b diff --git a/mesosphere/kernel/source/arch/arm64/kern_k_thread_context_asm.s b/mesosphere/kernel/source/arch/arm64/kern_k_thread_context_asm.s index 7ea6849ec..080048598 100644 --- a/mesosphere/kernel/source/arch/arm64/kern_k_thread_context_asm.s +++ b/mesosphere/kernel/source/arch/arm64/kern_k_thread_context_asm.s @@ -87,58 +87,3 @@ _ZN3ams4kern4arch5arm6427SupervisorModeThreadStarterEv: /* This should never execute, but Nintendo includes an ERET here. */ eret - - -/* ams::kern::arch::arm64::KThreadContext::RestoreFpuRegisters64(const KThreadContext &) */ -.section .text._ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters64ERKS3_, "ax", %progbits -.global _ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters64ERKS3_ -.type _ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters64ERKS3_, %function -_ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters64ERKS3_: - /* Load and restore FPCR and FPSR from the context. 
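
The control flow across labels 10-15 is a lock/switch/validate loop. A pseudocode compression of its shape, modeling control flow only; the stand-in bodies are trivial, and the real code restores callee-save registers and sp instead of calling a function:

```cpp
#include <atomic>

struct SchedulerLoopSketch {
    std::atomic<bool> needs_scheduling{true};

    bool TryLockNextThreadContext() { return true; } // the ldaxrb/stxrb spin
    void UnlockNextThreadContext()  {}               // stlrb wzr
    void SwitchThread()             {}               // KScheduler::SwitchThread
    void RestoreNextThreadContext() {}               // ldp x19..x30 + sp, ret

    void ScheduleImplSketch() {
        for (;;) {
            if (!TryLockNextThreadContext()) {
                continue;                      // another core owns the context
            }
            SwitchThread();
            if (needs_scheduling.load(std::memory_order_acquire)) {
                UnlockNextThreadContext();     // label 15: lost a race
                needs_scheduling.store(false); // label 10 clears the flag
                continue;                      // and retries from the top
            }
            RestoreNextThreadContext();        // completes the switch
            return;
        }
    }
};
```
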
*/ - ldr x1, [x0, #(THREAD_CONTEXT_FPCR)] - msr fpcr, x1 - ldr x1, [x0, #(THREAD_CONTEXT_FPSR)] - msr fpsr, x1 - - /* Restore the FPU registers. */ - ldp q0, q1, [x0, #(16 * 0 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q2, q3, [x0, #(16 * 2 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q4, q5, [x0, #(16 * 4 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q6, q7, [x0, #(16 * 6 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q8, q9, [x0, #(16 * 8 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q10, q11, [x0, #(16 * 10 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q12, q13, [x0, #(16 * 12 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q14, q15, [x0, #(16 * 14 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q16, q17, [x0, #(16 * 16 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q18, q19, [x0, #(16 * 18 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q20, q21, [x0, #(16 * 20 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q22, q23, [x0, #(16 * 22 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q24, q25, [x0, #(16 * 24 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q26, q27, [x0, #(16 * 26 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q28, q29, [x0, #(16 * 28 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q30, q31, [x0, #(16 * 30 + THREAD_CONTEXT_FPU_REGISTERS)] - - ret - -/* ams::kern::arch::arm64::KThreadContext::RestoreFpuRegisters32(const KThreadContext &) */ -.section .text._ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters32ERKS3_, "ax", %progbits -.global _ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters32ERKS3_ -.type _ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters32ERKS3_, %function -_ZN3ams4kern4arch5arm6414KThreadContext21RestoreFpuRegisters32ERKS3_: - /* Load and restore FPCR and FPSR from the context. */ - ldr x1, [x0, #(THREAD_CONTEXT_FPCR)] - msr fpcr, x1 - ldr x1, [x0, #(THREAD_CONTEXT_FPSR)] - msr fpsr, x1 - - /* Restore the FPU registers. */ - ldp q0, q1, [x0, #(16 * 0 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q2, q3, [x0, #(16 * 2 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q4, q5, [x0, #(16 * 4 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q6, q7, [x0, #(16 * 6 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q8, q9, [x0, #(16 * 8 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q10, q11, [x0, #(16 * 10 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q12, q13, [x0, #(16 * 12 + THREAD_CONTEXT_FPU_REGISTERS)] - ldp q14, q15, [x0, #(16 * 14 + THREAD_CONTEXT_FPU_REGISTERS)] - - ret
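
With the C++-callable RestoreFpuRegisters64/32 helpers removed, restores happen only through the assembly macros, operating on two banks: the callee-save bank embedded in KThreadContext on the kernel stack, and the larger caller-save buffer reached via THREAD_STACK_PARAMETERS_CALLER_SAVE_FPU_REGISTERS. A struct-level sketch of those banks as the offsets used throughout this patch imply them; the field names are illustrative, and the real types live in kern_k_thread_context.hpp:

```cpp
#include <cstdint>

struct alignas(16) Q { std::uint64_t lo, hi; }; // one 128-bit q register

struct CalleeSaveFpu64 { Q v[8];  }; // q8-q15
struct CallerSaveFpu64 { Q v[24]; }; // q0-q7, q16-q31
struct CalleeSaveFpu32 { Q v[4];  }; // q4-q7
struct CallerSaveFpu32 { Q v[12]; }; // q0-q3, q8-q15

// In KThreadContext (saved/restored via THREAD_CONTEXT_FPU64_* offsets).
union CalleeSaveFpuRegistersSketch { CalleeSaveFpu64 fpu64; CalleeSaveFpu32 fpu32; };

// Per-thread buffer (reached via the stack-parameters pointer).
union CallerSaveFpuRegistersSketch { CallerSaveFpu64 fpu64; CallerSaveFpu32 fpu32; };
```

Splitting the banks is what makes the partial save/restore paths above cheap: SVC entry and exit touch only the small callee-save union, while the full caller-save buffer is touched only for interrupts and usermode exception handlers.
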