From 8538fed043c30d471e2b0e7a07b2546408ca3e0c Mon Sep 17 00:00:00 2001 From: TuxSH <1922548+TuxSH@users.noreply.github.com> Date: Sun, 26 Jan 2020 15:07:26 +0000 Subject: [PATCH] thermosphere: optimize barrier and core_ctx --- thermosphere/src/asm_macros.s | 7 ++-- thermosphere/src/barrier.c | 6 ++- thermosphere/src/core_ctx.h | 57 +++++++++++++++------------- thermosphere/src/debug_pause.c | 9 ++++- thermosphere/src/exception_vectors.s | 2 + thermosphere/src/spinlock_impl.s | 2 +- thermosphere/src/start.s | 3 ++ 7 files changed, 53 insertions(+), 33 deletions(-) diff --git a/thermosphere/src/asm_macros.s b/thermosphere/src/asm_macros.s index 5668eba2b..bd605d5bd 100644 --- a/thermosphere/src/asm_macros.s +++ b/thermosphere/src/asm_macros.s @@ -16,9 +16,10 @@ #define EXCEP_STACK_FRAME_SIZE 0x140 -#define CORECTX_GUEST_FRAME_OFFSET 0x000 -#define CORECTX_SCRATCH_OFFSET 0x008 -#define CORECTX_CRASH_STACK_OFFSET 0x010 +#define CORECTX_CRASH_STACK_OFFSET 0x000 +#define CORECTX_GUEST_FRAME_OFFSET 0x040 +#define CORECTX_SCRATCH_OFFSET 0x048 + .macro FUNCTION name .section .text.\name, "ax", %progbits diff --git a/thermosphere/src/barrier.c b/thermosphere/src/barrier.c index 0ba1528a7..bc191f521 100644 --- a/thermosphere/src/barrier.c +++ b/thermosphere/src/barrier.c @@ -36,5 +36,9 @@ void barrierInitAll(Barrier *barrier) void barrierWait(Barrier *barrier) { - while (atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId))) != 0); + atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId))); /* clear this core's bit in barrier->val */ + __sev(); + do { + __wfe(); + } while (atomic_load(&barrier->val) != 0); /* re-check after each __wfe() until barrier->val reads 0 */ } diff --git a/thermosphere/src/core_ctx.h b/thermosphere/src/core_ctx.h index 2f17c6c09..974688d1c 100644 --- a/thermosphere/src/core_ctx.h +++ b/thermosphere/src/core_ctx.h @@ -22,39 +22,44 @@ #include "execute_function.h" struct ExceptionStackFrame; -typedef struct CoreCtx { - struct ExceptionStackFrame *guestFrame; // @0x00 - u64 scratch; // @0x08 - u8 *crashStack; // @0x10 - u64 kernelArgument; 
// @0x18 - uintptr_t kernelEntrypoint; // @0x20 - u32 coreId; // @0x28 - u8 gicInterfaceMask; // @0x2C. Equal to BIT(coreId) anyway - bool isBootCore; // @0x2D - bool warmboot; // @0x2E +typedef struct ALIGN(64) CoreCtx { + // Fields that are most likely only read, not written (assumes a cache line size of at most 64 bytes): - // Timer stuff - u64 totalTimeInHypervisor; // @0x30. cntvoff_el2 is updated to that value. - u64 emulPtimerCval; // @0x38. When setting cntp_cval_el0 and on interrupt - - // "Execute function" - ExecutedFunction executedFunction; // @0x40 - void *executedFunctionArgs; // @0x48 - Barrier executedFunctionBarrier; // @0x50 - u32 executedFunctionSrcCore; // @0x54 - bool executedFunctionSync; // @0x58. Receiver fills it + u8 *crashStack; // @0x00 + u64 kernelArgument; // @0x08 + uintptr_t kernelEntrypoint; // @0x10 + u32 coreId; // @0x18 + u8 gicInterfaceMask; // @0x1C. Equal to BIT(coreId) anyway + bool isBootCore; // @0x1D + bool warmboot; // @0x1E // Debug features - bool wasPaused; // @0x59 + bool wasPaused; // @0x1F + + // Fields that are most likely written to (ALIGN(64) on guestFrame starts them at @0x40): + + ALIGN(64) struct ExceptionStackFrame *guestFrame; // @0x40 + u64 scratch; // @0x48 + + // Timer stuff + u64 totalTimeInHypervisor; // @0x50. cntvoff_el2 is updated to that value. + u64 emulPtimerCval; // @0x58. When setting cntp_cval_el0 and on interrupt + + // "Execute function" + ExecutedFunction executedFunction; // @0x60 + void *executedFunctionArgs; // @0x68 + Barrier executedFunctionBarrier; // @0x70 + u32 executedFunctionSrcCore; // @0x74 + bool executedFunctionSync; // @0x78. 
Receiver fills it // Cache stuff - u32 setWayCounter; // @0x5C + u32 setWayCounter; // @0x7C } CoreCtx; -static_assert(offsetof(CoreCtx, warmboot) == 0x2E, "Wrong definition for CoreCtx"); -static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x38, "Wrong definition for CoreCtx"); -static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x58, "Wrong definition for CoreCtx"); -static_assert(offsetof(CoreCtx, setWayCounter) == 0x5C, "Wrong definition for CoreCtx"); +static_assert(offsetof(CoreCtx, warmboot) == 0x1E, "Wrong definition for CoreCtx"); +static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x58, "Wrong definition for CoreCtx"); +static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x78, "Wrong definition for CoreCtx"); +static_assert(offsetof(CoreCtx, setWayCounter) == 0x7C, "Wrong definition for CoreCtx"); extern CoreCtx g_coreCtxs[4]; register CoreCtx *currentCoreCtx asm("x18"); diff --git a/thermosphere/src/debug_pause.c b/thermosphere/src/debug_pause.c index 683917992..e3714ba91 100644 --- a/thermosphere/src/debug_pause.c +++ b/thermosphere/src/debug_pause.c @@ -23,8 +23,8 @@ #include "single_step.h" static Barrier g_debugPauseBarrier; -static atomic_uint g_debugPausePausedCoreList; -static atomic_uint g_debugPauseSingleStepCoreList; +static ALIGN(64) atomic_uint g_debugPausePausedCoreList; +static atomic_uint g_debugPauseSingleStepCoreList; // TODO: put this variable on the same cache line as g_debugPausePausedCoreList above static inline void debugSetThisCorePaused(void) { @@ -40,6 +40,7 @@ void debugPauseSgiHandler(void) void debugPauseWaitAndUpdateSingleStep(void) { u32 coreId = currentCoreCtx->coreId; + __builtin_prefetch(&g_debugPausePausedCoreList, 0, 3); /* prefetch for read (rw = 0), high temporal locality (locality = 3) */ if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) { unmaskIrq(); do { @@ -64,6 +65,8 @@ void debugPauseCores(u32 coreList) { maskIrq(); + __builtin_prefetch(&g_debugPausePausedCoreList, 1, 3); /* prefetch for write (rw = 1), high temporal locality (locality = 3) */ + u32 desiredList = coreList; u32 remainingList = coreList; u32 readList = 
atomic_load(&g_debugPausePausedCoreList); @@ -91,6 +94,8 @@ void debugUnpauseCores(u32 coreList, u32 singleStepList) { singleStepList &= coreList; + __builtin_prefetch(&g_debugPausePausedCoreList, 1, 0); /* prefetch for write (rw = 1), no temporal locality (locality = 0) */ + // Since we're using a debugger lock, a simple stlr should be fine... atomic_store(&g_debugPauseSingleStepCoreList, singleStepList); atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList); diff --git a/thermosphere/src/exception_vectors.s b/thermosphere/src/exception_vectors.s index e13abba44..5572a2393 100644 --- a/thermosphere/src/exception_vectors.s +++ b/thermosphere/src/exception_vectors.s @@ -104,6 +104,8 @@ vector_entry \name .if \type == EXCEPTION_TYPE_GUEST ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE] + prfm pldl1keep, [x18] + prfm pstl1keep, [x18, #0x40] /* 0x40 == CORECTX_GUEST_FRAME_OFFSET (asm_macros.s) */ str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET] mov w1, #1 .else diff --git a/thermosphere/src/spinlock_impl.s b/thermosphere/src/spinlock_impl.s index 86bdde4c8..2382d087c 100644 --- a/thermosphere/src/spinlock_impl.s +++ b/thermosphere/src/spinlock_impl.s @@ -42,7 +42,7 @@ END_FUNCTION FUNCTION spinlockTryLock mov x1, x0 mov w2, #1 - prfm pstl1strm, [x1] + prfm pstl1keep, [x1] 1: ldaxr w0, [x1] cbnz w0, 2f diff --git a/thermosphere/src/start.s b/thermosphere/src/start.s index ae9249177..ec33bfc51 100644 --- a/thermosphere/src/start.s +++ b/thermosphere/src/start.s @@ -97,6 +97,9 @@ _postMmuEnableReturnAddr: mov x1, x20 bl thermosphereMain + prfm pldl1keep, [x18] + prfm pstl1keep, [x18, #0x40] /* 0x40 == CORECTX_GUEST_FRAME_OFFSET (asm_macros.s) */ + dsb sy isb