thermosphere: optimize barrier and core_ctx

This commit is contained in:
TuxSH 2020-01-26 15:07:26 +00:00
parent 1f2b8e7918
commit 8538fed043
7 changed files with 53 additions and 33 deletions

View file

@ -16,9 +16,10 @@
#define EXCEP_STACK_FRAME_SIZE 0x140 #define EXCEP_STACK_FRAME_SIZE 0x140
#define CORECTX_GUEST_FRAME_OFFSET 0x000 #define CORECTX_CRASH_STACK_OFFSET 0x000
#define CORECTX_SCRATCH_OFFSET 0x008 #define CORECTX_GUEST_FRAME_OFFSET 0x040
#define CORECTX_CRASH_STACK_OFFSET 0x010 #define CORECTX_SCRATCH_OFFSET 0x048
.macro FUNCTION name .macro FUNCTION name
.section .text.\name, "ax", %progbits .section .text.\name, "ax", %progbits

View file

@ -36,5 +36,9 @@ void barrierInitAll(Barrier *barrier)
void barrierWait(Barrier *barrier) void barrierWait(Barrier *barrier)
{ {
while (atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId))) != 0); atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId)));
__sev();
do {
__wfe();
} while (atomic_load(&barrier->val) != 0);
} }

View file

@ -22,39 +22,44 @@
#include "execute_function.h" #include "execute_function.h"
struct ExceptionStackFrame; struct ExceptionStackFrame;
typedef struct CoreCtx { typedef struct ALIGN(64) CoreCtx {
struct ExceptionStackFrame *guestFrame; // @0x00 // Fields that are most likely only read (assuming a cache line size of at most 64 bytes):
u64 scratch; // @0x08
u8 *crashStack; // @0x10
u64 kernelArgument; // @0x18
uintptr_t kernelEntrypoint; // @0x20
u32 coreId; // @0x28
u8 gicInterfaceMask; // @0x2C. Equal to BIT(coreId) anyway
bool isBootCore; // @0x2D
bool warmboot; // @0x2E
// Timer stuff u8 *crashStack; // @0x00
u64 totalTimeInHypervisor; // @0x30. cntvoff_el2 is updated to that value. u64 kernelArgument; // @0x08
u64 emulPtimerCval; // @0x38. When setting cntp_cval_el0 and on interrupt uintptr_t kernelEntrypoint; // @0x10
u32 coreId; // @0x18
// "Execute function" u8 gicInterfaceMask; // @0x1C. Equal to BIT(coreId) anyway
ExecutedFunction executedFunction; // @0x40 bool isBootCore; // @0x1D
void *executedFunctionArgs; // @0x48 bool warmboot; // @0x1E
Barrier executedFunctionBarrier; // @0x50
u32 executedFunctionSrcCore; // @0x54
bool executedFunctionSync; // @0x58. Receiver fills it
// Debug features // Debug features
bool wasPaused; // @0x59 bool wasPaused; // @0x1F
// Fields that are most likely written to:
ALIGN(64) struct ExceptionStackFrame *guestFrame; // @0x40
u64 scratch; // @0x48
// Timer stuff
u64 totalTimeInHypervisor; // @0x50. cntvoff_el2 is updated to that value.
u64 emulPtimerCval; // @0x58. When setting cntp_cval_el0 and on interrupt
// "Execute function"
ExecutedFunction executedFunction; // @0x60
void *executedFunctionArgs; // @0x68
Barrier executedFunctionBarrier; // @0x70
u32 executedFunctionSrcCore; // @0x74
bool executedFunctionSync; // @0x78. Receiver fills it
// Cache stuff // Cache stuff
u32 setWayCounter; // @0x5C u32 setWayCounter; // @0x7C
} CoreCtx; } CoreCtx;
static_assert(offsetof(CoreCtx, warmboot) == 0x2E, "Wrong definition for CoreCtx"); static_assert(offsetof(CoreCtx, warmboot) == 0x1E, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x38, "Wrong definition for CoreCtx"); static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x58, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x58, "Wrong definition for CoreCtx"); static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x78, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, setWayCounter) == 0x5C, "Wrong definition for CoreCtx"); static_assert(offsetof(CoreCtx, setWayCounter) == 0x7C, "Wrong definition for CoreCtx");
extern CoreCtx g_coreCtxs[4]; extern CoreCtx g_coreCtxs[4];
register CoreCtx *currentCoreCtx asm("x18"); register CoreCtx *currentCoreCtx asm("x18");

View file

@ -23,8 +23,8 @@
#include "single_step.h" #include "single_step.h"
static Barrier g_debugPauseBarrier; static Barrier g_debugPauseBarrier;
static atomic_uint g_debugPausePausedCoreList; static ALIGN(64) atomic_uint g_debugPausePausedCoreList;
static atomic_uint g_debugPauseSingleStepCoreList; static atomic_uint g_debugPauseSingleStepCoreList; // TODO: put this variable on the same cache line as the above
static inline void debugSetThisCorePaused(void) static inline void debugSetThisCorePaused(void)
{ {
@ -40,6 +40,7 @@ void debugPauseSgiHandler(void)
void debugPauseWaitAndUpdateSingleStep(void) void debugPauseWaitAndUpdateSingleStep(void)
{ {
u32 coreId = currentCoreCtx->coreId; u32 coreId = currentCoreCtx->coreId;
__builtin_prefetch(&g_debugPausePausedCoreList, 0, 3);
if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) { if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) {
unmaskIrq(); unmaskIrq();
do { do {
@ -64,6 +65,8 @@ void debugPauseCores(u32 coreList)
{ {
maskIrq(); maskIrq();
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 3);
u32 desiredList = coreList; u32 desiredList = coreList;
u32 remainingList = coreList; u32 remainingList = coreList;
u32 readList = atomic_load(&g_debugPausePausedCoreList); u32 readList = atomic_load(&g_debugPausePausedCoreList);
@ -91,6 +94,8 @@ void debugUnpauseCores(u32 coreList, u32 singleStepList)
{ {
singleStepList &= coreList; singleStepList &= coreList;
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 0);
// Since we're using a debugger lock, a simple stlr should be fine... // Since we're using a debugger lock, a simple stlr should be fine...
atomic_store(&g_debugPauseSingleStepCoreList, singleStepList); atomic_store(&g_debugPauseSingleStepCoreList, singleStepList);
atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList); atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList);

View file

@ -104,6 +104,8 @@ vector_entry \name
.if \type == EXCEPTION_TYPE_GUEST .if \type == EXCEPTION_TYPE_GUEST
ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE] ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE]
prfm pldl1keep, [x18]
prfm pstl1keep, [x18, #0x40]
str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET] str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET]
mov w1, #1 mov w1, #1
.else .else

View file

@ -42,7 +42,7 @@ END_FUNCTION
FUNCTION spinlockTryLock FUNCTION spinlockTryLock
mov x1, x0 mov x1, x0
mov w2, #1 mov w2, #1
prfm pstl1strm, [x1] prfm pstl1keep, [x1]
1: 1:
ldaxr w0, [x1] ldaxr w0, [x1]
cbnz w0, 2f cbnz w0, 2f

View file

@ -97,6 +97,9 @@ _postMmuEnableReturnAddr:
mov x1, x20 mov x1, x20
bl thermosphereMain bl thermosphereMain
prfm pldl1keep, [x18]
prfm pstl1keep, [x18, #0x40]
dsb sy dsb sy
isb isb