thermosphere: optimize barrier and core_ctx

This commit is contained in:
TuxSH 2020-01-26 15:07:26 +00:00
parent 1f2b8e7918
commit 8538fed043
7 changed files with 53 additions and 33 deletions

View file

@ -16,9 +16,10 @@
#define EXCEP_STACK_FRAME_SIZE 0x140
/*
 * Byte offsets of CoreCtx fields, for use from assembly. They have to stay in sync with the
 * "@0x.." layout annotations on the CoreCtx struct definition.
 * NOTE(review): this diff view has no +/- markers. The first three CORECTX_* defines below look
 * like the previous offsets (0x000/0x008/0x010) and the last three like their replacements
 * (0x000/0x040/0x048); only the latter set matches the reordered struct -- confirm.
 */
#define CORECTX_GUEST_FRAME_OFFSET 0x000
#define CORECTX_SCRATCH_OFFSET 0x008
#define CORECTX_CRASH_STACK_OFFSET 0x010
#define CORECTX_CRASH_STACK_OFFSET 0x000
#define CORECTX_GUEST_FRAME_OFFSET 0x040
#define CORECTX_SCRATCH_OFFSET 0x048
.macro FUNCTION name
.section .text.\name, "ax", %progbits

View file

@ -36,5 +36,9 @@ void barrierInitAll(Barrier *barrier)
// Clears the calling core's bit in barrier->val, then waits until barrier->val reads zero.
// NOTE(review): this diff view has no +/- markers. Going by the hunk header (5 lines before,
// 9 lines after), the first statement below looks like the removed busy-wait and the remaining
// statements like its replacement -- confirm against the actual file.
void barrierWait(Barrier *barrier)
{
// Previous form: repeat the atomic AND until the value observed *before* the AND is already zero.
while (atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId))) != 0);
// New form: clear this core's bit exactly once.
atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId)));
// Presumably signals an event so that cores parked in __wfe() below re-check the barrier.
// NOTE(review): Arm guidance is to issue a DSB before SEV so the store above is visible to the
// cores being woken; confirm whether __sev() (or the atomic above) already provides that.
__sev();
// Re-read the barrier after each wake-up; leave once every bit has been cleared.
do {
__wfe();
} while (atomic_load(&barrier->val) != 0);
}

View file

@ -22,39 +22,44 @@
#include "execute_function.h"
struct ExceptionStackFrame;
// Context structure with one instance per entry of g_coreCtxs; accessed through currentCoreCtx.
// NOTE(review): this diff view interleaves the previous layout (plain `struct CoreCtx`, fields
// annotated 0x00-0x5C) with the new `ALIGN(64)` layout, without +/- markers -- confirm against
// the actual file. In the new layout the fields expected to be mostly read are annotated
// 0x00-0x1F and the fields expected to be written start at 0x40 (guestFrame is itself ALIGN(64)),
// so the two groups fall on different cache lines under the "at most 64 bytes" assumption
// stated in the comment below.
typedef struct CoreCtx {
struct ExceptionStackFrame *guestFrame; // @0x00
u64 scratch; // @0x08
u8 *crashStack; // @0x10
u64 kernelArgument; // @0x18
uintptr_t kernelEntrypoint; // @0x20
u32 coreId; // @0x28
u8 gicInterfaceMask; // @0x2C. Equal to BIT(coreId) anyway
bool isBootCore; // @0x2D
bool warmboot; // @0x2E
typedef struct ALIGN(64) CoreCtx {
// Most likely only just read (assume cache line size of at most 64 bytes):
// Timer stuff
u64 totalTimeInHypervisor; // @0x30. cntvoff_el2 is updated to that value.
u64 emulPtimerCval; // @0x38. When setting cntp_cval_el0 and on interrupt
// "Execute function"
ExecutedFunction executedFunction; // @0x40
void *executedFunctionArgs; // @0x48
Barrier executedFunctionBarrier; // @0x50
u32 executedFunctionSrcCore; // @0x54
bool executedFunctionSync; // @0x58. Receiver fills it
u8 *crashStack; // @0x00
u64 kernelArgument; // @0x08
uintptr_t kernelEntrypoint; // @0x10
u32 coreId; // @0x18
u8 gicInterfaceMask; // @0x1C. Equal to BIT(coreId) anyway
bool isBootCore; // @0x1D
bool warmboot; // @0x1E
// Debug features
bool wasPaused; // @0x59
bool wasPaused; // @0x1F
// Most likely written to:
ALIGN(64) struct ExceptionStackFrame *guestFrame; // @0x40
u64 scratch; // @0x48
// Timer stuff
u64 totalTimeInHypervisor; // @0x50. cntvoff_el2 is updated to that value.
u64 emulPtimerCval; // @0x58. When setting cntp_cval_el0 and on interrupt
// "Execute function"
ExecutedFunction executedFunction; // @0x60
void *executedFunctionArgs; // @0x68
Barrier executedFunctionBarrier; // @0x70
u32 executedFunctionSrcCore; // @0x74
bool executedFunctionSync; // @0x78. Receiver fills it
// Cache stuff
u32 setWayCounter; // @0x5C
u32 setWayCounter; // @0x7C
} CoreCtx;
// Compile-time checks that selected "@0x.." annotations on CoreCtx match the real field offsets.
// NOTE(review): the first four asserts carry the previous offsets and the last four the new ones
// (this diff view has no +/- markers); both sets cannot hold at once -- confirm against the file.
// NOTE(review): the fields addressed from assembly through CORECTX_*_OFFSET (crashStack,
// guestFrame, scratch) are not covered by any assert here; consider checking them as well.
static_assert(offsetof(CoreCtx, warmboot) == 0x2E, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x38, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x58, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, setWayCounter) == 0x5C, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, warmboot) == 0x1E, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x58, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x78, "Wrong definition for CoreCtx");
static_assert(offsetof(CoreCtx, setWayCounter) == 0x7C, "Wrong definition for CoreCtx");
// Table of four CoreCtx instances, defined in another translation unit.
extern CoreCtx g_coreCtxs[4];
// Global register variable: currentCoreCtx is bound to register x18 rather than stored in memory.
register CoreCtx *currentCoreCtx asm("x18");

View file

@ -23,8 +23,8 @@
#include "single_step.h"
// File-local state for the debug pause / single-step logic.
// NOTE(review): this diff view has no +/- markers; the two atomic_uint declarations appear twice
// (previous form first, then the form with ALIGN(64) and the TODO) -- confirm against the file.
static Barrier g_debugPauseBarrier;
// Core bitmasks: the paused list is tested with BIT(coreId) and cleared with ~coreList elsewhere
// in this file; the single-step list is stored from a core list masked by the unpaused cores.
static atomic_uint g_debugPausePausedCoreList;
static atomic_uint g_debugPauseSingleStepCoreList;
static ALIGN(64) atomic_uint g_debugPausePausedCoreList;
static atomic_uint g_debugPauseSingleStepCoreList; // TODO: put this variable on the same cache line as the above
static inline void debugSetThisCorePaused(void)
{
@ -40,6 +40,7 @@ void debugPauseSgiHandler(void)
void debugPauseWaitAndUpdateSingleStep(void)
{
u32 coreId = currentCoreCtx->coreId;
__builtin_prefetch(&g_debugPausePausedCoreList, 0, 3);
if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) {
unmaskIrq();
do {
@ -64,6 +65,8 @@ void debugPauseCores(u32 coreList)
{
maskIrq();
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 3);
u32 desiredList = coreList;
u32 remainingList = coreList;
u32 readList = atomic_load(&g_debugPausePausedCoreList);
@ -91,6 +94,8 @@ void debugUnpauseCores(u32 coreList, u32 singleStepList)
{
singleStepList &= coreList;
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 0);
// Since we're using a debugger lock, a simple stlr should be fine...
atomic_store(&g_debugPauseSingleStepCoreList, singleStepList);
atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList);

View file

@ -104,6 +104,8 @@ vector_entry \name
.if \type == EXCEPTION_TYPE_GUEST
ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE]
prfm pldl1keep, [x18]
prfm pstl1keep, [x18, #0x40]
str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET]
mov w1, #1
.else

View file

@ -42,7 +42,7 @@ END_FUNCTION
FUNCTION spinlockTryLock
mov x1, x0
mov w2, #1
prfm pstl1strm, [x1]
prfm pstl1keep, [x1]
1:
ldaxr w0, [x1]
cbnz w0, 2f

View file

@ -97,6 +97,9 @@ _postMmuEnableReturnAddr:
mov x1, x20
bl thermosphereMain
prfm pldl1keep, [x18]
prfm pstl1keep, [x18, #0x40]
dsb sy
isb