mirror of
https://github.com/Atmosphere-NX/Atmosphere
synced 2025-01-22 06:36:10 +00:00
thermosphere: optimize barrier and core_ctx
This commit is contained in:
parent
b168b0c2eb
commit
5dc54d8764
7 changed files with 53 additions and 33 deletions
|
@ -16,9 +16,10 @@
|
|||
|
||||
#define EXCEP_STACK_FRAME_SIZE 0x140
|
||||
|
||||
#define CORECTX_GUEST_FRAME_OFFSET 0x000
|
||||
#define CORECTX_SCRATCH_OFFSET 0x008
|
||||
#define CORECTX_CRASH_STACK_OFFSET 0x010
|
||||
#define CORECTX_CRASH_STACK_OFFSET 0x000
|
||||
#define CORECTX_GUEST_FRAME_OFFSET 0x040
|
||||
#define CORECTX_SCRATCH_OFFSET 0x048
|
||||
|
||||
|
||||
.macro FUNCTION name
|
||||
.section .text.\name, "ax", %progbits
|
||||
|
|
|
@ -36,5 +36,9 @@ void barrierInitAll(Barrier *barrier)
|
|||
|
||||
void barrierWait(Barrier *barrier)
|
||||
{
|
||||
while (atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId))) != 0);
|
||||
atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId)));
|
||||
__sev();
|
||||
do {
|
||||
__wfe();
|
||||
} while (atomic_load(&barrier->val) != 0);
|
||||
}
|
||||
|
|
|
@ -22,39 +22,44 @@
|
|||
#include "execute_function.h"
|
||||
|
||||
struct ExceptionStackFrame;
|
||||
typedef struct CoreCtx {
|
||||
struct ExceptionStackFrame *guestFrame; // @0x00
|
||||
u64 scratch; // @0x08
|
||||
u8 *crashStack; // @0x10
|
||||
u64 kernelArgument; // @0x18
|
||||
uintptr_t kernelEntrypoint; // @0x20
|
||||
u32 coreId; // @0x28
|
||||
u8 gicInterfaceMask; // @0x2C. Equal to BIT(coreId) anyway
|
||||
bool isBootCore; // @0x2D
|
||||
bool warmboot; // @0x2E
|
||||
typedef struct ALIGN(64) CoreCtx {
|
||||
// Most likely only just read (assume cache line size of at most 64 bytes):
|
||||
|
||||
// Timer stuff
|
||||
u64 totalTimeInHypervisor; // @0x30. cntvoff_el2 is updated to that value.
|
||||
u64 emulPtimerCval; // @0x38. When setting cntp_cval_el0 and on interrupt
|
||||
|
||||
// "Execute function"
|
||||
ExecutedFunction executedFunction; // @0x40
|
||||
void *executedFunctionArgs; // @0x48
|
||||
Barrier executedFunctionBarrier; // @0x50
|
||||
u32 executedFunctionSrcCore; // @0x54
|
||||
bool executedFunctionSync; // @0x58. Receiver fills it
|
||||
u8 *crashStack; // @0x00
|
||||
u64 kernelArgument; // @0x08
|
||||
uintptr_t kernelEntrypoint; // @0x10
|
||||
u32 coreId; // @0x18
|
||||
u8 gicInterfaceMask; // @0x1C. Equal to BIT(coreId) anyway
|
||||
bool isBootCore; // @0x1D
|
||||
bool warmboot; // @0x1E
|
||||
|
||||
// Debug features
|
||||
bool wasPaused; // @0x59
|
||||
bool wasPaused; // @0x1F
|
||||
|
||||
// Most likely written to:
|
||||
|
||||
ALIGN(64) struct ExceptionStackFrame *guestFrame; // @0x40
|
||||
u64 scratch; // @0x48
|
||||
|
||||
// Timer stuff
|
||||
u64 totalTimeInHypervisor; // @0x50. cntvoff_el2 is updated to that value.
|
||||
u64 emulPtimerCval; // @0x58. When setting cntp_cval_el0 and on interrupt
|
||||
|
||||
// "Execute function"
|
||||
ExecutedFunction executedFunction; // @0x60
|
||||
void *executedFunctionArgs; // @0x68
|
||||
Barrier executedFunctionBarrier; // @0x70
|
||||
u32 executedFunctionSrcCore; // @0x74
|
||||
bool executedFunctionSync; // @0x78. Receiver fills it
|
||||
|
||||
// Cache stuff
|
||||
u32 setWayCounter; // @0x5C
|
||||
u32 setWayCounter; // @0x7C
|
||||
} CoreCtx;
|
||||
|
||||
static_assert(offsetof(CoreCtx, warmboot) == 0x2E, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x38, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x58, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, setWayCounter) == 0x5C, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, warmboot) == 0x1E, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x58, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x78, "Wrong definition for CoreCtx");
|
||||
static_assert(offsetof(CoreCtx, setWayCounter) == 0x7C, "Wrong definition for CoreCtx");
|
||||
|
||||
extern CoreCtx g_coreCtxs[4];
|
||||
register CoreCtx *currentCoreCtx asm("x18");
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
#include "single_step.h"
|
||||
|
||||
static Barrier g_debugPauseBarrier;
|
||||
static atomic_uint g_debugPausePausedCoreList;
|
||||
static atomic_uint g_debugPauseSingleStepCoreList;
|
||||
static ALIGN(64) atomic_uint g_debugPausePausedCoreList;
|
||||
static atomic_uint g_debugPauseSingleStepCoreList; // TODO: put this variable on the same cache line as the above
|
||||
|
||||
static inline void debugSetThisCorePaused(void)
|
||||
{
|
||||
|
@ -40,6 +40,7 @@ void debugPauseSgiHandler(void)
|
|||
void debugPauseWaitAndUpdateSingleStep(void)
|
||||
{
|
||||
u32 coreId = currentCoreCtx->coreId;
|
||||
__builtin_prefetch(&g_debugPausePausedCoreList, 0, 3);
|
||||
if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) {
|
||||
unmaskIrq();
|
||||
do {
|
||||
|
@ -64,6 +65,8 @@ void debugPauseCores(u32 coreList)
|
|||
{
|
||||
maskIrq();
|
||||
|
||||
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 3);
|
||||
|
||||
u32 desiredList = coreList;
|
||||
u32 remainingList = coreList;
|
||||
u32 readList = atomic_load(&g_debugPausePausedCoreList);
|
||||
|
@ -91,6 +94,8 @@ void debugUnpauseCores(u32 coreList, u32 singleStepList)
|
|||
{
|
||||
singleStepList &= coreList;
|
||||
|
||||
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 0);
|
||||
|
||||
// Since we're using a debugger lock, a simple stlr should be fine...
|
||||
atomic_store(&g_debugPauseSingleStepCoreList, singleStepList);
|
||||
atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList);
|
||||
|
|
|
@ -104,6 +104,8 @@ vector_entry \name
|
|||
|
||||
.if \type == EXCEPTION_TYPE_GUEST
|
||||
ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE]
|
||||
prfm pldl1keep, [x18]
|
||||
prfm pstl1keep, [x18, #0x40]
|
||||
str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET]
|
||||
mov w1, #1
|
||||
.else
|
||||
|
|
|
@ -42,7 +42,7 @@ END_FUNCTION
|
|||
FUNCTION spinlockTryLock
|
||||
mov x1, x0
|
||||
mov w2, #1
|
||||
prfm pstl1strm, [x1]
|
||||
prfm pstl1keep, [x1]
|
||||
1:
|
||||
ldaxr w0, [x1]
|
||||
cbnz w0, 2f
|
||||
|
|
|
@ -97,6 +97,9 @@ _postMmuEnableReturnAddr:
|
|||
mov x1, x20
|
||||
bl thermosphereMain
|
||||
|
||||
prfm pldl1keep, [x18]
|
||||
prfm pstl1keep, [x18, #0x40]
|
||||
|
||||
dsb sy
|
||||
isb
|
||||
|
||||
|
|
Loading…
Reference in a new issue