mirror of
https://github.com/Atmosphere-NX/Atmosphere
synced 2025-01-20 13:43:35 +00:00
thermosphere: optimize barrier and core_ctx
This commit is contained in:
parent
1f2b8e7918
commit
8538fed043
7 changed files with 53 additions and 33 deletions
|
@ -16,9 +16,10 @@
|
||||||
|
|
||||||
#define EXCEP_STACK_FRAME_SIZE 0x140
|
#define EXCEP_STACK_FRAME_SIZE 0x140
|
||||||
|
|
||||||
#define CORECTX_GUEST_FRAME_OFFSET 0x000
|
#define CORECTX_CRASH_STACK_OFFSET 0x000
|
||||||
#define CORECTX_SCRATCH_OFFSET 0x008
|
#define CORECTX_GUEST_FRAME_OFFSET 0x040
|
||||||
#define CORECTX_CRASH_STACK_OFFSET 0x010
|
#define CORECTX_SCRATCH_OFFSET 0x048
|
||||||
|
|
||||||
|
|
||||||
.macro FUNCTION name
|
.macro FUNCTION name
|
||||||
.section .text.\name, "ax", %progbits
|
.section .text.\name, "ax", %progbits
|
||||||
|
|
|
@ -36,5 +36,9 @@ void barrierInitAll(Barrier *barrier)
|
||||||
|
|
||||||
void barrierWait(Barrier *barrier)
|
void barrierWait(Barrier *barrier)
|
||||||
{
|
{
|
||||||
while (atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId))) != 0);
|
atomic_fetch_and(&barrier->val, ~(BIT(currentCoreCtx->coreId)));
|
||||||
|
__sev();
|
||||||
|
do {
|
||||||
|
__wfe();
|
||||||
|
} while (atomic_load(&barrier->val) != 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,39 +22,44 @@
|
||||||
#include "execute_function.h"
|
#include "execute_function.h"
|
||||||
|
|
||||||
struct ExceptionStackFrame;
|
struct ExceptionStackFrame;
|
||||||
typedef struct CoreCtx {
|
typedef struct ALIGN(64) CoreCtx {
|
||||||
struct ExceptionStackFrame *guestFrame; // @0x00
|
// Most likely only just read (assume cache line size of at most 64 bytes):
|
||||||
u64 scratch; // @0x08
|
|
||||||
u8 *crashStack; // @0x10
|
|
||||||
u64 kernelArgument; // @0x18
|
|
||||||
uintptr_t kernelEntrypoint; // @0x20
|
|
||||||
u32 coreId; // @0x28
|
|
||||||
u8 gicInterfaceMask; // @0x2C. Equal to BIT(coreId) anyway
|
|
||||||
bool isBootCore; // @0x2D
|
|
||||||
bool warmboot; // @0x2E
|
|
||||||
|
|
||||||
// Timer stuff
|
u8 *crashStack; // @0x00
|
||||||
u64 totalTimeInHypervisor; // @0x30. cntvoff_el2 is updated to that value.
|
u64 kernelArgument; // @0x08
|
||||||
u64 emulPtimerCval; // @0x38. When setting cntp_cval_el0 and on interrupt
|
uintptr_t kernelEntrypoint; // @0x10
|
||||||
|
u32 coreId; // @0x18
|
||||||
// "Execute function"
|
u8 gicInterfaceMask; // @0x1C. Equal to BIT(coreId) anyway
|
||||||
ExecutedFunction executedFunction; // @0x40
|
bool isBootCore; // @0x1D
|
||||||
void *executedFunctionArgs; // @0x48
|
bool warmboot; // @0x1E
|
||||||
Barrier executedFunctionBarrier; // @0x50
|
|
||||||
u32 executedFunctionSrcCore; // @0x54
|
|
||||||
bool executedFunctionSync; // @0x58. Receiver fills it
|
|
||||||
|
|
||||||
// Debug features
|
// Debug features
|
||||||
bool wasPaused; // @0x59
|
bool wasPaused; // @0x1F
|
||||||
|
|
||||||
|
// Most likely written to:
|
||||||
|
|
||||||
|
ALIGN(64) struct ExceptionStackFrame *guestFrame; // @0x40
|
||||||
|
u64 scratch; // @0x48
|
||||||
|
|
||||||
|
// Timer stuff
|
||||||
|
u64 totalTimeInHypervisor; // @0x50. cntvoff_el2 is updated to that value.
|
||||||
|
u64 emulPtimerCval; // @0x58. When setting cntp_cval_el0 and on interrupt
|
||||||
|
|
||||||
|
// "Execute function"
|
||||||
|
ExecutedFunction executedFunction; // @0x60
|
||||||
|
void *executedFunctionArgs; // @0x68
|
||||||
|
Barrier executedFunctionBarrier; // @0x70
|
||||||
|
u32 executedFunctionSrcCore; // @0x74
|
||||||
|
bool executedFunctionSync; // @0x78. Receiver fills it
|
||||||
|
|
||||||
// Cache stuff
|
// Cache stuff
|
||||||
u32 setWayCounter; // @0x5C
|
u32 setWayCounter; // @0x7C
|
||||||
} CoreCtx;
|
} CoreCtx;
|
||||||
|
|
||||||
static_assert(offsetof(CoreCtx, warmboot) == 0x2E, "Wrong definition for CoreCtx");
|
static_assert(offsetof(CoreCtx, warmboot) == 0x1E, "Wrong definition for CoreCtx");
|
||||||
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x38, "Wrong definition for CoreCtx");
|
static_assert(offsetof(CoreCtx, emulPtimerCval) == 0x58, "Wrong definition for CoreCtx");
|
||||||
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x58, "Wrong definition for CoreCtx");
|
static_assert(offsetof(CoreCtx, executedFunctionSync) == 0x78, "Wrong definition for CoreCtx");
|
||||||
static_assert(offsetof(CoreCtx, setWayCounter) == 0x5C, "Wrong definition for CoreCtx");
|
static_assert(offsetof(CoreCtx, setWayCounter) == 0x7C, "Wrong definition for CoreCtx");
|
||||||
|
|
||||||
extern CoreCtx g_coreCtxs[4];
|
extern CoreCtx g_coreCtxs[4];
|
||||||
register CoreCtx *currentCoreCtx asm("x18");
|
register CoreCtx *currentCoreCtx asm("x18");
|
||||||
|
|
|
@ -23,8 +23,8 @@
|
||||||
#include "single_step.h"
|
#include "single_step.h"
|
||||||
|
|
||||||
static Barrier g_debugPauseBarrier;
|
static Barrier g_debugPauseBarrier;
|
||||||
static atomic_uint g_debugPausePausedCoreList;
|
static ALIGN(64) atomic_uint g_debugPausePausedCoreList;
|
||||||
static atomic_uint g_debugPauseSingleStepCoreList;
|
static atomic_uint g_debugPauseSingleStepCoreList; // TODO: put this variable on the same cache line as the above
|
||||||
|
|
||||||
static inline void debugSetThisCorePaused(void)
|
static inline void debugSetThisCorePaused(void)
|
||||||
{
|
{
|
||||||
|
@ -40,6 +40,7 @@ void debugPauseSgiHandler(void)
|
||||||
void debugPauseWaitAndUpdateSingleStep(void)
|
void debugPauseWaitAndUpdateSingleStep(void)
|
||||||
{
|
{
|
||||||
u32 coreId = currentCoreCtx->coreId;
|
u32 coreId = currentCoreCtx->coreId;
|
||||||
|
__builtin_prefetch(&g_debugPausePausedCoreList, 0, 3);
|
||||||
if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) {
|
if (atomic_load(&g_debugPausePausedCoreList) & BIT(coreId)) {
|
||||||
unmaskIrq();
|
unmaskIrq();
|
||||||
do {
|
do {
|
||||||
|
@ -64,6 +65,8 @@ void debugPauseCores(u32 coreList)
|
||||||
{
|
{
|
||||||
maskIrq();
|
maskIrq();
|
||||||
|
|
||||||
|
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 3);
|
||||||
|
|
||||||
u32 desiredList = coreList;
|
u32 desiredList = coreList;
|
||||||
u32 remainingList = coreList;
|
u32 remainingList = coreList;
|
||||||
u32 readList = atomic_load(&g_debugPausePausedCoreList);
|
u32 readList = atomic_load(&g_debugPausePausedCoreList);
|
||||||
|
@ -91,6 +94,8 @@ void debugUnpauseCores(u32 coreList, u32 singleStepList)
|
||||||
{
|
{
|
||||||
singleStepList &= coreList;
|
singleStepList &= coreList;
|
||||||
|
|
||||||
|
__builtin_prefetch(&g_debugPausePausedCoreList, 1, 0);
|
||||||
|
|
||||||
// Since we're using a debugger lock, a simple stlr should be fine...
|
// Since we're using a debugger lock, a simple stlr should be fine...
|
||||||
atomic_store(&g_debugPauseSingleStepCoreList, singleStepList);
|
atomic_store(&g_debugPauseSingleStepCoreList, singleStepList);
|
||||||
atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList);
|
atomic_fetch_and(&g_debugPausePausedCoreList, ~coreList);
|
||||||
|
|
|
@ -104,6 +104,8 @@ vector_entry \name
|
||||||
|
|
||||||
.if \type == EXCEPTION_TYPE_GUEST
|
.if \type == EXCEPTION_TYPE_GUEST
|
||||||
ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE]
|
ldp x18, xzr, [sp, #EXCEP_STACK_FRAME_SIZE]
|
||||||
|
prfm pldl1keep, [x18]
|
||||||
|
prfm pstl1keep, [x18, #0x40]
|
||||||
str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET]
|
str x0, [x18, #CORECTX_GUEST_FRAME_OFFSET]
|
||||||
mov w1, #1
|
mov w1, #1
|
||||||
.else
|
.else
|
||||||
|
|
|
@ -42,7 +42,7 @@ END_FUNCTION
|
||||||
FUNCTION spinlockTryLock
|
FUNCTION spinlockTryLock
|
||||||
mov x1, x0
|
mov x1, x0
|
||||||
mov w2, #1
|
mov w2, #1
|
||||||
prfm pstl1strm, [x1]
|
prfm pstl1keep, [x1]
|
||||||
1:
|
1:
|
||||||
ldaxr w0, [x1]
|
ldaxr w0, [x1]
|
||||||
cbnz w0, 2f
|
cbnz w0, 2f
|
||||||
|
|
|
@ -97,6 +97,9 @@ _postMmuEnableReturnAddr:
|
||||||
mov x1, x20
|
mov x1, x20
|
||||||
bl thermosphereMain
|
bl thermosphereMain
|
||||||
|
|
||||||
|
prfm pldl1keep, [x18]
|
||||||
|
prfm pstl1keep, [x18, #0x40]
|
||||||
|
|
||||||
dsb sy
|
dsb sy
|
||||||
isb
|
isb
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue