thermosphere: trap set/way dcache access

note: qemu does not implement the trap
This commit is contained in:
TuxSH 2020-01-15 17:30:25 +00:00
parent 72d1992eec
commit 6b8a843ffb
6 changed files with 92 additions and 4 deletions

View file

@ -16,6 +16,7 @@
#include "caches.h"
#include "preprocessor.h"
#include "core_ctx.h"
#define DEFINE_CACHE_RANGE_FUNC(isn, name, cache, post)\
void name(const void *addr, size_t size)\
@ -47,14 +48,35 @@ static inline ALINLINE void cacheInvalidateDataCacheLevel(u32 level)
u32 setShift = (ccsidr & 7) + 4;
u32 lbits = (level & 7) << 1;
for (u32 way = 0; way <= numWays; way++) {
for (u32 set = 0; set <= numSets; set++) {
for (u32 way = 0; way < numWays; way++) {
for (u32 set = 0; set < numSets; set++) {
u64 val = ((u64)way << wayShift) | ((u64)set << setShift) | lbits;
__asm__ __volatile__ ("dc isw, %0" :: "r"(val) : "memory");
}
}
}
/*
 * Cleans and invalidates, by set/way, every line of the cache at the given level.
 *
 * level: zero-based cache level (0 = L1); only the low 3 bits are used.
 *
 * The geometry is read from CCSIDR_EL1 after calling cacheSelectByLevel(false, level)
 * (presumably this selects the data/unified cache of that level through CSSELR_EL1 -- confirm
 * against its definition). A "dc cisw" is then issued for every (way, set) pair, followed by a
 * full-system DSB and an ISB.
 */
static inline ALINLINE void cacheCleanInvalidateDataCacheLevel(u32 level)
{
    cacheSelectByLevel(false, level);
    u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);

    // CCSIDR_EL1 stores (associativity - 1) and (number of sets - 1).
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);

    // In the set/way operand the way index occupies the top ceil(log2(numWays)) bits of the low
    // 32-bit word, so the shift is the number of leading zeroes of the *highest way index*
    // (numWays - 1), not of numWays itself: using clz(numWays) is one bit short whenever numWays
    // is a power of two, and the upper ways are then never reached.
    // A direct-mapped cache (numWays == 1) has no way field; avoid the undefined clz(0).
    u32 wayShift = (numWays > 1) ? (u32)__builtin_clz(numWays - 1) : 0;

    // log2(line size in bytes): the set index starts right above the line offset.
    u32 setShift = (ccsidr & 7) + 4;

    // Cache level goes in bits [3:1] of the operand.
    u32 lbits = (level & 7) << 1;

    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = ((u64)way << wayShift) | ((u64)set << setShift) | lbits;
            __asm__ __volatile__ ("dc cisw, %0" :: "r"(val) : "memory");
        }
    }

    // Wait for the maintenance operations to complete before continuing.
    __dsb_sy();
    __isb();
}
static inline ALINLINE void cacheInvalidateDataCacheLevels(u32 from, u32 to)
{
// Let's hope it doesn't generate a stack frame...
@ -97,3 +119,48 @@ void cacheClearLocalDataCacheOnBoot(void)
u32 louis = (clidr >> 21) & 7;
cacheInvalidateDataCacheLevels(0, louis);
}
/* Ok so:
- cache set/way ops can't really be virtualized
- since we have only one guest OS & don't care about security (for space limitations),
we do the following:
- ignore all cache s/w ops applying before the Level Of Unification Inner Shareable (L1, typically).
These clearly break coherency and should only be done once, on power on/off/suspend/resume only. And we already
do it ourselves...
- allow ops after the LoUIS, but do it ourselves and ignore the next (numSets*numWays - 1) requests. This is because
we have to handle Nintendo's dodgy code
- ignore "invalidate only" ops by the guest. Should only be done on power on/resume and we already did it ourselves...
- transform "clean only" into "clean and invalidate"
*/
/*
 * Handles a data cache set/way maintenance instruction trapped from the guest.
 *
 * invalidateOnly: true when the trapped instruction only invalidates; such requests are dropped.
 *
 * For other requests, the cache currently selected in CSSELR_EL1 is examined: instruction caches
 * and levels below the LoUIS are ignored. Otherwise the first request of a series triggers a full
 * clean+invalidate of that level, and the per-core counter makes the remaining
 * (sets * ways - 1) requests of the series no-ops.
 */
void cacheHandleTrappedSetWayOperation(bool invalidateOnly)
{
    DEBUG("hello");

    if (invalidateOnly) {
        return;
    }

    u32 louis = ((u32)GET_SYSREG(clidr_el1) >> 21) & 7;
    u32 selector = (u32)GET_SYSREG(csselr_el1);
    u32 level = (selector >> 1) & 7;

    // Bit 0 of the selector set means an instruction cache is selected.
    if ((selector & BIT(0)) || level < louis) {
        return;
    }

    u32 geometry = (u32)GET_SYSREG(ccsidr_el1);
    u32 setCount = 1 + ((geometry >> 13) & 0x7FFF);
    u32 wayCount = 1 + ((geometry >> 3) & 0x3FF);

    // Record this request before acting on it; only the first one of a series does the work.
    u32 requestIndex = currentCoreCtx->setWayCounter;
    currentCoreCtx->setWayCounter = requestIndex + 1;
    if (requestIndex == 0) {
        cacheCleanInvalidateDataCacheLevel(level);
    }

    // Once a whole series has been seen, start over.
    if (currentCoreCtx->setWayCounter >= setCount * wayCount) {
        currentCoreCtx->setWayCounter = 0;
    }
}

View file

@ -56,3 +56,5 @@ void cacheHandleSelfModifyingCodePoU(const void *addr, size_t size);
void cacheClearSharedDataCachesOnBoot(void);
void cacheClearLocalDataCacheOnBoot(void);
// Handles a trapped guest data cache set/way instruction.
// invalidateOnly is true for DC ISW (the request is ignored) and false for DC CSW / DC CISW.
void cacheHandleTrappedSetWayOperation(bool invalidateOnly);

View file

@ -41,6 +41,9 @@ typedef struct CoreCtx {
void *executedFunctionArgs; // @0x48
Barrier executedFunctionBarrier; // @0x50
bool executedFunctionSync; // @0x54
// Cache stuff
u32 setWayCounter; // @0x58
} CoreCtx;
static_assert(offsetof(CoreCtx, warmboot) == 0x2E, "Wrong definition for CoreCtx");

View file

@ -24,6 +24,10 @@
#define BITL(n) (1ull << (n))
#endif
// Encodings of the data cache set/way maintenance instructions (DC ISW, DC CSW, DC CISW).
// NOTE(review): the tuple layout is presumably (op0, op1, CRn, CRm, op2) -- confirm against
// the macro that consumes these tuples.
#define TUP_DC_ISW (1, 0, 7, 6, 2)
#define TUP_DC_CSW (1, 0, 7, 10, 2)
#define TUP_DC_CISW (1, 0, 7, 14, 2)
#define TUP_OSDTRRX_EL1 (2, 0, 0, 0, 2)
#define TUP_MDCCINT_EL1 (2, 0, 0, 2, 0)
#define TUP_MDSCR_EL1 (2, 0, 0, 2, 2)

View file

@ -16,7 +16,7 @@
#include "sysreg_traps.h"
#include "guest_timers.h"
#include "software_breakpoints.h"
#include "caches.h"
static inline u64 doSystemRegisterRead(const ExceptionStackFrame *frame, u32 normalizedIss)
{
@ -43,7 +43,7 @@ static inline u64 doSystemRegisterRead(const ExceptionStackFrame *frame, u32 nor
val = currentCoreCtx->emulPtimerCval;
break;
}
// NOTE: We should trap ID_AA64* register to lie to the guest about e.g. MemTag but it would take too much space
default: {
// We shouldn't have trapped on other registers other than debug regs
// and we want the latter as RA0/WI
@ -74,6 +74,15 @@ static inline void doSystemRegisterWrite(ExceptionStackFrame *frame, u32 normali
writeEmulatedPhysicalCompareValue(frame, val);
break;
}
case ENCODE_SYSREG_ISS(DC_CSW):
case ENCODE_SYSREG_ISS(DC_CISW): {
cacheHandleTrappedSetWayOperation(false);
break;
}
case ENCODE_SYSREG_ISS(DC_ISW): {
cacheHandleTrappedSetWayOperation(true);
break;
}
default: {
// We shouldn't have trapped on other registers other than debug regs

View file

@ -44,6 +44,9 @@ void enableTraps(void)
// Trap SMC instructions
hcr |= HCR_TSC;
// Trap set/way isns
hcr |= HCR_TSW;
// Reroute physical IRQs to EL2
hcr |= HCR_IMO;