mirror of
https://github.com/Atmosphere-NX/Atmosphere
synced 2024-12-22 12:21:18 +00:00
thermosphere: cache rewrite
This commit is contained in:
parent
613402121a
commit
5a445e9394
4 changed files with 235 additions and 232 deletions
|
@ -1,166 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "caches.h"
|
||||
#include "preprocessor.h"
|
||||
#include "core_ctx.h"
|
||||
|
||||
/*
 * Generates `void name(const void *addr, size_t size)`.
 *
 * The generated function issues the cache maintenance instruction `isn` once for
 * every cache line overlapping [addr, addr + size), then executes the barrier
 * statement(s) passed as `post`. `cache` is either `Data` or `Instruction` and
 * selects which cacheGetSmallest<cache>CacheLineSize() helper provides the stride.
 *
 * The line size is deliberately held in a uintptr_t: computing the alignment mask
 * ~(lineCacheSize - 1) in 32 bits and then zero-extending it would clear the
 * upper 32 bits of the address.
 */
#define DEFINE_CACHE_RANGE_FUNC(isn, name, cache, post)\
void name(const void *addr, size_t size)\
{\
    const uintptr_t lineCacheSize = cacheGetSmallest##cache##CacheLineSize();\
    const uintptr_t lineMask = ~(lineCacheSize - 1);\
    const uintptr_t begin = (uintptr_t)addr & lineMask;\
    const uintptr_t end = ((uintptr_t)addr + size + lineCacheSize - 1) & lineMask;\
    for (uintptr_t pos = begin; pos < end; pos += lineCacheSize) {\
        __asm__ __volatile__ (isn ", %0" :: "r"(pos) : "memory");\
    }\
    post;\
}
|
||||
|
||||
/*
 * Writes the cache selector (0-based level in bits [3:1], instruction-cache flag
 * in bit 0) to csselr_el1, then issues an ISB.
 */
static inline ALINLINE void cacheSelectByLevel(bool instructionCache, u32 level)
{
    u32 selector = (level & 7) << 1;
    if (instructionCache) {
        selector |= 1;
    }

    SET_SYSREG(csselr_el1, selector);
    __isb();
}
|
||||
|
||||
/*
 * Invalidates (without cleaning) every set/way of the data cache at `level`
 * (0-based, as written to csselr_el1) using "dc isw".
 * No barrier is issued here.
 */
static inline ALINLINE void cacheInvalidateDataCacheLevel(u32 level)
{
    cacheSelectByLevel(false, level);
    u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);

    // The way index sits in bits [31:32-A], A = ceil(log2(numWays)), so the shift is
    // clz(numWays - 1). clz(numWays) is one bit too small whenever numWays is a power
    // of two, which leaves the upper ways untouched. With a single way the index is
    // always 0 (and clz(0) is undefined), so the shift value is irrelevant.
    u32 wayShift = numWays > 1 ? (u32)__builtin_clz(numWays - 1) : 0;
    u32 setShift = (ccsidr & 7) + 4;
    u32 lbits = (level & 7) << 1;

    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = ((u64)way << wayShift) | ((u64)set << setShift) | lbits;
            __asm__ __volatile__ ("dc isw, %0" :: "r"(val) : "memory");
        }
    }
}
|
||||
|
||||
/*
 * Cleans and invalidates every set/way of the data cache at `level`
 * (0-based, as written to csselr_el1) using "dc cisw", then issues
 * a full-system DSB followed by an ISB.
 */
static inline ALINLINE void cacheCleanInvalidateDataCacheLevel(u32 level)
{
    cacheSelectByLevel(false, level);
    u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);

    // The way index sits in bits [31:32-A], A = ceil(log2(numWays)), so the shift is
    // clz(numWays - 1). clz(numWays) is one bit too small whenever numWays is a power
    // of two, which leaves the upper ways untouched. With a single way the index is
    // always 0 (and clz(0) is undefined), so the shift value is irrelevant.
    u32 wayShift = numWays > 1 ? (u32)__builtin_clz(numWays - 1) : 0;
    u32 setShift = (ccsidr & 7) + 4;
    u32 lbits = (level & 7) << 1;

    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = ((u64)way << wayShift) | ((u64)set << setShift) | lbits;
            __asm__ __volatile__ ("dc cisw, %0" :: "r"(val) : "memory");
        }
    }

    __dsb_sy();
    __isb();
}
|
||||
|
||||
/*
 * Invalidates the data cache levels in the half-open range [from, to)
 * (0-based levels), then issues a full-system DSB and an ISB.
 */
static inline ALINLINE void cacheInvalidateDataCacheLevels(u32 from, u32 to)
{
    // Let's hope it doesn't generate a stack frame...
    u32 level = from;
    while (level < to) {
        cacheInvalidateDataCacheLevel(level);
        level++;
    }

    __dsb_sy();
    __isb();
}
|
||||
|
||||
// Clean and invalidate data cache lines by address ("dc civac") over a range, then __dsb().
DEFINE_CACHE_RANGE_FUNC("dc civac", cacheCleanInvalidateDataCacheRange, Data, __dsb())
|
||||
// Clean data cache lines by address to the point of unification ("dc cvau"), then __dsb().
DEFINE_CACHE_RANGE_FUNC("dc cvau", cacheCleanDataCacheRangePoU, Data, __dsb())
|
||||
// Invalidate instruction cache lines by address to the point of unification ("ic ivau"),
// then __dsb() and __isb(). The stride is the smallest instruction cache line size.
DEFINE_CACHE_RANGE_FUNC("ic ivau", cacheInvalidateInstructionCacheRangePoU, Instruction, __dsb(); __isb())
|
||||
|
||||
void cacheHandleSelfModifyingCodePoU(const void *addr, size_t size)
|
||||
{
|
||||
// See docs for ctr_el0.{dic, idc}. It's unclear when these bits have been added, but they're
|
||||
// RES0 if not implemented, so that's fine
|
||||
u32 ctr = (u32)GET_SYSREG(ctr_el0);
|
||||
if (!(ctr & BIT(28))) {
|
||||
cacheCleanDataCacheRangePoU(addr, size);
|
||||
}
|
||||
if (!(ctr & BIT(29))) {
|
||||
cacheInvalidateInstructionCacheRangePoU(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void cacheClearSharedDataCachesOnBoot(void)
|
||||
{
|
||||
u32 clidr = (u32)GET_SYSREG(clidr_el1);
|
||||
u32 louis = (clidr >> 21) & 7;
|
||||
u32 loc = (clidr >> 24) & 7;
|
||||
cacheInvalidateDataCacheLevels(louis, loc);
|
||||
}
|
||||
|
||||
void cacheClearLocalDataCacheOnBoot(void)
|
||||
{
|
||||
u32 clidr = (u32)GET_SYSREG(clidr_el1);
|
||||
u32 louis = (clidr >> 21) & 7;
|
||||
cacheInvalidateDataCacheLevels(0, louis);
|
||||
}
|
||||
|
||||
|
||||
/* Ok so:
|
||||
- cache set/way ops can't really be virtualized
|
||||
- since we have only one guest OS & don't care about security (for space limitations),
|
||||
we do the following:
|
||||
- ignore all cache s/w ops applying before the Level Of Unification Inner Shareable (L1, typically).
|
||||
These clearly break coherency and should only be done once, on power on/off/suspend/resume only. And we already
|
||||
do it ourselves...
|
||||
- allow ops after the LoUIS, but do it ourselves and ignore the next (numSets*numWay - 1) requests. This is because
|
||||
we have to handle Nintendo's dodgy code
|
||||
- ignore "invalidate only" ops by the guest. Should only be done on power on/resume and we already did it ourselves...
|
||||
- transform "clean only" into "clean and invalidate"
|
||||
*/
|
||||
void cacheHandleTrappedSetWayOperation(bool invalidateOnly)
|
||||
{
|
||||
DEBUG("hello");
|
||||
if (invalidateOnly) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 clidr = (u32)GET_SYSREG(clidr_el1);
|
||||
u32 louis = (clidr >> 21) & 7;
|
||||
|
||||
u32 csselr = (u32)GET_SYSREG(csselr_el1);
|
||||
u32 level = (csselr >> 1) & 7;
|
||||
if (csselr & BIT(0)) {
|
||||
// Icache, ignore
|
||||
return;
|
||||
} else if (level < louis) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);
|
||||
u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
|
||||
u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
|
||||
if (currentCoreCtx->setWayCounter++ == 0) {
|
||||
cacheCleanInvalidateDataCacheLevel(level);
|
||||
}
|
||||
|
||||
if (currentCoreCtx->setWayCounter >= numSets * numWays) {
|
||||
currentCoreCtx->setWayCounter = 0;
|
||||
}
|
||||
}
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "utils.h"
|
||||
#include "sysreg.h"
|
||||
|
||||
static inline u32 cacheGetInstructionCachePolicy(void)
|
||||
{
|
||||
u32 ctr = (u32)GET_SYSREG(ctr_el0);
|
||||
return (ctr >> 14) & 3;
|
||||
}
|
||||
|
||||
static inline u32 cacheGetSmallestInstructionCacheLineSize(void)
|
||||
{
|
||||
u32 ctr = (u32)GET_SYSREG(ctr_el0);
|
||||
u32 shift = ctr & 0xF;
|
||||
// "log2 of the number of words"...
|
||||
return 4 << shift;
|
||||
}
|
||||
|
||||
static inline u32 cacheGetSmallestDataCacheLineSize(void)
|
||||
{
|
||||
u32 ctr = (u32)GET_SYSREG(ctr_el0);
|
||||
u32 shift = (ctr >> 16) & 0xF;
|
||||
// "log2 of the number of words"...
|
||||
return 4 << shift;
|
||||
}
|
||||
|
||||
/*
 * Invalidates all instruction caches to the point of unification, Inner Shareable
 * ("ic ialluis").
 *
 * A DSB is required for the invalidation to be complete before the ISB resynchronizes
 * instruction fetch; an ISB alone does not wait for cache maintenance to finish.
 */
static inline void cacheInvalidateInstructionCache(void)
{
    __asm__ __volatile__ (
        "ic ialluis\n\t"
        "dsb ish"
        ::: "memory"
    );
    __isb();
}
|
||||
|
||||
/*
 * Invalidates all instruction caches to the point of unification for the executing
 * core only ("ic iallu").
 *
 * A DSB is required for the invalidation to be complete before the ISB resynchronizes
 * instruction fetch; an ISB alone does not wait for cache maintenance to finish.
 */
static inline void cacheInvalidateInstructionCacheLocal(void)
{
    __asm__ __volatile__ (
        "ic iallu\n\t"
        "dsb nsh"
        ::: "memory"
    );
    __isb();
}
|
||||
|
||||
// Cleans and invalidates ("dc civac") every data cache line overlapping [addr, addr + size).
void cacheCleanInvalidateDataCacheRange(const void *addr, size_t size);
|
||||
// Cleans ("dc cvau") every data cache line overlapping [addr, addr + size) to the PoU.
void cacheCleanDataCacheRangePoU(const void *addr, size_t size);
|
||||
|
||||
// Invalidates ("ic ivau") every instruction cache line overlapping [addr, addr + size) to the PoU.
void cacheInvalidateInstructionCacheRangePoU(const void *addr, size_t size);
|
||||
|
||||
// Performs the data clean / instruction invalidate steps needed after writing code to
// [addr, addr + size), skipping the steps ctr_el0 bits 28 and 29 report as unnecessary.
void cacheHandleSelfModifyingCodePoU(const void *addr, size_t size);
|
||||
|
||||
// Invalidates (without cleaning) the data cache levels in [LoUIS, LoC), as read from clidr_el1.
void cacheClearSharedDataCachesOnBoot(void);
|
||||
// Invalidates (without cleaning) the data cache levels in [0, LoUIS), as read from clidr_el1.
void cacheClearLocalDataCacheOnBoot(void);
|
||||
|
||||
// Handles a trapped guest set/way operation; invalidate-only requests are ignored.
void cacheHandleTrappedSetWayOperation(bool invalidateOnly);
|
164
thermosphere/src/cpu/hvisor_cpu_caches.cpp
Normal file
164
thermosphere/src/cpu/hvisor_cpu_caches.cpp
Normal file
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Copyright (c) 2019-2020 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hvisor_cpu_caches.hpp"
|
||||
#include "../core_ctx.h"
|
||||
|
||||
/*
 * Generates `void name(const void *addr, size_t size)`.
 *
 * The generated function issues the cache maintenance instruction `isn` once for
 * every cache line overlapping [addr, addr + size), then executes the barrier
 * statement(s) passed as `post`. `cache` is either `Data` or `Instruction` and
 * selects which GetSmallest<cache>CacheLineSize() helper provides the stride.
 *
 * The line size is deliberately held in a uintptr_t: computing the alignment mask
 * ~(lineCacheSize - 1) in 32 bits and then zero-extending it would clear the
 * upper 32 bits of the address.
 */
#define DEFINE_CACHE_RANGE_FUNC(isn, name, cache, post)\
void name(const void *addr, size_t size)\
{\
    const uintptr_t lineCacheSize = GetSmallest##cache##CacheLineSize();\
    const uintptr_t lineMask = ~(lineCacheSize - 1);\
    const uintptr_t base = reinterpret_cast<uintptr_t>(addr);\
    const uintptr_t begin = base & lineMask;\
    const uintptr_t end = (base + size + lineCacheSize - 1) & lineMask;\
    for (uintptr_t pos = begin; pos < end; pos += lineCacheSize) {\
        __asm__ __volatile__ (isn ", %0" :: "r"(pos) : "memory");\
    }\
    post;\
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Writes the cache selector (0-based level in bits [3:1], instruction-cache flag in bit 0)
// to csselr_el1, then issues an ISB.
ALWAYS_INLINE void SelectCacheLevel(bool instructionCache, u32 level)
{
    u32 selector = (level & 7) << 1;
    if (instructionCache) {
        selector |= 1;
    }

    THERMOSPHERE_SET_SYSREG(csselr_el1, selector);
    ams::hvisor::cpu::isb();
}
|
||||
|
||||
// Invalidates (without cleaning) every set/way of the data cache at `level`
// (0-based, as written to csselr_el1) using "dc isw". No barrier is issued here.
[[gnu::optimize("O2")]] ALWAYS_INLINE void InvalidateDataCacheLevel(u32 level)
{
    SelectCacheLevel(false, level);
    u32 ccsidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ccsidr_el1));
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);

    // The way index sits in bits [31:32-A], A = ceil(log2(numWays)), so the shift is
    // clz(numWays - 1). clz(numWays) is one bit too small whenever numWays is a power
    // of two, which leaves the upper ways untouched. With a single way the index is
    // always 0 (and clz(0) is undefined), so the shift value is irrelevant.
    u32 wayShift = numWays > 1 ? static_cast<u32>(__builtin_clz(numWays - 1)) : 0;
    u32 setShift = (ccsidr & 7) + 4;
    u32 lbits = (level & 7) << 1;

    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = (static_cast<u64>(way) << wayShift) | (static_cast<u64>(set) << setShift) | lbits;
            __asm__ __volatile__ ("dc isw, %0" :: "r"(val) : "memory");
        }
    }
}
|
||||
|
||||
// Cleans and invalidates every set/way of the data cache at `level`
// (0-based, as written to csselr_el1) using "dc cisw". No barrier is issued here.
ALWAYS_INLINE void CleanInvalidateDataCacheLevel(u32 level)
{
    SelectCacheLevel(false, level);
    u32 ccsidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ccsidr_el1));
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);

    // The way index sits in bits [31:32-A], A = ceil(log2(numWays)), so the shift is
    // clz(numWays - 1). clz(numWays) is one bit too small whenever numWays is a power
    // of two, which leaves the upper ways untouched. With a single way the index is
    // always 0 (and clz(0) is undefined), so the shift value is irrelevant.
    u32 wayShift = numWays > 1 ? static_cast<u32>(__builtin_clz(numWays - 1)) : 0;
    u32 setShift = (ccsidr & 7) + 4;
    u32 lbits = (level & 7) << 1;

    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = (static_cast<u64>(way) << wayShift) | (static_cast<u64>(set) << setShift) | lbits;
            __asm__ __volatile__ ("dc cisw, %0" :: "r"(val) : "memory");
        }
    }
}
|
||||
|
||||
// Invalidates the data cache levels in the half-open range [from, to) (0-based levels),
// then issues a full-system DSB and an ISB.
[[gnu::optimize("O2")]] ALWAYS_INLINE void InvalidateDataCacheLevels(u32 from, u32 to)
{
    // Let's hope it doesn't generate a stack frame...
    u32 level = from;
    while (level < to) {
        InvalidateDataCacheLevel(level);
        level++;
    }

    ams::hvisor::cpu::dsbSy();
    ams::hvisor::cpu::isb();
}
|
||||
|
||||
}
|
||||
namespace ams::hvisor::cpu {
|
||||
|
||||
// Clean and invalidate data cache lines by address ("dc civac") over a range, then dsbSy().
DEFINE_CACHE_RANGE_FUNC("dc civac", CleanInvalidateDataCacheRange, Data, dsbSy())
|
||||
// Clean data cache lines by address to the point of unification ("dc cvau"), then dsb().
DEFINE_CACHE_RANGE_FUNC("dc cvau", CleanDataCacheRangePoU, Data, dsb())
|
||||
// Invalidate instruction cache lines by address to the point of unification ("ic ivau"),
// then dsb() and isb(). The stride is the smallest instruction cache line size.
DEFINE_CACHE_RANGE_FUNC("ic ivau", InvalidateInstructionCacheRangePoU, Instruction, dsb(); isb())
|
||||
|
||||
void HandleSelfModifyingCodePoU(const void *addr, size_t size)
|
||||
{
|
||||
// See docs for ctr_el0.{dic, idc}. It's unclear when these bits have been added, but they're
|
||||
// RES0 if not implemented, so that's fine
|
||||
u32 ctr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0));
|
||||
if (!(ctr & BIT(28))) {
|
||||
CleanDataCacheRangePoU(addr, size);
|
||||
}
|
||||
if (!(ctr & BIT(29))) {
|
||||
InvalidateInstructionCacheRangePoU(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
[[gnu::optimize("O2")]] void ClearSharedDataCachesOnBoot(void)
|
||||
{
|
||||
u32 clidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(clidr_el1));
|
||||
u32 louis = (clidr >> 21) & 7;
|
||||
u32 loc = (clidr >> 24) & 7;
|
||||
InvalidateDataCacheLevels(louis, loc);
|
||||
}
|
||||
|
||||
[[gnu::optimize("O2")]] void ClearLocalDataCacheOnBoot(void)
|
||||
{
|
||||
u32 clidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(clidr_el1));
|
||||
u32 louis = (clidr >> 21) & 7;
|
||||
InvalidateDataCacheLevels(0, louis);
|
||||
}
|
||||
|
||||
/* Ok so:
|
||||
- cache set/way ops can't really be virtualized
|
||||
- since we have only one guest OS & don't care about security (for space limitations),
|
||||
we do the following:
|
||||
- ignore all cache s/w ops applying before the Level Of Unification Inner Shareable (L1, typically).
|
||||
These clearly break coherency and should only be done once, on power on/off/suspend/resume only. And we already
|
||||
do it ourselves...
|
||||
- allow ops after the LoUIS, but do it ourselves and ignore the next (numSets*numWay - 1) requests. This is because
|
||||
we have to handle Nintendo's dodgy code
|
||||
- transform all s/w cache ops into clean and invalidate
|
||||
*/
|
||||
void HandleTrappedSetWayOperation()
|
||||
{
|
||||
u32 clidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(clidr_el1));
|
||||
u32 louis = (clidr >> 21) & 7;
|
||||
|
||||
u32 csselr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(csselr_el1));
|
||||
u32 level = (csselr >> 1) & 7;
|
||||
if (csselr & BIT(0)) {
|
||||
// Icache, ignore
|
||||
return;
|
||||
} else if (level < louis) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
u32 ccsidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ccsidr_el1));
|
||||
u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
|
||||
u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
|
||||
if (currentCoreCtx->setWayCounter++ == 0) {
|
||||
CleanInvalidateDataCacheLevel(level);
|
||||
ams::hvisor::cpu::dsbSy();
|
||||
ams::hvisor::cpu::isb();
|
||||
}
|
||||
|
||||
if (currentCoreCtx->setWayCounter >= numSets * numWays) {
|
||||
currentCoreCtx->setWayCounter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
71
thermosphere/src/cpu/hvisor_cpu_caches.hpp
Normal file
71
thermosphere/src/cpu/hvisor_cpu_caches.hpp
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2019-2020 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hvisor_cpu_instructions.hpp"
|
||||
#include "hvisor_cpu_sysreg_general.hpp"
|
||||
|
||||
namespace ams::hvisor::cpu {
|
||||
|
||||
// Returns the 2-bit field at ctr_el0 bits [15:14] (instruction cache policy).
static inline u32 GetInstructionCachePolicy(void)
{
    return (static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0)) >> 14) & 3;
}
|
||||
|
||||
// Returns the smallest instruction cache line size, in bytes, from ctr_el0 bits [3:0].
static inline u32 GetSmallestInstructionCacheLineSize(void)
{
    // The field is "log2 of the number of words"; a word is 4 bytes.
    const u32 log2Words = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0)) & 0xF;
    return 4 << log2Words;
}
|
||||
|
||||
// Returns the smallest data cache line size, in bytes, from ctr_el0 bits [19:16].
static inline u32 GetSmallestDataCacheLineSize(void)
{
    // The field is "log2 of the number of words"; a word is 4 bytes.
    const u32 log2Words = (static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0)) >> 16) & 0xF;
    return 4 << log2Words;
}
|
||||
|
||||
static inline void InvalidateInstructionCache(void)
|
||||
{
|
||||
__asm__ __volatile__ ("ic ialluis" ::: "memory");
|
||||
cpu::isb();
|
||||
}
|
||||
|
||||
static inline void InvalidateInstructionCacheLocal(void)
|
||||
{
|
||||
__asm__ __volatile__ ("ic iallu" ::: "memory");
|
||||
cpu::isb();
|
||||
}
|
||||
|
||||
// Cleans and invalidates ("dc civac") every data cache line overlapping [addr, addr + size).
void CleanInvalidateDataCacheRange(const void *addr, size_t size);
|
||||
// Cleans ("dc cvau") every data cache line overlapping [addr, addr + size) to the PoU.
void CleanDataCacheRangePoU(const void *addr, size_t size);
|
||||
|
||||
// Invalidates ("ic ivau") every instruction cache line overlapping [addr, addr + size) to the PoU.
void InvalidateInstructionCacheRangePoU(const void *addr, size_t size);
|
||||
|
||||
// Performs the data clean / instruction invalidate steps needed after writing code to
// [addr, addr + size), skipping the steps ctr_el0 bits 28 and 29 report as unnecessary.
void HandleSelfModifyingCodePoU(const void *addr, size_t size);
|
||||
|
||||
// Invalidates (without cleaning) the data cache levels in [LoUIS, LoC), as read from clidr_el1.
void ClearSharedDataCachesOnBoot(void);
|
||||
// Invalidates (without cleaning) the data cache levels in [0, LoUIS), as read from clidr_el1.
void ClearLocalDataCacheOnBoot(void);
|
||||
|
||||
// Dunno where else to put that
|
||||
// Handles a trapped guest set/way cache operation: at or above the LoUIS, the first request of a
// pass cleans and invalidates the whole selected level and the rest of the pass is ignored.
void HandleTrappedSetWayOperation();
|
||||
|
||||
}
|
Loading…
Reference in a new issue