thermosphere: reduce usage of nonvolatile memory by around 4KB

This commit is contained in:
TuxSH 2020-01-31 18:42:16 +00:00
parent 1fd2cdb664
commit 680a768178
12 changed files with 54 additions and 80 deletions

View file

@ -22,21 +22,21 @@ ifeq ($(PLATFORM), qemu)
export PLATFORM := qemu
PLATFORM_SOURCES := src/platform/qemu
PLATFORM_DEFINES := -DPLATFORM_QEMU
PLATFORM_DEFINES := -DPLATFORM_QEMU -DMAX_CORE=4 -DMAX_BCR=6 -DMAX_WCR=4
else ifeq ($(PLATFORM), tegra-t210-arm-tf)
export PLATFORM := tegra-t210-arm-tf
PLATFORM_SOURCES := src/platform/tegra
PLATFORM_DEFINES := -DPLATFORM_TEGRA -DPLATFORM_TEGRA_T210_ARM_TF
PLATFORM_DEFINES := -DPLATFORM_TEGRA -DPLATFORM_TEGRA_T210_ARM_TF -DMAX_CORE=4 -DMAX_BCR=6 -DMAX_WCR=4
else
export PLATFORM := tegra-t210-nintendo
PLATFORM_SOURCES := src/platform/tegra
PLATFORM_DEFINES := -DPLATFORM_TEGRA -D DPLATFORM_TEGRA_T210_NINTENDO
# Default platform (tegra-t210-nintendo). Besides the platform selectors this
# passes the core count and the number of hardware breakpoint/watchpoint
# register pairs, which size the per-platform tables in C and assembly.
# The selector is spelled like the other branches (-DPLATFORM_...); the previous
# "-D DPLATFORM_..." defined a macro with a stray leading 'D'.
PLATFORM_DEFINES := -DPLATFORM_TEGRA -DPLATFORM_TEGRA_T210_NINTENDO -DMAX_CORE=4 -DMAX_BCR=6 -DMAX_WCR=4
endif
@ -57,7 +57,7 @@ INCLUDES := include ../common/include
# options for code generation
#---------------------------------------------------------------------------------
# Note: -ffixed-x18 and -mgeneral-regs-only are very important and must be enabled
ARCH := -march=armv8-a -mtune=cortex-a57 -mgeneral-regs-only -ffixed-x18
ARCH := -march=armv8-a -mtune=cortex-a57 -mgeneral-regs-only -ffixed-x18 -Wno-psabi
DEFINES := -D__CCPLEX__ -DATMOSPHERE_GIT_BRANCH=\"$(AMSBRANCH)\" -DATMOSPHERE_GIT_REV=\"$(AMSREV)\"\
-DATMOSPHERE_RELEASE_VERSION_HASH="0x$(AMSHASH)" $(PLATFORM_DEFINES)
CFLAGS := \
@ -79,7 +79,7 @@ CFLAGS += $(INCLUDE)
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
ASFLAGS := -g $(ARCH)
ASFLAGS := -g $(ARCH) $(DEFINES)
LDFLAGS = -specs=$(TOPDIR)/linker.specs -nostartfiles -nostdlib -g $(ARCH) -Wl,-Map,$(notdir $*.map)
LIBS := -lgcc

View file

@ -123,13 +123,6 @@ SECTIONS
. = ALIGN(8);
} >mainVa AT>main :main
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } >mainVa AT>main :main
.eh_frame : { KEEP (*(.eh_frame)) *(.eh_frame.*) } >mainVa AT>main :main
.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } >mainVa AT>main :main
.gnu_extab : { *(.gnu_extab*) } >mainVa AT>main :main
.exception_ranges : { *(.exception_ranges .exception_ranges*) } >mainVa AT>main :main
.dynamic : { *(.dynamic) } >mainVa AT>main :main
.interp : { *(.interp) } >mainVa AT>main :main
.note.gnu.build-id : { *(.note.gnu.build-id) } >mainVa AT>main :main
@ -144,29 +137,36 @@ SECTIONS
.bss (NOLOAD) :
{
. = ALIGN(0x1000);
__bss_start__ = ABSOLUTE(.);
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
. = ALIGN(8);
} >mainVa :NONE
.tempbss (NOLOAD) :
{
. = ALIGN(0x1000);
__real_bss_end__ = ABSOLUTE(.);
__image_size__ = ABSOLUTE(__real_bss_end__ - __start__);
/*ASSERT(__image_size__ <= __max_image_size__, "Image too big!");*/
/*
The logic here: tempbss *additional pages* are at a very different PA, but
we can allow .tempbss to use unused "non-temporary" BSS space. Their VAs are
contiguous.
*/
*(.tempbss .tempbss.*)
. = ALIGN(0x1000);
__bss_end__ = ABSOLUTE(.);
__temp_size__ = ABSOLUTE(__bss_end__ - __real_bss_end__);
ASSERT(__temp_size__ <= __max_temp_size__, "tempbss too big!");
} >mainVa :NONE
. = ALIGN(8);
/* Sections we keep in the ELF but otherwise discard */
.eh_frame_hdr (NOLOAD) : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } >mainVa :NONE
.eh_frame (NOLOAD) : { KEEP (*(.eh_frame)) *(.eh_frame.*) } >mainVa :NONE
.gcc_except_table (NOLOAD) : { *(.gcc_except_table .gcc_except_table.*) } >mainVa :NONE
.gnu_extab (NOLOAD) : { *(.gnu_extab*) } >mainVa :NONE
.exception_ranges (NOLOAD) : { *(.exception_ranges .exception_ranges*) } >mainVa :NONE
/* ==================
==== Metadata ====
================== */

View file

@ -1,4 +1,4 @@
%rename link old_link
*link:
%(old_link) -T %:getenv(TOPDIR /%:getenv(PLATFORM .mem)) -T %:getenv(TOPDIR /linker.ld) --nmagic --gc-sections
%(old_link) -T %:getenv(TOPDIR /%:getenv(PLATFORM .mem)) -T %:getenv(TOPDIR /linker.ld) -no-pie --nmagic --gc-sections

View file

@ -24,7 +24,7 @@
/// Structure to synchronize and keep track of breakpoints
typedef struct BreakpointManager {
DebugRegisterPair breakpoints[16];
DebugRegisterPair breakpoints[MAX_BCR];
RecursiveSpinlock lock;
u32 maxBreakpoints;
u16 allocationBitmap;

View file

@ -16,25 +16,32 @@
#include "asm_macros.s"
.altmacro
// Emits the load sequence for debug register pairs \id down to 0.
//   what: "b" selects dbgbcr<n>_el1/dbgbvr<n>_el1, "w" selects dbgwcr<n>_el1/dbgwvr<n>_el1
//   id:   index of the first (highest) pair to emit; recursion stops after pair 0
//
// Each step pre-decrements x0 by 16 bytes, loads one pair into x2 (control)
// and x3 (value), then writes both system registers.
//
// Every step must stay exactly three instructions (12 bytes): the code that
// branches into this expansion computes its entry offset in multiples of 12.
// Without the ldp, x2/x3 would never be loaded and the steps would be 8 bytes,
// so the computed branch target would no longer land on a step boundary.
//
// Needs .altmacro so that %(\id - 1) is evaluated before the recursive call.
// Clobbers x0, x2, x3.
//
// NOTE(review): the visible call sites pass the pair count (MAX_BCR / MAX_WCR)
// as \id, which expands count + 1 steps; the highest valid index is presumably
// count - 1 -- confirm against the callers.
.macro LOAD_DBG_REG_PAIRS what, id
    ldp     x2, x3, [x0, #-0x10]!
    msr     dbg\what\()cr\id\()_el1, x2
    msr     dbg\what\()vr\id\()_el1, x3
    .if \id != 0
        LOAD_DBG_REG_PAIRS \what, %(\id - 1)
    .endif
.endm
// Precondition: x1 <= MAX_BCR (the jump offset below is (MAX_BCR - x1) * 12 and must not be negative)
FUNCTION loadBreakpointRegs
// x1 = number
dmb ish
adr x16, 1f
add x0, x0, #(16 * 8)
mov x4, #(16 * 12)
add x0, x0, #(MAX_BCR * 8)
mov x4, #(MAX_BCR * 12)
sub x4, x4, x1,lsl #3
sub x4, x4, x1,lsl #2
add x16, x16, x4
br x16
1:
.irp count, 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
ldp x2, x3, [x0, #-0x10]!
msr dbgbcr\count\()_el1, x2
msr dbgbvr\count\()_el1, x3
.endr
LOAD_DBG_REG_PAIRS b, MAX_BCR
dsb ish
isb
ret
@ -46,19 +53,16 @@ FUNCTION loadWatchpointRegs
dmb ish
adr x16, 1f
add x0, x0, #(16 * 8)
mov x4, #(16 * 12)
add x0, x0, #(MAX_WCR * 8)
mov x4, #(MAX_WCR * 12)
sub x4, x4, x1,lsl #3
sub x4, x4, x1,lsl #2
add x16, x16, x4
br x16
1:
.irp count, 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
ldp x2, x3, [x0, #-0x10]!
msr dbgwcr\count\()_el1, x2
msr dbgwvr\count\()_el1, x3
.endr
LOAD_DBG_REG_PAIRS w, MAX_WCR
dsb ish
isb
ret

View file

@ -50,9 +50,9 @@ static const struct{
GDBCommandHandler handler;
} gdbCommandHandlers[] = {
{ '?', GDB_HANDLER(GetStopReason) },
{ '!', GDB_HANDLER(EnableExtendedMode) }, // note: stubbed
{ 'c', GDB_HANDLER(ContinueOrStepDeprecated) },
{ 'C', GDB_HANDLER(ContinueOrStepDeprecated) },
//{ '!', GDB_HANDLER(EnableExtendedMode) }, // note: stubbed
//{ 'c', GDB_HANDLER(ContinueOrStepDeprecated) },
//{ 'C', GDB_HANDLER(ContinueOrStepDeprecated) },
{ 'D', GDB_HANDLER(Detach) },
{ 'F', GDB_HANDLER(HioReply) },
{ 'g', GDB_HANDLER(ReadRegisters) },
@ -65,8 +65,8 @@ static const struct{
{ 'P', GDB_HANDLER(WriteRegister) },
{ 'q', GDB_HANDLER(ReadQuery) },
{ 'Q', GDB_HANDLER(WriteQuery) },
{ 's', GDB_HANDLER(ContinueOrStepDeprecated) },
{ 'S', GDB_HANDLER(ContinueOrStepDeprecated) },
//{ 's', GDB_HANDLER(ContinueOrStepDeprecated) },
//{ 'S', GDB_HANDLER(ContinueOrStepDeprecated) },
{ 'T', GDB_HANDLER(IsThreadAlive) },
{ 'v', GDB_HANDLER(VerboseCommand) },
{ 'X', GDB_HANDLER(WriteMemoryRaw) },

View file

@ -45,14 +45,14 @@ void memoryMapSetupMmu(const LoadImageLayout *layout, u64 *mmuTable)
Layout in physmem:
Location1
Image (code and data incl. BSS)
Part of "temp" (tempbss, stacks) if there's enough space left
Location2
Remaining of "temp" (note: we don't and can't check if there's enough mem left!)
tempbss
MMU table (taken from temp physmem)
Layout in vmem:
Location1
Image
padding
tempbss
Location2
Crash stacks
@ -62,32 +62,18 @@ void memoryMapSetupMmu(const LoadImageLayout *layout, u64 *mmuTable)
*/
// Map our code & data (.text/other code, .rodata, .data, .bss) at the bottom of our L3 range, all RWX
// Note that BSS is page-aligned
// Note that the end of "image" is page-aligned
// See LD script for more details
uintptr_t curVa = MEMORY_MAP_VA_IMAGE;
uintptr_t curPa = layout->startPa;
size_t tempInImageRegionMaxSize = layout->maxImageSize - layout->imageSize;
size_t tempInImageRegionSize;
size_t tempExtraSize;
if (layout->tempSize <= tempInImageRegionMaxSize) {
tempInImageRegionSize = layout->tempSize;
tempExtraSize = 0;
} else {
// We need extra data
tempInImageRegionSize = tempInImageRegionMaxSize;
tempExtraSize = layout->tempSize - tempInImageRegionSize;
}
size_t imageRegionMapSize = (layout->imageSize + tempInImageRegionSize + 0xFFF) & ~0xFFFul;
size_t tempExtraMapSize = (tempExtraSize + 0xFFF) & ~0xFFFul;
// Do not map the MMU table in that mapping:
mmu_map_page_range(mmuTable, curVa, curPa, imageRegionMapSize, normalAttribs);
mmu_map_page_range(mmuTable, curVa, curPa, layout->imageSize, normalAttribs);
curVa += imageRegionMapSize;
curVa += layout->imageSize;
curPa = layout->tempPa;
mmu_map_page_range(mmuTable, curVa, curPa, tempExtraMapSize, normalAttribs);
curPa += tempExtraMapSize;
mmu_map_page_range(mmuTable, curVa, curPa, layout->tempSize , normalAttribs);
curPa += layout->tempSize;
// Map the remaining temporary data as stacks, aligned 0x1000

View file

@ -49,7 +49,6 @@
typedef struct LoadImageLayout {
uintptr_t startPa;
size_t maxImageSize;
size_t imageSize; // "image" includes "real" BSS but not tempbss
uintptr_t tempPa;

View file

@ -22,7 +22,7 @@
#include <stdatomic.h>
#include "spinlock.h"
#define MAX_SW_BREAKPOINTS 32
#define MAX_SW_BREAKPOINTS 16
typedef struct SoftwareBreakpoint {
uintptr_t address; // VA

View file

@ -59,7 +59,7 @@ _startCommon:
// Temporarily use temp end region as stack, then create the translation table
// The stack top is also equal to the mmu table address...
adr x0, g_loadImageLayout
ldp x2, x3, [x0, #0x18]
ldp x2, x3, [x0, #0x10]
add x1, x2, x3
mov sp, x1
bl memoryMapSetupMmu
@ -111,23 +111,9 @@ _postMmuEnableReturnAddr:
.pool
/*
typedef struct LoadImageLayout {
uintptr_t startPa;
size_t imageSize; // "image" includes "real" BSS but not tempbss
size_t maxImageSize;
uintptr_t tempPa;
size_t maxTempSize;
size_t tempSize;
uintptr_t vbar;
} LoadImageLayout;
*/
.global g_loadImageLayout
g_loadImageLayout:
.quad __start_pa__
.quad __max_image_size__
.quad __image_size__
.quad __temp_pa__
.quad __max_temp_size__

View file

@ -20,7 +20,6 @@
#include "preprocessor.h"
#include "debug_log.h"
#define MAX_CORE 4
#define BIT(n) (1u << (n))
#define BITL(n) (1ull << (n))
#define MASK(n) (BIT(n) - 1)

View file

@ -29,7 +29,7 @@ typedef struct WatchpointManager {
u32 maxWatchpoints;
u32 maxSplitWatchpoints;
u16 allocationBitmap;
DebugRegisterPair splitWatchpoints[16 * 8];
DebugRegisterPair splitWatchpoints[MAX_WCR * 8];
} WatchpointManager;
extern WatchpointManager g_watchpointManager;