From af80d5816b94534efeab7970ada27160c765d772 Mon Sep 17 00:00:00 2001 From: TuxSH Date: Mon, 29 Jul 2019 22:38:44 +0200 Subject: [PATCH] thermosphere: unfuck qemu JIT, fix exc. handling bug, add cache funcs --- thermosphere/Makefile | 2 +- thermosphere/src/arm.h | 10 ++ thermosphere/src/arm.s | 214 +++++++++++++++++++++++++++ thermosphere/src/exception_vectors.s | 11 +- thermosphere/src/main.c | 14 +- thermosphere/src/start.s | 48 +++--- 6 files changed, 274 insertions(+), 25 deletions(-) create mode 100644 thermosphere/src/arm.h create mode 100644 thermosphere/src/arm.s diff --git a/thermosphere/Makefile b/thermosphere/Makefile index aafcdb4b5..b7d655ca2 100644 --- a/thermosphere/Makefile +++ b/thermosphere/Makefile @@ -132,7 +132,7 @@ export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) all: $(BUILD) ifeq ($(PLATFORM), qemu) -QEMUFLAGS := -nographic -machine virt,secure=on,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 2 -m 1024\ +QEMUFLAGS := -nographic -machine virt,secure=on,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4 -m 1024\ -bios bl1.bin -d unimp -semihosting-config enable,target=native -serial mon:stdio # NOTE: copy bl1.bin, bl2.bin, bl31.bin from your own build of Arm Trusted Firmware! 
diff --git a/thermosphere/src/arm.h b/thermosphere/src/arm.h new file mode 100644 index 000000000..58e00bae3 --- /dev/null +++ b/thermosphere/src/arm.h @@ -0,0 +1,10 @@ +#pragma once + +void flush_dcache_all(void); /* clean & invalidate all data cache levels by set/way */ +void invalidate_dcache_all(void); /* invalidate (without cleaning) all data cache levels by set/way */ + +void flush_dcache_range(const void *start, const void *end); /* clean & invalidate by address; start is aligned down to a cache line, end is exclusive */ +void invalidate_dcache_range(const void *start, const void *end); /* invalidate by address; start is aligned down to a cache line, end is exclusive */ + +void invalidate_icache_all_inner_shareable(void); /* issues "ic ialluis" */ +void invalidate_icache_all(void); /* issues "ic iallu" */ diff --git a/thermosphere/src/arm.s b/thermosphere/src/arm.s new file mode 100644 index 000000000..9b8228b98 --- /dev/null +++ b/thermosphere/src/arm.s @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2018-2019 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* The following functions are taken/adapted from https://github.com/u-boot/u-boot/blob/master/arch/arm/cpu/armv8/cache.S */ + +/* + * (C) Copyright 2013 + * David Feng + * + * This file is based on sample code from ARMv8 ARM. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +/* + * void __asm_dcache_level(level) + * + * flush (clean & invalidate) or invalidate one cache level by set/way.
+ * + * x0: cache level + * x1: 0 clean & invalidate, 1 invalidate only + * x2~x9, x12: clobbered + */ +.section .text.__asm_dcache_level, "ax", %progbits +.type __asm_dcache_level, %function +__asm_dcache_level: + lsl x12, x0, #1 + msr csselr_el1, x12 /* select cache level */ + isb /* sync change of ccsidr_el1 */ + mrs x6, ccsidr_el1 /* read the new ccsidr_el1 */ + and x2, x6, #7 /* x2 <- log2(cache line size)-4 */ + add x2, x2, #4 /* x2 <- log2(cache line size) */ + mov x3, #0x3ff + and x3, x3, x6, lsr #3 /* x3 <- max number of #ways */ + clz w5, w3 /* bit position of #ways */ + mov x4, #0x7fff + and x4, x4, x6, lsr #13 /* x4 <- max number of #sets */ + /* x12 <- cache level << 1 */ + /* x2 <- line length offset */ + /* x3 <- number of cache ways - 1 */ + /* x4 <- number of cache sets - 1 */ + /* x5 <- bit position of #ways */ + +loop_set: + mov x6, x3 /* x6 <- working copy of #ways */ +loop_way: + lsl x7, x6, x5 + orr x9, x12, x7 /* map way and level to cisw value */ + lsl x7, x4, x2 + orr x9, x9, x7 /* map set number to cisw value */ + tbz w1, #0, 1f /* x1 bit 0 clear: clean & invalidate */ + dc isw, x9 /* invalidate only by set/way */ + b 2f +1: dc cisw, x9 /* clean & invalidate by set/way */ +2: subs x6, x6, #1 /* decrement the way */ + b.ge loop_way + subs x4, x4, #1 /* decrement the set */ + b.ge loop_set + + ret + +/* + * void __asm_dcache_all(int invalidate_only) + * + * x0: 0 clean & invalidate, 1 invalidate only + * + * flush or invalidate all data cache by SET/WAY.
+ */ +.section .text.__asm_dcache_all, "ax", %progbits +.type __asm_dcache_all, %function +__asm_dcache_all: + mov x1, x0 /* x1 <- mode flag expected by __asm_dcache_level */ + dsb sy + mrs x10, clidr_el1 /* read clidr_el1 */ + lsr x11, x10, #24 + and x11, x11, #0x7 /* x11 <- loc */ + cbz x11, finished /* if loc is 0, exit */ + mov x15, lr /* save lr: the bl below overwrites it */ + mov x0, #0 /* start flush at cache level 0 */ + /* x0 <- cache level */ + /* x10 <- clidr_el1 */ + /* x11 <- loc */ + /* x15 <- return address */ + +loop_level: + lsl x12, x0, #1 + add x12, x12, x0 /* x12 <- tripled cache level */ + lsr x12, x10, x12 + and x12, x12, #7 /* x12 <- cache type */ + cmp x12, #2 + b.lt skip /* skip if no cache or icache */ + bl __asm_dcache_level /* x1 = 0 flush, 1 invalidate */ +skip: + add x0, x0, #1 /* increment cache level */ + cmp x11, x0 + b.gt loop_level + + mov x0, #0 + msr csselr_el1, x0 /* restore csselr_el1 */ + dsb sy + isb + mov lr, x15 /* restore the saved return address */ + +finished: + ret + +.section .text.flush_dcache_all, "ax", %progbits +.type flush_dcache_all, %function +.global flush_dcache_all +flush_dcache_all: + mov x0, #0 /* 0: clean & invalidate */ + b __asm_dcache_all + +.section .text.invalidate_dcache_all, "ax", %progbits +.type invalidate_dcache_all, %function +.global invalidate_dcache_all +invalidate_dcache_all: + mov x0, #1 /* 1: invalidate only */ + b __asm_dcache_all + +/* + * void __asm_flush_dcache_range(start, end) (renamed -> flush_dcache_range) + * + * clean & invalidate data cache in the range + * + * x0: start address + * x1: end address (exclusive) + */ +.section .text.flush_dcache_range, "ax", %progbits +.type flush_dcache_range, %function +.global flush_dcache_range +flush_dcache_range: + mrs x3, ctr_el0 + lsr x3, x3, #16 + and x3, x3, #0xf /* x3 <- ctr_el0 bits [19:16] */ + mov x2, #4 + lsl x2, x2, x3 /* cache line size */ + + /* x2 <- minimal cache line size in cache system */ + sub x3, x2, #1 + bic x0, x0, x3 /* align start down to a cache line */ +1: dc civac, x0 /* clean & invalidate data or unified cache */ + add x0, x0, x2 + cmp x0, x1 + b.lo 1b /* loop while x0 < end; the first line is always processed */ + dsb sy + ret + +/* + * void __asm_invalidate_dcache_range(start, end) (-> invalidate_dcache_range) + * + * invalidate data
cache in the range + * + * x0: start address + * x1: end address (exclusive) + */ +.section .text.invalidate_dcache_range, "ax", %progbits +.type invalidate_dcache_range, %function +.global invalidate_dcache_range +invalidate_dcache_range: + mrs x3, ctr_el0 + ubfm x3, x3, #16, #19 /* x3 <- ctr_el0 bits [19:16] */ + mov x2, #4 + lsl x2, x2, x3 /* cache line size */ + + /* x2 <- minimal cache line size in cache system */ + sub x3, x2, #1 + bic x0, x0, x3 /* align start down to a cache line */ +1: dc ivac, x0 /* invalidate data or unified cache */ + add x0, x0, x2 + cmp x0, x1 + b.lo 1b /* loop while x0 < end; the first line is always processed */ + dsb sy + ret + +/* + * void __asm_invalidate_icache_all(void) (-> invalidate_icache_all_inner_shareable) + * + * invalidate all icache entries. + */ +.section .text.invalidate_icache_all_inner_shareable, "ax", %progbits +.type invalidate_icache_all_inner_shareable, %function +.global invalidate_icache_all_inner_shareable +invalidate_icache_all_inner_shareable: + dsb ish + isb + ic ialluis /* invalidate all icache, Inner Shareable variant */ + dsb ish + isb + ret + +.section .text.invalidate_icache_all, "ax", %progbits +.type invalidate_icache_all, %function +.global invalidate_icache_all +invalidate_icache_all: + dsb ish + isb + ic iallu /* invalidate all icache, non-broadcast variant */ + dsb ish + isb + ret diff --git a/thermosphere/src/exception_vectors.s b/thermosphere/src/exception_vectors.s index 767dadbd7..e60b8adc5 100644 --- a/thermosphere/src/exception_vectors.s +++ b/thermosphere/src/exception_vectors.s @@ -55,6 +55,13 @@ bl _save_all_regs .endm +.macro save_all_regs_reload_x18 + save_all_regs + + // Reload our x18 value (currentCoreCtx) + ldp x18, xzr, [sp, #0x120] +.endm + .macro pivot_stack_for_crash // Note: reset x18 assumed uncorrupted // Note: replace sp_el0 with crashing sp @@ -124,8 +131,6 @@ vector_entry irq_sp0 mov x30, x29 - // Reload our x18 value (currentCoreCtx) - ldp x18, xzr, [sp, #0x120] ret vector_entry fiq_sp0 @@ -194,7 +199,7 @@ vector_entry serror_spx /* Lower EL, A64 */ vector_entry synch_a64 - save_all_regs + save_all_regs_reload_x18 mov x0, sp mrs x1, esr_el2 diff --git a/thermosphere/src/main.c b/thermosphere/src/main.c index
35ca98309..cd07962dc 100644 --- a/thermosphere/src/main.c +++ b/thermosphere/src/main.c @@ -1,13 +1,19 @@ #include "utils.h" +#include "core_ctx.h" #include "log.h" #include "platform/uart.h" int main(void) { - uartInit(115200); + if (currentCoreCtx->coreId == 0) { /* only core 0 initialises the UART */ + uartInit(115200); + serialLog("Hello from Thermosphere!\n"); + __builtin_trap(); + } + + else { /* NOTE(review): nothing visible here orders this log after core 0's uartInit -- confirm */ + serialLog("Core %u booted\n", currentCoreCtx->coreId); + } - serialLog("fifo flush fifo flush\n"); - serialLog("Hello from Thermosphere!\n"); - __builtin_trap(); return 0; } diff --git a/thermosphere/src/start.s b/thermosphere/src/start.s index 1fe41e3e5..d99348671 100644 --- a/thermosphere/src/start.s +++ b/thermosphere/src/start.s @@ -40,17 +40,35 @@ _startCommon: msr daifset, 0b1111 msr spsel, #1 - mrs x20, sctlr_el2 + // Set VBAR + ldr x8, =__vectors_start__ + msr vbar_el2, x8 + + // Set system to sane defaults, aarch64 for el1 + mov x4, #0x0838 + movk x4, #0xC5, lsl #16 + orr x1, x4, #0x30000000 + mov x2, #(1 << 31) + mov x3, #0xFFFFFFFF + + msr sctlr_el2, x1 + msr hcr_el2, x2 + msr dacr32_el2, x3 + + dsb sy + isb + + // Writing a non-zero value to x20 (and apparently no other register (?)) is needed to work around a QEMU JIT problem. NOTE(review): the core ID now lives in x10, so the later "cbz x20, _jump_to_main" sees 0x31 unless x20 is rewritten outside the visible hunks -- confirm + mov x20, #0x31 + // Get core ID - mrs x20, mpidr_el1 - and x20, x20, #0xFF + mrs x10, mpidr_el1 + and x10, x10, #0xFF // Set tmp stack ldr x8, =__stacks_top__ - - /* lsl x9, x20, #10 - sub x8, x8, x9*/ - mov sp, x8 + lsl x9, x10, #10 /* x9 <- core ID * 0x400 */ + sub sp, x8, x9 /* each core gets its own slice below __stacks_top__ */ // Set up x18 adrp x18, g_coreCtxs @@ -66,26 +84,22 @@ _startCommon: _store_arg: str x0, [x18, #0] - // Set VBAR - ldr x8, =__vectors_start__ - msr vbar_el2, x8 - - // Make sure the regs have been set - dsb sy - isb - // Don't call init array to save space?
+ // Clear BSS & call main for the first core executing this code - cbz x20, _jump_to_kernel + cbz x20, _jump_to_main /* NOTE(review): this patch sets x20 to #0x31 and moves the core ID to x10; unless x20 is rewritten outside the visible hunks this branch is never taken -- confirm */ ldr x0, =__bss_start__ mov w1, #0 ldr x2, =__end__ sub x2, x2, x0 bl memset + dsb sy /* barriers after the BSS memset, before entering main */ + isb + +_jump_to_main: + bl main /* reached both by the cbz above and by fall-through after the BSS clear */ -_jump_to_kernel: // Jump to kernel mov x8, #(0b1111 << 6 | 0b0101) // EL1h+DAIF msr spsr_el2, x8