diff --git a/thermosphere/Makefile b/thermosphere/Makefile
index aafcdb4b5..b7d655ca2 100644
--- a/thermosphere/Makefile
+++ b/thermosphere/Makefile
@@ -132,7 +132,7 @@ export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
all: $(BUILD)
ifeq ($(PLATFORM), qemu)
-QEMUFLAGS := -nographic -machine virt,secure=on,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 2 -m 1024\
+QEMUFLAGS := -nographic -machine virt,secure=on,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4 -m 1024\
-bios bl1.bin -d unimp -semihosting-config enable,target=native -serial mon:stdio
# NOTE: copy bl1.bin, bl2.bin, bl31.bin from your own build of Arm Trusted Firmware!
diff --git a/thermosphere/src/arm.h b/thermosphere/src/arm.h
new file mode 100644
index 000000000..58e00bae3
--- /dev/null
+++ b/thermosphere/src/arm.h
@@ -0,0 +1,10 @@
+#pragma once
+/* Whole data cache by set/way (arm.s): flush = clean & invalidate, invalidate = invalidate only. */
+void flush_dcache_all(void);
+void invalidate_dcache_all(void);
+/* Data cache by address (arm.s): start is rounded down to a cache line, end is exclusive. */
+void flush_dcache_range(const void *start, const void *end);
+void invalidate_dcache_range(const void *start, const void *end);
+/* Instruction cache invalidation (arm.s): `ic ialluis` and `ic iallu` respectively. */
+void invalidate_icache_all_inner_shareable(void);
+void invalidate_icache_all(void);
diff --git a/thermosphere/src/arm.s b/thermosphere/src/arm.s
new file mode 100644
index 000000000..9b8228b98
--- /dev/null
+++ b/thermosphere/src/arm.s
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2018-2019 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The following functions are taken/adapted from https://github.com/u-boot/u-boot/blob/master/arch/arm/cpu/armv8/cache.S */
+
+/*
+ * (C) Copyright 2013
+ * David Feng
+ *
+ * This file is based on sample code from ARMv8 ARM.
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+/*
+ * void __asm_dcache_level(level, invalidate_only)
+ *
+ * Clean & invalidate, or invalidate only, one cache level by set/way.
+ * File-local (no .global); reached with bl from __asm_dcache_all.
+ * x0: cache level
+ * x1: bit 0 clear -> clean & invalidate, bit 0 set -> invalidate only
+ * x2~x7, x9, x12: clobbered (x0 and x1 are left unchanged)
+ */
+.section .text.__asm_dcache_level, "ax", %progbits
+.type __asm_dcache_level, %function
+__asm_dcache_level:
+ lsl x12, x0, #1 /* x12 <- cache level << 1 */
+ msr csselr_el1, x12 /* select cache level */
+ isb /* sync change of ccsidr_el1 */
+ mrs x6, ccsidr_el1 /* read the new ccsidr_el1 */
+ and x2, x6, #7 /* x2 <- log2(cache line size)-4 */
+ add x2, x2, #4 /* x2 <- log2(cache line size) */
+ mov x3, #0x3ff
+ and x3, x3, x6, lsr #3 /* x3 <- ccsidr_el1[12:3]: number of ways - 1 */
+ clz w5, w3 /* x5 <- leading zero count of w3, used as the way shift */
+ mov x4, #0x7fff
+ and x4, x4, x6, lsr #13 /* x4 <- ccsidr_el1[27:13]: number of sets - 1 */
+ /* x12 <- cache level << 1 */
+ /* x2 <- line length offset */
+ /* x3 <- number of cache ways - 1 */
+ /* x4 <- number of cache sets - 1 (counts down as the set index) */
+ /* x5 <- bit position of #ways */
+
+loop_set:
+ mov x6, x3 /* x6 <- way index, counts down from ways - 1 */
+loop_way:
+ lsl x7, x6, x5
+ orr x9, x12, x7 /* map way and level to cisw value */
+ lsl x7, x4, x2
+ orr x9, x9, x7 /* map set number to cisw value */
+ tbz w1, #0, 1f /* x1 bit 0 clear: take the clean & invalidate path */
+ dc isw, x9 /* invalidate only by set/way */
+ b 2f
+1: dc cisw, x9 /* clean & invalidate by set/way */
+2: subs x6, x6, #1 /* decrement the way */
+ b.ge loop_way /* way 0 is processed; exit once the index is negative */
+ subs x4, x4, #1 /* decrement the set */
+ b.ge loop_set /* set 0 is processed; exit once the index is negative */
+
+ ret
+
+/*
+ * void __asm_dcache_all(int invalidate_only)
+ *
+ * x0: 0 clean & invalidate, 1 invalidate only
+ * x0, x1, x10~x12, x15 and everything __asm_dcache_level clobbers: clobbered
+ * flush or invalidate all data cache by SET/WAY. File-local (no .global).
+ */
+.section .text.__asm_dcache_all, "ax", %progbits
+.type __asm_dcache_all, %function
+__asm_dcache_all:
+ mov x1, x0 /* x1 <- mode flag, passed on to __asm_dcache_level */
+ dsb sy
+ mrs x10, clidr_el1 /* read clidr_el1 */
+ lsr x11, x10, #24
+ and x11, x11, #0x7 /* x11 <- loc */
+ cbz x11, finished /* if loc is 0, exit */
+ mov x15, lr /* keep the return address: bl below overwrites lr */
+ mov x0, #0 /* start flush at cache level 0 */
+ /* x0 <- cache level */
+ /* x10 <- clidr_el1 */
+ /* x11 <- loc */
+ /* x15 <- return address */
+
+loop_level:
+ lsl x12, x0, #1
+ add x12, x12, x0 /* x12 <- tripled cache level */
+ lsr x12, x10, x12
+ and x12, x12, #7 /* x12 <- cache type */
+ cmp x12, #2
+ b.lt skip /* skip if no cache or icache */
+ bl __asm_dcache_level /* x1 = 0 flush, 1 invalidate */
+skip:
+ add x0, x0, #1 /* increment cache level */
+ cmp x11, x0
+ b.gt loop_level /* continue while level < loc */
+
+ mov x0, #0
+ msr csselr_el1, x0 /* write 0 back to csselr_el1 */
+ dsb sy
+ isb
+ mov lr, x15 /* recover the return address saved above */
+
+finished:
+ ret
+
+.section .text.flush_dcache_all, "ax", %progbits
+.type flush_dcache_all, %function
+.global flush_dcache_all
+flush_dcache_all: /* void flush_dcache_all(void) */
+ mov x0, #0 /* mode 0: clean & invalidate */
+ b __asm_dcache_all /* tail call: its ret returns to our caller */
+
+.section .text.invalidate_dcache_all, "ax", %progbits
+.type invalidate_dcache_all, %function
+.global invalidate_dcache_all
+invalidate_dcache_all: /* void invalidate_dcache_all(void) */
+ mov x0, #1 /* mode 1: invalidate only */
+ b __asm_dcache_all /* tail call: its ret returns to our caller */
+
+/*
+ * void flush_dcache_range(start, end) (u-boot name: __asm_flush_dcache_range)
+ *
+ * Clean & invalidate (dc civac) each cache line from start, rounded down to a
+ * line boundary, while the address is below end. Runs at least one iteration.
+ * x0: start address
+ * x1: end address (exclusive); x2, x3: clobbered
+ */
+.section .text.flush_dcache_range, "ax", %progbits
+.type flush_dcache_range, %function
+.global flush_dcache_range
+flush_dcache_range:
+ mrs x3, ctr_el0
+ lsr x3, x3, #16
+ and x3, x3, #0xf /* x3 <- ctr_el0[19:16] */
+ mov x2, #4
+ lsl x2, x2, x3 /* cache line size: 4 << x3 */
+
+ /* x2 <- minimal cache line size in cache system */
+ sub x3, x2, #1 /* x3 <- line offset mask */
+ bic x0, x0, x3 /* round start down to a line boundary */
+1: dc civac, x0 /* clean & invalidate data or unified cache */
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b /* unsigned compare: loop while x0 < end */
+ dsb sy
+ ret
+
+/*
+ * void invalidate_dcache_range(start, end) (u-boot: __asm_invalidate_dcache_range)
+ *
+ * Invalidate (dc ivac) each cache line from start, rounded down to a line
+ * boundary, while the address is below end. Runs at least one iteration.
+ * x0: start address
+ * x1: end address (exclusive); x2, x3: clobbered
+ */
+.section .text.invalidate_dcache_range, "ax", %progbits
+.type invalidate_dcache_range, %function
+.global invalidate_dcache_range
+invalidate_dcache_range:
+ mrs x3, ctr_el0
+ ubfm x3, x3, #16, #19 /* x3 <- ctr_el0[19:16] */
+ mov x2, #4
+ lsl x2, x2, x3 /* cache line size: 4 << x3 */
+ /* NOTE(review): no alignment check; partly covered lines are invalidated whole - confirm callers */
+ /* x2 <- minimal cache line size in cache system */
+ sub x3, x2, #1 /* x3 <- line offset mask */
+ bic x0, x0, x3 /* round start down to a line boundary */
+1: dc ivac, x0 /* invalidate data or unified cache */
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b /* unsigned compare: loop while x0 < end */
+ dsb sy
+ ret
+
+/*
+ * void invalidate_icache_all_inner_shareable(void) (u-boot: __asm_invalidate_icache_all)
+ *
+ * invalidate all icache entries with `ic ialluis`; takes no arguments, writes no registers.
+ */
+.section .text.invalidate_icache_all_inner_shareable, "ax", %progbits
+.type invalidate_icache_all_inner_shareable, %function
+.global invalidate_icache_all_inner_shareable
+invalidate_icache_all_inner_shareable:
+ dsb ish /* barrier pair before the invalidate */
+ isb
+ ic ialluis /* invalidate all instruction caches, Inner Shareable */
+ dsb ish /* barrier pair after the invalidate */
+ isb
+ ret
+
+.section .text.invalidate_icache_all, "ax", %progbits
+.type invalidate_icache_all, %function
+.global invalidate_icache_all
+invalidate_icache_all: /* void invalidate_icache_all(void); writes no registers */
+ dsb ish /* barrier pair before the invalidate */
+ isb
+ ic iallu /* invalidate all instruction caches (not the Inner Shareable form) */
+ dsb ish /* barrier pair after the invalidate */
+ isb
+ ret
diff --git a/thermosphere/src/exception_vectors.s b/thermosphere/src/exception_vectors.s
index 767dadbd7..e60b8adc5 100644
--- a/thermosphere/src/exception_vectors.s
+++ b/thermosphere/src/exception_vectors.s
@@ -55,6 +55,13 @@
bl _save_all_regs
.endm
+.macro save_all_regs_reload_x18
+ save_all_regs
+ // Same as save_all_regs, followed by a reload of x18 from the stack.
+ // Reload our x18 value (currentCoreCtx) from sp+0x120; the paired load into xzr is discarded
+ ldp x18, xzr, [sp, #0x120]
+.endm
+
.macro pivot_stack_for_crash
// Note: reset x18 assumed uncorrupted
// Note: replace sp_el0 with crashing sp
@@ -124,8 +131,6 @@ vector_entry irq_sp0
mov x30, x29
- // Reload our x18 value (currentCoreCtx)
- ldp x18, xzr, [sp, #0x120]
ret
vector_entry fiq_sp0
@@ -194,7 +199,7 @@ vector_entry serror_spx
/* Lower EL, A64 */
vector_entry synch_a64
- save_all_regs
+ save_all_regs_reload_x18
mov x0, sp
mrs x1, esr_el2
diff --git a/thermosphere/src/main.c b/thermosphere/src/main.c
index 35ca98309..cd07962dc 100644
--- a/thermosphere/src/main.c
+++ b/thermosphere/src/main.c
@@ -1,13 +1,19 @@
#include "utils.h"
+#include "core_ctx.h"
#include "log.h"
#include "platform/uart.h"
int main(void)
{
- uartInit(115200);
+    // Secondary cores only report in; UART bring-up is done by core 0 alone.
+    if (currentCoreCtx->coreId != 0) {
+        serialLog("Core %u booted\n", currentCoreCtx->coreId);
+        return 0;
+    }
+
+    uartInit(115200);
+    serialLog("Hello from Thermosphere!\n");
+    __builtin_trap(); // deliberate trap on core 0; does not return
- serialLog("fifo flush fifo flush\n");
- serialLog("Hello from Thermosphere!\n");
- __builtin_trap();
return 0;
}
diff --git a/thermosphere/src/start.s b/thermosphere/src/start.s
index 1fe41e3e5..d99348671 100644
--- a/thermosphere/src/start.s
+++ b/thermosphere/src/start.s
@@ -40,17 +40,35 @@ _startCommon:
msr daifset, 0b1111
msr spsel, #1
- mrs x20, sctlr_el2
+ // Set VBAR
+ ldr x8, =__vectors_start__
+ msr vbar_el2, x8
+
+ // Set system to sane defaults, aarch64 for el1
+ mov x4, #0x0838
+ movk x4, #0xC5, lsl #16
+ orr x1, x4, #0x30000000
+ mov x2, #(1 << 31)
+ mov x3, #0xFFFFFFFF
+
+ msr sctlr_el2, x1
+ msr hcr_el2, x2
+ msr dacr32_el2, x3
+
+ dsb sy
+ isb
+
+ // Mov x20 (and no other register (?)) with != 0 is needed to unfuck QEMU's JIT
+ mov x20, #0x31
+
// Get core ID
- mrs x20, mpidr_el1
- and x20, x20, #0xFF
+ mrs x10, mpidr_el1
+ and x10, x10, #0xFF
// Set tmp stack
ldr x8, =__stacks_top__
-
- /* lsl x9, x20, #10
- sub x8, x8, x9*/
- mov sp, x8
+ lsl x9, x10, #10
+ sub sp, x8, x9
// Set up x18
adrp x18, g_coreCtxs
@@ -66,26 +84,22 @@ _startCommon:
_store_arg:
str x0, [x18, #0]
- // Set VBAR
- ldr x8, =__vectors_start__
- msr vbar_el2, x8
-
- // Make sure the regs have been set
- dsb sy
- isb
-
// Don't call init array to save space?
// Clear BSS & call main for the first core executing this code
- cbz x20, _jump_to_kernel
+ cbz x20, _jump_to_main
ldr x0, =__bss_start__
mov w1, #0
ldr x2, =__end__
sub x2, x2, x0
bl memset
+ dsb sy
+ isb
+
+_jump_to_main:
+
bl main
-_jump_to_kernel:
// Jump to kernel
mov x8, #(0b1111 << 6 | 0b0101) // EL1h+DAIF
msr spsr_el2, x8