From 507ab467097a64a87daa4a3e68075ed9347e2cb2 Mon Sep 17 00:00:00 2001
From: Michael Scire
Date: Wed, 29 Jan 2020 04:36:18 -0800
Subject: [PATCH] cpu: optimize core barrier

---
 .../source/arch/arm64/kern_cpu.cpp            | 29 ++++-----
 .../source/arch/arm64/kern_cpu_asm.s          | 63 +++++++++++++++++++
 2 files changed, 78 insertions(+), 14 deletions(-)
 create mode 100644 libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s

diff --git a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
index 43748f070..ec4097944 100644
--- a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
+++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
@@ -17,9 +17,14 @@
 
 namespace ams::kern::arm64::cpu {
 
+    /* Declare prototype to be implemented in asm. */
+    void SynchronizeAllCoresImpl(s32 *sync_var, s32 num_cores);
+
+
     namespace {
 
-        std::atomic<s32> g_all_core_sync_count;
+        /* Expose this as a global, for asm to use. */
+        s32 g_all_core_sync_count;
 
         void FlushEntireDataCacheImpl(int level) {
             /* Used in multiple locations. */
@@ -48,6 +53,14 @@ namespace ams::kern::arm64::cpu {
             }
         }
 
+        ALWAYS_INLINE void SetEventLocally() {
+            __asm__ __volatile__("sevl" ::: "memory");
+        }
+
+        ALWAYS_INLINE void WaitForEvent() {
+            __asm__ __volatile__("wfe" ::: "memory");
+        }
+
     }
 
     void FlushEntireDataCacheShared() {
@@ -70,19 +83,7 @@ namespace ams::kern::arm64::cpu {
     }
 
     NOINLINE void SynchronizeAllCores() {
-        /* Wait until the count can be read. */
-        while (!(g_all_core_sync_count < static_cast<s32>(cpu::NumCores))) { /* ... */ }
-
-        const s32 per_core_idx = g_all_core_sync_count.fetch_add(1);
-
-        /* Loop until it's our turn. This will act on each core in order. */
-        while (g_all_core_sync_count != per_core_idx + static_cast<s32>(cpu::NumCores)) { /* ... */ }
-
-        if (g_all_core_sync_count != 2 * static_cast<s32>(cpu::NumCores) - 1) {
-            g_all_core_sync_count++;
-        } else {
-            g_all_core_sync_count = 0;
-        }
+        SynchronizeAllCoresImpl(&g_all_core_sync_count, static_cast<s32>(cpu::NumCores));
     }
 
 }
diff --git a/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s b/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s
new file mode 100644
index 000000000..fbc000e8b
--- /dev/null
+++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018-2020 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* ams::kern::arm64::cpu::SynchronizeAllCoresImpl(int *sync_var, int num_cores) */
+.section    .text._ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii, "ax", %progbits
+.global     _ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii
+.type       _ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii, %function
+_ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii:
+    /* Loop until the sync var is less than num cores. */
+    sevl
+1:
+    wfe
+    ldaxr   w2, [x0]
+    cmp     w2, w1
+    b.gt    1b
+
+    /* Increment the sync var. */
+2:
+    ldaxr   w2, [x0]
+    add     w3, w2, #1
+    stlxr   w4, w3, [x0]
+    cbnz    w4, 2b
+
+    /* Loop until the sync var matches our ticket. */
+    add     w3, w2, w1
+    sevl
+3:
+    wfe
+    ldaxr   w2, [x0]
+    cmp     w2, w3
+    b.ne    3b
+
+    /* Check if the ticket is the last. */
+    sub     w2, w1, #1
+    add     w2, w2, w1
+    cmp     w3, w2
+    b.eq    5f
+
+    /* Our ticket is not the last one. Increment. */
+4:
+    ldaxr   w2, [x0]
+    add     w3, w2, #1
+    stlxr   w4, w3, [x0]
+    cbnz    w4, 4b
+    ret
+
+    /* Our ticket is the last one. */
+5:
+    stlr    wzr, [x0]
+    ret
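
Note on the algorithm: the removed C++ and the new assembly implement the same reusable ticket barrier over one shared counter. Arrival phase (labels 1 and 2): each core waits until the counter is accepting new arrivals, then atomically increments it, keeping the value it observed as its ticket. Release phase (label 3): each core waits until the counter equals its ticket plus num_cores; because the counter only reaches num_cores once every core has arrived, tickets are then released strictly in order. Hand-off (labels 4 and 5): each released core increments the counter to release the next ticket, except the holder of the last ticket (num_cores - 1), which resets the counter to zero so the barrier can be reused. For num_cores = 4, one round drives the counter 0 through 4 during arrival, then to 5, 6, 7 as tickets 0, 1, and 2 are released, and back to 0 as ticket 3 leaves. The gain over the old std::atomic spin loops is that waiters now sleep in wfe instead of busy-polling: each waiter's ldaxr takes an exclusive reservation on the counter, a store that clears that reservation generates the wakeup event, and the sevl before each wait loop sets the local event register so the first wfe falls through and the loop always performs its initial read.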
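For reference, here is a minimal portable C++ sketch of the same ticket barrier built on std::atomic. Everything in it is an illustrative stand-in rather than kernel code: NumCores, g_sync_var, SynchronizeAllCores, and the thread-based main are hypothetical names for this sketch, and the ticket claim uses a compare-exchange, which is slightly more defensive than the assembly's separate wait-then-increment (safe there because exactly num_cores cores participate, so a stale claim is unreachable).

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

namespace {

    /* Illustrative stand-ins for the kernel's core count and sync variable. */
    constexpr int NumCores = 4;
    std::atomic<int> g_sync_var{0};

    void SynchronizeAllCores() {
        /* Arrival (asm labels 1/2): claim a ticket once the counter is below NumCores. */
        int ticket;
        for (;;) {
            int cur = g_sync_var.load(std::memory_order_acquire);
            if (cur >= NumCores) {
                continue; /* A previous round is still draining; the asm sleeps in wfe here. */
            }
            /* The asm's ldaxr/stlxr increment loop; CAS also re-checks the bound. */
            if (g_sync_var.compare_exchange_weak(cur, cur + 1, std::memory_order_acq_rel)) {
                ticket = cur;
                break;
            }
        }

        /* Release (asm label 3): wait until the counter reaches ticket + NumCores. */
        while (g_sync_var.load(std::memory_order_acquire) != ticket + NumCores) {
            /* Busy-poll; the asm instead waits in wfe until a store wakes it. */
        }

        /* Hand-off (asm labels 4/5): release the next ticket, or reset for reuse. */
        if (ticket != NumCores - 1) {
            g_sync_var.fetch_add(1, std::memory_order_acq_rel);
        } else {
            g_sync_var.store(0, std::memory_order_release);
        }
    }

}

int main() {
    /* Four threads stand in for four cores; none passes until all have arrived. */
    std::vector<std::thread> threads;
    for (int i = 0; i < NumCores; ++i) {
        threads.emplace_back([i] {
            SynchronizeAllCores();
            std::printf("core %d passed the barrier\n", i);
        });
    }
    for (auto &t : threads) {
        t.join();
    }
    return 0;
}

Built with any C++17 compiler and -pthread, no thread prints until the last one has reached the barrier, and the threads are released in ticket order, matching the in-order release the assembly provides.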