cpu: optimize core barrier

This commit is contained in:
Michael Scire 2020-01-29 04:36:18 -08:00
parent 7820e5b759
commit 507ab46709
2 changed files with 78 additions and 14 deletions

View file

@@ -17,9 +17,14 @@
namespace ams::kern::arm64::cpu {
/* Declare prototype to be implemented in asm. */
void SynchronizeAllCoresImpl(s32 *sync_var, s32 num_cores);
namespace {
std::atomic<s32> g_all_core_sync_count;
/* Expose this as a global, for asm to use. */
s32 g_all_core_sync_count;
void FlushEntireDataCacheImpl(int level) {
/* Used in multiple locations. */
@@ -48,6 +53,14 @@ namespace ams::kern::arm64::cpu {
}
}
/* Signal an event to this core only (ARM SEVL): sets the local event
 * register so that the NEXT wfe on this core returns immediately.
 * Used to prime a wfe polling loop so the first iteration cannot sleep
 * before the condition has been sampled at least once. */
ALWAYS_INLINE void SetEventLocally() {
__asm__ __volatile__("sevl" ::: "memory");
}
/* Suspend this core until an event is signaled (ARM WFE): low-power wait
 * that wakes on SEV/SEVL, or on a store to a location this core holds an
 * exclusive monitor on (e.g. after ldaxr).  The "memory" clobber keeps the
 * compiler from caching the polled variable across the wait. */
ALWAYS_INLINE void WaitForEvent() {
__asm__ __volatile__("wfe" ::: "memory");
}
}
void FlushEntireDataCacheShared() {
@@ -70,19 +83,7 @@ namespace ams::kern::arm64::cpu {
}
/* NOTE(review): this span is rendered from a diff whose +/- line prefixes
 * were stripped.  The atomic ticket-barrier loop below (fetch_add on the
 * std::atomic counter) is the REMOVED implementation, and the trailing
 * SynchronizeAllCoresImpl(...) call is the ADDED replacement; the two
 * bodies never coexist in the real file — confirm against commit 507ab46709. */
NOINLINE void SynchronizeAllCores() {
/* Wait until the count can be read. */
while (!(g_all_core_sync_count < static_cast<s32>(cpu::NumCores))) { /* ... */ }
/* (removed) Take a ticket: the pre-increment value is this core's slot. */
const s32 per_core_idx = g_all_core_sync_count.fetch_add(1);
/* Loop until it's our turn. This will act on each core in order. */
while (g_all_core_sync_count != per_core_idx + static_cast<s32>(cpu::NumCores)) { /* ... */ }
/* (removed) Release the next core in ticket order, or reset the counter
 * to zero if our release value (2 * NumCores - 1) was the last one. */
if (g_all_core_sync_count != 2 * static_cast<s32>(cpu::NumCores) - 1) {
g_all_core_sync_count++;
} else {
g_all_core_sync_count = 0;
}
/* (added) New implementation: the whole barrier is delegated to the asm
 * routine, which performs the same ticket protocol with wfe-based waiting. */
SynchronizeAllCoresImpl(&g_all_core_sync_count, static_cast<s32>(cpu::NumCores));
}
}

View file

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* ams::kern::arm64::cpu::SynchronizeAllCoresImpl(int *sync_var, int num_cores) */
/* Reusable ticket barrier for num_cores cores.  Each core takes a ticket by  */
/* atomically incrementing *sync_var, then cores are released one at a time   */
/* in ticket order; the holder of the last ticket resets *sync_var to 0 so    */
/* the barrier can be reused immediately.                                     */
/* Register use: x0 = sync_var pointer, w1 = num_cores, w2-w4 = scratch.      */
/* ldaxr/stlxr give acquire/release ordering, and ldaxr's exclusive monitor   */
/* means any other core's store to *sync_var wakes our wfe.                   */
.section .text._ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii, "ax", %progbits
.global _ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii
.type _ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii, %function
_ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii:
/* Loop until the sync var is less than num cores. */
/* Guards against re-entering while a previous round is still releasing      */
/* (values above num_cores).  sevl primes the local event register so the    */
/* first wfe falls through and the variable is sampled at least once.        */
sevl
1:
wfe
ldaxr w2, [x0]
cmp w2, w1
b.gt 1b
/* Increment the sync var. */
/* ldaxr/stlxr retry loop (stlxr writes 0 to w4 on success); on exit,        */
/* w2 holds the pre-increment value: our ticket number.                      */
2:
ldaxr w2, [x0]
add w3, w2, #1
stlxr w4, w3, [x0]
cbnz w4, 2b
/* Loop until the sync var matches our ticket. */
/* Our release value is ticket + num_cores, so cores wake in ticket order.   */
add w3, w2, w1
sevl
3:
wfe
ldaxr w2, [x0]
cmp w2, w3
b.ne 3b
/* Check if the ticket is the last. */
/* Last ticket is num_cores - 1, i.e. its release value w3 equals            */
/* (num_cores - 1) + num_cores = 2 * num_cores - 1 (computed into w2).       */
sub w2, w1, #1
add w2, w2, w1
cmp w3, w2
b.eq 5f
/* Our ticket is not the last one. Increment. */
/* The store-release both publishes the new value and generates the event    */
/* that wakes the next waiting core's wfe.                                   */
4:
ldaxr w2, [x0]
add w3, w2, #1
stlxr w4, w3, [x0]
cbnz w4, 4b
ret
/* Our ticket is the last one. */
/* Reset *sync_var to 0 with a plain store-release, making the barrier       */
/* ready for the next round.                                                 */
5:
stlr wzr, [x0]
ret