kern: implement revised IPI/SGI semantics

This commit is contained in:
Michael Scire 2022-03-22 23:02:10 -07:00 committed by SciresM
parent 9d89835ff8
commit ff07ba4201
9 changed files with 103 additions and 20 deletions

View file

@ -188,6 +188,7 @@ namespace ams::kern::arch::arm64::cpu {
/* Synchronization helpers. */
NOINLINE void SynchronizeAllCores();
void SynchronizeCores(u64 core_mask);
/* Cache management helpers. */
void StoreCacheForInit(void *addr, size_t size);

View file

@ -21,11 +21,12 @@ namespace ams::kern::arch::arm64 {
enum KInterruptName : s32 {
/* SGIs */
KInterruptName_ThreadTerminate = 4,
KInterruptName_CacheOperation = 5,
KInterruptName_Scheduler = 6,
KInterruptName_ThreadTerminate = 0,
KInterruptName_CacheOperation = 1,
KInterruptName_Scheduler = 2,
KInterruptName_CoreBarrier = 3,
KInterruptName_PerformanceCounter = 8,
KInterruptName_PerformanceCounter = 4,
/* PPIs */
#if defined(ATMOSPHERE_BOARD_NINTENDO_NX)

View file

@ -30,6 +30,7 @@
#endif
//#define MESOSPHERE_BUILD_FOR_TRACING
//#define MESOSPHERE_ENABLE_PERFORMANCE_COUNTER
#define MESOSPHERE_ENABLE_PANIC_REGISTER_DUMP
#define MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP

View file

@ -184,6 +184,7 @@ namespace ams::kern {
svc::SvcAccessFlagSet m_svc_access_flags;
InterruptFlagSet m_irq_access_flags;
u64 m_core_mask;
u64 m_phys_core_mask;
u64 m_priority_mask;
util::BitPack32 m_debug_capabilities;
s32 m_handle_table_size;
@ -227,7 +228,7 @@ namespace ams::kern {
Result SetCapabilities(const u32 *caps, s32 num_caps, KProcessPageTable *page_table);
Result SetCapabilities(svc::KUserPointer<const u32 *> user_caps, s32 num_caps, KProcessPageTable *page_table);
public:
constexpr explicit KCapabilities(util::ConstantInitializeTag) : m_svc_access_flags{}, m_irq_access_flags{}, m_core_mask{}, m_priority_mask{}, m_debug_capabilities{0}, m_handle_table_size{}, m_intended_kernel_version{}, m_program_type{} { /* ... */ }
constexpr explicit KCapabilities(util::ConstantInitializeTag) : m_svc_access_flags{}, m_irq_access_flags{}, m_core_mask{}, m_phys_core_mask{}, m_priority_mask{}, m_debug_capabilities{0}, m_handle_table_size{}, m_intended_kernel_version{}, m_program_type{} { /* ... */ }
KCapabilities() { /* ... */ }
Result Initialize(const u32 *caps, s32 num_caps, KProcessPageTable *page_table);
@ -236,6 +237,7 @@ namespace ams::kern {
static Result CheckCapabilities(svc::KUserPointer<const u32 *> user_caps, s32 num_caps);
/* Returns the mask of virtual cores held in m_core_mask. */
constexpr u64 GetCoreMask() const { return m_core_mask; }
/* Returns the mask of physical cores held in m_phys_core_mask. */
constexpr u64 GetPhysicalCoreMask() const { return m_phys_core_mask; }
/* Returns the priority mask held in m_priority_mask. */
constexpr u64 GetPriorityMask() const { return m_priority_mask; }
/* Returns the handle table size held in m_handle_table_size. */
constexpr s32 GetHandleTableSize() const { return m_handle_table_size; }

View file

@ -160,6 +160,7 @@ namespace ams::kern {
/* Returns the current value of m_state. */
constexpr State GetState() const { return m_state; }
/* The three mask accessors below forward to the process's capabilities object. */
constexpr u64 GetCoreMask() const { return m_capabilities.GetCoreMask(); }
constexpr u64 GetPhysicalCoreMask() const { return m_capabilities.GetPhysicalCoreMask(); }
constexpr u64 GetPriorityMask() const { return m_capabilities.GetPriorityMask(); }
/* Returns the value of m_ideal_core_id. */
constexpr s32 GetIdealCoreId() const { return m_ideal_core_id; }

View file

@ -67,6 +67,16 @@ namespace ams::kern {
return mask;
}();
/* Translates a bitmask of virtual core ids into the corresponding bitmask of physical core ids. */
/* Each set bit N of v_core_mask contributes bit cpu::VirtualToPhysicalCoreMap[N] to the result. */
static constexpr inline u64 ConvertVirtualCoreMaskToPhysical(u64 v_core_mask) {
    u64 physical = 0;

    /* Walk the set bits from lowest to highest; (x & (x - 1)) drops the lowest set bit. */
    for (u64 remaining = v_core_mask; remaining != 0; remaining &= (remaining - 1)) {
        const u64 v_core_id = __builtin_ctzll(remaining);
        physical |= (static_cast<u64>(1) << cpu::VirtualToPhysicalCoreMap[v_core_id]);
    }

    return physical;
}
}
static_assert(cpu::NumCores <= cpu::NumVirtualCores);

View file

@ -23,6 +23,14 @@ namespace ams::kern::arch::arm64::cpu {
namespace {
/* Executes the AArch64 `sevl` (send event local) instruction. */
/* The "memory" clobber stops the compiler from moving memory accesses across the instruction. */
ALWAYS_INLINE void SetEventLocally() {
__asm__ __volatile__("sevl" ::: "memory");
}
/* Executes the AArch64 `wfe` (wait for event) instruction. */
/* The "memory" clobber stops the compiler from moving memory accesses across the instruction, */
/* so values read after the wait are re-loaded rather than reused from before it. */
ALWAYS_INLINE void WaitForEvent() {
__asm__ __volatile__("wfe" ::: "memory");
}
class KScopedCoreMigrationDisable {
public:
ALWAYS_INLINE KScopedCoreMigrationDisable() { GetCurrentThread().DisableCoreMigration(); }
@ -82,6 +90,51 @@ namespace ams::kern::arch::arm64::cpu {
}
};
/* Interrupt handler implementing a cross-core barrier. SynchronizeCores() sends the */
/* KInterruptName_CoreBarrier interrupt to a set of cores and waits until every one of them */
/* has run OnInterrupt() and thereby cleared its bit in m_target_cores. */
class KCoreBarrierInterruptHandler : public KInterruptHandler {
private:
/* Bitmask of cores that have not yet acknowledged the current barrier request. */
util::Atomic<u64> m_target_cores;
/* Taken by SynchronizeCores() so that one request at a time uses m_target_cores. */
KSpinLock m_lock;
public:
constexpr KCoreBarrierInterruptHandler() : KInterruptHandler(), m_target_cores(0), m_lock() { /* ... */ }
/* Acknowledges a barrier request by clearing the current core's bit in m_target_cores. */
/* Always returns nullptr, i.e. no interrupt task is handed back to the caller. */
virtual KInterruptTask *OnInterrupt(s32 interrupt_id) override {
MESOSPHERE_UNUSED(interrupt_id);
m_target_cores &= ~(1ul << GetCurrentCoreId());
return nullptr;
}
/* Sends the core barrier interrupt to every core named in core_mask except the current one, */
/* then waits until all of those cores have acknowledged it. If core_mask names no core */
/* other than the current one, no interrupt is sent and nothing is waited on. */
void SynchronizeCores(u64 core_mask) {
/* Disable dispatch while we synchronize. */
KScopedDisableDispatch dd;
/* Acquire exclusive access to ourselves. */
KScopedSpinLock lk(m_lock);
/* If necessary, force synchronization with other cores. */
/* The current core's own bit is masked out, so we never interrupt (or wait on) ourselves. */
if (const u64 other_cores_mask = core_mask & ~(1ul << GetCurrentCoreId()); other_cores_mask != 0) {
/* Send an interrupt to the other cores. */
/* The pending mask is stored and a barrier issued before the interrupt is sent; presumably */
/* this is so each target observes the new mask when its handler runs -- TODO confirm. */
m_target_cores = other_cores_mask;
cpu::DataSynchronizationBarrierInnerShareable();
Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_CoreBarrier, other_cores_mask);
/* Wait for all cores to acknowledge. */
/* The asm loads m_target_cores with ldaxr; if it already reads zero it skips the wait, */
/* otherwise it repeats { wfe; ldaxr } until the value reads zero. */
/* NOTE(review): presumably the exclusive load is used so that another core's write to */
/* m_target_cores generates the event that wakes wfe -- confirm against the Arm ARM. */
{
u64 v;
__asm__ __volatile__("ldaxr %[v], %[p]\n"
"cbz %[v], 1f\n"
"0:\n"
"wfe\n"
"ldaxr %[v], %[p]\n"
"cbnz %[v], 0b\n"
"1:\n"
: [v]"=&r"(v)
: [p]"Q"(*reinterpret_cast<u64 *>(std::addressof(m_target_cores)))
: "memory");
}
}
}
};
class KCacheHelperInterruptHandler : public KInterruptHandler {
private:
static constexpr s32 ThreadPriority = 8;
@ -215,7 +268,11 @@ namespace ams::kern::arch::arm64::cpu {
/* Instances of the interrupt handlers. */
constinit KThreadTerminationInterruptHandler g_thread_termination_handler;
constinit KCacheHelperInterruptHandler g_cache_operation_handler;
constinit KCoreBarrierInterruptHandler g_core_barrier_handler;
#if defined(MESOSPHERE_ENABLE_PERFORMANCE_COUNTER)
constinit KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores];
#endif
/* Expose this as a global, for asm to use. */
constinit s32 g_all_core_sync_count;
@ -296,14 +353,6 @@ namespace ams::kern::arch::arm64::cpu {
}
}
/* Executes the AArch64 `sevl` (send event local) instruction. */
/* The "memory" clobber stops the compiler from moving memory accesses across the instruction. */
ALWAYS_INLINE void SetEventLocally() {
__asm__ __volatile__("sevl" ::: "memory");
}
/* Executes the AArch64 `wfe` (wait for event) instruction. */
/* The "memory" clobber stops the compiler from moving memory accesses across the instruction. */
ALWAYS_INLINE void WaitForEvent() {
__asm__ __volatile__("wfe" ::: "memory");
}
ALWAYS_INLINE Result InvalidateDataCacheRange(uintptr_t start, uintptr_t end) {
MESOSPHERE_ASSERT(util::IsAligned(start, DataCacheLineSize));
MESOSPHERE_ASSERT(util::IsAligned(end, DataCacheLineSize));
@ -338,6 +387,11 @@ namespace ams::kern::arch::arm64::cpu {
}
/* Performs a core barrier against the cores named in core_mask by forwarding the request */
/* to the global core barrier interrupt handler (g_core_barrier_handler). */
void SynchronizeCores(u64 core_mask) {
/* Request a core barrier interrupt. */
g_core_barrier_handler.SynchronizeCores(core_mask);
}
void StoreCacheForInit(void *addr, size_t size) {
/* Store the data cache for the specified range. */
const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), DataCacheLineSize);
@ -446,9 +500,15 @@ namespace ams::kern::arch::arm64::cpu {
/* Bind all handlers to the relevant interrupts. */
Kernel::GetInterruptManager().BindHandler(std::addressof(g_cache_operation_handler), KInterruptName_CacheOperation, core_id, KInterruptController::PriorityLevel_High, false, false);
Kernel::GetInterruptManager().BindHandler(std::addressof(g_thread_termination_handler), KInterruptName_ThreadTerminate, core_id, KInterruptController::PriorityLevel_Scheduler, false, false);
Kernel::GetInterruptManager().BindHandler(std::addressof(g_core_barrier_handler), KInterruptName_CoreBarrier, core_id, KInterruptController::PriorityLevel_Scheduler, false, false);
/* If we should, enable user access to the performance counter registers. */
if (KTargetSystem::IsUserPmuAccessEnabled()) { SetPmUserEnrEl0(1ul); }
/* If we should, enable the kernel performance counter interrupt handler. */
#if defined(MESOSPHERE_ENABLE_PERFORMANCE_COUNTER)
Kernel::GetInterruptManager().BindHandler(std::addressof(g_performance_counter_handler[core_id]), KInterruptName_PerformanceCounter, core_id, KInterruptController::PriorityLevel_Timer, false, false);
#endif
}
void SynchronizeAllCores() {

View file

@ -27,7 +27,11 @@ namespace ams::kern {
m_program_type = 0;
/* Initial processes may run on all cores. */
m_core_mask = cpu::VirtualCoreMask;
constexpr u64 VirtMask = cpu::VirtualCoreMask;
constexpr u64 PhysMask = cpu::ConvertVirtualCoreMaskToPhysical(VirtMask);
m_core_mask = VirtMask;
m_phys_core_mask = PhysMask;
/* Initial processes may use any user priority they like. */
m_priority_mask = ~0xFul;
@ -60,7 +64,7 @@ namespace ams::kern {
Result KCapabilities::SetCorePriorityCapability(const util::BitPack32 cap) {
/* We can't set core/priority if we've already set them. */
R_UNLESS(m_core_mask == 0, svc::ResultInvalidArgument());
R_UNLESS(m_core_mask == 0, svc::ResultInvalidArgument());
R_UNLESS(m_priority_mask == 0, svc::ResultInvalidArgument());
/* Validate the core/priority. */
@ -81,6 +85,9 @@ namespace ams::kern {
}
MESOSPHERE_ASSERT((m_core_mask & cpu::VirtualCoreMask) == m_core_mask);
/* Set physical core mask. */
m_phys_core_mask = cpu::ConvertVirtualCoreMaskToPhysical(m_core_mask);
/* Set priority mask. */
for (auto prio = min_prio; prio <= max_prio; prio++) {
m_priority_mask |= (1ul << prio);

View file

@ -481,12 +481,16 @@ namespace ams::kern {
/* Ensure that the thread is not executing on any core. */
if (m_parent != nullptr) {
/* Wait for the thread to not be current on any core. */
for (size_t i = 0; i < cpu::NumCores; ++i) {
KThread *core_thread;
do {
core_thread = Kernel::GetScheduler(i).GetSchedulerCurrentThread();
} while (core_thread == this);
}
/* Ensure that all cores are synchronized at this point. */
cpu::SynchronizeCores(m_parent->GetPhysicalCoreMask());
}
/* Close the thread. */
@ -724,11 +728,7 @@ namespace ams::kern {
}
/* Translate the virtual affinity mask to a physical one. */
while (v_affinity_mask != 0) {
const u64 next = __builtin_ctzll(v_affinity_mask);
v_affinity_mask &= ~(1ul << next);
p_affinity_mask |= (1ul << cpu::VirtualToPhysicalCoreMap[next]);
}
p_affinity_mask = cpu::ConvertVirtualCoreMaskToPhysical(v_affinity_mask);
/* If we haven't disabled migration, perform an affinity change. */
if (m_num_core_migration_disables == 0) {