kern: implement revised IPI/SGI semantics

2024-12-22 20:31:14 +00:00 · 2022-03-22 23:02:10 -07:00 · 2022-03-22 23:02:10 -07:00 · ff07ba4201
commit ff07ba4201
parent 9d89835ff8
9 changed files with 103 additions and 20 deletions
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
@ -188,6 +188,7 @@ namespace ams::kern::arch::arm64::cpu {

    /* Synchronization helpers. */
    NOINLINE void SynchronizeAllCores();
+    void SynchronizeCores(u64 core_mask);

    /* Cache management helpers. */
    void StoreCacheForInit(void *addr, size_t size);
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
@ -21,11 +21,12 @@ namespace ams::kern::arch::arm64 {

        enum KInterruptName : s32 {
            /* SGIs */
-            KInterruptName_ThreadTerminate        = 4,
-            KInterruptName_CacheOperation         = 5,
-            KInterruptName_Scheduler              = 6,
+            KInterruptName_ThreadTerminate        = 0,
+            KInterruptName_CacheOperation         = 1,
+            KInterruptName_Scheduler              = 2,
+            KInterruptName_CoreBarrier            = 3,

-            KInterruptName_PerformanceCounter     = 8,
+            KInterruptName_PerformanceCounter     = 4,

            /* PPIs */
    #if defined(ATMOSPHERE_BOARD_NINTENDO_NX)
--- a/libraries/libmesosphere/include/mesosphere/kern_build_config.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_build_config.hpp
@ -30,6 +30,7 @@
 #endif

 //#define MESOSPHERE_BUILD_FOR_TRACING
+//#define MESOSPHERE_ENABLE_PERFORMANCE_COUNTER
 #define MESOSPHERE_ENABLE_PANIC_REGISTER_DUMP
 #define MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP

--- a/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp
@ -184,6 +184,7 @@ namespace ams::kern {
            svc::SvcAccessFlagSet m_svc_access_flags;
            InterruptFlagSet m_irq_access_flags;
            u64 m_core_mask;
+            u64 m_phys_core_mask;
            u64 m_priority_mask;
            util::BitPack32 m_debug_capabilities;
            s32 m_handle_table_size;
@ -227,7 +228,7 @@ namespace ams::kern {
            Result SetCapabilities(const u32 *caps, s32 num_caps, KProcessPageTable *page_table);
            Result SetCapabilities(svc::KUserPointer<const u32 *> user_caps, s32 num_caps, KProcessPageTable *page_table);
        public:
-            constexpr explicit KCapabilities(util::ConstantInitializeTag) : m_svc_access_flags{}, m_irq_access_flags{}, m_core_mask{}, m_priority_mask{}, m_debug_capabilities{0}, m_handle_table_size{}, m_intended_kernel_version{}, m_program_type{} { /* ... */ }
+            constexpr explicit KCapabilities(util::ConstantInitializeTag) : m_svc_access_flags{}, m_irq_access_flags{}, m_core_mask{}, m_phys_core_mask{}, m_priority_mask{}, m_debug_capabilities{0}, m_handle_table_size{}, m_intended_kernel_version{}, m_program_type{} { /* ... */ }
            KCapabilities() { /* ... */ }

            Result Initialize(const u32 *caps, s32 num_caps, KProcessPageTable *page_table);
@ -236,6 +237,7 @@ namespace ams::kern {
            static Result CheckCapabilities(svc::KUserPointer<const u32 *> user_caps, s32 num_caps);

            constexpr u64 GetCoreMask() const { return m_core_mask; }
+            constexpr u64 GetPhysicalCoreMask() const { return m_phys_core_mask; }
            constexpr u64 GetPriorityMask() const { return m_priority_mask; }
            constexpr s32 GetHandleTableSize() const { return m_handle_table_size; }

--- a/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp
@ -160,6 +160,7 @@ namespace ams::kern {
            constexpr State GetState() const { return m_state; }

            constexpr u64 GetCoreMask() const { return m_capabilities.GetCoreMask(); }
+            constexpr u64 GetPhysicalCoreMask() const { return m_capabilities.GetPhysicalCoreMask(); }
            constexpr u64 GetPriorityMask() const { return m_capabilities.GetPriorityMask(); }

            constexpr s32 GetIdealCoreId() const { return m_ideal_core_id; }
--- a/libraries/libmesosphere/include/mesosphere/kern_select_cpu.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_select_cpu.hpp
@ -67,6 +67,16 @@ namespace ams::kern {
            return mask;
        }();

+        static constexpr inline u64 ConvertVirtualCoreMaskToPhysical(u64 v_core_mask) { /* Translate a virtual core bitmask into a physical one: each set bit i of the input sets bit cpu::VirtualToPhysicalCoreMap[i] of the result. */
+            u64 p_core_mask = 0; /* Accumulated result; remains zero when the input mask is empty. */
+            while (v_core_mask != 0) { /* Handle one set bit per iteration, lowest bit first. */
+                const u64 next = __builtin_ctzll(v_core_mask); /* Index of the lowest set bit still pending. */
+                v_core_mask &= ~(static_cast<u64>(1) << next); /* Mark that virtual core as handled. */
+                p_core_mask |=  (static_cast<u64>(1) << cpu::VirtualToPhysicalCoreMap[next]); /* Set the bit of the physical core it maps to. */
+            }
+            return p_core_mask;
+        }
+
    }

    static_assert(cpu::NumCores <= cpu::NumVirtualCores);
--- a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
+++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
@ -23,6 +23,14 @@ namespace ams::kern::arch::arm64::cpu {

    namespace {

+        ALWAYS_INLINE void SetEventLocally() { /* Execute the AArch64 "sevl" (send event local) instruction. */
+            __asm__ __volatile__("sevl" ::: "memory"); /* The "memory" clobber keeps the compiler from moving memory accesses across this statement. */
+        }
+
+        ALWAYS_INLINE void WaitForEvent() { /* Execute the AArch64 "wfe" (wait for event) instruction. */
+            __asm__ __volatile__("wfe" ::: "memory"); /* The "memory" clobber forces memory to be re-read after waking rather than reusing cached values. */
+        }
+
        class KScopedCoreMigrationDisable {
            public:
                ALWAYS_INLINE KScopedCoreMigrationDisable() { GetCurrentThread().DisableCoreMigration(); }
@ -82,6 +90,51 @@ namespace ams::kern::arch::arm64::cpu {
                }
        };

+        class KCoreBarrierInterruptHandler : public KInterruptHandler { /* Handler for KInterruptName_CoreBarrier: lets one core interrupt a set of other cores and wait until each of them has taken the interrupt. */
+            private:
+                util::Atomic<u64> m_target_cores; /* Bitmask of cores that have not yet acknowledged the current request. */
+                KSpinLock m_lock; /* Serializes callers of SynchronizeCores(), which all share m_target_cores. */
+            public:
+                constexpr KCoreBarrierInterruptHandler() : KInterruptHandler(), m_target_cores(0), m_lock() { /* ... */ }
+
+                virtual KInterruptTask *OnInterrupt(s32 interrupt_id) override { /* Runs on a core that received the barrier interrupt. */
+                    MESOSPHERE_UNUSED(interrupt_id);
+                    m_target_cores &= ~(1ul << GetCurrentCoreId()); /* Acknowledge by clearing this core's bit in the pending mask. */
+                    return nullptr; /* No interrupt task is handed back. */
+                }
+
+                void SynchronizeCores(u64 core_mask) { /* Interrupt every core in core_mask except the current one and return once all of them have acknowledged; bits are indexed by the value GetCurrentCoreId() returns. */
+                    /* Disable dispatch while we synchronize. */
+                    KScopedDisableDispatch dd;
+
+                    /* Acquire exclusive access to ourselves. */
+                    KScopedSpinLock lk(m_lock);
+
+                    /* If necessary, force synchronization with other cores (the current core is never targeted). */
+                    if (const u64 other_cores_mask = core_mask & ~(1ul << GetCurrentCoreId()); other_cores_mask != 0) {
+                        /* Send an interrupt to the other cores. */
+                        m_target_cores = other_cores_mask; /* Publish the set of cores we are waiting on. */
+                        cpu::DataSynchronizationBarrierInnerShareable(); /* Barrier between publishing the mask and sending the interrupt; presumably so targets observe the mask before handling it. */
+                        Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_CoreBarrier, other_cores_mask);
+
+                        /* Wait for all cores to acknowledge, i.e. for m_target_cores to read as zero. */
+                        {
+                            u64 v;
+                            __asm__ __volatile__("ldaxr %[v], %[p]\n" /* Load-acquire exclusive of the pending mask. */
+                                                 "cbz %[v], 1f\n" /* Already zero: skip the wait loop. */
+                                                 "0:\n"
+                                                 "wfe\n" /* Sleep until an event arrives. NOTE(review): relies on the preceding exclusive load so that another core's write to the mask generates the wake-up event - confirm against the Arm ARM. */
+                                                 "ldaxr %[v], %[p]\n" /* Re-read the pending mask. */
+                                                 "cbnz %[v], 0b\n" /* Still non-zero: keep waiting. */
+                                                 "1:\n"
+                                                 : [v]"=&r"(v) /* Early-clobber scratch register receiving the loaded value. */
+                                                 : [p]"Q"(*reinterpret_cast<u64 *>(std::addressof(m_target_cores))) /* m_target_cores passed as a raw u64 memory operand. */
+                                                 : "memory");
+                        }
+                    }
+                }
+        };
+
        class KCacheHelperInterruptHandler : public KInterruptHandler {
            private:
                static constexpr s32 ThreadPriority = 8;
@ -215,7 +268,11 @@ namespace ams::kern::arch::arm64::cpu {
        /* Instances of the interrupt handlers. */
        constinit KThreadTerminationInterruptHandler  g_thread_termination_handler;
        constinit KCacheHelperInterruptHandler        g_cache_operation_handler;
+        constinit KCoreBarrierInterruptHandler        g_core_barrier_handler;
+
+        #if defined(MESOSPHERE_ENABLE_PERFORMANCE_COUNTER)
        constinit KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores];
+        #endif

        /* Expose this as a global, for asm to use. */
        constinit s32 g_all_core_sync_count;
@ -296,14 +353,6 @@ namespace ams::kern::arch::arm64::cpu {
            }
        }

-        ALWAYS_INLINE void SetEventLocally() {
-            __asm__ __volatile__("sevl" ::: "memory");
-        }
-
-        ALWAYS_INLINE void WaitForEvent() {
-            __asm__ __volatile__("wfe" ::: "memory");
-        }
-
        ALWAYS_INLINE Result InvalidateDataCacheRange(uintptr_t start, uintptr_t end) {
            MESOSPHERE_ASSERT(util::IsAligned(start, DataCacheLineSize));
            MESOSPHERE_ASSERT(util::IsAligned(end,   DataCacheLineSize));
@ -338,6 +387,11 @@ namespace ams::kern::arch::arm64::cpu {

    }

+    void SynchronizeCores(u64 core_mask) { /* Public entry point; forwards to the single global core barrier handler. */
+        /* Request a core barrier interrupt on the other cores in core_mask and wait for them to acknowledge it. */
+        g_core_barrier_handler.SynchronizeCores(core_mask);
+    }
+
    void StoreCacheForInit(void *addr, size_t size) {
        /* Store the data cache for the specified range. */
        const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), DataCacheLineSize);
@ -446,9 +500,15 @@ namespace ams::kern::arch::arm64::cpu {
        /* Bind all handlers to the relevant interrupts. */
        Kernel::GetInterruptManager().BindHandler(std::addressof(g_cache_operation_handler),              KInterruptName_CacheOperation,     core_id, KInterruptController::PriorityLevel_High,      false, false);
        Kernel::GetInterruptManager().BindHandler(std::addressof(g_thread_termination_handler),           KInterruptName_ThreadTerminate,    core_id, KInterruptController::PriorityLevel_Scheduler, false, false);
+        Kernel::GetInterruptManager().BindHandler(std::addressof(g_core_barrier_handler),                 KInterruptName_CoreBarrier,        core_id, KInterruptController::PriorityLevel_Scheduler, false, false);

+        /* If we should, enable user access to the performance counter registers. */
        if (KTargetSystem::IsUserPmuAccessEnabled()) { SetPmUserEnrEl0(1ul); }
+
+        /* If we should, enable the kernel performance counter interrupt handler. */
+        #if defined(MESOSPHERE_ENABLE_PERFORMANCE_COUNTER)
        Kernel::GetInterruptManager().BindHandler(std::addressof(g_performance_counter_handler[core_id]), KInterruptName_PerformanceCounter, core_id, KInterruptController::PriorityLevel_Timer,     false, false);
+        #endif
    }

    void SynchronizeAllCores() {
--- a/libraries/libmesosphere/source/kern_k_capabilities.cpp
+++ b/libraries/libmesosphere/source/kern_k_capabilities.cpp
@ -27,7 +27,11 @@ namespace ams::kern {
        m_program_type            = 0;

        /* Initial processes may run on all cores. */
-        m_core_mask = cpu::VirtualCoreMask;
+        constexpr u64 VirtMask = cpu::VirtualCoreMask;
+        constexpr u64 PhysMask = cpu::ConvertVirtualCoreMaskToPhysical(VirtMask);
+
+        m_core_mask      = VirtMask;
+        m_phys_core_mask = PhysMask;

        /* Initial processes may use any user priority they like. */
        m_priority_mask = ~0xFul;
@ -60,7 +64,7 @@ namespace ams::kern {

    Result KCapabilities::SetCorePriorityCapability(const util::BitPack32 cap) {
        /* We can't set core/priority if we've already set them. */
-        R_UNLESS(m_core_mask    == 0,  svc::ResultInvalidArgument());
+        R_UNLESS(m_core_mask     == 0, svc::ResultInvalidArgument());
        R_UNLESS(m_priority_mask == 0, svc::ResultInvalidArgument());

        /* Validate the core/priority. */
@ -81,6 +85,9 @@ namespace ams::kern {
        }
        MESOSPHERE_ASSERT((m_core_mask & cpu::VirtualCoreMask) == m_core_mask);

+        /* Set physical core mask. */
+        m_phys_core_mask = cpu::ConvertVirtualCoreMaskToPhysical(m_core_mask);
+
        /* Set priority mask. */
        for (auto prio = min_prio; prio <= max_prio; prio++) {
            m_priority_mask |= (1ul << prio);
--- a/libraries/libmesosphere/source/kern_k_thread.cpp
+++ b/libraries/libmesosphere/source/kern_k_thread.cpp
@ -481,12 +481,16 @@ namespace ams::kern {

        /* Ensure that the thread is not executing on any core. */
        if (m_parent != nullptr) {
+            /* Wait for the thread to not be current on any core. */
            for (size_t i = 0; i < cpu::NumCores; ++i) {
                KThread *core_thread;
                do {
                    core_thread = Kernel::GetScheduler(i).GetSchedulerCurrentThread();
                } while (core_thread == this);
            }
+
+            /* Ensure that all cores are synchronized at this point. */
+            cpu::SynchronizeCores(m_parent->GetPhysicalCoreMask());
        }

        /* Close the thread. */
@ -724,11 +728,7 @@ namespace ams::kern {
            }

            /* Translate the virtual affinity mask to a physical one. */
-            while (v_affinity_mask != 0) {
-                const u64 next = __builtin_ctzll(v_affinity_mask);
-                v_affinity_mask &= ~(1ul << next);
-                p_affinity_mask |=  (1ul << cpu::VirtualToPhysicalCoreMap[next]);
-            }
+            p_affinity_mask = cpu::ConvertVirtualCoreMaskToPhysical(v_affinity_mask);

            /* If we haven't disabled migration, perform an affinity change. */
            if (m_num_core_migration_disables == 0) {