fs: implement PooledBuffer

2025-01-05 11:58:00 +00:00 · 2020-04-06 03:15:24 -07:00 · 2020-04-06 03:15:24 -07:00 · 496be5ecd4
commit 496be5ecd4
parent 50a91b1d6e
5 changed files with 521 additions and 6 deletions
--- a/libraries/libstratosphere/include/stratosphere/fssystem.hpp
+++ b/libraries/libstratosphere/include/stratosphere/fssystem.hpp
@ -26,3 +26,4 @@
 #include <stratosphere/fssystem/fssystem_romfs_file_system.hpp>
 #include <stratosphere/fssystem/buffers/fssystem_buffer_manager_utils.hpp>
 #include <stratosphere/fssystem/buffers/fssystem_file_system_buddy_heap.hpp>
 #include <stratosphere/fssystem/fssystem_pooled_buffer.hpp>
--- a/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp
+++ b/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp
@ -0,0 +1,108 @@
 /*
 * Copyright (c) 2018-2020 Atmosphère-NX
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #pragma once
 #include <vapours.hpp>
 #include <stratosphere/fs/impl/fs_newable.hpp>
 namespace ams::fssystem {
    /* Alignment asserted for the buffer passed to InitializeBufferPool(); the pool implementation also uses this value as its heap block size. */
    constexpr inline size_t BufferPoolAlignment = 4_KB;
    /* Minimum work buffer size, in bytes, asserted by the InitializeBufferPool() overload that takes a work buffer. */
    constexpr inline size_t BufferPoolWorkSize  = 320;
    /* Move-only owner of a region carved out of the global buffer pool. */
    /* A default-constructed or moved-from object holds no region (null buffer, zero size). */
    /* Destruction releases whatever region is still held. */
    class PooledBuffer {
        NON_COPYABLE(PooledBuffer);
        private:
            char *buffer;
            size_t size;
        private:
            static size_t GetAllocatableSizeMaxCore(bool large);
        public:
            /* Largest size a regular allocation may request. */
            static size_t GetAllocatableSizeMax() { return GetAllocatableSizeMaxCore(false); }
            /* Largest size a "particularly large" allocation may request. */
            static size_t GetAllocatableParticularlyLargeSizeMax() { return GetAllocatableSizeMaxCore(true); }
        private:
            /* Exchange the held region with rhs. */
            void Swap(PooledBuffer &rhs) {
                std::swap(this->size, rhs.size);
                std::swap(this->buffer, rhs.buffer);
            }
        public:
            /* Constructor/Destructor. */
            constexpr PooledBuffer() : buffer(nullptr), size(0) { /* ... */ }
            PooledBuffer(size_t ideal_size, size_t required_size) : buffer(nullptr), size(0) {
                this->Allocate(ideal_size, required_size);
            }
            ~PooledBuffer() {
                this->Deallocate();
            }
            /* Move and assignment. */
            explicit PooledBuffer(PooledBuffer &&rhs) : buffer(rhs.buffer), size(rhs.size) {
                /* The source no longer owns the region. */
                rhs.size   = 0;
                rhs.buffer = nullptr;
            }
            PooledBuffer &operator=(PooledBuffer &&rhs) {
                /* Take rhs's region into a temporary, trade it for ours, and let the temporary release what we held. */
                PooledBuffer taken(std::move(rhs));
                taken.Swap(*this);
                return *this;
            }
            /* Allocation API. */
            /* ideal_size is the preferred size, required_size the minimum acceptable size. */
            void Allocate(size_t ideal_size, size_t required_size) {
                this->AllocateCore(ideal_size, required_size, false);
            }
            void AllocateParticularlyLarge(size_t ideal_size, size_t required_size) {
                this->AllocateCore(ideal_size, required_size, true);
            }
            void Shrink(size_t ideal_size);
            void Deallocate() {
                /* Shrinking to zero bytes releases the whole region. */
                this->Shrink(0);
                AMS_ASSERT(this->buffer == nullptr);
            }
            /* Accessors; both assert that a region is currently held. */
            char *GetBuffer() const {
                AMS_ASSERT(this->buffer != nullptr);
                return this->buffer;
            }
            size_t GetSize() const {
                AMS_ASSERT(this->buffer != nullptr);
                return this->size;
            }
        private:
            void AllocateCore(size_t ideal_size, size_t required_size, bool large);
    };
    /* Initializes the global buffer pool over [buffer, buffer + size). */
    /* Asserts that the pool is not yet initialized and that buffer is non-null and BufferPoolAlignment-aligned. */
    Result InitializeBufferPool(char *buffer, size_t size);
    /* As above, additionally handing the heap a caller-provided work buffer; asserts work_size >= BufferPoolWorkSize. */
    Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size);
    /* Returns whether buffer points inside the range the pool was initialized with. */
    bool IsPooledBuffer(const void *buffer);
    /* Number of times an allocation slept and retried since the counters were last cleared. */
    size_t GetPooledBufferRetriedCount();
    /* Number of allocations counted as "reduced" (smaller than targeted) since the counters were last cleared. */
    size_t GetPooledBufferReduceAllocationCount();
    /* Lowest total free size recorded after an allocation since initialization or the last clear. */
    size_t GetPooledBufferFreeSizePeak();
    /* Resets the free-size metric to the current total free size and zeroes both counters. */
    void ClearPooledBufferPeak();
    /* Records a single additional device address range of size bytes starting at address. */
    void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size);
    /* Forgets the recorded range if one is registered and starts at address. */
    void UnregisterAdditionalDeviceAddress(uintptr_t address);
    /* Returns whether ptr lies inside the currently registered additional range. */
    bool IsAdditionalDeviceAddress(const void *ptr);
 }
--- a/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp
+++ b/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp
@ -0,0 +1,272 @@
 /*
 * Copyright (c) 2018-2020 Atmosphère-NX
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include <stratosphere.hpp>
 namespace ams::fssystem {
    namespace {
        /* Tracks at most one additional device address range, [address, address + size). */
        /* Every member function takes the internal mutex for its whole duration. */
        class AdditionalDeviceAddressEntry {
            private:
                /* TODO: SdkMutex */
                os::Mutex mutex;
                bool is_registered;
                uintptr_t address;
                size_t size;
            public:
                constexpr AdditionalDeviceAddressEntry() : mutex(), is_registered(), address(), size() { /* ... */ }
                /* Records the range of sz bytes starting at addr. */
                /* Registering while a range is already recorded asserts; if asserts are disabled the call is ignored. */
                void Register(uintptr_t addr, size_t sz) {
                    std::scoped_lock lk(this->mutex);
                    AMS_ASSERT(!this->is_registered);
                    if (!this->is_registered) {
                        this->is_registered = true;
                        this->address       = addr;
                        /* Store the caller's size; assigning the member to itself would leave the range empty. */
                        this->size          = sz;
                    }
                }
                /* Clears the recorded range, but only if one is recorded and it starts at addr. */
                void Unregister(uintptr_t addr) {
                    std::scoped_lock lk(this->mutex);
                    if (this->is_registered && this->address == addr) {
                        this->is_registered = false;
                        this->address       = 0;
                        this->size          = 0;
                    }
                }
                /* Returns true if a range is recorded and ptr lies inside it. */
                bool Includes(const void *ptr) {
                    std::scoped_lock lk(this->mutex);
                    if (this->is_registered) {
                        const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
                        /* Compare the offset rather than address + size, which could wrap around. */
                        return this->address <= addr && (addr - this->address) < this->size;
                    } else {
                        return false;
                    }
                }
        };
        /* How long AllocateCore() sleeps between failed allocation attempts. */
        constexpr auto RetryWait = TimeSpan::FromMilliSeconds(10);
        /* The heap block size is the pool alignment. */
        constexpr size_t HeapBlockSize = BufferPoolAlignment;
        static_assert(HeapBlockSize == 4_KB);
        /* A heap block is 4KB. An allocation of order N spans 2^N heap blocks. */
        /* The orders below therefore correspond to 32KB (trim), 512KB (max) and 4MB (max for large). */
        constexpr s32    HeapOrderTrim        = 3;
        constexpr s32    HeapOrderMax         = 7;
        constexpr s32    HeapOrderMaxForLarge = HeapOrderMax + 3;
        /* Byte sizes corresponding to the orders above. */
        constexpr size_t HeapAllocatableSizeTrim        = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderTrim);
        constexpr size_t HeapAllocatableSizeMax         = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMax);
        constexpr size_t HeapAllocatableSizeMaxForLarge = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMaxForLarge);
        /* TODO: SdkMutex */
        /* Held around the g_heap calls in AllocateCore(), Shrink() and ClearPooledBufferPeak(). */
        os::Mutex g_heap_mutex;
        /* The buddy heap that backs every PooledBuffer. */
        FileSystemBuddyHeap g_heap;
        /* Incremented each time AllocateCore() sleeps and retries. */
        std::atomic<size_t> g_retry_count;
        /* Incremented each time AllocateCore() ends up with less than it was aiming for. */
        std::atomic<size_t> g_reduce_allocation_count;
        /* Start and size of the region passed to InitializeBufferPool(); g_heap_buffer is nullptr until then. */
        void *g_heap_buffer;
        size_t g_heap_size;
        /* Lowest total free size recorded after an allocation (despite the name, this is a minimum). */
        size_t g_heap_free_size_peak;
        /* Backing state for the (Un)RegisterAdditionalDeviceAddress / IsAdditionalDeviceAddress API. */
        AdditionalDeviceAddressEntry g_additional_device_address_entry;
    }
    /* Returns the per-allocation size cap: the large cap when large is set, the regular cap otherwise. */
    size_t PooledBuffer::GetAllocatableSizeMaxCore(bool large) {
        if (large) {
            return HeapAllocatableSizeMaxForLarge;
        }
        return HeapAllocatableSizeMax;
    }
    /* Allocates a pool region for this (currently empty) handle, sleeping and retrying until one is obtained. */
    /* ideal_size is the size the caller would like; required_size is the minimum it will accept. */
    /* large selects the cap returned by GetAllocatableSizeMaxCore(true) instead of the regular one. */
    void PooledBuffer::AllocateCore(size_t ideal_size, size_t required_size, bool large) {
        /* Ensure preconditions. */
        AMS_ASSERT(g_heap_buffer != nullptr);
        AMS_ASSERT(this->buffer == nullptr);
        AMS_ASSERT(g_heap.GetBlockSize() == HeapBlockSize);
        /* Check that we can allocate this size. */
        AMS_ASSERT(required_size <= GetAllocatableSizeMaxCore(large));
        /* Aim for the larger of the two requested sizes, capped at the per-allocation maximum. */
        const size_t target_size = std::min(std::max(ideal_size, required_size), GetAllocatableSizeMaxCore(large));
        /* Loop until we allocate. */
        while (true) {
            /* Lock the heap and try to allocate. */
            {
                std::scoped_lock lk(g_heap_mutex);
                /* Take the largest size the heap reports as allocatable, and use at most half of it */
                /* (unless that size is no more than a single block). */
                size_t allocatable_size = g_heap.GetAllocatableSizeMax();
                if (allocatable_size > HeapBlockSize) {
                    allocatable_size >>= 1;
                }
                /* Check if this allocation is acceptable. */
                if (allocatable_size >= required_size) {
                    /* Get the order. */
                    const auto order = g_heap.GetOrderFromBytes(std::min(target_size, allocatable_size));
                    /* Allocate and get the size. */
                    /* NOTE(review): size is assigned even if AllocateByOrder() returns nullptr; */
                    /* the loop only exits after an iteration in which both fields were set together. */
                    this->buffer = reinterpret_cast<char *>(g_heap.AllocateByOrder(order));
                    this->size   = g_heap.GetBytesFromOrder(order);
                }
            }
            /* Check if we allocated. */
            if (this->buffer != nullptr) {
                /* If the region overshoots the target by at least one trim unit, give the excess back. */
                if (this->GetSize() >= target_size + HeapAllocatableSizeTrim) {
                    this->Shrink(util::AlignUp(target_size, HeapAllocatableSizeTrim));
                }
                AMS_ASSERT(this->GetSize() >= required_size);
                /* If we got less than the target (itself capped at the regular maximum), note so. */
                if (this->GetSize() < std::min(target_size, HeapAllocatableSizeMax)) {
                    g_reduce_allocation_count++;
                }
                break;
            } else {
                /* Nothing suitable was available: sleep outside the lock, then retry. */
                /* TODO: os::SleepThread() */
                svc::SleepThread(RetryWait.GetNanoSeconds());
                g_retry_count++;
            }
        }
        /* Update metrics: remember the lowest total free size seen so far. */
        {
            std::scoped_lock lk(g_heap_mutex);
            const size_t free_size = g_heap.GetTotalFreeSize();
            if (free_size < g_heap_free_size_peak) {
                g_heap_free_size_peak = free_size;
            }
        }
    }
    /* Releases the tail of the held region so that ideal_size bytes, rounded up to whole heap blocks, remain. */
    /* Does nothing when the region is already no larger than ideal_size. */
    /* Shrinking to zero leaves the handle empty (null buffer). */
    void PooledBuffer::Shrink(size_t ideal_size) {
        AMS_ASSERT(ideal_size <= GetAllocatableSizeMaxCore(true));
        /* Check if we actually need to shrink. */
        if (this->size > ideal_size) {
            /* If we do, we need to have a buffer allocated from the heap. */
            AMS_ASSERT(this->buffer != nullptr);
            /* Compare the block size; a comma here would only test that it is non-zero. */
            AMS_ASSERT(g_heap.GetBlockSize() == HeapBlockSize);
            const size_t new_size = util::AlignUp(ideal_size, HeapBlockSize);
            /* Repeatedly free the tail of our buffer until we're done. */
            {
                std::scoped_lock lk(g_heap_mutex);
                while (new_size < this->size) {
                    /* Each step frees the largest power-of-two chunk that both fits in the */
                    /* remaining excess and does not exceed the lowest set bit of the current size. */
                    const size_t tail_align = util::LeastSignificantOneBit(this->size);
                    const size_t free_size  = std::min(util::FloorPowerOfTwo(this->size - new_size), tail_align);
                    const s32 free_order    = g_heap.GetOrderFromBytes(free_size);
                    /* Ensure we determined size correctly. */
                    AMS_ASSERT(util::IsAligned(free_size, HeapBlockSize));
                    AMS_ASSERT(free_size == g_heap.GetBytesFromOrder(free_order));
                    /* Actually free the memory. */
                    g_heap.Free(this->buffer + this->size - free_size, free_order);
                    this->size -= free_size;
                }
            }
            /* Shrinking to zero means that we have no buffer. */
            if (this->size == 0) {
                this->buffer = nullptr;
            }
        }
    }
    /* Sets up the global buffer pool over [buffer, buffer + size). */
    /* Returns the heap's failure result if its initialization fails, ResultSuccess() otherwise. */
    Result InitializeBufferPool(char *buffer, size_t size) {
        /* The pool must not be set up yet, and needs a non-null, pool-aligned region. */
        AMS_ASSERT(g_heap_buffer == nullptr);
        AMS_ASSERT(buffer != nullptr);
        const uintptr_t pool_address = reinterpret_cast<uintptr_t>(buffer);
        AMS_ASSERT(util::IsAligned(pool_address, BufferPoolAlignment));
        /* Hand the region to the heap. */
        R_TRY(g_heap.Initialize(pool_address, size, HeapBlockSize, HeapOrderMaxForLarge + 1));
        /* Record the pool bounds; the free-size metric starts at the full pool size. */
        g_heap_free_size_peak = size;
        g_heap_size           = size;
        g_heap_buffer         = buffer;
        return ResultSuccess();
    }
    /* Sets up the global buffer pool over [buffer, buffer + size), passing the heap a caller-provided work buffer. */
    /* Returns the heap's failure result if its initialization fails, ResultSuccess() otherwise. */
    Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size) {
        /* The pool must not be set up yet, and needs a non-null, pool-aligned region and a large enough work buffer. */
        AMS_ASSERT(g_heap_buffer == nullptr);
        AMS_ASSERT(buffer != nullptr);
        const uintptr_t pool_address = reinterpret_cast<uintptr_t>(buffer);
        AMS_ASSERT(util::IsAligned(pool_address, BufferPoolAlignment));
        AMS_ASSERT(work_size >= BufferPoolWorkSize);
        /* Hand the region and the work buffer to the heap. */
        R_TRY(g_heap.Initialize(pool_address, size, HeapBlockSize, HeapOrderMaxForLarge + 1, work, work_size));
        /* Record the pool bounds; the free-size metric starts at the full pool size. */
        g_heap_free_size_peak = size;
        g_heap_size           = size;
        g_heap_buffer         = buffer;
        return ResultSuccess();
    }
    /* Returns whether buffer lies in the half-open range [g_heap_buffer, g_heap_buffer + g_heap_size). */
    bool IsPooledBuffer(const void *buffer) {
        AMS_ASSERT(buffer != nullptr);
        /* Reject anything below the start of the pool. */
        const void * const pool_begin = g_heap_buffer;
        if (!(pool_begin <= buffer)) {
            return false;
        }
        /* Otherwise it is pooled exactly when it is below the end of the pool. */
        const void * const pool_end = reinterpret_cast<char *>(g_heap_buffer) + g_heap_size;
        return buffer < pool_end;
    }
    /* Returns the current value of the allocation retry counter. */
    size_t GetPooledBufferRetriedCount() {
        return g_retry_count.load();
    }
    /* Returns the current value of the reduced-allocation counter. */
    size_t GetPooledBufferReduceAllocationCount() {
        return g_reduce_allocation_count.load();
    }
    /* Returns the recorded free-size metric. */
    /* NOTE(review): the value is read without taking g_heap_mutex, although it is written under that mutex. */
    size_t GetPooledBufferFreeSizePeak() {
        return g_heap_free_size_peak;
    }
    /* Restarts metric collection while holding the heap mutex. */
    void ClearPooledBufferPeak() {
        std::scoped_lock lk(g_heap_mutex);
        /* The free-size metric restarts from the heap's current total free size. */
        g_heap_free_size_peak = g_heap.GetTotalFreeSize();
        /* Both event counters restart from zero. */
        g_retry_count.store(0);
        g_reduce_allocation_count.store(0);
    }
    /* Forwards to the global entry's Register() with the given start address and size. */
    void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size) {
        g_additional_device_address_entry.Register(address, size);
    }
    /* Forwards to the global entry's Unregister(), which only clears a range that starts at address. */
    void UnregisterAdditionalDeviceAddress(uintptr_t address) {
        g_additional_device_address_entry.Unregister(address);
    }
    /* Returns whether the global entry has a registered range that contains ptr. */
    bool IsAdditionalDeviceAddress(const void *ptr) {
        return g_additional_device_address_entry.Includes(ptr);
    }
 }
--- a/libraries/libvapours/include/vapours/util/util_alignment.hpp
+++ b/libraries/libvapours/include/vapours/util/util_alignment.hpp
@ -17,16 +17,11 @@
 #pragma once
 #include <vapours/common.hpp>
 #include <vapours/assert.hpp>
 #include <vapours/util/util_bitutil.hpp>
 namespace ams::util {
    /* Utilities for alignment to power of two. */
    /* Returns whether value, viewed as unsigned, has at most one bit set. */
    /* Note that zero also satisfies this test. */
    template<typename T>
    constexpr ALWAYS_INLINE bool IsPowerOfTwo(T value) {
        using U = typename std::make_unsigned<T>::type;
        const U bits  = static_cast<U>(value);
        const U below = static_cast<U>(value - 1);
        return (bits & below) == 0;
    }
    template<typename T>
    constexpr ALWAYS_INLINE T AlignUp(T value, size_t alignment) {
        using U = typename std::make_unsigned<T>::type;
--- a/libraries/libvapours/include/vapours/util/util_bitutil.hpp
+++ b/libraries/libvapours/include/vapours/util/util_bitutil.hpp
@ -20,6 +20,16 @@
 namespace ams::util {
    namespace impl {
        /* Compile-time floor(log2(N)) for N >= 1, computed by halving N until it reaches 1. */
        /* N == 0 never reaches the terminating specialization below. */
        template<size_t N>
        constexpr inline size_t Log2 = Log2<N / 2> + 1;
        /* Recursion terminator: log2(1) == 0. */
        template<>
        constexpr inline size_t Log2<1> = 0;
    }
    template <typename T>
    class BitsOf {
        private:
@ -73,4 +83,133 @@ namespace ams::util {
        return (... | (T(1u) << args));
    }
    /* Clears the lowest set bit of x, e.g. 0b1100 -> 0b1000. Zero stays zero. */
    template<typename T>
    constexpr ALWAYS_INLINE T ResetLeastSignificantOneBit(T x) {
        return x & (x - 1);
    }
    /* Sets the lowest clear bit of x, e.g. 0b1011 -> 0b1111. */
    template<typename T>
    constexpr ALWAYS_INLINE T SetLeastSignificantZeroBit(T x) {
        return x | (x + 1);
    }
    /* Isolates the lowest set bit of x, e.g. 0b1100 -> 0b0100. Zero yields zero. */
    template<typename T>
    constexpr ALWAYS_INLINE T LeastSignificantOneBit(T x) {
        return x & ~(x - 1);
    }
    /* Returns a value with only the position of x's lowest clear bit set, e.g. 0b1011 -> 0b0100. */
    template<typename T>
    constexpr ALWAYS_INLINE T LeastSignificantZeroBit(T x) {
        return ~x & (x + 1);
    }
    /* Clears the run of set bits at the low end of x, e.g. 0b1011 -> 0b1000. */
    template<typename T>
    constexpr ALWAYS_INLINE T ResetTrailingOnes(T x) {
        return x & (x + 1);
    }
    /* Sets the run of clear bits at the low end of x, e.g. 0b1100 -> 0b1111. */
    template<typename T>
    constexpr ALWAYS_INLINE T SetTrailingZeros(T x) {
        return x | (x - 1);
    }
    /* Returns a mask covering only the run of clear bits at the low end of x, e.g. 0b1100 -> 0b0011. */
    template<typename T>
    constexpr ALWAYS_INLINE T MaskTrailingZeros(T x) {
        return (~x) & (x - 1);
    }
    /* Returns a mask covering only the run of set bits at the low end of x, e.g. 0b1011 -> 0b0011. */
    template<typename T>
    constexpr ALWAYS_INLINE T MaskTrailingOnes(T x) {
        return ~((~x) | (x + 1));
    }
    /* Returns a mask covering x's lowest set bit and every bit below it, e.g. 0b1100 -> 0b0111. */
    template<typename T>
    constexpr ALWAYS_INLINE T MaskTrailingZerosAndLeastSignificantOneBit(T x) {
        return x ^ (x - 1);
    }
    /* Returns a mask covering x's lowest clear bit and every bit below it, e.g. 0b1011 -> 0b0111. */
    template<typename T>
    constexpr ALWAYS_INLINE T MaskTrailingOnesAndLeastSignificantZeroBit(T x) {
        return x ^ (x + 1);
    }
    /* Returns the number of set bits in x, with x reinterpreted as the unsigned type of the same width. */
    template<typename T>
    constexpr ALWAYS_INLINE int PopCount(T x) {
        /* TODO: C++20 std::bit_cast */
        using U = typename std::make_unsigned<T>::type;
        U u = static_cast<U>(x);
        /* TODO: C++20 std::is_constant_evaluated */
        /* The condition is hard-coded to false, so the portable branch below is currently never taken. */
        if (false) {
            /* https://en.wikipedia.org/wiki/Hamming_weight */
            /* Repeating bit patterns 0x55.., 0x33.. and 0x0F.. at the width of U. */
            constexpr U m1 = U(-1) / 0x03;
            constexpr U m2 = U(-1) / 0x05;
            constexpr U m4 = U(-1) / 0x11;
            /* Sum adjacent bits into 2-bit, then 4-bit, then 8-bit fields. */
            u = static_cast<U>(u - ((u >> 1) & m1));
            u = static_cast<U>((u & m2) + ((u >> 2) & m2));
            u = static_cast<U>((u + (u >> 4)) & m4);
            /* Fold the per-byte counts together by shifting 8, 16, 32, ... bits. */
            for (size_t i = 0; i < impl::Log2<sizeof(T)>; ++i) {
                const size_t shift = (0x1 << i) * BITSIZEOF(u8);
                u += u >> shift;
            }
            /* Only the low 7 bits of the folded value are returned. */
            return static_cast<int>(u & 0x7Fu);
        } else {
            /* Dispatch to the compiler builtin matching the width of U. */
            if constexpr (std::is_same<U, unsigned long long>::value) {
                return __builtin_popcountll(u);
            } else if constexpr (std::is_same<U, unsigned long>::value) {
                return __builtin_popcountl(u);
            } else {
                /* Anything else must fit in unsigned int and is widened to it. */
                static_assert(sizeof(U) <= sizeof(unsigned int));
                return __builtin_popcount(static_cast<unsigned int>(u));
            }
        }
    }
    /* Returns the number of zero bits above the most significant set bit of x, */
    /* with x reinterpreted as the unsigned type of the same width. */
    /* A zero input yields BITSIZEOF(T); the __builtin_clz family has an undefined result for zero, */
    /* so that case is answered without calling a builtin. */
    template<typename T>
    constexpr ALWAYS_INLINE int CountLeadingZeros(T x) {
        /* TODO: C++20 std::is_constant_evaluated */
        /* The condition is hard-coded to false, so the portable branch below is currently never taken. */
        if (false) {
            /* Smear the highest set bit downwards, then count the bits left clear. */
            for (size_t i = 0; i < impl::Log2<BITSIZEOF(T)>; ++i) {
                const size_t shift = (0x1 << i);
                x |= x >> shift;
            }
            return PopCount(static_cast<T>(~x));
        } else {
            /* TODO: C++20 std::bit_cast */
            using U = typename std::make_unsigned<T>::type;
            const U u = static_cast<U>(x);
            /* With no bit set, every bit is a leading zero. */
            if (u == 0) {
                return static_cast<int>(BITSIZEOF(T));
            }
            /* Dispatch to the compiler builtin matching the width of U. */
            if constexpr (std::is_same<U, unsigned long long>::value) {
                return __builtin_clzll(u);
            } else if constexpr (std::is_same<U, unsigned long>::value) {
                return __builtin_clzl(u);
            } else if constexpr (std::is_same<U, unsigned int>::value) {
                return __builtin_clz(u);
            } else {
                /* Narrower types are widened to unsigned int; subtract the zero bits the widening added. */
                static_assert(sizeof(U) < sizeof(unsigned int));
                constexpr size_t BitDiff = BITSIZEOF(unsigned int) - BITSIZEOF(U);
                return __builtin_clz(static_cast<unsigned int>(u)) - BitDiff;
            }
        }
    }
    /* Returns whether x is positive and has exactly one bit set. */
    template<typename T>
    constexpr ALWAYS_INLINE bool IsPowerOfTwo(T x) {
        /* Zero and negative values are never powers of two. */
        if (!(x > 0)) {
            return false;
        }
        /* Clearing the lowest set bit leaves nothing exactly when only one bit was set. */
        return ResetLeastSignificantOneBit(x) == 0;
    }
    /* Rounds x up to a power of two, e.g. 5 -> 8 and 4 -> 4. Asserts x > 0. */
    /* NOTE(review): for x == 1 this evaluates CountLeadingZeros(T(0)); the result is 1 only if that call returns BITSIZEOF(T). */
    template<typename T>
    constexpr ALWAYS_INLINE T CeilingPowerOfTwo(T x) {
        AMS_ASSERT(x > 0);
        /* The number of bits needed to represent x - 1 is the index of the bit to set. */
        const auto bit_index = BITSIZEOF(T) - CountLeadingZeros(T(x - 1));
        return T(1) << bit_index;
    }
    /* Rounds x down to a power of two, e.g. 5 -> 4 and 4 -> 4. Asserts x > 0. */
    template<typename T>
    constexpr ALWAYS_INLINE T FloorPowerOfTwo(T x) {
        AMS_ASSERT(x > 0);
        /* Index of the most significant set bit of x. */
        const auto bit_index = BITSIZEOF(T) - CountLeadingZeros(x) - 1;
        return T(1) << bit_index;
    }
 }