diff --git a/libraries/libstratosphere/include/stratosphere/fssystem.hpp b/libraries/libstratosphere/include/stratosphere/fssystem.hpp
index c7802c151..85c03d7be 100644
--- a/libraries/libstratosphere/include/stratosphere/fssystem.hpp
+++ b/libraries/libstratosphere/include/stratosphere/fssystem.hpp
@@ -26,3 +26,4 @@
 #include
 #include
 #include
+#include <stratosphere/fssystem/fssystem_pooled_buffer.hpp>
diff --git a/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp b/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp
new file mode 100644
index 000000000..7dd9e385f
--- /dev/null
+++ b/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018-2020 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+#include
+#include
+
+namespace ams::fssystem {
+
+    constexpr inline size_t BufferPoolAlignment = 4_KB;
+    constexpr inline size_t BufferPoolWorkSize  = 320;
+
+    class PooledBuffer {
+        NON_COPYABLE(PooledBuffer);
+        private:
+            char *buffer;
+            size_t size;
+        private:
+            static size_t GetAllocatableSizeMaxCore(bool large);
+        public:
+            static size_t GetAllocatableSizeMax() { return GetAllocatableSizeMaxCore(false); }
+            static size_t GetAllocatableParticularlyLargeSizeMax() { return GetAllocatableSizeMaxCore(true); }
+        private:
+            void Swap(PooledBuffer &rhs) {
+                std::swap(this->buffer, rhs.buffer);
+                std::swap(this->size, rhs.size);
+            }
+        public:
+            /* Constructor/Destructor. */
+            constexpr PooledBuffer() : buffer(), size() { /* ... */ }
+
+            PooledBuffer(size_t ideal_size, size_t required_size) : buffer(), size() {
+                this->Allocate(ideal_size, required_size);
+            }
+
+            ~PooledBuffer() {
+                this->Deallocate();
+            }
+
+            /* Move and assignment. */
+            explicit PooledBuffer(PooledBuffer &&rhs) : buffer(rhs.buffer), size(rhs.size) {
+                rhs.buffer = nullptr;
+                rhs.size   = 0;
+            }
+
+            PooledBuffer &operator=(PooledBuffer &&rhs) {
+                PooledBuffer(std::move(rhs)).Swap(*this);
+                return *this;
+            }
+
+            /* Allocation API. */
+            void Allocate(size_t ideal_size, size_t required_size) {
+                return this->AllocateCore(ideal_size, required_size, false);
+            }
+
+            void AllocateParticularlyLarge(size_t ideal_size, size_t required_size) {
+                return this->AllocateCore(ideal_size, required_size, true);
+            }
+
+            void Shrink(size_t ideal_size);
+
+            void Deallocate() {
+                /* Shrink the buffer to empty. */
+                this->Shrink(0);
+                AMS_ASSERT(this->buffer == nullptr);
+            }
+
+            char *GetBuffer() const {
+                AMS_ASSERT(this->buffer != nullptr);
+                return this->buffer;
+            }
+
+            size_t GetSize() const {
+                AMS_ASSERT(this->buffer != nullptr);
+                return this->size;
+            }
+        private:
+            void AllocateCore(size_t ideal_size, size_t required_size, bool large);
+    };
+
+    Result InitializeBufferPool(char *buffer, size_t size);
+    Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size);
+
+    bool IsPooledBuffer(const void *buffer);
+
+    size_t GetPooledBufferRetriedCount();
+    size_t GetPooledBufferReduceAllocationCount();
+    size_t GetPooledBufferFreeSizePeak();
+
+    void ClearPooledBufferPeak();
+
+    void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size);
+    void UnregisterAdditionalDeviceAddress(uintptr_t address);
+    bool IsAdditionalDeviceAddress(const void *ptr);
+
+}
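
For context when reviewing the new header, here is a rough usage sketch. It is not part of the patch: the function name and copy loop are invented for illustration, and it assumes the pool was already set up via InitializeBufferPool() with BufferPoolAlignment-aligned backing memory during process startup.

#include <stratosphere.hpp>

namespace ams::example {

    /* Hypothetical helper: copy between two storages using a buffer borrowed from the pool. */
    Result CopyWithPooledBuffer(fs::IStorage &dst, fs::IStorage &src, s64 offset, s64 copy_size) {
        /* Ideally get a single copy_size-byte buffer, but accept anything of at least one 4 KB block. */
        fssystem::PooledBuffer pooled_buffer(static_cast<size_t>(copy_size), fssystem::BufferPoolAlignment);

        s64 remaining  = copy_size;
        s64 cur_offset = offset;
        while (remaining > 0) {
            const size_t cur_size = static_cast<size_t>(std::min(remaining, static_cast<s64>(pooled_buffer.GetSize())));

            R_TRY(src.Read(cur_offset, pooled_buffer.GetBuffer(), cur_size));
            R_TRY(dst.Write(cur_offset, pooled_buffer.GetBuffer(), cur_size));

            remaining  -= cur_size;
            cur_offset += cur_size;
        }

        /* ~PooledBuffer() shrinks the allocation to zero, returning it to the heap. */
        return ResultSuccess();
    }

}
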
diff --git a/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp b/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp
new file mode 100644
index 000000000..7b224aa00
--- /dev/null
+++ b/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2018-2020 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stratosphere.hpp>
+
+namespace ams::fssystem {
+
+    namespace {
+
+        class AdditionalDeviceAddressEntry {
+            private:
+                /* TODO: SdkMutex */
+                os::Mutex mutex;
+                bool is_registered;
+                uintptr_t address;
+                size_t size;
+            public:
+                constexpr AdditionalDeviceAddressEntry() : mutex(), is_registered(), address(), size() { /* ... */ }
+
+                void Register(uintptr_t addr, size_t sz) {
+                    std::scoped_lock lk(this->mutex);
+
+                    AMS_ASSERT(!this->is_registered);
+                    if (!this->is_registered) {
+                        this->is_registered = true;
+                        this->address       = addr;
+                        this->size          = sz;
+                    }
+                }
+
+                void Unregister(uintptr_t addr) {
+                    std::scoped_lock lk(this->mutex);
+
+                    if (this->is_registered && this->address == addr) {
+                        this->is_registered = false;
+                        this->address       = 0;
+                        this->size          = 0;
+                    }
+                }
+
+                bool Includes(const void *ptr) {
+                    std::scoped_lock lk(this->mutex);
+
+                    if (this->is_registered) {
+                        const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+                        return this->address <= addr && addr < this->address + this->size;
+                    } else {
+                        return false;
+                    }
+                }
+        };
+
+        constexpr auto RetryWait = TimeSpan::FromMilliSeconds(10);
+
+        constexpr size_t HeapBlockSize = BufferPoolAlignment;
+        static_assert(HeapBlockSize == 4_KB);
+
+        /* A heap block is 4KB. An order is a power of two. */
+        /* This gives maximum blocks of order 32KB (trim), 512KB, and 4MB (particularly large). */
+        constexpr s32 HeapOrderTrim        = 3;
+        constexpr s32 HeapOrderMax         = 7;
+        constexpr s32 HeapOrderMaxForLarge = HeapOrderMax + 3;
+
+        constexpr size_t HeapAllocatableSizeTrim        = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderTrim);
+        constexpr size_t HeapAllocatableSizeMax         = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMax);
+        constexpr size_t HeapAllocatableSizeMaxForLarge = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMaxForLarge);
+
+        /* TODO: SdkMutex */
+        os::Mutex g_heap_mutex;
+        FileSystemBuddyHeap g_heap;
+
+        std::atomic<size_t> g_retry_count;
+        std::atomic<size_t> g_reduce_allocation_count;
+
+        void *g_heap_buffer;
+        size_t g_heap_size;
+        size_t g_heap_free_size_peak;
+
+        AdditionalDeviceAddressEntry g_additional_device_address_entry;
+
+    }
+
+    size_t PooledBuffer::GetAllocatableSizeMaxCore(bool large) {
+        return large ? HeapAllocatableSizeMaxForLarge : HeapAllocatableSizeMax;
+    }
+
+    void PooledBuffer::AllocateCore(size_t ideal_size, size_t required_size, bool large) {
+        /* Ensure preconditions. */
+        AMS_ASSERT(g_heap_buffer != nullptr);
+        AMS_ASSERT(this->buffer == nullptr);
+        AMS_ASSERT(g_heap.GetBlockSize() == HeapBlockSize);
+
+        /* Check that we can allocate this size. */
+        AMS_ASSERT(required_size <= GetAllocatableSizeMaxCore(large));
+
+        const size_t target_size = std::min(std::max(ideal_size, required_size), GetAllocatableSizeMaxCore(large));
+
+        /* Loop until we allocate. */
+        while (true) {
+            /* Lock the heap and try to allocate. */
+            {
+                std::scoped_lock lk(g_heap_mutex);
+
+                /* Determine how much we can allocate, and don't allocate more than half the heap. */
+                size_t allocatable_size = g_heap.GetAllocatableSizeMax();
+                if (allocatable_size > HeapBlockSize) {
+                    allocatable_size >>= 1;
+                }
+
+                /* Check if this allocation is acceptable. */
+                if (allocatable_size >= required_size) {
+                    /* Get the order. */
+                    const auto order = g_heap.GetOrderFromBytes(std::min(target_size, allocatable_size));
+
+                    /* Allocate and get the size. */
+                    this->buffer = reinterpret_cast<char *>(g_heap.AllocateByOrder(order));
+                    this->size   = g_heap.GetBytesFromOrder(order);
+                }
+            }
+
+            /* Check if we allocated. */
+            if (this->buffer != nullptr) {
+                /* If we need to trim the end, do so. */
+                if (this->GetSize() >= target_size + HeapAllocatableSizeTrim) {
+                    this->Shrink(util::AlignUp(target_size, HeapAllocatableSizeTrim));
+                }
+                AMS_ASSERT(this->GetSize() >= required_size);
+
+                /* If we reduced, note so. */
+                if (this->GetSize() < std::min(target_size, HeapAllocatableSizeMax)) {
+                    g_reduce_allocation_count++;
+                }
+                break;
+            } else {
+                /* Sleep. */
+                /* TODO: os::SleepThread() */
+                svc::SleepThread(RetryWait.GetNanoSeconds());
+                g_retry_count++;
+            }
+        }
+
+        /* Update metrics. */
+        {
+            std::scoped_lock lk(g_heap_mutex);
+
+            const size_t free_size = g_heap.GetTotalFreeSize();
+            if (free_size < g_heap_free_size_peak) {
+                g_heap_free_size_peak = free_size;
+            }
+        }
+    }
+
+    void PooledBuffer::Shrink(size_t ideal_size) {
+        AMS_ASSERT(ideal_size <= GetAllocatableSizeMaxCore(true));
+
+        /* Check if we actually need to shrink. */
+        if (this->size > ideal_size) {
+            /* If we do, we need to have a buffer allocated from the heap. */
+            AMS_ASSERT(this->buffer != nullptr);
+            AMS_ASSERT(g_heap.GetBlockSize() == HeapBlockSize);
+
+            const size_t new_size = util::AlignUp(ideal_size, HeapBlockSize);
+
+            /* Repeatedly free the tail of our buffer until we're done. */
+            {
+                std::scoped_lock lk(g_heap_mutex);
+
+                while (new_size < this->size) {
+                    /* Determine the size and order to free. */
+                    const size_t tail_align = util::LeastSignificantOneBit(this->size);
+                    const size_t free_size  = std::min(util::FloorPowerOfTwo(this->size - new_size), tail_align);
+                    const s32 free_order    = g_heap.GetOrderFromBytes(free_size);
+
+                    /* Ensure we determined size correctly. */
+                    AMS_ASSERT(util::IsAligned(free_size, HeapBlockSize));
+                    AMS_ASSERT(free_size == g_heap.GetBytesFromOrder(free_order));
+
+                    /* Actually free the memory. */
+                    g_heap.Free(this->buffer + this->size - free_size, free_order);
+                    this->size -= free_size;
+                }
+            }
+
+            /* Shrinking to zero means that we have no buffer. */
+            if (this->size == 0) {
+                this->buffer = nullptr;
+            }
+        }
+    }
+
+    Result InitializeBufferPool(char *buffer, size_t size) {
+        AMS_ASSERT(g_heap_buffer == nullptr);
+        AMS_ASSERT(buffer != nullptr);
+        AMS_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(buffer), BufferPoolAlignment));
+
+        /* Initialize the heap. */
+        R_TRY(g_heap.Initialize(reinterpret_cast<uintptr_t>(buffer), size, HeapBlockSize, HeapOrderMaxForLarge + 1));
+
+        /* Initialize metrics. */
+        g_heap_buffer         = buffer;
+        g_heap_size           = size;
+        g_heap_free_size_peak = size;
+
+        return ResultSuccess();
+    }
+
+    Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size) {
+        AMS_ASSERT(g_heap_buffer == nullptr);
+        AMS_ASSERT(buffer != nullptr);
+        AMS_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(buffer), BufferPoolAlignment));
+        AMS_ASSERT(work_size >= BufferPoolWorkSize);
+
+        /* Initialize the heap. */
+        R_TRY(g_heap.Initialize(reinterpret_cast<uintptr_t>(buffer), size, HeapBlockSize, HeapOrderMaxForLarge + 1, work, work_size));
+
+        /* Initialize metrics. */
+        g_heap_buffer         = buffer;
+        g_heap_size           = size;
+        g_heap_free_size_peak = size;
+
+        return ResultSuccess();
+    }
+
+    bool IsPooledBuffer(const void *buffer) {
+        AMS_ASSERT(buffer != nullptr);
+        return g_heap_buffer <= buffer && buffer < reinterpret_cast<const char *>(g_heap_buffer) + g_heap_size;
+    }
+
+    size_t GetPooledBufferRetriedCount() {
+        return g_retry_count;
+    }
+
+    size_t GetPooledBufferReduceAllocationCount() {
+        return g_reduce_allocation_count;
+    }
+
+    size_t GetPooledBufferFreeSizePeak() {
+        return g_heap_free_size_peak;
+    }
+
+    void ClearPooledBufferPeak() {
+        std::scoped_lock lk(g_heap_mutex);
+        g_heap_free_size_peak     = g_heap.GetTotalFreeSize();
+        g_retry_count             = 0;
+        g_reduce_allocation_count = 0;
+    }
+
+    void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size) {
+        g_additional_device_address_entry.Register(address, size);
+    }
+
+    void UnregisterAdditionalDeviceAddress(uintptr_t address) {
+        g_additional_device_address_entry.Unregister(address);
+    }
+
+    bool IsAdditionalDeviceAddress(const void *ptr) {
+        return g_additional_device_address_entry.Includes(ptr);
+    }
+
+}
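
A worked example of the shrink arithmetic may help review. It is not part of the patch; the standalone program below simply replays PooledBuffer::Shrink's tail-freeing loop with concrete numbers (a 512 KB allocation shrunk toward an ideal 96 KB), using plain C++ stand-ins for util::LeastSignificantOneBit and util::FloorPowerOfTwo.

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
    constexpr std::size_t KB = 1024;

    std::size_t size     = 512 * KB;   /* current allocation: one order-7 (512 KB) block */
    std::size_t new_size =  96 * KB;   /* AlignUp(ideal_size, HeapBlockSize)             */

    while (new_size < size) {
        /* Stand-in for util::LeastSignificantOneBit(size). */
        const std::size_t tail_align = size & ~(size - 1);

        /* Stand-in for util::FloorPowerOfTwo(size - new_size). */
        std::size_t pot = 1;
        while (pot * 2 <= size - new_size) {
            pot *= 2;
        }

        const std::size_t free_size = std::min(pot, tail_align);
        std::printf("free %zu KB, %zu KB remain\n", free_size / KB, (size - free_size) / KB);
        size -= free_size;
    }

    /* Prints: free 256 KB (256 remain), free 128 KB (128 remain), free 32 KB (96 remain). */
    return 0;
}

Each freed chunk is a power-of-two multiple of the 4 KB block size and never larger than the alignment of the current tail, which is what keeps the pieces returnable to the buddy heap.
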
diff --git a/libraries/libvapours/include/vapours/util/util_alignment.hpp b/libraries/libvapours/include/vapours/util/util_alignment.hpp
index 8c3a24e5b..849e141c0 100644
--- a/libraries/libvapours/include/vapours/util/util_alignment.hpp
+++ b/libraries/libvapours/include/vapours/util/util_alignment.hpp
@@ -17,16 +17,11 @@
 #pragma once
 #include
 #include
+#include <vapours/util/util_bitutil.hpp>
 
 namespace ams::util {
 
     /* Utilities for alignment to power of two. */
-    template<typename T>
-    constexpr ALWAYS_INLINE bool IsPowerOfTwo(T value) {
-        using U = typename std::make_unsigned<T>::type;
-        return (static_cast<U>(value) & static_cast<U>(value - 1)) == 0;
-    }
-
     template<typename T>
     constexpr ALWAYS_INLINE T AlignUp(T value, size_t alignment) {
         using U = typename std::make_unsigned<T>::type;
diff --git a/libraries/libvapours/include/vapours/util/util_bitutil.hpp b/libraries/libvapours/include/vapours/util/util_bitutil.hpp
index 536aa0723..de8e78983 100644
--- a/libraries/libvapours/include/vapours/util/util_bitutil.hpp
+++ b/libraries/libvapours/include/vapours/util/util_bitutil.hpp
@@ -20,6 +20,16 @@
 
 namespace ams::util {
 
+    namespace impl {
+
+        template<size_t N>
+        constexpr inline size_t Log2 = Log2<N / 2> + 1;
+
+        template<>
+        constexpr inline size_t Log2<1> = 0;
+
+    }
+
     template<typename T>
     class BitsOf {
         private:
@@ -73,4 +83,133 @@ namespace ams::util {
         return (... | (T(1u) << args));
     }
 
+    template<typename T>
+    constexpr ALWAYS_INLINE T ResetLeastSignificantOneBit(T x) {
+        return x & (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T SetLeastSignificantZeroBit(T x) {
+        return x | (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T LeastSignificantOneBit(T x) {
+        return x & ~(x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T LeastSignificantZeroBit(T x) {
+        return ~x & (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T ResetTrailingOnes(T x) {
+        return x & (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T SetTrailingZeros(T x) {
+        return x | (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingZeros(T x) {
+        return (~x) & (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingOnes(T x) {
+        return ~((~x) | (x + 1));
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingZerosAndLeastSignificantOneBit(T x) {
+        return x ^ (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingOnesAndLeastSignificantZeroBit(T x) {
+        return x ^ (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE int PopCount(T x) {
+        /* TODO: C++20 std::bit_cast */
+        using U = typename std::make_unsigned<T>::type;
+        U u = static_cast<U>(x);
+
+        /* TODO: C++20 std::is_constant_evaluated */
+        if (false) {
+            /* https://en.wikipedia.org/wiki/Hamming_weight */
+            constexpr U m1 = U(-1) / 0x03;
+            constexpr U m2 = U(-1) / 0x05;
+            constexpr U m4 = U(-1) / 0x11;
+
+            u = static_cast<U>(u - ((u >> 1) & m1));
+            u = static_cast<U>((u & m2) + ((u >> 2) & m2));
+            u = static_cast<U>((u + (u >> 4)) & m4);
+
+            for (size_t i = 0; i < impl::Log2<sizeof(U)>; ++i) {
+                const size_t shift = (0x1 << i) * BITSIZEOF(u8);
+                u += u >> shift;
+            }
+
+            return static_cast<int>(u & 0x7Fu);
+        } else {
+            if constexpr (std::is_same<U, unsigned long long>::value) {
+                return __builtin_popcountll(u);
+            } else if constexpr (std::is_same<U, unsigned long>::value) {
+                return __builtin_popcountl(u);
+            } else {
+                static_assert(sizeof(U) <= sizeof(unsigned int));
+                return __builtin_popcount(static_cast<unsigned int>(u));
+            }
+        }
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE int CountLeadingZeros(T x) {
+        /* TODO: C++20 std::is_constant_evaluated */
+        if (false) {
+            for (size_t i = 0; i < impl::Log2<BITSIZEOF(T)>; ++i) {
+                const size_t shift = (0x1 << i);
+                x |= x >> shift;
+            }
+            return PopCount(static_cast<T>(~x));
+        } else {
+            /* TODO: C++20 std::bit_cast */
+            using U = typename std::make_unsigned<T>::type;
+            const U u = static_cast<U>(x);
+            if constexpr (std::is_same<U, unsigned long long>::value) {
+                return __builtin_clzll(u);
+            } else if constexpr (std::is_same<U, unsigned long>::value) {
+                return __builtin_clzl(u);
+            } else if constexpr (std::is_same<U, unsigned int>::value) {
+                return __builtin_clz(u);
+            } else {
+                static_assert(sizeof(U) < sizeof(unsigned int));
+                constexpr size_t BitDiff = BITSIZEOF(unsigned int) - BITSIZEOF(U);
+                return __builtin_clz(static_cast<unsigned int>(u)) - BitDiff;
+            }
+        }
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE bool IsPowerOfTwo(T x) {
+        return x > 0 && ResetLeastSignificantOneBit(x) == 0;
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T CeilingPowerOfTwo(T x) {
+        AMS_ASSERT(x > 0);
+        return T(1) << (BITSIZEOF(T) - CountLeadingZeros(T(x - 1)));
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T FloorPowerOfTwo(T x) {
+        AMS_ASSERT(x > 0);
+        return T(1) << (BITSIZEOF(T) - CountLeadingZeros(x) - 1);
+    }
+
 }
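
Finally, a small standalone check of the power-of-two helpers that the buffer pool leans on. This is again not part of the patch: the functions below are local 64-bit re-statements of the patched definitions, written so the identities can be verified at compile time with GCC or Clang (whose builtins the patch itself uses).

#include <cstdint>

namespace {

    /* Local re-statements of the patched helpers, specialized to 64-bit values. */
    constexpr int CountLeadingZeros(std::uint64_t x) { return __builtin_clzll(x); }
    constexpr std::uint64_t FloorPowerOfTwo(std::uint64_t x)       { return std::uint64_t(1) << (64 - CountLeadingZeros(x) - 1); }
    constexpr std::uint64_t CeilingPowerOfTwo(std::uint64_t x)     { return std::uint64_t(1) << (64 - CountLeadingZeros(x - 1)); }
    constexpr std::uint64_t LeastSignificantOneBit(std::uint64_t x) { return x & ~(x - 1); }

}

/* Identities used by PooledBuffer::Shrink, plus one for the new CeilingPowerOfTwo. */
static_assert(FloorPowerOfTwo(416 * 1024) == 256 * 1024);
static_assert(FloorPowerOfTwo(512 * 1024) == 512 * 1024);
static_assert(LeastSignificantOneBit(512 * 1024) == 512 * 1024);
static_assert(CeilingPowerOfTwo(320) == 512);

int main() { return 0; }
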