mirror of
https://github.com/Atmosphere-NX/Atmosphere
synced 2025-01-05 11:58:00 +00:00
fs: implement PooledBuffer
This commit is contained in:
parent
50a91b1d6e
commit
496be5ecd4
5 changed files with 521 additions and 6 deletions
|
@ -26,3 +26,4 @@
|
||||||
#include <stratosphere/fssystem/fssystem_romfs_file_system.hpp>
|
#include <stratosphere/fssystem/fssystem_romfs_file_system.hpp>
|
||||||
#include <stratosphere/fssystem/buffers/fssystem_buffer_manager_utils.hpp>
|
#include <stratosphere/fssystem/buffers/fssystem_buffer_manager_utils.hpp>
|
||||||
#include <stratosphere/fssystem/buffers/fssystem_file_system_buddy_heap.hpp>
|
#include <stratosphere/fssystem/buffers/fssystem_file_system_buddy_heap.hpp>
|
||||||
|
#include <stratosphere/fssystem/fssystem_pooled_buffer.hpp>
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018-2020 Atmosphère-NX
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
#include <vapours.hpp>
|
||||||
|
#include <stratosphere/fs/impl/fs_newable.hpp>
|
||||||
|
|
||||||
|
namespace ams::fssystem {
|
||||||
|
|
||||||
|
/* Alignment that the memory handed to InitializeBufferPool() must satisfy. */
inline constexpr size_t BufferPoolAlignment = 4_KB;

/* Smallest work buffer accepted by the InitializeBufferPool() overload that takes work memory. */
inline constexpr size_t BufferPoolWorkSize = 320;
|
||||||
|
|
||||||
|
class PooledBuffer {
|
||||||
|
NON_COPYABLE(PooledBuffer);
|
||||||
|
private:
|
||||||
|
char *buffer;
|
||||||
|
size_t size;
|
||||||
|
private:
|
||||||
|
static size_t GetAllocatableSizeMaxCore(bool large);
|
||||||
|
public:
|
||||||
|
static size_t GetAllocatableSizeMax() { return GetAllocatableSizeMaxCore(false); }
|
||||||
|
static size_t GetAllocatableParticularlyLargeSizeMax() { return GetAllocatableSizeMaxCore(true); }
|
||||||
|
private:
|
||||||
|
void Swap(PooledBuffer &rhs) {
|
||||||
|
std::swap(this->buffer, rhs.buffer);
|
||||||
|
std::swap(this->size, rhs.size);
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
/* Constructor/Destructor. */
|
||||||
|
constexpr PooledBuffer() : buffer(), size() { /* ... */ }
|
||||||
|
|
||||||
|
PooledBuffer(size_t ideal_size, size_t required_size) : buffer(), size() {
|
||||||
|
this->Allocate(ideal_size, required_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
~PooledBuffer() {
|
||||||
|
this->Deallocate();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Move and assignment. */
|
||||||
|
explicit PooledBuffer(PooledBuffer &&rhs) : buffer(rhs.buffer), size(rhs.size) {
|
||||||
|
rhs.buffer = nullptr;
|
||||||
|
rhs.size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PooledBuffer &operator=(PooledBuffer &&rhs) {
|
||||||
|
PooledBuffer(std::move(rhs)).Swap(*this);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocation API. */
|
||||||
|
void Allocate(size_t ideal_size, size_t required_size) {
|
||||||
|
return this->AllocateCore(ideal_size, required_size, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void AllocateParticularlyLarge(size_t ideal_size, size_t required_size) {
|
||||||
|
return this->AllocateCore(ideal_size, required_size, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Shrink(size_t ideal_size);
|
||||||
|
|
||||||
|
void Deallocate() {
|
||||||
|
/* Shrink the buffer to empty. */
|
||||||
|
this->Shrink(0);
|
||||||
|
AMS_ASSERT(this->buffer == nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *GetBuffer() const {
|
||||||
|
AMS_ASSERT(this->buffer != nullptr);
|
||||||
|
return this->buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetSize() const {
|
||||||
|
AMS_ASSERT(this->buffer != nullptr);
|
||||||
|
return this->size;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
void AllocateCore(size_t ideal_size, size_t required_size, bool large);
|
||||||
|
};
|
||||||
|
|
||||||
|
Result InitializeBufferPool(char *buffer, size_t size);
|
||||||
|
Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size);
|
||||||
|
|
||||||
|
bool IsPooledBuffer(const void *buffer);
|
||||||
|
|
||||||
|
size_t GetPooledBufferRetriedCount();
|
||||||
|
size_t GetPooledBufferReduceAllocationCount();
|
||||||
|
size_t GetPooledBufferFreeSizePeak();
|
||||||
|
|
||||||
|
void ClearPooledBufferPeak();
|
||||||
|
|
||||||
|
void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size);
|
||||||
|
void UnregisterAdditionalDeviceAddress(uintptr_t address);
|
||||||
|
bool IsAdditionalDeviceAddress(const void *ptr);
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,272 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018-2020 Atmosphère-NX
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include <stratosphere.hpp>
|
||||||
|
|
||||||
|
namespace ams::fssystem {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class AdditionalDeviceAddressEntry {
|
||||||
|
private:
|
||||||
|
/* TODO: SdkMutex */
|
||||||
|
os::Mutex mutex;
|
||||||
|
bool is_registered;
|
||||||
|
uintptr_t address;
|
||||||
|
size_t size;
|
||||||
|
public:
|
||||||
|
constexpr AdditionalDeviceAddressEntry() : mutex(), is_registered(), address(), size() { /* ... */ }
|
||||||
|
|
||||||
|
void Register(uintptr_t addr, size_t sz) {
|
||||||
|
std::scoped_lock lk(this->mutex);
|
||||||
|
|
||||||
|
AMS_ASSERT(!this->is_registered);
|
||||||
|
if (!this->is_registered) {
|
||||||
|
this->is_registered = true;
|
||||||
|
this->address = addr;
|
||||||
|
this->size = size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Unregister(uintptr_t addr) {
|
||||||
|
std::scoped_lock lk(this->mutex);
|
||||||
|
|
||||||
|
if (this->is_registered && this->address == addr) {
|
||||||
|
this->is_registered = false;
|
||||||
|
this->address = 0;
|
||||||
|
this->size = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Includes(const void *ptr) {
|
||||||
|
std::scoped_lock lk(this->mutex);
|
||||||
|
|
||||||
|
if (this->is_registered) {
|
||||||
|
const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
|
||||||
|
return this->address <= addr && addr < this->address + this->size;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr auto RetryWait = TimeSpan::FromMilliSeconds(10);
|
||||||
|
|
||||||
|
constexpr size_t HeapBlockSize = BufferPoolAlignment;
|
||||||
|
static_assert(HeapBlockSize == 4_KB);
|
||||||
|
|
||||||
|
/* A heap block is 4KB. An order is a power of two. */
|
||||||
|
/* This gives blocks of the order 32KB, 512KB, 4MB. */
|
||||||
|
constexpr s32 HeapOrderTrim = 3;
|
||||||
|
constexpr s32 HeapOrderMax = 7;
|
||||||
|
constexpr s32 HeapOrderMaxForLarge = HeapOrderMax + 3;
|
||||||
|
|
||||||
|
constexpr size_t HeapAllocatableSizeTrim = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderTrim);
|
||||||
|
constexpr size_t HeapAllocatableSizeMax = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMax);
|
||||||
|
constexpr size_t HeapAllocatableSizeMaxForLarge = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMaxForLarge);
|
||||||
|
|
||||||
|
/* TODO: SdkMutex */
|
||||||
|
os::Mutex g_heap_mutex;
|
||||||
|
FileSystemBuddyHeap g_heap;
|
||||||
|
|
||||||
|
std::atomic<size_t> g_retry_count;
|
||||||
|
std::atomic<size_t> g_reduce_allocation_count;
|
||||||
|
|
||||||
|
void *g_heap_buffer;
|
||||||
|
size_t g_heap_size;
|
||||||
|
size_t g_heap_free_size_peak;
|
||||||
|
|
||||||
|
AdditionalDeviceAddressEntry g_additional_device_address_entry;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t PooledBuffer::GetAllocatableSizeMaxCore(bool large) {
|
||||||
|
return large ? HeapAllocatableSizeMaxForLarge : HeapAllocatableSizeMax;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PooledBuffer::AllocateCore(size_t ideal_size, size_t required_size, bool large) {
|
||||||
|
/* Ensure preconditions. */
|
||||||
|
AMS_ASSERT(g_heap_buffer != nullptr);
|
||||||
|
AMS_ASSERT(this->buffer == nullptr);
|
||||||
|
AMS_ASSERT(g_heap.GetBlockSize() == HeapBlockSize);
|
||||||
|
|
||||||
|
/* Check that we can allocate this size. */
|
||||||
|
AMS_ASSERT(required_size <= GetAllocatableSizeMaxCore(large));
|
||||||
|
|
||||||
|
const size_t target_size = std::min(std::max(ideal_size, required_size), GetAllocatableSizeMaxCore(large));
|
||||||
|
|
||||||
|
/* Loop until we allocate. */
|
||||||
|
while (true) {
|
||||||
|
/* Lock the heap and try to allocate. */
|
||||||
|
{
|
||||||
|
std::scoped_lock lk(g_heap_mutex);
|
||||||
|
|
||||||
|
/* Determine how much we can allocate, and don't allocate more than half the heap. */
|
||||||
|
size_t allocatable_size = g_heap.GetAllocatableSizeMax();
|
||||||
|
if (allocatable_size > HeapBlockSize) {
|
||||||
|
allocatable_size >>= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if this allocation is acceptable. */
|
||||||
|
if (allocatable_size >= required_size) {
|
||||||
|
/* Get the order. */
|
||||||
|
const auto order = g_heap.GetOrderFromBytes(std::min(target_size, allocatable_size));
|
||||||
|
|
||||||
|
/* Allocate and get the size. */
|
||||||
|
this->buffer = reinterpret_cast<char *>(g_heap.AllocateByOrder(order));
|
||||||
|
this->size = g_heap.GetBytesFromOrder(order);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if we allocated. */
|
||||||
|
if (this->buffer != nullptr) {
|
||||||
|
/* If we need to trim the end, do so. */
|
||||||
|
if (this->GetSize() >= target_size + HeapAllocatableSizeTrim) {
|
||||||
|
this->Shrink(util::AlignUp(target_size, HeapAllocatableSizeTrim));
|
||||||
|
}
|
||||||
|
AMS_ASSERT(this->GetSize() >= required_size);
|
||||||
|
|
||||||
|
/* If we reduced, note so. */
|
||||||
|
if (this->GetSize() < std::min(target_size, HeapAllocatableSizeMax)) {
|
||||||
|
g_reduce_allocation_count++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
/* Sleep. */
|
||||||
|
/* TODO: os::SleepThread() */
|
||||||
|
svc::SleepThread(RetryWait.GetNanoSeconds());
|
||||||
|
g_retry_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update metrics. */
|
||||||
|
{
|
||||||
|
std::scoped_lock lk(g_heap_mutex);
|
||||||
|
|
||||||
|
const size_t free_size = g_heap.GetTotalFreeSize();
|
||||||
|
if (free_size < g_heap_free_size_peak) {
|
||||||
|
g_heap_free_size_peak = free_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PooledBuffer::Shrink(size_t ideal_size) {
|
||||||
|
AMS_ASSERT(ideal_size <= GetAllocatableSizeMaxCore(true));
|
||||||
|
|
||||||
|
/* Check if we actually need to shrink. */
|
||||||
|
if (this->size > ideal_size) {
|
||||||
|
/* If we do, we need to have a buffer allocated from the heap. */
|
||||||
|
AMS_ASSERT(this->buffer != nullptr);
|
||||||
|
AMS_ASSERT(g_heap.GetBlockSize(), HeapBlockSize);
|
||||||
|
|
||||||
|
const size_t new_size = util::AlignUp(ideal_size, HeapBlockSize);
|
||||||
|
|
||||||
|
/* Repeatedly free the tail of our buffer until we're done. */
|
||||||
|
{
|
||||||
|
std::scoped_lock lk(g_heap_mutex);
|
||||||
|
|
||||||
|
while (new_size < this->size) {
|
||||||
|
/* Determine the size and order to free. */
|
||||||
|
const size_t tail_align = util::LeastSignificantOneBit(this->size);
|
||||||
|
const size_t free_size = std::min(util::FloorPowerOfTwo(this->size - new_size), tail_align);
|
||||||
|
const s32 free_order = g_heap.GetOrderFromBytes(free_size);
|
||||||
|
|
||||||
|
/* Ensure we determined size correctly. */
|
||||||
|
AMS_ASSERT(util::IsAligned(free_size, HeapBlockSize));
|
||||||
|
AMS_ASSERT(free_size == g_heap.GetBytesFromOrder(free_order));
|
||||||
|
|
||||||
|
/* Actually free the memory. */
|
||||||
|
g_heap.Free(this->buffer + this->size - free_size, free_order);
|
||||||
|
this->size -= free_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shrinking to zero means that we have no buffer. */
|
||||||
|
if (this->size == 0) {
|
||||||
|
this->buffer = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sets up the global buffer pool over [buffer, buffer + size). */
/* Asserts that the pool is not yet set up and that buffer is non-null and BufferPoolAlignment-aligned. */
/* Returns the heap's initialization failure, if any; otherwise success. */
Result InitializeBufferPool(char *buffer, size_t size) {
    const uintptr_t pool_address = reinterpret_cast<uintptr_t>(buffer);

    AMS_ASSERT(g_heap_buffer == nullptr);
    AMS_ASSERT(buffer != nullptr);
    AMS_ASSERT(util::IsAligned(pool_address, BufferPoolAlignment));

    /* Set up the heap over the region. */
    R_TRY(g_heap.Initialize(pool_address, size, HeapBlockSize, HeapOrderMaxForLarge + 1));

    /* Remember the region, and start the free-size metric at the full size. */
    g_heap_buffer         = buffer;
    g_heap_size           = size;
    g_heap_free_size_peak = size;

    return ResultSuccess();
}
|
||||||
|
|
||||||
|
/* Sets up the global buffer pool over [buffer, buffer + size), passing work/work_size on to the heap. */
/* Asserts that the pool is not yet set up, that buffer is non-null and BufferPoolAlignment-aligned, */
/* and that work_size is at least BufferPoolWorkSize. */
/* Returns the heap's initialization failure, if any; otherwise success. */
Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size) {
    const uintptr_t pool_address = reinterpret_cast<uintptr_t>(buffer);

    AMS_ASSERT(g_heap_buffer == nullptr);
    AMS_ASSERT(buffer != nullptr);
    AMS_ASSERT(util::IsAligned(pool_address, BufferPoolAlignment));
    AMS_ASSERT(work_size >= BufferPoolWorkSize);

    /* Set up the heap over the region, using the caller's work memory. */
    R_TRY(g_heap.Initialize(pool_address, size, HeapBlockSize, HeapOrderMaxForLarge + 1, work, work_size));

    /* Remember the region, and start the free-size metric at the full size. */
    g_heap_buffer         = buffer;
    g_heap_size           = size;
    g_heap_free_size_peak = size;

    return ResultSuccess();
}
|
||||||
|
|
||||||
|
bool IsPooledBuffer(const void *buffer) {
|
||||||
|
AMS_ASSERT(buffer != nullptr);
|
||||||
|
return g_heap_buffer <= buffer && buffer < reinterpret_cast<char *>(g_heap_buffer) + g_heap_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetPooledBufferRetriedCount() {
|
||||||
|
return g_retry_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetPooledBufferReduceAllocationCount() {
|
||||||
|
return g_reduce_allocation_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetPooledBufferFreeSizePeak() {
|
||||||
|
return g_heap_free_size_peak;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ClearPooledBufferPeak() {
|
||||||
|
std::scoped_lock lk(g_heap_mutex);
|
||||||
|
g_heap_free_size_peak = g_heap.GetTotalFreeSize();
|
||||||
|
g_retry_count = 0;
|
||||||
|
g_reduce_allocation_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Registers [address, address + size) with the single global additional-device-address entry. */
void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size) {
    auto &entry = g_additional_device_address_entry;
    entry.Register(address, size);
}
|
||||||
|
|
||||||
|
/* Asks the global additional-device-address entry to drop the registration starting at address. */
void UnregisterAdditionalDeviceAddress(uintptr_t address) {
    auto &entry = g_additional_device_address_entry;
    entry.Unregister(address);
}
|
||||||
|
|
||||||
|
bool IsAdditionalDeviceAddress(const void *ptr) {
|
||||||
|
return g_additional_device_address_entry.Includes(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -17,16 +17,11 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <vapours/common.hpp>
|
#include <vapours/common.hpp>
|
||||||
#include <vapours/assert.hpp>
|
#include <vapours/assert.hpp>
|
||||||
|
#include <vapours/util/util_bitutil.hpp>
|
||||||
|
|
||||||
namespace ams::util {
|
namespace ams::util {
|
||||||
|
|
||||||
/* Utilities for alignment to power of two. */
|
/* Utilities for alignment to power of two. */
|
||||||
template<typename T>
|
|
||||||
constexpr ALWAYS_INLINE bool IsPowerOfTwo(T value) {
|
|
||||||
using U = typename std::make_unsigned<T>::type;
|
|
||||||
return (static_cast<U>(value) & static_cast<U>(value - 1)) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
constexpr ALWAYS_INLINE T AlignUp(T value, size_t alignment) {
|
constexpr ALWAYS_INLINE T AlignUp(T value, size_t alignment) {
|
||||||
using U = typename std::make_unsigned<T>::type;
|
using U = typename std::make_unsigned<T>::type;
|
||||||
|
|
|
@ -20,6 +20,16 @@
|
||||||
|
|
||||||
namespace ams::util {
|
namespace ams::util {
|
||||||
|
|
||||||
|
namespace impl {

    /* Log2<N> is the floor of the base-2 logarithm of N, computed at compile time. */
    /* Each instantiation halves N; the Log2<1> specialization terminates the chain. */
    template<size_t N>
    constexpr inline size_t Log2 = 1 + Log2<(N >> 1)>;

    template<>
    constexpr inline size_t Log2<1> = 0;

}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class BitsOf {
|
class BitsOf {
|
||||||
private:
|
private:
|
||||||
|
@ -73,4 +83,133 @@ namespace ams::util {
|
||||||
return (... | (T(1u) << args));
|
return (... | (T(1u) << args));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T ResetLeastSignificantOneBit(T x) {
|
||||||
|
return x & (x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T SetLeastSignificantZeroBit(T x) {
|
||||||
|
return x | (x + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T LeastSignificantOneBit(T x) {
|
||||||
|
return x & ~(x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T LeastSignificantZeroBit(T x) {
|
||||||
|
return ~x & (x + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T ResetTrailingOnes(T x) {
|
||||||
|
return x & (x + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T SetTrailingZeros(T x) {
|
||||||
|
return x | (x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T MaskTrailingZeros(T x) {
|
||||||
|
return (~x) & (x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T MaskTrailingOnes(T x) {
|
||||||
|
return ~((~x) | (x + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T MaskTrailingZerosAndLeastSignificantOneBit(T x) {
|
||||||
|
return x ^ (x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T MaskTrailingOnesAndLeastSignificantZeroBit(T x) {
|
||||||
|
return x ^ (x + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE int PopCount(T x) {
|
||||||
|
/* TODO: C++20 std::bit_cast */
|
||||||
|
using U = typename std::make_unsigned<T>::type;
|
||||||
|
U u = static_cast<U>(x);
|
||||||
|
|
||||||
|
/* TODO: C++20 std::is_constant_evaluated */
|
||||||
|
if (false) {
|
||||||
|
/* https://en.wikipedia.org/wiki/Hamming_weight */
|
||||||
|
constexpr U m1 = U(-1) / 0x03;
|
||||||
|
constexpr U m2 = U(-1) / 0x05;
|
||||||
|
constexpr U m4 = U(-1) / 0x11;
|
||||||
|
|
||||||
|
u = static_cast<U>(u - ((u >> 1) & m1));
|
||||||
|
u = static_cast<U>((u & m2) + ((u >> 2) & m2));
|
||||||
|
u = static_cast<U>((u + (u >> 4)) & m4);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < impl::Log2<sizeof(T)>; ++i) {
|
||||||
|
const size_t shift = (0x1 << i) * BITSIZEOF(u8);
|
||||||
|
u += u >> shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
return static_cast<int>(u & 0x7Fu);
|
||||||
|
} else {
|
||||||
|
if constexpr (std::is_same<U, unsigned long long>::value) {
|
||||||
|
return __builtin_popcountll(u);
|
||||||
|
} else if constexpr (std::is_same<U, unsigned long>::value) {
|
||||||
|
return __builtin_popcountl(u);
|
||||||
|
} else {
|
||||||
|
static_assert(sizeof(U) <= sizeof(unsigned int));
|
||||||
|
return __builtin_popcount(static_cast<unsigned int>(u));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE int CountLeadingZeros(T x) {
|
||||||
|
/* TODO: C++20 std::is_constant_evaluated */
|
||||||
|
if (false) {
|
||||||
|
for (size_t i = 0; i < impl::Log2<BITSIZEOF(T)>; ++i) {
|
||||||
|
const size_t shift = (0x1 << i);
|
||||||
|
x |= x >> shift;
|
||||||
|
}
|
||||||
|
return PopCount(static_cast<T>(~x));
|
||||||
|
} else {
|
||||||
|
/* TODO: C++20 std::bit_cast */
|
||||||
|
using U = typename std::make_unsigned<T>::type;
|
||||||
|
const U u = static_cast<U>(x);
|
||||||
|
if constexpr (std::is_same<U, unsigned long long>::value) {
|
||||||
|
return __builtin_clzll(u);
|
||||||
|
} else if constexpr (std::is_same<U, unsigned long>::value) {
|
||||||
|
return __builtin_clzl(u);
|
||||||
|
} else if constexpr(std::is_same<U, unsigned int>::value) {
|
||||||
|
return __builtin_clz(u);
|
||||||
|
} else {
|
||||||
|
static_assert(sizeof(U) < sizeof(unsigned int));
|
||||||
|
constexpr size_t BitDiff = BITSIZEOF(unsigned int) - BITSIZEOF(U);
|
||||||
|
return __builtin_clz(static_cast<unsigned int>(u)) - BitDiff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE bool IsPowerOfTwo(T x) {
|
||||||
|
return x > 0 && ResetLeastSignificantOneBit(x) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T CeilingPowerOfTwo(T x) {
|
||||||
|
AMS_ASSERT(x > 0);
|
||||||
|
return T(1) << (BITSIZEOF(T) - CountLeadingZeros(T(x - 1)));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr ALWAYS_INLINE T FloorPowerOfTwo(T x) {
|
||||||
|
AMS_ASSERT(x > 0);
|
||||||
|
return T(1) << (BITSIZEOF(T) - CountLeadingZeros(x) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue