mirror of
https://github.com/Atmosphere-NX/Atmosphere
synced 2024-12-22 12:21:18 +00:00
kern: optimize timespan -> tick codegen, improve .text layout
This commit is contained in:
parent
89926f44c6
commit
aaa3770806
5 changed files with 88 additions and 45 deletions
|
@ -33,8 +33,6 @@
|
||||||
#define ALWAYS_INLINE inline __attribute__((always_inline))
|
#define ALWAYS_INLINE inline __attribute__((always_inline))
|
||||||
#define NOINLINE __attribute__((noinline))
|
#define NOINLINE __attribute__((noinline))
|
||||||
|
|
||||||
#define CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x))
|
|
||||||
|
|
||||||
#define CONCATENATE_IMPL(s1, s2) s1##s2
|
#define CONCATENATE_IMPL(s1, s2) s1##s2
|
||||||
#define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2)
|
#define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2)
|
||||||
|
|
||||||
|
|
|
@ -23,50 +23,71 @@ namespace ams::svc {
|
||||||
class Tick {
|
class Tick {
|
||||||
public:
|
public:
|
||||||
static constexpr s64 TicksPerSecond = ::ams::svc::TicksPerSecond;
|
static constexpr s64 TicksPerSecond = ::ams::svc::TicksPerSecond;
|
||||||
static constexpr s64 GetTicksPerSecond() { return TicksPerSecond; }
|
static consteval s64 GetTicksPerSecond() { return TicksPerSecond; }
|
||||||
private:
|
private:
|
||||||
s64 m_tick;
|
s64 m_tick;
|
||||||
private:
|
private:
|
||||||
static constexpr s64 NanoSecondsPerSecond = TimeSpan::FromSeconds(1).GetNanoSeconds();
|
static constexpr s64 NanoSecondsPerSecond = TimeSpan::FromSeconds(1).GetNanoSeconds();
|
||||||
|
|
||||||
static constexpr void DivNs(s64 &out, const s64 value) {
|
static constexpr ALWAYS_INLINE s64 ConvertTimeSpanToTickImpl(TimeSpan ts) {
|
||||||
out = value / NanoSecondsPerSecond;
|
/* Get nano-seconds. */
|
||||||
}
|
const s64 ns = ts.GetNanoSeconds();
|
||||||
|
|
||||||
static constexpr void DivModNs(s64 &out_div, s64 &out_mod, const s64 value) {
|
/* Special-case optimize arm64/nintendo-nx value. */
|
||||||
out_div = value / NanoSecondsPerSecond;
|
if (!std::is_constant_evaluated()) {
|
||||||
out_mod = value % NanoSecondsPerSecond;
|
if constexpr (TicksPerSecond == 19'200'000) {
|
||||||
}
|
#if defined(ATMOSPHERE_IS_MESOSPHERE) && defined(ATMOSPHERE_ARCH_ARM64)
|
||||||
|
s64 t0, t1, t2, t3;
|
||||||
|
__asm__ __volatile__("mov %[t1], #0x5A53\n"
|
||||||
|
"movk %[t1], #0xA09B, lsl #16\n"
|
||||||
|
"lsr %[t0], %[ns], #9\n"
|
||||||
|
"movk %[t1], #0xB82F, lsl #32\n"
|
||||||
|
"movk %[t1], #0x0044, lsl #48\n"
|
||||||
|
"umulh %[t0], %[t0], %[t1]\n"
|
||||||
|
"mov %[t1], #0xFFFFFFFFFFFF3600\n"
|
||||||
|
"movk %[t1], #0xC465, lsl #16\n"
|
||||||
|
"lsr %[t0], %[t0], #0xB\n"
|
||||||
|
"madd %[t1], %[t0], %[t1], %[ns]\n"
|
||||||
|
"mov %w[t2], #0xF800\n"
|
||||||
|
"movk %w[t2], #0x0124, lsl #16\n"
|
||||||
|
"mov %w[t3], #0xCA00\n"
|
||||||
|
"movk %w[t3], #0x3B9A, lsl #16\n"
|
||||||
|
"madd %[t1], %[t1], %[t2], %[t3]\n"
|
||||||
|
"mov %[t3], #0x94B3\n"
|
||||||
|
"movk %[t3], #0x26D6, lsl #16\n"
|
||||||
|
"movk %[t3], #0x0BE8, lsl #32\n"
|
||||||
|
"movk %[t3], #0x112E, lsl #48\n"
|
||||||
|
"sub %[t1], %[t1], #1\n"
|
||||||
|
"smulh %[t1], %[t1], %[t3]\n"
|
||||||
|
"asr %[t3], %[t1], #26\n"
|
||||||
|
"add %[t1], %[t3], %[t1], lsr #63\n"
|
||||||
|
"madd %[t0], %[t0], %[t2], %[t1]\n"
|
||||||
|
: [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3)
|
||||||
|
: [ns]"r"(ns)
|
||||||
|
: "cc");
|
||||||
|
return t0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static constexpr s64 ConvertTimeSpanToTickImpl(TimeSpan ts) {
|
return util::ScaleByConstantFactor<s64, TicksPerSecond, NanoSecondsPerSecond>(ns);
|
||||||
/* Split up timespan and ticks-per-second by ns. */
|
|
||||||
s64 ts_div = 0, ts_mod = 0;
|
|
||||||
s64 tick_div = 0, tick_mod = 0;
|
|
||||||
DivModNs(ts_div, ts_mod, ts.GetNanoSeconds());
|
|
||||||
DivModNs(tick_div, tick_mod, TicksPerSecond);
|
|
||||||
|
|
||||||
/* Convert the timespan into a tick count. */
|
|
||||||
s64 value = 0;
|
|
||||||
DivNs(value, ts_mod * tick_mod + NanoSecondsPerSecond - 1);
|
|
||||||
|
|
||||||
return (ts_div * tick_div) * NanoSecondsPerSecond + ts_div * tick_mod + ts_mod * tick_div + value;
|
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
constexpr explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ }
|
constexpr ALWAYS_INLINE explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ }
|
||||||
constexpr Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... */ }
|
constexpr ALWAYS_INLINE Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... */ }
|
||||||
|
|
||||||
constexpr operator s64() const { return m_tick; }
|
constexpr ALWAYS_INLINE operator s64() const { return m_tick; }
|
||||||
|
|
||||||
/* Tick arithmetic. */
|
/* Tick arithmetic. */
|
||||||
constexpr Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; }
|
constexpr ALWAYS_INLINE Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; }
|
||||||
constexpr Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; }
|
constexpr ALWAYS_INLINE Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; }
|
||||||
constexpr Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; }
|
constexpr ALWAYS_INLINE Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; }
|
||||||
constexpr Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; }
|
constexpr ALWAYS_INLINE Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; }
|
||||||
|
|
||||||
constexpr Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; }
|
constexpr ALWAYS_INLINE Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; }
|
||||||
constexpr Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; }
|
constexpr ALWAYS_INLINE Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; }
|
||||||
constexpr Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; }
|
constexpr ALWAYS_INLINE Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; }
|
||||||
constexpr Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; }
|
constexpr ALWAYS_INLINE Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; }
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -270,7 +270,7 @@ namespace ams::util {
|
||||||
|
|
||||||
template<std::memory_order Order = std::memory_order_seq_cst>
|
template<std::memory_order Order = std::memory_order_seq_cst>
|
||||||
ALWAYS_INLINE T Exchange(T arg) {
|
ALWAYS_INLINE T Exchange(T arg) {
|
||||||
return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg)));
|
return ConvertToType(impl::AtomicExchangeImpl<Order>(this->GetStoragePointer(), ConvertToStorage(arg)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<std::memory_order Order = std::memory_order_seq_cst>
|
template<std::memory_order Order = std::memory_order_seq_cst>
|
||||||
|
@ -374,7 +374,7 @@ namespace ams::util {
|
||||||
|
|
||||||
template<std::memory_order Order = std::memory_order_seq_cst>
|
template<std::memory_order Order = std::memory_order_seq_cst>
|
||||||
ALWAYS_INLINE T Exchange(T arg) const {
|
ALWAYS_INLINE T Exchange(T arg) const {
|
||||||
return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg)));
|
return ConvertToType(impl::AtomicExchangeImpl<Order>(this->GetStoragePointer(), ConvertToStorage(arg)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<std::memory_order Order = std::memory_order_seq_cst>
|
template<std::memory_order Order = std::memory_order_seq_cst>
|
||||||
|
|
|
@ -255,4 +255,28 @@ namespace ams::util {
|
||||||
return static_cast<T>((v + add) / d);
|
return static_cast<T>((v + add) / d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T, T N, T D>
|
||||||
|
constexpr ALWAYS_INLINE T ScaleByConstantFactor(const T V) {
|
||||||
|
/* Multiplying and dividing by a large numerator/denominator can introduce error. */
|
||||||
|
/* This algorithm multiplies/divides in stages, so as to mitigate this (particularly with a large denominator). */
|
||||||
|
|
||||||
|
/* Justification for the algorithm. */
|
||||||
|
/* Calculate: (V * N) / D */
|
||||||
|
/* = (Quot_V * D + Rem_V) * (Quot_N * D + Rem_N) / D */
|
||||||
|
/* = (D^2 * (Quot_V * Quot_N) + D * (Quot_V * Rem_N + Rem_V * Quot_N) + Rem_V * Rem_N) / D */
|
||||||
|
/* = (D * Quot_V * Quot_N) + (Quot_V * Rem_N) + (Rem_V * Quot_N) + ((Rem_V * Rem_N) / D) */
|
||||||
|
|
||||||
|
/* Calculate quotients/remainders. */
|
||||||
|
const T Quot_V = V / D;
|
||||||
|
const T Rem_V = V % D;
|
||||||
|
constexpr T Quot_N = N / D;
|
||||||
|
constexpr T Rem_N = N % D;
|
||||||
|
|
||||||
|
/* Calculate the remainder multiplication, rounding up. */
|
||||||
|
const T rem_mult = ((Rem_V * Rem_N) + (D - 1)) / D;
|
||||||
|
|
||||||
|
/* Calculate results. */
|
||||||
|
return (D * Quot_N * Quot_V) + (Quot_V * Rem_N) + (Rem_V * Quot_N) + rem_mult;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,24 +51,24 @@ SECTIONS
|
||||||
. = ALIGN(8);
|
. = ALIGN(8);
|
||||||
} :code
|
} :code
|
||||||
|
|
||||||
/* .vectors. */
|
|
||||||
. = ALIGN(2K);
|
|
||||||
__vectors_start__ = . ;
|
|
||||||
.vectors :
|
|
||||||
{
|
|
||||||
KEEP( *(.vectors) )
|
|
||||||
. = ALIGN(8);
|
|
||||||
} :code
|
|
||||||
|
|
||||||
/* .sleep. */
|
/* .sleep. */
|
||||||
. = ALIGN(4K);
|
. = ALIGN(4K);
|
||||||
__sleep_start__ = . ;
|
__sleep_start__ = . ;
|
||||||
.sleep :
|
.sleep :
|
||||||
{
|
{
|
||||||
KEEP( *(.sleep .sleep.*) )
|
KEEP( *(.sleep .sleep.*) )
|
||||||
. = ALIGN(8);
|
. = ALIGN(8);
|
||||||
} :code
|
} :code
|
||||||
|
|
||||||
|
/* .vectors. */
|
||||||
|
. = ALIGN(2K);
|
||||||
|
__vectors_start__ = . ;
|
||||||
|
.vectors :
|
||||||
|
{
|
||||||
|
KEEP( *(.vectors) )
|
||||||
|
. = ALIGN(8);
|
||||||
|
} :code
|
||||||
|
|
||||||
/* =========== RODATA section =========== */
|
/* =========== RODATA section =========== */
|
||||||
. = ALIGN(0x1000);
|
. = ALIGN(0x1000);
|
||||||
__rodata_start = . ;
|
__rodata_start = . ;
|
||||||
|
|
Loading…
Reference in a new issue