mirror of
https://github.com/Atmosphere-NX/Atmosphere
synced 2024-12-24 13:31:12 +00:00
280 lines
7 KiB
ArmAsm
280 lines
7 KiB
ArmAsm
|
/*
|
||
|
* Copyright (c) 2018-2020 Atmosphère-NX
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or modify it
|
||
|
* under the terms and conditions of the GNU General Public License,
|
||
|
* version 2, as published by the Free Software Foundation.
|
||
|
*
|
||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||
|
* more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
/* ams::crypto::impl::BigNum::Add(Word *dst, const Word *lhs, const Word *rhs, size_t num_words) */
|
||
|
.section .text._ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m, "ax", %progbits
|
||
|
.global _ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m
|
||
|
.type _ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m, %function
|
||
|
.balign 0x10
|
||
|
_ZN3ams6crypto4impl6BigNum3AddEPjPKjPKjm:
|
||
|
/* Check if we have anything to do at all. */
|
||
|
msr nzcv, xzr
|
||
|
cbz x3, 7f
|
||
|
|
||
|
/* Save registers. */
|
||
|
stp x16, x17, [sp, #-16]!
|
||
|
stp xzr, x19, [sp, #-16]!
|
||
|
stp x20, x21, [sp, #-16]!
|
||
|
|
||
|
/* Check if we have less than 16 words to process. */
|
||
|
lsr x20, x3, #4
|
||
|
cbz x20, 2f
|
||
|
|
||
|
sub x3, x3, x20, lsl #4
|
||
|
|
||
|
1: /* Process 16 words at a time. */
|
||
|
/* NOTE: Nintendo uses X18 here, we will use X21 for EL1+ compat. */
|
||
|
ldp x4, x5, [x1], #16
|
||
|
ldp x12, x13, [x2], #16
|
||
|
ldp x6, x7, [x1], #16
|
||
|
ldp x14, x15, [x2], #16
|
||
|
ldp x8, x9, [x1], #16
|
||
|
ldp x16, x17, [x2], #16
|
||
|
ldp x10, x11, [x1], #16
|
||
|
ldp x21, x19, [x2], #16
|
||
|
|
||
|
adcs x4, x4, x12
|
||
|
adcs x5, x5, x13
|
||
|
stp x4, x5, [x0], #16
|
||
|
|
||
|
adcs x6, x6, x14
|
||
|
adcs x7, x7, x15
|
||
|
stp x6, x7, [x0], #16
|
||
|
|
||
|
adcs x8, x8, x16
|
||
|
adcs x9, x9, x17
|
||
|
stp x8, x9, [x0], #16
|
||
|
|
||
|
adcs x10, x10, x21
|
||
|
adcs x11, x11, x19
|
||
|
stp x10, x11, [x0], #16
|
||
|
|
||
|
sub x20, x20, #1
|
||
|
cbnz x20, 1b
|
||
|
|
||
|
2: /* We have less than 16 words to process. */
|
||
|
lsr x15, x3, #2
|
||
|
cbz x15, 4f
|
||
|
|
||
|
sub x3, x3, x15, lsl #2
|
||
|
|
||
|
3: /* Process 4 words at a time. */
|
||
|
ldp x4, x5, [x1], #16
|
||
|
ldp x8, x9, [x2], #16
|
||
|
|
||
|
sub x15, x15, #1
|
||
|
|
||
|
adcs x4, x4, x8
|
||
|
adcs x5, x5, x9
|
||
|
|
||
|
stp x4, x5, [x0], #16
|
||
|
|
||
|
cbnz x15, 3b
|
||
|
|
||
|
4: /* We have less than 4 words to process. */
|
||
|
cbz x3, 6f
|
||
|
|
||
|
5: /* Process 1 word at a time. */
|
||
|
ldr w4, [x1], #4
|
||
|
ldr w8, [x2], #4
|
||
|
adcs w4, w4, w8
|
||
|
str w4, [x0], #4
|
||
|
|
||
|
sub x3, x3, #1
|
||
|
cbnz x3, 5b
|
||
|
|
||
|
6: /* Restore registers we used while adding. */
|
||
|
ldp x20, x21, [sp], #16
|
||
|
ldp xzr, x19, [sp], #16
|
||
|
ldp x16, x17, [sp], #16
|
||
|
|
||
|
7: /* We're done. */
|
||
|
adc x0, xzr, xzr
|
||
|
ret
|
||
|
|
||
|
/* ams::crypto::impl::BigNum::Sub(Word *dst, const Word *lhs, const Word *rhs, size_t num_words) */
|
||
|
.section .text._ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m, "ax", %progbits
|
||
|
.global _ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m
|
||
|
.type _ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m, %function
|
||
|
.balign 0x10
|
||
|
_ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m:
|
||
|
/* Check if we have anything to do at all. */
|
||
|
mov x4, #0x20000000
|
||
|
msr nzcv, x4
|
||
|
cbz x3, 7f
|
||
|
|
||
|
/* Save registers. */
|
||
|
stp x16, x17, [sp, #-16]!
|
||
|
stp xzr, x19, [sp, #-16]!
|
||
|
stp x20, x21, [sp, #-16]!
|
||
|
|
||
|
/* Check if we have less than 16 words to process. */
|
||
|
lsr x20, x3, #4
|
||
|
cbz x20, 2f
|
||
|
|
||
|
sub x3, x3, x20, lsl #4
|
||
|
|
||
|
1: /* Process 16 words at a time. */
|
||
|
/* NOTE: Nintendo uses X18 here, we will use X21 for EL1+ compat. */
|
||
|
ldp x4, x5, [x1], #16
|
||
|
ldp x12, x13, [x2], #16
|
||
|
ldp x6, x7, [x1], #16
|
||
|
ldp x14, x15, [x2], #16
|
||
|
ldp x8, x9, [x1], #16
|
||
|
ldp x16, x17, [x2], #16
|
||
|
ldp x10, x11, [x1], #16
|
||
|
ldp x21, x19, [x2], #16
|
||
|
|
||
|
sbcs x4, x4, x12
|
||
|
sbcs x5, x5, x13
|
||
|
stp x4, x5, [x0], #16
|
||
|
|
||
|
sbcs x6, x6, x14
|
||
|
sbcs x7, x7, x15
|
||
|
stp x6, x7, [x0], #16
|
||
|
|
||
|
sbcs x8, x8, x16
|
||
|
sbcs x9, x9, x17
|
||
|
stp x8, x9, [x0], #16
|
||
|
|
||
|
sbcs x10, x10, x21
|
||
|
sbcs x11, x11, x19
|
||
|
stp x10, x11, [x0], #16
|
||
|
|
||
|
sub x20, x20, #1
|
||
|
cbnz x20, 1b
|
||
|
|
||
|
2: /* We have less than 16 words to process. */
|
||
|
lsr x15, x3, #2
|
||
|
cbz x15, 4f
|
||
|
|
||
|
sub x3, x3, x15, lsl #2
|
||
|
|
||
|
3: /* Process 4 words at a time. */
|
||
|
ldp x4, x5, [x1], #16
|
||
|
ldp x8, x9, [x2], #16
|
||
|
|
||
|
sub x15, x15, #1
|
||
|
|
||
|
sbcs x4, x4, x8
|
||
|
sbcs x5, x5, x9
|
||
|
|
||
|
stp x4, x5, [x0], #16
|
||
|
|
||
|
cbnz x15, 3b
|
||
|
|
||
|
4: /* We have less than 4 words to process. */
|
||
|
cbz x3, 6f
|
||
|
|
||
|
5: /* Process 1 word at a time. */
|
||
|
ldr w4, [x1], #4
|
||
|
ldr w8, [x2], #4
|
||
|
sbcs w4, w4, w8
|
||
|
str w4, [x0], #4
|
||
|
|
||
|
sub x3, x3, #1
|
||
|
cbnz x3, 5b
|
||
|
|
||
|
6: /* Restore registers we used while adding. */
|
||
|
ldp x20, x21, [sp], #16
|
||
|
ldp xzr, x19, [sp], #16
|
||
|
ldp x16, x17, [sp], #16
|
||
|
|
||
|
7: /* We're done. */
|
||
|
cinc x0, xzr, cc
|
||
|
ret
|
||
|
|
||
|
/* ams::crypto::impl::BigNum::MultAdd(Word *dst, const Word *w, size_t num_words, Word mult) */
|
||
|
.section .text._ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj, "ax", %progbits
|
||
|
.global _ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj
|
||
|
.type _ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj, %function
|
||
|
.balign 0x10
|
||
|
_ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj:
|
||
|
/* Check if we have anything to do at all. */
|
||
|
mov x15, xzr
|
||
|
cbz x2, 5f
|
||
|
|
||
|
/* Check if we have less than four words to process. */
|
||
|
lsr x6, x2, #2
|
||
|
cbz x6, 2f
|
||
|
|
||
|
/* We have more than four words to process. */
|
||
|
sub x2, x2, x6, lsl #2
|
||
|
stp x16, x17, [sp, #-16]!
|
||
|
|
||
|
1: /* Loop processing four words at a time. */
|
||
|
ldp w4, w5, [x1], #8
|
||
|
ldp w16, w7, [x1], #8
|
||
|
ldp w8, w9, [x0]
|
||
|
ldp w10, w11, [x0, #8]
|
||
|
|
||
|
umaddl x4, w3, w4, x8
|
||
|
umaddl x5, w3, w5, x9
|
||
|
umaddl x16, w3, w16, x10
|
||
|
umaddl x7, w3, w7, x11
|
||
|
|
||
|
add x12, x4, x15, lsr #32
|
||
|
add x13, x5, x12, lsr #32
|
||
|
stp w12, w13, [x0], #8
|
||
|
|
||
|
add x14, x16, x13, lsr #32
|
||
|
add x15, x7, x14, lsr #32
|
||
|
stp w14, w15, [x0], #8
|
||
|
|
||
|
sub x6, x6, #1
|
||
|
cbnz x6, 1b
|
||
|
|
||
|
ldp x16, x17, [sp], #16
|
||
|
|
||
|
2: /* We have less than four words. Check if we have less than two. */
|
||
|
lsr x6, x2, #1
|
||
|
cbz x6, 4f
|
||
|
|
||
|
/* We have more than two words to process. */
|
||
|
sub x2, x2, x6, lsl #1
|
||
|
|
||
|
3: /* Loop processing two words at a time. */
|
||
|
ldp w4, w5, [x1], #8
|
||
|
ldp w8, w9, [x0]
|
||
|
|
||
|
umaddl x4, w3, w4, x8
|
||
|
umaddl x5, w3, w5, x9
|
||
|
|
||
|
sub x6, x6, #1
|
||
|
|
||
|
add x14, x4, x15, lsr #32
|
||
|
add x15, x5, x14, lsr #32
|
||
|
|
||
|
stp w14, w15, [x0], #8
|
||
|
|
||
|
cbnz x6, 3b
|
||
|
|
||
|
4: /* We have less than two words to process. */
|
||
|
cbz x2, 5f
|
||
|
|
||
|
/* We have one word to process. */
|
||
|
ldr w4, [x1], #4
|
||
|
ldr w8, [x0]
|
||
|
|
||
|
umaddl x4, w3, w4, x8
|
||
|
add x15, x4, x15, lsr #32
|
||
|
|
||
|
str w15, [x0], #4
|
||
|
|
||
|
5: /* We're done. */
|
||
|
lsr x0, x15, #32
|
||
|
ret
|