Loader: Use HW-acceleration for SHA256

This commit is contained in:
Michael Scire 2018-07-24 01:26:37 -07:00
parent e58927a8ab
commit dd4993dfda
5 changed files with 287 additions and 171 deletions

View file

@ -36,7 +36,7 @@ Result NroUtils::LoadNro(Registration::Process *target_proc, Handle process_h, u
unsigned int i; unsigned int i;
Result rc; Result rc;
u8 nro_hash[0x20]; u8 nro_hash[0x20];
SHA256_CTX sha_ctx; struct sha256_state sha_ctx;
/* Ensure there is an available NRO slot. */ /* Ensure there is an available NRO slot. */
if (std::all_of(target_proc->nro_infos.begin(), target_proc->nro_infos.end(), std::mem_fn(&Registration::NroInfo::in_use))) { if (std::all_of(target_proc->nro_infos.begin(), target_proc->nro_infos.end(), std::mem_fn(&Registration::NroInfo::in_use))) {
return 0x6E09; return 0x6E09;
@ -78,7 +78,8 @@ Result NroUtils::LoadNro(Registration::Process *target_proc, Handle process_h, u
sha256_init(&sha_ctx); sha256_init(&sha_ctx);
sha256_update(&sha_ctx, (u8 *)nro, nro->nro_size); sha256_update(&sha_ctx, (u8 *)nro, nro->nro_size);
sha256_final(&sha_ctx, nro_hash); sha256_finalize(&sha_ctx);
sha256_finish(&sha_ctx, nro_hash);
if (!Registration::IsNroHashPresent(target_proc->index, nro_hash)) { if (!Registration::IsNroHashPresent(target_proc->index, nro_hash)) {
rc = 0x6C09; rc = 0x6C09;

View file

@ -199,6 +199,7 @@ Result NsoUtils::CalculateNsoLoadExtents(u32 addspace_type, u32 args_size, NsoLo
} }
Result NsoUtils::LoadNsoSegment(u64 title_id, unsigned int index, unsigned int segment, FILE *f_nso, u8 *map_base, u8 *map_end) { Result NsoUtils::LoadNsoSegment(u64 title_id, unsigned int index, unsigned int segment, FILE *f_nso, u8 *map_base, u8 *map_end) {
bool is_compressed = ((g_nso_headers[index].flags >> segment) & 1) != 0; bool is_compressed = ((g_nso_headers[index].flags >> segment) & 1) != 0;
bool check_hash = ((g_nso_headers[index].flags >> (segment + 3)) & 1) != 0; bool check_hash = ((g_nso_headers[index].flags >> (segment + 3)) & 1) != 0;
@ -230,10 +231,11 @@ Result NsoUtils::LoadNsoSegment(u64 title_id, unsigned int index, unsigned int s
if (check_hash) { if (check_hash) {
u8 hash[0x20] = {0}; u8 hash[0x20] = {0};
SHA256_CTX sha_ctx; struct sha256_state sha_ctx;
sha256_init(&sha_ctx); sha256_init(&sha_ctx);
sha256_update(&sha_ctx, dst_addr, out_size); sha256_update(&sha_ctx, dst_addr, out_size);
sha256_final(&sha_ctx, hash); sha256_finalize(&sha_ctx);
sha256_finish(&sha_ctx, hash);
if (std::memcmp(g_nso_headers[index].section_hashes[segment], hash, sizeof(hash))) { if (std::memcmp(g_nso_headers[index].section_hashes[segment], hash, sizeof(hash))) {
return 0xA09; return 0xA09;

View file

@ -1,158 +1,113 @@
/********************************************************************* /* Based on linux source code */
* Filename: sha256.c /*
* Author: Brad Conte (brad AT bradconte.com) * sha256_base.h - core logic for SHA-256 implementations
* Copyright: *
* Disclaimer: This code is presented "as is" without any guarantees. * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
* Details: Implementation of the SHA-256 hashing algorithm. *
SHA-256 is one of the three algorithms in the SHA2 * This program is free software; you can redistribute it and/or modify
specification. The others, SHA-384 and SHA-512, are not * it under the terms of the GNU General Public License version 2 as
offered in this implementation. * published by the Free Software Foundation.
Algorithm specification can be found here: */
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf
This implementation uses little endian byte order.
*********************************************************************/
/*************************** HEADER FILES ***************************/ #ifdef __cplusplus
#include <stdlib.h> extern "C" {
#include <memory.h> #endif
#include <string.h>
#include "sha256.h" #include "sha256.h"
/****************************** MACROS ******************************/ #define unlikely(x) __builtin_expect(!!(x), 0)
#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) void sha256_block_data_order (uint32_t *ctx, const void *in, size_t num);
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
/**************************** VARIABLES *****************************/ int sha256_init(struct sha256_state *sctx)
static const WORD k[64] = {
0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};
/*********************** FUNCTION DEFINITIONS ***********************/
void sha256_transform(SHA256_CTX *ctx, const BYTE data[])
{ {
WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
for (i = 0, j = 0; i < 16; ++i, j += 4) return 0;
m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);
for ( ; i < 64; ++i)
m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
a = ctx->state[0];
b = ctx->state[1];
c = ctx->state[2];
d = ctx->state[3];
e = ctx->state[4];
f = ctx->state[5];
g = ctx->state[6];
h = ctx->state[7];
for (i = 0; i < 64; ++i) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
} }
ctx->state[0] += a; int sha256_update(struct sha256_state *sctx,
ctx->state[1] += b; const void *data,
ctx->state[2] += c; size_t len)
ctx->state[3] += d;
ctx->state[4] += e;
ctx->state[5] += f;
ctx->state[6] += g;
ctx->state[7] += h;
}
void sha256_init(SHA256_CTX *ctx)
{ {
ctx->datalen = 0; const u8 *data8 = (const u8 *)data;
ctx->bitlen = 0; unsigned int len32 = (unsigned int)len;
ctx->state[0] = 0x6a09e667; unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
ctx->state[1] = 0xbb67ae85;
ctx->state[2] = 0x3c6ef372; sctx->count += len32;
ctx->state[3] = 0xa54ff53a;
ctx->state[4] = 0x510e527f; if (unlikely((partial + len32) >= SHA256_BLOCK_SIZE)) {
ctx->state[5] = 0x9b05688c; int blocks;
ctx->state[6] = 0x1f83d9ab;
ctx->state[7] = 0x5be0cd19; if (partial) {
int p = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data8, p);
data8 += p;
len32 -= p;
sha256_block_data_order(sctx->state, sctx->buf, 1);
} }
void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) blocks = len32 / SHA256_BLOCK_SIZE;
len32 %= SHA256_BLOCK_SIZE;
if (blocks) {
sha256_block_data_order(sctx->state, data8, blocks);
data8 += blocks * SHA256_BLOCK_SIZE;
}
partial = 0;
}
if (len32)
memcpy(sctx->buf + partial, data8, len32);
return 0;
}
int sha256_finalize(struct sha256_state *sctx)
{ {
WORD i; const int bit_offset = SHA256_BLOCK_SIZE - sizeof(u64);
u64 *bits = (u64 *)(sctx->buf + bit_offset);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
for (i = 0; i < len; ++i) { sctx->buf[partial++] = 0x80;
ctx->data[ctx->datalen] = data[i]; if (partial > bit_offset) {
ctx->datalen++; memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial);
if (ctx->datalen == 64) { partial = 0;
sha256_transform(ctx, ctx->data);
ctx->bitlen += 512; sha256_block_data_order(sctx->state, sctx->buf, 1);
ctx->datalen = 0;
}
}
} }
void sha256_final(SHA256_CTX *ctx, BYTE hash[]) memset(sctx->buf + partial, 0x0, bit_offset - partial);
*bits = __builtin_bswap64(sctx->count << 3);
sha256_block_data_order(sctx->state, sctx->buf, 1);
return 0;
}
int sha256_finish(struct sha256_state *sctx, void *out)
{ {
WORD i; unsigned int digest_size = 32;
u32 *digest = (u32 *)out;
int i;
i = ctx->datalen; // Switch: misalignment shouldn't be a problem here...
for (i = 0; digest_size > 0; i++, digest_size -= sizeof(u32))
*digest++ = __builtin_bswap32(sctx->state[i]);
// Pad whatever data is left in the buffer. *sctx = (struct sha256_state){};
if (ctx->datalen < 56) { return 0;
ctx->data[i++] = 0x80;
while (i < 56)
ctx->data[i++] = 0x00;
}
else {
ctx->data[i++] = 0x80;
while (i < 64)
ctx->data[i++] = 0x00;
sha256_transform(ctx, ctx->data);
memset(ctx->data, 0, 56);
} }
// Append to the padding the total message's length in bits and transform. #ifdef __cplusplus
ctx->bitlen += ctx->datalen * 8;
ctx->data[63] = ctx->bitlen;
ctx->data[62] = ctx->bitlen >> 8;
ctx->data[61] = ctx->bitlen >> 16;
ctx->data[60] = ctx->bitlen >> 24;
ctx->data[59] = ctx->bitlen >> 32;
ctx->data[58] = ctx->bitlen >> 40;
ctx->data[57] = ctx->bitlen >> 48;
ctx->data[56] = ctx->bitlen >> 56;
sha256_transform(ctx, ctx->data);
// Since this implementation uses little endian byte ordering and SHA uses big endian,
// reverse all the bytes when copying the final state to the output hash.
for (i = 0; i < 4; ++i) {
hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
}
} }
#endif

View file

@ -1,41 +1,36 @@
/********************************************************************* #pragma once
* Filename: sha256.h
* Author: Brad Conte (brad AT bradconte.com)
* Copyright:
* Disclaimer: This code is presented "as is" without any guarantees.
* Details: Defines the API for the corresponding SHA1 implementation.
*********************************************************************/
#if defined (__cplusplus) /* Based on linux source code */
#ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#ifndef SHA256_H
#define SHA256_H
/*************************** HEADER FILES ***************************/ #include <switch/types.h>
#include <stddef.h>
/****************************** MACROS ******************************/ #define SHA256_DIGEST_SIZE 32
#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest #define SHA256_BLOCK_SIZE 64
/**************************** DATA TYPES ****************************/ #define SHA256_H0 0x6a09e667UL
typedef unsigned char BYTE; // 8-bit byte #define SHA256_H1 0xbb67ae85UL
typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines #define SHA256_H2 0x3c6ef372UL
#define SHA256_H3 0xa54ff53aUL
#define SHA256_H4 0x510e527fUL
#define SHA256_H5 0x9b05688cUL
#define SHA256_H6 0x1f83d9abUL
#define SHA256_H7 0x5be0cd19UL
typedef struct { struct sha256_state {
BYTE data[64]; u32 state[SHA256_DIGEST_SIZE / 4];
WORD datalen; u64 count;
unsigned long long bitlen; u8 buf[SHA256_BLOCK_SIZE];
WORD state[8]; };
} SHA256_CTX;
/*********************** FUNCTION DECLARATIONS **********************/ int sha256_init(struct sha256_state *sctx);
void sha256_init(SHA256_CTX *ctx); int sha256_update(struct sha256_state *sctx, const void *data, size_t len);
void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); int sha256_finalize(struct sha256_state *sctx);
void sha256_final(SHA256_CTX *ctx, BYTE hash[]); int sha256_finish(struct sha256_state *sctx, void *out);
#endif // SHA256_H #ifdef __cplusplus
#if defined (__cplusplus)
} }
#endif #endif

View file

@ -0,0 +1,163 @@
.section .text.sha256_armv8, "ax", %progbits
.align 5
.arch armv8-a+crypto
# SHA256 assembly implementation for ARMv8 AArch64 (based on linux source code)
#
# Compresses one or more 64-byte blocks into a SHA-256 state using the
# ARMv8 crypto extension instructions (sha256h, sha256h2, sha256su0, sha256su1).
#
# C prototype used by the callers:
#   void sha256_block_data_order(uint32_t *ctx, const void *in, size_t num);
#
# Register usage:
#   x0      - ctx: pointer to the eight 32-bit state words (updated in place)
#   x1      - in: input cursor, advanced by 64 bytes per block
#   x2      - num on entry; converted below into the end-of-input pointer
#   x3      - cursor into the .LKConstant256 round-constant table
#   v0/v1   - running hash state (state words 0-3 / 4-7)
#   v2/v3   - working copy of the state for the block being processed
#   v4      - copy of v2 taken before each sha256h, consumed by sha256h2
#   v5-v8   - the sixteen message-schedule words currently in flight
#   v9/v10  - schedule words plus round constants, used alternately
#   v16-v31 - all 64 round constants, loaded once before the loop
#
# NOTE: the loop compares x1 against the end pointer only after a block has
# been processed, so the caller must pass num >= 1.
.global sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256prolog:
# Open a 64-byte frame: x29/x30 at [sp], q8/q9/q10 at sp+16/+32/+48.
# v8-v10 are written inside the loop, so their previous contents are
# spilled here and reloaded in the epilog.
stp x29, x30, [sp,#-64]!
mov x29, sp
adr x3, .LKConstant256
str q8, [sp, #16]
# Load the round constants (four loads of 16 words each) interleaved with
# loading the state into v0/v1.
ld1 {v16.4s-v19.4s}, [x3], #64
ld1 {v0.4s}, [x0], #16
ld1 {v20.4s-v23.4s}, [x3], #64
# x2 = in + num * 64: address one past the last input block.
add x2, x1, x2, lsl #6
ld1 {v1.4s}, [x0]
ld1 {v24.4s-v27.4s}, [x3], #64
# Undo the post-increment above so x0 points at the start of the state again.
sub x0, x0, #16
str q9, [sp, #32]
str q10, [sp, #48]
ld1 {v28.4s-v31.4s}, [x3], #64
.Lsha256loop:
# Per-block body. Each sha256h/sha256h2 pair below is labelled as one group of
# four rounds; 16 groups give the 64 rounds of the compression function.
# The "add v9/v10" instructions prepare schedule-plus-constant values for a
# later group, and sha256su0/sha256su1 extend the message schedule, while the
# current group executes.
ld1 {v5.16b-v8.16b}, [x1], #64
mov v2.16b, v0.16b
mov v3.16b, v1.16b
# rev32 reverses the bytes inside every 32-bit word of the loaded block
# (SHA-256 message words are big-endian).
rev32 v5.16b, v5.16b
rev32 v6.16b, v6.16b
add v9.4s, v5.4s, v16.4s
rev32 v7.16b, v7.16b
add v10.4s, v6.4s, v17.4s
# Rounds 0-3
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v5.4s, v6.4s
rev32 v8.16b, v8.16b
add v9.4s, v7.4s, v18.4s
# Rounds 4-7
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v6.4s, v7.4s
sha256su1 v5.4s, v7.4s, v8.4s
add v10.4s, v8.4s, v19.4s
# Rounds 8-11
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v7.4s, v8.4s
sha256su1 v6.4s, v8.4s, v5.4s
add v9.4s, v5.4s, v20.4s
# Rounds 12-15
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v8.4s, v5.4s
sha256su1 v7.4s, v5.4s, v6.4s
add v10.4s, v6.4s, v21.4s
# Rounds 16-19
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v5.4s, v6.4s
sha256su1 v8.4s, v6.4s, v7.4s
add v9.4s, v7.4s, v22.4s
# Rounds 20-23
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v6.4s, v7.4s
sha256su1 v5.4s, v7.4s, v8.4s
add v10.4s, v8.4s, v23.4s
# Rounds 24-27
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v7.4s, v8.4s
sha256su1 v6.4s, v8.4s, v5.4s
add v9.4s, v5.4s, v24.4s
# Rounds 28-31
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v8.4s, v5.4s
sha256su1 v7.4s, v5.4s, v6.4s
add v10.4s, v6.4s, v25.4s
# Rounds 32-35
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v5.4s, v6.4s
sha256su1 v8.4s, v6.4s, v7.4s
add v9.4s, v7.4s, v26.4s
# Rounds 36-39
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v6.4s, v7.4s
sha256su1 v5.4s, v7.4s, v8.4s
add v10.4s, v8.4s, v27.4s
# Rounds 40-43
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su0 v7.4s, v8.4s
sha256su1 v6.4s, v8.4s, v5.4s
add v9.4s, v5.4s, v28.4s
# Rounds 44-47
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
sha256su0 v8.4s, v5.4s
sha256su1 v7.4s, v5.4s, v6.4s
add v10.4s, v6.4s, v29.4s
# Rounds 48-51 (the schedule is almost complete: only sha256su1 remains)
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
sha256su1 v8.4s, v6.4s, v7.4s
add v9.4s, v7.4s, v30.4s
# Rounds 52-55
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
add v10.4s, v8.4s, v31.4s
# Rounds 56-59
mov v4.16b, v2.16b
sha256h q2, q3, v9.4s
sha256h2 q3, q4, v9.4s
# Rounds 60-63
mov v4.16b, v2.16b
sha256h q2, q3, v10.4s
sha256h2 q3, q4, v10.4s
# Fold the working variables back into the running state and continue until
# the input cursor reaches the end pointer computed in the prolog.
cmp x1, x2
add v1.4s, v1.4s, v3.4s
add v0.4s, v0.4s, v2.4s
b.ne .Lsha256loop
.Lsha256epilog:
# Write the updated state back through x0, then reload the spilled vector
# registers.
st1 {v0.4s,v1.4s}, [x0]
ldr q10, [sp, #48]
ldr q9, [sp, #32]
ldr q8, [sp, #16]
# Restore x29 and release the 64-byte frame in one post-indexed load. x30 is
# not reloaded: nothing in this routine writes to it.
ldr x29, [sp], #64
ret
.align 5
# The 64 SHA-256 round constants, four words per line; loaded into v16-v31 by
# the prolog.
.LKConstant256:
.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
# Symbol size spans the code and the constant table above.
.size sha256_block_data_order,.-sha256_block_data_order
.align 2