crypto: added runtime checks for SHA hardware

This commit is contained in:
Ed Tubbs 2022-08-02 21:54:29 -05:00
parent e607a452bd
commit e78cfc630d
8 changed files with 184 additions and 52 deletions

View File

@ -189,18 +189,6 @@ AC_ARG_WITH([intel-avx2],
[intel_avx2=$withval],
[intel_avx2=no])
AC_ARG_WITH([armv8-crypto],
[AS_HELP_STRING([--with-armv8-crypto],
[Build with armv8 crypto (default is no)])],
[armv8_crypto=$withval],
[armv8_crypto=no])
AC_ARG_WITH([armv82-crypto],
[AS_HELP_STRING([--with-armv82-crypto],
[Build with armv8.2 crypto sha512 (default is no)])],
[armv82_crypto=$withval],
[armv82_crypto=no])
AC_ARG_WITH([protoc-bindir],[AS_HELP_STRING([--with-protoc-bindir=BIN_DIR],[specify protoc bin path])], [protoc_bin_path=$withval], [])
AC_ARG_ENABLE(man,
@ -736,18 +724,17 @@ if test x$use_scrypt_sse2 = xyes; then
AC_DEFINE(USE_SSE2, 1, [Define this symbol if SSE2 works])
fi
if test x$armv8_crypto = xyes; then
AC_MSG_CHECKING([whether to build with armv8 crypto])
AC_MSG_RESULT(yes)
AC_DEFINE(USE_ARMV8, 1, [Define this symbol if armv8 crypto works])
CXXFLAGS="$CXXFLAGS -march=armv8-a+crypto"
fi
if test x$armv82_crypto = xyes; then
AC_CHECK_DECLS([vsha512su0q_u64],
[AC_DEFINE(USE_ARMV82, 1, [Define this symbol if armv8.2 crypto works])
CXXFLAGS="$CXXFLAGS -march=armv8.2-a+crypto+sha3"], AC_MSG_ERROR(sha512 missing), [#include <arm_neon.h>])
fi
AC_MSG_CHECKING([whether to build with armv8 crypto])
case $host in
aarch64-*)
AC_MSG_RESULT(yes)
AC_DEFINE(USE_ARMV8, 1, [Define this symbol if armv8 crypto works])
CXXFLAGS="$CXXFLAGS -march=armv8-a+crypto"
AC_CHECK_DECLS([vsha512su0q_u64],
[AC_DEFINE(USE_ARMV82, 1, [Define this symbol if armv8.2 crypto works])
CXXFLAGS="$CXXFLAGS -march=armv8.2-a+crypto+sha3"],, [#include <arm_neon.h>])
;;
esac
if test x$use_pkgconfig = xyes; then
: dnl

View File

@ -37,7 +37,6 @@ namespace
namespace sha1
{
#ifndef USE_AVX2
/** One round of SHA-1. */
void inline Round(uint32_t a, uint32_t& b, uint32_t c, uint32_t d, uint32_t& e, uint32_t f, uint32_t k, uint32_t w)
{
@ -50,7 +49,6 @@ uint32_t inline f2(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }
uint32_t inline f3(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (d & (b | c)); }
uint32_t inline left(uint32_t x) { return (x << 1) | (x >> 31); }
#endif
/** Initialize SHA-1 state. */
void inline Initialize(uint32_t* s)
@ -67,8 +65,8 @@ const uint32_t k2 = 0x6ED9EBA1ul;
const uint32_t k3 = 0x8F1BBCDCul;
const uint32_t k4 = 0xCA62C1D6ul;
/** Perform a SHA-1 transformation, processing a 64-byte chunk. */
void Transform(uint32_t* s, const unsigned char* chunk)
/** Perform a SHA-1 transformation, processing a 64-byte chunk. (ARMv8) */
void Transform_ARMV8(uint32_t* s, const unsigned char* chunk)
{
#if defined(USE_ARMV8) || defined(USE_ARMV82)
uint32x4_t ABCD, ABCD_SAVED;
@ -238,11 +236,22 @@ void Transform(uint32_t* s, const unsigned char* chunk)
/** Save state */
vst1q_u32(&s[0], ABCD);
s[4] = E0;
#endif
}
#elif USE_AVX2
/** Perform a SHA-1 transformation, processing a 64-byte chunk. (AVX2) */
void Transform_AVX2(uint32_t* s, const unsigned char* chunk)
{
#if USE_AVX2
// Perform SHA1 one block (Intel AVX2)
sha1_one_block_avx2(chunk, s);
#else
#endif
}
/** Perform a SHA-1 transformation, processing a 64-byte chunk. */
void Transform(uint32_t* s, const unsigned char* chunk)
{
// Perform SHA one block (legacy)
uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4];
@ -338,13 +347,41 @@ void Transform(uint32_t* s, const unsigned char* chunk)
s[2] += c;
s[3] += d;
s[4] += e;
}
/** Define SHA1 hardware */
#if defined(__linux__)
#define HWCAP_SHA1 (1<<5)
#include <sys/auxv.h>
#elif defined(__WIN64__)
#include <intrin.h>
bool isAVX (void) {
int cpuinfo[4];
__cpuid(cpuinfo, 1);
return ((cpuinfo[2] & (1 << 28)) != 0);
}
#endif
/** Define a function pointer for Transform */
void (*transform_ptr) (uint32_t*, const unsigned char*) = &Transform;
/** Initialize the function pointer */
void inline Initialize_transform_ptr(void)
{
// Override the function pointer for ARMV8/AVX2
#if (defined(USE_ARMV8) || defined(USE_ARMV82))
if (getauxval(AT_HWCAP) && HWCAP_SHA1)
transform_ptr = &Transform_ARMV8;
#elif USE_AVX2 && defined(__linux__)
if (getauxval(AT_HWCAP) && HWCAP_SHA1)
transform_ptr = &Transform_AVX2;
#elif USE_AVX2 && defined(__WIN64__)
if (isAVX)
transform_ptr = &Transform_AVX2;
#endif
}
} // namespace sha1
} // namespace
////// SHA1
@ -363,12 +400,12 @@ CSHA1& CSHA1::Write(const unsigned char* data, size_t len)
memcpy(buf + bufsize, data, 64 - bufsize);
bytes += 64 - bufsize;
data += 64 - bufsize;
sha1::Transform(s, buf);
sha1::transform_ptr(s, buf);
bufsize = 0;
}
while (end >= data + 64) {
// Process full chunks directly from the source.
sha1::Transform(s, data);
sha1::transform_ptr(s, data);
bytes += 64;
data += 64;
}
@ -400,3 +437,8 @@ CSHA1& CSHA1::Reset()
sha1::Initialize(s);
return *this;
}
void detect_sha1_hardware()
{
sha1::Initialize_transform_ptr();
}

View File

@ -25,4 +25,6 @@ public:
CSHA1& Reset();
};
void detect_sha1_hardware(void);
#endif // BITCOIN_CRYPTO_SHA1_H

View File

@ -28,7 +28,6 @@
# include "compat/arm_acle_selector.h"
# endif
# endif
#endif /** ARM Headers */
static const uint32_t K[] =
{
@ -49,6 +48,7 @@ static const uint32_t K[] =
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
};
#endif /** ARM Headers */
// Internal implementation code.
namespace
@ -56,7 +56,6 @@ namespace
/// Internal SHA-256 implementation.
namespace sha256
{
#ifndef USE_AVX2
uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); }
uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); }
uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); }
@ -72,7 +71,6 @@ void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, u
d += t1;
h = t1 + t2;
}
#endif
/** Initialize SHA-256 state. */
void inline Initialize(uint32_t* s)
@ -87,8 +85,8 @@ void inline Initialize(uint32_t* s)
s[7] = 0x5be0cd19ul;
}
/** Perform one SHA-256 transformation, processing a 64-byte chunk. */
void Transform(uint32_t* s, const unsigned char* chunk)
/** Perform one SHA-256 transformation, processing a 64-byte chunk. (ARMv8) */
void Transform_ARMV8(uint32_t* s, const unsigned char* chunk)
{
#if defined(USE_ARMV8) || defined(USE_ARMV82)
uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
@ -244,11 +242,21 @@ void Transform(uint32_t* s, const unsigned char* chunk)
/** Save state */
vst1q_u32(&s[0], STATE0);
vst1q_u32(&s[4], STATE1);
#endif
}
#elif USE_AVX2
/** Perform one SHA-256 transformation, processing a 64-byte chunk. (AVX2) */
void Transform_AVX2(uint32_t* s, const unsigned char* chunk)
{
#if USE_AVX2
// Perform SHA256 one block (Intel AVX2)
sha256_one_block_avx2(chunk, s);
#else
#endif
}
/** Perform one SHA-256 transformation, processing a 64-byte chunk. */
void Transform(uint32_t* s, const unsigned char* chunk)
{
// Perform SHA256 one block (legacy)
uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
@ -329,6 +337,37 @@ void Transform(uint32_t* s, const unsigned char* chunk)
s[5] += f;
s[6] += g;
s[7] += h;
}
/** Define SHA256 hardware */
#if defined(__linux__)
#define HWCAP_SHA2 (1<<6)
#include <sys/auxv.h>
#elif defined(__WIN64__)
#include <intrin.h>
bool isAVX (void) {
int cpuinfo[4];
__cpuid(cpuinfo, 1);
return ((cpuinfo[2] & (1 << 28)) != 0);
}
#endif
/** Define a function pointer for Transform */
void (*transform_ptr) (uint32_t*, const unsigned char*) = &Transform;
/** Initialize the function pointer */
void inline Initialize_transform_ptr(void)
{
// Override the function pointer for ARMV8/AVX2
#if (defined(USE_ARMV8) || defined(USE_ARMV82))
if (getauxval(AT_HWCAP) && HWCAP_SHA2)
transform_ptr = &Transform_ARMV8;
#elif USE_AVX2 && defined(__linux__)
if (getauxval(AT_HWCAP) && HWCAP_SHA2)
transform_ptr = &Transform_AVX2;
#elif USE_AVX2 && defined(__WIN64__)
if (isAVX)
transform_ptr = &Transform_AVX2;
#endif
}
@ -352,12 +391,12 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len)
memcpy(buf + bufsize, data, 64 - bufsize);
bytes += 64 - bufsize;
data += 64 - bufsize;
sha256::Transform(s, buf);
sha256::transform_ptr(s, buf);
bufsize = 0;
}
while (end >= data + 64) {
// Process full chunks directly from the source.
sha256::Transform(s, data);
sha256::transform_ptr(s, data);
bytes += 64;
data += 64;
}
@ -392,3 +431,8 @@ CSHA256& CSHA256::Reset()
sha256::Initialize(s);
return *this;
}
void detect_sha256_hardware()
{
sha256::Initialize_transform_ptr();
}

View File

@ -25,4 +25,6 @@ public:
CSHA256& Reset();
};
void detect_sha256_hardware();
#endif // BITCOIN_CRYPTO_SHA256_H

View File

@ -28,7 +28,6 @@
# include "compat/arm_acle_selector.h"
# endif
# endif
#endif /** ARM Headers */
static const uint64_t sha512_round_constants[] =
{
@ -73,6 +72,7 @@ static const uint64_t sha512_round_constants[] =
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817
};
#endif /** ARM Headers */
// Internal implementation code.
namespace
@ -80,7 +80,6 @@ namespace
/// Internal SHA-512 implementation.
namespace sha512
{
#ifndef USE_AVX2
uint64_t inline Ch(uint64_t x, uint64_t y, uint64_t z) { return z ^ (x & (y ^ z)); }
uint64_t inline Maj(uint64_t x, uint64_t y, uint64_t z) { return (x & y) | (z & (x | y)); }
uint64_t inline Sigma0(uint64_t x) { return (x >> 28 | x << 36) ^ (x >> 34 | x << 30) ^ (x >> 39 | x << 25); }
@ -96,7 +95,6 @@ void inline Round(uint64_t a, uint64_t b, uint64_t c, uint64_t& d, uint64_t e, u
d += t1;
h = t1 + t2;
}
#endif
#ifdef USE_ARMV82
@ -306,13 +304,19 @@ void inline Initialize(uint64_t* s)
s[7] = 0x5be0cd19137e2179ull;
}
/** Perform one SHA-512 transformation, processing a 128-byte chunk. */
void Transform(uint64_t* s, const unsigned char* chunk)
/** Perform one SHA-512 transformation, processing a 128-byte chunk. (AVX2) */
void Transform_AVX2(uint64_t* s, const unsigned char* chunk)
{
#ifdef USE_AVX2
// Perform SHA512 one block (Intel AVX2)
sha512_one_block_avx2(chunk, s);
#elif USE_ARMV82
#endif
}
/** Perform one SHA-512 transformation, processing a 128-byte chunk. (ARMv8.2) */
void Transform_ARMV82(uint64_t* s, const unsigned char* chunk)
{
#ifdef USE_ARMV82
sha512_neon_core core;
core.ab = vld1q_u64(s);
@ -331,7 +335,12 @@ void Transform(uint64_t* s, const unsigned char* chunk)
s[5] = vgetq_lane_u64 (core.ef, 1);
s[6] = vgetq_lane_u64 (core.gh, 0);
s[7] = vgetq_lane_u64 (core.gh, 1);
#else
#endif
}
/** Perform one SHA-512 transformation, processing a 128-byte chunk. */
void Transform(uint64_t* s, const unsigned char* chunk)
{
// Perform SHA512 one block (legacy)
uint64_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
uint64_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
@ -429,11 +438,41 @@ void Transform(uint64_t* s, const unsigned char* chunk)
s[5] += f;
s[6] += g;
s[7] += h;
}
/** Define SHA512 hardware */
#if defined(__linux__)
#define HWCAP_SHA512 (1<<21)
#include <sys/auxv.h>
#elif defined(__WIN64__)
#include <intrin.h>
bool isAVX (void) {
int cpuinfo[4];
__cpuid(cpuinfo, 1);
return ((cpuinfo[2] & (1 << 28)) != 0);
}
#endif
/** Define a function pointer for Transform */
void (*transform_ptr) (uint64_t*, const unsigned char*) = &Transform;
/** Initialize the function pointer */
void inline Initialize_transform_ptr(void)
{
// Override the function pointer for ARMV82/AVX2
#if defined(USE_ARMV82)
if (getauxval(AT_HWCAP) && HWCAP_SHA512)
transform_ptr = &Transform_ARMV82;
#elif USE_AVX2 && defined(__linux__)
if (getauxval(AT_HWCAP) && HWCAP_SHA512)
transform_ptr = &Transform_AVX2;
#elif USE_AVX2 && defined(__WIN64__)
if (isAVX)
transform_ptr = &Transform_AVX2;
#endif
}
} // namespace sha512
} // namespace
@ -453,12 +492,12 @@ CSHA512& CSHA512::Write(const unsigned char* data, size_t len)
memcpy(buf + bufsize, data, 128 - bufsize);
bytes += 128 - bufsize;
data += 128 - bufsize;
sha512::Transform(s, buf);
sha512::transform_ptr(s, buf);
bufsize = 0;
}
while (end >= data + 128) {
// Process full chunks directly from the source.
sha512::Transform(s, data);
sha512::transform_ptr(s, data);
data += 128;
bytes += 128;
}
@ -493,3 +532,8 @@ CSHA512& CSHA512::Reset()
sha512::Initialize(s);
return *this;
}
void detect_sha512_hardware()
{
sha512::Initialize_transform_ptr();
}

View File

@ -25,4 +25,6 @@ public:
CSHA512& Reset();
};
void detect_sha512_hardware(void);
#endif // BITCOIN_CRYPTO_SHA512_H

View File

@ -90,6 +90,10 @@
#include <openssl/rand.h>
#include <openssl/conf.h>
#include "crypto/sha1.h"
#include "crypto/sha256.h"
#include "crypto/sha512.h"
// Work around clang compilation problem in Boost 1.46:
// /usr/include/boost/program_options/detail/config_file.hpp:163:17: error: call to function 'to_internal' that is neither visible in the template definition nor found by argument-dependent lookup
// See also: http://stackoverflow.com/questions/10020179/compilation-fail-in-boost-librairies-program-options
@ -860,6 +864,11 @@ void SetupEnvironment()
// boost::filesystem::path, which is then used to explicitly imbue the path.
std::locale loc = boost::filesystem::path::imbue(std::locale::classic());
boost::filesystem::path::imbue(loc);
// Auto detect SHA1, SHA256 and SHA512 features of ARMv8/ARMv8.2/AVX2
detect_sha1_hardware();
detect_sha256_hardware();
detect_sha512_hardware();
}
bool SetupNetworking()