diff --git a/configure.ac b/configure.ac index 3a0f925a7..e8cdb2f80 100644 --- a/configure.ac +++ b/configure.ac @@ -189,18 +189,6 @@ AC_ARG_WITH([intel-avx2], [intel_avx2=$withval], [intel_avx2=no]) -AC_ARG_WITH([armv8-crypto], - [AS_HELP_STRING([--with-armv8-crypto], - [Build with armv8 crypto (default is no)])], - [armv8_crypto=$withval], - [armv8_crypto=no]) - -AC_ARG_WITH([armv82-crypto], - [AS_HELP_STRING([--with-armv82-crypto], - [Build with armv8.2 crypto sha512 (default is no)])], - [armv82_crypto=$withval], - [armv82_crypto=no]) - AC_ARG_WITH([protoc-bindir],[AS_HELP_STRING([--with-protoc-bindir=BIN_DIR],[specify protoc bin path])], [protoc_bin_path=$withval], []) AC_ARG_ENABLE(man, @@ -736,18 +724,17 @@ if test x$use_scrypt_sse2 = xyes; then AC_DEFINE(USE_SSE2, 1, [Define this symbol if SSE2 works]) fi -if test x$armv8_crypto = xyes; then - AC_MSG_CHECKING([whether to build with armv8 crypto]) - AC_MSG_RESULT(yes) - AC_DEFINE(USE_ARMV8, 1, [Define this symbol if armv8 crypto works]) - CXXFLAGS="$CXXFLAGS -march=armv8-a+crypto" -fi - -if test x$armv82_crypto = xyes; then - AC_CHECK_DECLS([vsha512su0q_u64], - [AC_DEFINE(USE_ARMV82, 1, [Define this symbol if armv8.2 crypto works]) - CXXFLAGS="$CXXFLAGS -march=armv8.2-a+crypto+sha3"], AC_MSG_ERROR(sha512 missing), [#include ]) -fi +AC_MSG_CHECKING([whether to build with armv8 crypto]) +case $host in + aarch64-*) + AC_MSG_RESULT(yes) + AC_DEFINE(USE_ARMV8, 1, [Define this symbol if armv8 crypto works]) + CXXFLAGS="$CXXFLAGS -march=armv8-a+crypto" + AC_CHECK_DECLS([vsha512su0q_u64], + [AC_DEFINE(USE_ARMV82, 1, [Define this symbol if armv8.2 crypto works]) + CXXFLAGS="$CXXFLAGS -march=armv8.2-a+crypto+sha3"],, [#include ]) + ;; +esac if test x$use_pkgconfig = xyes; then : dnl diff --git a/src/crypto/sha1.cpp b/src/crypto/sha1.cpp index fcd510e40..595a8a588 100644 --- a/src/crypto/sha1.cpp +++ b/src/crypto/sha1.cpp @@ -37,7 +37,6 @@ namespace namespace sha1 { -#ifndef USE_AVX2 /** One round of SHA-1. */ void inline Round(uint32_t a, uint32_t& b, uint32_t c, uint32_t d, uint32_t& e, uint32_t f, uint32_t k, uint32_t w) { @@ -50,7 +49,6 @@ uint32_t inline f2(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; } uint32_t inline f3(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (d & (b | c)); } uint32_t inline left(uint32_t x) { return (x << 1) | (x >> 31); } -#endif /** Initialize SHA-1 state. */ void inline Initialize(uint32_t* s) @@ -67,8 +65,8 @@ const uint32_t k2 = 0x6ED9EBA1ul; const uint32_t k3 = 0x8F1BBCDCul; const uint32_t k4 = 0xCA62C1D6ul; -/** Perform a SHA-1 transformation, processing a 64-byte chunk. */ -void Transform(uint32_t* s, const unsigned char* chunk) +/** Perform a SHA-1 transformation, processing a 64-byte chunk. (ARMv8) */ +void Transform_ARMV8(uint32_t* s, const unsigned char* chunk) { #if defined(USE_ARMV8) || defined(USE_ARMV82) uint32x4_t ABCD, ABCD_SAVED; @@ -238,11 +236,22 @@ void Transform(uint32_t* s, const unsigned char* chunk) /** Save state */ vst1q_u32(&s[0], ABCD); s[4] = E0; +#endif +} -#elif USE_AVX2 +/** Perform a SHA-1 transformation, processing a 64-byte chunk. (AVX2) */ +void Transform_AVX2(uint32_t* s, const unsigned char* chunk) +{ +#if USE_AVX2 // Perform SHA1 one block (Intel AVX2) + sha1_one_block_avx2(chunk, s); -#else +#endif +} + +/** Perform a SHA-1 transformation, processing a 64-byte chunk. */ +void Transform(uint32_t* s, const unsigned char* chunk) +{ // Perform SHA one block (legacy) uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4]; @@ -338,13 +347,41 @@ void Transform(uint32_t* s, const unsigned char* chunk) s[2] += c; s[3] += d; s[4] += e; +} +/** Define SHA1 hardware */ +#if defined(__linux__) +#define HWCAP_SHA1 (1<<5) +#include +#elif defined(__WIN64__) +#include +bool isAVX (void) { + int cpuinfo[4]; + __cpuid(cpuinfo, 1); + return ((cpuinfo[2] & (1 << 28)) != 0); +} #endif +/** Define a function pointer for Transform */ +void (*transform_ptr) (uint32_t*, const unsigned char*) = &Transform; + +/** Initialize the function pointer */ +void inline Initialize_transform_ptr(void) +{ +// Override the function pointer for ARMV8/AVX2 +#if (defined(USE_ARMV8) || defined(USE_ARMV82)) + if (getauxval(AT_HWCAP) && HWCAP_SHA1) + transform_ptr = &Transform_ARMV8; +#elif USE_AVX2 && defined(__linux__) + if (getauxval(AT_HWCAP) && HWCAP_SHA1) + transform_ptr = &Transform_AVX2; +#elif USE_AVX2 && defined(__WIN64__) + if (isAVX) + transform_ptr = &Transform_AVX2; +#endif } } // namespace sha1 - } // namespace ////// SHA1 @@ -363,12 +400,12 @@ CSHA1& CSHA1::Write(const unsigned char* data, size_t len) memcpy(buf + bufsize, data, 64 - bufsize); bytes += 64 - bufsize; data += 64 - bufsize; - sha1::Transform(s, buf); + sha1::transform_ptr(s, buf); bufsize = 0; } while (end >= data + 64) { // Process full chunks directly from the source. - sha1::Transform(s, data); + sha1::transform_ptr(s, data); bytes += 64; data += 64; } @@ -400,3 +437,8 @@ CSHA1& CSHA1::Reset() sha1::Initialize(s); return *this; } + +void detect_sha1_hardware() +{ + sha1::Initialize_transform_ptr(); +} diff --git a/src/crypto/sha1.h b/src/crypto/sha1.h index 8b4568ee1..398424a8c 100644 --- a/src/crypto/sha1.h +++ b/src/crypto/sha1.h @@ -25,4 +25,6 @@ public: CSHA1& Reset(); }; +void detect_sha1_hardware(void); + #endif // BITCOIN_CRYPTO_SHA1_H diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp index bd61c3858..b1c85f258 100644 --- a/src/crypto/sha256.cpp +++ b/src/crypto/sha256.cpp @@ -28,7 +28,6 @@ # include "compat/arm_acle_selector.h" # endif # endif -#endif /** ARM Headers */ static const uint32_t K[] = { @@ -49,6 +48,7 @@ static const uint32_t K[] = 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, }; +#endif /** ARM Headers */ // Internal implementation code. namespace @@ -56,7 +56,6 @@ namespace /// Internal SHA-256 implementation. namespace sha256 { -#ifndef USE_AVX2 uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); } uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); } uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); } @@ -72,7 +71,6 @@ void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, u d += t1; h = t1 + t2; } -#endif /** Initialize SHA-256 state. */ void inline Initialize(uint32_t* s) @@ -87,8 +85,8 @@ void inline Initialize(uint32_t* s) s[7] = 0x5be0cd19ul; } -/** Perform one SHA-256 transformation, processing a 64-byte chunk. */ -void Transform(uint32_t* s, const unsigned char* chunk) +/** Perform one SHA-256 transformation, processing a 64-byte chunk. (ARMv8) */ +void Transform_ARMV8(uint32_t* s, const unsigned char* chunk) { #if defined(USE_ARMV8) || defined(USE_ARMV82) uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE; @@ -244,11 +242,21 @@ void Transform(uint32_t* s, const unsigned char* chunk) /** Save state */ vst1q_u32(&s[0], STATE0); vst1q_u32(&s[4], STATE1); +#endif +} -#elif USE_AVX2 +/** Perform one SHA-256 transformation, processing a 64-byte chunk. (AVX2) */ +void Transform_AVX2(uint32_t* s, const unsigned char* chunk) +{ +#if USE_AVX2 // Perform SHA256 one block (Intel AVX2) sha256_one_block_avx2(chunk, s); -#else +#endif +} + +/** Perform one SHA-256 transformation, processing a 64-byte chunk. */ +void Transform(uint32_t* s, const unsigned char* chunk) +{ // Perform SHA256 one block (legacy) uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; @@ -329,6 +337,37 @@ void Transform(uint32_t* s, const unsigned char* chunk) s[5] += f; s[6] += g; s[7] += h; +} + +/** Define SHA256 hardware */ +#if defined(__linux__) +#define HWCAP_SHA2 (1<<6) +#include +#elif defined(__WIN64__) +#include +bool isAVX (void) { + int cpuinfo[4]; + __cpuid(cpuinfo, 1); + return ((cpuinfo[2] & (1 << 28)) != 0); +} +#endif + +/** Define a function pointer for Transform */ +void (*transform_ptr) (uint32_t*, const unsigned char*) = &Transform; + +/** Initialize the function pointer */ +void inline Initialize_transform_ptr(void) +{ +// Override the function pointer for ARMV8/AVX2 +#if (defined(USE_ARMV8) || defined(USE_ARMV82)) + if (getauxval(AT_HWCAP) && HWCAP_SHA2) + transform_ptr = &Transform_ARMV8; +#elif USE_AVX2 && defined(__linux__) + if (getauxval(AT_HWCAP) && HWCAP_SHA2) + transform_ptr = &Transform_AVX2; +#elif USE_AVX2 && defined(__WIN64__) + if (isAVX) + transform_ptr = &Transform_AVX2; #endif } @@ -352,12 +391,12 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len) memcpy(buf + bufsize, data, 64 - bufsize); bytes += 64 - bufsize; data += 64 - bufsize; - sha256::Transform(s, buf); + sha256::transform_ptr(s, buf); bufsize = 0; } while (end >= data + 64) { // Process full chunks directly from the source. - sha256::Transform(s, data); + sha256::transform_ptr(s, data); bytes += 64; data += 64; } @@ -392,3 +431,8 @@ CSHA256& CSHA256::Reset() sha256::Initialize(s); return *this; } + +void detect_sha256_hardware() +{ + sha256::Initialize_transform_ptr(); +} diff --git a/src/crypto/sha256.h b/src/crypto/sha256.h index 127e62a22..65751ebf5 100644 --- a/src/crypto/sha256.h +++ b/src/crypto/sha256.h @@ -25,4 +25,6 @@ public: CSHA256& Reset(); }; +void detect_sha256_hardware(); + #endif // BITCOIN_CRYPTO_SHA256_H diff --git a/src/crypto/sha512.cpp b/src/crypto/sha512.cpp index c911ed312..ea23ea33e 100644 --- a/src/crypto/sha512.cpp +++ b/src/crypto/sha512.cpp @@ -28,7 +28,6 @@ # include "compat/arm_acle_selector.h" # endif # endif -#endif /** ARM Headers */ static const uint64_t sha512_round_constants[] = { @@ -73,6 +72,7 @@ static const uint64_t sha512_round_constants[] = 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 }; +#endif /** ARM Headers */ // Internal implementation code. namespace @@ -80,7 +80,6 @@ namespace /// Internal SHA-512 implementation. namespace sha512 { -#ifndef USE_AVX2 uint64_t inline Ch(uint64_t x, uint64_t y, uint64_t z) { return z ^ (x & (y ^ z)); } uint64_t inline Maj(uint64_t x, uint64_t y, uint64_t z) { return (x & y) | (z & (x | y)); } uint64_t inline Sigma0(uint64_t x) { return (x >> 28 | x << 36) ^ (x >> 34 | x << 30) ^ (x >> 39 | x << 25); } @@ -96,7 +95,6 @@ void inline Round(uint64_t a, uint64_t b, uint64_t c, uint64_t& d, uint64_t e, u d += t1; h = t1 + t2; } -#endif #ifdef USE_ARMV82 @@ -306,13 +304,19 @@ void inline Initialize(uint64_t* s) s[7] = 0x5be0cd19137e2179ull; } -/** Perform one SHA-512 transformation, processing a 128-byte chunk. */ -void Transform(uint64_t* s, const unsigned char* chunk) +/** Perform one SHA-512 transformation, processing a 128-byte chunk. (AVX2) */ +void Transform_AVX2(uint64_t* s, const unsigned char* chunk) { #ifdef USE_AVX2 // Perform SHA512 one block (Intel AVX2) sha512_one_block_avx2(chunk, s); -#elif USE_ARMV82 +#endif +} + +/** Perform one SHA-512 transformation, processing a 128-byte chunk. (ARMv8.2) */ +void Transform_ARMV82(uint64_t* s, const unsigned char* chunk) +{ +#ifdef USE_ARMV82 sha512_neon_core core; core.ab = vld1q_u64(s); @@ -331,7 +335,12 @@ void Transform(uint64_t* s, const unsigned char* chunk) s[5] = vgetq_lane_u64 (core.ef, 1); s[6] = vgetq_lane_u64 (core.gh, 0); s[7] = vgetq_lane_u64 (core.gh, 1); -#else +#endif +} + +/** Perform one SHA-512 transformation, processing a 128-byte chunk. */ +void Transform(uint64_t* s, const unsigned char* chunk) +{ // Perform SHA512 one block (legacy) uint64_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; uint64_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; @@ -429,11 +438,41 @@ void Transform(uint64_t* s, const unsigned char* chunk) s[5] += f; s[6] += g; s[7] += h; +} + +/** Define SHA512 hardware */ +#if defined(__linux__) +#define HWCAP_SHA512 (1<<21) +#include +#elif defined(__WIN64__) +#include +bool isAVX (void) { + int cpuinfo[4]; + __cpuid(cpuinfo, 1); + return ((cpuinfo[2] & (1 << 28)) != 0); +} +#endif + +/** Define a function pointer for Transform */ +void (*transform_ptr) (uint64_t*, const unsigned char*) = &Transform; + +/** Initialize the function pointer */ +void inline Initialize_transform_ptr(void) +{ +// Override the function pointer for ARMV82/AVX2 +#if defined(USE_ARMV82) + if (getauxval(AT_HWCAP) && HWCAP_SHA512) + transform_ptr = &Transform_ARMV82; +#elif USE_AVX2 && defined(__linux__) + if (getauxval(AT_HWCAP) && HWCAP_SHA512) + transform_ptr = &Transform_AVX2; +#elif USE_AVX2 && defined(__WIN64__) + if (isAVX) + transform_ptr = &Transform_AVX2; #endif } } // namespace sha512 - } // namespace @@ -453,12 +492,12 @@ CSHA512& CSHA512::Write(const unsigned char* data, size_t len) memcpy(buf + bufsize, data, 128 - bufsize); bytes += 128 - bufsize; data += 128 - bufsize; - sha512::Transform(s, buf); + sha512::transform_ptr(s, buf); bufsize = 0; } while (end >= data + 128) { // Process full chunks directly from the source. - sha512::Transform(s, data); + sha512::transform_ptr(s, data); data += 128; bytes += 128; } @@ -493,3 +532,8 @@ CSHA512& CSHA512::Reset() sha512::Initialize(s); return *this; } + +void detect_sha512_hardware() +{ + sha512::Initialize_transform_ptr(); +} diff --git a/src/crypto/sha512.h b/src/crypto/sha512.h index cd1023bc8..42b4a33b8 100644 --- a/src/crypto/sha512.h +++ b/src/crypto/sha512.h @@ -25,4 +25,6 @@ public: CSHA512& Reset(); }; +void detect_sha512_hardware(void); + #endif // BITCOIN_CRYPTO_SHA512_H diff --git a/src/util.cpp b/src/util.cpp index dc5d818ec..cebb50fd4 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -90,6 +90,10 @@ #include #include +#include "crypto/sha1.h" +#include "crypto/sha256.h" +#include "crypto/sha512.h" + // Work around clang compilation problem in Boost 1.46: // /usr/include/boost/program_options/detail/config_file.hpp:163:17: error: call to function 'to_internal' that is neither visible in the template definition nor found by argument-dependent lookup // See also: http://stackoverflow.com/questions/10020179/compilation-fail-in-boost-librairies-program-options @@ -860,6 +864,11 @@ void SetupEnvironment() // boost::filesystem::path, which is then used to explicitly imbue the path. std::locale loc = boost::filesystem::path::imbue(std::locale::classic()); boost::filesystem::path::imbue(loc); + + // Auto detect SHA1, SHA256 and SHA512 features of ARMv8/ARMv8.2/AVX2 + detect_sha1_hardware(); + detect_sha256_hardware(); + detect_sha512_hardware(); } bool SetupNetworking()