From 4378d2f8415b0298f5cc69b93ac8fd0cc02f34e8 Mon Sep 17 00:00:00 2001 From: Jay D Dee Date: Wed, 30 Aug 2023 20:15:48 -0400 Subject: [PATCH] v3.23.0 --- RELEASE_NOTES | 12 +- algo-gate-api.c | 4 +- algo-gate-api.h | 9 +- algo/blake/blake256-hash-4way.c | 46 + algo/blake/blake2b-hash-4way.c | 68 +- algo/blake/blake2s-hash-4way.c | 96 +- algo/blake/sph_blake2b.c | 17 + algo/bmw/bmw256-hash-4way.c | 102 +- algo/bmw/bmw512-hash-4way.c | 102 +- algo/cubehash/cube-hash-2way.c | 27 +- algo/cubehash/cubehash_sse2.c | 178 +- algo/echo/aes_ni/hash.c | 8 +- algo/echo/echo-hash-4way.c | 9 +- algo/fugue/fugue-aesni.c | 12 +- algo/groestl/aes_ni/groestl-intr-aes.h | 4 +- algo/groestl/aes_ni/groestl256-intr-aes.h | 6 +- algo/groestl/aes_ni/hash-groestl.c | 12 +- algo/groestl/aes_ni/hash-groestl256.c | 2 +- algo/groestl/groestl256-intr-4way.h | 41 +- algo/groestl/groestl512-intr-4way.h | 35 +- algo/hamsi/hamsi-hash-4way.c | 200 +- algo/haval/haval-hash-4way.c | 67 +- algo/jh/jh-hash-4way.c | 194 +- algo/keccak/keccak-4way.c | 4 +- algo/keccak/keccak-hash-4way.c | 16 +- algo/keccak/sha3d-4way.c | 4 +- algo/luffa/luffa-hash-2way.c | 241 +- algo/luffa/luffa_for_sse2.c | 404 +- algo/lyra2/allium-4way.c | 4 +- algo/lyra2/lyra2rev2-4way.c | 4 +- algo/lyra2/lyra2rev3-4way.c | 4 +- algo/lyra2/lyra2z-4way.c | 6 +- algo/lyra2/sponge.c | 16 +- algo/lyra2/sponge.h | 59 +- algo/quark/anime-4way.c | 8 +- algo/quark/hmq1725-4way.c | 8 +- algo/quark/quark-4way.c | 8 +- algo/ripemd/ripemd-hash-4way.c | 38 +- algo/sha/sha256-hash-4way.c | 73 +- algo/sha/sha256d-4way.c | 78 +- algo/sha/sha256q-4way.c | 4 +- algo/sha/sha256t-4way.c | 104 +- algo/sha/sha512-hash-4way.c | 130 +- algo/shabal/shabal-hash-4way.c | 465 +- algo/shavite/shavite-hash-2way.c | 58 +- algo/shavite/shavite-hash-4way.c | 6 +- algo/simd/simd-hash-2way.c | 26 +- algo/skein/skein-4way.c | 4 +- algo/skein/skein-hash-4way.c | 216 +- algo/skein/skein2-4way.c | 4 +- algo/swifftx/swifftx.c | 715 +- algo/verthash/tiny_sha3/sha3-4way.c | 8 
+- algo/x11/c11-4way.c | 4 +- algo/x13/skunk-4way.c | 4 +- algo/x16/x16r-4way.c | 4 +- algo/x16/x16rt-4way.c | 4 +- algo/x16/x16rt.c | 2 +- algo/x16/x16rv2-4way.c | 4 +- algo/x16/x21s-4way.c | 4 +- algo/x17/x17-4way.c | 4 +- algo/x22/x22i-4way.c | 8 +- algo/x22/x25x-4way.c | 4 +- configure | 20 +- configure.ac | 2 +- configure~ | 7647 +++++++++++++++++++++ cpu-miner.c | 26 +- simd-utils.h | 29 +- simd-utils/intrlv.h | 157 +- simd-utils/simd-128.h | 81 +- simd-utils/simd-256.h | 38 +- simd-utils/simd-512.h | 4 - sysinfos.c | 400 +- 72 files changed, 10207 insertions(+), 2205 deletions(-) create mode 100755 configure~ diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 6f9fc242..fda9d3bd 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -65,9 +65,19 @@ If not what makes it happen or not happen? Change Log ---------- +v3.23.0 + +#398: Prevent GBT fallback to Getwork on network error. +#398: Prevent excessive logs when conditional mining is paused when mining solo. +Fix a false start if stratum doesn't immediately send a new job after connecting. +Tweak diagonal shuffle in Blake2b & Blake256 1-way SIMD to reduce latency. +CPUID support for AVX10. +Initial changes to AVX2 targeted code in preparation for AVX10. +Code cleanup and miscellaneous small improvements. + v3.22.3 -Data interleaving and byte swap optimizations iwith AVX2, AVX512 & AVX512VBMI. +Data interleaving and byte swap optimizations with AVX2, AVX512 & AVX512VBMI. Faster Luffa with AVX2 & AVX512. Other small optimizations. Some code cleanup. 
diff --git a/algo-gate-api.c b/algo-gate-api.c index 73d6c0ff..7f971bd9 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -171,7 +171,7 @@ int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -227,7 +227,7 @@ int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; diff --git a/algo-gate-api.h b/algo-gate-api.h index 8c06a091..067d397a 100644 --- a/algo-gate-api.h +++ b/algo-gate-api.h @@ -94,10 +94,13 @@ typedef uint32_t set_t; #define SSE42_OPT 4 #define AVX_OPT 8 // Sandybridge #define AVX2_OPT 0x10 // Haswell, Zen1 -#define SHA_OPT 0x20 // Zen1, Icelake (sha256) -#define AVX512_OPT 0x40 // Skylake-X (AVX512[F,VL,DQ,BW]) -#define VAES_OPT 0x80 // Icelake (VAES & AVX512) +#define SHA_OPT 0x20 // Zen1, Icelake (deprecated) +#define AVX512_OPT 0x40 // Skylake-X, Zen4 (AVX512[F,VL,DQ,BW]) +#define VAES_OPT 0x80 // Icelake, Zen3 +// AVX10 does not have explicit algo features: +// AVX10_512 is compatible with AVX512 + VAES +// AVX10_256 is compatible with AVX2 + VAES // return set containing all elements from sets a & b inline set_t set_union ( set_t a, set_t b ) { return a | b; } diff --git a/algo/blake/blake256-hash-4way.c b/algo/blake/blake256-hash-4way.c index 84bde1ad..ac723132 100644 --- a/algo/blake/blake256-hash-4way.c +++ b/algo/blake/blake256-hash-4way.c @@ -308,7 +308,52 @@ static const sph_u32 CS[16] = { ///////////////////////////////////////// // // Blake-256 1 way SIMD +// Only used for prehash, otherwise 4way is used with SSE2. 
+// optimize shuffles to reduce latency caused by dependencies on V1. +#define BLAKE256_ROUND( r ) \ +{ \ + V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \ + _mm_set_epi32( CSx( r, 7 ) ^ Mx( r, 6 ), \ + CSx( r, 5 ) ^ Mx( r, 4 ), \ + CSx( r, 3 ) ^ Mx( r, 2 ), \ + CSx( r, 1 ) ^ Mx( r, 0 ) ) ) ); \ + V3 = mm128_swap32_16( _mm_xor_si128( V3, V0 ) ); \ + V2 = _mm_add_epi32( V2, V3 ); \ + V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 12 ); \ + V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \ + _mm_set_epi32( CSx( r, 6 ) ^ Mx( r, 7 ), \ + CSx( r, 4 ) ^ Mx( r, 5 ), \ + CSx( r, 2 ) ^ Mx( r, 3 ), \ + CSx( r, 0 ) ^ Mx( r, 1 ) ) ) ); \ + V3 = mm128_shuflr32_8( _mm_xor_si128( V3, V0 ) ); \ + V2 = _mm_add_epi32( V2, V3 ); \ + V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 7 ); \ + V0 = mm128_shufll_32( V0 ); \ + V3 = mm128_swap_64( V3 ); \ + V2 = mm128_shuflr_32( V2 ); \ + V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \ + _mm_set_epi32( CSx( r, D ) ^ Mx( r, C ), \ + CSx( r, B ) ^ Mx( r, A ), \ + CSx( r, 9 ) ^ Mx( r, 8 ), \ + CSx( r, F ) ^ Mx( r, E ) ) ) ); \ + V3 = mm128_swap32_16( _mm_xor_si128( V3, V0 ) ); \ + V2 = _mm_add_epi32( V2, V3 ); \ + V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 12 ); \ + V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \ + _mm_set_epi32( CSx( r, C ) ^ Mx( r, D ), \ + CSx( r, A ) ^ Mx( r, B ), \ + CSx( r, 8 ) ^ Mx( r, 9 ), \ + CSx( r, E ) ^ Mx( r, F ) ) ) ); \ + V3 = mm128_shuflr32_8( _mm_xor_si128( V3, V0 ) ); \ + V2 = _mm_add_epi32( V2, V3 ); \ + V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 7 ); \ + V0 = mm128_shuflr_32( V0 ); \ + V3 = mm128_swap_64( V3 ); \ + V2 = mm128_shufll_32( V2 ); \ +} + +/* #define BLAKE256_ROUND( r ) \ { \ V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \ @@ -350,6 +395,7 @@ static const sph_u32 CS[16] = { V2 = mm128_swap_64( V2 ); \ V1 = mm128_shufll_32( V1 ); \ } +*/ void blake256_transform_le( uint32_t *H, const uint32_t *buf, const uint32_t T0, const uint32_t T1 ) diff --git a/algo/blake/blake2b-hash-4way.c b/algo/blake/blake2b-hash-4way.c index 
6437c7ba..14e51b9b 100644 --- a/algo/blake/blake2b-hash-4way.c +++ b/algo/blake/blake2b-hash-4way.c @@ -252,14 +252,14 @@ static void blake2b_8way_compress( blake2b_8way_ctx *ctx, int last ) v[ 5] = ctx->h[5]; v[ 6] = ctx->h[6]; v[ 7] = ctx->h[7]; - v[ 8] = m512_const1_64( 0x6A09E667F3BCC908 ); - v[ 9] = m512_const1_64( 0xBB67AE8584CAA73B ); - v[10] = m512_const1_64( 0x3C6EF372FE94F82B ); - v[11] = m512_const1_64( 0xA54FF53A5F1D36F1 ); - v[12] = m512_const1_64( 0x510E527FADE682D1 ); - v[13] = m512_const1_64( 0x9B05688C2B3E6C1F ); - v[14] = m512_const1_64( 0x1F83D9ABFB41BD6B ); - v[15] = m512_const1_64( 0x5BE0CD19137E2179 ); + v[ 8] = _mm512_set1_epi64( 0x6A09E667F3BCC908 ); + v[ 9] = _mm512_set1_epi64( 0xBB67AE8584CAA73B ); + v[10] = _mm512_set1_epi64( 0x3C6EF372FE94F82B ); + v[11] = _mm512_set1_epi64( 0xA54FF53A5F1D36F1 ); + v[12] = _mm512_set1_epi64( 0x510E527FADE682D1 ); + v[13] = _mm512_set1_epi64( 0x9B05688C2B3E6C1F ); + v[14] = _mm512_set1_epi64( 0x1F83D9ABFB41BD6B ); + v[15] = _mm512_set1_epi64( 0x5BE0CD19137E2179 ); v[12] = _mm512_xor_si512( v[12], _mm512_set1_epi64( ctx->t[0] ) ); v[13] = _mm512_xor_si512( v[13], _mm512_set1_epi64( ctx->t[1] ) ); @@ -310,16 +310,16 @@ int blake2b_8way_init( blake2b_8way_ctx *ctx ) { size_t i; - ctx->h[0] = m512_const1_64( 0x6A09E667F3BCC908 ); - ctx->h[1] = m512_const1_64( 0xBB67AE8584CAA73B ); - ctx->h[2] = m512_const1_64( 0x3C6EF372FE94F82B ); - ctx->h[3] = m512_const1_64( 0xA54FF53A5F1D36F1 ); - ctx->h[4] = m512_const1_64( 0x510E527FADE682D1 ); - ctx->h[5] = m512_const1_64( 0x9B05688C2B3E6C1F ); - ctx->h[6] = m512_const1_64( 0x1F83D9ABFB41BD6B ); - ctx->h[7] = m512_const1_64( 0x5BE0CD19137E2179 ); + ctx->h[0] = _mm512_set1_epi64( 0x6A09E667F3BCC908 ); + ctx->h[1] = _mm512_set1_epi64( 0xBB67AE8584CAA73B ); + ctx->h[2] = _mm512_set1_epi64( 0x3C6EF372FE94F82B ); + ctx->h[3] = _mm512_set1_epi64( 0xA54FF53A5F1D36F1 ); + ctx->h[4] = _mm512_set1_epi64( 0x510E527FADE682D1 ); + ctx->h[5] = _mm512_set1_epi64( 0x9B05688C2B3E6C1F 
); + ctx->h[6] = _mm512_set1_epi64( 0x1F83D9ABFB41BD6B ); + ctx->h[7] = _mm512_set1_epi64( 0x5BE0CD19137E2179 ); - ctx->h[0] = _mm512_xor_si512( ctx->h[0], m512_const1_64( 0x01010020 ) ); + ctx->h[0] = _mm512_xor_si512( ctx->h[0], _mm512_set1_epi64( 0x01010020 ) ); ctx->t[0] = 0; ctx->t[1] = 0; @@ -419,14 +419,14 @@ static void blake2b_4way_compress( blake2b_4way_ctx *ctx, int last ) v[ 5] = ctx->h[5]; v[ 6] = ctx->h[6]; v[ 7] = ctx->h[7]; - v[ 8] = m256_const1_64( 0x6A09E667F3BCC908 ); - v[ 9] = m256_const1_64( 0xBB67AE8584CAA73B ); - v[10] = m256_const1_64( 0x3C6EF372FE94F82B ); - v[11] = m256_const1_64( 0xA54FF53A5F1D36F1 ); - v[12] = m256_const1_64( 0x510E527FADE682D1 ); - v[13] = m256_const1_64( 0x9B05688C2B3E6C1F ); - v[14] = m256_const1_64( 0x1F83D9ABFB41BD6B ); - v[15] = m256_const1_64( 0x5BE0CD19137E2179 ); + v[ 8] = _mm256_set1_epi64x( 0x6A09E667F3BCC908 ); + v[ 9] = _mm256_set1_epi64x( 0xBB67AE8584CAA73B ); + v[10] = _mm256_set1_epi64x( 0x3C6EF372FE94F82B ); + v[11] = _mm256_set1_epi64x( 0xA54FF53A5F1D36F1 ); + v[12] = _mm256_set1_epi64x( 0x510E527FADE682D1 ); + v[13] = _mm256_set1_epi64x( 0x9B05688C2B3E6C1F ); + v[14] = _mm256_set1_epi64x( 0x1F83D9ABFB41BD6B ); + v[15] = _mm256_set1_epi64x( 0x5BE0CD19137E2179 ); v[12] = _mm256_xor_si256( v[12], _mm256_set1_epi64x( ctx->t[0] ) ); v[13] = _mm256_xor_si256( v[13], _mm256_set1_epi64x( ctx->t[1] ) ); @@ -477,16 +477,16 @@ int blake2b_4way_init( blake2b_4way_ctx *ctx ) { size_t i; - ctx->h[0] = m256_const1_64( 0x6A09E667F3BCC908 ); - ctx->h[1] = m256_const1_64( 0xBB67AE8584CAA73B ); - ctx->h[2] = m256_const1_64( 0x3C6EF372FE94F82B ); - ctx->h[3] = m256_const1_64( 0xA54FF53A5F1D36F1 ); - ctx->h[4] = m256_const1_64( 0x510E527FADE682D1 ); - ctx->h[5] = m256_const1_64( 0x9B05688C2B3E6C1F ); - ctx->h[6] = m256_const1_64( 0x1F83D9ABFB41BD6B ); - ctx->h[7] = m256_const1_64( 0x5BE0CD19137E2179 ); + ctx->h[0] = _mm256_set1_epi64x( 0x6A09E667F3BCC908 ); + ctx->h[1] = _mm256_set1_epi64x( 0xBB67AE8584CAA73B ); + 
ctx->h[2] = _mm256_set1_epi64x( 0x3C6EF372FE94F82B ); + ctx->h[3] = _mm256_set1_epi64x( 0xA54FF53A5F1D36F1 ); + ctx->h[4] = _mm256_set1_epi64x( 0x510E527FADE682D1 ); + ctx->h[5] = _mm256_set1_epi64x( 0x9B05688C2B3E6C1F ); + ctx->h[6] = _mm256_set1_epi64x( 0x1F83D9ABFB41BD6B ); + ctx->h[7] = _mm256_set1_epi64x( 0x5BE0CD19137E2179 ); - ctx->h[0] = _mm256_xor_si256( ctx->h[0], m256_const1_64( 0x01010020 ) ); + ctx->h[0] = _mm256_xor_si256( ctx->h[0], _mm256_set1_epi64x( 0x01010020 ) ); ctx->t[0] = 0; ctx->t[1] = 0; diff --git a/algo/blake/blake2s-hash-4way.c b/algo/blake/blake2s-hash-4way.c index a69e5010..76e10490 100644 --- a/algo/blake/blake2s-hash-4way.c +++ b/algo/blake/blake2s-hash-4way.c @@ -62,14 +62,14 @@ int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen ) memset( S, 0, sizeof( blake2s_4way_state ) ); - S->h[0] = m128_const1_64( 0x6A09E6676A09E667ULL ); - S->h[1] = m128_const1_64( 0xBB67AE85BB67AE85ULL ); - S->h[2] = m128_const1_64( 0x3C6EF3723C6EF372ULL ); - S->h[3] = m128_const1_64( 0xA54FF53AA54FF53AULL ); - S->h[4] = m128_const1_64( 0x510E527F510E527FULL ); - S->h[5] = m128_const1_64( 0x9B05688C9B05688CULL ); - S->h[6] = m128_const1_64( 0x1F83D9AB1F83D9ABULL ); - S->h[7] = m128_const1_64( 0x5BE0CD195BE0CD19ULL ); + S->h[0] = _mm_set1_epi64x( 0x6A09E6676A09E667ULL ); + S->h[1] = _mm_set1_epi64x( 0xBB67AE85BB67AE85ULL ); + S->h[2] = _mm_set1_epi64x( 0x3C6EF3723C6EF372ULL ); + S->h[3] = _mm_set1_epi64x( 0xA54FF53AA54FF53AULL ); + S->h[4] = _mm_set1_epi64x( 0x510E527F510E527FULL ); + S->h[5] = _mm_set1_epi64x( 0x9B05688C9B05688CULL ); + S->h[6] = _mm_set1_epi64x( 0x1F83D9AB1F83D9ABULL ); + S->h[7] = _mm_set1_epi64x( 0x5BE0CD195BE0CD19ULL ); // for( int i = 0; i < 8; ++i ) // S->h[i] = _mm_set1_epi32( blake2s_IV[i] ); @@ -90,18 +90,18 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block ) memcpy_128( m, block, 16 ); memcpy_128( v, S->h, 8 ); - v[ 8] = m128_const1_64( 0x6A09E6676A09E667ULL ); - v[ 9] = m128_const1_64( 
0xBB67AE85BB67AE85ULL ); - v[10] = m128_const1_64( 0x3C6EF3723C6EF372ULL ); - v[11] = m128_const1_64( 0xA54FF53AA54FF53AULL ); + v[ 8] = _mm_set1_epi64x( 0x6A09E6676A09E667ULL ); + v[ 9] = _mm_set1_epi64x( 0xBB67AE85BB67AE85ULL ); + v[10] = _mm_set1_epi64x( 0x3C6EF3723C6EF372ULL ); + v[11] = _mm_set1_epi64x( 0xA54FF53AA54FF53AULL ); v[12] = _mm_xor_si128( _mm_set1_epi32( S->t[0] ), - m128_const1_64( 0x510E527F510E527FULL ) ); + _mm_set1_epi64x( 0x510E527F510E527FULL ) ); v[13] = _mm_xor_si128( _mm_set1_epi32( S->t[1] ), - m128_const1_64( 0x9B05688C9B05688CULL ) ); + _mm_set1_epi64x( 0x9B05688C9B05688CULL ) ); v[14] = _mm_xor_si128( _mm_set1_epi32( S->f[0] ), - m128_const1_64( 0x1F83D9AB1F83D9ABULL ) ); + _mm_set1_epi64x( 0x1F83D9AB1F83D9ABULL ) ); v[15] = _mm_xor_si128( _mm_set1_epi32( S->f[1] ), - m128_const1_64( 0x5BE0CD195BE0CD19ULL ) ); + _mm_set1_epi64x( 0x5BE0CD195BE0CD19ULL ) ); #define G4W( sigma0, sigma1, a, b, c, d ) \ do { \ @@ -269,21 +269,21 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block ) memcpy_256( m, block, 16 ); memcpy_256( v, S->h, 8 ); - v[ 8] = m256_const1_64( 0x6A09E6676A09E667ULL ); - v[ 9] = m256_const1_64( 0xBB67AE85BB67AE85ULL ); - v[10] = m256_const1_64( 0x3C6EF3723C6EF372ULL ); - v[11] = m256_const1_64( 0xA54FF53AA54FF53AULL ); + v[ 8] = _mm256_set1_epi64x( 0x6A09E6676A09E667ULL ); + v[ 9] = _mm256_set1_epi64x( 0xBB67AE85BB67AE85ULL ); + v[10] = _mm256_set1_epi64x( 0x3C6EF3723C6EF372ULL ); + v[11] = _mm256_set1_epi64x( 0xA54FF53AA54FF53AULL ); v[12] = _mm256_xor_si256( _mm256_set1_epi32( S->t[0] ), - m256_const1_64( 0x510E527F510E527FULL ) ); + _mm256_set1_epi64x( 0x510E527F510E527FULL ) ); v[13] = _mm256_xor_si256( _mm256_set1_epi32( S->t[1] ), - m256_const1_64( 0x9B05688C9B05688CULL ) ); + _mm256_set1_epi64x( 0x9B05688C9B05688CULL ) ); v[14] = _mm256_xor_si256( _mm256_set1_epi32( S->f[0] ), - m256_const1_64( 0x1F83D9AB1F83D9ABULL ) ); + _mm256_set1_epi64x( 0x1F83D9AB1F83D9ABULL ) ); v[15] = _mm256_xor_si256( 
_mm256_set1_epi32( S->f[1] ), - m256_const1_64( 0x5BE0CD195BE0CD19ULL ) ); + _mm256_set1_epi64x( 0x5BE0CD195BE0CD19ULL ) ); /* v[ 8] = _mm256_set1_epi32( blake2s_IV[0] ); @@ -391,14 +391,14 @@ int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen ) memset( P->personal, 0, sizeof( P->personal ) ); memset( S, 0, sizeof( blake2s_8way_state ) ); - S->h[0] = m256_const1_64( 0x6A09E6676A09E667ULL ); - S->h[1] = m256_const1_64( 0xBB67AE85BB67AE85ULL ); - S->h[2] = m256_const1_64( 0x3C6EF3723C6EF372ULL ); - S->h[3] = m256_const1_64( 0xA54FF53AA54FF53AULL ); - S->h[4] = m256_const1_64( 0x510E527F510E527FULL ); - S->h[5] = m256_const1_64( 0x9B05688C9B05688CULL ); - S->h[6] = m256_const1_64( 0x1F83D9AB1F83D9ABULL ); - S->h[7] = m256_const1_64( 0x5BE0CD195BE0CD19ULL ); + S->h[0] = _mm256_set1_epi64x( 0x6A09E6676A09E667ULL ); + S->h[1] = _mm256_set1_epi64x( 0xBB67AE85BB67AE85ULL ); + S->h[2] = _mm256_set1_epi64x( 0x3C6EF3723C6EF372ULL ); + S->h[3] = _mm256_set1_epi64x( 0xA54FF53AA54FF53AULL ); + S->h[4] = _mm256_set1_epi64x( 0x510E527F510E527FULL ); + S->h[5] = _mm256_set1_epi64x( 0x9B05688C9B05688CULL ); + S->h[6] = _mm256_set1_epi64x( 0x1F83D9AB1F83D9ABULL ); + S->h[7] = _mm256_set1_epi64x( 0x5BE0CD195BE0CD19ULL ); // for( int i = 0; i < 8; ++i ) @@ -510,21 +510,21 @@ int blake2s_16way_compress( blake2s_16way_state *S, const __m512i *block ) memcpy_512( m, block, 16 ); memcpy_512( v, S->h, 8 ); - v[ 8] = m512_const1_64( 0x6A09E6676A09E667ULL ); - v[ 9] = m512_const1_64( 0xBB67AE85BB67AE85ULL ); - v[10] = m512_const1_64( 0x3C6EF3723C6EF372ULL ); - v[11] = m512_const1_64( 0xA54FF53AA54FF53AULL ); + v[ 8] = _mm512_set1_epi64( 0x6A09E6676A09E667ULL ); + v[ 9] = _mm512_set1_epi64( 0xBB67AE85BB67AE85ULL ); + v[10] = _mm512_set1_epi64( 0x3C6EF3723C6EF372ULL ); + v[11] = _mm512_set1_epi64( 0xA54FF53AA54FF53AULL ); v[12] = _mm512_xor_si512( _mm512_set1_epi32( S->t[0] ), - m512_const1_64( 0x510E527F510E527FULL ) ); + _mm512_set1_epi64( 0x510E527F510E527FULL ) ); v[13] = 
_mm512_xor_si512( _mm512_set1_epi32( S->t[1] ), - m512_const1_64( 0x9B05688C9B05688CULL ) ); + _mm512_set1_epi64( 0x9B05688C9B05688CULL ) ); v[14] = _mm512_xor_si512( _mm512_set1_epi32( S->f[0] ), - m512_const1_64( 0x1F83D9AB1F83D9ABULL ) ); + _mm512_set1_epi64( 0x1F83D9AB1F83D9ABULL ) ); v[15] = _mm512_xor_si512( _mm512_set1_epi32( S->f[1] ), - m512_const1_64( 0x5BE0CD195BE0CD19ULL ) ); + _mm512_set1_epi64( 0x5BE0CD195BE0CD19ULL ) ); #define G16W( sigma0, sigma1, a, b, c, d) \ @@ -589,14 +589,14 @@ int blake2s_16way_init( blake2s_16way_state *S, const uint8_t outlen ) memset( P->personal, 0, sizeof( P->personal ) ); memset( S, 0, sizeof( blake2s_16way_state ) ); - S->h[0] = m512_const1_64( 0x6A09E6676A09E667ULL ); - S->h[1] = m512_const1_64( 0xBB67AE85BB67AE85ULL ); - S->h[2] = m512_const1_64( 0x3C6EF3723C6EF372ULL ); - S->h[3] = m512_const1_64( 0xA54FF53AA54FF53AULL ); - S->h[4] = m512_const1_64( 0x510E527F510E527FULL ); - S->h[5] = m512_const1_64( 0x9B05688C9B05688CULL ); - S->h[6] = m512_const1_64( 0x1F83D9AB1F83D9ABULL ); - S->h[7] = m512_const1_64( 0x5BE0CD195BE0CD19ULL ); + S->h[0] = _mm512_set1_epi64( 0x6A09E6676A09E667ULL ); + S->h[1] = _mm512_set1_epi64( 0xBB67AE85BB67AE85ULL ); + S->h[2] = _mm512_set1_epi64( 0x3C6EF3723C6EF372ULL ); + S->h[3] = _mm512_set1_epi64( 0xA54FF53AA54FF53AULL ); + S->h[4] = _mm512_set1_epi64( 0x510E527F510E527FULL ); + S->h[5] = _mm512_set1_epi64( 0x9B05688C9B05688CULL ); + S->h[6] = _mm512_set1_epi64( 0x1F83D9AB1F83D9ABULL ); + S->h[7] = _mm512_set1_epi64( 0x5BE0CD195BE0CD19ULL ); uint32_t *p = ( uint32_t * )( P ); diff --git a/algo/blake/sph_blake2b.c b/algo/blake/sph_blake2b.c index 3bcb6e12..50a97586 100644 --- a/algo/blake/sph_blake2b.c +++ b/algo/blake/sph_blake2b.c @@ -64,6 +64,22 @@ V[1] = mm256_ror_64( _mm256_xor_si256( V[1], V[2] ), 63 ); \ } +// Pivot about V[1] instead of V[0] reduces latency. 
+#define BLAKE2B_ROUND( R ) \ +{ \ + __m256i *V = (__m256i*)v; \ + const uint8_t *sigmaR = sigma[R]; \ + BLAKE2B_G( 0, 1, 2, 3, 4, 5, 6, 7 ); \ + V[0] = mm256_shufll_64( V[0] ); \ + V[3] = mm256_swap_128( V[3] ); \ + V[2] = mm256_shuflr_64( V[2] ); \ + BLAKE2B_G( 14, 15, 8, 9, 10, 11, 12, 13 ); \ + V[0] = mm256_shuflr_64( V[0] ); \ + V[3] = mm256_swap_128( V[3] ); \ + V[2] = mm256_shufll_64( V[2] ); \ +} + +/* #define BLAKE2B_ROUND( R ) \ { \ __m256i *V = (__m256i*)v; \ @@ -77,6 +93,7 @@ V[2] = mm256_swap_128( V[2] ); \ V[1] = mm256_shufll_64( V[1] ); \ } +*/ #elif defined(__SSE2__) // always true diff --git a/algo/bmw/bmw256-hash-4way.c b/algo/bmw/bmw256-hash-4way.c index 8b9de767..08f7621f 100644 --- a/algo/bmw/bmw256-hash-4way.c +++ b/algo/bmw/bmw256-hash-4way.c @@ -451,22 +451,22 @@ static const __m128i final_s[16] = */ void bmw256_4way_init( bmw256_4way_context *ctx ) { - ctx->H[ 0] = m128_const1_64( 0x4041424340414243 ); - ctx->H[ 1] = m128_const1_64( 0x4445464744454647 ); - ctx->H[ 2] = m128_const1_64( 0x48494A4B48494A4B ); - ctx->H[ 3] = m128_const1_64( 0x4C4D4E4F4C4D4E4F ); - ctx->H[ 4] = m128_const1_64( 0x5051525350515253 ); - ctx->H[ 5] = m128_const1_64( 0x5455565754555657 ); - ctx->H[ 6] = m128_const1_64( 0x58595A5B58595A5B ); - ctx->H[ 7] = m128_const1_64( 0x5C5D5E5F5C5D5E5F ); - ctx->H[ 8] = m128_const1_64( 0x6061626360616263 ); - ctx->H[ 9] = m128_const1_64( 0x6465666764656667 ); - ctx->H[10] = m128_const1_64( 0x68696A6B68696A6B ); - ctx->H[11] = m128_const1_64( 0x6C6D6E6F6C6D6E6F ); - ctx->H[12] = m128_const1_64( 0x7071727370717273 ); - ctx->H[13] = m128_const1_64( 0x7475767774757677 ); - ctx->H[14] = m128_const1_64( 0x78797A7B78797A7B ); - ctx->H[15] = m128_const1_64( 0x7C7D7E7F7C7D7E7F ); + ctx->H[ 0] = _mm_set1_epi64x( 0x4041424340414243 ); + ctx->H[ 1] = _mm_set1_epi64x( 0x4445464744454647 ); + ctx->H[ 2] = _mm_set1_epi64x( 0x48494A4B48494A4B ); + ctx->H[ 3] = _mm_set1_epi64x( 0x4C4D4E4F4C4D4E4F ); + ctx->H[ 4] = _mm_set1_epi64x( 
0x5051525350515253 ); + ctx->H[ 5] = _mm_set1_epi64x( 0x5455565754555657 ); + ctx->H[ 6] = _mm_set1_epi64x( 0x58595A5B58595A5B ); + ctx->H[ 7] = _mm_set1_epi64x( 0x5C5D5E5F5C5D5E5F ); + ctx->H[ 8] = _mm_set1_epi64x( 0x6061626360616263 ); + ctx->H[ 9] = _mm_set1_epi64x( 0x6465666764656667 ); + ctx->H[10] = _mm_set1_epi64x( 0x68696A6B68696A6B ); + ctx->H[11] = _mm_set1_epi64x( 0x6C6D6E6F6C6D6E6F ); + ctx->H[12] = _mm_set1_epi64x( 0x7071727370717273 ); + ctx->H[13] = _mm_set1_epi64x( 0x7475767774757677 ); + ctx->H[14] = _mm_set1_epi64x( 0x78797A7B78797A7B ); + ctx->H[15] = _mm_set1_epi64x( 0x7C7D7E7F7C7D7E7F ); // for ( int i = 0; i < 16; i++ ) @@ -529,7 +529,7 @@ bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n, buf = sc->buf; ptr = sc->ptr; - buf[ ptr>>2 ] = m128_const1_64( 0x0000008000000080 ); + buf[ ptr>>2 ] = _mm_set1_epi64x( 0x0000008000000080 ); ptr += 4; h = sc->H; @@ -959,22 +959,22 @@ static const __m256i final_s8[16] = void bmw256_8way_init( bmw256_8way_context *ctx ) { - ctx->H[ 0] = m256_const1_64( 0x4041424340414243 ); - ctx->H[ 1] = m256_const1_64( 0x4445464744454647 ); - ctx->H[ 2] = m256_const1_64( 0x48494A4B48494A4B ); - ctx->H[ 3] = m256_const1_64( 0x4C4D4E4F4C4D4E4F ); - ctx->H[ 4] = m256_const1_64( 0x5051525350515253 ); - ctx->H[ 5] = m256_const1_64( 0x5455565754555657 ); - ctx->H[ 6] = m256_const1_64( 0x58595A5B58595A5B ); - ctx->H[ 7] = m256_const1_64( 0x5C5D5E5F5C5D5E5F ); - ctx->H[ 8] = m256_const1_64( 0x6061626360616263 ); - ctx->H[ 9] = m256_const1_64( 0x6465666764656667 ); - ctx->H[10] = m256_const1_64( 0x68696A6B68696A6B ); - ctx->H[11] = m256_const1_64( 0x6C6D6E6F6C6D6E6F ); - ctx->H[12] = m256_const1_64( 0x7071727370717273 ); - ctx->H[13] = m256_const1_64( 0x7475767774757677 ); - ctx->H[14] = m256_const1_64( 0x78797A7B78797A7B ); - ctx->H[15] = m256_const1_64( 0x7C7D7E7F7C7D7E7F ); + ctx->H[ 0] = _mm256_set1_epi64x( 0x4041424340414243 ); + ctx->H[ 1] = _mm256_set1_epi64x( 0x4445464744454647 ); + ctx->H[ 2] = 
_mm256_set1_epi64x( 0x48494A4B48494A4B ); + ctx->H[ 3] = _mm256_set1_epi64x( 0x4C4D4E4F4C4D4E4F ); + ctx->H[ 4] = _mm256_set1_epi64x( 0x5051525350515253 ); + ctx->H[ 5] = _mm256_set1_epi64x( 0x5455565754555657 ); + ctx->H[ 6] = _mm256_set1_epi64x( 0x58595A5B58595A5B ); + ctx->H[ 7] = _mm256_set1_epi64x( 0x5C5D5E5F5C5D5E5F ); + ctx->H[ 8] = _mm256_set1_epi64x( 0x6061626360616263 ); + ctx->H[ 9] = _mm256_set1_epi64x( 0x6465666764656667 ); + ctx->H[10] = _mm256_set1_epi64x( 0x68696A6B68696A6B ); + ctx->H[11] = _mm256_set1_epi64x( 0x6C6D6E6F6C6D6E6F ); + ctx->H[12] = _mm256_set1_epi64x( 0x7071727370717273 ); + ctx->H[13] = _mm256_set1_epi64x( 0x7475767774757677 ); + ctx->H[14] = _mm256_set1_epi64x( 0x78797A7B78797A7B ); + ctx->H[15] = _mm256_set1_epi64x( 0x7C7D7E7F7C7D7E7F ); ctx->ptr = 0; ctx->bit_count = 0; } @@ -1030,7 +1030,7 @@ void bmw256_8way_close( bmw256_8way_context *ctx, void *dst ) buf = ctx->buf; ptr = ctx->ptr; - buf[ ptr>>2 ] = m256_const1_64( 0x0000008000000080 ); + buf[ ptr>>2 ] = _mm256_set1_epi64x( 0x0000008000000080 ); ptr += 4; h = ctx->H; @@ -1460,22 +1460,22 @@ static const __m512i final_s16[16] = void bmw256_16way_init( bmw256_16way_context *ctx ) { - ctx->H[ 0] = m512_const1_64( 0x4041424340414243 ); - ctx->H[ 1] = m512_const1_64( 0x4445464744454647 ); - ctx->H[ 2] = m512_const1_64( 0x48494A4B48494A4B ); - ctx->H[ 3] = m512_const1_64( 0x4C4D4E4F4C4D4E4F ); - ctx->H[ 4] = m512_const1_64( 0x5051525350515253 ); - ctx->H[ 5] = m512_const1_64( 0x5455565754555657 ); - ctx->H[ 6] = m512_const1_64( 0x58595A5B58595A5B ); - ctx->H[ 7] = m512_const1_64( 0x5C5D5E5F5C5D5E5F ); - ctx->H[ 8] = m512_const1_64( 0x6061626360616263 ); - ctx->H[ 9] = m512_const1_64( 0x6465666764656667 ); - ctx->H[10] = m512_const1_64( 0x68696A6B68696A6B ); - ctx->H[11] = m512_const1_64( 0x6C6D6E6F6C6D6E6F ); - ctx->H[12] = m512_const1_64( 0x7071727370717273 ); - ctx->H[13] = m512_const1_64( 0x7475767774757677 ); - ctx->H[14] = m512_const1_64( 0x78797A7B78797A7B ); - ctx->H[15] = 
m512_const1_64( 0x7C7D7E7F7C7D7E7F ); + ctx->H[ 0] = _mm512_set1_epi64( 0x4041424340414243 ); + ctx->H[ 1] = _mm512_set1_epi64( 0x4445464744454647 ); + ctx->H[ 2] = _mm512_set1_epi64( 0x48494A4B48494A4B ); + ctx->H[ 3] = _mm512_set1_epi64( 0x4C4D4E4F4C4D4E4F ); + ctx->H[ 4] = _mm512_set1_epi64( 0x5051525350515253 ); + ctx->H[ 5] = _mm512_set1_epi64( 0x5455565754555657 ); + ctx->H[ 6] = _mm512_set1_epi64( 0x58595A5B58595A5B ); + ctx->H[ 7] = _mm512_set1_epi64( 0x5C5D5E5F5C5D5E5F ); + ctx->H[ 8] = _mm512_set1_epi64( 0x6061626360616263 ); + ctx->H[ 9] = _mm512_set1_epi64( 0x6465666764656667 ); + ctx->H[10] = _mm512_set1_epi64( 0x68696A6B68696A6B ); + ctx->H[11] = _mm512_set1_epi64( 0x6C6D6E6F6C6D6E6F ); + ctx->H[12] = _mm512_set1_epi64( 0x7071727370717273 ); + ctx->H[13] = _mm512_set1_epi64( 0x7475767774757677 ); + ctx->H[14] = _mm512_set1_epi64( 0x78797A7B78797A7B ); + ctx->H[15] = _mm512_set1_epi64( 0x7C7D7E7F7C7D7E7F ); ctx->ptr = 0; ctx->bit_count = 0; } @@ -1531,7 +1531,7 @@ void bmw256_16way_close( bmw256_16way_context *ctx, void *dst ) buf = ctx->buf; ptr = ctx->ptr; - buf[ ptr>>2 ] = m512_const1_64( 0x0000008000000080 ); + buf[ ptr>>2 ] = _mm512_set1_epi64( 0x0000008000000080 ); ptr += 4; h = ctx->H; diff --git a/algo/bmw/bmw512-hash-4way.c b/algo/bmw/bmw512-hash-4way.c index 02dd71de..81378a0c 100644 --- a/algo/bmw/bmw512-hash-4way.c +++ b/algo/bmw/bmw512-hash-4way.c @@ -896,22 +896,22 @@ static const __m256i final_b[16] = static void bmw64_4way_init( bmw_4way_big_context *sc, const sph_u64 *iv ) { - sc->H[ 0] = m256_const1_64( 0x8081828384858687 ); - sc->H[ 1] = m256_const1_64( 0x88898A8B8C8D8E8F ); - sc->H[ 2] = m256_const1_64( 0x9091929394959697 ); - sc->H[ 3] = m256_const1_64( 0x98999A9B9C9D9E9F ); - sc->H[ 4] = m256_const1_64( 0xA0A1A2A3A4A5A6A7 ); - sc->H[ 5] = m256_const1_64( 0xA8A9AAABACADAEAF ); - sc->H[ 6] = m256_const1_64( 0xB0B1B2B3B4B5B6B7 ); - sc->H[ 7] = m256_const1_64( 0xB8B9BABBBCBDBEBF ); - sc->H[ 8] = m256_const1_64( 0xC0C1C2C3C4C5C6C7 ); - 
sc->H[ 9] = m256_const1_64( 0xC8C9CACBCCCDCECF ); - sc->H[10] = m256_const1_64( 0xD0D1D2D3D4D5D6D7 ); - sc->H[11] = m256_const1_64( 0xD8D9DADBDCDDDEDF ); - sc->H[12] = m256_const1_64( 0xE0E1E2E3E4E5E6E7 ); - sc->H[13] = m256_const1_64( 0xE8E9EAEBECEDEEEF ); - sc->H[14] = m256_const1_64( 0xF0F1F2F3F4F5F6F7 ); - sc->H[15] = m256_const1_64( 0xF8F9FAFBFCFDFEFF ); + sc->H[ 0] = _mm256_set1_epi64x( 0x8081828384858687 ); + sc->H[ 1] = _mm256_set1_epi64x( 0x88898A8B8C8D8E8F ); + sc->H[ 2] = _mm256_set1_epi64x( 0x9091929394959697 ); + sc->H[ 3] = _mm256_set1_epi64x( 0x98999A9B9C9D9E9F ); + sc->H[ 4] = _mm256_set1_epi64x( 0xA0A1A2A3A4A5A6A7 ); + sc->H[ 5] = _mm256_set1_epi64x( 0xA8A9AAABACADAEAF ); + sc->H[ 6] = _mm256_set1_epi64x( 0xB0B1B2B3B4B5B6B7 ); + sc->H[ 7] = _mm256_set1_epi64x( 0xB8B9BABBBCBDBEBF ); + sc->H[ 8] = _mm256_set1_epi64x( 0xC0C1C2C3C4C5C6C7 ); + sc->H[ 9] = _mm256_set1_epi64x( 0xC8C9CACBCCCDCECF ); + sc->H[10] = _mm256_set1_epi64x( 0xD0D1D2D3D4D5D6D7 ); + sc->H[11] = _mm256_set1_epi64x( 0xD8D9DADBDCDDDEDF ); + sc->H[12] = _mm256_set1_epi64x( 0xE0E1E2E3E4E5E6E7 ); + sc->H[13] = _mm256_set1_epi64x( 0xE8E9EAEBECEDEEEF ); + sc->H[14] = _mm256_set1_epi64x( 0xF0F1F2F3F4F5F6F7 ); + sc->H[15] = _mm256_set1_epi64x( 0xF8F9FAFBFCFDFEFF ); sc->ptr = 0; sc->bit_count = 0; } @@ -967,7 +967,7 @@ bmw64_4way_close(bmw_4way_big_context *sc, unsigned ub, unsigned n, buf = sc->buf; ptr = sc->ptr; - buf[ ptr>>3 ] = m256_const1_64( 0x80 ); + buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 ); ptr += 8; h = sc->H; @@ -1379,22 +1379,22 @@ static const __m512i final_b8[16] = void bmw512_8way_init( bmw512_8way_context *ctx ) //bmw64_4way_init( bmw_4way_big_context *sc, const sph_u64 *iv ) { - ctx->H[ 0] = m512_const1_64( 0x8081828384858687 ); - ctx->H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F ); - ctx->H[ 2] = m512_const1_64( 0x9091929394959697 ); - ctx->H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F ); - ctx->H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 ); - ctx->H[ 5] = m512_const1_64( 
0xA8A9AAABACADAEAF ); - ctx->H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 ); - ctx->H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF ); - ctx->H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 ); - ctx->H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF ); - ctx->H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 ); - ctx->H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF ); - ctx->H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 ); - ctx->H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF ); - ctx->H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 ); - ctx->H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF ); + ctx->H[ 0] = _mm512_set1_epi64( 0x8081828384858687 ); + ctx->H[ 1] = _mm512_set1_epi64( 0x88898A8B8C8D8E8F ); + ctx->H[ 2] = _mm512_set1_epi64( 0x9091929394959697 ); + ctx->H[ 3] = _mm512_set1_epi64( 0x98999A9B9C9D9E9F ); + ctx->H[ 4] = _mm512_set1_epi64( 0xA0A1A2A3A4A5A6A7 ); + ctx->H[ 5] = _mm512_set1_epi64( 0xA8A9AAABACADAEAF ); + ctx->H[ 6] = _mm512_set1_epi64( 0xB0B1B2B3B4B5B6B7 ); + ctx->H[ 7] = _mm512_set1_epi64( 0xB8B9BABBBCBDBEBF ); + ctx->H[ 8] = _mm512_set1_epi64( 0xC0C1C2C3C4C5C6C7 ); + ctx->H[ 9] = _mm512_set1_epi64( 0xC8C9CACBCCCDCECF ); + ctx->H[10] = _mm512_set1_epi64( 0xD0D1D2D3D4D5D6D7 ); + ctx->H[11] = _mm512_set1_epi64( 0xD8D9DADBDCDDDEDF ); + ctx->H[12] = _mm512_set1_epi64( 0xE0E1E2E3E4E5E6E7 ); + ctx->H[13] = _mm512_set1_epi64( 0xE8E9EAEBECEDEEEF ); + ctx->H[14] = _mm512_set1_epi64( 0xF0F1F2F3F4F5F6F7 ); + ctx->H[15] = _mm512_set1_epi64( 0xF8F9FAFBFCFDFEFF ); ctx->ptr = 0; ctx->bit_count = 0; } @@ -1448,7 +1448,7 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst ) buf = ctx->buf; ptr = ctx->ptr; - buf[ ptr>>3 ] = m512_const1_64( 0x80 ); + buf[ ptr>>3 ] = _mm512_set1_epi64( 0x80 ); ptr += 8; h = ctx->H; @@ -1483,22 +1483,22 @@ void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data, // Init - H[ 0] = m512_const1_64( 0x8081828384858687 ); - H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F ); - H[ 2] = m512_const1_64( 0x9091929394959697 ); - H[ 3] = m512_const1_64( 
0x98999A9B9C9D9E9F ); - H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 ); - H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF ); - H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 ); - H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF ); - H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 ); - H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF ); - H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 ); - H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF ); - H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 ); - H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF ); - H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 ); - H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF ); + H[ 0] = _mm512_set1_epi64( 0x8081828384858687 ); + H[ 1] = _mm512_set1_epi64( 0x88898A8B8C8D8E8F ); + H[ 2] = _mm512_set1_epi64( 0x9091929394959697 ); + H[ 3] = _mm512_set1_epi64( 0x98999A9B9C9D9E9F ); + H[ 4] = _mm512_set1_epi64( 0xA0A1A2A3A4A5A6A7 ); + H[ 5] = _mm512_set1_epi64( 0xA8A9AAABACADAEAF ); + H[ 6] = _mm512_set1_epi64( 0xB0B1B2B3B4B5B6B7 ); + H[ 7] = _mm512_set1_epi64( 0xB8B9BABBBCBDBEBF ); + H[ 8] = _mm512_set1_epi64( 0xC0C1C2C3C4C5C6C7 ); + H[ 9] = _mm512_set1_epi64( 0xC8C9CACBCCCDCECF ); + H[10] = _mm512_set1_epi64( 0xD0D1D2D3D4D5D6D7 ); + H[11] = _mm512_set1_epi64( 0xD8D9DADBDCDDDEDF ); + H[12] = _mm512_set1_epi64( 0xE0E1E2E3E4E5E6E7 ); + H[13] = _mm512_set1_epi64( 0xE8E9EAEBECEDEEEF ); + H[14] = _mm512_set1_epi64( 0xF0F1F2F3F4F5F6F7 ); + H[15] = _mm512_set1_epi64( 0xF8F9FAFBFCFDFEFF ); // Update @@ -1530,7 +1530,7 @@ void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data, __m512i h1[16], h2[16]; size_t u, v; - buf[ ptr>>3 ] = m512_const1_64( 0x80 ); + buf[ ptr>>3 ] = _mm512_set1_epi64( 0x80 ); ptr += 8; if ( ptr > (buf_size - 8) ) diff --git a/algo/cubehash/cube-hash-2way.c b/algo/cubehash/cube-hash-2way.c index 46fc14be..5888c2a4 100644 --- a/algo/cubehash/cube-hash-2way.c +++ b/algo/cubehash/cube-hash-2way.c @@ -423,21 +423,6 @@ int cube_4way_update_close( cube_4way_context *sp, void *output, // 2 way 128 -// This isn't expected to be 
used with AVX512 so HW rotate intruction -// is assumed not avaiable. -// Use double buffering to optimize serial bit rotations. Full double -// buffering isn't practical because it needs twice as many registers -// with AVX2 having only half as many as AVX512. -#define ROL2( out0, out1, in0, in1, c ) \ -{ \ - __m256i t0 = _mm256_slli_epi32( in0, c ); \ - __m256i t1 = _mm256_slli_epi32( in1, c ); \ - out0 = _mm256_srli_epi32( in0, 32-(c) ); \ - out1 = _mm256_srli_epi32( in1, 32-(c) ); \ - out0 = _mm256_or_si256( out0, t0 ); \ - out1 = _mm256_or_si256( out1, t1 ); \ -} - static void transform_2way( cube_2way_context *sp ) { int r; @@ -460,8 +445,10 @@ static void transform_2way( cube_2way_context *sp ) x5 = _mm256_add_epi32( x1, x5 ); x6 = _mm256_add_epi32( x2, x6 ); x7 = _mm256_add_epi32( x3, x7 ); - ROL2( y0, y1, x2, x3, 7 ); - ROL2( x2, x3, x0, x1, 7 ); + y0 = mm256_rol_32( x2, 7 ); + y1 = mm256_rol_32( x3, 7 ); + x2 = mm256_rol_32( x0, 7 ); + x3 = mm256_rol_32( x1, 7 ); x0 = _mm256_xor_si256( y0, x4 ); x1 = _mm256_xor_si256( y1, x5 ); x2 = _mm256_xor_si256( x2, x6 ); @@ -474,8 +461,10 @@ static void transform_2way( cube_2way_context *sp ) x5 = _mm256_add_epi32( x1, x5 ); x6 = _mm256_add_epi32( x2, x6 ); x7 = _mm256_add_epi32( x3, x7 ); - ROL2( y0, x1, x1, x0, 11 ); - ROL2( y1, x3, x3, x2, 11 ); + y0 = mm256_rol_32( x1, 11 ); + x1 = mm256_rol_32( x0, 11 ); + y1 = mm256_rol_32( x3, 11 ); + x3 = mm256_rol_32( x2, 11 ); x0 = _mm256_xor_si256( y0, x4 ); x1 = _mm256_xor_si256( x1, x5 ); x2 = _mm256_xor_si256( y1, x6 ); diff --git a/algo/cubehash/cubehash_sse2.c b/algo/cubehash/cubehash_sse2.c index 5ea1b6f6..20967fbf 100644 --- a/algo/cubehash/cubehash_sse2.c +++ b/algo/cubehash/cubehash_sse2.c @@ -32,7 +32,7 @@ static void transform( cubehashParam *sp ) { x1 = _mm512_add_epi32( x0, x1 ); x0 = mm512_swap_256( x0 ); - x0 = mm512_rol_32( x0, 7 ); + x0 = mm512_rol_32( x0, 7 ); x0 = _mm512_xor_si512( x0, x1 ); x1 = mm512_swap128_64( x1 ); x1 = _mm512_add_epi32( x0, x1 ); 
@@ -58,19 +58,18 @@ static void transform( cubehashParam *sp ) { x2 = _mm256_add_epi32( x0, x2 ); x3 = _mm256_add_epi32( x1, x3 ); - y0 = x0; - x0 = mm256_rol_32( x1, 7 ); - x1 = mm256_rol_32( y0, 7 ); - x0 = _mm256_xor_si256( x0, x2 ); - x1 = _mm256_xor_si256( x1, x3 ); + y0 = mm256_rol_32( x1, 7 ); + y1 = mm256_rol_32( x0, 7 ); + x0 = _mm256_xor_si256( y0, x2 ); + x1 = _mm256_xor_si256( y1, x3 ); x2 = mm256_swap128_64( x2 ); x3 = mm256_swap128_64( x3 ); x2 = _mm256_add_epi32( x0, x2 ); x3 = _mm256_add_epi32( x1, x3 ); - y0 = mm256_swap_128( x0 ); - y1 = mm256_swap_128( x1 ); - x0 = mm256_rol_32( y0, 11 ); - x1 = mm256_rol_32( y1, 11 ); + x0 = mm256_swap_128( x0 ); + x1 = mm256_swap_128( x1 ); + x0 = mm256_rol_32( x0, 11 ); + x1 = mm256_rol_32( x1, 11 ); x0 = _mm256_xor_si256( x0, x2 ); x1 = _mm256_xor_si256( x1, x3 ); x2 = mm256_swap64_32( x2 ); @@ -94,47 +93,48 @@ static void transform( cubehashParam *sp ) x6 = _mm_load_si128( (__m128i*)sp->x + 6 ); x7 = _mm_load_si128( (__m128i*)sp->x + 7 ); - for (r = 0; r < rounds; ++r) { - x4 = _mm_add_epi32(x0, x4); - x5 = _mm_add_epi32(x1, x5); - x6 = _mm_add_epi32(x2, x6); - x7 = _mm_add_epi32(x3, x7); - y0 = x2; - y1 = x3; - y2 = x0; - y3 = x1; - x0 = _mm_xor_si128(_mm_slli_epi32(y0, 7), _mm_srli_epi32(y0, 25)); - x1 = _mm_xor_si128(_mm_slli_epi32(y1, 7), _mm_srli_epi32(y1, 25)); - x2 = _mm_xor_si128(_mm_slli_epi32(y2, 7), _mm_srli_epi32(y2, 25)); - x3 = _mm_xor_si128(_mm_slli_epi32(y3, 7), _mm_srli_epi32(y3, 25)); - x0 = _mm_xor_si128(x0, x4); - x1 = _mm_xor_si128(x1, x5); - x2 = _mm_xor_si128(x2, x6); - x3 = _mm_xor_si128(x3, x7); - x4 = _mm_shuffle_epi32(x4, 0x4e); - x5 = _mm_shuffle_epi32(x5, 0x4e); - x6 = _mm_shuffle_epi32(x6, 0x4e); - x7 = _mm_shuffle_epi32(x7, 0x4e); - x4 = _mm_add_epi32(x0, x4); - x5 = _mm_add_epi32(x1, x5); - x6 = _mm_add_epi32(x2, x6); - x7 = _mm_add_epi32(x3, x7); - y0 = x1; - y1 = x0; - y2 = x3; - y3 = x2; - x0 = _mm_xor_si128(_mm_slli_epi32(y0, 11), _mm_srli_epi32(y0, 21)); - x1 = 
_mm_xor_si128(_mm_slli_epi32(y1, 11), _mm_srli_epi32(y1, 21)); - x2 = _mm_xor_si128(_mm_slli_epi32(y2, 11), _mm_srli_epi32(y2, 21)); - x3 = _mm_xor_si128(_mm_slli_epi32(y3, 11), _mm_srli_epi32(y3, 21)); - x0 = _mm_xor_si128(x0, x4); - x1 = _mm_xor_si128(x1, x5); - x2 = _mm_xor_si128(x2, x6); - x3 = _mm_xor_si128(x3, x7); - x4 = _mm_shuffle_epi32(x4, 0xb1); - x5 = _mm_shuffle_epi32(x5, 0xb1); - x6 = _mm_shuffle_epi32(x6, 0xb1); - x7 = _mm_shuffle_epi32(x7, 0xb1); + for ( r = 0; r < rounds; ++r ) + { + x4 = _mm_add_epi32( x0, x4 ); + x5 = _mm_add_epi32( x1, x5 ); + x6 = _mm_add_epi32( x2, x6 ); + x7 = _mm_add_epi32( x3, x7 ); + y0 = x2; + y1 = x3; + y2 = x0; + y3 = x1; + x0 = mm128_rol_32( y0, 7 ); + x1 = mm128_rol_32( y1, 7 ); + x2 = mm128_rol_32( y2, 7 ); + x3 = mm128_rol_32( y3, 7 ); + x0 = _mm_xor_si128( x0, x4 ); + x1 = _mm_xor_si128( x1, x5 ); + x2 = _mm_xor_si128( x2, x6 ); + x3 = _mm_xor_si128( x3, x7 ); + x4 = _mm_shuffle_epi32( x4, 0x4e ); + x5 = _mm_shuffle_epi32( x5, 0x4e ); + x6 = _mm_shuffle_epi32( x6, 0x4e ); + x7 = _mm_shuffle_epi32( x7, 0x4e ); + x4 = _mm_add_epi32( x0, x4 ); + x5 = _mm_add_epi32( x1, x5 ); + x6 = _mm_add_epi32( x2, x6 ); + x7 = _mm_add_epi32( x3, x7 ); + y0 = x1; + y1 = x0; + y2 = x3; + y3 = x2; + x0 = mm128_rol_32( y0, 11 ); + x1 = mm128_rol_32( y1, 11 ); + x2 = mm128_rol_32( y2, 11 ); + x3 = mm128_rol_32( y3, 11 ); + x0 = _mm_xor_si128( x0, x4 ); + x1 = _mm_xor_si128( x1, x5 ); + x2 = _mm_xor_si128( x2, x6 ); + x3 = _mm_xor_si128( x3, x7 ); + x4 = _mm_shuffle_epi32( x4, 0xb1 ); + x5 = _mm_shuffle_epi32( x5, 0xb1 ); + x6 = _mm_shuffle_epi32( x6, 0xb1 ); + x7 = _mm_shuffle_epi32( x7, 0xb1 ); } _mm_store_si128( (__m128i*)sp->x, x0 ); @@ -180,25 +180,25 @@ int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes) if ( hashbitlen == 512 ) { - x[0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 ); - x[1] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 ); - x[2] = m128_const_64( 0x825B453797CF0BEF, 
0xA647A8B34D42C787 ); - x[3] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 ); - x[4] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 ); - x[5] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 ); - x[6] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B ); - x[7] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 ); + x[0] = _mm_set_epi64x( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 ); + x[1] = _mm_set_epi64x( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 ); + x[2] = _mm_set_epi64x( 0x825B453797CF0BEF, 0xA647A8B34D42C787 ); + x[3] = _mm_set_epi64x( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 ); + x[4] = _mm_set_epi64x( 0xB64445321B017BEF, 0x148FE485FCD398D9 ); + x[5] = _mm_set_epi64x( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 ); + x[6] = _mm_set_epi64x( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B ); + x[7] = _mm_set_epi64x( 0xD43E3B447795D246, 0xE7989AF11921C8F7 ); } else { - x[0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 ); - x[1] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B ); - x[2] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 ); - x[3] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 ); - x[4] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 ); - x[5] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 ); - x[6] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE ); - x[7] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB ); + x[0] = _mm_set_epi64x( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 ); + x[1] = _mm_set_epi64x( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B ); + x[2] = _mm_set_epi64x( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 ); + x[3] = _mm_set_epi64x( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 ); + x[4] = _mm_set_epi64x( 0xF0B266796C859D41, 0x6107FBD5D89041C3 ); + x[5] = _mm_set_epi64x( 0x93CB628565C892FD, 0x5FA2560309392549 ); + x[6] = _mm_set_epi64x( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE ); + x[7] = _mm_set_epi64x( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB ); } return SUCCESS; @@ -234,10 +234,10 
@@ int cubehashDigest( cubehashParam *sp, byte *digest ) // pos is zero for 64 byte data, 1 for 80 byte data. sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], - m128_const_64( 0, 0x80 ) ); + _mm_set_epi64x( 0, 0x80 ) ); transform( sp ); - sp->x[7] = _mm_xor_si128( sp->x[7], m128_const_64( 0x100000000, 0 ) ); + sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi64x( 0x100000000, 0 ) ); transform( sp ); transform( sp ); transform( sp ); @@ -279,10 +279,10 @@ int cubehashUpdateDigest( cubehashParam *sp, byte *digest, // pos is zero for 64 byte data, 1 for 80 byte data. sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], - m128_const_64( 0, 0x80 ) ); + _mm_set_epi64x( 0, 0x80 ) ); transform( sp ); - sp->x[7] = _mm_xor_si128( sp->x[7], m128_const_64( 0x100000000, 0 ) ); + sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi64x( 0x100000000, 0 ) ); transform( sp ); transform( sp ); @@ -313,25 +313,25 @@ int cubehash_full( cubehashParam *sp, byte *digest, int hashbitlen, if ( hashbitlen == 512 ) { - x[0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 ); - x[1] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 ); - x[2] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 ); - x[3] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 ); - x[4] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 ); - x[5] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 ); - x[6] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B ); - x[7] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 ); + x[0] = _mm_set_epi64x( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 ); + x[1] = _mm_set_epi64x( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 ); + x[2] = _mm_set_epi64x( 0x825B453797CF0BEF, 0xA647A8B34D42C787 ); + x[3] = _mm_set_epi64x( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 ); + x[4] = _mm_set_epi64x( 0xB64445321B017BEF, 0x148FE485FCD398D9 ); + x[5] = _mm_set_epi64x( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 ); + x[6] = _mm_set_epi64x( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B ); + 
x[7] = _mm_set_epi64x( 0xD43E3B447795D246, 0xE7989AF11921C8F7 ); } else { - x[0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 ); - x[1] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B ); - x[2] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 ); - x[3] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 ); - x[4] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 ); - x[5] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 ); - x[6] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE ); - x[7] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB ); + x[0] = _mm_set_epi64x( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 ); + x[1] = _mm_set_epi64x( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B ); + x[2] = _mm_set_epi64x( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 ); + x[3] = _mm_set_epi64x( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 ); + x[4] = _mm_set_epi64x( 0xF0B266796C859D41, 0x6107FBD5D89041C3 ); + x[5] = _mm_set_epi64x( 0x93CB628565C892FD, 0x5FA2560309392549 ); + x[6] = _mm_set_epi64x( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE ); + x[7] = _mm_set_epi64x( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB ); } @@ -358,10 +358,10 @@ int cubehash_full( cubehashParam *sp, byte *digest, int hashbitlen, // pos is zero for 64 byte data, 1 for 80 byte data. 
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], - m128_const_64( 0, 0x80 ) ); + _mm_set_epi64x( 0, 0x80 ) ); transform( sp ); - sp->x[7] = _mm_xor_si128( sp->x[7], m128_const_64( 0x100000000, 0 ) ); + sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi64x( 0x100000000, 0 ) ); transform( sp ); transform( sp ); diff --git a/algo/echo/aes_ni/hash.c b/algo/echo/aes_ni/hash.c index ca1688a4..605508f0 100644 --- a/algo/echo/aes_ni/hash.c +++ b/algo/echo/aes_ni/hash.c @@ -566,16 +566,16 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval, state->uHashSize = 256; state->uBlockLength = 192; state->uRounds = 8; - state->hashsize = m128_const_64( 0, 0x100 ); - state->const1536 = m128_const_64( 0, 0x600 ); + state->hashsize = _mm_set_epi64x( 0, 0x100 ); + state->const1536 = _mm_set_epi64x( 0, 0x600 ); break; case 512: state->uHashSize = 512; state->uBlockLength = 128; state->uRounds = 10; - state->hashsize = m128_const_64( 0, 0x200 ); - state->const1536 = m128_const_64( 0, 0x400 ); + state->hashsize = _mm_set_epi64x( 0, 0x200 ); + state->const1536 = _mm_set_epi64x( 0, 0x400 ); break; default: diff --git a/algo/echo/echo-hash-4way.c b/algo/echo/echo-hash-4way.c index 462bd7fe..7891ec52 100644 --- a/algo/echo/echo-hash-4way.c +++ b/algo/echo/echo-hash-4way.c @@ -469,8 +469,7 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize, t1 = _mm256_and_si256( t1, lsbmask_2way ); \ t2 = _mm256_shuffle_epi8( mul2mask_2way, t1 ); \ s2 = _mm256_xor_si256( s2, t2 );\ - state2[ 0 ][ j ] = _mm256_xor_si256( state2[ 0 ][ j ], \ - _mm256_xor_si256( s2, state1[ 1 ][ j1 ] ) ); \ + state2[ 0 ][ j ] = mm256_xor3( state2[ 0 ][ j ], s2, state1[ 1 ][ j1 ] ); \ state2[ 1 ][ j ] = _mm256_xor_si256( state2[ 1 ][ j ], s2 ); \ state2[ 2 ][ j ] = _mm256_xor_si256( state2[ 2 ][ j ], state1[ 1 ][ j1 ] ); \ state2[ 3 ][ j ] = _mm256_xor_si256( state2[ 3 ][ j ], state1[ 1 ][ j1 ] ); \ @@ -480,8 +479,7 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int 
nHashSize, t2 = _mm256_shuffle_epi8( mul2mask_2way, t1 ); \ s2 = _mm256_xor_si256( s2, t2 ); \ state2[ 0 ][ j ] = _mm256_xor_si256( state2[ 0 ][ j ], state1[ 2 ][ j2 ] ); \ - state2[ 1 ][ j ] = _mm256_xor_si256( state2[ 1 ][ j ], \ - _mm256_xor_si256( s2, state1[ 2 ][ j2 ] ) ); \ + state2[ 1 ][ j ] = mm256_xor3( state2[ 1 ][ j ], s2, state1[ 2 ][ j2 ] ); \ state2[ 2 ][ j ] = _mm256_xor_si256( state2[ 2 ][ j ], s2 ); \ state2[ 3 ][ j ] = _mm256_xor_si256( state2[ 3][ j ], state1[ 2 ][ j2 ] ); \ s2 = _mm256_add_epi8( state1[ 3 ][ j3 ], state1[ 3 ][ j3 ] ); \ @@ -491,8 +489,7 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize, s2 = _mm256_xor_si256( s2, t2 ); \ state2[ 0 ][ j ] = _mm256_xor_si256( state2[ 0 ][ j ], state1[ 3 ][ j3 ] ); \ state2[ 1 ][ j ] = _mm256_xor_si256( state2[ 1 ][ j ], state1[ 3 ][ j3 ] ); \ - state2[ 2 ][ j ] = _mm256_xor_si256( state2[ 2 ][ j ], \ - _mm256_xor_si256( s2, state1[ 3 ][ j3] ) ); \ + state2[ 2 ][ j ] = mm256_xor3( state2[ 2 ][ j ], s2, state1[ 3 ][ j3] ); \ state2[ 3 ][ j ] = _mm256_xor_si256( state2[ 3 ][ j ], s2 ); \ } while(0) diff --git a/algo/fugue/fugue-aesni.c b/algo/fugue/fugue-aesni.c index 8f0af139..42d7b1d5 100644 --- a/algo/fugue/fugue-aesni.c +++ b/algo/fugue/fugue-aesni.c @@ -33,11 +33,11 @@ MYALIGN const unsigned long long _supermix4b[] = {0x07020d08080e0d0d, 0x07070908 MYALIGN const unsigned long long _supermix4c[] = {0x0706050403020000, 0x0302000007060504}; MYALIGN const unsigned long long _supermix7a[] = {0x010c0b060d080702, 0x0904030e03000104}; MYALIGN const unsigned long long _supermix7b[] = {0x8080808080808080, 0x0504070605040f06}; -MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E}; -MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2}; -MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8}; -MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 
9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5}; -MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; +//MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E}; +//MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2}; +//MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8}; +//MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5}; +//MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c}; MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000}; MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000}; @@ -131,7 +131,7 @@ MYALIGN const unsigned int _IV512[] = { t1 = _mm_srli_epi16(t0, 6);\ t1 = _mm_and_si128(t1, M128(_lsbmask2));\ t3 = _mm_xor_si128(t3, _mm_shuffle_epi8(M128(_mul2mask), t1));\ - t0 = _mm_xor_si128(t4, _mm_shuffle_epi8(M128(_mul4mask), t1)) + t0 = _mm_xor_si128(t4, _mm_shuffle_epi8(M128(_mul4mask), t1)) /* #define PRESUPERMIX(x, t1, s1, s2, t2)\ diff --git a/algo/groestl/aes_ni/groestl-intr-aes.h b/algo/groestl/aes_ni/groestl-intr-aes.h index f2d376e9..253b5695 100644 --- a/algo/groestl/aes_ni/groestl-intr-aes.h +++ b/algo/groestl/aes_ni/groestl-intr-aes.h @@ -139,7 +139,7 @@ static const __m128i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 }; \ /* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\ /* compute w_i : add y_{i+4} */\ - b1 = m128_const1_64( 0x1b1b1b1b1b1b1b1b );\ + b1 = _mm_set1_epi64x( 0x1b1b1b1b1b1b1b1b );\ MUL2(a0, b0, b1);\ a0 = _mm_xor_si128(a0, TEMP0);\ MUL2(a1, b0, b1);\ @@ -237,7 +237,7 @@ static const __m128i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 }; \ /* compute z_i 
: double x_i using temp xmm8 and 1B xmm9 */\ /* compute w_i : add y_{i+4} */\ - b1 = m128_const1_64( 0x1b1b1b1b1b1b1b1b );\ + b1 = _mm_set1_epi64x( 0x1b1b1b1b1b1b1b1b );\ MUL2(a0, b0, b1);\ a0 = _mm_xor_si128(a0, TEMP0);\ MUL2(a1, b0, b1);\ diff --git a/algo/groestl/aes_ni/groestl256-intr-aes.h b/algo/groestl/aes_ni/groestl256-intr-aes.h index a8e76747..07d87b5b 100644 --- a/algo/groestl/aes_ni/groestl256-intr-aes.h +++ b/algo/groestl/aes_ni/groestl256-intr-aes.h @@ -128,7 +128,7 @@ static const __m128i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e }; \ /* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\ /* compute w_i : add y_{i+4} */\ - b1 = m128_const1_64( 0x1b1b1b1b1b1b1b1b );\ + b1 = _mm_set1_epi64x( 0x1b1b1b1b1b1b1b1b );\ MUL2(a0, b0, b1);\ a0 = _mm_xor_si128(a0, TEMP0);\ MUL2(a1, b0, b1);\ @@ -226,7 +226,7 @@ static const __m128i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e }; \ /* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\ /* compute w_i : add y_{i+4} */\ - b1 = m128_const1_64( 0x1b1b1b1b1b1b1b1b );\ + b1 = _mm_set1_epi64x( 0x1b1b1b1b1b1b1b1b );\ MUL2(a0, b0, b1);\ a0 = _mm_xor_si128(a0, TEMP0);\ MUL2(a1, b0, b1);\ @@ -275,7 +275,7 @@ static const __m128i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e }; */ #define ROUND(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\ /* AddRoundConstant */\ - b1 = m128_const_64( 0xffffffffffffffff, 0 ); \ + b1 = _mm_set_epi64x( 0xffffffffffffffff, 0 ); \ a0 = _mm_xor_si128( a0, casti_m128i( round_const_l0, i ) ); \ a1 = _mm_xor_si128( a1, b1 ); \ a2 = _mm_xor_si128( a2, b1 ); \ diff --git a/algo/groestl/aes_ni/hash-groestl.c b/algo/groestl/aes_ni/hash-groestl.c index cf40a250..fd5de6a9 100644 --- a/algo/groestl/aes_ni/hash-groestl.c +++ b/algo/groestl/aes_ni/hash-groestl.c @@ -31,7 +31,7 @@ HashReturn_gr init_groestl( hashState_groestl* ctx, int hashlen ) } // The only non-zero in the IV is len. It can be hard coded. 
- ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 ); + ctx->chaining[ 6 ] = _mm_set_epi64x( 0x0200000000000000, 0 ); ctx->buf_ptr = 0; ctx->rem_ptr = 0; @@ -48,7 +48,7 @@ HashReturn_gr reinit_groestl( hashState_groestl* ctx ) ctx->chaining[i] = _mm_setzero_si128(); ctx->buffer[i] = _mm_setzero_si128(); } - ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 ); + ctx->chaining[ 6 ] = _mm_set_epi64x( 0x0200000000000000, 0 ); ctx->buf_ptr = 0; ctx->rem_ptr = 0; @@ -116,7 +116,7 @@ HashReturn_gr final_groestl( hashState_groestl* ctx, void* output ) else { // add first padding - ctx->buffer[rem_ptr] = m128_const_64( 0, 0x80 ); + ctx->buffer[rem_ptr] = _mm_set_epi64x( 0, 0x80 ); // add zero padding for ( i = rem_ptr + 1; i < SIZE512 - 1; i++ ) ctx->buffer[i] = _mm_setzero_si128(); @@ -148,7 +148,7 @@ int groestl512_full( hashState_groestl* ctx, void* output, ctx->chaining[i] = _mm_setzero_si128(); ctx->buffer[i] = _mm_setzero_si128(); } - ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 ); + ctx->chaining[ 6 ] = _mm_set_epi64x( 0x0200000000000000, 0 ); ctx->buf_ptr = 0; // --- update --- @@ -182,7 +182,7 @@ int groestl512_full( hashState_groestl* ctx, void* output, else { // add first padding - ctx->buffer[i] = m128_const_64( 0, 0x80 ); + ctx->buffer[i] = _mm_set_epi64x( 0, 0x80 ); // add zero padding for ( i += 1; i < SIZE512 - 1; i++ ) ctx->buffer[i] = _mm_setzero_si128(); @@ -239,7 +239,7 @@ HashReturn_gr update_and_final_groestl( hashState_groestl* ctx, void* output, else { // add first padding - ctx->buffer[i] = m128_const_64( 0, 0x80 ); + ctx->buffer[i] = _mm_set_epi64x( 0, 0x80 ); // add zero padding for ( i += 1; i < SIZE512 - 1; i++ ) ctx->buffer[i] = _mm_setzero_si128(); diff --git a/algo/groestl/aes_ni/hash-groestl256.c b/algo/groestl/aes_ni/hash-groestl256.c index e94d977f..dede1c3c 100644 --- a/algo/groestl/aes_ni/hash-groestl256.c +++ b/algo/groestl/aes_ni/hash-groestl256.c @@ -46,7 +46,7 @@ HashReturn_gr 
reinit_groestl256(hashState_groestl256* ctx) ctx->buffer[i] = _mm_setzero_si128(); } - ctx->chaining[ 3 ] = m128_const_64( 0, 0x0100000000000000 ); + ctx->chaining[ 3 ] = _mm_set_epi64x( 0, 0x0100000000000000 ); ctx->buf_ptr = 0; ctx->rem_ptr = 0; diff --git a/algo/groestl/groestl256-intr-4way.h b/algo/groestl/groestl256-intr-4way.h index 540e092c..1981a69b 100644 --- a/algo/groestl/groestl256-intr-4way.h +++ b/algo/groestl/groestl256-intr-4way.h @@ -539,7 +539,7 @@ static const __m256i SUBSH_MASK7_2WAY = j = _mm256_cmpgt_epi8(j, i );\ i = _mm256_add_epi8(i, i);\ j = _mm256_and_si256(j, k);\ - i = _mm256_xor_si256(i, j);\ + i = mm256_xorand( i, j, k );\ } #define MixBytes_2way(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\ @@ -550,7 +550,7 @@ static const __m256i SUBSH_MASK7_2WAY = b0 = a2;\ a1 = _mm256_xor_si256(a1, a2);\ b1 = a3;\ - a2 = _mm256_xor_si256(a2, a3);\ + TEMP2 = _mm256_xor_si256(a2, a3);\ b2 = a4;\ a3 = _mm256_xor_si256(a3, a4);\ b3 = a5;\ @@ -562,34 +562,20 @@ static const __m256i SUBSH_MASK7_2WAY = a7 = _mm256_xor_si256(a7, b6);\ \ /* build y4 y5 y6 ... 
in regs xmm8, xmm9, xmm10 by adding t_i*/\ - b0 = _mm256_xor_si256(b0, a4);\ - b6 = _mm256_xor_si256(b6, a4);\ - b1 = _mm256_xor_si256(b1, a5);\ - b7 = _mm256_xor_si256(b7, a5);\ - b2 = _mm256_xor_si256(b2, a6);\ - b0 = _mm256_xor_si256(b0, a6);\ - /* spill values y_4, y_5 to memory */\ - TEMP0 = b0;\ - b3 = _mm256_xor_si256(b3, a7);\ - b1 = _mm256_xor_si256(b1, a7);\ - TEMP1 = b1;\ - b4 = _mm256_xor_si256(b4, a0);\ - b2 = _mm256_xor_si256(b2, a0);\ - /* save values t0, t1, t2 to xmm8, xmm9 and memory */\ - b0 = a0;\ - b5 = _mm256_xor_si256(b5, a1);\ - b3 = _mm256_xor_si256(b3, a1);\ - b1 = a1;\ - b6 = _mm256_xor_si256(b6, a2);\ - b4 = _mm256_xor_si256(b4, a2);\ - TEMP2 = a2;\ - b7 = _mm256_xor_si256(b7, a3);\ - b5 = _mm256_xor_si256(b5, a3);\ - \ + TEMP0 = mm256_xor3( b0, a4, a6 ); \ + TEMP1 = mm256_xor3( b1, a5, a7 ); \ + b2 = mm256_xor3( b2, a6, a0 ); \ + b0 = a0; \ + b3 = mm256_xor3( b3, a7, a1 ); \ + b1 = a1; \ + b6 = mm256_xor3( b6, a4, TEMP2 ); \ + b4 = mm256_xor3( b4, a0, TEMP2 ); \ + b7 = mm256_xor3( b7, a5, a3 ); \ + b5 = mm256_xor3( b5, a1, a3 ); \ /* compute x_i = t_i + t_{i+3} */\ a0 = _mm256_xor_si256(a0, a3);\ a1 = _mm256_xor_si256(a1, a4);\ - a2 = _mm256_xor_si256(a2, a5);\ + a2 = _mm256_xor_si256( TEMP2, a5);\ a3 = _mm256_xor_si256(a3, a6);\ a4 = _mm256_xor_si256(a4, a7);\ a5 = _mm256_xor_si256(a5, b0);\ @@ -671,7 +657,6 @@ static const __m256i SUBSH_MASK7_2WAY = \ /* MixBytes */\ MixBytes_2way(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\ -\ } /* 10 rounds, P and Q in parallel */ diff --git a/algo/groestl/groestl512-intr-4way.h b/algo/groestl/groestl512-intr-4way.h index 62eb0fda..33336a5c 100644 --- a/algo/groestl/groestl512-intr-4way.h +++ b/algo/groestl/groestl512-intr-4way.h @@ -710,7 +710,7 @@ static const __m256i SUBSH_MASK7_2WAY = b0 = a2;\ a1 = _mm256_xor_si256(a1, a2);\ b1 = a3;\ - a2 = _mm256_xor_si256(a2, a3);\ + TEMP2 = _mm256_xor_si256(a2, a3);\ b2 = a4;\ a3 = _mm256_xor_si256(a3, a4);\ b3 = a5;\ @@ -722,34 +722,23 
@@ static const __m256i SUBSH_MASK7_2WAY = a7 = _mm256_xor_si256(a7, b6);\ \ /* build y4 y5 y6 ... in regs xmm8, xmm9, xmm10 by adding t_i*/\ - b0 = _mm256_xor_si256(b0, a4);\ - b6 = _mm256_xor_si256(b6, a4);\ - b1 = _mm256_xor_si256(b1, a5);\ - b7 = _mm256_xor_si256(b7, a5);\ - b2 = _mm256_xor_si256(b2, a6);\ - b0 = _mm256_xor_si256(b0, a6);\ + TEMP0 = mm256_xor3( b0, a4, a6 ); \ /* spill values y_4, y_5 to memory */\ - TEMP0 = b0;\ - b3 = _mm256_xor_si256(b3, a7);\ - b1 = _mm256_xor_si256(b1, a7);\ - TEMP1 = b1;\ - b4 = _mm256_xor_si256(b4, a0);\ - b2 = _mm256_xor_si256(b2, a0);\ + TEMP1 = mm256_xor3( b1, a5, a7 ); \ + b2 = mm256_xor3( b2, a6, a0 ); \ /* save values t0, t1, t2 to xmm8, xmm9 and memory */\ - b0 = a0;\ - b5 = _mm256_xor_si256(b5, a1);\ - b3 = _mm256_xor_si256(b3, a1);\ - b1 = a1;\ - b6 = _mm256_xor_si256(b6, a2);\ - b4 = _mm256_xor_si256(b4, a2);\ - TEMP2 = a2;\ - b7 = _mm256_xor_si256(b7, a3);\ - b5 = _mm256_xor_si256(b5, a3);\ + b0 = a0; \ + b3 = mm256_xor3( b3, a7, a1 ); \ + b1 = a1; \ + b6 = mm256_xor3( b6, a4, TEMP2 ); \ + b4 = mm256_xor3( b4, a0, TEMP2 ); \ + b7 = mm256_xor3( b7, a5, a3 ); \ + b5 = mm256_xor3( b5, a1, a3 ); \ \ /* compute x_i = t_i + t_{i+3} */\ a0 = _mm256_xor_si256(a0, a3);\ a1 = _mm256_xor_si256(a1, a4);\ - a2 = _mm256_xor_si256(a2, a5);\ + a2 = _mm256_xor_si256( TEMP2, a5);\ a3 = _mm256_xor_si256(a3, a6);\ a4 = _mm256_xor_si256(a4, a7);\ a5 = _mm256_xor_si256(a5, b0);\ diff --git a/algo/hamsi/hamsi-hash-4way.c b/algo/hamsi/hamsi-hash-4way.c index 5e0a70ac..89f8646c 100644 --- a/algo/hamsi/hamsi-hash-4way.c +++ b/algo/hamsi/hamsi-hash-4way.c @@ -562,14 +562,14 @@ do { \ for ( int u = 0; u < 64; u++ ) \ { \ const __mmask8 dm = _mm512_cmplt_epi64_mask( db, zero ); \ - m0 = _mm512_mask_xor_epi64( m0, dm, m0, m512_const1_64( tp[0] ) ); \ - m1 = _mm512_mask_xor_epi64( m1, dm, m1, m512_const1_64( tp[1] ) ); \ - m2 = _mm512_mask_xor_epi64( m2, dm, m2, m512_const1_64( tp[2] ) ); \ - m3 = _mm512_mask_xor_epi64( m3, dm, m3, 
m512_const1_64( tp[3] ) ); \ - m4 = _mm512_mask_xor_epi64( m4, dm, m4, m512_const1_64( tp[4] ) ); \ - m5 = _mm512_mask_xor_epi64( m5, dm, m5, m512_const1_64( tp[5] ) ); \ - m6 = _mm512_mask_xor_epi64( m6, dm, m6, m512_const1_64( tp[6] ) ); \ - m7 = _mm512_mask_xor_epi64( m7, dm, m7, m512_const1_64( tp[7] ) ); \ + m0 = _mm512_mask_xor_epi64( m0, dm, m0, _mm512_set1_epi64( tp[0] ) ); \ + m1 = _mm512_mask_xor_epi64( m1, dm, m1, _mm512_set1_epi64( tp[1] ) ); \ + m2 = _mm512_mask_xor_epi64( m2, dm, m2, _mm512_set1_epi64( tp[2] ) ); \ + m3 = _mm512_mask_xor_epi64( m3, dm, m3, _mm512_set1_epi64( tp[3] ) ); \ + m4 = _mm512_mask_xor_epi64( m4, dm, m4, _mm512_set1_epi64( tp[4] ) ); \ + m5 = _mm512_mask_xor_epi64( m5, dm, m5, _mm512_set1_epi64( tp[5] ) ); \ + m6 = _mm512_mask_xor_epi64( m6, dm, m6, _mm512_set1_epi64( tp[6] ) ); \ + m7 = _mm512_mask_xor_epi64( m7, dm, m7, _mm512_set1_epi64( tp[7] ) ); \ db = _mm512_ror_epi64( db, 1 ); \ tp += 8; \ } \ @@ -733,17 +733,17 @@ do { \ __m512i alpha[16]; \ const uint64_t A0 = ( (uint64_t*)alpha_n )[0]; \ for( int i = 0; i < 16; i++ ) \ - alpha[i] = m512_const1_64( ( (uint64_t*)alpha_n )[i] ); \ + alpha[i] = _mm512_set1_epi64( ( (uint64_t*)alpha_n )[i] ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (1ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (1ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (2ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (2ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (3ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (3ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (4ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (4ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (5ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (5ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ } while (0) @@ -752,29 +752,29 @@ do { \ __m512i alpha[16]; \ const uint64_t A0 = ( (uint64_t*)alpha_f )[0]; \ for( 
int i = 0; i < 16; i++ ) \ - alpha[i] = m512_const1_64( ( (uint64_t*)alpha_f )[i] ); \ + alpha[i] = _mm512_set1_epi64( ( (uint64_t*)alpha_f )[i] ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 1ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 1ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 2ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 2ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 3ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 3ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 4ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 4ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 5ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 5ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 6ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 6ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 7ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 7ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 8ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 8ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( ( 9ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( ( 9ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (10ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (10ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ - alpha[0] = m512_const1_64( (11ULL << 32) ^ A0 ); \ + alpha[0] = _mm512_set1_epi64( (11ULL << 32) ^ A0 ); \ ROUND_BIG8( alpha ); \ } while (0) @@ -829,14 +829,14 @@ void hamsi512_8way_init( hamsi_8way_big_context *sc ) sc->partial_len = 0; sc->count_high = sc->count_low = 0; - sc->h[0] = m512_const1_64( 0x6c70617273746565 ); - sc->h[1] = m512_const1_64( 0x656e62656b204172 ); - sc->h[2] = m512_const1_64( 0x302c206272672031 ); - sc->h[3] = m512_const1_64( 0x3434362c75732032 ); - sc->h[4] = 
m512_const1_64( 0x3030312020422d33 ); - sc->h[5] = m512_const1_64( 0x656e2d484c657576 ); - sc->h[6] = m512_const1_64( 0x6c65652c65766572 ); - sc->h[7] = m512_const1_64( 0x6769756d2042656c ); + sc->h[0] = _mm512_set1_epi64( 0x6c70617273746565 ); + sc->h[1] = _mm512_set1_epi64( 0x656e62656b204172 ); + sc->h[2] = _mm512_set1_epi64( 0x302c206272672031 ); + sc->h[3] = _mm512_set1_epi64( 0x3434362c75732032 ); + sc->h[4] = _mm512_set1_epi64( 0x3030312020422d33 ); + sc->h[5] = _mm512_set1_epi64( 0x656e2d484c657576 ); + sc->h[6] = _mm512_set1_epi64( 0x6c65652c65766572 ); + sc->h[7] = _mm512_set1_epi64( 0x6769756d2042656c ); } void hamsi512_8way_update( hamsi_8way_big_context *sc, const void *data, @@ -859,7 +859,7 @@ void hamsi512_8way_close( hamsi_8way_big_context *sc, void *dst ) sph_enc32be( &ch, sc->count_high ); sph_enc32be( &cl, sc->count_low + ( sc->partial_len << 3 ) ); pad[0] = _mm512_set1_epi64( ((uint64_t)cl << 32 ) | (uint64_t)ch ); - sc->buf[0] = m512_const1_64( 0x80 ); + sc->buf[0] = _mm512_set1_epi64( 0x80 ); hamsi_8way_big( sc, sc->buf, 1 ); hamsi_8way_big_final( sc, pad ); @@ -870,6 +870,32 @@ void hamsi512_8way_close( hamsi_8way_big_context *sc, void *dst ) // Hamsi 4 way AVX2 +#if defined(__AVX512VL__) + +#define INPUT_BIG \ +do { \ + __m256i db = _mm256_ror_epi64( *buf, 1 ); \ + const __m256i zero = m256_zero; \ + const uint64_t *tp = (const uint64_t*)T512; \ + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = zero; \ + for ( int u = 0; u < 64; u++ ) \ + { \ + const __mmask8 dm = _mm256_cmplt_epi64_mask( db, zero ); \ + m0 = _mm256_mask_xor_epi64( m0, dm, m0, _mm256_set1_epi64x( tp[0] ) ); \ + m1 = _mm256_mask_xor_epi64( m1, dm, m1, _mm256_set1_epi64x( tp[1] ) ); \ + m2 = _mm256_mask_xor_epi64( m2, dm, m2, _mm256_set1_epi64x( tp[2] ) ); \ + m3 = _mm256_mask_xor_epi64( m3, dm, m3, _mm256_set1_epi64x( tp[3] ) ); \ + m4 = _mm256_mask_xor_epi64( m4, dm, m4, _mm256_set1_epi64x( tp[4] ) ); \ + m5 = _mm256_mask_xor_epi64( m5, dm, m5, _mm256_set1_epi64x( tp[5] ) ); \ + m6 
= _mm256_mask_xor_epi64( m6, dm, m6, _mm256_set1_epi64x( tp[6] ) ); \ + m7 = _mm256_mask_xor_epi64( m7, dm, m7, _mm256_set1_epi64x( tp[7] ) ); \ + db = _mm256_ror_epi64( db, 1 ); \ + tp += 8; \ + } \ +} while (0) + +#else + #define INPUT_BIG \ do { \ __m256i db = *buf; \ @@ -880,25 +906,58 @@ do { \ { \ __m256i dm = _mm256_cmpgt_epi64( zero, _mm256_slli_epi64( db, u ) ); \ m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \ - m256_const1_64( tp[0] ) ) ); \ + _mm256_set1_epi64x( tp[0] ) ) ); \ m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \ - m256_const1_64( tp[1] ) ) ); \ + _mm256_set1_epi64x( tp[1] ) ) ); \ m2 = _mm256_xor_si256( m2, _mm256_and_si256( dm, \ - m256_const1_64( tp[2] ) ) ); \ + _mm256_set1_epi64x( tp[2] ) ) ); \ m3 = _mm256_xor_si256( m3, _mm256_and_si256( dm, \ - m256_const1_64( tp[3] ) ) ); \ + _mm256_set1_epi64x( tp[3] ) ) ); \ m4 = _mm256_xor_si256( m4, _mm256_and_si256( dm, \ - m256_const1_64( tp[4] ) ) ); \ + _mm256_set1_epi64x( tp[4] ) ) ); \ m5 = _mm256_xor_si256( m5, _mm256_and_si256( dm, \ - m256_const1_64( tp[5] ) ) ); \ + _mm256_set1_epi64x( tp[5] ) ) ); \ m6 = _mm256_xor_si256( m6, _mm256_and_si256( dm, \ - m256_const1_64( tp[6] ) ) ); \ + _mm256_set1_epi64x( tp[6] ) ) ); \ m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \ - m256_const1_64( tp[7] ) ) ); \ + _mm256_set1_epi64x( tp[7] ) ) ); \ tp += 8; \ } \ } while (0) +#endif + +#define SBOX( a, b, c, d ) \ +do { \ + __m256i t; \ + t = a; \ + a = mm256_xorand( d, a, c ); \ + c = mm256_xor3( a, b, c ); \ + b = mm256_xoror( b, d, t ); \ + t = _mm256_xor_si256( t, c ); \ + d = mm256_xoror( a, b, t ); \ + t = mm256_xorand( t, a, b ); \ + a = c; \ + c = mm256_xor3( b, d, t ); \ + b = d; \ + d = mm256_not( t ); \ +} while (0) + +#define L( a, b, c, d ) \ +do { \ + a = mm256_rol_32( a, 13 ); \ + c = mm256_rol_32( c, 3 ); \ + b = mm256_xor3( a, b, c ); \ + d = mm256_xor3( d, c, _mm256_slli_epi32( a, 3 ) ); \ + b = mm256_rol_32( b, 1 ); \ + d = mm256_rol_32( d, 7 ); \ + a = mm256_xor3( a, b, d 
); \ + c = mm256_xor3( c, d, _mm256_slli_epi32( b, 7 ) ); \ + a = mm256_rol_32( a, 5 ); \ + c = mm256_rol_32( c, 22 ); \ +} while (0) + +/* #define SBOX( a, b, c, d ) \ do { \ __m256i t; \ @@ -937,6 +996,7 @@ do { \ a = mm256_rol_32( a, 5 ); \ c = mm256_rol_32( c, 22 ); \ } while (0) +*/ #define DECL_STATE_BIG \ __m256i c0, c1, c2, c3, c4, c5, c6, c7; \ @@ -1066,17 +1126,17 @@ do { \ __m256i alpha[16]; \ const uint64_t A0 = ( (uint64_t*)alpha_n )[0]; \ for( int i = 0; i < 16; i++ ) \ - alpha[i] = m256_const1_64( ( (uint64_t*)alpha_n )[i] ); \ + alpha[i] = _mm256_set1_epi64x( ( (uint64_t*)alpha_n )[i] ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (1ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (1ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (2ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (2ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (3ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (3ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (4ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (4ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (5ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (5ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ } while (0) @@ -1085,29 +1145,29 @@ do { \ __m256i alpha[16]; \ const uint64_t A0 = ( (uint64_t*)alpha_f )[0]; \ for( int i = 0; i < 16; i++ ) \ - alpha[i] = m256_const1_64( ( (uint64_t*)alpha_f )[i] ); \ + alpha[i] = _mm256_set1_epi64x( ( (uint64_t*)alpha_f )[i] ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 1ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 1ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 2ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 2ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 3ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 3ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = 
m256_const1_64( ( 4ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 4ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 5ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 5ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 6ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 6ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 7ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 7ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 8ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 8ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( ( 9ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( ( 9ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (10ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (10ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ - alpha[0] = m256_const1_64( (11ULL << 32) ^ A0 ); \ + alpha[0] = _mm256_set1_epi64x( (11ULL << 32) ^ A0 ); \ ROUND_BIG( alpha ); \ } while (0) @@ -1163,14 +1223,14 @@ void hamsi512_4way_init( hamsi_4way_big_context *sc ) sc->partial_len = 0; sc->count_high = sc->count_low = 0; - sc->h[0] = m256_const1_64( 0x6c70617273746565 ); - sc->h[1] = m256_const1_64( 0x656e62656b204172 ); - sc->h[2] = m256_const1_64( 0x302c206272672031 ); - sc->h[3] = m256_const1_64( 0x3434362c75732032 ); - sc->h[4] = m256_const1_64( 0x3030312020422d33 ); - sc->h[5] = m256_const1_64( 0x656e2d484c657576 ); - sc->h[6] = m256_const1_64( 0x6c65652c65766572 ); - sc->h[7] = m256_const1_64( 0x6769756d2042656c ); + sc->h[0] = _mm256_set1_epi64x( 0x6c70617273746565 ); + sc->h[1] = _mm256_set1_epi64x( 0x656e62656b204172 ); + sc->h[2] = _mm256_set1_epi64x( 0x302c206272672031 ); + sc->h[3] = _mm256_set1_epi64x( 0x3434362c75732032 ); + sc->h[4] = _mm256_set1_epi64x( 0x3030312020422d33 ); + sc->h[5] = _mm256_set1_epi64x( 0x656e2d484c657576 ); + sc->h[6] = _mm256_set1_epi64x( 0x6c65652c65766572 ); + sc->h[7] = 
_mm256_set1_epi64x( 0x6769756d2042656c ); } void hamsi512_4way_update( hamsi_4way_big_context *sc, const void *data, @@ -1193,7 +1253,7 @@ void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst ) sph_enc32be( &ch, sc->count_high ); sph_enc32be( &cl, sc->count_low + ( sc->partial_len << 3 ) ); pad[0] = _mm256_set1_epi64x( ((uint64_t)cl << 32 ) | (uint64_t)ch ); - sc->buf[0] = m256_const1_64( 0x80 ); + sc->buf[0] = _mm256_set1_epi64x( 0x80 ); hamsi_big( sc, sc->buf, 1 ); hamsi_big_final( sc, pad ); diff --git a/algo/haval/haval-hash-4way.c b/algo/haval/haval-hash-4way.c index fbdf0925..f3981c1c 100644 --- a/algo/haval/haval-hash-4way.c +++ b/algo/haval/haval-hash-4way.c @@ -52,6 +52,56 @@ extern "C"{ #define SPH_SMALL_FOOTPRINT_HAVAL 1 //#endif +#if defined(__AVX512VL__) + +// ( ~( a ^ b ) ) & c +#define mm128_andnotxor( a, b, c ) \ + _mm_ternarylogic_epi32( a, b, c, 0x82 ) + +#else + +#define mm128_andnotxor( a, b, c ) \ + _mm_andnot_si128( _mm_xor_si128( a, b ), c ) + +#endif + +#define F1(x6, x5, x4, x3, x2, x1, x0) \ + mm128_xor3( x0, mm128_andxor( x1, x0, x4 ), \ + _mm_xor_si128( _mm_and_si128( x2, x5 ), \ + _mm_and_si128( x3, x6 ) ) ) \ + +#define F2(x6, x5, x4, x3, x2, x1, x0) \ + mm128_xor3( mm128_andxor( x2, _mm_andnot_si128( x3, x1 ), \ + mm128_xor3( _mm_and_si128( x4, x5 ), x6, x0 ) ), \ + mm128_andxor( x4, x1, x5 ), \ + mm128_xorand( x0, x3, x5 ) ) \ + +#define F3(x6, x5, x4, x3, x2, x1, x0) \ + mm128_xor3( x0, \ + _mm_and_si128( x3, \ + mm128_xor3( _mm_and_si128( x1, x2 ), x6, x0 ) ), \ + _mm_xor_si128( _mm_and_si128( x1, x4 ), \ + _mm_and_si128( x2, x5 ) ) ) + +#define F4(x6, x5, x4, x3, x2, x1, x0) \ + mm128_xor3( \ + mm128_andxor( x3, x5, \ + _mm_xor_si128( _mm_and_si128( x1, x2 ), \ + _mm_or_si128( x4, x6 ) ) ), \ + _mm_and_si128( x4, \ + mm128_xor3( x0, _mm_andnot_si128( x2, x5 ), \ + _mm_xor_si128( x1, x6 ) ) ), \ + mm128_xorand( x0, x2, x6 ) ) + +#define F5(x6, x5, x4, x3, x2, x1, x0) \ + _mm_xor_si128( \ + mm128_andnotxor( mm128_and3( 
x1, x2, x3 ), x5, x0 ), \ + mm128_xor3( _mm_and_si128( x1, x4 ), \ + _mm_and_si128( x2, x5 ), \ + _mm_and_si128( x3, x6 ) ) ) + + +/* #define F1(x6, x5, x4, x3, x2, x1, x0) \ _mm_xor_si128( x0, \ _mm_xor_si128( _mm_and_si128(_mm_xor_si128( x0, x4 ), x1 ), \ @@ -96,6 +146,7 @@ extern "C"{ _mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x4 ), \ _mm_and_si128( x2, x5 ) ), \ _mm_and_si128( x3, x6 ) ) ) +*/ /* * The macros below integrate the phi() permutations, depending on the @@ -740,14 +791,14 @@ do { \ static void haval_8way_init( haval_8way_context *sc, unsigned olen, unsigned passes ) { - sc->s0 = m256_const1_32( 0x243F6A88UL ); - sc->s1 = m256_const1_32( 0x85A308D3UL ); - sc->s2 = m256_const1_32( 0x13198A2EUL ); - sc->s3 = m256_const1_32( 0x03707344UL ); - sc->s4 = m256_const1_32( 0xA4093822UL ); - sc->s5 = m256_const1_32( 0x299F31D0UL ); - sc->s6 = m256_const1_32( 0x082EFA98UL ); - sc->s7 = m256_const1_32( 0xEC4E6C89UL ); + sc->s0 = _mm256_set1_epi32( 0x243F6A88UL ); + sc->s1 = _mm256_set1_epi32( 0x85A308D3UL ); + sc->s2 = _mm256_set1_epi32( 0x13198A2EUL ); + sc->s3 = _mm256_set1_epi32( 0x03707344UL ); + sc->s4 = _mm256_set1_epi32( 0xA4093822UL ); + sc->s5 = _mm256_set1_epi32( 0x299F31D0UL ); + sc->s6 = _mm256_set1_epi32( 0x082EFA98UL ); + sc->s7 = _mm256_set1_epi32( 0xEC4E6C89UL ); sc->olen = olen; sc->passes = passes; sc->count_high = 0; diff --git a/algo/jh/jh-hash-4way.c b/algo/jh/jh-hash-4way.c index 9b624448..6e804125 100644 --- a/algo/jh/jh-hash-4way.c +++ b/algo/jh/jh-hash-4way.c @@ -76,19 +76,31 @@ do { \ #endif +#if defined(__AVX512VL__) +//TODO enable for AVX10_256, not used with AVX512VL + +#define notxorandnot( a, b, c ) \ + _mm256_ternarylogic_epi64( a, b, c, 0x2d ) + +#else + +#define notxorandnot( a, b, c ) \ + _mm256_xor_si256( mm256_not( a ), _mm256_andnot_si256( b, c ) ) + +#endif + #define Sb(x0, x1, x2, x3, c) \ do { \ - const __m256i cc = _mm256_set1_epi64x( c ); \ - x3 = mm256_not( x3 ); \ - x0 = _mm256_xor_si256( x0, 
_mm256_andnot_si256( x2, cc ) ); \ - tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \ - x0 = _mm256_xor_si256( x0, _mm256_and_si256( x2, x3 ) ); \ - x3 = _mm256_xor_si256( x3, _mm256_andnot_si256( x1, x2 ) ); \ - x1 = _mm256_xor_si256( x1, _mm256_and_si256( x0, x2 ) ); \ - x2 = _mm256_xor_si256( x2, _mm256_andnot_si256( x3, x0 ) ); \ - x0 = _mm256_xor_si256( x0, _mm256_or_si256( x1, x3 ) ); \ - x3 = _mm256_xor_si256( x3, _mm256_and_si256( x1, x2 ) ); \ - x1 = _mm256_xor_si256( x1, _mm256_and_si256( tmp, x0 ) ); \ + const __m256i cc = _mm256_set1_epi64x( c ); \ + x0 = mm256_xorandnot( x0, x2, cc ); \ + tmp = mm256_xorand( cc, x0, x1 ); \ + x0 = mm256_xorandnot( x0, x3, x2 ); \ + x3 = notxorandnot( x3, x1, x2 ); \ + x1 = mm256_xorand( x1, x0, x2 ); \ + x2 = mm256_xorandnot( x2, x3, x0 ); \ + x0 = mm256_xoror( x0, x1, x3 ); \ + x3 = mm256_xorand( x3, x1, x2 ); \ + x1 = mm256_xorand( x1, tmp, x0 ); \ x2 = _mm256_xor_si256( x2, tmp ); \ } while (0) @@ -96,11 +108,11 @@ do { \ do { \ x4 = _mm256_xor_si256( x4, x1 ); \ x5 = _mm256_xor_si256( x5, x2 ); \ - x6 = _mm256_xor_si256( x6, _mm256_xor_si256( x3, x0 ) ); \ + x6 = mm256_xor3( x6, x3, x0 ); \ x7 = _mm256_xor_si256( x7, x0 ); \ x0 = _mm256_xor_si256( x0, x5 ); \ x1 = _mm256_xor_si256( x1, x6 ); \ - x2 = _mm256_xor_si256( x2, _mm256_xor_si256( x7, x4 ) ); \ + x2 = mm256_xor3( x2, x7, x4 ); \ x3 = _mm256_xor_si256( x3, x4 ); \ } while (0) @@ -323,12 +335,12 @@ do { \ } while (0) -#define W80(x) Wz_8W(x, m512_const1_64( 0x5555555555555555 ), 1 ) -#define W81(x) Wz_8W(x, m512_const1_64( 0x3333333333333333 ), 2 ) -#define W82(x) Wz_8W(x, m512_const1_64( 0x0F0F0F0F0F0F0F0F ), 4 ) -#define W83(x) Wz_8W(x, m512_const1_64( 0x00FF00FF00FF00FF ), 8 ) -#define W84(x) Wz_8W(x, m512_const1_64( 0x0000FFFF0000FFFF ), 16 ) -#define W85(x) Wz_8W(x, m512_const1_64( 0x00000000FFFFFFFF ), 32 ) +#define W80(x) Wz_8W(x, _mm512_set1_epi64( 0x5555555555555555 ), 1 ) +#define W81(x) Wz_8W(x, _mm512_set1_epi64( 0x3333333333333333 ), 
2 ) +#define W82(x) Wz_8W(x, _mm512_set1_epi64( 0x0F0F0F0F0F0F0F0F ), 4 ) +#define W83(x) Wz_8W(x, _mm512_set1_epi64( 0x00FF00FF00FF00FF ), 8 ) +#define W84(x) Wz_8W(x, _mm512_set1_epi64( 0x0000FFFF0000FFFF ), 16 ) +#define W85(x) Wz_8W(x, _mm512_set1_epi64( 0x00000000FFFFFFFF ), 32 ) #define W86(x) \ do { \ __m512i t = x ## h; \ @@ -352,12 +364,12 @@ do { \ x ## l = _mm256_or_si256( _mm256_and_si256((x ## l >> (n)), (c)), t ); \ } while (0) -#define W0(x) Wz(x, m256_const1_64( 0x5555555555555555 ), 1 ) -#define W1(x) Wz(x, m256_const1_64( 0x3333333333333333 ), 2 ) -#define W2(x) Wz(x, m256_const1_64( 0x0F0F0F0F0F0F0F0F ), 4 ) -#define W3(x) Wz(x, m256_const1_64( 0x00FF00FF00FF00FF ), 8 ) -#define W4(x) Wz(x, m256_const1_64( 0x0000FFFF0000FFFF ), 16 ) -#define W5(x) Wz(x, m256_const1_64( 0x00000000FFFFFFFF ), 32 ) +#define W0(x) Wz(x, _mm256_set1_epi64x( 0x5555555555555555 ), 1 ) +#define W1(x) Wz(x, _mm256_set1_epi64x( 0x3333333333333333 ), 2 ) +#define W2(x) Wz(x, _mm256_set1_epi64x( 0x0F0F0F0F0F0F0F0F ), 4 ) +#define W3(x) Wz(x, _mm256_set1_epi64x( 0x00FF00FF00FF00FF ), 8 ) +#define W4(x) Wz(x, _mm256_set1_epi64x( 0x0000FFFF0000FFFF ), 16 ) +#define W5(x) Wz(x, _mm256_set1_epi64x( 0x00000000FFFFFFFF ), 32 ) #define W6(x) \ do { \ __m256i t = x ## h; \ @@ -624,22 +636,22 @@ static const sph_u64 IV512[] = { void jh256_8way_init( jh_8way_context *sc ) { // bswapped IV256 - sc->H[ 0] = m512_const1_64( 0xebd3202c41a398eb ); - sc->H[ 1] = m512_const1_64( 0xc145b29c7bbecd92 ); - sc->H[ 2] = m512_const1_64( 0xfac7d4609151931c ); - sc->H[ 3] = m512_const1_64( 0x038a507ed6820026 ); - sc->H[ 4] = m512_const1_64( 0x45b92677269e23a4 ); - sc->H[ 5] = m512_const1_64( 0x77941ad4481afbe0 ); - sc->H[ 6] = m512_const1_64( 0x7a176b0226abb5cd ); - sc->H[ 7] = m512_const1_64( 0xa82fff0f4224f056 ); - sc->H[ 8] = m512_const1_64( 0x754d2e7f8996a371 ); - sc->H[ 9] = m512_const1_64( 0x62e27df70849141d ); - sc->H[10] = m512_const1_64( 0x948f2476f7957627 ); - sc->H[11] = m512_const1_64( 
0x6c29804757b6d587 ); - sc->H[12] = m512_const1_64( 0x6c0d8eac2d275e5c ); - sc->H[13] = m512_const1_64( 0x0f7a0557c6508451 ); - sc->H[14] = m512_const1_64( 0xea12247067d3e47b ); - sc->H[15] = m512_const1_64( 0x69d71cd313abe389 ); + sc->H[ 0] = _mm512_set1_epi64( 0xebd3202c41a398eb ); + sc->H[ 1] = _mm512_set1_epi64( 0xc145b29c7bbecd92 ); + sc->H[ 2] = _mm512_set1_epi64( 0xfac7d4609151931c ); + sc->H[ 3] = _mm512_set1_epi64( 0x038a507ed6820026 ); + sc->H[ 4] = _mm512_set1_epi64( 0x45b92677269e23a4 ); + sc->H[ 5] = _mm512_set1_epi64( 0x77941ad4481afbe0 ); + sc->H[ 6] = _mm512_set1_epi64( 0x7a176b0226abb5cd ); + sc->H[ 7] = _mm512_set1_epi64( 0xa82fff0f4224f056 ); + sc->H[ 8] = _mm512_set1_epi64( 0x754d2e7f8996a371 ); + sc->H[ 9] = _mm512_set1_epi64( 0x62e27df70849141d ); + sc->H[10] = _mm512_set1_epi64( 0x948f2476f7957627 ); + sc->H[11] = _mm512_set1_epi64( 0x6c29804757b6d587 ); + sc->H[12] = _mm512_set1_epi64( 0x6c0d8eac2d275e5c ); + sc->H[13] = _mm512_set1_epi64( 0x0f7a0557c6508451 ); + sc->H[14] = _mm512_set1_epi64( 0xea12247067d3e47b ); + sc->H[15] = _mm512_set1_epi64( 0x69d71cd313abe389 ); sc->ptr = 0; sc->block_count = 0; } @@ -647,22 +659,22 @@ void jh256_8way_init( jh_8way_context *sc ) void jh512_8way_init( jh_8way_context *sc ) { // bswapped IV512 - sc->H[ 0] = m512_const1_64( 0x17aa003e964bd16f ); - sc->H[ 1] = m512_const1_64( 0x43d5157a052e6a63 ); - sc->H[ 2] = m512_const1_64( 0x0bef970c8d5e228a ); - sc->H[ 3] = m512_const1_64( 0x61c3b3f2591234e9 ); - sc->H[ 4] = m512_const1_64( 0x1e806f53c1a01d89 ); - sc->H[ 5] = m512_const1_64( 0x806d2bea6b05a92a ); - sc->H[ 6] = m512_const1_64( 0xa6ba7520dbcc8e58 ); - sc->H[ 7] = m512_const1_64( 0xf73bf8ba763a0fa9 ); - sc->H[ 8] = m512_const1_64( 0x694ae34105e66901 ); - sc->H[ 9] = m512_const1_64( 0x5ae66f2e8e8ab546 ); - sc->H[10] = m512_const1_64( 0x243c84c1d0a74710 ); - sc->H[11] = m512_const1_64( 0x99c15a2db1716e3b ); - sc->H[12] = m512_const1_64( 0x56f8b19decf657cf ); - sc->H[13] = m512_const1_64( 
0x56b116577c8806a7 ); - sc->H[14] = m512_const1_64( 0xfb1785e6dffcc2e3 ); - sc->H[15] = m512_const1_64( 0x4bdd8ccc78465a54 ); + sc->H[ 0] = _mm512_set1_epi64( 0x17aa003e964bd16f ); + sc->H[ 1] = _mm512_set1_epi64( 0x43d5157a052e6a63 ); + sc->H[ 2] = _mm512_set1_epi64( 0x0bef970c8d5e228a ); + sc->H[ 3] = _mm512_set1_epi64( 0x61c3b3f2591234e9 ); + sc->H[ 4] = _mm512_set1_epi64( 0x1e806f53c1a01d89 ); + sc->H[ 5] = _mm512_set1_epi64( 0x806d2bea6b05a92a ); + sc->H[ 6] = _mm512_set1_epi64( 0xa6ba7520dbcc8e58 ); + sc->H[ 7] = _mm512_set1_epi64( 0xf73bf8ba763a0fa9 ); + sc->H[ 8] = _mm512_set1_epi64( 0x694ae34105e66901 ); + sc->H[ 9] = _mm512_set1_epi64( 0x5ae66f2e8e8ab546 ); + sc->H[10] = _mm512_set1_epi64( 0x243c84c1d0a74710 ); + sc->H[11] = _mm512_set1_epi64( 0x99c15a2db1716e3b ); + sc->H[12] = _mm512_set1_epi64( 0x56f8b19decf657cf ); + sc->H[13] = _mm512_set1_epi64( 0x56b116577c8806a7 ); + sc->H[14] = _mm512_set1_epi64( 0xfb1785e6dffcc2e3 ); + sc->H[15] = _mm512_set1_epi64( 0x4bdd8ccc78465a54 ); sc->ptr = 0; sc->block_count = 0; } @@ -721,7 +733,7 @@ jh_8way_close( jh_8way_context *sc, unsigned ub, unsigned n, void *dst, size_t numz, u; uint64_t l0, l1; - buf[0] = m512_const1_64( 0x80ULL ); + buf[0] = _mm512_set1_epi64( 0x80ULL ); if ( sc->ptr == 0 ) numz = 48; @@ -772,22 +784,22 @@ jh512_8way_close(void *cc, void *dst) void jh256_4way_init( jh_4way_context *sc ) { // bswapped IV256 - sc->H[ 0] = m256_const1_64( 0xebd3202c41a398eb ); - sc->H[ 1] = m256_const1_64( 0xc145b29c7bbecd92 ); - sc->H[ 2] = m256_const1_64( 0xfac7d4609151931c ); - sc->H[ 3] = m256_const1_64( 0x038a507ed6820026 ); - sc->H[ 4] = m256_const1_64( 0x45b92677269e23a4 ); - sc->H[ 5] = m256_const1_64( 0x77941ad4481afbe0 ); - sc->H[ 6] = m256_const1_64( 0x7a176b0226abb5cd ); - sc->H[ 7] = m256_const1_64( 0xa82fff0f4224f056 ); - sc->H[ 8] = m256_const1_64( 0x754d2e7f8996a371 ); - sc->H[ 9] = m256_const1_64( 0x62e27df70849141d ); - sc->H[10] = m256_const1_64( 0x948f2476f7957627 ); - sc->H[11] = 
m256_const1_64( 0x6c29804757b6d587 ); - sc->H[12] = m256_const1_64( 0x6c0d8eac2d275e5c ); - sc->H[13] = m256_const1_64( 0x0f7a0557c6508451 ); - sc->H[14] = m256_const1_64( 0xea12247067d3e47b ); - sc->H[15] = m256_const1_64( 0x69d71cd313abe389 ); + sc->H[ 0] = _mm256_set1_epi64x( 0xebd3202c41a398eb ); + sc->H[ 1] = _mm256_set1_epi64x( 0xc145b29c7bbecd92 ); + sc->H[ 2] = _mm256_set1_epi64x( 0xfac7d4609151931c ); + sc->H[ 3] = _mm256_set1_epi64x( 0x038a507ed6820026 ); + sc->H[ 4] = _mm256_set1_epi64x( 0x45b92677269e23a4 ); + sc->H[ 5] = _mm256_set1_epi64x( 0x77941ad4481afbe0 ); + sc->H[ 6] = _mm256_set1_epi64x( 0x7a176b0226abb5cd ); + sc->H[ 7] = _mm256_set1_epi64x( 0xa82fff0f4224f056 ); + sc->H[ 8] = _mm256_set1_epi64x( 0x754d2e7f8996a371 ); + sc->H[ 9] = _mm256_set1_epi64x( 0x62e27df70849141d ); + sc->H[10] = _mm256_set1_epi64x( 0x948f2476f7957627 ); + sc->H[11] = _mm256_set1_epi64x( 0x6c29804757b6d587 ); + sc->H[12] = _mm256_set1_epi64x( 0x6c0d8eac2d275e5c ); + sc->H[13] = _mm256_set1_epi64x( 0x0f7a0557c6508451 ); + sc->H[14] = _mm256_set1_epi64x( 0xea12247067d3e47b ); + sc->H[15] = _mm256_set1_epi64x( 0x69d71cd313abe389 ); sc->ptr = 0; sc->block_count = 0; } @@ -795,22 +807,22 @@ void jh256_4way_init( jh_4way_context *sc ) void jh512_4way_init( jh_4way_context *sc ) { // bswapped IV512 - sc->H[ 0] = m256_const1_64( 0x17aa003e964bd16f ); - sc->H[ 1] = m256_const1_64( 0x43d5157a052e6a63 ); - sc->H[ 2] = m256_const1_64( 0x0bef970c8d5e228a ); - sc->H[ 3] = m256_const1_64( 0x61c3b3f2591234e9 ); - sc->H[ 4] = m256_const1_64( 0x1e806f53c1a01d89 ); - sc->H[ 5] = m256_const1_64( 0x806d2bea6b05a92a ); - sc->H[ 6] = m256_const1_64( 0xa6ba7520dbcc8e58 ); - sc->H[ 7] = m256_const1_64( 0xf73bf8ba763a0fa9 ); - sc->H[ 8] = m256_const1_64( 0x694ae34105e66901 ); - sc->H[ 9] = m256_const1_64( 0x5ae66f2e8e8ab546 ); - sc->H[10] = m256_const1_64( 0x243c84c1d0a74710 ); - sc->H[11] = m256_const1_64( 0x99c15a2db1716e3b ); - sc->H[12] = m256_const1_64( 0x56f8b19decf657cf ); - sc->H[13] = 
m256_const1_64( 0x56b116577c8806a7 ); - sc->H[14] = m256_const1_64( 0xfb1785e6dffcc2e3 ); - sc->H[15] = m256_const1_64( 0x4bdd8ccc78465a54 ); + sc->H[ 0] = _mm256_set1_epi64x( 0x17aa003e964bd16f ); + sc->H[ 1] = _mm256_set1_epi64x( 0x43d5157a052e6a63 ); + sc->H[ 2] = _mm256_set1_epi64x( 0x0bef970c8d5e228a ); + sc->H[ 3] = _mm256_set1_epi64x( 0x61c3b3f2591234e9 ); + sc->H[ 4] = _mm256_set1_epi64x( 0x1e806f53c1a01d89 ); + sc->H[ 5] = _mm256_set1_epi64x( 0x806d2bea6b05a92a ); + sc->H[ 6] = _mm256_set1_epi64x( 0xa6ba7520dbcc8e58 ); + sc->H[ 7] = _mm256_set1_epi64x( 0xf73bf8ba763a0fa9 ); + sc->H[ 8] = _mm256_set1_epi64x( 0x694ae34105e66901 ); + sc->H[ 9] = _mm256_set1_epi64x( 0x5ae66f2e8e8ab546 ); + sc->H[10] = _mm256_set1_epi64x( 0x243c84c1d0a74710 ); + sc->H[11] = _mm256_set1_epi64x( 0x99c15a2db1716e3b ); + sc->H[12] = _mm256_set1_epi64x( 0x56f8b19decf657cf ); + sc->H[13] = _mm256_set1_epi64x( 0x56b116577c8806a7 ); + sc->H[14] = _mm256_set1_epi64x( 0xfb1785e6dffcc2e3 ); + sc->H[15] = _mm256_set1_epi64x( 0x4bdd8ccc78465a54 ); sc->ptr = 0; sc->block_count = 0; } @@ -869,7 +881,7 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst, size_t numz, u; uint64_t l0, l1; - buf[0] = m256_const1_64( 0x80ULL ); + buf[0] = _mm256_set1_epi64x( 0x80ULL ); if ( sc->ptr == 0 ) numz = 48; diff --git a/algo/keccak/keccak-4way.c b/algo/keccak/keccak-4way.c index 514fc757..f8b0cd1a 100644 --- a/algo/keccak/keccak-4way.c +++ b/algo/keccak/keccak-4way.c @@ -49,7 +49,7 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( (n < max_nonce-8) && !work_restart[thr_id].restart); @@ -101,7 +101,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( (n < 
max_nonce-4) && !work_restart[thr_id].restart); pdata[19] = n; diff --git a/algo/keccak/keccak-hash-4way.c b/algo/keccak/keccak-hash-4way.c index 855b00d1..5830c17c 100644 --- a/algo/keccak/keccak-hash-4way.c +++ b/algo/keccak/keccak-hash-4way.c @@ -180,15 +180,15 @@ static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst, if ( kc->ptr == (lim - 8) ) { const uint64_t t = eb | 0x8000000000000000; - u.tmp[0] = m512_const1_64( t ); + u.tmp[0] = _mm512_set1_epi64( t ); j = 8; } else { j = lim - kc->ptr; - u.tmp[0] = m512_const1_64( eb ); + u.tmp[0] = _mm512_set1_epi64( eb ); memset_zero_512( u.tmp + 1, (j>>3) - 2 ); - u.tmp[ (j>>3) - 1] = m512_const1_64( 0x8000000000000000 ); + u.tmp[ (j>>3) - 1] = _mm512_set1_epi64( 0x8000000000000000 ); } keccak64_8way_core( kc, u.tmp, j, lim ); /* Finalize the "lane complement" */ @@ -264,8 +264,8 @@ keccak512_8way_close(void *cc, void *dst) #define OR64(d, a, b) (d = _mm256_or_si256(a,b)) #define NOT64(d, s) (d = mm256_not( s ) ) #define ROL64(d, v, n) (d = mm256_rol_64(v, n)) -#define XOROR(d, a, b, c) (d = _mm256_xor_si256(a, _mm256_or_si256(b, c))) -#define XORAND(d, a, b, c) (d = _mm256_xor_si256(a, _mm256_and_si256(b, c))) +#define XOROR(d, a, b, c) (d = mm256_xoror( a, b, c ) ) +#define XORAND(d, a, b, c) (d = mm256_xorand( a, b, c ) ) #define XOR3( d, a, b, c ) (d = mm256_xor3( a, b, c )) #include "keccak-macros.c" @@ -368,15 +368,15 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len, if ( kc->ptr == (lim - 8) ) { const uint64_t t = eb | 0x8000000000000000; - u.tmp[0] = m256_const1_64( t ); + u.tmp[0] = _mm256_set1_epi64x( t ); j = 8; } else { j = lim - kc->ptr; - u.tmp[0] = m256_const1_64( eb ); + u.tmp[0] = _mm256_set1_epi64x( eb ); memset_zero_256( u.tmp + 1, (j>>3) - 2 ); - u.tmp[ (j>>3) - 1] = m256_const1_64( 0x8000000000000000 ); + u.tmp[ (j>>3) - 1] = _mm256_set1_epi64x( 0x8000000000000000 ); } keccak64_core( kc, u.tmp, j, lim ); /* Finalize the "lane complement" */ diff --git 
a/algo/keccak/sha3d-4way.c b/algo/keccak/sha3d-4way.c index 14451c06..ffb4056d 100644 --- a/algo/keccak/sha3d-4way.c +++ b/algo/keccak/sha3d-4way.c @@ -56,7 +56,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); @@ -115,7 +115,7 @@ int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); pdata[19] = n; diff --git a/algo/luffa/luffa-hash-2way.c b/algo/luffa/luffa-hash-2way.c index 342a1c42..64d14069 100644 --- a/algo/luffa/luffa-hash-2way.c +++ b/algo/luffa/luffa-hash-2way.c @@ -69,7 +69,7 @@ static const uint32 CNS_INIT[128] __attribute((aligned(64))) = { #define MULT24W( a0, a1 ) \ { \ __m512i b = _mm512_xor_si512( a0, \ - _mm512_maskz_shuffle_epi32( 0xbbbb, a1, 16 ) ); \ + _mm512_maskz_shuffle_epi32( 0xbbbb, a1, 0x10 ) ); \ a0 = _mm512_alignr_epi8( a1, b, 4 ); \ a1 = _mm512_alignr_epi8( b, a1, 4 ); \ } @@ -107,49 +107,37 @@ static const uint32 CNS_INIT[128] __attribute((aligned(64))) = { ADD_CONSTANT4W( x0, x4, c0, c1 ); #define STEP_PART24W( a0, a1, t0, t1, c0, c1 ) \ - a1 = _mm512_shuffle_epi32( a1, 147 ); \ - t0 = _mm512_load_si512( &a1 ); \ - a1 = _mm512_unpacklo_epi32( a1, a0 ); \ + t0 = _mm512_shuffle_epi32( a1, 147 ); \ + a1 = _mm512_unpacklo_epi32( t0, a0 ); \ t0 = _mm512_unpackhi_epi32( t0, a0 ); \ t1 = _mm512_shuffle_epi32( t0, 78 ); \ a0 = _mm512_shuffle_epi32( a1, 78 ); \ SUBCRUMB4W( t1, t0, a0, a1 ); \ t0 = _mm512_unpacklo_epi32( t0, t1 ); \ a1 = _mm512_unpacklo_epi32( a1, a0 ); \ - a0 = _mm512_load_si512( &a1 ); \ - a0 = _mm512_unpackhi_epi64( a0, t0 ); \ + a0 = _mm512_unpackhi_epi64( a1, t0 ); \ a1 = 
_mm512_unpacklo_epi64( a1, t0 ); \ a1 = _mm512_shuffle_epi32( a1, 57 ); \ MIXWORD4W( a0, a1 ); \ ADD_CONSTANT4W( a0, a1, c0, c1 ); #define NMLTOM10244W(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\ - s1 = _mm512_load_si512(&r3);\ - q1 = _mm512_load_si512(&p3);\ - s3 = _mm512_load_si512(&r3);\ - q3 = _mm512_load_si512(&p3);\ - s1 = _mm512_unpackhi_epi32(s1,r2);\ - q1 = _mm512_unpackhi_epi32(q1,p2);\ - s3 = _mm512_unpacklo_epi32(s3,r2);\ - q3 = _mm512_unpacklo_epi32(q3,p2);\ - s0 = _mm512_load_si512(&s1);\ - q0 = _mm512_load_si512(&q1);\ - s2 = _mm512_load_si512(&s3);\ - q2 = _mm512_load_si512(&q3);\ - r3 = _mm512_load_si512(&r1);\ - p3 = _mm512_load_si512(&p1);\ - r1 = _mm512_unpacklo_epi32(r1,r0);\ - p1 = _mm512_unpacklo_epi32(p1,p0);\ - r3 = _mm512_unpackhi_epi32(r3,r0);\ - p3 = _mm512_unpackhi_epi32(p3,p0);\ - s0 = _mm512_unpackhi_epi64(s0,r3);\ - q0 = _mm512_unpackhi_epi64(q0,p3);\ - s1 = _mm512_unpacklo_epi64(s1,r3);\ - q1 = _mm512_unpacklo_epi64(q1,p3);\ - s2 = _mm512_unpackhi_epi64(s2,r1);\ - q2 = _mm512_unpackhi_epi64(q2,p1);\ - s3 = _mm512_unpacklo_epi64(s3,r1);\ - q3 = _mm512_unpacklo_epi64(q3,p1); + s1 = _mm512_unpackhi_epi32( r3, r2 ); \ + q1 = _mm512_unpackhi_epi32( p3, p2 ); \ + s3 = _mm512_unpacklo_epi32( r3, r2 ); \ + q3 = _mm512_unpacklo_epi32( p3, p2 ); \ + r3 = _mm512_unpackhi_epi32( r1, r0 ); \ + r1 = _mm512_unpacklo_epi32( r1, r0 ); \ + p3 = _mm512_unpackhi_epi32( p1, p0 ); \ + p1 = _mm512_unpacklo_epi32( p1, p0 ); \ + s0 = _mm512_unpackhi_epi64( s1, r3 ); \ + q0 = _mm512_unpackhi_epi64( q1 ,p3 ); \ + s1 = _mm512_unpacklo_epi64( s1, r3 ); \ + q1 = _mm512_unpacklo_epi64( q1, p3 ); \ + s2 = _mm512_unpackhi_epi64( s3, r1 ); \ + q2 = _mm512_unpackhi_epi64( q3, p1 ); \ + s3 = _mm512_unpacklo_epi64( s3, r1 ); \ + q3 = _mm512_unpacklo_epi64( q3, p1 ); #define MIXTON10244W(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\ NMLTOM10244W(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3); @@ -198,11 +186,8 @@ void rnd512_4way( luffa_4way_context *state, 
const __m512i *msg ) chainv[7] = _mm512_xor_si512(chainv[7], chainv[9]); MULT24W( chainv[8], chainv[9] ); - chainv[8] = _mm512_xor_si512( chainv[8], t0 ); - chainv[9] = _mm512_xor_si512( chainv[9], t1 ); - - t0 = chainv[8]; - t1 = chainv[9]; + t0 = chainv[8] = _mm512_xor_si512( chainv[8], t0 ); + t1 = chainv[9] = _mm512_xor_si512( chainv[9], t1 ); MULT24W( chainv[8], chainv[9] ); chainv[8] = _mm512_xor_si512( chainv[8], chainv[6] ); @@ -538,10 +523,39 @@ int luffa_4way_update_close( luffa_4way_context *state, a = _mm256_xor_si256( a, c0 ); \ b = _mm256_xor_si256( b, c1 ); +//TODO Enable for AVX10_256, not used with AVX512 or AVX10_512 +#if defined(__AVX512VL__) + +#define MULT2( a0, a1 ) \ +{ \ + __m256i b = _mm256_xor_si256( a0, \ + _mm256_maskz_shuffle_epi32( 0xbb, a1, 0x10 ) ); \ + a0 = _mm256_alignr_epi8( a1, b, 4 ); \ + a1 = _mm256_alignr_epi8( b, a1, 4 ); \ +} + +#define SUBCRUMB( a0, a1, a2, a3 ) \ +{ \ + __m256i t = a0; \ + a0 = mm256_xoror( a3, a0, a1 ); \ + a2 = _mm256_xor_si256( a2, a3 ); \ + a1 = _mm256_ternarylogic_epi64( a1, a3, t, 0x87 ); /* a1 xnor (a3 & t) */ \ + a3 = mm256_xorand( a2, a3, t ); \ + a2 = mm256_xorand( a1, a2, a0); \ + a1 = _mm256_or_si256( a1, a3 ); \ + a3 = _mm256_xor_si256( a3, a2 ); \ + t = _mm256_xor_si256( t, a1 ); \ + a2 = _mm256_and_si256( a2, a1 ); \ + a1 = mm256_xnor( a1, a0 ); \ + a0 = t; \ +} + +#else + #define MULT2( a0, a1 ) \ { \ __m256i b = _mm256_xor_si256( a0, _mm256_shuffle_epi32( \ - _mm256_blend_epi32( a1, m256_zero, 0xee ), 16 ) ); \ + _mm256_blend_epi32( a1, m256_zero, 0xee ), 0x10 ) ); \ a0 = _mm256_alignr_epi8( a1, b, 4 ); \ a1 = _mm256_alignr_epi8( b, a1, 4 ); \ } @@ -567,26 +581,14 @@ int luffa_4way_update_close( luffa_4way_context *state, a0 = t; \ } +#endif + #define MIXWORD( a, b ) \ -{ \ - __m256i t1, t2; \ - b = _mm256_xor_si256( a,b ); \ - t1 = _mm256_slli_epi32( a, 2 ); \ - t2 = _mm256_srli_epi32( a, 30 ); \ - a = _mm256_or_si256( t1, t2 ); \ - a = _mm256_xor_si256( a, b ); \ - t1 = 
_mm256_slli_epi32( b, 14 ); \ - t2 = _mm256_srli_epi32( b, 18 ); \ - b = _mm256_or_si256( t1, t2 ); \ - b = _mm256_xor_si256( a, b ); \ - t1 = _mm256_slli_epi32( a, 10 ); \ - t2 = _mm256_srli_epi32( a, 22 ); \ - a = _mm256_or_si256( t1,t2 ); \ - a = _mm256_xor_si256( a,b ); \ - t1 = _mm256_slli_epi32( b,1 ); \ - t2 = _mm256_srli_epi32( b,31 ); \ - b = _mm256_or_si256( t1, t2 ); \ -} + b = _mm256_xor_si256( a, b ); \ + a = _mm256_xor_si256( b, mm256_rol_32( a, 2 ) ); \ + b = _mm256_xor_si256( a, mm256_rol_32( b, 14 ) ); \ + a = _mm256_xor_si256( b, mm256_rol_32( a, 10 ) ); \ + b = mm256_rol_32( b, 1 ); #define STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, c0, c1 ) \ SUBCRUMB( x0, x1, x2, x3 ); \ @@ -598,49 +600,37 @@ int luffa_4way_update_close( luffa_4way_context *state, ADD_CONSTANT( x0, x4, c0, c1 ); #define STEP_PART2( a0, a1, t0, t1, c0, c1 ) \ - a1 = _mm256_shuffle_epi32( a1, 147); \ - t0 = _mm256_load_si256( &a1 ); \ - a1 = _mm256_unpacklo_epi32( a1, a0 ); \ + t0 = _mm256_shuffle_epi32( a1, 147 ); \ + a1 = _mm256_unpacklo_epi32( t0, a0 ); \ t0 = _mm256_unpackhi_epi32( t0, a0 ); \ t1 = _mm256_shuffle_epi32( t0, 78 ); \ a0 = _mm256_shuffle_epi32( a1, 78 ); \ - SUBCRUMB( t1, t0, a0, a1 );\ + SUBCRUMB( t1, t0, a0, a1 ); \ t0 = _mm256_unpacklo_epi32( t0, t1 ); \ a1 = _mm256_unpacklo_epi32( a1, a0 ); \ - a0 = _mm256_load_si256( &a1 ); \ - a0 = _mm256_unpackhi_epi64( a0, t0 ); \ + a0 = _mm256_unpackhi_epi64( a1, t0 ); \ a1 = _mm256_unpacklo_epi64( a1, t0 ); \ a1 = _mm256_shuffle_epi32( a1, 57 ); \ MIXWORD( a0, a1 ); \ ADD_CONSTANT( a0, a1, c0, c1 ); #define NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\ - s1 = _mm256_load_si256(&r3);\ - q1 = _mm256_load_si256(&p3);\ - s3 = _mm256_load_si256(&r3);\ - q3 = _mm256_load_si256(&p3);\ - s1 = _mm256_unpackhi_epi32(s1,r2);\ - q1 = _mm256_unpackhi_epi32(q1,p2);\ - s3 = _mm256_unpacklo_epi32(s3,r2);\ - q3 = _mm256_unpacklo_epi32(q3,p2);\ - s0 = _mm256_load_si256(&s1);\ - q0 = _mm256_load_si256(&q1);\ - s2 = 
_mm256_load_si256(&s3);\ - q2 = _mm256_load_si256(&q3);\ - r3 = _mm256_load_si256(&r1);\ - p3 = _mm256_load_si256(&p1);\ - r1 = _mm256_unpacklo_epi32(r1,r0);\ - p1 = _mm256_unpacklo_epi32(p1,p0);\ - r3 = _mm256_unpackhi_epi32(r3,r0);\ - p3 = _mm256_unpackhi_epi32(p3,p0);\ - s0 = _mm256_unpackhi_epi64(s0,r3);\ - q0 = _mm256_unpackhi_epi64(q0,p3);\ - s1 = _mm256_unpacklo_epi64(s1,r3);\ - q1 = _mm256_unpacklo_epi64(q1,p3);\ - s2 = _mm256_unpackhi_epi64(s2,r1);\ - q2 = _mm256_unpackhi_epi64(q2,p1);\ - s3 = _mm256_unpacklo_epi64(s3,r1);\ - q3 = _mm256_unpacklo_epi64(q3,p1); + s1 = _mm256_unpackhi_epi32( r3, r2 ); \ + q1 = _mm256_unpackhi_epi32( p3, p2 ); \ + s3 = _mm256_unpacklo_epi32( r3, r2 ); \ + q3 = _mm256_unpacklo_epi32( p3, p2 ); \ + r3 = _mm256_unpackhi_epi32( r1, r0 ); \ + r1 = _mm256_unpacklo_epi32( r1, r0 ); \ + p3 = _mm256_unpackhi_epi32( p1, p0 ); \ + p1 = _mm256_unpacklo_epi32( p1, p0 ); \ + s0 = _mm256_unpackhi_epi64( s1, r3 ); \ + q0 = _mm256_unpackhi_epi64( q1 ,p3 ); \ + s1 = _mm256_unpacklo_epi64( s1, r3 ); \ + q1 = _mm256_unpacklo_epi64( q1, p3 ); \ + s2 = _mm256_unpackhi_epi64( s3, r1 ); \ + q2 = _mm256_unpackhi_epi64( q3, p1 ); \ + s3 = _mm256_unpacklo_epi64( s3, r1 ); \ + q3 = _mm256_unpacklo_epi64( q3, p1 ); #define MIXTON1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\ NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3); @@ -656,17 +646,10 @@ void rnd512_2way( luffa_2way_context *state, const __m256i *msg ) __m256i *chainv = state->chainv; __m256i x0, x1, x2, x3, x4, x5, x6, x7; - t0 = chainv[0]; - t1 = chainv[1]; - - t0 = _mm256_xor_si256( t0, chainv[2] ); - t1 = _mm256_xor_si256( t1, chainv[3] ); - t0 = _mm256_xor_si256( t0, chainv[4] ); - t1 = _mm256_xor_si256( t1, chainv[5] ); - t0 = _mm256_xor_si256( t0, chainv[6] ); - t1 = _mm256_xor_si256( t1, chainv[7] ); - t0 = _mm256_xor_si256( t0, chainv[8] ); - t1 = _mm256_xor_si256( t1, chainv[9] ); + t0 = mm256_xor3( chainv[0], chainv[2], chainv[4] ); + t1 = mm256_xor3( chainv[1], 
chainv[3], chainv[5] ); + t0 = mm256_xor3( t0, chainv[6], chainv[8] ); + t1 = mm256_xor3( t1, chainv[7], chainv[9] ); MULT2( t0, t1 ); @@ -701,11 +684,8 @@ void rnd512_2way( luffa_2way_context *state, const __m256i *msg ) chainv[7] = _mm256_xor_si256(chainv[7], chainv[9]); MULT2( chainv[8], chainv[9] ); - chainv[8] = _mm256_xor_si256( chainv[8], t0 ); - chainv[9] = _mm256_xor_si256( chainv[9], t1 ); - - t0 = chainv[8]; - t1 = chainv[9]; + t0 = chainv[8] = _mm256_xor_si256( chainv[8], t0 ); + t1 = chainv[9] = _mm256_xor_si256( chainv[9], t1 ); MULT2( chainv[8], chainv[9] ); chainv[8] = _mm256_xor_si256( chainv[8], chainv[6] ); @@ -794,29 +774,22 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b ) { uint32 hash[8*2] __attribute((aligned(64))); __m256i* chainv = state->chainv; - __m256i t[2]; + __m256i t0, t1; const __m256i shuff_bswap32 = mm256_set2_64( 0x0c0d0e0f08090a0b, 0x0405060700010203 ); /*---- blank round with m=0 ----*/ rnd512_2way( state, NULL ); - t[0] = chainv[0]; - t[1] = chainv[1]; - - t[0] = _mm256_xor_si256( t[0], chainv[2] ); - t[1] = _mm256_xor_si256( t[1], chainv[3] ); - t[0] = _mm256_xor_si256( t[0], chainv[4] ); - t[1] = _mm256_xor_si256( t[1], chainv[5] ); - t[0] = _mm256_xor_si256( t[0], chainv[6] ); - t[1] = _mm256_xor_si256( t[1], chainv[7] ); - t[0] = _mm256_xor_si256( t[0], chainv[8] ); - t[1] = _mm256_xor_si256( t[1], chainv[9] ); + t0 = mm256_xor3( chainv[0], chainv[2], chainv[4] ); + t1 = mm256_xor3( chainv[1], chainv[3], chainv[5] ); + t0 = mm256_xor3( t0, chainv[6], chainv[8] ); + t1 = mm256_xor3( t1, chainv[7], chainv[9] ); - t[0] = _mm256_shuffle_epi32( t[0], 27 ); - t[1] = _mm256_shuffle_epi32( t[1], 27 ); + t0 = _mm256_shuffle_epi32( t0, 27 ); + t1 = _mm256_shuffle_epi32( t1, 27 ); - _mm256_store_si256( (__m256i*)&hash[0], t[0] ); - _mm256_store_si256( (__m256i*)&hash[8], t[1] ); + _mm256_store_si256( (__m256i*)&hash[0], t0 ); + _mm256_store_si256( (__m256i*)&hash[8], t1 ); casti_m256i( b, 0 ) = 
_mm256_shuffle_epi8( casti_m256i( hash, 0 ), shuff_bswap32 ); @@ -825,22 +798,16 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b ) rnd512_2way( state, NULL ); - t[0] = chainv[0]; - t[1] = chainv[1]; - t[0] = _mm256_xor_si256( t[0], chainv[2] ); - t[1] = _mm256_xor_si256( t[1], chainv[3] ); - t[0] = _mm256_xor_si256( t[0], chainv[4] ); - t[1] = _mm256_xor_si256( t[1], chainv[5] ); - t[0] = _mm256_xor_si256( t[0], chainv[6] ); - t[1] = _mm256_xor_si256( t[1], chainv[7] ); - t[0] = _mm256_xor_si256( t[0], chainv[8] ); - t[1] = _mm256_xor_si256( t[1], chainv[9] ); - - t[0] = _mm256_shuffle_epi32( t[0], 27 ); - t[1] = _mm256_shuffle_epi32( t[1], 27 ); - - _mm256_store_si256( (__m256i*)&hash[0], t[0] ); - _mm256_store_si256( (__m256i*)&hash[8], t[1] ); + t0 = mm256_xor3( chainv[0], chainv[2], chainv[4] ); + t1 = mm256_xor3( chainv[1], chainv[3], chainv[5] ); + t0 = mm256_xor3( t0, chainv[6], chainv[8] ); + t1 = mm256_xor3( t1, chainv[7], chainv[9] ); + + t0 = _mm256_shuffle_epi32( t0, 27 ); + t1 = _mm256_shuffle_epi32( t1, 27 ); + + _mm256_store_si256( (__m256i*)&hash[0], t0 ); + _mm256_store_si256( (__m256i*)&hash[8], t1 ); casti_m256i( b, 2 ) = _mm256_shuffle_epi8( casti_m256i( hash, 0 ), shuff_bswap32 ); diff --git a/algo/luffa/luffa_for_sse2.c b/algo/luffa/luffa_for_sse2.c index d8a47840..500bb860 100644 --- a/algo/luffa/luffa_for_sse2.c +++ b/algo/luffa/luffa_for_sse2.c @@ -22,20 +22,29 @@ #include "simd-utils.h" #include "luffa_for_sse2.h" +#define cns(i) ( ( (__m128i*)CNS_INIT)[i] ) + +#define ADD_CONSTANT( a, b, c0 ,c1 ) \ + a = _mm_xor_si128( a, c0 ); \ + b = _mm_xor_si128( b, c1 ); \ + #if defined(__AVX512VL__) +//TODO enable for AVX10_512 AVX10_256 #define MULT2( a0, a1 ) \ { \ - __m128i b = _mm_xor_si128( a0, _mm_maskz_shuffle_epi32( 0xb, a1, 0x10 ) ); \ - a0 = _mm_alignr_epi32( a1, b, 1 ); \ - a1 = _mm_alignr_epi32( b, a1, 1 ); \ + __m128i b = _mm_xor_si128( a0, \ + _mm_maskz_shuffle_epi32( 0xb, a1, 0x10 ) ); \ + a0 = _mm_alignr_epi8( a1, 
b, 4 ); \ + a1 = _mm_alignr_epi8( b, a1, 4 ); \ } #elif defined(__SSE4_1__) #define MULT2( a0, a1 ) do \ { \ - __m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( mm128_mask_32( a1, 0xe ), 0x10 ) ); \ + __m128i b = _mm_xor_si128( a0, \ + _mm_shuffle_epi32( mm128_mask_32( a1, 0xe ), 0x10 ) ); \ a0 = _mm_alignr_epi8( a1, b, 4 ); \ a1 = _mm_alignr_epi8( b, a1, 4 ); \ } while(0) @@ -44,79 +53,88 @@ #define MULT2( a0, a1 ) do \ { \ - __m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( _mm_and_si128( a1, MASK ), 0x10 ) ); \ - a0 = _mm_or_si128( _mm_srli_si128( b, 4 ), _mm_slli_si128( a1, 12 ) ); \ - a1 = _mm_or_si128( _mm_srli_si128( a1, 4 ), _mm_slli_si128( b, 12 ) ); \ + __m128i b = _mm_xor_si128( a0, \ + _mm_shuffle_epi32( _mm_and_si128( a1, MASK ), 0x10 ) ); \ + a0 = _mm_or_si128( _mm_srli_si128( b, 4 ), _mm_slli_si128( a1, 12 ) ); \ + a1 = _mm_or_si128( _mm_srli_si128( a1, 4 ), _mm_slli_si128( b, 12 ) ); \ } while(0) #endif -#define STEP_PART(x,c,t)\ - SUBCRUMB(*x,*(x+1),*(x+2),*(x+3),*t);\ - SUBCRUMB(*(x+5),*(x+6),*(x+7),*(x+4),*t);\ - MIXWORD(*x,*(x+4),*t,*(t+1));\ - MIXWORD(*(x+1),*(x+5),*t,*(t+1));\ - MIXWORD(*(x+2),*(x+6),*t,*(t+1));\ - MIXWORD(*(x+3),*(x+7),*t,*(t+1));\ - ADD_CONSTANT(*x, *(x+4), *c, *(c+1)); - -#define STEP_PART2(a0,a1,t0,t1,c0,c1,tmp0,tmp1)\ - a1 = _mm_shuffle_epi32(a1,147);\ - t0 = _mm_load_si128(&a1);\ - a1 = _mm_unpacklo_epi32(a1,a0);\ - t0 = _mm_unpackhi_epi32(t0,a0);\ - t1 = _mm_shuffle_epi32(t0,78);\ - a0 = _mm_shuffle_epi32(a1,78);\ - SUBCRUMB(t1,t0,a0,a1,tmp0);\ - t0 = _mm_unpacklo_epi32(t0,t1);\ - a1 = _mm_unpacklo_epi32(a1,a0);\ - a0 = _mm_load_si128(&a1);\ - a0 = _mm_unpackhi_epi64(a0,t0);\ - a1 = _mm_unpacklo_epi64(a1,t0);\ - a1 = _mm_shuffle_epi32(a1,57);\ - MIXWORD(a0,a1,tmp0,tmp1);\ - ADD_CONSTANT(a0,a1,c0,c1); - -#define SUBCRUMB(a0,a1,a2,a3,t)\ - t = _mm_load_si128(&a0);\ - a0 = _mm_or_si128(a0,a1);\ - a2 = _mm_xor_si128(a2,a3);\ - a1 = mm128_not( a1 );\ - a0 = _mm_xor_si128(a0,a3);\ - a3 = _mm_and_si128(a3,t);\ - a1 = 
_mm_xor_si128(a1,a3);\ - a3 = _mm_xor_si128(a3,a2);\ - a2 = _mm_and_si128(a2,a0);\ - a0 = mm128_not( a0 );\ - a2 = _mm_xor_si128(a2,a1);\ - a1 = _mm_or_si128(a1,a3);\ - t = _mm_xor_si128(t,a1);\ - a3 = _mm_xor_si128(a3,a2);\ - a2 = _mm_and_si128(a2,a1);\ - a1 = _mm_xor_si128(a1,a0);\ - a0 = _mm_load_si128(&t);\ - -#define MIXWORD(a,b,t1,t2)\ - b = _mm_xor_si128(a,b);\ - t1 = _mm_slli_epi32(a,2);\ - t2 = _mm_srli_epi32(a,30);\ - a = _mm_or_si128(t1,t2);\ - a = _mm_xor_si128(a,b);\ - t1 = _mm_slli_epi32(b,14);\ - t2 = _mm_srli_epi32(b,18);\ - b = _mm_or_si128(t1,t2);\ - b = _mm_xor_si128(a,b);\ - t1 = _mm_slli_epi32(a,10);\ - t2 = _mm_srli_epi32(a,22);\ - a = _mm_or_si128(t1,t2);\ - a = _mm_xor_si128(a,b);\ - t1 = _mm_slli_epi32(b,1);\ - t2 = _mm_srli_epi32(b,31);\ - b = _mm_or_si128(t1,t2); - -#define ADD_CONSTANT(a,b,c0,c1)\ - a = _mm_xor_si128(a,c0);\ - b = _mm_xor_si128(b,c1);\ +#if defined(__AVX512VL__) +//TODO enable for AVX10_512 AVX10_256 + +#define SUBCRUMB( a0, a1, a2, a3 ) \ +{ \ + __m128i t = a0; \ + a0 = mm128_xoror( a3, a0, a1 ); \ + a2 = _mm_xor_si128( a2, a3 ); \ + a1 = _mm_ternarylogic_epi64( a1, a3, t, 0x87 ); /* a1 xnor (a3 & t) */ \ + a3 = mm128_xorand( a2, a3, t ); \ + a2 = mm128_xorand( a1, a2, a0 ); \ + a1 = _mm_or_si128( a1, a3 ); \ + a3 = _mm_xor_si128( a3, a2 ); \ + t = _mm_xor_si128( t, a1 ); \ + a2 = _mm_and_si128( a2, a1 ); \ + a1 = mm128_xnor( a1, a0 ); \ + a0 = t; \ +} + +#else + +#define SUBCRUMB( a0, a1, a2, a3 ) \ +{ \ + __m128i t = a0; \ + a0 = _mm_or_si128( a0, a1 ); \ + a2 = _mm_xor_si128( a2, a3 ); \ + a1 = mm128_not( a1 ); \ + a0 = _mm_xor_si128( a0, a3 ); \ + a3 = _mm_and_si128( a3, t ); \ + a1 = _mm_xor_si128( a1, a3 ); \ + a3 = _mm_xor_si128( a3, a2 ); \ + a2 = _mm_and_si128( a2, a0 ); \ + a0 = mm128_not( a0 ); \ + a2 = _mm_xor_si128( a2, a1 ); \ + a1 = _mm_or_si128( a1, a3 ); \ + t = _mm_xor_si128( t , a1 ); \ + a3 = _mm_xor_si128( a3, a2 ); \ + a2 = _mm_and_si128( a2, a1 ); \ + a1 = _mm_xor_si128( a1, a0 ); \ + a0 = t; \ +} 
+ +#endif + +#define MIXWORD( a, b ) \ + b = _mm_xor_si128( a, b ); \ + a = _mm_xor_si128( b, mm128_rol_32( a, 2 ) ); \ + b = _mm_xor_si128( a, mm128_rol_32( b, 14 ) ); \ + a = _mm_xor_si128( b, mm128_rol_32( a, 10 ) ); \ + b = mm128_rol_32( b, 1 ); + +#define STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, c0, c1 ) \ + SUBCRUMB( x0, x1, x2, x3 ); \ + SUBCRUMB( x5, x6, x7, x4 ); \ + MIXWORD( x0, x4 ); \ + MIXWORD( x1, x5 ); \ + MIXWORD( x2, x6 ); \ + MIXWORD( x3, x7 ); \ + ADD_CONSTANT( x0, x4, c0, c1 ); + +#define STEP_PART2( a0, a1, t0, t1, c0, c1 ) \ + t0 = _mm_shuffle_epi32( a1, 147 ); \ + a1 = _mm_unpacklo_epi32( t0, a0 ); \ + t0 = _mm_unpackhi_epi32( t0, a0 ); \ + t1 = _mm_shuffle_epi32( t0, 78 ); \ + a0 = _mm_shuffle_epi32( a1, 78 ); \ + SUBCRUMB( t1, t0, a0, a1 ); \ + t0 = _mm_unpacklo_epi32( t0, t1 ); \ + a1 = _mm_unpacklo_epi32( a1, a0 ); \ + a0 = _mm_unpackhi_epi64( a1, t0 ); \ + a1 = _mm_unpacklo_epi64( a1, t0 ); \ + a1 = _mm_shuffle_epi32( a1, 57 ); \ + MIXWORD( a0, a1 ); \ + ADD_CONSTANT( a0, a1, c0, c1 ); #define NMLTOM768(r0,r1,r2,s0,s1,s2,s3,p0,p1,p2,q0,q1,q2,q3)\ s2 = _mm_load_si128(&r1);\ @@ -177,32 +195,22 @@ q1 = _mm_load_si128(&p1);\ #define NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\ - s1 = _mm_load_si128(&r3);\ - q1 = _mm_load_si128(&p3);\ - s3 = _mm_load_si128(&r3);\ - q3 = _mm_load_si128(&p3);\ - s1 = _mm_unpackhi_epi32(s1,r2);\ - q1 = _mm_unpackhi_epi32(q1,p2);\ - s3 = _mm_unpacklo_epi32(s3,r2);\ - q3 = _mm_unpacklo_epi32(q3,p2);\ - s0 = _mm_load_si128(&s1);\ - q0 = _mm_load_si128(&q1);\ - s2 = _mm_load_si128(&s3);\ - q2 = _mm_load_si128(&q3);\ - r3 = _mm_load_si128(&r1);\ - p3 = _mm_load_si128(&p1);\ - r1 = _mm_unpacklo_epi32(r1,r0);\ - p1 = _mm_unpacklo_epi32(p1,p0);\ - r3 = _mm_unpackhi_epi32(r3,r0);\ - p3 = _mm_unpackhi_epi32(p3,p0);\ - s0 = _mm_unpackhi_epi64(s0,r3);\ - q0 = _mm_unpackhi_epi64(q0,p3);\ - s1 = _mm_unpacklo_epi64(s1,r3);\ - q1 = _mm_unpacklo_epi64(q1,p3);\ - s2 = _mm_unpackhi_epi64(s2,r1);\ - q2 = 
_mm_unpackhi_epi64(q2,p1);\ - s3 = _mm_unpacklo_epi64(s3,r1);\ - q3 = _mm_unpacklo_epi64(q3,p1); + s1 = _mm_unpackhi_epi32( r3, r2 ); \ + q1 = _mm_unpackhi_epi32( p3, p2 ); \ + s3 = _mm_unpacklo_epi32( r3, r2 ); \ + q3 = _mm_unpacklo_epi32( p3, p2 ); \ + r3 = _mm_unpackhi_epi32( r1, r0 ); \ + r1 = _mm_unpacklo_epi32( r1, r0 ); \ + p3 = _mm_unpackhi_epi32( p1, p0 ); \ + p1 = _mm_unpacklo_epi32( p1, p0 ); \ + s0 = _mm_unpackhi_epi64( s1, r3 ); \ + q0 = _mm_unpackhi_epi64( q1 ,p3 ); \ + s1 = _mm_unpacklo_epi64( s1, r3 ); \ + q1 = _mm_unpacklo_epi64( q1, p3 ); \ + s2 = _mm_unpackhi_epi64( s3, r1 ); \ + q2 = _mm_unpackhi_epi64( q3, p1 ); \ + s3 = _mm_unpacklo_epi64( s3, r1 ); \ + q3 = _mm_unpacklo_epi64( q3, p1 ); #define MIXTON1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\ NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3); @@ -306,8 +314,7 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data, // remaining data bytes casti_m128i( state->buffer, 0 ) = mm128_bswap_32( cast_m128i( data ) ); // padding of partial block - casti_m128i( state->buffer, 1 ) = - _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ); + casti_m128i( state->buffer, 1 ) = _mm_set_epi32( 0, 0, 0, 0x80000000 ); } return SUCCESS; @@ -325,8 +332,7 @@ HashReturn final_luffa(hashState_luffa *state, BitSequence *hashval) else { // empty pad block, constant data - rnd512( state, _mm_setzero_si128(), - _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) ); + rnd512( state, _mm_setzero_si128(), _mm_set_epi32( 0, 0, 0, 0x80000000 ) ); } finalization512(state, (uint32*) hashval); @@ -423,163 +429,119 @@ int luffa_full( hashState_luffa *state, BitSequence* output, int hashbitlen, static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 ) { - __m128i t[2]; + __m128i t0, t1; __m128i *chainv = state->chainv; - __m128i tmp[2]; - __m128i x[8]; + __m128i x0, x1, x2, x3, x4, x5, x6, x7; - t[0] = chainv[0]; - t[1] = chainv[1]; + t0 = mm128_xor3( chainv[0], 
chainv[2], chainv[4] ); + t1 = mm128_xor3( chainv[1], chainv[3], chainv[5] ); + t0 = mm128_xor3( t0, chainv[6], chainv[8] ); + t1 = mm128_xor3( t1, chainv[7], chainv[9] ); - t[0] = _mm_xor_si128( t[0], chainv[2] ); - t[1] = _mm_xor_si128( t[1], chainv[3] ); - t[0] = _mm_xor_si128( t[0], chainv[4] ); - t[1] = _mm_xor_si128( t[1], chainv[5] ); - t[0] = _mm_xor_si128( t[0], chainv[6] ); - t[1] = _mm_xor_si128( t[1], chainv[7] ); - t[0] = _mm_xor_si128( t[0], chainv[8] ); - t[1] = _mm_xor_si128( t[1], chainv[9] ); - - MULT2( t[0], t[1] ); + MULT2( t0, t1 ); msg0 = _mm_shuffle_epi32( msg0, 27 ); msg1 = _mm_shuffle_epi32( msg1, 27 ); - chainv[0] = _mm_xor_si128( chainv[0], t[0] ); - chainv[1] = _mm_xor_si128( chainv[1], t[1] ); - chainv[2] = _mm_xor_si128( chainv[2], t[0] ); - chainv[3] = _mm_xor_si128( chainv[3], t[1] ); - chainv[4] = _mm_xor_si128( chainv[4], t[0] ); - chainv[5] = _mm_xor_si128( chainv[5], t[1] ); - chainv[6] = _mm_xor_si128( chainv[6], t[0] ); - chainv[7] = _mm_xor_si128( chainv[7], t[1] ); - chainv[8] = _mm_xor_si128( chainv[8], t[0] ); - chainv[9] = _mm_xor_si128( chainv[9], t[1] ); + chainv[0] = _mm_xor_si128( chainv[0], t0 ); + chainv[1] = _mm_xor_si128( chainv[1], t1 ); + chainv[2] = _mm_xor_si128( chainv[2], t0 ); + chainv[3] = _mm_xor_si128( chainv[3], t1 ); + chainv[4] = _mm_xor_si128( chainv[4], t0 ); + chainv[5] = _mm_xor_si128( chainv[5], t1 ); + chainv[6] = _mm_xor_si128( chainv[6], t0 ); + chainv[7] = _mm_xor_si128( chainv[7], t1 ); + chainv[8] = _mm_xor_si128( chainv[8], t0 ); + chainv[9] = _mm_xor_si128( chainv[9], t1 ); - t[0] = chainv[0]; - t[1] = chainv[1]; + t0 = chainv[0]; + t1 = chainv[1]; MULT2( chainv[0], chainv[1]); - chainv[0] = _mm_xor_si128( chainv[0], chainv[2] ); chainv[1] = _mm_xor_si128( chainv[1], chainv[3] ); MULT2( chainv[2], chainv[3]); - chainv[2] = _mm_xor_si128(chainv[2], chainv[4]); chainv[3] = _mm_xor_si128(chainv[3], chainv[5]); MULT2( chainv[4], chainv[5]); - chainv[4] = _mm_xor_si128(chainv[4], chainv[6]); 
chainv[5] = _mm_xor_si128(chainv[5], chainv[7]); MULT2( chainv[6], chainv[7]); - chainv[6] = _mm_xor_si128(chainv[6], chainv[8]); chainv[7] = _mm_xor_si128(chainv[7], chainv[9]); MULT2( chainv[8], chainv[9]); - - chainv[8] = _mm_xor_si128( chainv[8], t[0] ); - chainv[9] = _mm_xor_si128( chainv[9], t[1] ); - - t[0] = chainv[8]; - t[1] = chainv[9]; + t0 = chainv[8] = _mm_xor_si128( chainv[8], t0 ); + t1 = chainv[9] = _mm_xor_si128( chainv[9], t1 ); MULT2( chainv[8], chainv[9]); - chainv[8] = _mm_xor_si128( chainv[8], chainv[6] ); chainv[9] = _mm_xor_si128( chainv[9], chainv[7] ); MULT2( chainv[6], chainv[7]); - chainv[6] = _mm_xor_si128( chainv[6], chainv[4] ); chainv[7] = _mm_xor_si128( chainv[7], chainv[5] ); MULT2( chainv[4], chainv[5]); - chainv[4] = _mm_xor_si128( chainv[4], chainv[2] ); chainv[5] = _mm_xor_si128( chainv[5], chainv[3] ); MULT2( chainv[2], chainv[3] ); - chainv[2] = _mm_xor_si128( chainv[2], chainv[0] ); chainv[3] = _mm_xor_si128( chainv[3], chainv[1] ); MULT2( chainv[0], chainv[1] ); - - chainv[0] = _mm_xor_si128( _mm_xor_si128( chainv[0], t[0] ), msg0 ); - chainv[1] = _mm_xor_si128( _mm_xor_si128( chainv[1], t[1] ), msg1 ); + chainv[0] = _mm_xor_si128( _mm_xor_si128( chainv[0], t0 ), msg0 ); + chainv[1] = _mm_xor_si128( _mm_xor_si128( chainv[1], t1 ), msg1 ); MULT2( msg0, msg1); - chainv[2] = _mm_xor_si128( chainv[2], msg0 ); chainv[3] = _mm_xor_si128( chainv[3], msg1 ); MULT2( msg0, msg1); - chainv[4] = _mm_xor_si128( chainv[4], msg0 ); chainv[5] = _mm_xor_si128( chainv[5], msg1 ); MULT2( msg0, msg1); - chainv[6] = _mm_xor_si128( chainv[6], msg0 ); chainv[7] = _mm_xor_si128( chainv[7], msg1 ); MULT2( msg0, msg1); - chainv[8] = _mm_xor_si128( chainv[8], msg0 ); chainv[9] = _mm_xor_si128( chainv[9], msg1 ); MULT2( msg0, msg1); - - chainv[3] = _mm_or_si128( _mm_slli_epi32(chainv[3], 1), - _mm_srli_epi32(chainv[3], 31) ); - chainv[5] = _mm_or_si128( _mm_slli_epi32(chainv[5], 2), - _mm_srli_epi32(chainv[5], 30) ); - chainv[7] = _mm_or_si128( 
_mm_slli_epi32(chainv[7], 3), - _mm_srli_epi32(chainv[7], 29) ); - chainv[9] = _mm_or_si128( _mm_slli_epi32(chainv[9], 4), - _mm_srli_epi32(chainv[9], 28) ); - - - NMLTOM1024( chainv[0], chainv[2], chainv[4], chainv[6], - x[0], x[1], x[2], x[3], - chainv[1],chainv[3],chainv[5],chainv[7], - x[4], x[5], x[6], x[7] ); - - STEP_PART( &x[0], &CNS128[ 0], &tmp[0] ); - STEP_PART( &x[0], &CNS128[ 2], &tmp[0] ); - STEP_PART( &x[0], &CNS128[ 4], &tmp[0] ); - STEP_PART( &x[0], &CNS128[ 6], &tmp[0] ); - STEP_PART( &x[0], &CNS128[ 8], &tmp[0] ); - STEP_PART( &x[0], &CNS128[10], &tmp[0] ); - STEP_PART( &x[0], &CNS128[12], &tmp[0] ); - STEP_PART( &x[0], &CNS128[14], &tmp[0] ); - - MIXTON1024( x[0], x[1], x[2], x[3], - chainv[0], chainv[2], chainv[4],chainv[6], - x[4], x[5], x[6], x[7], - chainv[1],chainv[3],chainv[5],chainv[7]); - - /* Process last 256-bit block */ - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[16], CNS128[17], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[18], CNS128[19], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[20], CNS128[21], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[22], CNS128[23], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[24], CNS128[25], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[26], CNS128[27], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[28], CNS128[29], - tmp[0], tmp[1] ); - STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[30], CNS128[31], - tmp[0], tmp[1] ); + chainv[3] = mm128_rol_32( chainv[3], 1 ); + chainv[5] = mm128_rol_32( chainv[5], 2 ); + chainv[7] = mm128_rol_32( chainv[7], 3 ); + chainv[9] = mm128_rol_32( chainv[9], 4 ); + + NMLTOM1024( chainv[0], chainv[2], chainv[4], chainv[6], x0, x1, x2, x3, + chainv[1], chainv[3], chainv[5], chainv[7], x4, x5, x6, x7 ); + + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns( 0), cns( 1) ); + STEP_PART( x0, 
x1, x2, x3, x4, x5, x6, x7, cns( 2), cns( 3) ); + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns( 4), cns( 5) ); + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns( 6), cns( 7) ); + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns( 8), cns( 9) ); + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns(10), cns(11) ); + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns(12), cns(13) ); + STEP_PART( x0, x1, x2, x3, x4, x5, x6, x7, cns(14), cns(15) ); + + MIXTON1024( x0, x1, x2, x3, chainv[0], chainv[2], chainv[4], chainv[6], + x4, x5, x6, x7, chainv[1], chainv[3], chainv[5], chainv[7]); + + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(16), cns(17) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(18), cns(19) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(20), cns(21) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(22), cns(23) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(24), cns(25) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(26), cns(27) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(28), cns(29) ); + STEP_PART2( chainv[8], chainv[9], t0, t1, cns(30), cns(31) ); } @@ -588,51 +550,6 @@ static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 ) /* state: hash context */ /* b[8]: hash values */ -#if defined (__AVX2__) - -static void finalization512( hashState_luffa *state, uint32 *b ) -{ - uint32 hash[8] __attribute((aligned(64))); - __m256i* chainv = (__m256i*)state->chainv; - __m256i t; - const __m128i zero = m128_zero; - const __m256i shuff_bswap32 = _mm256_set_epi64x( 0x1c1d1e1f18191a1b, - 0x1415161710111213, - 0x0c0d0e0f08090a0b, - 0x0405060700010203 ); - - rnd512( state, zero, zero ); - - t = chainv[0]; - t = _mm256_xor_si256( t, chainv[1] ); - t = _mm256_xor_si256( t, chainv[2] ); - t = _mm256_xor_si256( t, chainv[3] ); - t = _mm256_xor_si256( t, chainv[4] ); - - t = _mm256_shuffle_epi32( t, 27 ); - - _mm256_store_si256( (__m256i*)hash, t ); - - casti_m256i( b, 0 ) = _mm256_shuffle_epi8( - casti_m256i( hash, 0 ), shuff_bswap32 ); - - 
rnd512( state, zero, zero ); - - t = chainv[0]; - t = _mm256_xor_si256( t, chainv[1] ); - t = _mm256_xor_si256( t, chainv[2] ); - t = _mm256_xor_si256( t, chainv[3] ); - t = _mm256_xor_si256( t, chainv[4] ); - t = _mm256_shuffle_epi32( t, 27 ); - - _mm256_store_si256( (__m256i*)hash, t ); - - casti_m256i( b, 1 ) = _mm256_shuffle_epi8( - casti_m256i( hash, 0 ), shuff_bswap32 ); -} - -#else - static void finalization512( hashState_luffa *state, uint32 *b ) { uint32 hash[8] __attribute((aligned(64))); @@ -685,6 +602,5 @@ static void finalization512( hashState_luffa *state, uint32 *b ) casti_m128i( b, 2 ) = mm128_bswap_32( casti_m128i( hash, 0 ) ); casti_m128i( b, 3 ) = mm128_bswap_32( casti_m128i( hash, 1 ) ); } -#endif /***************************************************/ diff --git a/algo/lyra2/allium-4way.c b/algo/lyra2/allium-4way.c index 118deefa..6a1ae344 100644 --- a/algo/lyra2/allium-4way.c +++ b/algo/lyra2/allium-4way.c @@ -212,7 +212,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce, const uint32_t last_nonce = max_nonce - 16; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m512i sixteen = m512_const1_32( 16 ); + const __m512i sixteen = _mm512_set1_epi32( 16 ); if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff; @@ -398,7 +398,7 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m256i eight = m256_const1_32( 8 ); + const __m256i eight = _mm256_set1_epi32( 8 ); // Prehash first block blake256_transform_le( phash, pdata, 512, 0 ); diff --git a/algo/lyra2/lyra2rev2-4way.c b/algo/lyra2/lyra2rev2-4way.c index 9d6a0093..72ed3665 100644 --- a/algo/lyra2/lyra2rev2-4way.c +++ b/algo/lyra2/lyra2rev2-4way.c @@ -203,7 +203,7 @@ int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) ); + 
*noncev = _mm512_add_epi32( *noncev, _mm512_set1_epi32( 16 ) ); n += 16; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -345,7 +345,7 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) ); + *noncev = _mm256_add_epi32( *noncev, _mm256_set1_epi32( 8 ) ); n += 8; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); pdata[19] = n; diff --git a/algo/lyra2/lyra2rev3-4way.c b/algo/lyra2/lyra2rev3-4way.c index 13c442a5..062b0508 100644 --- a/algo/lyra2/lyra2rev3-4way.c +++ b/algo/lyra2/lyra2rev3-4way.c @@ -287,7 +287,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) ); + *noncev = _mm256_add_epi32( *noncev, _mm256_set1_epi32( 8 ) ); n += 8; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -389,7 +389,7 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) ); + *noncev = _mm_add_epi32( *noncev, _mm_set1_epi32( 4 ) ); n += 4; } while ( (n < max_nonce-4) && !work_restart[thr_id].restart); pdata[19] = n; diff --git a/algo/lyra2/lyra2z-4way.c b/algo/lyra2/lyra2z-4way.c index b50b071e..6272747c 100644 --- a/algo/lyra2/lyra2z-4way.c +++ b/algo/lyra2/lyra2z-4way.c @@ -103,7 +103,7 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce, const uint32_t last_nonce = max_nonce - 16; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m512i sixteen = m512_const1_32( 16 ); + const __m512i sixteen = _mm512_set1_epi32( 16 ); if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff; @@ -213,7 +213,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce, 
uint32_t n = first_nonce; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m256i eight = m256_const1_32( 8 ); + const __m256i eight = _mm256_set1_epi32( 8 ); // Prehash first block blake256_transform_le( phash, pdata, 512, 0 ); @@ -328,7 +328,7 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) ); + *noncev = _mm_add_epi32( *noncev, _mm_set1_epi32( 4 ) ); n += 4; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); diff --git a/algo/lyra2/sponge.c b/algo/lyra2/sponge.c index 207b6b73..72abce33 100644 --- a/algo/lyra2/sponge.c +++ b/algo/lyra2/sponge.c @@ -62,10 +62,10 @@ inline void initState( uint64_t State[/*16*/] ) state[1] = zero; state[2] = zero; state[3] = zero; - state[4] = m128_const_64( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL ); - state[5] = m128_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL ); - state[6] = m128_const_64( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL ); - state[7] = m128_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL ); + state[4] = _mm_set_epi64x( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL ); + state[5] = _mm_set_epi64x( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL ); + state[6] = _mm_set_epi64x( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL ); + state[7] = _mm_set_epi64x( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL ); #else //First 512 bis are zeros @@ -299,10 +299,10 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In, state1 = state2 = state3 = m128_zero; - state4 = m128_const_64( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL ); - state5 = m128_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL ); - state6 = m128_const_64( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL ); - state7 = m128_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL ); + state4 = _mm_set_epi64x( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL ); + state5 = 
_mm_set_epi64x( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL ); + state6 = _mm_set_epi64x( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL ); + state7 = _mm_set_epi64x( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL ); for ( int i = 0; i < nBlocks; i++ ) { diff --git a/algo/lyra2/sponge.h b/algo/lyra2/sponge.h index 636cbf2c..98728a7d 100644 --- a/algo/lyra2/sponge.h +++ b/algo/lyra2/sponge.h @@ -43,27 +43,29 @@ static const uint64_t blake2b_IV[8] = 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL }; -/*Blake2b's rotation*/ -static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ - return ( w >> c ) | ( w << ( 64 - c ) ); -} - -// serial data is only 32 bytes so AVX2 is the limit for that dimension. -// However, 2 way parallel looks trivial to code for AVX512 except for -// a data dependency with rowa. - #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #define G2W_4X64(a,b,c,d) \ a = _mm512_add_epi64( a, b ); \ - d = mm512_ror_64( _mm512_xor_si512( d, a ), 32 ); \ + d = _mm512_ror_epi64( _mm512_xor_si512( d, a ), 32 ); \ c = _mm512_add_epi64( c, d ); \ - b = mm512_ror_64( _mm512_xor_si512( b, c ), 24 ); \ + b = _mm512_ror_epi64( _mm512_xor_si512( b, c ), 24 ); \ a = _mm512_add_epi64( a, b ); \ - d = mm512_ror_64( _mm512_xor_si512( d, a ), 16 ); \ + d = _mm512_ror_epi64( _mm512_xor_si512( d, a ), 16 ); \ c = _mm512_add_epi64( c, d ); \ - b = mm512_ror_64( _mm512_xor_si512( b, c ), 63 ); + b = _mm512_ror_epi64( _mm512_xor_si512( b, c ), 63 ); +#define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \ + G2W_4X64( s0, s1, s2, s3 ); \ + s0 = mm512_shufll256_64( s0 ); \ + s3 = mm512_swap256_128( s3); \ + s2 = mm512_shuflr256_64( s2 ); \ + G2W_4X64( s0, s1, s2, s3 ); \ + s0 = mm512_shuflr256_64( s0 ); \ + s3 = mm512_swap256_128( s3 ); \ + s2 = mm512_shufll256_64( s2 ); + +/* #define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \ G2W_4X64( s0, s1, s2, s3 ); \ s3 = mm512_shufll256_64( s3 ); \ @@ -73,6 +75,7 @@ static inline 
uint64_t rotr64( const uint64_t w, const unsigned c ){ s3 = mm512_shuflr256_64( s3 ); \ s1 = mm512_shufll256_64( s1 ); \ s2 = mm512_swap256_128( s2 ); +*/ #define LYRA_12_ROUNDS_2WAY_AVX512( s0, s1, s2, s3 ) \ LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \ @@ -88,13 +91,10 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \ LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) - #endif // AVX512 -#if defined __AVX2__ +#if defined(__AVX2__) -// process 4 columns in parallel -// returns void, updates all args #define G_4X64(a,b,c,d) \ a = _mm256_add_epi64( a, b ); \ d = mm256_swap64_32( _mm256_xor_si256( d, a ) ); \ @@ -105,6 +105,18 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ c = _mm256_add_epi64( c, d ); \ b = mm256_ror_64( _mm256_xor_si256( b, c ), 63 ); +// Pivot about s1 instead of s0 reduces latency. +#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \ + G_4X64( s0, s1, s2, s3 ); \ + s0 = mm256_shufll_64( s0 ); \ + s3 = mm256_swap_128( s3); \ + s2 = mm256_shuflr_64( s2 ); \ + G_4X64( s0, s1, s2, s3 ); \ + s0 = mm256_shuflr_64( s0 ); \ + s3 = mm256_swap_128( s3 ); \ + s2 = mm256_shufll_64( s2 ); + +/* #define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \ G_4X64( s0, s1, s2, s3 ); \ s3 = mm256_shufll_64( s3 ); \ @@ -114,6 +126,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ s3 = mm256_shuflr_64( s3 ); \ s1 = mm256_shufll_64( s1 ); \ s2 = mm256_swap_128( s2 ); +*/ #define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \ LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \ @@ -182,8 +195,13 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ #endif // AVX2 else SSE2 -// Scalar -//Blake2b's G function +/* +// Scalar, not used. 
+ +static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + #define G(r,i,a,b,c,d) \ do { \ a = a + b; \ @@ -196,8 +214,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ b = rotr64(b ^ c, 63); \ } while(0) - -/*One Round of the Blake2b's compression function*/ #define ROUND_LYRA(r) \ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ @@ -207,6 +223,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ G(r,5,v[ 1],v[ 6],v[11],v[12]); \ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); +*/ #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) diff --git a/algo/quark/anime-4way.c b/algo/quark/anime-4way.c index e93afce7..030cc3e8 100644 --- a/algo/quark/anime-4way.c +++ b/algo/quark/anime-4way.c @@ -51,7 +51,7 @@ void anime_8way_hash( void *state, const void *input ) __m512i* vhA = (__m512i*)vhashA; __m512i* vhB = (__m512i*)vhashB; __m512i* vhC = (__m512i*)vhashC; - const __m512i bit3_mask = m512_const1_64( 8 ); + const __m512i bit3_mask = _mm512_set1_epi64( 8 ); __mmask8 vh_mask; anime_8way_context_overlay ctx __attribute__ ((aligned (64))); @@ -209,7 +209,7 @@ int scanhash_anime_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -248,7 +248,7 @@ void anime_4way_hash( void *state, const void *input ) __m256i* vhB = (__m256i*)vhashB; __m256i vh_mask; int h_mask; - const __m256i bit3_mask = m256_const1_64( 8 ); + const __m256i bit3_mask = _mm256_set1_epi64x( 8 ); const __m256i zero = _mm256_setzero_si256(); anime_4way_context_overlay ctx __attribute__ ((aligned (64))); @@ -388,7 +388,7 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce, } } *noncev = 
_mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; diff --git a/algo/quark/hmq1725-4way.c b/algo/quark/hmq1725-4way.c index dbbf10c9..45b10115 100644 --- a/algo/quark/hmq1725-4way.c +++ b/algo/quark/hmq1725-4way.c @@ -75,7 +75,7 @@ extern void hmq1725_8way_hash(void *state, const void *input) uint32_t hash7 [16] __attribute__ ((aligned (32))); hmq1725_8way_context_overlay ctx __attribute__ ((aligned (64))); __mmask8 vh_mask; - const __m512i vmask = m512_const1_64( 24 ); + const __m512i vmask = _mm512_set1_epi64( 24 ); const uint32_t mask = 24; __m512i* vh = (__m512i*)vhash; __m512i* vhA = (__m512i*)vhashA; @@ -593,7 +593,7 @@ int scanhash_hmq1725_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); @@ -647,7 +647,7 @@ extern void hmq1725_4way_hash(void *state, const void *input) hmq1725_4way_context_overlay ctx __attribute__ ((aligned (64))); __m256i vh_mask; int h_mask; - const __m256i vmask = m256_const1_64( 24 ); + const __m256i vmask = _mm256_set1_epi64x( 24 ); const uint32_t mask = 24; __m256i* vh = (__m256i*)vhash; __m256i* vhA = (__m256i*)vhashA; @@ -1041,7 +1041,7 @@ int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; diff --git a/algo/quark/quark-4way.c b/algo/quark/quark-4way.c index adcec11e..af543463 100644 --- a/algo/quark/quark-4way.c +++ b/algo/quark/quark-4way.c @@ -67,7 +67,7 @@ void quark_8way_hash( void *state, const void *input ) __mmask8 vh_mask; 
quark_8way_ctx_holder ctx; const uint32_t mask = 8; - const __m512i bit3_mask = m512_const1_64( mask ); + const __m512i bit3_mask = _mm512_set1_epi64( mask ); memcpy( &ctx, &quark_8way_ctx, sizeof(quark_8way_ctx) ); @@ -224,7 +224,7 @@ int scanhash_quark_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); @@ -271,7 +271,7 @@ void quark_4way_hash( void *state, const void *input ) __m256i vh_mask; int h_mask; quark_4way_ctx_holder ctx; - const __m256i bit3_mask = m256_const1_64( 8 ); + const __m256i bit3_mask = _mm256_set1_epi64x( 8 ); const __m256i zero = _mm256_setzero_si256(); memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) ); @@ -397,7 +397,7 @@ int scanhash_quark_4way( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); diff --git a/algo/ripemd/ripemd-hash-4way.c b/algo/ripemd/ripemd-hash-4way.c index 38de159f..2a98afb1 100644 --- a/algo/ripemd/ripemd-hash-4way.c +++ b/algo/ripemd/ripemd-hash-4way.c @@ -47,7 +47,7 @@ static const uint32_t IV[5] = do{ \ a = _mm_add_epi32( mm128_rol_32( _mm_add_epi32( _mm_add_epi32( \ _mm_add_epi32( a, f( b ,c, d ) ), r ), \ - m128_const1_64( k ) ), s ), e ); \ + _mm_set1_epi64x( k ) ), s ), e ); \ c = mm128_rol_32( c, 10 );\ } while (0) @@ -251,11 +251,11 @@ static void ripemd160_4way_round( ripemd160_4way_context *sc ) void ripemd160_4way_init( ripemd160_4way_context *sc ) { - sc->val[0] = m128_const1_64( 0x6745230167452301 ); - sc->val[1] = m128_const1_64( 0xEFCDAB89EFCDAB89 ); - sc->val[2] = m128_const1_64( 0x98BADCFE98BADCFE ); - sc->val[3] = m128_const1_64( 0x1032547610325476 ); - sc->val[4] = m128_const1_64( 
0xC3D2E1F0C3D2E1F0 ); + sc->val[0] = _mm_set1_epi64x( 0x6745230167452301 ); + sc->val[1] = _mm_set1_epi64x( 0xEFCDAB89EFCDAB89 ); + sc->val[2] = _mm_set1_epi64x( 0x98BADCFE98BADCFE ); + sc->val[3] = _mm_set1_epi64x( 0x1032547610325476 ); + sc->val[4] = _mm_set1_epi64x( 0xC3D2E1F0C3D2E1F0 ); sc->count_high = sc->count_low = 0; } @@ -347,7 +347,7 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst ) do{ \ a = _mm256_add_epi32( mm256_rol_32( _mm256_add_epi32( _mm256_add_epi32( \ _mm256_add_epi32( a, f( b ,c, d ) ), r ), \ - m256_const1_64( k ) ), s ), e ); \ + _mm256_set1_epi64x( k ) ), s ), e ); \ c = mm256_rol_32( c, 10 );\ } while (0) @@ -552,11 +552,11 @@ static void ripemd160_8way_round( ripemd160_8way_context *sc ) void ripemd160_8way_init( ripemd160_8way_context *sc ) { - sc->val[0] = m256_const1_64( 0x6745230167452301 ); - sc->val[1] = m256_const1_64( 0xEFCDAB89EFCDAB89 ); - sc->val[2] = m256_const1_64( 0x98BADCFE98BADCFE ); - sc->val[3] = m256_const1_64( 0x1032547610325476 ); - sc->val[4] = m256_const1_64( 0xC3D2E1F0C3D2E1F0 ); + sc->val[0] = _mm256_set1_epi64x( 0x6745230167452301 ); + sc->val[1] = _mm256_set1_epi64x( 0xEFCDAB89EFCDAB89 ); + sc->val[2] = _mm256_set1_epi64x( 0x98BADCFE98BADCFE ); + sc->val[3] = _mm256_set1_epi64x( 0x1032547610325476 ); + sc->val[4] = _mm256_set1_epi64x( 0xC3D2E1F0C3D2E1F0 ); sc->count_high = sc->count_low = 0; } @@ -649,7 +649,7 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst ) do{ \ a = _mm512_add_epi32( mm512_rol_32( _mm512_add_epi32( _mm512_add_epi32( \ _mm512_add_epi32( a, f( b ,c, d ) ), r ), \ - m512_const1_64( k ) ), s ), e ); \ + _mm512_set1_epi64( k ) ), s ), e ); \ c = mm512_rol_32( c, 10 );\ } while (0) @@ -853,11 +853,11 @@ static void ripemd160_16way_round( ripemd160_16way_context *sc ) void ripemd160_16way_init( ripemd160_16way_context *sc ) { - sc->val[0] = m512_const1_64( 0x6745230167452301 ); - sc->val[1] = m512_const1_64( 0xEFCDAB89EFCDAB89 ); - sc->val[2] = m512_const1_64( 
0x98BADCFE98BADCFE ); - sc->val[3] = m512_const1_64( 0x1032547610325476 ); - sc->val[4] = m512_const1_64( 0xC3D2E1F0C3D2E1F0 ); + sc->val[0] = _mm512_set1_epi64( 0x6745230167452301 ); + sc->val[1] = _mm512_set1_epi64( 0xEFCDAB89EFCDAB89 ); + sc->val[2] = _mm512_set1_epi64( 0x98BADCFE98BADCFE ); + sc->val[3] = _mm512_set1_epi64( 0x1032547610325476 ); + sc->val[4] = _mm512_set1_epi64( 0xC3D2E1F0C3D2E1F0 ); sc->count_high = sc->count_low = 0; } @@ -902,7 +902,7 @@ void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst ) const int pad = block_size - 8; ptr = (unsigned)sc->count_low & ( block_size - 1U); - sc->buf[ ptr>>2 ] = m512_const1_32( 0x80 ); + sc->buf[ ptr>>2 ] = _mm512_set1_epi32( 0x80 ); ptr += 4; if ( ptr > pad ) diff --git a/algo/sha/sha256-hash-4way.c b/algo/sha/sha256-hash-4way.c index de170f80..d05c6621 100644 --- a/algo/sha/sha256-hash-4way.c +++ b/algo/sha/sha256-hash-4way.c @@ -311,7 +311,7 @@ int sha256_4way_transform_le_short( __m128i *state_out, const __m128i *data, __m128i A, B, C, D, E, F, G, H; __m128i W[16]; memcpy_128( W, data, 16 ); // Value required by H after round 60 to produce valid final hash - const __m128i H_ = m128_const1_32( 0x136032ED ); + const __m128i H_ = _mm_set1_epi32( 0x136032ED ); A = _mm_load_si128( state_in ); B = _mm_load_si128( state_in+1 ); @@ -408,14 +408,14 @@ int sha256_4way_transform_le_short( __m128i *state_out, const __m128i *data, void sha256_4way_init( sha256_4way_context *sc ) { sc->count_high = sc->count_low = 0; - sc->val[0] = m128_const1_64( 0x6A09E6676A09E667 ); - sc->val[1] = m128_const1_64( 0xBB67AE85BB67AE85 ); - sc->val[2] = m128_const1_64( 0x3C6EF3723C6EF372 ); - sc->val[3] = m128_const1_64( 0xA54FF53AA54FF53A ); - sc->val[4] = m128_const1_64( 0x510E527F510E527F ); - sc->val[5] = m128_const1_64( 0x9B05688C9B05688C ); - sc->val[6] = m128_const1_64( 0x1F83D9AB1F83D9AB ); - sc->val[7] = m128_const1_64( 0x5BE0CD195BE0CD19 ); + sc->val[0] = _mm_set1_epi64x( 0x6A09E6676A09E667 ); + sc->val[1] = 
_mm_set1_epi64x( 0xBB67AE85BB67AE85 ); + sc->val[2] = _mm_set1_epi64x( 0x3C6EF3723C6EF372 ); + sc->val[3] = _mm_set1_epi64x( 0xA54FF53AA54FF53A ); + sc->val[4] = _mm_set1_epi64x( 0x510E527F510E527F ); + sc->val[5] = _mm_set1_epi64x( 0x9B05688C9B05688C ); + sc->val[6] = _mm_set1_epi64x( 0x1F83D9AB1F83D9AB ); + sc->val[7] = _mm_set1_epi64x( 0x5BE0CD195BE0CD19 ); } void sha256_4way_update( sha256_4way_context *sc, const void *data, size_t len ) @@ -458,7 +458,7 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst ) const int pad = buf_size - 8; ptr = (unsigned)sc->count_low & (buf_size - 1U); - sc->buf[ ptr>>2 ] = m128_const1_64( 0x0000008000000080 ); + sc->buf[ ptr>>2 ] = _mm_set1_epi64x( 0x0000008000000080 ); ptr += 4; if ( ptr > pad ) @@ -474,8 +474,8 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst ) high = (sc->count_high << 3) | (low >> 29); low = low << 3; - sc->buf[ pad >> 2 ] = m128_const1_32( bswap_32( high ) ); - sc->buf[( pad+4 ) >> 2 ] = m128_const1_32( bswap_32( low ) ); + sc->buf[ pad >> 2 ] = _mm_set1_epi32( bswap_32( high ) ); + sc->buf[( pad+4 ) >> 2 ] = _mm_set1_epi32( bswap_32( low ) ); sha256_4way_transform_be( sc->val, sc->buf, sc->val ); mm128_block_bswap_32( dst, sc->val ); @@ -589,7 +589,6 @@ do { \ _mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \ Y_xor_Z ) ) - #define SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, i, j ) \ do { \ __m256i T0 = _mm256_add_epi32( _mm256_set1_epi32( K256[(j)+(i)] ), W[i] ); \ @@ -863,7 +862,7 @@ int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data, { __m256i A, B, C, D, E, F, G, H; __m256i W[16]; memcpy_256( W, data, 16 ); - const __m256i H_ = m256_const1_32( 0x136032ED ); + const __m256i H_ = _mm256_set1_epi32( 0x136032ED ); A = _mm256_load_si256( state_in ); B = _mm256_load_si256( state_in+1 ); @@ -979,14 +978,14 @@ int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data, void sha256_8way_init( sha256_8way_context *sc ) { 
sc->count_high = sc->count_low = 0; - sc->val[0] = m256_const1_64( 0x6A09E6676A09E667 ); - sc->val[1] = m256_const1_64( 0xBB67AE85BB67AE85 ); - sc->val[2] = m256_const1_64( 0x3C6EF3723C6EF372 ); - sc->val[3] = m256_const1_64( 0xA54FF53AA54FF53A ); - sc->val[4] = m256_const1_64( 0x510E527F510E527F ); - sc->val[5] = m256_const1_64( 0x9B05688C9B05688C ); - sc->val[6] = m256_const1_64( 0x1F83D9AB1F83D9AB ); - sc->val[7] = m256_const1_64( 0x5BE0CD195BE0CD19 ); + sc->val[0] = _mm256_set1_epi64x( 0x6A09E6676A09E667 ); + sc->val[1] = _mm256_set1_epi64x( 0xBB67AE85BB67AE85 ); + sc->val[2] = _mm256_set1_epi64x( 0x3C6EF3723C6EF372 ); + sc->val[3] = _mm256_set1_epi64x( 0xA54FF53AA54FF53A ); + sc->val[4] = _mm256_set1_epi64x( 0x510E527F510E527F ); + sc->val[5] = _mm256_set1_epi64x( 0x9B05688C9B05688C ); + sc->val[6] = _mm256_set1_epi64x( 0x1F83D9AB1F83D9AB ); + sc->val[7] = _mm256_set1_epi64x( 0x5BE0CD195BE0CD19 ); } // need to handle odd byte length for yespower. @@ -1032,7 +1031,7 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst ) const int pad = buf_size - 8; ptr = (unsigned)sc->count_low & (buf_size - 1U); - sc->buf[ ptr>>2 ] = m256_const1_64( 0x0000008000000080 ); + sc->buf[ ptr>>2 ] = _mm256_set1_epi64x( 0x0000008000000080 ); ptr += 4; if ( ptr > pad ) @@ -1048,8 +1047,8 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst ) high = (sc->count_high << 3) | (low >> 29); low = low << 3; - sc->buf[ pad >> 2 ] = m256_const1_32( bswap_32( high ) ); - sc->buf[ ( pad+4 ) >> 2 ] = m256_const1_32( bswap_32( low ) ); + sc->buf[ pad >> 2 ] = _mm256_set1_epi32( bswap_32( high ) ); + sc->buf[ ( pad+4 ) >> 2 ] = _mm256_set1_epi32( bswap_32( low ) ); sha256_8way_transform_be( sc->val, sc->buf, sc->val ); @@ -1360,7 +1359,7 @@ int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data, // Value for H at round 60, before adding K, needed to produce valid final // hash where H == 0. 
// H_ = -( H256[7] + K256[60] ); - const __m512i H_ = m512_const1_32( 0x136032ED ); + const __m512i H_ = _mm512_set1_epi32( 0x136032ED ); A = _mm512_load_si512( state_in ); B = _mm512_load_si512( state_in+1 ); @@ -1453,14 +1452,14 @@ int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data, void sha256_16way_init( sha256_16way_context *sc ) { sc->count_high = sc->count_low = 0; - sc->val[0] = m512_const1_64( 0x6A09E6676A09E667 ); - sc->val[1] = m512_const1_64( 0xBB67AE85BB67AE85 ); - sc->val[2] = m512_const1_64( 0x3C6EF3723C6EF372 ); - sc->val[3] = m512_const1_64( 0xA54FF53AA54FF53A ); - sc->val[4] = m512_const1_64( 0x510E527F510E527F ); - sc->val[5] = m512_const1_64( 0x9B05688C9B05688C ); - sc->val[6] = m512_const1_64( 0x1F83D9AB1F83D9AB ); - sc->val[7] = m512_const1_64( 0x5BE0CD195BE0CD19 ); + sc->val[0] = _mm512_set1_epi64( 0x6A09E6676A09E667 ); + sc->val[1] = _mm512_set1_epi64( 0xBB67AE85BB67AE85 ); + sc->val[2] = _mm512_set1_epi64( 0x3C6EF3723C6EF372 ); + sc->val[3] = _mm512_set1_epi64( 0xA54FF53AA54FF53A ); + sc->val[4] = _mm512_set1_epi64( 0x510E527F510E527F ); + sc->val[5] = _mm512_set1_epi64( 0x9B05688C9B05688C ); + sc->val[6] = _mm512_set1_epi64( 0x1F83D9AB1F83D9AB ); + sc->val[7] = _mm512_set1_epi64( 0x5BE0CD195BE0CD19 ); } void sha256_16way_update( sha256_16way_context *sc, const void *data, @@ -1504,7 +1503,7 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst ) const int pad = buf_size - 8; ptr = (unsigned)sc->count_low & (buf_size - 1U); - sc->buf[ ptr>>2 ] = m512_const1_64( 0x0000008000000080 ); + sc->buf[ ptr>>2 ] = _mm512_set1_epi64( 0x0000008000000080 ); ptr += 4; if ( ptr > pad ) @@ -1520,8 +1519,8 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst ) high = (sc->count_high << 3) | (low >> 29); low = low << 3; - sc->buf[ pad >> 2 ] = m512_const1_32( bswap_32( high ) ); - sc->buf[ ( pad+4 ) >> 2 ] = m512_const1_32( bswap_32( low ) ); + sc->buf[ pad >> 2 ] = _mm512_set1_epi32( bswap_32( high ) ); + 
sc->buf[ ( pad+4 ) >> 2 ] = _mm512_set1_epi32( bswap_32( low ) ); sha256_16way_transform_be( sc->val, sc->buf, sc->val ); diff --git a/algo/sha/sha256d-4way.c b/algo/sha/sha256d-4way.c index 18eceffe..6feccdf9 100644 --- a/algo/sha/sha256d-4way.c +++ b/algo/sha/sha256d-4way.c @@ -28,32 +28,32 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce, __m512i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m512i last_byte = m512_const1_32( 0x80000000 ); - const __m512i sixteen = m512_const1_32( 16 ); + const __m512i last_byte = _mm512_set1_epi32( 0x80000000 ); + const __m512i sixteen = _mm512_set1_epi32( 16 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m512_const1_32( pdata[i] ); + vdata[i] = _mm512_set1_epi32( pdata[i] ); *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8, n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_512( vdata+16 + 5, 10 ); - vdata[16+15] = m512_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm512_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_512( block + 9, 6 ); - block[15] = m512_const1_32( 32*8 ); // bit count + block[15] = _mm512_set1_epi32( 32*8 ); // bit count // initialize state - initstate[0] = m512_const1_64( 0x6A09E6676A09E667 ); - initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 ); - initstate[3] = m512_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m512_const1_64( 0x510E527F510E527F ); - initstate[5] = m512_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm512_set1_epi64( 0x6A09E6676A09E667 ); + initstate[1] = _mm512_set1_epi64( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm512_set1_epi64( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm512_set1_epi64( 0xA54FF53AA54FF53A ); + initstate[4] = _mm512_set1_epi64( 0x510E527F510E527F 
); + initstate[5] = _mm512_set1_epi64( 0x9B05688C9B05688C ); + initstate[6] = _mm512_set1_epi64( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm512_set1_epi64( 0x5BE0CD195BE0CD19 ); sha256_16way_transform_le( midstate1, vdata, initstate ); @@ -116,31 +116,31 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce, __m256i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m256i last_byte = m256_const1_32( 0x80000000 ); - const __m256i eight = m256_const1_32( 8 ); + const __m256i last_byte = _mm256_set1_epi32( 0x80000000 ); + const __m256i eight = _mm256_set1_epi32( 8 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m256_const1_32( pdata[i] ); + vdata[i] = _mm256_set1_epi32( pdata[i] ); *noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_256( vdata+16 + 5, 10 ); - vdata[16+15] = m256_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm256_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_256( block + 9, 6 ); - block[15] = m256_const1_32( 32*8 ); // bit count + block[15] = _mm256_set1_epi32( 32*8 ); // bit count // initialize state - initstate[0] = m256_const1_64( 0x6A09E6676A09E667 ); - initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m256_const1_64( 0x3C6EF3723C6EF372 ); - initstate[3] = m256_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m256_const1_64( 0x510E527F510E527F ); - initstate[5] = m256_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm256_set1_epi64x( 0x6A09E6676A09E667 ); + initstate[1] = _mm256_set1_epi64x( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm256_set1_epi64x( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm256_set1_epi64x( 0xA54FF53AA54FF53A ); + initstate[4] = _mm256_set1_epi64x( 0x510E527F510E527F ); + initstate[5] = _mm256_set1_epi64x( 0x9B05688C9B05688C ); + initstate[6] = 
_mm256_set1_epi64x( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm256_set1_epi64x( 0x5BE0CD195BE0CD19 ); sha256_8way_transform_le( midstate1, vdata, initstate ); @@ -204,31 +204,31 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce, __m128i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m128i last_byte = m128_const1_32( 0x80000000 ); - const __m128i four = m128_const1_32( 4 ); + const __m128i last_byte = _mm_set1_epi32( 0x80000000 ); + const __m128i four = _mm_set1_epi32( 4 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m128_const1_32( pdata[i] ); + vdata[i] = _mm_set1_epi32( pdata[i] ); *noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_128( vdata+16 + 5, 10 ); - vdata[16+15] = m128_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_128( block + 9, 6 ); - block[15] = m128_const1_32( 32*8 ); // bit count + block[15] = _mm_set1_epi32( 32*8 ); // bit count // initialize state - initstate[0] = m128_const1_64( 0x6A09E6676A09E667 ); - initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m128_const1_64( 0x3C6EF3723C6EF372 ); - initstate[3] = m128_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m128_const1_64( 0x510E527F510E527F ); - initstate[5] = m128_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m128_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm_set1_epi64x( 0x6A09E6676A09E667 ); + initstate[1] = _mm_set1_epi64x( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm_set1_epi64x( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm_set1_epi64x( 0xA54FF53AA54FF53A ); + initstate[4] = _mm_set1_epi64x( 0x510E527F510E527F ); + initstate[5] = _mm_set1_epi64x( 0x9B05688C9B05688C ); + initstate[6] = _mm_set1_epi64x( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm_set1_epi64x( 0x5BE0CD195BE0CD19 ); // hash first 64 bytes of data 
sha256_4way_transform_le( midstate1, vdata, initstate ); diff --git a/algo/sha/sha256q-4way.c b/algo/sha/sha256q-4way.c index 19b8d107..a57c80b3 100644 --- a/algo/sha/sha256q-4way.c +++ b/algo/sha/sha256q-4way.c @@ -68,7 +68,7 @@ int scanhash_sha256q_16way( struct work *work, const uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) ); + *noncev = _mm512_add_epi32( *noncev, _mm512_set1_epi32( 16 ) ); n += 16; } while ( (n < last_nonce) && !work_restart[thr_id].restart ); pdata[19] = n; @@ -140,7 +140,7 @@ int scanhash_sha256q_8way( struct work *work, const uint32_t max_nonce, submit_solution( work, lane_hash, mythr ); } } - *noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) ); + *noncev = _mm256_add_epi32( *noncev, _mm256_set1_epi32( 8 ) ); n += 8; } while ( (n < last_nonce) && !work_restart[thr_id].restart ); pdata[19] = n; diff --git a/algo/sha/sha256t-4way.c b/algo/sha/sha256t-4way.c index 9c1677b1..7688c3d5 100644 --- a/algo/sha/sha256t-4way.c +++ b/algo/sha/sha256t-4way.c @@ -28,31 +28,31 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce, __m512i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m512i last_byte = m512_const1_32( 0x80000000 ); - const __m512i sixteen = m512_const1_32( 16 ); + const __m512i last_byte = _mm512_set1_epi32( 0x80000000 ); + const __m512i sixteen = _mm512_set1_epi32( 16 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m512_const1_32( pdata[i] ); + vdata[i] = _mm512_set1_epi32( pdata[i] ); *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8, n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_512( vdata+16 + 5, 10 ); - vdata[16+15] = m512_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm512_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_512( block + 9, 6 ); - block[15] = m512_const1_32( 32*8 ); // bit count + 
block[15] = _mm512_set1_epi32( 32*8 ); // bit count - initstate[0] = m512_const1_64( 0x6A09E6676A09E667 ); - initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 ); - initstate[3] = m512_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m512_const1_64( 0x510E527F510E527F ); - initstate[5] = m512_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm512_set1_epi64( 0x6A09E6676A09E667 ); + initstate[1] = _mm512_set1_epi64( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm512_set1_epi64( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm512_set1_epi64( 0xA54FF53AA54FF53A ); + initstate[4] = _mm512_set1_epi64( 0x510E527F510E527F ); + initstate[5] = _mm512_set1_epi64( 0x9B05688C9B05688C ); + initstate[6] = _mm512_set1_epi64( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm512_set1_epi64( 0x5BE0CD195BE0CD19 ); sha256_16way_transform_le( midstate1, vdata, initstate ); @@ -120,31 +120,31 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce, __m256i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m256i last_byte = m256_const1_32( 0x80000000 ); - const __m256i eight = m256_const1_32( 8 ); + const __m256i last_byte = _mm256_set1_epi32( 0x80000000 ); + const __m256i eight = _mm256_set1_epi32( 8 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m256_const1_32( pdata[i] ); + vdata[i] = _mm256_set1_epi32( pdata[i] ); *noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_256( vdata+16 + 5, 10 ); - vdata[16+15] = m256_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm256_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_256( block + 9, 6 ); - block[15] = m256_const1_32( 32*8 ); // bit count + block[15] = _mm256_set1_epi32( 32*8 ); // bit count // initialize state - initstate[0] = m256_const1_64( 
0x6A09E6676A09E667 ); - initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m256_const1_64( 0x3C6EF3723C6EF372 ); - initstate[3] = m256_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m256_const1_64( 0x510E527F510E527F ); - initstate[5] = m256_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm256_set1_epi64x( 0x6A09E6676A09E667 ); + initstate[1] = _mm256_set1_epi64x( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm256_set1_epi64x( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm256_set1_epi64x( 0xA54FF53AA54FF53A ); + initstate[4] = _mm256_set1_epi64x( 0x510E527F510E527F ); + initstate[5] = _mm256_set1_epi64x( 0x9B05688C9B05688C ); + initstate[6] = _mm256_set1_epi64x( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm256_set1_epi64x( 0x5BE0CD195BE0CD19 ); sha256_8way_transform_le( midstate1, vdata, initstate ); @@ -215,31 +215,31 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce, __m128i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m128i last_byte = m128_const1_32( 0x80000000 ); - const __m128i four = m128_const1_32( 4 ); + const __m128i last_byte = _mm_set1_epi32( 0x80000000 ); + const __m128i four = _mm_set1_epi32( 4 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m128_const1_32( pdata[i] ); + vdata[i] = _mm_set1_epi32( pdata[i] ); *noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_128( vdata+16 + 5, 10 ); - vdata[16+15] = m128_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_128( block + 9, 6 ); - block[15] = m128_const1_32( 32*8 ); // bit count + block[15] = _mm_set1_epi32( 32*8 ); // bit count // initialize state - initstate[0] = m128_const1_64( 0x6A09E6676A09E667 ); - initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m128_const1_64( 0x3C6EF3723C6EF372 
); - initstate[3] = m128_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m128_const1_64( 0x510E527F510E527F ); - initstate[5] = m128_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m128_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm_set1_epi64x( 0x6A09E6676A09E667 ); + initstate[1] = _mm_set1_epi64x( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm_set1_epi64x( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm_set1_epi64x( 0xA54FF53AA54FF53A ); + initstate[4] = _mm_set1_epi64x( 0x510E527F510E527F ); + initstate[5] = _mm_set1_epi64x( 0x9B05688C9B05688C ); + initstate[6] = _mm_set1_epi64x( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm_set1_epi64x( 0x5BE0CD195BE0CD19 ); // hash first 64 bytes of data sha256_4way_transform_le( midstate1, vdata, initstate ); @@ -302,31 +302,31 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce, __m128i *noncev = vdata + 19; const int thr_id = mythr->id; const bool bench = opt_benchmark; - const __m128i last_byte = m128_const1_32( 0x80000000 ); - const __m128i four = m128_const1_32( 4 ); + const __m128i last_byte = _mm_set1_epi32( 0x80000000 ); + const __m128i four = _mm_set1_epi32( 4 ); for ( int i = 0; i < 19; i++ ) - vdata[i] = m128_const1_32( pdata[i] ); + vdata[i] = _mm_set1_epi32( pdata[i] ); *noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n ); vdata[16+4] = last_byte; memset_zero_128( vdata+16 + 5, 10 ); - vdata[16+15] = m128_const1_32( 80*8 ); // bit count + vdata[16+15] = _mm_set1_epi32( 80*8 ); // bit count block[ 8] = last_byte; memset_zero_128( block + 9, 6 ); - block[15] = m128_const1_32( 32*8 ); // bit count + block[15] = _mm_set1_epi32( 32*8 ); // bit count // initialize state - initstate[0] = m128_const1_64( 0x6A09E6676A09E667 ); - initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 ); - initstate[2] = m128_const1_64( 0x3C6EF3723C6EF372 ); - initstate[3] = m128_const1_64( 0xA54FF53AA54FF53A ); - initstate[4] = m128_const1_64( 0x510E527F510E527F ); - 
initstate[5] = m128_const1_64( 0x9B05688C9B05688C ); - initstate[6] = m128_const1_64( 0x1F83D9AB1F83D9AB ); - initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 ); + initstate[0] = _mm_set1_epi64x( 0x6A09E6676A09E667 ); + initstate[1] = _mm_set1_epi64x( 0xBB67AE85BB67AE85 ); + initstate[2] = _mm_set1_epi64x( 0x3C6EF3723C6EF372 ); + initstate[3] = _mm_set1_epi64x( 0xA54FF53AA54FF53A ); + initstate[4] = _mm_set1_epi64x( 0x510E527F510E527F ); + initstate[5] = _mm_set1_epi64x( 0x9B05688C9B05688C ); + initstate[6] = _mm_set1_epi64x( 0x1F83D9AB1F83D9AB ); + initstate[7] = _mm_set1_epi64x( 0x5BE0CD195BE0CD19 ); // hash first 64 bytes of data sha256_4way_transform_le( midstate, vdata, initstate ); diff --git a/algo/sha/sha512-hash-4way.c b/algo/sha/sha512-hash-4way.c index 1fb4bdb2..0cbd989c 100644 --- a/algo/sha/sha512-hash-4way.c +++ b/algo/sha/sha512-hash-4way.c @@ -243,7 +243,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst ) 0x08090a0b0c0d0e0f, 0x0001020304050607 ) ); ptr = (unsigned)sc->count & (buf_size - 1U); - sc->buf[ ptr>>3 ] = m512_const1_64( 0x80 ); + sc->buf[ ptr>>3 ] = _mm512_set1_epi64( 0x80 ); ptr += 8; if ( ptr > pad ) { @@ -268,51 +268,56 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst ) // SHA-512 4 way 64 bit +#define BSG5_0( x ) mm256_xor3( mm256_ror_64( x, 28 ), \ + mm256_ror_64( x, 34 ), \ + mm256_ror_64( x, 39 ) ) + +#define BSG5_1( x ) mm256_xor3( mm256_ror_64( x, 14 ), \ + mm256_ror_64( x, 18 ), \ + mm256_ror_64( x, 41 ) ) + +#define SSG5_0( x ) mm256_xor3( mm256_ror_64( x, 1 ), \ + mm256_ror_64( x, 8 ), \ + _mm256_srli_epi64( x, 7 ) ) + +#define SSG5_1( x ) mm256_xor3( mm256_ror_64( x, 19 ), \ + mm256_ror_64( x, 61 ), \ + _mm256_srli_epi64( x, 6 ) ) + +#if defined(__AVX512VL__) +//TODO Enable for AVX10_256 +// 4 way is not used with AVX512 but will be with AVX10_256 when it +// becomes available. 
+ +#define CH( X, Y, Z ) _mm256_ternarylogic_epi64( X, Y, Z, 0xca ) + +#define MAJ( X, Y, Z ) _mm256_ternarylogic_epi64( X, Y, Z, 0xe8 ) + +#define SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i ) \ +do { \ + __m256i T0 = _mm256_add_epi64( _mm256_set1_epi64x( K512[i] ), W[i] ); \ + __m256i T1 = BSG5_1( E ); \ + __m256i T2 = BSG5_0( A ); \ + T0 = _mm256_add_epi64( T0, CH( E, F, G ) ); \ + T1 = _mm256_add_epi64( T1, H ); \ + T2 = _mm256_add_epi64( T2, MAJ( A, B, C ) ); \ + T1 = _mm256_add_epi64( T1, T0 ); \ + D = _mm256_add_epi64( D, T1 ); \ + H = _mm256_add_epi64( T1, T2 ); \ +} while (0) + +#else // AVX2 only + #define CH(X, Y, Z) \ _mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z ) #define MAJ(X, Y, Z) \ _mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \ Y_xor_Z ) ) - -#define BSG5_0(x) \ - mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \ - _mm256_xor_si256( mm256_ror_64( x, 5 ), x ), 6 ), x ), 28 ) - -#define BSG5_1(x) \ - mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \ - _mm256_xor_si256( mm256_ror_64( x, 23 ), x ), 4 ), x ), 14 ) - -/* -#define SSG5_0(x) \ - _mm256_xor_si256( _mm256_xor_si256( \ - mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) ) - -#define SSG5_1(x) \ - _mm256_xor_si256( _mm256_xor_si256( \ - mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) ) -*/ -// Interleave SSG0 & SSG1 for better throughput. 
-// return ssg0(w0) + ssg1(w1) -static inline __m256i ssg512_add( __m256i w0, __m256i w1 ) -{ - __m256i w0a, w1a, w0b, w1b; - w0a = mm256_ror_64( w0, 1 ); - w1a = mm256_ror_64( w1,19 ); - w0b = mm256_ror_64( w0, 8 ); - w1b = mm256_ror_64( w1,61 ); - w0a = _mm256_xor_si256( w0a, w0b ); - w1a = _mm256_xor_si256( w1a, w1b ); - w0b = _mm256_srli_epi64( w0, 7 ); - w1b = _mm256_srli_epi64( w1, 6 ); - w0a = _mm256_xor_si256( w0a, w0b ); - w1a = _mm256_xor_si256( w1a, w1b ); - return _mm256_add_epi64( w0a, w1a ); -} #define SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i ) \ do { \ - __m256i T0 = _mm256_add_epi64( _mm256_set1_epi64x( K512[i] ), W[ i ] ); \ + __m256i T0 = _mm256_add_epi64( _mm256_set1_epi64x( K512[i] ), W[i] ); \ __m256i T1 = BSG5_1( E ); \ __m256i T2 = BSG5_0( A ); \ T0 = _mm256_add_epi64( T0, CH( E, F, G ) ); \ @@ -324,19 +329,27 @@ do { \ H = _mm256_add_epi64( T1, T2 ); \ } while (0) +#endif // AVX512VL AVX10_256 + static void sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] ) { int i; - register __m256i A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z; + register __m256i A, B, C, D, E, F, G, H; + +#if !defined(__AVX512VL__) +// Disable for AVX10_256 + __m256i X_xor_Y, Y_xor_Z; +#endif + __m256i W[80]; mm256_block_bswap_64( W , in ); mm256_block_bswap_64( W+8, in+8 ); for ( i = 16; i < 80; i++ ) - W[i] = _mm256_add_epi64( ssg512_add( W[i-15], W[i-2] ), - _mm256_add_epi64( W[ i- 7 ], W[ i-16 ] ) ); + W[i] = mm256_add4_64( SSG5_0( W[i-15] ), SSG5_1( W[i-2] ), + W[ i- 7 ], W[ i-16 ] ); if ( ctx->initialized ) { @@ -351,17 +364,20 @@ sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] ) } else { - A = m256_const1_64( 0x6A09E667F3BCC908 ); - B = m256_const1_64( 0xBB67AE8584CAA73B ); - C = m256_const1_64( 0x3C6EF372FE94F82B ); - D = m256_const1_64( 0xA54FF53A5F1D36F1 ); - E = m256_const1_64( 0x510E527FADE682D1 ); - F = m256_const1_64( 0x9B05688C2B3E6C1F ); - G = m256_const1_64( 0x1F83D9ABFB41BD6B ); - H = m256_const1_64( 
0x5BE0CD19137E2179 ); + A = _mm256_set1_epi64x( 0x6A09E667F3BCC908 ); + B = _mm256_set1_epi64x( 0xBB67AE8584CAA73B ); + C = _mm256_set1_epi64x( 0x3C6EF372FE94F82B ); + D = _mm256_set1_epi64x( 0xA54FF53A5F1D36F1 ); + E = _mm256_set1_epi64x( 0x510E527FADE682D1 ); + F = _mm256_set1_epi64x( 0x9B05688C2B3E6C1F ); + G = _mm256_set1_epi64x( 0x1F83D9ABFB41BD6B ); + H = _mm256_set1_epi64x( 0x5BE0CD19137E2179 ); } +#if !defined(__AVX512VL__) +// Disable for AVX10_256 Y_xor_Z = _mm256_xor_si256( B, C ); +#endif for ( i = 0; i < 80; i += 8 ) { @@ -389,14 +405,14 @@ sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] ) else { ctx->initialized = true; - r[0] = _mm256_add_epi64( A, m256_const1_64( 0x6A09E667F3BCC908 ) ); - r[1] = _mm256_add_epi64( B, m256_const1_64( 0xBB67AE8584CAA73B ) ); - r[2] = _mm256_add_epi64( C, m256_const1_64( 0x3C6EF372FE94F82B ) ); - r[3] = _mm256_add_epi64( D, m256_const1_64( 0xA54FF53A5F1D36F1 ) ); - r[4] = _mm256_add_epi64( E, m256_const1_64( 0x510E527FADE682D1 ) ); - r[5] = _mm256_add_epi64( F, m256_const1_64( 0x9B05688C2B3E6C1F ) ); - r[6] = _mm256_add_epi64( G, m256_const1_64( 0x1F83D9ABFB41BD6B ) ); - r[7] = _mm256_add_epi64( H, m256_const1_64( 0x5BE0CD19137E2179 ) ); + r[0] = _mm256_add_epi64( A, _mm256_set1_epi64x( 0x6A09E667F3BCC908 ) ); + r[1] = _mm256_add_epi64( B, _mm256_set1_epi64x( 0xBB67AE8584CAA73B ) ); + r[2] = _mm256_add_epi64( C, _mm256_set1_epi64x( 0x3C6EF372FE94F82B ) ); + r[3] = _mm256_add_epi64( D, _mm256_set1_epi64x( 0xA54FF53A5F1D36F1 ) ); + r[4] = _mm256_add_epi64( E, _mm256_set1_epi64x( 0x510E527FADE682D1 ) ); + r[5] = _mm256_add_epi64( F, _mm256_set1_epi64x( 0x9B05688C2B3E6C1F ) ); + r[6] = _mm256_add_epi64( G, _mm256_set1_epi64x( 0x1F83D9ABFB41BD6B ) ); + r[7] = _mm256_add_epi64( H, _mm256_set1_epi64x( 0x5BE0CD19137E2179 ) ); } } @@ -441,7 +457,7 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst ) 0x08090a0b0c0d0e0f, 0x0001020304050607 ) ); ptr = (unsigned)sc->count & (buf_size - 1U); - 
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 ); + sc->buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 ); ptr += 8; if ( ptr > pad ) { diff --git a/algo/shabal/shabal-hash-4way.c b/algo/shabal/shabal-hash-4way.c index 6a3e318e..7cab8215 100644 --- a/algo/shabal/shabal-hash-4way.c +++ b/algo/shabal/shabal-hash-4way.c @@ -112,50 +112,50 @@ extern "C"{ else \ { \ (state)->state_loaded = true; \ - A0 = m256_const1_64( 0x20728DFD20728DFD ); \ - A1 = m256_const1_64( 0x46C0BD5346C0BD53 ); \ - A2 = m256_const1_64( 0xE782B699E782B699 ); \ - A3 = m256_const1_64( 0x5530463255304632 ); \ - A4 = m256_const1_64( 0x71B4EF9071B4EF90 ); \ - A5 = m256_const1_64( 0x0EA9E82C0EA9E82C ); \ - A6 = m256_const1_64( 0xDBB930F1DBB930F1 ); \ - A7 = m256_const1_64( 0xFAD06B8BFAD06B8B ); \ - A8 = m256_const1_64( 0xBE0CAE40BE0CAE40 ); \ - A9 = m256_const1_64( 0x8BD144108BD14410 ); \ - AA = m256_const1_64( 0x76D2ADAC76D2ADAC ); \ - AB = m256_const1_64( 0x28ACAB7F28ACAB7F ); \ - B0 = m256_const1_64( 0xC1099CB7C1099CB7 ); \ - B1 = m256_const1_64( 0x07B385F307B385F3 ); \ - B2 = m256_const1_64( 0xE7442C26E7442C26 ); \ - B3 = m256_const1_64( 0xCC8AD640CC8AD640 ); \ - B4 = m256_const1_64( 0xEB6F56C7EB6F56C7 ); \ - B5 = m256_const1_64( 0x1EA81AA91EA81AA9 ); \ - B6 = m256_const1_64( 0x73B9D31473B9D314 ); \ - B7 = m256_const1_64( 0x1DE85D081DE85D08 ); \ - B8 = m256_const1_64( 0x48910A5A48910A5A ); \ - B9 = m256_const1_64( 0x893B22DB893B22DB ); \ - BA = m256_const1_64( 0xC5A0DF44C5A0DF44 ); \ - BB = m256_const1_64( 0xBBC4324EBBC4324E ); \ - BC = m256_const1_64( 0x72D2F24072D2F240 ); \ - BD = m256_const1_64( 0x75941D9975941D99 ); \ - BE = m256_const1_64( 0x6D8BDE826D8BDE82 ); \ - BF = m256_const1_64( 0xA1A7502BA1A7502B ); \ - C0 = m256_const1_64( 0xD9BF68D1D9BF68D1 ); \ - C1 = m256_const1_64( 0x58BAD75058BAD750 ); \ - C2 = m256_const1_64( 0x56028CB256028CB2 ); \ - C3 = m256_const1_64( 0x8134F3598134F359 ); \ - C4 = m256_const1_64( 0xB5D469D8B5D469D8 ); \ - C5 = m256_const1_64( 0x941A8CC2941A8CC2 ); \ - C6 = 
m256_const1_64( 0x418B2A6E418B2A6E ); \ - C7 = m256_const1_64( 0x0405278004052780 ); \ - C8 = m256_const1_64( 0x7F07D7877F07D787 ); \ - C9 = m256_const1_64( 0x5194358F5194358F ); \ - CA = m256_const1_64( 0x3C60D6653C60D665 ); \ - CB = m256_const1_64( 0xBE97D79ABE97D79A ); \ - CC = m256_const1_64( 0x950C3434950C3434 ); \ - CD = m256_const1_64( 0xAED9A06DAED9A06D ); \ - CE = m256_const1_64( 0x2537DC8D2537DC8D ); \ - CF = m256_const1_64( 0x7CDB59697CDB5969 ); \ + A0 = _mm256_set1_epi64x( 0x20728DFD20728DFD ); \ + A1 = _mm256_set1_epi64x( 0x46C0BD5346C0BD53 ); \ + A2 = _mm256_set1_epi64x( 0xE782B699E782B699 ); \ + A3 = _mm256_set1_epi64x( 0x5530463255304632 ); \ + A4 = _mm256_set1_epi64x( 0x71B4EF9071B4EF90 ); \ + A5 = _mm256_set1_epi64x( 0x0EA9E82C0EA9E82C ); \ + A6 = _mm256_set1_epi64x( 0xDBB930F1DBB930F1 ); \ + A7 = _mm256_set1_epi64x( 0xFAD06B8BFAD06B8B ); \ + A8 = _mm256_set1_epi64x( 0xBE0CAE40BE0CAE40 ); \ + A9 = _mm256_set1_epi64x( 0x8BD144108BD14410 ); \ + AA = _mm256_set1_epi64x( 0x76D2ADAC76D2ADAC ); \ + AB = _mm256_set1_epi64x( 0x28ACAB7F28ACAB7F ); \ + B0 = _mm256_set1_epi64x( 0xC1099CB7C1099CB7 ); \ + B1 = _mm256_set1_epi64x( 0x07B385F307B385F3 ); \ + B2 = _mm256_set1_epi64x( 0xE7442C26E7442C26 ); \ + B3 = _mm256_set1_epi64x( 0xCC8AD640CC8AD640 ); \ + B4 = _mm256_set1_epi64x( 0xEB6F56C7EB6F56C7 ); \ + B5 = _mm256_set1_epi64x( 0x1EA81AA91EA81AA9 ); \ + B6 = _mm256_set1_epi64x( 0x73B9D31473B9D314 ); \ + B7 = _mm256_set1_epi64x( 0x1DE85D081DE85D08 ); \ + B8 = _mm256_set1_epi64x( 0x48910A5A48910A5A ); \ + B9 = _mm256_set1_epi64x( 0x893B22DB893B22DB ); \ + BA = _mm256_set1_epi64x( 0xC5A0DF44C5A0DF44 ); \ + BB = _mm256_set1_epi64x( 0xBBC4324EBBC4324E ); \ + BC = _mm256_set1_epi64x( 0x72D2F24072D2F240 ); \ + BD = _mm256_set1_epi64x( 0x75941D9975941D99 ); \ + BE = _mm256_set1_epi64x( 0x6D8BDE826D8BDE82 ); \ + BF = _mm256_set1_epi64x( 0xA1A7502BA1A7502B ); \ + C0 = _mm256_set1_epi64x( 0xD9BF68D1D9BF68D1 ); \ + C1 = _mm256_set1_epi64x( 0x58BAD75058BAD750 ); \ + C2 = 
_mm256_set1_epi64x( 0x56028CB256028CB2 ); \ + C3 = _mm256_set1_epi64x( 0x8134F3598134F359 ); \ + C4 = _mm256_set1_epi64x( 0xB5D469D8B5D469D8 ); \ + C5 = _mm256_set1_epi64x( 0x941A8CC2941A8CC2 ); \ + C6 = _mm256_set1_epi64x( 0x418B2A6E418B2A6E ); \ + C7 = _mm256_set1_epi64x( 0x0405278004052780 ); \ + C8 = _mm256_set1_epi64x( 0x7F07D7877F07D787 ); \ + C9 = _mm256_set1_epi64x( 0x5194358F5194358F ); \ + CA = _mm256_set1_epi64x( 0x3C60D6653C60D665 ); \ + CB = _mm256_set1_epi64x( 0xBE97D79ABE97D79A ); \ + CC = _mm256_set1_epi64x( 0x950C3434950C3434 ); \ + CD = _mm256_set1_epi64x( 0xAED9A06DAED9A06D ); \ + CE = _mm256_set1_epi64x( 0x2537DC8D2537DC8D ); \ + CF = _mm256_set1_epi64x( 0x7CDB59697CDB5969 ); \ } \ Wlow = (state)->Wlow; \ Whigh = (state)->Whigh; \ @@ -303,7 +303,7 @@ do { \ #define PERM_ELT8( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \ do { \ - xa0 = mm256_xor3( xm, xb1, mm256_xorandnot( \ + xa0 = mm256_xor3( xm, xb1, mm256_xorandnot( \ _mm256_mullo_epi32( mm256_xor3( xa0, xc, \ _mm256_mullo_epi32( mm256_rol_32( xa1, 15 ), FIVE ) ), THREE ), \ xb3, xb2 ) ); \ @@ -443,52 +443,52 @@ shabal_8way_init( void *cc, unsigned size ) else { // No users sc->state_loaded = true; - sc->A[ 0] = m256_const1_64( 0x52F8455252F84552 ); - sc->A[ 1] = m256_const1_64( 0xE54B7999E54B7999 ); - sc->A[ 2] = m256_const1_64( 0x2D8EE3EC2D8EE3EC ); - sc->A[ 3] = m256_const1_64( 0xB9645191B9645191 ); - sc->A[ 4] = m256_const1_64( 0xE0078B86E0078B86 ); - sc->A[ 5] = m256_const1_64( 0xBB7C44C9BB7C44C9 ); - sc->A[ 6] = m256_const1_64( 0xD2B5C1CAD2B5C1CA ); - sc->A[ 7] = m256_const1_64( 0xB0D2EB8CB0D2EB8C ); - sc->A[ 8] = m256_const1_64( 0x14CE5A4514CE5A45 ); - sc->A[ 9] = m256_const1_64( 0x22AF50DC22AF50DC ); - sc->A[10] = m256_const1_64( 0xEFFDBC6BEFFDBC6B ); - sc->A[11] = m256_const1_64( 0xEB21B74AEB21B74A ); - - sc->B[ 0] = m256_const1_64( 0xB555C6EEB555C6EE ); - sc->B[ 1] = m256_const1_64( 0x3E7105963E710596 ); - sc->B[ 2] = m256_const1_64( 0xA72A652FA72A652F ); - sc->B[ 3] = m256_const1_64( 
0x9301515F9301515F ); - sc->B[ 4] = m256_const1_64( 0xDA28C1FADA28C1FA ); - sc->B[ 5] = m256_const1_64( 0x696FD868696FD868 ); - sc->B[ 6] = m256_const1_64( 0x9CB6BF729CB6BF72 ); - sc->B[ 7] = m256_const1_64( 0x0AFE40020AFE4002 ); - sc->B[ 8] = m256_const1_64( 0xA6E03615A6E03615 ); - sc->B[ 9] = m256_const1_64( 0x5138C1D45138C1D4 ); - sc->B[10] = m256_const1_64( 0xBE216306BE216306 ); - sc->B[11] = m256_const1_64( 0xB38B8890B38B8890 ); - sc->B[12] = m256_const1_64( 0x3EA8B96B3EA8B96B ); - sc->B[13] = m256_const1_64( 0x3299ACE43299ACE4 ); - sc->B[14] = m256_const1_64( 0x30924DD430924DD4 ); - sc->B[15] = m256_const1_64( 0x55CB34A555CB34A5 ); - - sc->C[ 0] = m256_const1_64( 0xB405F031B405F031 ); - sc->C[ 1] = m256_const1_64( 0xC4233EBAC4233EBA ); - sc->C[ 2] = m256_const1_64( 0xB3733979B3733979 ); - sc->C[ 3] = m256_const1_64( 0xC0DD9D55C0DD9D55 ); - sc->C[ 4] = m256_const1_64( 0xC51C28AEC51C28AE ); - sc->C[ 5] = m256_const1_64( 0xA327B8E1A327B8E1 ); - sc->C[ 6] = m256_const1_64( 0x56C5616756C56167 ); - sc->C[ 7] = m256_const1_64( 0xED614433ED614433 ); - sc->C[ 8] = m256_const1_64( 0x88B59D6088B59D60 ); - sc->C[ 9] = m256_const1_64( 0x60E2CEBA60E2CEBA ); - sc->C[10] = m256_const1_64( 0x758B4B8B758B4B8B ); - sc->C[11] = m256_const1_64( 0x83E82A7F83E82A7F ); - sc->C[12] = m256_const1_64( 0xBC968828BC968828 ); - sc->C[13] = m256_const1_64( 0xE6E00BF7E6E00BF7 ); - sc->C[14] = m256_const1_64( 0xBA839E55BA839E55 ); - sc->C[15] = m256_const1_64( 0x9B491C609B491C60 ); + sc->A[ 0] = _mm256_set1_epi64x( 0x52F8455252F84552 ); + sc->A[ 1] = _mm256_set1_epi64x( 0xE54B7999E54B7999 ); + sc->A[ 2] = _mm256_set1_epi64x( 0x2D8EE3EC2D8EE3EC ); + sc->A[ 3] = _mm256_set1_epi64x( 0xB9645191B9645191 ); + sc->A[ 4] = _mm256_set1_epi64x( 0xE0078B86E0078B86 ); + sc->A[ 5] = _mm256_set1_epi64x( 0xBB7C44C9BB7C44C9 ); + sc->A[ 6] = _mm256_set1_epi64x( 0xD2B5C1CAD2B5C1CA ); + sc->A[ 7] = _mm256_set1_epi64x( 0xB0D2EB8CB0D2EB8C ); + sc->A[ 8] = _mm256_set1_epi64x( 0x14CE5A4514CE5A45 ); + sc->A[ 9] = 
_mm256_set1_epi64x( 0x22AF50DC22AF50DC ); + sc->A[10] = _mm256_set1_epi64x( 0xEFFDBC6BEFFDBC6B ); + sc->A[11] = _mm256_set1_epi64x( 0xEB21B74AEB21B74A ); + + sc->B[ 0] = _mm256_set1_epi64x( 0xB555C6EEB555C6EE ); + sc->B[ 1] = _mm256_set1_epi64x( 0x3E7105963E710596 ); + sc->B[ 2] = _mm256_set1_epi64x( 0xA72A652FA72A652F ); + sc->B[ 3] = _mm256_set1_epi64x( 0x9301515F9301515F ); + sc->B[ 4] = _mm256_set1_epi64x( 0xDA28C1FADA28C1FA ); + sc->B[ 5] = _mm256_set1_epi64x( 0x696FD868696FD868 ); + sc->B[ 6] = _mm256_set1_epi64x( 0x9CB6BF729CB6BF72 ); + sc->B[ 7] = _mm256_set1_epi64x( 0x0AFE40020AFE4002 ); + sc->B[ 8] = _mm256_set1_epi64x( 0xA6E03615A6E03615 ); + sc->B[ 9] = _mm256_set1_epi64x( 0x5138C1D45138C1D4 ); + sc->B[10] = _mm256_set1_epi64x( 0xBE216306BE216306 ); + sc->B[11] = _mm256_set1_epi64x( 0xB38B8890B38B8890 ); + sc->B[12] = _mm256_set1_epi64x( 0x3EA8B96B3EA8B96B ); + sc->B[13] = _mm256_set1_epi64x( 0x3299ACE43299ACE4 ); + sc->B[14] = _mm256_set1_epi64x( 0x30924DD430924DD4 ); + sc->B[15] = _mm256_set1_epi64x( 0x55CB34A555CB34A5 ); + + sc->C[ 0] = _mm256_set1_epi64x( 0xB405F031B405F031 ); + sc->C[ 1] = _mm256_set1_epi64x( 0xC4233EBAC4233EBA ); + sc->C[ 2] = _mm256_set1_epi64x( 0xB3733979B3733979 ); + sc->C[ 3] = _mm256_set1_epi64x( 0xC0DD9D55C0DD9D55 ); + sc->C[ 4] = _mm256_set1_epi64x( 0xC51C28AEC51C28AE ); + sc->C[ 5] = _mm256_set1_epi64x( 0xA327B8E1A327B8E1 ); + sc->C[ 6] = _mm256_set1_epi64x( 0x56C5616756C56167 ); + sc->C[ 7] = _mm256_set1_epi64x( 0xED614433ED614433 ); + sc->C[ 8] = _mm256_set1_epi64x( 0x88B59D6088B59D60 ); + sc->C[ 9] = _mm256_set1_epi64x( 0x60E2CEBA60E2CEBA ); + sc->C[10] = _mm256_set1_epi64x( 0x758B4B8B758B4B8B ); + sc->C[11] = _mm256_set1_epi64x( 0x83E82A7F83E82A7F ); + sc->C[12] = _mm256_set1_epi64x( 0xBC968828BC968828 ); + sc->C[13] = _mm256_set1_epi64x( 0xE6E00BF7E6E00BF7 ); + sc->C[14] = _mm256_set1_epi64x( 0xBA839E55BA839E55 ); + sc->C[15] = _mm256_set1_epi64x( 0x9B491C609B491C60 ); } sc->Wlow = 1; sc->Whigh = 0; @@ -707,50 +707,50 
@@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) else \ { \ (state)->state_loaded = true; \ - A0 = m128_const1_64( 0x20728DFD20728DFD ); \ - A1 = m128_const1_64( 0x46C0BD5346C0BD53 ); \ - A2 = m128_const1_64( 0xE782B699E782B699 ); \ - A3 = m128_const1_64( 0x5530463255304632 ); \ - A4 = m128_const1_64( 0x71B4EF9071B4EF90 ); \ - A5 = m128_const1_64( 0x0EA9E82C0EA9E82C ); \ - A6 = m128_const1_64( 0xDBB930F1DBB930F1 ); \ - A7 = m128_const1_64( 0xFAD06B8BFAD06B8B ); \ - A8 = m128_const1_64( 0xBE0CAE40BE0CAE40 ); \ - A9 = m128_const1_64( 0x8BD144108BD14410 ); \ - AA = m128_const1_64( 0x76D2ADAC76D2ADAC ); \ - AB = m128_const1_64( 0x28ACAB7F28ACAB7F ); \ - B0 = m128_const1_64( 0xC1099CB7C1099CB7 ); \ - B1 = m128_const1_64( 0x07B385F307B385F3 ); \ - B2 = m128_const1_64( 0xE7442C26E7442C26 ); \ - B3 = m128_const1_64( 0xCC8AD640CC8AD640 ); \ - B4 = m128_const1_64( 0xEB6F56C7EB6F56C7 ); \ - B5 = m128_const1_64( 0x1EA81AA91EA81AA9 ); \ - B6 = m128_const1_64( 0x73B9D31473B9D314 ); \ - B7 = m128_const1_64( 0x1DE85D081DE85D08 ); \ - B8 = m128_const1_64( 0x48910A5A48910A5A ); \ - B9 = m128_const1_64( 0x893B22DB893B22DB ); \ - BA = m128_const1_64( 0xC5A0DF44C5A0DF44 ); \ - BB = m128_const1_64( 0xBBC4324EBBC4324E ); \ - BC = m128_const1_64( 0x72D2F24072D2F240 ); \ - BD = m128_const1_64( 0x75941D9975941D99 ); \ - BE = m128_const1_64( 0x6D8BDE826D8BDE82 ); \ - BF = m128_const1_64( 0xA1A7502BA1A7502B ); \ - C0 = m128_const1_64( 0xD9BF68D1D9BF68D1 ); \ - C1 = m128_const1_64( 0x58BAD75058BAD750 ); \ - C2 = m128_const1_64( 0x56028CB256028CB2 ); \ - C3 = m128_const1_64( 0x8134F3598134F359 ); \ - C4 = m128_const1_64( 0xB5D469D8B5D469D8 ); \ - C5 = m128_const1_64( 0x941A8CC2941A8CC2 ); \ - C6 = m128_const1_64( 0x418B2A6E418B2A6E ); \ - C7 = m128_const1_64( 0x0405278004052780 ); \ - C8 = m128_const1_64( 0x7F07D7877F07D787 ); \ - C9 = m128_const1_64( 0x5194358F5194358F ); \ - CA = m128_const1_64( 0x3C60D6653C60D665 ); \ - CB = m128_const1_64( 0xBE97D79ABE97D79A 
); \ - CC = m128_const1_64( 0x950C3434950C3434 ); \ - CD = m128_const1_64( 0xAED9A06DAED9A06D ); \ - CE = m128_const1_64( 0x2537DC8D2537DC8D ); \ - CF = m128_const1_64( 0x7CDB59697CDB5969 ); \ + A0 = _mm_set1_epi64x( 0x20728DFD20728DFD ); \ + A1 = _mm_set1_epi64x( 0x46C0BD5346C0BD53 ); \ + A2 = _mm_set1_epi64x( 0xE782B699E782B699 ); \ + A3 = _mm_set1_epi64x( 0x5530463255304632 ); \ + A4 = _mm_set1_epi64x( 0x71B4EF9071B4EF90 ); \ + A5 = _mm_set1_epi64x( 0x0EA9E82C0EA9E82C ); \ + A6 = _mm_set1_epi64x( 0xDBB930F1DBB930F1 ); \ + A7 = _mm_set1_epi64x( 0xFAD06B8BFAD06B8B ); \ + A8 = _mm_set1_epi64x( 0xBE0CAE40BE0CAE40 ); \ + A9 = _mm_set1_epi64x( 0x8BD144108BD14410 ); \ + AA = _mm_set1_epi64x( 0x76D2ADAC76D2ADAC ); \ + AB = _mm_set1_epi64x( 0x28ACAB7F28ACAB7F ); \ + B0 = _mm_set1_epi64x( 0xC1099CB7C1099CB7 ); \ + B1 = _mm_set1_epi64x( 0x07B385F307B385F3 ); \ + B2 = _mm_set1_epi64x( 0xE7442C26E7442C26 ); \ + B3 = _mm_set1_epi64x( 0xCC8AD640CC8AD640 ); \ + B4 = _mm_set1_epi64x( 0xEB6F56C7EB6F56C7 ); \ + B5 = _mm_set1_epi64x( 0x1EA81AA91EA81AA9 ); \ + B6 = _mm_set1_epi64x( 0x73B9D31473B9D314 ); \ + B7 = _mm_set1_epi64x( 0x1DE85D081DE85D08 ); \ + B8 = _mm_set1_epi64x( 0x48910A5A48910A5A ); \ + B9 = _mm_set1_epi64x( 0x893B22DB893B22DB ); \ + BA = _mm_set1_epi64x( 0xC5A0DF44C5A0DF44 ); \ + BB = _mm_set1_epi64x( 0xBBC4324EBBC4324E ); \ + BC = _mm_set1_epi64x( 0x72D2F24072D2F240 ); \ + BD = _mm_set1_epi64x( 0x75941D9975941D99 ); \ + BE = _mm_set1_epi64x( 0x6D8BDE826D8BDE82 ); \ + BF = _mm_set1_epi64x( 0xA1A7502BA1A7502B ); \ + C0 = _mm_set1_epi64x( 0xD9BF68D1D9BF68D1 ); \ + C1 = _mm_set1_epi64x( 0x58BAD75058BAD750 ); \ + C2 = _mm_set1_epi64x( 0x56028CB256028CB2 ); \ + C3 = _mm_set1_epi64x( 0x8134F3598134F359 ); \ + C4 = _mm_set1_epi64x( 0xB5D469D8B5D469D8 ); \ + C5 = _mm_set1_epi64x( 0x941A8CC2941A8CC2 ); \ + C6 = _mm_set1_epi64x( 0x418B2A6E418B2A6E ); \ + C7 = _mm_set1_epi64x( 0x0405278004052780 ); \ + C8 = _mm_set1_epi64x( 0x7F07D7877F07D787 ); \ + C9 = _mm_set1_epi64x( 
0x5194358F5194358F ); \ + CA = _mm_set1_epi64x( 0x3C60D6653C60D665 ); \ + CB = _mm_set1_epi64x( 0xBE97D79ABE97D79A ); \ + CC = _mm_set1_epi64x( 0x950C3434950C3434 ); \ + CD = _mm_set1_epi64x( 0xAED9A06DAED9A06D ); \ + CE = _mm_set1_epi64x( 0x2537DC8D2537DC8D ); \ + CF = _mm_set1_epi64x( 0x7CDB59697CDB5969 ); \ } \ Wlow = (state)->Wlow; \ Whigh = (state)->Whigh; \ @@ -896,6 +896,16 @@ do { \ mm128_swap256_128( BF, CF ); \ } while (0) +#define PERM_ELT( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \ +do { \ + xa0 = mm128_xor3( xm, xb1, mm128_xorandnot( \ + _mm_mullo_epi32( mm128_xor3( xa0, xc, \ + _mm_mullo_epi32( mm128_rol_32( xa1, 15 ), FIVE ) ), THREE ), \ + xb3, xb2 ) ); \ + xb0 = mm128_xnor( xa0, mm128_rol_32( xb0, 1 ) ); \ +} while (0) + +/* #define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) \ do { \ xa0 = _mm_xor_si128( xm, _mm_xor_si128( xb1, _mm_xor_si128( \ @@ -905,6 +915,7 @@ do { \ ) ), THREE ) ) ) ); \ xb0 = mm128_not( _mm_xor_si128( xa0, mm128_rol_32( xb0, 1 ) ) ); \ } while (0) +*/ #define PERM_STEP_0 do { \ PERM_ELT(A0, AB, B0, BD, B9, B6, C8, M0); \ @@ -1078,103 +1089,103 @@ shabal_4way_init( void *cc, unsigned size ) { // copy immediate constants directly to working registers later. 
sc->state_loaded = false; /* - sc->A[ 0] = m128_const1_64( 0x20728DFD20728DFD ); - sc->A[ 1] = m128_const1_64( 0x46C0BD5346C0BD53 ); - sc->A[ 2] = m128_const1_64( 0xE782B699E782B699 ); - sc->A[ 3] = m128_const1_64( 0x5530463255304632 ); - sc->A[ 4] = m128_const1_64( 0x71B4EF9071B4EF90 ); - sc->A[ 5] = m128_const1_64( 0x0EA9E82C0EA9E82C ); - sc->A[ 6] = m128_const1_64( 0xDBB930F1DBB930F1 ); - sc->A[ 7] = m128_const1_64( 0xFAD06B8BFAD06B8B ); - sc->A[ 8] = m128_const1_64( 0xBE0CAE40BE0CAE40 ); - sc->A[ 9] = m128_const1_64( 0x8BD144108BD14410 ); - sc->A[10] = m128_const1_64( 0x76D2ADAC76D2ADAC ); - sc->A[11] = m128_const1_64( 0x28ACAB7F28ACAB7F ); - - sc->B[ 0] = m128_const1_64( 0xC1099CB7C1099CB7 ); - sc->B[ 1] = m128_const1_64( 0x07B385F307B385F3 ); - sc->B[ 2] = m128_const1_64( 0xE7442C26E7442C26 ); - sc->B[ 3] = m128_const1_64( 0xCC8AD640CC8AD640 ); - sc->B[ 4] = m128_const1_64( 0xEB6F56C7EB6F56C7 ); - sc->B[ 5] = m128_const1_64( 0x1EA81AA91EA81AA9 ); - sc->B[ 6] = m128_const1_64( 0x73B9D31473B9D314 ); - sc->B[ 7] = m128_const1_64( 0x1DE85D081DE85D08 ); - sc->B[ 8] = m128_const1_64( 0x48910A5A48910A5A ); - sc->B[ 9] = m128_const1_64( 0x893B22DB893B22DB ); - sc->B[10] = m128_const1_64( 0xC5A0DF44C5A0DF44 ); - sc->B[11] = m128_const1_64( 0xBBC4324EBBC4324E ); - sc->B[12] = m128_const1_64( 0x72D2F24072D2F240 ); - sc->B[13] = m128_const1_64( 0x75941D9975941D99 ); - sc->B[14] = m128_const1_64( 0x6D8BDE826D8BDE82 ); - sc->B[15] = m128_const1_64( 0xA1A7502BA1A7502B ); - - sc->C[ 0] = m128_const1_64( 0xD9BF68D1D9BF68D1 ); - sc->C[ 1] = m128_const1_64( 0x58BAD75058BAD750 ); - sc->C[ 2] = m128_const1_64( 0x56028CB256028CB2 ); - sc->C[ 3] = m128_const1_64( 0x8134F3598134F359 ); - sc->C[ 4] = m128_const1_64( 0xB5D469D8B5D469D8 ); - sc->C[ 5] = m128_const1_64( 0x941A8CC2941A8CC2 ); - sc->C[ 6] = m128_const1_64( 0x418B2A6E418B2A6E ); - sc->C[ 7] = m128_const1_64( 0x0405278004052780 ); - sc->C[ 8] = m128_const1_64( 0x7F07D7877F07D787 ); - sc->C[ 9] = m128_const1_64( 
0x5194358F5194358F ); - sc->C[10] = m128_const1_64( 0x3C60D6653C60D665 ); - sc->C[11] = m128_const1_64( 0xBE97D79ABE97D79A ); - sc->C[12] = m128_const1_64( 0x950C3434950C3434 ); - sc->C[13] = m128_const1_64( 0xAED9A06DAED9A06D ); - sc->C[14] = m128_const1_64( 0x2537DC8D2537DC8D ); - sc->C[15] = m128_const1_64( 0x7CDB59697CDB5969 ); + sc->A[ 0] = _mm_set1_epi64x( 0x20728DFD20728DFD ); + sc->A[ 1] = _mm_set1_epi64x( 0x46C0BD5346C0BD53 ); + sc->A[ 2] = _mm_set1_epi64x( 0xE782B699E782B699 ); + sc->A[ 3] = _mm_set1_epi64x( 0x5530463255304632 ); + sc->A[ 4] = _mm_set1_epi64x( 0x71B4EF9071B4EF90 ); + sc->A[ 5] = _mm_set1_epi64x( 0x0EA9E82C0EA9E82C ); + sc->A[ 6] = _mm_set1_epi64x( 0xDBB930F1DBB930F1 ); + sc->A[ 7] = _mm_set1_epi64x( 0xFAD06B8BFAD06B8B ); + sc->A[ 8] = _mm_set1_epi64x( 0xBE0CAE40BE0CAE40 ); + sc->A[ 9] = _mm_set1_epi64x( 0x8BD144108BD14410 ); + sc->A[10] = _mm_set1_epi64x( 0x76D2ADAC76D2ADAC ); + sc->A[11] = _mm_set1_epi64x( 0x28ACAB7F28ACAB7F ); + + sc->B[ 0] = _mm_set1_epi64x( 0xC1099CB7C1099CB7 ); + sc->B[ 1] = _mm_set1_epi64x( 0x07B385F307B385F3 ); + sc->B[ 2] = _mm_set1_epi64x( 0xE7442C26E7442C26 ); + sc->B[ 3] = _mm_set1_epi64x( 0xCC8AD640CC8AD640 ); + sc->B[ 4] = _mm_set1_epi64x( 0xEB6F56C7EB6F56C7 ); + sc->B[ 5] = _mm_set1_epi64x( 0x1EA81AA91EA81AA9 ); + sc->B[ 6] = _mm_set1_epi64x( 0x73B9D31473B9D314 ); + sc->B[ 7] = _mm_set1_epi64x( 0x1DE85D081DE85D08 ); + sc->B[ 8] = _mm_set1_epi64x( 0x48910A5A48910A5A ); + sc->B[ 9] = _mm_set1_epi64x( 0x893B22DB893B22DB ); + sc->B[10] = _mm_set1_epi64x( 0xC5A0DF44C5A0DF44 ); + sc->B[11] = _mm_set1_epi64x( 0xBBC4324EBBC4324E ); + sc->B[12] = _mm_set1_epi64x( 0x72D2F24072D2F240 ); + sc->B[13] = _mm_set1_epi64x( 0x75941D9975941D99 ); + sc->B[14] = _mm_set1_epi64x( 0x6D8BDE826D8BDE82 ); + sc->B[15] = _mm_set1_epi64x( 0xA1A7502BA1A7502B ); + + sc->C[ 0] = _mm_set1_epi64x( 0xD9BF68D1D9BF68D1 ); + sc->C[ 1] = _mm_set1_epi64x( 0x58BAD75058BAD750 ); + sc->C[ 2] = _mm_set1_epi64x( 0x56028CB256028CB2 ); + sc->C[ 3] = 
_mm_set1_epi64x( 0x8134F3598134F359 ); + sc->C[ 4] = _mm_set1_epi64x( 0xB5D469D8B5D469D8 ); + sc->C[ 5] = _mm_set1_epi64x( 0x941A8CC2941A8CC2 ); + sc->C[ 6] = _mm_set1_epi64x( 0x418B2A6E418B2A6E ); + sc->C[ 7] = _mm_set1_epi64x( 0x0405278004052780 ); + sc->C[ 8] = _mm_set1_epi64x( 0x7F07D7877F07D787 ); + sc->C[ 9] = _mm_set1_epi64x( 0x5194358F5194358F ); + sc->C[10] = _mm_set1_epi64x( 0x3C60D6653C60D665 ); + sc->C[11] = _mm_set1_epi64x( 0xBE97D79ABE97D79A ); + sc->C[12] = _mm_set1_epi64x( 0x950C3434950C3434 ); + sc->C[13] = _mm_set1_epi64x( 0xAED9A06DAED9A06D ); + sc->C[14] = _mm_set1_epi64x( 0x2537DC8D2537DC8D ); + sc->C[15] = _mm_set1_epi64x( 0x7CDB59697CDB5969 ); */ } else { // No users sc->state_loaded = true; - sc->A[ 0] = m128_const1_64( 0x52F8455252F84552 ); - sc->A[ 1] = m128_const1_64( 0xE54B7999E54B7999 ); - sc->A[ 2] = m128_const1_64( 0x2D8EE3EC2D8EE3EC ); - sc->A[ 3] = m128_const1_64( 0xB9645191B9645191 ); - sc->A[ 4] = m128_const1_64( 0xE0078B86E0078B86 ); - sc->A[ 5] = m128_const1_64( 0xBB7C44C9BB7C44C9 ); - sc->A[ 6] = m128_const1_64( 0xD2B5C1CAD2B5C1CA ); - sc->A[ 7] = m128_const1_64( 0xB0D2EB8CB0D2EB8C ); - sc->A[ 8] = m128_const1_64( 0x14CE5A4514CE5A45 ); - sc->A[ 9] = m128_const1_64( 0x22AF50DC22AF50DC ); - sc->A[10] = m128_const1_64( 0xEFFDBC6BEFFDBC6B ); - sc->A[11] = m128_const1_64( 0xEB21B74AEB21B74A ); - - sc->B[ 0] = m128_const1_64( 0xB555C6EEB555C6EE ); - sc->B[ 1] = m128_const1_64( 0x3E7105963E710596 ); - sc->B[ 2] = m128_const1_64( 0xA72A652FA72A652F ); - sc->B[ 3] = m128_const1_64( 0x9301515F9301515F ); - sc->B[ 4] = m128_const1_64( 0xDA28C1FADA28C1FA ); - sc->B[ 5] = m128_const1_64( 0x696FD868696FD868 ); - sc->B[ 6] = m128_const1_64( 0x9CB6BF729CB6BF72 ); - sc->B[ 7] = m128_const1_64( 0x0AFE40020AFE4002 ); - sc->B[ 8] = m128_const1_64( 0xA6E03615A6E03615 ); - sc->B[ 9] = m128_const1_64( 0x5138C1D45138C1D4 ); - sc->B[10] = m128_const1_64( 0xBE216306BE216306 ); - sc->B[11] = m128_const1_64( 0xB38B8890B38B8890 ); - sc->B[12] = 
m128_const1_64( 0x3EA8B96B3EA8B96B ); - sc->B[13] = m128_const1_64( 0x3299ACE43299ACE4 ); - sc->B[14] = m128_const1_64( 0x30924DD430924DD4 ); - sc->B[15] = m128_const1_64( 0x55CB34A555CB34A5 ); - - sc->C[ 0] = m128_const1_64( 0xB405F031B405F031 ); - sc->C[ 1] = m128_const1_64( 0xC4233EBAC4233EBA ); - sc->C[ 2] = m128_const1_64( 0xB3733979B3733979 ); - sc->C[ 3] = m128_const1_64( 0xC0DD9D55C0DD9D55 ); - sc->C[ 4] = m128_const1_64( 0xC51C28AEC51C28AE ); - sc->C[ 5] = m128_const1_64( 0xA327B8E1A327B8E1 ); - sc->C[ 6] = m128_const1_64( 0x56C5616756C56167 ); - sc->C[ 7] = m128_const1_64( 0xED614433ED614433 ); - sc->C[ 8] = m128_const1_64( 0x88B59D6088B59D60 ); - sc->C[ 9] = m128_const1_64( 0x60E2CEBA60E2CEBA ); - sc->C[10] = m128_const1_64( 0x758B4B8B758B4B8B ); - sc->C[11] = m128_const1_64( 0x83E82A7F83E82A7F ); - sc->C[12] = m128_const1_64( 0xBC968828BC968828 ); - sc->C[13] = m128_const1_64( 0xE6E00BF7E6E00BF7 ); - sc->C[14] = m128_const1_64( 0xBA839E55BA839E55 ); - sc->C[15] = m128_const1_64( 0x9B491C609B491C60 ); + sc->A[ 0] = _mm_set1_epi64x( 0x52F8455252F84552 ); + sc->A[ 1] = _mm_set1_epi64x( 0xE54B7999E54B7999 ); + sc->A[ 2] = _mm_set1_epi64x( 0x2D8EE3EC2D8EE3EC ); + sc->A[ 3] = _mm_set1_epi64x( 0xB9645191B9645191 ); + sc->A[ 4] = _mm_set1_epi64x( 0xE0078B86E0078B86 ); + sc->A[ 5] = _mm_set1_epi64x( 0xBB7C44C9BB7C44C9 ); + sc->A[ 6] = _mm_set1_epi64x( 0xD2B5C1CAD2B5C1CA ); + sc->A[ 7] = _mm_set1_epi64x( 0xB0D2EB8CB0D2EB8C ); + sc->A[ 8] = _mm_set1_epi64x( 0x14CE5A4514CE5A45 ); + sc->A[ 9] = _mm_set1_epi64x( 0x22AF50DC22AF50DC ); + sc->A[10] = _mm_set1_epi64x( 0xEFFDBC6BEFFDBC6B ); + sc->A[11] = _mm_set1_epi64x( 0xEB21B74AEB21B74A ); + + sc->B[ 0] = _mm_set1_epi64x( 0xB555C6EEB555C6EE ); + sc->B[ 1] = _mm_set1_epi64x( 0x3E7105963E710596 ); + sc->B[ 2] = _mm_set1_epi64x( 0xA72A652FA72A652F ); + sc->B[ 3] = _mm_set1_epi64x( 0x9301515F9301515F ); + sc->B[ 4] = _mm_set1_epi64x( 0xDA28C1FADA28C1FA ); + sc->B[ 5] = _mm_set1_epi64x( 0x696FD868696FD868 ); + sc->B[ 6] = 
_mm_set1_epi64x( 0x9CB6BF729CB6BF72 ); + sc->B[ 7] = _mm_set1_epi64x( 0x0AFE40020AFE4002 ); + sc->B[ 8] = _mm_set1_epi64x( 0xA6E03615A6E03615 ); + sc->B[ 9] = _mm_set1_epi64x( 0x5138C1D45138C1D4 ); + sc->B[10] = _mm_set1_epi64x( 0xBE216306BE216306 ); + sc->B[11] = _mm_set1_epi64x( 0xB38B8890B38B8890 ); + sc->B[12] = _mm_set1_epi64x( 0x3EA8B96B3EA8B96B ); + sc->B[13] = _mm_set1_epi64x( 0x3299ACE43299ACE4 ); + sc->B[14] = _mm_set1_epi64x( 0x30924DD430924DD4 ); + sc->B[15] = _mm_set1_epi64x( 0x55CB34A555CB34A5 ); + + sc->C[ 0] = _mm_set1_epi64x( 0xB405F031B405F031 ); + sc->C[ 1] = _mm_set1_epi64x( 0xC4233EBAC4233EBA ); + sc->C[ 2] = _mm_set1_epi64x( 0xB3733979B3733979 ); + sc->C[ 3] = _mm_set1_epi64x( 0xC0DD9D55C0DD9D55 ); + sc->C[ 4] = _mm_set1_epi64x( 0xC51C28AEC51C28AE ); + sc->C[ 5] = _mm_set1_epi64x( 0xA327B8E1A327B8E1 ); + sc->C[ 6] = _mm_set1_epi64x( 0x56C5616756C56167 ); + sc->C[ 7] = _mm_set1_epi64x( 0xED614433ED614433 ); + sc->C[ 8] = _mm_set1_epi64x( 0x88B59D6088B59D60 ); + sc->C[ 9] = _mm_set1_epi64x( 0x60E2CEBA60E2CEBA ); + sc->C[10] = _mm_set1_epi64x( 0x758B4B8B758B4B8B ); + sc->C[11] = _mm_set1_epi64x( 0x83E82A7F83E82A7F ); + sc->C[12] = _mm_set1_epi64x( 0xBC968828BC968828 ); + sc->C[13] = _mm_set1_epi64x( 0xE6E00BF7E6E00BF7 ); + sc->C[14] = _mm_set1_epi64x( 0xBA839E55BA839E55 ); + sc->C[15] = _mm_set1_epi64x( 0x9B491C609B491C60 ); } sc->Wlow = 1; sc->Whigh = 0; diff --git a/algo/shavite/shavite-hash-2way.c b/algo/shavite/shavite-hash-2way.c index 3d8ac940..b96a393a 100644 --- a/algo/shavite/shavite-hash-2way.c +++ b/algo/shavite/shavite-hash-2way.c @@ -32,6 +32,44 @@ static const uint32_t IV512[] = #endif +#if defined (__AVX512VL__) +//TODO Enable for AVX10_256 + +#define DECL_m256i_count \ + const __m256i count = \ + mm256_set4_32( ctx->count3, ctx->count2, ctx->count1, ctx->count0 ); + +#define COUNT_R0 \ + _mm256_mask_xor_epi32( count, 0x88, count, m256_neg1 ) + +#define COUNT_R1 \ + mm256_shuflr128_32( _mm256_mask_xor_epi32( count, 0x11, count, 
m256_neg1 ) ) + +#define COUNT_R2 \ + mm256_swap128_64( _mm256_mask_xor_epi32( count, 0x22, count, m256_neg1 ) ) + +#define COUNT_R13 \ + mm256_swap64_32( _mm256_mask_xor_epi32( count, 0x44, count, m256_neg1 ) ) + +#else + +#define DECL_m256i_count + +// R matches the loop index not the round number, should change that +#define COUNT_R0 \ + mm256_set4_32( ~ctx->count3, ctx->count2, ctx->count1, ctx->count0 ) + +#define COUNT_R1 \ + mm256_set4_32( ~ctx->count0, ctx->count1, ctx->count2, ctx->count3 ) + +#define COUNT_R2 \ + mm256_set4_32( ~ctx->count1, ctx->count0, ctx->count3, ctx->count2 ) + +#define COUNT_R13 \ + mm256_set4_32( ~ctx->count2, ctx->count3, ctx->count0, ctx->count1 ) + +#endif + static void c512_2way( shavite512_2way_context *ctx, const void *msg ) { @@ -40,6 +78,7 @@ c512_2way( shavite512_2way_context *ctx, const void *msg ) __m256i k00, k01, k02, k03, k10, k11, k12, k13; __m256i *m = (__m256i*)msg; __m256i *h = (__m256i*)ctx->h; + DECL_m256i_count; int r; p0 = h[0]; @@ -47,7 +86,8 @@ c512_2way( shavite512_2way_context *ctx, const void *msg ) p2 = h[2]; p3 = h[3]; - // round + // round 0 + k00 = m[0]; x = mm256_aesenc_2x128( _mm256_xor_si256( p1, k00 ), zero ); k01 = m[1]; @@ -78,18 +118,14 @@ c512_2way( shavite512_2way_context *ctx, const void *msg ) mm256_aesenc_2x128( k00, zero ) ) ); if ( r == 0 ) - k00 = _mm256_xor_si256( k00, _mm256_set_epi32( - ~ctx->count3, ctx->count2, ctx->count1, ctx->count0, - ~ctx->count3, ctx->count2, ctx->count1, ctx->count0 ) ); + k00 = _mm256_xor_si256( k00, COUNT_R0 ); x = mm256_aesenc_2x128( _mm256_xor_si256( p0, k00 ), zero ); k01 = _mm256_xor_si256( k00, mm256_shuflr128_32( mm256_aesenc_2x128( k01, zero ) ) ); if ( r == 1 ) - k01 = _mm256_xor_si256( k01, _mm256_set_epi32( - ~ctx->count0, ctx->count1, ctx->count2, ctx->count3, - ~ctx->count0, ctx->count1, ctx->count2, ctx->count3 ) ); + k01 = _mm256_xor_si256( k01, COUNT_R1 ); x = mm256_aesenc_2x128( _mm256_xor_si256( x, k01 ), zero ); k02 = _mm256_xor_si256( 
k01, @@ -114,9 +150,7 @@ c512_2way( shavite512_2way_context *ctx, const void *msg ) mm256_shuflr128_32( mm256_aesenc_2x128( k13, zero ) ) ); if ( r == 2 ) - k13 = _mm256_xor_si256( k13, _mm256_set_epi32( - ~ctx->count1, ctx->count0, ctx->count3, ctx->count2, - ~ctx->count1, ctx->count0, ctx->count3, ctx->count2 ) ); + k13 = _mm256_xor_si256( k13, COUNT_R2 ); x = mm256_aesenc_2x128( _mm256_xor_si256( x, k13 ), zero ); p1 = _mm256_xor_si256( p1, x ); @@ -228,9 +262,7 @@ c512_2way( shavite512_2way_context *ctx, const void *msg ) x = mm256_aesenc_2x128( _mm256_xor_si256( x, k11 ), zero ); k12 = mm256_shuflr128_32( mm256_aesenc_2x128( k12, zero ) ); - k12 = _mm256_xor_si256( k12, _mm256_xor_si256( k11, _mm256_set_epi32( - ~ctx->count2, ctx->count3, ctx->count0, ctx->count1, - ~ctx->count2, ctx->count3, ctx->count0, ctx->count1 ) ) ); + k12 = _mm256_xor_si256( k12, _mm256_xor_si256( k11, COUNT_R13 ) ); x = mm256_aesenc_2x128( _mm256_xor_si256( x, k12 ), zero ); k13 = _mm256_xor_si256( mm256_shuflr128_32( diff --git a/algo/shavite/shavite-hash-4way.c b/algo/shavite/shavite-hash-4way.c index 962b20b4..2e95e93d 100644 --- a/algo/shavite/shavite-hash-4way.c +++ b/algo/shavite/shavite-hash-4way.c @@ -204,11 +204,9 @@ c512_4way( shavite512_4way_context *ctx, const void *msg ) K5 = _mm512_xor_si512( mm512_shuflr128_32( _mm512_aesenc_epi128( K5, m512_zero ) ), K4 ); X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero ); - K6 = mm512_shuflr128_32( _mm512_aesenc_epi128( K6, m512_zero ) ); - K6 = _mm512_xor_si512( K6, _mm512_xor_si512( K5, _mm512_set4_epi32( - ~ctx->count2, ctx->count3, ctx->count0, ctx->count1 ) ) ); - + K6 = _mm512_xor_si512( K6, _mm512_xor_si512( K5, mm512_swap64_32( + _mm512_mask_xor_epi32( count, 0x4444, count, m512_neg1 ) ) ) ); X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero ); K7= _mm512_xor_si512( mm512_shuflr128_32( _mm512_aesenc_epi128( K7, m512_zero ) ), K6 ); diff --git a/algo/simd/simd-hash-2way.c b/algo/simd/simd-hash-2way.c 
index 67eb5d68..f3b6bbb9 100644 --- a/algo/simd/simd-hash-2way.c +++ b/algo/simd/simd-hash-2way.c @@ -212,14 +212,24 @@ do { \ // targetted #define shufxor2w(x,s) _mm256_shuffle_epi32( x, XCAT( SHUFXOR_, s )) +#if defined(__AVX512VL__) +//TODO Enable for AVX10_256 + +#define REDUCE(x) \ + _mm256_sub_epi16( _mm256_maskz_mov_epi8( 0x55555555, x ), \ + _mm256_srai_epi16( x, 8 ) ) +#else + #define REDUCE(x) \ - _mm256_sub_epi16( _mm256_and_si256( x, m256_const1_64( \ + _mm256_sub_epi16( _mm256_and_si256( x, _mm256_set1_epi64x( \ 0x00ff00ff00ff00ff ) ), _mm256_srai_epi16( x, 8 ) ) +#endif + #define EXTRA_REDUCE_S(x)\ _mm256_sub_epi16( x, _mm256_and_si256( \ - m256_const1_64( 0x0101010101010101 ), \ - _mm256_cmpgt_epi16( x, m256_const1_64( 0x0080008000800080 ) ) ) ) + _mm256_set1_epi64x( 0x0101010101010101 ), \ + _mm256_cmpgt_epi16( x, _mm256_set1_epi64x( 0x0080008000800080 ) ) ) ) #define REDUCE_FULL_S( x ) EXTRA_REDUCE_S( REDUCE (x ) ) @@ -387,17 +397,11 @@ static const m512_v16 FFT256_Twiddle4w[] = _mm512_sub_epi16( _mm512_maskz_mov_epi8( 0x5555555555555555, x ), \ _mm512_srai_epi16( x, 8 ) ) -/* -#define REDUCE4w(x) \ - _mm512_sub_epi16( _mm512_and_si512( x, m512_const1_64( \ - 0x00ff00ff00ff00ff ) ), _mm512_srai_epi16( x, 8 ) ) -*/ - #define EXTRA_REDUCE_S4w(x) \ _mm512_sub_epi16( x, _mm512_and_si512( \ - m512_const1_64( 0x0101010101010101 ), \ + _mm512_set1_epi64( 0x0101010101010101 ), \ _mm512_movm_epi16( _mm512_cmpgt_epi16_mask( \ - x, m512_const1_64( 0x0080008000800080 ) ) ) ) ) + x, _mm512_set1_epi64( 0x0080008000800080 ) ) ) ) ) // generic, except it calls targetted macros #define REDUCE_FULL_S4w( x ) EXTRA_REDUCE_S4w( REDUCE4w (x ) ) diff --git a/algo/skein/skein-4way.c b/algo/skein/skein-4way.c index 5a7cdbda..4e781681 100644 --- a/algo/skein/skein-4way.c +++ b/algo/skein/skein-4way.c @@ -63,7 +63,7 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + 
_mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); @@ -151,7 +151,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); diff --git a/algo/skein/skein-hash-4way.c b/algo/skein/skein-hash-4way.c index 390c74af..ee4bf095 100644 --- a/algo/skein/skein-hash-4way.c +++ b/algo/skein/skein-hash-4way.c @@ -285,7 +285,7 @@ static const uint64_t IV512[] = { #define SKBI(k, s, i) XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i)) #define SKBT(t, s, v) XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v)) -#define READ_STATE_BIG(sc) do { \ +#define READ_STATE_BIG(sc) \ h0 = (sc)->h0; \ h1 = (sc)->h1; \ h2 = (sc)->h2; \ @@ -294,10 +294,9 @@ static const uint64_t IV512[] = { h5 = (sc)->h5; \ h6 = (sc)->h6; \ h7 = (sc)->h7; \ - bcount = sc->bcount; \ - } while (0) + bcount = sc->bcount; -#define WRITE_STATE_BIG(sc) do { \ +#define WRITE_STATE_BIG(sc) \ (sc)->h0 = h0; \ (sc)->h1 = h1; \ (sc)->h2 = h2; \ @@ -306,62 +305,54 @@ static const uint64_t IV512[] = { (sc)->h5 = h5; \ (sc)->h6 = h6; \ (sc)->h7 = h7; \ - sc->bcount = bcount; \ - } while (0) + sc->bcount = bcount; #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #define TFBIG_KINIT_8WAY( k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2 ) \ -do { \ - k8 = mm512_xor3( mm512_xor3( k0, k1, k2 ), mm512_xor3( k3, k4, k5 ), \ - mm512_xor3( k6, k7, m512_const1_64( 0x1BD11BDAA9FC1A22) ));\ - t2 = t0 ^ t1; \ -} while (0) + k8 = mm512_xor3( mm512_xor3( k0, k1, k2 ), \ + mm512_xor3( k3, k4, k5 ), \ + mm512_xor3( k6, k7, \ + _mm512_set1_epi64( 0x1BD11BDAA9FC1A22) ) ); \ + t2 = t0 ^ t1; #define TFBIG_ADDKEY_8WAY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) \ -do { \ w0 = _mm512_add_epi64( w0, SKBI(k,s,0) ); \ w1 = 
_mm512_add_epi64( w1, SKBI(k,s,1) ); \ w2 = _mm512_add_epi64( w2, SKBI(k,s,2) ); \ w3 = _mm512_add_epi64( w3, SKBI(k,s,3) ); \ w4 = _mm512_add_epi64( w4, SKBI(k,s,4) ); \ w5 = _mm512_add_epi64( w5, _mm512_add_epi64( SKBI(k,s,5), \ - m512_const1_64( SKBT(t,s,0) ) ) ); \ + _mm512_set1_epi64( SKBT(t,s,0) ) ) ); \ w6 = _mm512_add_epi64( w6, _mm512_add_epi64( SKBI(k,s,6), \ - m512_const1_64( SKBT(t,s,1) ) ) ); \ + _mm512_set1_epi64( SKBT(t,s,1) ) ) ); \ w7 = _mm512_add_epi64( w7, _mm512_add_epi64( SKBI(k,s,7), \ - m512_const1_64( s ) ) ); \ -} while (0) + _mm512_set1_epi64( s ) ) ); #define TFBIG_MIX_8WAY(x0, x1, rc) \ -do { \ x0 = _mm512_add_epi64( x0, x1 ); \ - x1 = _mm512_xor_si512( mm512_rol_64( x1, rc ), x0 ); \ -} while (0) + x1 = _mm512_xor_si512( mm512_rol_64( x1, rc ), x0 ); -#define TFBIG_MIX8_8WAY(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) do { \ +#define TFBIG_MIX8_8WAY(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) \ TFBIG_MIX_8WAY(w0, w1, rc0); \ TFBIG_MIX_8WAY(w2, w3, rc1); \ TFBIG_MIX_8WAY(w4, w5, rc2); \ - TFBIG_MIX_8WAY(w6, w7, rc3); \ - } while (0) + TFBIG_MIX_8WAY(w6, w7, rc3); -#define TFBIG_8WAY_4e(s) do { \ +#define TFBIG_8WAY_4e(s) \ TFBIG_ADDKEY_8WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \ TFBIG_MIX8_8WAY(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \ TFBIG_MIX8_8WAY(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \ TFBIG_MIX8_8WAY(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \ - TFBIG_MIX8_8WAY(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); \ - } while (0) + TFBIG_MIX8_8WAY(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); -#define TFBIG_8WAY_4o(s) do { \ +#define TFBIG_8WAY_4o(s) \ TFBIG_ADDKEY_8WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \ TFBIG_MIX8_8WAY(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \ TFBIG_MIX8_8WAY(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \ TFBIG_MIX8_8WAY(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \ - TFBIG_MIX8_8WAY(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); \ - } while 
(0) + TFBIG_MIX8_8WAY(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); #define UBI_BIG_8WAY(etype, extra) \ do { \ @@ -424,59 +415,48 @@ do { \ #endif // AVX512 #define TFBIG_KINIT_4WAY( k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2 ) \ -do { \ - k8 = _mm256_xor_si256( _mm256_xor_si256( \ - _mm256_xor_si256( _mm256_xor_si256( k0, k1 ), \ - _mm256_xor_si256( k2, k3 ) ), \ - _mm256_xor_si256( _mm256_xor_si256( k4, k5 ), \ - _mm256_xor_si256( k6, k7 ) ) ), \ - m256_const1_64( 0x1BD11BDAA9FC1A22) ); \ - t2 = t0 ^ t1; \ -} while (0) + k8 = mm256_xor3( mm256_xor3( k0, k1, k2 ), \ + mm256_xor3( k3, k4, k5 ), \ + mm256_xor3( k6, k7, \ + _mm256_set1_epi64x( 0x1BD11BDAA9FC1A22) ) ); \ + t2 = t0 ^ t1; #define TFBIG_ADDKEY_4WAY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) \ -do { \ w0 = _mm256_add_epi64( w0, SKBI(k,s,0) ); \ w1 = _mm256_add_epi64( w1, SKBI(k,s,1) ); \ w2 = _mm256_add_epi64( w2, SKBI(k,s,2) ); \ w3 = _mm256_add_epi64( w3, SKBI(k,s,3) ); \ w4 = _mm256_add_epi64( w4, SKBI(k,s,4) ); \ w5 = _mm256_add_epi64( w5, _mm256_add_epi64( SKBI(k,s,5), \ - m256_const1_64( SKBT(t,s,0) ) ) ); \ + _mm256_set1_epi64x( SKBT(t,s,0) ) ) ); \ w6 = _mm256_add_epi64( w6, _mm256_add_epi64( SKBI(k,s,6), \ - m256_const1_64( SKBT(t,s,1) ) ) ); \ + _mm256_set1_epi64x( SKBT(t,s,1) ) ) ); \ w7 = _mm256_add_epi64( w7, _mm256_add_epi64( SKBI(k,s,7), \ - m256_const1_64( s ) ) ); \ -} while (0) + _mm256_set1_epi64x( s ) ) ); #define TFBIG_MIX_4WAY(x0, x1, rc) \ -do { \ x0 = _mm256_add_epi64( x0, x1 ); \ - x1 = _mm256_xor_si256( mm256_rol_64( x1, rc ), x0 ); \ -} while (0) + x1 = _mm256_xor_si256( mm256_rol_64( x1, rc ), x0 ); -#define TFBIG_MIX8_4WAY(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) do { \ +#define TFBIG_MIX8_4WAY(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) \ TFBIG_MIX_4WAY(w0, w1, rc0); \ TFBIG_MIX_4WAY(w2, w3, rc1); \ TFBIG_MIX_4WAY(w4, w5, rc2); \ - TFBIG_MIX_4WAY(w6, w7, rc3); \ - } while (0) + TFBIG_MIX_4WAY(w6, w7, rc3); -#define TFBIG_4WAY_4e(s) do { \ +#define 
TFBIG_4WAY_4e(s) \ TFBIG_ADDKEY_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \ TFBIG_MIX8_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \ TFBIG_MIX8_4WAY(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \ TFBIG_MIX8_4WAY(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \ - TFBIG_MIX8_4WAY(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); \ - } while (0) + TFBIG_MIX8_4WAY(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); -#define TFBIG_4WAY_4o(s) do { \ +#define TFBIG_4WAY_4o(s) \ TFBIG_ADDKEY_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \ TFBIG_MIX8_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \ TFBIG_MIX8_4WAY(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \ TFBIG_MIX8_4WAY(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \ - TFBIG_MIX8_4WAY(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); \ - } while (0) + TFBIG_MIX8_4WAY(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); // scale buf offset by 4 #define UBI_BIG_4WAY(etype, extra) \ @@ -541,28 +521,28 @@ do { \ void skein256_8way_init( skein256_8way_context *sc ) { - sc->h0 = m512_const1_64( 0xCCD044A12FDB3E13 ); - sc->h1 = m512_const1_64( 0xE83590301A79A9EB ); - sc->h2 = m512_const1_64( 0x55AEA0614F816E6F ); - sc->h3 = m512_const1_64( 0x2A2767A4AE9B94DB ); - sc->h4 = m512_const1_64( 0xEC06025E74DD7683 ); - sc->h5 = m512_const1_64( 0xE7A436CDC4746251 ); - sc->h6 = m512_const1_64( 0xC36FBAF9393AD185 ); - sc->h7 = m512_const1_64( 0x3EEDBA1833EDFC13 ); + sc->h0 = _mm512_set1_epi64( 0xCCD044A12FDB3E13 ); + sc->h1 = _mm512_set1_epi64( 0xE83590301A79A9EB ); + sc->h2 = _mm512_set1_epi64( 0x55AEA0614F816E6F ); + sc->h3 = _mm512_set1_epi64( 0x2A2767A4AE9B94DB ); + sc->h4 = _mm512_set1_epi64( 0xEC06025E74DD7683 ); + sc->h5 = _mm512_set1_epi64( 0xE7A436CDC4746251 ); + sc->h6 = _mm512_set1_epi64( 0xC36FBAF9393AD185 ); + sc->h7 = _mm512_set1_epi64( 0x3EEDBA1833EDFC13 ); sc->bcount = 0; sc->ptr = 0; } void skein512_8way_init( skein512_8way_context *sc ) { - sc->h0 = m512_const1_64( 0x4903ADFF749C51CE ); - sc->h1 
= m512_const1_64( 0x0D95DE399746DF03 ); - sc->h2 = m512_const1_64( 0x8FD1934127C79BCE ); - sc->h3 = m512_const1_64( 0x9A255629FF352CB1 ); - sc->h4 = m512_const1_64( 0x5DB62599DF6CA7B0 ); - sc->h5 = m512_const1_64( 0xEABE394CA9D5C3F4 ); - sc->h6 = m512_const1_64( 0x991112C71A75B523 ); - sc->h7 = m512_const1_64( 0xAE18A40B660FCC33 ); + sc->h0 = _mm512_set1_epi64( 0x4903ADFF749C51CE ); + sc->h1 = _mm512_set1_epi64( 0x0D95DE399746DF03 ); + sc->h2 = _mm512_set1_epi64( 0x8FD1934127C79BCE ); + sc->h3 = _mm512_set1_epi64( 0x9A255629FF352CB1 ); + sc->h4 = _mm512_set1_epi64( 0x5DB62599DF6CA7B0 ); + sc->h5 = _mm512_set1_epi64( 0xEABE394CA9D5C3F4 ); + sc->h6 = _mm512_set1_epi64( 0x991112C71A75B523 ); + sc->h7 = _mm512_set1_epi64( 0xAE18A40B660FCC33 ); sc->bcount = 0; sc->ptr = 0; } @@ -660,14 +640,14 @@ void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data, // Init - h0 = m512_const1_64( 0x4903ADFF749C51CE ); - h1 = m512_const1_64( 0x0D95DE399746DF03 ); - h2 = m512_const1_64( 0x8FD1934127C79BCE ); - h3 = m512_const1_64( 0x9A255629FF352CB1 ); - h4 = m512_const1_64( 0x5DB62599DF6CA7B0 ); - h5 = m512_const1_64( 0xEABE394CA9D5C3F4 ); - h6 = m512_const1_64( 0x991112C71A75B523 ); - h7 = m512_const1_64( 0xAE18A40B660FCC33 ); + h0 = _mm512_set1_epi64( 0x4903ADFF749C51CE ); + h1 = _mm512_set1_epi64( 0x0D95DE399746DF03 ); + h2 = _mm512_set1_epi64( 0x8FD1934127C79BCE ); + h3 = _mm512_set1_epi64( 0x9A255629FF352CB1 ); + h4 = _mm512_set1_epi64( 0x5DB62599DF6CA7B0 ); + h5 = _mm512_set1_epi64( 0xEABE394CA9D5C3F4 ); + h6 = _mm512_set1_epi64( 0x991112C71A75B523 ); + h7 = _mm512_set1_epi64( 0xAE18A40B660FCC33 ); // Update @@ -734,14 +714,14 @@ skein512_8way_prehash64( skein512_8way_context *sc, const void *data ) buf[5] = vdata[5]; buf[6] = vdata[6]; buf[7] = vdata[7]; - register __m512i h0 = m512_const1_64( 0x4903ADFF749C51CE ); - register __m512i h1 = m512_const1_64( 0x0D95DE399746DF03 ); - register __m512i h2 = m512_const1_64( 0x8FD1934127C79BCE ); - register 
__m512i h3 = m512_const1_64( 0x9A255629FF352CB1 ); - register __m512i h4 = m512_const1_64( 0x5DB62599DF6CA7B0 ); - register __m512i h5 = m512_const1_64( 0xEABE394CA9D5C3F4 ); - register __m512i h6 = m512_const1_64( 0x991112C71A75B523 ); - register __m512i h7 = m512_const1_64( 0xAE18A40B660FCC33 ); + register __m512i h0 = _mm512_set1_epi64( 0x4903ADFF749C51CE ); + register __m512i h1 = _mm512_set1_epi64( 0x0D95DE399746DF03 ); + register __m512i h2 = _mm512_set1_epi64( 0x8FD1934127C79BCE ); + register __m512i h3 = _mm512_set1_epi64( 0x9A255629FF352CB1 ); + register __m512i h4 = _mm512_set1_epi64( 0x5DB62599DF6CA7B0 ); + register __m512i h5 = _mm512_set1_epi64( 0xEABE394CA9D5C3F4 ); + register __m512i h6 = _mm512_set1_epi64( 0x991112C71A75B523 ); + register __m512i h7 = _mm512_set1_epi64( 0xAE18A40B660FCC33 ); uint64_t bcount = 1; UBI_BIG_8WAY( 224, 0 ); @@ -830,28 +810,28 @@ skein512_8way_close(void *cc, void *dst) void skein256_4way_init( skein256_4way_context *sc ) { - sc->h0 = m256_const1_64( 0xCCD044A12FDB3E13 ); - sc->h1 = m256_const1_64( 0xE83590301A79A9EB ); - sc->h2 = m256_const1_64( 0x55AEA0614F816E6F ); - sc->h3 = m256_const1_64( 0x2A2767A4AE9B94DB ); - sc->h4 = m256_const1_64( 0xEC06025E74DD7683 ); - sc->h5 = m256_const1_64( 0xE7A436CDC4746251 ); - sc->h6 = m256_const1_64( 0xC36FBAF9393AD185 ); - sc->h7 = m256_const1_64( 0x3EEDBA1833EDFC13 ); + sc->h0 = _mm256_set1_epi64x( 0xCCD044A12FDB3E13 ); + sc->h1 = _mm256_set1_epi64x( 0xE83590301A79A9EB ); + sc->h2 = _mm256_set1_epi64x( 0x55AEA0614F816E6F ); + sc->h3 = _mm256_set1_epi64x( 0x2A2767A4AE9B94DB ); + sc->h4 = _mm256_set1_epi64x( 0xEC06025E74DD7683 ); + sc->h5 = _mm256_set1_epi64x( 0xE7A436CDC4746251 ); + sc->h6 = _mm256_set1_epi64x( 0xC36FBAF9393AD185 ); + sc->h7 = _mm256_set1_epi64x( 0x3EEDBA1833EDFC13 ); sc->bcount = 0; sc->ptr = 0; } void skein512_4way_init( skein512_4way_context *sc ) { - sc->h0 = m256_const1_64( 0x4903ADFF749C51CE ); - sc->h1 = m256_const1_64( 0x0D95DE399746DF03 ); - sc->h2 = 
m256_const1_64( 0x8FD1934127C79BCE ); - sc->h3 = m256_const1_64( 0x9A255629FF352CB1 ); - sc->h4 = m256_const1_64( 0x5DB62599DF6CA7B0 ); - sc->h5 = m256_const1_64( 0xEABE394CA9D5C3F4 ); - sc->h6 = m256_const1_64( 0x991112C71A75B523 ); - sc->h7 = m256_const1_64( 0xAE18A40B660FCC33 ); + sc->h0 = _mm256_set1_epi64x( 0x4903ADFF749C51CE ); + sc->h1 = _mm256_set1_epi64x( 0x0D95DE399746DF03 ); + sc->h2 = _mm256_set1_epi64x( 0x8FD1934127C79BCE ); + sc->h3 = _mm256_set1_epi64x( 0x9A255629FF352CB1 ); + sc->h4 = _mm256_set1_epi64x( 0x5DB62599DF6CA7B0 ); + sc->h5 = _mm256_set1_epi64x( 0xEABE394CA9D5C3F4 ); + sc->h6 = _mm256_set1_epi64x( 0x991112C71A75B523 ); + sc->h7 = _mm256_set1_epi64x( 0xAE18A40B660FCC33 ); sc->bcount = 0; sc->ptr = 0; } @@ -954,14 +934,14 @@ skein512_4way_full( skein512_4way_context *sc, void *out, const void *data, const int buf_size = 64; // 64 * __m256i uint64_t bcount = 0; - h0 = m256_const1_64( 0x4903ADFF749C51CE ); - h1 = m256_const1_64( 0x0D95DE399746DF03 ); - h2 = m256_const1_64( 0x8FD1934127C79BCE ); - h3 = m256_const1_64( 0x9A255629FF352CB1 ); - h4 = m256_const1_64( 0x5DB62599DF6CA7B0 ); - h5 = m256_const1_64( 0xEABE394CA9D5C3F4 ); - h6 = m256_const1_64( 0x991112C71A75B523 ); - h7 = m256_const1_64( 0xAE18A40B660FCC33 ); + h0 = _mm256_set1_epi64x( 0x4903ADFF749C51CE ); + h1 = _mm256_set1_epi64x( 0x0D95DE399746DF03 ); + h2 = _mm256_set1_epi64x( 0x8FD1934127C79BCE ); + h3 = _mm256_set1_epi64x( 0x9A255629FF352CB1 ); + h4 = _mm256_set1_epi64x( 0x5DB62599DF6CA7B0 ); + h5 = _mm256_set1_epi64x( 0xEABE394CA9D5C3F4 ); + h6 = _mm256_set1_epi64x( 0x991112C71A75B523 ); + h7 = _mm256_set1_epi64x( 0xAE18A40B660FCC33 ); // Update @@ -1028,14 +1008,14 @@ skein512_4way_prehash64( skein512_4way_context *sc, const void *data ) buf[5] = vdata[5]; buf[6] = vdata[6]; buf[7] = vdata[7]; - register __m256i h0 = m256_const1_64( 0x4903ADFF749C51CE ); - register __m256i h1 = m256_const1_64( 0x0D95DE399746DF03 ); - register __m256i h2 = m256_const1_64( 0x8FD1934127C79BCE ); - 
register __m256i h3 = m256_const1_64( 0x9A255629FF352CB1 ); - register __m256i h4 = m256_const1_64( 0x5DB62599DF6CA7B0 ); - register __m256i h5 = m256_const1_64( 0xEABE394CA9D5C3F4 ); - register __m256i h6 = m256_const1_64( 0x991112C71A75B523 ); - register __m256i h7 = m256_const1_64( 0xAE18A40B660FCC33 ); + register __m256i h0 = _mm256_set1_epi64x( 0x4903ADFF749C51CE ); + register __m256i h1 = _mm256_set1_epi64x( 0x0D95DE399746DF03 ); + register __m256i h2 = _mm256_set1_epi64x( 0x8FD1934127C79BCE ); + register __m256i h3 = _mm256_set1_epi64x( 0x9A255629FF352CB1 ); + register __m256i h4 = _mm256_set1_epi64x( 0x5DB62599DF6CA7B0 ); + register __m256i h5 = _mm256_set1_epi64x( 0xEABE394CA9D5C3F4 ); + register __m256i h6 = _mm256_set1_epi64x( 0x991112C71A75B523 ); + register __m256i h7 = _mm256_set1_epi64x( 0xAE18A40B660FCC33 ); uint64_t bcount = 1; UBI_BIG_4WAY( 224, 0 ); diff --git a/algo/skein/skein2-4way.c b/algo/skein/skein2-4way.c index 2eb0deb6..16b1627f 100644 --- a/algo/skein/skein2-4way.c +++ b/algo/skein/skein2-4way.c @@ -57,7 +57,7 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) ); @@ -119,7 +119,7 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( (n < last_nonce) && !work_restart[thr_id].restart ); diff --git a/algo/swifftx/swifftx.c b/algo/swifftx/swifftx.c index c7d8c727..d429bbc2 100644 --- a/algo/swifftx/swifftx.c +++ b/algo/swifftx/swifftx.c @@ -630,36 +630,35 @@ void InitializeSWIFFTX() } // In the original code the F matrix is rotated so it was not aranged -// the same as all the other data. 
Rearanging F to match all the other -// data made vectorizing possible, the compiler probably could have been -// able to auto-vectorize with proper data organisation. -// Also in the original code the custom 16 bit data types are all now 32 -// bit int32_t regardless of the type name. -// -void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) +// the same as the other data. Rearranging F made vectorizing up to 256 bits +// possible. +// Also in the original code the custom 16 bit data types are all now aliased +// to 32 bit int32_t. + +void FFT( const unsigned char input[EIGHTH_N], swift_int32_t *output ) { #if defined(__AVX2__) - __m256i F[8] __attribute__ ((aligned (64))); + __m256i F0, F1, F2, F3, F4, F5, F6, F7; + __m256i tbl = *(__m256i*)&( fftTable[ input[0] << 3 ] ); __m256i *mul = (__m256i*)multipliers; __m256i *out = (__m256i*)output; - __m256i *tbl = (__m256i*)&( fftTable[ input[0] << 3 ] ); - - F[0] = _mm256_mullo_epi32( mul[0], *tbl ); - tbl = (__m256i*)&( fftTable[ input[1] << 3 ] ); - F[1] = _mm256_mullo_epi32( mul[1], *tbl ); - tbl = (__m256i*)&( fftTable[ input[2] << 3 ] ); - F[2] = _mm256_mullo_epi32( mul[2], *tbl ); - tbl = (__m256i*)&( fftTable[ input[3] << 3 ] ); - F[3] = _mm256_mullo_epi32( mul[3], *tbl ); - tbl = (__m256i*)&( fftTable[ input[4] << 3 ] ); - F[4] = _mm256_mullo_epi32( mul[4], *tbl ); - tbl = (__m256i*)&( fftTable[ input[5] << 3 ] ); - F[5] = _mm256_mullo_epi32( mul[5], *tbl ); - tbl = (__m256i*)&( fftTable[ input[6] << 3 ] ); - F[6] = _mm256_mullo_epi32( mul[6], *tbl ); - tbl = (__m256i*)&( fftTable[ input[7] << 3 ] ); - F[7] = _mm256_mullo_epi32( mul[7], *tbl ); + + F0 = _mm256_mullo_epi32( mul[0], tbl ); + tbl = *(__m256i*)&( fftTable[ input[1] << 3 ] ); + F1 = _mm256_mullo_epi32( mul[1], tbl ); + tbl = *(__m256i*)&( fftTable[ input[2] << 3 ] ); + F2 = _mm256_mullo_epi32( mul[2], tbl ); + tbl = *(__m256i*)&( fftTable[ input[3] << 3 ] ); + F3 = _mm256_mullo_epi32( mul[3], tbl ); + tbl = *(__m256i*)&( fftTable[
input[4] << 3 ] ); + F4 = _mm256_mullo_epi32( mul[4], tbl ); + tbl = *(__m256i*)&( fftTable[ input[5] << 3 ] ); + F5 = _mm256_mullo_epi32( mul[5], tbl ); + tbl = *(__m256i*)&( fftTable[ input[6] << 3 ] ); + F6 = _mm256_mullo_epi32( mul[6], tbl ); + tbl = *(__m256i*)&( fftTable[ input[7] << 3 ] ); + F7 = _mm256_mullo_epi32( mul[7], tbl ); #define ADD_SUB( a, b ) \ { \ @@ -668,52 +667,50 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) a = _mm256_add_epi32( a, tmp ); \ } - ADD_SUB( F[0], F[1] ); - ADD_SUB( F[2], F[3] ); - ADD_SUB( F[4], F[5] ); - ADD_SUB( F[6], F[7] ); - - F[3] = _mm256_slli_epi32( F[3], 4 ); - F[7] = _mm256_slli_epi32( F[7], 4 ); - - ADD_SUB( F[0], F[2] ); - ADD_SUB( F[1], F[3] ); - ADD_SUB( F[4], F[6] ); - ADD_SUB( F[5], F[7] ); - - F[5] = _mm256_slli_epi32( F[5], 2 ); - F[6] = _mm256_slli_epi32( F[6], 4 ); - F[7] = _mm256_slli_epi32( F[7], 6 ); - - ADD_SUB( F[0], F[4] ); - ADD_SUB( F[1], F[5] ); - ADD_SUB( F[2], F[6] ); - ADD_SUB( F[3], F[7] ); + ADD_SUB( F0, F1 ); + ADD_SUB( F2, F3 ); + ADD_SUB( F4, F5 ); + ADD_SUB( F6, F7 ); + F3 = _mm256_slli_epi32( F3, 4 ); + F7 = _mm256_slli_epi32( F7, 4 ); + ADD_SUB( F0, F2 ); + ADD_SUB( F1, F3 ); + ADD_SUB( F4, F6 ); + ADD_SUB( F5, F7 ); + F5 = _mm256_slli_epi32( F5, 2 ); + F6 = _mm256_slli_epi32( F6, 4 ); + F7 = _mm256_slli_epi32( F7, 6 ); + ADD_SUB( F0, F4 ); + ADD_SUB( F1, F5 ); + ADD_SUB( F2, F6 ); + ADD_SUB( F3, F7 ); #undef ADD_SUB #if defined (__AVX512VL__) && defined(__AVX512BW__) - const __m256i mask = _mm256_movm_epi8( 0x11111111 ); - + #define Q_REDUCE( a ) \ + _mm256_sub_epi32( _mm256_maskz_mov_epi8( 0x11111111, a ), \ + _mm256_srai_epi32( a, 8 ) ) + #else - const __m256i mask = m256_const1_32( 0x000000ff ); - -#endif + const __m256i mask = _mm256_set1_epi32( 0x000000ff ); #define Q_REDUCE( a ) \ _mm256_sub_epi32( _mm256_and_si256( a, mask ), \ _mm256_srai_epi32( a, 8 ) ) + +#endif - out[0] = Q_REDUCE( F[0] ); - out[1] = Q_REDUCE( F[1] ); - out[2] = Q_REDUCE( F[2] ); - 
out[3] = Q_REDUCE( F[3] ); - out[4] = Q_REDUCE( F[4] ); - out[5] = Q_REDUCE( F[5] ); - out[6] = Q_REDUCE( F[6] ); - out[7] = Q_REDUCE( F[7] ); + out[0] = Q_REDUCE( F0 ); + out[1] = Q_REDUCE( F1 ); + out[2] = Q_REDUCE( F2 ); + out[3] = Q_REDUCE( F3 ); + out[4] = Q_REDUCE( F4 ); + out[5] = Q_REDUCE( F5 ); + out[6] = Q_REDUCE( F6 ); + out[7] = Q_REDUCE( F7 ); #undef Q_REDUCE @@ -763,12 +760,10 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) ADD_SUB( F[ 9], F[11] ); ADD_SUB( F[12], F[14] ); ADD_SUB( F[13], F[15] ); - F[ 6] = _mm_slli_epi32( F[ 6], 4 ); F[ 7] = _mm_slli_epi32( F[ 7], 4 ); F[14] = _mm_slli_epi32( F[14], 4 ); F[15] = _mm_slli_epi32( F[15], 4 ); - ADD_SUB( F[ 0], F[ 4] ); ADD_SUB( F[ 1], F[ 5] ); ADD_SUB( F[ 2], F[ 6] ); @@ -777,14 +772,12 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) ADD_SUB( F[ 9], F[13] ); ADD_SUB( F[10], F[14] ); ADD_SUB( F[11], F[15] ); - F[10] = _mm_slli_epi32( F[10], 2 ); F[11] = _mm_slli_epi32( F[11], 2 ); F[12] = _mm_slli_epi32( F[12], 4 ); F[13] = _mm_slli_epi32( F[13], 4 ); F[14] = _mm_slli_epi32( F[14], 6 ); F[15] = _mm_slli_epi32( F[15], 6 ); - ADD_SUB( F[ 0], F[ 8] ); ADD_SUB( F[ 1], F[ 9] ); ADD_SUB( F[ 2], F[10] ); @@ -796,7 +789,7 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) #undef ADD_SUB - const __m128i mask = m128_const1_32( 0x000000ff ); + const __m128i mask = _mm_set1_epi32( 0x000000ff ); #define Q_REDUCE( a ) \ _mm_sub_epi32( _mm_and_si128( a, mask ), _mm_srai_epi32( a, 8 ) ) @@ -820,16 +813,13 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) #undef Q_REDUCE -#else // < SSE4.1 +#else // AVX256 elif SSE4_1 swift_int16_t *mult = multipliers; - - // First loop unrolling: - register swift_int16_t *table = &(fftTable[input[0] << 3]); - -/* + swift_int16_t *table = &( fftTable[ input[0] << 3 ] ); swift_int32_t F[64]; + /* for (int i = 0; i < 8; i++) { int j = i<<3; @@ -845,99 +835,91 @@ void FFT(const unsigned char 
input[EIGHTH_N], swift_int32_t *output) } */ - register swift_int32_t F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, - F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, - F30, F31, F32, F33, F34, F35, F36, F37, F38, F39, - F40, F41, F42, F43, F44, F45, F46, F47, F48, F49, - F50, F51, F52, F53, F54, F55, F56, F57, F58, F59, - F60, F61, F62, F63; - - F0 = mult[0] * table[0]; - F8 = mult[1] * table[1]; - F16 = mult[2] * table[2]; - F24 = mult[3] * table[3]; - F32 = mult[4] * table[4]; - F40 = mult[5] * table[5]; - F48 = mult[6] * table[6]; - F56 = mult[7] * table[7]; + F[ 0] = mult[ 0] * table[0]; + F[ 8] = mult[ 1] * table[1]; + F[16] = mult[ 2] * table[2]; + F[24] = mult[ 3] * table[3]; + F[32] = mult[ 4] * table[4]; + F[40] = mult[ 5] * table[5]; + F[48] = mult[ 6] * table[6]; + F[56] = mult[ 7] * table[7]; table = &(fftTable[input[1] << 3]); - F1 = mult[ 8] * table[0]; - F9 = mult[ 9] * table[1]; - F17 = mult[10] * table[2]; - F25 = mult[11] * table[3]; - F33 = mult[12] * table[4]; - F41 = mult[13] * table[5]; - F49 = mult[14] * table[6]; - F57 = mult[15] * table[7]; + F[ 1] = mult[ 8] * table[0]; + F[ 9] = mult[ 9] * table[1]; + F[17] = mult[10] * table[2]; + F[25] = mult[11] * table[3]; + F[33] = mult[12] * table[4]; + F[41] = mult[13] * table[5]; + F[49] = mult[14] * table[6]; + F[57] = mult[15] * table[7]; table = &(fftTable[input[2] << 3]); - F2 = mult[16] * table[0]; - F10 = mult[17] * table[1]; - F18 = mult[18] * table[2]; - F26 = mult[19] * table[3]; - F34 = mult[20] * table[4]; - F42 = mult[21] * table[5]; - F50 = mult[22] * table[6]; - F58 = mult[23] * table[7]; + F[ 2] = mult[16] * table[0]; + F[10] = mult[17] * table[1]; + F[18] = mult[18] * table[2]; + F[26] = mult[19] * table[3]; + F[34] = mult[20] * table[4]; + F[42] = mult[21] * table[5]; + F[50] = mult[22] * table[6]; + F[58] = mult[23] * table[7]; table = &(fftTable[input[3] << 3]); - F3 = mult[24] * table[0]; - F11 = mult[25] * table[1]; - F19 = 
mult[26] * table[2]; - F27 = mult[27] * table[3]; - F35 = mult[28] * table[4]; - F43 = mult[29] * table[5]; - F51 = mult[30] * table[6]; - F59 = mult[31] * table[7]; + F[ 3] = mult[24] * table[0]; + F[11] = mult[25] * table[1]; + F[19] = mult[26] * table[2]; + F[27] = mult[27] * table[3]; + F[35] = mult[28] * table[4]; + F[43] = mult[29] * table[5]; + F[51] = mult[30] * table[6]; + F[59] = mult[31] * table[7]; table = &(fftTable[input[4] << 3]); - F4 = mult[32] * table[0]; - F12 = mult[33] * table[1]; - F20 = mult[34] * table[2]; - F28 = mult[35] * table[3]; - F36 = mult[36] * table[4]; - F44 = mult[37] * table[5]; - F52 = mult[38] * table[6]; - F60 = mult[39] * table[7]; + F[ 4] = mult[32] * table[0]; + F[12] = mult[33] * table[1]; + F[20] = mult[34] * table[2]; + F[28] = mult[35] * table[3]; + F[36] = mult[36] * table[4]; + F[44] = mult[37] * table[5]; + F[52] = mult[38] * table[6]; + F[60] = mult[39] * table[7]; table = &(fftTable[input[5] << 3]); - F5 = mult[40] * table[0]; - F13 = mult[41] * table[1]; - F21 = mult[42] * table[2]; - F29 = mult[43] * table[3]; - F37 = mult[44] * table[4]; - F45 = mult[45] * table[5]; - F53 = mult[46] * table[6]; - F61 = mult[47] * table[7]; + F[ 5] = mult[40] * table[0]; + F[13] = mult[41] * table[1]; + F[21] = mult[42] * table[2]; + F[29] = mult[43] * table[3]; + F[37] = mult[44] * table[4]; + F[45] = mult[45] * table[5]; + F[53] = mult[46] * table[6]; + F[61] = mult[47] * table[7]; table = &(fftTable[input[6] << 3]); - F6 = mult[48] * table[0]; - F14 = mult[49] * table[1]; - F22 = mult[50] * table[2]; - F30 = mult[51] * table[3]; - F38 = mult[52] * table[4]; - F46 = mult[53] * table[5]; - F54 = mult[54] * table[6]; - F62 = mult[55] * table[7]; + F[ 6] = mult[48] * table[0]; + F[14] = mult[49] * table[1]; + F[22] = mult[50] * table[2]; + F[30] = mult[51] * table[3]; + F[38] = mult[52] * table[4]; + F[46] = mult[53] * table[5]; + F[54] = mult[54] * table[6]; + F[62] = mult[55] * table[7]; table = &(fftTable[input[7] << 3]); - F7 
= mult[56] * table[0]; - F15 = mult[57] * table[1]; - F23 = mult[58] * table[2]; - F31 = mult[59] * table[3]; - F39 = mult[60] * table[4]; - F47 = mult[61] * table[5]; - F55 = mult[62] * table[6]; - F63 = mult[63] * table[7]; + F[ 7] = mult[56] * table[0]; + F[15] = mult[57] * table[1]; + F[23] = mult[58] * table[2]; + F[31] = mult[59] * table[3]; + F[39] = mult[60] * table[4]; + F[47] = mult[61] * table[5]; + F[55] = mult[62] * table[6]; + F[63] = mult[63] * table[7]; #define ADD_SUB( a, b ) \ { \ @@ -987,262 +969,229 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output) } */ - // Second loop unrolling: // Iteration 0: - ADD_SUB(F0, F1); - ADD_SUB(F2, F3); - ADD_SUB(F4, F5); - ADD_SUB(F6, F7); - - F3 <<= 4; - F7 <<= 4; - - ADD_SUB(F0, F2); - ADD_SUB(F1, F3); - ADD_SUB(F4, F6); - ADD_SUB(F5, F7); - - F5 <<= 2; - F6 <<= 4; - F7 <<= 6; - - ADD_SUB(F0, F4); - ADD_SUB(F1, F5); - ADD_SUB(F2, F6); - ADD_SUB(F3, F7); - - output[0] = Q_REDUCE(F0); - output[8] = Q_REDUCE(F1); - output[16] = Q_REDUCE(F2); - output[24] = Q_REDUCE(F3); - output[32] = Q_REDUCE(F4); - output[40] = Q_REDUCE(F5); - output[48] = Q_REDUCE(F6); - output[56] = Q_REDUCE(F7); + ADD_SUB( F[ 0], F[ 1] ); + ADD_SUB( F[ 2], F[ 3] ); + ADD_SUB( F[ 4], F[ 5] ); + ADD_SUB( F[ 6], F[ 7] ); + F[ 3] <<= 4; + F[ 7] <<= 4; + ADD_SUB( F[ 0], F[ 2] ); + ADD_SUB( F[ 1], F[ 3] ); + ADD_SUB( F[ 4], F[ 6] ); + ADD_SUB( F[ 5], F[ 7] ); + F[ 5] <<= 2; + F[ 6] <<= 4; + F[ 7] <<= 6; + ADD_SUB( F[ 0], F[ 4] ); + ADD_SUB( F[ 1], F[ 5] ); + ADD_SUB( F[ 2], F[ 6] ); + ADD_SUB( F[ 3], F[ 7] ); + + output[ 0] = Q_REDUCE( F[ 0] ); + output[ 8] = Q_REDUCE( F[ 1] ); + output[16] = Q_REDUCE( F[ 2] ); + output[24] = Q_REDUCE( F[ 3] ); + output[32] = Q_REDUCE( F[ 4] ); + output[40] = Q_REDUCE( F[ 5] ); + output[48] = Q_REDUCE( F[ 6] ); + output[56] = Q_REDUCE( F[ 7] ); // Iteration 1: - ADD_SUB(F8, F9); - ADD_SUB(F10, F11); - ADD_SUB(F12, F13); - ADD_SUB(F14, F15); - - F11 <<= 4; - F15 <<= 4; - - ADD_SUB(F8, F10); - 
ADD_SUB(F9, F11); - ADD_SUB(F12, F14); - ADD_SUB(F13, F15); - - F13 <<= 2; - F14 <<= 4; - F15 <<= 6; - - ADD_SUB(F8, F12); - ADD_SUB(F9, F13); - ADD_SUB(F10, F14); - ADD_SUB(F11, F15); - - output[1] = Q_REDUCE(F8); - output[9] = Q_REDUCE(F9); - output[17] = Q_REDUCE(F10); - output[25] = Q_REDUCE(F11); - output[33] = Q_REDUCE(F12); - output[41] = Q_REDUCE(F13); - output[49] = Q_REDUCE(F14); - output[57] = Q_REDUCE(F15); + ADD_SUB( F[ 8], F[ 9] ); + ADD_SUB( F[10], F[11] ); + ADD_SUB( F[12], F[13] ); + ADD_SUB( F[14], F[15] ); + F[11] <<= 4; + F[15] <<= 4; + ADD_SUB( F[ 8], F[10] ); + ADD_SUB( F[ 9], F[11] ); + ADD_SUB( F[12], F[14] ); + ADD_SUB( F[13], F[15] ); + F[13] <<= 2; + F[14] <<= 4; + F[15] <<= 6; + ADD_SUB( F[ 8], F[12] ); + ADD_SUB( F[ 9], F[13] ); + ADD_SUB( F[10], F[14] ); + ADD_SUB( F[11], F[15] ); + + output[ 1] = Q_REDUCE( F[ 8] ); + output[ 9] = Q_REDUCE( F[ 9] ); + output[17] = Q_REDUCE( F[10] ); + output[25] = Q_REDUCE( F[11] ); + output[33] = Q_REDUCE( F[12] ); + output[41] = Q_REDUCE( F[13] ); + output[49] = Q_REDUCE( F[14] ); + output[57] = Q_REDUCE( F[15] ); // Iteration 2: - ADD_SUB(F16, F17); - ADD_SUB(F18, F19); - ADD_SUB(F20, F21); - ADD_SUB(F22, F23); - - F19 <<= 4; - F23 <<= 4; - - ADD_SUB(F16, F18); - ADD_SUB(F17, F19); - ADD_SUB(F20, F22); - ADD_SUB(F21, F23); - - F21 <<= 2; - F22 <<= 4; - F23 <<= 6; - - ADD_SUB(F16, F20); - ADD_SUB(F17, F21); - ADD_SUB(F18, F22); - ADD_SUB(F19, F23); - - output[2] = Q_REDUCE(F16); - output[10] = Q_REDUCE(F17); - output[18] = Q_REDUCE(F18); - output[26] = Q_REDUCE(F19); - output[34] = Q_REDUCE(F20); - output[42] = Q_REDUCE(F21); - output[50] = Q_REDUCE(F22); - output[58] = Q_REDUCE(F23); + ADD_SUB( F[16], F[17] ); + ADD_SUB( F[18], F[19] ); + ADD_SUB( F[20], F[21] ); + ADD_SUB( F[22], F[23] ); + F[19] <<= 4; + F[23] <<= 4; + ADD_SUB( F[16], F[18]); + ADD_SUB( F[17], F[19]); + ADD_SUB( F[20], F[22]); + ADD_SUB( F[21], F[23]); + F[21] <<= 2; + F[22] <<= 4; + F[23] <<= 6; + ADD_SUB( F[16], F[20] ); + 
ADD_SUB( F[17], F[21] ); + ADD_SUB( F[18], F[22] ); + ADD_SUB( F[19], F[23] ); + + output[ 2] = Q_REDUCE( F[16] ); + output[10] = Q_REDUCE( F[17] ); + output[18] = Q_REDUCE( F[18] ); + output[26] = Q_REDUCE( F[19] ); + output[34] = Q_REDUCE( F[20] ); + output[42] = Q_REDUCE( F[21] ); + output[50] = Q_REDUCE( F[22] ); + output[58] = Q_REDUCE( F[23] ); // Iteration 3: - ADD_SUB(F24, F25); - ADD_SUB(F26, F27); - ADD_SUB(F28, F29); - ADD_SUB(F30, F31); - - F27 <<= 4; - F31 <<= 4; - - ADD_SUB(F24, F26); - ADD_SUB(F25, F27); - ADD_SUB(F28, F30); - ADD_SUB(F29, F31); - - F29 <<= 2; - F30 <<= 4; - F31 <<= 6; - - ADD_SUB(F24, F28); - ADD_SUB(F25, F29); - ADD_SUB(F26, F30); - ADD_SUB(F27, F31); - - output[3] = Q_REDUCE(F24); - output[11] = Q_REDUCE(F25); - output[19] = Q_REDUCE(F26); - output[27] = Q_REDUCE(F27); - output[35] = Q_REDUCE(F28); - output[43] = Q_REDUCE(F29); - output[51] = Q_REDUCE(F30); - output[59] = Q_REDUCE(F31); + ADD_SUB( F[24], F[25] ); + ADD_SUB( F[26], F[27] ); + ADD_SUB( F[28], F[29] ); + ADD_SUB( F[30], F[31] ); + F[27] <<= 4; + F[31] <<= 4; + ADD_SUB( F[24], F[26] ); + ADD_SUB( F[25], F[27] ); + ADD_SUB( F[28], F[30] ); + ADD_SUB( F[29], F[31] ); + F[29] <<= 2; + F[30] <<= 4; + F[31] <<= 6; + ADD_SUB( F[24], F[28] ); + ADD_SUB( F[25], F[29] ); + ADD_SUB( F[26], F[30] ); + ADD_SUB( F[27], F[31] ); + + output[ 3] = Q_REDUCE( F[24] ); + output[11] = Q_REDUCE( F[25] ); + output[19] = Q_REDUCE( F[26] ); + output[27] = Q_REDUCE( F[27] ); + output[35] = Q_REDUCE( F[28] ); + output[43] = Q_REDUCE( F[29] ); + output[51] = Q_REDUCE( F[30] ); + output[59] = Q_REDUCE( F[31] ); // Iteration 4: - ADD_SUB(F32, F33); - ADD_SUB(F34, F35); - ADD_SUB(F36, F37); - ADD_SUB(F38, F39); - - F35 <<= 4; - F39 <<= 4; - - ADD_SUB(F32, F34); - ADD_SUB(F33, F35); - ADD_SUB(F36, F38); - ADD_SUB(F37, F39); - - F37 <<= 2; - F38 <<= 4; - F39 <<= 6; - - ADD_SUB(F32, F36); - ADD_SUB(F33, F37); - ADD_SUB(F34, F38); - ADD_SUB(F35, F39); - - output[4] = Q_REDUCE(F32); - output[12] = 
Q_REDUCE(F33); - output[20] = Q_REDUCE(F34); - output[28] = Q_REDUCE(F35); - output[36] = Q_REDUCE(F36); - output[44] = Q_REDUCE(F37); - output[52] = Q_REDUCE(F38); - output[60] = Q_REDUCE(F39); + ADD_SUB( F[32], F[33] ); + ADD_SUB( F[34], F[35] ); + ADD_SUB( F[36], F[37] ); + ADD_SUB( F[38], F[39] ); + F[35] <<= 4; + F[39] <<= 4; + ADD_SUB( F[32], F[34] ); + ADD_SUB( F[33], F[35] ); + ADD_SUB( F[36], F[38] ); + ADD_SUB( F[37], F[39] ); + F[37] <<= 2; + F[38] <<= 4; + F[39] <<= 6; + ADD_SUB( F[32], F[36] ); + ADD_SUB( F[33], F[37] ); + ADD_SUB( F[34], F[38] ); + ADD_SUB( F[35], F[39] ); + + output[ 4] = Q_REDUCE( F[32] ); + output[12] = Q_REDUCE( F[33] ); + output[20] = Q_REDUCE( F[34] ); + output[28] = Q_REDUCE( F[35] ); + output[36] = Q_REDUCE( F[36] ); + output[44] = Q_REDUCE( F[37] ); + output[52] = Q_REDUCE( F[38] ); + output[60] = Q_REDUCE( F[39] ); // Iteration 5: - ADD_SUB(F40, F41); - ADD_SUB(F42, F43); - ADD_SUB(F44, F45); - ADD_SUB(F46, F47); - - F43 <<= 4; - F47 <<= 4; - - ADD_SUB(F40, F42); - ADD_SUB(F41, F43); - ADD_SUB(F44, F46); - ADD_SUB(F45, F47); - - F45 <<= 2; - F46 <<= 4; - F47 <<= 6; - - ADD_SUB(F40, F44); - ADD_SUB(F41, F45); - ADD_SUB(F42, F46); - ADD_SUB(F43, F47); - - output[5] = Q_REDUCE(F40); - output[13] = Q_REDUCE(F41); - output[21] = Q_REDUCE(F42); - output[29] = Q_REDUCE(F43); - output[37] = Q_REDUCE(F44); - output[45] = Q_REDUCE(F45); - output[53] = Q_REDUCE(F46); - output[61] = Q_REDUCE(F47); + ADD_SUB( F[40], F[41] ); + ADD_SUB( F[42], F[43] ); + ADD_SUB( F[44], F[45] ); + ADD_SUB( F[46], F[47] ); + F[43] <<= 4; + F[47] <<= 4; + ADD_SUB( F[40], F[42] ); + ADD_SUB( F[41], F[43] ); + ADD_SUB( F[44], F[46] ); + ADD_SUB( F[45], F[47] ); + F[45] <<= 2; + F[46] <<= 4; + F[47] <<= 6; + ADD_SUB( F[40], F[44] ); + ADD_SUB( F[41], F[45] ); + ADD_SUB( F[42], F[46] ); + ADD_SUB( F[43], F[47] ); + + output[ 5] = Q_REDUCE( F[40] ); + output[13] = Q_REDUCE( F[41] ); + output[21] = Q_REDUCE( F[42] ); + output[29] = Q_REDUCE( F[43] ); + output[37] 
= Q_REDUCE( F[44] ); + output[45] = Q_REDUCE( F[45] ); + output[53] = Q_REDUCE( F[46] ); + output[61] = Q_REDUCE( F[47] ); // Iteration 6: - ADD_SUB(F48, F49); - ADD_SUB(F50, F51); - ADD_SUB(F52, F53); - ADD_SUB(F54, F55); - - F51 <<= 4; - F55 <<= 4; - - ADD_SUB(F48, F50); - ADD_SUB(F49, F51); - ADD_SUB(F52, F54); - ADD_SUB(F53, F55); - - F53 <<= 2; - F54 <<= 4; - F55 <<= 6; - - ADD_SUB(F48, F52); - ADD_SUB(F49, F53); - ADD_SUB(F50, F54); - ADD_SUB(F51, F55); - - output[6] = Q_REDUCE(F48); - output[14] = Q_REDUCE(F49); - output[22] = Q_REDUCE(F50); - output[30] = Q_REDUCE(F51); - output[38] = Q_REDUCE(F52); - output[46] = Q_REDUCE(F53); - output[54] = Q_REDUCE(F54); - output[62] = Q_REDUCE(F55); + ADD_SUB( F[48], F[49] ); + ADD_SUB( F[50], F[51] ); + ADD_SUB( F[52], F[53] ); + ADD_SUB( F[54], F[55] ); + F[51] <<= 4; + F[55] <<= 4; + ADD_SUB( F[48], F[50] ); + ADD_SUB( F[49], F[51] ); + ADD_SUB( F[52], F[54] ); + ADD_SUB( F[53], F[55] ); + F[53] <<= 2; + F[54] <<= 4; + F[55] <<= 6; + ADD_SUB( F[48], F[52] ); + ADD_SUB( F[49], F[53] ); + ADD_SUB( F[50], F[54] ); + ADD_SUB( F[51], F[55] ); + + output[ 6] = Q_REDUCE( F[48] ); + output[14] = Q_REDUCE( F[49] ); + output[22] = Q_REDUCE( F[50] ); + output[30] = Q_REDUCE( F[51] ); + output[38] = Q_REDUCE( F[52] ); + output[46] = Q_REDUCE( F[53] ); + output[54] = Q_REDUCE( F[54] ); + output[62] = Q_REDUCE( F[55] ); // Iteration 7: - ADD_SUB(F56, F57); - ADD_SUB(F58, F59); - ADD_SUB(F60, F61); - ADD_SUB(F62, F63); - - F59 <<= 4; - F63 <<= 4; - - ADD_SUB(F56, F58); - ADD_SUB(F57, F59); - ADD_SUB(F60, F62); - ADD_SUB(F61, F63); - - F61 <<= 2; - F62 <<= 4; - F63 <<= 6; - - ADD_SUB(F56, F60); - ADD_SUB(F57, F61); - ADD_SUB(F58, F62); - ADD_SUB(F59, F63); - - output[7] = Q_REDUCE(F56); - output[15] = Q_REDUCE(F57); - output[23] = Q_REDUCE(F58); - output[31] = Q_REDUCE(F59); - output[39] = Q_REDUCE(F60); - output[47] = Q_REDUCE(F61); - output[55] = Q_REDUCE(F62); - output[63] = Q_REDUCE(F63); + ADD_SUB( F[56], F[57] ); + ADD_SUB( 
F[58], F[59] ); + ADD_SUB( F[60], F[61] ); + ADD_SUB( F[62], F[63] ); + F[59] <<= 4; + F[63] <<= 4; + ADD_SUB( F[56], F[58] ); + ADD_SUB( F[57], F[59] ); + ADD_SUB( F[60], F[62] ); + ADD_SUB( F[61], F[63] ); + F[61] <<= 2; + F[62] <<= 4; + F[63] <<= 6; + ADD_SUB( F[56], F[60] ); + ADD_SUB( F[57], F[61] ); + ADD_SUB( F[58], F[62] ); + ADD_SUB( F[59], F[63] ); + + output[ 7] = Q_REDUCE( F[56] ); + output[15] = Q_REDUCE( F[57] ); + output[23] = Q_REDUCE( F[58] ); + output[31] = Q_REDUCE( F[59] ); + output[39] = Q_REDUCE( F[60] ); + output[47] = Q_REDUCE( F[61] ); + output[55] = Q_REDUCE( F[62] ); + output[63] = Q_REDUCE( F[63] ); #undef ADD_SUB #undef Q_REDUCE diff --git a/algo/verthash/tiny_sha3/sha3-4way.c b/algo/verthash/tiny_sha3/sha3-4way.c index debbd775..1fada155 100644 --- a/algo/verthash/tiny_sha3/sha3-4way.c +++ b/algo/verthash/tiny_sha3/sha3-4way.c @@ -134,10 +134,10 @@ int sha3_4way_update( sha3_4way_ctx_t *c, const void *data, size_t len ) int sha3_4way_final( void *md, sha3_4way_ctx_t *c ) { c->st[ c->pt ] = _mm256_xor_si256( c->st[ c->pt ], - m256_const1_64( 6 ) ); + _mm256_set1_epi64x( 6 ) ); c->st[ c->rsiz / 8 - 1 ] = _mm256_xor_si256( c->st[ c->rsiz / 8 - 1 ], - m256_const1_64( 0x8000000000000000 ) ); + _mm256_set1_epi64x( 0x8000000000000000 ) ); sha3_4way_keccakf( c->st ); memcpy( md, c->st, c->mdlen * 4 ); return 1; @@ -268,10 +268,10 @@ int sha3_8way_final( void *md, sha3_8way_ctx_t *c ) { c->st[ c->pt ] = _mm512_xor_si512( c->st[ c->pt ], - m512_const1_64( 6 ) ); + _mm512_set1_epi64( 6 ) ); c->st[ c->rsiz / 8 - 1 ] = _mm512_xor_si512( c->st[ c->rsiz / 8 - 1 ], - m512_const1_64( 0x8000000000000000 ) ); + _mm512_set1_epi64( 0x8000000000000000 ) ); sha3_8way_keccakf( c->st ); memcpy( md, c->st, c->mdlen * 8 ); return 1; diff --git a/algo/x11/c11-4way.c b/algo/x11/c11-4way.c index 948fa200..e4b677b7 100644 --- a/algo/x11/c11-4way.c +++ b/algo/x11/c11-4way.c @@ -201,7 +201,7 @@ int scanhash_c11_8way( struct work *work, uint32_t max_nonce, uint32_t n = 
first_nonce; const int thr_id = mythr->id; const uint32_t targ32_d7 = ptarget[7]; - const __m512i eight = m512_const1_64( 8 ); + const __m512i eight = _mm512_set1_epi64( 8 ); const bool bench = opt_benchmark; edata[0] = mm128_swap64_32( casti_m128i( pdata, 0 ) ); @@ -369,7 +369,7 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const int thr_id = mythr->id; const uint32_t targ32_d7 = ptarget[7]; - const __m256i four = m256_const1_64( 4 ); + const __m256i four = _mm256_set1_epi64x( 4 ); const bool bench = opt_benchmark; edata[0] = mm128_swap64_32( casti_m128i( pdata, 0 ) ); diff --git a/algo/x13/skunk-4way.c b/algo/x13/skunk-4way.c index 73d0205f..de82196f 100644 --- a/algo/x13/skunk-4way.c +++ b/algo/x13/skunk-4way.c @@ -114,7 +114,7 @@ int scanhash_skunk_8way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n +=8; } while ( likely( ( n < last_nonce ) && !( *restart ) ) ); pdata[19] = n; @@ -218,7 +218,7 @@ int scanhash_skunk_4way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n +=4; } while ( likely( ( n < last_nonce ) && !( *restart ) ) ); pdata[19] = n; diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c index 7ce45463..3357db87 100644 --- a/algo/x16/x16r-4way.c +++ b/algo/x16/x16r-4way.c @@ -536,7 +536,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; @@ -963,7 +963,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t 
max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; diff --git a/algo/x16/x16rt-4way.c b/algo/x16/x16rt-4way.c index 50c691ea..fa349873 100644 --- a/algo/x16/x16rt-4way.c +++ b/algo/x16/x16rt-4way.c @@ -49,7 +49,7 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; @@ -102,7 +102,7 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( ( n < last_nonce ) && !(*restart) ); pdata[19] = n; diff --git a/algo/x16/x16rt.c b/algo/x16/x16rt.c index 7ff8dc5d..0d2b6632 100644 --- a/algo/x16/x16rt.c +++ b/algo/x16/x16rt.c @@ -26,7 +26,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce, x16rt_getTimeHash( masked_ntime, &timeHash ); x16rt_getAlgoString( &timeHash[0], x16r_hash_order ); s_ntime = masked_ntime; - if ( opt_debug && !thr_id ) + if ( !thr_id ) applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)", x16r_hash_order, swab32( pdata[17] ), timeHash ); } diff --git a/algo/x16/x16rv2-4way.c b/algo/x16/x16rv2-4way.c index 6077e201..b94c4a32 100644 --- a/algo/x16/x16rv2-4way.c +++ b/algo/x16/x16rv2-4way.c @@ -658,7 +658,7 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n 
< last_nonce ) && !(*restart) ) ); pdata[19] = n; @@ -1143,7 +1143,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; diff --git a/algo/x16/x21s-4way.c b/algo/x16/x21s-4way.c index 2f27116f..103bc636 100644 --- a/algo/x16/x21s-4way.c +++ b/algo/x16/x21s-4way.c @@ -181,7 +181,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; @@ -335,7 +335,7 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !(*restart) ) ); pdata[19] = n; diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c index 5c44a18f..e84bad42 100644 --- a/algo/x17/x17-4way.c +++ b/algo/x17/x17-4way.c @@ -254,7 +254,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const int thr_id = mythr->id; const uint32_t targ32_d7 = ptarget[7]; - const __m512i eight = m512_const1_64( 8 ); + const __m512i eight = _mm512_set1_epi64( 8 ); const bool bench = opt_benchmark; // convert LE32 to LE64 @@ -468,7 +468,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce, uint32_t n = first_nonce; const int thr_id = mythr->id; const uint32_t targ32_d7 = ptarget[7]; - const __m256i four = m256_const1_64( 4 ); + const __m256i four = _mm256_set1_epi64x( 4 ); const bool bench = opt_benchmark; // convert LE32 to LE64 diff --git a/algo/x22/x22i-4way.c 
b/algo/x22/x22i-4way.c index a67eb145..e94cb1c8 100644 --- a/algo/x22/x22i-4way.c +++ b/algo/x22/x22i-4way.c @@ -445,7 +445,7 @@ int scanhash_x22i_8way_sha( struct work *work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -494,7 +494,7 @@ int scanhash_x22i_8way( struct work *work, uint32_t max_nonce, } } *noncev = _mm512_add_epi32( *noncev, - m512_const1_64( 0x0000000800000000 ) ); + _mm512_set1_epi64( 0x0000000800000000 ) ); n += 8; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -787,7 +787,7 @@ int scanhash_x22i_4way_sha( struct work* work, uint32_t max_nonce, submit_solution( work, hash+(i<<3), mythr ); } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; @@ -835,7 +835,7 @@ int scanhash_x22i_4way( struct work* work, uint32_t max_nonce, } } *noncev = _mm256_add_epi32( *noncev, - m256_const1_64( 0x0000000400000000 ) ); + _mm256_set1_epi64x( 0x0000000400000000 ) ); n += 4; } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) ); pdata[19] = n; diff --git a/algo/x22/x25x-4way.c b/algo/x22/x25x-4way.c index 6b622bb5..76191219 100644 --- a/algo/x22/x25x-4way.c +++ b/algo/x22/x25x-4way.c @@ -571,7 +571,7 @@ int scanhash_x25x_8way( struct work *work, uint32_t max_nonce, const int thr_id = mythr->id; const uint32_t targ32 = ptarget[7]; const bool bench = opt_benchmark; - const __m512i eight = m512_const1_64( 8 ); + const __m512i eight = _mm512_set1_epi64( 8 ); if ( bench ) ptarget[7] = 0x08ff; edata[0] = mm128_swap64_32( casti_m128i( pdata, 0 ) ); @@ -927,7 +927,7 @@ int scanhash_x25x_4way( 
struct work* work, uint32_t max_nonce, uint32_t n = first_nonce; const int thr_id = mythr->id; const uint32_t targ32 = ptarget[7]; - const __m256i four = m256_const1_64( 4 ); + const __m256i four = _mm256_set1_epi64x( 4 ); const bool bench = opt_benchmark; if ( bench ) ptarget[7] = 0x08ff; diff --git a/configure b/configure index 32a5a284..247550dd 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for cpuminer-opt 3.22.3. +# Generated by GNU Autoconf 2.71 for cpuminer-opt 3.23.0. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -608,8 +608,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.22.3' -PACKAGE_STRING='cpuminer-opt 3.22.3' +PACKAGE_VERSION='3.23.0' +PACKAGE_STRING='cpuminer-opt 3.23.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.22.3 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.23.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1432,7 +1432,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.22.3:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.23.0:";; esac cat <<\_ACEOF @@ -1538,7 +1538,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.22.3 +cpuminer-opt configure 3.23.0 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. 
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by cpuminer-opt $as_me 3.22.3, which was +It was created by cpuminer-opt $as_me 3.23.0, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3593,7 +3593,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.22.3' + VERSION='3.23.0' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.22.3, which was +This file was extended by cpuminer-opt $as_me 3.23.0, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -cpuminer-opt config.status 3.22.3 +cpuminer-opt config.status 3.23.0 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 151c0685..2911ab3c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.22.3]) +AC_INIT([cpuminer-opt], [3.23.0]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/configure~ b/configure~ new file mode 100755 index 00000000..41bc80dd --- /dev/null +++ b/configure~ @@ -0,0 +1,7647 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.23.0. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. 
+# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. 
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='cpuminer-opt' +PACKAGE_TARNAME='cpuminer-opt' +PACKAGE_VERSION='3.23.0' +PACKAGE_STRING='cpuminer-opt 3.23.0' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +ac_unique_file="cpu-miner.c" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +WS2_LIBS +PTHREAD_LIBS +PTHREAD_FLAGS +JANSSON_LIBS +LIBCURL_CPPFLAGS +LIBCURL_CFLAGS +LIBCURL +MINGW_FALSE +MINGW_TRUE +ARCH_ARM_FALSE +ARCH_ARM_TRUE +ARCH_x86_64_FALSE +ARCH_x86_64_TRUE +ARCH_x86_FALSE +ARCH_x86_TRUE +USE_ASM_FALSE +USE_ASM_TRUE +HAVE_WINDOWS_FALSE +HAVE_WINDOWS_TRUE +WANT_JANSSON_FALSE +WANT_JANSSON_TRUE +ALLOCA +am__fastdepCXX_FALSE +am__fastdepCXX_TRUE +CXXDEPMODE +ac_ct_CXX +CXXFLAGS +CXX +RANLIB +am__fastdepCCAS_FALSE +am__fastdepCCAS_TRUE +CCASDEPMODE +CCASFLAGS +CCAS +EGREP +GREP +CPP +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +MAINT +MAINTAINER_MODE_FALSE +MAINTAINER_MODE_TRUE +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_os +target_vendor +target_cpu +target +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir 
+datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_silent_rules +enable_maintainer_mode +enable_dependency_tracking +enable_assembly +with_curl +with_crypto +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP +CCAS +CCASFLAGS +CXX +CXXFLAGS +CCC' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) 
+bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro 
| --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir 
| --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | 
--program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + 
ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures cpuminer-opt 3.23.0 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/cpuminer-opt] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] + --target=TARGET configure for building compilers for TARGET [HOST] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of cpuminer-opt 3.23.0:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + 
--enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-maintainer-mode + enable make rules and dependencies not useful (and + sometimes confusing) to the casual installer + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --disable-assembly disable assembly-language routines + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-curl=PATH prefix where curl is installed default=/usr + --with-crypto=PATH prefix where openssl crypto is installed default=/usr + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + CCAS assembler compiler command (defaults to CC) + CCASFLAGS assembler compiler flags (defaults to CFLAGS) + CXX C++ compiler command + CXXFLAGS C++ compiler flags + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. 
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +cpuminer-opt configure 3.23.0 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! 
-s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_compile + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" 
>&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES +# --------------------------------------------- +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. +ac_fn_c_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +$as_echo_n "checking whether $as_decl_name is declared... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_decl + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. 
+ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. 
Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by cpuminer-opt $as_me 3.23.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. 
+trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. ## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Make sure we can run config.sub. 
+$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... " >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +$as_echo_n "checking host system type... " >&6; } +if ${ac_cv_host+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +$as_echo "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 +$as_echo_n "checking target system type... " >&6; } +if ${ac_cv_target+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$target_alias" = x; then + ac_cv_target=$ac_cv_host +else + ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5 +$as_echo "$ac_cv_target" >&6; } +case $ac_cv_target in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;; +esac +target=$ac_cv_target +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_target +shift +target_cpu=$1 +target_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +target_os=$* +IFS=$ac_save_IFS +case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac + + +# The aliases save the names the user supplied, while $host etc. +# will get canonicalized. +test -n "$target_alias" && + test "$program_prefix$program_suffix$program_transform_name" = \ + NONENONEs,x,x, && + program_prefix=${target_alias}- + + +ac_config_headers="$ac_config_headers cpuminer-config.h" + + + +am__api_version='1.16' + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. 
But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. 
+ : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. 
Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. 
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... " >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='cpuminer-opt' + VERSION='3.23.0' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } + # Check whether --enable-maintainer-mode was given. 
+if test "${enable_maintainer_mode+set}" = set; then : + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else + USE_MAINTAINER_MODE=no +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +$as_echo "$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +$as_echo "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. 
+if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. 
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. 
+ am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5 +$as_echo_n "checking for $CC option to accept ISO C99... " >&6; } +if ${ac_cv_prog_cc_c99+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include +#include + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +#define debug(...) fprintf (stderr, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + your preprocessor is broken; +#endif +#if BIG_OK +#else + your preprocessor is broken; +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\0'; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static void +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str; + int number; + float fnumber; + + while (*format) + { + switch (*format++) + { + case 's': // string + str = va_arg (args_copy, const char *); + break; + case 'd': // int + number = va_arg (args_copy, int); + break; + case 'f': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); +} + +int +main () +{ + + // Check bool. + _Bool success = false; + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + test_varargs ("s, d' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. + struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. 
+ struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x' + || dynamic_array[ni.number - 1] != 543); + + ; + return 0; +} +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c99" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c99" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +$as_echo "$ac_cv_prog_cc_c99" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c99" != xno; then : + +fi + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +if test $ac_cv_c_compiler_gnu = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC needs -traditional" >&5 +$as_echo_n "checking whether $CC needs -traditional... " >&6; } +if ${ac_cv_prog_gcc_traditional+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_pattern="Autoconf.*'x'" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +Autoconf TIOCGETP +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "$ac_pattern" >/dev/null 2>&1; then : + ac_cv_prog_gcc_traditional=yes +else + ac_cv_prog_gcc_traditional=no +fi +rm -f conftest* + + + if test $ac_cv_prog_gcc_traditional = no; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +Autoconf TCGETA +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "$ac_pattern" >/dev/null 2>&1; then : + ac_cv_prog_gcc_traditional=yes +fi +rm -f conftest* + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_gcc_traditional" >&5 +$as_echo "$ac_cv_prog_gcc_traditional" >&6; } + if test $ac_cv_prog_gcc_traditional = yes; then + CC="$CC -traditional" + fi +fi + + +# By default we simply use the C compiler to build assembly code. + +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS + + + +depcc="$CCAS" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CCAS_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. 
Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CCAS_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. 
+ am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CCAS_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CCAS_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CCAS_dependencies_compiler_type" >&6; } +CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CCAS_dependencies_compiler_type" = gcc3; then + am__fastdepCCAS_TRUE= + am__fastdepCCAS_FALSE='#' +else + am__fastdepCCAS_TRUE='#' + am__fastdepCCAS_FALSE= +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +$as_echo "$ac_ct_CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 +$as_echo_n "checking whether we are using the GNU C++ compiler... 
" >&6; } +if ${ac_cv_cxx_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +$as_echo "$ac_cv_cxx_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +$as_echo_n "checking whether $CXX accepts -g... " >&6; } +if ${ac_cv_prog_cxx_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +else + CXXFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +$as_echo "$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CXX" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CXX_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; } +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. 
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in sys/endian.h sys/param.h syslog.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +# sys/sysctl.h requires sys/types.h on FreeBSD +# sys/sysctl.h requires sys/param.h on OpenBSD +for ac_header in sys/sysctl.h +do : + ac_fn_c_check_header_compile "$LINENO" "sys/sysctl.h" "ac_cv_header_sys_sysctl_h" "#include +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +" +if test "x$ac_cv_header_sys_sysctl_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_SYSCTL_H 1 +_ACEOF + +fi + +done + + +ac_fn_c_check_decl "$LINENO" "be32dec" "ac_cv_have_decl_be32dec" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_be32dec" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_BE32DEC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "le32dec" "ac_cv_have_decl_le32dec" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_le32dec" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_LE32DEC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "be32enc" "ac_cv_have_decl_be32enc" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test 
"x$ac_cv_have_decl_be32enc" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_BE32ENC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "le32enc" "ac_cv_have_decl_le32enc" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_le32enc" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_LE32ENC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "le16dec" "ac_cv_have_decl_le16dec" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_le16dec" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_LE16DEC $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "le16enc" "ac_cv_have_decl_le16enc" "$ac_includes_default +#ifdef HAVE_SYS_ENDIAN_H +#include +#endif + +" +if test "x$ac_cv_have_decl_le16enc" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_LE16ENC $ac_have_decl +_ACEOF + + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works +# for constant arguments. Useless! +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 +$as_echo_n "checking for working alloca.h... " >&6; } +if ${ac_cv_working_alloca_h+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +char *p = (char *) alloca (2 * sizeof (int)); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_working_alloca_h=yes +else + ac_cv_working_alloca_h=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 +$as_echo "$ac_cv_working_alloca_h" >&6; } +if test $ac_cv_working_alloca_h = yes; then + +$as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 +$as_echo_n "checking for alloca... " >&6; } +if ${ac_cv_func_alloca_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca (size_t); +# endif +# endif +# endif +# endif +#endif + +int +main () +{ +char *p = (char *) alloca (1); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_func_alloca_works=yes +else + ac_cv_func_alloca_works=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 +$as_echo "$ac_cv_func_alloca_works" >&6; } + +if test $ac_cv_func_alloca_works = yes; then + +$as_echo "#define HAVE_ALLOCA 1" >>confdefs.h + +else + # The SVR3 libPW and SVR4 libucb both contain incompatible functions +# that cause trouble. Some versions do not even contain alloca or +# contain a buggy version. If you still want to use their alloca, +# use ar to extract alloca.o from them instead of compiling alloca.c. 
+ +ALLOCA=\${LIBOBJDIR}alloca.$ac_objext + +$as_echo "#define C_ALLOCA 1" >>confdefs.h + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5 +$as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; } +if ${ac_cv_os_cray+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if defined CRAY && ! defined CRAY2 +webecray +#else +wenotbecray +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "webecray" >/dev/null 2>&1; then : + ac_cv_os_cray=yes +else + ac_cv_os_cray=no +fi +rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5 +$as_echo "$ac_cv_os_cray" >&6; } +if test $ac_cv_os_cray = yes; then + for ac_func in _getb67 GETB67 getb67; do + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + +cat >>confdefs.h <<_ACEOF +#define CRAY_STACKSEG_END $ac_func +_ACEOF + + break +fi + + done +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 +$as_echo_n "checking stack direction for C alloca... " >&6; } +if ${ac_cv_c_stack_direction+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_c_stack_direction=0 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +int +find_stack_direction (int *addr, int depth) +{ + int dir, dummy = 0; + if (! addr) + addr = &dummy; + *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; + dir = depth ? 
find_stack_direction (addr, depth - 1) : 0; + return dir + dummy; +} + +int +main (int argc, char **argv) +{ + return find_stack_direction (0, argc + !argv + 20) < 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_c_stack_direction=1 +else + ac_cv_c_stack_direction=-1 +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 +$as_echo "$ac_cv_c_stack_direction" >&6; } +cat >>confdefs.h <<_ACEOF +#define STACK_DIRECTION $ac_cv_c_stack_direction +_ACEOF + + +fi + +for ac_func in getopt_long +do : + ac_fn_c_check_func "$LINENO" "getopt_long" "ac_cv_func_getopt_long" +if test "x$ac_cv_func_getopt_long" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_GETOPT_LONG 1 +_ACEOF + +fi +done + + +MINGW_TARGET=`$CC -dumpmachine 2>&1` +case $MINGW_TARGET in + arm*-*-*) + have_arm=true + ;; + i*86-*-mingw*) + have_x86=true + have_win32=true + CFLAGS="-Icompat/pthreads $CFLAGS" + PTHREAD_LDFLAGS="-Lcompat/pthreads/x86" + WS2_LIBS="-lws2_32" + ;; + x86_64-*-mingw*|amd64-*-mingw*) + have_x86_64=true + have_win32=true + CFLAGS="-Icompat/pthreads $CFLAGS" + PTHREAD_LDFLAGS="-Lcompat/pthreads/x64" + # SHOULD BE AT END! after -lcrypto # + WS2_LIBS="-L/mingw/x86_64-w64-mingw32/lib -lws2_32" + ;; + i*86-*-*) + have_x86=true + ;; + x86_64-*-*|amd64-*-*) + have_x86_64=true + ;; +esac + +# Check whether --enable-assembly was given. +if test "${enable_assembly+set}" = set; then : + enableval=$enable_assembly; +fi + +if test x$enable_assembly != xno; then + +$as_echo "#define USE_ASM 1" >>confdefs.h + +fi + +if test x$enable_assembly != xno -a x$have_x86_64 = xtrue +then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX code" >&5 +$as_echo_n "checking whether we can compile AVX code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +asm ("vmovdqa %ymm0, %ymm1"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_AVX 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile XOP code" >&5 +$as_echo_n "checking whether we can compile XOP code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm ("vprotd \$7, %xmm0, %xmm1"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_XOP 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the XOP instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the XOP instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX2 code" >&5 +$as_echo_n "checking whether we can compile AVX2 code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm ("vpaddd %ymm0, %ymm1, %ymm2"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_AVX2 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX512 code" >&5 +$as_echo_n "checking whether we can compile AVX512 code... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +asm ("vpaddd %zmm0, %zmm1, %zmm2{%k1}"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_AVX512 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX512 instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the AVX512 instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX2 instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the AVX2 instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX instruction set." >&5 +$as_echo "$as_me: WARNING: The assembler does not support the AVX instruction set." >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for json_loads in -ljansson" >&5 +$as_echo_n "checking for json_loads in -ljansson... " >&6; } +if ${ac_cv_lib_jansson_json_loads+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ljansson $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char json_loads (); +int +main () +{ +return json_loads (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_jansson_json_loads=yes +else + ac_cv_lib_jansson_json_loads=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_jansson_json_loads" >&5 +$as_echo "$ac_cv_lib_jansson_json_loads" >&6; } +if test "x$ac_cv_lib_jansson_json_loads" = xyes; then : + request_jansson=false +else + request_jansson=true +fi + + +# GC2 for GNU static +if test "x$have_win32" = "xtrue" ; then + # MinGW + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +$as_echo_n "checking for pthread_create in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_create=yes +else + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : + PTHREAD_LIBS="-lpthreadGC2" +fi + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +$as_echo_n "checking for pthread_create in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_create=yes +else + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : + PTHREAD_LIBS="-lpthread" +fi + +fi + +LDFLAGS="$PTHREAD_LDFLAGS $LDFLAGS" +# PTHREAD_LIBS="$PTHREAD_LIBS" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __uint128_t is supported" >&5 +$as_echo_n "checking whether __uint128_t is supported... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static __uint128_t i = 100; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define USE_INT128 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +# allow if in Makefile.am + if test x$request_jansson = xtrue; then + WANT_JANSSON_TRUE= + WANT_JANSSON_FALSE='#' +else + WANT_JANSSON_TRUE='#' + WANT_JANSSON_FALSE= +fi + + if test x$have_win32 = xtrue; then + HAVE_WINDOWS_TRUE= + HAVE_WINDOWS_FALSE='#' +else + HAVE_WINDOWS_TRUE='#' + HAVE_WINDOWS_FALSE= +fi + + if test x$enable_assembly != xno; then + USE_ASM_TRUE= + USE_ASM_FALSE='#' +else + USE_ASM_TRUE='#' + USE_ASM_FALSE= +fi + + if test x$have_x86 = xtrue; then + ARCH_x86_TRUE= + ARCH_x86_FALSE='#' +else + ARCH_x86_TRUE='#' + ARCH_x86_FALSE= +fi + + if test x$have_x86_64 = xtrue; then + ARCH_x86_64_TRUE= + ARCH_x86_64_FALSE='#' +else + 
ARCH_x86_64_TRUE='#' + ARCH_x86_64_FALSE= +fi + + if test x$have_arm = xtrue; then + ARCH_ARM_TRUE= + ARCH_ARM_FALSE='#' +else + ARCH_ARM_TRUE='#' + ARCH_ARM_FALSE= +fi + + if test "x$OS" = "xWindows_NT"; then + MINGW_TRUE= + MINGW_FALSE='#' +else + MINGW_TRUE='#' + MINGW_FALSE= +fi + + +if test x$request_jansson = xtrue ; then + JANSSON_LIBS="compat/jansson/libjansson.a" +else + JANSSON_LIBS=-ljansson +fi + +# libcurl install path (for mingw : --with-curl=/usr/local) + +# Check whether --with-curl was given. +if test "${with_curl+set}" = set; then : + withval=$with_curl; +fi + + +if test -n "$with_curl" ; then + LIBCURL_CFLAGS="$LIBCURL_CFLAGS -I$with_curl/include" + LIBCURL_CPPFLAGS="$LIBCURL_CPPFLAGS -I$with_curl/include" + LIBCURL_LDFLAGS="-L$with_curl/lib $LIBCURL_LDFLAGS" + LIBCURL="-lcurl -lz" +fi + +# SSL install path (for mingw : --with-crypto=/usr/local/ssl) + +# Check whether --with-crypto was given. +if test "${with_crypto+set}" = set; then : + withval=$with_crypto; +fi + + +if test -n "$with_crypto" ; then + LIBCURL_CFLAGS="$LIBCURL_CFLAGS -I$with_crypto/include" + LIBCURL_CPPFLAGS="$LIBCURL_CPPFLAGS -I$with_crypto/include" + LIBCURL_LDFLAGS="-L$with_crypto/lib $LIBCURL_LDFLAGS" + LIBCURL="$LIBCURL -lssl -lcrypto" +fi + +CFLAGS="$CFLAGS $LIBCURL_CFLAGS" +CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" +LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS" + +#AC_CHECK_LIB([z],[gzopen],[],[]) +#AC_CHECK_LIB([crypto],[OPENSSL_init], crypto=yes, [AC_MSG_ERROR([OpenSSL crypto library required])]) +#AC_CHECK_LIB([ssl],[SSL_new], ssl=yes, ssl=no) + +# AC_CHECK_LIB([curl], [curl_multi_timeout], +# have_libcurl=yes, +# have_libcurl=no AC_MSG_ERROR([curl library required]) +# ) + +# LIBCURL_CHECK_CONFIG([yes], 7.15, curlconfig=yes, curlconfig=no) + + + + +# AC_SUBST(LIBCURL_LDFLAGS) + + + + + + +ac_config_files="$ac_config_files Makefile compat/Makefile compat/jansson/Makefile" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on 
this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. 
+ wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WANT_JANSSON_TRUE}" && test -z "${WANT_JANSSON_FALSE}"; then + as_fn_error $? "conditional \"WANT_JANSSON\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_WINDOWS_TRUE}" && test -z "${HAVE_WINDOWS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_WINDOWS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${USE_ASM_TRUE}" && test -z "${USE_ASM_FALSE}"; then + as_fn_error $? "conditional \"USE_ASM\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${ARCH_x86_TRUE}" && test -z "${ARCH_x86_FALSE}"; then + as_fn_error $? "conditional \"ARCH_x86\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${ARCH_x86_64_TRUE}" && test -z "${ARCH_x86_64_FALSE}"; then + as_fn_error $? "conditional \"ARCH_x86_64\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${ARCH_ARM_TRUE}" && test -z "${ARCH_ARM_FALSE}"; then + as_fn_error $? "conditional \"ARCH_ARM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${MINGW_TRUE}" && test -z "${MINGW_FALSE}"; then + as_fn_error $? "conditional \"MINGW\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. 
Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. 
+as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. 
+ac_log=" +This file was extended by cpuminer-opt $as_me 3.23.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to the package provider." 
+ +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +cpuminer-opt config.status 3.23.0 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? 
"ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "cpuminer-config.h") CONFIG_HEADERS="$CONFIG_HEADERS cpuminer-config.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "compat/Makefile") CONFIG_FILES="$CONFIG_FILES compat/Makefile" ;; + "compat/jansson/Makefile") CONFIG_FILES="$CONFIG_FILES compat/jansson/Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. 
Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. 
+if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + 
length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. 
Preserve backslash +# newline sequences. + +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? 
"could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. 
+# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). +See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/cpu-miner.c b/cpu-miner.c index 5f59481b..e6c985a4 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -1532,6 +1532,7 @@ const char *getwork_req = #define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]" #define GBT_RULES "[\"segwit\"]" + static const char *gbt_req = "{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": " GBT_CAPABILITIES ", \"rules\": " GBT_RULES "}], \"id\":0}\r\n"; @@ -1589,18 +1590,21 @@ static bool get_upstream_work( CURL *curl, struct work *work ) json_decref( val ); goto start; } + allow_getwork = false; // GBT is working, disable fallback } else rc = work_decode( json_object_get( val, "result" ), work ); if ( rc ) { + bool new_work 
= true; + json_decref( val ); get_mininginfo( curl, work ); report_summary_log( false ); - if ( opt_protocol | opt_debug ) + if ( opt_protocol || opt_debug ) { timeval_subtract( &diff, &tv_end, &tv_start ); applog( LOG_INFO, "%s new work received in %.2f ms", @@ -1621,8 +1625,10 @@ static bool get_upstream_work( CURL *curl, struct work *work ) applog( LOG_BLUE, "New Work: Block %d, Tx %d, Net Diff %.5g, Ntime %08x", work->height, work->tx_count, net_diff, work->data[ algo_gate.ntime_index ] ); - - if ( !opt_quiet ) + else + new_work = false; + + if ( new_work && !opt_quiet ) { double miner_hr = 0.; double net_hr = net_hashrate; @@ -2745,10 +2751,14 @@ static void *stratum_thread(void *userdata ) } else { - stratum_down = false; +// sometimes stratum connects but doesn't immediately send a job, wait for one. +// stratum_down = false; applog(LOG_BLUE,"Stratum connection established" ); if ( stratum.new_job ) // prime first job + { + stratum_down = false; stratum_gen_work( &stratum, &g_work ); + } } } @@ -2757,6 +2767,7 @@ static void *stratum_thread(void *userdata ) { if ( likely( s = stratum_recv_line( &stratum ) ) ) { + stratum_down = false; if ( likely( !stratum_handle_method( &stratum, s ) ) ) stratum_handle_response( s ); free( s ); @@ -2848,6 +2859,7 @@ static bool cpu_capability( bool display_only ) bool cpu_has_sha = has_sha(); bool cpu_has_avx512 = has_avx512(); bool cpu_has_vaes = has_vaes(); + bool cpu_has_avx10 = has_avx10(); bool sw_has_aes = false; bool sw_has_sse2 = false; bool sw_has_sse42 = false; @@ -2912,8 +2924,8 @@ static bool cpu_capability( bool display_only ) #ifdef _MSC_VER " with VC++ 2013\n"); #elif defined(__GNUC__) - " with GCC"); - printf(" %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); + " with GCC-"); + printf("%d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); #else printf("\n"); #endif @@ -2927,6 +2939,8 @@ static bool cpu_capability( bool display_only ) if ( cpu_has_vaes ) printf( " VAES" ); else if ( 
cpu_has_aes ) printf( " AES" ); if ( cpu_has_sha ) printf( " SHA" ); + if ( cpu_has_avx10 ) printf( " AVX10.%d-%d", + avx10_version(), avx10_vector_length() ); printf("\nSW features: "); if ( sw_has_avx512 ) printf( " AVX512" ); diff --git a/simd-utils.h b/simd-utils.h index 7558076b..196fbe9d 100644 --- a/simd-utils.h +++ b/simd-utils.h @@ -15,10 +15,6 @@ // data but not for vectors. The main categories are bit rotation // and endian byte swapping // -// An attempt was made to make the names as similar as possible to -// Intel's intrinsic function format. Most variations are to avoid -// confusion with actual Intel intrinsics, brevity, and clarity. -// // This suite supports some operations on regular 64 bit integers // as well as 128 bit integers available on recent versions of Linux // and GCC. @@ -37,6 +33,9 @@ // SSE2: 128 bit vectors (64 bit CPUs only, such as Intel Core2. // AVX2: 256 bit vectors (Starting with Intel Haswell and AMD Ryzen) // AVX512: 512 bit vectors (Starting with SkylakeX) +// AVX10: when available will supersede AVX512 and will bring AVX512 +// features, except 512 bit vectors, to Intel's Ecores. It needs to be +// enabled manually when the relevant GCC macros are known. // // Most functions are avalaible at the stated levels but in rare cases // a higher level feature may be required with no compatible alternative. @@ -53,21 +52,17 @@ // for the applications but also adds responsibility to ensure adequate data // alignment. // -// Windows has problems with function vector arguments larger than -// 128 bits. Stack alignment is only guaranteed to 16 bytes. Always use -// pointers for larger vectors in function arguments. Macros can be used -// for larger value arguments. -// // An attempt was made to make the names as similar as possible to // Intel's intrinsic function format. Most variations are to avoid -// confusion with actual Intel intrinsics, brevity, and clarity +// confusion with actual Intel intrinsics, brevity, and clarity. 
// // The main differences are: // -// - the leading underscore(s) "_" and the "i" are dropped from the -// prefix of vector instructions. -// - "mm64" and "mm128" used for 64 and 128 bit prefix respectively -// to avoid the ambiguity of "mm". +// - the leading underscore "_" is dropped from the prefix of vector function +// macros. +// - "mm128" is used as the 128 bit prefix to be consistent with mm256 & mm512 and +// to avoid the ambiguity of "mm" which is also used for 64 bit MMX +// intrinsics. // - the element size does not include additional type specifiers // like "epi". // - there is a subset of some functions for scalar data. They may have @@ -76,14 +71,14 @@ // // Function names follow this pattern: // -// prefix_op[vsize]_[esize] +// [prefix]_[op][vsize]_[esize] // // Prefix: usually the size of the returned vector. // Following are some examples: // // u64: unsigned 64 bit integer function // i128: signed 128 bit integer function (rarely used) -// m128: 128 bit vector identifier +// m128: 128 bit vector identifier (deprecated) // mm128: 128 bit vector function // // op: describes the operation of the function or names the data @@ -94,7 +89,7 @@ // vsize: optional, lane size used when a function operates on elements // within lanes of a larger vector. // -// mm256_shuflr128_32 rotates each 128 bit lane of a 256 bit vector +// Ex: mm256_shuflr128_32 rotates each 128 bit lane of a 256 bit vector // right by 32 bits. 
// // Vector constants diff --git a/simd-utils/intrlv.h b/simd-utils/intrlv.h index 4ec568fd..92ac5b04 100644 --- a/simd-utils/intrlv.h +++ b/simd-utils/intrlv.h @@ -731,6 +731,67 @@ static inline void extr_lane_8x32( void *d, const void *s, #if defined(__AVX2__) +#if defined(__AVX512VL__) && defined(__AVX512VBMI__) + +//TODO Enable for AVX10_256 AVX10_512 + +// Combine byte swap & broadcast in one permute +static inline void mm256_bswap32_intrlv80_8x32( void *d, const void *src ) +{ + const __m256i c0 = _mm256_set1_epi32( 0x00010203 ); + const __m256i c1 = _mm256_set1_epi32( 0x04050607 ); + const __m256i c2 = _mm256_set1_epi32( 0x08090a0b ); + const __m256i c3 = _mm256_set1_epi32( 0x0c0d0e0f ); + const __m128i s0 = casti_m128i( src,0 ); + const __m128i s1 = casti_m128i( src,1 ); + const __m128i s2 = casti_m128i( src,2 ); + const __m128i s3 = casti_m128i( src,3 ); + const __m128i s4 = casti_m128i( src,4 ); + + casti_m256i( d, 0 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s0 ) ); + casti_m256i( d, 1 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s0 ) ); + casti_m256i( d, 2 ) = _mm256_permutexvar_epi8( c2, + _mm256_castsi128_si256( s0 ) ); + casti_m256i( d, 3 ) = _mm256_permutexvar_epi8( c3, + _mm256_castsi128_si256( s0 ) ); + casti_m256i( d, 4 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s1 ) ); + casti_m256i( d, 5 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s1 ) ); + casti_m256i( d, 6 ) = _mm256_permutexvar_epi8( c2, + _mm256_castsi128_si256( s1 ) ); + casti_m256i( d, 7 ) = _mm256_permutexvar_epi8( c3, + _mm256_castsi128_si256( s1 ) ); + casti_m256i( d, 8 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s2 ) ); + casti_m256i( d, 9 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s2 ) ); + casti_m256i( d,10 ) = _mm256_permutexvar_epi8( c2, + _mm256_castsi128_si256( s2 ) ); + casti_m256i( d,11 ) = _mm256_permutexvar_epi8( c3, + _mm256_castsi128_si256( s2 ) ); + casti_m256i( d,12 ) = 
_mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s3 ) ); + casti_m256i( d,13 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s3 ) ); + casti_m256i( d,14 ) = _mm256_permutexvar_epi8( c2, + _mm256_castsi128_si256( s3 ) ); + casti_m256i( d,15 ) = _mm256_permutexvar_epi8( c3, + _mm256_castsi128_si256( s3 ) ); + casti_m256i( d,16 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s4 ) ); + casti_m256i( d,17 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s4 ) ); + casti_m256i( d,18 ) = _mm256_permutexvar_epi8( c2, + _mm256_castsi128_si256( s4 ) ); + casti_m256i( d,19 ) = _mm256_permutexvar_epi8( c3, + _mm256_castsi128_si256( s4 ) ); +} + +#else + static inline void mm256_bswap32_intrlv80_8x32( void *d, const void *src ) { const __m128i bswap_shuf = _mm_set_epi64x( 0x0c0d0e0f08090a0b, @@ -792,6 +853,7 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, const void *src ) _mm256_castsi128_si256( s4 ), c3 ); } +#endif // AVX512VBMI else #endif // AVX2 // 16x32 @@ -1173,10 +1235,12 @@ static inline void extr_lane_16x32( void *d, const void *s, ((uint32_t*)d)[15] = ((const uint32_t*)s)[ lane+240 ]; } -#if defined(__AVX512F__) && defined(__AVX512VL__) +#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512VBMI__) +// TODO Enable for AVX10_512 + // Combine byte swap & broadcast in one permute static inline void mm512_bswap32_intrlv80_16x32( void *d, const void *src ) { @@ -1496,10 +1560,48 @@ static inline void mm256_intrlv80_4x64( void *d, const void *src ) _mm256_castsi128_si256( s4 ), 0x55 ); } +#if defined(__AVX512VL__) && defined(__AVX512VBMI__) + +//TODO Enable for AVX10_256 AVX10_512 + +static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src ) +{ + const __m256i c0 = _mm256_set1_epi64x( 0x0405060700010203 ); + const __m256i c1 = _mm256_set1_epi64x( 0x0c0d0e0f08090a0b ); + const __m128i s0 = casti_m128i( src,0 ); + const __m128i s1 = casti_m128i( 
src,1 ); + const __m128i s2 = casti_m128i( src,2 ); + const __m128i s3 = casti_m128i( src,3 ); + const __m128i s4 = casti_m128i( src,4 ); + + casti_m256i( d,0 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s0 ) ); + casti_m256i( d,1 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s0 ) ); + casti_m256i( d,2 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s1 ) ); + casti_m256i( d,3 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s1 ) ); + casti_m256i( d,4 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s2 ) ); + casti_m256i( d,5 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s2 ) ); + casti_m256i( d,6 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s3 ) ); + casti_m256i( d,7 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s3 ) ); + casti_m256i( d,8 ) = _mm256_permutexvar_epi8( c0, + _mm256_castsi128_si256( s4 ) ); + casti_m256i( d,9 ) = _mm256_permutexvar_epi8( c1, + _mm256_castsi128_si256( s4 ) ); +} + +#else + static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src ) { const __m256i bswap_shuf = mm256_bcast_m128( - _mm_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203 ) ); + _mm_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203 ) ); __m256i s0 = casti_m256i( src,0 ); __m256i s1 = casti_m256i( src,1 ); __m128i s4 = casti_m128i( src,4 ); @@ -1524,6 +1626,8 @@ static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src ) _mm256_castsi128_si256( s4 ), 0x55 ); } +#endif + #endif // AVX2 // 8x64 (AVX512) @@ -1846,6 +1950,8 @@ static inline void extr_lane_8x64( void *dst, const void *src, const int lane, #if defined(__AVX512F__) && defined(__AVX512VL__) +//TODO Enable for AVX10_512 + // broadcast to all lanes static inline void mm512_intrlv80_8x64( void *dst, const void *src ) { @@ -2089,10 +2195,36 @@ static inline void dintrlv_4x128_512( void *dst0, void *dst1, void *dst2, d0[3] = s[12]; d1[3] = s[13]; d2[3] = s[14]; d3[3] = s[15]; } - #if 
defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) -static inline void mm512_bswap32_intrlv80_4x128( void *d, void *src ) +#if defined(__AVX512VBMI__) +//TODO Enable for AVX10_512 +// Combine byte swap & broadcast in one permute, the index vector is the first arg. +static inline void mm512_bswap32_intrlv80_4x128( void *d, const void *src ) +{ + const __m512i bswap_shuf = mm512_bcast_m128( + _mm_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203 ) ); + const __m128i s0 = casti_m128i( src,0 ); + const __m128i s1 = casti_m128i( src,1 ); + const __m128i s2 = casti_m128i( src,2 ); + const __m128i s3 = casti_m128i( src,3 ); + const __m128i s4 = casti_m128i( src,4 ); + + casti_m512i( d,0 ) = _mm512_permutexvar_epi8( bswap_shuf, + _mm512_castsi128_si512( s0 ) ); + casti_m512i( d,1 ) = _mm512_permutexvar_epi8( bswap_shuf, + _mm512_castsi128_si512( s1 ) ); + casti_m512i( d,2 ) = _mm512_permutexvar_epi8( bswap_shuf, + _mm512_castsi128_si512( s2 ) ); + casti_m512i( d,3 ) = _mm512_permutexvar_epi8( bswap_shuf, + _mm512_castsi128_si512( s3 ) ); + casti_m512i( d,4 ) = _mm512_permutexvar_epi8( bswap_shuf, + _mm512_castsi128_si512( s4 ) ); +} + +#else + +static inline void mm512_bswap32_intrlv80_4x128( void *d, const void *src ) { const __m128i bswap_shuf = _mm_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203 ); @@ -2108,14 +2240,15 @@ static inline void mm512_bswap32_intrlv80_4x128( void *d, void *src ) s3 = _mm_shuffle_epi8( s3, bswap_shuf ); s4 = _mm_shuffle_epi8( s4, bswap_shuf ); - casti_m512i( d, 0 ) = mm512_bcast_m128( s0 ); - casti_m512i( d, 1 ) = mm512_bcast_m128( s1 ); - casti_m512i( d, 2 ) = mm512_bcast_m128( s2 ); - casti_m512i( d, 3 ) = mm512_bcast_m128( s3 ); - casti_m512i( d, 4 ) = mm512_bcast_m128( s4 ); -} + casti_m512i( d,0 ) = mm512_bcast_m128( s0 ); + casti_m512i( d,1 ) = mm512_bcast_m128( s1 ); + casti_m512i( d,2 ) = mm512_bcast_m128( s2 ); + casti_m512i( d,3 ) = mm512_bcast_m128( s3 ); + casti_m512i( d,4 ) = mm512_bcast_m128( s4 ); +} -#endif +#endif // AVX512VBMI ELSE +#endif // AVX512 // 2x256 
(AVX512) @@ -2955,6 +3088,8 @@ do { \ #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) +//TODO Enable for AVX10_512 + /* #define mm512_intrlv_blend_128( hi, lo ) \ _mm512_mask_blend_epi32( 0x0f0f, hi, lo ) diff --git a/simd-utils/simd-128.h b/simd-utils/simd-128.h index 51de048a..83b6eb48 100644 --- a/simd-utils/simd-128.h +++ b/simd-utils/simd-128.h @@ -43,9 +43,11 @@ typedef union } __attribute__ ((aligned (16))) m128_ovly; -// Deprecated. EVEX adds support for integer argument in broadcast instruction -// eliminating the need for an explicit move in most cases. Use the set1 -// intrinsic with integers and let the compiler figure it out. +// Deprecated. AVX512 adds EVEX encoding (3rd operand) and other improvements +// that make these functions either unnecessary or inefficient. +// In cases where an explicit move between GP & SIMD registers is still +// necessary the cvt, set, or set1 intrinsics can be used allowing the +// compiler to exploit new features to produce optimum code. static inline __m128i mm128_mov64_128( const uint64_t n ) { __m128i a; @@ -73,15 +75,7 @@ static inline __m128i mm128_mov32_128( const uint32_t n ) //#define mm128_bcast_m64( v ) _mm_shuffle_epi32( v, 0x44 ) //#define mm128_bcast_m32( v ) _mm_shuffle_epi32( v, 0x00 ) -// Deprecated, use set1 directly -#define m128_const1_64 _mm_set1_epi64x -#define m128_const1_32 _mm_set1_epi32 - -// Deprecated, use set directly -#define m128_const_64 _mm_set_epi64x - // Pseudo constants - #define m128_zero _mm_setzero_si128() #define m128_one_128 mm128_mov64_128( 1 ) //#define m128_one_64 _mm_set1_epi64x( 1 ) @@ -141,7 +135,7 @@ static inline __m128i mm128_neg1_fn() // Examples of simple operations using xim: -// Insert 32 bit integer into v at element c and return updated v. +// Copy i to element c of dest and copy remaining elements from v. 
static inline __m128i mm128_insert_32( const __m128i v, const uint32_t i, const int c ) { return mm128_xim_32( v, mm128_mov32_128( i ), c<<4 ); } @@ -161,6 +155,7 @@ static inline __m128i mm128_mask_32( const __m128i v, const int m ) // Bitwise not (~v) #if defined(__AVX512VL__) +//TODO Enable for AVX10_256 static inline __m128i mm128_not( const __m128i v ) { return _mm_ternarylogic_epi64( v, v, v, 1 ); } @@ -223,18 +218,54 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n ) { for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; } #if defined(__AVX512VL__) +//TODO Enable for AVX10_256 // a ^ b ^ c -#define mm128_xor3( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x96 ) +#define mm128_xor3( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x96 ) + +// a & b & c +#define mm128_and3( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x80 ) + +// a | b | c +#define mm128_or3( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0xfe ) // a ^ ( b & c ) -#define mm128_xorand( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x78 ) +#define mm128_xorand( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x78 ) + +// a & ( b ^ c ) +#define mm128_andxor( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x60 ) + +// a ^ ( b | c ) +#define mm128_xoror( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0x1e ) + +// a ^ ( ~b & c ) +#define mm128_xorandnot( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0xd2 ) + +// a | ( b & c ) +#define mm128_orand( a, b, c ) _mm_ternarylogic_epi64( a, b, c, 0xf8 ) + +// ~( a ^ b ), same as (~a) ^ b +#define mm128_xnor( a, b ) _mm_ternarylogic_epi64( a, b, b, 0x81 ) #else -#define mm128_xor3( a, b, c ) _mm_xor_si128( a, _mm_xor_si128( b, c ) ) +#define mm128_xor3( a, b, c ) _mm_xor_si128( a, _mm_xor_si128( b, c ) ) + +#define mm128_and3( a, b, c ) _mm_and_si128( a, _mm_and_si128( b, c ) ) -#define mm128_xorand( a, b, c ) _mm_xor_si128( a, _mm_and_si128( b, c ) ) +#define mm128_or3( a, b, c ) _mm_or_si128( a, _mm_or_si128( b, c ) ) + +#define mm128_xorand( a, b, c ) 
_mm_xor_si128( a, _mm_and_si128( b, c ) ) +// a & ( b ^ c ) +#define mm128_andxor( a, b, c ) _mm_and_si128( a, _mm_xor_si128( b, c )) +// a ^ ( b | c ) +#define mm128_xoror( a, b, c ) _mm_xor_si128( a, _mm_or_si128( b, c ) ) +// a ^ ( ~b & c ) +#define mm128_xorandnot( a, b, c ) _mm_xor_si128( a, _mm_andnot_si128( b, c ) ) +// a | ( b & c ) +#define mm128_orand( a, b, c ) _mm_or_si128( a, _mm_and_si128( b, c ) ) +// ~( a ^ b ) +#define mm128_xnor( a, b ) mm128_not( _mm_xor_si128( a, b ) ) #endif @@ -257,6 +288,7 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n ) // transparency. #if defined(__AVX512VL__) +//TODO Enable for AVX10_256 #define mm128_ror_64 _mm_ror_epi64 #define mm128_rol_64 _mm_rol_epi64 @@ -372,7 +404,10 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c ) #define mm128_shuflr64_32 mm128_swap64_32 #define mm128_shufll64_32 mm128_swap64_32 -#if defined(__SSSE3__) && !defined(__AVX512VL__) +//TODO Enable for AVX10_256 +#if defined(__AVX512VL__) + #define mm128_shuflr64_24( v ) _mm_ror_epi64( v, 24 ) +#elif defined(__SSSE3__) #define mm128_shuflr64_24( v ) \ _mm_shuffle_epi8( v, _mm_set_epi64x( \ 0x0a09080f0e0d0c0b, 0x0201000706050403 ) ) @@ -380,7 +415,9 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c ) #define mm128_shuflr64_24( v ) mm128_ror_64( v, 24 ) #endif -#if defined(__SSSE3__) && !defined(__AVX512VL__) +#if defined(__AVX512VL__) + #define mm128_shuflr64_16( v ) _mm_ror_epi64( v, 16 ) +#elif defined(__SSSE3__) #define mm128_shuflr64_16( v ) \ _mm_shuffle_epi8( v, _mm_set_epi64x( \ 0x09080f0e0d0c0b0a, 0x0100070605040302 ) ) @@ -390,7 +427,9 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c ) // Rotate 32 bit lanes -#if defined(__SSSE3__) && !defined(__AVX512VL__) +#if defined(__AVX512VL__) + #define mm128_swap32_16( v ) _mm_ror_epi32( v, 16 ) +#elif defined(__SSSE3__) #define mm128_swap32_16( v ) \ _mm_shuffle_epi8( v, _mm_set_epi64x( \ 0x0d0c0f0e09080b0a, 0x0504070601000302 ) ) @@ -400,7 +439,9 @@ static inline __m128i 
mm128_shuflr_x8( const __m128i v, const int c ) #define mm128_shuflr32_16 mm128_swap32_16 #define mm128_shufll32_16 mm128_swap32_16 -#if defined(__SSSE3__) && !defined(__AVX512VL__) +#if defined(__AVX512VL__) + #define mm128_shuflr32_8( v ) _mm_ror_epi32( v, 8 ) +#elif defined(__SSSE3__) #define mm128_shuflr32_8( v ) \ _mm_shuffle_epi8( v, _mm_set_epi64x( \ 0x0c0f0e0d080b0a09, 0x0407060500030201 ) ) diff --git a/simd-utils/simd-256.h b/simd-utils/simd-256.h index 5394d623..58a71d13 100644 --- a/simd-utils/simd-256.h +++ b/simd-utils/simd-256.h @@ -13,17 +13,14 @@ // automatically but their use is limited because 256 bit vectors are less // likely to be used when 512 is available. // +// AVX10_256 will support AVX512VL instructions on CPUs limited to 256 bit +// vectors. This will require enabling when the compiler's AVX10 feature +// macros are known. +// // "_mm256_shuffle_epi8" and "_mm256_alignr_epi8" are restricted to 128 bit // lanes and data can't cross the 128 bit lane boundary. -// Full width byte shuffle is available with AVX512VL using the mask version -// with a full mask (-1). // Instructions that can move data across 128 bit lane boundary incur a // performance penalty over those that can't. -// Some usage of index vectors may be encoded as if full vector shuffles are -// supported. This has no side effects and would have the same results using -// either version. -// If the need arises and AVX512VL is available, 256 bit full vector byte -// shuffles can be implemented using the AVX512 mask feature with a NULL mask. #if defined(__AVX__) @@ -66,6 +63,7 @@ typedef union // Set either the low or high 64 bit elements in 128 bit lanes, other elements // are set to zero. 
#if defined(__AVX512VL__) +//TODO Enable for AVX10_256 #define mm256_bcast128lo_64( i64 ) _mm256_maskz_set1_epi64( 0x55, i64 ) #define mm256_bcast128hi_64( i64 ) _mm256_maskz_set1_epi64( 0xaa, i64 ) @@ -81,11 +79,9 @@ typedef union #define mm256_set2_64( i1, i0 ) mm256_bcast_m128( _mm_set_epi64x( i1, i0 ) ) -// Deprecated -#define m256_const1_64 _mm256_set1_epi64x -#define m256_const1_32 _mm256_set1_epi32 +#define mm256_set4_32( i3, i2, i1, i0 ) \ + mm256_bcast_m128( _mm_set_epi32( i3, i2, i1, i0 ) ) -// // All SIMD constant macros are actually functions containing executable // code and therefore can't be used as compile time initializers. @@ -121,6 +117,7 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n ) // Basic operations without SIMD equivalent #if defined(__AVX512VL__) +//TODO Enable for AVX10_256 static inline __m256i mm256_not( const __m256i v ) { return _mm256_ternarylogic_epi64( v, v, v, 1 ); } @@ -140,8 +137,7 @@ static inline __m256i mm256_not( const __m256i v ) _mm256_add_epi32( _mm256_add_epi32( a, b ), _mm256_add_epi32( c, d ) ) #if defined(__AVX512VL__) - -// AVX512 has ternary logic that supports any 3 input boolean expression. 
+//TODO Enable for AVX10_256 // a ^ b ^ c #define mm256_xor3( a, b, c ) _mm256_ternarylogic_epi64( a, b, c, 0x96 ) @@ -176,31 +172,31 @@ static inline __m256i mm256_not( const __m256i v ) #else #define mm256_xor3( a, b, c ) \ - _mm256_xor_si256( a, _mm256_xor_si256( b, c ) ) + _mm256_xor_si256( a, _mm256_xor_si256( b, c ) ) #define mm256_xor4( a, b, c, d ) \ - _mm256_xor_si256( _mm256_xor_si256( a, b ), _mm256_xor_si256( c, d ) ) + _mm256_xor_si256( _mm256_xor_si256( a, b ), _mm256_xor_si256( c, d ) ) #define mm256_and3( a, b, c ) \ - _mm256_and_si256( a, _mm256_and_si256( b, c ) ) + _mm256_and_si256( a, _mm256_and_si256( b, c ) ) #define mm256_or3( a, b, c ) \ _mm256_or_si256( a, _mm256_or_si256( b, c ) ) #define mm256_xorand( a, b, c ) \ - _mm256_xor_si256( a, _mm256_and_si256( b, c ) ) + _mm256_xor_si256( a, _mm256_and_si256( b, c ) ) #define mm256_andxor( a, b, c ) \ _mm256_and_si256( a, _mm256_xor_si256( b, c )) #define mm256_xoror( a, b, c ) \ - _mm256_xor_si256( a, _mm256_or_si256( b, c ) ) + _mm256_xor_si256( a, _mm256_or_si256( b, c ) ) #define mm256_xorandnot( a, b, c ) \ - _mm256_xor_si256( a, _mm256_andnot_si256( b, c ) ) + _mm256_xor_si256( a, _mm256_andnot_si256( b, c ) ) #define mm256_orand( a, b, c ) \ - _mm256_or_si256( a, _mm256_and_si256( b, c ) ) + _mm256_or_si256( a, _mm256_and_si256( b, c ) ) #define mm256_xnor( a, b ) \ mm256_not( _mm256_xor_si256( a, b ) ) @@ -226,6 +222,7 @@ static inline __m256i mm256_not( const __m256i v ) // transparency. 
#if defined(__AVX512VL__) +//TODO Enable for AVX10_256 #define mm256_ror_64 _mm256_ror_epi64 #define mm256_rol_64 _mm256_rol_epi64 @@ -380,6 +377,7 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c ) #define mm256_shuflr64_32 mm256_swap64_32 #define mm256_shufll64_32 mm256_swap64_32 +//TODO Enable for AVX10_256 #if defined(__AVX512VL__) #define mm256_shuflr64_24( v ) _mm256_ror_epi64( v, 24 ) #else diff --git a/simd-utils/simd-512.h b/simd-utils/simd-512.h index 8f3cadc7..ebd7d764 100644 --- a/simd-utils/simd-512.h +++ b/simd-utils/simd-512.h @@ -113,10 +113,6 @@ static inline __m512i mm512_perm_128( const __m512i v, const int c ) #define mm512_set2_64( i1, i0 ) \ mm512_bcast_m128( _mm_set_epi64x( i1, i0 ) ) -// Deprecated, use set -#define m512_const1_64 _mm512_set1_epi64 -#define m512_const1_32 _mm512_set1_epi32 - // Pseudo constants. #define m512_zero _mm512_setzero_si512() // Deprecated diff --git a/sysinfos.c b/sysinfos.c index 5b6f9d88..960ae17f 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -174,35 +174,147 @@ static inline int cpu_fanpercent() return 0; } + +// CPUID + +// This list is incomplete, it only contains features of interest to cpuminer. +// Refer to http://en.wikipedia.org/wiki/CPUID for details. + +// AVX10 compatibility notes +// +// Notation used: AVX10.[version]_[vectorwidth] +// AVX10.1_512 is a rebranding of AVX512 and is effectively the AVX* superset +// with full 512 bit vector support. +// AVX10.2_256 is effectively AVX2 + AVX512_VL, all AVX512 instructions and +// features applied only to 256 bit and 128 bit vectors. +// Future AVX10 versions will add new instructions and features. 
+ +// Register array indexes +#define EAX_Reg (0) +#define EBX_Reg (1) +#define ECX_Reg (2) +#define EDX_Reg (3) + +// CPUID function number, aka leaf (EAX) +#define VENDOR_ID (0) +#define CPU_INFO (1) +#define CACHE_TLB_DESCRIPTOR (2) +#define EXTENDED_FEATURES (7) +#define AVX10_FEATURES (0x24) +#define HIGHEST_EXT_FUNCTION (0x80000000) +#define EXTENDED_CPU_INFO (0x80000001) +#define CPU_BRAND_1 (0x80000002) +#define CPU_BRAND_2 (0x80000003) +#define CPU_BRAND_3 (0x80000004) + +// CPU_INFO: EAX=1, ECX=0 +// ECX +#define SSE3_Flag 1 +#define SSSE3_Flag (1<< 9) +#define XOP_Flag (1<<11) // obsolete +#define FMA3_Flag (1<<12) +#define SSE41_Flag (1<<19) +#define SSE42_Flag (1<<20) +#define AES_NI_Flag (1<<25) +#define XSAVE_Flag (1<<26) +#define OSXSAVE_Flag (1<<27) +#define AVX_Flag (1<<28) +// EDX +#define MMX_Flag (1<<23) +#define SSE_Flag (1<<25) +#define SSE2_Flag (1<<26) + +// EXTENDED_FEATURES subleaf 0: EAX=7, ECX=0 +// EBX +#define AVX2_Flag (1<< 5) +#define AVX512_F_Flag (1<<16) +#define AVX512_DQ_Flag (1<<17) +#define AVX512_IFMA_Flag (1<<21) +#define AVX512_PF_Flag (1<<26) +#define AVX512_ER_Flag (1<<27) +#define AVX512_CD_Flag (1<<28) +#define SHA_Flag (1<<29) +#define AVX512_BW_Flag (1<<30) +#define AVX512_VL_Flag (1<<31) +// ECX +#define AVX512_VBMI_Flag (1<< 1) +#define AVX512_VBMI2_Flag (1<< 6) +#define VAES_Flag (1<< 9) +#define AVX512_VNNI_Flag (1<<11) +#define AVX512_BITALG_Flag (1<<12) +#define AVX512_VPOPCNTDQ_Flag (1<<14) +// EDX +#define AVX512_4VNNIW_Flag (1<< 2) +#define AVX512_4FMAPS_Flag (1<< 3) +#define AVX512_VP2INTERSECT_Flag (1<< 8) +#define AMX_BF16_Flag (1<<22) +#define AVX512_FP16_Flag (1<<23) +#define AMX_TILE_Flag (1<<24) +#define AMX_INT8_Flag (1<<25) + +// EXTENDED_FEATURES subleaf 1: EAX=7, ECX=1 +// EAX +#define SHA512_Flag 1 +#define SM3_Flag (1<< 1) +#define SM4_Flag (1<< 2) +#define AVX_VNNI_Flag (1<< 4) +#define AVX512_BF16_Flag (1<< 5) +#define AMX_FP16_Flag (1<<21) +#define AVX_IFMA_Flag (1<<23) +// EDX +#define 
AVX_VNNI_INT8_Flag (1<< 4) +#define AVX_NE_CONVERT_Flag (1<< 5) +#define AMX_COMPLEX_Flag (1<< 8) +#define AVX_VNNI_INT16_Flag (1<<10) +#define AVX10_Flag (1<<19) +#define APX_F_Flag (1<<21) + +// AVX10_FEATURES: EAX=0x24, ECX=0 +// EBX +#define AVX10_VERSION_mask 0xff // bits [7:0] +#define AVX10_128_Flag (1<<16) +#define AVX10_256_Flag (1<<17) +#define AVX10_512_Flag (1<<18) + +// Use this to detect presence of feature +#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag) +#define FMA3_mask (FMA3_Flag|AVX_mask) +#define AVX512_mask (AVX512_VL_Flag|AVX512_BW_Flag|AVX512_DQ_Flag|AVX512_F_Flag) + + #ifndef __arm__ -static inline void cpuid(int functionnumber, int output[4]) { +static inline void cpuid( unsigned int leaf, unsigned int subleaf, + unsigned int output[4] ) +{ #if defined (_MSC_VER) || defined (__INTEL_COMPILER) - // Microsoft or Intel compiler, intrin.h included - __cpuidex(output, functionnumber, 0); + // Microsoft or Intel compiler, intrin.h included + __cpuidex(output, leaf, subleaf ); #elif defined(__GNUC__) || defined(__clang__) - // use inline assembly, Gnu/AT&T syntax - int a, b, c, d; - asm volatile("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(0)); - output[0] = a; - output[1] = b; - output[2] = c; - output[3] = d; + // use inline assembly, Gnu/AT&T syntax + unsigned int a, b, c, d; + asm volatile( "cpuid" + : "=a"(a), "=b"(b), "=c"(c), "=d"(d) + : "a"(leaf), "c"(subleaf) ); + output[ EAX_Reg ] = a; + output[ EBX_Reg ] = b; + output[ ECX_Reg ] = c; + output[ EDX_Reg ] = d; #else - // unknown platform. try inline assembly with masm/intel syntax - __asm { - mov eax, functionnumber - xor ecx, ecx - cpuid; - mov esi, output - mov[esi], eax - mov[esi + 4], ebx - mov[esi + 8], ecx - mov[esi + 12], edx - } + // unknown platform. 
try inline assembly with masm/intel syntax + __asm { + mov eax, leaf + mov ecx, subleaf + cpuid; + mov esi, output + mov[esi], eax + mov[esi + 4], ebx + mov[esi + 8], ecx + mov[esi + 12], edx + } #endif } #else /* !__arm__ */ -#define cpuid(fn, out) out[0] = 0; +#define cpuid(leaf, subleaf, out) out[0] = 0; #endif static inline void cpu_getname(char *outbuf, size_t maxsz) @@ -211,13 +323,13 @@ static inline void cpu_getname(char *outbuf, size_t maxsz) #ifdef WIN32 char brand[256] = { 0 }; int output[4] = { 0 }, ext; - cpuid(0x80000000, output); + cpuid( 0x80000000, 0, output ); ext = output[0]; if (ext >= 0x80000004) { for (int i = 2; i <= (ext & 0xF); i++) { - cpuid(0x80000000+i, output); + cpuid( 0x80000000+i, 0, output); memcpy(&brand[(i-2) * 4*sizeof(int)], output, 4*sizeof(int)); } snprintf(outbuf, maxsz, "%s", brand); @@ -309,70 +421,97 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz) #endif } -// http://en.wikipedia.org/wiki/CPUID - -// CPUID commands -#define VENDOR_ID (0) -#define CPU_INFO (1) -#define CACHE_TLB_DESCRIPTOR (2) -#define EXTENDED_FEATURES (7) -#define HIGHEST_EXT_FUNCTION (0x80000000) -#define EXTENDED_CPU_INFO (0x80000001) -#define CPU_BRAND_1 (0x80000002) -#define CPU_BRAND_2 (0x80000003) -#define CPU_BRAND_3 (0x80000004) +// Typical display format: AVX10.[version]_[vectorlength], if vector length is +// omitted 256 is the default. +// Ex: AVX10.1_512 +// Flags: +// AVX10 128 256 512 +// 0 0 0 0 = AVX10 not supported +// 1 1 1 0 = AVX10 256 bit max (version 2) +// 1 1 1 1 = AVX10 512 bit max (version 1 granite rapids) +// Other combinations are not defined. + +// Test AVX10_flag before AVX10_FEATURES flags. 
+static inline bool has_avx10() +{ +#ifdef __arm__ + return false; +#else + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 1, cpu_info ); + return cpu_info[ EDX_Reg ] & AVX10_Flag; +#endif +} -// Registers -#define EAX_Reg (0) -#define EBX_Reg (1) -#define ECX_Reg (2) -#define EDX_Reg (3) +static inline unsigned int avx10_version() +{ +#ifdef __arm__ + return 0; +#else + if ( has_avx10() ) + { + unsigned int cpu_info[4] = { 0 }; + cpuid( AVX10_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX10_VERSION_mask; + } + return 0; +#endif +} -// Feature flags - -// CPU_INFO ECX -#define SSE3_Flag 1 -#define SSSE3_Flag (1<< 9) -#define XOP_Flag (1<<11) // obsolete, only available on pre-Ryzen AMD -#define FMA3_Flag (1<<12) -#define AES_Flag (1<<25) -#define SSE41_Flag (1<<19) -#define SSE42_Flag (1<<20) -#define AES_Flag (1<<25) -#define XSAVE_Flag (1<<26) -#define OSXSAVE_Flag (1<<27) -#define AVX_Flag (1<<28) - -// CPU_INFO EDX -#define SSE_Flag (1<<25) -#define SSE2_Flag (1<<26) - -// EXTENDED_FEATURES EBX -#define AVX2_Flag (1<< 5) -#define AVX512F_Flag (1<<16) -#define AVX512DQ_Flag (1<<17) -#define SHA_Flag (1<<29) -#define AVX512BW_Flag (1<<30) -#define AVX512VL_Flag (1<<31) - -// EXTENDED_FEATURES ECX -#define AVX512VBMI_Flag (1<<1) -#define AVX512VBMI2_Flag (1<<6) -#define VAES_Flag (1<<9) +static inline bool has_avx10_512() +{ +#ifdef __arm__ + return false; +#else + if ( has_avx10() ) + { + unsigned int cpu_info[4] = { 0 }; + cpuid( AVX10_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX10_512_Flag; + } + return false; +#endif +} +static inline bool has_avx10_256() +{ +#ifdef __arm__ + return false; +#else + if ( has_avx10() ) + { + unsigned int cpu_info[4] = { 0 }; + cpuid( AVX10_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX10_256_Flag; + } + return false; +#endif +} -// Use this to detect presence of feature -#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag) -#define FMA3_mask (FMA3_Flag|AVX_mask) -#define 
AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag) +// Maximum vector length +static inline unsigned int avx10_vector_length() +{ +#ifdef __arm__ + return 0; +#else + if ( has_avx10() ) + { + unsigned int cpu_info[4] = { 0 }; + cpuid( AVX10_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX10_512_Flag ? 512 + : ( cpu_info[ EBX_Reg ] & AVX10_256_Flag ? 256 : 0 ); + } + return 0; +#endif +} static inline bool has_sha() { #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & SHA_Flag; #endif } @@ -382,8 +521,8 @@ static inline bool has_sse2() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ EDX_Reg ] & SSE2_Flag; #endif } @@ -394,9 +533,9 @@ static inline bool has_aes_ni() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); - return cpu_info[ ECX_Reg ] & AES_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( CPU_INFO, 0, cpu_info ); + return cpu_info[ ECX_Reg ] & AES_NI_Flag; #endif } @@ -406,8 +545,8 @@ static inline bool has_avx() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( CPU_INFO, 0, cpu_info ); return ( ( cpu_info[ ECX_Reg ] & AVX_mask ) == AVX_mask ); #endif } @@ -418,8 +557,8 @@ static inline bool has_avx2() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ EBX_Reg ] & AVX2_Flag; #endif } @@ -429,9 +568,9 @@ static inline bool has_avx512f() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); - return cpu_info[ EBX_Reg ] & 
AVX512F_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512_F_Flag; #endif } @@ -440,9 +579,9 @@ static inline bool has_avx512dq() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); - return cpu_info[ EBX_Reg ] & AVX512DQ_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512_DQ_Flag; #endif } @@ -451,9 +590,9 @@ static inline bool has_avx512bw() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); - return cpu_info[ EBX_Reg ] & AVX512BW_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512_BW_Flag; #endif } @@ -462,9 +601,9 @@ static inline bool has_avx512vl() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); - return cpu_info[ EBX_Reg ] & AVX512VL_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); + return cpu_info[ EBX_Reg ] & AVX512_VL_Flag; #endif } @@ -474,30 +613,19 @@ static inline bool has_avx512() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); return ( ( cpu_info[ EBX_Reg ] & AVX512_mask ) == AVX512_mask ); #endif } -// AMD Zen3 added support for 256 bit VAES without requiring AVX512. -// The original Intel spec requires AVX512F to support 512 bit VAES and -// requires AVX512VL to support 256 bit VAES. -// The CPUID VAES bit alone can't distiguish 256 vs 512 bit. 
-// If necessary: -// VAES 256 & 512 = VAES && AVX512VL -// VAES 512 = VAES && AVX512F -// VAES 256 = ( VAES && AVX512VL ) || ( VAES && !AVX512F ) -// VAES 512 only = VAES && AVX512F && !AVX512VL -// VAES 256 only = VAES && !AVX512F - static inline bool has_vaes() { #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); return cpu_info[ ECX_Reg ] & VAES_Flag; #endif } @@ -507,9 +635,9 @@ static inline bool has_vbmi() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); - return cpu_info[ ECX_Reg ] & AVX512VBMI_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); + return cpu_info[ ECX_Reg ] & AVX512_VBMI_Flag; #endif } @@ -518,9 +646,9 @@ static inline bool has_vbmi2() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_FEATURES, cpu_info ); - return cpu_info[ ECX_Reg ] & AVX512VBMI2_Flag; + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_FEATURES, 0, cpu_info ); + return cpu_info[ ECX_Reg ] & AVX512_VBMI2_Flag; #endif } @@ -530,8 +658,8 @@ static inline bool has_xop() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( EXTENDED_CPU_INFO, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( EXTENDED_CPU_INFO, 0, cpu_info ); return cpu_info[ ECX_Reg ] & XOP_Flag; #endif } @@ -541,8 +669,8 @@ static inline bool has_fma3() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( CPU_INFO, 0, cpu_info ); return ( ( cpu_info[ ECX_Reg ] & FMA3_mask ) == FMA3_mask ); #endif } @@ -552,8 +680,8 @@ static inline bool has_sse42() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ ECX_Reg ] & SSE42_Flag; #endif } 
@@ -563,16 +691,16 @@ static inline bool has_sse() #ifdef __arm__ return false; #else - int cpu_info[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); + unsigned int cpu_info[4] = { 0 }; + cpuid( CPU_INFO, 0, cpu_info ); return cpu_info[ EDX_Reg ] & SSE_Flag; #endif } static inline uint32_t cpuid_get_highest_function_number() { - uint32_t cpu_info[4] = {0}; - cpuid( VENDOR_ID, cpu_info); + unsigned int cpu_info[4] = {0}; + cpuid( VENDOR_ID, 0, cpu_info); return cpu_info[ EAX_Reg ]; } @@ -605,8 +733,8 @@ static inline void cpu_bestfeature(char *outbuf, size_t maxsz) #else int cpu_info[4] = { 0 }; int cpu_info_adv[4] = { 0 }; - cpuid( CPU_INFO, cpu_info ); - cpuid( EXTENDED_FEATURES, cpu_info_adv ); + cpuid( CPU_INFO, 0, cpu_info ); + cpuid( EXTENDED_FEATURES, 0, cpu_info_adv ); if ( has_avx() && has_avx2() ) sprintf(outbuf, "AVX2"); @@ -634,14 +762,14 @@ static inline void cpu_brand_string( char* s ) sprintf( s, "ARM" ); #else int cpu_info[4] = { 0 }; - cpuid( VENDOR_ID, cpu_info ); + cpuid( VENDOR_ID, 0, cpu_info ); if ( cpu_info[ EAX_Reg ] >= 4 ) { - cpuid( CPU_BRAND_1, cpu_info ); + cpuid( CPU_BRAND_1, 0, cpu_info ); memcpy( s, cpu_info, sizeof(cpu_info) ); - cpuid( CPU_BRAND_2, cpu_info ); + cpuid( CPU_BRAND_2, 0, cpu_info ); memcpy( s + 16, cpu_info, sizeof(cpu_info) ); - cpuid( CPU_BRAND_3, cpu_info ); + cpuid( CPU_BRAND_3, 0, cpu_info ); memcpy( s + 32, cpu_info, sizeof(cpu_info) ); } #endif